@platforma-open/milaboratories.immune-assay-data.workflow 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +3 -1
- package/CHANGELOG.md +6 -0
- package/dist/index.cjs +1 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/run-alignment.plj.gz +0 -0
- package/package.json +3 -2
- package/src/main.tpl.tengo +297 -221
- package/src/run-alignment.tpl.tengo +47 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/immune-assay-data/immune-assay-data/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.immune-assay-data.workflow@1.0.2 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
|
|
3
|
+
> @platforma-open/milaboratories.immune-assay-data.workflow@1.1.0 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
|
|
4
4
|
> rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
Processing "src/main.tpl.tengo"...
|
|
7
|
+
Processing "src/run-alignment.tpl.tengo"...
|
|
7
8
|
No syntax errors found.
|
|
8
9
|
info: Compiling 'dist'...
|
|
10
|
+
info: - writing /home/runner/work/immune-assay-data/immune-assay-data/workflow/dist/tengo/tpl/run-alignment.plj.gz
|
|
9
11
|
info: - writing /home/runner/work/immune-assay-data/immune-assay-data/workflow/dist/tengo/tpl/main.plj.gz
|
|
10
12
|
info: Template Pack build done.
|
|
11
13
|
info: Template Pack build done.
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.immune-assay-data.workflow
|
|
2
2
|
|
|
3
|
+
## 1.1.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 40fd1d2: Updated matching parameters. Handle empty results (no matching clonotypes). Fixed logic for assay sequence column detection. Allow for assay column selection.
|
|
8
|
+
|
|
3
9
|
## 1.0.2
|
|
4
10
|
|
|
5
11
|
### Patch Changes
|
package/dist/index.cjs
CHANGED
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { resolve } from 'node:path';
|
|
2
2
|
export const Templates = {
|
|
3
|
+
'run-alignment': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/run-alignment.plj.gz') },
|
|
3
4
|
'main': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/main.plj.gz') }
|
|
4
5
|
};
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.immune-assay-data.workflow",
|
|
3
|
-
"version": "1.0.2",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Block Workflow",
|
|
6
6
|
"dependencies": {
|
|
7
7
|
"@platforma-sdk/workflow-tengo": "^4.7.1",
|
|
8
8
|
"@platforma-open/soedinglab.software-mmseqs2": "^1.0.0",
|
|
9
9
|
"@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.0.3",
|
|
10
|
-
"@platforma-open/milaboratories.immune-assay-data.add-header": "1.0.2"
|
|
10
|
+
"@platforma-open/milaboratories.immune-assay-data.add-header": "1.0.2",
|
|
11
|
+
"@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.0.0"
|
|
11
12
|
},
|
|
12
13
|
"devDependencies": {
|
|
13
14
|
"@platforma-sdk/tengo-builder": "^2.1.7",
|
package/src/main.tpl.tengo
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
// light block with no workflow
|
|
2
1
|
wf := import("@platforma-sdk/workflow-tengo:workflow")
|
|
3
2
|
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
4
3
|
file := import("@platforma-sdk/workflow-tengo:file")
|
|
@@ -10,10 +9,17 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
|
|
|
10
9
|
pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
|
|
11
10
|
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
12
11
|
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
12
|
+
path := import("@platforma-sdk/workflow-tengo:path")
|
|
13
|
+
json := import("json")
|
|
14
|
+
strings := import("@platforma-sdk/workflow-tengo:strings")
|
|
15
|
+
text := import("text")
|
|
16
|
+
render := import("@platforma-sdk/workflow-tengo:render")
|
|
17
|
+
runAlignmentTpl := assets.importTemplate(":run-alignment")
|
|
13
18
|
|
|
14
19
|
prepareFastaSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.prepare-fasta:main")
|
|
15
20
|
mmseqsSw := assets.importSoftware("@platforma-open/soedinglab.software-mmseqs2:main")
|
|
16
21
|
addHeaderSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.add-header:main")
|
|
22
|
+
covModeCalcSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc:main")
|
|
17
23
|
|
|
18
24
|
wf.prepare(func(args){
|
|
19
25
|
bundleBuilder := wf.createPBundleBuilder()
|
|
@@ -24,12 +30,12 @@ wf.prepare(func(args){
|
|
|
24
30
|
}
|
|
25
31
|
})
|
|
26
32
|
|
|
27
|
-
|
|
33
|
+
prepareAssayFile := func(args, file, xsvType) {
|
|
28
34
|
// assign ids to assay sequences
|
|
29
35
|
ptw := pt.workflow()
|
|
30
36
|
df := ptw.frame({
|
|
31
37
|
file: file,
|
|
32
|
-
xsvType:
|
|
38
|
+
xsvType: xsvType
|
|
33
39
|
})
|
|
34
40
|
|
|
35
41
|
//////// calculate sequence id ////////
|
|
@@ -81,9 +87,9 @@ prepareClonesTsv := func(args) {
|
|
|
81
87
|
/**
|
|
82
88
|
* Convert tsv file to fasta file
|
|
83
89
|
* @param fileTsv - tsv file
|
|
84
|
-
* @return fasta file
|
|
90
|
+
* @return fasta file run result
|
|
85
91
|
*/
|
|
86
|
-
tsv2Fasta := func(fileTsv) {
|
|
92
|
+
runTsvToFasta := func(fileTsv) {
|
|
87
93
|
e := exec.builder().
|
|
88
94
|
software(prepareFastaSw).
|
|
89
95
|
addFile("input.tsv", fileTsv).
|
|
@@ -93,7 +99,7 @@ tsv2Fasta := func(fileTsv) {
|
|
|
93
99
|
arg("--id_col").arg("seqId").
|
|
94
100
|
saveFile("output.fasta")
|
|
95
101
|
|
|
96
|
-
return e.run()
|
|
102
|
+
return e.run()
|
|
97
103
|
}
|
|
98
104
|
|
|
99
105
|
wf.body(func(args) {
|
|
@@ -122,13 +128,47 @@ wf.body(func(args) {
|
|
|
122
128
|
ll.panic("Assay sequence type is undefined")
|
|
123
129
|
}
|
|
124
130
|
|
|
125
|
-
|
|
131
|
+
handleUrl := ll.parseUrl(args.fileHandle)
|
|
132
|
+
jsonPayload := handleUrl.Path[1:]
|
|
133
|
+
fileInfo := json.decode(jsonPayload)
|
|
134
|
+
|
|
135
|
+
fileName := ""
|
|
136
|
+
if fileInfo.localPath != undefined {
|
|
137
|
+
fileName = fileInfo.localPath
|
|
138
|
+
} else if fileInfo.path != undefined {
|
|
139
|
+
fileName = fileInfo.path
|
|
140
|
+
} else {
|
|
141
|
+
ll.panic("Could not determine filename from file handle: ", args.fileHandle)
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
fileNameParts := path.split(fileName, ".")
|
|
145
|
+
xsvType := "tsv"
|
|
146
|
+
if len(fileNameParts) > 1 {
|
|
147
|
+
xsvType = fileNameParts[len(fileNameParts)-1]
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
assayTsv := prepareAssayFile(args, importFile.file, xsvType)
|
|
126
151
|
clonesTsv := prepareClonesTsv(args)
|
|
127
152
|
|
|
128
153
|
// prepare fasta
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
154
|
+
clonesFastaRun := runTsvToFasta(clonesTsv)
|
|
155
|
+
assayFastaRun := runTsvToFasta(assayTsv)
|
|
156
|
+
clonesFasta := clonesFastaRun.getFile("output.fasta")
|
|
157
|
+
assayFasta := assayFastaRun.getFile("output.fasta")
|
|
158
|
+
|
|
159
|
+
// Dynamically determine coverage mode by comparing average sequence lengths
|
|
160
|
+
coverageMode := exec.builder().
|
|
161
|
+
software(covModeCalcSw).
|
|
162
|
+
addFile("clones.fasta", clonesFasta).
|
|
163
|
+
addFile("assay.fasta", assayFasta).
|
|
164
|
+
arg("--clones-fasta").arg("clones.fasta").
|
|
165
|
+
arg("--assay-fasta").arg("assay.fasta").
|
|
166
|
+
arg("--output").arg("coverage_mode.txt").
|
|
167
|
+
saveFileContent("coverage_mode.txt").
|
|
168
|
+
run()
|
|
169
|
+
|
|
170
|
+
covMode := coverageMode.getFileContent("coverage_mode.txt")
|
|
171
|
+
|
|
132
172
|
mmseqsSearchType := "0"
|
|
133
173
|
if targetSequenceType == "aminoacid" && assaySequenceType == "aminoacid" {
|
|
134
174
|
//1: amino acid
|
|
@@ -143,266 +183,302 @@ wf.body(func(args) {
|
|
|
143
183
|
// 2: nucleotide
|
|
144
184
|
mmseqsSearchType = "2"
|
|
145
185
|
}
|
|
146
|
-
// run search
|
|
147
|
-
mmseqs := exec.builder().
|
|
148
|
-
software(mmseqsSw).
|
|
149
|
-
dontSaveStdoutOrStderr(). // important to avoid CID conflict problems coming from different stdout output on same datasets
|
|
150
|
-
arg("easy-search").
|
|
151
|
-
arg("clones.fasta").
|
|
152
|
-
arg("assay.fasta").
|
|
153
|
-
arg("results.tsv").
|
|
154
|
-
arg("tmp").
|
|
155
|
-
arg("--search-type").arg(mmseqsSearchType).
|
|
156
|
-
arg("--cov-mode").arg(string(args.settings.coverageMode)).
|
|
157
|
-
arg("-c").arg(string(args.settings.coverageThreshold)).
|
|
158
|
-
addFile("clones.fasta", clonesFasta).
|
|
159
|
-
addFile("assay.fasta", assayFasta).
|
|
160
|
-
saveFile("results.tsv").
|
|
161
|
-
run()
|
|
162
186
|
|
|
163
|
-
|
|
187
|
+
runMmseqs := render.create(runAlignmentTpl, {
|
|
188
|
+
covMode: covMode,
|
|
189
|
+
mmseqsSearchType: mmseqsSearchType,
|
|
190
|
+
coverageThreshold: args.settings.coverageThreshold,
|
|
191
|
+
identityThreshold: args.settings.identity,
|
|
192
|
+
similarityType: args.settings.similarityType,
|
|
193
|
+
clonesFasta: clonesFasta,
|
|
194
|
+
assayFasta: assayFasta
|
|
195
|
+
})
|
|
196
|
+
|
|
197
|
+
mmseqsOutput := runMmseqs.output("mmseqsOutput")
|
|
198
|
+
|
|
164
199
|
// @TODO remove header stuff and replace with pt when available (!)
|
|
165
|
-
|
|
200
|
+
addHeaderRunResult := exec.builder().
|
|
166
201
|
software(addHeaderSw).
|
|
167
202
|
arg("-i").arg("results.tsv").
|
|
168
203
|
arg("-o").arg("results_with_header.tsv").
|
|
169
204
|
addFile("results.tsv", mmseqsOutput).
|
|
170
205
|
saveFile("results_with_header.tsv").
|
|
171
|
-
run()
|
|
172
|
-
getFile("results_with_header.tsv")
|
|
206
|
+
run()
|
|
173
207
|
|
|
208
|
+
mmseqsResultTsv := addHeaderRunResult.getFile("results_with_header.tsv")
|
|
209
|
+
mmseqsResultTsvContent := addHeaderRunResult.getFileContent("results_with_header.tsv")
|
|
174
210
|
|
|
175
|
-
|
|
211
|
+
emptyResults := len(text.trim_space(string(mmseqsResultTsvContent))) == 0
|
|
212
|
+
blockId := wf.blockId().getDataAsJson()
|
|
176
213
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
file: mmseqsResultTsv,
|
|
180
|
-
xsvType: "tsv"
|
|
181
|
-
})
|
|
214
|
+
assayPframe := undefined
|
|
215
|
+
epf := undefined
|
|
182
216
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
df = df.groupBy("query").agg(cols...)
|
|
194
|
-
df.save("results.tsv")
|
|
217
|
+
if emptyResults {
|
|
218
|
+
assayPframe = pframes.emptyPFrame()
|
|
219
|
+
epf = pframes.emptyPFrame()
|
|
220
|
+
} else {
|
|
221
|
+
//////// Process tables ////////
|
|
222
|
+
ptw := pt.workflow()
|
|
223
|
+
df := ptw.frame({
|
|
224
|
+
file: mmseqsResultTsv,
|
|
225
|
+
xsvType: "tsv"
|
|
226
|
+
})
|
|
195
227
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
228
|
+
// Cast columns to ensure correct types for aggregation
|
|
229
|
+
df = df.withColumns(
|
|
230
|
+
pt.col("evalue").cast("Float64").alias("evalue"),
|
|
231
|
+
pt.col("bits").cast("Float64").alias("bits")
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
cols := []
|
|
235
|
+
for _, col in ["bits", "evalue", "target", "pident", "alnlen", "mismatch", "gapopen", "qstart", "qend", "tstart", "tend"] {
|
|
236
|
+
cols = append(cols,
|
|
237
|
+
pt.col(col).maxBy(
|
|
238
|
+
pt.col("evalue").multiply(-1),
|
|
239
|
+
pt.col("bits")
|
|
240
|
+
).alias(col)
|
|
241
|
+
)
|
|
210
242
|
}
|
|
211
|
-
|
|
212
|
-
|
|
243
|
+
|
|
244
|
+
df = df.groupBy("query").agg(cols...)
|
|
245
|
+
df.save("results.tsv")
|
|
213
246
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
247
|
+
// assay data import summary
|
|
248
|
+
assayDf := ptw.frame({
|
|
249
|
+
file: assayTsv,
|
|
250
|
+
xsvType: "tsv"
|
|
251
|
+
})
|
|
252
|
+
// import how many matches per assay sequence found
|
|
253
|
+
assayDf = assayDf.join(
|
|
254
|
+
df.groupBy("target").agg(
|
|
255
|
+
pt.col("query").count().alias("queryCount")
|
|
256
|
+
),
|
|
257
|
+
{
|
|
258
|
+
how: "left",
|
|
259
|
+
leftOn: "seqId",
|
|
260
|
+
rightOn: "target"
|
|
261
|
+
}
|
|
262
|
+
)
|
|
263
|
+
assayDf.save("assayData.tsv")
|
|
264
|
+
|
|
265
|
+
// clones
|
|
266
|
+
clonesDf := df.join(assayDf,
|
|
267
|
+
{
|
|
268
|
+
how: "left",
|
|
269
|
+
leftOn: "target",
|
|
270
|
+
rightOn: "seqId"
|
|
271
|
+
}
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
clonesDf.save("clonesData.tsv")
|
|
275
|
+
ptw = ptw.run()
|
|
276
|
+
|
|
277
|
+
//////// Building outputs & exports ////////
|
|
278
|
+
assayColumns := [
|
|
279
|
+
{
|
|
280
|
+
column: "seqIdLabel",
|
|
281
|
+
spec: {
|
|
282
|
+
name: "pl7.app/label",
|
|
283
|
+
valueType: "String",
|
|
284
|
+
annotations: {
|
|
285
|
+
"pl7.app/label": "Sequence Id",
|
|
286
|
+
"pl7.app/table/fontFamily": "monospace"
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
},
|
|
290
|
+
{
|
|
291
|
+
column: "queryCount",
|
|
292
|
+
spec: {
|
|
293
|
+
name: "pl7.app/vdj/assay/queryCount",
|
|
294
|
+
valueType: "Int",
|
|
295
|
+
annotations: {
|
|
296
|
+
"pl7.app/label": "Matched Clones",
|
|
297
|
+
"pl7.app/table/orderPriority": "9000"
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
},
|
|
301
|
+
{
|
|
302
|
+
column: sequenceColumnInfo.header,
|
|
303
|
+
spec: {
|
|
304
|
+
name: "pl7.app/vdj/sequence",
|
|
305
|
+
valueType: "String",
|
|
306
|
+
domain: {
|
|
307
|
+
"pl7.app/alphabet": assaySequenceType
|
|
308
|
+
},
|
|
309
|
+
annotations: {
|
|
310
|
+
"pl7.app/label": sequenceColumnInfo.header,
|
|
311
|
+
"pl7.app/table/fontFamily": "monospace",
|
|
312
|
+
"pl7.app/table/orderPriority": "10000"
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
]
|
|
317
|
+
|
|
318
|
+
columnsToImport := args.importColumns
|
|
319
|
+
if args.selectedColumns != undefined && len(args.selectedColumns) > 0 {
|
|
320
|
+
selectedHeaders := {}
|
|
321
|
+
for header in args.selectedColumns {
|
|
322
|
+
selectedHeaders[header] = true
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
filteredColumns := []
|
|
326
|
+
for col in args.importColumns {
|
|
327
|
+
// Always include the main sequence column
|
|
328
|
+
if col.header == args.sequenceColumnHeader || selectedHeaders[col.header] {
|
|
329
|
+
filteredColumns = append(filteredColumns, col)
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
columnsToImport = filteredColumns
|
|
220
333
|
}
|
|
221
|
-
)
|
|
222
334
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
335
|
+
for h in columnsToImport {
|
|
336
|
+
if h.header == args.sequenceColumnHeader {
|
|
337
|
+
continue
|
|
338
|
+
}
|
|
339
|
+
assayColumns = append(assayColumns, {
|
|
340
|
+
column: h.header,
|
|
341
|
+
spec: {
|
|
342
|
+
name: h.header,
|
|
343
|
+
valueType: h.type,
|
|
344
|
+
annotations: {
|
|
345
|
+
"pl7.app/label": h.header,
|
|
346
|
+
"pl7.app/table/orderPriority": "1000"
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
})
|
|
350
|
+
}
|
|
228
351
|
|
|
229
|
-
|
|
352
|
+
assayImportResults := xsv.importFile(ptw.getFile("assayData.tsv"), "tsv", {
|
|
353
|
+
axes: [{
|
|
354
|
+
column: "seqId",
|
|
355
|
+
spec: {
|
|
356
|
+
name: "pl7.app/vdj/assay/sequenceId",
|
|
357
|
+
type: "String",
|
|
358
|
+
domain: {
|
|
359
|
+
"pl7.app/blockId": blockId
|
|
360
|
+
},
|
|
361
|
+
annotations: {
|
|
362
|
+
"pl7.app/label": "Sequence Id",
|
|
363
|
+
"pl7.app/table/fontFamily": "monospace"
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
}],
|
|
367
|
+
columns: assayColumns,
|
|
368
|
+
annotations: {
|
|
369
|
+
"pl7.app/isAnchor": "true"
|
|
370
|
+
}
|
|
371
|
+
})
|
|
372
|
+
|
|
373
|
+
// "bits", "evalue", "pident"
|
|
374
|
+
cloneColumns := [
|
|
230
375
|
{
|
|
231
376
|
column: "seqIdLabel",
|
|
232
377
|
spec: {
|
|
233
|
-
name: "pl7.app/
|
|
378
|
+
name: "pl7.app/vdj/assay/sequenceIdLabel",
|
|
234
379
|
valueType: "String",
|
|
235
380
|
annotations: {
|
|
236
|
-
"pl7.app/label": "Sequence Id",
|
|
237
|
-
"pl7.app/table/fontFamily": "monospace"
|
|
381
|
+
"pl7.app/label": "Assay Sequence Id",
|
|
382
|
+
"pl7.app/table/fontFamily": "monospace",
|
|
383
|
+
"pl7.app/table/visibility": "optional"
|
|
238
384
|
}
|
|
239
385
|
}
|
|
240
|
-
},
|
|
386
|
+
},
|
|
241
387
|
{
|
|
242
|
-
column: "
|
|
388
|
+
column: "bits",
|
|
243
389
|
spec: {
|
|
244
|
-
name: "pl7.app/
|
|
245
|
-
valueType: "
|
|
390
|
+
name: "pl7.app/alignment/bitScore",
|
|
391
|
+
valueType: "Float",
|
|
246
392
|
annotations: {
|
|
247
|
-
"pl7.app/label": "
|
|
248
|
-
"pl7.app/table/
|
|
393
|
+
"pl7.app/label": "Bit Score",
|
|
394
|
+
"pl7.app/table/visibility": "optional"
|
|
249
395
|
}
|
|
250
396
|
}
|
|
251
|
-
},
|
|
397
|
+
},
|
|
252
398
|
{
|
|
253
|
-
column:
|
|
399
|
+
column: "evalue",
|
|
254
400
|
spec: {
|
|
255
|
-
name: "pl7.app/
|
|
256
|
-
valueType: "
|
|
257
|
-
domain: {
|
|
258
|
-
"pl7.app/alphabet": assaySequenceType
|
|
259
|
-
},
|
|
401
|
+
name: "pl7.app/alignment/evalue",
|
|
402
|
+
valueType: "Float",
|
|
260
403
|
annotations: {
|
|
261
|
-
"pl7.app/label":
|
|
262
|
-
"pl7.app/table/
|
|
263
|
-
"pl7.app/table/orderPriority": "10000"
|
|
404
|
+
"pl7.app/label": "E-value",
|
|
405
|
+
"pl7.app/table/visibility": "optional"
|
|
264
406
|
}
|
|
265
407
|
}
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
if h.header == args.sequenceColumnHeader {
|
|
270
|
-
continue
|
|
271
|
-
}
|
|
272
|
-
assayColumns = append(assayColumns, {
|
|
273
|
-
column: h.header,
|
|
408
|
+
},
|
|
409
|
+
{
|
|
410
|
+
column: "pident",
|
|
274
411
|
spec: {
|
|
275
|
-
name:
|
|
276
|
-
valueType:
|
|
277
|
-
annotations: {
|
|
278
|
-
"pl7.app/label":
|
|
279
|
-
"pl7.app/table/
|
|
412
|
+
name: "pl7.app/alignment/pident",
|
|
413
|
+
valueType: "Float",
|
|
414
|
+
annotations: {
|
|
415
|
+
"pl7.app/label": "Percentage of identical matches",
|
|
416
|
+
"pl7.app/table/visibility": "optional"
|
|
280
417
|
}
|
|
281
418
|
}
|
|
282
|
-
}
|
|
283
|
-
}
|
|
419
|
+
}]
|
|
284
420
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
"pl7.app/label": "Sequence Id",
|
|
296
|
-
"pl7.app/table/fontFamily": "monospace"
|
|
421
|
+
for h in columnsToImport {
|
|
422
|
+
cloneColumns = append(cloneColumns, {
|
|
423
|
+
column: h.header,
|
|
424
|
+
spec: {
|
|
425
|
+
name: h.header,
|
|
426
|
+
valueType: h.type,
|
|
427
|
+
annotations: {
|
|
428
|
+
"pl7.app/label": h.header,
|
|
429
|
+
"pl7.app/table/visibility": h.header == args.sequenceColumnHeader ? "optional" : "default"
|
|
430
|
+
}
|
|
297
431
|
}
|
|
298
|
-
}
|
|
299
|
-
}],
|
|
300
|
-
columns: assayColumns,
|
|
301
|
-
annotations: {
|
|
302
|
-
"pl7.app/isAnchor": "true"
|
|
432
|
+
})
|
|
303
433
|
}
|
|
304
|
-
})
|
|
305
434
|
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
name: "pl7.app/vdj/assay/sequenceId",
|
|
312
|
-
valueType: "String",
|
|
313
|
-
annotations: {
|
|
314
|
-
"pl7.app/label": "Assay Sequence Id",
|
|
315
|
-
"pl7.app/table/defaultVisibility": "optional"
|
|
316
|
-
}
|
|
317
|
-
}
|
|
318
|
-
},
|
|
319
|
-
{
|
|
320
|
-
column: "bits",
|
|
321
|
-
spec: {
|
|
322
|
-
name: "pl7.app/alignment/bitScore",
|
|
323
|
-
valueType: "Float",
|
|
324
|
-
annotations: {
|
|
325
|
-
"pl7.app/label": "Bit Score",
|
|
326
|
-
"pl7.app/table/defaultVisibility": "optional"
|
|
327
|
-
}
|
|
328
|
-
}
|
|
329
|
-
},
|
|
330
|
-
{
|
|
331
|
-
column: "evalue",
|
|
332
|
-
spec: {
|
|
333
|
-
name: "pl7.app/alignment/evalue",
|
|
334
|
-
valueType: "Float",
|
|
335
|
-
annotations: {
|
|
336
|
-
"pl7.app/label": "E-value",
|
|
337
|
-
"pl7.app/table/defaultVisibility": "optional"
|
|
338
|
-
}
|
|
339
|
-
}
|
|
340
|
-
},
|
|
341
|
-
{
|
|
342
|
-
column: "pident",
|
|
343
|
-
spec: {
|
|
344
|
-
name: "pl7.app/alignment/pident",
|
|
345
|
-
valueType: "Float",
|
|
346
|
-
annotations: {
|
|
347
|
-
"pl7.app/label": "Percentage of identical matches",
|
|
348
|
-
"pl7.app/table/defaultVisibility": "optional"
|
|
349
|
-
}
|
|
435
|
+
// insert domain
|
|
436
|
+
for col in cloneColumns {
|
|
437
|
+
col.spec.domain = maps.deepMerge(col.spec.domain, {
|
|
438
|
+
"pl7.app/blockId": blockId
|
|
439
|
+
})
|
|
350
440
|
}
|
|
351
|
-
}]
|
|
352
|
-
|
|
353
|
-
for h in args.importColumns {
|
|
354
|
-
cloneColumns = append(cloneColumns, {
|
|
355
|
-
column: h.header,
|
|
356
|
-
spec: {
|
|
357
|
-
name: h.header,
|
|
358
|
-
valueType: h.type,
|
|
359
|
-
annotations: {
|
|
360
|
-
"pl7.app/label": h.header,
|
|
361
|
-
"pl7.app/table/defaultVisibility": h.header == args.sequenceColumnHeader ? "optional" : "default"
|
|
362
|
-
}
|
|
363
|
-
}
|
|
364
|
-
})
|
|
365
|
-
}
|
|
366
|
-
|
|
367
|
-
// insert domain
|
|
368
|
-
for col in cloneColumns {
|
|
369
|
-
col.spec.domain = maps.deepMerge(col.spec.domain, {
|
|
370
|
-
"pl7.app/blockId": blockId
|
|
371
|
-
})
|
|
372
|
-
}
|
|
373
441
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
442
|
+
cloneImportResults := xsv.importFile(
|
|
443
|
+
ptw.getFile("clonesData.tsv"), "tsv", {
|
|
444
|
+
axes: [{
|
|
445
|
+
column: "query",
|
|
446
|
+
spec: datasetSpec.axesSpec[1]
|
|
447
|
+
}],
|
|
448
|
+
columns: cloneColumns
|
|
449
|
+
},
|
|
450
|
+
{ splitDataAndSpec: true }
|
|
451
|
+
)
|
|
384
452
|
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
453
|
+
trace := pSpec.makeTrace(datasetSpec,
|
|
454
|
+
{
|
|
455
|
+
type: "milaboratories.immune-assay-data",
|
|
456
|
+
importance: 30,
|
|
457
|
+
label: "Assay Data"
|
|
458
|
+
})
|
|
391
459
|
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
460
|
+
epfB := pframes.pFrameBuilder()
|
|
461
|
+
for k, v in cloneImportResults {
|
|
462
|
+
epfB.add(k, trace.inject(v.spec), v.data)
|
|
463
|
+
}
|
|
464
|
+
epf = epfB.build()
|
|
465
|
+
assayPframe = pframes.exportFrame(assayImportResults)
|
|
395
466
|
}
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
return {
|
|
467
|
+
|
|
468
|
+
result := {
|
|
399
469
|
outputs: {
|
|
400
470
|
dataImportHandle: importFile.handle,
|
|
401
|
-
table:
|
|
402
|
-
mmseqsOutput: mmseqsOutput // @TODO tmp fix to resolve CID conflicts
|
|
403
|
-
|
|
404
|
-
|
|
471
|
+
table: assayPframe,
|
|
472
|
+
mmseqsOutput: mmseqsOutput, // @TODO tmp fix to resolve CID conflicts
|
|
473
|
+
emptyResults: emptyResults
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
if !emptyResults {
|
|
478
|
+
result.exports = {
|
|
405
479
|
epf: epf
|
|
406
480
|
}
|
|
407
481
|
}
|
|
482
|
+
|
|
483
|
+
return result
|
|
408
484
|
})
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
self := import("@platforma-sdk/workflow-tengo:tpl")
|
|
2
|
+
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
3
|
+
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
4
|
+
assets:= import("@platforma-sdk/workflow-tengo:assets")
|
|
5
|
+
mmseqsSw := assets.importSoftware("@platforma-open/soedinglab.software-mmseqs2:main")
|
|
6
|
+
|
|
7
|
+
self.defineOutputs("mmseqsOutput")
|
|
8
|
+
|
|
9
|
+
self.body(func(args) {
|
|
10
|
+
|
|
11
|
+
covMode := args.covMode.getDataAsJson()
|
|
12
|
+
mmseqsSearchType := args.mmseqsSearchType
|
|
13
|
+
coverageThreshold := args.coverageThreshold
|
|
14
|
+
identityThreshold := args.identityThreshold
|
|
15
|
+
similarityType := string(args.similarityType)
|
|
16
|
+
clonesFasta := args.clonesFasta
|
|
17
|
+
assayFasta := args.assayFasta
|
|
18
|
+
|
|
19
|
+
mmseqs := exec.builder().
|
|
20
|
+
software(mmseqsSw).
|
|
21
|
+
dontSaveStdoutOrStderr(). // important to avoid CID conflict problems coming from different stdout output on same datasets
|
|
22
|
+
arg("easy-search").
|
|
23
|
+
arg("clones.fasta").
|
|
24
|
+
arg("assay.fasta").
|
|
25
|
+
arg("results.tsv").
|
|
26
|
+
arg("tmp").
|
|
27
|
+
arg("--search-type").arg(mmseqsSearchType).
|
|
28
|
+
arg("--cov-mode").arg(string(covMode)).
|
|
29
|
+
arg("-c").arg(string(coverageThreshold)).
|
|
30
|
+
arg("--min-seq-id").arg(string(identityThreshold))
|
|
31
|
+
|
|
32
|
+
if similarityType == "sequence-identity" {
|
|
33
|
+
mmseqs = mmseqs.arg("--alignment-mode").arg("3")
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
mmseqs = mmseqs.
|
|
37
|
+
addFile("clones.fasta", clonesFasta).
|
|
38
|
+
addFile("assay.fasta", assayFasta).
|
|
39
|
+
saveFile("results.tsv").
|
|
40
|
+
run()
|
|
41
|
+
|
|
42
|
+
mmseqsOutput := mmseqs.getFile("results.tsv")
|
|
43
|
+
|
|
44
|
+
return {
|
|
45
|
+
mmseqsOutput: mmseqsOutput
|
|
46
|
+
}
|
|
47
|
+
})
|