@platforma-open/milaboratories.immune-assay-data.workflow 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,13 @@
1
1
   WARN  Issue while reading "/home/runner/work/immune-assay-data/immune-assay-data/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.immune-assay-data.workflow@1.0.2 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
3
+ > @platforma-open/milaboratories.immune-assay-data.workflow@1.2.0 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/main.tpl.tengo"...
7
+ Processing "src/run-alignment.tpl.tengo"...
7
8
  No syntax errors found.
8
9
  info: Compiling 'dist'...
10
+ info: - writing /home/runner/work/immune-assay-data/immune-assay-data/workflow/dist/tengo/tpl/run-alignment.plj.gz
9
11
  info: - writing /home/runner/work/immune-assay-data/immune-assay-data/workflow/dist/tengo/tpl/main.plj.gz
10
12
  info: Template Pack build done.
11
13
  info: Template Pack build done.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.immune-assay-data.workflow
2
2
 
3
+ ## 1.2.0
4
+
5
+ ### Minor Changes
6
+
7
+ - bd219bf: Update SDK and bugfixes
8
+
9
+ ## 1.1.0
10
+
11
+ ### Minor Changes
12
+
13
+ - 40fd1d2: Updated matching parameters. Handle empty results (no matching clonotypes). Fixed logic for assay sequence column detection. Allow for assay column selection.
14
+
3
15
  ## 1.0.2
4
16
 
5
17
  ### Patch Changes
package/dist/index.cjs CHANGED
@@ -1,3 +1,4 @@
1
1
  module.exports = { Templates: {
2
+ 'run-alignment': { type: 'from-file', path: require.resolve('./tengo/tpl/run-alignment.plj.gz') },
2
3
  'main': { type: 'from-file', path: require.resolve('./tengo/tpl/main.plj.gz') }
3
4
  }};
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  declare type TemplateFromFile = { readonly type: "from-file"; readonly path: string; };
2
- declare type TplName = "main";
2
+ declare type TplName = "run-alignment" | "main";
3
3
  declare const Templates: Record<TplName, TemplateFromFile>;
4
4
  export { Templates };
package/dist/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { resolve } from 'node:path';
2
2
  export const Templates = {
3
+ 'run-alignment': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/run-alignment.plj.gz') },
3
4
  'main': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/main.plj.gz') }
4
5
  };
Binary file
package/package.json CHANGED
@@ -1,17 +1,18 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.immune-assay-data.workflow",
3
- "version": "1.0.2",
3
+ "version": "1.2.0",
4
4
  "type": "module",
5
5
  "description": "Block Workflow",
6
6
  "dependencies": {
7
- "@platforma-sdk/workflow-tengo": "^4.7.1",
7
+ "@platforma-sdk/workflow-tengo": "^4.9.0",
8
8
  "@platforma-open/soedinglab.software-mmseqs2": "^1.0.0",
9
9
  "@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.0.3",
10
- "@platforma-open/milaboratories.immune-assay-data.add-header": "1.0.2"
10
+ "@platforma-open/milaboratories.immune-assay-data.add-header": "1.0.2",
11
+ "@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.0.0"
11
12
  },
12
13
  "devDependencies": {
13
- "@platforma-sdk/tengo-builder": "^2.1.7",
14
- "@platforma-sdk/test": "^1.31.16",
14
+ "@platforma-sdk/tengo-builder": "^2.1.11",
15
+ "@platforma-sdk/test": "^1.37.8",
15
16
  "vitest": "^2.1.8"
16
17
  },
17
18
  "scripts": {
@@ -1,4 +1,3 @@
1
- // light block with no workflow
2
1
  wf := import("@platforma-sdk/workflow-tengo:workflow")
3
2
  ll := import("@platforma-sdk/workflow-tengo:ll")
4
3
  file := import("@platforma-sdk/workflow-tengo:file")
@@ -8,12 +7,17 @@ maps:= import("@platforma-sdk/workflow-tengo:maps")
8
7
  xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
9
8
  pframes := import("@platforma-sdk/workflow-tengo:pframes")
10
9
  pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
11
- slices := import("@platforma-sdk/workflow-tengo:slices")
12
10
  pt := import("@platforma-sdk/workflow-tengo:pt")
11
+ path := import("@platforma-sdk/workflow-tengo:path")
12
+ json := import("json")
13
+ text := import("text")
14
+ render := import("@platforma-sdk/workflow-tengo:render")
15
+ strings := import("@platforma-sdk/workflow-tengo:strings")
16
+ runAlignmentTpl := assets.importTemplate(":run-alignment")
13
17
 
14
18
  prepareFastaSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.prepare-fasta:main")
15
- mmseqsSw := assets.importSoftware("@platforma-open/soedinglab.software-mmseqs2:main")
16
19
  addHeaderSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.add-header:main")
20
+ covModeCalcSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc:main")
17
21
 
18
22
  wf.prepare(func(args){
19
23
  bundleBuilder := wf.createPBundleBuilder()
@@ -24,12 +28,12 @@ wf.prepare(func(args){
24
28
  }
25
29
  })
26
30
 
27
- prepareAssayTsv := func(args, file) {
31
+ prepareAssayFile := func(args, file, xsvType) {
28
32
  // assign ids to assay sequences
29
33
  ptw := pt.workflow()
30
34
  df := ptw.frame({
31
35
  file: file,
32
- xsvType: "tsv" // @TODO (!!!)
36
+ xsvType: xsvType
33
37
  })
34
38
 
35
39
  //////// calculate sequence id ////////
@@ -75,17 +79,21 @@ prepareClonesTsv := func(args) {
75
79
  cloneTable.setAxisHeader(datasetSpec.axesSpec[1].name, "seqId")
76
80
  cloneTable.add(columns.getColumn(args.targetRef), {header: "sequence"})
77
81
 
82
+ cloneTable.mem("16GiB")
83
+ cloneTable.cpu(1)
78
84
  return cloneTable.build()
79
85
  }
80
86
 
81
87
  /**
82
88
  * Convert tsv file to fasta file
83
89
  * @param fileTsv - tsv file
84
- * @return fasta file
90
+ * @return fasta file run result
85
91
  */
86
- tsv2Fasta := func(fileTsv) {
92
+ runTsvToFasta := func(fileTsv) {
87
93
  e := exec.builder().
88
94
  software(prepareFastaSw).
95
+ mem("16GiB").
96
+ cpu(1).
89
97
  addFile("input.tsv", fileTsv).
90
98
  arg("-i").arg("input.tsv").
91
99
  arg("-o").arg("output.fasta").
@@ -93,7 +101,11 @@ tsv2Fasta := func(fileTsv) {
93
101
  arg("--id_col").arg("seqId").
94
102
  saveFile("output.fasta")
95
103
 
96
- return e.run().getFile("output.fasta")
104
+ return e.run()
105
+ }
106
+
107
+ assayColumnName := func(header) {
108
+ return "pl7.app/vdj/assay-data/" + strings.substituteSpecialCharacters(header)
97
109
  }
98
110
 
99
111
  wf.body(func(args) {
@@ -122,13 +134,49 @@ wf.body(func(args) {
122
134
  ll.panic("Assay sequence type is undefined")
123
135
  }
124
136
 
125
- assayTsv := prepareAssayTsv(args, importFile.file)
137
+ handleUrl := ll.parseUrl(args.fileHandle)
138
+ jsonPayload := handleUrl.Path[1:]
139
+ fileInfo := json.decode(jsonPayload)
140
+
141
+ fileName := ""
142
+ if fileInfo.localPath != undefined {
143
+ fileName = fileInfo.localPath
144
+ } else if fileInfo.path != undefined {
145
+ fileName = fileInfo.path
146
+ } else {
147
+ ll.panic("Could not determine filename from file handle: ", args.fileHandle)
148
+ }
149
+
150
+ fileNameParts := path.split(fileName, ".")
151
+ xsvType := "tsv"
152
+ if len(fileNameParts) > 1 {
153
+ xsvType = fileNameParts[len(fileNameParts)-1]
154
+ }
155
+
156
+ assayTsv := prepareAssayFile(args, importFile.file, xsvType)
126
157
  clonesTsv := prepareClonesTsv(args)
127
158
 
128
159
  // prepare fasta
129
- clonesFasta := tsv2Fasta(clonesTsv)
130
- assayFasta := tsv2Fasta(assayTsv)
131
-
160
+ clonesFastaRun := runTsvToFasta(clonesTsv)
161
+ assayFastaRun := runTsvToFasta(assayTsv)
162
+ clonesFasta := clonesFastaRun.getFile("output.fasta")
163
+ assayFasta := assayFastaRun.getFile("output.fasta")
164
+
165
+ // Dynamically determine coverage mode by comparing average sequence lengths
166
+ coverageMode := exec.builder().
167
+ software(covModeCalcSw).
168
+ mem("16GiB").
169
+ cpu(1).
170
+ addFile("clones.fasta", clonesFasta).
171
+ addFile("assay.fasta", assayFasta).
172
+ arg("--clones-fasta").arg("clones.fasta").
173
+ arg("--assay-fasta").arg("assay.fasta").
174
+ arg("--output").arg("coverage_mode.txt").
175
+ saveFileContent("coverage_mode.txt").
176
+ run()
177
+
178
+ covMode := coverageMode.getFileContent("coverage_mode.txt")
179
+
132
180
  mmseqsSearchType := "0"
133
181
  if targetSequenceType == "aminoacid" && assaySequenceType == "aminoacid" {
134
182
  //1: amino acid
@@ -143,266 +191,307 @@ wf.body(func(args) {
143
191
  // 2: nucleotide
144
192
  mmseqsSearchType = "2"
145
193
  }
146
- // run search
147
- mmseqs := exec.builder().
148
- software(mmseqsSw).
149
- dontSaveStdoutOrStderr(). // important to avoid CID conflict problems coming from different stdout output on same datasets
150
- arg("easy-search").
151
- arg("clones.fasta").
152
- arg("assay.fasta").
153
- arg("results.tsv").
154
- arg("tmp").
155
- arg("--search-type").arg(mmseqsSearchType).
156
- arg("--cov-mode").arg(string(args.settings.coverageMode)).
157
- arg("-c").arg(string(args.settings.coverageThreshold)).
158
- addFile("clones.fasta", clonesFasta).
159
- addFile("assay.fasta", assayFasta).
160
- saveFile("results.tsv").
161
- run()
162
194
 
163
- mmseqsOutput := mmseqs.getFile("results.tsv")
195
+ runMmseqs := render.create(runAlignmentTpl, {
196
+ covMode: covMode,
197
+ mmseqsSearchType: mmseqsSearchType,
198
+ coverageThreshold: args.settings.coverageThreshold,
199
+ identityThreshold: args.settings.identity,
200
+ similarityType: args.settings.similarityType,
201
+ clonesFasta: clonesFasta,
202
+ assayFasta: assayFasta
203
+ })
204
+
205
+ mmseqsOutput := runMmseqs.output("mmseqsOutput")
206
+
164
207
  // @TODO remove header stuff and replace with pt when available (!)
165
- mmseqsResultTsv := exec.builder().
208
+ addHeaderRunResult := exec.builder().
166
209
  software(addHeaderSw).
210
+ mem("16GiB").
211
+ cpu(1).
167
212
  arg("-i").arg("results.tsv").
168
213
  arg("-o").arg("results_with_header.tsv").
169
214
  addFile("results.tsv", mmseqsOutput).
170
215
  saveFile("results_with_header.tsv").
171
- run().
172
- getFile("results_with_header.tsv")
216
+ run()
173
217
 
218
+ mmseqsResultTsv := addHeaderRunResult.getFile("results_with_header.tsv")
219
+ mmseqsResultTsvContent := addHeaderRunResult.getFileContent("results_with_header.tsv")
174
220
 
175
- //////// Process tables ////////
221
+ emptyResults := len(text.trim_space(string(mmseqsResultTsvContent))) == 0
222
+ blockId := wf.blockId().getDataAsJson()
176
223
 
177
- ptw := pt.workflow()
178
- df := ptw.frame({
179
- file: mmseqsResultTsv,
180
- xsvType: "tsv"
181
- })
224
+ assayPframe := undefined
225
+ epf := undefined
182
226
 
183
- cols := []
184
- for _, col in ["bits", "evalue", "target", "pident", "alnlen", "mismatch", "gapopen", "qstart", "qend", "tstart", "tend"] {
185
- cols = append(cols,
186
- pt.col(col).maxBy(
187
- pt.col("evalue").multiply(-1),
188
- pt.col("bits")
189
- ).alias(col)
190
- )
191
- }
192
-
193
- df = df.groupBy("query").agg(cols...)
194
- df.save("results.tsv")
227
+ if emptyResults {
228
+ assayPframe = pframes.emptyPFrame()
229
+ epf = pframes.emptyPFrame()
230
+ } else {
231
+ //////// Process tables ////////
232
+ ptw := pt.workflow()
233
+ df := ptw.frame({
234
+ file: mmseqsResultTsv,
235
+ xsvType: "tsv"
236
+ })
195
237
 
196
- // assay data import summary
197
- assayDf := ptw.frame({
198
- file: assayTsv,
199
- xsvType: "tsv"
200
- })
201
- // import how many matches per assay sequence found
202
- assayDf = assayDf.join(
203
- df.groupBy("target").agg(
204
- pt.col("query").count().alias("queryCount")
205
- ),
206
- {
207
- how: "left",
208
- leftOn: "seqId",
209
- rightOn: "target"
238
+ // Cast columns to ensure correct types for aggregation
239
+ df = df.withColumns(
240
+ pt.col("evalue").cast("Float64").alias("evalue"),
241
+ pt.col("bits").cast("Float64").alias("bits")
242
+ )
243
+
244
+ cols := []
245
+ for _, col in ["bits", "evalue", "target", "pident", "alnlen", "mismatch", "gapopen", "qstart", "qend", "tstart", "tend"] {
246
+ cols = append(cols,
247
+ pt.col(col).maxBy(
248
+ pt.col("evalue").multiply(-1),
249
+ pt.col("bits")
250
+ ).alias(col)
251
+ )
210
252
  }
211
- )
212
- assayDf.save("assayData.tsv")
253
+
254
+ df = df.groupBy("query").agg(cols...)
255
+ df.save("results.tsv")
213
256
 
214
- // clones
215
- clonesDf := df.join(assayDf,
216
- {
217
- how: "left",
218
- leftOn: "target",
219
- rightOn: "seqId"
257
+ // assay data import summary
258
+ assayDf := ptw.frame({
259
+ file: assayTsv,
260
+ xsvType: "tsv"
261
+ })
262
+ // import how many matches per assay sequence found
263
+ assayDf = assayDf.join(
264
+ df.groupBy("target").agg(
265
+ pt.col("query").count().alias("queryCount")
266
+ ),
267
+ {
268
+ how: "left",
269
+ leftOn: "seqId",
270
+ rightOn: "target"
271
+ }
272
+ )
273
+ assayDf.save("assayData.tsv")
274
+
275
+ // clones
276
+ clonesDf := df.join(assayDf,
277
+ {
278
+ how: "left",
279
+ leftOn: "target",
280
+ rightOn: "seqId"
281
+ }
282
+ )
283
+
284
+ clonesDf.save("clonesData.tsv")
285
+ ptw = ptw.run()
286
+
287
+ //////// Building outputs & exports ////////
288
+ assayColumns := [
289
+ {
290
+ column: "seqIdLabel",
291
+ spec: {
292
+ name: "pl7.app/label",
293
+ valueType: "String",
294
+ annotations: {
295
+ "pl7.app/label": "Sequence Id",
296
+ "pl7.app/table/fontFamily": "monospace"
297
+ }
298
+ }
299
+ },
300
+ {
301
+ column: "queryCount",
302
+ spec: {
303
+ name: "pl7.app/vdj/assay/queryCount",
304
+ valueType: "Int",
305
+ annotations: {
306
+ "pl7.app/label": "Matched Clones",
307
+ "pl7.app/table/orderPriority": "9000"
308
+ }
309
+ }
310
+ },
311
+ {
312
+ column: sequenceColumnInfo.header,
313
+ id: strings.substituteSpecialCharacters(sequenceColumnInfo.header),
314
+ spec: {
315
+ name: "pl7.app/vdj/sequence",
316
+ valueType: "String",
317
+ domain: {
318
+ "pl7.app/alphabet": assaySequenceType
319
+ },
320
+ annotations: {
321
+ "pl7.app/label": sequenceColumnInfo.header,
322
+ "pl7.app/table/fontFamily": "monospace",
323
+ "pl7.app/table/orderPriority": "10000"
324
+ }
325
+ }
326
+ }
327
+ ]
328
+
329
+ columnsToImport := args.importColumns
330
+ if args.selectedColumns != undefined && len(args.selectedColumns) > 0 {
331
+ selectedHeaders := {}
332
+ for header in args.selectedColumns {
333
+ selectedHeaders[header] = true
334
+ }
335
+
336
+ filteredColumns := []
337
+ for col in args.importColumns {
338
+ // Always include the main sequence column
339
+ if col.header == args.sequenceColumnHeader || selectedHeaders[col.header] {
340
+ filteredColumns = append(filteredColumns, col)
341
+ }
342
+ }
343
+ columnsToImport = filteredColumns
220
344
  }
221
- )
222
345
 
223
- clonesDf.save("clonesData.tsv")
224
- ptw = ptw.run()
225
-
226
- //////// Building outputs & exports ////////
227
- blockId := wf.blockId().getDataAsJson()
346
+ for h in columnsToImport {
347
+ if h.header == args.sequenceColumnHeader {
348
+ continue
349
+ }
350
+ assayColumns = append(assayColumns, {
351
+ column: h.header,
352
+ id: strings.substituteSpecialCharacters(h.header),
353
+ spec: {
354
+ name: assayColumnName(h.header),
355
+ valueType: h.type,
356
+ annotations: {
357
+ "pl7.app/label": h.header,
358
+ "pl7.app/table/orderPriority": "1000"
359
+ }
360
+ }
361
+ })
362
+ }
363
+
364
+ assayImportResults := xsv.importFile(ptw.getFile("assayData.tsv"), "tsv", {
365
+ axes: [{
366
+ column: "seqId",
367
+ spec: {
368
+ name: "pl7.app/vdj/assay/sequenceId",
369
+ type: "String",
370
+ domain: {
371
+ "pl7.app/blockId": blockId
372
+ },
373
+ annotations: {
374
+ "pl7.app/label": "Sequence Id",
375
+ "pl7.app/table/fontFamily": "monospace"
376
+ }
377
+ }
378
+ }],
379
+ columns: assayColumns,
380
+ annotations: {
381
+ "pl7.app/isAnchor": "true"
382
+ }
383
+ }, {cpu: 1, mem: "16GiB"})
228
384
 
229
- assayColumns := [
385
+ // "bits", "evalue", "pident"
386
+ cloneColumns := [
230
387
  {
231
388
  column: "seqIdLabel",
232
389
  spec: {
233
- name: "pl7.app/label",
390
+ name: "pl7.app/vdj/assay/sequenceIdLabel",
234
391
  valueType: "String",
235
392
  annotations: {
236
- "pl7.app/label": "Sequence Id",
237
- "pl7.app/table/fontFamily": "monospace"
393
+ "pl7.app/label": "Assay Sequence Id",
394
+ "pl7.app/table/fontFamily": "monospace",
395
+ "pl7.app/table/visibility": "optional"
238
396
  }
239
397
  }
240
- },
398
+ },
241
399
  {
242
- column: "queryCount",
400
+ column: "bits",
243
401
  spec: {
244
- name: "pl7.app/vdj/assay/queryCount",
245
- valueType: "Int",
402
+ name: "pl7.app/alignment/bitScore",
403
+ valueType: "Float",
246
404
  annotations: {
247
- "pl7.app/label": "Matched Clones",
248
- "pl7.app/table/orderPriority": "9000"
405
+ "pl7.app/label": "Bit Score",
406
+ "pl7.app/table/visibility": "optional"
249
407
  }
250
408
  }
251
- },
409
+ },
252
410
  {
253
- column: sequenceColumnInfo.header,
411
+ column: "evalue",
254
412
  spec: {
255
- name: "pl7.app/vdj/sequence",
256
- valueType: "String",
257
- domain: {
258
- "pl7.app/alphabet": assaySequenceType
259
- },
413
+ name: "pl7.app/alignment/evalue",
414
+ valueType: "Float",
260
415
  annotations: {
261
- "pl7.app/label": sequenceColumnInfo.header,
262
- "pl7.app/table/fontFamily": "monospace",
263
- "pl7.app/table/orderPriority": "10000"
416
+ "pl7.app/label": "E-value",
417
+ "pl7.app/table/visibility": "optional"
264
418
  }
265
419
  }
266
- }
267
- ]
268
- for h in args.importColumns {
269
- if h.header == args.sequenceColumnHeader {
270
- continue
271
- }
272
- assayColumns = append(assayColumns, {
273
- column: h.header,
420
+ },
421
+ {
422
+ column: "pident",
274
423
  spec: {
275
- name: h.header,
276
- valueType: h.type,
277
- annotations: {
278
- "pl7.app/label": h.header,
279
- "pl7.app/table/orderPriority": "1000"
424
+ name: "pl7.app/alignment/pident",
425
+ valueType: "Float",
426
+ annotations: {
427
+ "pl7.app/label": "Percentage of identical matches",
428
+ "pl7.app/table/visibility": "optional"
280
429
  }
281
430
  }
282
- })
283
- }
431
+ }]
284
432
 
285
- assayImportResults := xsv.importFile(ptw.getFile("assayData.tsv"), "tsv", {
286
- axes: [{
287
- column: "seqId",
288
- spec: {
289
- name: "pl7.app/vdj/assay/sequenceId",
290
- type: "String",
291
- domain: {
292
- "pl7.app/blockId": blockId
293
- },
294
- annotations: {
295
- "pl7.app/label": "Sequence Id",
296
- "pl7.app/table/fontFamily": "monospace"
433
+ for h in columnsToImport {
434
+ cloneColumns = append(cloneColumns, {
435
+ column: h.header,
436
+ id: strings.substituteSpecialCharacters(h.header),
437
+ spec: {
438
+ name: assayColumnName(h.header),
439
+ valueType: h.type,
440
+ annotations: {
441
+ "pl7.app/label": h.header,
442
+ "pl7.app/table/visibility": h.header == args.sequenceColumnHeader ? "optional" : "default"
443
+ }
297
444
  }
298
- }
299
- }],
300
- columns: assayColumns,
301
- annotations: {
302
- "pl7.app/isAnchor": "true"
445
+ })
303
446
  }
304
- })
305
447
 
306
- // "bits", "evalue", "pident"
307
- cloneColumns := [
308
- {
309
- column: "target",
310
- spec: {
311
- name: "pl7.app/vdj/assay/sequenceId",
312
- valueType: "String",
313
- annotations: {
314
- "pl7.app/label": "Assay Sequence Id",
315
- "pl7.app/table/defaultVisibility": "optional"
316
- }
317
- }
318
- },
319
- {
320
- column: "bits",
321
- spec: {
322
- name: "pl7.app/alignment/bitScore",
323
- valueType: "Float",
324
- annotations: {
325
- "pl7.app/label": "Bit Score",
326
- "pl7.app/table/defaultVisibility": "optional"
327
- }
448
+ // insert domain
449
+ for col in cloneColumns {
450
+ col.spec.domain = maps.deepMerge(col.spec.domain, {
451
+ "pl7.app/blockId": blockId
452
+ })
328
453
  }
329
- },
330
- {
331
- column: "evalue",
332
- spec: {
333
- name: "pl7.app/alignment/evalue",
334
- valueType: "Float",
335
- annotations: {
336
- "pl7.app/label": "E-value",
337
- "pl7.app/table/defaultVisibility": "optional"
338
- }
339
- }
340
- },
341
- {
342
- column: "pident",
343
- spec: {
344
- name: "pl7.app/alignment/pident",
345
- valueType: "Float",
346
- annotations: {
347
- "pl7.app/label": "Percentage of identical matches",
348
- "pl7.app/table/defaultVisibility": "optional"
349
- }
350
- }
351
- }]
352
-
353
- for h in args.importColumns {
354
- cloneColumns = append(cloneColumns, {
355
- column: h.header,
356
- spec: {
357
- name: h.header,
358
- valueType: h.type,
359
- annotations: {
360
- "pl7.app/label": h.header,
361
- "pl7.app/table/defaultVisibility": h.header == args.sequenceColumnHeader ? "optional" : "default"
362
- }
363
- }
364
- })
365
- }
366
-
367
- // insert domain
368
- for col in cloneColumns {
369
- col.spec.domain = maps.deepMerge(col.spec.domain, {
370
- "pl7.app/blockId": blockId
371
- })
372
- }
373
454
 
374
- cloneImportResults := xsv.importFile(
375
- ptw.getFile("clonesData.tsv"), "tsv", {
376
- axes: [{
377
- column: "query",
378
- spec: datasetSpec.axesSpec[1]
379
- }],
380
- columns: cloneColumns
381
- },
382
- { splitDataAndSpec: true }
383
- )
455
+ cloneImportResults := xsv.importFile(
456
+ ptw.getFile("clonesData.tsv"), "tsv", {
457
+ axes: [{
458
+ column: "query",
459
+ spec: datasetSpec.axesSpec[1]
460
+ }],
461
+ columns: cloneColumns
462
+ },
463
+ { splitDataAndSpec: true, cpu: 1, mem: "16GiB" }
464
+ )
384
465
 
385
- trace := pSpec.makeTrace(datasetSpec,
386
- {
387
- type: "milaboratories.immune-assay-data",
388
- importance: 30,
389
- label: "Assay Data"
390
- })
466
+ trace := pSpec.makeTrace(datasetSpec,
467
+ {
468
+ type: "milaboratories.immune-assay-data",
469
+ importance: 30,
470
+ label: "Assay Data"
471
+ })
391
472
 
392
- epf := pframes.pFrameBuilder()
393
- for k, v in cloneImportResults {
394
- epf.add(k, trace.inject(v.spec), v.data)
473
+ epfB := pframes.pFrameBuilder()
474
+ for k, v in cloneImportResults {
475
+ epfB.add(k, trace.inject(v.spec), v.data)
476
+ }
477
+ epf = epfB.build()
478
+ assayPframe = pframes.exportFrame(assayImportResults)
395
479
  }
396
- epf = epf.build()
397
-
398
- return {
480
+
481
+ result := {
399
482
  outputs: {
400
483
  dataImportHandle: importFile.handle,
401
- table: pframes.exportFrame(assayImportResults),
402
- mmseqsOutput: mmseqsOutput // @TODO tmp fix to resolve CID conflicts
403
- },
404
- exports: {
484
+ table: assayPframe,
485
+ mmseqsOutput: mmseqsOutput, // @TODO tmp fix to resolve CID conflicts
486
+ emptyResults: emptyResults
487
+ }
488
+ }
489
+
490
+ if !emptyResults {
491
+ result.exports = {
405
492
  epf: epf
406
493
  }
407
494
  }
495
+
496
+ return result
408
497
  })
@@ -0,0 +1,49 @@
1
+ self := import("@platforma-sdk/workflow-tengo:tpl")
2
+ ll := import("@platforma-sdk/workflow-tengo:ll")
3
+ exec := import("@platforma-sdk/workflow-tengo:exec")
4
+ assets:= import("@platforma-sdk/workflow-tengo:assets")
5
+ mmseqsSw := assets.importSoftware("@platforma-open/soedinglab.software-mmseqs2:main")
6
+
7
+ self.defineOutputs("mmseqsOutput")
8
+
9
+ self.body(func(args) {
10
+
11
+ covMode := args.covMode.getDataAsJson()
12
+ mmseqsSearchType := args.mmseqsSearchType
13
+ coverageThreshold := args.coverageThreshold
14
+ identityThreshold := args.identityThreshold
15
+ similarityType := string(args.similarityType)
16
+ clonesFasta := args.clonesFasta
17
+ assayFasta := args.assayFasta
18
+
19
+ mmseqs := exec.builder().
20
+ software(mmseqsSw).
21
+ mem("32GiB").
22
+ cpu(1).
23
+ dontSaveStdoutOrStderr(). // important to avoid CID conflict problems coming from different stdout output on same datasets
24
+ arg("easy-search").
25
+ arg("clones.fasta").
26
+ arg("assay.fasta").
27
+ arg("results.tsv").
28
+ arg("tmp").
29
+ arg("--search-type").arg(mmseqsSearchType).
30
+ arg("--cov-mode").arg(string(covMode)).
31
+ arg("-c").arg(string(coverageThreshold)).
32
+ arg("--min-seq-id").arg(string(identityThreshold))
33
+
34
+ if similarityType == "sequence-identity" {
35
+ mmseqs = mmseqs.arg("--alignment-mode").arg("3")
36
+ }
37
+
38
+ mmseqs = mmseqs.
39
+ addFile("clones.fasta", clonesFasta).
40
+ addFile("assay.fasta", assayFasta).
41
+ saveFile("results.tsv").
42
+ run()
43
+
44
+ mmseqsOutput := mmseqs.getFile("results.tsv")
45
+
46
+ return {
47
+ mmseqsOutput: mmseqsOutput
48
+ }
49
+ })