@platforma-open/milaboratories.immune-assay-data.workflow 1.6.3 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/immune-assay-data/immune-assay-data/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.immune-assay-data.workflow@1.6.3 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
3
+ > @platforma-open/milaboratories.immune-assay-data.workflow@1.7.1 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/build-outputs.tpl.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # @platforma-open/milaboratories.immune-assay-data.workflow
2
2
 
3
+ ## 1.7.1
4
+
5
+ ### Patch Changes
6
+
7
+ - cc7794e: Update mmseqs binary
8
+
9
+ ## 1.7.0
10
+
11
+ ### Minor Changes
12
+
13
+ - c98d8b5: - Introduce fast mode for sequence match
14
+ - Support XLSX file as assay data input
15
+
16
+ ### Patch Changes
17
+
18
+ - Updated dependencies [c98d8b5]
19
+ - @platforma-open/milaboratories.immune-assay-data.coverage-mode-calc@1.2.0
20
+ - @platforma-open/milaboratories.immune-assay-data.xlsx-to-csv@1.1.0
21
+
3
22
  ## 1.6.3
4
23
 
5
24
  ### Patch Changes
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,15 +1,16 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.immune-assay-data.workflow",
3
- "version": "1.6.3",
3
+ "version": "1.7.1",
4
4
  "type": "module",
5
5
  "description": "Tengo-based template",
6
6
  "dependencies": {
7
7
  "@platforma-sdk/workflow-tengo": "5.9.0",
8
- "@platforma-open/soedinglab.software-mmseqs2": "^1.17.2",
8
+ "@platforma-open/soedinglab.software-mmseqs2": "1.18.3",
9
9
  "@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.1.3",
10
10
  "@platforma-open/milaboratories.immune-assay-data.add-header": "1.1.3",
11
- "@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv": "1.1.3",
12
- "@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.1.3"
11
+ "@platforma-open/milaboratories.immune-assay-data.xlsx-to-csv": "1.1.0",
12
+ "@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.2.0",
13
+ "@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv": "1.1.3"
13
14
  },
14
15
  "devDependencies": {
15
16
  "@platforma-sdk/tengo-builder": "2.4.25"
@@ -191,7 +191,7 @@ self.body(func(inputs) {
191
191
  cloneImportResults := xsv.importFile(
192
192
  inputs.clonesDataTsv, "tsv", {
193
193
  axes: [{
194
- column: "query",
194
+ column: "target",
195
195
  spec: inputs.datasetSpec.axesSpec[1]
196
196
  }],
197
197
  columns: cloneColumns,
@@ -206,11 +206,11 @@ self.body(func(inputs) {
206
206
  inputs.bestAlignmentTsv, "tsv", {
207
207
  axes: [
208
208
  {
209
- column: "query",
209
+ column: "target",
210
210
  spec: inputs.datasetSpec.axesSpec[1]
211
211
  },
212
212
  {
213
- column: "target",
213
+ column: "query",
214
214
  spec: {
215
215
  name: "pl7.app/vdj/assay/sequenceId",
216
216
  type: "String",
@@ -21,6 +21,7 @@ prepareFastaSw := assets.importSoftware("@platforma-open/milaboratories.immune-a
21
21
  fastaToTsvSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv:main")
22
22
  addHeaderSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.add-header:main")
23
23
  covModeCalcSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc:main")
24
+ xlsxToCsvSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.xlsx-to-csv:main")
24
25
 
25
26
  wf.prepare(func(args){
26
27
  bundleBuilder := wf.createPBundleBuilder()
@@ -112,7 +113,7 @@ prepareClonesTsv := func(args) {
112
113
  runTsvToFasta := func(fileTsv) {
113
114
  e := exec.builder().
114
115
  software(prepareFastaSw).
115
- mem("16GiB").
116
+ mem("8GiB").
116
117
  cpu(1).
117
118
  addFile("input.tsv", fileTsv).
118
119
  arg("-i").arg("input.tsv").
@@ -132,7 +133,7 @@ runTsvToFasta := func(fileTsv) {
132
133
  runFastaToTsv := func(fileFasta) {
133
134
  e := exec.builder().
134
135
  software(fastaToTsvSw).
135
- mem("16GiB").
136
+ mem("8GiB").
136
137
  cpu(1).
137
138
  addFile("input.fasta", fileFasta).
138
139
  arg("-i").arg("input.fasta").
@@ -142,6 +143,24 @@ runFastaToTsv := func(fileFasta) {
142
143
  return e.run()
143
144
  }
144
145
 
146
+ /**
147
+ * Convert xlsx file to csv file
148
+ * @param fileXlsx - xlsx file
149
+ * @return csv file run result
150
+ */
151
+ runXlsxToCsv := func(fileXlsx) {
152
+ e := exec.builder().
153
+ software(xlsxToCsvSw).
154
+ mem("16GiB").
155
+ cpu(1).
156
+ addFile("input.xlsx", fileXlsx).
157
+ arg("-i").arg("input.xlsx").
158
+ arg("-o").arg("output.csv").
159
+ saveFile("output.csv")
160
+
161
+ return e.run()
162
+ }
163
+
145
164
  wf.body(func(args) {
146
165
  importFile := file.importFile(args.fileHandle)
147
166
  datasetSpec := args.columns.getSpec(args.datasetRef)
@@ -187,6 +206,18 @@ wf.body(func(args) {
187
206
  xsvType = fileNameParts[len(fileNameParts)-1]
188
207
  }
189
208
 
209
+ // Use detected delimiter from UI if available (file extension may not match actual delimiter)
210
+ if args.detectedXsvType != undefined {
211
+ xsvType = args.detectedXsvType
212
+ }
213
+
214
+ // Handle XLSX files by converting to CSV first
215
+ if xsvType == "xlsx" {
216
+ xlsxToCsvRun := runXlsxToCsv(importFile.file)
217
+ importFile.file = xlsxToCsvRun.getFile("output.csv")
218
+ xsvType = "csv"
219
+ }
220
+
190
221
  // Handle FASTA files by converting to TSV first
191
222
  if xsvType == "fasta" || xsvType == "fa" {
192
223
  fastaToTsvRun := runFastaToTsv(importFile.file)
@@ -240,7 +271,10 @@ wf.body(func(args) {
240
271
  identityThreshold: args.settings.identity,
241
272
  similarityType: args.settings.similarityType,
242
273
  clonesFasta: clonesFasta,
243
- assayFasta: assayFasta
274
+ assayFasta: assayFasta,
275
+ lessSensitive: args.lessSensitive,
276
+ mem: args.mem,
277
+ cpu: args.cpu
244
278
  })
245
279
 
246
280
  mmseqsOutput := runMmseqs.output("mmseqsOutput")
@@ -287,7 +321,7 @@ wf.body(func(args) {
287
321
  )
288
322
 
289
323
  cols := []
290
- for _, col in ["bits", "evalue", "target", "pident", "alnlen", "mismatch", "gapopen", "qstart", "qend", "tstart", "tend"] {
324
+ for _, col in ["bits", "evalue", "query", "pident", "alnlen", "mismatch", "gapopen", "qstart", "qend", "tstart", "tend"] {
291
325
  cols = append(cols,
292
326
  pt.col(col).maxBy(
293
327
  pt.col("evalue").multiply(-1),
@@ -296,7 +330,7 @@ wf.body(func(args) {
296
330
  )
297
331
  }
298
332
 
299
- df = df.groupBy("query").agg(cols...)
333
+ df = df.groupBy("target").agg(cols...)
300
334
 
301
335
  // Add link column for linker pFrame (assayLinkerPframe)
302
336
  df = df.withColumns(
@@ -312,13 +346,13 @@ wf.body(func(args) {
312
346
  })
313
347
  // import how many matches per assay sequence found
314
348
  assayDf = assayDf.join(
315
- df.groupBy("target").agg(
316
- pt.col("query").count().alias("queryCount")
349
+ df.groupBy("query").agg(
350
+ pt.col("target").count().alias("queryCount")
317
351
  ),
318
352
  {
319
353
  how: "left",
320
354
  leftOn: "seqId",
321
- rightOn: "target"
355
+ rightOn: "query"
322
356
  }
323
357
  )
324
358
  assayDf.save("assayData.tsv")
@@ -327,7 +361,7 @@ wf.body(func(args) {
327
361
  clonesDf := df.join(assayDf,
328
362
  {
329
363
  how: "left",
330
- leftOn: "target",
364
+ leftOn: "query",
331
365
  rightOn: "seqId"
332
366
  }
333
367
  )
@@ -16,15 +16,26 @@ self.body(func(args) {
16
16
  clonesFasta := args.clonesFasta
17
17
  assayFasta := args.assayFasta
18
18
 
19
+ mem := "8GiB"
20
+ cpu := 1
21
+ if !is_undefined(args.mem) {
22
+ mem = string(args.mem) + "GiB"
23
+ }
24
+ if !is_undefined(args.cpu) {
25
+ cpu = args.cpu
26
+ }
27
+
19
28
  mmseqs := exec.builder().
20
29
  software(mmseqsSw).
21
- mem("32GiB").
22
- cpu(1).
30
+ mem(mem).
31
+ cpu(cpu).
23
32
  arg("easy-search").
24
- arg("clones.fasta").
25
33
  arg("assay.fasta").
34
+ arg("clones.fasta").
26
35
  arg("results.tsv").
27
36
  arg("tmp").
37
+ arg("--threads").arg(string(cpu)).
38
+ arg("--max-seqs").arg("10000").
28
39
  arg("--search-type").arg(mmseqsSearchType).
29
40
  arg("--cov-mode").arg(string(covMode)).
30
41
  arg("-c").arg(string(coverageThreshold)).
@@ -34,6 +45,15 @@ self.body(func(args) {
34
45
  mmseqs = mmseqs.arg("--alignment-mode").arg("3")
35
46
  }
36
47
 
48
+ lessSensitive := is_undefined(args.lessSensitive) ? false : args.lessSensitive
49
+ if lessSensitive {
50
+ mmseqs = mmseqs.
51
+ arg("--comp-bias-corr").arg("0").
52
+ arg("--mask").arg("0").
53
+ arg("--exact-kmer-matching").arg("1").
54
+ arg("-k").arg("7")
55
+ }
56
+
37
57
  mmseqs = mmseqs.
38
58
  addFile("clones.fasta", clonesFasta).
39
59
  addFile("assay.fasta", assayFasta).