@platforma-open/milaboratories.immune-assay-data.workflow 1.9.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/immune-assay-data/immune-assay-data/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.immune-assay-data.workflow@1.9.0 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
3
+ > @platforma-open/milaboratories.immune-assay-data.workflow@1.11.0 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/analysis.tpl.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.immune-assay-data.workflow
2
2
 
3
+ ## 1.11.0
4
+
5
+ ### Minor Changes
6
+
7
+ - ac74170: Improved performance on large datasets, eliminating disk and memory pressure
8
+
9
+ ## 1.10.0
10
+
11
+ ### Minor Changes
12
+
13
+ - 29a44a2: Improved performance on large datasets
14
+
3
15
  ## 1.9.0
4
16
 
5
17
  ### Minor Changes
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,20 +1,20 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.immune-assay-data.workflow",
3
- "version": "1.9.0",
3
+ "version": "1.11.0",
4
4
  "type": "module",
5
5
  "description": "Tengo-based template",
6
6
  "dependencies": {
7
- "@platforma-sdk/workflow-tengo": "5.10.1",
7
+ "@platforma-sdk/workflow-tengo": "5.11.0",
8
8
  "@platforma-open/soedinglab.software-mmseqs2": "1.18.3",
9
- "@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.1.3",
10
- "@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.2.0",
11
9
  "@platforma-open/milaboratories.immune-assay-data.add-header": "1.1.3",
10
+ "@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.2.0",
11
+ "@platforma-open/milaboratories.immune-assay-data.xlsx-to-csv": "1.1.0",
12
+ "@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.1.3",
12
13
  "@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv": "1.1.3",
13
- "@platforma-open/milaboratories.immune-assay-data.check-content-empty": "1.0.1",
14
- "@platforma-open/milaboratories.immune-assay-data.xlsx-to-csv": "1.1.0"
14
+ "@platforma-open/milaboratories.immune-assay-data.check-content-empty": "1.0.1"
15
15
  },
16
16
  "devDependencies": {
17
- "@platforma-sdk/tengo-builder": "2.4.30"
17
+ "@platforma-sdk/tengo-builder": "2.5.5"
18
18
  },
19
19
  "scripts": {
20
20
  "build": "rm -rf dist && pl-tengo check && pl-tengo build",
@@ -315,7 +315,7 @@ self.body(func(args) {
315
315
  )
316
316
 
317
317
  cols := []
318
- for _, col in ["bits", "evalue", "query", "pident", "alnlen", "mismatch",
318
+ for _, col in ["bits", "evalue", "target", "pident", "alnlen", "mismatch",
319
319
  "gapopen", "qstart", "qend", "tstart", "tend"] {
320
320
  cols = append(cols,
321
321
  pt.col(col).maxBy(
@@ -325,7 +325,7 @@ self.body(func(args) {
325
325
  )
326
326
  }
327
327
 
328
- dfRes = dfRes.groupBy("target").agg(cols...)
328
+ dfRes = dfRes.groupBy("query").agg(cols...)
329
329
  // Add link column for linker pFrame (assayLinkerPframe)
330
330
  dfRes = dfRes.withColumns(
331
331
  pt.lit(1).cast("Int64").alias("link")
@@ -340,13 +340,13 @@ self.body(func(args) {
340
340
 
341
341
  // import how many matches per assay sequence found
342
342
  assayDf = assayDf.join(
343
- dfRes.groupBy("query").agg(
344
- pt.col("target").count().alias("queryCount")
343
+ dfRes.groupBy("target").agg(
344
+ pt.col("query").count().alias("queryCount")
345
345
  ),
346
346
  {
347
347
  how: "left",
348
348
  leftOn: "seqId",
349
- rightOn: "query"
349
+ rightOn: "target"
350
350
  }
351
351
  )
352
352
  assayDf.save("assay_data.tsv")
@@ -355,7 +355,7 @@ self.body(func(args) {
355
355
  clonesDf := dfRes.join(assayDf,
356
356
  {
357
357
  how: "left",
358
- leftOn: "query",
358
+ leftOn: "target",
359
359
  rightOn: "seqId"
360
360
  }
361
361
  )
@@ -174,7 +174,7 @@ self.body(func(inputs) {
174
174
  cloneImportResults := xsv.importFile(
175
175
  inputs.clonesDataTsv, "tsv", {
176
176
  axes: [{
177
- column: "target",
177
+ column: "query",
178
178
  spec: inputs.datasetSpec.axesSpec[1]
179
179
  }],
180
180
  columns: cloneColumns,
@@ -189,11 +189,11 @@ self.body(func(inputs) {
189
189
  inputs.bestAlignmentTsv, "tsv", {
190
190
  axes: [
191
191
  {
192
- column: "target",
192
+ column: "query",
193
193
  spec: inputs.datasetSpec.axesSpec[1]
194
194
  },
195
195
  {
196
- column: "query",
196
+ column: "target",
197
197
  spec: {
198
198
  name: "pl7.app/vdj/assay/sequenceId",
199
199
  type: "String",
@@ -2,6 +2,7 @@ self := import("@platforma-sdk/workflow-tengo:tpl")
2
2
  ll := import("@platforma-sdk/workflow-tengo:ll")
3
3
  exec := import("@platforma-sdk/workflow-tengo:exec")
4
4
  assets:= import("@platforma-sdk/workflow-tengo:assets")
5
+ math := import("math")
5
6
  mmseqsSw := assets.importSoftware("@platforma-open/soedinglab.software-mmseqs2:main")
6
7
 
7
8
  self.defineOutputs("mmseqsOutput")
@@ -16,11 +17,12 @@ self.body(func(args) {
16
17
  clonesFasta := args.clonesFasta
17
18
  assayFasta := args.assayFasta
18
19
 
19
- mem := "8GiB"
20
- cpu := 1
20
+ baseMemGiB := 64
21
21
  if !is_undefined(args.metaInputs.mem) {
22
- mem = string(args.metaInputs.mem) + "GiB"
22
+ baseMemGiB = args.metaInputs.mem
23
23
  }
24
+ mem := string(int(math.max(64, baseMemGiB))) + "GiB"
25
+ cpu := 1
24
26
  if !is_undefined(args.metaInputs.cpu) {
25
27
  cpu = args.metaInputs.cpu
26
28
  }
@@ -30,8 +32,8 @@ self.body(func(args) {
30
32
  mem(mem).
31
33
  cpu(cpu).
32
34
  arg("easy-search").
33
- arg("assay.fasta").
34
35
  arg("clones.fasta").
36
+ arg("assay.fasta").
35
37
  arg("results.tsv").
36
38
  arg("tmp").
37
39
  arg("--threads").arg(string(cpu)).