@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.10.1 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.10.1 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
3
+ > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.12.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  info: Skipping unknown file type: test/columns.test.ts
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.mixcr-clonotyping.workflow
2
2
 
3
+ ## 3.12.0
4
+
5
+ ### Minor Changes
6
+
7
+ - fa19090: clonotypeLabel in export raw data
8
+
9
+ ## 3.11.0
10
+
11
+ ### Minor Changes
12
+
13
+ - 58a43db: Better memory management in export
14
+
3
15
  ## 3.10.1
4
16
 
5
17
  ### Patch Changes
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,15 +1,15 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
3
- "version": "3.10.1",
3
+ "version": "3.12.0",
4
4
  "description": "Tengo-based template",
5
5
  "dependencies": {
6
- "@platforma-sdk/workflow-tengo": "^5.4.2",
6
+ "@platforma-sdk/workflow-tengo": "^5.5.3",
7
7
  "@platforma-open/milaboratories.software-mixcr": "4.7.0-233-develop"
8
8
  },
9
9
  "devDependencies": {
10
10
  "@milaboratories/ts-configs": "^1.0.6",
11
- "@platforma-sdk/tengo-builder": "^2.3.2",
12
- "@platforma-sdk/test": "^1.44.19",
11
+ "@platforma-sdk/tengo-builder": "^2.3.3",
12
+ "@platforma-sdk/test": "^1.45.3",
13
13
  "vitest": "~2.1.9",
14
14
  "typescript": "~5.6.3"
15
15
  },
@@ -32,9 +32,14 @@ self.body(func(inputs) {
32
32
  inputMap := inputData.inputs()
33
33
  numberOfSamples := len(inputMap)
34
34
 
35
+ memGB := int(math.max(numberOfSamples, 64))
36
+ if !is_undefined(inputs.perProcessMemGB) && inputs.perProcessMemGB > memGB {
37
+ memGB = inputs.perProcessMemGB
38
+ }
39
+
35
40
  wf := pt.workflow().
36
41
  inMediumQueue().
37
- mem(int(math.max(numberOfSamples, 64)) * units.GiB).
42
+ mem(memGB * units.GiB).
38
43
  cpu(int(math.max(numberOfSamples, 32)))
39
44
 
40
45
  dataFrames := []
@@ -8,6 +8,7 @@ slices := import("@platforma-sdk/workflow-tengo:slices")
8
8
  assets := import("@platforma-sdk/workflow-tengo:assets")
9
9
  exec := import("@platforma-sdk/workflow-tengo:exec")
10
10
  pt := import("@platforma-sdk/workflow-tengo:pt")
11
+ clonotypeLabel := import(":clonotype-label")
11
12
 
12
13
  json := import("json")
13
14
 
@@ -23,7 +24,7 @@ self.body(func(inputs) {
23
24
  isLibraryFileGzipped := params.isLibraryFileGzipped
24
25
  chains := params.chains
25
26
  exportArgs := params.exportArgs
26
-
27
+
27
28
  clonotypeKeyColumns := params.clonotypeKeyColumns
28
29
  clonotypeKeyArgs := params.clonotypeKeyArgs
29
30
 
@@ -38,6 +39,18 @@ self.body(func(inputs) {
38
39
  mainAbundanceColumnIsReadCount := params.mainAbundanceColumnIsReadCount == true
39
40
  mainIsProductiveArgs := params.mainIsProductiveArgs
40
41
 
42
+ exportMemGB := undefined
43
+ if !is_undefined(inputs.perProcessMemGB) {
44
+ exportMemGB = int(1.0*inputs.perProcessMemGB/4.0)
45
+ if exportMemGB < 12 {
46
+ exportMemGB = 12
47
+ }
48
+ } else {
49
+ exportMemGB = 12
50
+ }
51
+ ptMemGB := int(2.0*exportMemGB/3.0)
52
+
53
+
41
54
  hashKeyDerivationExpressionPt := func(sourceColumns) {
42
55
  return pt.concatStr(
43
56
  slices.map(sourceColumns, func(colName) { return pt.col(colName).fillNull("") }),
@@ -50,7 +63,7 @@ self.body(func(inputs) {
50
63
  createExport := func(additionalAction) {
51
64
  mixcrCmdBuilder := exec.builder().
52
65
  inMediumQueue().
53
- ram("12GiB").
66
+ mem(string(exportMemGB) + "GiB").
54
67
  cpu(2).
55
68
  printErrStreamToStdout().
56
69
  dontSaveStdoutOrStderr().
@@ -104,7 +117,7 @@ self.body(func(inputs) {
104
117
  // PTabler processing for main TSV output
105
118
  wfMain := pt.workflow().
106
119
  inMediumQueue().
107
- mem("8GiB").
120
+ mem(ptMemGB).
108
121
  cpu(2)
109
122
 
110
123
  frameInputMap := {
@@ -121,6 +134,9 @@ self.body(func(inputs) {
121
134
  hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey")
122
135
  )
123
136
 
137
+ // Generate clonotypeLabel (C-XXXXX or C-XXXXX-RANK) from clonotypeKey for bulk data
138
+ dfMain = clonotypeLabel.addClonotypeLabelColumnsPt(dfMain, "clonotypeKey", "clonotypeLabel", pt)
139
+
124
140
  dfMain.save("output.tsv")
125
141
  ptablerResultMain := wfMain.run()
126
142
  processedTsv := ptablerResultMain.getFile("output.tsv")
@@ -144,7 +160,7 @@ self.body(func(inputs) {
144
160
  // PTabler processing for single-cell TSV output
145
161
  wfSingleCell := pt.workflow().
146
162
  inMediumQueue().
147
- mem("4GiB").
163
+ mem(ptMemGB).
148
164
  cpu(2)
149
165
 
150
166
  frameLoadOps := {
@@ -416,6 +416,15 @@ self.body(func(inputs) {
416
416
  isLibraryFileGzipped: isLibraryFileGzipped
417
417
  }, { removeUndefs: true }),
418
418
  library: library
419
+ },
420
+ // by passing those parameters as meta fields we allow for recovery and deduplication mechanisms
421
+ // to pick up the results from executions with different values for CPU and Memory overrides
422
+ //
423
+ // Note: here we are passing the same parameters as in the main process step and will apply a formula
424
+ // inside the export template to calculate memory requirements.
425
+ //
426
+ metaExtra: {
427
+ perProcessMemGB: perProcessMemGB
419
428
  }
420
429
  }
421
430
  )
@@ -484,6 +493,15 @@ self.body(func(inputs) {
484
493
  schemaPerClonotypeAggregates: columnsToSchema(columnsSpecPerClonotypeAggregates),
485
494
  schemaPerSample: columnsToSchema(columnsSpecPerSample)
486
495
  }
496
+ },
497
+ // by passing those parameters as meta fields we allow for recovery and deduplication mechanisms
498
+ // to pick up the results from executions with different values for CPU and Memory overrides
499
+ //
500
+ // Note: here we are passing the same parameters as in the main process step and will apply formula
501
+ // inside the agg template to calculate memory requirements.
502
+ //
503
+ metaExtra: {
504
+ perProcessMemGB: perProcessMemGB
487
505
  }
488
506
  }
489
507
  )