@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.10.0 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.10.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
3
+ > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.11.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  info: Skipping unknown file type: test/columns.test.ts
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.mixcr-clonotyping.workflow
2
2
 
3
+ ## 3.11.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 58a43db: Better memory management in export
8
+
9
+ ## 3.10.1
10
+
11
+ ### Patch Changes
12
+
13
+ - fed5c72: Support parquet format (update SDK)
14
+
3
15
  ## 3.10.0
4
16
 
5
17
  ### Minor Changes
@@ -942,7 +942,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId) {
942
942
  })
943
943
  }
944
944
  } ],
945
- storageFormat: "Binary",
945
+ storageFormat: "Parquet",
946
946
  partitionKeyLength: 1
947
947
  }
948
948
  }
@@ -1353,7 +1353,7 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
1353
1353
  reportColumnsSpec: {
1354
1354
  axes: axes,
1355
1355
  columns: columns,
1356
- storageFormat: "Binary",
1356
+ storageFormat: "Parquet",
1357
1357
  partitionKeyLength: 0
1358
1358
  }
1359
1359
  }
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,15 +1,15 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
3
- "version": "3.10.0",
3
+ "version": "3.11.0",
4
4
  "description": "Tengo-based template",
5
5
  "dependencies": {
6
- "@platforma-sdk/workflow-tengo": "^5.3.3",
6
+ "@platforma-sdk/workflow-tengo": "^5.4.4",
7
7
  "@platforma-open/milaboratories.software-mixcr": "4.7.0-233-develop"
8
8
  },
9
9
  "devDependencies": {
10
10
  "@milaboratories/ts-configs": "^1.0.6",
11
- "@platforma-sdk/tengo-builder": "^2.3.0",
12
- "@platforma-sdk/test": "^1.44.0",
11
+ "@platforma-sdk/tengo-builder": "^2.3.2",
12
+ "@platforma-sdk/test": "^1.44.21",
13
13
  "vitest": "~2.1.9",
14
14
  "typescript": "~5.6.3"
15
15
  },
@@ -32,9 +32,14 @@ self.body(func(inputs) {
32
32
  inputMap := inputData.inputs()
33
33
  numberOfSamples := len(inputMap)
34
34
 
35
+ memGB := int(math.max(numberOfSamples, 64))
36
+ if !is_undefined(inputs.perProcessMemGB) && inputs.perProcessMemGB > memGB {
37
+ memGB = inputs.perProcessMemGB
38
+ }
39
+
35
40
  wf := pt.workflow().
36
41
  inMediumQueue().
37
- mem(int(math.max(numberOfSamples, 64)) * units.GiB).
42
+ mem(memGB * units.GiB).
38
43
  cpu(int(math.max(numberOfSamples, 32)))
39
44
 
40
45
  dataFrames := []
@@ -942,7 +942,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId) {
942
942
  })
943
943
  }
944
944
  } ],
945
- storageFormat: "Binary",
945
+ storageFormat: "Parquet",
946
946
  partitionKeyLength: 1
947
947
  }
948
948
  }
@@ -23,7 +23,7 @@ self.body(func(inputs) {
23
23
  isLibraryFileGzipped := params.isLibraryFileGzipped
24
24
  chains := params.chains
25
25
  exportArgs := params.exportArgs
26
-
26
+
27
27
  clonotypeKeyColumns := params.clonotypeKeyColumns
28
28
  clonotypeKeyArgs := params.clonotypeKeyArgs
29
29
 
@@ -38,6 +38,18 @@ self.body(func(inputs) {
38
38
  mainAbundanceColumnIsReadCount := params.mainAbundanceColumnIsReadCount == true
39
39
  mainIsProductiveArgs := params.mainIsProductiveArgs
40
40
 
41
+ exportMemGB := undefined
42
+ if !is_undefined(inputs.perProcessMemGB) {
43
+ exportMemGB = int(1.0*inputs.perProcessMemGB/4.0)
44
+ if exportMemGB < 12 {
45
+ exportMemGB = 12
46
+ }
47
+ } else {
48
+ exportMemGB = 12
49
+ }
50
+ ptMemGB := int(2.0*exportMemGB/3.0)
51
+
52
+
41
53
  hashKeyDerivationExpressionPt := func(sourceColumns) {
42
54
  return pt.concatStr(
43
55
  slices.map(sourceColumns, func(colName) { return pt.col(colName).fillNull("") }),
@@ -50,7 +62,7 @@ self.body(func(inputs) {
50
62
  createExport := func(additionalAction) {
51
63
  mixcrCmdBuilder := exec.builder().
52
64
  inMediumQueue().
53
- ram("12GiB").
65
+ mem(string(exportMemGB) + "GiB").
54
66
  cpu(2).
55
67
  printErrStreamToStdout().
56
68
  dontSaveStdoutOrStderr().
@@ -104,7 +116,7 @@ self.body(func(inputs) {
104
116
  // PTabler processing for main TSV output
105
117
  wfMain := pt.workflow().
106
118
  inMediumQueue().
107
- mem("8GiB").
119
+ mem(ptMemGB).
108
120
  cpu(2)
109
121
 
110
122
  frameInputMap := {
@@ -144,7 +156,7 @@ self.body(func(inputs) {
144
156
  // PTabler processing for single-cell TSV output
145
157
  wfSingleCell := pt.workflow().
146
158
  inMediumQueue().
147
- mem("4GiB").
159
+ mem(ptMemGB).
148
160
  cpu(2)
149
161
 
150
162
  frameLoadOps := {
@@ -355,7 +355,7 @@ self.body(func(inputs) {
355
355
  settings: {
356
356
  axes: [ axisByClonotypeKeyGen(chain) ],
357
357
  columns: columnsSpecPerSample,
358
- storageFormat: "Binary",
358
+ storageFormat: "Parquet",
359
359
  partitionKeyLength: 0
360
360
  },
361
361
  mem: "16GiB",
@@ -416,6 +416,15 @@ self.body(func(inputs) {
416
416
  isLibraryFileGzipped: isLibraryFileGzipped
417
417
  }, { removeUndefs: true }),
418
418
  library: library
419
+ },
420
+ // By passing these parameters as meta fields, we allow the recovery and deduplication mechanisms
421
+ // to pick up the results from executions with different values for CPU and Memory overrides
422
+ //
423
+ // Note: here we are passing the same parameters as in the main process step and will apply a formula
424
+ // inside the export template to calculate memory requirements.
425
+ //
426
+ metaExtra: {
427
+ perProcessMemGB: perProcessMemGB
419
428
  }
420
429
  }
421
430
  )
@@ -448,7 +457,7 @@ self.body(func(inputs) {
448
457
  settings: {
449
458
  axes: [ axisByClonotypeKeyGen(chain) ],
450
459
  columns: columnsSpecPerClonotypeNoAggregates + columnsSpecPerClonotypeAggregates,
451
- storageFormat: "Binary",
460
+ storageFormat: "Parquet",
452
461
  partitionKeyLength: 0
453
462
  },
454
463
  mem: "24GiB",
@@ -484,6 +493,15 @@ self.body(func(inputs) {
484
493
  schemaPerClonotypeAggregates: columnsToSchema(columnsSpecPerClonotypeAggregates),
485
494
  schemaPerSample: columnsToSchema(columnsSpecPerSample)
486
495
  }
496
+ },
497
+ // By passing these parameters as meta fields, we allow the recovery and deduplication mechanisms
498
+ // to pick up the results from executions with different values for CPU and Memory overrides
499
+ //
500
+ // Note: here we are passing the same parameters as in the main process step and will apply a formula
501
+ // inside the agg template to calculate memory requirements.
502
+ //
503
+ metaExtra: {
504
+ perProcessMemGB: perProcessMemGB
487
505
  }
488
506
  }
489
507
  )
@@ -533,7 +551,7 @@ self.body(func(inputs) {
533
551
  spec: sampleIdAxisSpec
534
552
  }, axisByScClonotypeKeyGen(receptor) ],
535
553
  columns: columnsSpecPerSampleSc,
536
- storageFormat: "Binary",
554
+ storageFormat: "Parquet",
537
555
  partitionKeyLength: 1
538
556
  },
539
557
  mem: "16GiB",
@@ -546,7 +564,7 @@ self.body(func(inputs) {
546
564
  settings: {
547
565
  axes: [ axisByScClonotypeKeyGen(receptor) ],
548
566
  columns: columnsSpecPerClonotypeSc,
549
- storageFormat: "Binary"
567
+ storageFormat: "Parquet"
550
568
  },
551
569
  mem: "12GiB",
552
570
  cpu: 2,
@@ -615,7 +633,7 @@ self.body(func(inputs) {
615
633
  annotations: annotationsTransformation
616
634
  }
617
635
  }),
618
- storageFormat: "Binary",
636
+ storageFormat: "Parquet",
619
637
  partitionKeyLength: 0
620
638
  },
621
639
  mem: "24GiB",
@@ -1353,7 +1353,7 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
1353
1353
  reportColumnsSpec: {
1354
1354
  axes: axes,
1355
1355
  columns: columns,
1356
- storageFormat: "Binary",
1356
+ storageFormat: "Parquet",
1357
1357
  partitionKeyLength: 0
1358
1358
  }
1359
1359
  }