npm - @platforma-open/milaboratories.mixcr-clonotyping-2.workflow - Versions diffs - 3.10.0 → 3.11.0 - Mend

@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.10.0 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/.turbo/turbo-build.log +1 -1
package/CHANGELOG.md +12 -0
package/dist/tengo/lib/calculate-export-specs.lib.tengo +1 -1
package/dist/tengo/lib/qc-report-columns.lib.tengo +1 -1
package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
package/dist/tengo/tpl/calculate-preset-info.plj.gz +0 -0
package/dist/tengo/tpl/export-report.plj.gz +0 -0
package/dist/tengo/tpl/list-presets.plj.gz +0 -0
package/dist/tengo/tpl/main.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
package/dist/tengo/tpl/prerun.plj.gz +0 -0
package/dist/tengo/tpl/process-single-cell.plj.gz +0 -0
package/dist/tengo/tpl/process.plj.gz +0 -0
package/dist/tengo/tpl/test.columns-calculate.plj.gz +0 -0
package/dist/tengo/tpl/test.columns.test.plj.gz +0 -0
package/package.json +4 -4
package/src/aggregate-by-clonotype-key.tpl.tengo +6 -1
package/src/calculate-export-specs.lib.tengo +1 -1
package/src/mixcr-export.tpl.tengo +16 -4
package/src/process.tpl.tengo +23 -5
package/src/qc-report-columns.lib.tengo +1 -1

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,6 +1,6 @@
  WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
-> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.10.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
+> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.11.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
 > rm -rf dist && pl-tengo check && pl-tengo build
   info: Skipping unknown file type: test/columns.test.ts

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,17 @@
 # @platforma-open/milaboratories.mixcr-clonotyping.workflow
+## 3.11.0
+### Minor Changes
+- 58a43db: Better memory management in export
+## 3.10.1
+### Patch Changes
+- fed5c72: Support parquet format (update SDK)
 ## 3.10.0
 ### Minor Changes

package/dist/tengo/lib/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -942,7 +942,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId) {
 							})
 						}
 					} ],
-					storageFormat: "Binary",
+					storageFormat: "Parquet",
 					partitionKeyLength: 1
 				}
 			}

package/dist/tengo/lib/qc-report-columns.lib.tengo CHANGED Viewed

@@ -1353,7 +1353,7 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
         reportColumnsSpec: {
             axes: axes,
             columns: columns,
-            storageFormat: "Binary",
+            storageFormat: "Parquet",
             partitionKeyLength: 0
         }
     }

package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/calculate-preset-info.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/export-report.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/list-presets.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-analyze.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-export.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/prerun.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process-single-cell.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/test.columns-calculate.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/test.columns.test.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
   "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
-  "version": "3.10.0",
+  "version": "3.11.0",
   "description": "Tengo-based template",
   "dependencies": {
-    "@platforma-sdk/workflow-tengo": "^5.3.3",
+    "@platforma-sdk/workflow-tengo": "^5.4.4",
     "@platforma-open/milaboratories.software-mixcr": "4.7.0-233-develop"
   },
   "devDependencies": {
     "@milaboratories/ts-configs": "^1.0.6",
-    "@platforma-sdk/tengo-builder": "^2.3.0",
-    "@platforma-sdk/test": "^1.44.0",
+    "@platforma-sdk/tengo-builder": "^2.3.2",
+    "@platforma-sdk/test": "^1.44.21",
     "vitest": "~2.1.9",
     "typescript": "~5.6.3"
   },

package/src/aggregate-by-clonotype-key.tpl.tengo CHANGED Viewed

@@ -32,9 +32,14 @@ self.body(func(inputs) {
 	inputMap := inputData.inputs()
 	numberOfSamples := len(inputMap)
+	memGB := int(math.max(numberOfSamples, 64))
+	if !is_undefined(inputs.perProcessMemGB) && inputs.perProcessMemGB > memGB {
+		memGB = inputs.perProcessMemGB
+	}
 	wf := pt.workflow().
 		inMediumQueue().
-		mem(int(math.max(numberOfSamples, 64)) * units.GiB).
+		mem(memGB * units.GiB).
 		cpu(int(math.max(numberOfSamples, 32)))
 	dataFrames := []

package/src/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -942,7 +942,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId) {
 							})
 						}
 					} ],
-					storageFormat: "Binary",
+					storageFormat: "Parquet",
 					partitionKeyLength: 1
 				}
 			}

package/src/mixcr-export.tpl.tengo CHANGED Viewed

@@ -23,7 +23,7 @@ self.body(func(inputs) {
 	isLibraryFileGzipped := params.isLibraryFileGzipped
 	chains := params.chains
 	exportArgs := params.exportArgs
 	clonotypeKeyColumns := params.clonotypeKeyColumns
 	clonotypeKeyArgs := params.clonotypeKeyArgs
@@ -38,6 +38,18 @@ self.body(func(inputs) {
 	mainAbundanceColumnIsReadCount := params.mainAbundanceColumnIsReadCount == true
 	mainIsProductiveArgs := params.mainIsProductiveArgs
+	exportMemGB := undefined
+	if !is_undefined(inputs.perProcessMemGB) {
+		exportMemGB = int(1.0*inputs.perProcessMemGB/4.0)
+		if exportMemGB < 12 {
+			exportMemGB = 12
+		}
+	} else {
+		exportMemGB = 12
+	}
+	ptMemGB := int(2.0*exportMemGB/3.0)
 	hashKeyDerivationExpressionPt := func(sourceColumns) {
 		return pt.concatStr(
 			slices.map(sourceColumns, func(colName) { return pt.col(colName).fillNull("") }),
@@ -50,7 +62,7 @@ self.body(func(inputs) {
 	createExport := func(additionalAction) {
 		mixcrCmdBuilder := exec.builder().
 			inMediumQueue().
-			ram("12GiB").
+			mem(string(exportMemGB) + "GiB").
 		    cpu(2).
 			printErrStreamToStdout().
 			dontSaveStdoutOrStderr().
@@ -104,7 +116,7 @@ self.body(func(inputs) {
 	// PTabler processing for main TSV output
 	wfMain := pt.workflow().
 		inMediumQueue().
-		mem("8GiB").
+		mem(ptMemGB).
 		cpu(2)
 	frameInputMap := {
@@ -144,7 +156,7 @@ self.body(func(inputs) {
 		// PTabler processing for single-cell TSV output
 		wfSingleCell := pt.workflow().
 			inMediumQueue().
-			mem("4GiB").
+			mem(ptMemGB).
 			cpu(2)
 		frameLoadOps := {

package/src/process.tpl.tengo CHANGED Viewed

@@ -355,7 +355,7 @@ self.body(func(inputs) {
 				settings: {
 					axes: [ axisByClonotypeKeyGen(chain) ],
 					columns: columnsSpecPerSample,
-					storageFormat: "Binary",
+					storageFormat: "Parquet",
 					partitionKeyLength: 0
 				},
 				mem: "16GiB",
@@ -416,6 +416,15 @@ self.body(func(inputs) {
 						isLibraryFileGzipped: isLibraryFileGzipped
 					}, { removeUndefs: true }),
 					library: library
+				},
+				// by passing those parameters as meta fields we allow for recovery and deduplication mechanisms
+				// to pick up the results from executions with different values for CPU and Memory overrides
+				//
+				// Note: here we are passing the same parameters as in the main process step and will apply formula
+				// inside the export template to calculate memory requirements.
+				//
+				metaExtra: {
+					perProcessMemGB: perProcessMemGB
 				}
 			}
 		)
@@ -448,7 +457,7 @@ self.body(func(inputs) {
 				settings: {
 					axes: [ axisByClonotypeKeyGen(chain) ],
 					columns: columnsSpecPerClonotypeNoAggregates + columnsSpecPerClonotypeAggregates,
-					storageFormat: "Binary",
+					storageFormat: "Parquet",
 					partitionKeyLength: 0
 				},
 				mem: "24GiB",
@@ -484,6 +493,15 @@ self.body(func(inputs) {
 						schemaPerClonotypeAggregates: columnsToSchema(columnsSpecPerClonotypeAggregates),
 						schemaPerSample: columnsToSchema(columnsSpecPerSample)
 					}
+				},
+				// by passing those parameters as meta fields we allow for recovery and deduplication mechanisms
+				// to pick up the results from executions with different values for CPU and Memory overrides
+				//
+				// Note: here we are passing the same parameters as in the main process step and will apply formula
+				// inside the agg template to calculate memory requirements.
+				//
+				metaExtra: {
+					perProcessMemGB: perProcessMemGB
 				}
 			}
 		)
@@ -533,7 +551,7 @@ self.body(func(inputs) {
 						spec: sampleIdAxisSpec
 					}, axisByScClonotypeKeyGen(receptor) ],
 					columns: columnsSpecPerSampleSc,
-					storageFormat: "Binary",
+					storageFormat: "Parquet",
 					partitionKeyLength: 1
 				},
 				mem: "16GiB",
@@ -546,7 +564,7 @@ self.body(func(inputs) {
 				settings: {
 					axes: [ axisByScClonotypeKeyGen(receptor) ],
 					columns: columnsSpecPerClonotypeSc,
-					storageFormat: "Binary"
+					storageFormat: "Parquet"
 				},
 				mem: "12GiB",
 				cpu: 2,
@@ -615,7 +633,7 @@ self.body(func(inputs) {
 									annotations: annotationsTransformation
 								}
 							}),
-							storageFormat: "Binary",
+							storageFormat: "Parquet",
 							partitionKeyLength: 0
 						},
 						mem: "24GiB",

package/src/qc-report-columns.lib.tengo CHANGED Viewed

@@ -1353,7 +1353,7 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
         reportColumnsSpec: {
             axes: axes,
             columns: columns,
-            storageFormat: "Binary",
+            storageFormat: "Parquet",
             partitionKeyLength: 0
         }
     }