npm - @platforma-open/milaboratories.mixcr-clonotyping-2.workflow - Versions diffs - 2.23.3 → 2.25.0 - Mend

@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 2.23.3 → 2.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/.turbo/turbo-build.log +1 -1
package/CHANGELOG.md +12 -0
package/dist/tengo/lib/calculate-export-specs.lib.tengo +46 -0
package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
package/dist/tengo/tpl/calculate-preset-info.plj.gz +0 -0
package/dist/tengo/tpl/list-presets.plj.gz +0 -0
package/dist/tengo/tpl/main.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
package/dist/tengo/tpl/prerun.plj.gz +0 -0
package/dist/tengo/tpl/process-single-cell.plj.gz +0 -0
package/dist/tengo/tpl/process.plj.gz +0 -0
package/dist/tengo/tpl/test.columns-calculate.plj.gz +0 -0
package/dist/tengo/tpl/test.columns.test.plj.gz +0 -0
package/package.json +3 -3
package/src/aggregate-by-clonotype-key.tpl.tengo +16 -3
package/src/calculate-export-specs.lib.tengo +46 -0
package/src/main.tpl.tengo +4 -1
package/src/mixcr-analyze.tpl.tengo +0 -2
package/src/mixcr-export.tpl.tengo +2 -0
package/src/process-single-cell.tpl.tengo +8 -2
package/src/process.tpl.tengo +53 -5

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,6 +1,6 @@
  WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
-> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@2.23.3 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
+> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@2.25.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
 > rm -rf dist && pl-tengo check && pl-tengo build
   info: Skipping unknown file type: test/columns.test.ts

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,17 @@
 # @platforma-open/milaboratories.mixcr-clonotyping.workflow
+## 2.25.0
+### Minor Changes
+- 427ffa6: Include junction lengths and total number of added nt
+## 2.24.0
+### Minor Changes
+- 35856bb: Fix for caching and deduplication & SDK Upgrade
 ## 2.23.3
 ### Patch Changes

package/dist/tengo/lib/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -541,6 +541,52 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
 	}
+	junctionTypes := [ "VJ", "VD", "DJ" ]
+	for junctionType in junctionTypes {
+		feature := junctionType + "Junction"
+		naRegex := junctionType == "VJ" ? "region_not_covered" : "no_d_gene"
+		columnsSpecPerClonotypeNoAggregates += [ {
+			column: "nLength" + feature,
+			id: "n-length-" + text.to_lower(junctionType) + "-junction",
+			naRegex: naRegex,
+			allowNA: true,
+			spec: {
+				name: "pl7.app/vdj/sequenceLength",
+				valueType: "Int",
+				domain: {
+					"pl7.app/vdj/feature": feature,
+					"pl7.app/alphabet": "nucleotide"
+				},
+				annotations: a(orderP, false, {
+					"pl7.app/label": "Length of " + junctionType + " Junction nt"
+				})
+			}
+		} ]
+		exportArgs += [ [ "-nLength", feature ] ]
+		orderP -= 100
+	}
+	columnsSpecPerClonotypeNoAggregates += [ {
+		column: "nLengthTotalAdded",
+		id: "n-length-total-added",
+		naRegex: "region_not_covered|no_d_gene",
+		allowNA: true,
+		spec: {
+			name: "pl7.app/vdj/sequenceLength",
+			valueType: "Int",
+			domain: {
+				"pl7.app/alphabet": "nucleotide"
+			},
+			annotations: a(orderP, false, {
+				"pl7.app/label": "Total number of added nt"
+			})
+		}
+	} ]
+	orderP -= 100
 	geneHitColumnVariants := [ {
 			name: "pl7.app/vdj/geneHitWithAllele",

package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/calculate-preset-info.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/list-presets.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-analyze.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-export.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/prerun.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process-single-cell.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/test.columns-calculate.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/test.columns.test.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
   "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
-  "version": "2.23.3",
+  "version": "2.25.0",
   "description": "Tengo-based template",
   "dependencies": {
-    "@platforma-sdk/workflow-tengo": "^4.9.3",
+    "@platforma-sdk/workflow-tengo": "^4.10.0",
     "@platforma-open/milaboratories.software-mixcr": "4.7.0-190-develop"
   },
   "devDependencies": {
     "@platforma-sdk/tengo-builder": "^2.1.12",
-    "@platforma-sdk/test": "^1.39.15",
+    "@platforma-sdk/test": "^1.39.21",
     "vitest": "~2.1.9",
     "typescript": "~5.5.4"
   },

package/src/aggregate-by-clonotype-key.tpl.tengo CHANGED Viewed

@@ -1,13 +1,16 @@
+//tengo:hash_override 8B1CFC68-2542-4C81-8CD4-F927B75F3975
 ll := import("@platforma-sdk/workflow-tengo:ll")
 self := import("@platforma-sdk/workflow-tengo:tpl")
 pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
 slices := import("@platforma-sdk/workflow-tengo:slices")
 maps := import("@platforma-sdk/workflow-tengo:maps")
 units := import("@platforma-sdk/workflow-tengo:units")
-clonotypeLabel := import(":clonotype-label")
 pt := import("@platforma-sdk/workflow-tengo:pt")
-math := import("math")
+clonotypeLabel := import(":clonotype-label")
+math := import("math")
 json := import("json")
 self.defineOutputs("tsv")
@@ -38,6 +41,8 @@ self.body(func(inputs) {
 	baseSchemaForRead := schemaPerSample + [ { column: "clonotypeKey", type: "String" } ]
+	ll.print("__THE_LOG__ AGGREGATE BY CLONOTYPE KEY: " + json.encode(maps.getKeys(inputMap)))
 	for sKey in maps.getKeys(inputMap) {
 		inputFile := inputMap[sKey]
 		key := json.decode(sKey)
@@ -70,7 +75,7 @@ self.body(func(inputs) {
 	aggExpressions := []
 	for colDef in schemaPerClonotypeNoAggregates {
-		if colDef.column == "clonotypeLabel" {
+		if colDef.column == "clonotypeLabel" || colDef.column == "nLengthTotalAdded" {
 			continue
 		}
 		aggExpressions = append(aggExpressions,
@@ -86,6 +91,14 @@ self.body(func(inputs) {
 	aggregatedDf := currentDf.groupBy("clonotypeKey").agg(aggExpressions...)
+	// Calculate total added nucleotides: VDJunction + DJJunction for chains with D genes, VJJunction for chains without D genes
+	aggregatedDf = aggregatedDf.withColumns(
+		pt.when(pt.col("nLengthVDJunction").isNotNull().and(pt.col("nLengthVDJunction").neq("no_d_gene"))).
+			then(pt.col("nLengthVDJunction").cast("Int").plus(pt.col("nLengthDJJunction").cast("Int"))).
+			otherwise(pt.col("nLengthVJJunction").cast("Int")).
+			alias("nLengthTotalAdded")
+	)
 	aggregatedDf = clonotypeLabel.addClonotypeLabelColumnsPt(aggregatedDf, "clonotypeKey", "clonotypeLabel", pt)
 	aggregatedDf.save("output.tsv")

package/src/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -540,6 +540,52 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
 		}
 	}
+	// Include junction lengths for calculation of total added nucleotides
+	junctionTypes := [ "VJ", "VD", "DJ" ]
+	for junctionType in junctionTypes {
+		feature := junctionType + "Junction"
+		naRegex := junctionType == "VJ" ? "region_not_covered" : "no_d_gene"
+		columnsSpecPerClonotypeNoAggregates += [ {
+			column: "nLength" + feature,
+			id: "n-length-" + text.to_lower(junctionType) + "-junction",
+			naRegex: naRegex,
+			allowNA: true,
+			spec: {
+				name: "pl7.app/vdj/sequenceLength",
+				valueType: "Int",
+				domain: {
+					"pl7.app/vdj/feature": feature,
+					"pl7.app/alphabet": "nucleotide"
+				},
+				annotations: a(orderP, false, {
+					"pl7.app/label": "Length of " + junctionType + " Junction nt"
+				})
+			}
+		} ]
+		exportArgs += [ [ "-nLength", feature ] ]
+		orderP -= 100
+	}
+	// Spec for total added length (calculated by pt in aggregate-by-clonotype-key, no flag)
+	columnsSpecPerClonotypeNoAggregates += [ {
+		column: "nLengthTotalAdded",
+		id: "n-length-total-added",
+		naRegex: "region_not_covered|no_d_gene",
+		allowNA: true,
+		spec: {
+			name: "pl7.app/vdj/sequenceLength",
+			valueType: "Int",
+			domain: {
+				"pl7.app/alphabet": "nucleotide"
+			},
+			annotations: a(orderP, false, {
+				"pl7.app/label": "Total number of added nt"
+			})
+		}
+	} ]
+	orderP -= 100
 	// VDJC Hits
 	geneHitColumnVariants := [ {

package/src/main.tpl.tengo CHANGED Viewed

@@ -167,7 +167,10 @@ wf.body(func(args) {
 		clonotypes: pframes.exportFrame(runMixcr.output("clonotypes")),
 		fileImports: smart.createMapResource(maps.mapValues(fileImports, func(im) {
 			return im.handle
-		}))
+		})),
+		// will be attached as output, to keep this intermediate results as long as block is not deleted
+		resultsToCache: runMixcr.output("resultsToCache")
 	}
 	if !is_undefined(libraryImportHandle) {

package/src/mixcr-analyze.tpl.tengo CHANGED Viewed

@@ -1,7 +1,5 @@
 //tengo:hash_override D70EDB25-6FF6-4615-966D-B79B04B5751C
-// mixcr analyze
 self := import("@platforma-sdk/workflow-tengo:tpl")
 smart := import("@platforma-sdk/workflow-tengo:smart")
 pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")

package/src/mixcr-export.tpl.tengo CHANGED Viewed

@@ -1,3 +1,5 @@
+//tengo:hash_override 553D3080-FB87-44BC-BEEB-DB9EB5F773D8
 ll := import("@platforma-sdk/workflow-tengo:ll")
 self := import("@platforma-sdk/workflow-tengo:tpl.light")
 pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")

package/src/process-single-cell.tpl.tengo CHANGED Viewed

@@ -1,3 +1,5 @@
+//tengo:hash_override 8E4B6062-534D-4322-9CED-EC75E499A10A
 ll := import("@platforma-sdk/workflow-tengo:ll")
 self := import("@platforma-sdk/workflow-tengo:tpl.light")
 pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
@@ -5,17 +7,21 @@ assets := import("@platforma-sdk/workflow-tengo:assets")
 exec := import("@platforma-sdk/workflow-tengo:exec")
 maps := import("@platforma-sdk/workflow-tengo:maps")
 slices := import("@platforma-sdk/workflow-tengo:slices")
-clonotypeLabel := import(":clonotype-label")
 units := import("@platforma-sdk/workflow-tengo:units")
+pt := import("@platforma-sdk/workflow-tengo:pt")
+clonotypeLabel := import(":clonotype-label")
 json := import("json")
 math := import("math")
-pt := import("@platforma-sdk/workflow-tengo:pt")
 self.defineOutputs("abundanceTsv", "clonotypeTsv", "propertiesAPrimaryTsv", "propertiesASecondaryTsv", "propertiesBPrimaryTsv", "propertiesBSecondaryTsv")
 ptablerSw := assets.importSoftware("@platforma-open/milaboratories.software-ptabler:main")
 self.body(func(inputs) {
+	ll.print("__THE_LOG__ PROCESS SINGLE CELL")
 	byCellTagA := inputs[pConstants.VALUE_FIELD_NAME]
 	inputDataMeta := byCellTagA.getDataAsJson()
 	ll.assert(inputDataMeta.keyLength == 1, "unexpected number of aggregation axes")

package/src/process.tpl.tengo CHANGED Viewed

@@ -8,11 +8,13 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
 slices := import("@platforma-sdk/workflow-tengo:slices")
 maps := import("@platforma-sdk/workflow-tengo:maps")
 sets := import("@platforma-sdk/workflow-tengo:sets")
+anonymize := import("@platforma-sdk/workflow-tengo:anonymize")
 calculateExportSpecs := import(":calculate-export-specs")
 json := import("json")
 text := import("text")
+times := import("times")
 mixcrAnalyzeTpl := assets.importTemplate(":mixcr-analyze")
 mixcrExportTpl := assets.importTemplate(":mixcr-export")
@@ -250,6 +252,10 @@ self.body(func(inputs) {
 			// in the body template
 			passAggregationAxesNames: true,
+			// this will cache each individual step result for 30 minutes, even if execution of some of the steps
+			// failed or whole execution was interrupted
+			stepCache: 30 * times.minute,
 			// will be automatically propagated to all output specs
 			traceSteps: [{type: "milaboratories.mixcr-clonotyping", id: blockId, importance: 20, label: "MiXCR " + presetCommonName}],
@@ -265,6 +271,9 @@ self.body(func(inputs) {
 				library: library,
 				presetContent: presetContent
 			},
+			// by passing those parameters as meta fields we allow for recovery and deduplication mechanisms
+			// to pick up the results from executions with different values for CPU and Memory overrides
             metaExtra: {
                 perProcessMemGB: perProcessMemGB,
                 perProcessCPUs: perProcessCPUs
@@ -309,6 +318,8 @@ self.body(func(inputs) {
 		return schema
 	}
+	resultsToCache := {}
 	for chain in chains {
 		chainInfo := chainInfos[chain]
 		ll.assert(!is_undefined(chainInfo), "chainInfo not found for chain %v", chain)
@@ -378,6 +389,10 @@ self.body(func(inputs) {
 				// will be automatically propagated to all output specs
 				traceSteps: [{type: "milaboratories.mixcr-clonotyping.export", id: blockId + "." + chain, importance: 80, label: chainInfo.name}],
+				// this will cache each individual step result for 30 minutes, even if execution of some of the steps
+				// failed or whole execution was interrupted
+				stepCache: 30 * times.minute,
 				extra: {
 					params: {
 						chains: chainInfo.mixcrFilter,
@@ -438,12 +453,22 @@ self.body(func(inputs) {
 			} ]
 		}
+		resultsToCache["clonotypeTable/" + chain] = exportResults.outputData("clonotypeTable")
 		aggregateByCloneKey := pframes.processColumn(
 			exportResults.output("clonotypeTable"),
 			aggregateByClonotypeKeyTpl,
 			aggregationOutputs,
 			{
-				aggregate: ["pl7.app/sampleId"],
+				aggregate: [ {
+					name: "pl7.app/sampleId",
+					anonymize: true
+				} ],
+				// this will cache each individual step result for 30 minutes, even if execution of some of the steps
+				// failed or whole execution was interrupted
+				stepCache: 30 * times.minute,
 				extra: {
 					params: {
 						mainAbundanceColumnNormalized: mainAbundanceColumnNormalized,
@@ -457,11 +482,15 @@ self.body(func(inputs) {
 		)
 		if isSingleCell {
-			// collecting results for possible future single cell processing
+			// collecting results for future single cell processing
 			perChainResults[chain] = {
 				tsvForSingleCell: exportResults.output("clonotypeTableForSingleCell"),
 				clonotypeProperties: aggregateByCloneKey.output("clonotypeProperties")
 			}
+			// caching intermediate results until the block is removed
+			resultsToCache["clonotypeTableForSingleCell/" + chain] = exportResults.outputData("clonotypeTableForSingleCell")
+			resultsToCache["clonotypeProperties/" + chain] = aggregateByCloneKey.outputData("clonotypeProperties")
 		} else {
 			// only adding data outputs if we are in bulk mode
 			exportResults.addXsvOutputToBuilder(clonotypes, "byCloneKeyBySample", "byCloneKeyBySample/" + chain + "/")
@@ -597,14 +626,22 @@ self.body(func(inputs) {
 			// Using A chain files as main PColumn for xsv conversion through pframes.processColumn.
 			// Since we aggregate by sample, this is just a single pass through the data.
+			// manual data anonymization
+			anonymizationResult := anonymize.anonymizePKeys({
+				byCellTagAData: perChainResults[chainA].tsvForSingleCell.data,
+				byCellTagBData: perChainResults[chainB].tsvForSingleCell.data
+			}, [0])
+			byCellTagAData := anonymizationResult.result.byCellTagAData
+			byCellTagBData := anonymizationResult.result.byCellTagBData
 			singleCellResult := pframes.processColumn(
-				perChainResults[chainA].tsvForSingleCell,
+				{ spec: perChainResults[chainA].tsvForSingleCell.spec, data: byCellTagAData },
 				processSingleCellTpl,
 				singleCellOutputs,
 				{
 					aggregate: ["pl7.app/sampleId"],
 					extra: {
-						byCellTagB: perChainResults[chainB].tsvForSingleCell.data,
+						byCellTagB: byCellTagBData,
 						propertiesA: perChainResults[chainA].clonotypeProperties.data,
 						propertiesB: perChainResults[chainB].clonotypeProperties.data,
 						params: {
@@ -616,7 +653,16 @@ self.body(func(inputs) {
 				}
 			)
-			singleCellResult.addXsvOutputToBuilder(clonotypes, "abundanceTable", "clonotypeProperties/abundance/" + receptor + "/")
+			// singleCellResult.addXsvOutputToBuilder(clonotypes, "abundanceTable", "clonotypeProperties/abundance/" + receptor + "/")
+			for columnName in singleCellResult.listXsvColumns("abundanceTable") {
+				anonymizedData := singleCellResult.outputData("abundanceTable", columnName)
+				clonotypes.add(
+					"clonotypeProperties/abundance/" + receptor + "/" + columnName,
+					singleCellResult.outputSpec("abundanceTable", columnName),
+					anonymize.deanonymizePKeys(anonymizedData, 0, [0], anonymizationResult.mapping)
+				)
+				resultsToCache["clonotypeProperties/abundance/" + receptor + "/" + columnName] = anonymizedData
+			}
 			singleCellResult.addXsvOutputToBuilder(clonotypes, "aggregates", "clonotypeProperties/aggregates/" + receptor + "/")
 			singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesAPrimary", "clonotypeProperties/" + receptor + "/aPrimary/")
@@ -639,6 +685,8 @@ self.body(func(inputs) {
 		"clns.spec": mixcrResults.outputSpec("clns"),
 		"clns.data": mixcrResults.outputData("clns"),
+		"resultsToCache": resultsToCache,
 		clonotypes: clonotypes.build()
 	}
 })