npm - @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow - Versions diffs - 1.19.1 → 1.19.3 - Mend

@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow 1.19.1 → 1.19.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/.turbo/turbo-build.log +1 -1
package/CHANGELOG.md +12 -0
package/dist/tengo/lib/calculate-export-specs.lib.tengo +68 -1
package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
package/dist/tengo/tpl/export-report.plj.gz +0 -0
package/dist/tengo/tpl/main.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
package/dist/tengo/tpl/process.plj.gz +0 -0
package/dist/tengo/tpl/repseqio-library.plj.gz +0 -0
package/package.json +1 -1
package/src/aggregate-by-clonotype-key.tpl.tengo +23 -0
package/src/calculate-export-specs.lib.tengo +68 -1
package/src/mixcr-export.tpl.tengo +12 -2
package/src/process.tpl.tengo +18 -4

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,6 +1,6 @@
  WARN  Issue while reading "/home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
-> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.1 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
+> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.3 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
 > rm -rf dist && pl-tengo check && pl-tengo build
 Processing "src/aggregate-by-clonotype-key.tpl.tengo"...

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,17 @@
 # @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow
+## 1.19.3
+### Patch Changes
+- 8685e8d: Add new mutation columns
+## 1.19.2
+### Patch Changes
+- 4aeac00: Link memory limits of downstream operations (XSV conversion, MiXCR export, PTabler) to user-specified perProcessMemGB override to prevent over-scheduling on local runs
 ## 1.19.1
 ### Patch Changes

package/dist/tengo/lib/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -619,7 +619,74 @@ inFrameFeatures := {
 		exportArgs += [ [ "-allAAMutationsCount", "FR1Begin", "FR4End" ] ]
 	}
+	if assemblingFeature == "VDJRegion" {
+		orderP = 7500
+		nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
+		crossRegionMutations := [ {
+			exportFlag: "nMutationsCount",
+			feature: nMutationsFeature,
+			id: "n-mutations-total",
+			label: "Nt mutations",
+			specName: "pl7.app/vdj/sequence/nMutations",
+			rankingOrder: "decreasing"
+		}, {
+			exportFlag: "aaMutationsCount",
+			feature: "CDR1,CDR2",
+			id: "aa-mutations-cdr",
+			label: "AA mutations (CDR)",
+			specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
+			rankingOrder: "decreasing"
+		}, {
+			exportFlag: "aaMutationsCount",
+			feature: "FR1,FR2,FR3,FR4",
+			id: "aa-mutations-fwr",
+			label: "AA mutations (FWR)",
+			specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
+			rankingOrder: "increasing"
+		} ]
+		for col in crossRegionMutations {
+			columnsSpecPerClonotypeNoAggregates += [ {
+				column: col.exportFlag + col.feature,
+				id: col.id,
+				allowNA: true,
+				naRegex: "region_not_covered",
+				spec: {
+					valueType: "Int",
+					name: col.specName,
+					annotations: a(orderP, false, {
+						"pl7.app/label": col.label,
+						"pl7.app/isScore": "true",
+						"pl7.app/score/rankingOrder": col.rankingOrder
+					})
+				}
+			} ]
+			exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
+			orderP -= 100
+		}
+		columnsSpecPerClonotypeNoAggregates += [ {
+			column: "fractionCDRMutations",
+			id: "fraction-cdr-mutations",
+			naRegex: "^[a-z_]*$",
+			allowNA: true,
+			spec: {
+				valueType: "Double",
+				name: "pl7.app/vdj/sequence/fractionCDRMutations",
+				annotations: a(orderP, false, {
+					"pl7.app/label": "CDR mutation fraction",
+					"pl7.app/isScore": "true",
+					"pl7.app/score/rankingOrder": "decreasing",
+					"pl7.app/format": ".2f"
+				})
+			}
+		} ]
+	}
 	germlineVFeature := "GermlineVCDR3Part"

package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/export-report.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-analyze.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-export.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/repseqio-library.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow",
-  "version": "1.19.1",
+  "version": "1.19.3",
   "description": "MiXCR Amplicon Alignment Workflow",
   "type": "module",
   "dependencies": {

package/src/aggregate-by-clonotype-key.tpl.tengo CHANGED Viewed

@@ -72,11 +72,16 @@ self.body(func(inputs) {
 	}
 	aggExpressions := []
+	hasFractionCDRMutations := false
 	for colDef in schemaPerClonotypeNoAggregates {
 		if colDef.column == "clonotypeLabel" {
 			continue
 		}
+		if colDef.column == "fractionCDRMutations" {
+			hasFractionCDRMutations = true
+			continue
+		}
 		aggExpressions = append(aggExpressions,
 			pt.col(colDef.column).maxBy(pt.col(mainAbundanceColumnNormalized)).alias(colDef.column)
 		)
@@ -90,6 +95,24 @@ self.body(func(inputs) {
 	aggregatedDf := currentDf.groupBy("clonotypeKey").agg(aggExpressions...)
+	// Calculate CDR mutation fraction: CDR / (CDR + FWR), fallback 1.0 when NA or zero denominator
+	if hasFractionCDRMutations {
+		cdr := "aaMutationsCountCDR1,CDR2"
+		fwr := "aaMutationsCountFR1,FR2,FR3,FR4"
+		aggregatedDf = aggregatedDf.withColumns(
+			pt.when(
+				pt.col(cdr).isNotNull().
+					and(pt.col(fwr).isNotNull()).
+					and(pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double")).gt(0.0))
+			).then(
+				pt.col(cdr).cast("Double").truediv(
+					pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double"))
+				)
+			).otherwise(pt.lit(1.0)).
+				alias("fractionCDRMutations")
+		)
+	}
 	aggregatedDf = clonotypeLabel.addClonotypeLabelColumnsPt(aggregatedDf, "clonotypeKey", "clonotypeLabel", pt)
 	cdr3Df := aggregatedDf.select(

package/src/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -619,7 +619,74 @@ inFrameFeatures := {
 		exportArgs += [ [ "-allAAMutationsCount", "FR1Begin", "FR4End" ] ]
 	}
+	// Cross-region SHM mutation counts (only for full VDJRegion assembling)
+	if assemblingFeature == "VDJRegion" {
+		orderP = 7500
+		nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
+		crossRegionMutations := [ {
+			exportFlag: "nMutationsCount",
+			feature: nMutationsFeature,
+			id: "n-mutations-total",
+			label: "Nt mutations",
+			specName: "pl7.app/vdj/sequence/nMutations",
+			rankingOrder: "decreasing"
+		}, {
+			exportFlag: "aaMutationsCount",
+			feature: "CDR1,CDR2",
+			id: "aa-mutations-cdr",
+			label: "AA mutations (CDR)",
+			specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
+			rankingOrder: "decreasing"
+		}, {
+			exportFlag: "aaMutationsCount",
+			feature: "FR1,FR2,FR3,FR4",
+			id: "aa-mutations-fwr",
+			label: "AA mutations (FWR)",
+			specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
+			rankingOrder: "increasing"
+		} ]
+		for col in crossRegionMutations {
+			columnsSpecPerClonotypeNoAggregates += [ {
+				column: col.exportFlag + col.feature,
+				id: col.id,
+				allowNA: true,
+				naRegex: "region_not_covered",
+				spec: {
+					valueType: "Int",
+					name: col.specName,
+					annotations: a(orderP, false, {
+						"pl7.app/label": col.label,
+						"pl7.app/isScore": "true",
+						"pl7.app/score/rankingOrder": col.rankingOrder
+					})
+				}
+			} ]
+			exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
+			orderP -= 100
+		}
+		// CDR mutation fraction (computed in aggregate-by-clonotype-key)
+		columnsSpecPerClonotypeNoAggregates += [ {
+			column: "fractionCDRMutations",
+			id: "fraction-cdr-mutations",
+			naRegex: "^[a-z_]*$",
+			allowNA: true,
+			spec: {
+				valueType: "Double",
+				name: "pl7.app/vdj/sequence/fractionCDRMutations",
+				annotations: a(orderP, false, {
+					"pl7.app/label": "CDR mutation fraction",
+					"pl7.app/isScore": "true",
+					"pl7.app/score/rankingOrder": "decreasing",
+					"pl7.app/format": ".2f"
+				})
+			}
+		} ]
+	}
 	// Export germline CDR3 part mutations for both VDJRegion and CDR3 assembling features
 	// These will be summed to create the CDR3 mutations count columns
 	germlineVFeature := "GermlineVCDR3Part"

package/src/mixcr-export.tpl.tengo CHANGED Viewed

@@ -8,6 +8,7 @@ exec := import("@platforma-sdk/workflow-tengo:exec")
 pt := import("@platforma-sdk/workflow-tengo:pt")
 clonotypeLabel := import(":clonotype-label")
+math := import("math")
 json := import("json")
 text := import("text")
@@ -176,6 +177,15 @@ self.body(func(inputs) {
 	useProductiveFilter := is_undefined(stopCodonTypes) || len(stopCodonTypes) == 0
+	// Memory for downstream operations, linked to user override with hardcoded floors
+	baseMemGB := 64
+	if !is_undefined(params.perProcessMemGB) {
+		baseMemGB = params.perProcessMemGB
+	}
+	memGB := func(floorGB, divisor) {
+		return string(int(math.max(floorGB, baseMemGB / divisor))) + "GB"
+	}
 	hashKeyDerivationExpressionPt := func(sourceColumns) {
 		return pt.concatStr(
 			slices.map(sourceColumns, func(colName) { return pt.col(colName) }),
@@ -186,7 +196,7 @@ self.body(func(inputs) {
 	createExport := func(additionalAction) {
 		mixcrCmdBuilder := exec.builder().
 			inMediumQueue().
-			mem("12GB").
+			mem(memGB(12, 4)).
 		    cpu(2).
 			printErrStreamToStdout().
 			software(mixcrSw).
@@ -233,7 +243,7 @@ self.body(func(inputs) {
 	// Simplified PTabler processing for main TSV output
 	wfMain := pt.workflow().
 		inMediumQueue().
-		mem("8GB").
+		mem(memGB(8, 4)).
 		cpu(2)
 	frameInputMap := {

package/src/process.tpl.tengo CHANGED Viewed

@@ -9,6 +9,7 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
 slices := import("@platforma-sdk/workflow-tengo:slices")
 maps := import("@platforma-sdk/workflow-tengo:maps")
+math := import("math")
 json := import("json")
 text := import("text")
@@ -71,6 +72,18 @@ self.body(func(inputs) {
 	limitInput := inputs.limitInput
 	perProcessMemGB := params.perProcessMemGB
 	perProcessCPUs := params.perProcessCPUs
+	// Base memory for scheduling downstream operations, linked to user override
+	// with current hardcoded values as floors (see memGB helper below)
+	baseMemGB := 64
+	if !is_undefined(perProcessMemGB) {
+		baseMemGB = perProcessMemGB
+	}
+	// Returns max(floor, baseMemGB / divisor) as a string like "32GB"
+	memGB := func(floorGB, divisor) {
+		return string(int(math.max(floorGB, baseMemGB / divisor))) + "GB"
+	}
 	fileExtension := inputSpec.domain["pl7.app/fileExtension"]
 	sampleIdAxisSpec := inputSpec.axesSpec[0]
@@ -249,7 +262,7 @@ self.body(func(inputs) {
 			storageFormat: "Parquet",
 			partitionKeyLength: 0
 		},
-		mem: "16GB",
+		mem: memGB(16, 2),
 		cpu: 2,
 		name: "byCloneKeyBySample",
 		path: ["tsv"]
@@ -271,7 +284,8 @@ self.body(func(inputs) {
 					aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
 					cdr3SeqColumns: cdr3SeqColumns,
 					stopCodonTypes: params.stopCodonTypes,
-					stopCodonReplacements: params.stopCodonReplacements
+					stopCodonReplacements: params.stopCodonReplacements,
+					perProcessMemGB: perProcessMemGB
 				}, { removeUndefs: true })
 			}
 		}
@@ -302,7 +316,7 @@ self.body(func(inputs) {
 			columns: columnsSpecPerClonotypeNoAggregates + columnsSpecPerClonotypeAggregates,
 			storageFormat: "Parquet"
 		},
-		mem: "12GB",
+		mem: memGB(12, 4),
 		cpu: 2,
 		name: "aggregates",
 		path: ["tsv"]
@@ -314,7 +328,7 @@ self.body(func(inputs) {
 			columns: cdr3DistanceColumnsSpec,
 			storageFormat: "Parquet"
 		},
-		mem: "8GB",
+		mem: memGB(8, 8),
 		cpu: 1,
 		name: "cdr3Distances",
 		path: ["cdr3DistancesTsv"]