npm - @platforma-open/milaboratories.mixcr-clonotyping-2.workflow - Versions diffs - 3.23.5 → 3.23.6 - Mend

@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.23.5 → 3.23.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/.turbo/turbo-build.log +1 -1
package/CHANGELOG.md +6 -0
package/dist/tengo/lib/calculate-export-specs.lib.tengo +80 -1
package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
package/dist/tengo/tpl/calculate-preset-info.plj.gz +0 -0
package/dist/tengo/tpl/export-report.plj.gz +0 -0
package/dist/tengo/tpl/list-presets.plj.gz +0 -0
package/dist/tengo/tpl/main.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
package/dist/tengo/tpl/prerun.plj.gz +0 -0
package/dist/tengo/tpl/process-single-cell.plj.gz +0 -0
package/dist/tengo/tpl/process.plj.gz +0 -0
package/dist/tengo/tpl/test.columns-calculate.plj.gz +0 -0
package/dist/tengo/tpl/test.columns.test.plj.gz +0 -0
package/package.json +1 -1
package/src/aggregate-by-clonotype-key.tpl.tengo +23 -0
package/src/calculate-export-specs.lib.tengo +80 -1
package/src/process-single-cell.tpl.tengo +101 -4
package/src/process.tpl.tengo +100 -2

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,6 +1,6 @@
  WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
-> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.23.5 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
+> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.23.6 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
 > shx rm -rf dist && pl-tengo check && pl-tengo build
   info: Skipping unknown file type: test/columns.test.ts

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,11 @@
 # @platforma-open/milaboratories.mixcr-clonotyping.workflow
+## 3.23.6
+### Patch Changes
+- b8a3a50: New mutation columns
 ## 3.23.5
 ### Patch Changes

package/dist/tengo/lib/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -767,6 +767,83 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
+	shmMapping := []
+	if assemblingFeature == "VDJRegion" {
+		nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
+		crossRegionMutations := [ {
+			exportFlag: "nMutationsCount",
+			feature: nMutationsFeature,
+			id: "n-mutations-total",
+			label: "Nt mutations",
+			specName: "pl7.app/vdj/sequence/nMutations",
+			rankingOrder: "decreasing",
+			outputColumn: "nMutations"
+		}, {
+			exportFlag: "aaMutationsCount",
+			feature: "CDR1,CDR2",
+			id: "aa-mutations-cdr",
+			label: "AA mutations (CDR)",
+			specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
+			rankingOrder: "decreasing",
+			outputColumn: "nAAMutationsCDR"
+		}, {
+			exportFlag: "aaMutationsCount",
+			feature: "FR1,FR2,FR3,FR4",
+			id: "aa-mutations-fwr",
+			label: "AA mutations (FWR)",
+			specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
+			rankingOrder: "increasing",
+			outputColumn: "nAAMutationsFWR"
+		} ]
+		for col in crossRegionMutations {
+			columnsSpecPerClonotypeNoAggregates += [ {
+				column: col.exportFlag + col.feature,
+				id: col.id,
+				allowNA: true,
+				naRegex: "region_not_covered",
+				spec: {
+					valueType: "Int",
+					name: col.specName,
+					annotations: a(orderP, false, {
+						"pl7.app/label": col.label,
+						"pl7.app/isScore": "true",
+						"pl7.app/score/rankingOrder": col.rankingOrder
+					})
+				}
+			} ]
+			exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
+			shmMapping = append(shmMapping, {
+				outputColumn: col.outputColumn,
+				tsvColumn: col.exportFlag + col.feature
+			})
+			orderP -= 100
+		}
+		columnsSpecPerClonotypeNoAggregates += [ {
+			column: "fractionCDRMutations",
+			id: "fraction-cdr-mutations",
+			naRegex: "^[a-z_]*$",
+			allowNA: true,
+			spec: {
+				valueType: "Double",
+				name: "pl7.app/vdj/sequence/fractionCDRMutations",
+				annotations: a(orderP, false, {
+					"pl7.app/label": "CDR mutation fraction",
+					"pl7.app/isScore": "true",
+					"pl7.app/score/rankingOrder": "decreasing",
+					"pl7.app/format": ".2f"
+				})
+			}
+		} ]
+		orderP -= 100
+	}
 	flagColumnVariants := [ {
 			columnPrefix: "isProductive",
 			arg: "-isProductive",
@@ -1042,7 +1119,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
 		exportArgs: exportArgs,
 		hashCellKey: hashCellKey,
-		cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen
+		cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen,
+		shmMapping: shmMapping
 	}
 }

package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/calculate-preset-info.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/export-report.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/list-presets.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-analyze.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-export.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/prerun.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process-single-cell.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/test.columns-calculate.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/test.columns.test.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
-  "version": "3.23.5",
+  "version": "3.23.6",
   "description": "Tengo-based template",
   "dependencies": {
     "@platforma-sdk/workflow-tengo": "5.8.2",

package/src/aggregate-by-clonotype-key.tpl.tengo CHANGED Viewed

@@ -78,11 +78,16 @@ self.body(func(inputs) {
 	}
 	aggExpressions := []
+	hasFractionCDRMutations := false
 	for colDef in schemaPerClonotypeNoAggregates {
 		if colDef.column == "clonotypeLabel" || colDef.column == "nLengthTotalAdded" {
 			continue
 		}
+		if colDef.column == "fractionCDRMutations" {
+			hasFractionCDRMutations = true
+			continue
+		}
 		aggExpressions = append(aggExpressions,
 			pt.col(colDef.column).maxBy(pt.col(mainAbundanceColumnNormalized)).alias(colDef.column)
 		)
@@ -104,6 +109,24 @@ self.body(func(inputs) {
 			alias("nLengthTotalAdded")
 	)
+	// Calculate CDR mutation fraction: CDR / (CDR + FWR), fallback 1.0 when NA or zero denominator
+	if hasFractionCDRMutations {
+		cdr := "aaMutationsCountCDR1,CDR2"
+		fwr := "aaMutationsCountFR1,FR2,FR3,FR4"
+		aggregatedDf = aggregatedDf.withColumns(
+			pt.when(
+				pt.col(cdr).isNotNull().
+					and(pt.col(fwr).isNotNull()).
+					and(pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double")).gt(0.0))
+			).then(
+				pt.col(cdr).cast("Double").truediv(
+					pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double"))
+				)
+			).otherwise(pt.lit(1.0)).
+				alias("fractionCDRMutations")
+		)
+	}
 	aggregatedDf = clonotypeLabel.addClonotypeLabelColumnsPt(aggregatedDf, "clonotypeKey", "clonotypeLabel", pt)
 	aggregatedDf.save("output.tsv")

package/src/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -765,6 +765,83 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
 		}
 	}
+	// Cross-region SHM mutation counts (only for full VDJRegion assembling)
+	// shmMapping: [{ outputColumn, tsvColumn }] for single-cell chain summing
+	shmMapping := []
+	if assemblingFeature == "VDJRegion" {
+		nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
+		crossRegionMutations := [ {
+			exportFlag: "nMutationsCount",
+			feature: nMutationsFeature,
+			id: "n-mutations-total",
+			label: "Nt mutations",
+			specName: "pl7.app/vdj/sequence/nMutations",
+			rankingOrder: "decreasing",
+			outputColumn: "nMutations"
+		}, {
+			exportFlag: "aaMutationsCount",
+			feature: "CDR1,CDR2",
+			id: "aa-mutations-cdr",
+			label: "AA mutations (CDR)",
+			specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
+			rankingOrder: "decreasing",
+			outputColumn: "nAAMutationsCDR"
+		}, {
+			exportFlag: "aaMutationsCount",
+			feature: "FR1,FR2,FR3,FR4",
+			id: "aa-mutations-fwr",
+			label: "AA mutations (FWR)",
+			specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
+			rankingOrder: "increasing",
+			outputColumn: "nAAMutationsFWR"
+		} ]
+		for col in crossRegionMutations {
+			columnsSpecPerClonotypeNoAggregates += [ {
+				column: col.exportFlag + col.feature,
+				id: col.id,
+				allowNA: true,
+				naRegex: "region_not_covered",
+				spec: {
+					valueType: "Int",
+					name: col.specName,
+					annotations: a(orderP, false, {
+						"pl7.app/label": col.label,
+						"pl7.app/isScore": "true",
+						"pl7.app/score/rankingOrder": col.rankingOrder
+					})
+				}
+			} ]
+			exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
+			shmMapping = append(shmMapping, {
+				outputColumn: col.outputColumn,
+				tsvColumn: col.exportFlag + col.feature
+			})
+			orderP -= 100
+		}
+		// CDR mutation fraction (computed in aggregate-by-clonotype-key)
+		columnsSpecPerClonotypeNoAggregates += [ {
+			column: "fractionCDRMutations",
+			id: "fraction-cdr-mutations",
+			naRegex: "^[a-z_]*$",
+			allowNA: true,
+			spec: {
+				valueType: "Double",
+				name: "pl7.app/vdj/sequence/fractionCDRMutations",
+				annotations: a(orderP, false, {
+					"pl7.app/label": "CDR mutation fraction",
+					"pl7.app/isScore": "true",
+					"pl7.app/score/rankingOrder": "decreasing",
+					"pl7.app/format": ".2f"
+				})
+			}
+		} ]
+		orderP -= 100
+	}
 	// Flags: productive, oof, stop codons
 	flagColumnVariants := [ {
@@ -1042,7 +1119,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
 		exportArgs: exportArgs,
 		hashCellKey: hashCellKey,
-		cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen
+		cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen,
+		shmMapping: shmMapping
 	}
 }

package/src/process-single-cell.tpl.tengo CHANGED Viewed

@@ -17,7 +17,7 @@ math := import("math")
 self.defineOutputs("abundanceTsv", "clonotypeTsv",
 	"propertiesAPrimaryTsv", "propertiesASecondaryTsv", "propertiesBPrimaryTsv", "propertiesBSecondaryTsv",
-	"cellsTsv")
+	"cellsTsv", "shmTsv")
 ptablerSw := assets.importSoftware("@platforma-open/milaboratories.software-ptabler:main")
@@ -37,6 +37,11 @@ self.body(func(inputs) {
 	schemaPerClonotypeNoAggregates := inputs.params.schemaPerClonotypeNoAggregates
+	shmMapping := inputs.params.shmMapping
+	if is_undefined(shmMapping) {
+		shmMapping = []
+	}
 	//
 	// Preprocessing
 	//
@@ -303,6 +308,96 @@ self.body(func(inputs) {
 	outputProcessingRunResult := outputProcessingWf.run()
+	propsAPrimaryFile := outputProcessingRunResult.getFile(chainMappings[0].finalOutFile)
+	propsBPrimaryFile := outputProcessingRunResult.getFile(chainMappings[2].finalOutFile)
+	// Sum SHM mutation columns across primary A+B chains
+	shmTsv := undefined
+	if len(shmMapping) > 0 {
+		shmSchema := [{ column: "scClonotypeKey", type: "String" }]
+		for m in shmMapping {
+			shmSchema = append(shmSchema, { column: m.tsvColumn, type: "String" })
+		}
+		shmWf := pt.workflow()
+		propsAShmDf := shmWf.frame(propsAPrimaryFile, {
+			xsvType: "tsv",
+			schema: shmSchema,
+			inferSchema: false
+		})
+		propsBShmDf := shmWf.frame(propsBPrimaryFile, {
+			xsvType: "tsv",
+			schema: shmSchema,
+			inferSchema: false,
+			id: "props_b_shm"
+		})
+		// Cast "region_not_covered" to null, then to Int
+		castExprs := []
+		for m in shmMapping {
+			castExprs = append(castExprs,
+				pt.when(pt.col(m.tsvColumn).eq("region_not_covered")).
+					then(pt.lit(undefined)).
+					otherwise(pt.col(m.tsvColumn)).
+					cast("Int").
+					alias(m.tsvColumn)
+			)
+		}
+		propsAShmDf = propsAShmDf.withColumns(castExprs...)
+		propsBShmDf = propsBShmDf.withColumns(castExprs...)
+		//Join A and B chains
+		leftCols := []
+		rightCols := []
+		for m in shmMapping {
+			leftCols = append(leftCols, { column: m.tsvColumn, rename: m.outputColumn + "_A" })
+			rightCols = append(rightCols, { column: m.tsvColumn, rename: m.outputColumn + "_B" })
+		}
+		shmCombinedDf := propsAShmDf.join(propsBShmDf, {
+			how: "full",
+			on: ["scClonotypeKey"],
+			coalesce: true,
+			leftColumns: leftCols,
+			rightColumns: rightCols
+		})
+		// Sum SHM mutation columns across primary A+B chains
+		for m in shmMapping {
+			shmCombinedDf = shmCombinedDf.withColumns(
+				pt.col(m.outputColumn + "_A").plus(pt.col(m.outputColumn + "_B")).
+					alias(m.outputColumn)
+			)
+		}
+		// Calculate CDR mutation fraction
+		cdr := "nAAMutationsCDR"
+		fwr := "nAAMutationsFWR"
+		shmCombinedDf = shmCombinedDf.withColumns(
+			pt.when(
+				pt.col(cdr).isNotNull().
+					and(pt.col(fwr).isNotNull()).
+					and(pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double")).gt(0.0))
+			).then(
+				pt.col(cdr).cast("Double").truediv(
+					pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double"))
+				)
+			).otherwise(pt.lit(1.0)).
+				alias("fractionCDRMutations")
+		)
+		shmOutputCols := ["scClonotypeKey"]
+		for m in shmMapping {
+			shmOutputCols = append(shmOutputCols, m.outputColumn)
+		}
+		shmOutputCols = append(shmOutputCols, "fractionCDRMutations")
+		shmCombinedDf.save("shm.tsv", { columns: shmOutputCols, xsvType: "tsv" })
+		shmRunResult := shmWf.run()
+		shmTsv = shmRunResult.getFile("shm.tsv")
+	}
 	return {
 		// must have sampleId and scClonotypeKey columns
 		abundanceTsv: abundanceTsv,
@@ -314,9 +409,11 @@ self.body(func(inputs) {
 		cellsTsv: cellsTsv,
 		// must have scClonotypeKey columns
-		propertiesAPrimaryTsv: outputProcessingRunResult.getFile(chainMappings[0].finalOutFile),
+		propertiesAPrimaryTsv: propsAPrimaryFile,
 		propertiesASecondaryTsv: outputProcessingRunResult.getFile(chainMappings[1].finalOutFile),
-		propertiesBPrimaryTsv: outputProcessingRunResult.getFile(chainMappings[2].finalOutFile),
-		propertiesBSecondaryTsv: outputProcessingRunResult.getFile(chainMappings[3].finalOutFile)
+		propertiesBPrimaryTsv: propsBPrimaryFile,
+		propertiesBSecondaryTsv: outputProcessingRunResult.getFile(chainMappings[3].finalOutFile),
+		shmTsv: shmTsv
 	}
 })

package/src/process.tpl.tengo CHANGED Viewed

@@ -233,6 +233,8 @@ self.body(func(inputs) {
 	mainAbundanceColumnNormalized := exportSpecs.mainAbundanceColumnNormalized
 	mainAbundanceColumnUnnormalized := exportSpecs.mainAbundanceColumnUnnormalized
+	shmMapping := exportSpecs.shmMapping
 	mainAbundanceColumnNormalizedArgs := exportSpecs.mainAbundanceColumnNormalizedArgs
 	mainAbundanceColumnUnnormalizedArgs := exportSpecs.mainAbundanceColumnUnnormalizedArgs
@@ -566,6 +568,77 @@ self.body(func(inputs) {
 	}
 	if isSingleCell {
+		// SHM mutation columns: build filtering set and combined output specs
+		hasShm := len(shmMapping) > 0
+		shmColumnNames := {}
+		shmOutputSpecs := []
+		if hasShm {
+			for m in shmMapping {
+				shmColumnNames[m.tsvColumn] = true
+			}
+			shmColumnNames["fractionCDRMutations"] = true
+			orderP := 10400
+			shmDefs := [ {
+				outputColumn: "nMutations",
+				label: "Nt mutations",
+				specName: "pl7.app/vdj/sequence/nMutations",
+				valueType: "Int",
+				rankingOrder: "decreasing"
+			}, {
+				outputColumn: "nAAMutationsCDR",
+				label: "AA mutations (CDR)",
+				specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
+				valueType: "Int",
+				rankingOrder: "decreasing"
+			}, {
+				outputColumn: "nAAMutationsFWR",
+				label: "AA mutations (FWR)",
+				specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
+				valueType: "Int",
+				rankingOrder: "increasing"
+			} ]
+			for def in shmDefs {
+				shmOutputSpecs = append(shmOutputSpecs, {
+					column: def.outputColumn,
+					id: def.outputColumn,
+					allowNA: true,
+					naRegex: "^[a-z_]*$",
+					spec: {
+						valueType: def.valueType,
+						name: def.specName,
+						annotations: {
+							"pl7.app/label": def.label,
+							"pl7.app/table/orderPriority": string(orderP),
+							"pl7.app/table/visibility": "optional",
+							"pl7.app/isScore": "true",
+							"pl7.app/score/rankingOrder": def.rankingOrder
+						}
+					}
+				})
+				orderP -= 100
+			}
+			shmOutputSpecs = append(shmOutputSpecs, {
+				column: "fractionCDRMutations",
+				id: "fraction-cdr-mutations",
+				naRegex: "^[a-z_]*$",
+				allowNA: true,
+				spec: {
+					valueType: "Double",
+					name: "pl7.app/vdj/sequence/fractionCDRMutations",
+					annotations: {
+						"pl7.app/label": "CDR mutation fraction",
+						"pl7.app/table/orderPriority": string(orderP),
+						"pl7.app/table/visibility": "optional",
+						"pl7.app/isScore": "true",
+						"pl7.app/score/rankingOrder": "decreasing"
+					}
+				}
+			})
+		}
 		for receptor in receptors {
 			receptorInfo := receptorInfos[receptor]
@@ -627,7 +700,10 @@ self.body(func(inputs) {
 				// Modify column visibility for TCR chains
 				isTCRChain := text.has_prefix(chain, "TCR")
-				columnsForSingleCell := columnsSpecPerClonotypeNoAggregates
+				// Filter out SHM mutation columns (We will generate chain-agnostic columns)
+				columnsForSingleCell := hasShm ? slices.filter(columnsSpecPerClonotypeNoAggregates, func(col) {
+					return !shmColumnNames[col.column]
+				}) : columnsSpecPerClonotypeNoAggregates
 				if isTCRChain {
 					visibilitySettings := {
 						"bestCGene":      "optional",
@@ -718,6 +794,23 @@ self.body(func(inputs) {
 				path: ["cellsTsv"]
 			} ]
+			if hasShm {
+				singleCellOutputs += [ {
+					type: "Xsv",
+					xsvType: "tsv",
+					settings: {
+						axes: [ axisByScClonotypeKeyGen(receptor) ],
+						columns: shmOutputSpecs,
+						storageFormat: "Parquet",
+						partitionKeyLength: 0
+					},
+					mem: "12GiB",
+					cpu: 2,
+					name: "shmCombined",
+					path: ["shmTsv"]
+				} ]
+			}
 			chainA := receptorInfo.chains[0]
 			chainB := receptorInfo.chains[1]
@@ -746,7 +839,8 @@ self.body(func(inputs) {
 						params: {
 							mainAbundanceColumn: mainAbundanceColumnUnnormalized,
 							mainIsProductiveColumn: mainIsProductiveColumn,
-							schemaPerClonotypeNoAggregates: columnsToSchema(columnsSpecPerClonotypeNoAggregates)
+							schemaPerClonotypeNoAggregates: columnsToSchema(columnsSpecPerClonotypeNoAggregates),
+							shmMapping: shmMapping
 						}
 					}
 				}
@@ -769,6 +863,10 @@ self.body(func(inputs) {
 			singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesBPrimary", "clonotypeProperties/" + receptor + "/bPrimary/")
 			singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesBSecondary", "clonotypeProperties/" + receptor + "/bSecondary/")
+			if hasShm {
+				singleCellResult.addXsvOutputToBuilder(clonotypes, "shmCombined", "clonotypeProperties/" + receptor + "/shmCombined/")
+			}
 			for columnName in singleCellResult.listXsvColumns("cellsLinkerTable") {
 				anonymizedData := singleCellResult.outputData("cellsLinkerTable", columnName)
 				clonotypes.add(