npm - @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow - Versions diffs - 1.19.7 → 1.19.9 - Mend

@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow 1.19.7 → 1.19.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/.turbo/turbo-build.log +1 -1
package/CHANGELOG.md +21 -0
package/dist/tengo/lib/calculate-export-specs.lib.tengo +282 -100
package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
package/dist/tengo/tpl/export-report.plj.gz +0 -0
package/dist/tengo/tpl/main.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
package/dist/tengo/tpl/process.plj.gz +0 -0
package/dist/tengo/tpl/repseqio-library.plj.gz +0 -0
package/package.json +1 -1
package/src/calculate-export-specs.lib.tengo +285 -103
package/src/main.tpl.tengo +1 -0
package/src/mixcr-analyze.tpl.tengo +13 -1
package/src/process.tpl.tengo +2 -0

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,6 +1,6 @@
  WARN  Issue while reading "/home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
-> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.7 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
+> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.9 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
 > rm -rf dist && pl-tengo check && pl-tengo build
 Processing "src/aggregate-by-clonotype-key.tpl.tengo"...

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,26 @@
 # @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow
+## 1.19.9
+### Patch Changes
+- 2149d28: Fix column naming for range assembling features (e.g. CDR1:CDR3, FR2:FR4) without imputation.
+  When using a range assembling feature without "Impute non-covered part", the workflow would fail with
+  "column nSeqVDJRegion does not exist in export" because VDJRegion is never exported for non-full-range features.
+  Changes:
+  - Use the assembling feature itself as clonotype key column when VDJRegion is unavailable
+  - Fix column naming to match MiXCR output format (e.g. `CDR1_TO_FR4` instead of `{CDR1Begin:FR4End}`)
+  - Add unit tests covering column naming for all assembling feature variants with/without imputation
+## 1.19.8
+### Patch Changes
+- cd0f414: Support custom assembling feature and imputation in amplicon alignment
 ## 1.19.7
 ### Patch Changes

package/dist/tengo/lib/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -37,11 +37,6 @@ toCombinedDomainValue := func(spec) {
-assemblingFeature := "VDJRegion"
-productiveFeature := "VDJRegion"
-coreVFeature := "{FR1Begin:FR3End}"
-coreJFeature := "FR4"
-splitByC := false
 formatId := func(input) {
@@ -57,22 +52,142 @@ addSpec := func(columns, additionalSpec) {
 	})
 }
+parseAssemblingFeature := func(assemblingFeature) {
+	if assemblingFeature == "VDJRegion" || assemblingFeature == "CDR3" {
+		return {
+			imputed: [],
+			nonImputed: assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"],
+			coreGeneFeatures: {
+				V: "{FR1Begin:FR3End}",
+				J: "FR4"
+			}
+		}
+	}
+	be := text.split(assemblingFeature, ":")
+	if len(be) != 2 {
+		ll.panic("assemblingFeature must be in the format of 'begin:end', got " + assemblingFeature)
+	}
+	begin := be[0]
+	end := be[1]
+	features := ["FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"]
+	iBegin := -1
+	iEnd := -1
+	for i, f in features {
+		if f == begin {
+			iBegin = i
+		}
+		if f == end {
+			iEnd = i
+		}
+	}
+	if iBegin == -1 || iEnd == -1 {
+		ll.panic("begin or end not found in features: " + assemblingFeature)
+	}
+	if iBegin > iEnd {
+		ll.panic("begin is after end: " + assemblingFeature)
+	}
+	imputed := []
+	nonImputed := []
+	for i := 0; i < iBegin; i++ {
+		imputed = append(imputed, features[i])
+	}
+	for i := iEnd + 1; i < len(features); i++ {
+		imputed = append(imputed, features[i])
+	}
+	for i := iBegin; i <= iEnd; i++ {
+		nonImputed = append(nonImputed, features[i])
+	}
+	coreVFeature := undefined
+	coreJFeature := undefined
+	if begin != "CDR3" {
+		coreVFeature = "{"+begin+"Begin:FR3End}"
+	}
+	if end == "FR4" {
+		coreJFeature = "FR4"
+	}
+	if begin == "FR1" && end == "FR4" {
+		nonImputed = append(nonImputed, "VDJRegion")
+	} else {
+		imputed = append(imputed, "VDJRegion")
+	}
+	return {
+		imputed: imputed,
+		nonImputed: nonImputed,
+		coreGeneFeatures: {
+			V: coreVFeature,
+			J: coreJFeature
+		}
+	}
+}
 calculateExportSpecs := func(presetSpecForBack, blockId) {
-	assemblingFeature = presetSpecForBack.assemblingFeature
+	assemblingFeature := presetSpecForBack.assemblingFeature
+	imputeGermline := presetSpecForBack.imputeGermline
+	if is_undefined(imputeGermline) {
+		imputeGermline = false
+	}
 	splitByC := false
-	productiveFeature := assemblingFeature
-	coreGeneFeatures := {
-		V: "{FR1Begin:FR3End}",
-		J: "FR4"
+	parsedFeature := parseAssemblingFeature(assemblingFeature)
+	imputedFeaturesMap := {}
+	for f in parsedFeature.imputed {
+		imputedFeaturesMap[f] = true
+	}
+	formatAssemblingFeature := func(fstr) {
+		if fstr == "VDJRegion" || fstr == "CDR3" {
+			return fstr
+		}
+		parts := text.split(fstr, ":")
+		if len(parts) == 1 {
+			return "{" + parts[0] + "Begin:" + parts[0] + "End}"
+		}
+		return "{" + parts[0] + "Begin:" + parts[1] + "End}"
+	}
+	productiveFeature := formatAssemblingFeature(assemblingFeature)
+	outputProductiveFeature := productiveFeature
+	if assemblingFeature != "VDJRegion" && assemblingFeature != "CDR3" {
+		parts := text.split(assemblingFeature, ":")
+		if len(parts) == 2 && parts[1] == "FR4" {
+			outputProductiveFeature = parts[0] + "_TO_FR4"
+		}
 	}
+	coreGeneFeatures := parsedFeature.coreGeneFeatures
 	anchorFeature := assemblingFeature
-	features := assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"]
+	features := parsedFeature.nonImputed
+	if imputeGermline {
+		features = features + parsedFeature.imputed
+	}
 	clonotypeKeyColumns := []
 	clonotypeKeyArgs := []
@@ -84,18 +199,67 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
 			[ "-jGene" ]
 		]
 	} else {
-		clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
-		clonotypeKeyArgs = [
-			[ "-nFeature", "VDJRegion" ],
-			[ "-vGene" ],
-			[ "-jGene" ]
-		]
+		vdjIsAssemblingFeature := is_undefined(imputedFeaturesMap["VDJRegion"])
+		if vdjIsAssemblingFeature {
+			clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
+			clonotypeKeyArgs = [
+				[ "-nFeature", "VDJRegion" ],
+				[ "-vGene" ],
+				[ "-jGene" ]
+			]
+		} else {
+			keyColName := "nSeq" + outputProductiveFeature
+			clonotypeKeyColumns = [keyColName, "bestVGene", "bestJGene"]
+			clonotypeKeyArgs = [
+				[ "-nFeature", productiveFeature ],
+				[ "-vGene" ],
+				[ "-jGene" ]
+			]
+		}
 	}
 	columnsSpecPerSample := []
 	columnsSpecPerClonotypeNoAggregates := []
 	mutationColumns := []
+	needsAssemblingFeatureExport := assemblingFeature != "CDR3" && assemblingFeature != "VDJRegion" && !is_undefined(imputedFeaturesMap["VDJRegion"])
+	if needsAssemblingFeatureExport {
+		featureIdL := text.to_lower(formatId(assemblingFeature))
+		keyColName := "nSeq" + outputProductiveFeature
+		columnsSpecPerClonotypeNoAggregates += [ {
+			column: keyColName,
+			id: "n-seq-" + featureIdL,
+			naRegex: "region_not_covered",
+			spec: {
+				name: "pl7.app/vdj/sequence",
+				valueType: "String",
+				domain: {
+					"pl7.app/vdj/feature": outputProductiveFeature,
+					"pl7.app/alphabet": "nucleotide"
+				},
+				annotations: a(80100, false, {
+					"pl7.app/vdj/isAssemblingFeature": "true",
+					"pl7.app/vdj/isMainSequence": "false",
+					"pl7.app/vdj/imputed": "false",
+					"pl7.app/table/fontFamily": "monospace",
+					"pl7.app/label": outputProductiveFeature + " nt"
+				})
+			}
+		} ]
+	}
 	clonotypeLabelColumn := {
 		column: "clonotypeLabel",
 		id: "clonotype-label",
@@ -113,6 +277,11 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
 	exportArgs := []
+	if needsAssemblingFeatureExport {
+		exportArgs += [ [ "-nFeature", productiveFeature ] ]
+	}
 	hasUmi := !is_undefined(presetSpecForBack) && !is_undefined(presetSpecForBack.umiTags) && len(presetSpecForBack.umiTags) > 0
@@ -314,100 +483,113 @@ inFrameFeatures := {
 		"CDR3": "CDR3"
 	}
-	for featureU in features {
-		featureL := text.to_lower(formatId(featureU))
-		for isAminoAcid in [true, false] {
-			featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
-			if is_undefined(featureInFrameU) {
-				featureInFrameU = featureU
-			}
-			featureInFrameL := text.to_lower(formatId(featureInFrameU))
+	for isImputed in [false, true] {
+		featuresList := isImputed ? parsedFeature.imputed : parsedFeature.nonImputed
+		if len(featuresList) == 0 {
+			continue
+		}
+		if isImputed && !imputeGermline {
+			continue
+		}
-			alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
-			alphabetShort := isAminoAcid ? "aa" : "nt"
-			alphabetShortMixcr := isAminoAcid ? "aa" : "n"
-			columnName := alphabetShortMixcr + "Seq" + featureInFrameU
-			visibility := featureU == "VDJRegion" || featureU == "CDR3"
-			if featureU == "CDR3" {
-				cdr3SeqColumns += [ columnName ]
-			}
-			if isAminoAcid {
-				aminoAcidSeqColumns += [ columnName ]
-				aminoAcidSeqColumnPairs += [ {
-						aa: columnName,
-						nt: "nSeq" + featureU
-					} ]
-			}
-			columnsSpecPerClonotypeNoAggregates += [ {
-					column: columnName,
-					id: alphabetShortMixcr + "-seq-" + featureInFrameL,
-					naRegex: "region_not_covered",
-					spec: {
-						name: "pl7.app/vdj/sequence",
-						valueType: "String",
-						domain: {
-							"pl7.app/vdj/feature": featureInFrameU,
-							"pl7.app/alphabet": alphabet
-						},
-						annotations: a(orderP, visibility, {
-							"pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
-							"pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
-							"pl7.app/vdj/imputed": "false",
-							"pl7.app/table/fontFamily": "monospace",
-							"pl7.app/label": featureInFrameU + " " + alphabetShort
-						})
-					}
-				} ]
-			exportArgs += [ [ "-" + alphabetShortMixcr + "Feature", featureInFrameU ] ]
-			orderP -= 100
+		imputedU := isImputed ? "Imputed" : ""
+		imputedL := text.to_lower(imputedU)
-			if featureU == assemblingFeature {
-				for annotationType in annotationTypes {
-					columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
-					columnsSpecPerClonotypeNoAggregates += [ {
+		for featureU in featuresList {
+			featureL := text.to_lower(formatId(featureU))
+			for isAminoAcid in [true, false] {
+				featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
+				if is_undefined(featureInFrameU) {
+					featureInFrameU = featureU
+				}
+				featureInFrameL := text.to_lower(formatId(featureInFrameU))
+				alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
+				alphabetShort := isAminoAcid ? "aa" : "nt"
+				alphabetShortMixcr := isAminoAcid ? "aa" : "n"
+				columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
+				visibility := featureU == "VDJRegion" || featureU == "CDR3"
+				if featureU == "CDR3" {
+					cdr3SeqColumns += [ columnName ]
+				}
+				if isAminoAcid {
+					aminoAcidSeqColumns += [ columnName ]
+					aminoAcidSeqColumnPairs += [ {
+							aa: columnName,
+							nt: "nSeq" + imputedU + featureU
+						} ]
+				}
+				columnsSpecPerClonotypeNoAggregates += [ {
 						column: columnName,
-						id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
+						id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
 						naRegex: "region_not_covered",
 						spec: {
-							name: "pl7.app/vdj/sequence/annotation",
+							name: "pl7.app/vdj/sequence",
 							valueType: "String",
 							domain: {
 								"pl7.app/vdj/feature": featureInFrameU,
-								"pl7.app/alphabet": alphabet,
-								"pl7.app/sequence/annotation/type": annotationType
+								"pl7.app/alphabet": alphabet
 							},
-							annotations: a(orderP, undefined, {
-								"pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
-								"pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
-								"pl7.app/sequence/isAnnotation": "true"
+							annotations: a(orderP, visibility, {
+								"pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
+								"pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
+								"pl7.app/vdj/imputed": string(isImputed),
+								"pl7.app/table/fontFamily": "monospace",
+								"pl7.app/label": (isImputed ? "Imputed " : "") + featureInFrameU + " " + alphabetShort
 							})
 						}
 					} ]
-					exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
-					orderP -= 100
-				}
-			}
+				exportArgs += [ [ "-" + alphabetShortMixcr + "Feature" + imputedU, featureInFrameU ] ]
+				orderP -= 100
-			if featureU == "CDR3" {
-				columnsSpecPerClonotypeNoAggregates += [ {
-					column: alphabetShortMixcr + "Length" + featureU,
-					id: alphabetShortMixcr + "-length-" + featureL,
-					naRegex: "region_not_covered",
-					spec: {
-						name: "pl7.app/vdj/sequenceLength",
-						valueType: "Int",
-						domain: {
-							"pl7.app/vdj/feature": featureU,
-							"pl7.app/alphabet": alphabet
-						},
-						annotations: a(orderP, false, {
-							"pl7.app/label": "Length of " + featureU + " " + alphabetShort
-						})
+				if !isImputed && featureU == assemblingFeature {
+					for annotationType in annotationTypes {
+						columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
+						columnsSpecPerClonotypeNoAggregates += [ {
+							column: columnName,
+							id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
+							naRegex: "region_not_covered",
+							spec: {
+								name: "pl7.app/vdj/sequence/annotation",
+								valueType: "String",
+								domain: {
+									"pl7.app/vdj/feature": featureInFrameU,
+									"pl7.app/alphabet": alphabet,
+									"pl7.app/sequence/annotation/type": annotationType
+								},
+								annotations: a(orderP, undefined, {
+									"pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
+									"pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
+									"pl7.app/sequence/isAnnotation": "true"
+								})
+							}
+						} ]
+						exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
+						orderP -= 100
 					}
-				} ]
-				exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
+				}
+				if !isImputed && featureU == "CDR3" {
+					columnsSpecPerClonotypeNoAggregates += [ {
+						column: alphabetShortMixcr + "Length" + featureU,
+						id: alphabetShortMixcr + "-length-" + featureL,
+						naRegex: "region_not_covered",
+						spec: {
+							name: "pl7.app/vdj/sequenceLength",
+							valueType: "Int",
+							domain: {
+								"pl7.app/vdj/feature": featureU,
+								"pl7.app/alphabet": alphabet
+							},
+							annotations: a(orderP, false, {
+								"pl7.app/label": "Length of " + featureU + " " + alphabetShort
+							})
+						}
+					} ]
+					exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
+				}
 			}
 		}
 	}
@@ -510,7 +692,7 @@ inFrameFeatures := {
 	}
-	if assemblingFeature == "VDJRegion" {
+	if assemblingFeature == "VDJRegion" || assemblingFeature == "FR1:FR4" {
 		orderP = 9500
@@ -793,11 +975,11 @@ inFrameFeatures := {
 			visibility: false
 		}
 	]
-	mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
+	mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + outputProductiveFeature
 	mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
 	for variant in flagColumnVariants {
 		columnsSpecPerClonotypeNoAggregates += [ {
-				column: variant.columnPrefix + productiveFeature,
+				column: variant.columnPrefix + outputProductiveFeature,
 				id: variant.id,
 				allowNA: false,
 				spec: {

package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/export-report.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-analyze.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-export.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/repseqio-library.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow",
-  "version": "1.19.7",
+  "version": "1.19.9",
   "description": "MiXCR Amplicon Alignment Workflow",
   "type": "module",
   "dependencies": {

package/src/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -37,11 +37,6 @@ toCombinedDomainValue := func(spec) {
  *     - V: formatted identifier for the V gene core feature
  *     - J: formatted identifier for the J gene core feature (if applicable)
  */
-assemblingFeature := "VDJRegion"
-productiveFeature := "VDJRegion"
-coreVFeature := "{FR1Begin:FR3End}"
-coreJFeature := "FR4"
-splitByC := false
 // sometimes we need to format assembling feature to be used in column ids
 formatId := func(input) {
@@ -57,22 +52,142 @@ addSpec := func(columns, additionalSpec) {
 	})
 }
+parseAssemblingFeature := func(assemblingFeature) {
+	if assemblingFeature == "VDJRegion" || assemblingFeature == "CDR3" {
+		return {
+			imputed: [],
+			nonImputed: assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"],
+			coreGeneFeatures: {
+				V: "{FR1Begin:FR3End}",
+				J: "FR4"
+			}
+		}
+	}
+	be := text.split(assemblingFeature, ":")
+	if len(be) != 2 {
+		ll.panic("assemblingFeature must be in the format of 'begin:end', got " + assemblingFeature)
+	}
+	begin := be[0]
+	end := be[1]
+	features := ["FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"]
+	iBegin := -1
+	iEnd := -1
+	for i, f in features {
+		if f == begin {
+			iBegin = i
+		}
+		if f == end {
+			iEnd = i
+		}
+	}
+	if iBegin == -1 || iEnd == -1 {
+		ll.panic("begin or end not found in features: " + assemblingFeature)
+	}
+	if iBegin > iEnd {
+		ll.panic("begin is after end: " + assemblingFeature)
+	}
+	imputed := []
+	nonImputed := []
+	for i := 0; i < iBegin; i++ {
+		imputed = append(imputed, features[i])
+	}
+	for i := iEnd + 1; i < len(features); i++ {
+		imputed = append(imputed, features[i])
+	}
+	for i := iBegin; i <= iEnd; i++ {
+		nonImputed = append(nonImputed, features[i])
+	}
+	coreVFeature := undefined
+	coreJFeature := undefined
+	if begin != "CDR3" {
+		coreVFeature = "{"+begin+"Begin:FR3End}"
+	}
+	if end == "FR4" {
+		coreJFeature = "FR4"
+	}
+	if begin == "FR1" && end == "FR4" {
+		nonImputed = append(nonImputed, "VDJRegion")
+	} else {
+		imputed = append(imputed, "VDJRegion")
+	}
+	return {
+		imputed: imputed,
+		nonImputed: nonImputed,
+		coreGeneFeatures: {
+			V: coreVFeature,
+			J: coreJFeature
+		}
+	}
+}
 calculateExportSpecs := func(presetSpecForBack, blockId) {
-	assemblingFeature = presetSpecForBack.assemblingFeature
+	assemblingFeature := presetSpecForBack.assemblingFeature
+	imputeGermline := presetSpecForBack.imputeGermline
+	if is_undefined(imputeGermline) {
+		imputeGermline = false
+	}
 	splitByC := false
-	productiveFeature := assemblingFeature
-	coreGeneFeatures := {
-		V: "{FR1Begin:FR3End}",
-		J: "FR4"
+	parsedFeature := parseAssemblingFeature(assemblingFeature)
+	imputedFeaturesMap := {}
+	for f in parsedFeature.imputed {
+		imputedFeaturesMap[f] = true
 	}
+	formatAssemblingFeature := func(fstr) {
+		if fstr == "VDJRegion" || fstr == "CDR3" {
+			return fstr
+		}
+		parts := text.split(fstr, ":")
+		if len(parts) == 1 {
+			return "{" + parts[0] + "Begin:" + parts[0] + "End}"
+		}
+		return "{" + parts[0] + "Begin:" + parts[1] + "End}"
+	}
+	productiveFeature := formatAssemblingFeature(assemblingFeature)
+	// MiXCR column naming for range features:
+	// - Ranges ending at FR4 have named aliases: CDR1_TO_FR4, FR2_TO_FR4, CDR2_TO_FR4, FR3_TO_FR4
+	//   (defined in repseqio GeneFeature.java)
+	// - All other ranges use {XBegin:YEnd} format (e.g. {CDR1Begin:CDR3End})
+	// - Simple features (CDR3, VDJRegion) use their name directly
+	outputProductiveFeature := productiveFeature
+	if assemblingFeature != "VDJRegion" && assemblingFeature != "CDR3" {
+		parts := text.split(assemblingFeature, ":")
+		if len(parts) == 2 && parts[1] == "FR4" {
+			// MiXCR has a named alias for this range
+			outputProductiveFeature = parts[0] + "_TO_FR4"
+		}
+	}
+	coreGeneFeatures := parsedFeature.coreGeneFeatures
 	// column with nucleotide sequence of this feature will be marked as anchor
 	anchorFeature := assemblingFeature
-	features := assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"]
+	features := parsedFeature.nonImputed
+	if imputeGermline {
+		features = features + parsedFeature.imputed
+	}
 	clonotypeKeyColumns := []
 	clonotypeKeyArgs := []
@@ -84,18 +199,67 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
 			[ "-jGene" ]
 		]
 	} else {
-		clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
-		clonotypeKeyArgs = [
-			[ "-nFeature", "VDJRegion" ],
-			[ "-vGene" ],
-			[ "-jGene" ]
-		]
+		// VDJRegion is the assembling feature itself only when it's NOT in the imputed list
+		// (e.g. VDJRegion or FR1:FR4 as the assembling feature)
+		vdjIsAssemblingFeature := is_undefined(imputedFeaturesMap["VDJRegion"])
+		if vdjIsAssemblingFeature {
+			// VDJRegion IS the assembling feature, use it directly as the key
+			clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
+			clonotypeKeyArgs = [
+				[ "-nFeature", "VDJRegion" ],
+				[ "-vGene" ],
+				[ "-jGene" ]
+			]
+		} else {
+			// Range feature where VDJRegion is NOT the assembling feature (e.g. CDR1:CDR3, FR2:FR4)
+			// Always use the assembling feature itself as the key, even with imputation enabled.
+			// Imputed VDJRegion is NOT guaranteed unique per clone (two clones with different
+			// assembling feature sequences can produce the same imputed VDJRegion).
+			// The assembling feature sequence IS unique by definition (it defines the clone).
+			keyColName := "nSeq" + outputProductiveFeature
+			clonotypeKeyColumns = [keyColName, "bestVGene", "bestJGene"]
+			clonotypeKeyArgs = [
+				[ "-nFeature", productiveFeature ],
+				[ "-vGene" ],
+				[ "-jGene" ]
+			]
+		}
 	}
 	columnsSpecPerSample := []
 	columnsSpecPerClonotypeNoAggregates := []
 	mutationColumns := []
+	// For range features where VDJRegion is not the assembling feature, we need to export
+	// the combined assembling feature sequence column explicitly (individual features are
+	// exported in the loop below, but the combined feature like {CDR1Begin:CDR3End} is not)
+	needsAssemblingFeatureExport := assemblingFeature != "CDR3" && assemblingFeature != "VDJRegion" && !is_undefined(imputedFeaturesMap["VDJRegion"])
+	if needsAssemblingFeatureExport {
+		featureIdL := text.to_lower(formatId(assemblingFeature))
+		keyColName := "nSeq" + outputProductiveFeature
+		columnsSpecPerClonotypeNoAggregates += [ {
+			column: keyColName,
+			id: "n-seq-" + featureIdL,
+			naRegex: "region_not_covered",
+			spec: {
+				name: "pl7.app/vdj/sequence",
+				valueType: "String",
+				domain: {
+					"pl7.app/vdj/feature": outputProductiveFeature,
+					"pl7.app/alphabet": "nucleotide"
+				},
+				annotations: a(80100, false, {
+					"pl7.app/vdj/isAssemblingFeature": "true",
+					"pl7.app/vdj/isMainSequence": "false",
+					"pl7.app/vdj/imputed": "false",
+					"pl7.app/table/fontFamily": "monospace",
+					"pl7.app/label": outputProductiveFeature + " nt"
+				})
+			}
+		} ]
+	}
 	clonotypeLabelColumn := {
 		column: "clonotypeLabel",
 		id: "clonotype-label",
@@ -112,6 +276,11 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
 	// array of array of arg groups
 	exportArgs := []
+	// Add the assembling feature export arg if needed (column spec was added above)
+	if needsAssemblingFeatureExport {
+		exportArgs += [ [ "-nFeature", productiveFeature ] ]
+	}
 	// Abundance - reads by default; switch to UMI columns if umiTags are present
 	hasUmi := !is_undefined(presetSpecForBack) && !is_undefined(presetSpecForBack.umiTags) && len(presetSpecForBack.umiTags) > 0
@@ -314,100 +483,113 @@ inFrameFeatures := {
 		"CDR3": "CDR3"
 	}
-	for featureU in features {
-		featureL := text.to_lower(formatId(featureU))
-		for isAminoAcid in [true, false] {
-			featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
-			if is_undefined(featureInFrameU) {
-				featureInFrameU = featureU
-			}
-			featureInFrameL := text.to_lower(formatId(featureInFrameU))
+	for isImputed in [false, true] {
+		featuresList := isImputed ? parsedFeature.imputed : parsedFeature.nonImputed
+		if len(featuresList) == 0 {
+			continue
+		}
+		if isImputed && !imputeGermline {
+			continue
+		}
-			alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
-			alphabetShort := isAminoAcid ? "aa" : "nt"
-			alphabetShortMixcr := isAminoAcid ? "aa" : "n"
-			columnName := alphabetShortMixcr + "Seq" + featureInFrameU
-			visibility := featureU == "VDJRegion" || featureU == "CDR3"
-			if featureU == "CDR3" {
-				cdr3SeqColumns += [ columnName ]
-			}
-			if isAminoAcid {
-				aminoAcidSeqColumns += [ columnName ]
-				aminoAcidSeqColumnPairs += [ {
-						aa: columnName,
-						nt: "nSeq" + featureU
-					} ]
-			}
-			columnsSpecPerClonotypeNoAggregates += [ {
-					column: columnName,
-					id: alphabetShortMixcr + "-seq-" + featureInFrameL,
-					naRegex: "region_not_covered",
-					spec: {
-						name: "pl7.app/vdj/sequence",
-						valueType: "String",
-						domain: {
-							"pl7.app/vdj/feature": featureInFrameU,
-							"pl7.app/alphabet": alphabet
-						},
-						annotations: a(orderP, visibility, {
-							"pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
-							"pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
-							"pl7.app/vdj/imputed": "false",
-							"pl7.app/table/fontFamily": "monospace",
-							"pl7.app/label": featureInFrameU + " " + alphabetShort
-						})
-					}
-				} ]
-			exportArgs += [ [ "-" + alphabetShortMixcr + "Feature", featureInFrameU ] ]
-			orderP -= 100
-			// Adding sequence annotation columns for assembling feature
-			if featureU == assemblingFeature {
-				for annotationType in annotationTypes {
-					columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
-					columnsSpecPerClonotypeNoAggregates += [ {
+		imputedU := isImputed ? "Imputed" : ""
+		imputedL := text.to_lower(imputedU)
+		for featureU in featuresList {
+			featureL := text.to_lower(formatId(featureU))
+			for isAminoAcid in [true, false] {
+				featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
+				if is_undefined(featureInFrameU) {
+					featureInFrameU = featureU
+				}
+				featureInFrameL := text.to_lower(formatId(featureInFrameU))
+				alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
+				alphabetShort := isAminoAcid ? "aa" : "nt"
+				alphabetShortMixcr := isAminoAcid ? "aa" : "n"
+				columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
+				visibility := featureU == "VDJRegion" || featureU == "CDR3"
+				if featureU == "CDR3" {
+					cdr3SeqColumns += [ columnName ]
+				}
+				if isAminoAcid {
+					aminoAcidSeqColumns += [ columnName ]
+					aminoAcidSeqColumnPairs += [ {
+							aa: columnName,
+							nt: "nSeq" + imputedU + featureU
+						} ]
+				}
+				columnsSpecPerClonotypeNoAggregates += [ {
 						column: columnName,
-						id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
+						id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
 						naRegex: "region_not_covered",
 						spec: {
-							name: "pl7.app/vdj/sequence/annotation",
+							name: "pl7.app/vdj/sequence",
 							valueType: "String",
 							domain: {
 								"pl7.app/vdj/feature": featureInFrameU,
-								"pl7.app/alphabet": alphabet,
-								"pl7.app/sequence/annotation/type": annotationType
+								"pl7.app/alphabet": alphabet
 							},
-							annotations: a(orderP, undefined, {
-								"pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
-								"pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
-								"pl7.app/sequence/isAnnotation": "true"
+							annotations: a(orderP, visibility, {
+								"pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
+								"pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
+								"pl7.app/vdj/imputed": string(isImputed),
+								"pl7.app/table/fontFamily": "monospace",
+								"pl7.app/label": (isImputed ? "Imputed " : "") + featureInFrameU + " " + alphabetShort
 							})
 						}
 					} ]
-					exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
-					orderP -= 100
-				}
-			}
-			// For now calculate length only for CDR3 to keep the number of columns manageable
-			if featureU == "CDR3" {
-				columnsSpecPerClonotypeNoAggregates += [ {
-					column: alphabetShortMixcr + "Length" + featureU,
-					id: alphabetShortMixcr + "-length-" + featureL,
-					naRegex: "region_not_covered",
-					spec: {
-						name: "pl7.app/vdj/sequenceLength",
-						valueType: "Int",
-						domain: {
-							"pl7.app/vdj/feature": featureU,
-							"pl7.app/alphabet": alphabet
-						},
-						annotations: a(orderP, false, {
-							"pl7.app/label": "Length of " + featureU + " " + alphabetShort
-						})
+				exportArgs += [ [ "-" + alphabetShortMixcr + "Feature" + imputedU, featureInFrameU ] ]
+				orderP -= 100
+				// Adding sequence annotation columns for assembling feature
+				if !isImputed && featureU == assemblingFeature {
+					for annotationType in annotationTypes {
+						columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
+						columnsSpecPerClonotypeNoAggregates += [ {
+							column: columnName,
+							id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
+							naRegex: "region_not_covered",
+							spec: {
+								name: "pl7.app/vdj/sequence/annotation",
+								valueType: "String",
+								domain: {
+									"pl7.app/vdj/feature": featureInFrameU,
+									"pl7.app/alphabet": alphabet,
+									"pl7.app/sequence/annotation/type": annotationType
+								},
+								annotations: a(orderP, undefined, {
+									"pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
+									"pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
+									"pl7.app/sequence/isAnnotation": "true"
+								})
+							}
+						} ]
+						exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
+						orderP -= 100
 					}
-				} ]
-				exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
+				}
+				// For now calculate length only for CDR3 to keep the number of columns manageable
+				if !isImputed && featureU == "CDR3" {
+					columnsSpecPerClonotypeNoAggregates += [ {
+						column: alphabetShortMixcr + "Length" + featureU,
+						id: alphabetShortMixcr + "-length-" + featureL,
+						naRegex: "region_not_covered",
+						spec: {
+							name: "pl7.app/vdj/sequenceLength",
+							valueType: "Int",
+							domain: {
+								"pl7.app/vdj/feature": featureU,
+								"pl7.app/alphabet": alphabet
+							},
+							annotations: a(orderP, false, {
+								"pl7.app/label": "Length of " + featureU + " " + alphabetShort
+							})
+						}
+					} ]
+					exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
+				}
 			}
 		}
 	}
@@ -509,8 +691,8 @@ inFrameFeatures := {
 		}
 	}
-	// All nucleotide mutations count for each feature (only for VDJRegion assembling feature)
-	if assemblingFeature == "VDJRegion" {
+	// All nucleotide mutations count for each feature (only for VDJRegion or FR1:FR4 assembling feature)
+	if assemblingFeature == "VDJRegion" || assemblingFeature == "FR1:FR4" {
 		orderP = 9500
 		// MixCR -allNMutationsCount exports columns for all features between specified boundaries.
@@ -793,11 +975,11 @@ inFrameFeatures := {
 			visibility: false
 		}
 	]
-	mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
+	mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + outputProductiveFeature
 	mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
 	for variant in flagColumnVariants {
 		columnsSpecPerClonotypeNoAggregates += [ {
-				column: variant.columnPrefix + productiveFeature,
+				column: variant.columnPrefix + outputProductiveFeature,
 				id: variant.id,
 				allowNA: false,
 				spec: {

package/src/main.tpl.tengo CHANGED Viewed

@@ -69,6 +69,7 @@ wf.body(func(args) {
 			cloneClusteringMode: cloneClusteringMode,
 			tagPattern: args.tagPattern,
 			assemblingFeature: args.assemblingFeature,
+			imputeGermline: args.imputeGermline,
 			badQualityThreshold: args.badQualityThreshold,
 			stopCodonTypes: args.stopCodonTypes,
 			stopCodonReplacements: args.stopCodonReplacements

package/src/mixcr-analyze.tpl.tengo CHANGED Viewed

@@ -11,6 +11,7 @@ assets := import("@platforma-sdk/workflow-tengo:assets")
 pcolumn := import("@platforma-sdk/workflow-tengo:pframes.pcolumn")
 times := import("times")
 maps := import("@platforma-sdk/workflow-tengo:maps")
+text := import("text")
 json := import("json")
@@ -69,8 +70,19 @@ self.body(func(inputs) {
 		mixcrCmdBuilder.arg("generic-amplicon")
 	}
+	formatAssemblingFeature := func(fstr) {
+		if fstr == "VDJRegion" || fstr == "CDR3" {
+			return fstr
+		}
+		parts := text.split(fstr, ":")
+		if len(parts) == 1 {
+			return "{" + parts[0] + "Begin:" + parts[0] + "End}"
+		}
+		return "{" + parts[0] + "Begin:" + parts[1] + "End}"
+	}
     mixcrCmdBuilder.
-        arg("--assemble-clonotypes-by").arg(params.assemblingFeature).
+        arg("--assemble-clonotypes-by").arg(formatAssemblingFeature(params.assemblingFeature)).
         arg("--species").arg("custom").
 		arg("--library").arg("library.json").
 		addFile("library.json", params.referenceLibrary).

package/src/process.tpl.tengo CHANGED Viewed

@@ -90,6 +90,7 @@ self.body(func(inputs) {
 	// Use calculateExportSpecs for output columns
 	presetSpecForBack := {
 		assemblingFeature: params.assemblingFeature,
+		imputeGermline: params.imputeGermline,
 		splitByC: true,
 		umiTags: hasUMI ? umiTags : undefined,
 		cellTags: []
@@ -221,6 +222,7 @@ self.body(func(inputs) {
 					hasUMI: hasUMI,
 					tagPattern: tagPattern,
 					assemblingFeature: params.assemblingFeature,
+					imputeGermline: params.imputeGermline,
 					badQualityThreshold: params.badQualityThreshold
 				}, { removeUndefs: true }),
 				limitInput: limitInput