npm - @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow - Versions diffs - 1.17.0 → 1.18.0 - Mend

@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow 1.17.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/.turbo/turbo-build.log +1 -1
package/CHANGELOG.md +11 -0
package/dist/tengo/lib/calculate-export-specs.lib.tengo +17 -0
package/dist/tengo/lib/qc-report-columns.lib.tengo +32 -0
package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
package/dist/tengo/tpl/export-report.plj.gz +0 -0
package/dist/tengo/tpl/main.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
package/dist/tengo/tpl/process.plj.gz +0 -0
package/dist/tengo/tpl/repseqio-library.plj.gz +0 -0
package/package.json +4 -4
package/src/calculate-export-specs.lib.tengo +17 -0
package/src/export-report.tpl.tengo +144 -0
package/src/main.tpl.tengo +3 -1
package/src/mixcr-export.tpl.tengo +169 -2
package/src/process.tpl.tengo +17 -5
package/src/qc-report-columns.lib.tengo +32 -0

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,6 +1,6 @@
  WARN  Issue while reading "/home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
-> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.17.0 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
+> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.18.0 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
 > rm -rf dist && pl-tengo check && pl-tengo build
 Processing "src/aggregate-by-clonotype-key.tpl.tengo"...

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,16 @@
 # @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow
+## 1.18.0
+### Minor Changes
+- 656f2fe: stop codon replacement and dep updates
+### Patch Changes
+- Updated dependencies [656f2fe]
+  - @platforma-open/milaboratories.mixcr-amplicon-alignment.software@1.1.0
 ## 1.17.0
 ### Minor Changes

package/dist/tengo/lib/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -298,6 +298,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
 	columnsSpecPerClonotypeAggregates += [ sampleCountColumn ]
 	orderP := 80000
+	aminoAcidSeqColumns := []
+	aminoAcidSeqColumnPairs := []
+	cdr3SeqColumns := []
@@ -325,6 +328,16 @@ inFrameFeatures := {
 			alphabetShortMixcr := isAminoAcid ? "aa" : "n"
 			columnName := alphabetShortMixcr + "Seq" + featureInFrameU
 			visibility := featureU == "VDJRegion" || featureU == "CDR3"
+			if featureU == "CDR3" {
+				cdr3SeqColumns += [ columnName ]
+			}
+			if isAminoAcid {
+				aminoAcidSeqColumns += [ columnName ]
+				aminoAcidSeqColumnPairs += [ {
+						aa: columnName,
+						nt: "nSeq" + featureU
+					} ]
+			}
 			columnsSpecPerClonotypeNoAggregates += [ {
 					column: columnName,
 					id: alphabetShortMixcr + "-seq-" + featureInFrameL,
@@ -807,6 +820,7 @@ inFrameFeatures := {
 	} ]
 	return {
+		productiveFeature: productiveFeature,
 		clonotypeKeyColumns: clonotypeKeyColumns,
 		clonotypeKeyArgs: clonotypeKeyArgs,
@@ -816,6 +830,9 @@ inFrameFeatures := {
 		columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
 		columnsSpecPerClonotypeAggregates: columnsSpecPerClonotypeAggregates,
 		cdr3DistanceColumnsSpec: cdr3DistanceColumnsSpec,
+		aminoAcidSeqColumns: aminoAcidSeqColumns,
+		aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
+		cdr3SeqColumns: cdr3SeqColumns,
 		columnsSpec: columnsSpec,

package/dist/tengo/lib/qc-report-columns.lib.tengo CHANGED Viewed

@@ -571,6 +571,38 @@ getQcReportColumns := func(hasUmi, sampleIdAxisSpec, chains, umiTags) {
             }
         }
     },
+    {
+        column: "assemble.clonotypesDroppedByStopCodons",
+        id: "assemble-clonotypes-dropped-by-stop-codons",
+        allowNA: true,
+        naRegex: "NaN",
+        spec: {
+            name: "mixcr.com/reports/assemble/clonotypesDroppedByStopCodons",
+            valueType: "Long",
+            annotations: {
+                "pl7.app/min": "0",
+                "pl7.app/table/orderPriority": "108200",
+                "pl7.app/table/visibility": "optional",
+                "pl7.app/label": "Clonotypes Dropped - Stop Codons"
+            }
+        }
+    },
+    {
+        column: "assemble.clonotypesDroppedByOutOfFrame",
+        id: "assemble-clonotypes-dropped-by-out-of-frame",
+        allowNA: true,
+        naRegex: "NaN",
+        spec: {
+            name: "mixcr.com/reports/assemble/clonotypesDroppedByOutOfFrame",
+            valueType: "Long",
+            annotations: {
+                "pl7.app/min": "0",
+                "pl7.app/table/orderPriority": "108100",
+                "pl7.app/table/visibility": "optional",
+                "pl7.app/label": "Clonotypes Dropped - Out of Frame"
+            }
+        }
+    },
     {
         column: "totalClonotypes",
         id: "total-clonotypes",

package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/export-report.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-analyze.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-export.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/repseqio-library.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,16 +1,16 @@
 {
   "name": "@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow",
-  "version": "1.17.0",
+  "version": "1.18.0",
   "description": "MiXCR Amplicon Alignment Workflow",
   "type": "module",
   "dependencies": {
-    "@platforma-sdk/workflow-tengo": "5.8.0",
+    "@platforma-sdk/workflow-tengo": "5.8.1",
     "@platforma-open/milaboratories.software-mixcr": "4.7.0-279-develop",
     "@platforma-open/milaboratories.software-repseqio": "^2.5.0-13-master",
-    "@platforma-open/milaboratories.mixcr-amplicon-alignment.software": "1.0.0"
+    "@platforma-open/milaboratories.mixcr-amplicon-alignment.software": "1.1.0"
   },
   "devDependencies": {
-    "@platforma-sdk/tengo-builder": "2.4.11"
+    "@platforma-sdk/tengo-builder": "2.4.12"
   },
   "scripts": {
     "build": "rm -rf dist && pl-tengo check && pl-tengo build",

package/src/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -298,6 +298,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
 	columnsSpecPerClonotypeAggregates += [ sampleCountColumn ]
 	orderP := 80000
+	aminoAcidSeqColumns := []
+	aminoAcidSeqColumnPairs := []
+	cdr3SeqColumns := []
 	// Sequences
@@ -325,6 +328,16 @@ inFrameFeatures := {
 			alphabetShortMixcr := isAminoAcid ? "aa" : "n"
 			columnName := alphabetShortMixcr + "Seq" + featureInFrameU
 			visibility := featureU == "VDJRegion" || featureU == "CDR3"
+			if featureU == "CDR3" {
+				cdr3SeqColumns += [ columnName ]
+			}
+			if isAminoAcid {
+				aminoAcidSeqColumns += [ columnName ]
+				aminoAcidSeqColumnPairs += [ {
+						aa: columnName,
+						nt: "nSeq" + featureU
+					} ]
+			}
 			columnsSpecPerClonotypeNoAggregates += [ {
 					column: columnName,
 					id: alphabetShortMixcr + "-seq-" + featureInFrameL,
@@ -807,6 +820,7 @@ inFrameFeatures := {
 	} ]
 	return {
+		productiveFeature: productiveFeature,
 		clonotypeKeyColumns: clonotypeKeyColumns,
 		clonotypeKeyArgs: clonotypeKeyArgs,
@@ -816,6 +830,9 @@ inFrameFeatures := {
 		columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
 		columnsSpecPerClonotypeAggregates: columnsSpecPerClonotypeAggregates,
 		cdr3DistanceColumnsSpec: cdr3DistanceColumnsSpec,
+		aminoAcidSeqColumns: aminoAcidSeqColumns,
+		aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
+		cdr3SeqColumns: cdr3SeqColumns,
 		columnsSpec: columnsSpec,

package/src/export-report.tpl.tengo CHANGED Viewed

@@ -32,6 +32,75 @@ self.body(func(inputs) {
 	umiTags := inputs.umiTags
 	hasUmi := !is_undefined(umiTags) && len(umiTags) > 0
+	stopCodonTypes := inputs.stopCodonTypes
+	stopCodonReplacements := inputs.stopCodonReplacements
+	if is_undefined(stopCodonTypes) || !is_array(stopCodonTypes) {
+		stopCodonTypes = []
+	}
+	useStopCodonReplacement := !is_undefined(stopCodonTypes) && is_array(stopCodonTypes) && len(stopCodonTypes) > 0
+	if is_undefined(stopCodonReplacements) || !is_map(stopCodonReplacements) {
+		stopCodonReplacements = {}
+	}
+	featureForFlags := inputs.productiveFeature
+	if is_undefined(featureForFlags) || featureForFlags == "" {
+		featureForFlags = "CDR3"
+	}
+	if is_array(featureForFlags) && len(featureForFlags) > 0 {
+		featureForFlags = featureForFlags[0]
+	}
+	isOOFColumn := "isOOF" + featureForFlags
+	hasStopsColumn := "hasStopsIn" + featureForFlags
+	contains := func(arr, value) {
+		for v in arr {
+			if v == value { return true }
+		}
+		return false
+	}
+	stopReplacement := func(stopType) {
+		if !contains(stopCodonTypes, stopType) {
+			return "*"
+		}
+		aa := stopCodonReplacements[stopType]
+		if is_undefined(aa) || aa == "" {
+			return "*"
+		}
+		return text.to_upper(aa)
+	}
+	codonMapReplace := {
+		"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
+		"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
+		"TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
+		"TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
+		"TGA": stopReplacement("opal"), "TGG": "W",
+		"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
+		"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
+		"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
+		"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
+		"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
+		"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
+		"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
+		"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
+		"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
+		"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
+		"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
+		"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
+	}
+	translateNtToAaExpr := func(ntExpr, codonMap) {
+		seq := ntExpr.fillNull("").strToUpper()
+		seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
+		for codon, aa in codonMap {
+			seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
+		}
+		seq = seq.strReplace("\\|$", "", { replaceAll: false })
+		seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
+		seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
+		return seq
+	}
     chainInfos := {
 	"IGHeavy": { mixcrFilter: "IGH", name: "IG Heavy", shortName: "Heavy" },
@@ -124,6 +193,77 @@ self.body(func(inputs) {
     // Join counts and overwrite totalClonotypes to reflect exported (productive) clones
     joinedDf := processedDf.join(aggregatedCounts, { how: "left", on: ["sampleId"] })
+	// Count clonotypes filtered by stop codons and out-of-frame per sample
+	filterCountDfs := []
+	mixcrChainsArg := text.join(chainsForMixcr, ",")
+	for key, clnsFile in clnsFiles {
+		sampleId := json.decode(key)[0]
+		exportFiltersCmd := exec.builder().
+			inMediumQueue().
+			mem("16GiB").
+			cpu(2).
+			software(mixcrSw).
+			env("MI_USE_SYSTEM_CA", "true").
+			secret("MI_LICENSE", "MI_LICENSE").
+			arg("exportClones").
+			arg("--dont-split-files").
+			arg("--drop-default-fields").
+			arg("--reset-export-clone-table-splitting")
+		if mixcrChainsArg != "" {
+			exportFiltersCmd.arg("--chains").arg(mixcrChainsArg)
+		}
+		exportFiltersCmd = exportFiltersCmd.
+			arg("-isOOF").arg(featureForFlags).
+			arg("-hasStops").arg(featureForFlags)
+		if useStopCodonReplacement {
+			exportFiltersCmd = exportFiltersCmd.arg("-nFeature").arg(featureForFlags)
+		}
+		exportFiltersCmd = exportFiltersCmd.
+			arg("clones.clns").
+			addFile("clones.clns", clnsFile).
+			arg("clones.tsv").
+			saveFile("clones.tsv")
+		if library {
+			if isLibraryFileGzipped {
+				exportFiltersCmd.addFile("library.json.gz", library)
+			} else {
+				exportFiltersCmd.addFile("library.json", library)
+			}
+		}
+		exportFiltersResult := exportFiltersCmd.cacheHours(3).run()
+		filterTsv := exportFiltersResult.getFile("clones.tsv")
+		schema := [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ]
+		if useStopCodonReplacement {
+			schema = append(schema, { column: "nSeq" + featureForFlags, type: "String" })
+		}
+		dfFilters := wf.frame(filterTsv, { xsvType: "tsv", inferSchema: false, schema: schema })
+		stopExpr := pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0))
+		if useStopCodonReplacement {
+			translated := translateNtToAaExpr(pt.col("nSeq" + featureForFlags), codonMapReplace)
+			stopExpr = pt.when(translated.strContains("*", { literal: true })).then(pt.lit(1)).otherwise(pt.lit(0))
+		}
+		dfFilters = dfFilters.withColumns(
+			pt.when(pt.col(isOOFColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__oof"),
+			stopExpr.alias("__stop")
+		)
+		dfFilterCount := dfFilters.select(
+			pt.lit(sampleId).alias("sampleId"),
+			pt.col("__oof").sum().alias("assemble.clonotypesDroppedByOutOfFrame"),
+			pt.col("__stop").sum().alias("assemble.clonotypesDroppedByStopCodons")
+		)
+		filterCountDfs = append(filterCountDfs, dfFilterCount)
+	}
+	if len(filterCountDfs) > 0 {
+		filterCountsDf := len(filterCountDfs) > 1 ? pt.concat(filterCountDfs) : filterCountDfs[0]
+		joinedDf = joinedDf.join(filterCountsDf, { how: "left", on: ["sampleId"] })
+	} else {
+		joinedDf = joinedDf.withColumns(
+			pt.lit(0).alias("assemble.clonotypesDroppedByOutOfFrame"),
+			pt.lit(0).alias("assemble.clonotypesDroppedByStopCodons")
+		)
+	}
     // Per-chain clonotype counts
     perChainJoined := joinedDf
     for chain in chains {
@@ -159,6 +299,10 @@ self.body(func(inputs) {
         pt.col("exportedClonotypes").fillNull(0).cast("Long").alias("totalClonotypes"),
         pt.col("readsUsedInClonotypesNew").fillNull(0).cast("Long").alias("readsUsedInClonotypes")
     )
+	finalDf = finalDf.withColumns(
+		pt.col("assemble.clonotypesDroppedByOutOfFrame").fillNull(0).cast("Long").alias("assemble.clonotypesDroppedByOutOfFrame"),
+		pt.col("assemble.clonotypesDroppedByStopCodons").fillNull(0).cast("Long").alias("assemble.clonotypesDroppedByStopCodons")
+	)
     for chain in chains {
         col := "clonotypesByChain." + chain
         finalDf = finalDf.withColumns(pt.col(col).fillNull(0).cast("Long").alias(col))

package/src/main.tpl.tengo CHANGED Viewed

@@ -68,7 +68,9 @@ wf.body(func(args) {
 			mixcrChains: chainInfos[chains].mixcrFilter,
 			cloneClusteringMode: cloneClusteringMode,
 			tagPattern: args.tagPattern,
-			assemblingFeature: args.assemblingFeature
+			assemblingFeature: args.assemblingFeature,
+			stopCodonTypes: args.stopCodonTypes,
+			stopCodonReplacements: args.stopCodonReplacements
 		})
 	})

package/src/mixcr-export.tpl.tengo CHANGED Viewed

@@ -9,11 +9,157 @@ pt := import("@platforma-sdk/workflow-tengo:pt")
 clonotypeLabel := import(":clonotype-label")
 json := import("json")
+text := import("text")
 mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:main")
 self.defineOutputs("tsv")
+applyStopCodonReplacementsPt := func(df, opts) {
+	if is_undefined(opts) {
+		return df
+	}
+	aminoAcidSeqColumns := opts.aminoAcidSeqColumns
+	cdr3SeqColumns := opts.cdr3SeqColumns
+	stopCodonTypes := opts.stopCodonTypes
+	stopCodonReplacements := opts.stopCodonReplacements
+	if is_undefined(aminoAcidSeqColumns) || len(aminoAcidSeqColumns) == 0 {
+		return df
+	}
+	if is_undefined(stopCodonTypes) || !is_array(stopCodonTypes) || len(stopCodonTypes) == 0 {
+		return df
+	}
+	if !is_undefined(stopCodonReplacements) && !is_map(stopCodonReplacements) {
+		stopCodonReplacements = undefined
+	}
+	contains := func(arr, value) {
+		for v in arr {
+			if v == value { return true }
+		}
+		return false
+	}
+	stopReplacement := func(stopType) {
+		if !contains(stopCodonTypes, stopType) {
+			return "*"
+		}
+		if is_undefined(stopCodonReplacements) {
+			return "*"
+		}
+		aa := stopCodonReplacements[stopType]
+		if is_undefined(aa) || aa == "" {
+			return "*"
+		}
+		return text.to_upper(aa)
+	}
+	codonMapBase := {
+		"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
+		"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
+		"TAT": "Y", "TAC": "Y", "TAA": "*",
+		"TAG": "*", "TGT": "C", "TGC": "C",
+		"TGA": "*", "TGG": "W",
+		"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
+		"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
+		"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
+		"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
+		"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
+		"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
+		"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
+		"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
+		"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
+		"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
+		"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
+		"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
+	}
+	codonMapReplace := {
+		"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
+		"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
+		"TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
+		"TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
+		"TGA": stopReplacement("opal"), "TGG": "W",
+		"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
+		"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
+		"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
+		"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
+		"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
+		"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
+		"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
+		"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
+		"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
+		"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
+		"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
+		"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
+	}
+	translateNtToAaExpr := func(ntExpr, codonMap) {
+		seq := ntExpr.fillNull("").strToUpper()
+		seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
+		for codon, aa in codonMap {
+			seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
+		}
+		seq = seq.strReplace("\\|$", "", { replaceAll: false })
+		seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
+		seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
+		return seq
+	}
+	pairs := []
+	for aaCol in aminoAcidSeqColumns {
+		ntCol := text.replace(aaCol, "aaSeq", "nSeq", 1)
+		pairs = append(pairs, { aa: aaCol, nt: ntCol })
+	}
+	expressions := []
+	replacedAnyExprs := []
+	replacedColsExprs := []
+	for pair in pairs {
+		aaCol := pair.aa
+		ntCol := pair.nt
+		translatedBase := translateNtToAaExpr(pt.col(ntCol), codonMapBase)
+		translatedReplaced := translateNtToAaExpr(pt.col(ntCol), codonMapReplace)
+		expressions = append(expressions, translatedReplaced.alias(aaCol))
+		cond := translatedReplaced.neq(translatedBase)
+		replacedAnyExprs = append(replacedAnyExprs, cond)
+		replacedColsExprs = append(replacedColsExprs, pt.when(cond).then(pt.lit(aaCol)).otherwise(pt.lit("")))
+	}
+	if len(expressions) > 0 {
+		df = df.withColumns(expressions...)
+	}
+	if len(replacedAnyExprs) > 0 {
+		colsList := pt.concatStr(replacedColsExprs, { delimiter: "," })
+		colsList = colsList.strReplace(",+", ",", { replaceAll: true }).strReplace("^,|,$", "", { replaceAll: true })
+		df = df.withColumns(
+			pt.anyHorizontal(replacedAnyExprs...).alias("stopCodonReplaced"),
+			colsList.alias("stopCodonReplacedColumns")
+		)
+	}
+	stopChecks := []
+	for colName in aminoAcidSeqColumns {
+		stopChecks = append(stopChecks, pt.col(colName).strContains("*", { literal: true }))
+	}
+	if len(stopChecks) > 0 {
+		df = df.filter(pt.anyHorizontal(stopChecks...).eq(false))
+	}
+	if !is_undefined(cdr3SeqColumns) && len(cdr3SeqColumns) > 0 {
+		regionChecks := []
+		for colName in cdr3SeqColumns {
+			regionChecks = append(regionChecks, pt.col(colName).strToUpper().eq("REGION_NOT_COVERED"))
+		}
+		if len(regionChecks) > 0 {
+			df = df.filter(pt.anyHorizontal(regionChecks...).eq(false))
+		}
+	}
+	return df
+}
 self.body(func(inputs) {
 	clnsFile := inputs[pConstants.VALUE_FIELD_NAME]
@@ -22,6 +168,13 @@ self.body(func(inputs) {
 	clonotypeKeyColumns := params.clonotypeKeyColumns
 	mainIsProductiveColumn := params.mainIsProductiveColumn
+	aminoAcidSeqColumns := params.aminoAcidSeqColumns
+	aminoAcidSeqColumnPairs := params.aminoAcidSeqColumnPairs
+	cdr3SeqColumns := params.cdr3SeqColumns
+	stopCodonTypes := params.stopCodonTypes
+	stopCodonReplacements := params.stopCodonReplacements
+	useProductiveFilter := is_undefined(stopCodonTypes) || len(stopCodonTypes) == 0
 	hashKeyDerivationExpressionPt := func(sourceColumns) {
 		return pt.concatStr(
@@ -42,8 +195,13 @@ self.body(func(inputs) {
 			arg("--dont-split-files").
 			arg("--drop-default-fields").
 			arg("--reset-export-clone-table-splitting").
-			arg("--chains").arg(params.mixcrChains).
-			arg("--export-productive-clones-only")
+			arg("--chains").arg(params.mixcrChains)
+		if useProductiveFilter {
+			mixcrCmdBuilder = mixcrCmdBuilder.arg("--export-productive-clones-only")
+		} else {
+			mixcrCmdBuilder = mixcrCmdBuilder.arg("--filter-out-of-frames")
+		}
 		additionalAction(mixcrCmdBuilder)
@@ -96,6 +254,15 @@ self.body(func(inputs) {
 				alias(mainIsProductiveColumn)
 		)
 	}
+	if !is_undefined(stopCodonTypes) && len(stopCodonTypes) > 0 {
+		dfMain = applyStopCodonReplacementsPt(dfMain, {
+			aminoAcidSeqColumns: aminoAcidSeqColumns,
+			aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
+			cdr3SeqColumns: cdr3SeqColumns,
+			stopCodonTypes: stopCodonTypes,
+			stopCodonReplacements: stopCodonReplacements
+		})
+	}
 	dfMain.addColumns(
 		hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey")
 	)

package/src/process.tpl.tengo CHANGED Viewed

@@ -94,7 +94,11 @@ self.body(func(inputs) {
 	mainAbundanceColumnNormalized := exportSpecs.mainAbundanceColumnNormalized
 	mainAbundanceColumnUnnormalized := exportSpecs.mainAbundanceColumnUnnormalized
 	mainIsProductiveColumn := exportSpecs.mainIsProductiveColumn
+	productiveFeature := exportSpecs.productiveFeature
 	axesByClonotypeKey := exportSpecs.axesByClonotypeKey
+	aminoAcidSeqColumns := exportSpecs.aminoAcidSeqColumns
+	aminoAcidSeqColumnPairs := exportSpecs.aminoAcidSeqColumnPairs
+	cdr3SeqColumns := exportSpecs.cdr3SeqColumns
 	columnsToSchema := func(columns) {
 		schema := []
@@ -256,13 +260,18 @@ self.body(func(inputs) {
 		exportOutputs,
 		{
 			extra: {
-				params: {
+				params: maps.clone({
 					clonotypeKeyColumns: clonotypeKeyColumns,
 					exportArgs: exportArgs,
 					referenceLibrary: referenceLibrary,
-						mixcrChains: mixcrChains,
-						mainIsProductiveColumn: mainIsProductiveColumn
-				}
+					mixcrChains: mixcrChains,
+					mainIsProductiveColumn: mainIsProductiveColumn,
+					aminoAcidSeqColumns: aminoAcidSeqColumns,
+					aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
+					cdr3SeqColumns: cdr3SeqColumns,
+					stopCodonTypes: params.stopCodonTypes,
+					stopCodonReplacements: params.stopCodonReplacements
+				}, { removeUndefs: true })
 			}
 		}
 	)
@@ -346,7 +355,10 @@ self.body(func(inputs) {
 		isLibraryFileGzipped: false,
 		clonotypeTablesData: clonotypeTablesData,
 		hasUmi: hasUMI,
-		umiTags: umiTags
+		umiTags: umiTags,
+		productiveFeature: productiveFeature,
+		stopCodonTypes: params.stopCodonTypes,
+		stopCodonReplacements: params.stopCodonReplacements
 	})
 	return {

package/src/qc-report-columns.lib.tengo CHANGED Viewed

@@ -571,6 +571,38 @@ getQcReportColumns := func(hasUmi, sampleIdAxisSpec, chains, umiTags) {
             }
         }
     },
+    {
+        column: "assemble.clonotypesDroppedByStopCodons",
+        id: "assemble-clonotypes-dropped-by-stop-codons",
+        allowNA: true,
+        naRegex: "NaN",
+        spec: {
+            name: "mixcr.com/reports/assemble/clonotypesDroppedByStopCodons",
+            valueType: "Long",
+            annotations: {
+                "pl7.app/min": "0",
+                "pl7.app/table/orderPriority": "108200",
+                "pl7.app/table/visibility": "optional",
+                "pl7.app/label": "Clonotypes Dropped - Stop Codons"
+            }
+        }
+    },
+    {
+        column: "assemble.clonotypesDroppedByOutOfFrame",
+        id: "assemble-clonotypes-dropped-by-out-of-frame",
+        allowNA: true,
+        naRegex: "NaN",
+        spec: {
+            name: "mixcr.com/reports/assemble/clonotypesDroppedByOutOfFrame",
+            valueType: "Long",
+            annotations: {
+                "pl7.app/min": "0",
+                "pl7.app/table/orderPriority": "108100",
+                "pl7.app/table/visibility": "optional",
+                "pl7.app/label": "Clonotypes Dropped - Out of Frame"
+            }
+        }
+    },
     {
         column: "totalClonotypes",
         id: "total-clonotypes",