npm - @platforma-open/milaboratories.mixcr-clonotyping-2.workflow - Versions diffs - 3.21.0 → 3.23.0 - Mend

@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.21.0 → 3.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/.turbo/turbo-build.log +4 -2
package/CHANGELOG.md +12 -0
package/dist/tengo/lib/calculate-export-specs.lib.tengo +18 -0
package/dist/tengo/lib/qc-report-columns.lib.tengo +77 -64
package/dist/tengo/lib/stop-codon-replacement.lib.tengo +179 -0
package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
package/dist/tengo/tpl/calculate-preset-info.plj.gz +0 -0
package/dist/tengo/tpl/export-report.plj.gz +0 -0
package/dist/tengo/tpl/list-presets.plj.gz +0 -0
package/dist/tengo/tpl/main.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
package/dist/tengo/tpl/prerun.plj.gz +0 -0
package/dist/tengo/tpl/process-single-cell.plj.gz +0 -0
package/dist/tengo/tpl/process.plj.gz +0 -0
package/dist/tengo/tpl/test.columns-calculate.plj.gz +0 -0
package/dist/tengo/tpl/test.columns.test.plj.gz +0 -0
package/package.json +5 -5
package/src/calculate-export-specs.lib.tengo +18 -0
package/src/export-report.tpl.tengo +91 -7
package/src/main.tpl.tengo +3 -1
package/src/mixcr-export.tpl.tengo +39 -1
package/src/process.tpl.tengo +14 -1
package/src/qc-report-columns.lib.tengo +77 -64
package/src/stop-codon-replacement.lib.tengo +179 -0

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,7 +1,7 @@
  WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
-> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.21.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
-> rm -rf dist && pl-tengo check && pl-tengo build
+> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.23.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
+> shx rm -rf dist && pl-tengo check && pl-tengo build
   info: Skipping unknown file type: test/columns.test.ts
 Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
@@ -17,6 +17,7 @@ Processing "src/prerun.tpl.tengo"...
 Processing "src/process-single-cell.tpl.tengo"...
 Processing "src/process.tpl.tengo"...
 Processing "src/qc-report-columns.lib.tengo"...
+Processing "src/stop-codon-replacement.lib.tengo"...
 Processing "src/test/columns-calculate.tpl.tengo"...
 Processing "src/test/columns.test.tpl.tengo"...
 No syntax errors found.
@@ -25,6 +26,7 @@ No syntax errors found.
   info:   - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/calculate-export-specs.lib.tengo
   info:   - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/clonotype-label.lib.tengo
   info:   - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/qc-report-columns.lib.tengo
+  info:   - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/stop-codon-replacement.lib.tengo
   info:   - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz
   info:   - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/calculate-preset-info.plj.gz
   info:   - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/export-report.plj.gz

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,17 @@
 # @platforma-open/milaboratories.mixcr-clonotyping.workflow
+## 3.23.0
+### Minor Changes
+- 3c8ed71: stop codon replacement and dep updates
+## 3.22.0
+### Minor Changes
+- 22562b1: qc table multiple UMI support
 ## 3.21.0
 ### Minor Changes

package/dist/tengo/lib/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -442,6 +442,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
 	}
 	orderP := 80000
+	aminoAcidSeqColumns := []
+	aminoAcidSeqColumnPairs := []
+	cdr3SeqColumns := []
@@ -466,6 +469,16 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
 				alphabetShortMixcr := isAminoAcid ? "aa" : "n"
 				columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
 				visibility := featureU == "CDR3" && (!isSingleCell || isAminoAcid) // isSingleCell ? (featureU == "CDR3") && isAminoAcid : (featureU == "CDR3") || (featureU == assemblingFeature)
+				if featureU == "CDR3" {
+					cdr3SeqColumns += [ columnName ]
+				}
+				if isAminoAcid {
+					aminoAcidSeqColumns += [ columnName ]
+					aminoAcidSeqColumnPairs += [ {
+						aa: columnName,
+						nt: "nSeq" + imputedU + featureU
+					} ]
+				}
 				columnsSpecPerClonotypeNoAggregates += [ {
 						column: columnName,
 						id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
@@ -973,6 +986,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
 	}
 	return {
+		productiveFeature: productiveFeature,
 		clonotypeKeyColumns: clonotypeKeyColumns,
 		clonotypeKeyArgs: clonotypeKeyArgs,
@@ -981,6 +995,10 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
 		axisByClonotypeKeyGen: axisByClonotypeKeyGen,
 		axisByScClonotypeKeyGen: axisByScClonotypeKeyGen,
+		aminoAcidSeqColumns: aminoAcidSeqColumns,
+		aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
+		cdr3SeqColumns: cdr3SeqColumns,
 		columnsSpecPerSample: columnsSpecPerSample,
 		columnsSpecPerSampleSc: columnsSpecPerSampleSc,
 		columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,

package/dist/tengo/lib/qc-report-columns.lib.tengo CHANGED Viewed

@@ -3,9 +3,13 @@
 ll := import("@platforma-sdk/workflow-tengo:ll")
 pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
+text := import("text")
-getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags) {
+getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags, umiTags) {
+    if is_undefined(umiTags) {
+        umiTags = []
+    }
     baseColumns := [
     {
@@ -656,73 +660,82 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
         }
     }
-    dataWithUmiColumns := [    {
-        column: "refineTags.UMI.outputCount",
-        id: "refine-tags-umi-output-count",
-        allowNA: true,
-        naRegex: "NaN",
-        spec: {
-            name: "mixcr.com/reports/refineTags/UMI/outputCount",
-            valueType: "Long",
-            annotations: {
-                "pl7.app/min": "0",
-                "pl7.app/table/orderPriority": "85000",
-                "pl7.app/table/visibility": "optional",
-                "pl7.app/label": "Refine Tags UMI - Output Count"
+    dataWithUmiColumns := []
+    for idx, umiTag in umiTags {
+        orderBase := 85000 + idx * 10
+        orderBasePercents := 85100 + idx * 10
+        orderDiversity := 85200 + idx * 10
+        orderDiversityPercents := 85300 + idx * 10
+        tagL := text.to_lower(umiTag)
+        dataWithUmiColumns = dataWithUmiColumns + [{
+            column: "refineTags." + umiTag + ".outputCount",
+            id: "refine-tags-" + tagL + "-output-count",
+            allowNA: true,
+            naRegex: "NaN",
+            spec: {
+                name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCount",
+                valueType: "Long",
+                annotations: {
+                    "pl7.app/min": "0",
+                    "pl7.app/table/orderPriority": string(orderBase),
+                    "pl7.app/table/visibility": "optional",
+                    "pl7.app/label": "Refine Tags " + umiTag + " - Output Count"
+                }
             }
-        }
-    },
-    {
-        column: "refineTags.UMI.outputCountPercents",
-        id: "refine-tags-umi-output-count-percents",
-        allowNA: true,
-        naRegex: "NaN",
-        spec: {
-            name: "mixcr.com/reports/refineTags/UMI/outputCountPercents",
-            valueType: "Double",
-            annotations: {
-                "pl7.app/min": "0",
-                "pl7.app/max": "100",
-                "pl7.app/table/orderPriority": "85100",
-                "pl7.app/table/visibility": "default",
-                "pl7.app/label": "Refine Tags UMI - Output Count (%)"
+        },
+        {
+            column: "refineTags." + umiTag + ".outputCountPercents",
+            id: "refine-tags-" + tagL + "-output-count-percents",
+            allowNA: true,
+            naRegex: "NaN",
+            spec: {
+                name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCountPercents",
+                valueType: "Double",
+                annotations: {
+                    "pl7.app/min": "0",
+                    "pl7.app/max": "100",
+                    "pl7.app/table/orderPriority": string(orderBasePercents),
+                    "pl7.app/table/visibility": "default",
+                    "pl7.app/label": "Refine Tags " + umiTag + " - Output Count (%)"
+                }
             }
-        }
-    },
-    {
-        column: "refineTags.UMI.outputDiversity",
-        id: "refine-tags-umi-output-diversity",
-        allowNA: true,
-        naRegex: "NaN",
-        spec: {
-            name: "mixcr.com/reports/refineTags/UMI/outputDiversity",
-            valueType: "Long",
-            annotations: {
-                "pl7.app/min": "0",
-                "pl7.app/table/orderPriority": "85200",
-                "pl7.app/table/visibility": "optional",
-                "pl7.app/label": "Refine Tags UMI - Output Diversity"
+        },
+        {
+            column: "refineTags." + umiTag + ".outputDiversity",
+            id: "refine-tags-" + tagL + "-output-diversity",
+            allowNA: true,
+            naRegex: "NaN",
+            spec: {
+                name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversity",
+                valueType: "Long",
+                annotations: {
+                    "pl7.app/min": "0",
+                    "pl7.app/table/orderPriority": string(orderDiversity),
+                    "pl7.app/table/visibility": "optional",
+                    "pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity"
+                }
             }
-        }
-    },
-    {
-        column: "refineTags.UMI.outputDiversityPercents",
-        id: "refine-tags-umi-output-diversity-percents",
-        allowNA: true,
-        naRegex: "NaN",
-        spec: {
-            name: "mixcr.com/reports/refineTags/UMI/outputDiversityPercents",
-            valueType: "Double",
-            annotations: {
-                "pl7.app/min": "0",
-                "pl7.app/max": "100",
-                "pl7.app/table/orderPriority": "85300",
-                "pl7.app/table/visibility": "default",
-                "pl7.app/label": "Refine Tags UMI - Output Diversity (%)"
+        },
+        {
+            column: "refineTags." + umiTag + ".outputDiversityPercents",
+            id: "refine-tags-" + tagL + "-output-diversity-percents",
+            allowNA: true,
+            naRegex: "NaN",
+            spec: {
+                name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversityPercents",
+                valueType: "Double",
+                annotations: {
+                    "pl7.app/min": "0",
+                    "pl7.app/max": "100",
+                    "pl7.app/table/orderPriority": string(orderDiversityPercents),
+                    "pl7.app/table/visibility": "default",
+                    "pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity (%)"
+                }
             }
-        }
-    },
-    {
+        }]
+    }
+    dataWithUmiColumns = dataWithUmiColumns + [{
         column: "refineTags.numberOfGroupsAccepted",
         id: "refine-tags-number-of-groups-accepted",
         allowNA: true,

package/dist/tengo/lib/stop-codon-replacement.lib.tengo ADDED Viewed

@@ -0,0 +1,179 @@
+pt := import("@platforma-sdk/workflow-tengo:pt")
+text := import("text")
+applyStopCodonReplacementsPt := func(df, opts) {
+	if is_undefined(opts) {
+		return df
+	}
+	aminoAcidSeqColumns := opts.aminoAcidSeqColumns
+	aminoAcidSeqColumnPairs := opts.aminoAcidSeqColumnPairs
+	cdr3SeqColumns := opts.cdr3SeqColumns
+	stopCodonTypes := opts.stopCodonTypes
+	stopCodonReplacements := opts.stopCodonReplacements
+	allowedNtColumns := opts.allowedNtColumns
+	if is_undefined(aminoAcidSeqColumns) || !is_array(aminoAcidSeqColumns) || len(aminoAcidSeqColumns) == 0 {
+		return df
+	}
+	if is_undefined(stopCodonTypes) || !is_array(stopCodonTypes) || len(stopCodonTypes) == 0 {
+		return df
+	}
+	if !is_undefined(stopCodonReplacements) && !is_map(stopCodonReplacements) {
+		stopCodonReplacements = undefined
+	}
+	if !is_undefined(allowedNtColumns) && !is_array(allowedNtColumns) {
+		allowedNtColumns = undefined
+	}
+	contains := func(arr, value) {
+		for v in arr {
+			if v == value { return true }
+		}
+		return false
+	}
+	stopReplacement := func(stopType) {
+		if !contains(stopCodonTypes, stopType) {
+			return "*"
+		}
+		if is_undefined(stopCodonReplacements) {
+			return "*"
+		}
+		aa := stopCodonReplacements[stopType]
+		if is_undefined(aa) || aa == "" {
+			return "*"
+		}
+		return text.to_upper(aa)
+	}
+	codonMapBase := {
+		"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
+		"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
+		"TAT": "Y", "TAC": "Y", "TAA": "*",
+		"TAG": "*", "TGT": "C", "TGC": "C",
+		"TGA": "*", "TGG": "W",
+		"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
+		"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
+		"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
+		"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
+		"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
+		"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
+		"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
+		"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
+		"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
+		"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
+		"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
+		"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
+	}
+	codonMapReplace := {
+		"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
+		"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
+		"TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
+		"TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
+		"TGA": stopReplacement("opal"), "TGG": "W",
+		"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
+		"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
+		"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
+		"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
+		"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
+		"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
+		"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
+		"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
+		"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
+		"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
+		"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
+		"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
+	}
+	translateNtToAaExpr := func(ntExpr, codonMap) {
+		seq := ntExpr.fillNull("").strToUpper()
+		seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
+		for codon, aa in codonMap {
+			seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
+		}
+		seq = seq.strReplace("\\|$", "", { replaceAll: false })
+		seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
+		seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
+		return seq
+	}
+	pairs := []
+	if is_array(aminoAcidSeqColumnPairs) && len(aminoAcidSeqColumnPairs) > 0 {
+		for p in aminoAcidSeqColumnPairs {
+			if is_map(p) && !is_undefined(p.aa) && !is_undefined(p.nt) {
+				if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, p.nt) {
+					continue
+				}
+				pairs = append(pairs, p)
+			}
+		}
+	}
+	if len(pairs) == 0 {
+		for aaCol in aminoAcidSeqColumns {
+			ntCol := text.replace(aaCol, "aaSeq", "nSeq", 1)
+			if text.has_suffix(ntCol, "InFrame") {
+				ntCol = text.replace(ntCol, "InFrame", "", 1)
+			}
+			if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, ntCol) {
+				continue
+			}
+			pairs = append(pairs, { aa: aaCol, nt: ntCol })
+		}
+	}
+	expressions := []
+	replacedAnyExprs := []
+	replacedColsExprs := []
+	aaColumnsUsed := []
+	for pair in pairs {
+		aaCol := pair.aa
+		ntCol := pair.nt
+		translatedBase := translateNtToAaExpr(pt.col(ntCol), codonMapBase)
+		translatedReplaced := translateNtToAaExpr(pt.col(ntCol), codonMapReplace)
+		expressions = append(expressions, translatedReplaced.alias(aaCol))
+		cond := translatedReplaced.neq(translatedBase)
+		replacedAnyExprs = append(replacedAnyExprs, cond)
+		replacedColsExprs = append(replacedColsExprs, pt.when(cond).then(pt.lit(aaCol)).otherwise(pt.lit("")))
+		aaColumnsUsed = append(aaColumnsUsed, aaCol)
+	}
+	if len(expressions) > 0 {
+		df = df.withColumns(expressions...)
+	}
+	if len(replacedAnyExprs) > 0 {
+		colsList := pt.concatStr(replacedColsExprs, { delimiter: "," })
+		colsList = colsList.strReplace(",+", ",", { replaceAll: true }).strReplace("^,|,$", "", { replaceAll: true })
+		df = df.withColumns(
+			pt.anyHorizontal(replacedAnyExprs...).alias("stopCodonReplaced"),
+			colsList.alias("stopCodonReplacedColumns")
+		)
+	}
+	stopChecks := []
+	for colName in aaColumnsUsed {
+		stopChecks = append(stopChecks, pt.col(colName).strContains("*", { literal: true }))
+	}
+	if len(stopChecks) > 0 {
+		df = df.filter(pt.anyHorizontal(stopChecks...).eq(false))
+	}
+	if !is_undefined(cdr3SeqColumns) && len(cdr3SeqColumns) > 0 {
+		regionChecks := []
+		for colName in cdr3SeqColumns {
+			if !contains(aaColumnsUsed, colName) {
+				continue
+			}
+			regionChecks = append(regionChecks, pt.col(colName).strToUpper().eq("REGION_NOT_COVERED"))
+		}
+		if len(regionChecks) > 0 {
+			df = df.filter(pt.anyHorizontal(regionChecks...).eq(false))
+		}
+	}
+	return df
+}
+export {
+	applyStopCodonReplacementsPt: applyStopCodonReplacementsPt
+}

package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/calculate-preset-info.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/export-report.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/list-presets.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-analyze.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-export.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/prerun.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process-single-cell.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/test.columns-calculate.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/test.columns.test.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,17 +1,17 @@
 {
   "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
-  "version": "3.21.0",
+  "version": "3.23.0",
   "description": "Tengo-based template",
   "dependencies": {
-    "@platforma-sdk/workflow-tengo": "5.8.0",
+    "@platforma-sdk/workflow-tengo": "5.8.1",
     "@platforma-open/milaboratories.software-mixcr": "4.7.0-254-develop"
   },
   "devDependencies": {
-    "@platforma-sdk/tengo-builder": "2.4.8"
+    "@platforma-sdk/tengo-builder": "2.4.12"
   },
   "scripts": {
-    "build": "rm -rf dist && pl-tengo check && pl-tengo build",
+    "build": "shx rm -rf dist && pl-tengo check && pl-tengo build",
     "format": "/usr/bin/env emacs --script ./format.el",
-    "do-pack": "rm -f *.tgz && pnpm pack && mv *.tgz package.tgz"
+    "do-pack": "shx rm -f *.tgz && pnpm pack && shx mv *.tgz package.tgz"
   }
 }

package/src/calculate-export-specs.lib.tengo CHANGED Viewed

@@ -442,6 +442,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
 	}
 	orderP := 80000
+	aminoAcidSeqColumns := []
+	aminoAcidSeqColumnPairs := []
+	cdr3SeqColumns := []
 	// Sequences
@@ -466,6 +469,16 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
 				alphabetShortMixcr := isAminoAcid ? "aa" : "n"
 				columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
 				visibility := featureU == "CDR3" && (!isSingleCell || isAminoAcid) // isSingleCell ? (featureU == "CDR3") && isAminoAcid : (featureU == "CDR3") || (featureU == assemblingFeature)
+				if featureU == "CDR3" {
+					cdr3SeqColumns += [ columnName ]
+				}
+				if isAminoAcid {
+					aminoAcidSeqColumns += [ columnName ]
+					aminoAcidSeqColumnPairs += [ {
+						aa: columnName,
+						nt: "nSeq" + imputedU + featureU
+					} ]
+				}
 				columnsSpecPerClonotypeNoAggregates += [ {
 						column: columnName,
 						id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
@@ -973,6 +986,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
 	}
 	return {
+		productiveFeature: productiveFeature,
 		clonotypeKeyColumns: clonotypeKeyColumns,
 		clonotypeKeyArgs: clonotypeKeyArgs,
@@ -981,6 +995,10 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
 		axisByClonotypeKeyGen: axisByClonotypeKeyGen,
 		axisByScClonotypeKeyGen: axisByScClonotypeKeyGen,
+		aminoAcidSeqColumns: aminoAcidSeqColumns,
+		aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
+		cdr3SeqColumns: cdr3SeqColumns,
 		columnsSpecPerSample: columnsSpecPerSample,
 		columnsSpecPerSampleSc: columnsSpecPerSampleSc,
 		columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,

package/src/export-report.tpl.tengo CHANGED Viewed

@@ -29,16 +29,80 @@ self.body(func(inputs) {
     library := inputs.library
     isLibraryFileGzipped := inputs.isLibraryFileGzipped
     clonotypeTablesData := inputs.clonotypeTablesData
+	stopCodonTypes := inputs.stopCodonTypes
+	stopCodonReplacements := inputs.stopCodonReplacements
     isSingleCell := len(presetSpecForBack.cellTags) > 0
-	hasUmi := !is_undefined(presetSpecForBack.umiTags) && len(presetSpecForBack.umiTags) > 0
+    umiTags := presetSpecForBack.umiTags
+	hasUmi := !is_undefined(umiTags) && len(umiTags) > 0
     cellTags := presetSpecForBack.cellTags
     singleCellChainTsvsData := inputs.singleCellChainTsvsData
+	useStopCodonReplacement := !is_undefined(stopCodonTypes) && is_array(stopCodonTypes) && len(stopCodonTypes) > 0
+	if is_undefined(stopCodonReplacements) || !is_map(stopCodonReplacements) {
+		stopCodonReplacements = {}
+	}
-    featureForFlags := "CDR3"
+    featureForFlags := inputs.productiveFeature
+    if is_undefined(featureForFlags) || featureForFlags == "" {
+        featureForFlags = "CDR3"
+    }
     isOOFColumn := "isOOF" + featureForFlags
     hasStopsColumn := "hasStopsIn" + featureForFlags
+	contains := func(arr, value) {
+		for v in arr {
+			if v == value { return true }
+		}
+		return false
+	}
+	stopReplacement := func(stopType) {
+		if !contains(stopCodonTypes, stopType) {
+			return "*"
+		}
+		aa := stopCodonReplacements[stopType]
+		if is_undefined(aa) || aa == "" {
+			return "*"
+		}
+		return text.to_upper(aa)
+	}
+    ll.print("__THE_LOG__", stopReplacement("ochre"))
+    ll.print("__THE_LOG__", stopReplacement("amber"))
+    ll.print("__THE_LOG__", stopReplacement("opal"))
+	codonMapReplace := {
+		"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
+		"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
+		"TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
+		"TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
+		"TGA": stopReplacement("opal"), "TGG": "W",
+		"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
+		"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
+		"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
+		"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
+		"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
+		"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
+		"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
+		"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
+		"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
+		"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
+		"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
+		"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
+	}
+	translateNtToAaExpr := func(ntExpr, codonMap) {
+		seq := ntExpr.fillNull("").strToUpper()
+		seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
+		for codon, aa in codonMap {
+			seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
+		}
+		seq = seq.strReplace("\\|$", "", { replaceAll: false })
+		seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
+		seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
+		return seq
+	}
     chainInfos := {
 	"IGHeavy": { mixcrFilter: "IGH", name: "IG Heavy", shortName: "Heavy" },
 	"IGLight": { mixcrFilter: "IGK,IGL", name: "IG Light", shortName: "Light" },
@@ -152,6 +216,7 @@ self.body(func(inputs) {
         exportFiltersCmd = exportFiltersCmd.
             arg("-isOOF").arg(featureForFlags).
             arg("-hasStops").arg(featureForFlags).
+            arg("-nFeature").arg(featureForFlags).
             arg("clones.clns").
             addFile("clones.clns", clnsFile).
             arg("clones.tsv").
@@ -165,10 +230,19 @@ self.body(func(inputs) {
 		}
         exportFiltersResult := exportFiltersCmd.cacheHours(3).run()
         filterTsv := exportFiltersResult.getFile("clones.tsv")
-        dfFilters := wf.frame(filterTsv, { xsvType: "tsv", inferSchema: false, schema: [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ] })
+        schema := [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ]
+        if useStopCodonReplacement {
+            schema = append(schema, { column: "nSeq" + featureForFlags, type: "String" })
+        }
+        dfFilters := wf.frame(filterTsv, { xsvType: "tsv", inferSchema: false, schema: schema })
+        stopExpr := pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0))
+        if useStopCodonReplacement {
+            translated := translateNtToAaExpr(pt.col("nSeq" + featureForFlags), codonMapReplace)
+            stopExpr = pt.when(translated.strContains("*", { literal: true })).then(pt.lit(1)).otherwise(pt.lit(0))
+        }
         dfFilters = dfFilters.withColumns(
             pt.when(pt.col(isOOFColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__oof"),
-            pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__stop")
+            stopExpr.alias("__stop")
         )
         dfFilterCount := dfFilters.select(
             pt.lit(sampleId).alias("sampleId"),
@@ -210,6 +284,7 @@ self.body(func(inputs) {
                 exportChainFiltersResult := exportChainFiltersCmd.
                     arg("-isOOF").arg(featureForFlags).
                     arg("-hasStops").arg(featureForFlags).
+                    arg("-nFeature").arg(featureForFlags).
                     arg("clones.clns").
                     addFile("clones.clns", clnsFile).
                     arg("clones.tsv").
@@ -217,10 +292,19 @@ self.body(func(inputs) {
                     cacheHours(3).
                     run()
                 chainFilterTsv := exportChainFiltersResult.getFile("clones.tsv")
-                dfChainFilters := wf.frame(chainFilterTsv, { xsvType: "tsv", inferSchema: false, schema: [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ] })
+                chainSchema := [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ]
+                if useStopCodonReplacement {
+                    chainSchema = append(chainSchema, { column: "nSeq" + featureForFlags, type: "String" })
+                }
+                dfChainFilters := wf.frame(chainFilterTsv, { xsvType: "tsv", inferSchema: false, schema: chainSchema })
+                chainStopExpr := pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0))
+                if useStopCodonReplacement {
+                    translated := translateNtToAaExpr(pt.col("nSeq" + featureForFlags), codonMapReplace)
+                    chainStopExpr = pt.when(translated.strContains("*", { literal: true })).then(pt.lit(1)).otherwise(pt.lit(0))
+                }
                 dfChainFilters = dfChainFilters.withColumns(
                     pt.when(pt.col(isOOFColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__oof"),
-                    pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__stop")
+                    chainStopExpr.alias("__stop")
                 )
                 dfChainCount := dfChainFilters.select(
                     pt.lit(sampleId).alias("sampleId"),
@@ -409,7 +493,7 @@ self.body(func(inputs) {
     tsvFile := wfResult.getFile("qc-report-processed.tsv")
-    qcReportColumns := qcReportColumns(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags)
+    qcReportColumns := qcReportColumns(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags, umiTags)
     reportColumnsSpec := qcReportColumns.reportColumnsSpec
     qcReportTable := xsv.importFile(

package/src/main.tpl.tengo CHANGED Viewed

@@ -132,7 +132,9 @@ wf.body(func(args) {
 				materialType: args.materialType,
 				tagPattern: args.tagPattern,
 				assembleClonesBy: args.assembleClonesBy,
-				exportMinQuality: args.exportMinQuality
+				exportMinQuality: args.exportMinQuality,
+				stopCodonTypes: args.stopCodonTypes,
+				stopCodonReplacements: args.stopCodonReplacements
 		})
 	})

package/src/mixcr-export.tpl.tengo CHANGED Viewed

@@ -9,8 +9,10 @@ assets := import("@platforma-sdk/workflow-tengo:assets")
 exec := import("@platforma-sdk/workflow-tengo:exec")
 pt := import("@platforma-sdk/workflow-tengo:pt")
 clonotypeLabel := import(":clonotype-label")
+stopCodonReplacement := import(":stop-codon-replacement")
 json := import("json")
+text := import("text")
 self.defineOutputs("tsv", "tsvForSingleCell")
@@ -24,6 +26,11 @@ self.body(func(inputs) {
 	isLibraryFileGzipped := params.isLibraryFileGzipped
 	chains := params.chains
 	exportArgs := params.exportArgs
+	stopCodonTypes := params.stopCodonTypes
+	stopCodonReplacements := params.stopCodonReplacements
+	aminoAcidSeqColumns := params.aminoAcidSeqColumns
+	aminoAcidSeqColumnPairs := params.aminoAcidSeqColumnPairs
+	cdr3SeqColumns := params.cdr3SeqColumns
 	clonotypeKeyColumns := params.clonotypeKeyColumns
 	clonotypeKeyArgs := params.clonotypeKeyArgs
@@ -40,6 +47,8 @@ self.body(func(inputs) {
     mainIsProductiveArgs := params.mainIsProductiveArgs
     mainIsProductiveColumn := params.mainIsProductiveColumn
+	useProductiveFilter := is_undefined(stopCodonTypes) || len(stopCodonTypes) == 0
 	exportMemGB := undefined
 	if !is_undefined(inputs.perProcessMemGB) {
 		exportMemGB = int(1.0*inputs.perProcessMemGB/4.0)
@@ -73,8 +82,12 @@ self.body(func(inputs) {
 			arg("--dont-split-files").
 			arg("--drop-default-fields").
 			arg("--reset-export-clone-table-splitting").
-			arg("--export-productive-clones-only").
 			arg("--chains").arg(chains)
+		if useProductiveFilter {
+			mixcrCmdBuilder = mixcrCmdBuilder.arg("--export-productive-clones-only")
+		} else {
+			mixcrCmdBuilder = mixcrCmdBuilder.arg("--filter-out-of-frames")
+		}
 		if library {
 			if isLibraryFileGzipped {
@@ -138,6 +151,13 @@ self.body(func(inputs) {
                 alias(mainIsProductiveColumn)
         )
     }
+	dfMain = stopCodonReplacement.applyStopCodonReplacementsPt(dfMain, {
+		aminoAcidSeqColumns: aminoAcidSeqColumns,
+		aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
+		cdr3SeqColumns: cdr3SeqColumns,
+		stopCodonTypes: stopCodonTypes,
+		stopCodonReplacements: stopCodonReplacements
+	})
 	dfMain.addColumns(
 		hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey")
 	)
@@ -197,6 +217,24 @@ self.body(func(inputs) {
             )
         }
+		dfSingleCell = stopCodonReplacement.applyStopCodonReplacementsPt(dfSingleCell, {
+			aminoAcidSeqColumns: aminoAcidSeqColumns,
+			aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
+			cdr3SeqColumns: cdr3SeqColumns,
+			stopCodonTypes: stopCodonTypes,
+		stopCodonReplacements: stopCodonReplacements,
+		allowedNtColumns: func() {
+			allowed := []
+			if !is_undefined(clonotypeKeyColumns) {
+				for col in clonotypeKeyColumns {
+					if text.has_prefix(col, "nSeq") {
+						allowed = append(allowed, col)
+					}
+				}
+			}
+			return allowed
+		}()
+		})
 		dfSingleCell.addColumns(
 			hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey"),
 			hashCellKey ? hashKeyDerivationExpressionPt(cellTagColumns).alias("cellKey") : pt.col(cellTagColumns[0]).alias("cellKey")

package/src/process.tpl.tengo CHANGED Viewed

@@ -210,6 +210,10 @@ self.body(func(inputs) {
 	columnsSpecPerClonotypeNoAggregates := exportSpecs.columnsSpecPerClonotypeNoAggregates
 	columnsSpecPerClonotypeAggregates := exportSpecs.columnsSpecPerClonotypeAggregates
 	columnsSpecPerClonotypeSc := exportSpecs.columnsSpecPerClonotypeSc
+	aminoAcidSeqColumns := exportSpecs.aminoAcidSeqColumns
+	aminoAcidSeqColumnPairs := exportSpecs.aminoAcidSeqColumnPairs
+	cdr3SeqColumns := exportSpecs.cdr3SeqColumns
+	productiveFeature := exportSpecs.productiveFeature
 	clonotypeKeyColumns := exportSpecs.clonotypeKeyColumns
 	clonotypeKeyArgs := exportSpecs.clonotypeKeyArgs
@@ -406,6 +410,10 @@ self.body(func(inputs) {
 						clonotypeKeyColumns: clonotypeKeyColumns,
 						clonotypeKeyArgs: clonotypeKeyArgs,
+						aminoAcidSeqColumns: aminoAcidSeqColumns,
+						aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
+						cdr3SeqColumns: cdr3SeqColumns,
 						mainIsProductiveColumn: mainIsProductiveColumn,
 						mainIsProductiveArgs: mainIsProductiveArgs,
 						mainAbundanceColumnNormalized: mainAbundanceColumnNormalized,
@@ -419,7 +427,9 @@ self.body(func(inputs) {
 						mainAbundanceColumnIsReadCount: (!is_undefined(cellTagColumns) && len(cellTagColumns) > 0 && mainAbundanceColumnUnnormalized == "readCount") ? true : undefined,
 						exportArgs: exportArgs,
-						isLibraryFileGzipped: isLibraryFileGzipped
+						isLibraryFileGzipped: isLibraryFileGzipped,
+						stopCodonTypes: params.stopCodonTypes,
+						stopCodonReplacements: params.stopCodonReplacements
 					}, { removeUndefs: true }),
 					library: library
 				},
@@ -779,6 +789,9 @@ self.body(func(inputs) {
 		library: library,
 		isLibraryFileGzipped: isLibraryFileGzipped,
         clonotypeTablesData: clonotypeTablesData,
+		productiveFeature: productiveFeature,
+		stopCodonTypes: params.stopCodonTypes,
+		stopCodonReplacements: params.stopCodonReplacements,
 		singleCellChainTsvsData: singleCellChainTsvs
 	})

package/src/qc-report-columns.lib.tengo CHANGED Viewed

@@ -3,9 +3,13 @@
 ll := import("@platforma-sdk/workflow-tengo:ll")
 pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
+text := import("text")
 // QC Report column specifications function
-getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags) {
+getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags, umiTags) {
+    if is_undefined(umiTags) {
+        umiTags = []
+    }
     // Bulk sequencing columns
     baseColumns := [
     {
@@ -656,73 +660,82 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
         }
     }
-    dataWithUmiColumns := [    {
-        column: "refineTags.UMI.outputCount",
-        id: "refine-tags-umi-output-count",
-        allowNA: true,
-        naRegex: "NaN",
-        spec: {
-            name: "mixcr.com/reports/refineTags/UMI/outputCount",
-            valueType: "Long",
-            annotations: {
-                "pl7.app/min": "0",
-                "pl7.app/table/orderPriority": "85000",
-                "pl7.app/table/visibility": "optional",
-                "pl7.app/label": "Refine Tags UMI - Output Count"
+    dataWithUmiColumns := []
+    for idx, umiTag in umiTags {
+        orderBase := 85000 + idx * 10
+        orderBasePercents := 85100 + idx * 10
+        orderDiversity := 85200 + idx * 10
+        orderDiversityPercents := 85300 + idx * 10
+        tagL := text.to_lower(umiTag)
+        dataWithUmiColumns = dataWithUmiColumns + [{
+            column: "refineTags." + umiTag + ".outputCount",
+            id: "refine-tags-" + tagL + "-output-count",
+            allowNA: true,
+            naRegex: "NaN",
+            spec: {
+                name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCount",
+                valueType: "Long",
+                annotations: {
+                    "pl7.app/min": "0",
+                    "pl7.app/table/orderPriority": string(orderBase),
+                    "pl7.app/table/visibility": "optional",
+                    "pl7.app/label": "Refine Tags " + umiTag + " - Output Count"
+                }
             }
-        }
-    },
-    {
-        column: "refineTags.UMI.outputCountPercents",
-        id: "refine-tags-umi-output-count-percents",
-        allowNA: true,
-        naRegex: "NaN",
-        spec: {
-            name: "mixcr.com/reports/refineTags/UMI/outputCountPercents",
-            valueType: "Double",
-            annotations: {
-                "pl7.app/min": "0",
-                "pl7.app/max": "100",
-                "pl7.app/table/orderPriority": "85100",
-                "pl7.app/table/visibility": "default",
-                "pl7.app/label": "Refine Tags UMI - Output Count (%)"
+        },
+        {
+            column: "refineTags." + umiTag + ".outputCountPercents",
+            id: "refine-tags-" + tagL + "-output-count-percents",
+            allowNA: true,
+            naRegex: "NaN",
+            spec: {
+                name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCountPercents",
+                valueType: "Double",
+                annotations: {
+                    "pl7.app/min": "0",
+                    "pl7.app/max": "100",
+                    "pl7.app/table/orderPriority": string(orderBasePercents),
+                    "pl7.app/table/visibility": "default",
+                    "pl7.app/label": "Refine Tags " + umiTag + " - Output Count (%)"
+                }
             }
-        }
-    },
-    {
-        column: "refineTags.UMI.outputDiversity",
-        id: "refine-tags-umi-output-diversity",
-        allowNA: true,
-        naRegex: "NaN",
-        spec: {
-            name: "mixcr.com/reports/refineTags/UMI/outputDiversity",
-            valueType: "Long",
-            annotations: {
-                "pl7.app/min": "0",
-                "pl7.app/table/orderPriority": "85200",
-                "pl7.app/table/visibility": "optional",
-                "pl7.app/label": "Refine Tags UMI - Output Diversity"
+        },
+        {
+            column: "refineTags." + umiTag + ".outputDiversity",
+            id: "refine-tags-" + tagL + "-output-diversity",
+            allowNA: true,
+            naRegex: "NaN",
+            spec: {
+                name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversity",
+                valueType: "Long",
+                annotations: {
+                    "pl7.app/min": "0",
+                    "pl7.app/table/orderPriority": string(orderDiversity),
+                    "pl7.app/table/visibility": "optional",
+                    "pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity"
+                }
             }
-        }
-    },
-    {
-        column: "refineTags.UMI.outputDiversityPercents",
-        id: "refine-tags-umi-output-diversity-percents",
-        allowNA: true,
-        naRegex: "NaN",
-        spec: {
-            name: "mixcr.com/reports/refineTags/UMI/outputDiversityPercents",
-            valueType: "Double",
-            annotations: {
-                "pl7.app/min": "0",
-                "pl7.app/max": "100",
-                "pl7.app/table/orderPriority": "85300",
-                "pl7.app/table/visibility": "default",
-                "pl7.app/label": "Refine Tags UMI - Output Diversity (%)"
+        },
+        {
+            column: "refineTags." + umiTag + ".outputDiversityPercents",
+            id: "refine-tags-" + tagL + "-output-diversity-percents",
+            allowNA: true,
+            naRegex: "NaN",
+            spec: {
+                name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversityPercents",
+                valueType: "Double",
+                annotations: {
+                    "pl7.app/min": "0",
+                    "pl7.app/max": "100",
+                    "pl7.app/table/orderPriority": string(orderDiversityPercents),
+                    "pl7.app/table/visibility": "default",
+                    "pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity (%)"
+                }
             }
-        }
-    },
-    {
+        }]
+    }
+    dataWithUmiColumns = dataWithUmiColumns + [{
         column: "refineTags.numberOfGroupsAccepted",
         id: "refine-tags-number-of-groups-accepted",
         allowNA: true,

package/src/stop-codon-replacement.lib.tengo ADDED Viewed

@@ -0,0 +1,179 @@
+pt := import("@platforma-sdk/workflow-tengo:pt")
+text := import("text")
+// applyStopCodonReplacementsPt re-translates amino-acid columns of a pt data frame from their
+// nucleotide counterparts, substituting user-selected stop codons with replacement residues.
+//
+// Parameters:
+//   df   - data frame object exposing withColumns/filter (presumably a pt DataFrame; confirm
+//          against callers).
+//   opts - map with the following fields:
+//     aminoAcidSeqColumns     - array of amino-acid column names; if missing or empty df is
+//                               returned unchanged.
+//     aminoAcidSeqColumnPairs - optional array of { aa: <col>, nt: <col> } maps; when no usable
+//                               pair is given, nt names are derived from the aa names
+//                               ("aaSeq" -> "nSeq", trailing "InFrame" dropped).
+//     cdr3SeqColumns          - optional array of column names that are additionally checked
+//                               for the "REGION_NOT_COVERED" marker.
+//     stopCodonTypes          - array of stop codon types to replace ("ochre" = TAA,
+//                               "amber" = TAG, "opal" = TGA); if missing or empty df is
+//                               returned unchanged.
+//     stopCodonReplacements   - optional map stopType -> replacement residue; a missing or
+//                               empty entry keeps "*".
+//     allowedNtColumns        - optional array; when set, only pairs whose nt column is listed
+//                               are processed.
+//
+// Returns df with:
+//   - every processed aa column overwritten by the re-translated sequence,
+//   - "stopCodonReplaced" and "stopCodonReplacedColumns" columns describing where a replacement
+//     changed the translation,
+//   - rows removed where any processed aa column still contains "*",
+//   - rows removed where a processed CDR3 column equals "REGION_NOT_COVERED" (case-insensitive).
+applyStopCodonReplacementsPt := func(df, opts) {
+	if is_undefined(opts) {
+		return df
+	}
+	aminoAcidSeqColumns := opts.aminoAcidSeqColumns
+	aminoAcidSeqColumnPairs := opts.aminoAcidSeqColumnPairs
+	cdr3SeqColumns := opts.cdr3SeqColumns
+	stopCodonTypes := opts.stopCodonTypes
+	stopCodonReplacements := opts.stopCodonReplacements
+	allowedNtColumns := opts.allowedNtColumns
+	// Nothing to do without target columns or without any stop codon type selected.
+	if is_undefined(aminoAcidSeqColumns) || !is_array(aminoAcidSeqColumns) || len(aminoAcidSeqColumns) == 0 {
+		return df
+	}
+	if is_undefined(stopCodonTypes) || !is_array(stopCodonTypes) || len(stopCodonTypes) == 0 {
+		return df
+	}
+	// Malformed optional inputs are ignored rather than treated as errors.
+	if !is_undefined(stopCodonReplacements) && !is_map(stopCodonReplacements) {
+		stopCodonReplacements = undefined
+	}
+	if !is_undefined(allowedNtColumns) && !is_array(allowedNtColumns) {
+		allowedNtColumns = undefined
+	}
+	// contains reports whether value is an element of arr.
+	contains := func(arr, value) {
+		for v in arr {
+			if v == value { return true }
+		}
+		return false
+	}
+	// stopReplacement returns the upper-cased residue configured for stopType, or "*" when the
+	// type is not selected or has no (non-empty) replacement.
+	stopReplacement := func(stopType) {
+		if !contains(stopCodonTypes, stopType) {
+			return "*"
+		}
+		if is_undefined(stopCodonReplacements) {
+			return "*"
+		}
+		aa := stopCodonReplacements[stopType]
+		if is_undefined(aa) || aa == "" {
+			return "*"
+		}
+		return text.to_upper(aa)
+	}
+	// Codon table with all three stop codons translated to "*".
+	codonMapBase := {
+		"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
+		"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
+		"TAT": "Y", "TAC": "Y", "TAA": "*",
+		"TAG": "*", "TGT": "C", "TGC": "C",
+		"TGA": "*", "TGG": "W",
+		"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
+		"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
+		"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
+		"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
+		"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
+		"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
+		"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
+		"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
+		"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
+		"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
+		"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
+		"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
+	}
+	// Same table, but the stop codons map to the configured replacements (or "*").
+	codonMapReplace := {
+		"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
+		"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
+		"TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
+		"TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
+		"TGA": stopReplacement("opal"), "TGG": "W",
+		"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
+		"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
+		"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
+		"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
+		"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
+		"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
+		"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
+		"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
+		"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
+		"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
+		"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
+		"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
+	}
+	// translateNtToAaExpr builds an expression translating the nucleotide expression ntExpr
+	// into amino acids using codonMap, by chaining string replacements.
+	translateNtToAaExpr := func(ntExpr, codonMap) {
+		seq := ntExpr.fillNull("").strToUpper()
+		// Mark codon boundaries: every complete triplet gets a trailing "|".
+		seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
+		// Drop an incomplete trailing codon now, while the string still consists of
+		// nucleotides and delimiters. A complete final codon ends with "|", so this only
+		// matches a real 1-2 nt remainder. Doing it after translation would also strip
+		// trailing A/C/G/T amino acids (Ala/Cys/Gly/Thr) from the result.
+		seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
+		// Only untranslated triplets have three characters in front of a "|", so already
+		// translated residues can never be matched again, whatever the iteration order.
+		for codon, aa in codonMap {
+			seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
+		}
+		// Remove all delimiters (including the trailing one).
+		seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
+		return seq
+	}
+	// Prefer explicitly provided aa/nt pairs, restricted to allowedNtColumns when set.
+	pairs := []
+	if is_array(aminoAcidSeqColumnPairs) && len(aminoAcidSeqColumnPairs) > 0 {
+		for p in aminoAcidSeqColumnPairs {
+			if is_map(p) && !is_undefined(p.aa) && !is_undefined(p.nt) {
+				if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, p.nt) {
+					continue
+				}
+				pairs = append(pairs, p)
+			}
+		}
+	}
+	// Fallback: derive the nt column name from the aa column name.
+	if len(pairs) == 0 {
+		for aaCol in aminoAcidSeqColumns {
+			ntCol := text.replace(aaCol, "aaSeq", "nSeq", 1)
+			if text.has_suffix(ntCol, "InFrame") {
+				ntCol = text.replace(ntCol, "InFrame", "", 1)
+			}
+			if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, ntCol) {
+				continue
+			}
+			pairs = append(pairs, { aa: aaCol, nt: ntCol })
+		}
+	}
+	expressions := []
+	replacedAnyExprs := []
+	replacedColsExprs := []
+	aaColumnsUsed := []
+	for pair in pairs {
+		aaCol := pair.aa
+		ntCol := pair.nt
+		// Translate twice: a difference between the two results means a stop codon was
+		// actually replaced in this column.
+		translatedBase := translateNtToAaExpr(pt.col(ntCol), codonMapBase)
+		translatedReplaced := translateNtToAaExpr(pt.col(ntCol), codonMapReplace)
+		expressions = append(expressions, translatedReplaced.alias(aaCol))
+		cond := translatedReplaced.neq(translatedBase)
+		replacedAnyExprs = append(replacedAnyExprs, cond)
+		replacedColsExprs = append(replacedColsExprs, pt.when(cond).then(pt.lit(aaCol)).otherwise(pt.lit("")))
+		aaColumnsUsed = append(aaColumnsUsed, aaCol)
+	}
+	if len(expressions) > 0 {
+		df = df.withColumns(expressions...)
+	}
+	if len(replacedAnyExprs) > 0 {
+		// Comma-separated list of affected columns; collapse the empty slots left by
+		// columns without replacements.
+		colsList := pt.concatStr(replacedColsExprs, { delimiter: "," })
+		colsList = colsList.strReplace(",+", ",", { replaceAll: true }).strReplace("^,|,$", "", { replaceAll: true })
+		df = df.withColumns(
+			pt.anyHorizontal(replacedAnyExprs...).alias("stopCodonReplaced"),
+			colsList.alias("stopCodonReplacedColumns")
+		)
+	}
+	// Drop rows that still contain a stop ("*") in any re-translated column.
+	stopChecks := []
+	for colName in aaColumnsUsed {
+		stopChecks = append(stopChecks, pt.col(colName).strContains("*", { literal: true }))
+	}
+	if len(stopChecks) > 0 {
+		df = df.filter(pt.anyHorizontal(stopChecks...).eq(false))
+	}
+	// Drop rows whose re-translated CDR3 column carries the "REGION_NOT_COVERED" marker.
+	if !is_undefined(cdr3SeqColumns) && len(cdr3SeqColumns) > 0 {
+		regionChecks := []
+		for colName in cdr3SeqColumns {
+			if !contains(aaColumnsUsed, colName) {
+				continue
+			}
+			regionChecks = append(regionChecks, pt.col(colName).strToUpper().eq("REGION_NOT_COVERED"))
+		}
+		if len(regionChecks) > 0 {
+			df = df.filter(pt.anyHorizontal(regionChecks...).eq(false))
+		}
+	}
+	return df
+}
+// Module exports: the only symbol exposed by this library is applyStopCodonReplacementsPt.
+export {
+	applyStopCodonReplacementsPt: applyStopCodonReplacementsPt
+}