@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.21.0 → 3.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.21.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
4
- > rm -rf dist && pl-tengo check && pl-tengo build
3
+ > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.23.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
4
+ > shx rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  info: Skipping unknown file type: test/columns.test.ts
7
7
  Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
@@ -17,6 +17,7 @@ Processing "src/prerun.tpl.tengo"...
17
17
  Processing "src/process-single-cell.tpl.tengo"...
18
18
  Processing "src/process.tpl.tengo"...
19
19
  Processing "src/qc-report-columns.lib.tengo"...
20
+ Processing "src/stop-codon-replacement.lib.tengo"...
20
21
  Processing "src/test/columns-calculate.tpl.tengo"...
21
22
  Processing "src/test/columns.test.tpl.tengo"...
22
23
  No syntax errors found.
@@ -25,6 +26,7 @@ No syntax errors found.
25
26
  info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/calculate-export-specs.lib.tengo
26
27
  info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/clonotype-label.lib.tengo
27
28
  info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/qc-report-columns.lib.tengo
29
+ info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/stop-codon-replacement.lib.tengo
28
30
  info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz
29
31
  info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/calculate-preset-info.plj.gz
30
32
  info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/export-report.plj.gz
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.mixcr-clonotyping.workflow
2
2
 
3
+ ## 3.23.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 3c8ed71: stop codon replacement and dep updates
8
+
9
+ ## 3.22.0
10
+
11
+ ### Minor Changes
12
+
13
+ - 22562b1: qc table multiple UMI support
14
+
3
15
  ## 3.21.0
4
16
 
5
17
  ### Minor Changes
@@ -442,6 +442,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
442
442
  }
443
443
 
444
444
  orderP := 80000
445
+ aminoAcidSeqColumns := []
446
+ aminoAcidSeqColumnPairs := []
447
+ cdr3SeqColumns := []
445
448
 
446
449
 
447
450
 
@@ -466,6 +469,16 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
466
469
  alphabetShortMixcr := isAminoAcid ? "aa" : "n"
467
470
  columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
468
471
  visibility := featureU == "CDR3" && (!isSingleCell || isAminoAcid) // isSingleCell ? (featureU == "CDR3") && isAminoAcid : (featureU == "CDR3") || (featureU == assemblingFeature)
472
+ if featureU == "CDR3" {
473
+ cdr3SeqColumns += [ columnName ]
474
+ }
475
+ if isAminoAcid {
476
+ aminoAcidSeqColumns += [ columnName ]
477
+ aminoAcidSeqColumnPairs += [ {
478
+ aa: columnName,
479
+ nt: "nSeq" + imputedU + featureU
480
+ } ]
481
+ }
469
482
  columnsSpecPerClonotypeNoAggregates += [ {
470
483
  column: columnName,
471
484
  id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
@@ -973,6 +986,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
973
986
  }
974
987
 
975
988
  return {
989
+ productiveFeature: productiveFeature,
976
990
  clonotypeKeyColumns: clonotypeKeyColumns,
977
991
  clonotypeKeyArgs: clonotypeKeyArgs,
978
992
 
@@ -981,6 +995,10 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
981
995
  axisByClonotypeKeyGen: axisByClonotypeKeyGen,
982
996
  axisByScClonotypeKeyGen: axisByScClonotypeKeyGen,
983
997
 
998
+ aminoAcidSeqColumns: aminoAcidSeqColumns,
999
+ aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
1000
+ cdr3SeqColumns: cdr3SeqColumns,
1001
+
984
1002
  columnsSpecPerSample: columnsSpecPerSample,
985
1003
  columnsSpecPerSampleSc: columnsSpecPerSampleSc,
986
1004
  columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
@@ -3,9 +3,13 @@
3
3
 
4
4
  ll := import("@platforma-sdk/workflow-tengo:ll")
5
5
  pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
6
+ text := import("text")
6
7
 
7
8
 
8
- getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags) {
9
+ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags, umiTags) {
10
+ if is_undefined(umiTags) {
11
+ umiTags = []
12
+ }
9
13
 
10
14
  baseColumns := [
11
15
  {
@@ -656,73 +660,82 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
656
660
  }
657
661
  }
658
662
 
659
- dataWithUmiColumns := [ {
660
- column: "refineTags.UMI.outputCount",
661
- id: "refine-tags-umi-output-count",
662
- allowNA: true,
663
- naRegex: "NaN",
664
- spec: {
665
- name: "mixcr.com/reports/refineTags/UMI/outputCount",
666
- valueType: "Long",
667
- annotations: {
668
- "pl7.app/min": "0",
669
- "pl7.app/table/orderPriority": "85000",
670
- "pl7.app/table/visibility": "optional",
671
- "pl7.app/label": "Refine Tags UMI - Output Count"
663
+ dataWithUmiColumns := []
664
+ for idx, umiTag in umiTags {
665
+ orderBase := 85000 + idx * 10
666
+ orderBasePercents := 85100 + idx * 10
667
+ orderDiversity := 85200 + idx * 10
668
+ orderDiversityPercents := 85300 + idx * 10
669
+ tagL := text.to_lower(umiTag)
670
+ dataWithUmiColumns = dataWithUmiColumns + [{
671
+ column: "refineTags." + umiTag + ".outputCount",
672
+ id: "refine-tags-" + tagL + "-output-count",
673
+ allowNA: true,
674
+ naRegex: "NaN",
675
+ spec: {
676
+ name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCount",
677
+ valueType: "Long",
678
+ annotations: {
679
+ "pl7.app/min": "0",
680
+ "pl7.app/table/orderPriority": string(orderBase),
681
+ "pl7.app/table/visibility": "optional",
682
+ "pl7.app/label": "Refine Tags " + umiTag + " - Output Count"
683
+ }
672
684
  }
673
- }
674
- },
675
- {
676
- column: "refineTags.UMI.outputCountPercents",
677
- id: "refine-tags-umi-output-count-percents",
678
- allowNA: true,
679
- naRegex: "NaN",
680
- spec: {
681
- name: "mixcr.com/reports/refineTags/UMI/outputCountPercents",
682
- valueType: "Double",
683
- annotations: {
684
- "pl7.app/min": "0",
685
- "pl7.app/max": "100",
686
- "pl7.app/table/orderPriority": "85100",
687
- "pl7.app/table/visibility": "default",
688
- "pl7.app/label": "Refine Tags UMI - Output Count (%)"
685
+ },
686
+ {
687
+ column: "refineTags." + umiTag + ".outputCountPercents",
688
+ id: "refine-tags-" + tagL + "-output-count-percents",
689
+ allowNA: true,
690
+ naRegex: "NaN",
691
+ spec: {
692
+ name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCountPercents",
693
+ valueType: "Double",
694
+ annotations: {
695
+ "pl7.app/min": "0",
696
+ "pl7.app/max": "100",
697
+ "pl7.app/table/orderPriority": string(orderBasePercents),
698
+ "pl7.app/table/visibility": "default",
699
+ "pl7.app/label": "Refine Tags " + umiTag + " - Output Count (%)"
700
+ }
689
701
  }
690
- }
691
- },
692
- {
693
- column: "refineTags.UMI.outputDiversity",
694
- id: "refine-tags-umi-output-diversity",
695
- allowNA: true,
696
- naRegex: "NaN",
697
- spec: {
698
- name: "mixcr.com/reports/refineTags/UMI/outputDiversity",
699
- valueType: "Long",
700
- annotations: {
701
- "pl7.app/min": "0",
702
- "pl7.app/table/orderPriority": "85200",
703
- "pl7.app/table/visibility": "optional",
704
- "pl7.app/label": "Refine Tags UMI - Output Diversity"
702
+ },
703
+ {
704
+ column: "refineTags." + umiTag + ".outputDiversity",
705
+ id: "refine-tags-" + tagL + "-output-diversity",
706
+ allowNA: true,
707
+ naRegex: "NaN",
708
+ spec: {
709
+ name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversity",
710
+ valueType: "Long",
711
+ annotations: {
712
+ "pl7.app/min": "0",
713
+ "pl7.app/table/orderPriority": string(orderDiversity),
714
+ "pl7.app/table/visibility": "optional",
715
+ "pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity"
716
+ }
705
717
  }
706
- }
707
- },
708
- {
709
- column: "refineTags.UMI.outputDiversityPercents",
710
- id: "refine-tags-umi-output-diversity-percents",
711
- allowNA: true,
712
- naRegex: "NaN",
713
- spec: {
714
- name: "mixcr.com/reports/refineTags/UMI/outputDiversityPercents",
715
- valueType: "Double",
716
- annotations: {
717
- "pl7.app/min": "0",
718
- "pl7.app/max": "100",
719
- "pl7.app/table/orderPriority": "85300",
720
- "pl7.app/table/visibility": "default",
721
- "pl7.app/label": "Refine Tags UMI - Output Diversity (%)"
718
+ },
719
+ {
720
+ column: "refineTags." + umiTag + ".outputDiversityPercents",
721
+ id: "refine-tags-" + tagL + "-output-diversity-percents",
722
+ allowNA: true,
723
+ naRegex: "NaN",
724
+ spec: {
725
+ name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversityPercents",
726
+ valueType: "Double",
727
+ annotations: {
728
+ "pl7.app/min": "0",
729
+ "pl7.app/max": "100",
730
+ "pl7.app/table/orderPriority": string(orderDiversityPercents),
731
+ "pl7.app/table/visibility": "default",
732
+ "pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity (%)"
733
+ }
722
734
  }
723
- }
724
- },
725
- {
735
+ }]
736
+ }
737
+
738
+ dataWithUmiColumns = dataWithUmiColumns + [{
726
739
  column: "refineTags.numberOfGroupsAccepted",
727
740
  id: "refine-tags-number-of-groups-accepted",
728
741
  allowNA: true,
@@ -0,0 +1,179 @@
1
+ pt := import("@platforma-sdk/workflow-tengo:pt")
2
+ text := import("text")
3
+
4
+ applyStopCodonReplacementsPt := func(df, opts) {
5
+ if is_undefined(opts) {
6
+ return df
7
+ }
8
+ aminoAcidSeqColumns := opts.aminoAcidSeqColumns
9
+ aminoAcidSeqColumnPairs := opts.aminoAcidSeqColumnPairs
10
+ cdr3SeqColumns := opts.cdr3SeqColumns
11
+ stopCodonTypes := opts.stopCodonTypes
12
+ stopCodonReplacements := opts.stopCodonReplacements
13
+ allowedNtColumns := opts.allowedNtColumns
14
+
15
+ if is_undefined(aminoAcidSeqColumns) || !is_array(aminoAcidSeqColumns) || len(aminoAcidSeqColumns) == 0 {
16
+ return df
17
+ }
18
+ if is_undefined(stopCodonTypes) || !is_array(stopCodonTypes) || len(stopCodonTypes) == 0 {
19
+ return df
20
+ }
21
+ if !is_undefined(stopCodonReplacements) && !is_map(stopCodonReplacements) {
22
+ stopCodonReplacements = undefined
23
+ }
24
+ if !is_undefined(allowedNtColumns) && !is_array(allowedNtColumns) {
25
+ allowedNtColumns = undefined
26
+ }
27
+
28
+ contains := func(arr, value) {
29
+ for v in arr {
30
+ if v == value { return true }
31
+ }
32
+ return false
33
+ }
34
+
35
+ stopReplacement := func(stopType) {
36
+ if !contains(stopCodonTypes, stopType) {
37
+ return "*"
38
+ }
39
+ if is_undefined(stopCodonReplacements) {
40
+ return "*"
41
+ }
42
+ aa := stopCodonReplacements[stopType]
43
+ if is_undefined(aa) || aa == "" {
44
+ return "*"
45
+ }
46
+ return text.to_upper(aa)
47
+ }
48
+
49
+ codonMapBase := {
50
+ "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
51
+ "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
52
+ "TAT": "Y", "TAC": "Y", "TAA": "*",
53
+ "TAG": "*", "TGT": "C", "TGC": "C",
54
+ "TGA": "*", "TGG": "W",
55
+ "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
56
+ "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
57
+ "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
58
+ "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
59
+ "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
60
+ "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
61
+ "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
62
+ "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
63
+ "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
64
+ "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
65
+ "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
66
+ "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
67
+ }
68
+
69
+ codonMapReplace := {
70
+ "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
71
+ "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
72
+ "TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
73
+ "TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
74
+ "TGA": stopReplacement("opal"), "TGG": "W",
75
+ "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
76
+ "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
77
+ "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
78
+ "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
79
+ "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
80
+ "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
81
+ "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
82
+ "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
83
+ "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
84
+ "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
85
+ "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
86
+ "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
87
+ }
88
+
89
+ translateNtToAaExpr := func(ntExpr, codonMap) {
90
+ seq := ntExpr.fillNull("").strToUpper()
91
+ seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
92
+ for codon, aa in codonMap {
93
+ seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
94
+ }
95
+ seq = seq.strReplace("\\|$", "", { replaceAll: false })
96
+ seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
97
+ seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
98
+ return seq
99
+ }
100
+
101
+ pairs := []
102
+ if is_array(aminoAcidSeqColumnPairs) && len(aminoAcidSeqColumnPairs) > 0 {
103
+ for p in aminoAcidSeqColumnPairs {
104
+ if is_map(p) && !is_undefined(p.aa) && !is_undefined(p.nt) {
105
+ if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, p.nt) {
106
+ continue
107
+ }
108
+ pairs = append(pairs, p)
109
+ }
110
+ }
111
+ }
112
+ if len(pairs) == 0 {
113
+ for aaCol in aminoAcidSeqColumns {
114
+ ntCol := text.replace(aaCol, "aaSeq", "nSeq", 1)
115
+ if text.has_suffix(ntCol, "InFrame") {
116
+ ntCol = text.replace(ntCol, "InFrame", "", 1)
117
+ }
118
+ if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, ntCol) {
119
+ continue
120
+ }
121
+ pairs = append(pairs, { aa: aaCol, nt: ntCol })
122
+ }
123
+ }
124
+
125
+ expressions := []
126
+ replacedAnyExprs := []
127
+ replacedColsExprs := []
128
+ aaColumnsUsed := []
129
+ for pair in pairs {
130
+ aaCol := pair.aa
131
+ ntCol := pair.nt
132
+ translatedBase := translateNtToAaExpr(pt.col(ntCol), codonMapBase)
133
+ translatedReplaced := translateNtToAaExpr(pt.col(ntCol), codonMapReplace)
134
+ expressions = append(expressions, translatedReplaced.alias(aaCol))
135
+ cond := translatedReplaced.neq(translatedBase)
136
+ replacedAnyExprs = append(replacedAnyExprs, cond)
137
+ replacedColsExprs = append(replacedColsExprs, pt.when(cond).then(pt.lit(aaCol)).otherwise(pt.lit("")))
138
+ aaColumnsUsed = append(aaColumnsUsed, aaCol)
139
+ }
140
+ if len(expressions) > 0 {
141
+ df = df.withColumns(expressions...)
142
+ }
143
+
144
+ if len(replacedAnyExprs) > 0 {
145
+ colsList := pt.concatStr(replacedColsExprs, { delimiter: "," })
146
+ colsList = colsList.strReplace(",+", ",", { replaceAll: true }).strReplace("^,|,$", "", { replaceAll: true })
147
+ df = df.withColumns(
148
+ pt.anyHorizontal(replacedAnyExprs...).alias("stopCodonReplaced"),
149
+ colsList.alias("stopCodonReplacedColumns")
150
+ )
151
+ }
152
+
153
+ stopChecks := []
154
+ for colName in aaColumnsUsed {
155
+ stopChecks = append(stopChecks, pt.col(colName).strContains("*", { literal: true }))
156
+ }
157
+ if len(stopChecks) > 0 {
158
+ df = df.filter(pt.anyHorizontal(stopChecks...).eq(false))
159
+ }
160
+
161
+ if !is_undefined(cdr3SeqColumns) && len(cdr3SeqColumns) > 0 {
162
+ regionChecks := []
163
+ for colName in cdr3SeqColumns {
164
+ if !contains(aaColumnsUsed, colName) {
165
+ continue
166
+ }
167
+ regionChecks = append(regionChecks, pt.col(colName).strToUpper().eq("REGION_NOT_COVERED"))
168
+ }
169
+ if len(regionChecks) > 0 {
170
+ df = df.filter(pt.anyHorizontal(regionChecks...).eq(false))
171
+ }
172
+ }
173
+
174
+ return df
175
+ }
176
+
177
+ export {
178
+ applyStopCodonReplacementsPt: applyStopCodonReplacementsPt
179
+ }
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,17 +1,17 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
3
- "version": "3.21.0",
3
+ "version": "3.23.0",
4
4
  "description": "Tengo-based template",
5
5
  "dependencies": {
6
- "@platforma-sdk/workflow-tengo": "5.8.0",
6
+ "@platforma-sdk/workflow-tengo": "5.8.1",
7
7
  "@platforma-open/milaboratories.software-mixcr": "4.7.0-254-develop"
8
8
  },
9
9
  "devDependencies": {
10
- "@platforma-sdk/tengo-builder": "2.4.8"
10
+ "@platforma-sdk/tengo-builder": "2.4.12"
11
11
  },
12
12
  "scripts": {
13
- "build": "rm -rf dist && pl-tengo check && pl-tengo build",
13
+ "build": "shx rm -rf dist && pl-tengo check && pl-tengo build",
14
14
  "format": "/usr/bin/env emacs --script ./format.el",
15
- "do-pack": "rm -f *.tgz && pnpm pack && mv *.tgz package.tgz"
15
+ "do-pack": "shx rm -f *.tgz && pnpm pack && shx mv *.tgz package.tgz"
16
16
  }
17
17
  }
@@ -442,6 +442,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
442
442
  }
443
443
 
444
444
  orderP := 80000
445
+ aminoAcidSeqColumns := []
446
+ aminoAcidSeqColumnPairs := []
447
+ cdr3SeqColumns := []
445
448
 
446
449
  // Sequences
447
450
 
@@ -466,6 +469,16 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
466
469
  alphabetShortMixcr := isAminoAcid ? "aa" : "n"
467
470
  columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
468
471
  visibility := featureU == "CDR3" && (!isSingleCell || isAminoAcid) // isSingleCell ? (featureU == "CDR3") && isAminoAcid : (featureU == "CDR3") || (featureU == assemblingFeature)
472
+ if featureU == "CDR3" {
473
+ cdr3SeqColumns += [ columnName ]
474
+ }
475
+ if isAminoAcid {
476
+ aminoAcidSeqColumns += [ columnName ]
477
+ aminoAcidSeqColumnPairs += [ {
478
+ aa: columnName,
479
+ nt: "nSeq" + imputedU + featureU
480
+ } ]
481
+ }
469
482
  columnsSpecPerClonotypeNoAggregates += [ {
470
483
  column: columnName,
471
484
  id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
@@ -973,6 +986,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
973
986
  }
974
987
 
975
988
  return {
989
+ productiveFeature: productiveFeature,
976
990
  clonotypeKeyColumns: clonotypeKeyColumns,
977
991
  clonotypeKeyArgs: clonotypeKeyArgs,
978
992
 
@@ -981,6 +995,10 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
981
995
  axisByClonotypeKeyGen: axisByClonotypeKeyGen,
982
996
  axisByScClonotypeKeyGen: axisByScClonotypeKeyGen,
983
997
 
998
+ aminoAcidSeqColumns: aminoAcidSeqColumns,
999
+ aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
1000
+ cdr3SeqColumns: cdr3SeqColumns,
1001
+
984
1002
  columnsSpecPerSample: columnsSpecPerSample,
985
1003
  columnsSpecPerSampleSc: columnsSpecPerSampleSc,
986
1004
  columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
@@ -29,16 +29,80 @@ self.body(func(inputs) {
29
29
  library := inputs.library
30
30
  isLibraryFileGzipped := inputs.isLibraryFileGzipped
31
31
  clonotypeTablesData := inputs.clonotypeTablesData
32
+ stopCodonTypes := inputs.stopCodonTypes
33
+ stopCodonReplacements := inputs.stopCodonReplacements
32
34
 
33
35
  isSingleCell := len(presetSpecForBack.cellTags) > 0
34
- hasUmi := !is_undefined(presetSpecForBack.umiTags) && len(presetSpecForBack.umiTags) > 0
36
+ umiTags := presetSpecForBack.umiTags
37
+ hasUmi := !is_undefined(umiTags) && len(umiTags) > 0
35
38
  cellTags := presetSpecForBack.cellTags
36
39
  singleCellChainTsvsData := inputs.singleCellChainTsvsData
40
+ useStopCodonReplacement := !is_undefined(stopCodonTypes) && is_array(stopCodonTypes) && len(stopCodonTypes) > 0
41
+ if is_undefined(stopCodonReplacements) || !is_map(stopCodonReplacements) {
42
+ stopCodonReplacements = {}
43
+ }
37
44
 
38
- featureForFlags := "CDR3"
45
+ featureForFlags := inputs.productiveFeature
46
+ if is_undefined(featureForFlags) || featureForFlags == "" {
47
+ featureForFlags = "CDR3"
48
+ }
39
49
  isOOFColumn := "isOOF" + featureForFlags
40
50
  hasStopsColumn := "hasStopsIn" + featureForFlags
41
51
 
52
+ contains := func(arr, value) {
53
+ for v in arr {
54
+ if v == value { return true }
55
+ }
56
+ return false
57
+ }
58
+
59
+ stopReplacement := func(stopType) {
60
+ if !contains(stopCodonTypes, stopType) {
61
+ return "*"
62
+ }
63
+ aa := stopCodonReplacements[stopType]
64
+ if is_undefined(aa) || aa == "" {
65
+ return "*"
66
+ }
67
+ return text.to_upper(aa)
68
+ }
69
+
70
+ ll.print("__THE_LOG__", stopReplacement("ochre"))
71
+ ll.print("__THE_LOG__", stopReplacement("amber"))
72
+ ll.print("__THE_LOG__", stopReplacement("opal"))
73
+
74
+ codonMapReplace := {
75
+ "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
76
+ "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
77
+ "TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
78
+ "TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
79
+ "TGA": stopReplacement("opal"), "TGG": "W",
80
+ "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
81
+ "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
82
+ "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
83
+ "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
84
+ "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
85
+ "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
86
+ "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
87
+ "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
88
+ "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
89
+ "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
90
+ "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
91
+ "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
92
+ }
93
+
94
+ translateNtToAaExpr := func(ntExpr, codonMap) {
95
+ seq := ntExpr.fillNull("").strToUpper()
96
+ seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
97
+ for codon, aa in codonMap {
98
+ seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
99
+ }
100
+ seq = seq.strReplace("\\|$", "", { replaceAll: false })
101
+ seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
102
+ seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
103
+ return seq
104
+ }
105
+
42
106
  chainInfos := {
43
107
  "IGHeavy": { mixcrFilter: "IGH", name: "IG Heavy", shortName: "Heavy" },
44
108
  "IGLight": { mixcrFilter: "IGK,IGL", name: "IG Light", shortName: "Light" },
@@ -152,6 +216,7 @@ self.body(func(inputs) {
152
216
  exportFiltersCmd = exportFiltersCmd.
153
217
  arg("-isOOF").arg(featureForFlags).
154
218
  arg("-hasStops").arg(featureForFlags).
219
+ arg("-nFeature").arg(featureForFlags).
155
220
  arg("clones.clns").
156
221
  addFile("clones.clns", clnsFile).
157
222
  arg("clones.tsv").
@@ -165,10 +230,19 @@ self.body(func(inputs) {
165
230
  }
166
231
  exportFiltersResult := exportFiltersCmd.cacheHours(3).run()
167
232
  filterTsv := exportFiltersResult.getFile("clones.tsv")
168
- dfFilters := wf.frame(filterTsv, { xsvType: "tsv", inferSchema: false, schema: [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ] })
233
+ schema := [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ]
234
+ if useStopCodonReplacement {
235
+ schema = append(schema, { column: "nSeq" + featureForFlags, type: "String" })
236
+ }
237
+ dfFilters := wf.frame(filterTsv, { xsvType: "tsv", inferSchema: false, schema: schema })
238
+ stopExpr := pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0))
239
+ if useStopCodonReplacement {
240
+ translated := translateNtToAaExpr(pt.col("nSeq" + featureForFlags), codonMapReplace)
241
+ stopExpr = pt.when(translated.strContains("*", { literal: true })).then(pt.lit(1)).otherwise(pt.lit(0))
242
+ }
169
243
  dfFilters = dfFilters.withColumns(
170
244
  pt.when(pt.col(isOOFColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__oof"),
171
- pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__stop")
245
+ stopExpr.alias("__stop")
172
246
  )
173
247
  dfFilterCount := dfFilters.select(
174
248
  pt.lit(sampleId).alias("sampleId"),
@@ -210,6 +284,7 @@ self.body(func(inputs) {
210
284
  exportChainFiltersResult := exportChainFiltersCmd.
211
285
  arg("-isOOF").arg(featureForFlags).
212
286
  arg("-hasStops").arg(featureForFlags).
287
+ arg("-nFeature").arg(featureForFlags).
213
288
  arg("clones.clns").
214
289
  addFile("clones.clns", clnsFile).
215
290
  arg("clones.tsv").
@@ -217,10 +292,19 @@ self.body(func(inputs) {
217
292
  cacheHours(3).
218
293
  run()
219
294
  chainFilterTsv := exportChainFiltersResult.getFile("clones.tsv")
220
- dfChainFilters := wf.frame(chainFilterTsv, { xsvType: "tsv", inferSchema: false, schema: [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ] })
295
+ chainSchema := [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ]
296
+ if useStopCodonReplacement {
297
+ chainSchema = append(chainSchema, { column: "nSeq" + featureForFlags, type: "String" })
298
+ }
299
+ dfChainFilters := wf.frame(chainFilterTsv, { xsvType: "tsv", inferSchema: false, schema: chainSchema })
300
+ chainStopExpr := pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0))
301
+ if useStopCodonReplacement {
302
+ translated := translateNtToAaExpr(pt.col("nSeq" + featureForFlags), codonMapReplace)
303
+ chainStopExpr = pt.when(translated.strContains("*", { literal: true })).then(pt.lit(1)).otherwise(pt.lit(0))
304
+ }
221
305
  dfChainFilters = dfChainFilters.withColumns(
222
306
  pt.when(pt.col(isOOFColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__oof"),
223
- pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__stop")
307
+ chainStopExpr.alias("__stop")
224
308
  )
225
309
  dfChainCount := dfChainFilters.select(
226
310
  pt.lit(sampleId).alias("sampleId"),
@@ -409,7 +493,7 @@ self.body(func(inputs) {
409
493
 
410
494
  tsvFile := wfResult.getFile("qc-report-processed.tsv")
411
495
 
412
- qcReportColumns := qcReportColumns(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags)
496
+ qcReportColumns := qcReportColumns(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags, umiTags)
413
497
  reportColumnsSpec := qcReportColumns.reportColumnsSpec
414
498
 
415
499
  qcReportTable := xsv.importFile(
@@ -132,7 +132,9 @@ wf.body(func(args) {
132
132
  materialType: args.materialType,
133
133
  tagPattern: args.tagPattern,
134
134
  assembleClonesBy: args.assembleClonesBy,
135
- exportMinQuality: args.exportMinQuality
135
+ exportMinQuality: args.exportMinQuality,
136
+ stopCodonTypes: args.stopCodonTypes,
137
+ stopCodonReplacements: args.stopCodonReplacements
136
138
  })
137
139
  })
138
140
 
@@ -9,8 +9,10 @@ assets := import("@platforma-sdk/workflow-tengo:assets")
9
9
  exec := import("@platforma-sdk/workflow-tengo:exec")
10
10
  pt := import("@platforma-sdk/workflow-tengo:pt")
11
11
  clonotypeLabel := import(":clonotype-label")
12
+ stopCodonReplacement := import(":stop-codon-replacement")
12
13
 
13
14
  json := import("json")
15
+ text := import("text")
14
16
 
15
17
  self.defineOutputs("tsv", "tsvForSingleCell")
16
18
 
@@ -24,6 +26,11 @@ self.body(func(inputs) {
24
26
  isLibraryFileGzipped := params.isLibraryFileGzipped
25
27
  chains := params.chains
26
28
  exportArgs := params.exportArgs
29
+ stopCodonTypes := params.stopCodonTypes
30
+ stopCodonReplacements := params.stopCodonReplacements
31
+ aminoAcidSeqColumns := params.aminoAcidSeqColumns
32
+ aminoAcidSeqColumnPairs := params.aminoAcidSeqColumnPairs
33
+ cdr3SeqColumns := params.cdr3SeqColumns
27
34
 
28
35
  clonotypeKeyColumns := params.clonotypeKeyColumns
29
36
  clonotypeKeyArgs := params.clonotypeKeyArgs
@@ -40,6 +47,8 @@ self.body(func(inputs) {
40
47
  mainIsProductiveArgs := params.mainIsProductiveArgs
41
48
  mainIsProductiveColumn := params.mainIsProductiveColumn
42
49
 
50
+ useProductiveFilter := is_undefined(stopCodonTypes) || len(stopCodonTypes) == 0
51
+
43
52
  exportMemGB := undefined
44
53
  if !is_undefined(inputs.perProcessMemGB) {
45
54
  exportMemGB = int(1.0*inputs.perProcessMemGB/4.0)
@@ -73,8 +82,12 @@ self.body(func(inputs) {
73
82
  arg("--dont-split-files").
74
83
  arg("--drop-default-fields").
75
84
  arg("--reset-export-clone-table-splitting").
76
- arg("--export-productive-clones-only").
77
85
  arg("--chains").arg(chains)
86
+ if useProductiveFilter {
87
+ mixcrCmdBuilder = mixcrCmdBuilder.arg("--export-productive-clones-only")
88
+ } else {
89
+ mixcrCmdBuilder = mixcrCmdBuilder.arg("--filter-out-of-frames")
90
+ }
78
91
 
79
92
  if library {
80
93
  if isLibraryFileGzipped {
@@ -138,6 +151,13 @@ self.body(func(inputs) {
138
151
  alias(mainIsProductiveColumn)
139
152
  )
140
153
  }
154
+ dfMain = stopCodonReplacement.applyStopCodonReplacementsPt(dfMain, {
155
+ aminoAcidSeqColumns: aminoAcidSeqColumns,
156
+ aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
157
+ cdr3SeqColumns: cdr3SeqColumns,
158
+ stopCodonTypes: stopCodonTypes,
159
+ stopCodonReplacements: stopCodonReplacements
160
+ })
141
161
  dfMain.addColumns(
142
162
  hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey")
143
163
  )
@@ -197,6 +217,24 @@ self.body(func(inputs) {
197
217
  )
198
218
  }
199
219
 
220
+ dfSingleCell = stopCodonReplacement.applyStopCodonReplacementsPt(dfSingleCell, {
221
+ aminoAcidSeqColumns: aminoAcidSeqColumns,
222
+ aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
223
+ cdr3SeqColumns: cdr3SeqColumns,
224
+ stopCodonTypes: stopCodonTypes,
225
+ stopCodonReplacements: stopCodonReplacements,
226
+ allowedNtColumns: func() {
227
+ allowed := []
228
+ if !is_undefined(clonotypeKeyColumns) {
229
+ for col in clonotypeKeyColumns {
230
+ if text.has_prefix(col, "nSeq") {
231
+ allowed = append(allowed, col)
232
+ }
233
+ }
234
+ }
235
+ return allowed
236
+ }()
237
+ })
200
238
  dfSingleCell.addColumns(
201
239
  hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey"),
202
240
  hashCellKey ? hashKeyDerivationExpressionPt(cellTagColumns).alias("cellKey") : pt.col(cellTagColumns[0]).alias("cellKey")
@@ -210,6 +210,10 @@ self.body(func(inputs) {
210
210
  columnsSpecPerClonotypeNoAggregates := exportSpecs.columnsSpecPerClonotypeNoAggregates
211
211
  columnsSpecPerClonotypeAggregates := exportSpecs.columnsSpecPerClonotypeAggregates
212
212
  columnsSpecPerClonotypeSc := exportSpecs.columnsSpecPerClonotypeSc
213
+ aminoAcidSeqColumns := exportSpecs.aminoAcidSeqColumns
214
+ aminoAcidSeqColumnPairs := exportSpecs.aminoAcidSeqColumnPairs
215
+ cdr3SeqColumns := exportSpecs.cdr3SeqColumns
216
+ productiveFeature := exportSpecs.productiveFeature
213
217
 
214
218
  clonotypeKeyColumns := exportSpecs.clonotypeKeyColumns
215
219
  clonotypeKeyArgs := exportSpecs.clonotypeKeyArgs
@@ -406,6 +410,10 @@ self.body(func(inputs) {
406
410
  clonotypeKeyColumns: clonotypeKeyColumns,
407
411
  clonotypeKeyArgs: clonotypeKeyArgs,
408
412
 
413
+ aminoAcidSeqColumns: aminoAcidSeqColumns,
414
+ aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
415
+ cdr3SeqColumns: cdr3SeqColumns,
416
+
409
417
  mainIsProductiveColumn: mainIsProductiveColumn,
410
418
  mainIsProductiveArgs: mainIsProductiveArgs,
411
419
  mainAbundanceColumnNormalized: mainAbundanceColumnNormalized,
@@ -419,7 +427,9 @@ self.body(func(inputs) {
419
427
  mainAbundanceColumnIsReadCount: (!is_undefined(cellTagColumns) && len(cellTagColumns) > 0 && mainAbundanceColumnUnnormalized == "readCount") ? true : undefined,
420
428
 
421
429
  exportArgs: exportArgs,
422
- isLibraryFileGzipped: isLibraryFileGzipped
430
+ isLibraryFileGzipped: isLibraryFileGzipped,
431
+ stopCodonTypes: params.stopCodonTypes,
432
+ stopCodonReplacements: params.stopCodonReplacements
423
433
  }, { removeUndefs: true }),
424
434
  library: library
425
435
  },
@@ -779,6 +789,9 @@ self.body(func(inputs) {
779
789
  library: library,
780
790
  isLibraryFileGzipped: isLibraryFileGzipped,
781
791
  clonotypeTablesData: clonotypeTablesData,
792
+ productiveFeature: productiveFeature,
793
+ stopCodonTypes: params.stopCodonTypes,
794
+ stopCodonReplacements: params.stopCodonReplacements,
782
795
  singleCellChainTsvsData: singleCellChainTsvs
783
796
  })
784
797
 
@@ -3,9 +3,13 @@
3
3
 
4
4
  ll := import("@platforma-sdk/workflow-tengo:ll")
5
5
  pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
6
+ text := import("text")
6
7
 
7
8
  // QC Report column specifications function
8
- getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags) {
9
+ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags, umiTags) {
10
+ if is_undefined(umiTags) {
11
+ umiTags = []
12
+ }
9
13
  // Bulk sequencing columns
10
14
  baseColumns := [
11
15
  {
@@ -656,73 +660,82 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
656
660
  }
657
661
  }
658
662
 
659
- dataWithUmiColumns := [ {
660
- column: "refineTags.UMI.outputCount",
661
- id: "refine-tags-umi-output-count",
662
- allowNA: true,
663
- naRegex: "NaN",
664
- spec: {
665
- name: "mixcr.com/reports/refineTags/UMI/outputCount",
666
- valueType: "Long",
667
- annotations: {
668
- "pl7.app/min": "0",
669
- "pl7.app/table/orderPriority": "85000",
670
- "pl7.app/table/visibility": "optional",
671
- "pl7.app/label": "Refine Tags UMI - Output Count"
663
+ dataWithUmiColumns := []
664
+ for idx, umiTag in umiTags {
665
+ orderBase := 85000 + idx * 10
666
+ orderBasePercents := 85100 + idx * 10
667
+ orderDiversity := 85200 + idx * 10
668
+ orderDiversityPercents := 85300 + idx * 10
669
+ tagL := text.to_lower(umiTag)
670
+ dataWithUmiColumns = dataWithUmiColumns + [{
671
+ column: "refineTags." + umiTag + ".outputCount",
672
+ id: "refine-tags-" + tagL + "-output-count",
673
+ allowNA: true,
674
+ naRegex: "NaN",
675
+ spec: {
676
+ name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCount",
677
+ valueType: "Long",
678
+ annotations: {
679
+ "pl7.app/min": "0",
680
+ "pl7.app/table/orderPriority": string(orderBase),
681
+ "pl7.app/table/visibility": "optional",
682
+ "pl7.app/label": "Refine Tags " + umiTag + " - Output Count"
683
+ }
672
684
  }
673
- }
674
- },
675
- {
676
- column: "refineTags.UMI.outputCountPercents",
677
- id: "refine-tags-umi-output-count-percents",
678
- allowNA: true,
679
- naRegex: "NaN",
680
- spec: {
681
- name: "mixcr.com/reports/refineTags/UMI/outputCountPercents",
682
- valueType: "Double",
683
- annotations: {
684
- "pl7.app/min": "0",
685
- "pl7.app/max": "100",
686
- "pl7.app/table/orderPriority": "85100",
687
- "pl7.app/table/visibility": "default",
688
- "pl7.app/label": "Refine Tags UMI - Output Count (%)"
685
+ },
686
+ {
687
+ column: "refineTags." + umiTag + ".outputCountPercents",
688
+ id: "refine-tags-" + tagL + "-output-count-percents",
689
+ allowNA: true,
690
+ naRegex: "NaN",
691
+ spec: {
692
+ name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCountPercents",
693
+ valueType: "Double",
694
+ annotations: {
695
+ "pl7.app/min": "0",
696
+ "pl7.app/max": "100",
697
+ "pl7.app/table/orderPriority": string(orderBasePercents),
698
+ "pl7.app/table/visibility": "default",
699
+ "pl7.app/label": "Refine Tags " + umiTag + " - Output Count (%)"
700
+ }
689
701
  }
690
- }
691
- },
692
- {
693
- column: "refineTags.UMI.outputDiversity",
694
- id: "refine-tags-umi-output-diversity",
695
- allowNA: true,
696
- naRegex: "NaN",
697
- spec: {
698
- name: "mixcr.com/reports/refineTags/UMI/outputDiversity",
699
- valueType: "Long",
700
- annotations: {
701
- "pl7.app/min": "0",
702
- "pl7.app/table/orderPriority": "85200",
703
- "pl7.app/table/visibility": "optional",
704
- "pl7.app/label": "Refine Tags UMI - Output Diversity"
702
+ },
703
+ {
704
+ column: "refineTags." + umiTag + ".outputDiversity",
705
+ id: "refine-tags-" + tagL + "-output-diversity",
706
+ allowNA: true,
707
+ naRegex: "NaN",
708
+ spec: {
709
+ name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversity",
710
+ valueType: "Long",
711
+ annotations: {
712
+ "pl7.app/min": "0",
713
+ "pl7.app/table/orderPriority": string(orderDiversity),
714
+ "pl7.app/table/visibility": "optional",
715
+ "pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity"
716
+ }
705
717
  }
706
- }
707
- },
708
- {
709
- column: "refineTags.UMI.outputDiversityPercents",
710
- id: "refine-tags-umi-output-diversity-percents",
711
- allowNA: true,
712
- naRegex: "NaN",
713
- spec: {
714
- name: "mixcr.com/reports/refineTags/UMI/outputDiversityPercents",
715
- valueType: "Double",
716
- annotations: {
717
- "pl7.app/min": "0",
718
- "pl7.app/max": "100",
719
- "pl7.app/table/orderPriority": "85300",
720
- "pl7.app/table/visibility": "default",
721
- "pl7.app/label": "Refine Tags UMI - Output Diversity (%)"
718
+ },
719
+ {
720
+ column: "refineTags." + umiTag + ".outputDiversityPercents",
721
+ id: "refine-tags-" + tagL + "-output-diversity-percents",
722
+ allowNA: true,
723
+ naRegex: "NaN",
724
+ spec: {
725
+ name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversityPercents",
726
+ valueType: "Double",
727
+ annotations: {
728
+ "pl7.app/min": "0",
729
+ "pl7.app/max": "100",
730
+ "pl7.app/table/orderPriority": string(orderDiversityPercents),
731
+ "pl7.app/table/visibility": "default",
732
+ "pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity (%)"
733
+ }
722
734
  }
723
- }
724
- },
725
- {
735
+ }]
736
+ }
737
+
738
+ dataWithUmiColumns = dataWithUmiColumns + [{
726
739
  column: "refineTags.numberOfGroupsAccepted",
727
740
  id: "refine-tags-number-of-groups-accepted",
728
741
  allowNA: true,
@@ -0,0 +1,179 @@
1
+ pt := import("@platforma-sdk/workflow-tengo:pt")
2
+ text := import("text")
3
+
4
// applyStopCodonReplacementsPt rebuilds amino-acid sequence columns of `df` by
// translating their paired nucleotide columns, emitting a configured amino acid
// in place of each selected stop codon, and then drops rows that still contain a
// stop ("*") in any rebuilt column.
//
// Fields read from `opts`:
//   aminoAcidSeqColumns     - array of amino-acid column names; `df` is returned
//                             untouched when it is missing, not an array, or empty
//   aminoAcidSeqColumnPairs - optional array of { aa, nt } maps naming explicit
//                             amino-acid / nucleotide column pairs
//   cdr3SeqColumns          - optional array of column names that are additionally
//                             checked for the "REGION_NOT_COVERED" marker
//   stopCodonTypes          - array holding any of "ochre" (TAA), "amber" (TAG),
//                             "opal" (TGA); `df` is returned untouched when it is
//                             missing, not an array, or empty
//   stopCodonReplacements   - optional map from stop type to replacement residue;
//                             ignored when it is not a map
//   allowedNtColumns        - optional array; when given, only pairs whose
//                             nucleotide column is listed are processed
//
// Returns the transformed frame. When at least one column pair is processed the
// frame also carries `stopCodonReplaced` (any column changed) and
// `stopCodonReplacedColumns` (comma-separated names of the changed columns).
applyStopCodonReplacementsPt := func(df, opts) {
	if is_undefined(opts) {
		return df
	}
	aminoAcidSeqColumns := opts.aminoAcidSeqColumns
	aminoAcidSeqColumnPairs := opts.aminoAcidSeqColumnPairs
	cdr3SeqColumns := opts.cdr3SeqColumns
	stopCodonTypes := opts.stopCodonTypes
	stopCodonReplacements := opts.stopCodonReplacements
	allowedNtColumns := opts.allowedNtColumns

	// Nothing to do without amino-acid columns or without selected stop types.
	if is_undefined(aminoAcidSeqColumns) || !is_array(aminoAcidSeqColumns) || len(aminoAcidSeqColumns) == 0 {
		return df
	}
	if is_undefined(stopCodonTypes) || !is_array(stopCodonTypes) || len(stopCodonTypes) == 0 {
		return df
	}
	// Malformed optional settings are treated as absent rather than as errors.
	if !is_undefined(stopCodonReplacements) && !is_map(stopCodonReplacements) {
		stopCodonReplacements = undefined
	}
	if !is_undefined(allowedNtColumns) && !is_array(allowedNtColumns) {
		allowedNtColumns = undefined
	}

	// contains reports whether `value` is an element of `arr`.
	contains := func(arr, value) {
		for v in arr {
			if v == value { return true }
		}
		return false
	}

	// stopReplacement returns the residue to emit for the given stop type:
	// the upper-cased configured replacement when the type is selected and a
	// non-empty replacement exists, otherwise "*".
	stopReplacement := func(stopType) {
		if !contains(stopCodonTypes, stopType) {
			return "*"
		}
		if is_undefined(stopCodonReplacements) {
			return "*"
		}
		aa := stopCodonReplacements[stopType]
		if is_undefined(aa) || aa == "" {
			return "*"
		}
		return text.to_upper(aa)
	}

	// Codon table with all three stop codons mapped to "*".
	codonMapBase := {
		"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
		"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
		"TAT": "Y", "TAC": "Y", "TAA": "*",
		"TAG": "*", "TGT": "C", "TGC": "C",
		"TGA": "*", "TGG": "W",
		"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
		"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
		"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
		"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
		"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
		"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
		"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
		"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
		"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
		"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
		"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
		"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
	}

	// Same table with the stop codons overridden by the configured replacements.
	// Derived from codonMapBase so the two tables cannot drift apart.
	codonMapReplace := {}
	for codon, aa in codonMapBase {
		codonMapReplace[codon] = aa
	}
	codonMapReplace["TAA"] = stopReplacement("ochre")
	codonMapReplace["TAG"] = stopReplacement("amber")
	codonMapReplace["TGA"] = stopReplacement("opal")

	// translateNtToAaExpr builds a string expression that translates the
	// nucleotide expression `ntExpr` codon by codon using `codonMap`.
	// Null input is treated as an empty string; groups that are not keys of
	// `codonMap` are left as they are.
	translateNtToAaExpr := func(ntExpr, codonMap) {
		seq := ntExpr.fillNull("").strToUpper()
		// Terminate every complete 3-character group with "|" so that codons can
		// only be matched on frame boundaries.
		seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
		// Drop the 1-2 dangling nucleotides of an incomplete last codon now, while
		// the "|" separators are still present. Doing this after the separators
		// are removed would also eat translated residues, because A, C, G and T
		// are valid amino-acid letters as well.
		seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
		for codon, aa in codonMap {
			seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
		}
		// Remove all separators, including the trailing one.
		seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
		return seq
	}

	// Prefer explicitly supplied { aa, nt } pairs, honouring allowedNtColumns.
	pairs := []
	if is_array(aminoAcidSeqColumnPairs) && len(aminoAcidSeqColumnPairs) > 0 {
		for p in aminoAcidSeqColumnPairs {
			if is_map(p) && !is_undefined(p.aa) && !is_undefined(p.nt) {
				if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, p.nt) {
					continue
				}
				pairs = append(pairs, p)
			}
		}
	}
	// Fallback: derive the nucleotide column name from the amino-acid column
	// name ("aaSeq" -> "nSeq", with a trailing "InFrame" removed).
	if len(pairs) == 0 {
		for aaCol in aminoAcidSeqColumns {
			ntCol := text.replace(aaCol, "aaSeq", "nSeq", 1)
			if text.has_suffix(ntCol, "InFrame") {
				ntCol = text.replace(ntCol, "InFrame", "", 1)
			}
			if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, ntCol) {
				continue
			}
			pairs = append(pairs, { aa: aaCol, nt: ntCol })
		}
	}

	expressions := []
	replacedAnyExprs := []
	replacedColsExprs := []
	aaColumnsUsed := []
	for pair in pairs {
		aaCol := pair.aa
		ntCol := pair.nt
		translatedBase := translateNtToAaExpr(pt.col(ntCol), codonMapBase)
		translatedReplaced := translateNtToAaExpr(pt.col(ntCol), codonMapReplace)
		// The rebuilt translation overwrites the amino-acid column.
		expressions = append(expressions, translatedReplaced.alias(aaCol))
		// A row counts as "replaced" when the two translations differ.
		cond := translatedReplaced.neq(translatedBase)
		replacedAnyExprs = append(replacedAnyExprs, cond)
		replacedColsExprs = append(replacedColsExprs, pt.when(cond).then(pt.lit(aaCol)).otherwise(pt.lit("")))
		aaColumnsUsed = append(aaColumnsUsed, aaCol)
	}
	if len(expressions) > 0 {
		df = df.withColumns(expressions...)
	}

	if len(replacedAnyExprs) > 0 {
		colsList := pt.concatStr(replacedColsExprs, { delimiter: "," })
		// Unchanged columns contribute empty strings; collapse the resulting
		// runs of commas and trim a leading or trailing comma.
		colsList = colsList.strReplace(",+", ",", { replaceAll: true }).strReplace("^,|,$", "", { replaceAll: true })
		df = df.withColumns(
			pt.anyHorizontal(replacedAnyExprs...).alias("stopCodonReplaced"),
			colsList.alias("stopCodonReplacedColumns")
		)
	}

	// Drop rows that still contain a stop in any rebuilt column.
	stopChecks := []
	for colName in aaColumnsUsed {
		stopChecks = append(stopChecks, pt.col(colName).strContains("*", { literal: true }))
	}
	if len(stopChecks) > 0 {
		df = df.filter(pt.anyHorizontal(stopChecks...).eq(false))
	}

	// Drop rows whose rebuilt CDR3 column holds the "REGION_NOT_COVERED" marker.
	if !is_undefined(cdr3SeqColumns) && len(cdr3SeqColumns) > 0 {
		regionChecks := []
		for colName in cdr3SeqColumns {
			if !contains(aaColumnsUsed, colName) {
				continue
			}
			regionChecks = append(regionChecks, pt.col(colName).strToUpper().eq("REGION_NOT_COVERED"))
		}
		if len(regionChecks) > 0 {
			df = df.filter(pt.anyHorizontal(regionChecks...).eq(false))
		}
	}

	return df
}
176
+
177
+ export {
178
+ applyStopCodonReplacementsPt: applyStopCodonReplacementsPt
179
+ }