@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.21.0 → 3.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -2
- package/CHANGELOG.md +12 -0
- package/dist/tengo/lib/calculate-export-specs.lib.tengo +18 -0
- package/dist/tengo/lib/qc-report-columns.lib.tengo +77 -64
- package/dist/tengo/lib/stop-codon-replacement.lib.tengo +179 -0
- package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
- package/dist/tengo/tpl/calculate-preset-info.plj.gz +0 -0
- package/dist/tengo/tpl/export-report.plj.gz +0 -0
- package/dist/tengo/tpl/list-presets.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/dist/tengo/tpl/process-single-cell.plj.gz +0 -0
- package/dist/tengo/tpl/process.plj.gz +0 -0
- package/dist/tengo/tpl/test.columns-calculate.plj.gz +0 -0
- package/dist/tengo/tpl/test.columns.test.plj.gz +0 -0
- package/package.json +5 -5
- package/src/calculate-export-specs.lib.tengo +18 -0
- package/src/export-report.tpl.tengo +91 -7
- package/src/main.tpl.tengo +3 -1
- package/src/mixcr-export.tpl.tengo +39 -1
- package/src/process.tpl.tengo +14 -1
- package/src/qc-report-columns.lib.tengo +77 -64
- package/src/stop-codon-replacement.lib.tengo +179 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.21.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
|
|
4
|
-
> rm -rf dist && pl-tengo check && pl-tengo build
|
|
3
|
+
> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.23.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
|
|
4
|
+
> shx rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
info: Skipping unknown file type: test/columns.test.ts
|
|
7
7
|
Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
|
|
@@ -17,6 +17,7 @@ Processing "src/prerun.tpl.tengo"...
|
|
|
17
17
|
Processing "src/process-single-cell.tpl.tengo"...
|
|
18
18
|
Processing "src/process.tpl.tengo"...
|
|
19
19
|
Processing "src/qc-report-columns.lib.tengo"...
|
|
20
|
+
Processing "src/stop-codon-replacement.lib.tengo"...
|
|
20
21
|
Processing "src/test/columns-calculate.tpl.tengo"...
|
|
21
22
|
Processing "src/test/columns.test.tpl.tengo"...
|
|
22
23
|
No syntax errors found.
|
|
@@ -25,6 +26,7 @@ No syntax errors found.
|
|
|
25
26
|
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/calculate-export-specs.lib.tengo
|
|
26
27
|
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/clonotype-label.lib.tengo
|
|
27
28
|
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/qc-report-columns.lib.tengo
|
|
29
|
+
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/stop-codon-replacement.lib.tengo
|
|
28
30
|
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz
|
|
29
31
|
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/calculate-preset-info.plj.gz
|
|
30
32
|
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/export-report.plj.gz
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.mixcr-clonotyping.workflow
|
|
2
2
|
|
|
3
|
+
## 3.23.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 3c8ed71: stop codon replacement and dep updates
|
|
8
|
+
|
|
9
|
+
## 3.22.0
|
|
10
|
+
|
|
11
|
+
### Minor Changes
|
|
12
|
+
|
|
13
|
+
- 22562b1: qc table multiple UMI support
|
|
14
|
+
|
|
3
15
|
## 3.21.0
|
|
4
16
|
|
|
5
17
|
### Minor Changes
|
|
@@ -442,6 +442,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
442
442
|
}
|
|
443
443
|
|
|
444
444
|
orderP := 80000
|
|
445
|
+
aminoAcidSeqColumns := []
|
|
446
|
+
aminoAcidSeqColumnPairs := []
|
|
447
|
+
cdr3SeqColumns := []
|
|
445
448
|
|
|
446
449
|
|
|
447
450
|
|
|
@@ -466,6 +469,16 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
466
469
|
alphabetShortMixcr := isAminoAcid ? "aa" : "n"
|
|
467
470
|
columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
|
|
468
471
|
visibility := featureU == "CDR3" && (!isSingleCell || isAminoAcid) // isSingleCell ? (featureU == "CDR3") && isAminoAcid : (featureU == "CDR3") || (featureU == assemblingFeature)
|
|
472
|
+
if featureU == "CDR3" {
|
|
473
|
+
cdr3SeqColumns += [ columnName ]
|
|
474
|
+
}
|
|
475
|
+
if isAminoAcid {
|
|
476
|
+
aminoAcidSeqColumns += [ columnName ]
|
|
477
|
+
aminoAcidSeqColumnPairs += [ {
|
|
478
|
+
aa: columnName,
|
|
479
|
+
nt: "nSeq" + imputedU + featureU
|
|
480
|
+
} ]
|
|
481
|
+
}
|
|
469
482
|
columnsSpecPerClonotypeNoAggregates += [ {
|
|
470
483
|
column: columnName,
|
|
471
484
|
id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
|
|
@@ -973,6 +986,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
973
986
|
}
|
|
974
987
|
|
|
975
988
|
return {
|
|
989
|
+
productiveFeature: productiveFeature,
|
|
976
990
|
clonotypeKeyColumns: clonotypeKeyColumns,
|
|
977
991
|
clonotypeKeyArgs: clonotypeKeyArgs,
|
|
978
992
|
|
|
@@ -981,6 +995,10 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
981
995
|
axisByClonotypeKeyGen: axisByClonotypeKeyGen,
|
|
982
996
|
axisByScClonotypeKeyGen: axisByScClonotypeKeyGen,
|
|
983
997
|
|
|
998
|
+
aminoAcidSeqColumns: aminoAcidSeqColumns,
|
|
999
|
+
aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
|
|
1000
|
+
cdr3SeqColumns: cdr3SeqColumns,
|
|
1001
|
+
|
|
984
1002
|
columnsSpecPerSample: columnsSpecPerSample,
|
|
985
1003
|
columnsSpecPerSampleSc: columnsSpecPerSampleSc,
|
|
986
1004
|
columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
|
|
@@ -3,9 +3,13 @@
|
|
|
3
3
|
|
|
4
4
|
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
5
5
|
pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
|
|
6
|
+
text := import("text")
|
|
6
7
|
|
|
7
8
|
|
|
8
|
-
getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags) {
|
|
9
|
+
getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags, umiTags) {
|
|
10
|
+
if is_undefined(umiTags) {
|
|
11
|
+
umiTags = []
|
|
12
|
+
}
|
|
9
13
|
|
|
10
14
|
baseColumns := [
|
|
11
15
|
{
|
|
@@ -656,73 +660,82 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
|
|
|
656
660
|
}
|
|
657
661
|
}
|
|
658
662
|
|
|
659
|
-
dataWithUmiColumns := [
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
663
|
+
dataWithUmiColumns := []
|
|
664
|
+
for idx, umiTag in umiTags {
|
|
665
|
+
orderBase := 85000 + idx * 10
|
|
666
|
+
orderBasePercents := 85100 + idx * 10
|
|
667
|
+
orderDiversity := 85200 + idx * 10
|
|
668
|
+
orderDiversityPercents := 85300 + idx * 10
|
|
669
|
+
tagL := text.to_lower(umiTag)
|
|
670
|
+
dataWithUmiColumns = dataWithUmiColumns + [{
|
|
671
|
+
column: "refineTags." + umiTag + ".outputCount",
|
|
672
|
+
id: "refine-tags-" + tagL + "-output-count",
|
|
673
|
+
allowNA: true,
|
|
674
|
+
naRegex: "NaN",
|
|
675
|
+
spec: {
|
|
676
|
+
name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCount",
|
|
677
|
+
valueType: "Long",
|
|
678
|
+
annotations: {
|
|
679
|
+
"pl7.app/min": "0",
|
|
680
|
+
"pl7.app/table/orderPriority": string(orderBase),
|
|
681
|
+
"pl7.app/table/visibility": "optional",
|
|
682
|
+
"pl7.app/label": "Refine Tags " + umiTag + " - Output Count"
|
|
683
|
+
}
|
|
672
684
|
}
|
|
673
|
-
}
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
685
|
+
},
|
|
686
|
+
{
|
|
687
|
+
column: "refineTags." + umiTag + ".outputCountPercents",
|
|
688
|
+
id: "refine-tags-" + tagL + "-output-count-percents",
|
|
689
|
+
allowNA: true,
|
|
690
|
+
naRegex: "NaN",
|
|
691
|
+
spec: {
|
|
692
|
+
name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCountPercents",
|
|
693
|
+
valueType: "Double",
|
|
694
|
+
annotations: {
|
|
695
|
+
"pl7.app/min": "0",
|
|
696
|
+
"pl7.app/max": "100",
|
|
697
|
+
"pl7.app/table/orderPriority": string(orderBasePercents),
|
|
698
|
+
"pl7.app/table/visibility": "default",
|
|
699
|
+
"pl7.app/label": "Refine Tags " + umiTag + " - Output Count (%)"
|
|
700
|
+
}
|
|
689
701
|
}
|
|
690
|
-
}
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
702
|
+
},
|
|
703
|
+
{
|
|
704
|
+
column: "refineTags." + umiTag + ".outputDiversity",
|
|
705
|
+
id: "refine-tags-" + tagL + "-output-diversity",
|
|
706
|
+
allowNA: true,
|
|
707
|
+
naRegex: "NaN",
|
|
708
|
+
spec: {
|
|
709
|
+
name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversity",
|
|
710
|
+
valueType: "Long",
|
|
711
|
+
annotations: {
|
|
712
|
+
"pl7.app/min": "0",
|
|
713
|
+
"pl7.app/table/orderPriority": string(orderDiversity),
|
|
714
|
+
"pl7.app/table/visibility": "optional",
|
|
715
|
+
"pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity"
|
|
716
|
+
}
|
|
705
717
|
}
|
|
706
|
-
}
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
718
|
+
},
|
|
719
|
+
{
|
|
720
|
+
column: "refineTags." + umiTag + ".outputDiversityPercents",
|
|
721
|
+
id: "refine-tags-" + tagL + "-output-diversity-percents",
|
|
722
|
+
allowNA: true,
|
|
723
|
+
naRegex: "NaN",
|
|
724
|
+
spec: {
|
|
725
|
+
name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversityPercents",
|
|
726
|
+
valueType: "Double",
|
|
727
|
+
annotations: {
|
|
728
|
+
"pl7.app/min": "0",
|
|
729
|
+
"pl7.app/max": "100",
|
|
730
|
+
"pl7.app/table/orderPriority": string(orderDiversityPercents),
|
|
731
|
+
"pl7.app/table/visibility": "default",
|
|
732
|
+
"pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity (%)"
|
|
733
|
+
}
|
|
722
734
|
}
|
|
723
|
-
}
|
|
724
|
-
}
|
|
725
|
-
|
|
735
|
+
}]
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
dataWithUmiColumns = dataWithUmiColumns + [{
|
|
726
739
|
column: "refineTags.numberOfGroupsAccepted",
|
|
727
740
|
id: "refine-tags-number-of-groups-accepted",
|
|
728
741
|
allowNA: true,
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
2
|
+
text := import("text")
|
|
3
|
+
|
|
4
|
+
applyStopCodonReplacementsPt := func(df, opts) {
|
|
5
|
+
if is_undefined(opts) {
|
|
6
|
+
return df
|
|
7
|
+
}
|
|
8
|
+
aminoAcidSeqColumns := opts.aminoAcidSeqColumns
|
|
9
|
+
aminoAcidSeqColumnPairs := opts.aminoAcidSeqColumnPairs
|
|
10
|
+
cdr3SeqColumns := opts.cdr3SeqColumns
|
|
11
|
+
stopCodonTypes := opts.stopCodonTypes
|
|
12
|
+
stopCodonReplacements := opts.stopCodonReplacements
|
|
13
|
+
allowedNtColumns := opts.allowedNtColumns
|
|
14
|
+
|
|
15
|
+
if is_undefined(aminoAcidSeqColumns) || !is_array(aminoAcidSeqColumns) || len(aminoAcidSeqColumns) == 0 {
|
|
16
|
+
return df
|
|
17
|
+
}
|
|
18
|
+
if is_undefined(stopCodonTypes) || !is_array(stopCodonTypes) || len(stopCodonTypes) == 0 {
|
|
19
|
+
return df
|
|
20
|
+
}
|
|
21
|
+
if !is_undefined(stopCodonReplacements) && !is_map(stopCodonReplacements) {
|
|
22
|
+
stopCodonReplacements = undefined
|
|
23
|
+
}
|
|
24
|
+
if !is_undefined(allowedNtColumns) && !is_array(allowedNtColumns) {
|
|
25
|
+
allowedNtColumns = undefined
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
contains := func(arr, value) {
|
|
29
|
+
for v in arr {
|
|
30
|
+
if v == value { return true }
|
|
31
|
+
}
|
|
32
|
+
return false
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
stopReplacement := func(stopType) {
|
|
36
|
+
if !contains(stopCodonTypes, stopType) {
|
|
37
|
+
return "*"
|
|
38
|
+
}
|
|
39
|
+
if is_undefined(stopCodonReplacements) {
|
|
40
|
+
return "*"
|
|
41
|
+
}
|
|
42
|
+
aa := stopCodonReplacements[stopType]
|
|
43
|
+
if is_undefined(aa) || aa == "" {
|
|
44
|
+
return "*"
|
|
45
|
+
}
|
|
46
|
+
return text.to_upper(aa)
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
codonMapBase := {
|
|
50
|
+
"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
|
|
51
|
+
"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
|
|
52
|
+
"TAT": "Y", "TAC": "Y", "TAA": "*",
|
|
53
|
+
"TAG": "*", "TGT": "C", "TGC": "C",
|
|
54
|
+
"TGA": "*", "TGG": "W",
|
|
55
|
+
"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
|
|
56
|
+
"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
|
|
57
|
+
"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
|
|
58
|
+
"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
|
|
59
|
+
"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
|
|
60
|
+
"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
|
|
61
|
+
"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
|
|
62
|
+
"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
|
|
63
|
+
"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
|
|
64
|
+
"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
|
|
65
|
+
"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
|
|
66
|
+
"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
codonMapReplace := {
|
|
70
|
+
"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
|
|
71
|
+
"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
|
|
72
|
+
"TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
|
|
73
|
+
"TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
|
|
74
|
+
"TGA": stopReplacement("opal"), "TGG": "W",
|
|
75
|
+
"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
|
|
76
|
+
"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
|
|
77
|
+
"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
|
|
78
|
+
"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
|
|
79
|
+
"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
|
|
80
|
+
"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
|
|
81
|
+
"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
|
|
82
|
+
"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
|
|
83
|
+
"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
|
|
84
|
+
"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
|
|
85
|
+
"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
|
|
86
|
+
"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
translateNtToAaExpr := func(ntExpr, codonMap) {
|
|
90
|
+
seq := ntExpr.fillNull("").strToUpper()
|
|
91
|
+
seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
|
|
92
|
+
for codon, aa in codonMap {
|
|
93
|
+
seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
|
|
94
|
+
}
|
|
95
|
+
seq = seq.strReplace("\\|$", "", { replaceAll: false })
|
|
96
|
+
seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
|
|
97
|
+
seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
|
|
98
|
+
return seq
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
pairs := []
|
|
102
|
+
if is_array(aminoAcidSeqColumnPairs) && len(aminoAcidSeqColumnPairs) > 0 {
|
|
103
|
+
for p in aminoAcidSeqColumnPairs {
|
|
104
|
+
if is_map(p) && !is_undefined(p.aa) && !is_undefined(p.nt) {
|
|
105
|
+
if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, p.nt) {
|
|
106
|
+
continue
|
|
107
|
+
}
|
|
108
|
+
pairs = append(pairs, p)
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
if len(pairs) == 0 {
|
|
113
|
+
for aaCol in aminoAcidSeqColumns {
|
|
114
|
+
ntCol := text.replace(aaCol, "aaSeq", "nSeq", 1)
|
|
115
|
+
if text.has_suffix(ntCol, "InFrame") {
|
|
116
|
+
ntCol = text.replace(ntCol, "InFrame", "", 1)
|
|
117
|
+
}
|
|
118
|
+
if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, ntCol) {
|
|
119
|
+
continue
|
|
120
|
+
}
|
|
121
|
+
pairs = append(pairs, { aa: aaCol, nt: ntCol })
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
expressions := []
|
|
126
|
+
replacedAnyExprs := []
|
|
127
|
+
replacedColsExprs := []
|
|
128
|
+
aaColumnsUsed := []
|
|
129
|
+
for pair in pairs {
|
|
130
|
+
aaCol := pair.aa
|
|
131
|
+
ntCol := pair.nt
|
|
132
|
+
translatedBase := translateNtToAaExpr(pt.col(ntCol), codonMapBase)
|
|
133
|
+
translatedReplaced := translateNtToAaExpr(pt.col(ntCol), codonMapReplace)
|
|
134
|
+
expressions = append(expressions, translatedReplaced.alias(aaCol))
|
|
135
|
+
cond := translatedReplaced.neq(translatedBase)
|
|
136
|
+
replacedAnyExprs = append(replacedAnyExprs, cond)
|
|
137
|
+
replacedColsExprs = append(replacedColsExprs, pt.when(cond).then(pt.lit(aaCol)).otherwise(pt.lit("")))
|
|
138
|
+
aaColumnsUsed = append(aaColumnsUsed, aaCol)
|
|
139
|
+
}
|
|
140
|
+
if len(expressions) > 0 {
|
|
141
|
+
df = df.withColumns(expressions...)
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
if len(replacedAnyExprs) > 0 {
|
|
145
|
+
colsList := pt.concatStr(replacedColsExprs, { delimiter: "," })
|
|
146
|
+
colsList = colsList.strReplace(",+", ",", { replaceAll: true }).strReplace("^,|,$", "", { replaceAll: true })
|
|
147
|
+
df = df.withColumns(
|
|
148
|
+
pt.anyHorizontal(replacedAnyExprs...).alias("stopCodonReplaced"),
|
|
149
|
+
colsList.alias("stopCodonReplacedColumns")
|
|
150
|
+
)
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
stopChecks := []
|
|
154
|
+
for colName in aaColumnsUsed {
|
|
155
|
+
stopChecks = append(stopChecks, pt.col(colName).strContains("*", { literal: true }))
|
|
156
|
+
}
|
|
157
|
+
if len(stopChecks) > 0 {
|
|
158
|
+
df = df.filter(pt.anyHorizontal(stopChecks...).eq(false))
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
if !is_undefined(cdr3SeqColumns) && len(cdr3SeqColumns) > 0 {
|
|
162
|
+
regionChecks := []
|
|
163
|
+
for colName in cdr3SeqColumns {
|
|
164
|
+
if !contains(aaColumnsUsed, colName) {
|
|
165
|
+
continue
|
|
166
|
+
}
|
|
167
|
+
regionChecks = append(regionChecks, pt.col(colName).strToUpper().eq("REGION_NOT_COVERED"))
|
|
168
|
+
}
|
|
169
|
+
if len(regionChecks) > 0 {
|
|
170
|
+
df = df.filter(pt.anyHorizontal(regionChecks...).eq(false))
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
return df
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
export {
|
|
178
|
+
applyStopCodonReplacementsPt: applyStopCodonReplacementsPt
|
|
179
|
+
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.23.0",
|
|
4
4
|
"description": "Tengo-based template",
|
|
5
5
|
"dependencies": {
|
|
6
|
-
"@platforma-sdk/workflow-tengo": "5.8.
|
|
6
|
+
"@platforma-sdk/workflow-tengo": "5.8.1",
|
|
7
7
|
"@platforma-open/milaboratories.software-mixcr": "4.7.0-254-develop"
|
|
8
8
|
},
|
|
9
9
|
"devDependencies": {
|
|
10
|
-
"@platforma-sdk/tengo-builder": "2.4.
|
|
10
|
+
"@platforma-sdk/tengo-builder": "2.4.12"
|
|
11
11
|
},
|
|
12
12
|
"scripts": {
|
|
13
|
-
"build": "rm -rf dist && pl-tengo check && pl-tengo build",
|
|
13
|
+
"build": "shx rm -rf dist && pl-tengo check && pl-tengo build",
|
|
14
14
|
"format": "/usr/bin/env emacs --script ./format.el",
|
|
15
|
-
"do-pack": "rm -f *.tgz && pnpm pack && mv *.tgz package.tgz"
|
|
15
|
+
"do-pack": "shx rm -f *.tgz && pnpm pack && shx mv *.tgz package.tgz"
|
|
16
16
|
}
|
|
17
17
|
}
|
|
@@ -442,6 +442,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
442
442
|
}
|
|
443
443
|
|
|
444
444
|
orderP := 80000
|
|
445
|
+
aminoAcidSeqColumns := []
|
|
446
|
+
aminoAcidSeqColumnPairs := []
|
|
447
|
+
cdr3SeqColumns := []
|
|
445
448
|
|
|
446
449
|
// Sequences
|
|
447
450
|
|
|
@@ -466,6 +469,16 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
466
469
|
alphabetShortMixcr := isAminoAcid ? "aa" : "n"
|
|
467
470
|
columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
|
|
468
471
|
visibility := featureU == "CDR3" && (!isSingleCell || isAminoAcid) // isSingleCell ? (featureU == "CDR3") && isAminoAcid : (featureU == "CDR3") || (featureU == assemblingFeature)
|
|
472
|
+
if featureU == "CDR3" {
|
|
473
|
+
cdr3SeqColumns += [ columnName ]
|
|
474
|
+
}
|
|
475
|
+
if isAminoAcid {
|
|
476
|
+
aminoAcidSeqColumns += [ columnName ]
|
|
477
|
+
aminoAcidSeqColumnPairs += [ {
|
|
478
|
+
aa: columnName,
|
|
479
|
+
nt: "nSeq" + imputedU + featureU
|
|
480
|
+
} ]
|
|
481
|
+
}
|
|
469
482
|
columnsSpecPerClonotypeNoAggregates += [ {
|
|
470
483
|
column: columnName,
|
|
471
484
|
id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
|
|
@@ -973,6 +986,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
973
986
|
}
|
|
974
987
|
|
|
975
988
|
return {
|
|
989
|
+
productiveFeature: productiveFeature,
|
|
976
990
|
clonotypeKeyColumns: clonotypeKeyColumns,
|
|
977
991
|
clonotypeKeyArgs: clonotypeKeyArgs,
|
|
978
992
|
|
|
@@ -981,6 +995,10 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
981
995
|
axisByClonotypeKeyGen: axisByClonotypeKeyGen,
|
|
982
996
|
axisByScClonotypeKeyGen: axisByScClonotypeKeyGen,
|
|
983
997
|
|
|
998
|
+
aminoAcidSeqColumns: aminoAcidSeqColumns,
|
|
999
|
+
aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
|
|
1000
|
+
cdr3SeqColumns: cdr3SeqColumns,
|
|
1001
|
+
|
|
984
1002
|
columnsSpecPerSample: columnsSpecPerSample,
|
|
985
1003
|
columnsSpecPerSampleSc: columnsSpecPerSampleSc,
|
|
986
1004
|
columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
|
|
@@ -29,16 +29,80 @@ self.body(func(inputs) {
|
|
|
29
29
|
library := inputs.library
|
|
30
30
|
isLibraryFileGzipped := inputs.isLibraryFileGzipped
|
|
31
31
|
clonotypeTablesData := inputs.clonotypeTablesData
|
|
32
|
+
stopCodonTypes := inputs.stopCodonTypes
|
|
33
|
+
stopCodonReplacements := inputs.stopCodonReplacements
|
|
32
34
|
|
|
33
35
|
isSingleCell := len(presetSpecForBack.cellTags) > 0
|
|
34
|
-
|
|
36
|
+
umiTags := presetSpecForBack.umiTags
|
|
37
|
+
hasUmi := !is_undefined(umiTags) && len(umiTags) > 0
|
|
35
38
|
cellTags := presetSpecForBack.cellTags
|
|
36
39
|
singleCellChainTsvsData := inputs.singleCellChainTsvsData
|
|
40
|
+
useStopCodonReplacement := !is_undefined(stopCodonTypes) && is_array(stopCodonTypes) && len(stopCodonTypes) > 0
|
|
41
|
+
if is_undefined(stopCodonReplacements) || !is_map(stopCodonReplacements) {
|
|
42
|
+
stopCodonReplacements = {}
|
|
43
|
+
}
|
|
37
44
|
|
|
38
|
-
featureForFlags :=
|
|
45
|
+
featureForFlags := inputs.productiveFeature
|
|
46
|
+
if is_undefined(featureForFlags) || featureForFlags == "" {
|
|
47
|
+
featureForFlags = "CDR3"
|
|
48
|
+
}
|
|
39
49
|
isOOFColumn := "isOOF" + featureForFlags
|
|
40
50
|
hasStopsColumn := "hasStopsIn" + featureForFlags
|
|
41
51
|
|
|
52
|
+
contains := func(arr, value) {
|
|
53
|
+
for v in arr {
|
|
54
|
+
if v == value { return true }
|
|
55
|
+
}
|
|
56
|
+
return false
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
stopReplacement := func(stopType) {
|
|
60
|
+
if !contains(stopCodonTypes, stopType) {
|
|
61
|
+
return "*"
|
|
62
|
+
}
|
|
63
|
+
aa := stopCodonReplacements[stopType]
|
|
64
|
+
if is_undefined(aa) || aa == "" {
|
|
65
|
+
return "*"
|
|
66
|
+
}
|
|
67
|
+
return text.to_upper(aa)
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
ll.print("__THE_LOG__", stopReplacement("ochre"))
|
|
71
|
+
ll.print("__THE_LOG__", stopReplacement("amber"))
|
|
72
|
+
ll.print("__THE_LOG__", stopReplacement("opal"))
|
|
73
|
+
|
|
74
|
+
codonMapReplace := {
|
|
75
|
+
"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
|
|
76
|
+
"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
|
|
77
|
+
"TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
|
|
78
|
+
"TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
|
|
79
|
+
"TGA": stopReplacement("opal"), "TGG": "W",
|
|
80
|
+
"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
|
|
81
|
+
"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
|
|
82
|
+
"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
|
|
83
|
+
"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
|
|
84
|
+
"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
|
|
85
|
+
"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
|
|
86
|
+
"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
|
|
87
|
+
"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
|
|
88
|
+
"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
|
|
89
|
+
"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
|
|
90
|
+
"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
|
|
91
|
+
"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
translateNtToAaExpr := func(ntExpr, codonMap) {
|
|
95
|
+
seq := ntExpr.fillNull("").strToUpper()
|
|
96
|
+
seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
|
|
97
|
+
for codon, aa in codonMap {
|
|
98
|
+
seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
|
|
99
|
+
}
|
|
100
|
+
seq = seq.strReplace("\\|$", "", { replaceAll: false })
|
|
101
|
+
seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
|
|
102
|
+
seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
|
|
103
|
+
return seq
|
|
104
|
+
}
|
|
105
|
+
|
|
42
106
|
chainInfos := {
|
|
43
107
|
"IGHeavy": { mixcrFilter: "IGH", name: "IG Heavy", shortName: "Heavy" },
|
|
44
108
|
"IGLight": { mixcrFilter: "IGK,IGL", name: "IG Light", shortName: "Light" },
|
|
@@ -152,6 +216,7 @@ self.body(func(inputs) {
|
|
|
152
216
|
exportFiltersCmd = exportFiltersCmd.
|
|
153
217
|
arg("-isOOF").arg(featureForFlags).
|
|
154
218
|
arg("-hasStops").arg(featureForFlags).
|
|
219
|
+
arg("-nFeature").arg(featureForFlags).
|
|
155
220
|
arg("clones.clns").
|
|
156
221
|
addFile("clones.clns", clnsFile).
|
|
157
222
|
arg("clones.tsv").
|
|
@@ -165,10 +230,19 @@ self.body(func(inputs) {
|
|
|
165
230
|
}
|
|
166
231
|
exportFiltersResult := exportFiltersCmd.cacheHours(3).run()
|
|
167
232
|
filterTsv := exportFiltersResult.getFile("clones.tsv")
|
|
168
|
-
|
|
233
|
+
schema := [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ]
|
|
234
|
+
if useStopCodonReplacement {
|
|
235
|
+
schema = append(schema, { column: "nSeq" + featureForFlags, type: "String" })
|
|
236
|
+
}
|
|
237
|
+
dfFilters := wf.frame(filterTsv, { xsvType: "tsv", inferSchema: false, schema: schema })
|
|
238
|
+
stopExpr := pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0))
|
|
239
|
+
if useStopCodonReplacement {
|
|
240
|
+
translated := translateNtToAaExpr(pt.col("nSeq" + featureForFlags), codonMapReplace)
|
|
241
|
+
stopExpr = pt.when(translated.strContains("*", { literal: true })).then(pt.lit(1)).otherwise(pt.lit(0))
|
|
242
|
+
}
|
|
169
243
|
dfFilters = dfFilters.withColumns(
|
|
170
244
|
pt.when(pt.col(isOOFColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__oof"),
|
|
171
|
-
|
|
245
|
+
stopExpr.alias("__stop")
|
|
172
246
|
)
|
|
173
247
|
dfFilterCount := dfFilters.select(
|
|
174
248
|
pt.lit(sampleId).alias("sampleId"),
|
|
@@ -210,6 +284,7 @@ self.body(func(inputs) {
|
|
|
210
284
|
exportChainFiltersResult := exportChainFiltersCmd.
|
|
211
285
|
arg("-isOOF").arg(featureForFlags).
|
|
212
286
|
arg("-hasStops").arg(featureForFlags).
|
|
287
|
+
arg("-nFeature").arg(featureForFlags).
|
|
213
288
|
arg("clones.clns").
|
|
214
289
|
addFile("clones.clns", clnsFile).
|
|
215
290
|
arg("clones.tsv").
|
|
@@ -217,10 +292,19 @@ self.body(func(inputs) {
|
|
|
217
292
|
cacheHours(3).
|
|
218
293
|
run()
|
|
219
294
|
chainFilterTsv := exportChainFiltersResult.getFile("clones.tsv")
|
|
220
|
-
|
|
295
|
+
chainSchema := [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ]
|
|
296
|
+
if useStopCodonReplacement {
|
|
297
|
+
chainSchema = append(chainSchema, { column: "nSeq" + featureForFlags, type: "String" })
|
|
298
|
+
}
|
|
299
|
+
dfChainFilters := wf.frame(chainFilterTsv, { xsvType: "tsv", inferSchema: false, schema: chainSchema })
|
|
300
|
+
chainStopExpr := pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0))
|
|
301
|
+
if useStopCodonReplacement {
|
|
302
|
+
translated := translateNtToAaExpr(pt.col("nSeq" + featureForFlags), codonMapReplace)
|
|
303
|
+
chainStopExpr = pt.when(translated.strContains("*", { literal: true })).then(pt.lit(1)).otherwise(pt.lit(0))
|
|
304
|
+
}
|
|
221
305
|
dfChainFilters = dfChainFilters.withColumns(
|
|
222
306
|
pt.when(pt.col(isOOFColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__oof"),
|
|
223
|
-
|
|
307
|
+
chainStopExpr.alias("__stop")
|
|
224
308
|
)
|
|
225
309
|
dfChainCount := dfChainFilters.select(
|
|
226
310
|
pt.lit(sampleId).alias("sampleId"),
|
|
@@ -409,7 +493,7 @@ self.body(func(inputs) {
|
|
|
409
493
|
|
|
410
494
|
tsvFile := wfResult.getFile("qc-report-processed.tsv")
|
|
411
495
|
|
|
412
|
-
qcReportColumns := qcReportColumns(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags)
|
|
496
|
+
qcReportColumns := qcReportColumns(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags, umiTags)
|
|
413
497
|
reportColumnsSpec := qcReportColumns.reportColumnsSpec
|
|
414
498
|
|
|
415
499
|
qcReportTable := xsv.importFile(
|
package/src/main.tpl.tengo
CHANGED
|
@@ -132,7 +132,9 @@ wf.body(func(args) {
|
|
|
132
132
|
materialType: args.materialType,
|
|
133
133
|
tagPattern: args.tagPattern,
|
|
134
134
|
assembleClonesBy: args.assembleClonesBy,
|
|
135
|
-
exportMinQuality: args.exportMinQuality
|
|
135
|
+
exportMinQuality: args.exportMinQuality,
|
|
136
|
+
stopCodonTypes: args.stopCodonTypes,
|
|
137
|
+
stopCodonReplacements: args.stopCodonReplacements
|
|
136
138
|
})
|
|
137
139
|
})
|
|
138
140
|
|
|
@@ -9,8 +9,10 @@ assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
|
9
9
|
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
10
10
|
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
11
11
|
clonotypeLabel := import(":clonotype-label")
|
|
12
|
+
stopCodonReplacement := import(":stop-codon-replacement")
|
|
12
13
|
|
|
13
14
|
json := import("json")
|
|
15
|
+
text := import("text")
|
|
14
16
|
|
|
15
17
|
self.defineOutputs("tsv", "tsvForSingleCell")
|
|
16
18
|
|
|
@@ -24,6 +26,11 @@ self.body(func(inputs) {
|
|
|
24
26
|
isLibraryFileGzipped := params.isLibraryFileGzipped
|
|
25
27
|
chains := params.chains
|
|
26
28
|
exportArgs := params.exportArgs
|
|
29
|
+
stopCodonTypes := params.stopCodonTypes
|
|
30
|
+
stopCodonReplacements := params.stopCodonReplacements
|
|
31
|
+
aminoAcidSeqColumns := params.aminoAcidSeqColumns
|
|
32
|
+
aminoAcidSeqColumnPairs := params.aminoAcidSeqColumnPairs
|
|
33
|
+
cdr3SeqColumns := params.cdr3SeqColumns
|
|
27
34
|
|
|
28
35
|
clonotypeKeyColumns := params.clonotypeKeyColumns
|
|
29
36
|
clonotypeKeyArgs := params.clonotypeKeyArgs
|
|
@@ -40,6 +47,8 @@ self.body(func(inputs) {
|
|
|
40
47
|
mainIsProductiveArgs := params.mainIsProductiveArgs
|
|
41
48
|
mainIsProductiveColumn := params.mainIsProductiveColumn
|
|
42
49
|
|
|
50
|
+
useProductiveFilter := is_undefined(stopCodonTypes) || len(stopCodonTypes) == 0
|
|
51
|
+
|
|
43
52
|
exportMemGB := undefined
|
|
44
53
|
if !is_undefined(inputs.perProcessMemGB) {
|
|
45
54
|
exportMemGB = int(1.0*inputs.perProcessMemGB/4.0)
|
|
@@ -73,8 +82,12 @@ self.body(func(inputs) {
|
|
|
73
82
|
arg("--dont-split-files").
|
|
74
83
|
arg("--drop-default-fields").
|
|
75
84
|
arg("--reset-export-clone-table-splitting").
|
|
76
|
-
arg("--export-productive-clones-only").
|
|
77
85
|
arg("--chains").arg(chains)
|
|
86
|
+
if useProductiveFilter {
|
|
87
|
+
mixcrCmdBuilder = mixcrCmdBuilder.arg("--export-productive-clones-only")
|
|
88
|
+
} else {
|
|
89
|
+
mixcrCmdBuilder = mixcrCmdBuilder.arg("--filter-out-of-frames")
|
|
90
|
+
}
|
|
78
91
|
|
|
79
92
|
if library {
|
|
80
93
|
if isLibraryFileGzipped {
|
|
@@ -138,6 +151,13 @@ self.body(func(inputs) {
|
|
|
138
151
|
alias(mainIsProductiveColumn)
|
|
139
152
|
)
|
|
140
153
|
}
|
|
154
|
+
dfMain = stopCodonReplacement.applyStopCodonReplacementsPt(dfMain, {
|
|
155
|
+
aminoAcidSeqColumns: aminoAcidSeqColumns,
|
|
156
|
+
aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
|
|
157
|
+
cdr3SeqColumns: cdr3SeqColumns,
|
|
158
|
+
stopCodonTypes: stopCodonTypes,
|
|
159
|
+
stopCodonReplacements: stopCodonReplacements
|
|
160
|
+
})
|
|
141
161
|
dfMain.addColumns(
|
|
142
162
|
hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey")
|
|
143
163
|
)
|
|
@@ -197,6 +217,24 @@ self.body(func(inputs) {
|
|
|
197
217
|
)
|
|
198
218
|
}
|
|
199
219
|
|
|
220
|
+
dfSingleCell = stopCodonReplacement.applyStopCodonReplacementsPt(dfSingleCell, {
|
|
221
|
+
aminoAcidSeqColumns: aminoAcidSeqColumns,
|
|
222
|
+
aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
|
|
223
|
+
cdr3SeqColumns: cdr3SeqColumns,
|
|
224
|
+
stopCodonTypes: stopCodonTypes,
|
|
225
|
+
stopCodonReplacements: stopCodonReplacements,
|
|
226
|
+
allowedNtColumns: func() {
|
|
227
|
+
allowed := []
|
|
228
|
+
if !is_undefined(clonotypeKeyColumns) {
|
|
229
|
+
for col in clonotypeKeyColumns {
|
|
230
|
+
if text.has_prefix(col, "nSeq") {
|
|
231
|
+
allowed = append(allowed, col)
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
return allowed
|
|
236
|
+
}()
|
|
237
|
+
})
|
|
200
238
|
dfSingleCell.addColumns(
|
|
201
239
|
hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey"),
|
|
202
240
|
hashCellKey ? hashKeyDerivationExpressionPt(cellTagColumns).alias("cellKey") : pt.col(cellTagColumns[0]).alias("cellKey")
|
package/src/process.tpl.tengo
CHANGED
|
@@ -210,6 +210,10 @@ self.body(func(inputs) {
|
|
|
210
210
|
columnsSpecPerClonotypeNoAggregates := exportSpecs.columnsSpecPerClonotypeNoAggregates
|
|
211
211
|
columnsSpecPerClonotypeAggregates := exportSpecs.columnsSpecPerClonotypeAggregates
|
|
212
212
|
columnsSpecPerClonotypeSc := exportSpecs.columnsSpecPerClonotypeSc
|
|
213
|
+
aminoAcidSeqColumns := exportSpecs.aminoAcidSeqColumns
|
|
214
|
+
aminoAcidSeqColumnPairs := exportSpecs.aminoAcidSeqColumnPairs
|
|
215
|
+
cdr3SeqColumns := exportSpecs.cdr3SeqColumns
|
|
216
|
+
productiveFeature := exportSpecs.productiveFeature
|
|
213
217
|
|
|
214
218
|
clonotypeKeyColumns := exportSpecs.clonotypeKeyColumns
|
|
215
219
|
clonotypeKeyArgs := exportSpecs.clonotypeKeyArgs
|
|
@@ -406,6 +410,10 @@ self.body(func(inputs) {
|
|
|
406
410
|
clonotypeKeyColumns: clonotypeKeyColumns,
|
|
407
411
|
clonotypeKeyArgs: clonotypeKeyArgs,
|
|
408
412
|
|
|
413
|
+
aminoAcidSeqColumns: aminoAcidSeqColumns,
|
|
414
|
+
aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
|
|
415
|
+
cdr3SeqColumns: cdr3SeqColumns,
|
|
416
|
+
|
|
409
417
|
mainIsProductiveColumn: mainIsProductiveColumn,
|
|
410
418
|
mainIsProductiveArgs: mainIsProductiveArgs,
|
|
411
419
|
mainAbundanceColumnNormalized: mainAbundanceColumnNormalized,
|
|
@@ -419,7 +427,9 @@ self.body(func(inputs) {
|
|
|
419
427
|
mainAbundanceColumnIsReadCount: (!is_undefined(cellTagColumns) && len(cellTagColumns) > 0 && mainAbundanceColumnUnnormalized == "readCount") ? true : undefined,
|
|
420
428
|
|
|
421
429
|
exportArgs: exportArgs,
|
|
422
|
-
isLibraryFileGzipped: isLibraryFileGzipped
|
|
430
|
+
isLibraryFileGzipped: isLibraryFileGzipped,
|
|
431
|
+
stopCodonTypes: params.stopCodonTypes,
|
|
432
|
+
stopCodonReplacements: params.stopCodonReplacements
|
|
423
433
|
}, { removeUndefs: true }),
|
|
424
434
|
library: library
|
|
425
435
|
},
|
|
@@ -779,6 +789,9 @@ self.body(func(inputs) {
|
|
|
779
789
|
library: library,
|
|
780
790
|
isLibraryFileGzipped: isLibraryFileGzipped,
|
|
781
791
|
clonotypeTablesData: clonotypeTablesData,
|
|
792
|
+
productiveFeature: productiveFeature,
|
|
793
|
+
stopCodonTypes: params.stopCodonTypes,
|
|
794
|
+
stopCodonReplacements: params.stopCodonReplacements,
|
|
782
795
|
singleCellChainTsvsData: singleCellChainTsvs
|
|
783
796
|
})
|
|
784
797
|
|
|
@@ -3,9 +3,13 @@
|
|
|
3
3
|
|
|
4
4
|
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
5
5
|
pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
|
|
6
|
+
text := import("text")
|
|
6
7
|
|
|
7
8
|
// QC Report column specifications function
|
|
8
|
-
getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags) {
|
|
9
|
+
getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags, umiTags) {
|
|
10
|
+
if is_undefined(umiTags) {
|
|
11
|
+
umiTags = []
|
|
12
|
+
}
|
|
9
13
|
// Bulk sequencing columns
|
|
10
14
|
baseColumns := [
|
|
11
15
|
{
|
|
@@ -656,73 +660,82 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
|
|
|
656
660
|
}
|
|
657
661
|
}
|
|
658
662
|
|
|
659
|
-
dataWithUmiColumns := [
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
663
|
+
dataWithUmiColumns := []
|
|
664
|
+
for idx, umiTag in umiTags {
|
|
665
|
+
orderBase := 85000 + idx * 10
|
|
666
|
+
orderBasePercents := 85100 + idx * 10
|
|
667
|
+
orderDiversity := 85200 + idx * 10
|
|
668
|
+
orderDiversityPercents := 85300 + idx * 10
|
|
669
|
+
tagL := text.to_lower(umiTag)
|
|
670
|
+
dataWithUmiColumns = dataWithUmiColumns + [{
|
|
671
|
+
column: "refineTags." + umiTag + ".outputCount",
|
|
672
|
+
id: "refine-tags-" + tagL + "-output-count",
|
|
673
|
+
allowNA: true,
|
|
674
|
+
naRegex: "NaN",
|
|
675
|
+
spec: {
|
|
676
|
+
name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCount",
|
|
677
|
+
valueType: "Long",
|
|
678
|
+
annotations: {
|
|
679
|
+
"pl7.app/min": "0",
|
|
680
|
+
"pl7.app/table/orderPriority": string(orderBase),
|
|
681
|
+
"pl7.app/table/visibility": "optional",
|
|
682
|
+
"pl7.app/label": "Refine Tags " + umiTag + " - Output Count"
|
|
683
|
+
}
|
|
672
684
|
}
|
|
673
|
-
}
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
685
|
+
},
|
|
686
|
+
{
|
|
687
|
+
column: "refineTags." + umiTag + ".outputCountPercents",
|
|
688
|
+
id: "refine-tags-" + tagL + "-output-count-percents",
|
|
689
|
+
allowNA: true,
|
|
690
|
+
naRegex: "NaN",
|
|
691
|
+
spec: {
|
|
692
|
+
name: "mixcr.com/reports/refineTags/" + umiTag + "/outputCountPercents",
|
|
693
|
+
valueType: "Double",
|
|
694
|
+
annotations: {
|
|
695
|
+
"pl7.app/min": "0",
|
|
696
|
+
"pl7.app/max": "100",
|
|
697
|
+
"pl7.app/table/orderPriority": string(orderBasePercents),
|
|
698
|
+
"pl7.app/table/visibility": "default",
|
|
699
|
+
"pl7.app/label": "Refine Tags " + umiTag + " - Output Count (%)"
|
|
700
|
+
}
|
|
689
701
|
}
|
|
690
|
-
}
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
702
|
+
},
|
|
703
|
+
{
|
|
704
|
+
column: "refineTags." + umiTag + ".outputDiversity",
|
|
705
|
+
id: "refine-tags-" + tagL + "-output-diversity",
|
|
706
|
+
allowNA: true,
|
|
707
|
+
naRegex: "NaN",
|
|
708
|
+
spec: {
|
|
709
|
+
name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversity",
|
|
710
|
+
valueType: "Long",
|
|
711
|
+
annotations: {
|
|
712
|
+
"pl7.app/min": "0",
|
|
713
|
+
"pl7.app/table/orderPriority": string(orderDiversity),
|
|
714
|
+
"pl7.app/table/visibility": "optional",
|
|
715
|
+
"pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity"
|
|
716
|
+
}
|
|
705
717
|
}
|
|
706
|
-
}
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
718
|
+
},
|
|
719
|
+
{
|
|
720
|
+
column: "refineTags." + umiTag + ".outputDiversityPercents",
|
|
721
|
+
id: "refine-tags-" + tagL + "-output-diversity-percents",
|
|
722
|
+
allowNA: true,
|
|
723
|
+
naRegex: "NaN",
|
|
724
|
+
spec: {
|
|
725
|
+
name: "mixcr.com/reports/refineTags/" + umiTag + "/outputDiversityPercents",
|
|
726
|
+
valueType: "Double",
|
|
727
|
+
annotations: {
|
|
728
|
+
"pl7.app/min": "0",
|
|
729
|
+
"pl7.app/max": "100",
|
|
730
|
+
"pl7.app/table/orderPriority": string(orderDiversityPercents),
|
|
731
|
+
"pl7.app/table/visibility": "default",
|
|
732
|
+
"pl7.app/label": "Refine Tags " + umiTag + " - Output Diversity (%)"
|
|
733
|
+
}
|
|
722
734
|
}
|
|
723
|
-
}
|
|
724
|
-
}
|
|
725
|
-
|
|
735
|
+
}]
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
dataWithUmiColumns = dataWithUmiColumns + [{
|
|
726
739
|
column: "refineTags.numberOfGroupsAccepted",
|
|
727
740
|
id: "refine-tags-number-of-groups-accepted",
|
|
728
741
|
allowNA: true,
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
2
|
+
text := import("text")
|
|
3
|
+
|
|
4
|
+
applyStopCodonReplacementsPt := func(df, opts) {
|
|
5
|
+
if is_undefined(opts) {
|
|
6
|
+
return df
|
|
7
|
+
}
|
|
8
|
+
aminoAcidSeqColumns := opts.aminoAcidSeqColumns
|
|
9
|
+
aminoAcidSeqColumnPairs := opts.aminoAcidSeqColumnPairs
|
|
10
|
+
cdr3SeqColumns := opts.cdr3SeqColumns
|
|
11
|
+
stopCodonTypes := opts.stopCodonTypes
|
|
12
|
+
stopCodonReplacements := opts.stopCodonReplacements
|
|
13
|
+
allowedNtColumns := opts.allowedNtColumns
|
|
14
|
+
|
|
15
|
+
if is_undefined(aminoAcidSeqColumns) || !is_array(aminoAcidSeqColumns) || len(aminoAcidSeqColumns) == 0 {
|
|
16
|
+
return df
|
|
17
|
+
}
|
|
18
|
+
if is_undefined(stopCodonTypes) || !is_array(stopCodonTypes) || len(stopCodonTypes) == 0 {
|
|
19
|
+
return df
|
|
20
|
+
}
|
|
21
|
+
if !is_undefined(stopCodonReplacements) && !is_map(stopCodonReplacements) {
|
|
22
|
+
stopCodonReplacements = undefined
|
|
23
|
+
}
|
|
24
|
+
if !is_undefined(allowedNtColumns) && !is_array(allowedNtColumns) {
|
|
25
|
+
allowedNtColumns = undefined
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
contains := func(arr, value) {
|
|
29
|
+
for v in arr {
|
|
30
|
+
if v == value { return true }
|
|
31
|
+
}
|
|
32
|
+
return false
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
stopReplacement := func(stopType) {
|
|
36
|
+
if !contains(stopCodonTypes, stopType) {
|
|
37
|
+
return "*"
|
|
38
|
+
}
|
|
39
|
+
if is_undefined(stopCodonReplacements) {
|
|
40
|
+
return "*"
|
|
41
|
+
}
|
|
42
|
+
aa := stopCodonReplacements[stopType]
|
|
43
|
+
if is_undefined(aa) || aa == "" {
|
|
44
|
+
return "*"
|
|
45
|
+
}
|
|
46
|
+
return text.to_upper(aa)
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
codonMapBase := {
|
|
50
|
+
"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
|
|
51
|
+
"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
|
|
52
|
+
"TAT": "Y", "TAC": "Y", "TAA": "*",
|
|
53
|
+
"TAG": "*", "TGT": "C", "TGC": "C",
|
|
54
|
+
"TGA": "*", "TGG": "W",
|
|
55
|
+
"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
|
|
56
|
+
"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
|
|
57
|
+
"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
|
|
58
|
+
"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
|
|
59
|
+
"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
|
|
60
|
+
"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
|
|
61
|
+
"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
|
|
62
|
+
"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
|
|
63
|
+
"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
|
|
64
|
+
"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
|
|
65
|
+
"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
|
|
66
|
+
"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
codonMapReplace := {
|
|
70
|
+
"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
|
|
71
|
+
"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
|
|
72
|
+
"TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
|
|
73
|
+
"TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
|
|
74
|
+
"TGA": stopReplacement("opal"), "TGG": "W",
|
|
75
|
+
"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
|
|
76
|
+
"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
|
|
77
|
+
"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
|
|
78
|
+
"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
|
|
79
|
+
"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
|
|
80
|
+
"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
|
|
81
|
+
"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
|
|
82
|
+
"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
|
|
83
|
+
"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
|
|
84
|
+
"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
|
|
85
|
+
"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
|
|
86
|
+
"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
translateNtToAaExpr := func(ntExpr, codonMap) {
|
|
90
|
+
seq := ntExpr.fillNull("").strToUpper()
|
|
91
|
+
seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
|
|
92
|
+
for codon, aa in codonMap {
|
|
93
|
+
seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
|
|
94
|
+
}
|
|
95
|
+
seq = seq.strReplace("\\|$", "", { replaceAll: false })
|
|
96
|
+
seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
|
|
97
|
+
seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
|
|
98
|
+
return seq
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
pairs := []
|
|
102
|
+
if is_array(aminoAcidSeqColumnPairs) && len(aminoAcidSeqColumnPairs) > 0 {
|
|
103
|
+
for p in aminoAcidSeqColumnPairs {
|
|
104
|
+
if is_map(p) && !is_undefined(p.aa) && !is_undefined(p.nt) {
|
|
105
|
+
if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, p.nt) {
|
|
106
|
+
continue
|
|
107
|
+
}
|
|
108
|
+
pairs = append(pairs, p)
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
if len(pairs) == 0 {
|
|
113
|
+
for aaCol in aminoAcidSeqColumns {
|
|
114
|
+
ntCol := text.replace(aaCol, "aaSeq", "nSeq", 1)
|
|
115
|
+
if text.has_suffix(ntCol, "InFrame") {
|
|
116
|
+
ntCol = text.replace(ntCol, "InFrame", "", 1)
|
|
117
|
+
}
|
|
118
|
+
if !is_undefined(allowedNtColumns) && !contains(allowedNtColumns, ntCol) {
|
|
119
|
+
continue
|
|
120
|
+
}
|
|
121
|
+
pairs = append(pairs, { aa: aaCol, nt: ntCol })
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
expressions := []
|
|
126
|
+
replacedAnyExprs := []
|
|
127
|
+
replacedColsExprs := []
|
|
128
|
+
aaColumnsUsed := []
|
|
129
|
+
for pair in pairs {
|
|
130
|
+
aaCol := pair.aa
|
|
131
|
+
ntCol := pair.nt
|
|
132
|
+
translatedBase := translateNtToAaExpr(pt.col(ntCol), codonMapBase)
|
|
133
|
+
translatedReplaced := translateNtToAaExpr(pt.col(ntCol), codonMapReplace)
|
|
134
|
+
expressions = append(expressions, translatedReplaced.alias(aaCol))
|
|
135
|
+
cond := translatedReplaced.neq(translatedBase)
|
|
136
|
+
replacedAnyExprs = append(replacedAnyExprs, cond)
|
|
137
|
+
replacedColsExprs = append(replacedColsExprs, pt.when(cond).then(pt.lit(aaCol)).otherwise(pt.lit("")))
|
|
138
|
+
aaColumnsUsed = append(aaColumnsUsed, aaCol)
|
|
139
|
+
}
|
|
140
|
+
if len(expressions) > 0 {
|
|
141
|
+
df = df.withColumns(expressions...)
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
if len(replacedAnyExprs) > 0 {
|
|
145
|
+
colsList := pt.concatStr(replacedColsExprs, { delimiter: "," })
|
|
146
|
+
colsList = colsList.strReplace(",+", ",", { replaceAll: true }).strReplace("^,|,$", "", { replaceAll: true })
|
|
147
|
+
df = df.withColumns(
|
|
148
|
+
pt.anyHorizontal(replacedAnyExprs...).alias("stopCodonReplaced"),
|
|
149
|
+
colsList.alias("stopCodonReplacedColumns")
|
|
150
|
+
)
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
stopChecks := []
|
|
154
|
+
for colName in aaColumnsUsed {
|
|
155
|
+
stopChecks = append(stopChecks, pt.col(colName).strContains("*", { literal: true }))
|
|
156
|
+
}
|
|
157
|
+
if len(stopChecks) > 0 {
|
|
158
|
+
df = df.filter(pt.anyHorizontal(stopChecks...).eq(false))
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
if !is_undefined(cdr3SeqColumns) && len(cdr3SeqColumns) > 0 {
|
|
162
|
+
regionChecks := []
|
|
163
|
+
for colName in cdr3SeqColumns {
|
|
164
|
+
if !contains(aaColumnsUsed, colName) {
|
|
165
|
+
continue
|
|
166
|
+
}
|
|
167
|
+
regionChecks = append(regionChecks, pt.col(colName).strToUpper().eq("REGION_NOT_COVERED"))
|
|
168
|
+
}
|
|
169
|
+
if len(regionChecks) > 0 {
|
|
170
|
+
df = df.filter(pt.anyHorizontal(regionChecks...).eq(false))
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
return df
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
export {
|
|
178
|
+
applyStopCodonReplacementsPt: applyStopCodonReplacementsPt
|
|
179
|
+
}
|