@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow 1.17.0 → 1.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +11 -0
- package/dist/tengo/lib/calculate-export-specs.lib.tengo +17 -0
- package/dist/tengo/lib/qc-report-columns.lib.tengo +32 -0
- package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
- package/dist/tengo/tpl/export-report.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
- package/dist/tengo/tpl/process.plj.gz +0 -0
- package/dist/tengo/tpl/repseqio-library.plj.gz +0 -0
- package/package.json +4 -4
- package/src/calculate-export-specs.lib.tengo +17 -0
- package/src/export-report.tpl.tengo +144 -0
- package/src/main.tpl.tengo +3 -1
- package/src/mixcr-export.tpl.tengo +169 -2
- package/src/process.tpl.tengo +17 -5
- package/src/qc-report-columns.lib.tengo +32 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.17.0 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
|
|
3
|
+
> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.18.0 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
|
|
4
4
|
> rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow
|
|
2
2
|
|
|
3
|
+
## 1.18.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 656f2fe: stop codon replacement and dep updates
|
|
8
|
+
|
|
9
|
+
### Patch Changes
|
|
10
|
+
|
|
11
|
+
- Updated dependencies [656f2fe]
|
|
12
|
+
- @platforma-open/milaboratories.mixcr-amplicon-alignment.software@1.1.0
|
|
13
|
+
|
|
3
14
|
## 1.17.0
|
|
4
15
|
|
|
5
16
|
### Minor Changes
|
|
@@ -298,6 +298,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
298
298
|
columnsSpecPerClonotypeAggregates += [ sampleCountColumn ]
|
|
299
299
|
|
|
300
300
|
orderP := 80000
|
|
301
|
+
aminoAcidSeqColumns := []
|
|
302
|
+
aminoAcidSeqColumnPairs := []
|
|
303
|
+
cdr3SeqColumns := []
|
|
301
304
|
|
|
302
305
|
|
|
303
306
|
|
|
@@ -325,6 +328,16 @@ inFrameFeatures := {
|
|
|
325
328
|
alphabetShortMixcr := isAminoAcid ? "aa" : "n"
|
|
326
329
|
columnName := alphabetShortMixcr + "Seq" + featureInFrameU
|
|
327
330
|
visibility := featureU == "VDJRegion" || featureU == "CDR3"
|
|
331
|
+
if featureU == "CDR3" {
|
|
332
|
+
cdr3SeqColumns += [ columnName ]
|
|
333
|
+
}
|
|
334
|
+
if isAminoAcid {
|
|
335
|
+
aminoAcidSeqColumns += [ columnName ]
|
|
336
|
+
aminoAcidSeqColumnPairs += [ {
|
|
337
|
+
aa: columnName,
|
|
338
|
+
nt: "nSeq" + featureU
|
|
339
|
+
} ]
|
|
340
|
+
}
|
|
328
341
|
columnsSpecPerClonotypeNoAggregates += [ {
|
|
329
342
|
column: columnName,
|
|
330
343
|
id: alphabetShortMixcr + "-seq-" + featureInFrameL,
|
|
@@ -807,6 +820,7 @@ inFrameFeatures := {
|
|
|
807
820
|
} ]
|
|
808
821
|
|
|
809
822
|
return {
|
|
823
|
+
productiveFeature: productiveFeature,
|
|
810
824
|
clonotypeKeyColumns: clonotypeKeyColumns,
|
|
811
825
|
clonotypeKeyArgs: clonotypeKeyArgs,
|
|
812
826
|
|
|
@@ -816,6 +830,9 @@ inFrameFeatures := {
|
|
|
816
830
|
columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
|
|
817
831
|
columnsSpecPerClonotypeAggregates: columnsSpecPerClonotypeAggregates,
|
|
818
832
|
cdr3DistanceColumnsSpec: cdr3DistanceColumnsSpec,
|
|
833
|
+
aminoAcidSeqColumns: aminoAcidSeqColumns,
|
|
834
|
+
aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
|
|
835
|
+
cdr3SeqColumns: cdr3SeqColumns,
|
|
819
836
|
|
|
820
837
|
columnsSpec: columnsSpec,
|
|
821
838
|
|
|
@@ -571,6 +571,38 @@ getQcReportColumns := func(hasUmi, sampleIdAxisSpec, chains, umiTags) {
|
|
|
571
571
|
}
|
|
572
572
|
}
|
|
573
573
|
},
|
|
574
|
+
{
|
|
575
|
+
column: "assemble.clonotypesDroppedByStopCodons",
|
|
576
|
+
id: "assemble-clonotypes-dropped-by-stop-codons",
|
|
577
|
+
allowNA: true,
|
|
578
|
+
naRegex: "NaN",
|
|
579
|
+
spec: {
|
|
580
|
+
name: "mixcr.com/reports/assemble/clonotypesDroppedByStopCodons",
|
|
581
|
+
valueType: "Long",
|
|
582
|
+
annotations: {
|
|
583
|
+
"pl7.app/min": "0",
|
|
584
|
+
"pl7.app/table/orderPriority": "108200",
|
|
585
|
+
"pl7.app/table/visibility": "optional",
|
|
586
|
+
"pl7.app/label": "Clonotypes Dropped - Stop Codons"
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
},
|
|
590
|
+
{
|
|
591
|
+
column: "assemble.clonotypesDroppedByOutOfFrame",
|
|
592
|
+
id: "assemble-clonotypes-dropped-by-out-of-frame",
|
|
593
|
+
allowNA: true,
|
|
594
|
+
naRegex: "NaN",
|
|
595
|
+
spec: {
|
|
596
|
+
name: "mixcr.com/reports/assemble/clonotypesDroppedByOutOfFrame",
|
|
597
|
+
valueType: "Long",
|
|
598
|
+
annotations: {
|
|
599
|
+
"pl7.app/min": "0",
|
|
600
|
+
"pl7.app/table/orderPriority": "108100",
|
|
601
|
+
"pl7.app/table/visibility": "optional",
|
|
602
|
+
"pl7.app/label": "Clonotypes Dropped - Out of Frame"
|
|
603
|
+
}
|
|
604
|
+
}
|
|
605
|
+
},
|
|
574
606
|
{
|
|
575
607
|
column: "totalClonotypes",
|
|
576
608
|
id: "total-clonotypes",
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow",
|
|
3
|
-
"version": "1.17.0",
|
|
3
|
+
"version": "1.18.0",
|
|
4
4
|
"description": "MiXCR Amplicon Alignment Workflow",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"dependencies": {
|
|
7
|
-
"@platforma-sdk/workflow-tengo": "5.8.
|
|
7
|
+
"@platforma-sdk/workflow-tengo": "5.8.1",
|
|
8
8
|
"@platforma-open/milaboratories.software-mixcr": "4.7.0-279-develop",
|
|
9
9
|
"@platforma-open/milaboratories.software-repseqio": "^2.5.0-13-master",
|
|
10
|
-
"@platforma-open/milaboratories.mixcr-amplicon-alignment.software": "1.
|
|
10
|
+
"@platforma-open/milaboratories.mixcr-amplicon-alignment.software": "1.1.0"
|
|
11
11
|
},
|
|
12
12
|
"devDependencies": {
|
|
13
|
-
"@platforma-sdk/tengo-builder": "2.4.
|
|
13
|
+
"@platforma-sdk/tengo-builder": "2.4.12"
|
|
14
14
|
},
|
|
15
15
|
"scripts": {
|
|
16
16
|
"build": "rm -rf dist && pl-tengo check && pl-tengo build",
|
|
@@ -298,6 +298,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
298
298
|
columnsSpecPerClonotypeAggregates += [ sampleCountColumn ]
|
|
299
299
|
|
|
300
300
|
orderP := 80000
|
|
301
|
+
aminoAcidSeqColumns := []
|
|
302
|
+
aminoAcidSeqColumnPairs := []
|
|
303
|
+
cdr3SeqColumns := []
|
|
301
304
|
|
|
302
305
|
// Sequences
|
|
303
306
|
|
|
@@ -325,6 +328,16 @@ inFrameFeatures := {
|
|
|
325
328
|
alphabetShortMixcr := isAminoAcid ? "aa" : "n"
|
|
326
329
|
columnName := alphabetShortMixcr + "Seq" + featureInFrameU
|
|
327
330
|
visibility := featureU == "VDJRegion" || featureU == "CDR3"
|
|
331
|
+
if featureU == "CDR3" {
|
|
332
|
+
cdr3SeqColumns += [ columnName ]
|
|
333
|
+
}
|
|
334
|
+
if isAminoAcid {
|
|
335
|
+
aminoAcidSeqColumns += [ columnName ]
|
|
336
|
+
aminoAcidSeqColumnPairs += [ {
|
|
337
|
+
aa: columnName,
|
|
338
|
+
nt: "nSeq" + featureU
|
|
339
|
+
} ]
|
|
340
|
+
}
|
|
328
341
|
columnsSpecPerClonotypeNoAggregates += [ {
|
|
329
342
|
column: columnName,
|
|
330
343
|
id: alphabetShortMixcr + "-seq-" + featureInFrameL,
|
|
@@ -807,6 +820,7 @@ inFrameFeatures := {
|
|
|
807
820
|
} ]
|
|
808
821
|
|
|
809
822
|
return {
|
|
823
|
+
productiveFeature: productiveFeature,
|
|
810
824
|
clonotypeKeyColumns: clonotypeKeyColumns,
|
|
811
825
|
clonotypeKeyArgs: clonotypeKeyArgs,
|
|
812
826
|
|
|
@@ -816,6 +830,9 @@ inFrameFeatures := {
|
|
|
816
830
|
columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
|
|
817
831
|
columnsSpecPerClonotypeAggregates: columnsSpecPerClonotypeAggregates,
|
|
818
832
|
cdr3DistanceColumnsSpec: cdr3DistanceColumnsSpec,
|
|
833
|
+
aminoAcidSeqColumns: aminoAcidSeqColumns,
|
|
834
|
+
aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
|
|
835
|
+
cdr3SeqColumns: cdr3SeqColumns,
|
|
819
836
|
|
|
820
837
|
columnsSpec: columnsSpec,
|
|
821
838
|
|
|
@@ -32,6 +32,75 @@ self.body(func(inputs) {
|
|
|
32
32
|
|
|
33
33
|
umiTags := inputs.umiTags
|
|
34
34
|
hasUmi := !is_undefined(umiTags) && len(umiTags) > 0
|
|
35
|
+
stopCodonTypes := inputs.stopCodonTypes
|
|
36
|
+
stopCodonReplacements := inputs.stopCodonReplacements
|
|
37
|
+
if is_undefined(stopCodonTypes) || !is_array(stopCodonTypes) {
|
|
38
|
+
stopCodonTypes = []
|
|
39
|
+
}
|
|
40
|
+
useStopCodonReplacement := !is_undefined(stopCodonTypes) && is_array(stopCodonTypes) && len(stopCodonTypes) > 0
|
|
41
|
+
if is_undefined(stopCodonReplacements) || !is_map(stopCodonReplacements) {
|
|
42
|
+
stopCodonReplacements = {}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
featureForFlags := inputs.productiveFeature
|
|
46
|
+
if is_undefined(featureForFlags) || featureForFlags == "" {
|
|
47
|
+
featureForFlags = "CDR3"
|
|
48
|
+
}
|
|
49
|
+
if is_array(featureForFlags) && len(featureForFlags) > 0 {
|
|
50
|
+
featureForFlags = featureForFlags[0]
|
|
51
|
+
}
|
|
52
|
+
isOOFColumn := "isOOF" + featureForFlags
|
|
53
|
+
hasStopsColumn := "hasStopsIn" + featureForFlags
|
|
54
|
+
|
|
55
|
+
contains := func(arr, value) {
|
|
56
|
+
for v in arr {
|
|
57
|
+
if v == value { return true }
|
|
58
|
+
}
|
|
59
|
+
return false
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
stopReplacement := func(stopType) {
|
|
63
|
+
if !contains(stopCodonTypes, stopType) {
|
|
64
|
+
return "*"
|
|
65
|
+
}
|
|
66
|
+
aa := stopCodonReplacements[stopType]
|
|
67
|
+
if is_undefined(aa) || aa == "" {
|
|
68
|
+
return "*"
|
|
69
|
+
}
|
|
70
|
+
return text.to_upper(aa)
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
codonMapReplace := {
|
|
74
|
+
"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
|
|
75
|
+
"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
|
|
76
|
+
"TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
|
|
77
|
+
"TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
|
|
78
|
+
"TGA": stopReplacement("opal"), "TGG": "W",
|
|
79
|
+
"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
|
|
80
|
+
"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
|
|
81
|
+
"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
|
|
82
|
+
"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
|
|
83
|
+
"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
|
|
84
|
+
"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
|
|
85
|
+
"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
|
|
86
|
+
"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
|
|
87
|
+
"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
|
|
88
|
+
"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
|
|
89
|
+
"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
|
|
90
|
+
"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
translateNtToAaExpr := func(ntExpr, codonMap) {
|
|
94
|
+
seq := ntExpr.fillNull("").strToUpper()
|
|
95
|
+
seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
|
|
96
|
+
for codon, aa in codonMap {
|
|
97
|
+
seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
|
|
98
|
+
}
|
|
99
|
+
seq = seq.strReplace("\\|$", "", { replaceAll: false })
|
|
100
|
+
seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
|
|
101
|
+
seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
|
|
102
|
+
return seq
|
|
103
|
+
}
|
|
35
104
|
|
|
36
105
|
chainInfos := {
|
|
37
106
|
"IGHeavy": { mixcrFilter: "IGH", name: "IG Heavy", shortName: "Heavy" },
|
|
@@ -124,6 +193,77 @@ self.body(func(inputs) {
|
|
|
124
193
|
// Join counts and overwrite totalClonotypes to reflect exported (productive) clones
|
|
125
194
|
joinedDf := processedDf.join(aggregatedCounts, { how: "left", on: ["sampleId"] })
|
|
126
195
|
|
|
196
|
+
// Count clonotypes filtered by stop codons and out-of-frame per sample
|
|
197
|
+
filterCountDfs := []
|
|
198
|
+
mixcrChainsArg := text.join(chainsForMixcr, ",")
|
|
199
|
+
for key, clnsFile in clnsFiles {
|
|
200
|
+
sampleId := json.decode(key)[0]
|
|
201
|
+
exportFiltersCmd := exec.builder().
|
|
202
|
+
inMediumQueue().
|
|
203
|
+
mem("16GiB").
|
|
204
|
+
cpu(2).
|
|
205
|
+
software(mixcrSw).
|
|
206
|
+
env("MI_USE_SYSTEM_CA", "true").
|
|
207
|
+
secret("MI_LICENSE", "MI_LICENSE").
|
|
208
|
+
arg("exportClones").
|
|
209
|
+
arg("--dont-split-files").
|
|
210
|
+
arg("--drop-default-fields").
|
|
211
|
+
arg("--reset-export-clone-table-splitting")
|
|
212
|
+
if mixcrChainsArg != "" {
|
|
213
|
+
exportFiltersCmd.arg("--chains").arg(mixcrChainsArg)
|
|
214
|
+
}
|
|
215
|
+
exportFiltersCmd = exportFiltersCmd.
|
|
216
|
+
arg("-isOOF").arg(featureForFlags).
|
|
217
|
+
arg("-hasStops").arg(featureForFlags)
|
|
218
|
+
if useStopCodonReplacement {
|
|
219
|
+
exportFiltersCmd = exportFiltersCmd.arg("-nFeature").arg(featureForFlags)
|
|
220
|
+
}
|
|
221
|
+
exportFiltersCmd = exportFiltersCmd.
|
|
222
|
+
arg("clones.clns").
|
|
223
|
+
addFile("clones.clns", clnsFile).
|
|
224
|
+
arg("clones.tsv").
|
|
225
|
+
saveFile("clones.tsv")
|
|
226
|
+
if library {
|
|
227
|
+
if isLibraryFileGzipped {
|
|
228
|
+
exportFiltersCmd.addFile("library.json.gz", library)
|
|
229
|
+
} else {
|
|
230
|
+
exportFiltersCmd.addFile("library.json", library)
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
exportFiltersResult := exportFiltersCmd.cacheHours(3).run()
|
|
234
|
+
filterTsv := exportFiltersResult.getFile("clones.tsv")
|
|
235
|
+
schema := [ { column: isOOFColumn, type: "String" }, { column: hasStopsColumn, type: "String" } ]
|
|
236
|
+
if useStopCodonReplacement {
|
|
237
|
+
schema = append(schema, { column: "nSeq" + featureForFlags, type: "String" })
|
|
238
|
+
}
|
|
239
|
+
dfFilters := wf.frame(filterTsv, { xsvType: "tsv", inferSchema: false, schema: schema })
|
|
240
|
+
stopExpr := pt.when(pt.col(hasStopsColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0))
|
|
241
|
+
if useStopCodonReplacement {
|
|
242
|
+
translated := translateNtToAaExpr(pt.col("nSeq" + featureForFlags), codonMapReplace)
|
|
243
|
+
stopExpr = pt.when(translated.strContains("*", { literal: true })).then(pt.lit(1)).otherwise(pt.lit(0))
|
|
244
|
+
}
|
|
245
|
+
dfFilters = dfFilters.withColumns(
|
|
246
|
+
pt.when(pt.col(isOOFColumn).strToUpper().eq("TRUE")).then(pt.lit(1)).otherwise(pt.lit(0)).alias("__oof"),
|
|
247
|
+
stopExpr.alias("__stop")
|
|
248
|
+
)
|
|
249
|
+
dfFilterCount := dfFilters.select(
|
|
250
|
+
pt.lit(sampleId).alias("sampleId"),
|
|
251
|
+
pt.col("__oof").sum().alias("assemble.clonotypesDroppedByOutOfFrame"),
|
|
252
|
+
pt.col("__stop").sum().alias("assemble.clonotypesDroppedByStopCodons")
|
|
253
|
+
)
|
|
254
|
+
filterCountDfs = append(filterCountDfs, dfFilterCount)
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
if len(filterCountDfs) > 0 {
|
|
258
|
+
filterCountsDf := len(filterCountDfs) > 1 ? pt.concat(filterCountDfs) : filterCountDfs[0]
|
|
259
|
+
joinedDf = joinedDf.join(filterCountsDf, { how: "left", on: ["sampleId"] })
|
|
260
|
+
} else {
|
|
261
|
+
joinedDf = joinedDf.withColumns(
|
|
262
|
+
pt.lit(0).alias("assemble.clonotypesDroppedByOutOfFrame"),
|
|
263
|
+
pt.lit(0).alias("assemble.clonotypesDroppedByStopCodons")
|
|
264
|
+
)
|
|
265
|
+
}
|
|
266
|
+
|
|
127
267
|
// Per-chain clonotype counts
|
|
128
268
|
perChainJoined := joinedDf
|
|
129
269
|
for chain in chains {
|
|
@@ -159,6 +299,10 @@ self.body(func(inputs) {
|
|
|
159
299
|
pt.col("exportedClonotypes").fillNull(0).cast("Long").alias("totalClonotypes"),
|
|
160
300
|
pt.col("readsUsedInClonotypesNew").fillNull(0).cast("Long").alias("readsUsedInClonotypes")
|
|
161
301
|
)
|
|
302
|
+
finalDf = finalDf.withColumns(
|
|
303
|
+
pt.col("assemble.clonotypesDroppedByOutOfFrame").fillNull(0).cast("Long").alias("assemble.clonotypesDroppedByOutOfFrame"),
|
|
304
|
+
pt.col("assemble.clonotypesDroppedByStopCodons").fillNull(0).cast("Long").alias("assemble.clonotypesDroppedByStopCodons")
|
|
305
|
+
)
|
|
162
306
|
for chain in chains {
|
|
163
307
|
col := "clonotypesByChain." + chain
|
|
164
308
|
finalDf = finalDf.withColumns(pt.col(col).fillNull(0).cast("Long").alias(col))
|
package/src/main.tpl.tengo
CHANGED
|
@@ -68,7 +68,9 @@ wf.body(func(args) {
|
|
|
68
68
|
mixcrChains: chainInfos[chains].mixcrFilter,
|
|
69
69
|
cloneClusteringMode: cloneClusteringMode,
|
|
70
70
|
tagPattern: args.tagPattern,
|
|
71
|
-
assemblingFeature: args.assemblingFeature
|
|
71
|
+
assemblingFeature: args.assemblingFeature,
|
|
72
|
+
stopCodonTypes: args.stopCodonTypes,
|
|
73
|
+
stopCodonReplacements: args.stopCodonReplacements
|
|
72
74
|
})
|
|
73
75
|
})
|
|
74
76
|
|
|
@@ -9,11 +9,157 @@ pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
|
9
9
|
clonotypeLabel := import(":clonotype-label")
|
|
10
10
|
|
|
11
11
|
json := import("json")
|
|
12
|
+
text := import("text")
|
|
12
13
|
|
|
13
14
|
mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:main")
|
|
14
15
|
|
|
15
16
|
self.defineOutputs("tsv")
|
|
16
17
|
|
|
18
|
+
applyStopCodonReplacementsPt := func(df, opts) {
|
|
19
|
+
if is_undefined(opts) {
|
|
20
|
+
return df
|
|
21
|
+
}
|
|
22
|
+
aminoAcidSeqColumns := opts.aminoAcidSeqColumns
|
|
23
|
+
cdr3SeqColumns := opts.cdr3SeqColumns
|
|
24
|
+
stopCodonTypes := opts.stopCodonTypes
|
|
25
|
+
stopCodonReplacements := opts.stopCodonReplacements
|
|
26
|
+
|
|
27
|
+
if is_undefined(aminoAcidSeqColumns) || len(aminoAcidSeqColumns) == 0 {
|
|
28
|
+
return df
|
|
29
|
+
}
|
|
30
|
+
if is_undefined(stopCodonTypes) || !is_array(stopCodonTypes) || len(stopCodonTypes) == 0 {
|
|
31
|
+
return df
|
|
32
|
+
}
|
|
33
|
+
if !is_undefined(stopCodonReplacements) && !is_map(stopCodonReplacements) {
|
|
34
|
+
stopCodonReplacements = undefined
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
contains := func(arr, value) {
|
|
38
|
+
for v in arr {
|
|
39
|
+
if v == value { return true }
|
|
40
|
+
}
|
|
41
|
+
return false
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
stopReplacement := func(stopType) {
|
|
45
|
+
if !contains(stopCodonTypes, stopType) {
|
|
46
|
+
return "*"
|
|
47
|
+
}
|
|
48
|
+
if is_undefined(stopCodonReplacements) {
|
|
49
|
+
return "*"
|
|
50
|
+
}
|
|
51
|
+
aa := stopCodonReplacements[stopType]
|
|
52
|
+
if is_undefined(aa) || aa == "" {
|
|
53
|
+
return "*"
|
|
54
|
+
}
|
|
55
|
+
return text.to_upper(aa)
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
codonMapBase := {
|
|
59
|
+
"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
|
|
60
|
+
"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
|
|
61
|
+
"TAT": "Y", "TAC": "Y", "TAA": "*",
|
|
62
|
+
"TAG": "*", "TGT": "C", "TGC": "C",
|
|
63
|
+
"TGA": "*", "TGG": "W",
|
|
64
|
+
"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
|
|
65
|
+
"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
|
|
66
|
+
"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
|
|
67
|
+
"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
|
|
68
|
+
"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
|
|
69
|
+
"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
|
|
70
|
+
"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
|
|
71
|
+
"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
|
|
72
|
+
"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
|
|
73
|
+
"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
|
|
74
|
+
"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
|
|
75
|
+
"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
codonMapReplace := {
|
|
79
|
+
"TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
|
|
80
|
+
"TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
|
|
81
|
+
"TAT": "Y", "TAC": "Y", "TAA": stopReplacement("ochre"),
|
|
82
|
+
"TAG": stopReplacement("amber"), "TGT": "C", "TGC": "C",
|
|
83
|
+
"TGA": stopReplacement("opal"), "TGG": "W",
|
|
84
|
+
"CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
|
|
85
|
+
"CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
|
|
86
|
+
"CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
|
|
87
|
+
"CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
|
|
88
|
+
"ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
|
|
89
|
+
"ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
|
|
90
|
+
"AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
|
|
91
|
+
"AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
|
|
92
|
+
"GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
|
|
93
|
+
"GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
|
|
94
|
+
"GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
|
|
95
|
+
"GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G"
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
translateNtToAaExpr := func(ntExpr, codonMap) {
|
|
99
|
+
seq := ntExpr.fillNull("").strToUpper()
|
|
100
|
+
seq = seq.strReplace("(.{3})", "$1|", { replaceAll: true })
|
|
101
|
+
for codon, aa in codonMap {
|
|
102
|
+
seq = seq.strReplace(codon + "|", aa + "|", { replaceAll: true, literal: true })
|
|
103
|
+
}
|
|
104
|
+
seq = seq.strReplace("\\|$", "", { replaceAll: false })
|
|
105
|
+
seq = seq.strReplace("|", "", { replaceAll: true, literal: true })
|
|
106
|
+
seq = seq.strReplace("[ACGT]{1,2}$", "", { replaceAll: true })
|
|
107
|
+
return seq
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
pairs := []
|
|
111
|
+
for aaCol in aminoAcidSeqColumns {
|
|
112
|
+
ntCol := text.replace(aaCol, "aaSeq", "nSeq", 1)
|
|
113
|
+
pairs = append(pairs, { aa: aaCol, nt: ntCol })
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
expressions := []
|
|
117
|
+
replacedAnyExprs := []
|
|
118
|
+
replacedColsExprs := []
|
|
119
|
+
for pair in pairs {
|
|
120
|
+
aaCol := pair.aa
|
|
121
|
+
ntCol := pair.nt
|
|
122
|
+
translatedBase := translateNtToAaExpr(pt.col(ntCol), codonMapBase)
|
|
123
|
+
translatedReplaced := translateNtToAaExpr(pt.col(ntCol), codonMapReplace)
|
|
124
|
+
expressions = append(expressions, translatedReplaced.alias(aaCol))
|
|
125
|
+
cond := translatedReplaced.neq(translatedBase)
|
|
126
|
+
replacedAnyExprs = append(replacedAnyExprs, cond)
|
|
127
|
+
replacedColsExprs = append(replacedColsExprs, pt.when(cond).then(pt.lit(aaCol)).otherwise(pt.lit("")))
|
|
128
|
+
}
|
|
129
|
+
if len(expressions) > 0 {
|
|
130
|
+
df = df.withColumns(expressions...)
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
if len(replacedAnyExprs) > 0 {
|
|
134
|
+
colsList := pt.concatStr(replacedColsExprs, { delimiter: "," })
|
|
135
|
+
colsList = colsList.strReplace(",+", ",", { replaceAll: true }).strReplace("^,|,$", "", { replaceAll: true })
|
|
136
|
+
df = df.withColumns(
|
|
137
|
+
pt.anyHorizontal(replacedAnyExprs...).alias("stopCodonReplaced"),
|
|
138
|
+
colsList.alias("stopCodonReplacedColumns")
|
|
139
|
+
)
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
stopChecks := []
|
|
143
|
+
for colName in aminoAcidSeqColumns {
|
|
144
|
+
stopChecks = append(stopChecks, pt.col(colName).strContains("*", { literal: true }))
|
|
145
|
+
}
|
|
146
|
+
if len(stopChecks) > 0 {
|
|
147
|
+
df = df.filter(pt.anyHorizontal(stopChecks...).eq(false))
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
if !is_undefined(cdr3SeqColumns) && len(cdr3SeqColumns) > 0 {
|
|
151
|
+
regionChecks := []
|
|
152
|
+
for colName in cdr3SeqColumns {
|
|
153
|
+
regionChecks = append(regionChecks, pt.col(colName).strToUpper().eq("REGION_NOT_COVERED"))
|
|
154
|
+
}
|
|
155
|
+
if len(regionChecks) > 0 {
|
|
156
|
+
df = df.filter(pt.anyHorizontal(regionChecks...).eq(false))
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
return df
|
|
161
|
+
}
|
|
162
|
+
|
|
17
163
|
self.body(func(inputs) {
|
|
18
164
|
clnsFile := inputs[pConstants.VALUE_FIELD_NAME]
|
|
19
165
|
|
|
@@ -22,6 +168,13 @@ self.body(func(inputs) {
|
|
|
22
168
|
|
|
23
169
|
clonotypeKeyColumns := params.clonotypeKeyColumns
|
|
24
170
|
mainIsProductiveColumn := params.mainIsProductiveColumn
|
|
171
|
+
aminoAcidSeqColumns := params.aminoAcidSeqColumns
|
|
172
|
+
aminoAcidSeqColumnPairs := params.aminoAcidSeqColumnPairs
|
|
173
|
+
cdr3SeqColumns := params.cdr3SeqColumns
|
|
174
|
+
stopCodonTypes := params.stopCodonTypes
|
|
175
|
+
stopCodonReplacements := params.stopCodonReplacements
|
|
176
|
+
|
|
177
|
+
useProductiveFilter := is_undefined(stopCodonTypes) || len(stopCodonTypes) == 0
|
|
25
178
|
|
|
26
179
|
hashKeyDerivationExpressionPt := func(sourceColumns) {
|
|
27
180
|
return pt.concatStr(
|
|
@@ -42,8 +195,13 @@ self.body(func(inputs) {
|
|
|
42
195
|
arg("--dont-split-files").
|
|
43
196
|
arg("--drop-default-fields").
|
|
44
197
|
arg("--reset-export-clone-table-splitting").
|
|
45
|
-
arg("--chains").arg(params.mixcrChains)
|
|
46
|
-
|
|
198
|
+
arg("--chains").arg(params.mixcrChains)
|
|
199
|
+
|
|
200
|
+
if useProductiveFilter {
|
|
201
|
+
mixcrCmdBuilder = mixcrCmdBuilder.arg("--export-productive-clones-only")
|
|
202
|
+
} else {
|
|
203
|
+
mixcrCmdBuilder = mixcrCmdBuilder.arg("--filter-out-of-frames")
|
|
204
|
+
}
|
|
47
205
|
|
|
48
206
|
additionalAction(mixcrCmdBuilder)
|
|
49
207
|
|
|
@@ -96,6 +254,15 @@ self.body(func(inputs) {
|
|
|
96
254
|
alias(mainIsProductiveColumn)
|
|
97
255
|
)
|
|
98
256
|
}
|
|
257
|
+
if !is_undefined(stopCodonTypes) && len(stopCodonTypes) > 0 {
|
|
258
|
+
dfMain = applyStopCodonReplacementsPt(dfMain, {
|
|
259
|
+
aminoAcidSeqColumns: aminoAcidSeqColumns,
|
|
260
|
+
aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
|
|
261
|
+
cdr3SeqColumns: cdr3SeqColumns,
|
|
262
|
+
stopCodonTypes: stopCodonTypes,
|
|
263
|
+
stopCodonReplacements: stopCodonReplacements
|
|
264
|
+
})
|
|
265
|
+
}
|
|
99
266
|
dfMain.addColumns(
|
|
100
267
|
hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey")
|
|
101
268
|
)
|
package/src/process.tpl.tengo
CHANGED
|
@@ -94,7 +94,11 @@ self.body(func(inputs) {
|
|
|
94
94
|
mainAbundanceColumnNormalized := exportSpecs.mainAbundanceColumnNormalized
|
|
95
95
|
mainAbundanceColumnUnnormalized := exportSpecs.mainAbundanceColumnUnnormalized
|
|
96
96
|
mainIsProductiveColumn := exportSpecs.mainIsProductiveColumn
|
|
97
|
+
productiveFeature := exportSpecs.productiveFeature
|
|
97
98
|
axesByClonotypeKey := exportSpecs.axesByClonotypeKey
|
|
99
|
+
aminoAcidSeqColumns := exportSpecs.aminoAcidSeqColumns
|
|
100
|
+
aminoAcidSeqColumnPairs := exportSpecs.aminoAcidSeqColumnPairs
|
|
101
|
+
cdr3SeqColumns := exportSpecs.cdr3SeqColumns
|
|
98
102
|
|
|
99
103
|
columnsToSchema := func(columns) {
|
|
100
104
|
schema := []
|
|
@@ -256,13 +260,18 @@ self.body(func(inputs) {
|
|
|
256
260
|
exportOutputs,
|
|
257
261
|
{
|
|
258
262
|
extra: {
|
|
259
|
-
params: {
|
|
263
|
+
params: maps.clone({
|
|
260
264
|
clonotypeKeyColumns: clonotypeKeyColumns,
|
|
261
265
|
exportArgs: exportArgs,
|
|
262
266
|
referenceLibrary: referenceLibrary,
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
267
|
+
mixcrChains: mixcrChains,
|
|
268
|
+
mainIsProductiveColumn: mainIsProductiveColumn,
|
|
269
|
+
aminoAcidSeqColumns: aminoAcidSeqColumns,
|
|
270
|
+
aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
|
|
271
|
+
cdr3SeqColumns: cdr3SeqColumns,
|
|
272
|
+
stopCodonTypes: params.stopCodonTypes,
|
|
273
|
+
stopCodonReplacements: params.stopCodonReplacements
|
|
274
|
+
}, { removeUndefs: true })
|
|
266
275
|
}
|
|
267
276
|
}
|
|
268
277
|
)
|
|
@@ -346,7 +355,10 @@ self.body(func(inputs) {
|
|
|
346
355
|
isLibraryFileGzipped: false,
|
|
347
356
|
clonotypeTablesData: clonotypeTablesData,
|
|
348
357
|
hasUmi: hasUMI,
|
|
349
|
-
umiTags: umiTags
|
|
358
|
+
umiTags: umiTags,
|
|
359
|
+
productiveFeature: productiveFeature,
|
|
360
|
+
stopCodonTypes: params.stopCodonTypes,
|
|
361
|
+
stopCodonReplacements: params.stopCodonReplacements
|
|
350
362
|
})
|
|
351
363
|
|
|
352
364
|
return {
|
|
@@ -571,6 +571,38 @@ getQcReportColumns := func(hasUmi, sampleIdAxisSpec, chains, umiTags) {
|
|
|
571
571
|
}
|
|
572
572
|
}
|
|
573
573
|
},
|
|
574
|
+
{
|
|
575
|
+
column: "assemble.clonotypesDroppedByStopCodons",
|
|
576
|
+
id: "assemble-clonotypes-dropped-by-stop-codons",
|
|
577
|
+
allowNA: true,
|
|
578
|
+
naRegex: "NaN",
|
|
579
|
+
spec: {
|
|
580
|
+
name: "mixcr.com/reports/assemble/clonotypesDroppedByStopCodons",
|
|
581
|
+
valueType: "Long",
|
|
582
|
+
annotations: {
|
|
583
|
+
"pl7.app/min": "0",
|
|
584
|
+
"pl7.app/table/orderPriority": "108200",
|
|
585
|
+
"pl7.app/table/visibility": "optional",
|
|
586
|
+
"pl7.app/label": "Clonotypes Dropped - Stop Codons"
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
},
|
|
590
|
+
{
|
|
591
|
+
column: "assemble.clonotypesDroppedByOutOfFrame",
|
|
592
|
+
id: "assemble-clonotypes-dropped-by-out-of-frame",
|
|
593
|
+
allowNA: true,
|
|
594
|
+
naRegex: "NaN",
|
|
595
|
+
spec: {
|
|
596
|
+
name: "mixcr.com/reports/assemble/clonotypesDroppedByOutOfFrame",
|
|
597
|
+
valueType: "Long",
|
|
598
|
+
annotations: {
|
|
599
|
+
"pl7.app/min": "0",
|
|
600
|
+
"pl7.app/table/orderPriority": "108100",
|
|
601
|
+
"pl7.app/table/visibility": "optional",
|
|
602
|
+
"pl7.app/label": "Clonotypes Dropped - Out of Frame"
|
|
603
|
+
}
|
|
604
|
+
}
|
|
605
|
+
},
|
|
574
606
|
{
|
|
575
607
|
column: "totalClonotypes",
|
|
576
608
|
id: "total-clonotypes",
|