@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.23.4 → 3.23.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +12 -0
- package/dist/tengo/lib/calculate-export-specs.lib.tengo +80 -1
- package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
- package/dist/tengo/tpl/calculate-preset-info.plj.gz +0 -0
- package/dist/tengo/tpl/export-report.plj.gz +0 -0
- package/dist/tengo/tpl/list-presets.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/dist/tengo/tpl/process-single-cell.plj.gz +0 -0
- package/dist/tengo/tpl/process.plj.gz +0 -0
- package/dist/tengo/tpl/test.columns-calculate.plj.gz +0 -0
- package/dist/tengo/tpl/test.columns.test.plj.gz +0 -0
- package/package.json +1 -1
- package/src/aggregate-by-clonotype-key.tpl.tengo +23 -0
- package/src/calculate-export-specs.lib.tengo +80 -1
- package/src/mixcr-export.tpl.tengo +5 -5
- package/src/process-single-cell.tpl.tengo +101 -4
- package/src/process.tpl.tengo +100 -2
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.23.4 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
|
|
3
|
+
> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.23.6 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
|
|
4
4
|
> shx rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
info: Skipping unknown file type: test/columns.test.ts
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.mixcr-clonotyping.workflow
|
|
2
2
|
|
|
3
|
+
## 3.23.6
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- b8a3a50: New mutation columns
|
|
8
|
+
|
|
9
|
+
## 3.23.5
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- 5279399: Fix memory request for pt invocation
|
|
14
|
+
|
|
3
15
|
## 3.23.4
|
|
4
16
|
|
|
5
17
|
### Patch Changes
|
|
@@ -767,6 +767,83 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
767
767
|
|
|
768
768
|
|
|
769
769
|
|
|
770
|
+
shmMapping := []
|
|
771
|
+
|
|
772
|
+
if assemblingFeature == "VDJRegion" {
|
|
773
|
+
nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
|
|
774
|
+
|
|
775
|
+
crossRegionMutations := [ {
|
|
776
|
+
exportFlag: "nMutationsCount",
|
|
777
|
+
feature: nMutationsFeature,
|
|
778
|
+
id: "n-mutations-total",
|
|
779
|
+
label: "Nt mutations",
|
|
780
|
+
specName: "pl7.app/vdj/sequence/nMutations",
|
|
781
|
+
rankingOrder: "decreasing",
|
|
782
|
+
outputColumn: "nMutations"
|
|
783
|
+
}, {
|
|
784
|
+
exportFlag: "aaMutationsCount",
|
|
785
|
+
feature: "CDR1,CDR2",
|
|
786
|
+
id: "aa-mutations-cdr",
|
|
787
|
+
label: "AA mutations (CDR)",
|
|
788
|
+
specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
|
|
789
|
+
rankingOrder: "decreasing",
|
|
790
|
+
outputColumn: "nAAMutationsCDR"
|
|
791
|
+
}, {
|
|
792
|
+
exportFlag: "aaMutationsCount",
|
|
793
|
+
feature: "FR1,FR2,FR3,FR4",
|
|
794
|
+
id: "aa-mutations-fwr",
|
|
795
|
+
label: "AA mutations (FWR)",
|
|
796
|
+
specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
|
|
797
|
+
rankingOrder: "increasing",
|
|
798
|
+
outputColumn: "nAAMutationsFWR"
|
|
799
|
+
} ]
|
|
800
|
+
|
|
801
|
+
for col in crossRegionMutations {
|
|
802
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
803
|
+
column: col.exportFlag + col.feature,
|
|
804
|
+
id: col.id,
|
|
805
|
+
allowNA: true,
|
|
806
|
+
naRegex: "region_not_covered",
|
|
807
|
+
spec: {
|
|
808
|
+
valueType: "Int",
|
|
809
|
+
name: col.specName,
|
|
810
|
+
annotations: a(orderP, false, {
|
|
811
|
+
"pl7.app/label": col.label,
|
|
812
|
+
"pl7.app/isScore": "true",
|
|
813
|
+
"pl7.app/score/rankingOrder": col.rankingOrder
|
|
814
|
+
})
|
|
815
|
+
}
|
|
816
|
+
} ]
|
|
817
|
+
exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
|
|
818
|
+
shmMapping = append(shmMapping, {
|
|
819
|
+
outputColumn: col.outputColumn,
|
|
820
|
+
tsvColumn: col.exportFlag + col.feature
|
|
821
|
+
})
|
|
822
|
+
orderP -= 100
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
827
|
+
column: "fractionCDRMutations",
|
|
828
|
+
id: "fraction-cdr-mutations",
|
|
829
|
+
naRegex: "^[a-z_]*$",
|
|
830
|
+
allowNA: true,
|
|
831
|
+
spec: {
|
|
832
|
+
valueType: "Double",
|
|
833
|
+
name: "pl7.app/vdj/sequence/fractionCDRMutations",
|
|
834
|
+
annotations: a(orderP, false, {
|
|
835
|
+
"pl7.app/label": "CDR mutation fraction",
|
|
836
|
+
"pl7.app/isScore": "true",
|
|
837
|
+
"pl7.app/score/rankingOrder": "decreasing",
|
|
838
|
+
"pl7.app/format": ".2f"
|
|
839
|
+
})
|
|
840
|
+
}
|
|
841
|
+
} ]
|
|
842
|
+
orderP -= 100
|
|
843
|
+
}
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
|
|
770
847
|
flagColumnVariants := [ {
|
|
771
848
|
columnPrefix: "isProductive",
|
|
772
849
|
arg: "-isProductive",
|
|
@@ -1042,7 +1119,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
1042
1119
|
exportArgs: exportArgs,
|
|
1043
1120
|
|
|
1044
1121
|
hashCellKey: hashCellKey,
|
|
1045
|
-
cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen
|
|
1122
|
+
cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen,
|
|
1123
|
+
|
|
1124
|
+
shmMapping: shmMapping
|
|
1046
1125
|
}
|
|
1047
1126
|
}
|
|
1048
1127
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -78,11 +78,16 @@ self.body(func(inputs) {
|
|
|
78
78
|
}
|
|
79
79
|
|
|
80
80
|
aggExpressions := []
|
|
81
|
+
hasFractionCDRMutations := false
|
|
81
82
|
|
|
82
83
|
for colDef in schemaPerClonotypeNoAggregates {
|
|
83
84
|
if colDef.column == "clonotypeLabel" || colDef.column == "nLengthTotalAdded" {
|
|
84
85
|
continue
|
|
85
86
|
}
|
|
87
|
+
if colDef.column == "fractionCDRMutations" {
|
|
88
|
+
hasFractionCDRMutations = true
|
|
89
|
+
continue
|
|
90
|
+
}
|
|
86
91
|
aggExpressions = append(aggExpressions,
|
|
87
92
|
pt.col(colDef.column).maxBy(pt.col(mainAbundanceColumnNormalized)).alias(colDef.column)
|
|
88
93
|
)
|
|
@@ -104,6 +109,24 @@ self.body(func(inputs) {
|
|
|
104
109
|
alias("nLengthTotalAdded")
|
|
105
110
|
)
|
|
106
111
|
|
|
112
|
+
// Calculate CDR mutation fraction: CDR / (CDR + FWR), fallback 1.0 when NA or zero denominator
|
|
113
|
+
if hasFractionCDRMutations {
|
|
114
|
+
cdr := "aaMutationsCountCDR1,CDR2"
|
|
115
|
+
fwr := "aaMutationsCountFR1,FR2,FR3,FR4"
|
|
116
|
+
aggregatedDf = aggregatedDf.withColumns(
|
|
117
|
+
pt.when(
|
|
118
|
+
pt.col(cdr).isNotNull().
|
|
119
|
+
and(pt.col(fwr).isNotNull()).
|
|
120
|
+
and(pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double")).gt(0.0))
|
|
121
|
+
).then(
|
|
122
|
+
pt.col(cdr).cast("Double").truediv(
|
|
123
|
+
pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double"))
|
|
124
|
+
)
|
|
125
|
+
).otherwise(pt.lit(1.0)).
|
|
126
|
+
alias("fractionCDRMutations")
|
|
127
|
+
)
|
|
128
|
+
}
|
|
129
|
+
|
|
107
130
|
aggregatedDf = clonotypeLabel.addClonotypeLabelColumnsPt(aggregatedDf, "clonotypeKey", "clonotypeLabel", pt)
|
|
108
131
|
|
|
109
132
|
aggregatedDf.save("output.tsv")
|
|
@@ -765,6 +765,83 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
765
765
|
}
|
|
766
766
|
}
|
|
767
767
|
|
|
768
|
+
// Cross-region SHM mutation counts (only for full VDJRegion assembling)
|
|
769
|
+
// shmMapping: [{ outputColumn, tsvColumn }] for single-cell chain summing
|
|
770
|
+
shmMapping := []
|
|
771
|
+
|
|
772
|
+
if assemblingFeature == "VDJRegion" {
|
|
773
|
+
nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
|
|
774
|
+
|
|
775
|
+
crossRegionMutations := [ {
|
|
776
|
+
exportFlag: "nMutationsCount",
|
|
777
|
+
feature: nMutationsFeature,
|
|
778
|
+
id: "n-mutations-total",
|
|
779
|
+
label: "Nt mutations",
|
|
780
|
+
specName: "pl7.app/vdj/sequence/nMutations",
|
|
781
|
+
rankingOrder: "decreasing",
|
|
782
|
+
outputColumn: "nMutations"
|
|
783
|
+
}, {
|
|
784
|
+
exportFlag: "aaMutationsCount",
|
|
785
|
+
feature: "CDR1,CDR2",
|
|
786
|
+
id: "aa-mutations-cdr",
|
|
787
|
+
label: "AA mutations (CDR)",
|
|
788
|
+
specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
|
|
789
|
+
rankingOrder: "decreasing",
|
|
790
|
+
outputColumn: "nAAMutationsCDR"
|
|
791
|
+
}, {
|
|
792
|
+
exportFlag: "aaMutationsCount",
|
|
793
|
+
feature: "FR1,FR2,FR3,FR4",
|
|
794
|
+
id: "aa-mutations-fwr",
|
|
795
|
+
label: "AA mutations (FWR)",
|
|
796
|
+
specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
|
|
797
|
+
rankingOrder: "increasing",
|
|
798
|
+
outputColumn: "nAAMutationsFWR"
|
|
799
|
+
} ]
|
|
800
|
+
|
|
801
|
+
for col in crossRegionMutations {
|
|
802
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
803
|
+
column: col.exportFlag + col.feature,
|
|
804
|
+
id: col.id,
|
|
805
|
+
allowNA: true,
|
|
806
|
+
naRegex: "region_not_covered",
|
|
807
|
+
spec: {
|
|
808
|
+
valueType: "Int",
|
|
809
|
+
name: col.specName,
|
|
810
|
+
annotations: a(orderP, false, {
|
|
811
|
+
"pl7.app/label": col.label,
|
|
812
|
+
"pl7.app/isScore": "true",
|
|
813
|
+
"pl7.app/score/rankingOrder": col.rankingOrder
|
|
814
|
+
})
|
|
815
|
+
}
|
|
816
|
+
} ]
|
|
817
|
+
exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
|
|
818
|
+
shmMapping = append(shmMapping, {
|
|
819
|
+
outputColumn: col.outputColumn,
|
|
820
|
+
tsvColumn: col.exportFlag + col.feature
|
|
821
|
+
})
|
|
822
|
+
orderP -= 100
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
// CDR mutation fraction (computed in aggregate-by-clonotype-key)
|
|
826
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
827
|
+
column: "fractionCDRMutations",
|
|
828
|
+
id: "fraction-cdr-mutations",
|
|
829
|
+
naRegex: "^[a-z_]*$",
|
|
830
|
+
allowNA: true,
|
|
831
|
+
spec: {
|
|
832
|
+
valueType: "Double",
|
|
833
|
+
name: "pl7.app/vdj/sequence/fractionCDRMutations",
|
|
834
|
+
annotations: a(orderP, false, {
|
|
835
|
+
"pl7.app/label": "CDR mutation fraction",
|
|
836
|
+
"pl7.app/isScore": "true",
|
|
837
|
+
"pl7.app/score/rankingOrder": "decreasing",
|
|
838
|
+
"pl7.app/format": ".2f"
|
|
839
|
+
})
|
|
840
|
+
}
|
|
841
|
+
} ]
|
|
842
|
+
orderP -= 100
|
|
843
|
+
}
|
|
844
|
+
|
|
768
845
|
// Flags: productive, oof, stop codons
|
|
769
846
|
|
|
770
847
|
flagColumnVariants := [ {
|
|
@@ -1042,7 +1119,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
|
|
|
1042
1119
|
exportArgs: exportArgs,
|
|
1043
1120
|
|
|
1044
1121
|
hashCellKey: hashCellKey,
|
|
1045
|
-
cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen
|
|
1122
|
+
cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen,
|
|
1123
|
+
|
|
1124
|
+
shmMapping: shmMapping
|
|
1046
1125
|
}
|
|
1047
1126
|
}
|
|
1048
1127
|
|
|
@@ -7,6 +7,7 @@ smart := import("@platforma-sdk/workflow-tengo:smart")
|
|
|
7
7
|
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
8
8
|
assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
9
9
|
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
10
|
+
units := import("@platforma-sdk/workflow-tengo:units")
|
|
10
11
|
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
11
12
|
clonotypeLabel := import(":clonotype-label")
|
|
12
13
|
stopCodonReplacement := import(":stop-codon-replacement")
|
|
@@ -31,7 +32,7 @@ self.body(func(inputs) {
|
|
|
31
32
|
aminoAcidSeqColumns := params.aminoAcidSeqColumns
|
|
32
33
|
aminoAcidSeqColumnPairs := params.aminoAcidSeqColumnPairs
|
|
33
34
|
cdr3SeqColumns := params.cdr3SeqColumns
|
|
34
|
-
|
|
35
|
+
|
|
35
36
|
clonotypeKeyColumns := params.clonotypeKeyColumns
|
|
36
37
|
clonotypeKeyArgs := params.clonotypeKeyArgs
|
|
37
38
|
|
|
@@ -49,7 +50,7 @@ self.body(func(inputs) {
|
|
|
49
50
|
|
|
50
51
|
useProductiveFilter := is_undefined(stopCodonTypes) || len(stopCodonTypes) == 0
|
|
51
52
|
|
|
52
|
-
exportMemGB := undefined
|
|
53
|
+
exportMemGB := undefined
|
|
53
54
|
if !is_undefined(inputs.perProcessMemGB) {
|
|
54
55
|
exportMemGB = int(1.0*inputs.perProcessMemGB/4.0)
|
|
55
56
|
if exportMemGB < 12 {
|
|
@@ -59,8 +60,7 @@ self.body(func(inputs) {
|
|
|
59
60
|
exportMemGB = 12
|
|
60
61
|
}
|
|
61
62
|
ptMemGB := int(2.0*exportMemGB/3.0)
|
|
62
|
-
|
|
63
|
-
|
|
63
|
+
|
|
64
64
|
hashKeyDerivationExpressionPt := func(sourceColumns) {
|
|
65
65
|
return pt.concatStr(
|
|
66
66
|
slices.map(sourceColumns, func(colName) { return pt.col(colName).fillNull("") }),
|
|
@@ -130,7 +130,7 @@ self.body(func(inputs) {
|
|
|
130
130
|
// PTabler processing for main TSV output
|
|
131
131
|
wfMain := pt.workflow().
|
|
132
132
|
inMediumQueue().
|
|
133
|
-
mem(ptMemGB).
|
|
133
|
+
mem(ptMemGB * units.GiB).
|
|
134
134
|
cpu(2)
|
|
135
135
|
|
|
136
136
|
frameInputMap := {
|
|
@@ -17,7 +17,7 @@ math := import("math")
|
|
|
17
17
|
|
|
18
18
|
self.defineOutputs("abundanceTsv", "clonotypeTsv",
|
|
19
19
|
"propertiesAPrimaryTsv", "propertiesASecondaryTsv", "propertiesBPrimaryTsv", "propertiesBSecondaryTsv",
|
|
20
|
-
"cellsTsv")
|
|
20
|
+
"cellsTsv", "shmTsv")
|
|
21
21
|
|
|
22
22
|
ptablerSw := assets.importSoftware("@platforma-open/milaboratories.software-ptabler:main")
|
|
23
23
|
|
|
@@ -37,6 +37,11 @@ self.body(func(inputs) {
|
|
|
37
37
|
|
|
38
38
|
schemaPerClonotypeNoAggregates := inputs.params.schemaPerClonotypeNoAggregates
|
|
39
39
|
|
|
40
|
+
shmMapping := inputs.params.shmMapping
|
|
41
|
+
if is_undefined(shmMapping) {
|
|
42
|
+
shmMapping = []
|
|
43
|
+
}
|
|
44
|
+
|
|
40
45
|
//
|
|
41
46
|
// Preprocessing
|
|
42
47
|
//
|
|
@@ -303,6 +308,96 @@ self.body(func(inputs) {
|
|
|
303
308
|
|
|
304
309
|
outputProcessingRunResult := outputProcessingWf.run()
|
|
305
310
|
|
|
311
|
+
propsAPrimaryFile := outputProcessingRunResult.getFile(chainMappings[0].finalOutFile)
|
|
312
|
+
propsBPrimaryFile := outputProcessingRunResult.getFile(chainMappings[2].finalOutFile)
|
|
313
|
+
|
|
314
|
+
// Sum SHM mutation columns across primary A+B chains
|
|
315
|
+
shmTsv := undefined
|
|
316
|
+
if len(shmMapping) > 0 {
|
|
317
|
+
shmSchema := [{ column: "scClonotypeKey", type: "String" }]
|
|
318
|
+
for m in shmMapping {
|
|
319
|
+
shmSchema = append(shmSchema, { column: m.tsvColumn, type: "String" })
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
shmWf := pt.workflow()
|
|
323
|
+
|
|
324
|
+
propsAShmDf := shmWf.frame(propsAPrimaryFile, {
|
|
325
|
+
xsvType: "tsv",
|
|
326
|
+
schema: shmSchema,
|
|
327
|
+
inferSchema: false
|
|
328
|
+
})
|
|
329
|
+
propsBShmDf := shmWf.frame(propsBPrimaryFile, {
|
|
330
|
+
xsvType: "tsv",
|
|
331
|
+
schema: shmSchema,
|
|
332
|
+
inferSchema: false,
|
|
333
|
+
id: "props_b_shm"
|
|
334
|
+
})
|
|
335
|
+
|
|
336
|
+
// Cast "region_not_covered" to null, then to Int
|
|
337
|
+
castExprs := []
|
|
338
|
+
for m in shmMapping {
|
|
339
|
+
castExprs = append(castExprs,
|
|
340
|
+
pt.when(pt.col(m.tsvColumn).eq("region_not_covered")).
|
|
341
|
+
then(pt.lit(undefined)).
|
|
342
|
+
otherwise(pt.col(m.tsvColumn)).
|
|
343
|
+
cast("Int").
|
|
344
|
+
alias(m.tsvColumn)
|
|
345
|
+
)
|
|
346
|
+
}
|
|
347
|
+
propsAShmDf = propsAShmDf.withColumns(castExprs...)
|
|
348
|
+
propsBShmDf = propsBShmDf.withColumns(castExprs...)
|
|
349
|
+
|
|
350
|
+
//Join A and B chains
|
|
351
|
+
leftCols := []
|
|
352
|
+
rightCols := []
|
|
353
|
+
for m in shmMapping {
|
|
354
|
+
leftCols = append(leftCols, { column: m.tsvColumn, rename: m.outputColumn + "_A" })
|
|
355
|
+
rightCols = append(rightCols, { column: m.tsvColumn, rename: m.outputColumn + "_B" })
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
shmCombinedDf := propsAShmDf.join(propsBShmDf, {
|
|
359
|
+
how: "full",
|
|
360
|
+
on: ["scClonotypeKey"],
|
|
361
|
+
coalesce: true,
|
|
362
|
+
leftColumns: leftCols,
|
|
363
|
+
rightColumns: rightCols
|
|
364
|
+
})
|
|
365
|
+
|
|
366
|
+
// Sum SHM mutation columns across primary A+B chains
|
|
367
|
+
for m in shmMapping {
|
|
368
|
+
shmCombinedDf = shmCombinedDf.withColumns(
|
|
369
|
+
pt.col(m.outputColumn + "_A").plus(pt.col(m.outputColumn + "_B")).
|
|
370
|
+
alias(m.outputColumn)
|
|
371
|
+
)
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
// Calculate CDR mutation fraction
|
|
375
|
+
cdr := "nAAMutationsCDR"
|
|
376
|
+
fwr := "nAAMutationsFWR"
|
|
377
|
+
shmCombinedDf = shmCombinedDf.withColumns(
|
|
378
|
+
pt.when(
|
|
379
|
+
pt.col(cdr).isNotNull().
|
|
380
|
+
and(pt.col(fwr).isNotNull()).
|
|
381
|
+
and(pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double")).gt(0.0))
|
|
382
|
+
).then(
|
|
383
|
+
pt.col(cdr).cast("Double").truediv(
|
|
384
|
+
pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double"))
|
|
385
|
+
)
|
|
386
|
+
).otherwise(pt.lit(1.0)).
|
|
387
|
+
alias("fractionCDRMutations")
|
|
388
|
+
)
|
|
389
|
+
|
|
390
|
+
shmOutputCols := ["scClonotypeKey"]
|
|
391
|
+
for m in shmMapping {
|
|
392
|
+
shmOutputCols = append(shmOutputCols, m.outputColumn)
|
|
393
|
+
}
|
|
394
|
+
shmOutputCols = append(shmOutputCols, "fractionCDRMutations")
|
|
395
|
+
shmCombinedDf.save("shm.tsv", { columns: shmOutputCols, xsvType: "tsv" })
|
|
396
|
+
|
|
397
|
+
shmRunResult := shmWf.run()
|
|
398
|
+
shmTsv = shmRunResult.getFile("shm.tsv")
|
|
399
|
+
}
|
|
400
|
+
|
|
306
401
|
return {
|
|
307
402
|
// must have sampleId and scClonotypeKey columns
|
|
308
403
|
abundanceTsv: abundanceTsv,
|
|
@@ -314,9 +409,11 @@ self.body(func(inputs) {
|
|
|
314
409
|
cellsTsv: cellsTsv,
|
|
315
410
|
|
|
316
411
|
// must have scClonotypeKey columns
|
|
317
|
-
propertiesAPrimaryTsv: outputProcessingRunResult.getFile(chainMappings[0].finalOutFile),
|
|
412
|
+
propertiesAPrimaryTsv: propsAPrimaryFile,
|
|
318
413
|
propertiesASecondaryTsv: outputProcessingRunResult.getFile(chainMappings[1].finalOutFile),
|
|
319
|
-
propertiesBPrimaryTsv: outputProcessingRunResult.getFile(chainMappings[2].finalOutFile),
|
|
320
|
-
propertiesBSecondaryTsv: outputProcessingRunResult.getFile(chainMappings[3].finalOutFile)
|
|
414
|
+
propertiesBPrimaryTsv: propsBPrimaryFile,
|
|
415
|
+
propertiesBSecondaryTsv: outputProcessingRunResult.getFile(chainMappings[3].finalOutFile),
|
|
416
|
+
|
|
417
|
+
shmTsv: shmTsv
|
|
321
418
|
}
|
|
322
419
|
})
|
package/src/process.tpl.tengo
CHANGED
|
@@ -233,6 +233,8 @@ self.body(func(inputs) {
|
|
|
233
233
|
|
|
234
234
|
mainAbundanceColumnNormalized := exportSpecs.mainAbundanceColumnNormalized
|
|
235
235
|
mainAbundanceColumnUnnormalized := exportSpecs.mainAbundanceColumnUnnormalized
|
|
236
|
+
|
|
237
|
+
shmMapping := exportSpecs.shmMapping
|
|
236
238
|
mainAbundanceColumnNormalizedArgs := exportSpecs.mainAbundanceColumnNormalizedArgs
|
|
237
239
|
mainAbundanceColumnUnnormalizedArgs := exportSpecs.mainAbundanceColumnUnnormalizedArgs
|
|
238
240
|
|
|
@@ -566,6 +568,77 @@ self.body(func(inputs) {
|
|
|
566
568
|
}
|
|
567
569
|
|
|
568
570
|
if isSingleCell {
|
|
571
|
+
// SHM mutation columns: build filtering set and combined output specs
|
|
572
|
+
hasShm := len(shmMapping) > 0
|
|
573
|
+
shmColumnNames := {}
|
|
574
|
+
shmOutputSpecs := []
|
|
575
|
+
if hasShm {
|
|
576
|
+
for m in shmMapping {
|
|
577
|
+
shmColumnNames[m.tsvColumn] = true
|
|
578
|
+
}
|
|
579
|
+
shmColumnNames["fractionCDRMutations"] = true
|
|
580
|
+
|
|
581
|
+
orderP := 10400
|
|
582
|
+
shmDefs := [ {
|
|
583
|
+
outputColumn: "nMutations",
|
|
584
|
+
label: "Nt mutations",
|
|
585
|
+
specName: "pl7.app/vdj/sequence/nMutations",
|
|
586
|
+
valueType: "Int",
|
|
587
|
+
rankingOrder: "decreasing"
|
|
588
|
+
}, {
|
|
589
|
+
outputColumn: "nAAMutationsCDR",
|
|
590
|
+
label: "AA mutations (CDR)",
|
|
591
|
+
specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
|
|
592
|
+
valueType: "Int",
|
|
593
|
+
rankingOrder: "decreasing"
|
|
594
|
+
}, {
|
|
595
|
+
outputColumn: "nAAMutationsFWR",
|
|
596
|
+
label: "AA mutations (FWR)",
|
|
597
|
+
specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
|
|
598
|
+
valueType: "Int",
|
|
599
|
+
rankingOrder: "increasing"
|
|
600
|
+
} ]
|
|
601
|
+
|
|
602
|
+
for def in shmDefs {
|
|
603
|
+
shmOutputSpecs = append(shmOutputSpecs, {
|
|
604
|
+
column: def.outputColumn,
|
|
605
|
+
id: def.outputColumn,
|
|
606
|
+
allowNA: true,
|
|
607
|
+
naRegex: "^[a-z_]*$",
|
|
608
|
+
spec: {
|
|
609
|
+
valueType: def.valueType,
|
|
610
|
+
name: def.specName,
|
|
611
|
+
annotations: {
|
|
612
|
+
"pl7.app/label": def.label,
|
|
613
|
+
"pl7.app/table/orderPriority": string(orderP),
|
|
614
|
+
"pl7.app/table/visibility": "optional",
|
|
615
|
+
"pl7.app/isScore": "true",
|
|
616
|
+
"pl7.app/score/rankingOrder": def.rankingOrder
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
})
|
|
620
|
+
orderP -= 100
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
shmOutputSpecs = append(shmOutputSpecs, {
|
|
624
|
+
column: "fractionCDRMutations",
|
|
625
|
+
id: "fraction-cdr-mutations",
|
|
626
|
+
naRegex: "^[a-z_]*$",
|
|
627
|
+
allowNA: true,
|
|
628
|
+
spec: {
|
|
629
|
+
valueType: "Double",
|
|
630
|
+
name: "pl7.app/vdj/sequence/fractionCDRMutations",
|
|
631
|
+
annotations: {
|
|
632
|
+
"pl7.app/label": "CDR mutation fraction",
|
|
633
|
+
"pl7.app/table/orderPriority": string(orderP),
|
|
634
|
+
"pl7.app/table/visibility": "optional",
|
|
635
|
+
"pl7.app/isScore": "true",
|
|
636
|
+
"pl7.app/score/rankingOrder": "decreasing"
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
})
|
|
640
|
+
}
|
|
641
|
+
|
|
569
642
|
for receptor in receptors {
|
|
570
643
|
receptorInfo := receptorInfos[receptor]
|
|
571
644
|
|
|
@@ -627,7 +700,10 @@ self.body(func(inputs) {
|
|
|
627
700
|
|
|
628
701
|
// Modify column visibility for TCR chains
|
|
629
702
|
isTCRChain := text.has_prefix(chain, "TCR")
|
|
630
|
-
|
|
703
|
+
// Filter out SHM mutation columns (We will generate chain-agnostic columns)
|
|
704
|
+
columnsForSingleCell := hasShm ? slices.filter(columnsSpecPerClonotypeNoAggregates, func(col) {
|
|
705
|
+
return !shmColumnNames[col.column]
|
|
706
|
+
}) : columnsSpecPerClonotypeNoAggregates
|
|
631
707
|
if isTCRChain {
|
|
632
708
|
visibilitySettings := {
|
|
633
709
|
"bestCGene": "optional",
|
|
@@ -718,6 +794,23 @@ self.body(func(inputs) {
|
|
|
718
794
|
path: ["cellsTsv"]
|
|
719
795
|
} ]
|
|
720
796
|
|
|
797
|
+
if hasShm {
|
|
798
|
+
singleCellOutputs += [ {
|
|
799
|
+
type: "Xsv",
|
|
800
|
+
xsvType: "tsv",
|
|
801
|
+
settings: {
|
|
802
|
+
axes: [ axisByScClonotypeKeyGen(receptor) ],
|
|
803
|
+
columns: shmOutputSpecs,
|
|
804
|
+
storageFormat: "Parquet",
|
|
805
|
+
partitionKeyLength: 0
|
|
806
|
+
},
|
|
807
|
+
mem: "12GiB",
|
|
808
|
+
cpu: 2,
|
|
809
|
+
name: "shmCombined",
|
|
810
|
+
path: ["shmTsv"]
|
|
811
|
+
} ]
|
|
812
|
+
}
|
|
813
|
+
|
|
721
814
|
chainA := receptorInfo.chains[0]
|
|
722
815
|
chainB := receptorInfo.chains[1]
|
|
723
816
|
|
|
@@ -746,7 +839,8 @@ self.body(func(inputs) {
|
|
|
746
839
|
params: {
|
|
747
840
|
mainAbundanceColumn: mainAbundanceColumnUnnormalized,
|
|
748
841
|
mainIsProductiveColumn: mainIsProductiveColumn,
|
|
749
|
-
schemaPerClonotypeNoAggregates: columnsToSchema(columnsSpecPerClonotypeNoAggregates)
|
|
842
|
+
schemaPerClonotypeNoAggregates: columnsToSchema(columnsSpecPerClonotypeNoAggregates),
|
|
843
|
+
shmMapping: shmMapping
|
|
750
844
|
}
|
|
751
845
|
}
|
|
752
846
|
}
|
|
@@ -769,6 +863,10 @@ self.body(func(inputs) {
|
|
|
769
863
|
singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesBPrimary", "clonotypeProperties/" + receptor + "/bPrimary/")
|
|
770
864
|
singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesBSecondary", "clonotypeProperties/" + receptor + "/bSecondary/")
|
|
771
865
|
|
|
866
|
+
if hasShm {
|
|
867
|
+
singleCellResult.addXsvOutputToBuilder(clonotypes, "shmCombined", "clonotypeProperties/" + receptor + "/shmCombined/")
|
|
868
|
+
}
|
|
869
|
+
|
|
772
870
|
for columnName in singleCellResult.listXsvColumns("cellsLinkerTable") {
|
|
773
871
|
anonymizedData := singleCellResult.outputData("cellsLinkerTable", columnName)
|
|
774
872
|
clonotypes.add(
|