@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow 1.19.1 → 1.19.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +12 -0
- package/dist/tengo/lib/calculate-export-specs.lib.tengo +68 -1
- package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
- package/dist/tengo/tpl/export-report.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
- package/dist/tengo/tpl/process.plj.gz +0 -0
- package/dist/tengo/tpl/repseqio-library.plj.gz +0 -0
- package/package.json +1 -1
- package/src/aggregate-by-clonotype-key.tpl.tengo +23 -0
- package/src/calculate-export-specs.lib.tengo +68 -1
- package/src/mixcr-export.tpl.tengo +12 -2
- package/src/process.tpl.tengo +18 -4
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.1 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
|
|
3
|
+
> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.3 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
|
|
4
4
|
> rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow
|
|
2
2
|
|
|
3
|
+
## 1.19.3
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 8685e8d: Add new mutation columns
|
|
8
|
+
|
|
9
|
+
## 1.19.2
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- 4aeac00: Link memory limits of downstream operations (XSV conversion, MiXCR export, PTabler) to user-specified perProcessMemGB override to prevent over-scheduling on local runs
|
|
14
|
+
|
|
3
15
|
## 1.19.1
|
|
4
16
|
|
|
5
17
|
### Patch Changes
|
|
package/dist/tengo/lib/calculate-export-specs.lib.tengo
CHANGED
|
@@ -619,7 +619,74 @@ inFrameFeatures := {
|
|
|
619
619
|
exportArgs += [ [ "-allAAMutationsCount", "FR1Begin", "FR4End" ] ]
|
|
620
620
|
|
|
621
621
|
}
|
|
622
|
-
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
if assemblingFeature == "VDJRegion" {
|
|
625
|
+
orderP = 7500
|
|
626
|
+
nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
|
|
627
|
+
|
|
628
|
+
crossRegionMutations := [ {
|
|
629
|
+
exportFlag: "nMutationsCount",
|
|
630
|
+
feature: nMutationsFeature,
|
|
631
|
+
id: "n-mutations-total",
|
|
632
|
+
label: "Nt mutations",
|
|
633
|
+
specName: "pl7.app/vdj/sequence/nMutations",
|
|
634
|
+
rankingOrder: "decreasing"
|
|
635
|
+
}, {
|
|
636
|
+
exportFlag: "aaMutationsCount",
|
|
637
|
+
feature: "CDR1,CDR2",
|
|
638
|
+
id: "aa-mutations-cdr",
|
|
639
|
+
label: "AA mutations (CDR)",
|
|
640
|
+
specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
|
|
641
|
+
rankingOrder: "decreasing"
|
|
642
|
+
}, {
|
|
643
|
+
exportFlag: "aaMutationsCount",
|
|
644
|
+
feature: "FR1,FR2,FR3,FR4",
|
|
645
|
+
id: "aa-mutations-fwr",
|
|
646
|
+
label: "AA mutations (FWR)",
|
|
647
|
+
specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
|
|
648
|
+
rankingOrder: "increasing"
|
|
649
|
+
} ]
|
|
650
|
+
|
|
651
|
+
for col in crossRegionMutations {
|
|
652
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
653
|
+
column: col.exportFlag + col.feature,
|
|
654
|
+
id: col.id,
|
|
655
|
+
allowNA: true,
|
|
656
|
+
naRegex: "region_not_covered",
|
|
657
|
+
spec: {
|
|
658
|
+
valueType: "Int",
|
|
659
|
+
name: col.specName,
|
|
660
|
+
annotations: a(orderP, false, {
|
|
661
|
+
"pl7.app/label": col.label,
|
|
662
|
+
"pl7.app/isScore": "true",
|
|
663
|
+
"pl7.app/score/rankingOrder": col.rankingOrder
|
|
664
|
+
})
|
|
665
|
+
}
|
|
666
|
+
} ]
|
|
667
|
+
exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
|
|
668
|
+
orderP -= 100
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
673
|
+
column: "fractionCDRMutations",
|
|
674
|
+
id: "fraction-cdr-mutations",
|
|
675
|
+
naRegex: "^[a-z_]*$",
|
|
676
|
+
allowNA: true,
|
|
677
|
+
spec: {
|
|
678
|
+
valueType: "Double",
|
|
679
|
+
name: "pl7.app/vdj/sequence/fractionCDRMutations",
|
|
680
|
+
annotations: a(orderP, false, {
|
|
681
|
+
"pl7.app/label": "CDR mutation fraction",
|
|
682
|
+
"pl7.app/isScore": "true",
|
|
683
|
+
"pl7.app/score/rankingOrder": "decreasing",
|
|
684
|
+
"pl7.app/format": ".2f"
|
|
685
|
+
})
|
|
686
|
+
}
|
|
687
|
+
} ]
|
|
688
|
+
}
|
|
689
|
+
|
|
623
690
|
|
|
624
691
|
|
|
625
692
|
germlineVFeature := "GermlineVCDR3Part"
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
package/src/aggregate-by-clonotype-key.tpl.tengo
CHANGED
|
@@ -72,11 +72,16 @@ self.body(func(inputs) {
|
|
|
72
72
|
}
|
|
73
73
|
|
|
74
74
|
aggExpressions := []
|
|
75
|
+
hasFractionCDRMutations := false
|
|
75
76
|
|
|
76
77
|
for colDef in schemaPerClonotypeNoAggregates {
|
|
77
78
|
if colDef.column == "clonotypeLabel" {
|
|
78
79
|
continue
|
|
79
80
|
}
|
|
81
|
+
if colDef.column == "fractionCDRMutations" {
|
|
82
|
+
hasFractionCDRMutations = true
|
|
83
|
+
continue
|
|
84
|
+
}
|
|
80
85
|
aggExpressions = append(aggExpressions,
|
|
81
86
|
pt.col(colDef.column).maxBy(pt.col(mainAbundanceColumnNormalized)).alias(colDef.column)
|
|
82
87
|
)
|
|
@@ -90,6 +95,24 @@ self.body(func(inputs) {
|
|
|
90
95
|
|
|
91
96
|
aggregatedDf := currentDf.groupBy("clonotypeKey").agg(aggExpressions...)
|
|
92
97
|
|
|
98
|
+
// Calculate CDR mutation fraction: CDR / (CDR + FWR), fallback 1.0 when NA or zero denominator
|
|
99
|
+
if hasFractionCDRMutations {
|
|
100
|
+
cdr := "aaMutationsCountCDR1,CDR2"
|
|
101
|
+
fwr := "aaMutationsCountFR1,FR2,FR3,FR4"
|
|
102
|
+
aggregatedDf = aggregatedDf.withColumns(
|
|
103
|
+
pt.when(
|
|
104
|
+
pt.col(cdr).isNotNull().
|
|
105
|
+
and(pt.col(fwr).isNotNull()).
|
|
106
|
+
and(pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double")).gt(0.0))
|
|
107
|
+
).then(
|
|
108
|
+
pt.col(cdr).cast("Double").truediv(
|
|
109
|
+
pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double"))
|
|
110
|
+
)
|
|
111
|
+
).otherwise(pt.lit(1.0)).
|
|
112
|
+
alias("fractionCDRMutations")
|
|
113
|
+
)
|
|
114
|
+
}
|
|
115
|
+
|
|
93
116
|
aggregatedDf = clonotypeLabel.addClonotypeLabelColumnsPt(aggregatedDf, "clonotypeKey", "clonotypeLabel", pt)
|
|
94
117
|
|
|
95
118
|
cdr3Df := aggregatedDf.select(
|
|
package/src/calculate-export-specs.lib.tengo
CHANGED
|
@@ -619,7 +619,74 @@ inFrameFeatures := {
|
|
|
619
619
|
exportArgs += [ [ "-allAAMutationsCount", "FR1Begin", "FR4End" ] ]
|
|
620
620
|
|
|
621
621
|
}
|
|
622
|
-
|
|
622
|
+
|
|
623
|
+
// Cross-region SHM mutation counts (only for full VDJRegion assembling)
|
|
624
|
+
if assemblingFeature == "VDJRegion" {
|
|
625
|
+
orderP = 7500
|
|
626
|
+
nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
|
|
627
|
+
|
|
628
|
+
crossRegionMutations := [ {
|
|
629
|
+
exportFlag: "nMutationsCount",
|
|
630
|
+
feature: nMutationsFeature,
|
|
631
|
+
id: "n-mutations-total",
|
|
632
|
+
label: "Nt mutations",
|
|
633
|
+
specName: "pl7.app/vdj/sequence/nMutations",
|
|
634
|
+
rankingOrder: "decreasing"
|
|
635
|
+
}, {
|
|
636
|
+
exportFlag: "aaMutationsCount",
|
|
637
|
+
feature: "CDR1,CDR2",
|
|
638
|
+
id: "aa-mutations-cdr",
|
|
639
|
+
label: "AA mutations (CDR)",
|
|
640
|
+
specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
|
|
641
|
+
rankingOrder: "decreasing"
|
|
642
|
+
}, {
|
|
643
|
+
exportFlag: "aaMutationsCount",
|
|
644
|
+
feature: "FR1,FR2,FR3,FR4",
|
|
645
|
+
id: "aa-mutations-fwr",
|
|
646
|
+
label: "AA mutations (FWR)",
|
|
647
|
+
specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
|
|
648
|
+
rankingOrder: "increasing"
|
|
649
|
+
} ]
|
|
650
|
+
|
|
651
|
+
for col in crossRegionMutations {
|
|
652
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
653
|
+
column: col.exportFlag + col.feature,
|
|
654
|
+
id: col.id,
|
|
655
|
+
allowNA: true,
|
|
656
|
+
naRegex: "region_not_covered",
|
|
657
|
+
spec: {
|
|
658
|
+
valueType: "Int",
|
|
659
|
+
name: col.specName,
|
|
660
|
+
annotations: a(orderP, false, {
|
|
661
|
+
"pl7.app/label": col.label,
|
|
662
|
+
"pl7.app/isScore": "true",
|
|
663
|
+
"pl7.app/score/rankingOrder": col.rankingOrder
|
|
664
|
+
})
|
|
665
|
+
}
|
|
666
|
+
} ]
|
|
667
|
+
exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
|
|
668
|
+
orderP -= 100
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
// CDR mutation fraction (computed in aggregate-by-clonotype-key)
|
|
672
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
673
|
+
column: "fractionCDRMutations",
|
|
674
|
+
id: "fraction-cdr-mutations",
|
|
675
|
+
naRegex: "^[a-z_]*$",
|
|
676
|
+
allowNA: true,
|
|
677
|
+
spec: {
|
|
678
|
+
valueType: "Double",
|
|
679
|
+
name: "pl7.app/vdj/sequence/fractionCDRMutations",
|
|
680
|
+
annotations: a(orderP, false, {
|
|
681
|
+
"pl7.app/label": "CDR mutation fraction",
|
|
682
|
+
"pl7.app/isScore": "true",
|
|
683
|
+
"pl7.app/score/rankingOrder": "decreasing",
|
|
684
|
+
"pl7.app/format": ".2f"
|
|
685
|
+
})
|
|
686
|
+
}
|
|
687
|
+
} ]
|
|
688
|
+
}
|
|
689
|
+
|
|
623
690
|
// Export germline CDR3 part mutations for both VDJRegion and CDR3 assembling features
|
|
624
691
|
// These will be summed to create the CDR3 mutations count columns
|
|
625
692
|
germlineVFeature := "GermlineVCDR3Part"
|
|
package/src/mixcr-export.tpl.tengo
CHANGED
|
@@ -8,6 +8,7 @@ exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
|
8
8
|
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
9
9
|
clonotypeLabel := import(":clonotype-label")
|
|
10
10
|
|
|
11
|
+
math := import("math")
|
|
11
12
|
json := import("json")
|
|
12
13
|
text := import("text")
|
|
13
14
|
|
|
@@ -176,6 +177,15 @@ self.body(func(inputs) {
|
|
|
176
177
|
|
|
177
178
|
useProductiveFilter := is_undefined(stopCodonTypes) || len(stopCodonTypes) == 0
|
|
178
179
|
|
|
180
|
+
// Memory for downstream operations, linked to user override with hardcoded floors
|
|
181
|
+
baseMemGB := 64
|
|
182
|
+
if !is_undefined(params.perProcessMemGB) {
|
|
183
|
+
baseMemGB = params.perProcessMemGB
|
|
184
|
+
}
|
|
185
|
+
memGB := func(floorGB, divisor) {
|
|
186
|
+
return string(int(math.max(floorGB, baseMemGB / divisor))) + "GB"
|
|
187
|
+
}
|
|
188
|
+
|
|
179
189
|
hashKeyDerivationExpressionPt := func(sourceColumns) {
|
|
180
190
|
return pt.concatStr(
|
|
181
191
|
slices.map(sourceColumns, func(colName) { return pt.col(colName) }),
|
|
@@ -186,7 +196,7 @@ self.body(func(inputs) {
|
|
|
186
196
|
createExport := func(additionalAction) {
|
|
187
197
|
mixcrCmdBuilder := exec.builder().
|
|
188
198
|
inMediumQueue().
|
|
189
|
-
mem(
|
|
199
|
+
mem(memGB(12, 4)).
|
|
190
200
|
cpu(2).
|
|
191
201
|
printErrStreamToStdout().
|
|
192
202
|
software(mixcrSw).
|
|
@@ -233,7 +243,7 @@ self.body(func(inputs) {
|
|
|
233
243
|
// Simplified PTabler processing for main TSV output
|
|
234
244
|
wfMain := pt.workflow().
|
|
235
245
|
inMediumQueue().
|
|
236
|
-
mem(
|
|
246
|
+
mem(memGB(8, 4)).
|
|
237
247
|
cpu(2)
|
|
238
248
|
|
|
239
249
|
frameInputMap := {
|
package/src/process.tpl.tengo
CHANGED
|
@@ -9,6 +9,7 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
|
|
|
9
9
|
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
10
10
|
maps := import("@platforma-sdk/workflow-tengo:maps")
|
|
11
11
|
|
|
12
|
+
math := import("math")
|
|
12
13
|
json := import("json")
|
|
13
14
|
text := import("text")
|
|
14
15
|
|
|
@@ -71,6 +72,18 @@ self.body(func(inputs) {
|
|
|
71
72
|
limitInput := inputs.limitInput
|
|
72
73
|
perProcessMemGB := params.perProcessMemGB
|
|
73
74
|
perProcessCPUs := params.perProcessCPUs
|
|
75
|
+
|
|
76
|
+
// Base memory for scheduling downstream operations, linked to user override
|
|
77
|
+
// with current hardcoded values as floors (see memGB helper below)
|
|
78
|
+
baseMemGB := 64
|
|
79
|
+
if !is_undefined(perProcessMemGB) {
|
|
80
|
+
baseMemGB = perProcessMemGB
|
|
81
|
+
}
|
|
82
|
+
// Returns max(floor, baseMemGB / divisor) as a string like "32GB"
|
|
83
|
+
memGB := func(floorGB, divisor) {
|
|
84
|
+
return string(int(math.max(floorGB, baseMemGB / divisor))) + "GB"
|
|
85
|
+
}
|
|
86
|
+
|
|
74
87
|
fileExtension := inputSpec.domain["pl7.app/fileExtension"]
|
|
75
88
|
sampleIdAxisSpec := inputSpec.axesSpec[0]
|
|
76
89
|
|
|
@@ -249,7 +262,7 @@ self.body(func(inputs) {
|
|
|
249
262
|
storageFormat: "Parquet",
|
|
250
263
|
partitionKeyLength: 0
|
|
251
264
|
},
|
|
252
|
-
mem:
|
|
265
|
+
mem: memGB(16, 2),
|
|
253
266
|
cpu: 2,
|
|
254
267
|
name: "byCloneKeyBySample",
|
|
255
268
|
path: ["tsv"]
|
|
@@ -271,7 +284,8 @@ self.body(func(inputs) {
|
|
|
271
284
|
aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
|
|
272
285
|
cdr3SeqColumns: cdr3SeqColumns,
|
|
273
286
|
stopCodonTypes: params.stopCodonTypes,
|
|
274
|
-
stopCodonReplacements: params.stopCodonReplacements
|
|
287
|
+
stopCodonReplacements: params.stopCodonReplacements,
|
|
288
|
+
perProcessMemGB: perProcessMemGB
|
|
275
289
|
}, { removeUndefs: true })
|
|
276
290
|
}
|
|
277
291
|
}
|
|
@@ -302,7 +316,7 @@ self.body(func(inputs) {
|
|
|
302
316
|
columns: columnsSpecPerClonotypeNoAggregates + columnsSpecPerClonotypeAggregates,
|
|
303
317
|
storageFormat: "Parquet"
|
|
304
318
|
},
|
|
305
|
-
mem:
|
|
319
|
+
mem: memGB(12, 4),
|
|
306
320
|
cpu: 2,
|
|
307
321
|
name: "aggregates",
|
|
308
322
|
path: ["tsv"]
|
|
@@ -314,7 +328,7 @@ self.body(func(inputs) {
|
|
|
314
328
|
columns: cdr3DistanceColumnsSpec,
|
|
315
329
|
storageFormat: "Parquet"
|
|
316
330
|
},
|
|
317
|
-
mem:
|
|
331
|
+
mem: memGB(8, 8),
|
|
318
332
|
cpu: 1,
|
|
319
333
|
name: "cdr3Distances",
|
|
320
334
|
path: ["cdr3DistancesTsv"]
|