@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow 1.19.1 → 1.19.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.1 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
3
+ > @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.3 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow
2
2
 
3
+ ## 1.19.3
4
+
5
+ ### Patch Changes
6
+
7
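+ - 8685e8d: Add new mutation columns for VDJRegion assembling: nucleotide mutation count, AA mutation counts for CDR and FWR, and CDR mutation fraction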
8
+
9
+ ## 1.19.2
10
+
11
+ ### Patch Changes
12
+
13
+ - 4aeac00: Link memory limits of downstream operations (XSV conversion, MiXCR export, PTabler) to user-specified perProcessMemGB override to prevent over-scheduling on local runs
14
+
3
15
  ## 1.19.1
4
16
 
5
17
  ### Patch Changes
@@ -619,7 +619,74 @@ inFrameFeatures := {
619
619
  exportArgs += [ [ "-allAAMutationsCount", "FR1Begin", "FR4End" ] ]
620
620
 
621
621
  }
622
-
622
+
623
+
624
+ if assemblingFeature == "VDJRegion" {
625
+ orderP = 7500
626
+ nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
627
+
628
+ crossRegionMutations := [ {
629
+ exportFlag: "nMutationsCount",
630
+ feature: nMutationsFeature,
631
+ id: "n-mutations-total",
632
+ label: "Nt mutations",
633
+ specName: "pl7.app/vdj/sequence/nMutations",
634
+ rankingOrder: "decreasing"
635
+ }, {
636
+ exportFlag: "aaMutationsCount",
637
+ feature: "CDR1,CDR2",
638
+ id: "aa-mutations-cdr",
639
+ label: "AA mutations (CDR)",
640
+ specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
641
+ rankingOrder: "decreasing"
642
+ }, {
643
+ exportFlag: "aaMutationsCount",
644
+ feature: "FR1,FR2,FR3,FR4",
645
+ id: "aa-mutations-fwr",
646
+ label: "AA mutations (FWR)",
647
+ specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
648
+ rankingOrder: "increasing"
649
+ } ]
650
+
651
+ for col in crossRegionMutations {
652
+ columnsSpecPerClonotypeNoAggregates += [ {
653
+ column: col.exportFlag + col.feature,
654
+ id: col.id,
655
+ allowNA: true,
656
+ naRegex: "region_not_covered",
657
+ spec: {
658
+ valueType: "Int",
659
+ name: col.specName,
660
+ annotations: a(orderP, false, {
661
+ "pl7.app/label": col.label,
662
+ "pl7.app/isScore": "true",
663
+ "pl7.app/score/rankingOrder": col.rankingOrder
664
+ })
665
+ }
666
+ } ]
667
+ exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
668
+ orderP -= 100
669
+ }
670
+
671
+
672
+ columnsSpecPerClonotypeNoAggregates += [ {
673
+ column: "fractionCDRMutations",
674
+ id: "fraction-cdr-mutations",
675
+ naRegex: "^[a-z_]*$",
676
+ allowNA: true,
677
+ spec: {
678
+ valueType: "Double",
679
+ name: "pl7.app/vdj/sequence/fractionCDRMutations",
680
+ annotations: a(orderP, false, {
681
+ "pl7.app/label": "CDR mutation fraction",
682
+ "pl7.app/isScore": "true",
683
+ "pl7.app/score/rankingOrder": "decreasing",
684
+ "pl7.app/format": ".2f"
685
+ })
686
+ }
687
+ } ]
688
+ }
689
+
623
690
 
624
691
 
625
692
  germlineVFeature := "GermlineVCDR3Part"
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow",
3
- "version": "1.19.1",
3
+ "version": "1.19.3",
4
4
  "description": "MiXCR Amplicon Alignment Workflow",
5
5
  "type": "module",
6
6
  "dependencies": {
@@ -72,11 +72,16 @@ self.body(func(inputs) {
72
72
  }
73
73
 
74
74
  aggExpressions := []
75
+ hasFractionCDRMutations := false
75
76
 
76
77
  for colDef in schemaPerClonotypeNoAggregates {
77
78
  if colDef.column == "clonotypeLabel" {
78
79
  continue
79
80
  }
81
+ if colDef.column == "fractionCDRMutations" {
82
+ hasFractionCDRMutations = true
83
+ continue
84
+ }
80
85
  aggExpressions = append(aggExpressions,
81
86
  pt.col(colDef.column).maxBy(pt.col(mainAbundanceColumnNormalized)).alias(colDef.column)
82
87
  )
@@ -90,6 +95,24 @@ self.body(func(inputs) {
90
95
 
91
96
  aggregatedDf := currentDf.groupBy("clonotypeKey").agg(aggExpressions...)
92
97
 
98
+ // Compute the CDR mutation fraction as CDR / (CDR + FWR) from the AA mutation counts; fall back to 1.0 when either count is null or their sum is not greater than zero
99
+ if hasFractionCDRMutations {
100
+ cdr := "aaMutationsCountCDR1,CDR2"
101
+ fwr := "aaMutationsCountFR1,FR2,FR3,FR4"
102
+ aggregatedDf = aggregatedDf.withColumns(
103
+ pt.when(
104
+ pt.col(cdr).isNotNull().
105
+ and(pt.col(fwr).isNotNull()).
106
+ and(pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double")).gt(0.0))
107
+ ).then(
108
+ pt.col(cdr).cast("Double").truediv(
109
+ pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double"))
110
+ )
111
+ ).otherwise(pt.lit(1.0)).
112
+ alias("fractionCDRMutations")
113
+ )
114
+ }
115
+
93
116
  aggregatedDf = clonotypeLabel.addClonotypeLabelColumnsPt(aggregatedDf, "clonotypeKey", "clonotypeLabel", pt)
94
117
 
95
118
  cdr3Df := aggregatedDf.select(
@@ -619,7 +619,74 @@ inFrameFeatures := {
619
619
  exportArgs += [ [ "-allAAMutationsCount", "FR1Begin", "FR4End" ] ]
620
620
 
621
621
  }
622
-
622
+
623
+ // Cross-region SHM mutation counts (only for full VDJRegion assembling)
624
+ if assemblingFeature == "VDJRegion" {
625
+ orderP = 7500
626
+ nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
627
+
628
+ crossRegionMutations := [ {
629
+ exportFlag: "nMutationsCount",
630
+ feature: nMutationsFeature,
631
+ id: "n-mutations-total",
632
+ label: "Nt mutations",
633
+ specName: "pl7.app/vdj/sequence/nMutations",
634
+ rankingOrder: "decreasing"
635
+ }, {
636
+ exportFlag: "aaMutationsCount",
637
+ feature: "CDR1,CDR2",
638
+ id: "aa-mutations-cdr",
639
+ label: "AA mutations (CDR)",
640
+ specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
641
+ rankingOrder: "decreasing"
642
+ }, {
643
+ exportFlag: "aaMutationsCount",
644
+ feature: "FR1,FR2,FR3,FR4",
645
+ id: "aa-mutations-fwr",
646
+ label: "AA mutations (FWR)",
647
+ specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
648
+ rankingOrder: "increasing"
649
+ } ]
650
+
651
+ for col in crossRegionMutations {
652
+ columnsSpecPerClonotypeNoAggregates += [ {
653
+ column: col.exportFlag + col.feature,
654
+ id: col.id,
655
+ allowNA: true,
656
+ naRegex: "region_not_covered",
657
+ spec: {
658
+ valueType: "Int",
659
+ name: col.specName,
660
+ annotations: a(orderP, false, {
661
+ "pl7.app/label": col.label,
662
+ "pl7.app/isScore": "true",
663
+ "pl7.app/score/rankingOrder": col.rankingOrder
664
+ })
665
+ }
666
+ } ]
667
+ exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
668
+ orderP -= 100
669
+ }
670
+
671
+ // CDR mutation fraction (computed in aggregate-by-clonotype-key)
672
+ columnsSpecPerClonotypeNoAggregates += [ {
673
+ column: "fractionCDRMutations",
674
+ id: "fraction-cdr-mutations",
675
+ naRegex: "^[a-z_]*$",
676
+ allowNA: true,
677
+ spec: {
678
+ valueType: "Double",
679
+ name: "pl7.app/vdj/sequence/fractionCDRMutations",
680
+ annotations: a(orderP, false, {
681
+ "pl7.app/label": "CDR mutation fraction",
682
+ "pl7.app/isScore": "true",
683
+ "pl7.app/score/rankingOrder": "decreasing",
684
+ "pl7.app/format": ".2f"
685
+ })
686
+ }
687
+ } ]
688
+ }
689
+
623
690
  // Export germline CDR3 part mutations for both VDJRegion and CDR3 assembling features
624
691
  // These will be summed to create the CDR3 mutations count columns
625
692
  germlineVFeature := "GermlineVCDR3Part"
@@ -8,6 +8,7 @@ exec := import("@platforma-sdk/workflow-tengo:exec")
8
8
  pt := import("@platforma-sdk/workflow-tengo:pt")
9
9
  clonotypeLabel := import(":clonotype-label")
10
10
 
11
+ math := import("math")
11
12
  json := import("json")
12
13
  text := import("text")
13
14
 
@@ -176,6 +177,15 @@ self.body(func(inputs) {
176
177
 
177
178
  useProductiveFilter := is_undefined(stopCodonTypes) || len(stopCodonTypes) == 0
178
179
 
180
+ // Memory for downstream operations: baseMemGB is the perProcessMemGB override (64 when undefined); memGB(floorGB, divisor) returns max(floorGB, baseMemGB / divisor), converted to an int, as a string like "16GB"
181
+ baseMemGB := 64
182
+ if !is_undefined(params.perProcessMemGB) {
183
+ baseMemGB = params.perProcessMemGB
184
+ }
185
+ memGB := func(floorGB, divisor) {
186
+ return string(int(math.max(floorGB, baseMemGB / divisor))) + "GB"
187
+ }
188
+
179
189
  hashKeyDerivationExpressionPt := func(sourceColumns) {
180
190
  return pt.concatStr(
181
191
  slices.map(sourceColumns, func(colName) { return pt.col(colName) }),
@@ -186,7 +196,7 @@ self.body(func(inputs) {
186
196
  createExport := func(additionalAction) {
187
197
  mixcrCmdBuilder := exec.builder().
188
198
  inMediumQueue().
189
- mem("12GB").
199
+ mem(memGB(12, 4)).
190
200
  cpu(2).
191
201
  printErrStreamToStdout().
192
202
  software(mixcrSw).
@@ -233,7 +243,7 @@ self.body(func(inputs) {
233
243
  // Simplified PTabler processing for main TSV output
234
244
  wfMain := pt.workflow().
235
245
  inMediumQueue().
236
- mem("8GB").
246
+ mem(memGB(8, 4)).
237
247
  cpu(2)
238
248
 
239
249
  frameInputMap := {
@@ -9,6 +9,7 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
9
9
  slices := import("@platforma-sdk/workflow-tengo:slices")
10
10
  maps := import("@platforma-sdk/workflow-tengo:maps")
11
11
 
12
+ math := import("math")
12
13
  json := import("json")
13
14
  text := import("text")
14
15
 
@@ -71,6 +72,18 @@ self.body(func(inputs) {
71
72
  limitInput := inputs.limitInput
72
73
  perProcessMemGB := params.perProcessMemGB
73
74
  perProcessCPUs := params.perProcessCPUs
75
+
76
+ // Base memory (GB) for scheduling downstream operations: the user's perProcessMemGB
77
+ // override when defined, otherwise 64; call sites pass their own floor to memGB below
78
+ baseMemGB := 64
79
+ if !is_undefined(perProcessMemGB) {
80
+ baseMemGB = perProcessMemGB
81
+ }
82
+ // Returns max(floorGB, baseMemGB / divisor), converted to an int, as a string like "32GB"
83
+ memGB := func(floorGB, divisor) {
84
+ return string(int(math.max(floorGB, baseMemGB / divisor))) + "GB"
85
+ }
86
+
74
87
  fileExtension := inputSpec.domain["pl7.app/fileExtension"]
75
88
  sampleIdAxisSpec := inputSpec.axesSpec[0]
76
89
 
@@ -249,7 +262,7 @@ self.body(func(inputs) {
249
262
  storageFormat: "Parquet",
250
263
  partitionKeyLength: 0
251
264
  },
252
- mem: "16GB",
265
+ mem: memGB(16, 2),
253
266
  cpu: 2,
254
267
  name: "byCloneKeyBySample",
255
268
  path: ["tsv"]
@@ -271,7 +284,8 @@ self.body(func(inputs) {
271
284
  aminoAcidSeqColumnPairs: aminoAcidSeqColumnPairs,
272
285
  cdr3SeqColumns: cdr3SeqColumns,
273
286
  stopCodonTypes: params.stopCodonTypes,
274
- stopCodonReplacements: params.stopCodonReplacements
287
+ stopCodonReplacements: params.stopCodonReplacements,
288
+ perProcessMemGB: perProcessMemGB
275
289
  }, { removeUndefs: true })
276
290
  }
277
291
  }
@@ -302,7 +316,7 @@ self.body(func(inputs) {
302
316
  columns: columnsSpecPerClonotypeNoAggregates + columnsSpecPerClonotypeAggregates,
303
317
  storageFormat: "Parquet"
304
318
  },
305
- mem: "12GB",
319
+ mem: memGB(12, 4),
306
320
  cpu: 2,
307
321
  name: "aggregates",
308
322
  path: ["tsv"]
@@ -314,7 +328,7 @@ self.body(func(inputs) {
314
328
  columns: cdr3DistanceColumnsSpec,
315
329
  storageFormat: "Parquet"
316
330
  },
317
- mem: "8GB",
331
+ mem: memGB(8, 8),
318
332
  cpu: 1,
319
333
  name: "cdr3Distances",
320
334
  path: ["cdr3DistancesTsv"]