@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.23.5 → 3.23.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.23.5 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
3
+ > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.23.6 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
4
4
  > shx rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  info: Skipping unknown file type: test/columns.test.ts
package/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # @platforma-open/milaboratories.mixcr-clonotyping.workflow
2
2
 
3
+ ## 3.23.6
4
+
5
+ ### Patch Changes
6
+
7
+ - b8a3a50: New mutation columns
8
+
3
9
  ## 3.23.5
4
10
 
5
11
  ### Patch Changes
@@ -767,6 +767,83 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
767
767
 
768
768
 
769
769
 
770
+ shmMapping := []
771
+
772
+ if assemblingFeature == "VDJRegion" {
773
+ nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
774
+
775
+ crossRegionMutations := [ {
776
+ exportFlag: "nMutationsCount",
777
+ feature: nMutationsFeature,
778
+ id: "n-mutations-total",
779
+ label: "Nt mutations",
780
+ specName: "pl7.app/vdj/sequence/nMutations",
781
+ rankingOrder: "decreasing",
782
+ outputColumn: "nMutations"
783
+ }, {
784
+ exportFlag: "aaMutationsCount",
785
+ feature: "CDR1,CDR2",
786
+ id: "aa-mutations-cdr",
787
+ label: "AA mutations (CDR)",
788
+ specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
789
+ rankingOrder: "decreasing",
790
+ outputColumn: "nAAMutationsCDR"
791
+ }, {
792
+ exportFlag: "aaMutationsCount",
793
+ feature: "FR1,FR2,FR3,FR4",
794
+ id: "aa-mutations-fwr",
795
+ label: "AA mutations (FWR)",
796
+ specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
797
+ rankingOrder: "increasing",
798
+ outputColumn: "nAAMutationsFWR"
799
+ } ]
800
+
801
+ for col in crossRegionMutations {
802
+ columnsSpecPerClonotypeNoAggregates += [ {
803
+ column: col.exportFlag + col.feature,
804
+ id: col.id,
805
+ allowNA: true,
806
+ naRegex: "region_not_covered",
807
+ spec: {
808
+ valueType: "Int",
809
+ name: col.specName,
810
+ annotations: a(orderP, false, {
811
+ "pl7.app/label": col.label,
812
+ "pl7.app/isScore": "true",
813
+ "pl7.app/score/rankingOrder": col.rankingOrder
814
+ })
815
+ }
816
+ } ]
817
+ exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
818
+ shmMapping = append(shmMapping, {
819
+ outputColumn: col.outputColumn,
820
+ tsvColumn: col.exportFlag + col.feature
821
+ })
822
+ orderP -= 100
823
+ }
824
+
825
+
826
+ columnsSpecPerClonotypeNoAggregates += [ {
827
+ column: "fractionCDRMutations",
828
+ id: "fraction-cdr-mutations",
829
+ naRegex: "^[a-z_]*$",
830
+ allowNA: true,
831
+ spec: {
832
+ valueType: "Double",
833
+ name: "pl7.app/vdj/sequence/fractionCDRMutations",
834
+ annotations: a(orderP, false, {
835
+ "pl7.app/label": "CDR mutation fraction",
836
+ "pl7.app/isScore": "true",
837
+ "pl7.app/score/rankingOrder": "decreasing",
838
+ "pl7.app/format": ".2f"
839
+ })
840
+ }
841
+ } ]
842
+ orderP -= 100
843
+ }
844
+
845
+
846
+
770
847
  flagColumnVariants := [ {
771
848
  columnPrefix: "isProductive",
772
849
  arg: "-isProductive",
@@ -1042,7 +1119,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
1042
1119
  exportArgs: exportArgs,
1043
1120
 
1044
1121
  hashCellKey: hashCellKey,
1045
- cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen
1122
+ cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen,
1123
+
1124
+ shmMapping: shmMapping
1046
1125
  }
1047
1126
  }
1048
1127
 
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
3
- "version": "3.23.5",
3
+ "version": "3.23.6",
4
4
  "description": "Tengo-based template",
5
5
  "dependencies": {
6
6
  "@platforma-sdk/workflow-tengo": "5.8.2",
@@ -78,11 +78,16 @@ self.body(func(inputs) {
78
78
  }
79
79
 
80
80
  aggExpressions := []
81
+ hasFractionCDRMutations := false
81
82
 
82
83
  for colDef in schemaPerClonotypeNoAggregates {
83
84
  if colDef.column == "clonotypeLabel" || colDef.column == "nLengthTotalAdded" {
84
85
  continue
85
86
  }
87
+ if colDef.column == "fractionCDRMutations" {
88
+ hasFractionCDRMutations = true
89
+ continue
90
+ }
86
91
  aggExpressions = append(aggExpressions,
87
92
  pt.col(colDef.column).maxBy(pt.col(mainAbundanceColumnNormalized)).alias(colDef.column)
88
93
  )
@@ -104,6 +109,24 @@ self.body(func(inputs) {
104
109
  alias("nLengthTotalAdded")
105
110
  )
106
111
 
112
+ // Calculate CDR mutation fraction: CDR / (CDR + FWR), fallback 1.0 when NA or zero denominator
113
+ if hasFractionCDRMutations {
114
+ cdr := "aaMutationsCountCDR1,CDR2"
115
+ fwr := "aaMutationsCountFR1,FR2,FR3,FR4"
116
+ aggregatedDf = aggregatedDf.withColumns(
117
+ pt.when(
118
+ pt.col(cdr).isNotNull().
119
+ and(pt.col(fwr).isNotNull()).
120
+ and(pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double")).gt(0.0))
121
+ ).then(
122
+ pt.col(cdr).cast("Double").truediv(
123
+ pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double"))
124
+ )
125
+ ).otherwise(pt.lit(1.0)).
126
+ alias("fractionCDRMutations")
127
+ )
128
+ }
129
+
107
130
  aggregatedDf = clonotypeLabel.addClonotypeLabelColumnsPt(aggregatedDf, "clonotypeKey", "clonotypeLabel", pt)
108
131
 
109
132
  aggregatedDf.save("output.tsv")
@@ -765,6 +765,83 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
765
765
  }
766
766
  }
767
767
 
768
+ // Cross-region SHM mutation counts (only for full VDJRegion assembling)
769
+ // shmMapping: [{ outputColumn, tsvColumn }] for single-cell chain summing
770
+ shmMapping := []
771
+
772
+ if assemblingFeature == "VDJRegion" {
773
+ nMutationsFeature := coreGeneFeatures["V"] + "," + coreGeneFeatures["J"]
774
+
775
+ crossRegionMutations := [ {
776
+ exportFlag: "nMutationsCount",
777
+ feature: nMutationsFeature,
778
+ id: "n-mutations-total",
779
+ label: "Nt mutations",
780
+ specName: "pl7.app/vdj/sequence/nMutations",
781
+ rankingOrder: "decreasing",
782
+ outputColumn: "nMutations"
783
+ }, {
784
+ exportFlag: "aaMutationsCount",
785
+ feature: "CDR1,CDR2",
786
+ id: "aa-mutations-cdr",
787
+ label: "AA mutations (CDR)",
788
+ specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
789
+ rankingOrder: "decreasing",
790
+ outputColumn: "nAAMutationsCDR"
791
+ }, {
792
+ exportFlag: "aaMutationsCount",
793
+ feature: "FR1,FR2,FR3,FR4",
794
+ id: "aa-mutations-fwr",
795
+ label: "AA mutations (FWR)",
796
+ specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
797
+ rankingOrder: "increasing",
798
+ outputColumn: "nAAMutationsFWR"
799
+ } ]
800
+
801
+ for col in crossRegionMutations {
802
+ columnsSpecPerClonotypeNoAggregates += [ {
803
+ column: col.exportFlag + col.feature,
804
+ id: col.id,
805
+ allowNA: true,
806
+ naRegex: "region_not_covered",
807
+ spec: {
808
+ valueType: "Int",
809
+ name: col.specName,
810
+ annotations: a(orderP, false, {
811
+ "pl7.app/label": col.label,
812
+ "pl7.app/isScore": "true",
813
+ "pl7.app/score/rankingOrder": col.rankingOrder
814
+ })
815
+ }
816
+ } ]
817
+ exportArgs += [ [ "-" + col.exportFlag, col.feature ] ]
818
+ shmMapping = append(shmMapping, {
819
+ outputColumn: col.outputColumn,
820
+ tsvColumn: col.exportFlag + col.feature
821
+ })
822
+ orderP -= 100
823
+ }
824
+
825
+ // CDR mutation fraction (computed in aggregate-by-clonotype-key)
826
+ columnsSpecPerClonotypeNoAggregates += [ {
827
+ column: "fractionCDRMutations",
828
+ id: "fraction-cdr-mutations",
829
+ naRegex: "^[a-z_]*$",
830
+ allowNA: true,
831
+ spec: {
832
+ valueType: "Double",
833
+ name: "pl7.app/vdj/sequence/fractionCDRMutations",
834
+ annotations: a(orderP, false, {
835
+ "pl7.app/label": "CDR mutation fraction",
836
+ "pl7.app/isScore": "true",
837
+ "pl7.app/score/rankingOrder": "decreasing",
838
+ "pl7.app/format": ".2f"
839
+ })
840
+ }
841
+ } ]
842
+ orderP -= 100
843
+ }
844
+
768
845
  // Flags: productive, oof, stop codons
769
846
 
770
847
  flagColumnVariants := [ {
@@ -1042,7 +1119,9 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId, expor
1042
1119
  exportArgs: exportArgs,
1043
1120
 
1044
1121
  hashCellKey: hashCellKey,
1045
- cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen
1122
+ cellLinkerColumnSettingsGen: cellLinkerColumnSettingsGen,
1123
+
1124
+ shmMapping: shmMapping
1046
1125
  }
1047
1126
  }
1048
1127
 
@@ -17,7 +17,7 @@ math := import("math")
17
17
 
18
18
  self.defineOutputs("abundanceTsv", "clonotypeTsv",
19
19
  "propertiesAPrimaryTsv", "propertiesASecondaryTsv", "propertiesBPrimaryTsv", "propertiesBSecondaryTsv",
20
- "cellsTsv")
20
+ "cellsTsv", "shmTsv")
21
21
 
22
22
  ptablerSw := assets.importSoftware("@platforma-open/milaboratories.software-ptabler:main")
23
23
 
@@ -37,6 +37,11 @@ self.body(func(inputs) {
37
37
 
38
38
  schemaPerClonotypeNoAggregates := inputs.params.schemaPerClonotypeNoAggregates
39
39
 
40
+ shmMapping := inputs.params.shmMapping
41
+ if is_undefined(shmMapping) {
42
+ shmMapping = []
43
+ }
44
+
40
45
  //
41
46
  // Preprocessing
42
47
  //
@@ -303,6 +308,96 @@ self.body(func(inputs) {
303
308
 
304
309
  outputProcessingRunResult := outputProcessingWf.run()
305
310
 
311
+ propsAPrimaryFile := outputProcessingRunResult.getFile(chainMappings[0].finalOutFile)
312
+ propsBPrimaryFile := outputProcessingRunResult.getFile(chainMappings[2].finalOutFile)
313
+
314
+ // Sum SHM mutation columns across primary A+B chains
315
+ shmTsv := undefined
316
+ if len(shmMapping) > 0 {
317
+ shmSchema := [{ column: "scClonotypeKey", type: "String" }]
318
+ for m in shmMapping {
319
+ shmSchema = append(shmSchema, { column: m.tsvColumn, type: "String" })
320
+ }
321
+
322
+ shmWf := pt.workflow()
323
+
324
+ propsAShmDf := shmWf.frame(propsAPrimaryFile, {
325
+ xsvType: "tsv",
326
+ schema: shmSchema,
327
+ inferSchema: false
328
+ })
329
+ propsBShmDf := shmWf.frame(propsBPrimaryFile, {
330
+ xsvType: "tsv",
331
+ schema: shmSchema,
332
+ inferSchema: false,
333
+ id: "props_b_shm"
334
+ })
335
+
336
+ // Cast "region_not_covered" to null, then to Int
337
+ castExprs := []
338
+ for m in shmMapping {
339
+ castExprs = append(castExprs,
340
+ pt.when(pt.col(m.tsvColumn).eq("region_not_covered")).
341
+ then(pt.lit(undefined)).
342
+ otherwise(pt.col(m.tsvColumn)).
343
+ cast("Int").
344
+ alias(m.tsvColumn)
345
+ )
346
+ }
347
+ propsAShmDf = propsAShmDf.withColumns(castExprs...)
348
+ propsBShmDf = propsBShmDf.withColumns(castExprs...)
349
+
350
+ //Join A and B chains
351
+ leftCols := []
352
+ rightCols := []
353
+ for m in shmMapping {
354
+ leftCols = append(leftCols, { column: m.tsvColumn, rename: m.outputColumn + "_A" })
355
+ rightCols = append(rightCols, { column: m.tsvColumn, rename: m.outputColumn + "_B" })
356
+ }
357
+
358
+ shmCombinedDf := propsAShmDf.join(propsBShmDf, {
359
+ how: "full",
360
+ on: ["scClonotypeKey"],
361
+ coalesce: true,
362
+ leftColumns: leftCols,
363
+ rightColumns: rightCols
364
+ })
365
+
366
+ // Sum SHM mutation columns across primary A+B chains
367
+ for m in shmMapping {
368
+ shmCombinedDf = shmCombinedDf.withColumns(
369
+ pt.col(m.outputColumn + "_A").plus(pt.col(m.outputColumn + "_B")).
370
+ alias(m.outputColumn)
371
+ )
372
+ }
373
+
374
+ // Calculate CDR mutation fraction
375
+ cdr := "nAAMutationsCDR"
376
+ fwr := "nAAMutationsFWR"
377
+ shmCombinedDf = shmCombinedDf.withColumns(
378
+ pt.when(
379
+ pt.col(cdr).isNotNull().
380
+ and(pt.col(fwr).isNotNull()).
381
+ and(pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double")).gt(0.0))
382
+ ).then(
383
+ pt.col(cdr).cast("Double").truediv(
384
+ pt.col(cdr).cast("Double").plus(pt.col(fwr).cast("Double"))
385
+ )
386
+ ).otherwise(pt.lit(1.0)).
387
+ alias("fractionCDRMutations")
388
+ )
389
+
390
+ shmOutputCols := ["scClonotypeKey"]
391
+ for m in shmMapping {
392
+ shmOutputCols = append(shmOutputCols, m.outputColumn)
393
+ }
394
+ shmOutputCols = append(shmOutputCols, "fractionCDRMutations")
395
+ shmCombinedDf.save("shm.tsv", { columns: shmOutputCols, xsvType: "tsv" })
396
+
397
+ shmRunResult := shmWf.run()
398
+ shmTsv = shmRunResult.getFile("shm.tsv")
399
+ }
400
+
306
401
  return {
307
402
  // must have sampleId and scClonotypeKey columns
308
403
  abundanceTsv: abundanceTsv,
@@ -314,9 +409,11 @@ self.body(func(inputs) {
314
409
  cellsTsv: cellsTsv,
315
410
 
316
411
  // must have scClonotypeKey columns
317
- propertiesAPrimaryTsv: outputProcessingRunResult.getFile(chainMappings[0].finalOutFile),
412
+ propertiesAPrimaryTsv: propsAPrimaryFile,
318
413
  propertiesASecondaryTsv: outputProcessingRunResult.getFile(chainMappings[1].finalOutFile),
319
- propertiesBPrimaryTsv: outputProcessingRunResult.getFile(chainMappings[2].finalOutFile),
320
- propertiesBSecondaryTsv: outputProcessingRunResult.getFile(chainMappings[3].finalOutFile)
414
+ propertiesBPrimaryTsv: propsBPrimaryFile,
415
+ propertiesBSecondaryTsv: outputProcessingRunResult.getFile(chainMappings[3].finalOutFile),
416
+
417
+ shmTsv: shmTsv
321
418
  }
322
419
  })
@@ -233,6 +233,8 @@ self.body(func(inputs) {
233
233
 
234
234
  mainAbundanceColumnNormalized := exportSpecs.mainAbundanceColumnNormalized
235
235
  mainAbundanceColumnUnnormalized := exportSpecs.mainAbundanceColumnUnnormalized
236
+
237
+ shmMapping := exportSpecs.shmMapping
236
238
  mainAbundanceColumnNormalizedArgs := exportSpecs.mainAbundanceColumnNormalizedArgs
237
239
  mainAbundanceColumnUnnormalizedArgs := exportSpecs.mainAbundanceColumnUnnormalizedArgs
238
240
 
@@ -566,6 +568,77 @@ self.body(func(inputs) {
566
568
  }
567
569
 
568
570
  if isSingleCell {
571
+ // SHM mutation columns: build filtering set and combined output specs
572
+ hasShm := len(shmMapping) > 0
573
+ shmColumnNames := {}
574
+ shmOutputSpecs := []
575
+ if hasShm {
576
+ for m in shmMapping {
577
+ shmColumnNames[m.tsvColumn] = true
578
+ }
579
+ shmColumnNames["fractionCDRMutations"] = true
580
+
581
+ orderP := 10400
582
+ shmDefs := [ {
583
+ outputColumn: "nMutations",
584
+ label: "Nt mutations",
585
+ specName: "pl7.app/vdj/sequence/nMutations",
586
+ valueType: "Int",
587
+ rankingOrder: "decreasing"
588
+ }, {
589
+ outputColumn: "nAAMutationsCDR",
590
+ label: "AA mutations (CDR)",
591
+ specName: "pl7.app/vdj/sequence/nAAMutationsCDR",
592
+ valueType: "Int",
593
+ rankingOrder: "decreasing"
594
+ }, {
595
+ outputColumn: "nAAMutationsFWR",
596
+ label: "AA mutations (FWR)",
597
+ specName: "pl7.app/vdj/sequence/nAAMutationsFWR",
598
+ valueType: "Int",
599
+ rankingOrder: "increasing"
600
+ } ]
601
+
602
+ for def in shmDefs {
603
+ shmOutputSpecs = append(shmOutputSpecs, {
604
+ column: def.outputColumn,
605
+ id: def.outputColumn,
606
+ allowNA: true,
607
+ naRegex: "^[a-z_]*$",
608
+ spec: {
609
+ valueType: def.valueType,
610
+ name: def.specName,
611
+ annotations: {
612
+ "pl7.app/label": def.label,
613
+ "pl7.app/table/orderPriority": string(orderP),
614
+ "pl7.app/table/visibility": "optional",
615
+ "pl7.app/isScore": "true",
616
+ "pl7.app/score/rankingOrder": def.rankingOrder
617
+ }
618
+ }
619
+ })
620
+ orderP -= 100
621
+ }
622
+
623
+ shmOutputSpecs = append(shmOutputSpecs, {
624
+ column: "fractionCDRMutations",
625
+ id: "fraction-cdr-mutations",
626
+ naRegex: "^[a-z_]*$",
627
+ allowNA: true,
628
+ spec: {
629
+ valueType: "Double",
630
+ name: "pl7.app/vdj/sequence/fractionCDRMutations",
631
+ annotations: {
632
+ "pl7.app/label": "CDR mutation fraction",
633
+ "pl7.app/table/orderPriority": string(orderP),
634
+ "pl7.app/table/visibility": "optional",
635
+ "pl7.app/isScore": "true",
636
+ "pl7.app/score/rankingOrder": "decreasing"
637
+ }
638
+ }
639
+ })
640
+ }
641
+
569
642
  for receptor in receptors {
570
643
  receptorInfo := receptorInfos[receptor]
571
644
 
@@ -627,7 +700,10 @@ self.body(func(inputs) {
627
700
 
628
701
  // Modify column visibility for TCR chains
629
702
  isTCRChain := text.has_prefix(chain, "TCR")
630
- columnsForSingleCell := columnsSpecPerClonotypeNoAggregates
703
+ // Filter out SHM mutation columns (We will generate chain-agnostic columns)
704
+ columnsForSingleCell := hasShm ? slices.filter(columnsSpecPerClonotypeNoAggregates, func(col) {
705
+ return !shmColumnNames[col.column]
706
+ }) : columnsSpecPerClonotypeNoAggregates
631
707
  if isTCRChain {
632
708
  visibilitySettings := {
633
709
  "bestCGene": "optional",
@@ -718,6 +794,23 @@ self.body(func(inputs) {
718
794
  path: ["cellsTsv"]
719
795
  } ]
720
796
 
797
+ if hasShm {
798
+ singleCellOutputs += [ {
799
+ type: "Xsv",
800
+ xsvType: "tsv",
801
+ settings: {
802
+ axes: [ axisByScClonotypeKeyGen(receptor) ],
803
+ columns: shmOutputSpecs,
804
+ storageFormat: "Parquet",
805
+ partitionKeyLength: 0
806
+ },
807
+ mem: "12GiB",
808
+ cpu: 2,
809
+ name: "shmCombined",
810
+ path: ["shmTsv"]
811
+ } ]
812
+ }
813
+
721
814
  chainA := receptorInfo.chains[0]
722
815
  chainB := receptorInfo.chains[1]
723
816
 
@@ -746,7 +839,8 @@ self.body(func(inputs) {
746
839
  params: {
747
840
  mainAbundanceColumn: mainAbundanceColumnUnnormalized,
748
841
  mainIsProductiveColumn: mainIsProductiveColumn,
749
- schemaPerClonotypeNoAggregates: columnsToSchema(columnsSpecPerClonotypeNoAggregates)
842
+ schemaPerClonotypeNoAggregates: columnsToSchema(columnsSpecPerClonotypeNoAggregates),
843
+ shmMapping: shmMapping
750
844
  }
751
845
  }
752
846
  }
@@ -769,6 +863,10 @@ self.body(func(inputs) {
769
863
  singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesBPrimary", "clonotypeProperties/" + receptor + "/bPrimary/")
770
864
  singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesBSecondary", "clonotypeProperties/" + receptor + "/bSecondary/")
771
865
 
866
+ if hasShm {
867
+ singleCellResult.addXsvOutputToBuilder(clonotypes, "shmCombined", "clonotypeProperties/" + receptor + "/shmCombined/")
868
+ }
869
+
772
870
  for columnName in singleCellResult.listXsvColumns("cellsLinkerTable") {
773
871
  anonymizedData := singleCellResult.outputData("cellsLinkerTable", columnName)
774
872
  clonotypes.add(