@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 2.18.3 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,13 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@2.18.3 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
3
+ > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@2.19.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  info: Skipping unknown file type: test/columns.test.ts
7
7
  Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
8
8
  Processing "src/calculate-export-specs.lib.tengo"...
9
9
  Processing "src/calculate-preset-info.tpl.tengo"...
10
+ Processing "src/clonotype-label.lib.tengo"...
10
11
  Processing "src/list-presets.tpl.tengo"...
11
12
  Processing "src/main.tpl.tengo"...
12
13
  Processing "src/mixcr-analyze.tpl.tengo"...
@@ -20,6 +21,7 @@ No syntax errors found.
20
21
  info: Skipping unknown file type: test/columns.test.ts
21
22
  info: Compiling 'dist'...
22
23
  info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/calculate-export-specs.lib.tengo
24
+ info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/clonotype-label.lib.tengo
23
25
  info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz
24
26
  info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/calculate-preset-info.plj.gz
25
27
  info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/list-presets.plj.gz
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # @platforma-open/milaboratories.mixcr-clonotyping.workflow
2
2
 
3
+ ## 2.19.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 4f09d25: - All table transformations migrated from ptransform and custom Python scripts to universal PT API
8
+ - Enhance export specifications by adding format property for fraction and sequence columns
9
+
3
10
  ## 2.18.3
4
11
 
5
12
  ### Patch Changes
@@ -170,7 +170,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
170
170
 
171
171
  columnsSpecPerSample := []
172
172
  columnsSpecPerSampleSc := undefined
173
- columnsSpecPerClonotype := []
173
+ columnsSpecPerClonotypeNoAggregates := []
174
174
  columnsSpecPerClonotypeSc := undefined
175
175
 
176
176
 
@@ -209,7 +209,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
209
209
  "pl7.app/abundance/unit": "reads",
210
210
  "pl7.app/abundance/normalized": "true",
211
211
  "pl7.app/abundance/isPrimary": !hasUmi ? "true" : undefined,
212
- "pl7.app/label": "Fraction of reads"
212
+ "pl7.app/label": "Fraction of reads",
213
+ "pl7.app/format": ".2p"
213
214
  })
214
215
  }
215
216
  } ]
@@ -220,8 +221,10 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
220
221
 
221
222
  mainAbundanceColumnUnnormalized := "readCount"
222
223
  mainAbundanceColumnNormalized := "readFraction"
224
+ mainAbundanceColumnUnnormalizedArgs := [ [ "-readCount" ] ]
225
+ mainAbundanceColumnNormalizedArgs := [ [ "-readFraction" ] ]
223
226
 
224
- mainAbundanceColumnAggregates := [{
227
+ columnsSpecPerClonotypeAggregates := [{
225
228
  column: mainAbundanceColumnUnnormalized + "Sum",
226
229
  id: "read-count-total",
227
230
  allowNA: false,
@@ -249,7 +252,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
249
252
  "pl7.app/isAbundance": "true",
250
253
  "pl7.app/abundance/unit": "reads",
251
254
  "pl7.app/abundance/normalized": "true",
252
- "pl7.app/label": "Mean Fraction of Reads"
255
+ "pl7.app/label": "Mean Fraction of Reads",
256
+ "pl7.app/format": ".2p"
253
257
  })
254
258
  }
255
259
  }]
@@ -286,7 +290,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
286
290
  "pl7.app/abundance/unit": "molecules",
287
291
  "pl7.app/abundance/normalized": "true",
288
292
  "pl7.app/abundance/isPrimary": "true",
289
- "pl7.app/label": "Fraction of UMIs"
293
+ "pl7.app/label": "Fraction of UMIs",
294
+ "pl7.app/format": ".2p"
290
295
  })
291
296
  }
292
297
  } ]
@@ -294,9 +299,13 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
294
299
  [ "-uniqueTagCount", "Molecule" ],
295
300
  [ "-uniqueTagFraction", "Molecule" ]
296
301
  ]
302
+
297
303
  mainAbundanceColumnNormalized = "uniqueMoleculeFraction"
298
304
  mainAbundanceColumnUnnormalized = "uniqueMoleculeCount"
299
- mainAbundanceColumnAggregates = [ {
305
+ mainAbundanceColumnNormalizedArgs = [ [ "-uniqueTagFraction", "Molecule" ] ]
306
+ mainAbundanceColumnUnnormalizedArgs = [ [ "-uniqueTagCount", "Molecule" ] ]
307
+
308
+ columnsSpecPerClonotypeAggregates = [ {
300
309
  column: mainAbundanceColumnUnnormalized + "Sum",
301
310
  id: "umi-count-total",
302
311
  allowNA: false,
@@ -324,7 +333,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
324
333
  "pl7.app/isAbundance": "true",
325
334
  "pl7.app/abundance/unit": "molecules",
326
335
  "pl7.app/abundance/normalized": "true",
327
- "pl7.app/label": "Mean Fraction of UMIs"
336
+ "pl7.app/label": "Mean Fraction of UMIs",
337
+ "pl7.app/format": ".2p"
328
338
  })
329
339
  }
330
340
  } ]
@@ -347,6 +357,20 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
347
357
  }
348
358
  }
349
359
 
360
+ columnsSpecPerClonotypeAggregates += [ sampleCountColumn ]
361
+
362
+ clonotypeLabelColumn := {
363
+ column: "clonotypeLabel",
364
+ id: "clonotype-label",
365
+ spec: {
366
+ name: "pl7.app/label",
367
+ valueType: "String",
368
+ annotations: a(100000, false, {
369
+ "pl7.app/label": "Clone label"
370
+ })
371
+ }
372
+ }
373
+
350
374
  if isSingleCell {
351
375
 
352
376
  columnsSpecPerSample = addSpec(columnsSpecPerSample, {
@@ -388,14 +412,12 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
388
412
  "pl7.app/abundance/unit": "cells",
389
413
  "pl7.app/abundance/normalized": "true",
390
414
  "pl7.app/abundance/isPrimary": "true",
391
- "pl7.app/label": "Fraction of Cells"
415
+ "pl7.app/label": "Fraction of Cells",
416
+ "pl7.app/format": ".2p"
392
417
  })
393
418
  }
394
419
  } ]
395
- columnsSpecPerClonotypeSc = [ sampleCountColumn ]
396
- } else {
397
- columnsSpecPerClonotype += [ sampleCountColumn ]
398
- columnsSpecPerClonotype += mainAbundanceColumnAggregates
420
+ columnsSpecPerClonotypeSc = [ sampleCountColumn, clonotypeLabelColumn ]
399
421
  }
400
422
 
401
423
  orderP := 80000
@@ -423,7 +445,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
423
445
  alphabetShortMixcr := isAminoAcid ? "aa" : "n"
424
446
  columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
425
447
  visibility := featureU == "CDR3" && (!isSingleCell || isAminoAcid) // isSingleCell ? (featureU == "CDR3") && isAminoAcid : (featureU == "CDR3") || (featureU == assemblingFeature)
426
- columnsSpecPerClonotype += [ {
448
+ columnsSpecPerClonotypeNoAggregates += [ {
427
449
  column: columnName,
428
450
  id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
429
451
  naRegex: "region_not_covered",
@@ -438,6 +460,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
438
460
  "pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
439
461
  "pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
440
462
  "pl7.app/vdj/imputed": string(isImputed),
463
+ "pl7.app/table/fontFamily": "monospace",
441
464
  "pl7.app/label": featureInFrameU + " " + alphabetShort
442
465
  })
443
466
  }
@@ -449,7 +472,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
449
472
  if !isImputed && featureU == assemblingFeature {
450
473
  for annotationType in annotationTypes {
451
474
  columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
452
- columnsSpecPerClonotype += [ {
475
+ columnsSpecPerClonotypeNoAggregates += [ {
453
476
  column: columnName,
454
477
  id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
455
478
  naRegex: "region_not_covered",
@@ -475,7 +498,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
475
498
 
476
499
 
477
500
  if featureU == "CDR3" {
478
- columnsSpecPerClonotype += [ {
501
+ columnsSpecPerClonotypeNoAggregates += [ {
479
502
  column: alphabetShortMixcr + "Length" + featureU,
480
503
  id: alphabetShortMixcr + "-length-" + featureL,
481
504
  naRegex: "region_not_covered",
@@ -495,24 +518,24 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
495
518
  }
496
519
 
497
520
 
498
- if isAminoAcid && !isSingleCell && featureU == "CDR3" {
499
- columnsSpecPerClonotype += [ {
500
- column: columnName,
501
- id: "clonotype-label",
502
- preProcess: [{
503
- type: "regexpReplace",
504
- pattern: "^region_not_covered$",
505
- replacement: "Unlabelled"
506
- }],
507
- spec: {
508
- name: "pl7.app/label",
509
- valueType: "String",
510
- annotations: a(100000, false, {
511
- "pl7.app/label": "Clone label"
512
- })
513
- }
514
- } ]
515
- }
521
+
522
+
523
+
524
+
525
+
526
+
527
+
528
+
529
+
530
+
531
+
532
+
533
+
534
+
535
+
536
+
537
+
538
+
516
539
  }
517
540
  }
518
541
  }
@@ -537,7 +560,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
537
560
  for vdjcU in ["V", "D", "J", "C"] {
538
561
  vdjcL := text.to_lower(vdjcU)
539
562
  for variant in geneHitColumnVariants {
540
- columnsSpecPerClonotype += [ {
563
+ columnsSpecPerClonotypeNoAggregates += [ {
541
564
  column: "best" + vdjcU + variant.columnNameSuffix,
542
565
  id: "best-" + vdjcL + variant.idSuffix,
543
566
  naRegex: "",
@@ -596,7 +619,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
596
619
  }
597
620
 
598
621
  for variant in mutationColumnVariants {
599
- columnsSpecPerClonotype += [ {
622
+ columnsSpecPerClonotypeNoAggregates += [ {
600
623
  column: alphabetShortMixcr + variant.name + coreFeature,
601
624
  id: alphabetShortMixcr + variant.idPart + geneL,
602
625
  allowNA: true,
@@ -644,10 +667,10 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
644
667
 
645
668
 
646
669
  ]
647
- mainProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
648
- mainProductiveArgs := [ flagColumnVariants[0].arg, productiveFeature ]
670
+ mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
671
+ mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
649
672
  for variant in flagColumnVariants {
650
- columnsSpecPerClonotype += [ {
673
+ columnsSpecPerClonotypeNoAggregates += [ {
651
674
  column: variant.columnPrefix + productiveFeature,
652
675
  id: variant.id,
653
676
  allowNA: false,
@@ -692,7 +715,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
692
715
 
693
716
 
694
717
 
695
- columnsSpecPerClonotype += [ {
718
+ columnsSpecPerClonotypeNoAggregates += [ {
696
719
  column: "isotypePrimary",
697
720
  id: "isotype",
698
721
  naRegex: "",
@@ -724,9 +747,11 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
724
747
  [ "-topChains" ]
725
748
  ]
726
749
 
750
+ columnsSpecPerClonotypeNoAggregates += [ clonotypeLabelColumn ]
751
+
727
752
 
728
753
 
729
- columnsSpec := columnsSpecPerSample + columnsSpecPerClonotype
754
+ columnsSpec := columnsSpecPerSample + columnsSpecPerClonotypeNoAggregates + columnsSpecPerClonotypeAggregates
730
755
 
731
756
 
732
757
  columnsByName := {}
@@ -767,8 +792,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
767
792
  "pl7.app/vdj/clonotypingRunId": blockId
768
793
  },
769
794
  annotations: {
770
- "pl7.app/label": "Clonotype key",
771
- "pl7.app/table/visibility": "optional",
795
+ "pl7.app/label": "Clonotype ID",
796
+ "pl7.app/table/fontFamily": "monospace",
797
+ "pl7.app/table/visibility": "default",
772
798
  "pl7.app/table/orderPriority": "110000",
773
799
  "pl7.app/segmentedBy": string(json.encode(["pl7.app/vdj/clonotypingRunId"]))
774
800
  }
@@ -791,8 +817,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
791
817
  "pl7.app/vdj/clonotypingRunId": blockId
792
818
  },
793
819
  annotations: {
794
- "pl7.app/label": "Clone label",
795
- "pl7.app/table/visibility": "optional",
820
+ "pl7.app/label": "Clonotype ID",
821
+ "pl7.app/table/fontFamily": "monospace",
822
+ "pl7.app/table/visibility": "default",
796
823
  "pl7.app/table/orderPriority": "110000",
797
824
  "pl7.app/segmentedBy": string(json.encode(["pl7.app/vdj/clonotypingRunId"]))
798
825
  }
@@ -825,16 +852,19 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
825
852
 
826
853
  columnsSpecPerSample: columnsSpecPerSample,
827
854
  columnsSpecPerSampleSc: columnsSpecPerSampleSc,
828
- columnsSpecPerClonotype: columnsSpecPerClonotype,
855
+ columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
856
+ columnsSpecPerClonotypeAggregates: columnsSpecPerClonotypeAggregates,
829
857
  columnsSpecPerClonotypeSc: columnsSpecPerClonotypeSc,
830
858
 
831
859
  columnsSpec: columnsSpec,
832
860
 
833
861
  mainAbundanceColumnNormalized: mainAbundanceColumnNormalized,
862
+ mainAbundanceColumnNormalizedArgs: mainAbundanceColumnNormalizedArgs,
834
863
  mainAbundanceColumnUnnormalized: mainAbundanceColumnUnnormalized,
864
+ mainAbundanceColumnUnnormalizedArgs: mainAbundanceColumnUnnormalizedArgs,
835
865
 
836
- mainProductiveColumn: mainProductiveColumn,
837
- mainProductiveArgs: mainProductiveArgs,
866
+ mainIsProductiveColumn: mainIsProductiveColumn,
867
+ mainIsProductiveArgs: mainIsProductiveArgs,
838
868
 
839
869
  exportArgs: exportArgs
840
870
  }
@@ -0,0 +1,121 @@
1
+ generateClonotypeLabelSteps := func(clonotypeKeyCol, clonotypeLabelCol, targetTable) {
2
+ prefixTempCol := clonotypeLabelCol + "_prefix_temp"
3
+ rankTempCol := clonotypeLabelCol + "_rank_temp"
4
+
5
+ steps := []
6
+
7
+
8
+ steps = append(steps, {
9
+ type: "add_columns",
10
+ table: targetTable,
11
+ columns: [{
12
+ name: prefixTempCol,
13
+ expression: {
14
+ type: "to_upper",
15
+ value: {
16
+ type: "substring",
17
+ value: {
18
+ type: "str_replace",
19
+ value: { type: "col", name: clonotypeKeyCol },
20
+ pattern: "\\d", // Regex for digits
21
+ replacement: "",
22
+ replaceAll: true
23
+ },
24
+ start: 0,
25
+ length: 5
26
+ }
27
+ }
28
+ }]
29
+ })
30
+
31
+
32
+
33
+ steps = append(steps, {
34
+ type: "add_columns",
35
+ table: targetTable,
36
+ columns: [{
37
+ name: rankTempCol,
38
+ expression: {
39
+ type: "rank",
40
+ partitionBy: [{ type: "col", name: prefixTempCol }],
41
+ orderBy: [{ type: "col", name: clonotypeKeyCol }]
42
+ }
43
+ }]
44
+ })
45
+
46
+
47
+ steps = append(steps, {
48
+ type: "add_columns",
49
+ table: targetTable,
50
+ columns: [{
51
+ name: clonotypeLabelCol,
52
+ expression: {
53
+ type: "when_then_otherwise",
54
+ conditions: [
55
+ {
56
+ when: {
57
+ type: "gt",
58
+ lhs: { type: "col", name: rankTempCol },
59
+ rhs: { type: "const", value: 1 }
60
+ },
61
+ then: {
62
+ type: "str_join",
63
+ operands: [
64
+ { type: "const", value: "C" },
65
+ { type: "col", name: prefixTempCol },
66
+ { type: "col", name: rankTempCol }
67
+ ],
68
+ delimiter: "-"
69
+ }
70
+ }
71
+ ],
72
+ otherwise: {
73
+ type: "str_join",
74
+ operands: [
75
+ { type: "const", value: "C" },
76
+ { type: "col", name: prefixTempCol }
77
+ ],
78
+ delimiter: "-"
79
+ }
80
+ }
81
+ }]
82
+ })
83
+
84
+ return steps
85
+ }
86
+
87
+ addClonotypeLabelColumnsPt := func(df, clonotypeKeyCol, clonotypeLabelCol, pt) {
88
+ prefixTempCol := clonotypeLabelCol + "_prefix_temp"
89
+ rankTempCol := clonotypeLabelCol + "_rank_temp"
90
+
91
+
92
+ df = df.withColumns(
93
+ pt.col(clonotypeKeyCol).
94
+ strReplace("\\d", "", { replaceAll: true }).
95
+ strSlice(0, 5). // Take first 5 characters
96
+ strToUpper(). // Convert to uppercase
97
+ alias(prefixTempCol)
98
+ )
99
+
100
+
101
+ df = df.withColumns(
102
+ pt.rank(pt.col(clonotypeKeyCol)). // Rank based on clonotypeKeyCol (default ascending)
103
+ over(pt.col(prefixTempCol)). // Partition by prefixTempCol
104
+ alias(rankTempCol)
105
+ )
106
+
107
+
108
+ df = df.withColumns(
109
+ pt.when(pt.col(rankTempCol).gt(pt.lit(1))).
110
+ then(pt.concatStr([pt.lit("C"), pt.col(prefixTempCol), pt.col(rankTempCol).cast("String")], { delimiter: "-" })).
111
+ otherwise(pt.concatStr([pt.lit("C"), pt.col(prefixTempCol)], { delimiter: "-" })).
112
+ alias(clonotypeLabelCol)
113
+ )
114
+
115
+ return df.withoutColumns(prefixTempCol, rankTempCol)
116
+ }
117
+
118
+ export {
119
+ generateClonotypeLabelSteps: generateClonotypeLabelSteps,
120
+ addClonotypeLabelColumnsPt: addClonotypeLabelColumnsPt
121
+ }
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,18 +1,15 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
3
- "version": "2.18.3",
3
+ "version": "2.19.0",
4
4
  "description": "Tengo-based template",
5
5
  "dependencies": {
6
- "@platforma-sdk/workflow-tengo": "^4.4.5",
7
- "@platforma-open/milaboratories.mixcr-clonotyping-2.single-cell-scripts": "1.1.3",
8
- "@platforma-open/milaboratories.mixcr-clonotyping-2.hash-column": "1.1.1"
6
+ "@platforma-sdk/workflow-tengo": "^4.6.1",
7
+ "@platforma-open/milaboratories.software-mixcr": "4.7.0-169-develop"
9
8
  },
10
9
  "devDependencies": {
11
- "@platforma-sdk/tengo-builder": "^2.1.3",
12
- "@platforma-open/milaboratories.software-mixcr": "4.7.0-169-develop",
13
- "@platforma-open/milaboratories.software-ptransform": "^1.4.3",
14
- "@platforma-sdk/test": "^1.30.24",
15
- "vitest": "~2.1.8",
10
+ "@platforma-sdk/tengo-builder": "^2.1.5",
11
+ "@platforma-sdk/test": "^1.31.13",
12
+ "vitest": "~2.1.9",
16
13
  "typescript": "~5.5.4"
17
14
  },
18
15
  "scripts": {
@@ -2,16 +2,14 @@ ll := import("@platforma-sdk/workflow-tengo:ll")
2
2
  self := import("@platforma-sdk/workflow-tengo:tpl")
3
3
  pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
4
4
  slices := import("@platforma-sdk/workflow-tengo:slices")
5
- assets := import("@platforma-sdk/workflow-tengo:assets")
6
- exec := import("@platforma-sdk/workflow-tengo:exec")
7
5
  maps := import("@platforma-sdk/workflow-tengo:maps")
6
+ clonotypeLabel := import(":clonotype-label")
7
+ pt := import("@platforma-sdk/workflow-tengo:pt")
8
8
 
9
9
  json := import("json")
10
10
 
11
11
  self.defineOutputs("tsv")
12
12
 
13
- ptransformSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
14
-
15
13
  self.body(func(inputs) {
16
14
  inputData := inputs[pConstants.VALUE_FIELD_NAME]
17
15
  inputDataMeta := inputData.getDataAsJson()
@@ -21,48 +19,18 @@ self.body(func(inputs) {
21
19
  params := inputs.params
22
20
  mainAbundanceColumnNormalized := params.mainAbundanceColumnNormalized
23
21
  mainAbundanceColumnUnnormalized := params.mainAbundanceColumnUnnormalized
24
- clonotypeColumns := params.clonotypeColumns
25
-
26
- pickCols := []
27
- for col in clonotypeColumns {
28
- if col == "sampleCount" || col == mainAbundanceColumnNormalized + "Mean" || col == mainAbundanceColumnUnnormalized + "Sum" {
29
- continue
30
- }
31
- pickCols = append(pickCols, [col, col])
32
- }
33
22
 
34
- // Adding clonotypeKey column
35
- pWorkflow := {
36
- steps: [ {
37
- type: "aggregate",
38
- groupBy: ["clonotypeKey"],
39
- aggregations: [ {
40
- type: "max_by",
41
- rankingCol: mainAbundanceColumnNormalized,
42
- pickCols: pickCols
43
- }, {
44
- type: "count",
45
- src: mainAbundanceColumnNormalized,
46
- dst: "sampleCount"
47
- }, {
48
- type: "sum",
49
- src: mainAbundanceColumnUnnormalized,
50
- dst: mainAbundanceColumnUnnormalized + "Sum"
51
- }, {
52
- type: "mean",
53
- src: mainAbundanceColumnNormalized,
54
- dst: mainAbundanceColumnNormalized + "Mean"
55
- }]
56
- } ]
57
- }
23
+ // { column: string; type: string }
24
+ schemaPerClonotypeNoAggregates := params.schemaPerClonotypeNoAggregates
25
+ schemaPerSample := params.schemaPerSample
58
26
 
59
- aggregateBuilderCmd := exec.builder().
60
- printErrStreamToStdout().
61
- software(ptransformSw).
62
- arg("--workflow").arg("wf.json").
63
- writeFile("wf.json", json.encode(pWorkflow))
27
+ wf := pt.workflow()
28
+ dataFrames := []
64
29
 
65
30
  inputMap := inputData.inputs()
31
+
32
+ baseSchemaForRead := schemaPerSample + [ { column: "clonotypeKey", type: "String" } ]
33
+
66
34
  for sKey in maps.getKeys(inputMap) {
67
35
  inputFile := inputMap[sKey]
68
36
  key := json.decode(sKey)
@@ -70,16 +38,54 @@ self.body(func(inputs) {
70
38
  ll.panic("malformed key: %v", sKey)
71
39
  }
72
40
  sampleId := key[0]
73
- aggregateBuilderCmd.
74
- arg(sampleId + ".tsv").
75
- addFile(sampleId + ".tsv", inputFile)
41
+ dfId := "table_" + sampleId
42
+
43
+ df := wf.frame({
44
+ file: inputFile,
45
+ xsvType: "tsv",
46
+ schema: baseSchemaForRead
47
+ }, {
48
+ id: dfId,
49
+ inferSchema: false
50
+ })
51
+ dataFrames = append(dataFrames, df)
52
+ }
53
+
54
+ currentDf := undefined
55
+ if len(dataFrames) == 0 {
56
+ ll.panic("no input files found")
57
+ } else if len(dataFrames) == 1 {
58
+ currentDf = dataFrames[0]
59
+ } else {
60
+ currentDf = pt.concat(dataFrames)
61
+ }
62
+
63
+ aggExpressions := []
64
+
65
+ for colDef in schemaPerClonotypeNoAggregates {
66
+ if colDef.column == "clonotypeLabel" {
67
+ continue
68
+ }
69
+ aggExpressions = append(aggExpressions,
70
+ pt.col(colDef.column).maxBy(pt.col(mainAbundanceColumnNormalized)).alias(colDef.column)
71
+ )
76
72
  }
77
73
 
78
- aggregateCmd := aggregateBuilderCmd.
79
- arg("output.tsv").saveFile("output.tsv").
80
- run()
74
+ aggExpressions = append(aggExpressions,
75
+ pt.col(mainAbundanceColumnNormalized).count().alias("sampleCount"),
76
+ pt.col(mainAbundanceColumnUnnormalized).sum().alias(mainAbundanceColumnUnnormalized + "Sum"),
77
+ pt.col(mainAbundanceColumnNormalized).mean().alias(mainAbundanceColumnNormalized + "Mean")
78
+ )
79
+
80
+ aggregatedDf := currentDf.groupBy("clonotypeKey").agg(aggExpressions...)
81
+
82
+ aggregatedDf = clonotypeLabel.addClonotypeLabelColumnsPt(aggregatedDf, "clonotypeKey", "clonotypeLabel", pt)
83
+
84
+ aggregatedDf.save("output.tsv")
85
+
86
+ ptablerResult := wf.run()
81
87
 
82
- processedTsv := aggregateCmd.getFile("output.tsv")
88
+ processedTsv := ptablerResult.getFile("output.tsv")
83
89
 
84
90
  return {
85
91
  tsv: processedTsv