@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 2.18.2 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -170,7 +170,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
170
170
 
171
171
  columnsSpecPerSample := []
172
172
  columnsSpecPerSampleSc := undefined
173
- columnsSpecPerClonotype := []
173
+ columnsSpecPerClonotypeNoAggregates := []
174
174
  columnsSpecPerClonotypeSc := undefined
175
175
 
176
176
  // array of array of arg groups
@@ -209,7 +209,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
209
209
  "pl7.app/abundance/unit": "reads",
210
210
  "pl7.app/abundance/normalized": "true",
211
211
  "pl7.app/abundance/isPrimary": !hasUmi ? "true" : undefined,
212
- "pl7.app/label": "Fraction of reads"
212
+ "pl7.app/label": "Fraction of reads",
213
+ "pl7.app/format": ".2p"
213
214
  })
214
215
  }
215
216
  } ]
@@ -220,8 +221,10 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
220
221
 
221
222
  mainAbundanceColumnUnnormalized := "readCount"
222
223
  mainAbundanceColumnNormalized := "readFraction"
224
+ mainAbundanceColumnUnnormalizedArgs := [ [ "-readCount" ] ]
225
+ mainAbundanceColumnNormalizedArgs := [ [ "-readFraction" ] ]
223
226
 
224
- mainAbundanceColumnAggregates := [{
227
+ columnsSpecPerClonotypeAggregates := [{
225
228
  column: mainAbundanceColumnUnnormalized + "Sum",
226
229
  id: "read-count-total",
227
230
  allowNA: false,
@@ -249,7 +252,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
249
252
  "pl7.app/isAbundance": "true",
250
253
  "pl7.app/abundance/unit": "reads",
251
254
  "pl7.app/abundance/normalized": "true",
252
- "pl7.app/label": "Mean Fraction of Reads"
255
+ "pl7.app/label": "Mean Fraction of Reads",
256
+ "pl7.app/format": ".2p"
253
257
  })
254
258
  }
255
259
  }]
@@ -286,7 +290,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
286
290
  "pl7.app/abundance/unit": "molecules",
287
291
  "pl7.app/abundance/normalized": "true",
288
292
  "pl7.app/abundance/isPrimary": "true",
289
- "pl7.app/label": "Fraction of UMIs"
293
+ "pl7.app/label": "Fraction of UMIs",
294
+ "pl7.app/format": ".2p"
290
295
  })
291
296
  }
292
297
  } ]
@@ -294,9 +299,13 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
294
299
  [ "-uniqueTagCount", "Molecule" ],
295
300
  [ "-uniqueTagFraction", "Molecule" ]
296
301
  ]
302
+
297
303
  mainAbundanceColumnNormalized = "uniqueMoleculeFraction"
298
304
  mainAbundanceColumnUnnormalized = "uniqueMoleculeCount"
299
- mainAbundanceColumnAggregates = [ {
305
+ mainAbundanceColumnNormalizedArgs = [ [ "-uniqueTagFraction", "Molecule" ] ]
306
+ mainAbundanceColumnUnnormalizedArgs = [ [ "-uniqueTagCount", "Molecule" ] ]
307
+
308
+ columnsSpecPerClonotypeAggregates = [ {
300
309
  column: mainAbundanceColumnUnnormalized + "Sum",
301
310
  id: "umi-count-total",
302
311
  allowNA: false,
@@ -324,7 +333,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
324
333
  "pl7.app/isAbundance": "true",
325
334
  "pl7.app/abundance/unit": "molecules",
326
335
  "pl7.app/abundance/normalized": "true",
327
- "pl7.app/label": "Mean Fraction of UMIs"
336
+ "pl7.app/label": "Mean Fraction of UMIs",
337
+ "pl7.app/format": ".2p"
328
338
  })
329
339
  }
330
340
  } ]
@@ -347,6 +357,20 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
347
357
  }
348
358
  }
349
359
 
360
+ columnsSpecPerClonotypeAggregates += [ sampleCountColumn ]
361
+
362
+ clonotypeLabelColumn := {
363
+ column: "clonotypeLabel",
364
+ id: "clonotype-label",
365
+ spec: {
366
+ name: "pl7.app/label",
367
+ valueType: "String",
368
+ annotations: a(100000, false, {
369
+ "pl7.app/label": "Clone label"
370
+ })
371
+ }
372
+ }
373
+
350
374
  if isSingleCell {
351
375
  // copying reads and umi counts and fraction removing isPrimary and isAnchor
352
376
  columnsSpecPerSample = addSpec(columnsSpecPerSample, {
@@ -388,14 +412,12 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
388
412
  "pl7.app/abundance/unit": "cells",
389
413
  "pl7.app/abundance/normalized": "true",
390
414
  "pl7.app/abundance/isPrimary": "true",
391
- "pl7.app/label": "Fraction of Cells"
415
+ "pl7.app/label": "Fraction of Cells",
416
+ "pl7.app/format": ".2p"
392
417
  })
393
418
  }
394
419
  } ]
395
- columnsSpecPerClonotypeSc = [ sampleCountColumn ]
396
- } else {
397
- columnsSpecPerClonotype += [ sampleCountColumn ]
398
- columnsSpecPerClonotype += mainAbundanceColumnAggregates
420
+ columnsSpecPerClonotypeSc = [ sampleCountColumn, clonotypeLabelColumn ]
399
421
  }
400
422
 
401
423
  orderP := 80000
@@ -423,7 +445,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
423
445
  alphabetShortMixcr := isAminoAcid ? "aa" : "n"
424
446
  columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
425
447
  visibility := featureU == "CDR3" && (!isSingleCell || isAminoAcid) // isSingleCell ? (featureU == "CDR3") && isAminoAcid : (featureU == "CDR3") || (featureU == assemblingFeature)
426
- columnsSpecPerClonotype += [ {
448
+ columnsSpecPerClonotypeNoAggregates += [ {
427
449
  column: columnName,
428
450
  id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
429
451
  naRegex: "region_not_covered",
@@ -438,6 +460,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
438
460
  "pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
439
461
  "pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
440
462
  "pl7.app/vdj/imputed": string(isImputed),
463
+ "pl7.app/table/fontFamily": "monospace",
441
464
  "pl7.app/label": featureInFrameU + " " + alphabetShort
442
465
  })
443
466
  }
@@ -449,7 +472,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
449
472
  if !isImputed && featureU == assemblingFeature {
450
473
  for annotationType in annotationTypes {
451
474
  columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
452
- columnsSpecPerClonotype += [ {
475
+ columnsSpecPerClonotypeNoAggregates += [ {
453
476
  column: columnName,
454
477
  id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
455
478
  naRegex: "region_not_covered",
@@ -475,7 +498,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
475
498
 
476
499
  // For now calculate length only for CDR3 to keep the number of columns manageable
477
500
  if featureU == "CDR3" {
478
- columnsSpecPerClonotype += [ {
501
+ columnsSpecPerClonotypeNoAggregates += [ {
479
502
  column: alphabetShortMixcr + "Length" + featureU,
480
503
  id: alphabetShortMixcr + "-length-" + featureL,
481
504
  naRegex: "region_not_covered",
@@ -495,24 +518,24 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
495
518
  }
496
519
 
497
520
  // label column
498
- if isAminoAcid && !isSingleCell && featureU == "CDR3" {
499
- columnsSpecPerClonotype += [ {
500
- column: columnName,
501
- id: "clonotype-label",
502
- preProcess: [{
503
- type: "regexpReplace",
504
- pattern: "^region_not_covered$",
505
- replacement: "Unlabelled"
506
- }],
507
- spec: {
508
- name: "pl7.app/label",
509
- valueType: "String",
510
- annotations: a(100000, false, {
511
- "pl7.app/label": "Clone label"
512
- })
513
- }
514
- } ]
515
- }
521
+ // if isAminoAcid && !isSingleCell && featureU == "CDR3" {
522
+ // columnsSpecPerClonotype += [ {
523
+ // column: columnName,
524
+ // id: "clonotype-label",
525
+ // preProcess: [{
526
+ // type: "regexpReplace",
527
+ // pattern: "^region_not_covered$",
528
+ // replacement: "Unlabelled"
529
+ // }],
530
+ // spec: {
531
+ // name: "pl7.app/label",
532
+ // valueType: "String",
533
+ // annotations: a(100000, false, {
534
+ // "pl7.app/label": "Clone label"
535
+ // })
536
+ // }
537
+ // } ]
538
+ // }
516
539
  }
517
540
  }
518
541
  }
@@ -537,7 +560,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
537
560
  for vdjcU in ["V", "D", "J", "C"] {
538
561
  vdjcL := text.to_lower(vdjcU)
539
562
  for variant in geneHitColumnVariants {
540
- columnsSpecPerClonotype += [ {
563
+ columnsSpecPerClonotypeNoAggregates += [ {
541
564
  column: "best" + vdjcU + variant.columnNameSuffix,
542
565
  id: "best-" + vdjcL + variant.idSuffix,
543
566
  naRegex: "",
@@ -596,7 +619,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
596
619
  }
597
620
 
598
621
  for variant in mutationColumnVariants {
599
- columnsSpecPerClonotype += [ {
622
+ columnsSpecPerClonotypeNoAggregates += [ {
600
623
  column: alphabetShortMixcr + variant.name + coreFeature,
601
624
  id: alphabetShortMixcr + variant.idPart + geneL,
602
625
  allowNA: true,
@@ -644,10 +667,10 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
644
667
  // visibility: false
645
668
  // }
646
669
  ]
647
- mainProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
648
- mainProductiveArgs := [ flagColumnVariants[0].arg, productiveFeature ]
670
+ mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
671
+ mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
649
672
  for variant in flagColumnVariants {
650
- columnsSpecPerClonotype += [ {
673
+ columnsSpecPerClonotypeNoAggregates += [ {
651
674
  column: variant.columnPrefix + productiveFeature,
652
675
  id: variant.id,
653
676
  allowNA: false,
@@ -692,7 +715,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
692
715
 
693
716
  // Isotype and chain
694
717
 
695
- columnsSpecPerClonotype += [ {
718
+ columnsSpecPerClonotypeNoAggregates += [ {
696
719
  column: "isotypePrimary",
697
720
  id: "isotype",
698
721
  naRegex: "",
@@ -724,9 +747,11 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
724
747
  [ "-topChains" ]
725
748
  ]
726
749
 
750
+ columnsSpecPerClonotypeNoAggregates += [ clonotypeLabelColumn ]
751
+
727
752
  // All columns are added
728
753
 
729
- columnsSpec := columnsSpecPerSample + columnsSpecPerClonotype
754
+ columnsSpec := columnsSpecPerSample + columnsSpecPerClonotypeNoAggregates + columnsSpecPerClonotypeAggregates
730
755
 
731
756
  // Creating a column map for fast search
732
757
  columnsByName := {}
@@ -767,8 +792,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
767
792
  "pl7.app/vdj/clonotypingRunId": blockId
768
793
  },
769
794
  annotations: {
770
- "pl7.app/label": "Clonotype key",
771
- "pl7.app/table/visibility": "optional",
795
+ "pl7.app/label": "Clonotype ID",
796
+ "pl7.app/table/fontFamily": "monospace",
797
+ "pl7.app/table/visibility": "default",
772
798
  "pl7.app/table/orderPriority": "110000",
773
799
  "pl7.app/segmentedBy": string(json.encode(["pl7.app/vdj/clonotypingRunId"]))
774
800
  }
@@ -791,8 +817,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
791
817
  "pl7.app/vdj/clonotypingRunId": blockId
792
818
  },
793
819
  annotations: {
794
- "pl7.app/label": "Clone label",
795
- "pl7.app/table/visibility": "optional",
820
+ "pl7.app/label": "Clonotype ID",
821
+ "pl7.app/table/fontFamily": "monospace",
822
+ "pl7.app/table/visibility": "default",
796
823
  "pl7.app/table/orderPriority": "110000",
797
824
  "pl7.app/segmentedBy": string(json.encode(["pl7.app/vdj/clonotypingRunId"]))
798
825
  }
@@ -825,16 +852,19 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
825
852
 
826
853
  columnsSpecPerSample: columnsSpecPerSample,
827
854
  columnsSpecPerSampleSc: columnsSpecPerSampleSc,
828
- columnsSpecPerClonotype: columnsSpecPerClonotype,
855
+ columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
856
+ columnsSpecPerClonotypeAggregates: columnsSpecPerClonotypeAggregates,
829
857
  columnsSpecPerClonotypeSc: columnsSpecPerClonotypeSc,
830
858
 
831
859
  columnsSpec: columnsSpec,
832
860
 
833
861
  mainAbundanceColumnNormalized: mainAbundanceColumnNormalized,
862
+ mainAbundanceColumnNormalizedArgs: mainAbundanceColumnNormalizedArgs,
834
863
  mainAbundanceColumnUnnormalized: mainAbundanceColumnUnnormalized,
864
+ mainAbundanceColumnUnnormalizedArgs: mainAbundanceColumnUnnormalizedArgs,
835
865
 
836
- mainProductiveColumn: mainProductiveColumn,
837
- mainProductiveArgs: mainProductiveArgs,
866
+ mainIsProductiveColumn: mainIsProductiveColumn,
867
+ mainIsProductiveArgs: mainIsProductiveArgs,
838
868
 
839
869
  exportArgs: exportArgs
840
870
  }
@@ -0,0 +1,121 @@
1
+ generateClonotypeLabelSteps := func(clonotypeKeyCol, clonotypeLabelCol, targetTable) { // Returns a list of three "add_columns" step descriptors for targetTable that build clonotypeLabelCol from clonotypeKeyCol
2
+ prefixTempCol := clonotypeLabelCol + "_prefix_temp"
3
+ rankTempCol := clonotypeLabelCol + "_rank_temp"
4
+ // NOTE(review): none of the steps below drops the two temp columns named above - presumably the caller removes them; verify
5
+ steps := []
6
+
7
+ // Step 1: prefix_temp column = key with digits removed, then 5 chars starting at position 0, then uppercased
8
+ steps = append(steps, {
9
+ type: "add_columns",
10
+ table: targetTable,
11
+ columns: [{
12
+ name: prefixTempCol,
13
+ expression: {
14
+ type: "to_upper",
15
+ value: {
16
+ type: "substring",
17
+ value: {
18
+ type: "str_replace",
19
+ value: { type: "col", name: clonotypeKeyCol },
20
+ pattern: "\\d", // Regex for digits
21
+ replacement: "",
22
+ replaceAll: true
23
+ },
24
+ start: 0,
25
+ length: 5
26
+ }
27
+ }
28
+ }]
29
+ })
30
+
31
+ // Step 2: rank_temp column = rank expression partitioned by prefixTempCol and ordered by clonotypeKeyCol,
32
+ // used to diversify repeated clonotype labels (prefix collisions; the original author attributes them to the birthday paradox)
33
+ steps = append(steps, {
34
+ type: "add_columns",
35
+ table: targetTable,
36
+ columns: [{
37
+ name: rankTempCol,
38
+ expression: {
39
+ type: "rank",
40
+ partitionBy: [{ type: "col", name: prefixTempCol }],
41
+ orderBy: [{ type: "col", name: clonotypeKeyCol }]
42
+ }
43
+ }]
44
+ })
45
+
46
+ // Step 3: final clonotypeLabelCol column: "C", prefix and rank joined with "-" when rank > 1, otherwise only "C" and prefix (C-XXXXX-RANK or C-XXXXX)
47
+ steps = append(steps, {
48
+ type: "add_columns",
49
+ table: targetTable,
50
+ columns: [{
51
+ name: clonotypeLabelCol,
52
+ expression: {
53
+ type: "when_then_otherwise",
54
+ conditions: [
55
+ {
56
+ when: {
57
+ type: "gt",
58
+ lhs: { type: "col", name: rankTempCol },
59
+ rhs: { type: "const", value: 1 }
60
+ },
61
+ then: {
62
+ type: "str_join",
63
+ operands: [
64
+ { type: "const", value: "C" },
65
+ { type: "col", name: prefixTempCol },
66
+ { type: "col", name: rankTempCol }
67
+ ],
68
+ delimiter: "-"
69
+ }
70
+ }
71
+ ],
72
+ otherwise: {
73
+ type: "str_join",
74
+ operands: [
75
+ { type: "const", value: "C" },
76
+ { type: "col", name: prefixTempCol }
77
+ ],
78
+ delimiter: "-"
79
+ }
80
+ }
81
+ }]
82
+ })
83
+
84
+ return steps
85
+ }
86
+
87
+ addClonotypeLabelColumnsPt := func(df, clonotypeKeyCol, clonotypeLabelCol, pt) { // Adds clonotypeLabelCol (derived from clonotypeKeyCol) to df using the passed-in pt expression builder and returns the resulting frame
88
+ prefixTempCol := clonotypeLabelCol + "_prefix_temp"
89
+ rankTempCol := clonotypeLabelCol + "_rank_temp"
90
+ // The two names above are intermediate columns; they are excluded again in the return statement
91
+ // Add prefix_temp column (digits removed, first 5 chars, uppercased)
92
+ df = df.withColumns(
93
+ pt.col(clonotypeKeyCol).
94
+ strReplace("\\d", "", { replaceAll: true }).
95
+ strSlice(0, 5). // Take first 5 characters
96
+ strToUpper(). // Convert to uppercase
97
+ alias(prefixTempCol)
98
+ )
99
+
100
+ // Add rank_temp column - rank of the clonotypeKeyCol within each prefixTempCol group
101
+ df = df.withColumns(
102
+ pt.rank(pt.col(clonotypeKeyCol)). // Rank based on clonotypeKeyCol (default ascending)
103
+ over(pt.col(prefixTempCol)). // Partition by prefixTempCol
104
+ alias(rankTempCol)
105
+ )
106
+
107
+ // Add final clonotypeLabelCol column (C-XXXXX, or C-XXXXX-RANK when rank > 1; the rank is cast to "String" before joining)
108
+ df = df.withColumns(
109
+ pt.when(pt.col(rankTempCol).gt(pt.lit(1))).
110
+ then(pt.concatStr([pt.lit("C"), pt.col(prefixTempCol), pt.col(rankTempCol).cast("String")], { delimiter: "-" })).
111
+ otherwise(pt.concatStr([pt.lit("C"), pt.col(prefixTempCol)], { delimiter: "-" })).
112
+ alias(clonotypeLabelCol)
113
+ )
114
+ // Return the frame with both intermediate columns removed via withoutColumns
115
+ return df.withoutColumns(prefixTempCol, rankTempCol)
116
+ }
117
+
118
+ export {
119
+ generateClonotypeLabelSteps: generateClonotypeLabelSteps,
120
+ addClonotypeLabelColumnsPt: addClonotypeLabelColumnsPt
121
+ }
@@ -2,16 +2,17 @@ ll := import("@platforma-sdk/workflow-tengo:ll")
2
2
  self := import("@platforma-sdk/workflow-tengo:tpl.light")
3
3
  pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
4
4
  smart := import("@platforma-sdk/workflow-tengo:smart")
5
+ slices := import("@platforma-sdk/workflow-tengo:slices")
5
6
  assets := import("@platforma-sdk/workflow-tengo:assets")
6
7
  exec := import("@platforma-sdk/workflow-tengo:exec")
8
+ pt := import("@platforma-sdk/workflow-tengo:pt")
7
9
 
8
10
  json := import("json")
9
11
 
10
12
  self.defineOutputs("tsv", "tsvForSingleCell")
11
13
 
12
14
  mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:low-memory")
13
- ptransformSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
14
- hashColumnSw := assets.importSoftware("@platforma-open/milaboratories.mixcr-clonotyping-2.hash-column:main")
15
+ ptablerSw := assets.importSoftware("@platforma-open/milaboratories.software-ptabler:main")
15
16
 
16
17
  self.body(func(inputs) {
17
18
  clnsFile := inputs[pConstants.VALUE_FIELD_NAME]
@@ -24,8 +25,19 @@ self.body(func(inputs) {
24
25
 
25
26
  clonotypeKeyColumns := params.clonotypeKeyColumns
26
27
  clonotypeKeyArgs := params.clonotypeKeyArgs
28
+
27
29
  cellTagColumns := params.cellTagColumns
28
30
 
31
+ mainAbundanceColumnUnnormalizedArgs := params.mainAbundanceColumnUnnormalizedArgs
32
+ mainIsProductiveArgs := params.mainIsProductiveArgs
33
+
34
+ hashKeyDerivationExpressionPt := func(sourceColumns) {
35
+ return pt.concatStr(
36
+ slices.map(sourceColumns, func(colName) { return pt.col(colName) }),
37
+ {delimiter: "#"}
38
+ ).hash("sha256", "base64_alphanumeric", 120)
39
+ }
40
+
29
41
  // Exporting clones from clns file
30
42
 
31
43
  createExport := func(additionalAction) {
@@ -76,124 +88,57 @@ self.body(func(inputs) {
76
88
  }
77
89
 
78
90
  if is_undefined(clonotypeKeyColumns) {
79
- result.tsv = unprocessedTsv
80
-
81
- } else if is_undefined(cellTagColumns) {
82
- hashCmdBuilder := exec.builder().
83
- printErrStreamToStdout().
84
- software(hashColumnSw).
85
- arg("--input-table").arg("input.tsv").
86
- addFile("input.tsv", unprocessedTsv).
87
- arg("--output-table").arg("output.tsv").
88
- arg("--calculate")
89
-
90
- for col in clonotypeKeyColumns {
91
- hashCmdBuilder.arg(col)
92
- }
93
-
94
- hashCmdBuilder.arg("clonotypeKey")
95
-
96
- hashCmd := hashCmdBuilder.
97
- saveFile("output.tsv").
98
- run()
99
-
100
- processedTsv := hashCmd.getFile("output.tsv")
101
-
102
- result.tsv = processedTsv
103
-
104
- } else {
105
- pWorkflow := {
106
- steps: [ {
107
- type: "combine_columns_as_json",
108
- src: clonotypeKeyColumns,
109
- dst: "clonotypeKey"
110
- } ]
111
- }
91
+ ll.panic("clonotypeKeyColumns is undefined")
92
+ }
112
93
 
113
- aggregateCmd := exec.builder().
114
- printErrStreamToStdout().
115
- software(ptransformSw).
116
- arg("--workflow").arg("wf.json").
117
- writeFile("wf.json", json.encode(pWorkflow)).
118
- arg("input.tsv").addFile("input.tsv", unprocessedTsv).
119
- arg("output.tsv").saveFile("output.tsv").
120
- run()
94
+ // PTabler processing for main TSV output
95
+ wfMain := pt.workflow()
96
+ frameInputMap := {
97
+ file: unprocessedTsv,
98
+ xsvType: "tsv",
99
+ schema: [ { column: "readCount", type: "Double" } ]
100
+ }
101
+ dfMain := wfMain.frame(frameInputMap, { inferSchema: false, id: "input_table" })
121
102
 
122
- processedTsv := aggregateCmd.getFile("output.tsv")
103
+ dfMain.addColumns(
104
+ pt.col("readCount").round().cast("Long").alias("readCount")
105
+ )
106
+ dfMain.addColumns(
107
+ hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey")
108
+ )
123
109
 
124
- result.tsv = processedTsv
125
- }
110
+ dfMain.save("output.tsv")
111
+ ptablerResultMain := wfMain.run()
112
+ processedTsv := ptablerResultMain.getFile("output.tsv")
113
+ result.tsv = processedTsv
126
114
 
127
115
  if !is_undefined(cellTagColumns) {
128
116
  mixcrForSingleCell := createExport(func(mixcrCmdBuilder) {
129
117
  mixcrCmdBuilder.
130
118
  arg("--split-by-tags").arg("Cell").
131
- arg("-tags").arg("Cell").
132
- arg("-readCount").
133
- arg("-isProductive").arg("CDR3")
119
+ arg("-tags").arg("Cell")
134
120
 
135
- for argGrp in clonotypeKeyArgs {
121
+ for argGrp in (clonotypeKeyArgs + mainIsProductiveArgs + mainAbundanceColumnUnnormalizedArgs) {
136
122
  for arg in argGrp {
137
123
  mixcrCmdBuilder.arg(arg)
138
124
  }
139
125
  }
140
126
  })
141
127
 
142
- if is_undefined(clonotypeKeyColumns) {
143
- ll.panic("clonotypeKeyColumns is undefined")
144
- }
145
-
146
128
  unprocessedTsvForSingleCell := mixcrForSingleCell.getFile("clones.tsv")
147
129
 
148
- pWorkflow := {
149
- steps: [ {
150
- type: "combine_columns_as_json",
151
- src: clonotypeKeyColumns,
152
- dst: "clonotypeKey"
153
- }, {
154
- type: "combine_columns_as_json",
155
- src: cellTagColumns,
156
- dst: "cellTag"
157
- } ]
158
- }
159
-
160
- aggregateCmd := exec.builder().
161
- printErrStreamToStdout().
162
- software(ptransformSw).
163
- arg("--workflow").arg("wf.json").
164
- writeFile("wf.json", json.encode(pWorkflow)).
165
- arg("input.tsv").addFile("input.tsv", unprocessedTsvForSingleCell).
166
- arg("output.tsv").saveFile("output.tsv").
167
- run()
168
-
169
- result.tsvForSingleCell = aggregateCmd.getFile("output.tsv")
170
-
171
- // uncomment this to use hashes
172
-
173
- // hashCmdBuilderSingleCell := exec.builder().
174
- // printErrStreamToStdout().
175
- // software(hashColumnSw).
176
- // arg("--input-table").arg("input.tsv").
177
- // addFile("input.tsv", unprocessedTsvForSingleCell).
178
- // arg("--output-table").arg("output.tsv")
179
-
180
- // hashCmdBuilderSingleCell.arg("--calculate")
181
- // for col in clonotypeKeyColumns {
182
- // hashCmdBuilderSingleCell.arg(col)
183
- // }
184
- // hashCmdBuilderSingleCell.arg("clonotypeKey")
185
-
186
- // hashCmdBuilderSingleCell.arg("--calculate")
187
- // for col in cellTagColumns {
188
- // hashCmdBuilderSingleCell.arg(col)
189
- // }
190
- // hashCmdBuilderSingleCell.arg("cellTag")
130
+ // PTabler processing for single-cell TSV output
131
+ wfSingleCell := pt.workflow()
132
+ dfSingleCell := wfSingleCell.frame(unprocessedTsvForSingleCell, { xsvType: "tsv", inferSchema: false })
191
133
 
192
- // hashCmdSingleCell := hashCmdBuilderSingleCell.
193
- // saveFile("output.tsv").
194
- // run()
134
+ dfSingleCell.addColumns(
135
+ hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey"),
136
+ hashKeyDerivationExpressionPt(cellTagColumns).alias("cellKey")
137
+ )
195
138
 
196
- // result.tsvForSingleCell = hashCmdSingleCell.getFile("output.tsv")
139
+ dfSingleCell.save("output.tsv")
140
+ ptablerResultSingleCell := wfSingleCell.run()
141
+ result.tsvForSingleCell = ptablerResultSingleCell.getFile("output.tsv")
197
142
  }
198
143
 
199
144
  return result