@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 2.18.3 → 2.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +3 -1
- package/CHANGELOG.md +7 -0
- package/dist/tengo/lib/calculate-export-specs.lib.tengo +77 -47
- package/dist/tengo/lib/clonotype-label.lib.tengo +121 -0
- package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
- package/dist/tengo/tpl/calculate-preset-info.plj.gz +0 -0
- package/dist/tengo/tpl/list-presets.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/dist/tengo/tpl/process-single-cell.plj.gz +0 -0
- package/dist/tengo/tpl/process.plj.gz +0 -0
- package/dist/tengo/tpl/test.columns-calculate.plj.gz +0 -0
- package/dist/tengo/tpl/test.columns.test.plj.gz +0 -0
- package/package.json +6 -9
- package/src/aggregate-by-clonotype-key.tpl.tengo +55 -49
- package/src/calculate-export-specs.lib.tengo +77 -47
- package/src/clonotype-label.lib.tengo +121 -0
- package/src/mixcr-export.tpl.tengo +46 -101
- package/src/process-single-cell.tpl.tengo +259 -75
- package/src/process.tpl.tengo +41 -9
|
@@ -170,7 +170,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
170
170
|
|
|
171
171
|
columnsSpecPerSample := []
|
|
172
172
|
columnsSpecPerSampleSc := undefined
|
|
173
|
-
|
|
173
|
+
columnsSpecPerClonotypeNoAggregates := []
|
|
174
174
|
columnsSpecPerClonotypeSc := undefined
|
|
175
175
|
|
|
176
176
|
// array of array of arg groups
|
|
@@ -209,7 +209,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
209
209
|
"pl7.app/abundance/unit": "reads",
|
|
210
210
|
"pl7.app/abundance/normalized": "true",
|
|
211
211
|
"pl7.app/abundance/isPrimary": !hasUmi ? "true" : undefined,
|
|
212
|
-
"pl7.app/label": "Fraction of reads"
|
|
212
|
+
"pl7.app/label": "Fraction of reads",
|
|
213
|
+
"pl7.app/format": ".2p"
|
|
213
214
|
})
|
|
214
215
|
}
|
|
215
216
|
} ]
|
|
@@ -220,8 +221,10 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
220
221
|
|
|
221
222
|
mainAbundanceColumnUnnormalized := "readCount"
|
|
222
223
|
mainAbundanceColumnNormalized := "readFraction"
|
|
224
|
+
mainAbundanceColumnUnnormalizedArgs := [ [ "-readCount" ] ]
|
|
225
|
+
mainAbundanceColumnNormalizedArgs := [ [ "-readFraction" ] ]
|
|
223
226
|
|
|
224
|
-
|
|
227
|
+
columnsSpecPerClonotypeAggregates := [{
|
|
225
228
|
column: mainAbundanceColumnUnnormalized + "Sum",
|
|
226
229
|
id: "read-count-total",
|
|
227
230
|
allowNA: false,
|
|
@@ -249,7 +252,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
249
252
|
"pl7.app/isAbundance": "true",
|
|
250
253
|
"pl7.app/abundance/unit": "reads",
|
|
251
254
|
"pl7.app/abundance/normalized": "true",
|
|
252
|
-
"pl7.app/label": "Mean Fraction of Reads"
|
|
255
|
+
"pl7.app/label": "Mean Fraction of Reads",
|
|
256
|
+
"pl7.app/format": ".2p"
|
|
253
257
|
})
|
|
254
258
|
}
|
|
255
259
|
}]
|
|
@@ -286,7 +290,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
286
290
|
"pl7.app/abundance/unit": "molecules",
|
|
287
291
|
"pl7.app/abundance/normalized": "true",
|
|
288
292
|
"pl7.app/abundance/isPrimary": "true",
|
|
289
|
-
"pl7.app/label": "Fraction of UMIs"
|
|
293
|
+
"pl7.app/label": "Fraction of UMIs",
|
|
294
|
+
"pl7.app/format": ".2p"
|
|
290
295
|
})
|
|
291
296
|
}
|
|
292
297
|
} ]
|
|
@@ -294,9 +299,13 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
294
299
|
[ "-uniqueTagCount", "Molecule" ],
|
|
295
300
|
[ "-uniqueTagFraction", "Molecule" ]
|
|
296
301
|
]
|
|
302
|
+
|
|
297
303
|
mainAbundanceColumnNormalized = "uniqueMoleculeFraction"
|
|
298
304
|
mainAbundanceColumnUnnormalized = "uniqueMoleculeCount"
|
|
299
|
-
|
|
305
|
+
mainAbundanceColumnNormalizedArgs = [ [ "-uniqueTagFraction", "Molecule" ] ]
|
|
306
|
+
mainAbundanceColumnUnnormalizedArgs = [ [ "-uniqueTagCount", "Molecule" ] ]
|
|
307
|
+
|
|
308
|
+
columnsSpecPerClonotypeAggregates = [ {
|
|
300
309
|
column: mainAbundanceColumnUnnormalized + "Sum",
|
|
301
310
|
id: "umi-count-total",
|
|
302
311
|
allowNA: false,
|
|
@@ -324,7 +333,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
324
333
|
"pl7.app/isAbundance": "true",
|
|
325
334
|
"pl7.app/abundance/unit": "molecules",
|
|
326
335
|
"pl7.app/abundance/normalized": "true",
|
|
327
|
-
"pl7.app/label": "Mean Fraction of UMIs"
|
|
336
|
+
"pl7.app/label": "Mean Fraction of UMIs",
|
|
337
|
+
"pl7.app/format": ".2p"
|
|
328
338
|
})
|
|
329
339
|
}
|
|
330
340
|
} ]
|
|
@@ -347,6 +357,20 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
347
357
|
}
|
|
348
358
|
}
|
|
349
359
|
|
|
360
|
+
columnsSpecPerClonotypeAggregates += [ sampleCountColumn ]
|
|
361
|
+
|
|
362
|
+
clonotypeLabelColumn := {
|
|
363
|
+
column: "clonotypeLabel",
|
|
364
|
+
id: "clonotype-label",
|
|
365
|
+
spec: {
|
|
366
|
+
name: "pl7.app/label",
|
|
367
|
+
valueType: "String",
|
|
368
|
+
annotations: a(100000, false, {
|
|
369
|
+
"pl7.app/label": "Clone label"
|
|
370
|
+
})
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
|
|
350
374
|
if isSingleCell {
|
|
351
375
|
// copying read and UMI counts and fractions, removing isPrimary and isAnchor
|
|
352
376
|
columnsSpecPerSample = addSpec(columnsSpecPerSample, {
|
|
@@ -388,14 +412,12 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
388
412
|
"pl7.app/abundance/unit": "cells",
|
|
389
413
|
"pl7.app/abundance/normalized": "true",
|
|
390
414
|
"pl7.app/abundance/isPrimary": "true",
|
|
391
|
-
"pl7.app/label": "Fraction of Cells"
|
|
415
|
+
"pl7.app/label": "Fraction of Cells",
|
|
416
|
+
"pl7.app/format": ".2p"
|
|
392
417
|
})
|
|
393
418
|
}
|
|
394
419
|
} ]
|
|
395
|
-
columnsSpecPerClonotypeSc = [ sampleCountColumn ]
|
|
396
|
-
} else {
|
|
397
|
-
columnsSpecPerClonotype += [ sampleCountColumn ]
|
|
398
|
-
columnsSpecPerClonotype += mainAbundanceColumnAggregates
|
|
420
|
+
columnsSpecPerClonotypeSc = [ sampleCountColumn, clonotypeLabelColumn ]
|
|
399
421
|
}
|
|
400
422
|
|
|
401
423
|
orderP := 80000
|
|
@@ -423,7 +445,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
423
445
|
alphabetShortMixcr := isAminoAcid ? "aa" : "n"
|
|
424
446
|
columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
|
|
425
447
|
visibility := featureU == "CDR3" && (!isSingleCell || isAminoAcid) // isSingleCell ? (featureU == "CDR3") && isAminoAcid : (featureU == "CDR3") || (featureU == assemblingFeature)
|
|
426
|
-
|
|
448
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
427
449
|
column: columnName,
|
|
428
450
|
id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
|
|
429
451
|
naRegex: "region_not_covered",
|
|
@@ -438,6 +460,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
438
460
|
"pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
|
|
439
461
|
"pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
|
|
440
462
|
"pl7.app/vdj/imputed": string(isImputed),
|
|
463
|
+
"pl7.app/table/fontFamily": "monospace",
|
|
441
464
|
"pl7.app/label": featureInFrameU + " " + alphabetShort
|
|
442
465
|
})
|
|
443
466
|
}
|
|
@@ -449,7 +472,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
449
472
|
if !isImputed && featureU == assemblingFeature {
|
|
450
473
|
for annotationType in annotationTypes {
|
|
451
474
|
columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
|
|
452
|
-
|
|
475
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
453
476
|
column: columnName,
|
|
454
477
|
id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
|
|
455
478
|
naRegex: "region_not_covered",
|
|
@@ -475,7 +498,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
475
498
|
|
|
476
499
|
// For now calculate length only for CDR3 to keep the number of columns manageable
|
|
477
500
|
if featureU == "CDR3" {
|
|
478
|
-
|
|
501
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
479
502
|
column: alphabetShortMixcr + "Length" + featureU,
|
|
480
503
|
id: alphabetShortMixcr + "-length-" + featureL,
|
|
481
504
|
naRegex: "region_not_covered",
|
|
@@ -495,24 +518,24 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
495
518
|
}
|
|
496
519
|
|
|
497
520
|
// label column
|
|
498
|
-
if isAminoAcid && !isSingleCell && featureU == "CDR3" {
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
}
|
|
521
|
+
// if isAminoAcid && !isSingleCell && featureU == "CDR3" {
|
|
522
|
+
// columnsSpecPerClonotype += [ {
|
|
523
|
+
// column: columnName,
|
|
524
|
+
// id: "clonotype-label",
|
|
525
|
+
// preProcess: [{
|
|
526
|
+
// type: "regexpReplace",
|
|
527
|
+
// pattern: "^region_not_covered$",
|
|
528
|
+
// replacement: "Unlabelled"
|
|
529
|
+
// }],
|
|
530
|
+
// spec: {
|
|
531
|
+
// name: "pl7.app/label",
|
|
532
|
+
// valueType: "String",
|
|
533
|
+
// annotations: a(100000, false, {
|
|
534
|
+
// "pl7.app/label": "Clone label"
|
|
535
|
+
// })
|
|
536
|
+
// }
|
|
537
|
+
// } ]
|
|
538
|
+
// }
|
|
516
539
|
}
|
|
517
540
|
}
|
|
518
541
|
}
|
|
@@ -537,7 +560,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
537
560
|
for vdjcU in ["V", "D", "J", "C"] {
|
|
538
561
|
vdjcL := text.to_lower(vdjcU)
|
|
539
562
|
for variant in geneHitColumnVariants {
|
|
540
|
-
|
|
563
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
541
564
|
column: "best" + vdjcU + variant.columnNameSuffix,
|
|
542
565
|
id: "best-" + vdjcL + variant.idSuffix,
|
|
543
566
|
naRegex: "",
|
|
@@ -596,7 +619,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
596
619
|
}
|
|
597
620
|
|
|
598
621
|
for variant in mutationColumnVariants {
|
|
599
|
-
|
|
622
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
600
623
|
column: alphabetShortMixcr + variant.name + coreFeature,
|
|
601
624
|
id: alphabetShortMixcr + variant.idPart + geneL,
|
|
602
625
|
allowNA: true,
|
|
@@ -644,10 +667,10 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
644
667
|
// visibility: false
|
|
645
668
|
// }
|
|
646
669
|
]
|
|
647
|
-
|
|
648
|
-
|
|
670
|
+
mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
|
|
671
|
+
mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
|
|
649
672
|
for variant in flagColumnVariants {
|
|
650
|
-
|
|
673
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
651
674
|
column: variant.columnPrefix + productiveFeature,
|
|
652
675
|
id: variant.id,
|
|
653
676
|
allowNA: false,
|
|
@@ -692,7 +715,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
692
715
|
|
|
693
716
|
// Isotype and chain
|
|
694
717
|
|
|
695
|
-
|
|
718
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
696
719
|
column: "isotypePrimary",
|
|
697
720
|
id: "isotype",
|
|
698
721
|
naRegex: "",
|
|
@@ -724,9 +747,11 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
724
747
|
[ "-topChains" ]
|
|
725
748
|
]
|
|
726
749
|
|
|
750
|
+
columnsSpecPerClonotypeNoAggregates += [ clonotypeLabelColumn ]
|
|
751
|
+
|
|
727
752
|
// All columns are added
|
|
728
753
|
|
|
729
|
-
columnsSpec := columnsSpecPerSample +
|
|
754
|
+
columnsSpec := columnsSpecPerSample + columnsSpecPerClonotypeNoAggregates + columnsSpecPerClonotypeAggregates
|
|
730
755
|
|
|
731
756
|
// Creating a column map for fast search
|
|
732
757
|
columnsByName := {}
|
|
@@ -767,8 +792,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
767
792
|
"pl7.app/vdj/clonotypingRunId": blockId
|
|
768
793
|
},
|
|
769
794
|
annotations: {
|
|
770
|
-
"pl7.app/label": "Clonotype
|
|
771
|
-
"pl7.app/table/
|
|
795
|
+
"pl7.app/label": "Clonotype ID",
|
|
796
|
+
"pl7.app/table/fontFamily": "monospace",
|
|
797
|
+
"pl7.app/table/visibility": "default",
|
|
772
798
|
"pl7.app/table/orderPriority": "110000",
|
|
773
799
|
"pl7.app/segmentedBy": string(json.encode(["pl7.app/vdj/clonotypingRunId"]))
|
|
774
800
|
}
|
|
@@ -791,8 +817,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
791
817
|
"pl7.app/vdj/clonotypingRunId": blockId
|
|
792
818
|
},
|
|
793
819
|
annotations: {
|
|
794
|
-
"pl7.app/label": "
|
|
795
|
-
"pl7.app/table/
|
|
820
|
+
"pl7.app/label": "Clonotype ID",
|
|
821
|
+
"pl7.app/table/fontFamily": "monospace",
|
|
822
|
+
"pl7.app/table/visibility": "default",
|
|
796
823
|
"pl7.app/table/orderPriority": "110000",
|
|
797
824
|
"pl7.app/segmentedBy": string(json.encode(["pl7.app/vdj/clonotypingRunId"]))
|
|
798
825
|
}
|
|
@@ -825,16 +852,19 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
825
852
|
|
|
826
853
|
columnsSpecPerSample: columnsSpecPerSample,
|
|
827
854
|
columnsSpecPerSampleSc: columnsSpecPerSampleSc,
|
|
828
|
-
|
|
855
|
+
columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
|
|
856
|
+
columnsSpecPerClonotypeAggregates: columnsSpecPerClonotypeAggregates,
|
|
829
857
|
columnsSpecPerClonotypeSc: columnsSpecPerClonotypeSc,
|
|
830
858
|
|
|
831
859
|
columnsSpec: columnsSpec,
|
|
832
860
|
|
|
833
861
|
mainAbundanceColumnNormalized: mainAbundanceColumnNormalized,
|
|
862
|
+
mainAbundanceColumnNormalizedArgs: mainAbundanceColumnNormalizedArgs,
|
|
834
863
|
mainAbundanceColumnUnnormalized: mainAbundanceColumnUnnormalized,
|
|
864
|
+
mainAbundanceColumnUnnormalizedArgs: mainAbundanceColumnUnnormalizedArgs,
|
|
835
865
|
|
|
836
|
-
|
|
837
|
-
|
|
866
|
+
mainIsProductiveColumn: mainIsProductiveColumn,
|
|
867
|
+
mainIsProductiveArgs: mainIsProductiveArgs,
|
|
838
868
|
|
|
839
869
|
exportArgs: exportArgs
|
|
840
870
|
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
generateClonotypeLabelSteps := func(clonotypeKeyCol, clonotypeLabelCol, targetTable) {
|
|
2
|
+
prefixTempCol := clonotypeLabelCol + "_prefix_temp"
|
|
3
|
+
rankTempCol := clonotypeLabelCol + "_rank_temp"
|
|
4
|
+
|
|
5
|
+
steps := []
|
|
6
|
+
|
|
7
|
+
// Add prefix_temp column (digits removed, first 5 chars, uppercased)
|
|
8
|
+
steps = append(steps, {
|
|
9
|
+
type: "add_columns",
|
|
10
|
+
table: targetTable,
|
|
11
|
+
columns: [{
|
|
12
|
+
name: prefixTempCol,
|
|
13
|
+
expression: {
|
|
14
|
+
type: "to_upper",
|
|
15
|
+
value: {
|
|
16
|
+
type: "substring",
|
|
17
|
+
value: {
|
|
18
|
+
type: "str_replace",
|
|
19
|
+
value: { type: "col", name: clonotypeKeyCol },
|
|
20
|
+
pattern: "\\d", // Regex for digits
|
|
21
|
+
replacement: "",
|
|
22
|
+
replaceAll: true
|
|
23
|
+
},
|
|
24
|
+
start: 0,
|
|
25
|
+
length: 5
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}]
|
|
29
|
+
})
|
|
30
|
+
|
|
31
|
+
// Add rank_temp column - rank of the clonotype key within its prefixTempCol group,
|
|
32
|
+
// used to diversify repeated clonotype labels (short prefixes collide often, per the birthday paradox)
|
|
33
|
+
steps = append(steps, {
|
|
34
|
+
type: "add_columns",
|
|
35
|
+
table: targetTable,
|
|
36
|
+
columns: [{
|
|
37
|
+
name: rankTempCol,
|
|
38
|
+
expression: {
|
|
39
|
+
type: "rank",
|
|
40
|
+
partitionBy: [{ type: "col", name: prefixTempCol }],
|
|
41
|
+
orderBy: [{ type: "col", name: clonotypeKeyCol }]
|
|
42
|
+
}
|
|
43
|
+
}]
|
|
44
|
+
})
|
|
45
|
+
|
|
46
|
+
// Add final clonotypeLabelCol column (C-XXXXX or C-XXXXX-RANK)
|
|
47
|
+
steps = append(steps, {
|
|
48
|
+
type: "add_columns",
|
|
49
|
+
table: targetTable,
|
|
50
|
+
columns: [{
|
|
51
|
+
name: clonotypeLabelCol,
|
|
52
|
+
expression: {
|
|
53
|
+
type: "when_then_otherwise",
|
|
54
|
+
conditions: [
|
|
55
|
+
{
|
|
56
|
+
when: {
|
|
57
|
+
type: "gt",
|
|
58
|
+
lhs: { type: "col", name: rankTempCol },
|
|
59
|
+
rhs: { type: "const", value: 1 }
|
|
60
|
+
},
|
|
61
|
+
then: {
|
|
62
|
+
type: "str_join",
|
|
63
|
+
operands: [
|
|
64
|
+
{ type: "const", value: "C" },
|
|
65
|
+
{ type: "col", name: prefixTempCol },
|
|
66
|
+
{ type: "col", name: rankTempCol }
|
|
67
|
+
],
|
|
68
|
+
delimiter: "-"
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
],
|
|
72
|
+
otherwise: {
|
|
73
|
+
type: "str_join",
|
|
74
|
+
operands: [
|
|
75
|
+
{ type: "const", value: "C" },
|
|
76
|
+
{ type: "col", name: prefixTempCol }
|
|
77
|
+
],
|
|
78
|
+
delimiter: "-"
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}]
|
|
82
|
+
})
|
|
83
|
+
|
|
84
|
+
return steps
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
addClonotypeLabelColumnsPt := func(df, clonotypeKeyCol, clonotypeLabelCol, pt) {
|
|
88
|
+
prefixTempCol := clonotypeLabelCol + "_prefix_temp"
|
|
89
|
+
rankTempCol := clonotypeLabelCol + "_rank_temp"
|
|
90
|
+
|
|
91
|
+
// Add prefix_temp column (digits removed, first 5 chars, uppercased)
|
|
92
|
+
df = df.withColumns(
|
|
93
|
+
pt.col(clonotypeKeyCol).
|
|
94
|
+
strReplace("\\d", "", { replaceAll: true }).
|
|
95
|
+
strSlice(0, 5). // Take first 5 characters
|
|
96
|
+
strToUpper(). // Convert to uppercase
|
|
97
|
+
alias(prefixTempCol)
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
// Add rank_temp column - rank of the clonotypeKeyCol within each prefixTempCol group
|
|
101
|
+
df = df.withColumns(
|
|
102
|
+
pt.rank(pt.col(clonotypeKeyCol)). // Rank based on clonotypeKeyCol (default ascending)
|
|
103
|
+
over(pt.col(prefixTempCol)). // Partition by prefixTempCol
|
|
104
|
+
alias(rankTempCol)
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
// Add final clonotypeLabelCol column (C-XXXXX or C-XXXXX-RANK)
|
|
108
|
+
df = df.withColumns(
|
|
109
|
+
pt.when(pt.col(rankTempCol).gt(pt.lit(1))).
|
|
110
|
+
then(pt.concatStr([pt.lit("C"), pt.col(prefixTempCol), pt.col(rankTempCol).cast("String")], { delimiter: "-" })).
|
|
111
|
+
otherwise(pt.concatStr([pt.lit("C"), pt.col(prefixTempCol)], { delimiter: "-" })).
|
|
112
|
+
alias(clonotypeLabelCol)
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
return df.withoutColumns(prefixTempCol, rankTempCol)
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
export {
|
|
119
|
+
generateClonotypeLabelSteps: generateClonotypeLabelSteps,
|
|
120
|
+
addClonotypeLabelColumnsPt: addClonotypeLabelColumnsPt
|
|
121
|
+
}
|
|
@@ -2,16 +2,17 @@ ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
|
2
2
|
self := import("@platforma-sdk/workflow-tengo:tpl.light")
|
|
3
3
|
pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
|
|
4
4
|
smart := import("@platforma-sdk/workflow-tengo:smart")
|
|
5
|
+
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
5
6
|
assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
6
7
|
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
8
|
+
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
7
9
|
|
|
8
10
|
json := import("json")
|
|
9
11
|
|
|
10
12
|
self.defineOutputs("tsv", "tsvForSingleCell")
|
|
11
13
|
|
|
12
14
|
mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:low-memory")
|
|
13
|
-
|
|
14
|
-
hashColumnSw := assets.importSoftware("@platforma-open/milaboratories.mixcr-clonotyping-2.hash-column:main")
|
|
15
|
+
ptablerSw := assets.importSoftware("@platforma-open/milaboratories.software-ptabler:main")
|
|
15
16
|
|
|
16
17
|
self.body(func(inputs) {
|
|
17
18
|
clnsFile := inputs[pConstants.VALUE_FIELD_NAME]
|
|
@@ -24,8 +25,19 @@ self.body(func(inputs) {
|
|
|
24
25
|
|
|
25
26
|
clonotypeKeyColumns := params.clonotypeKeyColumns
|
|
26
27
|
clonotypeKeyArgs := params.clonotypeKeyArgs
|
|
28
|
+
|
|
27
29
|
cellTagColumns := params.cellTagColumns
|
|
28
30
|
|
|
31
|
+
mainAbundanceColumnUnnormalizedArgs := params.mainAbundanceColumnUnnormalizedArgs
|
|
32
|
+
mainIsProductiveArgs := params.mainIsProductiveArgs
|
|
33
|
+
|
|
34
|
+
hashKeyDerivationExpressionPt := func(sourceColumns) {
|
|
35
|
+
return pt.concatStr(
|
|
36
|
+
slices.map(sourceColumns, func(colName) { return pt.col(colName) }),
|
|
37
|
+
{delimiter: "#"}
|
|
38
|
+
).hash("sha256", "base64_alphanumeric", 120)
|
|
39
|
+
}
|
|
40
|
+
|
|
29
41
|
// Exporting clones from clns file
|
|
30
42
|
|
|
31
43
|
createExport := func(additionalAction) {
|
|
@@ -76,124 +88,57 @@ self.body(func(inputs) {
|
|
|
76
88
|
}
|
|
77
89
|
|
|
78
90
|
if is_undefined(clonotypeKeyColumns) {
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
} else if is_undefined(cellTagColumns) {
|
|
82
|
-
hashCmdBuilder := exec.builder().
|
|
83
|
-
printErrStreamToStdout().
|
|
84
|
-
software(hashColumnSw).
|
|
85
|
-
arg("--input-table").arg("input.tsv").
|
|
86
|
-
addFile("input.tsv", unprocessedTsv).
|
|
87
|
-
arg("--output-table").arg("output.tsv").
|
|
88
|
-
arg("--calculate")
|
|
89
|
-
|
|
90
|
-
for col in clonotypeKeyColumns {
|
|
91
|
-
hashCmdBuilder.arg(col)
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
hashCmdBuilder.arg("clonotypeKey")
|
|
95
|
-
|
|
96
|
-
hashCmd := hashCmdBuilder.
|
|
97
|
-
saveFile("output.tsv").
|
|
98
|
-
run()
|
|
99
|
-
|
|
100
|
-
processedTsv := hashCmd.getFile("output.tsv")
|
|
101
|
-
|
|
102
|
-
result.tsv = processedTsv
|
|
103
|
-
|
|
104
|
-
} else {
|
|
105
|
-
pWorkflow := {
|
|
106
|
-
steps: [ {
|
|
107
|
-
type: "combine_columns_as_json",
|
|
108
|
-
src: clonotypeKeyColumns,
|
|
109
|
-
dst: "clonotypeKey"
|
|
110
|
-
} ]
|
|
111
|
-
}
|
|
91
|
+
ll.panic("clonotypeKeyColumns is undefined")
|
|
92
|
+
}
|
|
112
93
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
94
|
+
// PTabler processing for main TSV output
|
|
95
|
+
wfMain := pt.workflow()
|
|
96
|
+
frameInputMap := {
|
|
97
|
+
file: unprocessedTsv,
|
|
98
|
+
xsvType: "tsv",
|
|
99
|
+
schema: [ { column: "readCount", type: "Double" } ]
|
|
100
|
+
}
|
|
101
|
+
dfMain := wfMain.frame(frameInputMap, { inferSchema: false, id: "input_table" })
|
|
121
102
|
|
|
122
|
-
|
|
103
|
+
dfMain.addColumns(
|
|
104
|
+
pt.col("readCount").round().cast("Long").alias("readCount")
|
|
105
|
+
)
|
|
106
|
+
dfMain.addColumns(
|
|
107
|
+
hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey")
|
|
108
|
+
)
|
|
123
109
|
|
|
124
|
-
|
|
125
|
-
|
|
110
|
+
dfMain.save("output.tsv")
|
|
111
|
+
ptablerResultMain := wfMain.run()
|
|
112
|
+
processedTsv := ptablerResultMain.getFile("output.tsv")
|
|
113
|
+
result.tsv = processedTsv
|
|
126
114
|
|
|
127
115
|
if !is_undefined(cellTagColumns) {
|
|
128
116
|
mixcrForSingleCell := createExport(func(mixcrCmdBuilder) {
|
|
129
117
|
mixcrCmdBuilder.
|
|
130
118
|
arg("--split-by-tags").arg("Cell").
|
|
131
|
-
arg("-tags").arg("Cell")
|
|
132
|
-
arg("-readCount").
|
|
133
|
-
arg("-isProductive").arg("CDR3")
|
|
119
|
+
arg("-tags").arg("Cell")
|
|
134
120
|
|
|
135
|
-
for argGrp in clonotypeKeyArgs {
|
|
121
|
+
for argGrp in (clonotypeKeyArgs + mainIsProductiveArgs + mainAbundanceColumnUnnormalizedArgs) {
|
|
136
122
|
for arg in argGrp {
|
|
137
123
|
mixcrCmdBuilder.arg(arg)
|
|
138
124
|
}
|
|
139
125
|
}
|
|
140
126
|
})
|
|
141
127
|
|
|
142
|
-
if is_undefined(clonotypeKeyColumns) {
|
|
143
|
-
ll.panic("clonotypeKeyColumns is undefined")
|
|
144
|
-
}
|
|
145
|
-
|
|
146
128
|
unprocessedTsvForSingleCell := mixcrForSingleCell.getFile("clones.tsv")
|
|
147
129
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
src: clonotypeKeyColumns,
|
|
152
|
-
dst: "clonotypeKey"
|
|
153
|
-
}, {
|
|
154
|
-
type: "combine_columns_as_json",
|
|
155
|
-
src: cellTagColumns,
|
|
156
|
-
dst: "cellTag"
|
|
157
|
-
} ]
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
aggregateCmd := exec.builder().
|
|
161
|
-
printErrStreamToStdout().
|
|
162
|
-
software(ptransformSw).
|
|
163
|
-
arg("--workflow").arg("wf.json").
|
|
164
|
-
writeFile("wf.json", json.encode(pWorkflow)).
|
|
165
|
-
arg("input.tsv").addFile("input.tsv", unprocessedTsvForSingleCell).
|
|
166
|
-
arg("output.tsv").saveFile("output.tsv").
|
|
167
|
-
run()
|
|
168
|
-
|
|
169
|
-
result.tsvForSingleCell = aggregateCmd.getFile("output.tsv")
|
|
170
|
-
|
|
171
|
-
// uncomment this to use hashes
|
|
172
|
-
|
|
173
|
-
// hashCmdBuilderSingleCell := exec.builder().
|
|
174
|
-
// printErrStreamToStdout().
|
|
175
|
-
// software(hashColumnSw).
|
|
176
|
-
// arg("--input-table").arg("input.tsv").
|
|
177
|
-
// addFile("input.tsv", unprocessedTsvForSingleCell).
|
|
178
|
-
// arg("--output-table").arg("output.tsv")
|
|
179
|
-
|
|
180
|
-
// hashCmdBuilderSingleCell.arg("--calculate")
|
|
181
|
-
// for col in clonotypeKeyColumns {
|
|
182
|
-
// hashCmdBuilderSingleCell.arg(col)
|
|
183
|
-
// }
|
|
184
|
-
// hashCmdBuilderSingleCell.arg("clonotypeKey")
|
|
185
|
-
|
|
186
|
-
// hashCmdBuilderSingleCell.arg("--calculate")
|
|
187
|
-
// for col in cellTagColumns {
|
|
188
|
-
// hashCmdBuilderSingleCell.arg(col)
|
|
189
|
-
// }
|
|
190
|
-
// hashCmdBuilderSingleCell.arg("cellTag")
|
|
130
|
+
// PTabler processing for single-cell TSV output
|
|
131
|
+
wfSingleCell := pt.workflow()
|
|
132
|
+
dfSingleCell := wfSingleCell.frame(unprocessedTsvForSingleCell, { xsvType: "tsv", inferSchema: false })
|
|
191
133
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
134
|
+
dfSingleCell.addColumns(
|
|
135
|
+
hashKeyDerivationExpressionPt(clonotypeKeyColumns).alias("clonotypeKey"),
|
|
136
|
+
hashKeyDerivationExpressionPt(cellTagColumns).alias("cellKey")
|
|
137
|
+
)
|
|
195
138
|
|
|
196
|
-
|
|
139
|
+
dfSingleCell.save("output.tsv")
|
|
140
|
+
ptablerResultSingleCell := wfSingleCell.run()
|
|
141
|
+
result.tsvForSingleCell = ptablerResultSingleCell.getFile("output.tsv")
|
|
197
142
|
}
|
|
198
143
|
|
|
199
144
|
return result
|