@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 2.18.3 → 2.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +3 -1
- package/CHANGELOG.md +13 -0
- package/dist/tengo/lib/calculate-export-specs.lib.tengo +77 -47
- package/dist/tengo/lib/clonotype-label.lib.tengo +121 -0
- package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
- package/dist/tengo/tpl/calculate-preset-info.plj.gz +0 -0
- package/dist/tengo/tpl/list-presets.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/dist/tengo/tpl/process-single-cell.plj.gz +0 -0
- package/dist/tengo/tpl/process.plj.gz +0 -0
- package/dist/tengo/tpl/test.columns-calculate.plj.gz +0 -0
- package/dist/tengo/tpl/test.columns.test.plj.gz +0 -0
- package/package.json +6 -9
- package/src/aggregate-by-clonotype-key.tpl.tengo +55 -49
- package/src/calculate-export-specs.lib.tengo +77 -47
- package/src/clonotype-label.lib.tengo +121 -0
- package/src/mixcr-export.tpl.tengo +46 -101
- package/src/process-single-cell.tpl.tengo +259 -75
- package/src/process.tpl.tengo +41 -9
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@2.
|
|
3
|
+
> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@2.19.1 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
|
|
4
4
|
> rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
info: Skipping unknown file type: test/columns.test.ts
|
|
7
7
|
Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
|
|
8
8
|
Processing "src/calculate-export-specs.lib.tengo"...
|
|
9
9
|
Processing "src/calculate-preset-info.tpl.tengo"...
|
|
10
|
+
Processing "src/clonotype-label.lib.tengo"...
|
|
10
11
|
Processing "src/list-presets.tpl.tengo"...
|
|
11
12
|
Processing "src/main.tpl.tengo"...
|
|
12
13
|
Processing "src/mixcr-analyze.tpl.tengo"...
|
|
@@ -20,6 +21,7 @@ No syntax errors found.
|
|
|
20
21
|
info: Skipping unknown file type: test/columns.test.ts
|
|
21
22
|
info: Compiling 'dist'...
|
|
22
23
|
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/calculate-export-specs.lib.tengo
|
|
24
|
+
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/lib/clonotype-label.lib.tengo
|
|
23
25
|
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz
|
|
24
26
|
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/calculate-preset-info.plj.gz
|
|
25
27
|
info: - writing /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow/dist/tengo/tpl/list-presets.plj.gz
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.mixcr-clonotyping.workflow
|
|
2
2
|
|
|
3
|
+
## 2.19.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- fdf3e57: SDK upgrade to fix redundant partitioning of exports
|
|
8
|
+
|
|
9
|
+
## 2.19.0
|
|
10
|
+
|
|
11
|
+
### Minor Changes
|
|
12
|
+
|
|
13
|
+
- 4f09d25: - All table transformations migrated from ptransform and custom Python scripts to the universal PT API
|
|
14
|
+
- Enhance export specifications by adding format property for fraction and sequence columns
|
|
15
|
+
|
|
3
16
|
## 2.18.3
|
|
4
17
|
|
|
5
18
|
### Patch Changes
|
|
@@ -170,7 +170,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
170
170
|
|
|
171
171
|
columnsSpecPerSample := []
|
|
172
172
|
columnsSpecPerSampleSc := undefined
|
|
173
|
-
|
|
173
|
+
columnsSpecPerClonotypeNoAggregates := []
|
|
174
174
|
columnsSpecPerClonotypeSc := undefined
|
|
175
175
|
|
|
176
176
|
|
|
@@ -209,7 +209,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
209
209
|
"pl7.app/abundance/unit": "reads",
|
|
210
210
|
"pl7.app/abundance/normalized": "true",
|
|
211
211
|
"pl7.app/abundance/isPrimary": !hasUmi ? "true" : undefined,
|
|
212
|
-
"pl7.app/label": "Fraction of reads"
|
|
212
|
+
"pl7.app/label": "Fraction of reads",
|
|
213
|
+
"pl7.app/format": ".2p"
|
|
213
214
|
})
|
|
214
215
|
}
|
|
215
216
|
} ]
|
|
@@ -220,8 +221,10 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
220
221
|
|
|
221
222
|
mainAbundanceColumnUnnormalized := "readCount"
|
|
222
223
|
mainAbundanceColumnNormalized := "readFraction"
|
|
224
|
+
mainAbundanceColumnUnnormalizedArgs := [ [ "-readCount" ] ]
|
|
225
|
+
mainAbundanceColumnNormalizedArgs := [ [ "-readFraction" ] ]
|
|
223
226
|
|
|
224
|
-
|
|
227
|
+
columnsSpecPerClonotypeAggregates := [{
|
|
225
228
|
column: mainAbundanceColumnUnnormalized + "Sum",
|
|
226
229
|
id: "read-count-total",
|
|
227
230
|
allowNA: false,
|
|
@@ -249,7 +252,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
249
252
|
"pl7.app/isAbundance": "true",
|
|
250
253
|
"pl7.app/abundance/unit": "reads",
|
|
251
254
|
"pl7.app/abundance/normalized": "true",
|
|
252
|
-
"pl7.app/label": "Mean Fraction of Reads"
|
|
255
|
+
"pl7.app/label": "Mean Fraction of Reads",
|
|
256
|
+
"pl7.app/format": ".2p"
|
|
253
257
|
})
|
|
254
258
|
}
|
|
255
259
|
}]
|
|
@@ -286,7 +290,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
286
290
|
"pl7.app/abundance/unit": "molecules",
|
|
287
291
|
"pl7.app/abundance/normalized": "true",
|
|
288
292
|
"pl7.app/abundance/isPrimary": "true",
|
|
289
|
-
"pl7.app/label": "Fraction of UMIs"
|
|
293
|
+
"pl7.app/label": "Fraction of UMIs",
|
|
294
|
+
"pl7.app/format": ".2p"
|
|
290
295
|
})
|
|
291
296
|
}
|
|
292
297
|
} ]
|
|
@@ -294,9 +299,13 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
294
299
|
[ "-uniqueTagCount", "Molecule" ],
|
|
295
300
|
[ "-uniqueTagFraction", "Molecule" ]
|
|
296
301
|
]
|
|
302
|
+
|
|
297
303
|
mainAbundanceColumnNormalized = "uniqueMoleculeFraction"
|
|
298
304
|
mainAbundanceColumnUnnormalized = "uniqueMoleculeCount"
|
|
299
|
-
|
|
305
|
+
mainAbundanceColumnNormalizedArgs = [ [ "-uniqueTagFraction", "Molecule" ] ]
|
|
306
|
+
mainAbundanceColumnUnnormalizedArgs = [ [ "-uniqueTagCount", "Molecule" ] ]
|
|
307
|
+
|
|
308
|
+
columnsSpecPerClonotypeAggregates = [ {
|
|
300
309
|
column: mainAbundanceColumnUnnormalized + "Sum",
|
|
301
310
|
id: "umi-count-total",
|
|
302
311
|
allowNA: false,
|
|
@@ -324,7 +333,8 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
324
333
|
"pl7.app/isAbundance": "true",
|
|
325
334
|
"pl7.app/abundance/unit": "molecules",
|
|
326
335
|
"pl7.app/abundance/normalized": "true",
|
|
327
|
-
"pl7.app/label": "Mean Fraction of UMIs"
|
|
336
|
+
"pl7.app/label": "Mean Fraction of UMIs",
|
|
337
|
+
"pl7.app/format": ".2p"
|
|
328
338
|
})
|
|
329
339
|
}
|
|
330
340
|
} ]
|
|
@@ -347,6 +357,20 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
347
357
|
}
|
|
348
358
|
}
|
|
349
359
|
|
|
360
|
+
columnsSpecPerClonotypeAggregates += [ sampleCountColumn ]
|
|
361
|
+
|
|
362
|
+
clonotypeLabelColumn := {
|
|
363
|
+
column: "clonotypeLabel",
|
|
364
|
+
id: "clonotype-label",
|
|
365
|
+
spec: {
|
|
366
|
+
name: "pl7.app/label",
|
|
367
|
+
valueType: "String",
|
|
368
|
+
annotations: a(100000, false, {
|
|
369
|
+
"pl7.app/label": "Clone label"
|
|
370
|
+
})
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
|
|
350
374
|
if isSingleCell {
|
|
351
375
|
|
|
352
376
|
columnsSpecPerSample = addSpec(columnsSpecPerSample, {
|
|
@@ -388,14 +412,12 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
388
412
|
"pl7.app/abundance/unit": "cells",
|
|
389
413
|
"pl7.app/abundance/normalized": "true",
|
|
390
414
|
"pl7.app/abundance/isPrimary": "true",
|
|
391
|
-
"pl7.app/label": "Fraction of Cells"
|
|
415
|
+
"pl7.app/label": "Fraction of Cells",
|
|
416
|
+
"pl7.app/format": ".2p"
|
|
392
417
|
})
|
|
393
418
|
}
|
|
394
419
|
} ]
|
|
395
|
-
columnsSpecPerClonotypeSc = [ sampleCountColumn ]
|
|
396
|
-
} else {
|
|
397
|
-
columnsSpecPerClonotype += [ sampleCountColumn ]
|
|
398
|
-
columnsSpecPerClonotype += mainAbundanceColumnAggregates
|
|
420
|
+
columnsSpecPerClonotypeSc = [ sampleCountColumn, clonotypeLabelColumn ]
|
|
399
421
|
}
|
|
400
422
|
|
|
401
423
|
orderP := 80000
|
|
@@ -423,7 +445,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
423
445
|
alphabetShortMixcr := isAminoAcid ? "aa" : "n"
|
|
424
446
|
columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
|
|
425
447
|
visibility := featureU == "CDR3" && (!isSingleCell || isAminoAcid) // isSingleCell ? (featureU == "CDR3") && isAminoAcid : (featureU == "CDR3") || (featureU == assemblingFeature)
|
|
426
|
-
|
|
448
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
427
449
|
column: columnName,
|
|
428
450
|
id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
|
|
429
451
|
naRegex: "region_not_covered",
|
|
@@ -438,6 +460,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
438
460
|
"pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
|
|
439
461
|
"pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
|
|
440
462
|
"pl7.app/vdj/imputed": string(isImputed),
|
|
463
|
+
"pl7.app/table/fontFamily": "monospace",
|
|
441
464
|
"pl7.app/label": featureInFrameU + " " + alphabetShort
|
|
442
465
|
})
|
|
443
466
|
}
|
|
@@ -449,7 +472,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
449
472
|
if !isImputed && featureU == assemblingFeature {
|
|
450
473
|
for annotationType in annotationTypes {
|
|
451
474
|
columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
|
|
452
|
-
|
|
475
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
453
476
|
column: columnName,
|
|
454
477
|
id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
|
|
455
478
|
naRegex: "region_not_covered",
|
|
@@ -475,7 +498,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
475
498
|
|
|
476
499
|
|
|
477
500
|
if featureU == "CDR3" {
|
|
478
|
-
|
|
501
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
479
502
|
column: alphabetShortMixcr + "Length" + featureU,
|
|
480
503
|
id: alphabetShortMixcr + "-length-" + featureL,
|
|
481
504
|
naRegex: "region_not_covered",
|
|
@@ -495,24 +518,24 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
495
518
|
}
|
|
496
519
|
|
|
497
520
|
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
|
|
516
539
|
}
|
|
517
540
|
}
|
|
518
541
|
}
|
|
@@ -537,7 +560,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
537
560
|
for vdjcU in ["V", "D", "J", "C"] {
|
|
538
561
|
vdjcL := text.to_lower(vdjcU)
|
|
539
562
|
for variant in geneHitColumnVariants {
|
|
540
|
-
|
|
563
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
541
564
|
column: "best" + vdjcU + variant.columnNameSuffix,
|
|
542
565
|
id: "best-" + vdjcL + variant.idSuffix,
|
|
543
566
|
naRegex: "",
|
|
@@ -596,7 +619,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
596
619
|
}
|
|
597
620
|
|
|
598
621
|
for variant in mutationColumnVariants {
|
|
599
|
-
|
|
622
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
600
623
|
column: alphabetShortMixcr + variant.name + coreFeature,
|
|
601
624
|
id: alphabetShortMixcr + variant.idPart + geneL,
|
|
602
625
|
allowNA: true,
|
|
@@ -644,10 +667,10 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
644
667
|
|
|
645
668
|
|
|
646
669
|
]
|
|
647
|
-
|
|
648
|
-
|
|
670
|
+
mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
|
|
671
|
+
mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
|
|
649
672
|
for variant in flagColumnVariants {
|
|
650
|
-
|
|
673
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
651
674
|
column: variant.columnPrefix + productiveFeature,
|
|
652
675
|
id: variant.id,
|
|
653
676
|
allowNA: false,
|
|
@@ -692,7 +715,7 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
692
715
|
|
|
693
716
|
|
|
694
717
|
|
|
695
|
-
|
|
718
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
696
719
|
column: "isotypePrimary",
|
|
697
720
|
id: "isotype",
|
|
698
721
|
naRegex: "",
|
|
@@ -724,9 +747,11 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
724
747
|
[ "-topChains" ]
|
|
725
748
|
]
|
|
726
749
|
|
|
750
|
+
columnsSpecPerClonotypeNoAggregates += [ clonotypeLabelColumn ]
|
|
751
|
+
|
|
727
752
|
|
|
728
753
|
|
|
729
|
-
columnsSpec := columnsSpecPerSample +
|
|
754
|
+
columnsSpec := columnsSpecPerSample + columnsSpecPerClonotypeNoAggregates + columnsSpecPerClonotypeAggregates
|
|
730
755
|
|
|
731
756
|
|
|
732
757
|
columnsByName := {}
|
|
@@ -767,8 +792,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
767
792
|
"pl7.app/vdj/clonotypingRunId": blockId
|
|
768
793
|
},
|
|
769
794
|
annotations: {
|
|
770
|
-
"pl7.app/label": "Clonotype
|
|
771
|
-
"pl7.app/table/
|
|
795
|
+
"pl7.app/label": "Clonotype ID",
|
|
796
|
+
"pl7.app/table/fontFamily": "monospace",
|
|
797
|
+
"pl7.app/table/visibility": "default",
|
|
772
798
|
"pl7.app/table/orderPriority": "110000",
|
|
773
799
|
"pl7.app/segmentedBy": string(json.encode(["pl7.app/vdj/clonotypingRunId"]))
|
|
774
800
|
}
|
|
@@ -791,8 +817,9 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
791
817
|
"pl7.app/vdj/clonotypingRunId": blockId
|
|
792
818
|
},
|
|
793
819
|
annotations: {
|
|
794
|
-
"pl7.app/label": "
|
|
795
|
-
"pl7.app/table/
|
|
820
|
+
"pl7.app/label": "Clonotype ID",
|
|
821
|
+
"pl7.app/table/fontFamily": "monospace",
|
|
822
|
+
"pl7.app/table/visibility": "default",
|
|
796
823
|
"pl7.app/table/orderPriority": "110000",
|
|
797
824
|
"pl7.app/segmentedBy": string(json.encode(["pl7.app/vdj/clonotypingRunId"]))
|
|
798
825
|
}
|
|
@@ -825,16 +852,19 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
825
852
|
|
|
826
853
|
columnsSpecPerSample: columnsSpecPerSample,
|
|
827
854
|
columnsSpecPerSampleSc: columnsSpecPerSampleSc,
|
|
828
|
-
|
|
855
|
+
columnsSpecPerClonotypeNoAggregates: columnsSpecPerClonotypeNoAggregates,
|
|
856
|
+
columnsSpecPerClonotypeAggregates: columnsSpecPerClonotypeAggregates,
|
|
829
857
|
columnsSpecPerClonotypeSc: columnsSpecPerClonotypeSc,
|
|
830
858
|
|
|
831
859
|
columnsSpec: columnsSpec,
|
|
832
860
|
|
|
833
861
|
mainAbundanceColumnNormalized: mainAbundanceColumnNormalized,
|
|
862
|
+
mainAbundanceColumnNormalizedArgs: mainAbundanceColumnNormalizedArgs,
|
|
834
863
|
mainAbundanceColumnUnnormalized: mainAbundanceColumnUnnormalized,
|
|
864
|
+
mainAbundanceColumnUnnormalizedArgs: mainAbundanceColumnUnnormalizedArgs,
|
|
835
865
|
|
|
836
|
-
|
|
837
|
-
|
|
866
|
+
mainIsProductiveColumn: mainIsProductiveColumn,
|
|
867
|
+
mainIsProductiveArgs: mainIsProductiveArgs,
|
|
838
868
|
|
|
839
869
|
exportArgs: exportArgs
|
|
840
870
|
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
generateClonotypeLabelSteps := func(clonotypeKeyCol, clonotypeLabelCol, targetTable) {
|
|
2
|
+
prefixTempCol := clonotypeLabelCol + "_prefix_temp"
|
|
3
|
+
rankTempCol := clonotypeLabelCol + "_rank_temp"
|
|
4
|
+
|
|
5
|
+
steps := []
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
steps = append(steps, {
|
|
9
|
+
type: "add_columns",
|
|
10
|
+
table: targetTable,
|
|
11
|
+
columns: [{
|
|
12
|
+
name: prefixTempCol,
|
|
13
|
+
expression: {
|
|
14
|
+
type: "to_upper",
|
|
15
|
+
value: {
|
|
16
|
+
type: "substring",
|
|
17
|
+
value: {
|
|
18
|
+
type: "str_replace",
|
|
19
|
+
value: { type: "col", name: clonotypeKeyCol },
|
|
20
|
+
pattern: "\\d", // Regex for digits
|
|
21
|
+
replacement: "",
|
|
22
|
+
replaceAll: true
|
|
23
|
+
},
|
|
24
|
+
start: 0,
|
|
25
|
+
length: 5
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}]
|
|
29
|
+
})
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
steps = append(steps, {
|
|
34
|
+
type: "add_columns",
|
|
35
|
+
table: targetTable,
|
|
36
|
+
columns: [{
|
|
37
|
+
name: rankTempCol,
|
|
38
|
+
expression: {
|
|
39
|
+
type: "rank",
|
|
40
|
+
partitionBy: [{ type: "col", name: prefixTempCol }],
|
|
41
|
+
orderBy: [{ type: "col", name: clonotypeKeyCol }]
|
|
42
|
+
}
|
|
43
|
+
}]
|
|
44
|
+
})
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
steps = append(steps, {
|
|
48
|
+
type: "add_columns",
|
|
49
|
+
table: targetTable,
|
|
50
|
+
columns: [{
|
|
51
|
+
name: clonotypeLabelCol,
|
|
52
|
+
expression: {
|
|
53
|
+
type: "when_then_otherwise",
|
|
54
|
+
conditions: [
|
|
55
|
+
{
|
|
56
|
+
when: {
|
|
57
|
+
type: "gt",
|
|
58
|
+
lhs: { type: "col", name: rankTempCol },
|
|
59
|
+
rhs: { type: "const", value: 1 }
|
|
60
|
+
},
|
|
61
|
+
then: {
|
|
62
|
+
type: "str_join",
|
|
63
|
+
operands: [
|
|
64
|
+
{ type: "const", value: "C" },
|
|
65
|
+
{ type: "col", name: prefixTempCol },
|
|
66
|
+
{ type: "col", name: rankTempCol }
|
|
67
|
+
],
|
|
68
|
+
delimiter: "-"
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
],
|
|
72
|
+
otherwise: {
|
|
73
|
+
type: "str_join",
|
|
74
|
+
operands: [
|
|
75
|
+
{ type: "const", value: "C" },
|
|
76
|
+
{ type: "col", name: prefixTempCol }
|
|
77
|
+
],
|
|
78
|
+
delimiter: "-"
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}]
|
|
82
|
+
})
|
|
83
|
+
|
|
84
|
+
return steps
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
addClonotypeLabelColumnsPt := func(df, clonotypeKeyCol, clonotypeLabelCol, pt) {
|
|
88
|
+
prefixTempCol := clonotypeLabelCol + "_prefix_temp"
|
|
89
|
+
rankTempCol := clonotypeLabelCol + "_rank_temp"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
df = df.withColumns(
|
|
93
|
+
pt.col(clonotypeKeyCol).
|
|
94
|
+
strReplace("\\d", "", { replaceAll: true }).
|
|
95
|
+
strSlice(0, 5). // Take first 5 characters
|
|
96
|
+
strToUpper(). // Convert to uppercase
|
|
97
|
+
alias(prefixTempCol)
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
df = df.withColumns(
|
|
102
|
+
pt.rank(pt.col(clonotypeKeyCol)). // Rank based on clonotypeKeyCol (default ascending)
|
|
103
|
+
over(pt.col(prefixTempCol)). // Partition by prefixTempCol
|
|
104
|
+
alias(rankTempCol)
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
df = df.withColumns(
|
|
109
|
+
pt.when(pt.col(rankTempCol).gt(pt.lit(1))).
|
|
110
|
+
then(pt.concatStr([pt.lit("C"), pt.col(prefixTempCol), pt.col(rankTempCol).cast("String")], { delimiter: "-" })).
|
|
111
|
+
otherwise(pt.concatStr([pt.lit("C"), pt.col(prefixTempCol)], { delimiter: "-" })).
|
|
112
|
+
alias(clonotypeLabelCol)
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
return df.withoutColumns(prefixTempCol, rankTempCol)
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
export {
|
|
119
|
+
generateClonotypeLabelSteps: generateClonotypeLabelSteps,
|
|
120
|
+
addClonotypeLabelColumnsPt: addClonotypeLabelColumnsPt
|
|
121
|
+
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,18 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.19.1",
|
|
4
4
|
"description": "Tengo-based template",
|
|
5
5
|
"dependencies": {
|
|
6
|
-
"@platforma-sdk/workflow-tengo": "^4.
|
|
7
|
-
"@platforma-open/milaboratories.mixcr
|
|
8
|
-
"@platforma-open/milaboratories.mixcr-clonotyping-2.hash-column": "1.1.1"
|
|
6
|
+
"@platforma-sdk/workflow-tengo": "^4.6.3",
|
|
7
|
+
"@platforma-open/milaboratories.software-mixcr": "4.7.0-169-develop"
|
|
9
8
|
},
|
|
10
9
|
"devDependencies": {
|
|
11
|
-
"@platforma-sdk/tengo-builder": "^2.1.
|
|
12
|
-
"@platforma-
|
|
13
|
-
"
|
|
14
|
-
"@platforma-sdk/test": "^1.30.24",
|
|
15
|
-
"vitest": "~2.1.8",
|
|
10
|
+
"@platforma-sdk/tengo-builder": "^2.1.6",
|
|
11
|
+
"@platforma-sdk/test": "^1.32.4",
|
|
12
|
+
"vitest": "~2.1.9",
|
|
16
13
|
"typescript": "~5.5.4"
|
|
17
14
|
},
|
|
18
15
|
"scripts": {
|
|
@@ -2,16 +2,14 @@ ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
|
2
2
|
self := import("@platforma-sdk/workflow-tengo:tpl")
|
|
3
3
|
pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
|
|
4
4
|
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
5
|
-
assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
6
|
-
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
7
5
|
maps := import("@platforma-sdk/workflow-tengo:maps")
|
|
6
|
+
clonotypeLabel := import(":clonotype-label")
|
|
7
|
+
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
8
8
|
|
|
9
9
|
json := import("json")
|
|
10
10
|
|
|
11
11
|
self.defineOutputs("tsv")
|
|
12
12
|
|
|
13
|
-
ptransformSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
|
|
14
|
-
|
|
15
13
|
self.body(func(inputs) {
|
|
16
14
|
inputData := inputs[pConstants.VALUE_FIELD_NAME]
|
|
17
15
|
inputDataMeta := inputData.getDataAsJson()
|
|
@@ -21,48 +19,18 @@ self.body(func(inputs) {
|
|
|
21
19
|
params := inputs.params
|
|
22
20
|
mainAbundanceColumnNormalized := params.mainAbundanceColumnNormalized
|
|
23
21
|
mainAbundanceColumnUnnormalized := params.mainAbundanceColumnUnnormalized
|
|
24
|
-
clonotypeColumns := params.clonotypeColumns
|
|
25
|
-
|
|
26
|
-
pickCols := []
|
|
27
|
-
for col in clonotypeColumns {
|
|
28
|
-
if col == "sampleCount" || col == mainAbundanceColumnNormalized + "Mean" || col == mainAbundanceColumnUnnormalized + "Sum" {
|
|
29
|
-
continue
|
|
30
|
-
}
|
|
31
|
-
pickCols = append(pickCols, [col, col])
|
|
32
|
-
}
|
|
33
22
|
|
|
34
|
-
//
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
type: "aggregate",
|
|
38
|
-
groupBy: ["clonotypeKey"],
|
|
39
|
-
aggregations: [ {
|
|
40
|
-
type: "max_by",
|
|
41
|
-
rankingCol: mainAbundanceColumnNormalized,
|
|
42
|
-
pickCols: pickCols
|
|
43
|
-
}, {
|
|
44
|
-
type: "count",
|
|
45
|
-
src: mainAbundanceColumnNormalized,
|
|
46
|
-
dst: "sampleCount"
|
|
47
|
-
}, {
|
|
48
|
-
type: "sum",
|
|
49
|
-
src: mainAbundanceColumnUnnormalized,
|
|
50
|
-
dst: mainAbundanceColumnUnnormalized + "Sum"
|
|
51
|
-
}, {
|
|
52
|
-
type: "mean",
|
|
53
|
-
src: mainAbundanceColumnNormalized,
|
|
54
|
-
dst: mainAbundanceColumnNormalized + "Mean"
|
|
55
|
-
}]
|
|
56
|
-
} ]
|
|
57
|
-
}
|
|
23
|
+
// { column: string; type: string }
|
|
24
|
+
schemaPerClonotypeNoAggregates := params.schemaPerClonotypeNoAggregates
|
|
25
|
+
schemaPerSample := params.schemaPerSample
|
|
58
26
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
software(ptransformSw).
|
|
62
|
-
arg("--workflow").arg("wf.json").
|
|
63
|
-
writeFile("wf.json", json.encode(pWorkflow))
|
|
27
|
+
wf := pt.workflow()
|
|
28
|
+
dataFrames := []
|
|
64
29
|
|
|
65
30
|
inputMap := inputData.inputs()
|
|
31
|
+
|
|
32
|
+
baseSchemaForRead := schemaPerSample + [ { column: "clonotypeKey", type: "String" } ]
|
|
33
|
+
|
|
66
34
|
for sKey in maps.getKeys(inputMap) {
|
|
67
35
|
inputFile := inputMap[sKey]
|
|
68
36
|
key := json.decode(sKey)
|
|
@@ -70,16 +38,54 @@ self.body(func(inputs) {
|
|
|
70
38
|
ll.panic("malformed key: %v", sKey)
|
|
71
39
|
}
|
|
72
40
|
sampleId := key[0]
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
41
|
+
dfId := "table_" + sampleId
|
|
42
|
+
|
|
43
|
+
df := wf.frame({
|
|
44
|
+
file: inputFile,
|
|
45
|
+
xsvType: "tsv",
|
|
46
|
+
schema: baseSchemaForRead
|
|
47
|
+
}, {
|
|
48
|
+
id: dfId,
|
|
49
|
+
inferSchema: false
|
|
50
|
+
})
|
|
51
|
+
dataFrames = append(dataFrames, df)
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
currentDf := undefined
|
|
55
|
+
if len(dataFrames) == 0 {
|
|
56
|
+
ll.panic("no input files found")
|
|
57
|
+
} else if len(dataFrames) == 1 {
|
|
58
|
+
currentDf = dataFrames[0]
|
|
59
|
+
} else {
|
|
60
|
+
currentDf = pt.concat(dataFrames)
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
aggExpressions := []
|
|
64
|
+
|
|
65
|
+
for colDef in schemaPerClonotypeNoAggregates {
|
|
66
|
+
if colDef.column == "clonotypeLabel" {
|
|
67
|
+
continue
|
|
68
|
+
}
|
|
69
|
+
aggExpressions = append(aggExpressions,
|
|
70
|
+
pt.col(colDef.column).maxBy(pt.col(mainAbundanceColumnNormalized)).alias(colDef.column)
|
|
71
|
+
)
|
|
76
72
|
}
|
|
77
73
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
74
|
+
aggExpressions = append(aggExpressions,
|
|
75
|
+
pt.col(mainAbundanceColumnNormalized).count().alias("sampleCount"),
|
|
76
|
+
pt.col(mainAbundanceColumnUnnormalized).sum().alias(mainAbundanceColumnUnnormalized + "Sum"),
|
|
77
|
+
pt.col(mainAbundanceColumnNormalized).mean().alias(mainAbundanceColumnNormalized + "Mean")
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
aggregatedDf := currentDf.groupBy("clonotypeKey").agg(aggExpressions...)
|
|
81
|
+
|
|
82
|
+
aggregatedDf = clonotypeLabel.addClonotypeLabelColumnsPt(aggregatedDf, "clonotypeKey", "clonotypeLabel", pt)
|
|
83
|
+
|
|
84
|
+
aggregatedDf.save("output.tsv")
|
|
85
|
+
|
|
86
|
+
ptablerResult := wf.run()
|
|
81
87
|
|
|
82
|
-
processedTsv :=
|
|
88
|
+
processedTsv := ptablerResult.getFile("output.tsv")
|
|
83
89
|
|
|
84
90
|
return {
|
|
85
91
|
tsv: processedTsv
|