@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 2.23.3 → 2.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@2.23.3 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
3
+ > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@2.25.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  info: Skipping unknown file type: test/columns.test.ts
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.mixcr-clonotyping.workflow
2
2
 
3
+ ## 2.25.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 427ffa6: Include junction lengths and total number of added nt
8
+
9
+ ## 2.24.0
10
+
11
+ ### Minor Changes
12
+
13
+ - 35856bb: Fix for caching and deduplication & SDK Upgrade
14
+
3
15
  ## 2.23.3
4
16
 
5
17
  ### Patch Changes
@@ -541,6 +541,52 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
541
541
  }
542
542
 
543
543
 
544
+ junctionTypes := [ "VJ", "VD", "DJ" ]
545
+ for junctionType in junctionTypes {
546
+ feature := junctionType + "Junction"
547
+ naRegex := junctionType == "VJ" ? "region_not_covered" : "no_d_gene"
548
+
549
+ columnsSpecPerClonotypeNoAggregates += [ {
550
+ column: "nLength" + feature,
551
+ id: "n-length-" + text.to_lower(junctionType) + "-junction",
552
+ naRegex: naRegex,
553
+ allowNA: true,
554
+ spec: {
555
+ name: "pl7.app/vdj/sequenceLength",
556
+ valueType: "Int",
557
+ domain: {
558
+ "pl7.app/vdj/feature": feature,
559
+ "pl7.app/alphabet": "nucleotide"
560
+ },
561
+ annotations: a(orderP, false, {
562
+ "pl7.app/label": "Length of " + junctionType + " Junction nt"
563
+ })
564
+ }
565
+ } ]
566
+ exportArgs += [ [ "-nLength", feature ] ]
567
+ orderP -= 100
568
+ }
569
+
570
+
571
+ columnsSpecPerClonotypeNoAggregates += [ {
572
+ column: "nLengthTotalAdded",
573
+ id: "n-length-total-added",
574
+ naRegex: "region_not_covered|no_d_gene",
575
+ allowNA: true,
576
+ spec: {
577
+ name: "pl7.app/vdj/sequenceLength",
578
+ valueType: "Int",
579
+ domain: {
580
+ "pl7.app/alphabet": "nucleotide"
581
+ },
582
+ annotations: a(orderP, false, {
583
+ "pl7.app/label": "Total number of added nt"
584
+ })
585
+ }
586
+ } ]
587
+ orderP -= 100
588
+
589
+
544
590
 
545
591
  geneHitColumnVariants := [ {
546
592
  name: "pl7.app/vdj/geneHitWithAllele",
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,14 +1,14 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
3
- "version": "2.23.3",
3
+ "version": "2.25.0",
4
4
  "description": "Tengo-based template",
5
5
  "dependencies": {
6
- "@platforma-sdk/workflow-tengo": "^4.9.3",
6
+ "@platforma-sdk/workflow-tengo": "^4.10.0",
7
7
  "@platforma-open/milaboratories.software-mixcr": "4.7.0-190-develop"
8
8
  },
9
9
  "devDependencies": {
10
10
  "@platforma-sdk/tengo-builder": "^2.1.12",
11
- "@platforma-sdk/test": "^1.39.15",
11
+ "@platforma-sdk/test": "^1.39.21",
12
12
  "vitest": "~2.1.9",
13
13
  "typescript": "~5.5.4"
14
14
  },
@@ -1,13 +1,16 @@
1
+ //tengo:hash_override 8B1CFC68-2542-4C81-8CD4-F927B75F3975
2
+
1
3
  ll := import("@platforma-sdk/workflow-tengo:ll")
2
4
  self := import("@platforma-sdk/workflow-tengo:tpl")
3
5
  pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
4
6
  slices := import("@platforma-sdk/workflow-tengo:slices")
5
7
  maps := import("@platforma-sdk/workflow-tengo:maps")
6
8
  units := import("@platforma-sdk/workflow-tengo:units")
7
- clonotypeLabel := import(":clonotype-label")
8
9
  pt := import("@platforma-sdk/workflow-tengo:pt")
9
- math := import("math")
10
10
 
11
+ clonotypeLabel := import(":clonotype-label")
12
+
13
+ math := import("math")
11
14
  json := import("json")
12
15
 
13
16
  self.defineOutputs("tsv")
@@ -38,6 +41,8 @@ self.body(func(inputs) {
38
41
 
39
42
  baseSchemaForRead := schemaPerSample + [ { column: "clonotypeKey", type: "String" } ]
40
43
 
44
+ ll.print("__THE_LOG__ AGGREGATE BY CLONOTYPE KEY: " + json.encode(maps.getKeys(inputMap)))
45
+
41
46
  for sKey in maps.getKeys(inputMap) {
42
47
  inputFile := inputMap[sKey]
43
48
  key := json.decode(sKey)
@@ -70,7 +75,7 @@ self.body(func(inputs) {
70
75
  aggExpressions := []
71
76
 
72
77
  for colDef in schemaPerClonotypeNoAggregates {
73
- if colDef.column == "clonotypeLabel" {
78
+ if colDef.column == "clonotypeLabel" || colDef.column == "nLengthTotalAdded" {
74
79
  continue
75
80
  }
76
81
  aggExpressions = append(aggExpressions,
@@ -86,6 +91,14 @@ self.body(func(inputs) {
86
91
 
87
92
  aggregatedDf := currentDf.groupBy("clonotypeKey").agg(aggExpressions...)
88
93
 
94
+ // Calculate total added nucleotides: VDJunction + DJJunction for chains with D genes, VJJunction for chains without D genes
95
+ aggregatedDf = aggregatedDf.withColumns(
96
+ pt.when(pt.col("nLengthVDJunction").isNotNull().and(pt.col("nLengthVDJunction").neq("no_d_gene"))).
97
+ then(pt.col("nLengthVDJunction").cast("Int").plus(pt.col("nLengthDJJunction").cast("Int"))).
98
+ otherwise(pt.col("nLengthVJJunction").cast("Int")).
99
+ alias("nLengthTotalAdded")
100
+ )
101
+
89
102
  aggregatedDf = clonotypeLabel.addClonotypeLabelColumnsPt(aggregatedDf, "clonotypeKey", "clonotypeLabel", pt)
90
103
 
91
104
  aggregatedDf.save("output.tsv")
@@ -540,6 +540,52 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
540
540
  }
541
541
  }
542
542
 
543
+ // Include junction lengths for calculation of total added nucleotides
544
+ junctionTypes := [ "VJ", "VD", "DJ" ]
545
+ for junctionType in junctionTypes {
546
+ feature := junctionType + "Junction"
547
+ naRegex := junctionType == "VJ" ? "region_not_covered" : "no_d_gene"
548
+
549
+ columnsSpecPerClonotypeNoAggregates += [ {
550
+ column: "nLength" + feature,
551
+ id: "n-length-" + text.to_lower(junctionType) + "-junction",
552
+ naRegex: naRegex,
553
+ allowNA: true,
554
+ spec: {
555
+ name: "pl7.app/vdj/sequenceLength",
556
+ valueType: "Int",
557
+ domain: {
558
+ "pl7.app/vdj/feature": feature,
559
+ "pl7.app/alphabet": "nucleotide"
560
+ },
561
+ annotations: a(orderP, false, {
562
+ "pl7.app/label": "Length of " + junctionType + " Junction nt"
563
+ })
564
+ }
565
+ } ]
566
+ exportArgs += [ [ "-nLength", feature ] ]
567
+ orderP -= 100
568
+ }
569
+
570
+ // Spec for total added length (calculated by pt in aggregate-by-clonotype-key, no flag)
571
+ columnsSpecPerClonotypeNoAggregates += [ {
572
+ column: "nLengthTotalAdded",
573
+ id: "n-length-total-added",
574
+ naRegex: "region_not_covered|no_d_gene",
575
+ allowNA: true,
576
+ spec: {
577
+ name: "pl7.app/vdj/sequenceLength",
578
+ valueType: "Int",
579
+ domain: {
580
+ "pl7.app/alphabet": "nucleotide"
581
+ },
582
+ annotations: a(orderP, false, {
583
+ "pl7.app/label": "Total number of added nt"
584
+ })
585
+ }
586
+ } ]
587
+ orderP -= 100
588
+
543
589
  // VDJC Hits
544
590
 
545
591
  geneHitColumnVariants := [ {
@@ -167,7 +167,10 @@ wf.body(func(args) {
167
167
  clonotypes: pframes.exportFrame(runMixcr.output("clonotypes")),
168
168
  fileImports: smart.createMapResource(maps.mapValues(fileImports, func(im) {
169
169
  return im.handle
170
- }))
170
+ })),
171
+
172
+ // will be attached as output, to keep these intermediate results for as long as the block is not deleted
173
+ resultsToCache: runMixcr.output("resultsToCache")
171
174
  }
172
175
 
173
176
  if !is_undefined(libraryImportHandle) {
@@ -1,7 +1,5 @@
1
1
  //tengo:hash_override D70EDB25-6FF6-4615-966D-B79B04B5751C
2
2
 
3
- // mixcr analyze
4
-
5
3
  self := import("@platforma-sdk/workflow-tengo:tpl")
6
4
  smart := import("@platforma-sdk/workflow-tengo:smart")
7
5
  pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
@@ -1,3 +1,5 @@
1
+ //tengo:hash_override 553D3080-FB87-44BC-BEEB-DB9EB5F773D8
2
+
1
3
  ll := import("@platforma-sdk/workflow-tengo:ll")
2
4
  self := import("@platforma-sdk/workflow-tengo:tpl.light")
3
5
  pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
@@ -1,3 +1,5 @@
1
+ //tengo:hash_override 8E4B6062-534D-4322-9CED-EC75E499A10A
2
+
1
3
  ll := import("@platforma-sdk/workflow-tengo:ll")
2
4
  self := import("@platforma-sdk/workflow-tengo:tpl.light")
3
5
  pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
@@ -5,17 +7,21 @@ assets := import("@platforma-sdk/workflow-tengo:assets")
5
7
  exec := import("@platforma-sdk/workflow-tengo:exec")
6
8
  maps := import("@platforma-sdk/workflow-tengo:maps")
7
9
  slices := import("@platforma-sdk/workflow-tengo:slices")
8
- clonotypeLabel := import(":clonotype-label")
9
10
  units := import("@platforma-sdk/workflow-tengo:units")
11
+ pt := import("@platforma-sdk/workflow-tengo:pt")
12
+
13
+ clonotypeLabel := import(":clonotype-label")
14
+
10
15
  json := import("json")
11
16
  math := import("math")
12
- pt := import("@platforma-sdk/workflow-tengo:pt")
13
17
 
14
18
  self.defineOutputs("abundanceTsv", "clonotypeTsv", "propertiesAPrimaryTsv", "propertiesASecondaryTsv", "propertiesBPrimaryTsv", "propertiesBSecondaryTsv")
15
19
 
16
20
  ptablerSw := assets.importSoftware("@platforma-open/milaboratories.software-ptabler:main")
17
21
 
18
22
  self.body(func(inputs) {
23
+ ll.print("__THE_LOG__ PROCESS SINGLE CELL")
24
+
19
25
  byCellTagA := inputs[pConstants.VALUE_FIELD_NAME]
20
26
  inputDataMeta := byCellTagA.getDataAsJson()
21
27
  ll.assert(inputDataMeta.keyLength == 1, "unexpected number of aggregation axes")
@@ -8,11 +8,13 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
8
8
  slices := import("@platforma-sdk/workflow-tengo:slices")
9
9
  maps := import("@platforma-sdk/workflow-tengo:maps")
10
10
  sets := import("@platforma-sdk/workflow-tengo:sets")
11
+ anonymize := import("@platforma-sdk/workflow-tengo:anonymize")
11
12
 
12
13
  calculateExportSpecs := import(":calculate-export-specs")
13
14
 
14
15
  json := import("json")
15
16
  text := import("text")
17
+ times := import("times")
16
18
 
17
19
  mixcrAnalyzeTpl := assets.importTemplate(":mixcr-analyze")
18
20
  mixcrExportTpl := assets.importTemplate(":mixcr-export")
@@ -250,6 +252,10 @@ self.body(func(inputs) {
250
252
  // in the body template
251
253
  passAggregationAxesNames: true,
252
254
 
255
+ // this will cache each individual step result for 30 minutes, even if execution of some of the steps
256
+ // failed or the whole execution was interrupted
257
+ stepCache: 30 * times.minute,
258
+
253
259
  // will be automatically propagated to all output specs
254
260
  traceSteps: [{type: "milaboratories.mixcr-clonotyping", id: blockId, importance: 20, label: "MiXCR " + presetCommonName}],
255
261
 
@@ -265,6 +271,9 @@ self.body(func(inputs) {
265
271
  library: library,
266
272
  presetContent: presetContent
267
273
  },
274
+
275
+ // by passing those parameters as meta fields we allow for recovery and deduplication mechanisms
276
+ // to pick up the results from executions with different values for CPU and Memory overrides
268
277
  metaExtra: {
269
278
  perProcessMemGB: perProcessMemGB,
270
279
  perProcessCPUs: perProcessCPUs
@@ -309,6 +318,8 @@ self.body(func(inputs) {
309
318
  return schema
310
319
  }
311
320
 
321
+ resultsToCache := {}
322
+
312
323
  for chain in chains {
313
324
  chainInfo := chainInfos[chain]
314
325
  ll.assert(!is_undefined(chainInfo), "chainInfo not found for chain %v", chain)
@@ -378,6 +389,10 @@ self.body(func(inputs) {
378
389
  // will be automatically propagated to all output specs
379
390
  traceSteps: [{type: "milaboratories.mixcr-clonotyping.export", id: blockId + "." + chain, importance: 80, label: chainInfo.name}],
380
391
 
392
+ // this will cache each individual step result for 30 minutes, even if execution of some of the steps
393
+ // failed or the whole execution was interrupted
394
+ stepCache: 30 * times.minute,
395
+
381
396
  extra: {
382
397
  params: {
383
398
  chains: chainInfo.mixcrFilter,
@@ -438,12 +453,22 @@ self.body(func(inputs) {
438
453
  } ]
439
454
  }
440
455
 
456
+ resultsToCache["clonotypeTable/" + chain] = exportResults.outputData("clonotypeTable")
457
+
441
458
  aggregateByCloneKey := pframes.processColumn(
442
459
  exportResults.output("clonotypeTable"),
443
460
  aggregateByClonotypeKeyTpl,
444
461
  aggregationOutputs,
445
462
  {
446
- aggregate: ["pl7.app/sampleId"],
463
+ aggregate: [ {
464
+ name: "pl7.app/sampleId",
465
+ anonymize: true
466
+ } ],
467
+
468
+ // this will cache each individual step result for 30 minutes, even if execution of some of the steps
469
+ // failed or the whole execution was interrupted
470
+ stepCache: 30 * times.minute,
471
+
447
472
  extra: {
448
473
  params: {
449
474
  mainAbundanceColumnNormalized: mainAbundanceColumnNormalized,
@@ -457,11 +482,15 @@ self.body(func(inputs) {
457
482
  )
458
483
 
459
484
  if isSingleCell {
460
- // collecting results for possible future single cell processing
485
+ // collecting results for future single cell processing
461
486
  perChainResults[chain] = {
462
487
  tsvForSingleCell: exportResults.output("clonotypeTableForSingleCell"),
463
488
  clonotypeProperties: aggregateByCloneKey.output("clonotypeProperties")
464
489
  }
490
+
491
+ // caching intermediate results until the block is removed
492
+ resultsToCache["clonotypeTableForSingleCell/" + chain] = exportResults.outputData("clonotypeTableForSingleCell")
493
+ resultsToCache["clonotypeProperties/" + chain] = aggregateByCloneKey.outputData("clonotypeProperties")
465
494
  } else {
466
495
  // only adding data outputs if we are in bulk mode
467
496
  exportResults.addXsvOutputToBuilder(clonotypes, "byCloneKeyBySample", "byCloneKeyBySample/" + chain + "/")
@@ -597,14 +626,22 @@ self.body(func(inputs) {
597
626
  // Using A chain files as main PColumn for xsv conversion through pframes.processColumn.
598
627
  // Since we aggregate by sample, this is just a single pass through the data.
599
628
 
629
+ // manual data anonymization
630
+ anonymizationResult := anonymize.anonymizePKeys({
631
+ byCellTagAData: perChainResults[chainA].tsvForSingleCell.data,
632
+ byCellTagBData: perChainResults[chainB].tsvForSingleCell.data
633
+ }, [0])
634
+ byCellTagAData := anonymizationResult.result.byCellTagAData
635
+ byCellTagBData := anonymizationResult.result.byCellTagBData
636
+
600
637
  singleCellResult := pframes.processColumn(
601
- perChainResults[chainA].tsvForSingleCell,
638
+ { spec: perChainResults[chainA].tsvForSingleCell.spec, data: byCellTagAData },
602
639
  processSingleCellTpl,
603
640
  singleCellOutputs,
604
641
  {
605
642
  aggregate: ["pl7.app/sampleId"],
606
643
  extra: {
607
- byCellTagB: perChainResults[chainB].tsvForSingleCell.data,
644
+ byCellTagB: byCellTagBData,
608
645
  propertiesA: perChainResults[chainA].clonotypeProperties.data,
609
646
  propertiesB: perChainResults[chainB].clonotypeProperties.data,
610
647
  params: {
@@ -616,7 +653,16 @@ self.body(func(inputs) {
616
653
  }
617
654
  )
618
655
 
619
- singleCellResult.addXsvOutputToBuilder(clonotypes, "abundanceTable", "clonotypeProperties/abundance/" + receptor + "/")
656
+ // singleCellResult.addXsvOutputToBuilder(clonotypes, "abundanceTable", "clonotypeProperties/abundance/" + receptor + "/")
657
+ for columnName in singleCellResult.listXsvColumns("abundanceTable") {
658
+ anonymizedData := singleCellResult.outputData("abundanceTable", columnName)
659
+ clonotypes.add(
660
+ "clonotypeProperties/abundance/" + receptor + "/" + columnName,
661
+ singleCellResult.outputSpec("abundanceTable", columnName),
662
+ anonymize.deanonymizePKeys(anonymizedData, 0, [0], anonymizationResult.mapping)
663
+ )
664
+ resultsToCache["clonotypeProperties/abundance/" + receptor + "/" + columnName] = anonymizedData
665
+ }
620
666
  singleCellResult.addXsvOutputToBuilder(clonotypes, "aggregates", "clonotypeProperties/aggregates/" + receptor + "/")
621
667
 
622
668
  singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesAPrimary", "clonotypeProperties/" + receptor + "/aPrimary/")
@@ -639,6 +685,8 @@ self.body(func(inputs) {
639
685
  "clns.spec": mixcrResults.outputSpec("clns"),
640
686
  "clns.data": mixcrResults.outputData("clns"),
641
687
 
688
+ "resultsToCache": resultsToCache,
689
+
642
690
  clonotypes: clonotypes.build()
643
691
  }
644
692
  })