@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.24.5 → 3.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.24.5 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
3
+ > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.25.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
4
4
  > shx rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  info: Skipping unknown file type: test/columns.test.ts
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.mixcr-clonotyping.workflow
2
2
 
3
+ ## 3.25.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 979cbf8: Add descriptions to key columns of the QC table and include the number of clonotypes dropped from unpaired cells
8
+
9
+ ## 3.24.6
10
+
11
+ ### Patch Changes
12
+
13
+ - eea91d6: Filter out non-productive sequences for single-cell data
14
+
3
15
  ## 3.24.5
4
16
 
5
17
  ### Patch Changes
@@ -615,7 +615,8 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
615
615
  "pl7.app/min": "0",
616
616
  "pl7.app/table/orderPriority": "109000",
617
617
  "pl7.app/table/visibility": "default",
618
- "pl7.app/label": "Total Clonotypes"
618
+ "pl7.app/label": "Total Clonotypes",
619
+ "pl7.app/description": "Sum of unique clonotypes across all chains. In single-cell mode, only clonotypes from paired cells are counted."
619
620
  }
620
621
  }
621
622
  },
@@ -818,7 +819,8 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
818
819
  "pl7.app/min": "0",
819
820
  "pl7.app/table/orderPriority": "107900",
820
821
  "pl7.app/table/visibility": "default",
821
- "pl7.app/label": "Total number of cells (with paired chains)"
822
+ "pl7.app/label": "Total number of cells (with paired chains)",
823
+ "pl7.app/description": "Cells detected with at least two distinct chains (e.g. heavy and light)."
822
824
  }
823
825
  }
824
826
  }]
@@ -838,7 +840,8 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
838
840
  "pl7.app/min": "0",
839
841
  "pl7.app/table/orderPriority": string(n),
840
842
  "pl7.app/table/visibility": "default",
841
- "pl7.app/label": "Clonotypes by Chain " + chain
843
+ "pl7.app/label": "Clonotypes by Chain " + chain,
844
+ "pl7.app/description": "Number of unique clonotypes for this chain (paired cells only). Includes secondary chain rearrangements."
842
845
  }
843
846
  }
844
847
  }]
@@ -846,6 +849,25 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
846
849
  }
847
850
 
848
851
 
852
+ singleCellColumns += [{
853
+ column: "clonotypesDroppedUnpaired",
854
+ id: "clonotypes-dropped-unpaired",
855
+ allowNA: true,
856
+ naRegex: "NaN",
857
+ spec: {
858
+ name: "mixcr.com/reports/singleCell/clonotypesDroppedUnpaired",
859
+ valueType: "Long",
860
+ annotations: {
861
+ "pl7.app/min": "0",
862
+ "pl7.app/table/orderPriority": "108150",
863
+ "pl7.app/table/visibility": "optional",
864
+ "pl7.app/label": "Clonotypes Dropped - Unpaired",
865
+ "pl7.app/description": "Clonotypes discarded because their cell lacked a paired chain."
866
+ }
867
+ }
868
+ }]
869
+
870
+
849
871
  singleCellColumns += [
850
872
 
851
873
 
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
3
- "version": "3.24.5",
3
+ "version": "3.25.0",
4
4
  "description": "Tengo-based template",
5
5
  "dependencies": {
6
6
  "@platforma-sdk/workflow-tengo": "5.11.0",
@@ -374,29 +374,29 @@ self.body(func(inputs) {
374
374
  )
375
375
  perChainJoined = perChainJoined.join(chainAgg, { how: "left", on: ["sampleId"] })
376
376
  }
377
- // Single-cell: compute per-sample cell pairing stats (both chains vs one chain)
377
+ // Single-cell: compute per-sample cell pairing stats and paired clonotype counts
378
378
  if isSingleCell && !is_undefined(singleCellChainTsvsData) {
379
- // Expect two chains for receptor; if more, we count any cell having A1 and B1 as both
380
- // Build a map of per-sample cellKey presence per chain
379
+ // Load all single-cell chain TSVs once with (sampleId, cellKey, clonotypeKey, chain)
381
380
  scDfs := []
382
381
  maps.forEach(singleCellChainTsvsData, func(chainName, chainFiles) {
383
382
  maps.forEach(chainFiles.inputs(), func(key, f) {
384
383
  sampleId := json.decode(key)[0]
385
- df := wf.frame(f, { xsvType: "tsv", inferSchema: false, schema: [ { column: "cellKey", type: "String" } ] })
386
- df2 := df.select(
384
+ df := wf.frame(f, { xsvType: "tsv", inferSchema: false, schema: [ { column: "cellKey", type: "String" }, { column: "clonotypeKey", type: "String" } ] })
385
+ scDfs = append(scDfs, df.select(
387
386
  pt.lit(sampleId).alias("sampleId"),
388
- pt.col("cellKey").alias("cellKey"),
387
+ pt.col("cellKey"),
388
+ pt.col("clonotypeKey"),
389
389
  pt.lit(chainName).alias("chain")
390
- )
391
- scDfs = append(scDfs, df2)
390
+ ))
392
391
  })
393
392
  })
394
393
  if len(scDfs) > 0 {
395
394
  scAll := len(scDfs) > 1 ? pt.concat(scDfs) : scDfs[0]
395
+
396
396
  // Count cells per sample across all chains (unique cellKey)
397
397
  cellsPerSample := scAll.groupBy("sampleId").agg(pt.col("cellKey").nUnique().alias("scCellsTotal"))
398
398
 
399
- // Cells paired across different chains: require the same cellKey to appear in >1 distinct chains per sample
399
+ // Cells paired across different chains: cellKey appears in >1 distinct chain
400
400
  cellsPerSampleChainCounts := scAll.groupBy("sampleId", "cellKey").agg(pt.col("chain").nUnique().alias("_numChains"))
401
401
  bothChainCells := cellsPerSampleChainCounts.filter(pt.col("_numChains").gt(1)).groupBy("sampleId").agg(pt.col("cellKey").count().alias("scCellsBothChains"))
402
402
  pairedKeys := cellsPerSampleChainCounts.filter(pt.col("_numChains").gt(1)).select(pt.col("sampleId"), pt.col("cellKey"))
@@ -404,31 +404,20 @@ self.body(func(inputs) {
404
404
  perChainJoined = perChainJoined.join(cellsPerSample, { how: "left", on: ["sampleId"] })
405
405
  perChainJoined = perChainJoined.join(bothChainCells, { how: "left", on: ["sampleId"] })
406
406
 
407
- // Recompute per-chain clonotype counts using only paired cells
407
+ // Filter to paired cells only
408
+ scPaired := scAll.join(pairedKeys, { how: "inner", on: ["sampleId", "cellKey"] })
409
+
410
+ // Per-chain clonotype counts from paired cells
408
411
  perChainPairedJoined := perChainJoined
409
412
  totalPairedParts := []
410
413
  for chain in chains {
411
- chainFiles := singleCellChainTsvsData[chain]
412
414
  chainColPaired := "clonotypesByChain." + chain + ".paired"
413
- if is_undefined(chainFiles) {
414
- perChainPairedJoined = perChainPairedJoined.withColumns(pt.lit(0).alias(chainColPaired))
415
- continue
416
- }
417
- parts := []
418
- maps.forEach(chainFiles.inputs(), func(key, f) {
419
- sampleId := json.decode(key)[0]
420
- dfc := wf.frame(f, { xsvType: "tsv", inferSchema: false, schema: [ { column: "cellKey", type: "String" }, { column: "clonotypeKey", type: "String" } ] })
421
- dfc2 := dfc.select(pt.lit(sampleId).alias("sampleId"), pt.col("cellKey"), pt.col("clonotypeKey"))
422
- // join with paired keys for this sample
423
- dfcJoined := dfc2.join(pairedKeys, { how: "inner", on: ["sampleId", "cellKey"] })
424
- parts = append(parts, dfcJoined.select(pt.col("sampleId"), pt.col("clonotypeKey")))
425
- })
426
- if len(parts) == 0 {
415
+ if is_undefined(singleCellChainTsvsData[chain]) {
427
416
  perChainPairedJoined = perChainPairedJoined.withColumns(pt.lit(0).alias(chainColPaired))
428
417
  continue
429
418
  }
430
- partsDf := len(parts) > 1 ? pt.concat(parts) : parts[0]
431
- chainAggPaired := partsDf.groupBy("sampleId").agg(pt.col("clonotypeKey").nUnique().alias(chainColPaired))
419
+ chainPaired := scPaired.filter(pt.col("chain").eq(chain))
420
+ chainAggPaired := chainPaired.groupBy("sampleId").agg(pt.col("clonotypeKey").nUnique().alias(chainColPaired))
432
421
  perChainPairedJoined = perChainPairedJoined.join(chainAggPaired, { how: "left", on: ["sampleId"] })
433
422
  totalPairedParts = append(totalPairedParts, chainAggPaired.select(pt.col("sampleId"), pt.col(chainColPaired).alias("__pairedPart")))
434
423
  }
@@ -440,6 +429,7 @@ self.body(func(inputs) {
440
429
  } else {
441
430
  perChainJoined = perChainPairedJoined.withColumns(pt.lit(0).alias("exportedClonotypesPaired"))
442
431
  }
432
+
443
433
  } else {
444
434
  perChainJoined = perChainJoined.withColumns(pt.lit(0).alias("scCellsTotal"), pt.lit(0).alias("scCellsBothChains"))
445
435
  }
@@ -457,7 +447,8 @@ self.body(func(inputs) {
457
447
  }
458
448
  finalDf = finalDf.withColumns(
459
449
  pt.col("exportedClonotypesPaired").fillNull(0).cast("Long").alias("totalClonotypes"),
460
- pt.col("readsUsedInClonotypesNew").fillNull(0).cast("Long").alias("readsUsedInClonotypes")
450
+ pt.col("readsUsedInClonotypesNew").fillNull(0).cast("Long").alias("readsUsedInClonotypes"),
451
+ pt.col("exportedClonotypes").fillNull(0).cast("Long").minus(pt.col("exportedClonotypesPaired").fillNull(0).cast("Long")).alias("clonotypesDroppedUnpaired")
461
452
  )
462
453
  } else {
463
454
  finalDf = finalDf.withColumns(
@@ -123,6 +123,13 @@ self.body(func(inputs) {
123
123
  alias("chainRank")
124
124
  ).withoutColumns("rawChainRank")
125
125
 
126
+ // Filter out non-productive chains so that cells with a non-productive
127
+ // primary chain will have no entry and get excluded by the downstream
128
+ // clonotypeKeyA1/B1 isNotNull filter.
129
+ dfWithChainRank = dfWithChainRank.filter(
130
+ pt.col(mainIsProductiveColumn).eq("true")
131
+ )
132
+
126
133
  dfWithChainRank.save("output.tsv")
127
134
 
128
135
  // Run the workflow
@@ -615,7 +615,8 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
615
615
  "pl7.app/min": "0",
616
616
  "pl7.app/table/orderPriority": "109000",
617
617
  "pl7.app/table/visibility": "default",
618
- "pl7.app/label": "Total Clonotypes"
618
+ "pl7.app/label": "Total Clonotypes",
619
+ "pl7.app/description": "Sum of unique clonotypes across all chains. In single-cell mode, only clonotypes from paired cells are counted."
619
620
  }
620
621
  }
621
622
  },
@@ -818,7 +819,8 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
818
819
  "pl7.app/min": "0",
819
820
  "pl7.app/table/orderPriority": "107900",
820
821
  "pl7.app/table/visibility": "default",
821
- "pl7.app/label": "Total number of cells (with paired chains)"
822
+ "pl7.app/label": "Total number of cells (with paired chains)",
823
+ "pl7.app/description": "Cells detected with at least two distinct chains (e.g. heavy and light)."
822
824
  }
823
825
  }
824
826
  }]
@@ -838,13 +840,33 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
838
840
  "pl7.app/min": "0",
839
841
  "pl7.app/table/orderPriority": string(n),
840
842
  "pl7.app/table/visibility": "default",
841
- "pl7.app/label": "Clonotypes by Chain " + chain
843
+ "pl7.app/label": "Clonotypes by Chain " + chain,
844
+ "pl7.app/description": "Number of unique clonotypes for this chain (paired cells only). Includes secondary chain rearrangements."
842
845
  }
843
846
  }
844
847
  }]
845
848
  n -= 100
846
849
  }
847
850
 
851
+ // Clonotypes dropped because the cell was unpaired (single-chain only)
852
+ singleCellColumns += [{
853
+ column: "clonotypesDroppedUnpaired",
854
+ id: "clonotypes-dropped-unpaired",
855
+ allowNA: true,
856
+ naRegex: "NaN",
857
+ spec: {
858
+ name: "mixcr.com/reports/singleCell/clonotypesDroppedUnpaired",
859
+ valueType: "Long",
860
+ annotations: {
861
+ "pl7.app/min": "0",
862
+ "pl7.app/table/orderPriority": "108150",
863
+ "pl7.app/table/visibility": "optional",
864
+ "pl7.app/label": "Clonotypes Dropped - Unpaired",
865
+ "pl7.app/description": "Clonotypes discarded because their cell lacked a paired chain."
866
+ }
867
+ }
868
+ }]
869
+
848
870
  // Single Cell columns
849
871
  singleCellColumns += [
850
872
  // {