@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.9.0 → 3.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.9.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
3
+ > @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.10.1 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  info: Skipping unknown file type: test/columns.test.ts
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.mixcr-clonotyping.workflow
2
2
 
3
+ ## 3.10.1
4
+
5
+ ### Patch Changes
6
+
7
+ - fed5c72: Support parquet format (update SDK)
8
+
9
+ ## 3.10.0
10
+
11
+ ### Minor Changes
12
+
13
+ - d2b6d24: fix number of reads, clonotypes, cells in report table
14
+
3
15
  ## 3.9.0
4
16
 
5
17
  ### Minor Changes
@@ -942,7 +942,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId) {
942
942
  })
943
943
  }
944
944
  } ],
945
- storageFormat: "Binary",
945
+ storageFormat: "Parquet",
946
946
  partitionKeyLength: 1
947
947
  }
948
948
  }
@@ -600,6 +600,30 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
600
600
  }
601
601
  }]
602
602
 
603
+
604
+ if !isSingleCell {
605
+ bulkOrder := 107800
606
+ for chain in chains {
607
+ baseColumns = baseColumns + [{
608
+ column: "clonotypesByChain." + chain,
609
+ id: "clonotypes-by-chain-" + chain,
610
+ allowNA: true,
611
+ naRegex: "NaN",
612
+ spec: {
613
+ name: "mixcr.com/reports/bulk/clonotypesByChain/" + chain,
614
+ valueType: "Long",
615
+ annotations: {
616
+ "pl7.app/min": "0",
617
+ "pl7.app/table/orderPriority": string(bulkOrder),
618
+ "pl7.app/table/visibility": "default",
619
+ "pl7.app/label": "Clonotypes by Chain " + chain
620
+ }
621
+ }
622
+ }]
623
+ bulkOrder -= 100
624
+ }
625
+ }
626
+
603
627
  dataWithUmiColumns := [ {
604
628
  column: "refineTags.UMI.outputCount",
605
629
  id: "refine-tags-umi-output-count",
@@ -733,9 +757,51 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
733
757
  }
734
758
  }
735
759
  ]
760
+
761
+ singleCellColumns := []
736
762
 
737
763
 
738
- singleCellColumns := [
764
+ singleCellColumns += [{
765
+ column: "scCellsTotal",
766
+ id: "sc-cells-total",
767
+ allowNA: true,
768
+ naRegex: "NaN",
769
+ spec: {
770
+ name: "mixcr.com/reports/singleCell/pairedCellsTotal",
771
+ valueType: "Long",
772
+ annotations: {
773
+ "pl7.app/min": "0",
774
+ "pl7.app/table/orderPriority": "107900",
775
+ "pl7.app/table/visibility": "default",
776
+ "pl7.app/label": "Total number of cells (with paired chains)"
777
+ }
778
+ }
779
+ }]
780
+
781
+
782
+ n := 107800
783
+ for chain in chains {
784
+ singleCellColumns += [{
785
+ column: "clonotypesByChain." + chain,
786
+ id: "clonotypes-by-chain-" + chain,
787
+ allowNA: true,
788
+ naRegex: "NaN",
789
+ spec: {
790
+ name: "mixcr.com/reports/singleCell/clonotypesByChain/" + chain,
791
+ valueType: "Long",
792
+ annotations: {
793
+ "pl7.app/min": "0",
794
+ "pl7.app/table/orderPriority": string(n),
795
+ "pl7.app/table/visibility": "default",
796
+ "pl7.app/label": "Clonotypes by Chain " + chain
797
+ }
798
+ }
799
+ }]
800
+ n -= 100
801
+ }
802
+
803
+
804
+ singleCellColumns += [
739
805
 
740
806
 
741
807
 
@@ -1287,7 +1353,7 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
1287
1353
  reportColumnsSpec: {
1288
1354
  axes: axes,
1289
1355
  columns: columns,
1290
- storageFormat: "Binary",
1356
+ storageFormat: "Parquet",
1291
1357
  partitionKeyLength: 0
1292
1358
  }
1293
1359
  }
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,15 +1,15 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
3
- "version": "3.9.0",
3
+ "version": "3.10.1",
4
4
  "description": "Tengo-based template",
5
5
  "dependencies": {
6
- "@platforma-sdk/workflow-tengo": "^5.3.3",
6
+ "@platforma-sdk/workflow-tengo": "^5.4.2",
7
7
  "@platforma-open/milaboratories.software-mixcr": "4.7.0-233-develop"
8
8
  },
9
9
  "devDependencies": {
10
10
  "@milaboratories/ts-configs": "^1.0.6",
11
- "@platforma-sdk/tengo-builder": "^2.3.0",
12
- "@platforma-sdk/test": "^1.44.0",
11
+ "@platforma-sdk/tengo-builder": "^2.3.2",
12
+ "@platforma-sdk/test": "^1.44.19",
13
13
  "vitest": "~2.1.9",
14
14
  "typescript": "~5.6.3"
15
15
  },
@@ -942,7 +942,7 @@ calculateExportSpecs := func(presetSpecForBack, sampleIdAxisSpec, blockId) {
942
942
  })
943
943
  }
944
944
  } ],
945
- storageFormat: "Binary",
945
+ storageFormat: "Parquet",
946
946
  partitionKeyLength: 1
947
947
  }
948
948
  }
@@ -29,10 +29,12 @@ self.body(func(inputs) {
29
29
  chains := inputs.chains
30
30
  library := inputs.library
31
31
  isLibraryFileGzipped := inputs.isLibraryFileGzipped
32
+ clonotypeTablesData := inputs.clonotypeTablesData
32
33
 
33
34
  isSingleCell := len(presetSpecForBack.cellTags) > 0
34
35
  hasUmi := !is_undefined(presetSpecForBack.umiTags) && len(presetSpecForBack.umiTags) > 0
35
36
  cellTags := presetSpecForBack.cellTags
37
+ singleCellChainTsvsData := inputs.singleCellChainTsvsData
36
38
 
37
39
  chainInfos := {
38
40
  "IGHeavy": { mixcrFilter: "IGH", name: "IG Heavy", shortName: "Heavy" },
@@ -97,9 +99,174 @@ self.body(func(inputs) {
97
99
  pt.col("fileName").strSlice(0, pt.col("fileName").strLenChars().minus(5)).alias("sampleId")
98
100
  )
99
101
 
100
- // For now, let's just use the processed DataFrame without zero column filtering
101
- // since the columns() method is not available in this pTabler version
102
- finalDf := processedDf
102
+ // Calculate real number of exported (productive) clonotypes per sample
103
+ // Use existing clonotype tables produced earlier; per-sample counts and read sums across chains
104
+ countDfs := []
105
+ for chain in chains {
106
+ chainData := clonotypeTablesData[chain]
107
+ if is_undefined(chainData) { continue }
108
+ for key, clonesFile in chainData.inputs() {
109
+ sampleId := json.decode(key)[0]
110
+ dfCountSource := wf.frame(clonesFile, { xsvType: "tsv", inferSchema: false, schema: [ { column: "readCount", type: "Double" } ] })
111
+ dfCount := dfCountSource.select(
112
+ pt.lit(sampleId).alias("sampleId"),
113
+ pt.col("clonotypeKey").count().alias("exportedClonotypes"),
114
+ pt.col("readCount").round().cast("Long").sum().alias("readsUsedInClonotypes")
115
+ )
116
+ countDfs = append(countDfs, dfCount)
117
+ }
118
+ }
119
+
120
+ countsDf := undefined
121
+ if len(countDfs) > 1 { countsDf = pt.concat(countDfs) } else { countsDf = countDfs[0] }
122
+ aggregatedCounts := countsDf.groupBy("sampleId").agg(
123
+ pt.col("exportedClonotypes").sum().alias("exportedClonotypes"),
124
+ pt.col("readsUsedInClonotypes").sum().alias("readsUsedInClonotypesNew")
125
+ )
126
+
127
+ // Join per-sample counts onto the report; totalClonotypes is overwritten from them in the finalize step below to reflect exported (productive) clones
128
+ joinedDf := processedDf.join(aggregatedCounts, { how: "left", on: ["sampleId"] })
129
+
130
+ // Per-chain clonotype counts
131
+ perChainJoined := joinedDf
132
+ for chain in chains {
133
+ chainData := clonotypeTablesData[chain]
134
+ chainCol := "clonotypesByChain." + chain
135
+ if is_undefined(chainData) {
136
+ perChainJoined = perChainJoined.withColumns(pt.lit(0).alias(chainCol))
137
+ continue
138
+ }
139
+ perChainDfs := []
140
+ for key, clonesFile in chainData.inputs() {
141
+ sampleId := json.decode(key)[0]
142
+ dfSrc := wf.frame(clonesFile, { xsvType: "tsv", inferSchema: false })
143
+ dfCnt := dfSrc.select(
144
+ pt.lit(sampleId).alias("sampleId"),
145
+ pt.col("clonotypeKey").count().alias("__chainCount")
146
+ )
147
+ perChainDfs = append(perChainDfs, dfCnt)
148
+ }
149
+ if len(perChainDfs) == 0 {
150
+ perChainJoined = perChainJoined.withColumns(pt.lit(0).alias(chainCol))
151
+ continue
152
+ }
153
+ chainCountsDf := len(perChainDfs) > 1 ? pt.concat(perChainDfs) : perChainDfs[0]
154
+ chainAgg := chainCountsDf.groupBy("sampleId").agg(
155
+ pt.col("__chainCount").sum().alias(chainCol)
156
+ )
157
+ perChainJoined = perChainJoined.join(chainAgg, { how: "left", on: ["sampleId"] })
158
+ }
159
+ // Single-cell: compute per-sample cell pairing stats (both chains vs one chain)
160
+ if isSingleCell && !is_undefined(singleCellChainTsvsData) {
161
+ // Expect two chains per receptor; if there are more, any cell whose cellKey appears in more than one distinct chain is counted as paired
162
+ // Collect one (sampleId, cellKey, chain) data frame per chain and sample file
163
+ scDfs := []
164
+ maps.forEach(singleCellChainTsvsData, func(chainName, chainFiles) {
165
+ maps.forEach(chainFiles.inputs(), func(key, f) {
166
+ sampleId := json.decode(key)[0]
167
+ df := wf.frame(f, { xsvType: "tsv", inferSchema: false, schema: [ { column: "cellKey", type: "String" } ] })
168
+ df2 := df.select(
169
+ pt.lit(sampleId).alias("sampleId"),
170
+ pt.col("cellKey").alias("cellKey"),
171
+ pt.lit(chainName).alias("chain")
172
+ )
173
+ scDfs = append(scDfs, df2)
174
+ })
175
+ })
176
+ if len(scDfs) > 0 {
177
+ scAll := len(scDfs) > 1 ? pt.concat(scDfs) : scDfs[0]
178
+ // Count cells per sample across all chains (unique cellKey)
179
+ cellsPerSample := scAll.groupBy("sampleId").agg(pt.col("cellKey").nUnique().alias("scCellsTotal"))
180
+
181
+ // Cells paired across different chains: require the same cellKey to appear in >1 distinct chains per sample
182
+ cellsPerSampleChainCounts := scAll.groupBy("sampleId", "cellKey").agg(pt.col("chain").nUnique().alias("_numChains"))
183
+ bothChainCells := cellsPerSampleChainCounts.filter(pt.col("_numChains").gt(1)).groupBy("sampleId").agg(pt.col("cellKey").count().alias("scCellsBothChains"))
184
+ pairedKeys := cellsPerSampleChainCounts.filter(pt.col("_numChains").gt(1)).select(pt.col("sampleId"), pt.col("cellKey"))
185
+
186
+ perChainJoined = perChainJoined.join(cellsPerSample, { how: "left", on: ["sampleId"] })
187
+ perChainJoined = perChainJoined.join(bothChainCells, { how: "left", on: ["sampleId"] })
188
+
189
+ // Recompute per-chain clonotype counts using only paired cells
190
+ perChainPairedJoined := perChainJoined
191
+ totalPairedParts := []
192
+ for chain in chains {
193
+ chainFiles := singleCellChainTsvsData[chain]
194
+ chainColPaired := "clonotypesByChain." + chain + ".paired"
195
+ if is_undefined(chainFiles) {
196
+ perChainPairedJoined = perChainPairedJoined.withColumns(pt.lit(0).alias(chainColPaired))
197
+ continue
198
+ }
199
+ parts := []
200
+ maps.forEach(chainFiles.inputs(), func(key, f) {
201
+ sampleId := json.decode(key)[0]
202
+ dfc := wf.frame(f, { xsvType: "tsv", inferSchema: false, schema: [ { column: "cellKey", type: "String" }, { column: "clonotypeKey", type: "String" } ] })
203
+ dfc2 := dfc.select(pt.lit(sampleId).alias("sampleId"), pt.col("cellKey"), pt.col("clonotypeKey"))
204
+ // join with paired keys for this sample
205
+ dfcJoined := dfc2.join(pairedKeys, { how: "inner", on: ["sampleId", "cellKey"] })
206
+ parts = append(parts, dfcJoined.select(pt.col("sampleId"), pt.col("clonotypeKey")))
207
+ })
208
+ if len(parts) == 0 {
209
+ perChainPairedJoined = perChainPairedJoined.withColumns(pt.lit(0).alias(chainColPaired))
210
+ continue
211
+ }
212
+ partsDf := len(parts) > 1 ? pt.concat(parts) : parts[0]
213
+ chainAggPaired := partsDf.groupBy("sampleId").agg(pt.col("clonotypeKey").nUnique().alias(chainColPaired))
214
+ perChainPairedJoined = perChainPairedJoined.join(chainAggPaired, { how: "left", on: ["sampleId"] })
215
+ totalPairedParts = append(totalPairedParts, chainAggPaired.select(pt.col("sampleId"), pt.col(chainColPaired).alias("__pairedPart")))
216
+ }
217
+
218
+ if len(totalPairedParts) > 0 {
219
+ totalPairedDf := len(totalPairedParts) > 1 ? pt.concat(totalPairedParts) : totalPairedParts[0]
220
+ totalPairedAgg := totalPairedDf.groupBy("sampleId").agg(pt.col("__pairedPart").sum().alias("exportedClonotypesPaired"))
221
+ perChainJoined = perChainPairedJoined.join(totalPairedAgg, { how: "left", on: ["sampleId"] })
222
+ } else {
223
+ perChainJoined = perChainPairedJoined.withColumns(pt.lit(0).alias("exportedClonotypesPaired"))
224
+ }
225
+ } else {
226
+ perChainJoined = perChainJoined.withColumns(pt.lit(0).alias("scCellsTotal"), pt.lit(0).alias("scCellsBothChains"))
227
+ }
228
+ }
229
+
230
+ // Finalize: cast/fill totals and per-chain counts
231
+ finalDf := perChainJoined
232
+ if isSingleCell {
233
+ // Use paired-only totals and per-chain counts
234
+ // Replace per-chain columns from ".paired" variants and set total from exportedClonotypesPaired
235
+ for chain in chains {
236
+ colPaired := "clonotypesByChain." + chain + ".paired"
237
+ col := "clonotypesByChain." + chain
238
+ finalDf = finalDf.withColumns(pt.col(colPaired).fillNull(0).cast("Long").alias(col))
239
+ }
240
+ finalDf = finalDf.withColumns(
241
+ pt.col("exportedClonotypesPaired").fillNull(0).cast("Long").alias("totalClonotypes"),
242
+ pt.col("readsUsedInClonotypesNew").fillNull(0).cast("Long").alias("readsUsedInClonotypes")
243
+ )
244
+ } else {
245
+ finalDf = finalDf.withColumns(
246
+ pt.col("exportedClonotypes").fillNull(0).cast("Long").alias("totalClonotypes"),
247
+ pt.col("readsUsedInClonotypesNew").fillNull(0).cast("Long").alias("readsUsedInClonotypes")
248
+ )
249
+ for chain in chains {
250
+ col := "clonotypesByChain." + chain
251
+ finalDf = finalDf.withColumns(
252
+ pt.col(col).fillNull(0).cast("Long").alias(col)
253
+ )
254
+ }
255
+ }
256
+
257
+ for chain in chains {
258
+ col := "clonotypesByChain." + chain
259
+ finalDf = finalDf.withColumns(
260
+ pt.col(col).fillNull(0).cast("Long").alias(col)
261
+ )
262
+ }
263
+
264
+ if isSingleCell {
265
+ // Keep only paired cells count and name it scCellsTotal
266
+ finalDf = finalDf.withColumns(
267
+ pt.col("scCellsBothChains").fillNull(0).cast("Long").alias("scCellsTotal")
268
+ )
269
+ }
103
270
 
104
271
  // Save the final DataFrame back to TSV
105
272
  finalDf.save("qc-report-processed.tsv", {
@@ -111,7 +278,7 @@ self.body(func(inputs) {
111
278
 
112
279
  tsvFile := wfResult.getFile("qc-report-processed.tsv")
113
280
 
114
- qcReportColumns := qcReportColumns(hasUmi, isSingleCell, sampleIdAxisSpec, chainsForMixcr, cellTags)
281
+ qcReportColumns := qcReportColumns(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags)
115
282
  reportColumnsSpec := qcReportColumns.reportColumnsSpec
116
283
 
117
284
  qcReportTable := xsv.importFile(
@@ -319,7 +319,9 @@ self.body(func(inputs) {
319
319
  return schema
320
320
  }
321
321
 
322
- clonotypeTables := pframes.pFrameBuilder()
322
+ clonotypeTables := pframes.pFrameBuilder()
323
+ singleCellChainTsvs := {}
324
+ clonotypeTablesData := {}
323
325
  resultsToCache := {}
324
326
 
325
327
  for chain in chains {
@@ -353,7 +355,7 @@ self.body(func(inputs) {
353
355
  settings: {
354
356
  axes: [ axisByClonotypeKeyGen(chain) ],
355
357
  columns: columnsSpecPerSample,
356
- storageFormat: "Binary",
358
+ storageFormat: "Parquet",
357
359
  partitionKeyLength: 0
358
360
  },
359
361
  mem: "16GiB",
@@ -446,7 +448,7 @@ self.body(func(inputs) {
446
448
  settings: {
447
449
  axes: [ axisByClonotypeKeyGen(chain) ],
448
450
  columns: columnsSpecPerClonotypeNoAggregates + columnsSpecPerClonotypeAggregates,
449
- storageFormat: "Binary",
451
+ storageFormat: "Parquet",
450
452
  partitionKeyLength: 0
451
453
  },
452
454
  mem: "24GiB",
@@ -456,8 +458,9 @@ self.body(func(inputs) {
456
458
  } ]
457
459
  }
458
460
 
459
- resultsToCache["clonotypeTable/" + chain] = exportResults.outputData("clonotypeTable")
460
- clonotypeTables.add(chain, exportResults.outputSpec("clonotypeTable"), exportResults.outputData("clonotypeTable"))
461
+ resultsToCache["clonotypeTable/" + chain] = exportResults.outputData("clonotypeTable")
462
+ clonotypeTablesData[chain] = exportResults.outputData("clonotypeTable")
463
+ clonotypeTables.add(chain, exportResults.outputSpec("clonotypeTable"), exportResults.outputData("clonotypeTable"))
461
464
 
462
465
  aggregateByCloneKey := pframes.processColumn(
463
466
  exportResults.output("clonotypeTable"),
@@ -485,7 +488,7 @@ self.body(func(inputs) {
485
488
  }
486
489
  )
487
490
 
488
- if isSingleCell {
491
+ if isSingleCell {
489
492
  // collecting results for future single cell processing
490
493
  perChainResults[chain] = {
491
494
  tsvForSingleCell: exportResults.output("clonotypeTableForSingleCell"),
@@ -495,6 +498,9 @@ self.body(func(inputs) {
495
498
  // caching intermediate results until the block is removed
496
499
  resultsToCache["clonotypeTableForSingleCell/" + chain] = exportResults.outputData("clonotypeTableForSingleCell")
497
500
  resultsToCache["clonotypeProperties/" + chain] = aggregateByCloneKey.outputData("clonotypeProperties")
501
+
502
+ // collect per-chain single-cell TSVs for QC report stats
503
+ singleCellChainTsvs[chain] = exportResults.outputData("clonotypeTableForSingleCell")
498
504
  } else {
499
505
  // only adding data outputs if we are in bulk mode
500
506
  exportResults.addXsvOutputToBuilder(clonotypes, "byCloneKeyBySample", "byCloneKeyBySample/" + chain + "/")
@@ -527,7 +533,7 @@ self.body(func(inputs) {
527
533
  spec: sampleIdAxisSpec
528
534
  }, axisByScClonotypeKeyGen(receptor) ],
529
535
  columns: columnsSpecPerSampleSc,
530
- storageFormat: "Binary",
536
+ storageFormat: "Parquet",
531
537
  partitionKeyLength: 1
532
538
  },
533
539
  mem: "16GiB",
@@ -540,7 +546,7 @@ self.body(func(inputs) {
540
546
  settings: {
541
547
  axes: [ axisByScClonotypeKeyGen(receptor) ],
542
548
  columns: columnsSpecPerClonotypeSc,
543
- storageFormat: "Binary"
549
+ storageFormat: "Parquet"
544
550
  },
545
551
  mem: "12GiB",
546
552
  cpu: 2,
@@ -609,7 +615,7 @@ self.body(func(inputs) {
609
615
  annotations: annotationsTransformation
610
616
  }
611
617
  }),
612
- storageFormat: "Binary",
618
+ storageFormat: "Parquet",
613
619
  partitionKeyLength: 0
614
620
  },
615
621
  mem: "24GiB",
@@ -693,13 +699,15 @@ self.body(func(inputs) {
693
699
  }
694
700
  }
695
701
 
696
- qcReportTable := render.create(exportReportTpl, {
702
+ qcReportTable := render.create(exportReportTpl, {
697
703
  clnsData: mixcrResults.outputData("clns"),
698
704
  presetSpecForBack: presetSpecForBack,
699
705
  sampleIdAxisSpec: sampleIdAxisSpec,
700
- chains: chains,
706
+ chains: chains,
701
707
  library: library,
702
- isLibraryFileGzipped: isLibraryFileGzipped
708
+ isLibraryFileGzipped: isLibraryFileGzipped,
709
+ clonotypeTablesData: clonotypeTablesData,
710
+ singleCellChainTsvsData: singleCellChainTsvs
703
711
  })
704
712
 
705
713
  return {
@@ -600,6 +600,30 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
600
600
  }
601
601
  }]
602
602
 
603
+ // Add per-chain clonotype counts for bulk datasets (avoid duplicates in single-cell)
604
+ if !isSingleCell {
605
+ bulkOrder := 107800
606
+ for chain in chains {
607
+ baseColumns = baseColumns + [{
608
+ column: "clonotypesByChain." + chain,
609
+ id: "clonotypes-by-chain-" + chain,
610
+ allowNA: true,
611
+ naRegex: "NaN",
612
+ spec: {
613
+ name: "mixcr.com/reports/bulk/clonotypesByChain/" + chain,
614
+ valueType: "Long",
615
+ annotations: {
616
+ "pl7.app/min": "0",
617
+ "pl7.app/table/orderPriority": string(bulkOrder),
618
+ "pl7.app/table/visibility": "default",
619
+ "pl7.app/label": "Clonotypes by Chain " + chain
620
+ }
621
+ }
622
+ }]
623
+ bulkOrder -= 100
624
+ }
625
+ }
626
+
603
627
  dataWithUmiColumns := [ {
604
628
  column: "refineTags.UMI.outputCount",
605
629
  id: "refine-tags-umi-output-count",
@@ -733,9 +757,51 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
733
757
  }
734
758
  }
735
759
  ]
760
+
761
+ singleCellColumns := []
736
762
 
763
+ // Paired cells total (single-cell): number of cells with distinct chains per sample
764
+ singleCellColumns += [{
765
+ column: "scCellsTotal",
766
+ id: "sc-cells-total",
767
+ allowNA: true,
768
+ naRegex: "NaN",
769
+ spec: {
770
+ name: "mixcr.com/reports/singleCell/pairedCellsTotal",
771
+ valueType: "Long",
772
+ annotations: {
773
+ "pl7.app/min": "0",
774
+ "pl7.app/table/orderPriority": "107900",
775
+ "pl7.app/table/visibility": "default",
776
+ "pl7.app/label": "Total number of cells (with paired chains)"
777
+ }
778
+ }
779
+ }]
780
+
781
+ // Per-chain clonotype counts (paired-only in single-cell), one column per available chain
782
+ n := 107800
783
+ for chain in chains {
784
+ singleCellColumns += [{
785
+ column: "clonotypesByChain." + chain,
786
+ id: "clonotypes-by-chain-" + chain,
787
+ allowNA: true,
788
+ naRegex: "NaN",
789
+ spec: {
790
+ name: "mixcr.com/reports/singleCell/clonotypesByChain/" + chain,
791
+ valueType: "Long",
792
+ annotations: {
793
+ "pl7.app/min": "0",
794
+ "pl7.app/table/orderPriority": string(n),
795
+ "pl7.app/table/visibility": "default",
796
+ "pl7.app/label": "Clonotypes by Chain " + chain
797
+ }
798
+ }
799
+ }]
800
+ n -= 100
801
+ }
802
+
737
803
  // Single Cell columns
738
- singleCellColumns := [
804
+ singleCellColumns += [
739
805
  // {
740
806
  // column: "align.readsWithChain.IGH",
741
807
  // id: "align-reads-with-chain-igh",
@@ -1287,7 +1353,7 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
1287
1353
  reportColumnsSpec: {
1288
1354
  axes: axes,
1289
1355
  columns: columns,
1290
- storageFormat: "Binary",
1356
+ storageFormat: "Parquet",
1291
1357
  partitionKeyLength: 0
1292
1358
  }
1293
1359
  }