@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 2.2.2 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@
3
3
  self := import("@platforma-sdk/workflow-tengo:tpl")
4
4
 
5
5
  ll := import("@platforma-sdk/workflow-tengo:ll")
6
+ render := import("@platforma-sdk/workflow-tengo:render")
6
7
  assets := import("@platforma-sdk/workflow-tengo:assets")
7
8
  pframes := import("@platforma-sdk/workflow-tengo:pframes")
8
9
  smart := import("@platforma-sdk/workflow-tengo:smart")
@@ -23,6 +24,8 @@ mixcrAnalyzeTpl := assets.importTemplate(":mixcr-analyze")
23
24
  mixcrExportTpl := assets.importTemplate(":mixcr-export")
24
25
  aggregateByClonotypeKeyTpl := assets.importTemplate(":aggregate-by-clonotype-key")
25
26
 
27
+ processSingleCellTpl := assets.importTemplate(":process-single-cell")
28
+
26
29
  self.awaitState("InputsLocked")
27
30
  self.awaitState("params", "ResourceReady")
28
31
  self.awaitState("inputSpec", "ResourceReady")
@@ -30,19 +33,25 @@ self.awaitState("presetSpecForBack", "ResourceReady")
30
33
  self.awaitState("presetContent", "ResourceReady")
31
34
 
32
35
  chainInfos := {
33
- "IGHeavy": { mixcrFilter: "IGH", name: "IG Heavy" },
34
- "IGLight": { mixcrFilter: "IGK,IGL", name: "IG Light" },
35
- "TRAlpha": { mixcrFilter: "TRA", name: "TR Alpha" },
36
- "TRBeta": { mixcrFilter: "TRB", name: "TR Beta" },
37
- "TRGamma": { mixcrFilter: "TRD", name: "TR Gamma" },
38
- "TRDelta": { mixcrFilter: "TRG", name: "TR Delta" }
36
+ "IGHeavy": { mixcrFilter: "IGH", name: "IG Heavy", shortName: "Heavy" },
37
+ "IGLight": { mixcrFilter: "IGK,IGL", name: "IG Light", shortName: "Light" },
38
+ "TCRAlpha": { mixcrFilter: "TRA", name: "TCR Alpha", shortName: "Alpha" },
39
+ "TCRBeta": { mixcrFilter: "TRB", name: "TCR Beta", shortName: "Beta" },
40
+ "TCRGamma": { mixcrFilter: "TRD", name: "TCR Gamma", shortName: "Gamma" },
41
+ "TCRDelta": { mixcrFilter: "TRG", name: "TCR Delta", shortName: "Delta" }
39
42
  }
40
43
 
41
- // TODO better naming
42
- receptorInfo := {
44
+ // Chains with higher diversity go first
45
+ receptorInfos := {
43
46
  "IG": { chains: ["IGHeavy", "IGLight"], name: "IG" },
44
- "TRAB": { chains: ["TRAlpha", "TRBeta"], name: "TRAB" },
45
- "TRGD": { chains: ["TRGamma", "TRDelta"], name: "TRGD" }
47
+ "TCRAB": { chains: ["TCRBeta", "TCRAlpha"], name: "TCR Alpha/Beta" }, // TODO αβ ?
48
+ "TCRGD": { chains: ["TCRDelta", "TCRGamma"], name: "TCR Gamma/Delta" } // TODO γδ ?
49
+ }
50
+
51
+ transformSpecs := func(columns, additionalSpec) {
52
+ return slices.map(columns, func(columnSpec) {
53
+ return maps.deepTransform(columnSpec, additionalSpec)
54
+ })
46
55
  }
47
56
 
48
57
  self.body(func(inputs) {
@@ -56,7 +65,8 @@ self.body(func(inputs) {
56
65
 
57
66
  params := inputs.params
58
67
  species := params.species
59
- chains := params.chains
68
+ // can be either receptors or chains
69
+ receptorsOrChains := params.receptorsOrChains
60
70
  limitInput := params.limitInput
61
71
  blockId := params.blockId
62
72
  presetCommonName := params.presetCommonName
@@ -88,6 +98,7 @@ self.body(func(inputs) {
88
98
  hasAssembleCells = true
89
99
  }
90
100
  }
101
+ isSingleCell := len(presetSpecForBack.cellTags) > 0
91
102
 
92
103
  // calculating clns annotations
93
104
 
@@ -191,13 +202,17 @@ self.body(func(inputs) {
191
202
  exportSpecs := calculateExportSpecs(presetSpecForBack, blockId)
192
203
 
193
204
  columnsSpecPerSample := exportSpecs.columnsSpecPerSample
205
+ columnsSpecPerSampleSc := exportSpecs.columnsSpecPerSampleSc
194
206
  columnsSpecPerClonotype := exportSpecs.columnsSpecPerClonotype
195
207
  columnsSpec := exportSpecs.columnsSpec
196
208
 
197
209
  clonotypeKeyColumns := exportSpecs.clonotypeKeyColumns
210
+ clonotypeKeyArgs := exportSpecs.clonotypeKeyArgs
211
+ cellTagColumns := exportSpecs.cellTagColumns
198
212
 
199
- axesByClonotypeId := exportSpecs.axesByClonotypeId
213
+ // axesByClonotypeId := exportSpecs.axesByClonotypeId
200
214
  axesByClonotypeKey := exportSpecs.axesByClonotypeKey
215
+ axesByScClonotypeKey := exportSpecs.axesByScClonotypeKey
201
216
 
202
217
  exportArgs := exportSpecs.exportArgs
203
218
 
@@ -242,23 +257,33 @@ self.body(func(inputs) {
242
257
 
243
258
  clonotypes := pframes.pFrameBuilder()
244
259
 
260
+ chains := []
261
+ receptors := []
262
+
263
+ for chainOrReceptor in receptorsOrChains {
264
+ if receptorInfos[chainOrReceptor] != undefined {
265
+ receptors += [chainOrReceptor]
266
+ chains += receptorInfos[chainOrReceptor].chains
267
+ } else {
268
+ if is_undefined(chainInfos[chainOrReceptor]) {
269
+ ll.panic("chainInfo not found for %v", chainOrReceptor)
270
+ }
271
+ if len(receptors) > 0 {
272
+ ll.panic("mixing receptors and chains is not allowed")
273
+ }
274
+ chains += [chainOrReceptor]
275
+ }
276
+ }
277
+
278
+ perChainResults := {}
279
+
245
280
  for chain in chains {
246
281
  chainInfo := chainInfos[chain]
247
282
  ll.assert(!is_undefined(chainInfo), "chainInfo not found for chain %v", chain)
248
- additionalSpec := {
249
- spec: {
250
- domain: {
251
- "pl7.app/chain": chain
252
- }
253
- }
254
- }
255
- columnSpecsTransform := func(specs) {
256
- return slices.map(specs, func(spec) {
257
- return maps.deepMerge(spec, additionalSpec)
258
- })
259
- }
260
283
 
261
- traceSteps := [{type: "milaboratories.mixcr-clonotyping.export", id: blockId + "." + chain, importance: 80, label: chainInfo.name}]
284
+ //
285
+ // Exporting CLNS -> TSV
286
+ //
262
287
 
263
288
  exportOutputs := [ {
264
289
  type: "Resource",
@@ -274,13 +299,16 @@ self.body(func(inputs) {
274
299
  path: ["tsv"]
275
300
  } ]
276
301
 
277
- if !is_undefined(axesByClonotypeKey) {
302
+ if !is_undefined(axesByClonotypeKey) && !isSingleCell {
303
+ // only adding data outputs if we are in bulk mode
278
304
  exportOutputs += [ {
279
305
  type: "Xsv",
280
306
  xsvType: "tsv",
281
307
  settings: {
282
308
  axes: axesByClonotypeKey,
283
- columns: columnSpecsTransform(columnsSpecPerSample),
309
+ columns: transformSpecs(columnsSpecPerSample, {
310
+ spec: { domain: { "pl7.app/vdj/chain": chain } }
311
+ }),
284
312
  storageFormat: "Binary",
285
313
  partitionKeyLength: 0
286
314
  },
@@ -289,39 +317,80 @@ self.body(func(inputs) {
289
317
  } ]
290
318
  }
291
319
 
320
+ if isSingleCell {
321
+ exportOutputs += [ {
322
+ type: "Resource",
323
+ spec: {
324
+ kind: "PColumn",
325
+ name: "mixcr.com/clonotypeTableForSingleCell",
326
+ domain: {
327
+ "pl7.app/vdj/clonotypingRunId": blockId
328
+ },
329
+ valueType: "File"
330
+ },
331
+ name: "clonotypeTableForSingleCell",
332
+ path: ["tsvForSingleCell"]
333
+ } ]
334
+ }
335
+
292
336
  exportResults := pframes.processColumn(
293
337
  clnsFiles,
294
338
  mixcrExportTpl,
295
339
  exportOutputs,
296
340
  {
297
341
  // will be automatically propagated to all output specs
298
- traceSteps: traceSteps,
342
+ traceSteps: [{type: "milaboratories.mixcr-clonotyping.export", id: blockId + "." + chain, importance: 80, label: chainInfo.name}],
299
343
 
300
344
  extra: {
301
345
  params: {
302
346
  chains: chainInfo.mixcrFilter,
303
347
  clonotypeKeyColumns: clonotypeKeyColumns,
348
+ clonotypeKeyArgs: clonotypeKeyArgs,
349
+ cellTagColumns: cellTagColumns,
304
350
  exportArgs: exportArgs
305
351
  }
306
352
  }
307
353
  }
308
354
  )
309
355
 
310
- exportResults.addXsvOutputToBuilder(clonotypes, "byCloneKeyBySample", "byCloneKeyBySample/" + chain + "/")
356
+ //
357
+ // Aggregating the data to produce:
358
+ // [clonotypeKey] -> properties
359
+ // from:
360
+ // [sampleId, clonotypeKey] -> properties
361
+ //
311
362
 
312
363
  aggregationOutputs := [ {
313
- type: "Xsv",
314
- xsvType: "tsv",
315
- settings: {
316
- axes: axesByClonotypeKey,
317
- columns: columnSpecsTransform(columnsSpecPerClonotype),
318
- storageFormat: "Binary",
319
- partitionKeyLength: 0
364
+ type: "Resource",
365
+ spec: {
366
+ kind: "PColumn",
367
+ name: "mixcr.com/clonotypeProperties",
368
+ domain: {
369
+ "pl7.app/vdj/clonotypingRunId": blockId
370
+ },
371
+ valueType: "File"
320
372
  },
321
- name: "byCloneKey",
373
+ name: "clonotypeProperties",
322
374
  path: ["tsv"]
323
375
  } ]
324
376
 
377
+ if !isSingleCell {
378
+ aggregationOutputs += [ {
379
+ type: "Xsv",
380
+ xsvType: "tsv",
381
+ settings: {
382
+ axes: axesByClonotypeKey,
383
+ columns: transformSpecs(columnsSpecPerClonotype, {
384
+ spec: { domain: { "pl7.app/vdj/chain": chain } }
385
+ }),
386
+ storageFormat: "Binary",
387
+ partitionKeyLength: 0
388
+ },
389
+ name: "byCloneKey",
390
+ path: ["tsv"]
391
+ } ]
392
+ }
393
+
325
394
  aggregateByCloneKey := pframes.processColumn(
326
395
  exportResults.output("clonotypeTable"),
327
396
  aggregateByClonotypeKeyTpl,
@@ -339,7 +408,134 @@ self.body(func(inputs) {
339
408
  }
340
409
  )
341
410
 
342
- aggregateByCloneKey.addXsvOutputToBuilder(clonotypes, "byCloneKey", "byCloneKey/" + chain + "/")
411
+ if isSingleCell {
412
+ // collecting results for possible future single cell processing
413
+ perChainResults[chain] = {
414
+ tsvForSingleCell: exportResults.output("clonotypeTableForSingleCell"),
415
+ clonotypeProperties: aggregateByCloneKey.output("clonotypeProperties")
416
+ }
417
+ } else {
418
+ // only adding data outputs if we are in bulk mode
419
+ exportResults.addXsvOutputToBuilder(clonotypes, "byCloneKeyBySample", "byCloneKeyBySample/" + chain + "/")
420
+ aggregateByCloneKey.addXsvOutputToBuilder(clonotypes, "byCloneKey", "byCloneKey/" + chain + "/")
421
+ }
422
+ }
423
+
424
+ if isSingleCell {
425
+ for receptor in receptors {
426
+ receptorInfo := receptorInfos[receptor]
427
+
428
+ singleCellOutputs := [ {
429
+ type: "Resource",
430
+ spec: {
431
+ kind: "PColumn",
432
+ name: "mixcr.com/singleCellAbundanceTable",
433
+ domain: {
434
+ "pl7.app/vdj/clonotypingRunId": blockId
435
+ },
436
+ valueType: "File"
437
+ },
438
+ name: "abundanceTsv",
439
+ path: ["abundanceTsv"]
440
+ }, {
441
+ type: "Xsv",
442
+ xsvType: "tsv",
443
+ settings: {
444
+ axes: [ {
445
+ column: "sampleId",
446
+ spec: inputSpec.axesSpec[0]
447
+ } ] + axesByScClonotypeKey,
448
+ columns: transformSpecs(columnsSpecPerSampleSc, {
449
+ spec: { domain: { "pl7.app/vdj/receptor": receptor } }
450
+ }),
451
+ storageFormat: "Binary",
452
+ partitionKeyLength: 1
453
+ },
454
+ name: "abundanceTable",
455
+ path: ["abundanceTsv"]
456
+ } ]
457
+
458
+ for chainIdx in [0, 1] {
459
+ // "A" chain is always the one that is more diverse
460
+ chainLetterU := ["A", "B"][chainIdx]
461
+ chainNameU := chainInfos[receptorInfo.chains[chainIdx]].name
462
+ chainNameL := text.to_lower(chainNameU)
463
+
464
+ for isPrimary in [true, false] {
465
+ pPrefixU := isPrimary ? "Primary" : "Secondary"
466
+ pPrefixL := text.to_lower(pPrefixU)
467
+
468
+ // i.e. propertiesAPrimary
469
+ propertiesTableName := "properties" + chainLetterU + pPrefixU
470
+ // i.e. propertiesAPrimaryTsv
471
+ propertiesTsvOutputName := propertiesTableName + "Tsv"
472
+
473
+ singleCellOutputs += [ {
474
+ type: "Resource",
475
+ spec: {
476
+ kind: "PColumn",
477
+ name: "mixcr.com/scClonotypeTable/" + chainNameL + pPrefixU,
478
+ domain: {
479
+ "pl7.app/vdj/clonotypingRunId": blockId
480
+ },
481
+ valueType: "File"
482
+ },
483
+ name: propertiesTsvOutputName,
484
+ path: [ propertiesTsvOutputName ]
485
+ }, {
486
+ type: "Xsv",
487
+ xsvType: "tsv",
488
+ settings: {
489
+ axes: axesByScClonotypeKey,
490
+ columns: transformSpecs(columnsSpecPerClonotype, {
491
+ spec: {
492
+ domain: {
493
+ "pl7.app/vdj/receptor": receptor,
494
+ "pl7.app/vdj/scClonotypeChain": chainLetterU
495
+ },
496
+ annotations: {
497
+ "pl7.app/label": func(label) {
498
+ return pPrefixU + " " + chainNameU + " " + label
499
+ }
500
+ }
501
+ }
502
+ }),
503
+ storageFormat: "Binary",
504
+ partitionKeyLength: 0
505
+ },
506
+ name: propertiesTableName,
507
+ path: [ propertiesTsvOutputName ]
508
+ } ]
509
+ }
510
+ }
511
+
512
+ chainA := receptorInfo.chains[0]
513
+ chainB := receptorInfo.chains[1]
514
+
515
+ // Using A chain files as main PColumn for xsv conversion through pframes.processColumn.
516
+ // Since we aggregate by sample, this is just a single pass through the data.
517
+
518
+ singleCellResult := pframes.processColumn(
519
+ perChainResults[chainA].tsvForSingleCell,
520
+ processSingleCellTpl,
521
+ singleCellOutputs,
522
+ {
523
+ aggregate: ["pl7.app/sampleId"],
524
+ extra: {
525
+ byCellTagB: perChainResults[chainB].tsvForSingleCell.data,
526
+ propertiesA: perChainResults[chainA].clonotypeProperties.data,
527
+ propertiesB: perChainResults[chainB].clonotypeProperties.data
528
+ }
529
+ }
530
+ )
531
+
532
+ singleCellResult.addXsvOutputToBuilder(clonotypes, "abundanceTable", "clonotypeProperties/abundance/" + receptor + "/")
533
+
534
+ singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesAPrimary", "clonotypeProperties/" + receptor + "/aPrimary/")
535
+ singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesASecondary", "clonotypeProperties/" + receptor + "/aSecondary/")
536
+ singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesBPrimary", "clonotypeProperties/" + receptor + "/bPrimary/")
537
+ singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesBSecondary", "clonotypeProperties/" + receptor + "/bSecondary/")
538
+ }
343
539
  }
344
540
 
345
541
  return {
@@ -27,8 +27,8 @@ const testCases: TestCase[] = [
27
27
  preset: 'milab-human-dna-xcr-7genes-multiplex',
28
28
  check: (expect, config) => {
29
29
  // console.dir(config, { depth: 5 });
30
- expect(config.axesByClonotypeId).to.have.lengthOf(1);
31
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
30
+ // expect(config.axesByClonotypeId).to.have.lengthOf(1);
31
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
32
32
  expect(config.columnsSpec.find((c: any) => c.column === 'readCount')).toBeDefined();
33
33
  expect(config.columnsSpec.find((c: any) => c.column === 'readFraction')).toBeDefined();
34
34
  expect(config.columnsSpec.find((c: any) => c.column === 'nSeqCDR3')).toBeDefined();
@@ -40,10 +40,10 @@ const testCases: TestCase[] = [
40
40
  species: 'human',
41
41
  check: (expect, config) => {
42
42
  // console.dir(config, { depth: 5 });
43
- expect(config.axesByClonotypeId).to.have.lengthOf(2);
44
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'tagValueCELL')).toBeDefined();
45
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
46
- expect(config.columnsSpec.find((c: any) => c.column === 'cellGroup')).toBeDefined();
43
+ // expect(config.axesByClonotypeId).to.have.lengthOf(2);
44
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'tagValueCELL')).toBeDefined();
45
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
46
+ // expect(config.columnsSpec.find((c: any) => c.column === 'cellGroup')).toBeDefined();
47
47
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeCount')).toBeDefined();
48
48
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeFraction')).toBeDefined();
49
49
  expect(config.columnsSpec.find((c: any) => c.column === 'nSeqFR1')).toBeDefined();
@@ -67,8 +67,8 @@ const testCases: TestCase[] = [
67
67
  preset: 'cellecta-human-rna-xcr-umi-drivermap-air',
68
68
  check: (expect, config) => {
69
69
  // console.dir(config, { depth: 5 });
70
- expect(config.axesByClonotypeId).to.have.lengthOf(1);
71
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
70
+ // expect(config.axesByClonotypeId).to.have.lengthOf(1);
71
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
72
72
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeCount')).toBeDefined();
73
73
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeFraction')).toBeDefined();
74
74
  expect(config.columnsSpec.find((c: any) => c.column === 'nSeqCDR3')).toBeDefined();
@@ -79,8 +79,8 @@ const testCases: TestCase[] = [
79
79
  preset: 'takara-human-rna-bcr-umi-smartseq',
80
80
  check: (expect, config) => {
81
81
  // console.dir(config, { depth: 5 });
82
- expect(config.axesByClonotypeId).to.have.lengthOf(1);
83
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
82
+ // expect(config.axesByClonotypeId).to.have.lengthOf(1);
83
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
84
84
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeCount')).toBeDefined();
85
85
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeFraction')).toBeDefined();
86
86
  expect(config.columnsSpec.find((c: any) => c.column === 'nSeqFR1')).toBeDefined();
@@ -105,8 +105,8 @@ const testCases: TestCase[] = [
105
105
  species: 'human',
106
106
  check: (expect, config) => {
107
107
  // console.dir(config, { depth: 5 });
108
- expect(config.axesByClonotypeId).to.have.lengthOf(1);
109
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
108
+ // expect(config.axesByClonotypeId).to.have.lengthOf(1);
109
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
110
110
  expect(config.columnsSpec.find((c: any) => c.column === 'readCount')).toBeDefined();
111
111
  expect(config.columnsSpec.find((c: any) => c.column === 'readFraction')).toBeDefined();
112
112
  expect(config.columnsSpec.find((c: any) => c.column === 'nSeqCDR3')).toBeDefined();
@@ -118,7 +118,7 @@ const testCases: TestCase[] = [
118
118
  species: 'human',
119
119
  check: (expect, config) => {
120
120
  // console.dir(config, { depth: 5 });
121
- expect(config.axesByClonotypeId).to.have.lengthOf(1);
121
+ // expect(config.axesByClonotypeId).to.have.lengthOf(1);
122
122
  expect(config.columnsSpec.find((c: any) => c.column === 'readCount')).toBeDefined();
123
123
  }
124
124
  }