@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 2.2.2 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,14 +5,8 @@ self := import("@platforma-sdk/workflow-tengo:tpl")
5
5
  ll := import("@platforma-sdk/workflow-tengo:ll")
6
6
  assets := import("@platforma-sdk/workflow-tengo:assets")
7
7
  pframes := import("@platforma-sdk/workflow-tengo:pframes")
8
- smart := import("@platforma-sdk/workflow-tengo:smart")
9
8
  slices := import("@platforma-sdk/workflow-tengo:slices")
10
9
  maps := import("@platforma-sdk/workflow-tengo:maps")
11
- file := import("@platforma-sdk/workflow-tengo:file")
12
- llPFrames := import("@platforma-sdk/workflow-tengo:pframes.ll")
13
- pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
14
- pUtil := import("@platforma-sdk/workflow-tengo:pframes.util")
15
- pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
16
10
 
17
11
  calculateExportSpecs := import(":calculate-export-specs")
18
12
 
@@ -23,6 +17,8 @@ mixcrAnalyzeTpl := assets.importTemplate(":mixcr-analyze")
23
17
  mixcrExportTpl := assets.importTemplate(":mixcr-export")
24
18
  aggregateByClonotypeKeyTpl := assets.importTemplate(":aggregate-by-clonotype-key")
25
19
 
20
+ processSingleCellTpl := assets.importTemplate(":process-single-cell")
21
+
26
22
  self.awaitState("InputsLocked")
27
23
  self.awaitState("params", "ResourceReady")
28
24
  self.awaitState("inputSpec", "ResourceReady")
@@ -30,19 +26,25 @@ self.awaitState("presetSpecForBack", "ResourceReady")
30
26
  self.awaitState("presetContent", "ResourceReady")
31
27
 
32
28
  chainInfos := {
33
- "IGHeavy": { mixcrFilter: "IGH", name: "IG Heavy" },
34
- "IGLight": { mixcrFilter: "IGK,IGL", name: "IG Light" },
35
- "TRAlpha": { mixcrFilter: "TRA", name: "TR Alpha" },
36
- "TRBeta": { mixcrFilter: "TRB", name: "TR Beta" },
37
- "TRGamma": { mixcrFilter: "TRD", name: "TR Gamma" },
38
- "TRDelta": { mixcrFilter: "TRG", name: "TR Delta" }
29
+ "IGHeavy": { mixcrFilter: "IGH", name: "IG Heavy", shortName: "Heavy" },
30
+ "IGLight": { mixcrFilter: "IGK,IGL", name: "IG Light", shortName: "Light" },
31
+ "TCRAlpha": { mixcrFilter: "TRA", name: "TCR Alpha", shortName: "Alpha" },
32
+ "TCRBeta": { mixcrFilter: "TRB", name: "TCR Beta", shortName: "Beta" },
33
+ "TCRGamma": { mixcrFilter: "TRD", name: "TCR Gamma", shortName: "Gamma" },
34
+ "TCRDelta": { mixcrFilter: "TRG", name: "TCR Delta", shortName: "Delta" }
39
35
  }
40
36
 
41
- // TODO better naming
42
- receptorInfo := {
37
+ // Chains with higher diversity go first: single-cell processing below uses chains[0] as the "A" chain and chains[1] as the "B" chain
38
+ receptorInfos := {
43
39
  "IG": { chains: ["IGHeavy", "IGLight"], name: "IG" },
44
- "TRAB": { chains: ["TRAlpha", "TRBeta"], name: "TRAB" },
45
- "TRGD": { chains: ["TRGamma", "TRDelta"], name: "TRGD" }
40
+ "TCRAB": { chains: ["TCRBeta", "TCRAlpha"], name: "TCR Alpha/Beta" }, // TODO αβ ?
41
+ "TCRGD": { chains: ["TCRDelta", "TCRGamma"], name: "TCR Gamma/Delta" } // TODO γδ ?
42
+ }
43
+
44
+ transformSpecs := func(columns, additionalSpec) { // maps every column spec through maps.deepTransform(columnSpec, additionalSpec); callers pass domain overrides and, in one case, a label-rewriting function
45
+ return slices.map(columns, func(columnSpec) {
46
+ return maps.deepTransform(columnSpec, additionalSpec)
47
+ })
46
48
  }
47
49
 
48
50
  self.body(func(inputs) {
@@ -56,7 +58,8 @@ self.body(func(inputs) {
56
58
 
57
59
  params := inputs.params
58
60
  species := params.species
59
- chains := params.chains
61
+ // can be either receptors or chains
62
+ receptorsOrChains := params.receptorsOrChains
60
63
  limitInput := params.limitInput
61
64
  blockId := params.blockId
62
65
  presetCommonName := params.presetCommonName
@@ -88,6 +91,7 @@ self.body(func(inputs) {
88
91
  hasAssembleCells = true
89
92
  }
90
93
  }
94
+ isSingleCell := len(presetSpecForBack.cellTags) > 0
91
95
 
92
96
  // calculating clns annotations
93
97
 
@@ -191,13 +195,17 @@ self.body(func(inputs) {
191
195
  exportSpecs := calculateExportSpecs(presetSpecForBack, blockId)
192
196
 
193
197
  columnsSpecPerSample := exportSpecs.columnsSpecPerSample
198
+ columnsSpecPerSampleSc := exportSpecs.columnsSpecPerSampleSc
194
199
  columnsSpecPerClonotype := exportSpecs.columnsSpecPerClonotype
195
200
  columnsSpec := exportSpecs.columnsSpec
196
201
 
197
202
  clonotypeKeyColumns := exportSpecs.clonotypeKeyColumns
203
+ clonotypeKeyArgs := exportSpecs.clonotypeKeyArgs
204
+ cellTagColumns := exportSpecs.cellTagColumns
198
205
 
199
- axesByClonotypeId := exportSpecs.axesByClonotypeId
206
+ // axesByClonotypeId := exportSpecs.axesByClonotypeId
200
207
  axesByClonotypeKey := exportSpecs.axesByClonotypeKey
208
+ axesByScClonotypeKey := exportSpecs.axesByScClonotypeKey
201
209
 
202
210
  exportArgs := exportSpecs.exportArgs
203
211
 
@@ -242,23 +250,33 @@ self.body(func(inputs) {
242
250
 
243
251
  clonotypes := pframes.pFrameBuilder()
244
252
 
253
+ chains := []
254
+ receptors := []
255
+
256
+ for chainOrReceptor in receptorsOrChains {
257
+ if receptorInfos[chainOrReceptor] != undefined {
258
+ receptors += [chainOrReceptor]
259
+ chains += receptorInfos[chainOrReceptor].chains
260
+ } else {
261
+ if is_undefined(chainInfos[chainOrReceptor]) {
262
+ ll.panic("chainInfo not found for %v", chainOrReceptor)
263
+ }
264
+ if len(receptors) > 0 {
265
+ ll.panic("mixing receptors and chains is not allowed")
266
+ }
267
+ chains += [chainOrReceptor]
268
+ }
269
+ }
270
+
271
+ perChainResults := {}
272
+
245
273
  for chain in chains {
246
274
  chainInfo := chainInfos[chain]
247
275
  ll.assert(!is_undefined(chainInfo), "chainInfo not found for chain %v", chain)
248
- additionalSpec := {
249
- spec: {
250
- domain: {
251
- "pl7.app/chain": chain
252
- }
253
- }
254
- }
255
- columnSpecsTransform := func(specs) {
256
- return slices.map(specs, func(spec) {
257
- return maps.deepMerge(spec, additionalSpec)
258
- })
259
- }
260
276
 
261
- traceSteps := [{type: "milaboratories.mixcr-clonotyping.export", id: blockId + "." + chain, importance: 80, label: chainInfo.name}]
277
+ //
278
+ // Exporting CLNS -> TSV
279
+ //
262
280
 
263
281
  exportOutputs := [ {
264
282
  type: "Resource",
@@ -274,13 +292,16 @@ self.body(func(inputs) {
274
292
  path: ["tsv"]
275
293
  } ]
276
294
 
277
- if !is_undefined(axesByClonotypeKey) {
295
+ if !is_undefined(axesByClonotypeKey) && !isSingleCell {
296
+ // only adding data outputs if we are in bulk mode
278
297
  exportOutputs += [ {
279
298
  type: "Xsv",
280
299
  xsvType: "tsv",
281
300
  settings: {
282
301
  axes: axesByClonotypeKey,
283
- columns: columnSpecsTransform(columnsSpecPerSample),
302
+ columns: transformSpecs(columnsSpecPerSample, {
303
+ spec: { domain: { "pl7.app/vdj/chain": chain } }
304
+ }),
284
305
  storageFormat: "Binary",
285
306
  partitionKeyLength: 0
286
307
  },
@@ -289,39 +310,80 @@ self.body(func(inputs) {
289
310
  } ]
290
311
  }
291
312
 
313
+ if isSingleCell {
314
+ exportOutputs += [ {
315
+ type: "Resource",
316
+ spec: {
317
+ kind: "PColumn",
318
+ name: "mixcr.com/clonotypeTableForSingleCell",
319
+ domain: {
320
+ "pl7.app/vdj/clonotypingRunId": blockId
321
+ },
322
+ valueType: "File"
323
+ },
324
+ name: "clonotypeTableForSingleCell",
325
+ path: ["tsvForSingleCell"]
326
+ } ]
327
+ }
328
+
292
329
  exportResults := pframes.processColumn(
293
330
  clnsFiles,
294
331
  mixcrExportTpl,
295
332
  exportOutputs,
296
333
  {
297
334
  // will be automatically propagated to all output specs
298
- traceSteps: traceSteps,
335
+ traceSteps: [{type: "milaboratories.mixcr-clonotyping.export", id: blockId + "." + chain, importance: 80, label: chainInfo.name}],
299
336
 
300
337
  extra: {
301
338
  params: {
302
339
  chains: chainInfo.mixcrFilter,
303
340
  clonotypeKeyColumns: clonotypeKeyColumns,
341
+ clonotypeKeyArgs: clonotypeKeyArgs,
342
+ cellTagColumns: cellTagColumns,
304
343
  exportArgs: exportArgs
305
344
  }
306
345
  }
307
346
  }
308
347
  )
309
348
 
310
- exportResults.addXsvOutputToBuilder(clonotypes, "byCloneKeyBySample", "byCloneKeyBySample/" + chain + "/")
349
+ //
350
+ // Aggregating the data to produce:
351
+ // [clonotypeKey] -> properties
352
+ // from:
353
+ // [sampleId, clonotypeKey] -> properties
354
+ //
311
355
 
312
356
  aggregationOutputs := [ {
313
- type: "Xsv",
314
- xsvType: "tsv",
315
- settings: {
316
- axes: axesByClonotypeKey,
317
- columns: columnSpecsTransform(columnsSpecPerClonotype),
318
- storageFormat: "Binary",
319
- partitionKeyLength: 0
357
+ type: "Resource",
358
+ spec: {
359
+ kind: "PColumn",
360
+ name: "mixcr.com/clonotypeProperties",
361
+ domain: {
362
+ "pl7.app/vdj/clonotypingRunId": blockId
363
+ },
364
+ valueType: "File"
320
365
  },
321
- name: "byCloneKey",
366
+ name: "clonotypeProperties",
322
367
  path: ["tsv"]
323
368
  } ]
324
369
 
370
+ if !isSingleCell {
371
+ aggregationOutputs += [ {
372
+ type: "Xsv",
373
+ xsvType: "tsv",
374
+ settings: {
375
+ axes: axesByClonotypeKey,
376
+ columns: transformSpecs(columnsSpecPerClonotype, {
377
+ spec: { domain: { "pl7.app/vdj/chain": chain } }
378
+ }),
379
+ storageFormat: "Binary",
380
+ partitionKeyLength: 0
381
+ },
382
+ name: "byCloneKey",
383
+ path: ["tsv"]
384
+ } ]
385
+ }
386
+
325
387
  aggregateByCloneKey := pframes.processColumn(
326
388
  exportResults.output("clonotypeTable"),
327
389
  aggregateByClonotypeKeyTpl,
@@ -339,7 +401,134 @@ self.body(func(inputs) {
339
401
  }
340
402
  )
341
403
 
342
- aggregateByCloneKey.addXsvOutputToBuilder(clonotypes, "byCloneKey", "byCloneKey/" + chain + "/")
404
+ if isSingleCell {
405
+ // collecting results for possible future single cell processing
406
+ perChainResults[chain] = {
407
+ tsvForSingleCell: exportResults.output("clonotypeTableForSingleCell"),
408
+ clonotypeProperties: aggregateByCloneKey.output("clonotypeProperties")
409
+ }
410
+ } else {
411
+ // only adding data outputs if we are in bulk mode
412
+ exportResults.addXsvOutputToBuilder(clonotypes, "byCloneKeyBySample", "byCloneKeyBySample/" + chain + "/")
413
+ aggregateByCloneKey.addXsvOutputToBuilder(clonotypes, "byCloneKey", "byCloneKey/" + chain + "/")
414
+ }
415
+ }
416
+
417
+ if isSingleCell {
418
+ for receptor in receptors {
419
+ receptorInfo := receptorInfos[receptor]
420
+
421
+ singleCellOutputs := [ {
422
+ type: "Resource",
423
+ spec: {
424
+ kind: "PColumn",
425
+ name: "mixcr.com/singleCellAbundanceTable",
426
+ domain: {
427
+ "pl7.app/vdj/clonotypingRunId": blockId
428
+ },
429
+ valueType: "File"
430
+ },
431
+ name: "abundanceTsv",
432
+ path: ["abundanceTsv"]
433
+ }, {
434
+ type: "Xsv",
435
+ xsvType: "tsv",
436
+ settings: {
437
+ axes: [ {
438
+ column: "sampleId",
439
+ spec: inputSpec.axesSpec[0]
440
+ } ] + axesByScClonotypeKey,
441
+ columns: transformSpecs(columnsSpecPerSampleSc, {
442
+ spec: { domain: { "pl7.app/vdj/receptor": receptor } }
443
+ }),
444
+ storageFormat: "Binary",
445
+ partitionKeyLength: 1
446
+ },
447
+ name: "abundanceTable",
448
+ path: ["abundanceTsv"]
449
+ } ]
450
+
451
+ for chainIdx in [0, 1] {
452
+ // "A" chain is always the one that is more diverse
453
+ chainLetterU := ["A", "B"][chainIdx]
454
+ chainNameU := chainInfos[receptorInfo.chains[chainIdx]].name
455
+ chainNameL := text.to_lower(chainNameU)
456
+
457
+ for isPrimary in [true, false] {
458
+ pPrefixU := isPrimary ? "Primary" : "Secondary"
459
+ pPrefixL := text.to_lower(pPrefixU)
460
+
461
+ // i.e. propertiesAPrimary
462
+ propertiesTableName := "properties" + chainLetterU + pPrefixU
463
+ // i.e. propertiesAPrimaryTsv
464
+ propertiesTsvOutputName := propertiesTableName + "Tsv"
465
+
466
+ singleCellOutputs += [ {
467
+ type: "Resource",
468
+ spec: {
469
+ kind: "PColumn",
470
+ name: "mixcr.com/scClonotypeTable/" + chainNameL + pPrefixU,
471
+ domain: {
472
+ "pl7.app/vdj/clonotypingRunId": blockId
473
+ },
474
+ valueType: "File"
475
+ },
476
+ name: propertiesTsvOutputName,
477
+ path: [ propertiesTsvOutputName ]
478
+ }, {
479
+ type: "Xsv",
480
+ xsvType: "tsv",
481
+ settings: {
482
+ axes: axesByScClonotypeKey,
483
+ columns: transformSpecs(columnsSpecPerClonotype, {
484
+ spec: {
485
+ domain: {
486
+ "pl7.app/vdj/receptor": receptor,
487
+ "pl7.app/vdj/scClonotypeChain": chainLetterU
488
+ },
489
+ annotations: {
490
+ "pl7.app/label": func(label) {
491
+ return pPrefixU + " " + chainNameU + " " + label
492
+ }
493
+ }
494
+ }
495
+ }),
496
+ storageFormat: "Binary",
497
+ partitionKeyLength: 0
498
+ },
499
+ name: propertiesTableName,
500
+ path: [ propertiesTsvOutputName ]
501
+ } ]
502
+ }
503
+ }
504
+
505
+ chainA := receptorInfo.chains[0]
506
+ chainB := receptorInfo.chains[1]
507
+
508
+ // Using A chain files as main PColumn for xsv conversion through pframes.processColumn.
509
+ // Since we aggregate by sample, this is just a single pass through the data.
510
+
511
+ singleCellResult := pframes.processColumn(
512
+ perChainResults[chainA].tsvForSingleCell,
513
+ processSingleCellTpl,
514
+ singleCellOutputs,
515
+ {
516
+ aggregate: ["pl7.app/sampleId"],
517
+ extra: {
518
+ byCellTagB: perChainResults[chainB].tsvForSingleCell.data,
519
+ propertiesA: perChainResults[chainA].clonotypeProperties.data,
520
+ propertiesB: perChainResults[chainB].clonotypeProperties.data
521
+ }
522
+ }
523
+ )
524
+
525
+ singleCellResult.addXsvOutputToBuilder(clonotypes, "abundanceTable", "clonotypeProperties/abundance/" + receptor + "/")
526
+
527
+ singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesAPrimary", "clonotypeProperties/" + receptor + "/aPrimary/")
528
+ singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesASecondary", "clonotypeProperties/" + receptor + "/aSecondary/")
529
+ singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesBPrimary", "clonotypeProperties/" + receptor + "/bPrimary/")
530
+ singleCellResult.addXsvOutputToBuilder(clonotypes, "propertiesBSecondary", "clonotypeProperties/" + receptor + "/bSecondary/")
531
+ }
343
532
  }
344
533
 
345
534
  return {
@@ -1,4 +1,4 @@
1
- self := import("@platforma-sdk/workflow-tengo:tpl")
1
+ self := import("@platforma-sdk/workflow-tengo:tpl.light")
2
2
  ll := import("@platforma-sdk/workflow-tengo:ll")
3
3
 
4
4
  calculateExportSpecs := import(":calculate-export-specs")
@@ -27,8 +27,8 @@ const testCases: TestCase[] = [
27
27
  preset: 'milab-human-dna-xcr-7genes-multiplex',
28
28
  check: (expect, config) => {
29
29
  // console.dir(config, { depth: 5 });
30
- expect(config.axesByClonotypeId).to.have.lengthOf(1);
31
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
30
+ // expect(config.axesByClonotypeId).to.have.lengthOf(1);
31
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
32
32
  expect(config.columnsSpec.find((c: any) => c.column === 'readCount')).toBeDefined();
33
33
  expect(config.columnsSpec.find((c: any) => c.column === 'readFraction')).toBeDefined();
34
34
  expect(config.columnsSpec.find((c: any) => c.column === 'nSeqCDR3')).toBeDefined();
@@ -40,10 +40,10 @@ const testCases: TestCase[] = [
40
40
  species: 'human',
41
41
  check: (expect, config) => {
42
42
  // console.dir(config, { depth: 5 });
43
- expect(config.axesByClonotypeId).to.have.lengthOf(2);
44
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'tagValueCELL')).toBeDefined();
45
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
46
- expect(config.columnsSpec.find((c: any) => c.column === 'cellGroup')).toBeDefined();
43
+ // expect(config.axesByClonotypeId).to.have.lengthOf(2);
44
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'tagValueCELL')).toBeDefined();
45
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
46
+ // expect(config.columnsSpec.find((c: any) => c.column === 'cellGroup')).toBeDefined();
47
47
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeCount')).toBeDefined();
48
48
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeFraction')).toBeDefined();
49
49
  expect(config.columnsSpec.find((c: any) => c.column === 'nSeqFR1')).toBeDefined();
@@ -67,8 +67,8 @@ const testCases: TestCase[] = [
67
67
  preset: 'cellecta-human-rna-xcr-umi-drivermap-air',
68
68
  check: (expect, config) => {
69
69
  // console.dir(config, { depth: 5 });
70
- expect(config.axesByClonotypeId).to.have.lengthOf(1);
71
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
70
+ // expect(config.axesByClonotypeId).to.have.lengthOf(1);
71
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
72
72
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeCount')).toBeDefined();
73
73
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeFraction')).toBeDefined();
74
74
  expect(config.columnsSpec.find((c: any) => c.column === 'nSeqCDR3')).toBeDefined();
@@ -79,8 +79,8 @@ const testCases: TestCase[] = [
79
79
  preset: 'takara-human-rna-bcr-umi-smartseq',
80
80
  check: (expect, config) => {
81
81
  // console.dir(config, { depth: 5 });
82
- expect(config.axesByClonotypeId).to.have.lengthOf(1);
83
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
82
+ // expect(config.axesByClonotypeId).to.have.lengthOf(1);
83
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
84
84
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeCount')).toBeDefined();
85
85
  expect(config.columnsSpec.find((c: any) => c.column === 'uniqueMoleculeFraction')).toBeDefined();
86
86
  expect(config.columnsSpec.find((c: any) => c.column === 'nSeqFR1')).toBeDefined();
@@ -105,8 +105,8 @@ const testCases: TestCase[] = [
105
105
  species: 'human',
106
106
  check: (expect, config) => {
107
107
  // console.dir(config, { depth: 5 });
108
- expect(config.axesByClonotypeId).to.have.lengthOf(1);
109
- expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
108
+ // expect(config.axesByClonotypeId).to.have.lengthOf(1);
109
+ // expect(config.axesByClonotypeId.find((c: any) => c.column === 'cloneId')).toBeDefined();
110
110
  expect(config.columnsSpec.find((c: any) => c.column === 'readCount')).toBeDefined();
111
111
  expect(config.columnsSpec.find((c: any) => c.column === 'readFraction')).toBeDefined();
112
112
  expect(config.columnsSpec.find((c: any) => c.column === 'nSeqCDR3')).toBeDefined();
@@ -118,7 +118,7 @@ const testCases: TestCase[] = [
118
118
  species: 'human',
119
119
  check: (expect, config) => {
120
120
  // console.dir(config, { depth: 5 });
121
- expect(config.axesByClonotypeId).to.have.lengthOf(1);
121
+ // expect(config.axesByClonotypeId).to.have.lengthOf(1);
122
122
  expect(config.columnsSpec.find((c: any) => c.column === 'readCount')).toBeDefined();
123
123
  }
124
124
  }