@platforma-open/milaboratories.mixcr-shm-trees.workflow 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,666 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+ text := import("text")
3
+
4
+ // ==============================================
5
+ //
6
+ // Every function in this file will return:
7
+ // pfconvParams - params to run xsv.importFileMap on exported files
8
+ // cmdArgs - additional args for MiXCR to specify what fields to extract
9
+ //
10
+ // ==============================================
11
+
12
+ // TODO: TMP - shared value for the "allowNA" field used by most column definitions below
13
+ allowNA := true
14
+
15
// Export of trees without nodes (one set of values per tree).
//
// @param dataDescription - map; the keys "hasCellTags", "hasUmiTags" and
//                          "coveredFeatures" are read here
// @param runWithSingleCell - when truthy and cell tags are present, "subtreeId"
//                            is exported as an additional axis
// @return map with:
//   pfconvParams - params to run xsv.importFileMap on the exported file
//   cmdArgs - additional args for MiXCR to specify what fields to extract
shmTreeTableOptions := func(dataDescription, runWithSingleCell) {
	// TODO add forChain if runWithSingleCell

	// Axis of type Long with an empty domain.
	longAxis := func(column, name, label) {
		return {
			"column": column,
			"spec": {
				"name": name,
				"type": "Long",
				"domain": { },
				"annotations": {
					"pl7.app/label": label
				}
			}
		}
	}

	// Column of value type Long without a domain.
	longColumn := func(column, id, name, label) {
		return {
			"column": column,
			"id": id,
			"allowNA": allowNA,
			"spec": {
				"name": name,
				"valueType": "Long",
				"annotations": {
					"pl7.app/label": label
				}
			}
		}
	}

	// Best gene hit column; gene is the gene letter used in the header ("V", "J").
	geneHitColumn := func(gene, id) {
		return {
			"column": "best" + gene + "Hit",
			"id": id,
			"allowNA": allowNA,
			"spec": {
				"name": "pl7.app/vdj/geneHit",
				"valueType": "String",
				"domain": {
					"pl7.app/vdj/reference": gene + "Gene"
				},
				"annotations": {
					"type": gene + " gene name",
					"pl7.app/label": gene + " gene"
				}
			}
		}
	}

	// Sequence column of a gene feature in the given alphabet.
	sequenceColumn := func(column, id, feature, alphabet, label) {
		return {
			"column": column,
			"id": id,
			"allowNA": allowNA,
			"spec": {
				"name": "pl7.app/vdj/sequence",
				"valueType": "String",
				"domain": {
					"pl7.app/vdj/feature": feature,
					"pl7.app/alphabet": alphabet
				},
				"annotations": {
					"pl7.app/type": "sequence",
					"pl7.app/label": label
				}
			}
		}
	}

	hasCellTags := dataDescription["hasCellTags"]

	// The order of cmdArgs defines the order of exported fields; axes and
	// columns are appended next to the argument that produces them.
	cmdArgs := ["-treeId"]
	axes := [longAxis("treeId", "pl7.app/dendrogram/treeId", "Tree id")]
	columns := []

	if runWithSingleCell && hasCellTags {
		cmdArgs = append(cmdArgs, "-subtreeId")
		axes = append(axes, longAxis("subtreeId", "pl7.app/dendrogram/subtreeId", "Subtree id"))
	}

	if hasCellTags {
		cmdArgs = append(cmdArgs, "-totalUniqueTagCountInTree", "Cell")
		columns = append(columns, longColumn("totalUniqueCellCountInTree", "uniq-cell-count", "pl7.app/vdj/uniqueCellCount", "Number of cells"))
	}

	if dataDescription["hasUmiTags"] {
		cmdArgs = append(cmdArgs, "-totalUniqueTagCountInTree", "Molecule")
		columns = append(columns, longColumn("totalUniqueMoleculeCountInTree", "uniq-umi-count", "pl7.app/vdj/uniqueMoleculeCount", "Number of molecules"))
	}

	cmdArgs = append(cmdArgs, "-numberOfClonesInTree")
	columns = append(columns, longColumn("numberOfClonesInTree", "number-of-clones-in-tree", "pl7.app/vdj/numberOfClonesInTree", "Number of clones"))

	// The label used to be "Number of clones", duplicating the column above.
	cmdArgs = append(cmdArgs, "-numberOfNodesWithClones")
	columns = append(columns, longColumn("numberOfNodesWithClones", "number-of-nodes-with-clones", "pl7.app/vdj/numberOfNodesWithClones", "Number of nodes with clones"))

	cmdArgs = append(cmdArgs, "-totalReadsCountInTree")
	columns = append(columns, longColumn("totalReadsCountInTree", "total-reads-count-in-tree", "pl7.app/vdj/totalReadsCountInTree", "Total reads count"))

	cmdArgs = append(cmdArgs, "-vHit")
	columns = append(columns, geneHitColumn("V", "v-gene"))

	cmdArgs = append(cmdArgs, "-jHit")
	columns = append(columns, geneHitColumn("J", "j-gene"))

	cmdArgs = append(cmdArgs, "-chains")
	columns = append(columns, {
		"column": "chains",
		"id": "chains",
		"allowNA": allowNA,
		"spec": {
			"valueType": "String",
			"name": "pl7.app/vdj/chain",
			"annotations": {
				"pl7.app/label": "Chain"
			}
		}
	})

	// Amino acid and nucleotide sequences of the MRCA for every covered feature.
	for feature in dataDescription["coveredFeatures"] {
		cmdArgs = append(cmdArgs, "-aaFeature", feature, "mrca")
		columns = append(columns, sequenceColumn("aaSeq" + feature + "OfMrca", "aa-seq-" + feature + "-mrca", feature, "aminoacid", feature + " of MRCA aa"))

		cmdArgs = append(cmdArgs, "-nFeature", feature, "mrca")
		columns = append(columns, sequenceColumn("nSeq" + feature + "OfMrca", "n-seq-" + feature + "-mrca", feature, "nucleotide", feature + " of MRCA nt"))
	}

	return {
		"pfconvParams": {
			"axes": axes,
			"columns": columns,
			"storageFormat": "Binary",
			"partitionKeyLength": 0
		},
		"cmdArgs": cmdArgs
	}
}
225
+
226
// Export of data that is unique for a node.
//
// @param dataDescription - map; the keys "hasCellTags" and "coveredFeatures"
//                          are read here
// @param runWithSingleCell - when truthy and cell tags are present, "subtreeId"
//                            is exported as an additional axis
// @return map with:
//   pfconvParams - params to run xsv.importFileMap on the exported file
//   cmdArgs - additional args for MiXCR to specify what fields to extract
shmTreeNodesTableOptions := func(dataDescription, runWithSingleCell) {
	// TODO add forChain if runWithSingleCell

	// Axis of type Long with an empty domain.
	longAxis := func(column, name, label) {
		return {
			"column": column,
			"spec": {
				"name": name,
				"type": "Long",
				"domain": { },
				"annotations": {
					"pl7.app/label": label
				}
			}
		}
	}

	// Best gene hit column; gene is the gene letter used in the header ("V", "J").
	geneHitColumn := func(gene, id) {
		return {
			"column": "best" + gene + "Hit",
			"id": id,
			"allowNA": allowNA,
			"spec": {
				"name": "pl7.app/vdj/geneHit",
				"valueType": "String",
				"domain": {
					"pl7.app/vdj/reference": gene + "Gene"
				},
				"annotations": {
					"type": gene + " gene name",
					"pl7.app/label": gene + " gene"
				}
			}
		}
	}

	// Sequence column of a gene feature in the given alphabet.
	sequenceColumn := func(column, id, feature, alphabet, label) {
		return {
			"column": column,
			"id": id,
			"allowNA": allowNA,
			"spec": {
				"name": "pl7.app/vdj/sequence",
				"valueType": "String",
				"domain": {
					"pl7.app/vdj/feature": feature,
					"pl7.app/alphabet": alphabet
				},
				"annotations": {
					"pl7.app/type": "sequence",
					"pl7.app/label": label
				}
			}
		}
	}

	coveredFeatures := dataDescription["coveredFeatures"]

	// The order of cmdArgs defines the order of exported fields; axes and
	// columns are appended next to the argument that produces them.
	cmdArgs := ["-treeId"]
	axes := [longAxis("treeId", "pl7.app/dendrogram/treeId", "Tree id")]
	columns := []

	if runWithSingleCell && dataDescription["hasCellTags"] {
		cmdArgs = append(cmdArgs, "-subtreeId")
		axes = append(axes, longAxis("subtreeId", "pl7.app/dendrogram/subtreeId", "Subtree id"))
	}

	cmdArgs = append(cmdArgs, "-nodeId")
	axes = append(axes, longAxis("nodeId", "pl7.app/dendrogram/nodeId", "Node id"))

	cmdArgs = append(cmdArgs, "-isObserved")
	columns = append(columns, {
		"column": "isObserved",
		// NOTE(review): the id is misspelled ("obsered"); kept as is because
		// other code may reference it - confirm before renaming.
		"id": "is-node-obsered",
		"allowNA": false,
		"spec": {
			"name": "pl7.app/dendrogram/isObserved",
			// TODO change to Boolean when it will be supported
			"valueType": "String",
			"annotations": {
				"pl7.app/label": "Is observed in data"
			}
		}
	})

	cmdArgs = append(cmdArgs, "-parentId")
	columns = append(columns, {
		"column": "parentId",
		"id": "parent-id",
		"allowNA": true,
		"spec": {
			"name": "pl7.app/dendrogram/topology",
			"valueType": "Long",
			"annotations": {
				"pl7.app/label": "Parent node id",
				"pl7.app/dendrogram/isTopology": "true"
			}
		}
	})

	cmdArgs = append(cmdArgs, "-distance", "germline")
	columns = append(columns, {
		"column": "DistanceFromGermline",
		"id": "distance-from-germline",
		"allowNA": true,
		"spec": {
			"name": "pl7.app/dendrogram/distance",
			"valueType": "Double",
			"annotations": {
				// Label typo fixed (was "Distanse from germline").
				"pl7.app/label": "Distance from germline",
				"pl7.app/dendrogram/isDistance": "true"
			}
		}
	})

	cmdArgs = append(cmdArgs, "-nMutationsRate")
	columns = append(columns, {
		"column": "nMutationsRate",
		"id": "n-mutations-rate",
		"allowNA": allowNA,
		"spec": {
			"name": "pl7.app/vdj/mutationsRate",
			"valueType": "Double",
			"domain": {
				// all covered features joined into one comma-separated value
				"pl7.app/vdj/features": text.join(coveredFeatures, ","),
				"pl7.app/alphabet": "nucleotide"
			},
			"annotations": {
				"pl7.app/label": "Mutations rate nt"
			}
		}
	})

	cmdArgs = append(cmdArgs, "-vHit")
	columns = append(columns, geneHitColumn("V", "v-gene"))

	cmdArgs = append(cmdArgs, "-jHit")
	columns = append(columns, geneHitColumn("J", "j-gene"))

	// Amino acid and nucleotide sequences of the node for every covered feature.
	for feature in coveredFeatures {
		cmdArgs = append(cmdArgs, "-aaFeature", feature)
		columns = append(columns, sequenceColumn("aaSeq" + feature, "aa-seq-" + feature, feature, "aminoacid", feature + " aa"))

		cmdArgs = append(cmdArgs, "-nFeature", feature)
		columns = append(columns, sequenceColumn("nSeq" + feature, "n-seq-" + feature, feature, "nucleotide", feature + " nt"))
	}

	return {
		"pfconvParams": {
			"axes": axes,
			"columns": columns,
			"storageFormat": "Binary",
			"partitionKeyLength": 0
		},
		"cmdArgs": cmdArgs
	}
}
424
+
425
// Export of data that is unique for clones, but not unique for a node
// (different clones could be in the same topology node, for example, different time points).
//
// @param dataDescription - map; the keys "hasCellTags" and "hasUmiTags" are read here
// @param donorColumn - object whose "spec" field is read as JSON; the first entry
//                      of its axesSpec becomes the spec of the "fileName" axis
// @param runWithSingleCell - when truthy and cell tags are present, "subtreeId"
//                            is exported as an additional axis
// @return map with:
//   pfconvParams - params to run xsv.importFileMap on the exported file
//   cmdArgs - additional args for MiXCR to specify what fields to extract
shmTreeNodesWithClonesTableOptions := func(dataDescription, donorColumn, runWithSingleCell) {
	// TODO add forChain if runWithSingleCell
	donorColumnSpec := donorColumn.get("spec").getDataAsJson()

	// Axis of type Long with an empty domain.
	longAxis := func(column, name, label) {
		return {
			"column": column,
			"spec": {
				"name": name,
				"type": "Long",
				"domain": { },
				"annotations": {
					"pl7.app/label": label
				}
			}
		}
	}

	// Nullable column without a domain.
	plainColumn := func(column, id, name, valueType, label) {
		return {
			"column": column,
			"id": id,
			"allowNA": true,
			"spec": {
				"name": name,
				"valueType": valueType,
				"annotations": {
					"pl7.app/label": label
				}
			}
		}
	}

	// Nullable best gene hit column; gene is the gene letter used in the header ("D", "C").
	geneHitColumn := func(gene, id) {
		return {
			"column": "best" + gene + "Hit",
			"id": id,
			"allowNA": true,
			"spec": {
				"name": "pl7.app/vdj/geneHit",
				"valueType": "String",
				"domain": {
					"pl7.app/vdj/reference": gene + "Gene"
				},
				"annotations": {
					"type": gene + " gene name",
					"pl7.app/label": "Best " + gene + " hit"
				}
			}
		}
	}

	hasCellTags := dataDescription["hasCellTags"]

	// The order of cmdArgs defines the order of exported fields; axes and
	// columns are appended next to the argument that produces them.
	cmdArgs := ["-treeId"]
	axes := [longAxis("treeId", "pl7.app/dendrogram/treeId", "Tree id")]
	columns := []

	if runWithSingleCell && hasCellTags {
		cmdArgs = append(cmdArgs, "-subtreeId")
		axes = append(axes, longAxis("subtreeId", "pl7.app/dendrogram/subtreeId", "Subtree id"))
	}

	cmdArgs = append(cmdArgs, "-nodeId")
	axes = append(axes, longAxis("nodeId", "pl7.app/dendrogram/nodeId", "Node id"))

	// The file name is reduced to the part before "___" and used as the value
	// of the axis taken from the donor column spec.
	cmdArgs = append(cmdArgs, "-fileName")
	axes = append(axes, {
		"column": "fileName",
		"preProcess": [
			{
				"type": "regexpReplace",
				"pattern": "^(.*)___.*\\.clns$",
				"replacement": "$1"
			}
		],
		"spec": donorColumnSpec.axesSpec[0]
	})

	cmdArgs = append(cmdArgs, "-cloneId")
	axes = append(axes, {
		"column": "cloneId",
		"id": "clone-id",
		"spec": {
			"name": "pl7.app/vdj/cloneId",
			"type": "Long",
			// TODO domain with blockId
			"annotations": {
				"pl7.app/label": "Clone id"
			}
		}
	})

	if hasCellTags {
		cmdArgs = append(cmdArgs, "-uniqueTagCount", "Cell")
		columns = append(columns, plainColumn("uniqueCellCount", "uniq-cell-count-for-clone", "pl7.app/vdj/uniqueCellCount", "Long", "Number of cells"))
	}

	if dataDescription["hasUmiTags"] {
		cmdArgs = append(cmdArgs, "-uniqueTagCount", "Molecule")
		columns = append(columns, plainColumn("uniqueMoleculeCount", "uniq-umi-count-for-clone", "pl7.app/vdj/uniqueMoleculeCount", "Long", "Number of molecules"))
	}

	cmdArgs = append(cmdArgs, "-readCount")
	columns = append(columns, plainColumn("readCount", "read-count", "pl7.app/vdj/readCount", "Long", "Number of Reads"))

	cmdArgs = append(cmdArgs, "-readFraction")
	columns = append(columns, plainColumn("readFraction", "read-fraction", "pl7.app/vdj/readFraction", "Double", "Fraction of reads"))

	cmdArgs = append(cmdArgs, "-targetSequences")
	columns = append(columns, {
		"column": "targetSequences",
		"id": "n-seq-clonal-sequences",
		"allowNA": true,
		"spec": {
			"name": "pl7.app/vdj/sequence",
			"valueType": "String",
			"domain": {
				"pl7.app/vdj/sequence": "clonalSequence",
				"pl7.app/alphabet": "nucleotide"
			},
			"annotations": {
				"pl7.app/type": "sequence",
				"pl7.app/label": "Clonal sequences"
			}
		}
	})

	cmdArgs = append(cmdArgs, "-targetQualities")
	columns = append(columns, {
		"column": "targetQualities",
		"id": "clonal-qualities",
		"allowNA": true,
		"spec": {
			"name": "pl7.app/vdj/sequenceQuality",
			"valueType": "String",
			"domain": {
				"pl7.app/vdj/quality": "clonalQuality"
			},
			"annotations": {
				"type": "quality string",
				"pl7.app/label": "Clonal qualities"
			}
		}
	})

	cmdArgs = append(cmdArgs, "-dHit")
	columns = append(columns, geneHitColumn("D", "d-gene"))

	cmdArgs = append(cmdArgs, "-cHit")
	columns = append(columns, geneHitColumn("C", "c-gene"))

	cmdArgs = append(cmdArgs, "-isotype")
	columns = append(columns, {
		"column": "isotype",
		"id": "isotype",
		"allowNA": true,
		"spec": {
			"valueType": "String",
			"name": "pl7.app/vdj/isotype",
			"annotations": {
				"pl7.app/label": "IG isotype"
			}
		}
	})

	return {
		"pfconvParams": {
			"axes": axes,
			"columns": columns,
			"storageFormat": "Binary",
			"partitionKeyLength": 0
		},
		"cmdArgs": cmdArgs
	}
}
660
+
661
+ // To use this file as a library, the functions have to be exported explicitly. The exported map is passed through ll.toStrict (presumably so that access to unknown keys fails - verify against the SDK).
662
+ export ll.toStrict({
663
+ shmTreeTableOptions: shmTreeTableOptions,
664
+ shmTreeNodesTableOptions: shmTreeNodesTableOptions,
665
+ shmTreeNodesWithClonesTableOptions: shmTreeNodesWithClonesTableOptions
666
+ })