@platforma-open/milaboratories.top-antibodies.workflow 1.15.1 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,495 +0,0 @@
- // Prerun template for clonotype filtering
- wf := import("@platforma-sdk/workflow-tengo:workflow")
- exec := import("@platforma-sdk/workflow-tengo:exec")
- assets := import("@platforma-sdk/workflow-tengo:assets")
- xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
- pframes := import("@platforma-sdk/workflow-tengo:pframes")
- slices := import("@platforma-sdk/workflow-tengo:slices")
- render := import("@platforma-sdk/workflow-tengo:render")
- ll := import("@platforma-sdk/workflow-tengo:ll")
- kabatConv := import(":pf-kabat-conv")
-
- spectratypeConv := import(":pf-spectratype-conv")
- vjUsageConv := import(":pf-vj-usage-conv")
-
- filterAndSampleTpl := assets.importTemplate(":filter-and-sample")
-
- wf.prepare(func(args){
-   if is_undefined(args.inputAnchor) {
-     return {
-       columns: wf.createPBundleBuilder().build()
-     }
-   }
-   // We need a table with cluster ID (optional) | clonotype id | selected ranking columns
-   bundleBuilder := wf.createPBundleBuilder()
-   bundleBuilder.ignoreMissingDomains() // to make query work for both bulk and single cell data
-   bundleBuilder.addAnchor("main", args.inputAnchor)
-
-   validRanks := false
-   if len(args.rankingOrder) > 0 {
-     for col in args.rankingOrder {
-       // For cases where the user is selecting the table to filter
-       if col.value != undefined {
-         bundleBuilder.addAnchor(col.value.anchorName, col.value.anchorRef)
-         bundleBuilder.addSingle(col.value.column)
-         validRanks = true
-       }
-     }
-   }
-   if !validRanks {
-     // @TODO: temporary patch for an issue where rankingOrderDefault
-     // is not yet defined by the time the prerun runs
-     // (the prerun can start before this variable is ready)
-     if args.rankingOrderDefault.value != undefined {
-       bundleBuilder.addAnchor(args.rankingOrderDefault.value.anchorName,
-         args.rankingOrderDefault.value.anchorRef)
-       bundleBuilder.addSingle(args.rankingOrderDefault.value.column)
-     }
-   }
-
-   // Load filter columns
-   if len(args.filters) > 0 {
-     for filter in args.filters {
-       if filter.value != undefined {
-         bundleBuilder.addAnchor(filter.value.anchorName, filter.value.anchorRef)
-         bundleBuilder.addSingle(filter.value.column)
-       }
-     }
-   }
-
-
-   // Add linker column
-   bundleBuilder.addMulti({
-     axes: [{ anchor: "main", idx: 1 }], // this will do partial axes match (unlike in the model)
-     annotations: { "pl7.app/isLinkerColumn": "true" },
-     partialAxesMatch: true
-   }, "linkers")
-
-   // Add cluster size columns from clustering blocks
-   bundleBuilder.addMulti({
-     name: "pl7.app/vdj/clustering/clusterSize",
-     partialAxesMatch: true
-   }, "clusterSizes")
-
-   // Add CDR3 sequences
-   bundleBuilder.addMulti({
-     axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
-     name: "pl7.app/vdj/sequence",
-     domain: {
-       "pl7.app/alphabet": "aminoacid",
-       "pl7.app/vdj/feature": "CDR3" // Specify CDR3 feature
-     }
-   }, "cdr3Sequences") // New collection name for CDR3 sequences
-
-   // Add V gene
-   bundleBuilder.addMulti({
-     axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
-     name: "pl7.app/vdj/geneHit",
-     domain: {
-       "pl7.app/vdj/reference": "VGene"
-     }
-   }, "VGenes")
-
-   // Add J gene
-   bundleBuilder.addMulti({
-     axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
-     name: "pl7.app/vdj/geneHit",
-     domain: {
-       "pl7.app/vdj/reference": "JGene"
-     }
-   }, "JGenes")
-
-   // Add assembling feature aminoacid sequences (bulk, sc, scFv)
-   bundleBuilder.addMulti({
-     axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
-     annotations: { "pl7.app/vdj/isAssemblingFeature": "true" },
-     domain: { "pl7.app/alphabet": "aminoacid" }
-   }, "assemblingAaSeqs")
-
-   return {
-     columns: bundleBuilder.build()
-   }
- })
-
- wf.body(func(args) {
-   // output containers
-   outputs := {}
-
-   if !is_undefined(args.inputAnchor) {
-     // Input arguments
-     columns := args.columns
-     datasetSpec := columns.getSpec(args.inputAnchor)
-     topClonotypes := args.topClonotypes
-
-     // Needed conditional variable
-     isSingleCell := datasetSpec.axesSpec[1].name == "pl7.app/vdj/scClonotypeKey"
-
-     ////////// Clonotype Filtering //////////
-     // Build clonotype table
-     cloneTable := pframes.csvFileBuilder()
-     cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
-
-     // Add Filters to table
-     addedAxes := []
-     filterMap := {}
-     rankingMap := {}
-     addedCols := false
-     if len(args.filters) > 0 {
-       for i, filter in args.filters {
-         // check both for value presence and for an actual p-column (covers cases where the upstream block was deleted)
-         if filter.value != undefined && columns.getColumn(filter.value.column).spec != undefined {
-           // Columns added here might also be in the ranking list, so we add explicit IDs
-           cloneTable.add(columns.getColumn(filter.value.column),
-             {header: "Filter_" + string(i), id: "filter_" + string(i)})
-           addedCols = true
-           // Store the reference value and filter type associated with this column
-           filterMap["Filter_" + string(i)] = filter.filter
-           filterMap["Filter_" + string(i)]["valueType"] = columns.getSpec(filter.value.column).valueType
-
-           // If the column does not carry the main anchor axis, we have to include its own axes
-           colsSpec := columns.getSpec(filter.value.column)
-           axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
-           if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
-             for na, ax in colsSpec.axesSpec {
-               if ax.name != datasetSpec.axesSpec[1].name {
-                 cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
-                 addedAxes = append(addedAxes, ax.name)
-               }
-             }
-           }
-         }
-       }
-     }
-
-     // Add ranking columns to table
-     validRanks := false
-     if len(args.rankingOrder) > 0 {
-       for i, col in args.rankingOrder {
-         // check both for value presence and for an actual p-column (covers cases where the upstream block was deleted)
-         if col.value != undefined && columns.getColumn(col.value.column).spec != undefined {
-           validRanks = true
-           cloneTable.add(columns.getColumn(col.value.column), {header: "Col" + string(i)})
-           addedCols = true
-           // Store ranking order for this column
-           rankingMap["Col" + string(i)] = col.rankingOrder
-
-           // If the column does not carry the main anchor axis, we have to include its own axes
-           colsSpec := columns.getSpec(col.value.column)
-           axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
-           if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
-             for na, ax in colsSpec.axesSpec {
-               if ax.name != datasetSpec.axesSpec[1].name && !slices.hasElement(addedAxes, ax.name) {
-                 cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
-               }
-             }
-           }
-         }
-       }
-     }
-     // If there were no ranking columns, or none of them were valid
-     if !validRanks {
-       // @TODO: temporary patch for an issue where rankingOrderDefault
-       // is not yet defined by the time the prerun runs
-       if args.rankingOrderDefault.value != undefined {
-         i := 0
-         cloneTable.add(columns.getColumn(args.rankingOrderDefault.value.column), {header: "Col" + string(i)})
-         addedCols = true
-         // Store default ranking order
-         rankingMap["Col" + string(i)] = args.rankingOrderDefault.rankingOrder
-
-         // If the column does not carry the main anchor axis, we have to include its own axes
-         colsSpec := columns.getSpec(args.rankingOrderDefault.value.column)
-         axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
-         if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
-           for na, ax in colsSpec.axesSpec {
-             if ax.name != datasetSpec.axesSpec[1].name {
-               cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
-             }
-           }
-         }
-       }
-     }
-
-     // Get linker columns if needed
-     linkerAxisSpec := {}
-     linkerClusterIdAxes := []
-     if len(columns.getColumns("linkers")) > 0 {
-       for i, col in columns.getColumns("linkers") {
-         clusterIdAxis := undefined
-         if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
-           cloneTable.add(col, {header: "linker." + string(i)})
-           cloneTable.setAxisHeader(col.spec.axesSpec[0], "cluster_" + string(i))
-           linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[0]
-           clusterIdAxis = col.spec.axesSpec[0]
-         } else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
-           cloneTable.add(col, {header: "linker." + string(i)})
-           cloneTable.setAxisHeader(col.spec.axesSpec[1], "cluster_" + string(i))
-           linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[1]
-           clusterIdAxis = col.spec.axesSpec[1]
-         }
-         // Collect clusterId axes from linker columns to match cluster size columns
-         if !is_undefined(clusterIdAxis) && clusterIdAxis.name == "pl7.app/vdj/clusterId" {
-           linkerClusterIdAxes = append(linkerClusterIdAxes, clusterIdAxis)
-         }
-         addedCols = true
-       }
-     }
-
-     // Add cluster size columns if available, but only those matching the linker columns' clusterId axes
-     // This ensures we only join columns from the same clustering run
-     if len(columns.getColumns("clusterSizes")) > 0 {
-       clusterSizeIdx := 0
-       for col in columns.getColumns("clusterSizes") {
-         // Find the clusterId axis in this cluster size column
-         clusterSizeClusterIdAxis := undefined
-         for axis in col.spec.axesSpec {
-           if axis.name == "pl7.app/vdj/clusterId" {
-             clusterSizeClusterIdAxis = axis
-             break
-           }
-         }
-
-         // Only add if we have linker columns and this cluster size matches one of them
-         shouldAdd := false
-         if len(linkerClusterIdAxes) > 0 && !is_undefined(clusterSizeClusterIdAxis) {
-           // Check if this cluster size column matches any linker's clusterId axis
-           for linkerAxis in linkerClusterIdAxes {
-             // Compare domains - they must match exactly for the same clustering run
-             if clusterSizeClusterIdAxis.name == linkerAxis.name &&
-               clusterSizeClusterIdAxis.type == linkerAxis.type {
-               // Check if domains match
-               domainsMatch := true
-               if is_undefined(clusterSizeClusterIdAxis.domain) != is_undefined(linkerAxis.domain) {
-                 domainsMatch = false
-               } else if !is_undefined(clusterSizeClusterIdAxis.domain) && !is_undefined(linkerAxis.domain) {
-                 // Compare domain keys and values
-                 if len(clusterSizeClusterIdAxis.domain) != len(linkerAxis.domain) {
-                   domainsMatch = false
-                 } else {
-                   for k, v in clusterSizeClusterIdAxis.domain {
-                     if is_undefined(linkerAxis.domain[k]) || linkerAxis.domain[k] != v {
-                       domainsMatch = false
-                       break
-                     }
-                   }
-                 }
-               }
-               if domainsMatch {
-                 shouldAdd = true
-                 break
-               }
-             }
-           }
-         }
-
-         // Only add cluster size columns that match a linker column's clustering run
-         if shouldAdd {
-           // Trace elements are already present in col.spec from the clustering block.
-           // deriveLabels (in label.ts) will use these existing trace elements to construct
-           // distinguishing labels when multiple clustering blocks are joined, similar to
-           // how LabelTypeFull ('__LABEL__@1') works. The trace includes:
-           // - Original dataset trace
-           // - "milaboratories.clonotype-clustering.sequences" trace element
-           // - "milaboratories.clonotype-clustering.clustering" trace element
-           // No modification needed - just preserve the existing trace.
-
-           cloneTable.add(col, {header: "clusterSize." + string(clusterSizeIdx)})
-           addedCols = true
-           // Add the cluster axis header
-           for axisIdx, axis in col.spec.axesSpec {
-             if axis.name != datasetSpec.axesSpec[1].name {
-               cloneTable.setAxisHeader(axis, "clusterAxis_" + string(clusterSizeIdx) + "_" + string(axisIdx))
-             }
-           }
-           clusterSizeIdx = clusterSizeIdx + 1
-         }
-       }
-     }
-
-     // Continue only if we have at least one column
-     // This condition prevents a transient intermittent error while filters are
-     // still being processed, and possibly in other situations too
-     if addedCols {
-       cloneTable.mem("16GiB")
-       cloneTable.cpu(1)
-       cloneTable = cloneTable.build()
-
-       // Use render.create to call the filter-and-sample template
-       filterSampleResult := render.create(filterAndSampleTpl, {
-         inputAnchor: args.inputAnchor,
-         cloneTable: cloneTable,
-         rankingOrder: args.rankingOrder,
-         rankingOrderDefault: args.rankingOrderDefault,
-         filters: args.filters,
-         filterMap: filterMap,
-         rankingMap: rankingMap,
-         datasetSpec: datasetSpec,
-         topClonotypes: args.topClonotypes
-       })
-
-       // Get the filtered clonotypes from the template result
-       outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
-
-       // Get the final (filtered and sampled) clonotypes CSV from the template result
-       finalClonotypesCsv := filterSampleResult.output("finalClonotypesCsv", 24 * 60 * 60 * 1000)
-       // outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
-
-       ////////// CDR3 Length Calculation //////////
-
-       cdr3SeqTable := pframes.tsvFileBuilder()
-       cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
-
-       // Must deal with multiple CDR3 sequences (two for each cell in single-cell data)
-       // The chain will be added to the header as cdr3Sequence.<chain> and used in the Python script
-       // Note that the chain is in spec.domain for single-cell data and in spec.axesSpec[0].domain for bulk data
-
-       // Helper function to add chain information to the headers dynamically
-       chainMapping := {
-         "IG": { "A": "Heavy", "B": "Light" },
-         "TCRAB": { "A": "TRA", "B": "TRB" },
-         "TCRGD": { "A": "TRG", "B": "TRD" }
-       }
-
-       makeHeaderName := func(col, baseHeaderName, isSingleCell) {
-         if isSingleCell {
-           chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
-           receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
-           chainLabel := chainMapping[receptor][chain]
-           return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
-         } else {
-           // For bulk, if chain info is available (e.g. IGH, IGK, IGL)
-           chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
-           if chainFromDomain != undefined {
-             return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
-           }
-         }
-         return baseHeaderName
-       };
-
-       // Process CDR3 sequences
-       cdr3Sequences := columns.getColumns("cdr3Sequences")
-
-       for col in cdr3Sequences {
-         headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
-         if isSingleCell {
-           if col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary" {
-             cdr3SeqTable.add(col, {header: headerName})
-           }
-         } else {
-           cdr3SeqTable.add(col, {header: headerName})
-         }
-       }
-
-       // Process V genes
-       vGenes := columns.getColumns("VGenes")
-
-       for col in vGenes {
-         headerName := makeHeaderName(col, "vGene", isSingleCell)
-         cdr3SeqTable.add(col, {header: headerName})
-       }
-
-       // Process J genes
-       jGenes := columns.getColumns("JGenes")
-
-       for col in jGenes {
-         headerName := makeHeaderName(col, "jGene", isSingleCell)
-         cdr3SeqTable.add(col, {header: headerName})
-       }
-
-       cdr3SeqTable.mem("16GiB")
-       cdr3SeqTable.cpu(1)
-       cdr3SeqTableBuilt := cdr3SeqTable.build()
-
-       cdr3VspectratypeCmd := exec.builder().
-         software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
-         mem("16GiB").
-         cpu(1).
-         addFile("cdr3_sequences_input.tsv", cdr3SeqTableBuilt).
-         arg("--input_tsv").arg("cdr3_sequences_input.tsv").
-         arg("--spectratype_tsv").arg("spectratype.tsv").
-         arg("--vj_usage_tsv").arg("vj_usage.tsv") // no trailing dot: the chain is continued conditionally below
-
-       // Add top clonotypes argument and file to the builder if provided
-       if finalClonotypesCsv != undefined {
-         cdr3VspectratypeCmd = cdr3VspectratypeCmd.
-           arg("--final_clonotypes_csv").arg("finalClonotypes.csv").
-           addFile("finalClonotypes.csv", finalClonotypesCsv)
-       }
-
-       cdr3VspectratypeCmd = cdr3VspectratypeCmd. // continue building the command
-         saveFile("spectratype.tsv").
-         saveFile("vj_usage.tsv").
-         printErrStreamToStdout().
-         cache(24 * 60 * 60 * 1000).
-         run()
-
-
-       // Spectratype PFrame structure is [chain][cdr3Length][vGene] -> count
-
-       cdr3VspectratypePf := xsv.importFile(cdr3VspectratypeCmd.getFile("spectratype.tsv"),
-         "tsv", spectratypeConv.getColumns(),
-         {cpu: 1, mem: "16GiB"})
-       outputs["cdr3VspectratypePf"] = pframes.exportFrame(cdr3VspectratypePf)
-
-       // For vjUsage, the structure is [chain][vGene][jGene] -> count
-       vjUsagePf := xsv.importFile(cdr3VspectratypeCmd.getFile("vj_usage.tsv"),
-         "tsv", vjUsageConv.getColumns(),
-         {cpu: 1, mem: "16GiB"})
-       outputs["vjUsagePf"] = pframes.exportFrame(vjUsagePf)
-
-       if args.kabatNumbering == true {
-         ////////// Assembling AA sequences //////////
-         assemSeqTable := pframes.tsvFileBuilder()
-         keyHeader := "clonotypeKey"
-         assemSeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, keyHeader)
-
-         seqCols := columns.getColumns("assemblingAaSeqs")
-         for col in seqCols {
-           headerName := makeHeaderName(col, "assemblingFeature", isSingleCell)
-           assemSeqTable.add(col, {header: headerName})
-         }
-
-         assemSeqTable.mem("16GiB")
-         assemSeqTable.cpu(1)
-         assemSeqTableBuilt := assemSeqTable.build()
-
-         // Convert assembling feature sequences to FASTA via sub-template
-         assemFastaTpl := assets.importTemplate(":assembling-fasta")
-         bulkChain := undefined
-         if !isSingleCell {
-           // Infer the bulk chain from the chain domain of the incoming seq columns (domain uses IGHeavy / IGLight)
-           chainDetected := "KL"
-           for col in seqCols {
-             ch := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g., IGHeavy, IGLight
-             if ch == "IGHeavy" { chainDetected = "H"; break }
-             if ch == "IGLight" { chainDetected = "KL" }
-           }
-           bulkChain = chainDetected
-         }
-         assem := render.create(assemFastaTpl, {
-           inputTsv: assemSeqTableBuilt,
-           keyColumn: "clonotypeKey",
-           finalClonotypesCsv: finalClonotypesCsv,
-           isSingleCell: isSingleCell,
-           bulkChain: bulkChain
-         })
-         //outputs["assemblingAnarci"] = assem.output("anarci", 24 * 60 * 60 * 1000)
-         kabatFile := assem.output("kabat", 24 * 60 * 60 * 1000)
-         // Derive feature name from assembling feature columns (prefer the first column's feature)
-         featName := ""
-         if len(seqCols) > 0 {
-           f := seqCols[0].spec.domain["pl7.app/vdj/feature"]
-           if f != undefined { featName = f }
-         }
-         // Convert kabat.tsv to a PFrame with proper specs (bulk: select heavy/light)
-         kabatPf := xsv.importFile(kabatFile, "tsv", kabatConv.getColumns(datasetSpec, featName, bulkChain), {cpu: 1, mem: "8GiB"})
-         outputs["assemblingKabatPf"] = pframes.exportFrame(kabatPf)
-       }
-     }
-   }
-
-   return {
-     outputs: outputs,
-     exports: {}
-   }
- })
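
Note on the cluster-size matching logic in the removed workflow: the axis-domain comparison is written inline inside the linker-matching loop. The sketch below is illustrative only and is not part of either package version; it shows the same check factored into a standalone Tengo helper, where the name axisDomainsEqual is hypothetical.

// Illustrative sketch, not package code: equality check for the domains of two
// axis specs, mirroring the inline comparison used when matching cluster-size
// columns to linker clusterId axes.
axisDomainsEqual := func(a, b) {
  // One-sided undefined means the domains differ; both undefined counts as equal.
  if is_undefined(a.domain) != is_undefined(b.domain) {
    return false
  }
  if is_undefined(a.domain) {
    return true
  }
  if len(a.domain) != len(b.domain) {
    return false
  }
  for k, v in a.domain {
    if is_undefined(b.domain[k]) || b.domain[k] != v {
      return false
    }
  }
  return true
}

// Usage, mirroring the original condition:
// shouldAdd := clusterSizeClusterIdAxis.name == linkerAxis.name &&
//   clusterSizeClusterIdAxis.type == linkerAxis.type &&
//   axisDomainsEqual(clusterSizeClusterIdAxis, linkerAxis)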