@platforma-open/milaboratories.top-antibodies.workflow 1.15.0 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,493 +0,0 @@
- // Prerun template for clonotype filtering
- wf := import("@platforma-sdk/workflow-tengo:workflow")
- exec := import("@platforma-sdk/workflow-tengo:exec")
- assets := import("@platforma-sdk/workflow-tengo:assets")
- xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
- pframes := import("@platforma-sdk/workflow-tengo:pframes")
- slices := import("@platforma-sdk/workflow-tengo:slices")
- render := import("@platforma-sdk/workflow-tengo:render")
- ll := import("@platforma-sdk/workflow-tengo:ll")
- kabatConv := import(":pf-kabat-conv")
-
- spectratypeConv := import(":pf-spectratype-conv")
- vjUsageConv := import(":pf-vj-usage-conv")
-
- filterAndSampleTpl := assets.importTemplate(":filter-and-sample")
-
- wf.prepare(func(args){
-   if is_undefined(args.inputAnchor) {
-     return {
-       columns: wf.createPBundleBuilder().build()
-     }
-   }
-   // We need a table with cluster ID (optional) | clonotype id | selected ranking columns
-   bundleBuilder := wf.createPBundleBuilder()
-   bundleBuilder.ignoreMissingDomains() // to make the query work for both bulk and single cell data
-   bundleBuilder.addAnchor("main", args.inputAnchor)
-
-   validRanks := false
-   if len(args.rankingOrder) > 0 {
-     for col in args.rankingOrder {
-       // For cases where the user is selecting the table to filter
-       if col.value != undefined {
-         bundleBuilder.addAnchor(col.value.anchorName, col.value.anchorRef)
-         bundleBuilder.addSingle(col.value.column)
-         validRanks = true
-       }
-     }
-   }
-   if !validRanks {
-     // @TODO: this is a temporary patch for an issue where rankingOrderDefault
-     // is not defined by the time the prerun runs
-     // (the prerun sometimes runs before this variable is ready)
-     if args.rankingOrderDefault.value != undefined {
-       bundleBuilder.addAnchor(args.rankingOrderDefault.value.anchorName,
-         args.rankingOrderDefault.value.anchorRef)
-       bundleBuilder.addSingle(args.rankingOrderDefault.value.column)
-     }
-   }
-
-   // Load filter columns
-   if len(args.filters) > 0 {
-     for filter in args.filters {
-       if filter.value != undefined {
-         bundleBuilder.addAnchor(filter.value.anchorName, filter.value.anchorRef)
-         bundleBuilder.addSingle(filter.value.column)
-       }
-     }
-   }
-
-
-   // Add linker column
-   bundleBuilder.addMulti({
-     axes: [{ anchor: "main", idx: 1 }], // this will do partial axes match (unlike in the model)
-     annotations: { "pl7.app/isLinkerColumn": "true" },
-     partialAxesMatch: true
-   }, "linkers")
-
-   // Add cluster size columns from clustering blocks
-   bundleBuilder.addMulti({
-     name: "pl7.app/vdj/clustering/clusterSize",
-     partialAxesMatch: true
-   }, "clusterSizes")
-
-   // Add CDR3 sequences
-   bundleBuilder.addMulti({
-     axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
-     name: "pl7.app/vdj/sequence",
-     domain: {
-       "pl7.app/alphabet": "aminoacid",
-       "pl7.app/vdj/feature": "CDR3" // Specify CDR3 feature
-     }
-   }, "cdr3Sequences") // New collection name for CDR3 sequences
-
-   // Add V gene
-   bundleBuilder.addMulti({
-     axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
-     name: "pl7.app/vdj/geneHit",
-     domain: {
-       "pl7.app/vdj/reference": "VGene"
-     }
-   }, "VGenes")
-
-   // Add J gene
-   bundleBuilder.addMulti({
-     axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
-     name: "pl7.app/vdj/geneHit",
-     domain: {
-       "pl7.app/vdj/reference": "JGene"
-     }
-   }, "JGenes")
-
-   // Add assembling feature aminoacid sequences (bulk, sc, scFv)
-   bundleBuilder.addMulti({
-     axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
-     annotations: { "pl7.app/vdj/isAssemblingFeature": "true" },
-     domain: { "pl7.app/alphabet": "aminoacid" }
-   }, "assemblingAaSeqs")
-
-   return {
-     columns: bundleBuilder.build()
-   }
- })
-
- wf.body(func(args) {
-   // output containers
-   outputs := {}
-
-   if !is_undefined(args.inputAnchor) {
-     // Input arguments
-     columns := args.columns
-     datasetSpec := columns.getSpec(args.inputAnchor)
-     topClonotypes := args.topClonotypes
-
-     // Needed conditional variable
-     isSingleCell := datasetSpec.axesSpec[1].name == "pl7.app/vdj/scClonotypeKey"
-
-     ////////// Clonotype Filtering //////////
-     // Build clonotype table
-     cloneTable := pframes.csvFileBuilder()
-     cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
-
-     // Add filters to the table
-     addedAxes := []
-     filterMap := {}
-     rankingMap := {}
-     addedCols := false
-     if len(args.filters) > 0 {
-       for i, filter in args.filters {
-         if filter.value != undefined {
-           // Columns added here might also be in the ranking list, so we add default IDs
-           cloneTable.add(columns.getColumn(filter.value.column),
-             {header: "Filter_" + string(i), id: "filter_" + string(i)})
-           addedCols = true
-           // Store the reference value and filter type associated with this column
-           filterMap["Filter_" + string(i)] = filter.filter
-           filterMap["Filter_" + string(i)]["valueType"] = columns.getSpec(filter.value.column).valueType
-
-           // If the column does not have the main anchor axis we have to include its own axes
-           colsSpec := columns.getSpec(filter.value.column)
-           axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
-           if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
-             for na, ax in colsSpec.axesSpec {
-               if ax.name != datasetSpec.axesSpec[1].name {
-                 cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
-                 addedAxes = append(addedAxes, ax.name)
-               }
-             }
-           }
-         }
-       }
-     }
-
-     // Add ranking columns to the table
-     validRanks := false
-     if len(args.rankingOrder) > 0 {
-       for i, col in args.rankingOrder {
-         if col.value != undefined {
-           validRanks = true
-           cloneTable.add(columns.getColumn(col.value.column), {header: "Col" + string(i)})
-           addedCols = true
-           // Store the ranking order for this column
-           rankingMap["Col" + string(i)] = col.rankingOrder
-
-           // If the column does not have the main anchor axis we have to include its own axes
-           colsSpec := columns.getSpec(col.value.column)
-           axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
-           if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
-             for na, ax in colsSpec.axesSpec {
-               if ax.name != datasetSpec.axesSpec[1].name && !slices.hasElement(addedAxes, ax.name) {
-                 cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
-               }
-             }
-           }
-         }
-       }
-     }
-     // If we didn't have any ranking column, or none of them were valid
-     if !validRanks {
-       // @TODO: this is a temporary patch for an issue where rankingOrderDefault
-       // is not defined by the time the prerun runs
-       if args.rankingOrderDefault.value != undefined {
-         i := 0
-         cloneTable.add(columns.getColumn(args.rankingOrderDefault.value.column), {header: "Col" + string(i)})
-         addedCols = true
-         // Store the default ranking order
-         rankingMap["Col" + string(i)] = args.rankingOrderDefault.rankingOrder
-
-         // If the column does not have the main anchor axis we have to include its own axes
-         colsSpec := columns.getSpec(args.rankingOrderDefault.value.column)
-         axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
-         if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
-           for na, ax in colsSpec.axesSpec {
-             if ax.name != datasetSpec.axesSpec[1].name {
-               cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
-             }
-           }
-         }
-       }
-     }
-
-     // Get linker columns if needed
-     linkerAxisSpec := {}
-     linkerClusterIdAxes := []
-     if len(columns.getColumns("linkers")) > 0 {
-       for i, col in columns.getColumns("linkers") {
-         clusterIdAxis := undefined
-         if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
-           cloneTable.add(col, {header: "linker." + string(i)})
-           cloneTable.setAxisHeader(col.spec.axesSpec[0], "cluster_" + string(i))
-           linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[0]
-           clusterIdAxis = col.spec.axesSpec[0]
-         } else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
-           cloneTable.add(col, {header: "linker." + string(i)})
-           cloneTable.setAxisHeader(col.spec.axesSpec[1], "cluster_" + string(i))
-           linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[1]
-           clusterIdAxis = col.spec.axesSpec[1]
-         }
-         // Collect clusterId axes from linker columns to match cluster size columns
-         if !is_undefined(clusterIdAxis) && clusterIdAxis.name == "pl7.app/vdj/clusterId" {
-           linkerClusterIdAxes = append(linkerClusterIdAxes, clusterIdAxis)
-         }
-         addedCols = true
-       }
-     }
-
-     // Add cluster size columns if available, but only those matching linker columns' clusterId axes
-     // This ensures we only join columns from the same clustering run
-     if len(columns.getColumns("clusterSizes")) > 0 {
-       clusterSizeIdx := 0
-       for col in columns.getColumns("clusterSizes") {
-         // Find the clusterId axis in this cluster size column
-         clusterSizeClusterIdAxis := undefined
-         for axis in col.spec.axesSpec {
-           if axis.name == "pl7.app/vdj/clusterId" {
-             clusterSizeClusterIdAxis = axis
-             break
-           }
-         }
-
-         // Only add if we have linker columns and this cluster size matches one of them
-         shouldAdd := false
-         if len(linkerClusterIdAxes) > 0 && !is_undefined(clusterSizeClusterIdAxis) {
-           // Check if this cluster size column matches any linker's clusterId axis
-           for linkerAxis in linkerClusterIdAxes {
-             // Compare domains - they must match exactly for the same clustering run
-             if clusterSizeClusterIdAxis.name == linkerAxis.name &&
-               clusterSizeClusterIdAxis.type == linkerAxis.type {
-               // Check if domains match
-               domainsMatch := true
-               if is_undefined(clusterSizeClusterIdAxis.domain) != is_undefined(linkerAxis.domain) {
-                 domainsMatch = false
-               } else if !is_undefined(clusterSizeClusterIdAxis.domain) && !is_undefined(linkerAxis.domain) {
-                 // Compare domain keys and values
-                 if len(clusterSizeClusterIdAxis.domain) != len(linkerAxis.domain) {
-                   domainsMatch = false
-                 } else {
-                   for k, v in clusterSizeClusterIdAxis.domain {
-                     if is_undefined(linkerAxis.domain[k]) || linkerAxis.domain[k] != v {
-                       domainsMatch = false
-                       break
-                     }
-                   }
-                 }
-               }
-               if domainsMatch {
-                 shouldAdd = true
-                 break
-               }
-             }
-           }
-         }
-
-         // Only add cluster size columns that match a linker column's clustering run
-         if shouldAdd {
-           // Trace elements are already present in col.spec from the clustering block.
-           // deriveLabels (in label.ts) will use these existing trace elements to construct
-           // distinguishing labels when multiple clustering blocks are joined, similar to
-           // how LabelTypeFull ('__LABEL__@1') works. The trace includes:
-           // - Original dataset trace
-           // - "milaboratories.clonotype-clustering.sequences" trace element
-           // - "milaboratories.clonotype-clustering.clustering" trace element
-           // No modification needed - just preserve the existing trace.
-
-           cloneTable.add(col, {header: "clusterSize." + string(clusterSizeIdx)})
-           addedCols = true
-           // Add the cluster axis header
-           for axisIdx, axis in col.spec.axesSpec {
-             if axis.name != datasetSpec.axesSpec[1].name {
-               cloneTable.setAxisHeader(axis, "clusterAxis_" + string(clusterSizeIdx) + "_" + string(axisIdx))
-             }
-           }
-           clusterSizeIdx = clusterSizeIdx + 1
-         }
-       }
-     }
-
-     // Continue only if we have at least one column.
-     // This condition prevents an intermittent error while filters are
-     // still being processed, and possibly in other situations too
-     if addedCols {
-       cloneTable.mem("16GiB")
-       cloneTable.cpu(1)
-       cloneTable = cloneTable.build()
-
-       // Use render.create to call the filter-and-sample template
-       filterSampleResult := render.create(filterAndSampleTpl, {
-         inputAnchor: args.inputAnchor,
-         cloneTable: cloneTable,
-         rankingOrder: args.rankingOrder,
-         rankingOrderDefault: args.rankingOrderDefault,
-         filters: args.filters,
-         filterMap: filterMap,
-         rankingMap: rankingMap,
-         datasetSpec: datasetSpec,
-         topClonotypes: args.topClonotypes
-       })
-
-       // Get the filtered clonotypes from the template result
-       outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
-
-       // Get the filtered and sampled clonotypes CSV from the template result
-       finalClonotypesCsv := filterSampleResult.output("finalClonotypesCsv", 24 * 60 * 60 * 1000)
-       // outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
-
-       ////////// CDR3 Length Calculation //////////
-
-       cdr3SeqTable := pframes.tsvFileBuilder()
-       cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
-
-       // Must deal with multiple CDR3 sequences (two for each cell in single cell data).
-       // The chain will be added to the header as cdr3Sequence.chain and used in the Python script.
-       // Note the chain is in spec.domain for single cell data and in spec.axesSpec[0].domain for bulk data.
-
-       // Helper function to add chain information to the headers dynamically
-       chainMapping := {
-         "IG": { "A": "Heavy", "B": "Light" },
-         "TCRAB": { "A": "TRA", "B": "TRB" },
-         "TCRGD": { "A": "TRG", "B": "TRD" }
-       }
-
-       makeHeaderName := func(col, baseHeaderName, isSingleCell) {
-         if isSingleCell {
-           chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
-           receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
-           chainLabel := chainMapping[receptor][chain]
-           return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
-         } else {
-           // For bulk, if chain info is available (e.g. IGH, IGK, IGL)
-           chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
-           if chainFromDomain != undefined {
-             return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
-           }
-         }
-         return baseHeaderName
-       };
-
-       // Process CDR3 sequences
-       cdr3Sequences := columns.getColumns("cdr3Sequences")
-
-       for col in cdr3Sequences {
-         headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
-         if isSingleCell {
-           if col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary" {
-             cdr3SeqTable.add(col, {header: headerName})
-           }
-         } else {
-           cdr3SeqTable.add(col, {header: headerName})
-         }
-       }
-
-       // Process V genes
-       vGenes := columns.getColumns("VGenes")
-
-       for col in vGenes {
-         headerName := makeHeaderName(col, "vGene", isSingleCell)
-         cdr3SeqTable.add(col, {header: headerName})
-       }
-
-       // Process J genes
-       jGenes := columns.getColumns("JGenes")
-
-       for col in jGenes {
-         headerName := makeHeaderName(col, "jGene", isSingleCell)
-         cdr3SeqTable.add(col, {header: headerName})
-       }
-
-       cdr3SeqTable.mem("16GiB")
-       cdr3SeqTable.cpu(1)
-       cdr3SeqTableBuilt := cdr3SeqTable.build()
-
-       cdr3VspectratypeCmd := exec.builder().
-         software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
-         mem("16GiB").
-         cpu(1).
-         addFile("cdr3_sequences_input.tsv", cdr3SeqTableBuilt).
-         arg("--input_tsv").arg("cdr3_sequences_input.tsv").
-         arg("--spectratype_tsv").arg("spectratype.tsv").
-         arg("--vj_usage_tsv").arg("vj_usage.tsv") // no trailing dot here (the chain is continued below)
-
-       // Add the top clonotypes argument and file to the builder if provided
-       if finalClonotypesCsv != undefined {
-         cdr3VspectratypeCmd = cdr3VspectratypeCmd.
-           arg("--final_clonotypes_csv").arg("finalClonotypes.csv").
-           addFile("finalClonotypes.csv", finalClonotypesCsv)
-       }
-
-       cdr3VspectratypeCmd = cdr3VspectratypeCmd. // continue building the command
-         saveFile("spectratype.tsv").
-         saveFile("vj_usage.tsv").
-         printErrStreamToStdout().
-         cache(24 * 60 * 60 * 1000).
-         run()
-
-
-       // Spectratype PFrame structure is [chain][cdr3Length][vGene] -> count
-
-       cdr3VspectratypePf := xsv.importFile(cdr3VspectratypeCmd.getFile("spectratype.tsv"),
-         "tsv", spectratypeConv.getColumns(),
-         {cpu: 1, mem: "16GiB"})
-       outputs["cdr3VspectratypePf"] = pframes.exportFrame(cdr3VspectratypePf)
-
-       // The vjUsage PFrame structure is [chain][vGene][jGene] -> count
-       vjUsagePf := xsv.importFile(cdr3VspectratypeCmd.getFile("vj_usage.tsv"),
-         "tsv", vjUsageConv.getColumns(),
-         {cpu: 1, mem: "16GiB"})
-       outputs["vjUsagePf"] = pframes.exportFrame(vjUsagePf)
-
-       if args.kabatNumbering == true {
-         ////////// Assembling AA sequences //////////
-         assemSeqTable := pframes.tsvFileBuilder()
-         keyHeader := "clonotypeKey"
-         assemSeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, keyHeader)
-
-         seqCols := columns.getColumns("assemblingAaSeqs")
-         for col in seqCols {
-           headerName := makeHeaderName(col, "assemblingFeature", isSingleCell)
-           assemSeqTable.add(col, {header: headerName})
-         }
-
-         assemSeqTable.mem("16GiB")
-         assemSeqTable.cpu(1)
-         assemSeqTableBuilt := assemSeqTable.build()
-
-         // Convert assembling feature sequences to FASTA via a sub-template
-         assemFastaTpl := assets.importTemplate(":assembling-fasta")
-         bulkChain := undefined
-         if !isSingleCell {
-           // Infer the bulk chain from the incoming seq columns' chain domain (IGHeavy / IGLight)
-           chainDetected := "KL"
-           for col in seqCols {
-             ch := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g., IGHeavy, IGLight
-             if ch == "IGHeavy" { chainDetected = "H"; break }
-             if ch == "IGLight" { chainDetected = "KL" }
-           }
-           bulkChain = chainDetected
-         }
-         assem := render.create(assemFastaTpl, {
-           inputTsv: assemSeqTableBuilt,
-           keyColumn: "clonotypeKey",
-           finalClonotypesCsv: finalClonotypesCsv,
-           isSingleCell: isSingleCell,
-           bulkChain: bulkChain
-         })
-         //outputs["assemblingAnarci"] = assem.output("anarci", 24 * 60 * 60 * 1000)
-         kabatFile := assem.output("kabat", 24 * 60 * 60 * 1000)
-         // Derive the feature name from the assembling feature columns (prefer the first column's feature)
-         featName := ""
-         if len(seqCols) > 0 {
-           f := seqCols[0].spec.domain["pl7.app/vdj/feature"]
-           if f != undefined { featName = f }
-         }
-         // Convert kabat.tsv to a PFrame with proper specs (bulk: select heavy/light)
-         kabatPf := xsv.importFile(kabatFile, "tsv", kabatConv.getColumns(datasetSpec, featName, bulkChain), {cpu: 1, mem: "8GiB"})
-         outputs["assemblingKabatPf"] = pframes.exportFrame(kabatPf)
-       }
-     }
-   }
-
-   return {
-     outputs: outputs,
-     exports: {}
-   }
- })