@platforma-open/milaboratories.top-antibodies.workflow 1.16.0 → 1.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,12 +49,17 @@ wf.prepare(func(args){
49
49
  }
50
50
 
51
51
 
52
- // Add linker column
52
+ // Add linker columns
53
53
  bundleBuilder.addMulti({
54
54
  axes: [{ anchor: "main", idx: 1 }], // this will do partial axes match (unlike in the model)
55
55
  annotations: { "pl7.app/isLinkerColumn": "true" },
56
56
  partialAxesMatch: true
57
57
  }, "linkers")
58
+
59
+ // Add clusterColumn as a named anchor for matching in body phase
60
+ if !is_undefined(args.clusterColumn) {
61
+ bundleBuilder.addAnchor("selectedCluster", args.clusterColumn)
62
+ }
58
63
 
59
64
  // Add cluster size columns from clustering blocks
60
65
  bundleBuilder.addMulti({
@@ -116,180 +121,19 @@ wf.body(func(args) {
116
121
  isSingleCell := datasetSpec.axesSpec[1].name == "pl7.app/vdj/scClonotypeKey"
117
122
 
118
123
  ////////// Clonotype Filtering //////////
119
- // Build clonotype table
120
- cloneTable := pframes.parquetFileBuilder()
121
- cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
122
-
123
- // Add Filters to table
124
- addedAxes := []
125
- filterMap := {}
126
- rankingMap := {}
127
- addedCols := false
128
- if len(args.filters) > 0 {
129
- for i, filter in args.filters {
130
- // we check for value presence and for actual pcolumn (cases where upstream block is deleted)
131
- if filter.value != undefined && columns.getColumn(filter.value.column).spec != undefined {
132
- // Columns added here might also be in ranking list, so we add default IDs
133
- cloneTable.add(columns.getColumn(filter.value.column),
134
- {header: "Filter_" + string(i), id: "filter_" + string(i)})
135
- addedCols = true
136
- // Store reference value and filter type associated to this column
137
- filterMap["Filter_" + string(i)] = filter.filter
138
- filterMap["Filter_" + string(i)]["valueType"] = columns.getSpec(filter.value.column).valueType
139
-
140
- // If column does not have main anchor axis we have to include theirs
141
- colsSpec := columns.getSpec(filter.value.column)
142
- axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
143
- if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
144
- for na, ax in colsSpec.axesSpec {
145
- if ax.name != datasetSpec.axesSpec[1].name {
146
- cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
147
- addedAxes = append(addedAxes, ax.name)
148
- }
149
- }
150
- }
151
- }
152
- }
153
- }
154
-
155
- // Add ranking columns to table
156
- validRanks := false
157
- clusterPropertyIdx := 0
158
- clonotypePropertyIdx := 0
159
-
160
- if len(args.rankingOrder) > 0 {
161
- for i, col in args.rankingOrder {
162
- // we check for value presence and for actual pcolumn (cases where upstream block is deleted)
163
- if col.value != undefined && columns.getColumn(col.value.column).spec != undefined {
164
- validRanks = true
165
-
166
- // Process the ranking column to determine header and cluster axis
167
- colsSpec := columns.getSpec(col.value.column)
168
- linkerColumns := columns.getColumns("linkers")
169
- result := utils.processRankingColumn(colsSpec, datasetSpec.axesSpec[1].name, linkerColumns, clusterPropertyIdx)
170
-
171
- header := ""
172
- if result.isClusterProperty {
173
- header = result.header
174
- clusterPropertyIdx = result.newClusterPropertyIdx
175
-
176
- // Add cluster axis with matching index
177
- for na, ax in colsSpec.axesSpec {
178
- if ax.name != datasetSpec.axesSpec[1].name && !slices.hasElement(addedAxes, ax.name) {
179
- axisHeader := "cluster_" + string(result.clusterAxisIdx)
180
- cloneTable.setAxisHeader(ax, axisHeader)
181
- addedAxes = append(addedAxes, ax.name)
182
- }
183
- }
184
- } else {
185
- header = "Col" + string(clonotypePropertyIdx)
186
- clonotypePropertyIdx = clonotypePropertyIdx + 1
187
- }
188
-
189
- cloneTable.add(columns.getColumn(col.value.column), {header: header})
190
- addedCols = true
191
- rankingMap[header] = col.rankingOrder
192
- }
193
- }
194
- }
195
-
196
- // Get linker columns if needed
197
- linkerAxisSpec := {}
198
- linkerClusterIdAxes := []
199
- if len(columns.getColumns("linkers")) > 0 {
200
- for i, col in columns.getColumns("linkers") {
201
- clusterIdAxis := undefined
202
- if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
203
- cloneTable.add(col, {header: "linker." + string(i)})
204
- cloneTable.setAxisHeader(col.spec.axesSpec[0], "cluster_" + string(i))
205
- linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[0]
206
- clusterIdAxis = col.spec.axesSpec[0]
207
- addedCols = true
208
- } else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
209
- cloneTable.add(col, {header: "linker." + string(i)})
210
- cloneTable.setAxisHeader(col.spec.axesSpec[1], "cluster_" + string(i))
211
- linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[1]
212
- clusterIdAxis = col.spec.axesSpec[1]
213
- addedCols = true
214
- }
215
- // Collect clusterId axes from linker columns to match cluster size columns
216
- if !is_undefined(clusterIdAxis) && clusterIdAxis.name == "pl7.app/vdj/clusterId" {
217
- linkerClusterIdAxes = append(linkerClusterIdAxes, clusterIdAxis)
218
- }
219
- }
220
- }
221
-
222
- // Add cluster size columns if available, but only those matching linker columns' clusterId axes
223
- // This ensures we only join columns from the same clustering run
224
- if len(columns.getColumns("clusterSizes")) > 0 {
225
- clusterSizeIdx := 0
226
- for col in columns.getColumns("clusterSizes") {
227
- // Find the clusterId axis in this cluster size column
228
- clusterSizeClusterIdAxis := undefined
229
- for axis in col.spec.axesSpec {
230
- if axis.name == "pl7.app/vdj/clusterId" {
231
- clusterSizeClusterIdAxis = axis
232
- break
233
- }
234
- }
235
-
236
- // Only add if we have linker columns and this cluster size matches one of them
237
- shouldAdd := false
238
- if len(linkerClusterIdAxes) > 0 && !is_undefined(clusterSizeClusterIdAxis) {
239
- // Check if this cluster size column matches any linker's clusterId axis
240
- for linkerAxis in linkerClusterIdAxes {
241
- // Compare domains - they must match exactly for same clustering run
242
- if clusterSizeClusterIdAxis.name == linkerAxis.name &&
243
- clusterSizeClusterIdAxis.type == linkerAxis.type &&
244
- utils.clusterAxisDomainsMatch(clusterSizeClusterIdAxis, linkerAxis) {
245
- shouldAdd = true
246
- break
247
- }
248
- }
249
- }
250
-
251
- // Only add cluster size columns that match a linker column's clustering run
252
- if shouldAdd {
253
- // Trace elements are already present in col.spec from the clustering block.
254
- // deriveLabels (in label.ts) will use these existing trace elements to construct
255
- // distinguishing labels when multiple clustering blocks are joined, similar to
256
- // how LabelTypeFull ('__LABEL__@1') works. The trace includes:
257
- // - Original dataset trace
258
- // - "milaboratories.clonotype-clustering.sequences" trace element
259
- // - "milaboratories.clonotype-clustering.clustering" trace element
260
- // No modification needed - just preserve the existing trace.
261
-
262
- cloneTable.add(col, {header: "clusterSize." + string(clusterSizeIdx)})
263
- addedCols = true
264
- // Add the cluster axis header
265
- for axisIdx, axis in col.spec.axesSpec {
266
- if axis.name != datasetSpec.axesSpec[1].name {
267
- cloneTable.setAxisHeader(axis, "clusterAxis_" + string(clusterSizeIdx) + "_" + string(axisIdx))
268
- }
269
- }
270
- clusterSizeIdx = clusterSizeIdx + 1
271
- }
272
- }
273
- }
274
-
275
- // Fallback: if no columns have been added yet, add at least one CDR3 sequence column
276
- // This ensures the table can be built even when no filters/ranking columns are specified
277
- if !addedCols {
278
- cdr3Sequences := columns.getColumns("cdr3Sequences")
279
- if len(cdr3Sequences) > 0 {
280
- // Add the first CDR3 sequence as a fallback column
281
- cloneTable.add(cdr3Sequences[0], {header: "cdr3_fallback"})
282
- addedCols = true
283
- }
284
- }
124
+ // Initialize and build clone table with all columns
125
+ tableInit := utils.initializeCloneTable(pframes, columns, args, datasetSpec)
126
+ cloneTable := tableInit.cloneTable
127
+ filterMap := tableInit.filterMap
128
+ rankingMap := tableInit.rankingMap
129
+ sortedLinkers := tableInit.sortedLinkers
130
+ clusterColumnHeader := tableInit.clusterColumnHeader
131
+ addedCols := tableInit.addedCols
285
132
 
286
133
  // Continue only if we have at least a column
287
134
  // This condition prevents temporary intermittent errors while filters are
288
135
  // being processed and possibly in other situations too
289
136
  if addedCols {
290
- cloneTable.mem("16GiB")
291
- cloneTable.cpu(1)
292
- cloneTable = cloneTable.build()
293
137
 
294
138
  // Use render.create to call the filter-clonotypes template
295
139
  filterSampleResult := render.create(filterAndSampleTpl, {
@@ -302,7 +146,7 @@ wf.body(func(args) {
302
146
  datasetSpec: datasetSpec,
303
147
  topClonotypes: args.topClonotypes,
304
148
  disableClusterRanking: args.disableClusterRanking,
305
- clusterColumn: args.clusterColumn
149
+ clusterColumn: clusterColumnHeader
306
150
  })
307
151
 
308
152
  // Get the filtered clonotypes from the template result
@@ -312,70 +156,8 @@ wf.body(func(args) {
312
156
  finalClonotypes := filterSampleResult.output("finalClonotypes", 24 * 60 * 60 * 1000)
313
157
 
314
158
  ////////// CDR3 Length Calculation //////////
315
-
316
- cdr3SeqTable := pframes.parquetFileBuilder()
317
- cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
318
-
319
- // Must deal with multiple CDR3 sequences (two for each cell in single cell data)
320
- // Chain will be added in the header as cdr3Sequence.chain and used in python script
321
- // Notice chain is in spec.domain for single cell data and spec.axesSpec[0].domain for bulk data
322
-
323
- // Helper function to add chain information to the headers dynamically
324
- chainMapping := {
325
- "IG": { "A": "Heavy", "B": "Light" },
326
- "TCRAB": { "A": "TRA", "B": "TRB" },
327
- "TCRGD": { "A": "TRG", "B": "TRD" }
328
- }
329
-
330
- makeHeaderName := func(col, baseHeaderName, isSingleCell) {
331
- if isSingleCell {
332
- chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
333
- receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
334
- chainLabel := chainMapping[receptor][chain]
335
- return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
336
- } else {
337
- // For bulk, if chain info is available (e.g. IGH, IGK, IGL)
338
- chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
339
- if chainFromDomain != undefined {
340
- return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
341
- }
342
- }
343
- return baseHeaderName
344
- };
345
-
346
- // Process CDR3 sequences
347
- cdr3Sequences := columns.getColumns("cdr3Sequences")
348
-
349
- for col in cdr3Sequences {
350
- headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
351
- if isSingleCell {
352
- if col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary" {
353
- cdr3SeqTable.add(col, {header: headerName})
354
- }
355
- } else {
356
- cdr3SeqTable.add(col, {header: headerName})
357
- }
358
- }
359
-
360
- // Process V genes
361
- vGenes := columns.getColumns("VGenes")
362
-
363
- for col in vGenes {
364
- headerName := makeHeaderName(col, "vGene", isSingleCell)
365
- cdr3SeqTable.add(col, {header: headerName})
366
- }
367
-
368
- // Process J genes
369
- jGenes := columns.getColumns("JGenes")
370
-
371
- for col in jGenes {
372
- headerName := makeHeaderName(col, "jGene", isSingleCell)
373
- cdr3SeqTable.add(col, {header: headerName})
374
- }
375
-
376
- cdr3SeqTable.mem("16GiB")
377
- cdr3SeqTable.cpu(1)
378
- cdr3SeqTableBuilt := cdr3SeqTable.build()
159
+ // Initialize and build CDR3 sequence table
160
+ cdr3SeqTableBuilt := utils.initializeCdr3SeqTable(pframes, columns, datasetSpec, isSingleCell)
379
161
 
380
162
  cdr3VspectratypeCmd := exec.builder().
381
163
  software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
@@ -416,33 +198,14 @@ wf.body(func(args) {
416
198
 
417
199
  if args.kabatNumbering == true {
418
200
  ////////// Assembling AA sequences //////////
419
- assemSeqTable := pframes.parquetFileBuilder()
420
- keyHeader := "clonotypeKey"
421
- assemSeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, keyHeader)
422
-
423
- seqCols := columns.getColumns("assemblingAaSeqs")
424
- for col in seqCols {
425
- headerName := makeHeaderName(col, "assemblingFeature", isSingleCell)
426
- assemSeqTable.add(col, {header: headerName})
427
- }
428
-
429
- assemSeqTable.mem("16GiB")
430
- assemSeqTable.cpu(1)
431
- assemSeqTableBuilt := assemSeqTable.build()
201
+ // Initialize and build assembling sequence table
202
+ assemInit := utils.initializeAssemSeqTable(pframes, columns, datasetSpec, isSingleCell)
203
+ assemSeqTableBuilt := assemInit.assemSeqTable
204
+ bulkChain := assemInit.bulkChain
205
+ seqCols := assemInit.seqCols
432
206
 
433
207
  // Convert assembling feature sequences to FASTA via sub-template
434
208
  assemFastaTpl := assets.importTemplate(":assembling-fasta")
435
- bulkChain := undefined
436
- if !isSingleCell {
437
- // infer bulk chain by header names of incoming seq columns (domain uses IGHeavy / IGLight)
438
- chainDetected := "KL"
439
- for col in seqCols {
440
- ch := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g., IGHeavy, IGLight
441
- if ch == "IGHeavy" { chainDetected = "H"; break }
442
- if ch == "IGLight" { chainDetected = "KL" }
443
- }
444
- bulkChain = chainDetected
445
- }
446
209
  assem := render.create(assemFastaTpl, {
447
210
  inputTsv: assemSeqTableBuilt,
448
211
  keyColumn: "clonotypeKey",