@platforma-open/milaboratories.top-antibodies.workflow 1.17.0 → 1.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.top-antibodies.workflow@1.17.0 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
3
+ > @platforma-open/milaboratories.top-antibodies.workflow@1.17.1 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/assembling-fasta.tpl.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # @platforma-open/milaboratories.top-antibodies.workflow
2
2
 
3
+ ## 1.17.1
4
+
5
+ ### Patch Changes
6
+
7
+ - 0b57c1b: Show only specific columns by default: Clone, Cluster Id, AA sequence and filter/rank columns
8
+ - Updated dependencies [0b57c1b]
9
+ - @platforma-open/milaboratories.top-antibodies.sample-clonotypes@1.9.1
10
+
3
11
  ## 1.17.0
4
12
 
5
13
  ### Minor Changes
@@ -100,6 +100,7 @@ processRankingColumn := func(colsSpec, datasetMainAxisName, linkerColumns, clust
100
100
 
101
101
  return {
102
102
  isClusterProperty: false,
103
+ isLinkerColumn: false,
103
104
  header: undefined,
104
105
  clusterAxisIdx: undefined,
105
106
  newClusterPropertyIdx: clusterPropertyIdx
@@ -112,11 +113,14 @@ processRankingColumn := func(colsSpec, datasetMainAxisName, linkerColumns, clust
112
113
  header := ""
113
114
  clusterAxisIdx := undefined
114
115
  newClusterPropertyIdx := clusterPropertyIdx
116
+ isLinkerColumn := false
115
117
 
116
118
  if linkerIdx != undefined {
117
119
 
120
+
118
121
  header = "Col_linker." + string(linkerIdx)
119
122
  clusterAxisIdx = linkerIdx
123
+ isLinkerColumn = true
120
124
  } else {
121
125
 
122
126
  header = "Col_cluster." + string(clusterPropertyIdx)
@@ -126,14 +130,430 @@ processRankingColumn := func(colsSpec, datasetMainAxisName, linkerColumns, clust
126
130
 
127
131
  return {
128
132
  isClusterProperty: true,
133
+ isLinkerColumn: isLinkerColumn,
129
134
  header: header,
130
135
  clusterAxisIdx: clusterAxisIdx,
131
136
  newClusterPropertyIdx: newClusterPropertyIdx
132
137
  }
133
138
  }
134
139
 
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+ buildSortedLinkers := func(columns, datasetSpec) {
148
+ allLinkersUnsorted := columns.getColumns("linkers")
149
+
150
+
151
+ sortedLinkers := []
152
+
153
+ for col in allLinkersUnsorted {
154
+ if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
155
+ sortedLinkers = append(sortedLinkers, col)
156
+ }
157
+ }
158
+
159
+ for col in allLinkersUnsorted {
160
+ if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
161
+ sortedLinkers = append(sortedLinkers, col)
162
+ }
163
+ }
164
+
165
+ return sortedLinkers
166
+ }
167
+
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
+
176
+ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
177
+ if is_undefined(args.clusterColumn) {
178
+ return undefined
179
+ }
180
+
181
+
182
+ selectedLinkerSpec := columns.getSpec(args.clusterColumn)
183
+ if is_undefined(selectedLinkerSpec) {
184
+ return undefined
185
+ }
186
+
187
+
188
+ selectedClusterIdAxis := undefined
189
+ for axis in selectedLinkerSpec.axesSpec {
190
+ if axis.name == "pl7.app/vdj/clusterId" {
191
+ selectedClusterIdAxis = axis
192
+ break
193
+ }
194
+ }
195
+
196
+ if is_undefined(selectedClusterIdAxis) {
197
+ return undefined
198
+ }
199
+
200
+
201
+ for linkerIdx, col in sortedLinkers {
202
+
203
+ for axis in col.spec.axesSpec {
204
+ if axis.name == "pl7.app/vdj/clusterId" {
205
+
206
+ if clusterAxisDomainsMatch(selectedClusterIdAxis, axis) {
207
+ return "clusterAxis_" + string(linkerIdx) + "_0"
208
+ }
209
+ }
210
+ }
211
+ }
212
+
213
+ return undefined
214
+ }
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+ initializeCloneTable := func(pframes, columns, args, datasetSpec) {
227
+
228
+ cloneTable := pframes.parquetFileBuilder()
229
+ cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
230
+
231
+
232
+ sortedLinkers := buildSortedLinkers(columns, datasetSpec)
233
+
234
+
235
+ addedAxes := []
236
+ filterMap := {}
237
+ rankingMap := {}
238
+ addedCols := false
239
+
240
+ if len(args.filters) > 0 {
241
+ for i, filter in args.filters {
242
+
243
+ if filter.value != undefined && columns.getColumn(filter.value.column).spec != undefined {
244
+
245
+ cloneTable.add(columns.getColumn(filter.value.column),
246
+ {header: "Filter_" + string(i), id: "filter_" + string(i)})
247
+ addedCols = true
248
+
249
+ filterMap["Filter_" + string(i)] = filter.filter
250
+ filterMap["Filter_" + string(i)]["valueType"] = columns.getSpec(filter.value.column).valueType
251
+
252
+
253
+ colsSpec := columns.getSpec(filter.value.column)
254
+ axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
255
+ if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
256
+ for na, ax in colsSpec.axesSpec {
257
+ if ax.name != datasetSpec.axesSpec[1].name {
258
+ cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
259
+ addedAxes = append(addedAxes, ax.name)
260
+ }
261
+ }
262
+ }
263
+ }
264
+ }
265
+ }
266
+
267
+
268
+ clusterPropertyIdx := 0
269
+ clonotypePropertyIdx := 0
270
+ linkerColumnCounters := {} // Track column count per linker index
271
+
272
+ if len(args.rankingOrder) > 0 {
273
+ for i, col in args.rankingOrder {
274
+
275
+ if col.value != undefined && columns.getColumn(col.value.column).spec != undefined {
276
+
277
+ colsSpec := columns.getSpec(col.value.column)
278
+
279
+ result := processRankingColumn(colsSpec, datasetSpec.axesSpec[1].name, sortedLinkers, clusterPropertyIdx)
280
+
281
+ header := ""
282
+ if result.isClusterProperty {
283
+
284
+ if result.isLinkerColumn {
285
+
286
+ linkerKey := "linker_" + string(result.clusterAxisIdx)
287
+ if is_undefined(linkerColumnCounters[linkerKey]) {
288
+ linkerColumnCounters[linkerKey] = 0
289
+ }
290
+ counter := linkerColumnCounters[linkerKey]
291
+ header = "Col_linker." + string(result.clusterAxisIdx) + "." + string(counter)
292
+ linkerColumnCounters[linkerKey] = counter + 1
293
+ } else {
294
+ header = result.header
295
+ clusterPropertyIdx = result.newClusterPropertyIdx
296
+ }
297
+
298
+
299
+ for na, ax in colsSpec.axesSpec {
300
+ if ax.name != datasetSpec.axesSpec[1].name && !slices.hasElement(addedAxes, ax.name) {
301
+ axisHeader := "cluster_" + string(result.clusterAxisIdx)
302
+ cloneTable.setAxisHeader(ax, axisHeader)
303
+ addedAxes = append(addedAxes, ax.name)
304
+ }
305
+ }
306
+ } else {
307
+ header = "Col" + string(clonotypePropertyIdx)
308
+ clonotypePropertyIdx = clonotypePropertyIdx + 1
309
+ }
310
+
311
+ cloneTable.add(columns.getColumn(col.value.column), {header: header})
312
+ addedCols = true
313
+ rankingMap[header] = col.rankingOrder
314
+ }
315
+ }
316
+ }
317
+
318
+
319
+ linkerClusterIdAxesWithIdx := []
320
+
321
+ for linkerIdx, col in sortedLinkers {
322
+ clusterIdAxis := undefined
323
+ if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
324
+
325
+ cloneTable.add(col, {header: "linker." + string(linkerIdx)})
326
+ cloneTable.setAxisHeader(col.spec.axesSpec[0], "cluster_" + string(linkerIdx))
327
+ clusterIdAxis = col.spec.axesSpec[0]
328
+ addedCols = true
329
+ } else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
330
+
331
+ cloneTable.add(col, {header: "linker." + string(linkerIdx)})
332
+ cloneTable.setAxisHeader(col.spec.axesSpec[1], "cluster_" + string(linkerIdx))
333
+ clusterIdAxis = col.spec.axesSpec[1]
334
+ addedCols = true
335
+ }
336
+
337
+ if !is_undefined(clusterIdAxis) && clusterIdAxis.name == "pl7.app/vdj/clusterId" {
338
+ linkerClusterIdAxesWithIdx = append(linkerClusterIdAxesWithIdx, {
339
+ axis: clusterIdAxis,
340
+ linkerIdx: linkerIdx
341
+ })
342
+ }
343
+ }
344
+
345
+
346
+ if len(columns.getColumns("clusterSizes")) > 0 {
347
+ for col in columns.getColumns("clusterSizes") {
348
+
349
+ clusterSizeClusterIdAxis := undefined
350
+ for axis in col.spec.axesSpec {
351
+ if axis.name == "pl7.app/vdj/clusterId" {
352
+ clusterSizeClusterIdAxis = axis
353
+ break
354
+ }
355
+ }
356
+
357
+
358
+ matchingLinkerIdx := -1
359
+ if len(linkerClusterIdAxesWithIdx) > 0 && !is_undefined(clusterSizeClusterIdAxis) {
360
+ for entry in linkerClusterIdAxesWithIdx {
361
+ linkerAxis := entry.axis
362
+
363
+ if clusterSizeClusterIdAxis.name == linkerAxis.name &&
364
+ clusterSizeClusterIdAxis.type == linkerAxis.type &&
365
+ clusterAxisDomainsMatch(clusterSizeClusterIdAxis, linkerAxis) {
366
+ matchingLinkerIdx = entry.linkerIdx
367
+ break
368
+ }
369
+ }
370
+ }
371
+
372
+
373
+ if matchingLinkerIdx >= 0 {
374
+ cloneTable.add(col, {header: "clusterSize." + string(matchingLinkerIdx)})
375
+ addedCols = true
376
+
377
+ for axisIdx, axis in col.spec.axesSpec {
378
+ if axis.name != datasetSpec.axesSpec[1].name {
379
+ cloneTable.setAxisHeader(axis, "clusterAxis_" + string(matchingLinkerIdx) + "_" + string(axisIdx))
380
+ }
381
+ }
382
+ }
383
+ }
384
+ }
385
+
386
+
387
+ if !addedCols {
388
+ cdr3Sequences := columns.getColumns("cdr3Sequences")
389
+ if len(cdr3Sequences) > 0 {
390
+ cloneTable.add(cdr3Sequences[0], {header: "cdr3_fallback"})
391
+ addedCols = true
392
+ }
393
+ }
394
+
395
+
396
+ builtTable := undefined
397
+ clusterColumnHeader := undefined
398
+ if addedCols {
399
+ cloneTable.mem("16GiB")
400
+ cloneTable.cpu(1)
401
+ builtTable = cloneTable.build()
402
+
403
+
404
+ clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
405
+ }
406
+
407
+ return {
408
+ cloneTable: builtTable,
409
+ filterMap: filterMap,
410
+ rankingMap: rankingMap,
411
+ sortedLinkers: sortedLinkers,
412
+ clusterColumnHeader: clusterColumnHeader,
413
+ addedCols: addedCols
414
+ }
415
+ }
416
+
417
+
418
+
419
+
420
+
421
+
422
+
423
+
424
+
425
+ makeHeaderName := func(col, baseHeaderName, isSingleCell) {
426
+ chainMapping := {
427
+ "IG": { "A": "Heavy", "B": "Light" },
428
+ "TCRAB": { "A": "TRA", "B": "TRB" },
429
+ "TCRGD": { "A": "TRG", "B": "TRD" }
430
+ }
431
+
432
+ if isSingleCell {
433
+ chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
434
+ receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
435
+ chainLabel := chainMapping[receptor][chain]
436
+ return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
437
+ } else {
438
+
439
+ chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
440
+ if chainFromDomain != undefined {
441
+ return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
442
+ }
443
+ }
444
+ return baseHeaderName
445
+ }
446
+
447
+
448
+
449
+
450
+
451
+
452
+
453
+
454
+
455
+
456
+ initializeCdr3SeqTable := func(pframes, columns, datasetSpec, isSingleCell) {
457
+ cdr3SeqTable := pframes.parquetFileBuilder()
458
+ cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
459
+
460
+
461
+ cdr3Sequences := columns.getColumns("cdr3Sequences")
462
+ for col in cdr3Sequences {
463
+ headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
464
+ if isSingleCell {
465
+ if col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary" {
466
+ cdr3SeqTable.add(col, {header: headerName})
467
+ }
468
+ } else {
469
+ cdr3SeqTable.add(col, {header: headerName})
470
+ }
471
+ }
472
+
473
+
474
+ vGenes := columns.getColumns("VGenes")
475
+ for col in vGenes {
476
+ headerName := makeHeaderName(col, "vGene", isSingleCell)
477
+ cdr3SeqTable.add(col, {header: headerName})
478
+ }
479
+
480
+
481
+ jGenes := columns.getColumns("JGenes")
482
+ for col in jGenes {
483
+ headerName := makeHeaderName(col, "jGene", isSingleCell)
484
+ cdr3SeqTable.add(col, {header: headerName})
485
+ }
486
+
487
+ cdr3SeqTable.mem("16GiB")
488
+ cdr3SeqTable.cpu(1)
489
+ return cdr3SeqTable.build()
490
+ }
491
+
492
+
493
+
494
+
495
+
496
+
497
+
498
+ detectBulkChain := func(seqCols) {
499
+ chainDetected := "KL"
500
+ for col in seqCols {
501
+ ch := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g., IGHeavy, IGLight
502
+ if ch == "IGHeavy" {
503
+ chainDetected = "H"
504
+ break
505
+ }
506
+ if ch == "IGLight" {
507
+ chainDetected = "KL"
508
+ }
509
+ }
510
+ return chainDetected
511
+ }
512
+
513
+
514
+
515
+
516
+
517
+
518
+
519
+
520
+
521
+
522
+ initializeAssemSeqTable := func(pframes, columns, datasetSpec, isSingleCell) {
523
+ assemSeqTable := pframes.parquetFileBuilder()
524
+ assemSeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
525
+
526
+ seqCols := columns.getColumns("assemblingAaSeqs")
527
+ for col in seqCols {
528
+ headerName := makeHeaderName(col, "assemblingFeature", isSingleCell)
529
+ assemSeqTable.add(col, {header: headerName})
530
+ }
531
+
532
+ assemSeqTable.mem("16GiB")
533
+ assemSeqTable.cpu(1)
534
+
535
+
536
+ bulkChain := undefined
537
+ if !isSingleCell {
538
+ bulkChain = detectBulkChain(seqCols)
539
+ }
540
+
541
+ return {
542
+ assemSeqTable: assemSeqTable.build(),
543
+ bulkChain: bulkChain,
544
+ seqCols: seqCols
545
+ }
546
+ }
547
+
135
548
  export {
136
549
  clusterAxisDomainsMatch: clusterAxisDomainsMatch,
137
550
  findMatchingLinkerIndex: findMatchingLinkerIndex,
138
- processRankingColumn: processRankingColumn
551
+ processRankingColumn: processRankingColumn,
552
+ buildSortedLinkers: buildSortedLinkers,
553
+ resolveClusterColumnHeader: resolveClusterColumnHeader,
554
+ initializeCloneTable: initializeCloneTable,
555
+ makeHeaderName: makeHeaderName,
556
+ initializeCdr3SeqTable: initializeCdr3SeqTable,
557
+ detectBulkChain: detectBulkChain,
558
+ initializeAssemSeqTable: initializeAssemSeqTable
139
559
  }
Binary file
package/package.json CHANGED
@@ -1,25 +1,23 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.top-antibodies.workflow",
3
- "version": "1.17.0",
3
+ "version": "1.17.1",
4
4
  "type": "module",
5
5
  "description": "Block Workflow",
6
6
  "dependencies": {
7
- "@platforma-sdk/workflow-tengo": "^5.7.3",
7
+ "@platforma-sdk/workflow-tengo": "5.7.3",
8
8
  "@platforma-open/milaboratories.software-anarci": "^0.0.3",
9
- "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "1.9.0",
9
+ "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "1.9.1",
10
+ "@platforma-open/milaboratories.top-antibodies.spectratype": "1.8.0",
10
11
  "@platforma-open/milaboratories.top-antibodies.umap": "1.2.1",
11
12
  "@platforma-open/milaboratories.top-antibodies.assembling-fasta": "1.3.0",
12
- "@platforma-open/milaboratories.top-antibodies.spectratype": "1.8.0",
13
13
  "@platforma-open/milaboratories.top-antibodies.anarci-kabat": "1.3.0"
14
14
  },
15
15
  "devDependencies": {
16
- "@platforma-sdk/tengo-builder": "^2.4.2",
17
- "@platforma-sdk/test": "^1.48.8",
18
- "vitest": "^2.1.8"
16
+ "@platforma-sdk/tengo-builder": "2.4.8"
19
17
  },
20
18
  "scripts": {
21
19
  "build": "rm -rf dist && pl-tengo check && pl-tengo build",
22
- "test": "vitest",
23
- "format": "/usr/bin/env emacs --script ./format.el"
20
+ "format": "/usr/bin/env emacs --script ./format.el",
21
+ "do-pack": "rm -f *.tgz && pnpm pack && mv *.tgz package.tgz"
24
22
  }
25
23
  }
@@ -49,12 +49,17 @@ wf.prepare(func(args){
49
49
  }
50
50
 
51
51
 
52
- // Add linker column
52
+ // Add linker columns
53
53
  bundleBuilder.addMulti({
54
54
  axes: [{ anchor: "main", idx: 1 }], // this will do partial axes match (unlike in the model)
55
55
  annotations: { "pl7.app/isLinkerColumn": "true" },
56
56
  partialAxesMatch: true
57
57
  }, "linkers")
58
+
59
+ // Add clusterColumn as a named anchor for matching in body phase
60
+ if !is_undefined(args.clusterColumn) {
61
+ bundleBuilder.addAnchor("selectedCluster", args.clusterColumn)
62
+ }
58
63
 
59
64
  // Add cluster size columns from clustering blocks
60
65
  bundleBuilder.addMulti({
@@ -116,180 +121,19 @@ wf.body(func(args) {
116
121
  isSingleCell := datasetSpec.axesSpec[1].name == "pl7.app/vdj/scClonotypeKey"
117
122
 
118
123
  ////////// Clonotype Filtering //////////
119
- // Build clonotype table
120
- cloneTable := pframes.parquetFileBuilder()
121
- cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
122
-
123
- // Add Filters to table
124
- addedAxes := []
125
- filterMap := {}
126
- rankingMap := {}
127
- addedCols := false
128
- if len(args.filters) > 0 {
129
- for i, filter in args.filters {
130
- // we check for value presence and for actual pcolumn (cases where upstream block is deleted)
131
- if filter.value != undefined && columns.getColumn(filter.value.column).spec != undefined {
132
- // Columns added here might also be in ranking list, so we add default IDs
133
- cloneTable.add(columns.getColumn(filter.value.column),
134
- {header: "Filter_" + string(i), id: "filter_" + string(i)})
135
- addedCols = true
136
- // Store reference value and filter type associated to this column
137
- filterMap["Filter_" + string(i)] = filter.filter
138
- filterMap["Filter_" + string(i)]["valueType"] = columns.getSpec(filter.value.column).valueType
139
-
140
- // If column does not have main anchor axis we have to include theirs
141
- colsSpec := columns.getSpec(filter.value.column)
142
- axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
143
- if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
144
- for na, ax in colsSpec.axesSpec {
145
- if ax.name != datasetSpec.axesSpec[1].name {
146
- cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
147
- addedAxes = append(addedAxes, ax.name)
148
- }
149
- }
150
- }
151
- }
152
- }
153
- }
154
-
155
- // Add ranking columns to table
156
- validRanks := false
157
- clusterPropertyIdx := 0
158
- clonotypePropertyIdx := 0
159
-
160
- if len(args.rankingOrder) > 0 {
161
- for i, col in args.rankingOrder {
162
- // we check for value presence and for actual pcolumn (cases where upstream block is deleted)
163
- if col.value != undefined && columns.getColumn(col.value.column).spec != undefined {
164
- validRanks = true
165
-
166
- // Process the ranking column to determine header and cluster axis
167
- colsSpec := columns.getSpec(col.value.column)
168
- linkerColumns := columns.getColumns("linkers")
169
- result := utils.processRankingColumn(colsSpec, datasetSpec.axesSpec[1].name, linkerColumns, clusterPropertyIdx)
170
-
171
- header := ""
172
- if result.isClusterProperty {
173
- header = result.header
174
- clusterPropertyIdx = result.newClusterPropertyIdx
175
-
176
- // Add cluster axis with matching index
177
- for na, ax in colsSpec.axesSpec {
178
- if ax.name != datasetSpec.axesSpec[1].name && !slices.hasElement(addedAxes, ax.name) {
179
- axisHeader := "cluster_" + string(result.clusterAxisIdx)
180
- cloneTable.setAxisHeader(ax, axisHeader)
181
- addedAxes = append(addedAxes, ax.name)
182
- }
183
- }
184
- } else {
185
- header = "Col" + string(clonotypePropertyIdx)
186
- clonotypePropertyIdx = clonotypePropertyIdx + 1
187
- }
188
-
189
- cloneTable.add(columns.getColumn(col.value.column), {header: header})
190
- addedCols = true
191
- rankingMap[header] = col.rankingOrder
192
- }
193
- }
194
- }
195
-
196
- // Get linker columns if needed
197
- linkerAxisSpec := {}
198
- linkerClusterIdAxes := []
199
- if len(columns.getColumns("linkers")) > 0 {
200
- for i, col in columns.getColumns("linkers") {
201
- clusterIdAxis := undefined
202
- if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
203
- cloneTable.add(col, {header: "linker." + string(i)})
204
- cloneTable.setAxisHeader(col.spec.axesSpec[0], "cluster_" + string(i))
205
- linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[0]
206
- clusterIdAxis = col.spec.axesSpec[0]
207
- addedCols = true
208
- } else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
209
- cloneTable.add(col, {header: "linker." + string(i)})
210
- cloneTable.setAxisHeader(col.spec.axesSpec[1], "cluster_" + string(i))
211
- linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[1]
212
- clusterIdAxis = col.spec.axesSpec[1]
213
- addedCols = true
214
- }
215
- // Collect clusterId axes from linker columns to match cluster size columns
216
- if !is_undefined(clusterIdAxis) && clusterIdAxis.name == "pl7.app/vdj/clusterId" {
217
- linkerClusterIdAxes = append(linkerClusterIdAxes, clusterIdAxis)
218
- }
219
- }
220
- }
221
-
222
- // Add cluster size columns if available, but only those matching linker columns' clusterId axes
223
- // This ensures we only join columns from the same clustering run
224
- if len(columns.getColumns("clusterSizes")) > 0 {
225
- clusterSizeIdx := 0
226
- for col in columns.getColumns("clusterSizes") {
227
- // Find the clusterId axis in this cluster size column
228
- clusterSizeClusterIdAxis := undefined
229
- for axis in col.spec.axesSpec {
230
- if axis.name == "pl7.app/vdj/clusterId" {
231
- clusterSizeClusterIdAxis = axis
232
- break
233
- }
234
- }
235
-
236
- // Only add if we have linker columns and this cluster size matches one of them
237
- shouldAdd := false
238
- if len(linkerClusterIdAxes) > 0 && !is_undefined(clusterSizeClusterIdAxis) {
239
- // Check if this cluster size column matches any linker's clusterId axis
240
- for linkerAxis in linkerClusterIdAxes {
241
- // Compare domains - they must match exactly for same clustering run
242
- if clusterSizeClusterIdAxis.name == linkerAxis.name &&
243
- clusterSizeClusterIdAxis.type == linkerAxis.type &&
244
- utils.clusterAxisDomainsMatch(clusterSizeClusterIdAxis, linkerAxis) {
245
- shouldAdd = true
246
- break
247
- }
248
- }
249
- }
250
-
251
- // Only add cluster size columns that match a linker column's clustering run
252
- if shouldAdd {
253
- // Trace elements are already present in col.spec from the clustering block.
254
- // deriveLabels (in label.ts) will use these existing trace elements to construct
255
- // distinguishing labels when multiple clustering blocks are joined, similar to
256
- // how LabelTypeFull ('__LABEL__@1') works. The trace includes:
257
- // - Original dataset trace
258
- // - "milaboratories.clonotype-clustering.sequences" trace element
259
- // - "milaboratories.clonotype-clustering.clustering" trace element
260
- // No modification needed - just preserve the existing trace.
261
-
262
- cloneTable.add(col, {header: "clusterSize." + string(clusterSizeIdx)})
263
- addedCols = true
264
- // Add the cluster axis header
265
- for axisIdx, axis in col.spec.axesSpec {
266
- if axis.name != datasetSpec.axesSpec[1].name {
267
- cloneTable.setAxisHeader(axis, "clusterAxis_" + string(clusterSizeIdx) + "_" + string(axisIdx))
268
- }
269
- }
270
- clusterSizeIdx = clusterSizeIdx + 1
271
- }
272
- }
273
- }
274
-
275
- // Fallback: if no columns have been added yet, add at least one CDR3 sequence column
276
- // This ensures the table can be built even when no filters/ranking columns are specified
277
- if !addedCols {
278
- cdr3Sequences := columns.getColumns("cdr3Sequences")
279
- if len(cdr3Sequences) > 0 {
280
- // Add the first CDR3 sequence as a fallback column
281
- cloneTable.add(cdr3Sequences[0], {header: "cdr3_fallback"})
282
- addedCols = true
283
- }
284
- }
124
+ // Initialize and build clone table with all columns
125
+ tableInit := utils.initializeCloneTable(pframes, columns, args, datasetSpec)
126
+ cloneTable := tableInit.cloneTable
127
+ filterMap := tableInit.filterMap
128
+ rankingMap := tableInit.rankingMap
129
+ sortedLinkers := tableInit.sortedLinkers
130
+ clusterColumnHeader := tableInit.clusterColumnHeader
131
+ addedCols := tableInit.addedCols
285
132
 
286
133
  // Continue only if we have at least a column
287
134
  // This condition prevents transient, intermittent errors while filters are
288
135
  // being processed and possibly in other situations too
289
136
  if addedCols {
290
- cloneTable.mem("16GiB")
291
- cloneTable.cpu(1)
292
- cloneTable = cloneTable.build()
293
137
 
294
138
  // Use render.create to call the filter-clonotypes template
295
139
  filterSampleResult := render.create(filterAndSampleTpl, {
@@ -302,7 +146,7 @@ wf.body(func(args) {
302
146
  datasetSpec: datasetSpec,
303
147
  topClonotypes: args.topClonotypes,
304
148
  disableClusterRanking: args.disableClusterRanking,
305
- clusterColumn: args.clusterColumn
149
+ clusterColumn: clusterColumnHeader
306
150
  })
307
151
 
308
152
  // Get the filtered clonotypes from the template result
@@ -312,70 +156,8 @@ wf.body(func(args) {
312
156
  finalClonotypes := filterSampleResult.output("finalClonotypes", 24 * 60 * 60 * 1000)
313
157
 
314
158
  ////////// CDR3 Length Calculation //////////
315
-
316
- cdr3SeqTable := pframes.parquetFileBuilder()
317
- cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
318
-
319
- // Must deal with multiple CDR3 sequences (two for each cell in single cell data)
320
- // Chain will be added in the header as cdr3Sequence.chain and used in python script
321
- // Notice chain is in spec.domain for single cell data and spec.axesSpec[0].domain for bulk data
322
-
323
- // Helper function to add chain information to the headers dynamically
324
- chainMapping := {
325
- "IG": { "A": "Heavy", "B": "Light" },
326
- "TCRAB": { "A": "TRA", "B": "TRB" },
327
- "TCRGD": { "A": "TRG", "B": "TRD" }
328
- }
329
-
330
- makeHeaderName := func(col, baseHeaderName, isSingleCell) {
331
- if isSingleCell {
332
- chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
333
- receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
334
- chainLabel := chainMapping[receptor][chain]
335
- return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
336
- } else {
337
- // For bulk, if chain info is available (e.g. IGH, IGK, IGL)
338
- chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
339
- if chainFromDomain != undefined {
340
- return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
341
- }
342
- }
343
- return baseHeaderName
344
- };
345
-
346
- // Process CDR3 sequences
347
- cdr3Sequences := columns.getColumns("cdr3Sequences")
348
-
349
- for col in cdr3Sequences {
350
- headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
351
- if isSingleCell {
352
- if col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary" {
353
- cdr3SeqTable.add(col, {header: headerName})
354
- }
355
- } else {
356
- cdr3SeqTable.add(col, {header: headerName})
357
- }
358
- }
359
-
360
- // Process V genes
361
- vGenes := columns.getColumns("VGenes")
362
-
363
- for col in vGenes {
364
- headerName := makeHeaderName(col, "vGene", isSingleCell)
365
- cdr3SeqTable.add(col, {header: headerName})
366
- }
367
-
368
- // Process J genes
369
- jGenes := columns.getColumns("JGenes")
370
-
371
- for col in jGenes {
372
- headerName := makeHeaderName(col, "jGene", isSingleCell)
373
- cdr3SeqTable.add(col, {header: headerName})
374
- }
375
-
376
- cdr3SeqTable.mem("16GiB")
377
- cdr3SeqTable.cpu(1)
378
- cdr3SeqTableBuilt := cdr3SeqTable.build()
159
+ // Initialize and build CDR3 sequence table
160
+ cdr3SeqTableBuilt := utils.initializeCdr3SeqTable(pframes, columns, datasetSpec, isSingleCell)
379
161
 
380
162
  cdr3VspectratypeCmd := exec.builder().
381
163
  software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
@@ -416,33 +198,14 @@ wf.body(func(args) {
416
198
 
417
199
  if args.kabatNumbering == true {
418
200
  ////////// Assembling AA sequences //////////
419
- assemSeqTable := pframes.parquetFileBuilder()
420
- keyHeader := "clonotypeKey"
421
- assemSeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, keyHeader)
422
-
423
- seqCols := columns.getColumns("assemblingAaSeqs")
424
- for col in seqCols {
425
- headerName := makeHeaderName(col, "assemblingFeature", isSingleCell)
426
- assemSeqTable.add(col, {header: headerName})
427
- }
428
-
429
- assemSeqTable.mem("16GiB")
430
- assemSeqTable.cpu(1)
431
- assemSeqTableBuilt := assemSeqTable.build()
201
+ // Initialize and build assembling sequence table
202
+ assemInit := utils.initializeAssemSeqTable(pframes, columns, datasetSpec, isSingleCell)
203
+ assemSeqTableBuilt := assemInit.assemSeqTable
204
+ bulkChain := assemInit.bulkChain
205
+ seqCols := assemInit.seqCols
432
206
 
433
207
  // Convert assembling feature sequences to FASTA via sub-template
434
208
  assemFastaTpl := assets.importTemplate(":assembling-fasta")
435
- bulkChain := undefined
436
- if !isSingleCell {
437
- // infer bulk chain by header names of incoming seq columns (domain uses IGHeavy / IGLight)
438
- chainDetected := "KL"
439
- for col in seqCols {
440
- ch := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g., IGHeavy, IGLight
441
- if ch == "IGHeavy" { chainDetected = "H"; break }
442
- if ch == "IGLight" { chainDetected = "KL" }
443
- }
444
- bulkChain = chainDetected
445
- }
446
209
  assem := render.create(assemFastaTpl, {
447
210
  inputTsv: assemSeqTableBuilt,
448
211
  keyColumn: "clonotypeKey",
@@ -90,7 +90,7 @@ findMatchingLinkerIndex := func(colsSpec, linkerColumns) {
90
90
  * @param datasetMainAxisName - Name of the main dataset axis (e.g., clonotype axis)
91
91
  * @param linkerColumns - List of linker columns to match against
92
92
  * @param clusterPropertyIdx - Current cluster property index counter
93
- * @return Map with keys: header, clusterAxisIdx, newClusterPropertyIdx
93
+ * @return Map with keys: isClusterProperty, isLinkerColumn, header, clusterAxisIdx, newClusterPropertyIdx
94
94
  */
95
95
  processRankingColumn := func(colsSpec, datasetMainAxisName, linkerColumns, clusterPropertyIdx) {
96
96
  axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
@@ -100,6 +100,7 @@ processRankingColumn := func(colsSpec, datasetMainAxisName, linkerColumns, clust
100
100
  // This is a clonotype property
101
101
  return {
102
102
  isClusterProperty: false,
103
+ isLinkerColumn: false,
103
104
  header: undefined,
104
105
  clusterAxisIdx: undefined,
105
106
  newClusterPropertyIdx: clusterPropertyIdx
@@ -112,11 +113,14 @@ processRankingColumn := func(colsSpec, datasetMainAxisName, linkerColumns, clust
112
113
  header := ""
113
114
  clusterAxisIdx := undefined
114
115
  newClusterPropertyIdx := clusterPropertyIdx
116
+ isLinkerColumn := false
115
117
 
116
118
  if linkerIdx != undefined {
117
- // This column belongs to a linker - use the linker index
119
+ // This column belongs to a linker - header will be generated by caller
120
+ // to ensure uniqueness across multiple columns from same linker
118
121
  header = "Col_linker." + string(linkerIdx)
119
122
  clusterAxisIdx = linkerIdx
123
+ isLinkerColumn = true
120
124
  } else {
121
125
  // This is a generic cluster property (not associated with any linker)
122
126
  header = "Col_cluster." + string(clusterPropertyIdx)
@@ -126,14 +130,430 @@ processRankingColumn := func(colsSpec, datasetMainAxisName, linkerColumns, clust
126
130
 
127
131
  return {
128
132
  isClusterProperty: true,
133
+ isLinkerColumn: isLinkerColumn,
129
134
  header: header,
130
135
  clusterAxisIdx: clusterAxisIdx,
131
136
  newClusterPropertyIdx: newClusterPropertyIdx
132
137
  }
133
138
  }
134
139
 
140
+ /**
141
+ * Builds sorted linker list in the same order as model.
142
+ *
143
+ * @param columns - PBundle containing all columns
144
+ * @param datasetSpec - Dataset specification with axes
145
+ * @return List of linker columns sorted by axis position
146
+ */
147
+ buildSortedLinkers := func(columns, datasetSpec) {
148
+ allLinkersUnsorted := columns.getColumns("linkers")
149
+
150
+ // Collect linkers by axis position (same iteration order as model)
151
+ sortedLinkers := []
152
+ // First: linkers where clonotypeKey is in SECOND axis
153
+ for col in allLinkersUnsorted {
154
+ if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
155
+ sortedLinkers = append(sortedLinkers, col)
156
+ }
157
+ }
158
+ // Then: linkers where clonotypeKey is in FIRST axis
159
+ for col in allLinkersUnsorted {
160
+ if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
161
+ sortedLinkers = append(sortedLinkers, col)
162
+ }
163
+ }
164
+
165
+ return sortedLinkers
166
+ }
167
+
168
+ /**
169
+ * Resolves cluster column reference to header name by matching against sortedLinkers.
170
+ *
171
+ * @param args - Arguments containing clusterColumn
172
+ * @param columns - PBundle containing all columns
173
+ * @param sortedLinkers - List of linker columns in proper order
174
+ * @return Cluster column header string or undefined
175
+ */
176
+ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
177
+ if is_undefined(args.clusterColumn) {
178
+ return undefined
179
+ }
180
+
181
+ // Get the spec for the selected cluster column
182
+ selectedLinkerSpec := columns.getSpec(args.clusterColumn)
183
+ if is_undefined(selectedLinkerSpec) {
184
+ return undefined
185
+ }
186
+
187
+ // Find the clusterId axis in the selected linker
188
+ selectedClusterIdAxis := undefined
189
+ for axis in selectedLinkerSpec.axesSpec {
190
+ if axis.name == "pl7.app/vdj/clusterId" {
191
+ selectedClusterIdAxis = axis
192
+ break
193
+ }
194
+ }
195
+
196
+ if is_undefined(selectedClusterIdAxis) {
197
+ return undefined
198
+ }
199
+
200
+ // Find matching linker by comparing clusterId axis domains
201
+ for linkerIdx, col in sortedLinkers {
202
+ // Get the clusterId axis from this linker
203
+ for axis in col.spec.axesSpec {
204
+ if axis.name == "pl7.app/vdj/clusterId" {
205
+ // Use clusterAxisDomainsMatch for proper domain comparison
206
+ if clusterAxisDomainsMatch(selectedClusterIdAxis, axis) {
207
+ return "clusterAxis_" + string(linkerIdx) + "_0"
208
+ }
209
+ }
210
+ }
211
+ }
212
+
213
+ return undefined
214
+ }
215
+
216
+ /**
217
+ * Initializes and builds complete clone table with all columns.
218
+ * Handles filters, ranking columns, linkers, cluster sizes, and fallback columns.
219
+ *
220
+ * @param pframes - PFrames import
221
+ * @param columns - PBundle containing all columns
222
+ * @param args - Arguments containing filters, rankingOrder, clusterColumn
223
+ * @param datasetSpec - Dataset specification with axes
224
+ * @return Map with keys: cloneTable, filterMap, rankingMap, sortedLinkers, clusterColumnHeader, addedCols
225
+ */
226
+ initializeCloneTable := func(pframes, columns, args, datasetSpec) {
227
+ // Build clonotype table
228
+ cloneTable := pframes.parquetFileBuilder()
229
+ cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
230
+
231
+ // Build linker list in SAME ORDER as model
232
+ sortedLinkers := buildSortedLinkers(columns, datasetSpec)
233
+
234
+ // Add Filters to table
235
+ addedAxes := []
236
+ filterMap := {}
237
+ rankingMap := {}
238
+ addedCols := false
239
+
240
+ if len(args.filters) > 0 {
241
+ for i, filter in args.filters {
242
+ // we check for value presence and for actual pcolumn (cases where upstream block is deleted)
243
+ if filter.value != undefined && columns.getColumn(filter.value.column).spec != undefined {
244
+ // Columns added here might also be in ranking list, so we add default IDs
245
+ cloneTable.add(columns.getColumn(filter.value.column),
246
+ {header: "Filter_" + string(i), id: "filter_" + string(i)})
247
+ addedCols = true
248
+ // Store reference value and filter type associated to this column
249
+ filterMap["Filter_" + string(i)] = filter.filter
250
+ filterMap["Filter_" + string(i)]["valueType"] = columns.getSpec(filter.value.column).valueType
251
+
252
+ // If column does not have main anchor axis we have to include theirs
253
+ colsSpec := columns.getSpec(filter.value.column)
254
+ axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
255
+ if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
256
+ for na, ax in colsSpec.axesSpec {
257
+ if ax.name != datasetSpec.axesSpec[1].name {
258
+ cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
259
+ addedAxes = append(addedAxes, ax.name)
260
+ }
261
+ }
262
+ }
263
+ }
264
+ }
265
+ }
266
+
267
+ // Add ranking columns to table
268
+ clusterPropertyIdx := 0
269
+ clonotypePropertyIdx := 0
270
+ linkerColumnCounters := {} // Track column count per linker index
271
+
272
+ if len(args.rankingOrder) > 0 {
273
+ for i, col in args.rankingOrder {
274
+ // we check for value presence and for actual pcolumn (cases where upstream block is deleted)
275
+ if col.value != undefined && columns.getColumn(col.value.column).spec != undefined {
276
+ // Process the ranking column to determine header and cluster axis
277
+ colsSpec := columns.getSpec(col.value.column)
278
+ // Use sortedLinkers for consistent ordering with model
279
+ result := processRankingColumn(colsSpec, datasetSpec.axesSpec[1].name, sortedLinkers, clusterPropertyIdx)
280
+
281
+ header := ""
282
+ if result.isClusterProperty {
283
+ // Check if this column is from a linker
284
+ if result.isLinkerColumn {
285
+ // Track and use counter for this specific linker
286
+ linkerKey := "linker_" + string(result.clusterAxisIdx)
287
+ if is_undefined(linkerColumnCounters[linkerKey]) {
288
+ linkerColumnCounters[linkerKey] = 0
289
+ }
290
+ counter := linkerColumnCounters[linkerKey]
291
+ header = "Col_linker." + string(result.clusterAxisIdx) + "." + string(counter)
292
+ linkerColumnCounters[linkerKey] = counter + 1
293
+ } else {
294
+ header = result.header
295
+ clusterPropertyIdx = result.newClusterPropertyIdx
296
+ }
297
+
298
+ // Add cluster axis with matching index
299
+ for na, ax in colsSpec.axesSpec {
300
+ if ax.name != datasetSpec.axesSpec[1].name && !slices.hasElement(addedAxes, ax.name) {
301
+ axisHeader := "cluster_" + string(result.clusterAxisIdx)
302
+ cloneTable.setAxisHeader(ax, axisHeader)
303
+ addedAxes = append(addedAxes, ax.name)
304
+ }
305
+ }
306
+ } else {
307
+ header = "Col" + string(clonotypePropertyIdx)
308
+ clonotypePropertyIdx = clonotypePropertyIdx + 1
309
+ }
310
+
311
+ cloneTable.add(columns.getColumn(col.value.column), {header: header})
312
+ addedCols = true
313
+ rankingMap[header] = col.rankingOrder
314
+ }
315
+ }
316
+ }
317
+
318
+ // Get linker columns and add them to the table
319
+ linkerClusterIdAxesWithIdx := []
320
+
321
+ for linkerIdx, col in sortedLinkers {
322
+ clusterIdAxis := undefined
323
+ if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
324
+ // clonotypeKey is in second axis
325
+ cloneTable.add(col, {header: "linker." + string(linkerIdx)})
326
+ cloneTable.setAxisHeader(col.spec.axesSpec[0], "cluster_" + string(linkerIdx))
327
+ clusterIdAxis = col.spec.axesSpec[0]
328
+ addedCols = true
329
+ } else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
330
+ // clonotypeKey is in first axis
331
+ cloneTable.add(col, {header: "linker." + string(linkerIdx)})
332
+ cloneTable.setAxisHeader(col.spec.axesSpec[1], "cluster_" + string(linkerIdx))
333
+ clusterIdAxis = col.spec.axesSpec[1]
334
+ addedCols = true
335
+ }
336
+ // Collect clusterId axes from linker columns to match cluster size columns
337
+ if !is_undefined(clusterIdAxis) && clusterIdAxis.name == "pl7.app/vdj/clusterId" {
338
+ linkerClusterIdAxesWithIdx = append(linkerClusterIdAxesWithIdx, {
339
+ axis: clusterIdAxis,
340
+ linkerIdx: linkerIdx
341
+ })
342
+ }
343
+ }
344
+
345
+ // Add cluster size columns if available, matching linker columns' clusterId axes
346
+ if len(columns.getColumns("clusterSizes")) > 0 {
347
+ for col in columns.getColumns("clusterSizes") {
348
+ // Find the clusterId axis in this cluster size column
349
+ clusterSizeClusterIdAxis := undefined
350
+ for axis in col.spec.axesSpec {
351
+ if axis.name == "pl7.app/vdj/clusterId" {
352
+ clusterSizeClusterIdAxis = axis
353
+ break
354
+ }
355
+ }
356
+
357
+ // Find matching linker index
358
+ matchingLinkerIdx := -1
359
+ if len(linkerClusterIdAxesWithIdx) > 0 && !is_undefined(clusterSizeClusterIdAxis) {
360
+ for entry in linkerClusterIdAxesWithIdx {
361
+ linkerAxis := entry.axis
362
+ // Compare domains - they must match exactly for same clustering run
363
+ if clusterSizeClusterIdAxis.name == linkerAxis.name &&
364
+ clusterSizeClusterIdAxis.type == linkerAxis.type &&
365
+ clusterAxisDomainsMatch(clusterSizeClusterIdAxis, linkerAxis) {
366
+ matchingLinkerIdx = entry.linkerIdx
367
+ break
368
+ }
369
+ }
370
+ }
371
+
372
+ // Only add cluster size columns that match a linker column's clustering run
373
+ if matchingLinkerIdx >= 0 {
374
+ cloneTable.add(col, {header: "clusterSize." + string(matchingLinkerIdx)})
375
+ addedCols = true
376
+ // Add the cluster axis header using matching linker index
377
+ for axisIdx, axis in col.spec.axesSpec {
378
+ if axis.name != datasetSpec.axesSpec[1].name {
379
+ cloneTable.setAxisHeader(axis, "clusterAxis_" + string(matchingLinkerIdx) + "_" + string(axisIdx))
380
+ }
381
+ }
382
+ }
383
+ }
384
+ }
385
+
386
+ // Fallback: if no columns added, add at least one CDR3 sequence column
387
+ if !addedCols {
388
+ cdr3Sequences := columns.getColumns("cdr3Sequences")
389
+ if len(cdr3Sequences) > 0 {
390
+ cloneTable.add(cdr3Sequences[0], {header: "cdr3_fallback"})
391
+ addedCols = true
392
+ }
393
+ }
394
+
395
+ // Build the table if we have columns
396
+ builtTable := undefined
397
+ clusterColumnHeader := undefined
398
+ if addedCols {
399
+ cloneTable.mem("16GiB")
400
+ cloneTable.cpu(1)
401
+ builtTable = cloneTable.build()
402
+
403
+ // Resolve clusterColumn ref to header name
404
+ clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
405
+ }
406
+
407
+ return {
408
+ cloneTable: builtTable,
409
+ filterMap: filterMap,
410
+ rankingMap: rankingMap,
411
+ sortedLinkers: sortedLinkers,
412
+ clusterColumnHeader: clusterColumnHeader,
413
+ addedCols: addedCols
414
+ }
415
+ }
416
+
417
+ /**
418
+ * Creates a header name with chain information for CDR3/gene columns.
419
+ *
420
+ * @param col - Column specification
421
+ * @param baseHeaderName - Base name for the header (e.g., "cdr3Sequence", "vGene", "jGene")
422
+ * @param isSingleCell - Whether the data is single cell
423
+ * @return Header name with chain information
424
+ */
425
+ makeHeaderName := func(col, baseHeaderName, isSingleCell) {
426
+ chainMapping := {
427
+ "IG": { "A": "Heavy", "B": "Light" },
428
+ "TCRAB": { "A": "TRA", "B": "TRB" },
429
+ "TCRGD": { "A": "TRG", "B": "TRD" }
430
+ }
431
+
432
+ if isSingleCell {
433
+ chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
434
+ receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
435
+ chainLabel := chainMapping[receptor][chain]
436
+ return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
437
+ } else {
438
+ // For bulk, if chain info is available (e.g. IGH, IGK, IGL)
439
+ chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
440
+ if chainFromDomain != undefined {
441
+ return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
442
+ }
443
+ }
444
+ return baseHeaderName
445
+ }
446
+
447
+ /**
448
+ * Initializes and builds CDR3 sequence table with CDR3 sequences, V genes, and J genes.
449
+ *
450
+ * @param pframes - PFrames import
451
+ * @param columns - PBundle containing all columns
452
+ * @param datasetSpec - Dataset specification with axes
453
+ * @param isSingleCell - Whether the data is single cell
454
+ * @return Built CDR3 sequence table
455
+ */
456
+ initializeCdr3SeqTable := func(pframes, columns, datasetSpec, isSingleCell) {
457
+ cdr3SeqTable := pframes.parquetFileBuilder()
458
+ cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
459
+
460
+ // Process CDR3 sequences
461
+ cdr3Sequences := columns.getColumns("cdr3Sequences")
462
+ for col in cdr3Sequences {
463
+ headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
464
+ if isSingleCell {
465
+ if col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary" {
466
+ cdr3SeqTable.add(col, {header: headerName})
467
+ }
468
+ } else {
469
+ cdr3SeqTable.add(col, {header: headerName})
470
+ }
471
+ }
472
+
473
+ // Process V genes
474
+ vGenes := columns.getColumns("VGenes")
475
+ for col in vGenes {
476
+ headerName := makeHeaderName(col, "vGene", isSingleCell)
477
+ cdr3SeqTable.add(col, {header: headerName})
478
+ }
479
+
480
+ // Process J genes
481
+ jGenes := columns.getColumns("JGenes")
482
+ for col in jGenes {
483
+ headerName := makeHeaderName(col, "jGene", isSingleCell)
484
+ cdr3SeqTable.add(col, {header: headerName})
485
+ }
486
+
487
+ cdr3SeqTable.mem("16GiB")
488
+ cdr3SeqTable.cpu(1)
489
+ return cdr3SeqTable.build()
490
+ }
491
+
492
+ /**
493
+ * Detects bulk chain from sequence columns.
494
+ *
495
+ * @param seqCols - List of sequence columns
496
+ * @return Chain string ("H" for Heavy, "KL" for Light)
497
+ */
498
+ detectBulkChain := func(seqCols) {
499
+ chainDetected := "KL"
500
+ for col in seqCols {
501
+ ch := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g., IGHeavy, IGLight
502
+ if ch == "IGHeavy" {
503
+ chainDetected = "H"
504
+ break
505
+ }
506
+ if ch == "IGLight" {
507
+ chainDetected = "KL"
508
+ }
509
+ }
510
+ return chainDetected
511
+ }
512
+
513
+ /**
514
+ * Initializes and builds assembling sequence table with assembling AA sequences.
515
+ *
516
+ * @param pframes - PFrames import
517
+ * @param columns - PBundle containing all columns
518
+ * @param datasetSpec - Dataset specification with axes
519
+ * @param isSingleCell - Whether the data is single cell
520
+ * @return Map with keys: assemSeqTable (built table), bulkChain, seqCols
521
+ */
522
+ initializeAssemSeqTable := func(pframes, columns, datasetSpec, isSingleCell) {
523
+ assemSeqTable := pframes.parquetFileBuilder()
524
+ assemSeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
525
+
526
+ seqCols := columns.getColumns("assemblingAaSeqs")
527
+ for col in seqCols {
528
+ headerName := makeHeaderName(col, "assemblingFeature", isSingleCell)
529
+ assemSeqTable.add(col, {header: headerName})
530
+ }
531
+
532
+ assemSeqTable.mem("16GiB")
533
+ assemSeqTable.cpu(1)
534
+
535
+ // Detect bulk chain if needed
536
+ bulkChain := undefined
537
+ if !isSingleCell {
538
+ bulkChain = detectBulkChain(seqCols)
539
+ }
540
+
541
+ return {
542
+ assemSeqTable: assemSeqTable.build(),
543
+ bulkChain: bulkChain,
544
+ seqCols: seqCols
545
+ }
546
+ }
547
+
135
548
  export {
136
549
  clusterAxisDomainsMatch: clusterAxisDomainsMatch,
137
550
  findMatchingLinkerIndex: findMatchingLinkerIndex,
138
- processRankingColumn: processRankingColumn
551
+ processRankingColumn: processRankingColumn,
552
+ buildSortedLinkers: buildSortedLinkers,
553
+ resolveClusterColumnHeader: resolveClusterColumnHeader,
554
+ initializeCloneTable: initializeCloneTable,
555
+ makeHeaderName: makeHeaderName,
556
+ initializeCdr3SeqTable: initializeCdr3SeqTable,
557
+ detectBulkChain: detectBulkChain,
558
+ initializeAssemSeqTable: initializeAssemSeqTable
139
559
  }
package/index.d.ts DELETED
@@ -1,4 +0,0 @@
1
- declare type TemplateFromFile = { readonly type: "from-file"; readonly path: string; };
2
- declare type TplName = "main";
3
- declare const Templates: Record<TplName, TemplateFromFile>;
4
- export { Templates };
package/index.js DELETED
@@ -1,3 +0,0 @@
1
- module.exports = { Templates: {
2
- 'main': { type: 'from-file', path: require.resolve('./dist/tengo/tpl/main.plj.gz') }
3
- }}
package/tsconfig.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "es2022",
4
- "module": "commonjs",
5
- "moduleResolution": "node",
6
- "esModuleInterop": true,
7
- "strict": true,
8
- "outDir": "./dist",
9
- "rootDir": "./src",
10
- "sourceMap": true,
11
- "declaration": true
12
- },
13
- "types": [],
14
- "include": ["src/**/*"],
15
- "exclude": ["node_modules", "dist"]
16
- }
package/vitest.config.mts DELETED
@@ -1,9 +0,0 @@
1
- import { defineConfig } from 'vitest/config';
2
-
3
- export default defineConfig({
4
- test: {
5
- watch: false,
6
- maxConcurrency: 3,
7
- testTimeout: 5000
8
- }
9
- });