@platforma-open/milaboratories.top-antibodies.workflow 4.2.0 → 4.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.top-antibodies.workflow@4.2.0 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
3
+ > @platforma-open/milaboratories.top-antibodies.workflow@4.3.1 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
4
4
  > shx rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/assembling-fasta.tpl.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,36 @@
1
1
  # @platforma-open/milaboratories.top-antibodies.workflow
2
2
 
3
+ ## 4.3.1
4
+
5
+ ### Patch Changes
6
+
7
+ - a7b65c0: No Op Change To Unblock
8
+ - Updated dependencies [a7b65c0]
9
+ - @platforma-open/milaboratories.top-antibodies.anarci-kabat@1.4.6
10
+ - @platforma-open/milaboratories.top-antibodies.assembling-fasta@1.3.5
11
+ - @platforma-open/milaboratories.top-antibodies.sample-clonotypes@2.2.1
12
+ - @platforma-open/milaboratories.top-antibodies.spectratype@1.8.6
13
+ - @platforma-open/milaboratories.top-antibodies.umap@1.2.6
14
+
15
+ ## 4.3.0
16
+
17
+ ### Minor Changes
18
+
19
+ - 0a06331: New changeset
20
+
21
+ ### Patch Changes
22
+
23
+ - Updated dependencies [0a06331]
24
+ - @platforma-open/milaboratories.top-antibodies.sample-clonotypes@2.2.0
25
+
26
+ ## 4.2.1
27
+
28
+ ### Patch Changes
29
+
30
+ - b466a9b: Fix Selection Plot funnel starting from fewer clonotypes than the project has. The clone table is now built with a Full join plus a dense per-clonotype presence column instead of an inner join, so clonotypes that lack sparse columns (e.g. an enrichment row) reach the funnel and are dropped at the filter stage that checks the missing column rather than before stage tracking — the funnel total now matches the full clonotype count. The optional primary dataset filter is applied as a row pre-condition in the sampler, and null-ranked or null-diversification clonotypes are dropped before selection so they are never sampled.
31
+ - Updated dependencies [b466a9b]
32
+ - @platforma-open/milaboratories.top-antibodies.sample-clonotypes@2.1.5
33
+
3
34
  ## 4.2.0
4
35
 
5
36
  ### Minor Changes
@@ -2,6 +2,9 @@
2
2
 
3
3
  ll := import("@platforma-sdk/workflow-tengo:ll")
4
4
  slices := import("@platforma-sdk/workflow-tengo:slices")
5
+ pt := import("@platforma-sdk/workflow-tengo:pt")
6
+ pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
7
+ pUtil := import("@platforma-sdk/workflow-tengo:pframes.util")
5
8
  json := import("json")
6
9
 
7
10
 
@@ -241,6 +244,57 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
241
244
 
242
245
 
243
246
 
247
+ deriveClonotypePresence := func(mainSeqsCol, datasetSpec) {
248
+ if is_undefined(mainSeqsCol) || is_undefined(mainSeqsCol.spec) || is_undefined(mainSeqsCol.data) {
249
+ return undefined
250
+ }
251
+
252
+ clonotypeAxisName := datasetSpec.axesSpec[1].name
253
+ clonotypeAxis := undefined
254
+ for axis in mainSeqsCol.spec.axesSpec {
255
+ if axis.name == clonotypeAxisName {
256
+ clonotypeAxis = axis
257
+ break
258
+ }
259
+ }
260
+ if is_undefined(clonotypeAxis) {
261
+ return undefined
262
+ }
263
+
264
+ axisId := pSpec.getAxisId(clonotypeAxis)
265
+ presenceSpec := {
266
+ kind: "PColumn",
267
+ name: "clonotypePresence",
268
+ valueType: "Int"
269
+ }
270
+
271
+ wf := pt.workflow().cpu(1).mem("4GiB")
272
+ wf.frame(pt.p.column("presenceSource", { spec: mainSeqsCol.spec, data: mainSeqsCol.data })).
273
+ select(pt.sc.axis(clonotypeAxis).alias(axisId)).
274
+ withColumns(pt.lit(1).cast("Int").alias("clonotypePresence")).
275
+ saveFrameDirect("presenceFrame", {
276
+ axes: [{ column: axisId, spec: clonotypeAxis }],
277
+ columns: [{ column: "clonotypePresence", spec: presenceSpec }],
278
+ partitionKeyLength: 0
279
+ })
280
+
281
+ presenceColumns := pUtil.pFrameToColumnsMap(wf.run().getFrameDirect("presenceFrame"))
282
+ return presenceColumns["clonotypePresence"]
283
+ }
284
+
285
+
286
+
287
+
288
+
289
+
290
+
291
+
292
+
293
+
294
+
295
+
296
+
297
+
244
298
 
245
299
  initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterColumn) {
246
300
 
@@ -480,28 +534,45 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterCol
480
534
 
481
535
 
482
536
 
537
+
538
+
539
+
540
+
541
+
542
+
543
+
544
+
545
+ mainSeqCols := columns.getColumns("mainSeqs")
546
+ for col in columns.getColumns("mainSeqsVdj") {
547
+ mainSeqCols = append(mainSeqCols, col)
548
+ }
549
+ if len(mainSeqCols) > 0 {
550
+ presenceCol := deriveClonotypePresence(mainSeqCols[0], datasetSpec)
551
+ if !is_undefined(presenceCol) {
552
+ cloneTable.add(presenceCol, {header: "clonotype_presence"})
553
+ addedCols = true
554
+ }
555
+ }
556
+
557
+
558
+
483
559
  if !addedCols {
484
560
  cdr3Sequences := columns.getColumns("cdr3Sequences")
485
561
  if len(cdr3Sequences) > 0 {
486
562
  cloneTable.add(cdr3Sequences[0], {header: "sequence_fallback"})
487
563
  addedCols = true
488
- } else {
489
- peptideMainSeqs := columns.getColumns("peptideMainSeqs")
490
- if len(peptideMainSeqs) > 0 {
491
- cloneTable.add(peptideMainSeqs[0], {header: "sequence_fallback"})
492
- addedCols = true
493
- }
494
564
  }
495
565
  }
496
566
 
497
567
 
568
+
498
569
  builtTable := undefined
499
570
  clusterColumnHeader := undefined
500
571
  if addedCols {
501
572
  cloneTable.mem("16GiB")
502
573
  cloneTable.cpu(1)
503
- builtTable = cloneTable.build({joinType: "Inner"})
504
-
574
+ builtTable = cloneTable.build({joinType: "Full"})
575
+
505
576
 
506
577
  clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
507
578
  }
Binary file
package/package.json CHANGED
@@ -1,19 +1,19 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.top-antibodies.workflow",
3
- "version": "4.2.0",
3
+ "version": "4.3.1",
4
4
  "type": "module",
5
5
  "description": "Block Workflow",
6
6
  "dependencies": {
7
- "@platforma-sdk/workflow-tengo": "5.26.0",
7
+ "@platforma-sdk/workflow-tengo": "6.6.5",
8
8
  "@platforma-open/milaboratories.software-anarci": "^0.0.3",
9
- "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "2.1.4",
10
- "@platforma-open/milaboratories.top-antibodies.umap": "1.2.5",
11
- "@platforma-open/milaboratories.top-antibodies.assembling-fasta": "1.3.4",
12
- "@platforma-open/milaboratories.top-antibodies.anarci-kabat": "1.4.5",
13
- "@platforma-open/milaboratories.top-antibodies.spectratype": "1.8.5"
9
+ "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "2.2.1",
10
+ "@platforma-open/milaboratories.top-antibodies.umap": "1.2.6",
11
+ "@platforma-open/milaboratories.top-antibodies.spectratype": "1.8.6",
12
+ "@platforma-open/milaboratories.top-antibodies.anarci-kabat": "1.4.6",
13
+ "@platforma-open/milaboratories.top-antibodies.assembling-fasta": "1.3.5"
14
14
  },
15
15
  "devDependencies": {
16
- "@platforma-sdk/tengo-builder": "3.0.3"
16
+ "@platforma-sdk/tengo-builder": "4.0.9"
17
17
  },
18
18
  "scripts": {
19
19
  "build": "shx rm -rf dist && pl-tengo check && pl-tengo build",
@@ -116,10 +116,7 @@ self.body(func(inputs) {
116
116
  }
117
117
 
118
118
  if topClonotypes != undefined {
119
- valueLabels[string(stageIdx)] = "Filtered"
120
- valueLabels[string(stageIdx + 1)] = "Selected"
121
- } else {
122
- valueLabels[string(stageIdx)] = "Passed Filters"
119
+ valueLabels[string(stageIdx)] = "Selection"
123
120
  }
124
121
 
125
122
  // Import selection stage parquet as selectionStage PColumn
@@ -28,9 +28,9 @@ wf.prepare(func(args){
28
28
  bundleBuilder.ignoreMissingDomains() // to make query work for both bulk and single cell data
29
29
  bundleBuilder.addAnchor("main", args.inputAnchor)
30
30
 
31
- // Optional primary filter from PlDatasetSelector — inner-joined into the
32
- // clone table below so the filter narrows every downstream stage
33
- // (finalClonotypes, spectratype, Kabat, etc.).
31
+ // Optional primary filter from PlDatasetSelector — added to the clone table
32
+ // below. Under the Full join it no longer narrows via join semantics; the
33
+ // filter software drops clonotypes outside it as a pre-condition (filter.py).
34
34
  if !is_undefined(args.inputFilter) {
35
35
  bundleBuilder.addRef(args.inputFilter)
36
36
  }
@@ -140,8 +140,7 @@ wf.prepare(func(args){
140
140
  domain: { "pl7.app/alphabet": "aminoacid" }
141
141
  }, "scFvPerChainSeqs")
142
142
 
143
- // Peptide main sequence — single-axis column on variantKey, used as
144
- // modality-aware fallback when no filter/ranking columns load.
143
+ // Main sequence column
145
144
  bundleBuilder.addMulti({
146
145
  axes: [{ anchor: "main", idx: 1 }],
147
146
  annotations: {
@@ -149,8 +148,16 @@ wf.prepare(func(args){
149
148
  "pl7.app/isMainSequence": "true"
150
149
  },
151
150
  domain: { "pl7.app/alphabet": "aminoacid" }
152
- }, "peptideMainSeqs")
153
-
151
+ }, "mainSeqs")
152
+ bundleBuilder.addMulti({
153
+ axes: [{ anchor: "main", idx: 1 }],
154
+ annotations: {
155
+ "pl7.app/vdj/isAssemblingFeature": "true",
156
+ "pl7.app/vdj/isMainSequence": "true"
157
+ },
158
+ domain: { "pl7.app/alphabet": "aminoacid" }
159
+ }, "mainSeqsVdj")
160
+
154
161
  return {
155
162
  columns: bundleBuilder.build()
156
163
  }
@@ -2,6 +2,9 @@
2
2
 
3
3
  ll := import("@platforma-sdk/workflow-tengo:ll")
4
4
  slices := import("@platforma-sdk/workflow-tengo:slices")
5
+ pt := import("@platforma-sdk/workflow-tengo:pt")
6
+ pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
7
+ pUtil := import("@platforma-sdk/workflow-tengo:pframes.util")
5
8
  json := import("json")
6
9
 
7
10
  // PColumn names used as source columns for In Vivo Score computation.
@@ -229,6 +232,56 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
229
232
  return undefined
230
233
  }
231
234
 
235
+ /**
236
+ * Derives a lightweight, dense, per-clonotype presence column from a "main
237
+ * sequence" column. The clone table is built with a Full join, which keeps only
238
+ * the union of keys across added columns; sparse columns (e.g. enrichment, which
239
+ * upstream emits only for clusters passing its pre-filter) would otherwise leave
240
+ * whole clonotypes out of the funnel. A main-sequence column exists once per
241
+ * clonotype across the entire dataset, so it defines the complete keyset.
242
+ *
243
+ * @param mainSeqsCol - A main-sequence PColumn {spec, data}
244
+ * @param datasetSpec - Dataset specification; axesSpec[1] is the clonotype axis
245
+ * @return The derived presence PColumn {spec, data}, or undefined if unavailable
246
+ */
247
+ deriveClonotypePresence := func(mainSeqsCol, datasetSpec) {
248
+ if is_undefined(mainSeqsCol) || is_undefined(mainSeqsCol.spec) || is_undefined(mainSeqsCol.data) {
249
+ return undefined
250
+ }
251
+
252
+ clonotypeAxisName := datasetSpec.axesSpec[1].name
253
+ clonotypeAxis := undefined
254
+ for axis in mainSeqsCol.spec.axesSpec {
255
+ if axis.name == clonotypeAxisName {
256
+ clonotypeAxis = axis
257
+ break
258
+ }
259
+ }
260
+ if is_undefined(clonotypeAxis) {
261
+ return undefined
262
+ }
263
+
264
+ axisId := pSpec.getAxisId(clonotypeAxis)
265
+ presenceSpec := {
266
+ kind: "PColumn",
267
+ name: "clonotypePresence",
268
+ valueType: "Int"
269
+ }
270
+
271
+ wf := pt.workflow().cpu(1).mem("4GiB")
272
+ wf.frame(pt.p.column("presenceSource", { spec: mainSeqsCol.spec, data: mainSeqsCol.data })).
273
+ select(pt.sc.axis(clonotypeAxis).alias(axisId)).
274
+ withColumns(pt.lit(1).cast("Int").alias("clonotypePresence")).
275
+ saveFrameDirect("presenceFrame", {
276
+ axes: [{ column: axisId, spec: clonotypeAxis }],
277
+ columns: [{ column: "clonotypePresence", spec: presenceSpec }],
278
+ partitionKeyLength: 0
279
+ })
280
+
281
+ presenceColumns := pUtil.pFrameToColumnsMap(wf.run().getFrameDirect("presenceFrame"))
282
+ return presenceColumns["clonotypePresence"]
283
+ }
284
+
232
285
  /**
233
286
  * Initializes and builds complete clone table with all columns.
234
287
  * Handles filters, ranking columns, linkers, cluster sizes, and fallback columns.
@@ -238,8 +291,9 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
238
291
  * @param args - Arguments containing filters, rankingOrder, diversificationColumn
239
292
  * @param datasetSpec - Dataset specification with axes
240
293
  * @param inputFilterColumn - Optional resolved column from the primary filter
241
- * (PlDatasetSelector). Added with an inner join so missing keys are
242
- * dropped from the clone table narrows every downstream stage.
294
+ * (PlDatasetSelector). Added to the clone table; clonotypes missing from it
295
+ * are dropped as a pre-condition in the filter software (see filter.py),
296
+ * not via the join, so the Full join can keep all clonotypes for the funnel.
243
297
  * @return Map with keys: cloneTable, filterMap, rankingMap, sortedLinkers, clusterColumnHeader, addedCols
244
298
  */
245
299
  initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterColumn) {
@@ -256,10 +310,10 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterCol
256
310
  rankingMap := {}
257
311
  addedCols := false
258
312
 
259
- // Apply the optional primary filter from PlDatasetSelector first. The
260
- // builder's inner-join keeps the rows where every added column has a
261
- // value, so once this column is present rows missing from the filter are
262
- // dropped from the entire pipeline.
313
+ // Add the optional primary filter from PlDatasetSelector. The clone table is
314
+ // built with a Full join (below), so this column no longer narrows the keyset
315
+ // via join semantics; clonotypes outside the filter are dropped as a
316
+ // pre-condition in the filter software (filter.py), before stage tracking.
263
317
  if !is_undefined(inputFilterColumn) {
264
318
  cloneTable.add(inputFilterColumn, {header: "primary_filter"})
265
319
  addedCols = true
@@ -478,30 +532,47 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterCol
478
532
  }
479
533
  }
480
534
 
481
- // Fallback: if no columns added, add a single-axis trunk-keyed sequence
482
- // column. Try VDJ CDR3 first
535
+ // Dense per-clonotype presence column: guarantees the Full join below keeps
536
+ // every clonotype, so the Selection Plot funnel total matches the whole dataset.
537
+ // Clonotypes that lack sparse columns (e.g. an enrichment row) now carry null
538
+ // for those columns and are dropped at the relevant FILTER stage instead of
539
+ // being silently dropped before stage tracking. The presence column is a light
540
+ // Int marker — adding the heavy main-sequence column itself would blow up the join.
541
+ // Combine both groups with a loop rather than `append(a, b...)`: Tengo's
542
+ // `append` requires >=2 args, so spreading an empty second group (e.g. the
543
+ // peptide case, which has no VDJ main sequences) would collapse to a
544
+ // single-arg call and fail with "wrong number of arguments".
545
+ mainSeqCols := columns.getColumns("mainSeqs")
546
+ for col in columns.getColumns("mainSeqsVdj") {
547
+ mainSeqCols = append(mainSeqCols, col)
548
+ }
549
+ if len(mainSeqCols) > 0 {
550
+ presenceCol := deriveClonotypePresence(mainSeqCols[0], datasetSpec)
551
+ if !is_undefined(presenceCol) {
552
+ cloneTable.add(presenceCol, {header: "clonotype_presence"})
553
+ addedCols = true
554
+ }
555
+ }
556
+
557
+ // Fallback: if still no columns (no presence column, no filters/ranking),
558
+ // add the VDJ CDR3 sequence as a single-axis trunk-keyed column.
483
559
  if !addedCols {
484
560
  cdr3Sequences := columns.getColumns("cdr3Sequences")
485
561
  if len(cdr3Sequences) > 0 {
486
562
  cloneTable.add(cdr3Sequences[0], {header: "sequence_fallback"})
487
563
  addedCols = true
488
- } else {
489
- peptideMainSeqs := columns.getColumns("peptideMainSeqs")
490
- if len(peptideMainSeqs) > 0 {
491
- cloneTable.add(peptideMainSeqs[0], {header: "sequence_fallback"})
492
- addedCols = true
493
- }
494
564
  }
495
565
  }
496
566
 
497
- // Build the table if we have columns
567
+ // Build the table if we have columns. Full (outer) join so sparse columns
568
+ // never drop clonotypes; the dense presence column above defines the complete keyset.
498
569
  builtTable := undefined
499
570
  clusterColumnHeader := undefined
500
571
  if addedCols {
501
572
  cloneTable.mem("16GiB")
502
573
  cloneTable.cpu(1)
503
- builtTable = cloneTable.build({joinType: "Inner"})
504
-
574
+ builtTable = cloneTable.build({joinType: "Full"})
575
+
505
576
  // Resolve diversificationColumn ref to header name
506
577
  clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
507
578
  }