@platforma-open/milaboratories.top-antibodies.workflow 4.1.2 → 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.top-antibodies.workflow@4.1.2 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
3
+ > @platforma-open/milaboratories.top-antibodies.workflow@4.2.1 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
4
4
  > shx rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/assembling-fasta.tpl.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # @platforma-open/milaboratories.top-antibodies.workflow
2
2
 
3
+ ## 4.2.1
4
+
5
+ ### Patch Changes
6
+
7
+ - b466a9b: Fix Selection Plot funnel starting from fewer clonotypes than the project has. The clone table is now built with a Full join plus a dense per-clonotype presence column instead of an inner join, so clonotypes that lack sparse columns (e.g. an enrichment row) reach the funnel and are dropped at the filter stage that checks the missing column rather than before stage tracking — the funnel total now matches the full clonotype count. The optional primary dataset filter is applied as a row pre-condition in the sampler, and null-ranked or null-diversification clonotypes are dropped before selection so they are never sampled.
8
+ - Updated dependencies [b466a9b]
9
+ - @platforma-open/milaboratories.top-antibodies.sample-clonotypes@2.1.5
10
+
11
+ ## 4.2.0
12
+
13
+ ### Minor Changes
14
+
15
+ - 8edddd1: Add dataset selector with optional filter dropdown. Replaces the plain dataset dropdown with `PlDatasetSelector`, and inner-joins the selected filter column into the clone table so it narrows every downstream stage (final clonotypes, spectratype, Kabat).
16
+
3
17
  ## 4.1.2
4
18
 
5
19
  ### Patch Changes
@@ -2,6 +2,9 @@
2
2
 
3
3
  ll := import("@platforma-sdk/workflow-tengo:ll")
4
4
  slices := import("@platforma-sdk/workflow-tengo:slices")
5
+ pt := import("@platforma-sdk/workflow-tengo:pt")
6
+ pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
7
+ pUtil := import("@platforma-sdk/workflow-tengo:pframes.util")
5
8
  json := import("json")
6
9
 
7
10
 
@@ -239,20 +242,83 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
239
242
 
240
243
 
241
244
 
242
- initializeCloneTable := func(pframes, columns, args, datasetSpec) {
245
+
246
+
247
+ deriveClonotypePresence := func(mainSeqsCol, datasetSpec) {
248
+ if is_undefined(mainSeqsCol) || is_undefined(mainSeqsCol.spec) || is_undefined(mainSeqsCol.data) {
249
+ return undefined
250
+ }
251
+
252
+ clonotypeAxisName := datasetSpec.axesSpec[1].name
253
+ clonotypeAxis := undefined
254
+ for axis in mainSeqsCol.spec.axesSpec {
255
+ if axis.name == clonotypeAxisName {
256
+ clonotypeAxis = axis
257
+ break
258
+ }
259
+ }
260
+ if is_undefined(clonotypeAxis) {
261
+ return undefined
262
+ }
263
+
264
+ axisId := pSpec.getAxisId(clonotypeAxis)
265
+ presenceSpec := {
266
+ kind: "PColumn",
267
+ name: "clonotypePresence",
268
+ valueType: "Int"
269
+ }
270
+
271
+ wf := pt.workflow().cpu(1).mem("4GiB")
272
+ wf.frame(pt.p.column("presenceSource", { spec: mainSeqsCol.spec, data: mainSeqsCol.data })).
273
+ select(pt.sc.axis(clonotypeAxis).alias(axisId)).
274
+ withColumns(pt.lit(1).cast("Int").alias("clonotypePresence")).
275
+ saveFrameDirect("presenceFrame", {
276
+ axes: [{ column: axisId, spec: clonotypeAxis }],
277
+ columns: [{ column: "clonotypePresence", spec: presenceSpec }],
278
+ partitionKeyLength: 0
279
+ })
280
+
281
+ presenceColumns := pUtil.pFrameToColumnsMap(wf.run().getFrameDirect("presenceFrame"))
282
+ return presenceColumns["clonotypePresence"]
283
+ }
284
+
285
+
286
+
287
+
288
+
289
+
290
+
291
+
292
+
293
+
294
+
295
+
296
+
297
+
298
+
299
+ initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterColumn) {
243
300
 
244
301
  cloneTable := pframes.parquetFileBuilder()
245
302
  cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
246
-
303
+
247
304
 
248
305
  sortedLinkers := buildSortedLinkers(columns, datasetSpec)
249
-
306
+
250
307
 
251
308
  addedAxes := []
252
309
  filterMap := {}
253
310
  rankingMap := {}
254
311
  addedCols := false
255
-
312
+
313
+
314
+
315
+
316
+
317
+ if !is_undefined(inputFilterColumn) {
318
+ cloneTable.add(inputFilterColumn, {header: "primary_filter"})
319
+ addedCols = true
320
+ }
321
+
256
322
  if len(args.filters) > 0 {
257
323
  for i, filter in args.filters {
258
324
 
@@ -468,28 +534,40 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec) {
468
534
 
469
535
 
470
536
 
537
+
538
+
539
+
540
+
541
+ mainSeqCols := append(
542
+ columns.getColumns("mainSeqs"),
543
+ columns.getColumns("mainSeqsVdj")...)
544
+ if len(mainSeqCols) > 0 {
545
+ presenceCol := deriveClonotypePresence(mainSeqCols[0], datasetSpec)
546
+ if !is_undefined(presenceCol) {
547
+ cloneTable.add(presenceCol, {header: "clonotype_presence"})
548
+ addedCols = true
549
+ }
550
+ }
551
+
552
+
553
+
471
554
  if !addedCols {
472
555
  cdr3Sequences := columns.getColumns("cdr3Sequences")
473
556
  if len(cdr3Sequences) > 0 {
474
557
  cloneTable.add(cdr3Sequences[0], {header: "sequence_fallback"})
475
558
  addedCols = true
476
- } else {
477
- peptideMainSeqs := columns.getColumns("peptideMainSeqs")
478
- if len(peptideMainSeqs) > 0 {
479
- cloneTable.add(peptideMainSeqs[0], {header: "sequence_fallback"})
480
- addedCols = true
481
- }
482
559
  }
483
560
  }
484
561
 
485
562
 
563
+
486
564
  builtTable := undefined
487
565
  clusterColumnHeader := undefined
488
566
  if addedCols {
489
567
  cloneTable.mem("16GiB")
490
568
  cloneTable.cpu(1)
491
- builtTable = cloneTable.build({joinType: "Inner"})
492
-
569
+ builtTable = cloneTable.build({joinType: "Full"})
570
+
493
571
 
494
572
  clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
495
573
  }
Binary file
package/package.json CHANGED
@@ -1,19 +1,19 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.top-antibodies.workflow",
3
- "version": "4.1.2",
3
+ "version": "4.2.1",
4
4
  "type": "module",
5
5
  "description": "Block Workflow",
6
6
  "dependencies": {
7
- "@platforma-sdk/workflow-tengo": "5.21.0",
7
+ "@platforma-sdk/workflow-tengo": "6.6.3",
8
8
  "@platforma-open/milaboratories.software-anarci": "^0.0.3",
9
- "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "2.1.4",
10
9
  "@platforma-open/milaboratories.top-antibodies.spectratype": "1.8.5",
10
+ "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "2.1.5",
11
11
  "@platforma-open/milaboratories.top-antibodies.umap": "1.2.5",
12
12
  "@platforma-open/milaboratories.top-antibodies.anarci-kabat": "1.4.5",
13
13
  "@platforma-open/milaboratories.top-antibodies.assembling-fasta": "1.3.4"
14
14
  },
15
15
  "devDependencies": {
16
- "@platforma-sdk/tengo-builder": "2.5.26"
16
+ "@platforma-sdk/tengo-builder": "4.0.9"
17
17
  },
18
18
  "scripts": {
19
19
  "build": "shx rm -rf dist && pl-tengo check && pl-tengo build",
@@ -116,10 +116,7 @@ self.body(func(inputs) {
116
116
  }
117
117
 
118
118
  if topClonotypes != undefined {
119
- valueLabels[string(stageIdx)] = "Filtered"
120
- valueLabels[string(stageIdx + 1)] = "Selected"
121
- } else {
122
- valueLabels[string(stageIdx)] = "Passed Filters"
119
+ valueLabels[string(stageIdx)] = "Selection"
123
120
  }
124
121
 
125
122
  // Import selection stage parquet as selectionStage PColumn
@@ -7,6 +7,7 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
7
7
  slices := import("@platforma-sdk/workflow-tengo:slices")
8
8
  render := import("@platforma-sdk/workflow-tengo:render")
9
9
  pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
10
+ smart := import("@platforma-sdk/workflow-tengo:smart")
10
11
  ll := import("@platforma-sdk/workflow-tengo:ll")
11
12
  kabatConv := import(":pf-kabat-conv")
12
13
 
@@ -25,8 +26,15 @@ wf.prepare(func(args){
25
26
  // We need a table with cluster ID (optional) | clonotype id | selected ranking columns
26
27
  bundleBuilder := wf.createPBundleBuilder()
27
28
  bundleBuilder.ignoreMissingDomains() // to make query work for both bulk and single cell data
28
- bundleBuilder.addAnchor("main", args.inputAnchor)
29
-
29
+ bundleBuilder.addAnchor("main", args.inputAnchor)
30
+
31
+ // Optional primary filter from PlDatasetSelector — added to the clone table
32
+ // below. Under the Full join it no longer narrows via join semantics; the
33
+ // filter software drops clonotypes outside it as a pre-condition (filter.py).
34
+ if !is_undefined(args.inputFilter) {
35
+ bundleBuilder.addRef(args.inputFilter)
36
+ }
37
+
30
38
  validRanks := false
31
39
  if len(args.rankingOrder) > 0 {
32
40
  for col in args.rankingOrder {
@@ -132,8 +140,7 @@ wf.prepare(func(args){
132
140
  domain: { "pl7.app/alphabet": "aminoacid" }
133
141
  }, "scFvPerChainSeqs")
134
142
 
135
- // Peptide main sequence — single-axis column on variantKey, used as
136
- // modality-aware fallback when no filter/ranking columns load.
143
+ // Main sequence column
137
144
  bundleBuilder.addMulti({
138
145
  axes: [{ anchor: "main", idx: 1 }],
139
146
  annotations: {
@@ -141,18 +148,31 @@ wf.prepare(func(args){
141
148
  "pl7.app/isMainSequence": "true"
142
149
  },
143
150
  domain: { "pl7.app/alphabet": "aminoacid" }
144
- }, "peptideMainSeqs")
145
-
151
+ }, "mainSeqs")
152
+ bundleBuilder.addMulti({
153
+ axes: [{ anchor: "main", idx: 1 }],
154
+ annotations: {
155
+ "pl7.app/vdj/isAssemblingFeature": "true",
156
+ "pl7.app/vdj/isMainSequence": "true"
157
+ },
158
+ domain: { "pl7.app/alphabet": "aminoacid" }
159
+ }, "mainSeqsVdj")
160
+
146
161
  return {
147
162
  columns: bundleBuilder.build()
148
163
  }
149
164
  })
150
165
 
151
166
  wf.body(func(args) {
152
- // output containers
167
+ // output containers
153
168
  outputs := {}
154
169
  exports := {}
155
170
 
171
+ // Expose this block's own id so the model can drop self-produced filter
172
+ // entries from the dataset selector — without this the just-finished
173
+ // sampled subset shows up as a filter option on the next configuration.
174
+ outputs["selfBlockId"] = smart.createJsonResource(wf.getBlockId())
175
+
156
176
  if !is_undefined(args.inputAnchor) {
157
177
  // Input arguments
158
178
  columns := args.columns
@@ -164,8 +184,14 @@ wf.body(func(args) {
164
184
  isPeptide := datasetSpec.axesSpec[1].name == "pl7.app/variantKey"
165
185
 
166
186
  ////////// Clonotype Filtering //////////
167
- // Initialize and build clone table with all columns
168
- tableInit := utils.initializeCloneTable(pframes, columns, args, datasetSpec)
187
+ // Initialize and build clone table with all columns. When the user
188
+ // picked an optional primary filter in PlDatasetSelector, fetch the
189
+ // resolved column so initializeCloneTable can add it to the clone table (Full join, not inner-join).
190
+ inputFilterColumn := undefined
191
+ if !is_undefined(args.inputFilter) {
192
+ inputFilterColumn = columns.getColumn(args.inputFilter)
193
+ }
194
+ tableInit := utils.initializeCloneTable(pframes, columns, args, datasetSpec, inputFilterColumn)
169
195
  cloneTable := tableInit.cloneTable
170
196
  filterMap := tableInit.filterMap
171
197
  rankingMap := tableInit.rankingMap
@@ -2,6 +2,9 @@
2
2
 
3
3
  ll := import("@platforma-sdk/workflow-tengo:ll")
4
4
  slices := import("@platforma-sdk/workflow-tengo:slices")
5
+ pt := import("@platforma-sdk/workflow-tengo:pt")
6
+ pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
7
+ pUtil := import("@platforma-sdk/workflow-tengo:pframes.util")
5
8
  json := import("json")
6
9
 
7
10
  // PColumn names used as source columns for In Vivo Score computation.
@@ -229,30 +232,93 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
229
232
  return undefined
230
233
  }
231
234
 
235
+ /**
236
+ * Derives a lightweight, dense, per-clonotype presence column from a "main
237
+ * sequence" column. The clone table is built with a Full join, which keeps only
238
+ * the union of keys across added columns; sparse columns (e.g. enrichment, which
239
+ * upstream emits only for clusters passing its pre-filter) would otherwise leave
240
+ * whole clonotypes out of the funnel. A main-sequence column exists once per
241
+ * clonotype across the entire dataset, so it defines the complete keyset.
242
+ *
243
+ * @param mainSeqsCol - A main-sequence PColumn {spec, data}
244
+ * @param datasetSpec - Dataset specification; axesSpec[1] is the clonotype axis
245
+ * @return The derived presence PColumn {spec, data}, or undefined if unavailable
246
+ */
247
+ deriveClonotypePresence := func(mainSeqsCol, datasetSpec) {
248
+ if is_undefined(mainSeqsCol) || is_undefined(mainSeqsCol.spec) || is_undefined(mainSeqsCol.data) {
249
+ return undefined
250
+ }
251
+
252
+ clonotypeAxisName := datasetSpec.axesSpec[1].name
253
+ clonotypeAxis := undefined
254
+ for axis in mainSeqsCol.spec.axesSpec {
255
+ if axis.name == clonotypeAxisName {
256
+ clonotypeAxis = axis
257
+ break
258
+ }
259
+ }
260
+ if is_undefined(clonotypeAxis) {
261
+ return undefined
262
+ }
263
+
264
+ axisId := pSpec.getAxisId(clonotypeAxis)
265
+ presenceSpec := {
266
+ kind: "PColumn",
267
+ name: "clonotypePresence",
268
+ valueType: "Int"
269
+ }
270
+
271
+ wf := pt.workflow().cpu(1).mem("4GiB")
272
+ wf.frame(pt.p.column("presenceSource", { spec: mainSeqsCol.spec, data: mainSeqsCol.data })).
273
+ select(pt.sc.axis(clonotypeAxis).alias(axisId)).
274
+ withColumns(pt.lit(1).cast("Int").alias("clonotypePresence")).
275
+ saveFrameDirect("presenceFrame", {
276
+ axes: [{ column: axisId, spec: clonotypeAxis }],
277
+ columns: [{ column: "clonotypePresence", spec: presenceSpec }],
278
+ partitionKeyLength: 0
279
+ })
280
+
281
+ presenceColumns := pUtil.pFrameToColumnsMap(wf.run().getFrameDirect("presenceFrame"))
282
+ return presenceColumns["clonotypePresence"]
283
+ }
284
+
232
285
  /**
233
286
  * Initializes and builds complete clone table with all columns.
234
287
  * Handles filters, ranking columns, linkers, cluster sizes, and fallback columns.
235
- *
288
+ *
236
289
  * @param pframes - PFrames import
237
290
  * @param columns - PBundle containing all columns
238
291
  * @param args - Arguments containing filters, rankingOrder, diversificationColumn
239
292
  * @param datasetSpec - Dataset specification with axes
293
+ * @param inputFilterColumn - Optional resolved column from the primary filter
294
+ * (PlDatasetSelector). Added to the clone table; clonotypes missing from it
295
+ * are dropped as a pre-condition in the filter software (see filter.py),
296
+ * not via the join, so the Full join can keep all clonotypes for the funnel.
240
297
  * @return Map with keys: cloneTable, filterMap, rankingMap, sortedLinkers, clusterColumnHeader, addedCols
241
298
  */
242
- initializeCloneTable := func(pframes, columns, args, datasetSpec) {
299
+ initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterColumn) {
243
300
  // Build clonotype table
244
301
  cloneTable := pframes.parquetFileBuilder()
245
302
  cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
246
-
303
+
247
304
  // Build linker list in SAME ORDER as model
248
305
  sortedLinkers := buildSortedLinkers(columns, datasetSpec)
249
-
306
+
250
307
  // Add Filters to table
251
308
  addedAxes := []
252
309
  filterMap := {}
253
310
  rankingMap := {}
254
311
  addedCols := false
255
-
312
+
313
+ // Add the optional primary filter from PlDatasetSelector. The clone table is
314
+ // built with a Full join (below), so this column no longer narrows the keyset
315
+ // via join semantics; clonotypes outside the filter are dropped as a
316
+ // pre-condition in the filter software (filter.py), before stage tracking.
317
+ if !is_undefined(inputFilterColumn) {
318
+ cloneTable.add(inputFilterColumn, {header: "primary_filter"})
319
+ addedCols = true
320
+ }
321
+
256
322
  if len(args.filters) > 0 {
257
323
  for i, filter in args.filters {
258
324
  // we check for value presence and for actual pcolumn (cases where upstream block is deleted)
@@ -466,30 +532,42 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec) {
466
532
  }
467
533
  }
468
534
 
469
- // Fallback: if no columns added, add a single-axis trunk-keyed sequence
470
- // column. Try VDJ CDR3 first
535
+ // Dense per-clonotype presence column: guarantees the Full join below keeps
536
+ // every clonotype, so the Selection Plot funnel total matches the whole dataset.
537
+ // Clonotypes that lack sparse columns (e.g. an enrichment row) now carry null
538
+ // for those columns and are dropped at the relevant FILTER stage instead of
539
+ // being silently dropped before stage tracking. The presence column is a light
540
+ // Int marker — adding the heavy main-sequence column itself would blow up the join.
541
+ mainSeqCols := append(
542
+ columns.getColumns("mainSeqs"),
543
+ columns.getColumns("mainSeqsVdj")...)
544
+ if len(mainSeqCols) > 0 {
545
+ presenceCol := deriveClonotypePresence(mainSeqCols[0], datasetSpec)
546
+ if !is_undefined(presenceCol) {
547
+ cloneTable.add(presenceCol, {header: "clonotype_presence"})
548
+ addedCols = true
549
+ }
550
+ }
551
+
552
+ // Fallback: if still no columns (no presence column, no filters/ranking),
553
+ // add the VDJ CDR3 sequence as a single-axis trunk-keyed column.
471
554
  if !addedCols {
472
555
  cdr3Sequences := columns.getColumns("cdr3Sequences")
473
556
  if len(cdr3Sequences) > 0 {
474
557
  cloneTable.add(cdr3Sequences[0], {header: "sequence_fallback"})
475
558
  addedCols = true
476
- } else {
477
- peptideMainSeqs := columns.getColumns("peptideMainSeqs")
478
- if len(peptideMainSeqs) > 0 {
479
- cloneTable.add(peptideMainSeqs[0], {header: "sequence_fallback"})
480
- addedCols = true
481
- }
482
559
  }
483
560
  }
484
561
 
485
- // Build the table if we have columns
562
+ // Build the table if we have columns. Full (outer) join so sparse columns
563
+ // never drop clonotypes; the dense presence column above defines the complete keyset.
486
564
  builtTable := undefined
487
565
  clusterColumnHeader := undefined
488
566
  if addedCols {
489
567
  cloneTable.mem("16GiB")
490
568
  cloneTable.cpu(1)
491
- builtTable = cloneTable.build({joinType: "Inner"})
492
-
569
+ builtTable = cloneTable.build({joinType: "Full"})
570
+
493
571
  // Resolve diversificationColumn ref to header name
494
572
  clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
495
573
  }