npm - @platforma-open/milaboratories.top-antibodies.workflow - Versions diffs - 4.1.2 → 4.2.1 - Mend

@platforma-open/milaboratories.top-antibodies.workflow 4.1.2 → 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/.turbo/turbo-build.log +1 -1
package/CHANGELOG.md +14 -0
package/dist/tengo/lib/utils.lib.tengo +90 -12
package/dist/tengo/tpl/assembling-fasta.plj.gz +0 -0
package/dist/tengo/tpl/filter-and-sample.plj.gz +0 -0
package/dist/tengo/tpl/main.plj.gz +0 -0
package/package.json +4 -4
package/src/filter-and-sample.tpl.tengo +1 -4
package/src/main.tpl.tengo +35 -9
package/src/utils.lib.tengo +94 -16

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,6 +1,6 @@
  WARN  Issue while reading "/home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
-> @platforma-open/milaboratories.top-antibodies.workflow@4.1.2 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
+> @platforma-open/milaboratories.top-antibodies.workflow@4.2.1 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
 > shx rm -rf dist && pl-tengo check && pl-tengo build
 Processing "src/assembling-fasta.tpl.tengo"...

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,19 @@
 # @platforma-open/milaboratories.top-antibodies.workflow
+## 4.2.1
+### Patch Changes
+- b466a9b: Fix Selection Plot funnel starting from fewer clonotypes than the project has. The clone table is now built with a Full join plus a dense per-clonotype presence column instead of an inner join, so clonotypes that lack sparse columns (e.g. an enrichment row) reach the funnel and are dropped at the filter stage that checks the missing column rather than before stage tracking — the funnel total now matches the full clonotype count. The optional primary dataset filter is applied as a row pre-condition in the sampler, and null-ranked or null-diversification clonotypes are dropped before selection so they are never sampled.
+- Updated dependencies [b466a9b]
+  - @platforma-open/milaboratories.top-antibodies.sample-clonotypes@2.1.5
+## 4.2.0
+### Minor Changes
+- 8edddd1: Add dataset selector with optional filter dropdown. Replaces the plain dataset dropdown with `PlDatasetSelector`, and inner-joins the selected filter column into the clone table so it narrows every downstream stage (final clonotypes, spectratype, Kabat).
 ## 4.1.2
 ### Patch Changes

package/dist/tengo/lib/utils.lib.tengo CHANGED Viewed

@@ -2,6 +2,9 @@
 ll := import("@platforma-sdk/workflow-tengo:ll")
 slices := import("@platforma-sdk/workflow-tengo:slices")
+pt := import("@platforma-sdk/workflow-tengo:pt")
+pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
+pUtil := import("@platforma-sdk/workflow-tengo:pframes.util")
 json := import("json")
@@ -239,20 +242,83 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
-initializeCloneTable := func(pframes, columns, args, datasetSpec) {
+deriveClonotypePresence := func(mainSeqsCol, datasetSpec) {
+    if is_undefined(mainSeqsCol) || is_undefined(mainSeqsCol.spec) || is_undefined(mainSeqsCol.data) {
+        return undefined
+    }
+    clonotypeAxisName := datasetSpec.axesSpec[1].name
+    clonotypeAxis := undefined
+    for axis in mainSeqsCol.spec.axesSpec {
+        if axis.name == clonotypeAxisName {
+            clonotypeAxis = axis
+            break
+        }
+    }
+    if is_undefined(clonotypeAxis) {
+        return undefined
+    }
+    axisId := pSpec.getAxisId(clonotypeAxis)
+    presenceSpec := {
+        kind: "PColumn",
+        name: "clonotypePresence",
+        valueType: "Int"
+    }
+    wf := pt.workflow().cpu(1).mem("4GiB")
+    wf.frame(pt.p.column("presenceSource", { spec: mainSeqsCol.spec, data: mainSeqsCol.data })).
+        select(pt.sc.axis(clonotypeAxis).alias(axisId)).
+        withColumns(pt.lit(1).cast("Int").alias("clonotypePresence")).
+        saveFrameDirect("presenceFrame", {
+            axes: [{ column: axisId, spec: clonotypeAxis }],
+            columns: [{ column: "clonotypePresence", spec: presenceSpec }],
+            partitionKeyLength: 0
+        })
+    presenceColumns := pUtil.pFrameToColumnsMap(wf.run().getFrameDirect("presenceFrame"))
+    return presenceColumns["clonotypePresence"]
+}
+initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterColumn) {
     cloneTable := pframes.parquetFileBuilder()
     cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
     sortedLinkers := buildSortedLinkers(columns, datasetSpec)
     addedAxes := []
     filterMap := {}
     rankingMap := {}
     addedCols := false
+    if !is_undefined(inputFilterColumn) {
+        cloneTable.add(inputFilterColumn, {header: "primary_filter"})
+        addedCols = true
+    }
     if len(args.filters) > 0 {
         for i, filter in args.filters {
@@ -468,28 +534,40 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec) {
+    mainSeqCols := append(
+        columns.getColumns("mainSeqs"),
+        columns.getColumns("mainSeqsVdj")...)
+    if len(mainSeqCols) > 0 {
+        presenceCol := deriveClonotypePresence(mainSeqCols[0], datasetSpec)
+        if !is_undefined(presenceCol) {
+            cloneTable.add(presenceCol, {header: "clonotype_presence"})
+            addedCols = true
+        }
+    }
     if !addedCols {
         cdr3Sequences := columns.getColumns("cdr3Sequences")
         if len(cdr3Sequences) > 0 {
             cloneTable.add(cdr3Sequences[0], {header: "sequence_fallback"})
             addedCols = true
-        } else {
-            peptideMainSeqs := columns.getColumns("peptideMainSeqs")
-            if len(peptideMainSeqs) > 0 {
-                cloneTable.add(peptideMainSeqs[0], {header: "sequence_fallback"})
-                addedCols = true
-            }
         }
     }
     builtTable := undefined
     clusterColumnHeader := undefined
     if addedCols {
         cloneTable.mem("16GiB")
         cloneTable.cpu(1)
-        builtTable = cloneTable.build({joinType: "Inner"})
+        builtTable = cloneTable.build({joinType: "Full"})
         clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
     }

package/dist/tengo/tpl/assembling-fasta.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/filter-and-sample.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,19 +1,19 @@
 {
   "name": "@platforma-open/milaboratories.top-antibodies.workflow",
-  "version": "4.1.2",
+  "version": "4.2.1",
   "type": "module",
   "description": "Block Workflow",
   "dependencies": {
-    "@platforma-sdk/workflow-tengo": "5.21.0",
+    "@platforma-sdk/workflow-tengo": "6.6.3",
     "@platforma-open/milaboratories.software-anarci": "^0.0.3",
-    "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "2.1.4",
     "@platforma-open/milaboratories.top-antibodies.spectratype": "1.8.5",
+    "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "2.1.5",
     "@platforma-open/milaboratories.top-antibodies.umap": "1.2.5",
     "@platforma-open/milaboratories.top-antibodies.anarci-kabat": "1.4.5",
     "@platforma-open/milaboratories.top-antibodies.assembling-fasta": "1.3.4"
   },
   "devDependencies": {
-    "@platforma-sdk/tengo-builder": "2.5.26"
+    "@platforma-sdk/tengo-builder": "4.0.9"
   },
   "scripts": {
     "build": "shx rm -rf dist && pl-tengo check && pl-tengo build",

package/src/filter-and-sample.tpl.tengo CHANGED Viewed

@@ -116,10 +116,7 @@ self.body(func(inputs) {
     }
     if topClonotypes != undefined {
-        valueLabels[string(stageIdx)] = "Filtered"
-        valueLabels[string(stageIdx + 1)] = "Selected"
-    } else {
-        valueLabels[string(stageIdx)] = "Passed Filters"
+        valueLabels[string(stageIdx)] = "Selection"
     }
     // Import selection stage parquet as selectionStage PColumn

package/src/main.tpl.tengo CHANGED Viewed

@@ -7,6 +7,7 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
 slices := import("@platforma-sdk/workflow-tengo:slices")
 render := import("@platforma-sdk/workflow-tengo:render")
 pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
+smart := import("@platforma-sdk/workflow-tengo:smart")
 ll := import("@platforma-sdk/workflow-tengo:ll")
 kabatConv := import(":pf-kabat-conv")
@@ -25,8 +26,15 @@ wf.prepare(func(args){
 	// We need a table with cluster ID (optional) | clonotype id | selected ranking columns
     bundleBuilder := wf.createPBundleBuilder()
     bundleBuilder.ignoreMissingDomains() // to make query work for both bulk and single cell data
-    bundleBuilder.addAnchor("main", args.inputAnchor)
+    bundleBuilder.addAnchor("main", args.inputAnchor)
+    // Optional primary filter from PlDatasetSelector — added to the clone table
+    // below. Under the Full join it no longer narrows via join semantics; the
+    // filter software drops clonotypes outside it as a pre-condition (filter.py).
+    if !is_undefined(args.inputFilter) {
+        bundleBuilder.addRef(args.inputFilter)
+    }
     validRanks := false
     if len(args.rankingOrder) > 0 {
         for col in args.rankingOrder {
@@ -132,8 +140,7 @@ wf.prepare(func(args){
 		domain: { "pl7.app/alphabet": "aminoacid" }
 	}, "scFvPerChainSeqs")
-	// Peptide main sequence — single-axis column on variantKey, used as
-	// modality-aware fallback when no filter/ranking columns load.
+	// Main sequence column
 	bundleBuilder.addMulti({
 		axes: [{ anchor: "main", idx: 1 }],
 		annotations: {
@@ -141,18 +148,31 @@ wf.prepare(func(args){
 			"pl7.app/isMainSequence": "true"
 		},
 		domain: { "pl7.app/alphabet": "aminoacid" }
-	}, "peptideMainSeqs")
+	}, "mainSeqs")
+	bundleBuilder.addMulti({
+		axes: [{ anchor: "main", idx: 1 }],
+		annotations: {
+			"pl7.app/vdj/isAssemblingFeature": "true",
+			"pl7.app/vdj/isMainSequence": "true"
+		},
+		domain: { "pl7.app/alphabet": "aminoacid" }
+	}, "mainSeqsVdj")
     return {
         columns: bundleBuilder.build()
     }
 })
 wf.body(func(args) {
-	// output containers
+	// output containers
 	outputs := {}
     exports := {}
+    // Expose this block's own id so the model can drop self-produced filter
+    // entries from the dataset selector — without this the just-finished
+    // sampled subset shows up as a filter option on the next configuration.
+    outputs["selfBlockId"] = smart.createJsonResource(wf.getBlockId())
     if !is_undefined(args.inputAnchor) {
         // Input arguments
         columns := args.columns
@@ -164,8 +184,14 @@ wf.body(func(args) {
 	    isPeptide := datasetSpec.axesSpec[1].name == "pl7.app/variantKey"
         ////////// Clonotype Filtering //////////
-        // Initialize and build clone table with all columns
-        tableInit := utils.initializeCloneTable(pframes, columns, args, datasetSpec)
+        // Initialize and build clone table with all columns. When the user
+        // picked an optional primary filter in PlDatasetSelector, fetch the
+        // resolved column so initializeCloneTable can add it to the clone table.
+        inputFilterColumn := undefined
+        if !is_undefined(args.inputFilter) {
+            inputFilterColumn = columns.getColumn(args.inputFilter)
+        }
+        tableInit := utils.initializeCloneTable(pframes, columns, args, datasetSpec, inputFilterColumn)
         cloneTable := tableInit.cloneTable
         filterMap := tableInit.filterMap
         rankingMap := tableInit.rankingMap

package/src/utils.lib.tengo CHANGED Viewed

@@ -2,6 +2,9 @@
 ll := import("@platforma-sdk/workflow-tengo:ll")
 slices := import("@platforma-sdk/workflow-tengo:slices")
+pt := import("@platforma-sdk/workflow-tengo:pt")
+pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
+pUtil := import("@platforma-sdk/workflow-tengo:pframes.util")
 json := import("json")
 // PColumn names used as source columns for In Vivo Score computation.
@@ -229,30 +232,93 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
     return undefined
 }
+/**
+ * Derives a lightweight, dense, per-clonotype presence column from a "main
+ * sequence" column. The clone table is built with a Full join, which keeps only
+ * the union of keys across added columns; sparse columns (e.g. enrichment, which
+ * upstream emits only for clusters passing its pre-filter) would otherwise leave
+ * whole clonotypes out of the funnel. A main-sequence column exists once per
+ * clonotype across the entire dataset, so it defines the complete keyset.
+ *
+ * @param mainSeqsCol - A main-sequence PColumn {spec, data}
+ * @param datasetSpec - Dataset specification; axesSpec[1] is the clonotype axis
+ * @return The derived presence PColumn {spec, data}, or undefined if unavailable
+ */
+deriveClonotypePresence := func(mainSeqsCol, datasetSpec) {
+    if is_undefined(mainSeqsCol) || is_undefined(mainSeqsCol.spec) || is_undefined(mainSeqsCol.data) {
+        return undefined
+    }
+    clonotypeAxisName := datasetSpec.axesSpec[1].name
+    clonotypeAxis := undefined
+    for axis in mainSeqsCol.spec.axesSpec {
+        if axis.name == clonotypeAxisName {
+            clonotypeAxis = axis
+            break
+        }
+    }
+    if is_undefined(clonotypeAxis) {
+        return undefined
+    }
+    axisId := pSpec.getAxisId(clonotypeAxis)
+    presenceSpec := {
+        kind: "PColumn",
+        name: "clonotypePresence",
+        valueType: "Int"
+    }
+    wf := pt.workflow().cpu(1).mem("4GiB")
+    wf.frame(pt.p.column("presenceSource", { spec: mainSeqsCol.spec, data: mainSeqsCol.data })).
+        select(pt.sc.axis(clonotypeAxis).alias(axisId)).
+        withColumns(pt.lit(1).cast("Int").alias("clonotypePresence")).
+        saveFrameDirect("presenceFrame", {
+            axes: [{ column: axisId, spec: clonotypeAxis }],
+            columns: [{ column: "clonotypePresence", spec: presenceSpec }],
+            partitionKeyLength: 0
+        })
+    presenceColumns := pUtil.pFrameToColumnsMap(wf.run().getFrameDirect("presenceFrame"))
+    return presenceColumns["clonotypePresence"]
+}
 /**
  * Initializes and builds complete clone table with all columns.
  * Handles filters, ranking columns, linkers, cluster sizes, and fallback columns.
- *
+ *
  * @param pframes - PFrames import
  * @param columns - PBundle containing all columns
  * @param args - Arguments containing filters, rankingOrder, diversificationColumn
  * @param datasetSpec - Dataset specification with axes
+ * @param inputFilterColumn - Optional resolved column from the primary filter
+ *        (PlDatasetSelector). Added to the clone table; clonotypes missing from it
+ *        are dropped as a pre-condition in the filter software (see filter.py),
+ *        not via the join, so the Full join can keep all clonotypes for the funnel.
  * @return Map with keys: cloneTable, filterMap, rankingMap, sortedLinkers, clusterColumnHeader, addedCols
  */
-initializeCloneTable := func(pframes, columns, args, datasetSpec) {
+initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterColumn) {
     // Build clonotype table
     cloneTable := pframes.parquetFileBuilder()
     cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
     // Build linker list in SAME ORDER as model
     sortedLinkers := buildSortedLinkers(columns, datasetSpec)
     // Add Filters to table
     addedAxes := []
     filterMap := {}
     rankingMap := {}
     addedCols := false
+    // Add the optional primary filter from PlDatasetSelector. The clone table is
+    // built with a Full join (below), so this column no longer narrows the keyset
+    // via join semantics; clonotypes outside the filter are dropped as a
+    // pre-condition in the filter software (filter.py), before stage tracking.
+    if !is_undefined(inputFilterColumn) {
+        cloneTable.add(inputFilterColumn, {header: "primary_filter"})
+        addedCols = true
+    }
     if len(args.filters) > 0 {
         for i, filter in args.filters {
             // we check for value presence and for actual pcolumn (cases where upstream block is deleted)
@@ -466,30 +532,42 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec) {
         }
     }
-    // Fallback: if no columns added, add a single-axis trunk-keyed sequence
-    // column. Try VDJ CDR3 first
+    // Dense per-clonotype presence column: guarantees the Full join below keeps
+    // every clonotype, so the Selection Plot funnel total matches the whole dataset.
+    // Clonotypes that lack sparse columns (e.g. an enrichment row) now carry null
+    // for those columns and are dropped at the relevant FILTER stage instead of
+    // being silently dropped before stage tracking. The presence column is a light
+    // Int marker — adding the heavy main-sequence column itself would blow up the join.
+    mainSeqCols := append(
+        columns.getColumns("mainSeqs"),
+        columns.getColumns("mainSeqsVdj")...)
+    if len(mainSeqCols) > 0 {
+        presenceCol := deriveClonotypePresence(mainSeqCols[0], datasetSpec)
+        if !is_undefined(presenceCol) {
+            cloneTable.add(presenceCol, {header: "clonotype_presence"})
+            addedCols = true
+        }
+    }
+    // Fallback: if still no columns (no presence column, no filters/ranking),
+    // add the VDJ CDR3 sequence as a single-axis trunk-keyed column.
     if !addedCols {
         cdr3Sequences := columns.getColumns("cdr3Sequences")
         if len(cdr3Sequences) > 0 {
             cloneTable.add(cdr3Sequences[0], {header: "sequence_fallback"})
             addedCols = true
-        } else {
-            peptideMainSeqs := columns.getColumns("peptideMainSeqs")
-            if len(peptideMainSeqs) > 0 {
-                cloneTable.add(peptideMainSeqs[0], {header: "sequence_fallback"})
-                addedCols = true
-            }
         }
     }
-    // Build the table if we have columns
+    // Build the table if we have columns. Full (outer) join so sparse columns
+    // never drop clonotypes; the dense presence column above defines the complete keyset.
     builtTable := undefined
     clusterColumnHeader := undefined
     if addedCols {
         cloneTable.mem("16GiB")
         cloneTable.cpu(1)
-        builtTable = cloneTable.build({joinType: "Inner"})
+        builtTable = cloneTable.build({joinType: "Full"})
         // Resolve diversificationColumn ref to header name
         clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
     }