npm - @platforma-open/milaboratories.top-antibodies.workflow - Versions diffs - 4.2.0 → 4.3.1 - Mend

@platforma-open/milaboratories.top-antibodies.workflow 4.2.0 → 4.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/.turbo/turbo-build.log +1 -1
package/CHANGELOG.md +31 -0
package/dist/tengo/lib/utils.lib.tengo +79 -8
package/dist/tengo/tpl/assembling-fasta.plj.gz +0 -0
package/dist/tengo/tpl/filter-and-sample.plj.gz +0 -0
package/dist/tengo/tpl/main.plj.gz +0 -0
package/package.json +8 -8
package/src/filter-and-sample.tpl.tengo +1 -4
package/src/main.tpl.tengo +14 -7
package/src/utils.lib.tengo +88 -17

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,6 +1,6 @@
  WARN  Issue while reading "/home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
-> @platforma-open/milaboratories.top-antibodies.workflow@4.2.0 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
+> @platforma-open/milaboratories.top-antibodies.workflow@4.3.1 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
 > shx rm -rf dist && pl-tengo check && pl-tengo build
 Processing "src/assembling-fasta.tpl.tengo"...

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,36 @@
 # @platforma-open/milaboratories.top-antibodies.workflow
+## 4.3.1
+### Patch Changes
+- a7b65c0: No Op Change To Unblock
+- Updated dependencies [a7b65c0]
+  - @platforma-open/milaboratories.top-antibodies.anarci-kabat@1.4.6
+  - @platforma-open/milaboratories.top-antibodies.assembling-fasta@1.3.5
+  - @platforma-open/milaboratories.top-antibodies.sample-clonotypes@2.2.1
+  - @platforma-open/milaboratories.top-antibodies.spectratype@1.8.6
+  - @platforma-open/milaboratories.top-antibodies.umap@1.2.6
+## 4.3.0
+### Minor Changes
+- 0a06331: New changeset
+### Patch Changes
+- Updated dependencies [0a06331]
+  - @platforma-open/milaboratories.top-antibodies.sample-clonotypes@2.2.0
+## 4.2.1
+### Patch Changes
+- b466a9b: Fix Selection Plot funnel starting from fewer clonotypes than the project has. The clone table is now built with a Full join plus a dense per-clonotype presence column instead of an inner join, so clonotypes that lack sparse columns (e.g. an enrichment row) reach the funnel and are dropped at the filter stage that checks the missing column rather than before stage tracking — the funnel total now matches the full clonotype count. The optional primary dataset filter is applied as a row pre-condition in the sampler, and null-ranked or null-diversification clonotypes are dropped before selection so they are never sampled.
+- Updated dependencies [b466a9b]
+  - @platforma-open/milaboratories.top-antibodies.sample-clonotypes@2.1.5
 ## 4.2.0
 ### Minor Changes

package/dist/tengo/lib/utils.lib.tengo CHANGED Viewed

@@ -2,6 +2,9 @@
 ll := import("@platforma-sdk/workflow-tengo:ll")
 slices := import("@platforma-sdk/workflow-tengo:slices")
+pt := import("@platforma-sdk/workflow-tengo:pt")
+pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
+pUtil := import("@platforma-sdk/workflow-tengo:pframes.util")
 json := import("json")
@@ -241,6 +244,57 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
+deriveClonotypePresence := func(mainSeqsCol, datasetSpec) {
+    if is_undefined(mainSeqsCol) || is_undefined(mainSeqsCol.spec) || is_undefined(mainSeqsCol.data) {
+        return undefined
+    }
+    clonotypeAxisName := datasetSpec.axesSpec[1].name
+    clonotypeAxis := undefined
+    for axis in mainSeqsCol.spec.axesSpec {
+        if axis.name == clonotypeAxisName {
+            clonotypeAxis = axis
+            break
+        }
+    }
+    if is_undefined(clonotypeAxis) {
+        return undefined
+    }
+    axisId := pSpec.getAxisId(clonotypeAxis)
+    presenceSpec := {
+        kind: "PColumn",
+        name: "clonotypePresence",
+        valueType: "Int"
+    }
+    wf := pt.workflow().cpu(1).mem("4GiB")
+    wf.frame(pt.p.column("presenceSource", { spec: mainSeqsCol.spec, data: mainSeqsCol.data })).
+        select(pt.sc.axis(clonotypeAxis).alias(axisId)).
+        withColumns(pt.lit(1).cast("Int").alias("clonotypePresence")).
+        saveFrameDirect("presenceFrame", {
+            axes: [{ column: axisId, spec: clonotypeAxis }],
+            columns: [{ column: "clonotypePresence", spec: presenceSpec }],
+            partitionKeyLength: 0
+        })
+    presenceColumns := pUtil.pFrameToColumnsMap(wf.run().getFrameDirect("presenceFrame"))
+    return presenceColumns["clonotypePresence"]
+}
 initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterColumn) {
@@ -480,28 +534,45 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterCol
+    mainSeqCols := columns.getColumns("mainSeqs")
+    for col in columns.getColumns("mainSeqsVdj") {
+        mainSeqCols = append(mainSeqCols, col)
+    }
+    if len(mainSeqCols) > 0 {
+        presenceCol := deriveClonotypePresence(mainSeqCols[0], datasetSpec)
+        if !is_undefined(presenceCol) {
+            cloneTable.add(presenceCol, {header: "clonotype_presence"})
+            addedCols = true
+        }
+    }
     if !addedCols {
         cdr3Sequences := columns.getColumns("cdr3Sequences")
         if len(cdr3Sequences) > 0 {
             cloneTable.add(cdr3Sequences[0], {header: "sequence_fallback"})
             addedCols = true
-        } else {
-            peptideMainSeqs := columns.getColumns("peptideMainSeqs")
-            if len(peptideMainSeqs) > 0 {
-                cloneTable.add(peptideMainSeqs[0], {header: "sequence_fallback"})
-                addedCols = true
-            }
         }
     }
     builtTable := undefined
     clusterColumnHeader := undefined
     if addedCols {
         cloneTable.mem("16GiB")
         cloneTable.cpu(1)
-        builtTable = cloneTable.build({joinType: "Inner"})
+        builtTable = cloneTable.build({joinType: "Full"})
         clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
     }

package/dist/tengo/tpl/assembling-fasta.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/filter-and-sample.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,19 +1,19 @@
 {
   "name": "@platforma-open/milaboratories.top-antibodies.workflow",
-  "version": "4.2.0",
+  "version": "4.3.1",
   "type": "module",
   "description": "Block Workflow",
   "dependencies": {
-    "@platforma-sdk/workflow-tengo": "5.26.0",
+    "@platforma-sdk/workflow-tengo": "6.6.5",
     "@platforma-open/milaboratories.software-anarci": "^0.0.3",
-    "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "2.1.4",
-    "@platforma-open/milaboratories.top-antibodies.umap": "1.2.5",
-    "@platforma-open/milaboratories.top-antibodies.assembling-fasta": "1.3.4",
-    "@platforma-open/milaboratories.top-antibodies.anarci-kabat": "1.4.5",
-    "@platforma-open/milaboratories.top-antibodies.spectratype": "1.8.5"
+    "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "2.2.1",
+    "@platforma-open/milaboratories.top-antibodies.umap": "1.2.6",
+    "@platforma-open/milaboratories.top-antibodies.spectratype": "1.8.6",
+    "@platforma-open/milaboratories.top-antibodies.anarci-kabat": "1.4.6",
+    "@platforma-open/milaboratories.top-antibodies.assembling-fasta": "1.3.5"
   },
   "devDependencies": {
-    "@platforma-sdk/tengo-builder": "3.0.3"
+    "@platforma-sdk/tengo-builder": "4.0.9"
   },
   "scripts": {
     "build": "shx rm -rf dist && pl-tengo check && pl-tengo build",

package/src/filter-and-sample.tpl.tengo CHANGED Viewed

@@ -116,10 +116,7 @@ self.body(func(inputs) {
     }
     if topClonotypes != undefined {
-        valueLabels[string(stageIdx)] = "Filtered"
-        valueLabels[string(stageIdx + 1)] = "Selected"
-    } else {
-        valueLabels[string(stageIdx)] = "Passed Filters"
+        valueLabels[string(stageIdx)] = "Selection"
     }
     // Import selection stage parquet as selectionStage PColumn

package/src/main.tpl.tengo CHANGED Viewed

@@ -28,9 +28,9 @@ wf.prepare(func(args){
     bundleBuilder.ignoreMissingDomains() // to make query work for both bulk and single cell data
     bundleBuilder.addAnchor("main", args.inputAnchor)
-    // Optional primary filter from PlDatasetSelector — inner-joined into the
-    // clone table below so the filter narrows every downstream stage
-    // (finalClonotypes, spectratype, Kabat, etc.).
+    // Optional primary filter from PlDatasetSelector — added to the clone table
+    // below. Under the Full join it no longer narrows via join semantics; the
+    // filter software drops clonotypes outside it as a pre-condition (filter.py).
     if !is_undefined(args.inputFilter) {
         bundleBuilder.addRef(args.inputFilter)
     }
@@ -140,8 +140,7 @@ wf.prepare(func(args){
 		domain: { "pl7.app/alphabet": "aminoacid" }
 	}, "scFvPerChainSeqs")
-	// Peptide main sequence — single-axis column on variantKey, used as
-	// modality-aware fallback when no filter/ranking columns load.
+	// Main sequence column (axis 1 of the anchor) — source for the dense per-clonotype presence column
 	bundleBuilder.addMulti({
 		axes: [{ anchor: "main", idx: 1 }],
 		annotations: {
@@ -149,8 +148,16 @@ wf.prepare(func(args){
 			"pl7.app/isMainSequence": "true"
 		},
 		domain: { "pl7.app/alphabet": "aminoacid" }
-	}, "peptideMainSeqs")
+	}, "mainSeqs")
+	bundleBuilder.addMulti({
+		axes: [{ anchor: "main", idx: 1 }],
+		annotations: {
+			"pl7.app/vdj/isAssemblingFeature": "true",
+			"pl7.app/vdj/isMainSequence": "true"
+		},
+		domain: { "pl7.app/alphabet": "aminoacid" }
+	}, "mainSeqsVdj")
     return {
         columns: bundleBuilder.build()
     }

package/src/utils.lib.tengo CHANGED Viewed

@@ -2,6 +2,9 @@
 ll := import("@platforma-sdk/workflow-tengo:ll")
 slices := import("@platforma-sdk/workflow-tengo:slices")
+pt := import("@platforma-sdk/workflow-tengo:pt")
+pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
+pUtil := import("@platforma-sdk/workflow-tengo:pframes.util")
 json := import("json")
 // PColumn names used as source columns for In Vivo Score computation.
@@ -229,6 +232,56 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
     return undefined
 }
+/**
+ * Derives a lightweight, dense, per-clonotype presence column from a "main
+ * sequence" column. The clone table is built with a Full join, which keeps the
+ * union of keys across the added columns; if only sparse columns were added
+ * (e.g. enrichment, which upstream emits only for clusters passing its
+ * pre-filter), clonotypes absent from all of them would never reach the funnel.
+ * A main-sequence column has one entry per clonotype, so it defines the full keyset.
+ *
+ * @param mainSeqsCol - A main-sequence PColumn {spec, data}
+ * @param datasetSpec - Dataset specification; axesSpec[1] is the clonotype axis
+ * @return The derived presence PColumn {spec, data}, or undefined if unavailable
+ */
+deriveClonotypePresence := func(mainSeqsCol, datasetSpec) {
+    if is_undefined(mainSeqsCol) || is_undefined(mainSeqsCol.spec) || is_undefined(mainSeqsCol.data) {
+        return undefined
+    }
+    clonotypeAxisName := datasetSpec.axesSpec[1].name
+    clonotypeAxis := undefined
+    for axis in mainSeqsCol.spec.axesSpec {
+        if axis.name == clonotypeAxisName {
+            clonotypeAxis = axis
+            break
+        }
+    }
+    if is_undefined(clonotypeAxis) {
+        return undefined
+    }
+    axisId := pSpec.getAxisId(clonotypeAxis)
+    presenceSpec := {
+        kind: "PColumn",
+        name: "clonotypePresence",
+        valueType: "Int"
+    }
+    wf := pt.workflow().cpu(1).mem("4GiB")
+    wf.frame(pt.p.column("presenceSource", { spec: mainSeqsCol.spec, data: mainSeqsCol.data })).
+        select(pt.sc.axis(clonotypeAxis).alias(axisId)).
+        withColumns(pt.lit(1).cast("Int").alias("clonotypePresence")).
+        saveFrameDirect("presenceFrame", {
+            axes: [{ column: axisId, spec: clonotypeAxis }],
+            columns: [{ column: "clonotypePresence", spec: presenceSpec }],
+            partitionKeyLength: 0
+        })
+    presenceColumns := pUtil.pFrameToColumnsMap(wf.run().getFrameDirect("presenceFrame"))
+    return presenceColumns["clonotypePresence"]
+}
 /**
  * Initializes and builds complete clone table with all columns.
  * Handles filters, ranking columns, linkers, cluster sizes, and fallback columns.
@@ -238,8 +291,9 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
  * @param args - Arguments containing filters, rankingOrder, diversificationColumn
  * @param datasetSpec - Dataset specification with axes
  * @param inputFilterColumn - Optional resolved column from the primary filter
- *        (PlDatasetSelector). Added with an inner join so missing keys are
- *        dropped from the clone table — narrows every downstream stage.
+ *        (PlDatasetSelector). Added to the clone table; clonotypes missing from it
+ *        are dropped as a pre-condition in the filter software (see filter.py),
+ *        not via the join, so the Full join can keep all clonotypes for the funnel.
  * @return Map with keys: cloneTable, filterMap, rankingMap, sortedLinkers, clusterColumnHeader, addedCols
  */
 initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterColumn) {
@@ -256,10 +310,10 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterCol
     rankingMap := {}
     addedCols := false
-    // Apply the optional primary filter from PlDatasetSelector first. The
-    // builder's inner-join keeps the rows where every added column has a
-    // value, so once this column is present rows missing from the filter are
-    // dropped from the entire pipeline.
+    // Add the optional primary filter from PlDatasetSelector. The clone table is
+    // built with a Full join (below), so this column no longer narrows the keyset
+    // via join semantics; clonotypes outside the filter are dropped as a
+    // pre-condition in the filter software (filter.py), before stage tracking.
     if !is_undefined(inputFilterColumn) {
         cloneTable.add(inputFilterColumn, {header: "primary_filter"})
         addedCols = true
@@ -478,30 +532,47 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterCol
         }
     }
-    // Fallback: if no columns added, add a single-axis trunk-keyed sequence
-    // column. Try VDJ CDR3 first
+    // Dense per-clonotype presence column: guarantees the Full join below keeps
+    // every clonotype, so the Selection Plot funnel total matches the whole dataset.
+    // Clonotypes that lack sparse columns (e.g. an enrichment row) now carry null
+    // for those columns and are dropped at the relevant FILTER stage instead of
+    // being silently dropped before stage tracking. The presence column is a light
+    // Int marker — adding the heavy main-sequence column itself would blow up the join.
+    // Combine both groups with a loop rather than `append(a, b...)`: Tengo's
+    // `append` requires >=2 args, so spreading an empty second group (e.g. the
+    // peptide case, which has no VDJ main sequences) would collapse to a
+    // single-arg call and fail with "wrong number of arguments".
+    mainSeqCols := columns.getColumns("mainSeqs")
+    for col in columns.getColumns("mainSeqsVdj") {
+        mainSeqCols = append(mainSeqCols, col)
+    }
+    if len(mainSeqCols) > 0 {
+        presenceCol := deriveClonotypePresence(mainSeqCols[0], datasetSpec)
+        if !is_undefined(presenceCol) {
+            cloneTable.add(presenceCol, {header: "clonotype_presence"})
+            addedCols = true
+        }
+    }
+    // Fallback: if still no columns (no presence column, no filters/ranking),
+    // add the VDJ CDR3 sequence as a single-axis trunk-keyed column.
     if !addedCols {
         cdr3Sequences := columns.getColumns("cdr3Sequences")
         if len(cdr3Sequences) > 0 {
             cloneTable.add(cdr3Sequences[0], {header: "sequence_fallback"})
             addedCols = true
-        } else {
-            peptideMainSeqs := columns.getColumns("peptideMainSeqs")
-            if len(peptideMainSeqs) > 0 {
-                cloneTable.add(peptideMainSeqs[0], {header: "sequence_fallback"})
-                addedCols = true
-            }
         }
     }
-    // Build the table if we have columns
+    // Build the table if we have columns. Full (outer) join so sparse columns
+    // never drop clonotypes; the dense presence column above defines the complete keyset.
     builtTable := undefined
     clusterColumnHeader := undefined
     if addedCols {
         cloneTable.mem("16GiB")
         cloneTable.cpu(1)
-        builtTable = cloneTable.build({joinType: "Inner"})
+        builtTable = cloneTable.build({joinType: "Full"})
         // Resolve diversificationColumn ref to header name
         clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
     }