@platforma-open/milaboratories.top-antibodies.workflow 4.1.2 → 4.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +14 -0
- package/dist/tengo/lib/utils.lib.tengo +90 -12
- package/dist/tengo/tpl/assembling-fasta.plj.gz +0 -0
- package/dist/tengo/tpl/filter-and-sample.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/package.json +4 -4
- package/src/filter-and-sample.tpl.tengo +1 -4
- package/src/main.tpl.tengo +35 -9
- package/src/utils.lib.tengo +94 -16
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.top-antibodies.workflow@4.1.2 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
|
|
3
|
+
> @platforma-open/milaboratories.top-antibodies.workflow@4.2.1 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
|
|
4
4
|
> shx rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
Processing "src/assembling-fasta.tpl.tengo"...
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.top-antibodies.workflow
|
|
2
2
|
|
|
3
|
+
## 4.2.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- b466a9b: Fix Selection Plot funnel starting from fewer clonotypes than the project has. The clone table is now built with a Full join plus a dense per-clonotype presence column instead of an inner join, so clonotypes that lack sparse columns (e.g. an enrichment row) reach the funnel and are dropped at the filter stage that checks the missing column rather than before stage tracking — the funnel total now matches the full clonotype count. The optional primary dataset filter is applied as a row pre-condition in the sampler, and null-ranked or null-diversification clonotypes are dropped before selection so they are never sampled.
|
|
8
|
+
- Updated dependencies [b466a9b]
|
|
9
|
+
- @platforma-open/milaboratories.top-antibodies.sample-clonotypes@2.1.5
|
|
10
|
+
|
|
11
|
+
## 4.2.0
|
|
12
|
+
|
|
13
|
+
### Minor Changes
|
|
14
|
+
|
|
15
|
+
- 8edddd1: Add dataset selector with optional filter dropdown. Replaces the plain dataset dropdown with `PlDatasetSelector`, and inner-joins the selected filter column into the clone table so it narrows every downstream stage (final clonotypes, spectratype, Kabat).
|
|
16
|
+
|
|
3
17
|
## 4.1.2
|
|
4
18
|
|
|
5
19
|
### Patch Changes
|
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
4
4
|
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
5
|
+
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
6
|
+
pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
|
|
7
|
+
pUtil := import("@platforma-sdk/workflow-tengo:pframes.util")
|
|
5
8
|
json := import("json")
|
|
6
9
|
|
|
7
10
|
|
|
@@ -239,20 +242,83 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
|
|
|
239
242
|
|
|
240
243
|
|
|
241
244
|
|
|
242
|
-
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
deriveClonotypePresence := func(mainSeqsCol, datasetSpec) {
|
|
248
|
+
if is_undefined(mainSeqsCol) || is_undefined(mainSeqsCol.spec) || is_undefined(mainSeqsCol.data) {
|
|
249
|
+
return undefined
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
clonotypeAxisName := datasetSpec.axesSpec[1].name
|
|
253
|
+
clonotypeAxis := undefined
|
|
254
|
+
for axis in mainSeqsCol.spec.axesSpec {
|
|
255
|
+
if axis.name == clonotypeAxisName {
|
|
256
|
+
clonotypeAxis = axis
|
|
257
|
+
break
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
if is_undefined(clonotypeAxis) {
|
|
261
|
+
return undefined
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
axisId := pSpec.getAxisId(clonotypeAxis)
|
|
265
|
+
presenceSpec := {
|
|
266
|
+
kind: "PColumn",
|
|
267
|
+
name: "clonotypePresence",
|
|
268
|
+
valueType: "Int"
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
wf := pt.workflow().cpu(1).mem("4GiB")
|
|
272
|
+
wf.frame(pt.p.column("presenceSource", { spec: mainSeqsCol.spec, data: mainSeqsCol.data })).
|
|
273
|
+
select(pt.sc.axis(clonotypeAxis).alias(axisId)).
|
|
274
|
+
withColumns(pt.lit(1).cast("Int").alias("clonotypePresence")).
|
|
275
|
+
saveFrameDirect("presenceFrame", {
|
|
276
|
+
axes: [{ column: axisId, spec: clonotypeAxis }],
|
|
277
|
+
columns: [{ column: "clonotypePresence", spec: presenceSpec }],
|
|
278
|
+
partitionKeyLength: 0
|
|
279
|
+
})
|
|
280
|
+
|
|
281
|
+
presenceColumns := pUtil.pFrameToColumnsMap(wf.run().getFrameDirect("presenceFrame"))
|
|
282
|
+
return presenceColumns["clonotypePresence"]
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterColumn) {
|
|
243
300
|
|
|
244
301
|
cloneTable := pframes.parquetFileBuilder()
|
|
245
302
|
cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
|
|
246
|
-
|
|
303
|
+
|
|
247
304
|
|
|
248
305
|
sortedLinkers := buildSortedLinkers(columns, datasetSpec)
|
|
249
|
-
|
|
306
|
+
|
|
250
307
|
|
|
251
308
|
addedAxes := []
|
|
252
309
|
filterMap := {}
|
|
253
310
|
rankingMap := {}
|
|
254
311
|
addedCols := false
|
|
255
|
-
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
if !is_undefined(inputFilterColumn) {
|
|
318
|
+
cloneTable.add(inputFilterColumn, {header: "primary_filter"})
|
|
319
|
+
addedCols = true
|
|
320
|
+
}
|
|
321
|
+
|
|
256
322
|
if len(args.filters) > 0 {
|
|
257
323
|
for i, filter in args.filters {
|
|
258
324
|
|
|
@@ -468,28 +534,40 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec) {
|
|
|
468
534
|
|
|
469
535
|
|
|
470
536
|
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
mainSeqCols := append(
|
|
542
|
+
columns.getColumns("mainSeqs"),
|
|
543
|
+
columns.getColumns("mainSeqsVdj")...)
|
|
544
|
+
if len(mainSeqCols) > 0 {
|
|
545
|
+
presenceCol := deriveClonotypePresence(mainSeqCols[0], datasetSpec)
|
|
546
|
+
if !is_undefined(presenceCol) {
|
|
547
|
+
cloneTable.add(presenceCol, {header: "clonotype_presence"})
|
|
548
|
+
addedCols = true
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
|
|
471
554
|
if !addedCols {
|
|
472
555
|
cdr3Sequences := columns.getColumns("cdr3Sequences")
|
|
473
556
|
if len(cdr3Sequences) > 0 {
|
|
474
557
|
cloneTable.add(cdr3Sequences[0], {header: "sequence_fallback"})
|
|
475
558
|
addedCols = true
|
|
476
|
-
} else {
|
|
477
|
-
peptideMainSeqs := columns.getColumns("peptideMainSeqs")
|
|
478
|
-
if len(peptideMainSeqs) > 0 {
|
|
479
|
-
cloneTable.add(peptideMainSeqs[0], {header: "sequence_fallback"})
|
|
480
|
-
addedCols = true
|
|
481
|
-
}
|
|
482
559
|
}
|
|
483
560
|
}
|
|
484
561
|
|
|
485
562
|
|
|
563
|
+
|
|
486
564
|
builtTable := undefined
|
|
487
565
|
clusterColumnHeader := undefined
|
|
488
566
|
if addedCols {
|
|
489
567
|
cloneTable.mem("16GiB")
|
|
490
568
|
cloneTable.cpu(1)
|
|
491
|
-
builtTable = cloneTable.build({joinType: "
|
|
492
|
-
|
|
569
|
+
builtTable = cloneTable.build({joinType: "Full"})
|
|
570
|
+
|
|
493
571
|
|
|
494
572
|
clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
|
|
495
573
|
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.top-antibodies.workflow",
|
|
3
|
-
"version": "4.1.2",
|
|
3
|
+
"version": "4.2.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Block Workflow",
|
|
6
6
|
"dependencies": {
|
|
7
|
-
"@platforma-sdk/workflow-tengo": "
|
|
7
|
+
"@platforma-sdk/workflow-tengo": "6.6.3",
|
|
8
8
|
"@platforma-open/milaboratories.software-anarci": "^0.0.3",
|
|
9
|
-
"@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "2.1.4",
|
|
10
9
|
"@platforma-open/milaboratories.top-antibodies.spectratype": "1.8.5",
|
|
10
|
+
"@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "2.1.5",
|
|
11
11
|
"@platforma-open/milaboratories.top-antibodies.umap": "1.2.5",
|
|
12
12
|
"@platforma-open/milaboratories.top-antibodies.anarci-kabat": "1.4.5",
|
|
13
13
|
"@platforma-open/milaboratories.top-antibodies.assembling-fasta": "1.3.4"
|
|
14
14
|
},
|
|
15
15
|
"devDependencies": {
|
|
16
|
-
"@platforma-sdk/tengo-builder": "
|
|
16
|
+
"@platforma-sdk/tengo-builder": "4.0.9"
|
|
17
17
|
},
|
|
18
18
|
"scripts": {
|
|
19
19
|
"build": "shx rm -rf dist && pl-tengo check && pl-tengo build",
|
|
@@ -116,10 +116,7 @@ self.body(func(inputs) {
|
|
|
116
116
|
}
|
|
117
117
|
|
|
118
118
|
if topClonotypes != undefined {
|
|
119
|
-
valueLabels[string(stageIdx)] = "
|
|
120
|
-
valueLabels[string(stageIdx + 1)] = "Selected"
|
|
121
|
-
} else {
|
|
122
|
-
valueLabels[string(stageIdx)] = "Passed Filters"
|
|
119
|
+
valueLabels[string(stageIdx)] = "Selection"
|
|
123
120
|
}
|
|
124
121
|
|
|
125
122
|
// Import selection stage parquet as selectionStage PColumn
|
package/src/main.tpl.tengo
CHANGED
|
@@ -7,6 +7,7 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
|
|
|
7
7
|
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
8
8
|
render := import("@platforma-sdk/workflow-tengo:render")
|
|
9
9
|
pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
|
|
10
|
+
smart := import("@platforma-sdk/workflow-tengo:smart")
|
|
10
11
|
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
11
12
|
kabatConv := import(":pf-kabat-conv")
|
|
12
13
|
|
|
@@ -25,8 +26,15 @@ wf.prepare(func(args){
|
|
|
25
26
|
// We need a table with cluster ID (optional) | clonotype id | selected ranking columns
|
|
26
27
|
bundleBuilder := wf.createPBundleBuilder()
|
|
27
28
|
bundleBuilder.ignoreMissingDomains() // to make query work for both bulk and single cell data
|
|
28
|
-
bundleBuilder.addAnchor("main", args.inputAnchor)
|
|
29
|
-
|
|
29
|
+
bundleBuilder.addAnchor("main", args.inputAnchor)
|
|
30
|
+
|
|
31
|
+
// Optional primary filter from PlDatasetSelector — added to the clone table
|
|
32
|
+
// below. Under the Full join it no longer narrows via join semantics; the
|
|
33
|
+
// filter software drops clonotypes outside it as a pre-condition (filter.py).
|
|
34
|
+
if !is_undefined(args.inputFilter) {
|
|
35
|
+
bundleBuilder.addRef(args.inputFilter)
|
|
36
|
+
}
|
|
37
|
+
|
|
30
38
|
validRanks := false
|
|
31
39
|
if len(args.rankingOrder) > 0 {
|
|
32
40
|
for col in args.rankingOrder {
|
|
@@ -132,8 +140,7 @@ wf.prepare(func(args){
|
|
|
132
140
|
domain: { "pl7.app/alphabet": "aminoacid" }
|
|
133
141
|
}, "scFvPerChainSeqs")
|
|
134
142
|
|
|
135
|
-
//
|
|
136
|
-
// modality-aware fallback when no filter/ranking columns load.
|
|
143
|
+
// Main sequence column
|
|
137
144
|
bundleBuilder.addMulti({
|
|
138
145
|
axes: [{ anchor: "main", idx: 1 }],
|
|
139
146
|
annotations: {
|
|
@@ -141,18 +148,31 @@ wf.prepare(func(args){
|
|
|
141
148
|
"pl7.app/isMainSequence": "true"
|
|
142
149
|
},
|
|
143
150
|
domain: { "pl7.app/alphabet": "aminoacid" }
|
|
144
|
-
}, "
|
|
145
|
-
|
|
151
|
+
}, "mainSeqs")
|
|
152
|
+
bundleBuilder.addMulti({
|
|
153
|
+
axes: [{ anchor: "main", idx: 1 }],
|
|
154
|
+
annotations: {
|
|
155
|
+
"pl7.app/vdj/isAssemblingFeature": "true",
|
|
156
|
+
"pl7.app/vdj/isMainSequence": "true"
|
|
157
|
+
},
|
|
158
|
+
domain: { "pl7.app/alphabet": "aminoacid" }
|
|
159
|
+
}, "mainSeqsVdj")
|
|
160
|
+
|
|
146
161
|
return {
|
|
147
162
|
columns: bundleBuilder.build()
|
|
148
163
|
}
|
|
149
164
|
})
|
|
150
165
|
|
|
151
166
|
wf.body(func(args) {
|
|
152
|
-
// output containers
|
|
167
|
+
// output containers
|
|
153
168
|
outputs := {}
|
|
154
169
|
exports := {}
|
|
155
170
|
|
|
171
|
+
// Expose this block's own id so the model can drop self-produced filter
|
|
172
|
+
// entries from the dataset selector — without this the just-finished
|
|
173
|
+
// sampled subset shows up as a filter option on the next configuration.
|
|
174
|
+
outputs["selfBlockId"] = smart.createJsonResource(wf.getBlockId())
|
|
175
|
+
|
|
156
176
|
if !is_undefined(args.inputAnchor) {
|
|
157
177
|
// Input arguments
|
|
158
178
|
columns := args.columns
|
|
@@ -164,8 +184,14 @@ wf.body(func(args) {
|
|
|
164
184
|
isPeptide := datasetSpec.axesSpec[1].name == "pl7.app/variantKey"
|
|
165
185
|
|
|
166
186
|
////////// Clonotype Filtering //////////
|
|
167
|
-
// Initialize and build clone table with all columns
|
|
168
|
-
|
|
187
|
+
// Initialize and build clone table with all columns. When the user
|
|
188
|
+
// picked an optional primary filter in PlDatasetSelector, fetch the
|
|
189
|
+
// resolved column so initializeCloneTable can add it to the clone table (built with a Full join; the filter software drops clonotypes outside the filter).
|
|
190
|
+
inputFilterColumn := undefined
|
|
191
|
+
if !is_undefined(args.inputFilter) {
|
|
192
|
+
inputFilterColumn = columns.getColumn(args.inputFilter)
|
|
193
|
+
}
|
|
194
|
+
tableInit := utils.initializeCloneTable(pframes, columns, args, datasetSpec, inputFilterColumn)
|
|
169
195
|
cloneTable := tableInit.cloneTable
|
|
170
196
|
filterMap := tableInit.filterMap
|
|
171
197
|
rankingMap := tableInit.rankingMap
|
package/src/utils.lib.tengo
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
4
4
|
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
5
|
+
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
6
|
+
pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
|
|
7
|
+
pUtil := import("@platforma-sdk/workflow-tengo:pframes.util")
|
|
5
8
|
json := import("json")
|
|
6
9
|
|
|
7
10
|
// PColumn names used as source columns for In Vivo Score computation.
|
|
@@ -229,30 +232,93 @@ resolveClusterColumnHeader := func(args, columns, sortedLinkers) {
|
|
|
229
232
|
return undefined
|
|
230
233
|
}
|
|
231
234
|
|
|
235
|
+
/**
|
|
236
|
+
* Derives a lightweight, dense, per-clonotype presence column from a "main
|
|
237
|
+
* sequence" column. The clone table is built with a Full join, which keeps only
|
|
238
|
+
* the union of keys across added columns; sparse columns (e.g. enrichment, which
|
|
239
|
+
* upstream emits only for clusters passing its pre-filter) would otherwise leave
|
|
240
|
+
* whole clonotypes out of the funnel. A main-sequence column exists once per
|
|
241
|
+
* clonotype across the entire dataset, so it defines the complete keyset.
|
|
242
|
+
*
|
|
243
|
+
* @param mainSeqsCol - A main-sequence PColumn {spec, data}
|
|
244
|
+
* @param datasetSpec - Dataset specification; axesSpec[1] is the clonotype axis
|
|
245
|
+
* @return The derived presence PColumn {spec, data}, or undefined if unavailable
|
|
246
|
+
*/
|
|
247
|
+
deriveClonotypePresence := func(mainSeqsCol, datasetSpec) {
|
|
248
|
+
if is_undefined(mainSeqsCol) || is_undefined(mainSeqsCol.spec) || is_undefined(mainSeqsCol.data) {
|
|
249
|
+
return undefined
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
clonotypeAxisName := datasetSpec.axesSpec[1].name
|
|
253
|
+
clonotypeAxis := undefined
|
|
254
|
+
for axis in mainSeqsCol.spec.axesSpec {
|
|
255
|
+
if axis.name == clonotypeAxisName {
|
|
256
|
+
clonotypeAxis = axis
|
|
257
|
+
break
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
if is_undefined(clonotypeAxis) {
|
|
261
|
+
return undefined
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
axisId := pSpec.getAxisId(clonotypeAxis)
|
|
265
|
+
presenceSpec := {
|
|
266
|
+
kind: "PColumn",
|
|
267
|
+
name: "clonotypePresence",
|
|
268
|
+
valueType: "Int"
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
wf := pt.workflow().cpu(1).mem("4GiB")
|
|
272
|
+
wf.frame(pt.p.column("presenceSource", { spec: mainSeqsCol.spec, data: mainSeqsCol.data })).
|
|
273
|
+
select(pt.sc.axis(clonotypeAxis).alias(axisId)).
|
|
274
|
+
withColumns(pt.lit(1).cast("Int").alias("clonotypePresence")).
|
|
275
|
+
saveFrameDirect("presenceFrame", {
|
|
276
|
+
axes: [{ column: axisId, spec: clonotypeAxis }],
|
|
277
|
+
columns: [{ column: "clonotypePresence", spec: presenceSpec }],
|
|
278
|
+
partitionKeyLength: 0
|
|
279
|
+
})
|
|
280
|
+
|
|
281
|
+
presenceColumns := pUtil.pFrameToColumnsMap(wf.run().getFrameDirect("presenceFrame"))
|
|
282
|
+
return presenceColumns["clonotypePresence"]
|
|
283
|
+
}
|
|
284
|
+
|
|
232
285
|
/**
|
|
233
286
|
* Initializes and builds complete clone table with all columns.
|
|
234
287
|
* Handles filters, ranking columns, linkers, cluster sizes, and fallback columns.
|
|
235
|
-
*
|
|
288
|
+
*
|
|
236
289
|
* @param pframes - PFrames import
|
|
237
290
|
* @param columns - PBundle containing all columns
|
|
238
291
|
* @param args - Arguments containing filters, rankingOrder, diversificationColumn
|
|
239
292
|
* @param datasetSpec - Dataset specification with axes
|
|
293
|
+
* @param inputFilterColumn - Optional resolved column from the primary filter
|
|
294
|
+
* (PlDatasetSelector). Added to the clone table; clonotypes missing from it
|
|
295
|
+
* are dropped as a pre-condition in the filter software (see filter.py),
|
|
296
|
+
* not via the join, so the Full join can keep all clonotypes for the funnel.
|
|
240
297
|
* @return Map with keys: cloneTable, filterMap, rankingMap, sortedLinkers, clusterColumnHeader, addedCols
|
|
241
298
|
*/
|
|
242
|
-
initializeCloneTable := func(pframes, columns, args, datasetSpec) {
|
|
299
|
+
initializeCloneTable := func(pframes, columns, args, datasetSpec, inputFilterColumn) {
|
|
243
300
|
// Build clonotype table
|
|
244
301
|
cloneTable := pframes.parquetFileBuilder()
|
|
245
302
|
cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
|
|
246
|
-
|
|
303
|
+
|
|
247
304
|
// Build linker list in SAME ORDER as model
|
|
248
305
|
sortedLinkers := buildSortedLinkers(columns, datasetSpec)
|
|
249
|
-
|
|
306
|
+
|
|
250
307
|
// Add Filters to table
|
|
251
308
|
addedAxes := []
|
|
252
309
|
filterMap := {}
|
|
253
310
|
rankingMap := {}
|
|
254
311
|
addedCols := false
|
|
255
|
-
|
|
312
|
+
|
|
313
|
+
// Add the optional primary filter from PlDatasetSelector. The clone table is
|
|
314
|
+
// built with a Full join (below), so this column no longer narrows the keyset
|
|
315
|
+
// via join semantics; clonotypes outside the filter are dropped as a
|
|
316
|
+
// pre-condition in the filter software (filter.py), before stage tracking.
|
|
317
|
+
if !is_undefined(inputFilterColumn) {
|
|
318
|
+
cloneTable.add(inputFilterColumn, {header: "primary_filter"})
|
|
319
|
+
addedCols = true
|
|
320
|
+
}
|
|
321
|
+
|
|
256
322
|
if len(args.filters) > 0 {
|
|
257
323
|
for i, filter in args.filters {
|
|
258
324
|
// we check for value presence and for actual pcolumn (cases where upstream block is deleted)
|
|
@@ -466,30 +532,42 @@ initializeCloneTable := func(pframes, columns, args, datasetSpec) {
|
|
|
466
532
|
}
|
|
467
533
|
}
|
|
468
534
|
|
|
469
|
-
//
|
|
470
|
-
//
|
|
535
|
+
// Dense per-clonotype presence column: guarantees the Full join below keeps
|
|
536
|
+
// every clonotype, so the Selection Plot funnel total matches the whole dataset.
|
|
537
|
+
// Clonotypes that lack sparse columns (e.g. an enrichment row) now carry null
|
|
538
|
+
// for those columns and are dropped at the relevant FILTER stage instead of
|
|
539
|
+
// being silently dropped before stage tracking. The presence column is a light
|
|
540
|
+
// Int marker — adding the heavy main-sequence column itself would blow up the join.
|
|
541
|
+
mainSeqCols := append(
|
|
542
|
+
columns.getColumns("mainSeqs"),
|
|
543
|
+
columns.getColumns("mainSeqsVdj")...)
|
|
544
|
+
if len(mainSeqCols) > 0 {
|
|
545
|
+
presenceCol := deriveClonotypePresence(mainSeqCols[0], datasetSpec)
|
|
546
|
+
if !is_undefined(presenceCol) {
|
|
547
|
+
cloneTable.add(presenceCol, {header: "clonotype_presence"})
|
|
548
|
+
addedCols = true
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
// Fallback: if still no columns (no presence column, no filters/ranking),
|
|
553
|
+
// add the VDJ CDR3 sequence as a single-axis trunk-keyed column.
|
|
471
554
|
if !addedCols {
|
|
472
555
|
cdr3Sequences := columns.getColumns("cdr3Sequences")
|
|
473
556
|
if len(cdr3Sequences) > 0 {
|
|
474
557
|
cloneTable.add(cdr3Sequences[0], {header: "sequence_fallback"})
|
|
475
558
|
addedCols = true
|
|
476
|
-
} else {
|
|
477
|
-
peptideMainSeqs := columns.getColumns("peptideMainSeqs")
|
|
478
|
-
if len(peptideMainSeqs) > 0 {
|
|
479
|
-
cloneTable.add(peptideMainSeqs[0], {header: "sequence_fallback"})
|
|
480
|
-
addedCols = true
|
|
481
|
-
}
|
|
482
559
|
}
|
|
483
560
|
}
|
|
484
561
|
|
|
485
|
-
// Build the table if we have columns
|
|
562
|
+
// Build the table if we have columns. Full (outer) join so sparse columns
|
|
563
|
+
// never drop clonotypes; the dense presence column above defines the complete keyset.
|
|
486
564
|
builtTable := undefined
|
|
487
565
|
clusterColumnHeader := undefined
|
|
488
566
|
if addedCols {
|
|
489
567
|
cloneTable.mem("16GiB")
|
|
490
568
|
cloneTable.cpu(1)
|
|
491
|
-
builtTable = cloneTable.build({joinType: "
|
|
492
|
-
|
|
569
|
+
builtTable = cloneTable.build({joinType: "Full"})
|
|
570
|
+
|
|
493
571
|
// Resolve diversificationColumn ref to header name
|
|
494
572
|
clusterColumnHeader = resolveClusterColumnHeader(args, columns, sortedLinkers)
|
|
495
573
|
}
|