@platforma-open/milaboratories.top-antibodies.workflow 1.15.1 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +3 -3
- package/CHANGELOG.md +11 -0
- package/dist/index.cjs +0 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +0 -1
- package/dist/tengo/lib/sampled-cols-conv.lib.tengo +1 -1
- package/dist/tengo/lib/utils.lib.tengo +139 -0
- package/dist/tengo/tpl/assembling-fasta.plj.gz +0 -0
- package/dist/tengo/tpl/filter-and-sample.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/package.json +6 -6
- package/src/assembling-fasta.tpl.tengo +6 -6
- package/src/filter-and-sample.tpl.tengo +30 -20
- package/src/main.tpl.tengo +465 -6
- package/src/sampled-cols-conv.lib.tengo +1 -1
- package/src/utils.lib.tengo +139 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/src/prerun.tpl.tengo +0 -495
package/src/main.tpl.tengo
CHANGED
@@ -1,13 +1,472 @@
+// Main template for clonotype filtering
 wf := import("@platforma-sdk/workflow-tengo:workflow")
-
+exec := import("@platforma-sdk/workflow-tengo:exec")
+assets := import("@platforma-sdk/workflow-tengo:assets")
+xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
+pframes := import("@platforma-sdk/workflow-tengo:pframes")
+slices := import("@platforma-sdk/workflow-tengo:slices")
+render := import("@platforma-sdk/workflow-tengo:render")
+ll := import("@platforma-sdk/workflow-tengo:ll")
+kabatConv := import(":pf-kabat-conv")
 
-
-
+spectratypeConv := import(":pf-spectratype-conv")
+vjUsageConv := import(":pf-vj-usage-conv")
+utils := import(":utils")
 
-
+filterAndSampleTpl := assets.importTemplate(":filter-and-sample")
+
+wf.prepare(func(args){
+  if is_undefined(args.inputAnchor) {
+    return {
+      columns: wf.createPBundleBuilder().build()
+    }
+  }
+  // We need a table with cluster ID (optional) | clonotype id | selected ranking columns
+  bundleBuilder := wf.createPBundleBuilder()
+  bundleBuilder.ignoreMissingDomains() // to make the query work for both bulk and single-cell data
+  bundleBuilder.addAnchor("main", args.inputAnchor)
+
+  validRanks := false
+  if len(args.rankingOrder) > 0 {
+    for col in args.rankingOrder {
+      // For cases where the user is selecting the table to filter
+      if col.value != undefined {
+        bundleBuilder.addAnchor(col.value.anchorName, col.value.anchorRef)
+        bundleBuilder.addSingle(col.value.column)
+        validRanks = true
+      }
+    }
+  }
+
+  // Load filter columns
+  if len(args.filters) > 0 {
+    for filter in args.filters {
+      if filter.value != undefined {
+        bundleBuilder.addAnchor(filter.value.anchorName, filter.value.anchorRef)
+        bundleBuilder.addSingle(filter.value.column)
+      }
+    }
+  }
+
+
+  // Add linker column
+  bundleBuilder.addMulti({
+    axes: [{ anchor: "main", idx: 1 }], // this will do partial axes match (unlike in the model)
+    annotations: { "pl7.app/isLinkerColumn": "true" },
+    partialAxesMatch: true
+  }, "linkers")
+
+  // Add cluster size columns from clustering blocks
+  bundleBuilder.addMulti({
+    name: "pl7.app/vdj/clustering/clusterSize",
+    partialAxesMatch: true
+  }, "clusterSizes")
+
+  // Add CDR3 sequences
+  bundleBuilder.addMulti({
+    axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
+    name: "pl7.app/vdj/sequence",
+    domain: {
+      "pl7.app/alphabet": "aminoacid",
+      "pl7.app/vdj/feature": "CDR3" // Specify CDR3 feature
+    }
+  }, "cdr3Sequences") // New collection name for CDR3 sequences
+
+  // Add V gene
+  bundleBuilder.addMulti({
+    axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
+    name: "pl7.app/vdj/geneHit",
+    domain: {
+      "pl7.app/vdj/reference": "VGene"
+    }
+  }, "VGenes")
+
+  // Add J gene
+  bundleBuilder.addMulti({
+    axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
+    name: "pl7.app/vdj/geneHit",
+    domain: {
+      "pl7.app/vdj/reference": "JGene"
+    }
+  }, "JGenes")
+
+  // Add assembling feature aminoacid sequences (bulk, sc, scFv)
+  bundleBuilder.addMulti({
+    axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
+    annotations: { "pl7.app/vdj/isAssemblingFeature": "true" },
+    domain: { "pl7.app/alphabet": "aminoacid" }
+  }, "assemblingAaSeqs")
+
   return {
-
-    exports: {}
+    columns: bundleBuilder.build()
   }
 })
 
+wf.body(func(args) {
+  // output containers
+  outputs := {}
+
+  if !is_undefined(args.inputAnchor) {
+    // Input arguments
+    columns := args.columns
+    datasetSpec := columns.getSpec(args.inputAnchor)
+    topClonotypes := args.topClonotypes
+
+    // Needed conditional variable
+    isSingleCell := datasetSpec.axesSpec[1].name == "pl7.app/vdj/scClonotypeKey"
+
+    ////////// Clonotype Filtering //////////
+    // Build clonotype table
+    cloneTable := pframes.parquetFileBuilder()
+    cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
+
+    // Add filter columns to the table
+    addedAxes := []
+    filterMap := {}
+    rankingMap := {}
+    addedCols := false
+    if len(args.filters) > 0 {
+      for i, filter in args.filters {
+        // we check for value presence and for an actual pcolumn (cases where the upstream block is deleted)
+        if filter.value != undefined && columns.getColumn(filter.value.column).spec != undefined {
+          // Columns added here might also be in the ranking list, so we add default IDs
+          cloneTable.add(columns.getColumn(filter.value.column),
+            {header: "Filter_" + string(i), id: "filter_" + string(i)})
+          addedCols = true
+          // Store reference value and filter type associated with this column
+          filterMap["Filter_" + string(i)] = filter.filter
+          filterMap["Filter_" + string(i)]["valueType"] = columns.getSpec(filter.value.column).valueType
+
+          // If the column does not have the main anchor axis we have to include its own axes
+          colsSpec := columns.getSpec(filter.value.column)
+          axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
+          if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
+            for na, ax in colsSpec.axesSpec {
+              if ax.name != datasetSpec.axesSpec[1].name {
+                cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
+                addedAxes = append(addedAxes, ax.name)
+              }
+            }
+          }
+        }
+      }
+    }
+
+    // Add ranking columns to the table
+    validRanks := false
+    clusterPropertyIdx := 0
+    clonotypePropertyIdx := 0
+
+    if len(args.rankingOrder) > 0 {
+      for i, col in args.rankingOrder {
+        // we check for value presence and for an actual pcolumn (cases where the upstream block is deleted)
+        if col.value != undefined && columns.getColumn(col.value.column).spec != undefined {
+          validRanks = true
+
+          // Process the ranking column to determine header and cluster axis
+          colsSpec := columns.getSpec(col.value.column)
+          linkerColumns := columns.getColumns("linkers")
+          result := utils.processRankingColumn(colsSpec, datasetSpec.axesSpec[1].name, linkerColumns, clusterPropertyIdx)
+
+          header := ""
+          if result.isClusterProperty {
+            header = result.header
+            clusterPropertyIdx = result.newClusterPropertyIdx
+
+            // Add cluster axis with matching index
+            for na, ax in colsSpec.axesSpec {
+              if ax.name != datasetSpec.axesSpec[1].name && !slices.hasElement(addedAxes, ax.name) {
+                axisHeader := "cluster_" + string(result.clusterAxisIdx)
+                cloneTable.setAxisHeader(ax, axisHeader)
+                addedAxes = append(addedAxes, ax.name)
+              }
+            }
+          } else {
+            header = "Col" + string(clonotypePropertyIdx)
+            clonotypePropertyIdx = clonotypePropertyIdx + 1
+          }
+
+          cloneTable.add(columns.getColumn(col.value.column), {header: header})
+          addedCols = true
+          rankingMap[header] = col.rankingOrder
+        }
+      }
+    }
+
+    // Get linker columns if needed
+    linkerAxisSpec := {}
+    linkerClusterIdAxes := []
+    if len(columns.getColumns("linkers")) > 0 {
+      for i, col in columns.getColumns("linkers") {
+        clusterIdAxis := undefined
+        if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
+          cloneTable.add(col, {header: "linker." + string(i)})
+          cloneTable.setAxisHeader(col.spec.axesSpec[0], "cluster_" + string(i))
+          linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[0]
+          clusterIdAxis = col.spec.axesSpec[0]
+          addedCols = true
+        } else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
+          cloneTable.add(col, {header: "linker." + string(i)})
+          cloneTable.setAxisHeader(col.spec.axesSpec[1], "cluster_" + string(i))
+          linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[1]
+          clusterIdAxis = col.spec.axesSpec[1]
+          addedCols = true
+        }
+        // Collect clusterId axes from linker columns to match cluster size columns
+        if !is_undefined(clusterIdAxis) && clusterIdAxis.name == "pl7.app/vdj/clusterId" {
+          linkerClusterIdAxes = append(linkerClusterIdAxes, clusterIdAxis)
+        }
+      }
+    }
+
+    // Add cluster size columns if available, but only those matching linker columns' clusterId axes
+    // This ensures we only join columns from the same clustering run
+    if len(columns.getColumns("clusterSizes")) > 0 {
+      clusterSizeIdx := 0
+      for col in columns.getColumns("clusterSizes") {
+        // Find the clusterId axis in this cluster size column
+        clusterSizeClusterIdAxis := undefined
+        for axis in col.spec.axesSpec {
+          if axis.name == "pl7.app/vdj/clusterId" {
+            clusterSizeClusterIdAxis = axis
+            break
+          }
+        }
+
+        // Only add if we have linker columns and this cluster size matches one of them
+        shouldAdd := false
+        if len(linkerClusterIdAxes) > 0 && !is_undefined(clusterSizeClusterIdAxis) {
+          // Check if this cluster size column matches any linker's clusterId axis
+          for linkerAxis in linkerClusterIdAxes {
+            // Compare domains - they must match exactly for the same clustering run
+            if clusterSizeClusterIdAxis.name == linkerAxis.name &&
+                clusterSizeClusterIdAxis.type == linkerAxis.type &&
+                utils.clusterAxisDomainsMatch(clusterSizeClusterIdAxis, linkerAxis) {
+              shouldAdd = true
+              break
+            }
+          }
+        }
+
+        // Only add cluster size columns that match a linker column's clustering run
+        if shouldAdd {
+          // Trace elements are already present in col.spec from the clustering block.
+          // deriveLabels (in label.ts) will use these existing trace elements to construct
+          // distinguishing labels when multiple clustering blocks are joined, similar to
+          // how LabelTypeFull ('__LABEL__@1') works. The trace includes:
+          // - Original dataset trace
+          // - "milaboratories.clonotype-clustering.sequences" trace element
+          // - "milaboratories.clonotype-clustering.clustering" trace element
+          // No modification needed - just preserve the existing trace.
+
+          cloneTable.add(col, {header: "clusterSize." + string(clusterSizeIdx)})
+          addedCols = true
+          // Add the cluster axis header
+          for axisIdx, axis in col.spec.axesSpec {
+            if axis.name != datasetSpec.axesSpec[1].name {
+              cloneTable.setAxisHeader(axis, "clusterAxis_" + string(clusterSizeIdx) + "_" + string(axisIdx))
+            }
+          }
+          clusterSizeIdx = clusterSizeIdx + 1
+        }
+      }
+    }
+
+    // Fallback: if no columns have been added yet, add at least one CDR3 sequence column
+    // This ensures the table can be built even when no filters/ranking columns are specified
+    if !addedCols {
+      cdr3Sequences := columns.getColumns("cdr3Sequences")
+      if len(cdr3Sequences) > 0 {
+        // Add the first CDR3 sequence as a fallback column
+        cloneTable.add(cdr3Sequences[0], {header: "cdr3_fallback"})
+        addedCols = true
+      }
+    }
+
+    // Continue only if we have at least one column.
+    // This condition prevents a transient intermittent error while filters are
+    // being processed, and possibly in other situations too
+    if addedCols {
+      cloneTable.mem("16GiB")
+      cloneTable.cpu(1)
+      cloneTable = cloneTable.build()
+
+      // Use render.create to call the filter-and-sample template
+      filterSampleResult := render.create(filterAndSampleTpl, {
+        inputAnchor: args.inputAnchor,
+        cloneTable: cloneTable,
+        rankingOrder: args.rankingOrder,
+        filters: args.filters,
+        filterMap: filterMap,
+        rankingMap: rankingMap,
+        datasetSpec: datasetSpec,
+        topClonotypes: args.topClonotypes,
+        disableClusterRanking: args.disableClusterRanking,
+        clusterColumn: args.clusterColumn
+      })
+
+      // Get the filtered clonotypes from the template result
+      outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
+
+      // Get the filtered and sampled clonotypes P-frame from the template result
+      finalClonotypes := filterSampleResult.output("finalClonotypes", 24 * 60 * 60 * 1000)
+
+      ////////// CDR3 Length Calculation //////////
+
+      cdr3SeqTable := pframes.parquetFileBuilder()
+      cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
+
+      // Must deal with multiple CDR3 sequences (two for each cell in single-cell data)
+      // Chain will be added in the header as cdr3Sequence.chain and used in the python script
+      // Notice chain is in spec.domain for single-cell data and spec.axesSpec[0].domain for bulk data
+
+      // Helper function to add chain information to the headers dynamically
+      chainMapping := {
+        "IG": { "A": "Heavy", "B": "Light" },
+        "TCRAB": { "A": "TRA", "B": "TRB" },
+        "TCRGD": { "A": "TRG", "B": "TRD" }
+      }
+
+      makeHeaderName := func(col, baseHeaderName, isSingleCell) {
+        if isSingleCell {
+          chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
+          receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
+          chainLabel := chainMapping[receptor][chain]
+          return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
+        } else {
+          // For bulk, if chain info is available (e.g. IGH, IGK, IGL)
+          chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
+          if chainFromDomain != undefined {
+            return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
+          }
+        }
+        return baseHeaderName
+      };
+
+      // Process CDR3 sequences
+      cdr3Sequences := columns.getColumns("cdr3Sequences")
+
+      for col in cdr3Sequences {
+        headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
+        if isSingleCell {
+          if col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary" {
+            cdr3SeqTable.add(col, {header: headerName})
+          }
+        } else {
+          cdr3SeqTable.add(col, {header: headerName})
+        }
+      }
+
+      // Process V genes
+      vGenes := columns.getColumns("VGenes")
+
+      for col in vGenes {
+        headerName := makeHeaderName(col, "vGene", isSingleCell)
+        cdr3SeqTable.add(col, {header: headerName})
+      }
+
+      // Process J genes
+      jGenes := columns.getColumns("JGenes")
+
+      for col in jGenes {
+        headerName := makeHeaderName(col, "jGene", isSingleCell)
+        cdr3SeqTable.add(col, {header: headerName})
+      }
+
+      cdr3SeqTable.mem("16GiB")
+      cdr3SeqTable.cpu(1)
+      cdr3SeqTableBuilt := cdr3SeqTable.build()
+
+      cdr3VspectratypeCmd := exec.builder().
+        software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
+        mem("16GiB").
+        cpu(1).
+        addFile("cdr3_sequences_input.parquet", cdr3SeqTableBuilt).
+        arg("--input_parquet").arg("cdr3_sequences_input.parquet").
+        arg("--spectratype_tsv").arg("spectratype.tsv").
+        arg("--vj_usage_tsv").arg("vj_usage.tsv") // no dot here
+
+      // Add top clonotypes argument and file to the builder if provided
+      if finalClonotypes != undefined {
+        cdr3VspectratypeCmd = cdr3VspectratypeCmd.
+          arg("--final-clonotypes").arg("finalClonotypes.parquet").
+          addFile("finalClonotypes.parquet", finalClonotypes)
+      }
+
+      cdr3VspectratypeCmd = cdr3VspectratypeCmd. // continue building the command
+        saveFile("spectratype.tsv").
+        saveFile("vj_usage.tsv").
+        printErrStreamToStdout().
+        cache(24 * 60 * 60 * 1000).
+        run()
+
+
+      // Spectratype PFrame structure is [chain][cdr3Length][vGene] -> count
+
+      cdr3VspectratypePf := xsv.importFile(cdr3VspectratypeCmd.getFile("spectratype.tsv"),
+        "tsv", spectratypeConv.getColumns(),
+        {cpu: 1, mem: "16GiB"})
+      outputs["cdr3VspectratypePf"] = pframes.exportFrame(cdr3VspectratypePf)
+
+      // For vjUsage the structure is [chain][vGene][jGene] -> count
+      vjUsagePf := xsv.importFile(cdr3VspectratypeCmd.getFile("vj_usage.tsv"),
+        "tsv", vjUsageConv.getColumns(),
+        {cpu: 1, mem: "16GiB"})
+      outputs["vjUsagePf"] = pframes.exportFrame(vjUsagePf)
+
+      if args.kabatNumbering == true {
+        ////////// Assembling AA sequences //////////
+        assemSeqTable := pframes.parquetFileBuilder()
+        keyHeader := "clonotypeKey"
+        assemSeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, keyHeader)
+
+        seqCols := columns.getColumns("assemblingAaSeqs")
+        for col in seqCols {
+          headerName := makeHeaderName(col, "assemblingFeature", isSingleCell)
+          assemSeqTable.add(col, {header: headerName})
+        }
+
+        assemSeqTable.mem("16GiB")
+        assemSeqTable.cpu(1)
+        assemSeqTableBuilt := assemSeqTable.build()
+
+        // Convert assembling feature sequences to FASTA via sub-template
+        assemFastaTpl := assets.importTemplate(":assembling-fasta")
+        bulkChain := undefined
+        if !isSingleCell {
+          // infer bulk chain from the incoming seq columns (domain uses IGHeavy / IGLight)
+          chainDetected := "KL"
+          for col in seqCols {
+            ch := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g., IGHeavy, IGLight
+            if ch == "IGHeavy" { chainDetected = "H"; break }
+            if ch == "IGLight" { chainDetected = "KL" }
+          }
+          bulkChain = chainDetected
+        }
+        assem := render.create(assemFastaTpl, {
+          inputTsv: assemSeqTableBuilt,
+          keyColumn: "clonotypeKey",
+          finalClonotypes: finalClonotypes,
+          isSingleCell: isSingleCell,
+          bulkChain: bulkChain
+        })
+        //outputs["assemblingAnarci"] = assem.output("anarci", 24 * 60 * 60 * 1000)
+        kabatFile := assem.output("kabat", 24 * 60 * 60 * 1000)
+        // Derive feature name from assembling feature columns (prefer the first column's feature)
+        featName := ""
+        if len(seqCols) > 0 {
+          f := seqCols[0].spec.domain["pl7.app/vdj/feature"]
+          if f != undefined { featName = f }
+        }
+        // Convert kabat.tsv to a PFrame with proper specs (bulk: select heavy/light)
+        kabatPf := xsv.importFile(kabatFile, "tsv", kabatConv.getColumns(datasetSpec, featName, bulkChain), {cpu: 1, mem: "8GiB"})
+        outputs["assemblingKabatPf"] = pframes.exportFrame(kabatPf)
+      }
+    }
+  }
+
+  return {
+    outputs: outputs,
+    exports: {}
+  }
+})
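For orientation, the makeHeaderName helper added above derives per-chain column headers from the column specs. Given the chainMapping literal in the hunk, the expected results look roughly like this (the column contents below are illustrative, not taken from the package):

    // single cell: spec.domain["pl7.app/vdj/scClonotypeChain"] == "A", receptor == "IG"
    //   makeHeaderName(col, "cdr3Sequence", true)  -> "cdr3Sequence.Heavy"
    // bulk: spec.axesSpec[0].domain["pl7.app/vdj/chain"] == "IGH"
    //   makeHeaderName(col, "cdr3Sequence", false) -> "cdr3Sequence.IGH"
    // bulk column without a chain domain:
    //   makeHeaderName(col, "cdr3Sequence", false) -> "cdr3Sequence"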
package/src/utils.lib.tengo
ADDED
@@ -0,0 +1,139 @@
+// Utility functions for antibody-tcr-lead-selection workflow
+
+slices := import("@platforma-sdk/workflow-tengo:slices")
+
+/**
+ * Checks if two clusterId axes have matching domains.
+ * Used to determine if two columns belong to the same clustering run.
+ *
+ * @param axis1 - First clusterId axis to compare
+ * @param axis2 - Second clusterId axis to compare
+ * @return true if domains match, false otherwise
+ */
+clusterAxisDomainsMatch := func(axis1, axis2) {
+  // If either axis is undefined, they don't match
+  if is_undefined(axis1) || is_undefined(axis2) {
+    return false
+  }
+
+  // Check if both have the same domain presence
+  if is_undefined(axis1.domain) != is_undefined(axis2.domain) {
+    return false
+  }
+
+  // If both have no domain, consider them matching
+  if is_undefined(axis1.domain) && is_undefined(axis2.domain) {
+    return true
+  }
+
+  // Both have domains - compare them
+  if len(axis1.domain) != len(axis2.domain) {
+    return false
+  }
+
+  // Check all keys and values match
+  for k, v in axis1.domain {
+    if is_undefined(axis2.domain[k]) || axis2.domain[k] != v {
+      return false
+    }
+  }
+
+  return true
+}
+
+/**
+ * Finds the linker index that matches a ranking column's clusterId axis.
+ * Returns undefined if no matching linker is found.
+ *
+ * @param colsSpec - Column specification containing axes
+ * @param linkerColumns - List of linker columns to match against
+ * @return Linker index (number) or undefined if not found
+ */
+findMatchingLinkerIndex := func(colsSpec, linkerColumns) {
+  // Find the clusterId axis in the ranking column
+  rankingClusterIdAxis := undefined
+  for axis in colsSpec.axesSpec {
+    if axis.name == "pl7.app/vdj/clusterId" {
+      rankingClusterIdAxis = axis
+      break
+    }
+  }
+
+  // Try to match this column to a linker by comparing clusterId axes
+  if is_undefined(rankingClusterIdAxis) {
+    return undefined
+  }
+
+  for li, linkerCol in linkerColumns {
+    // Get the clusterId axis from the linker column
+    linkerClusterIdAxis := undefined
+    for axis in linkerCol.spec.axesSpec {
+      if axis.name == "pl7.app/vdj/clusterId" {
+        linkerClusterIdAxis = axis
+        break
+      }
+    }
+
+    // Compare the axes - check if domains match
+    if clusterAxisDomainsMatch(rankingClusterIdAxis, linkerClusterIdAxis) {
+      return li
+    }
+  }
+
+  return undefined
+}
+
+/**
+ * Processes a ranking column to determine its header name and cluster axis index.
+ *
+ * @param colsSpec - Column specification
+ * @param datasetMainAxisName - Name of the main dataset axis (e.g., clonotype axis)
+ * @param linkerColumns - List of linker columns to match against
+ * @param clusterPropertyIdx - Current cluster property index counter
+ * @return Map with keys: header, clusterAxisIdx, newClusterPropertyIdx
+ */
+processRankingColumn := func(colsSpec, datasetMainAxisName, linkerColumns, clusterPropertyIdx) {
+  axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
+  isClusterProperty := !slices.hasElement(axesNames, datasetMainAxisName)
+
+  if !isClusterProperty {
+    // This is a clonotype property
+    return {
+      isClusterProperty: false,
+      header: undefined,
+      clusterAxisIdx: undefined,
+      newClusterPropertyIdx: clusterPropertyIdx
+    }
+  }
+
+  // This is a cluster property - try to find matching linker
+  linkerIdx := findMatchingLinkerIndex(colsSpec, linkerColumns)
+
+  header := ""
+  clusterAxisIdx := undefined
+  newClusterPropertyIdx := clusterPropertyIdx
+
+  if linkerIdx != undefined {
+    // This column belongs to a linker - use the linker index
+    header = "Col_linker." + string(linkerIdx)
+    clusterAxisIdx = linkerIdx
+  } else {
+    // This is a generic cluster property (not associated with any linker)
+    header = "Col_cluster." + string(clusterPropertyIdx)
+    clusterAxisIdx = clusterPropertyIdx
+    newClusterPropertyIdx = clusterPropertyIdx + 1
+  }
+
+  return {
+    isClusterProperty: true,
+    header: header,
+    clusterAxisIdx: clusterAxisIdx,
+    newClusterPropertyIdx: newClusterPropertyIdx
+  }
+}
+
+export {
+  clusterAxisDomainsMatch: clusterAxisDomainsMatch,
+  findMatchingLinkerIndex: findMatchingLinkerIndex,
+  processRankingColumn: processRankingColumn
+}
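The helpers above are consumed from main.tpl.tengo via utils := import(":utils"). A minimal usage sketch for processRankingColumn, assuming a ranking column spec colsSpec and the "linkers" collection from the prepared bundle (the variable names here are illustrative):

    linkers := columns.getColumns("linkers")
    result := utils.processRankingColumn(colsSpec, datasetSpec.axesSpec[1].name, linkers, 0)
    if result.isClusterProperty {
      // header is "Col_linker.<i>" when a linker with a matching clusterId domain is found,
      // otherwise "Col_cluster.<n>" using the running cluster-property counter
      header := result.header
      clusterAxisIdx := result.clusterAxisIdx
    }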