@platforma-open/milaboratories.top-antibodies.workflow 1.14.0 → 1.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +11 -11
- package/CHANGELOG.md +12 -0
- package/dist/index.cjs +1 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -0
- package/dist/tengo/lib/{libs.pf-kabat-conv.lib.tengo → pf-kabat-conv.lib.tengo} +5 -4
- package/{src/libs → dist/tengo/lib}/pf-spectratype-conv.lib.tengo +0 -1
- package/{src/libs → dist/tengo/lib}/pf-vj-usage-conv.lib.tengo +0 -1
- package/{src/libs → dist/tengo/lib}/sampled-cols-conv.lib.tengo +0 -1
- package/dist/tengo/tpl/assembling-fasta.plj.gz +0 -0
- package/dist/tengo/tpl/filter-and-sample.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/package.json +6 -6
- package/src/assembling-fasta.tpl.tengo +4 -4
- package/src/filter-and-sample.tpl.tengo +81 -0
- package/src/{libs/pf-kabat-conv.lib.tengo → pf-kabat-conv.lib.tengo} +5 -4
- package/{dist/tengo/lib/libs.pf-spectratype-conv.lib.tengo → src/pf-spectratype-conv.lib.tengo} +0 -1
- package/{dist/tengo/lib/libs.pf-vj-usage-conv.lib.tengo → src/pf-vj-usage-conv.lib.tengo} +0 -1
- package/src/prerun.tpl.tengo +213 -71
- package/{dist/tengo/lib/libs.sampled-cols-conv.lib.tengo → src/sampled-cols-conv.lib.tengo} +0 -1
- package/dist/tengo/lib/libs.data-utils.lib.tengo +0 -324
- package/src/libs/data-utils.lib.tengo +0 -324
package/src/prerun.tpl.tengo
CHANGED
|
@@ -7,16 +7,12 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
|
|
|
7
7
|
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
8
8
|
render := import("@platforma-sdk/workflow-tengo:render")
|
|
9
9
|
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
10
|
-
|
|
11
|
-
text := import("text")
|
|
12
|
-
json := import("json")
|
|
10
|
+
kabatConv := import(":pf-kabat-conv")
|
|
13
11
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
vjUsageConv := import(":libs.pf-vj-usage-conv")
|
|
17
|
-
sampledColsConv := import(":libs.sampled-cols-conv")
|
|
18
|
-
kabatConv := import(":libs.pf-kabat-conv")
|
|
12
|
+
spectratypeConv := import(":pf-spectratype-conv")
|
|
13
|
+
vjUsageConv := import(":pf-vj-usage-conv")
|
|
19
14
|
|
|
15
|
+
filterAndSampleTpl := assets.importTemplate(":filter-and-sample")
|
|
20
16
|
|
|
21
17
|
wf.prepare(func(args){
|
|
22
18
|
if is_undefined(args.inputAnchor) {
|
|
@@ -127,86 +123,230 @@ wf.body(func(args) {
|
|
|
127
123
|
|
|
128
124
|
// Needed conditional variable
|
|
129
125
|
isSingleCell := datasetSpec.axesSpec[1].name == "pl7.app/vdj/scClonotypeKey"
|
|
130
|
-
|
|
126
|
+
|
|
131
127
|
////////// Clonotype Filtering //////////
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
128
|
+
// Build clonotype table
|
|
129
|
+
cloneTable := pframes.csvFileBuilder()
|
|
130
|
+
cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
|
|
131
|
+
|
|
132
|
+
// Add Filters to table
|
|
133
|
+
addedAxes := []
|
|
134
|
+
filterMap := {}
|
|
135
|
+
rankingMap := {}
|
|
136
|
+
addedCols := false
|
|
137
|
+
if len(args.filters) > 0 {
|
|
138
|
+
for i, filter in args.filters {
|
|
139
|
+
if filter.value != undefined {
|
|
140
|
+
// Columns added here might also be in ranking list, so we add default IDs
|
|
141
|
+
cloneTable.add(columns.getColumn(filter.value.column),
|
|
142
|
+
{header: "Filter_" + string(i), id: "filter_" + string(i)})
|
|
143
|
+
addedCols = true
|
|
144
|
+
// Store reference value and filter type associated to this column
|
|
145
|
+
filterMap["Filter_" + string(i)] = filter.filter
|
|
146
|
+
filterMap["Filter_" + string(i)]["valueType"] = columns.getSpec(filter.value.column).valueType
|
|
147
|
+
|
|
148
|
+
// If the column lacks the main anchor axis, we have to include its own axes
|
|
149
|
+
colsSpec := columns.getSpec(filter.value.column)
|
|
150
|
+
axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
|
|
151
|
+
if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
|
|
152
|
+
for na, ax in colsSpec.axesSpec {
|
|
153
|
+
if ax.name != datasetSpec.axesSpec[1].name {
|
|
154
|
+
cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
|
|
155
|
+
addedAxes = append(addedAxes, ax.name)
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// Add ranking columns to table
|
|
164
|
+
validRanks := false
|
|
165
|
+
if len(args.rankingOrder) > 0 {
|
|
166
|
+
for i, col in args.rankingOrder {
|
|
167
|
+
if col.value != undefined {
|
|
168
|
+
validRanks = true
|
|
169
|
+
cloneTable.add(columns.getColumn(col.value.column), {header: "Col" + string(i)})
|
|
170
|
+
addedCols = true
|
|
171
|
+
// Store ranking order for this column
|
|
172
|
+
rankingMap["Col" + string(i)] = col.rankingOrder
|
|
173
|
+
|
|
174
|
+
// If the column lacks the main anchor axis, we have to include its own axes
|
|
175
|
+
colsSpec := columns.getSpec(col.value.column)
|
|
176
|
+
axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
|
|
177
|
+
if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
|
|
178
|
+
for na, ax in colsSpec.axesSpec {
|
|
179
|
+
if ax.name != datasetSpec.axesSpec[1].name && !slices.hasElement(addedAxes, ax.name) {
|
|
180
|
+
cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
// If we didn't have any ranking columns, or none of them were valid
|
|
188
|
+
if !validRanks {
|
|
189
|
+
// @TODO: this is a temporary patch for an issue where rankingOrderDefault
|
|
190
|
+
// are not defined by the time prerun works
|
|
191
|
+
if args.rankingOrderDefault.value != undefined {
|
|
192
|
+
i := 0
|
|
193
|
+
cloneTable.add(columns.getColumn(args.rankingOrderDefault.value.column), {header: "Col" + string(i)})
|
|
194
|
+
addedCols = true
|
|
195
|
+
// Store default ranking order
|
|
196
|
+
rankingMap["Col" + string(i)] = args.rankingOrderDefault.rankingOrder
|
|
197
|
+
|
|
198
|
+
// If the column lacks the main anchor axis, we have to include its own axes
|
|
199
|
+
colsSpec := columns.getSpec(args.rankingOrderDefault.value.column)
|
|
200
|
+
axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
|
|
201
|
+
if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
|
|
202
|
+
for na, ax in colsSpec.axesSpec {
|
|
203
|
+
if ax.name != datasetSpec.axesSpec[1].name {
|
|
204
|
+
cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// Get linker columns if needed
|
|
212
|
+
linkerAxisSpec := {}
|
|
213
|
+
if len(columns.getColumns("linkers")) > 0 {
|
|
214
|
+
for i, col in columns.getColumns("linkers") {
|
|
215
|
+
if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
|
|
216
|
+
cloneTable.add(col, {header: "linker." + string(i)})
|
|
217
|
+
cloneTable.setAxisHeader(col.spec.axesSpec[0], "cluster_" + string(i))
|
|
218
|
+
linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[0]
|
|
219
|
+
} else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
|
|
220
|
+
cloneTable.add(col, {header: "linker." + string(i)})
|
|
221
|
+
cloneTable.setAxisHeader(col.spec.axesSpec[1], "cluster_" + string(i))
|
|
222
|
+
linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[1]
|
|
223
|
+
}
|
|
224
|
+
addedCols = true
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
// Add cluster size columns if available
|
|
229
|
+
if len(columns.getColumns("clusterSizes")) > 0 {
|
|
230
|
+
for i, col in columns.getColumns("clusterSizes") {
|
|
231
|
+
cloneTable.add(col, {header: "clusterSize." + string(i)})
|
|
232
|
+
addedCols = true
|
|
233
|
+
// Add the cluster axis header
|
|
234
|
+
for axisIdx, axis in col.spec.axesSpec {
|
|
235
|
+
if axis.name != datasetSpec.axesSpec[1].name {
|
|
236
|
+
cloneTable.setAxisHeader(axis, "clusterAxis_" + string(i) + "_" + string(axisIdx))
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
}
|
|
139
241
|
|
|
140
242
|
// Continue only if we have at least a column
|
|
141
243
|
// This condition prevents transient, intermittent errors while filters are
|
|
142
244
|
// being processed and possibly in other situations too
|
|
143
245
|
if addedCols {
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
246
|
+
cloneTable.mem("16GiB")
|
|
247
|
+
cloneTable.cpu(1)
|
|
248
|
+
cloneTable = cloneTable.build()
|
|
249
|
+
|
|
250
|
+
// Use render.create to call the filter-and-sample template
|
|
251
|
+
filterSampleResult := render.create(filterAndSampleTpl, {
|
|
252
|
+
inputAnchor: args.inputAnchor,
|
|
253
|
+
cloneTable: cloneTable,
|
|
254
|
+
rankingOrder: args.rankingOrder,
|
|
255
|
+
rankingOrderDefault: args.rankingOrderDefault,
|
|
256
|
+
filters: args.filters,
|
|
257
|
+
filterMap: filterMap,
|
|
258
|
+
rankingMap: rankingMap,
|
|
259
|
+
datasetSpec: datasetSpec,
|
|
260
|
+
topClonotypes: args.topClonotypes
|
|
261
|
+
})
|
|
262
|
+
|
|
263
|
+
// Get the filtered clonotypes from the template result
|
|
264
|
+
outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
|
|
265
|
+
|
|
266
|
+
// Get the filtered and sampled clonotypes P-frame and CSV from the template result
|
|
267
|
+
finalClonotypesCsv := filterSampleResult.output("finalClonotypesCsv", 24 * 60 * 60 * 1000)
|
|
268
|
+
// outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
|
|
269
|
+
|
|
270
|
+
////////// CDR3 Length Calculation //////////
|
|
165
271
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
272
|
+
cdr3SeqTable := pframes.tsvFileBuilder()
|
|
273
|
+
cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
|
|
274
|
+
|
|
275
|
+
// Must deal with multiple CDR3 sequences (two for each cell in single cell data)
|
|
276
|
+
// Chain will be added in the header as cdr3Sequence.chain and used in python script
|
|
277
|
+
// Notice chain is in spec.domain for single cell data and spec.axesSpec[0].domain for bulk data
|
|
278
|
+
|
|
279
|
+
// Helper function to add chain information to the headers dynamically
|
|
280
|
+
chainMapping := {
|
|
281
|
+
"IG": { "A": "Heavy", "B": "Light" },
|
|
282
|
+
"TCRAB": { "A": "TRA", "B": "TRB" },
|
|
283
|
+
"TCRGD": { "A": "TRG", "B": "TRD" }
|
|
175
284
|
}
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
285
|
+
|
|
286
|
+
makeHeaderName := func(col, baseHeaderName, isSingleCell) {
|
|
287
|
+
if isSingleCell {
|
|
288
|
+
chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
|
|
289
|
+
receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
|
|
290
|
+
chainLabel := chainMapping[receptor][chain]
|
|
291
|
+
return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
|
|
292
|
+
} else {
|
|
293
|
+
// For bulk, if chain info is available (e.g. IGH, IGK, IGL)
|
|
294
|
+
chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
|
|
295
|
+
if chainFromDomain != undefined {
|
|
296
|
+
return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
return baseHeaderName
|
|
300
|
+
};
|
|
301
|
+
|
|
302
|
+
// Process CDR3 sequences
|
|
303
|
+
cdr3Sequences := columns.getColumns("cdr3Sequences")
|
|
304
|
+
|
|
305
|
+
for col in cdr3Sequences {
|
|
306
|
+
headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
|
|
307
|
+
if isSingleCell {
|
|
308
|
+
if col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary" {
|
|
309
|
+
cdr3SeqTable.add(col, {header: headerName})
|
|
310
|
+
}
|
|
311
|
+
} else {
|
|
312
|
+
cdr3SeqTable.add(col, {header: headerName})
|
|
313
|
+
}
|
|
186
314
|
}
|
|
187
|
-
|
|
188
|
-
|
|
315
|
+
|
|
316
|
+
// Process V genes
|
|
317
|
+
vGenes := columns.getColumns("VGenes")
|
|
318
|
+
|
|
319
|
+
for col in vGenes {
|
|
320
|
+
headerName := makeHeaderName(col, "vGene", isSingleCell)
|
|
321
|
+
cdr3SeqTable.add(col, {header: headerName})
|
|
189
322
|
}
|
|
190
323
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
324
|
+
// Process J genes
|
|
325
|
+
jGenes := columns.getColumns("JGenes")
|
|
326
|
+
|
|
327
|
+
for col in jGenes {
|
|
328
|
+
headerName := makeHeaderName(col, "jGene", isSingleCell)
|
|
329
|
+
cdr3SeqTable.add(col, {header: headerName})
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
cdr3SeqTable.mem("16GiB")
|
|
333
|
+
cdr3SeqTable.cpu(1)
|
|
334
|
+
cdr3SeqTableBuilt := cdr3SeqTable.build()
|
|
195
335
|
|
|
196
336
|
cdr3VspectratypeCmd := exec.builder().
|
|
197
337
|
software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
|
|
198
338
|
mem("16GiB").
|
|
199
339
|
cpu(1).
|
|
200
|
-
addFile("cdr3_sequences_input.
|
|
201
|
-
arg("--
|
|
340
|
+
addFile("cdr3_sequences_input.tsv", cdr3SeqTableBuilt).
|
|
341
|
+
arg("--input_tsv").arg("cdr3_sequences_input.tsv").
|
|
202
342
|
arg("--spectratype_tsv").arg("spectratype.tsv").
|
|
203
343
|
arg("--vj_usage_tsv").arg("vj_usage.tsv") // no dot here
|
|
204
344
|
|
|
205
345
|
// Add top clonotypes argument and file to the builder if provided
|
|
206
|
-
if
|
|
346
|
+
if finalClonotypesCsv != undefined {
|
|
207
347
|
cdr3VspectratypeCmd = cdr3VspectratypeCmd.
|
|
208
|
-
arg("--
|
|
209
|
-
addFile("finalClonotypes.
|
|
348
|
+
arg("--final_clonotypes_csv").arg("finalClonotypes.csv").
|
|
349
|
+
addFile("finalClonotypes.csv", finalClonotypesCsv)
|
|
210
350
|
}
|
|
211
351
|
|
|
212
352
|
cdr3VspectratypeCmd = cdr3VspectratypeCmd. // continue building the command
|
|
@@ -216,16 +356,18 @@ wf.body(func(args) {
|
|
|
216
356
|
cache(24 * 60 * 60 * 1000).
|
|
217
357
|
run()
|
|
218
358
|
|
|
359
|
+
|
|
219
360
|
// Spectratype PFrame structure is [chain][cdr3Length][vGene] -> count
|
|
361
|
+
|
|
220
362
|
cdr3VspectratypePf := xsv.importFile(cdr3VspectratypeCmd.getFile("spectratype.tsv"),
|
|
221
363
|
"tsv", spectratypeConv.getColumns(),
|
|
222
|
-
{cpu: 1, mem: "
|
|
364
|
+
{cpu: 1, mem: "16GiB"})
|
|
223
365
|
outputs["cdr3VspectratypePf"] = pframes.exportFrame(cdr3VspectratypePf)
|
|
224
366
|
|
|
225
367
|
// For vjUsage structure is [chain][vGene][jGene] -> count
|
|
226
368
|
vjUsagePf := xsv.importFile(cdr3VspectratypeCmd.getFile("vj_usage.tsv"),
|
|
227
369
|
"tsv", vjUsageConv.getColumns(),
|
|
228
|
-
{cpu: 1, mem: "
|
|
370
|
+
{cpu: 1, mem: "16GiB"})
|
|
229
371
|
outputs["vjUsagePf"] = pframes.exportFrame(vjUsagePf)
|
|
230
372
|
|
|
231
373
|
if args.kabatNumbering == true {
|
|
@@ -236,7 +378,7 @@ wf.body(func(args) {
|
|
|
236
378
|
|
|
237
379
|
seqCols := columns.getColumns("assemblingAaSeqs")
|
|
238
380
|
for col in seqCols {
|
|
239
|
-
headerName :=
|
|
381
|
+
headerName := makeHeaderName(col, "assemblingFeature", isSingleCell)
|
|
240
382
|
assemSeqTable.add(col, {header: headerName})
|
|
241
383
|
}
|
|
242
384
|
|
|
@@ -260,7 +402,7 @@ wf.body(func(args) {
|
|
|
260
402
|
assem := render.create(assemFastaTpl, {
|
|
261
403
|
inputTsv: assemSeqTableBuilt,
|
|
262
404
|
keyColumn: "clonotypeKey",
|
|
263
|
-
|
|
405
|
+
finalClonotypesCsv: finalClonotypesCsv,
|
|
264
406
|
isSingleCell: isSingleCell,
|
|
265
407
|
bulkChain: bulkChain
|
|
266
408
|
})
|