@platforma-open/milaboratories.top-antibodies.workflow 1.13.2 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +11 -11
- package/CHANGELOG.md +15 -0
- package/dist/index.cjs +0 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +0 -1
- package/dist/tengo/lib/libs.data-utils.lib.tengo +324 -0
- package/dist/tengo/lib/{pf-kabat-conv.lib.tengo → libs.pf-kabat-conv.lib.tengo} +4 -5
- package/dist/tengo/lib/{pf-spectratype-conv.lib.tengo → libs.pf-spectratype-conv.lib.tengo} +1 -0
- package/{src/pf-vj-usage-conv.lib.tengo → dist/tengo/lib/libs.pf-vj-usage-conv.lib.tengo} +1 -0
- package/dist/tengo/lib/{sampled-cols-conv.lib.tengo → libs.sampled-cols-conv.lib.tengo} +1 -0
- package/dist/tengo/tpl/assembling-fasta.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/package.json +9 -9
- package/src/assembling-fasta.tpl.tengo +4 -4
- package/src/libs/data-utils.lib.tengo +324 -0
- package/src/{pf-kabat-conv.lib.tengo → libs/pf-kabat-conv.lib.tengo} +4 -5
- package/src/{pf-spectratype-conv.lib.tengo → libs/pf-spectratype-conv.lib.tengo} +1 -0
- package/{dist/tengo/lib → src/libs}/pf-vj-usage-conv.lib.tengo +1 -0
- package/src/{sampled-cols-conv.lib.tengo → libs/sampled-cols-conv.lib.tengo} +1 -0
- package/src/prerun.tpl.tengo +71 -213
- package/dist/tengo/tpl/filter-and-sample.plj.gz +0 -0
- package/src/filter-and-sample.tpl.tengo +0 -81
package/src/prerun.tpl.tengo
CHANGED
|
@@ -7,12 +7,16 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
|
|
|
7
7
|
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
8
8
|
render := import("@platforma-sdk/workflow-tengo:render")
|
|
9
9
|
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
10
|
-
|
|
10
|
+
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
11
|
+
text := import("text")
|
|
12
|
+
json := import("json")
|
|
11
13
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
+
dataUtils := import(":libs.data-utils")
|
|
15
|
+
spectratypeConv := import(":libs.pf-spectratype-conv")
|
|
16
|
+
vjUsageConv := import(":libs.pf-vj-usage-conv")
|
|
17
|
+
sampledColsConv := import(":libs.sampled-cols-conv")
|
|
18
|
+
kabatConv := import(":libs.pf-kabat-conv")
|
|
14
19
|
|
|
15
|
-
filterAndSampleTpl := assets.importTemplate(":filter-and-sample")
|
|
16
20
|
|
|
17
21
|
wf.prepare(func(args){
|
|
18
22
|
if is_undefined(args.inputAnchor) {
|
|
@@ -123,230 +127,86 @@ wf.body(func(args) {
|
|
|
123
127
|
|
|
124
128
|
// Needed conditional variable
|
|
125
129
|
isSingleCell := datasetSpec.axesSpec[1].name == "pl7.app/vdj/scClonotypeKey"
|
|
126
|
-
|
|
127
|
-
////////// Clonotype Filtering //////////
|
|
128
|
-
// Build clonotype table
|
|
129
|
-
cloneTable := pframes.csvFileBuilder()
|
|
130
|
-
cloneTable.setAxisHeader(datasetSpec.axesSpec[1], "clonotypeKey")
|
|
131
|
-
|
|
132
|
-
// Add Filters to table
|
|
133
|
-
addedAxes := []
|
|
134
|
-
filterMap := {}
|
|
135
|
-
rankingMap := {}
|
|
136
|
-
addedCols := false
|
|
137
|
-
if len(args.filters) > 0 {
|
|
138
|
-
for i, filter in args.filters {
|
|
139
|
-
if filter.value != undefined {
|
|
140
|
-
// Columns added here might also be in ranking list, so we add default IDs
|
|
141
|
-
cloneTable.add(columns.getColumn(filter.value.column),
|
|
142
|
-
{header: "Filter_" + string(i), id: "filter_" + string(i)})
|
|
143
|
-
addedCols = true
|
|
144
|
-
// Store reference value and filter type associated to this column
|
|
145
|
-
filterMap["Filter_" + string(i)] = filter.filter
|
|
146
|
-
filterMap["Filter_" + string(i)]["valueType"] = columns.getSpec(filter.value.column).valueType
|
|
147
|
-
|
|
148
|
-
// If column does not have main anchor axis we have to include theirs
|
|
149
|
-
colsSpec := columns.getSpec(filter.value.column)
|
|
150
|
-
axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
|
|
151
|
-
if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
|
|
152
|
-
for na, ax in colsSpec.axesSpec {
|
|
153
|
-
if ax.name != datasetSpec.axesSpec[1].name {
|
|
154
|
-
cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
|
|
155
|
-
addedAxes = append(addedAxes, ax.name)
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
// Add ranking columns to table
|
|
164
|
-
validRanks := false
|
|
165
|
-
if len(args.rankingOrder) > 0 {
|
|
166
|
-
for i, col in args.rankingOrder {
|
|
167
|
-
if col.value != undefined {
|
|
168
|
-
validRanks = true
|
|
169
|
-
cloneTable.add(columns.getColumn(col.value.column), {header: "Col" + string(i)})
|
|
170
|
-
addedCols = true
|
|
171
|
-
// Store ranking order for this column
|
|
172
|
-
rankingMap["Col" + string(i)] = col.rankingOrder
|
|
173
|
-
|
|
174
|
-
// If column does not have main anchor axis we have to include theirs
|
|
175
|
-
colsSpec := columns.getSpec(col.value.column)
|
|
176
|
-
axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
|
|
177
|
-
if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
|
|
178
|
-
for na, ax in colsSpec.axesSpec {
|
|
179
|
-
if ax.name != datasetSpec.axesSpec[1].name && !slices.hasElement(addedAxes, ax.name) {
|
|
180
|
-
cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
}
|
|
186
|
-
}
|
|
187
|
-
// If we didn't have any ranking column or all where not valid
|
|
188
|
-
if !validRanks {
|
|
189
|
-
// @TODO: this is a temporal patch for issue where rankingOrderDefault
|
|
190
|
-
// are not defined by the time prerun works
|
|
191
|
-
if args.rankingOrderDefault.value != undefined {
|
|
192
|
-
i := 0
|
|
193
|
-
cloneTable.add(columns.getColumn(args.rankingOrderDefault.value.column), {header: "Col" + string(i)})
|
|
194
|
-
addedCols = true
|
|
195
|
-
// Store default ranking order
|
|
196
|
-
rankingMap["Col" + string(i)] = args.rankingOrderDefault.rankingOrder
|
|
197
|
-
|
|
198
|
-
// If column does not have main anchor axis we have to include theirs
|
|
199
|
-
colsSpec := columns.getSpec(args.rankingOrderDefault.value.column)
|
|
200
|
-
axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
|
|
201
|
-
if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
|
|
202
|
-
for na, ax in colsSpec.axesSpec {
|
|
203
|
-
if ax.name != datasetSpec.axesSpec[1].name {
|
|
204
|
-
cloneTable.setAxisHeader(ax, "cluster_" + string(i) + string(na))
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
}
|
|
208
|
-
}
|
|
209
|
-
}
|
|
210
130
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
} else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
|
|
220
|
-
cloneTable.add(col, {header: "linker." + string(i)})
|
|
221
|
-
cloneTable.setAxisHeader(col.spec.axesSpec[1], "cluster_" + string(i))
|
|
222
|
-
linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[1]
|
|
223
|
-
}
|
|
224
|
-
addedCols = true
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
// Add cluster size columns if available
|
|
229
|
-
if len(columns.getColumns("clusterSizes")) > 0 {
|
|
230
|
-
for i, col in columns.getColumns("clusterSizes") {
|
|
231
|
-
cloneTable.add(col, {header: "clusterSize." + string(i)})
|
|
232
|
-
addedCols = true
|
|
233
|
-
// Add the cluster axis header
|
|
234
|
-
for axisIdx, axis in col.spec.axesSpec {
|
|
235
|
-
if axis.name != datasetSpec.axesSpec[1].name {
|
|
236
|
-
cloneTable.setAxisHeader(axis, "clusterAxis_" + string(i) + "_" + string(axisIdx))
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
}
|
|
240
|
-
}
|
|
131
|
+
////////// Clonotype Filtering //////////
|
|
132
|
+
clonotypeData := dataUtils.prepareClonotypeData(args.filters, args.rankingOrder, args.rankingOrderDefault, columns, datasetSpec)
|
|
133
|
+
structuredMap := clonotypeData.structuredMap
|
|
134
|
+
axisRenames := clonotypeData.axisRenames
|
|
135
|
+
filterMap := clonotypeData.filterMap
|
|
136
|
+
rankingMap := clonotypeData.rankingMap
|
|
137
|
+
addedCols := clonotypeData.addedCols
|
|
138
|
+
linkerAxisSpec := clonotypeData.linkerAxisSpec
|
|
241
139
|
|
|
242
140
|
// Continue only if we have at least a column
|
|
243
141
|
// This condition prevents temporal intermittent error while filters are
|
|
244
142
|
// being processed and possibly in other situations too
|
|
245
143
|
if addedCols {
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
finalClonotypesCsv := filterSampleResult.output("finalClonotypesCsv", 24 * 60 * 60 * 1000)
|
|
268
|
-
// outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
|
|
269
|
-
|
|
270
|
-
////////// CDR3 Length Calculation //////////
|
|
144
|
+
// Run ptabler-based filtering (matches filter.py logic)
|
|
145
|
+
filterResult := dataUtils.filterClonotypes(structuredMap, axisRenames, filterMap, datasetSpec)
|
|
146
|
+
// Run sampling script if topClonotypes is defined
|
|
147
|
+
finalClonotypesParquet := undefined
|
|
148
|
+
if args.topClonotypes != undefined {
|
|
149
|
+
sampleClones := exec.builder().
|
|
150
|
+
software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.sample-clonotypes:main")).
|
|
151
|
+
mem("16GiB").
|
|
152
|
+
cpu(1).
|
|
153
|
+
addFile("filteredClonotypes.parquet", filterResult.filteredParquet).
|
|
154
|
+
arg("--input").arg("filteredClonotypes.parquet").
|
|
155
|
+
arg("--n").arg(string(topClonotypes)).
|
|
156
|
+
arg("--ranking-map").arg(string(json.encode(rankingMap))).
|
|
157
|
+
arg("--out").arg("sampledClonotypes_top.csv").
|
|
158
|
+
arg("--out-parquet").arg("sampledClonotypes_top.parquet").
|
|
159
|
+
saveFile("sampledClonotypes_top.csv").
|
|
160
|
+
saveFile("sampledClonotypes_top.parquet").
|
|
161
|
+
printErrStreamToStdout().
|
|
162
|
+
saveStdoutContent().
|
|
163
|
+
cache(24 * 60 * 60 * 1000).
|
|
164
|
+
run()
|
|
271
165
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
"IG": { "A": "Heavy", "B": "Light" },
|
|
282
|
-
"TCRAB": { "A": "TRA", "B": "TRB" },
|
|
283
|
-
"TCRGD": { "A": "TRG", "B": "TRD" }
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
makeHeaderName := func(col, baseHeaderName, isSingleCell) {
|
|
287
|
-
if isSingleCell {
|
|
288
|
-
chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
|
|
289
|
-
receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
|
|
290
|
-
chainLabel := chainMapping[receptor][chain]
|
|
291
|
-
return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
|
|
292
|
-
} else {
|
|
293
|
-
// For bulk, if chain info is available (e.g. IGH, IGK, IGL)
|
|
294
|
-
chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
|
|
295
|
-
if chainFromDomain != undefined {
|
|
296
|
-
return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
|
|
297
|
-
}
|
|
298
|
-
}
|
|
299
|
-
return baseHeaderName
|
|
300
|
-
};
|
|
301
|
-
|
|
302
|
-
// Process CDR3 sequences
|
|
303
|
-
cdr3Sequences := columns.getColumns("cdr3Sequences")
|
|
304
|
-
|
|
305
|
-
for col in cdr3Sequences {
|
|
306
|
-
headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
|
|
307
|
-
if isSingleCell {
|
|
308
|
-
if col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary" {
|
|
309
|
-
cdr3SeqTable.add(col, {header: headerName})
|
|
310
|
-
}
|
|
311
|
-
} else {
|
|
312
|
-
cdr3SeqTable.add(col, {header: headerName})
|
|
313
|
-
}
|
|
166
|
+
finalClonotypesCsv := sampleClones.getFile("sampledClonotypes_top.csv")
|
|
167
|
+
sampledColumnsPf := xsv.importFile(finalClonotypesCsv, "csv",
|
|
168
|
+
sampledColsConv.getColumns(datasetSpec, true), {cpu: 1, mem: "4GiB"})
|
|
169
|
+
outputs["sampledRows"] = pframes.exportFrame(sampledColumnsPf)
|
|
170
|
+
finalClonotypesParquet = sampleClones.getFile("sampledClonotypes_top.parquet")
|
|
171
|
+
} else {
|
|
172
|
+
// No sampling, use filtered parquet as final output
|
|
173
|
+
finalClonotypesParquet = filterResult.filteredParquet
|
|
174
|
+
outputs["sampledRows"] = pframes.exportFrame(filterResult.pframe)
|
|
314
175
|
}
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
176
|
+
////////// CDR3 Length Calculation //////////
|
|
177
|
+
cdr3Data := dataUtils.prepareCdr3Data(columns, datasetSpec, isSingleCell)
|
|
178
|
+
cdr3SeqStructuredMap := cdr3Data.structuredMap
|
|
179
|
+
cdr3SeqAxisRenames := cdr3Data.axisRenames
|
|
180
|
+
|
|
181
|
+
// Build ptabler workflow
|
|
182
|
+
wfCdr3Seq := pt.workflow().cacheInputs(24 * 60 * 60 * 1000)
|
|
183
|
+
cdr3SeqProjection := []
|
|
184
|
+
for origAxis, aliasName in cdr3SeqAxisRenames {
|
|
185
|
+
cdr3SeqProjection = append(cdr3SeqProjection, pt.axis(origAxis).alias(aliasName))
|
|
322
186
|
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
jGenes := columns.getColumns("JGenes")
|
|
326
|
-
|
|
327
|
-
for col in jGenes {
|
|
328
|
-
headerName := makeHeaderName(col, "jGene", isSingleCell)
|
|
329
|
-
cdr3SeqTable.add(col, {header: headerName})
|
|
187
|
+
for colName, _ in cdr3SeqStructuredMap {
|
|
188
|
+
cdr3SeqProjection = append(cdr3SeqProjection, pt.col(colName))
|
|
330
189
|
}
|
|
331
190
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
191
|
+
dfCdr3Seq := wfCdr3Seq.frame(pt.p.full(cdr3SeqStructuredMap)).select(cdr3SeqProjection...)
|
|
192
|
+
dfCdr3Seq.save("cdr3_sequences.parquet")
|
|
193
|
+
cdr3SeqResult := wfCdr3Seq.run()
|
|
194
|
+
cdr3SeqParquet := cdr3SeqResult.getFile("cdr3_sequences.parquet")
|
|
335
195
|
|
|
336
196
|
cdr3VspectratypeCmd := exec.builder().
|
|
337
197
|
software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
|
|
338
198
|
mem("16GiB").
|
|
339
199
|
cpu(1).
|
|
340
|
-
addFile("cdr3_sequences_input.
|
|
341
|
-
arg("--
|
|
200
|
+
addFile("cdr3_sequences_input.parquet", cdr3SeqParquet).
|
|
201
|
+
arg("--input").arg("cdr3_sequences_input.parquet").
|
|
342
202
|
arg("--spectratype_tsv").arg("spectratype.tsv").
|
|
343
203
|
arg("--vj_usage_tsv").arg("vj_usage.tsv") // no dot here
|
|
344
204
|
|
|
345
205
|
// Add top clonotypes argument and file to the builder if provided
|
|
346
|
-
if
|
|
206
|
+
if finalClonotypesParquet != undefined {
|
|
347
207
|
cdr3VspectratypeCmd = cdr3VspectratypeCmd.
|
|
348
|
-
arg("--
|
|
349
|
-
addFile("finalClonotypes.
|
|
208
|
+
arg("--final_clonotypes_parquet").arg("finalClonotypes.parquet").
|
|
209
|
+
addFile("finalClonotypes.parquet", finalClonotypesParquet)
|
|
350
210
|
}
|
|
351
211
|
|
|
352
212
|
cdr3VspectratypeCmd = cdr3VspectratypeCmd. // continue building the command
|
|
@@ -356,18 +216,16 @@ wf.body(func(args) {
|
|
|
356
216
|
cache(24 * 60 * 60 * 1000).
|
|
357
217
|
run()
|
|
358
218
|
|
|
359
|
-
|
|
360
219
|
// Spectratype PFrame structure is [chain][cdr3Length][vGene] -> count
|
|
361
|
-
|
|
362
220
|
cdr3VspectratypePf := xsv.importFile(cdr3VspectratypeCmd.getFile("spectratype.tsv"),
|
|
363
221
|
"tsv", spectratypeConv.getColumns(),
|
|
364
|
-
{cpu: 1, mem: "
|
|
222
|
+
{cpu: 1, mem: "4GiB"})
|
|
365
223
|
outputs["cdr3VspectratypePf"] = pframes.exportFrame(cdr3VspectratypePf)
|
|
366
224
|
|
|
367
225
|
// For vjUsage structure is [chain][vGene][jGene] -> count
|
|
368
226
|
vjUsagePf := xsv.importFile(cdr3VspectratypeCmd.getFile("vj_usage.tsv"),
|
|
369
227
|
"tsv", vjUsageConv.getColumns(),
|
|
370
|
-
{cpu: 1, mem: "
|
|
228
|
+
{cpu: 1, mem: "4GiB"})
|
|
371
229
|
outputs["vjUsagePf"] = pframes.exportFrame(vjUsagePf)
|
|
372
230
|
|
|
373
231
|
if args.kabatNumbering == true {
|
|
@@ -378,7 +236,7 @@ wf.body(func(args) {
|
|
|
378
236
|
|
|
379
237
|
seqCols := columns.getColumns("assemblingAaSeqs")
|
|
380
238
|
for col in seqCols {
|
|
381
|
-
headerName := makeHeaderName(col, "assemblingFeature", isSingleCell)
|
|
239
|
+
headerName := dataUtils.makeHeaderName(col, "assemblingFeature", isSingleCell)
|
|
382
240
|
assemSeqTable.add(col, {header: headerName})
|
|
383
241
|
}
|
|
384
242
|
|
|
@@ -402,7 +260,7 @@ wf.body(func(args) {
|
|
|
402
260
|
assem := render.create(assemFastaTpl, {
|
|
403
261
|
inputTsv: assemSeqTableBuilt,
|
|
404
262
|
keyColumn: "clonotypeKey",
|
|
405
|
-
|
|
263
|
+
finalClonotypesParquet: finalClonotypesParquet,
|
|
406
264
|
isSingleCell: isSingleCell,
|
|
407
265
|
bulkChain: bulkChain
|
|
408
266
|
})
|
|
Binary file
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
// Template for clonotype filtering and sampling
|
|
2
|
-
self := import("@platforma-sdk/workflow-tengo:tpl")
|
|
3
|
-
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
4
|
-
assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
5
|
-
pframes := import("@platforma-sdk/workflow-tengo:pframes")
|
|
6
|
-
xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
|
|
7
|
-
render := import("@platforma-sdk/workflow-tengo:render")
|
|
8
|
-
sampledColsConv := import(":sampled-cols-conv")
|
|
9
|
-
json := import("json")
|
|
10
|
-
|
|
11
|
-
self.defineOutputs("sampledRows", "finalClonotypesCsv")
|
|
12
|
-
|
|
13
|
-
self.body(func(inputs) {
|
|
14
|
-
|
|
15
|
-
cloneTable := inputs.cloneTable
|
|
16
|
-
datasetSpec := inputs.datasetSpec
|
|
17
|
-
filterMap := inputs.filterMap
|
|
18
|
-
rankingMap := inputs.rankingMap
|
|
19
|
-
topClonotypes := inputs.topClonotypes
|
|
20
|
-
|
|
21
|
-
outputs := {}
|
|
22
|
-
finalClonotypesCsv := undefined
|
|
23
|
-
|
|
24
|
-
// Run filtering script
|
|
25
|
-
filterResult := exec.builder().
|
|
26
|
-
software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.sample-clonotypes:filter")).
|
|
27
|
-
mem("16GiB").
|
|
28
|
-
cpu(1).
|
|
29
|
-
addFile("clonotypes.csv", cloneTable).
|
|
30
|
-
arg("--csv").arg("clonotypes.csv").
|
|
31
|
-
arg("--out").arg("filteredClonotypes.csv").
|
|
32
|
-
arg("--filter-map").arg(string(json.encode(filterMap))).
|
|
33
|
-
saveFile("filteredClonotypes.csv").
|
|
34
|
-
printErrStreamToStdout().
|
|
35
|
-
cache(24 * 60 * 60 * 1000).
|
|
36
|
-
run()
|
|
37
|
-
|
|
38
|
-
// Save filtered CSV file
|
|
39
|
-
filteredClonotypesCsv := filterResult.getFile("filteredClonotypes.csv")
|
|
40
|
-
|
|
41
|
-
// Store outputs
|
|
42
|
-
sampledColsParams := sampledColsConv.getColumns(datasetSpec, false) // No ranking column
|
|
43
|
-
filteredClonotypesPf := xsv.importFile(filteredClonotypesCsv, "csv", sampledColsParams,
|
|
44
|
-
{cpu: 1, mem: "16GiB"})
|
|
45
|
-
|
|
46
|
-
// Prepare outputs in case there is no top ranking
|
|
47
|
-
outputs["sampledRows"] = pframes.exportFrame(filteredClonotypesPf)
|
|
48
|
-
finalClonotypesCsv = filteredClonotypesCsv
|
|
49
|
-
|
|
50
|
-
if topClonotypes != undefined {
|
|
51
|
-
|
|
52
|
-
////////// Top Clonotypes Sampling //////////
|
|
53
|
-
// Run sampling script on filtered data
|
|
54
|
-
sampleClones := exec.builder().
|
|
55
|
-
software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.sample-clonotypes:main")).
|
|
56
|
-
mem("16GiB").
|
|
57
|
-
cpu(1).
|
|
58
|
-
addFile("filteredClonotypes.csv", filteredClonotypesCsv).
|
|
59
|
-
arg("--csv").arg("filteredClonotypes.csv").
|
|
60
|
-
arg("--n").arg(string(topClonotypes)).
|
|
61
|
-
arg("--ranking-map").arg(string(json.encode(rankingMap))).
|
|
62
|
-
arg("--out").arg("sampledClonotypes_top.csv").
|
|
63
|
-
saveFile("sampledClonotypes_top.csv").
|
|
64
|
-
printErrStreamToStdout().
|
|
65
|
-
cache(24 * 60 * 60 * 1000).
|
|
66
|
-
run()
|
|
67
|
-
|
|
68
|
-
// Save top clonotypes CSV file
|
|
69
|
-
finalClonotypesCsv = sampleClones.getFile("sampledClonotypes_top.csv")
|
|
70
|
-
|
|
71
|
-
// Store outputs
|
|
72
|
-
sampledColsParams := sampledColsConv.getColumns(datasetSpec, true) // Add ranking column
|
|
73
|
-
sampledColumnsPf := xsv.importFile(finalClonotypesCsv, "csv", sampledColsParams,
|
|
74
|
-
{cpu: 1, mem: "16GiB"})
|
|
75
|
-
outputs["sampledRows"] = pframes.exportFrame(sampledColumnsPf)
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
outputs["finalClonotypesCsv"] = finalClonotypesCsv
|
|
79
|
-
|
|
80
|
-
return outputs
|
|
81
|
-
})
|