@platforma-open/milaboratories.immune-assay-data.workflow 1.11.0 → 1.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +20 -0
- package/dist/tengo/tpl/analysis.plj.gz +0 -0
- package/dist/tengo/tpl/build-outputs.plj.gz +0 -0
- package/dist/tengo/tpl/check-content-empty.plj.gz +0 -0
- package/dist/tengo/tpl/extract-unique-values.plj.gz +0 -0
- package/dist/tengo/tpl/get-unique-values.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/dist/tengo/tpl/process-outputs.plj.gz +0 -0
- package/dist/tengo/tpl/run-alignment.plj.gz +0 -0
- package/package.json +7 -5
- package/src/analysis.tpl.tengo +59 -17
- package/src/build-outputs.tpl.tengo +3 -3
- package/src/run-alignment.tpl.tengo +1 -1
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/immune-assay-data/immune-assay-data/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.immune-assay-data.workflow@1.11.0 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
|
|
3
|
+
> @platforma-open/milaboratories.immune-assay-data.workflow@1.12.1 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
|
|
4
4
|
> rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
Processing "src/analysis.tpl.tengo"...
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.immune-assay-data.workflow
|
|
2
2
|
|
|
3
|
+
## 1.12.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Updated dependencies [2eff9dc]
|
|
8
|
+
- @platforma-open/milaboratories.immune-assay-data.split-fasta@1.2.0
|
|
9
|
+
|
|
10
|
+
## 1.12.0
|
|
11
|
+
|
|
12
|
+
### Minor Changes
|
|
13
|
+
|
|
14
|
+
- 5f43c2b: Improved scalability for large datasets
|
|
15
|
+
|
|
16
|
+
### Patch Changes
|
|
17
|
+
|
|
18
|
+
- Updated dependencies [5f43c2b]
|
|
19
|
+
- @platforma-open/milaboratories.immune-assay-data.coverage-mode-calc@1.3.0
|
|
20
|
+
- @platforma-open/milaboratories.immune-assay-data.merge-results@1.1.0
|
|
21
|
+
- @platforma-open/milaboratories.immune-assay-data.split-fasta@1.1.0
|
|
22
|
+
|
|
3
23
|
## 1.11.0
|
|
4
24
|
|
|
5
25
|
### Minor Changes
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.immune-assay-data.workflow",
|
|
3
|
-
"version": "1.11.0",
|
|
3
|
+
"version": "1.12.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Tengo-based template",
|
|
6
6
|
"dependencies": {
|
|
7
7
|
"@platforma-sdk/workflow-tengo": "5.11.0",
|
|
8
8
|
"@platforma-open/soedinglab.software-mmseqs2": "1.18.3",
|
|
9
|
+
"@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv": "1.1.3",
|
|
9
10
|
"@platforma-open/milaboratories.immune-assay-data.add-header": "1.1.3",
|
|
10
|
-
"@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.2.0",
|
|
11
|
-
"@platforma-open/milaboratories.immune-assay-data.xlsx-to-csv": "1.1.0",
|
|
12
11
|
"@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.1.3",
|
|
13
|
-
"@platforma-open/milaboratories.immune-assay-data.
|
|
14
|
-
"@platforma-open/milaboratories.immune-assay-data.check-content-empty": "1.0.1"
|
|
12
|
+
"@platforma-open/milaboratories.immune-assay-data.xlsx-to-csv": "1.1.0",
|
|
13
|
+
"@platforma-open/milaboratories.immune-assay-data.check-content-empty": "1.0.1",
|
|
14
|
+
"@platforma-open/milaboratories.immune-assay-data.split-fasta": "1.2.0",
|
|
15
|
+
"@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.3.0",
|
|
16
|
+
"@platforma-open/milaboratories.immune-assay-data.merge-results": "1.1.0"
|
|
15
17
|
},
|
|
16
18
|
"devDependencies": {
|
|
17
19
|
"@platforma-sdk/tengo-builder": "2.5.5"
|
package/src/analysis.tpl.tengo
CHANGED
|
@@ -9,10 +9,11 @@ render := import("@platforma-sdk/workflow-tengo:render")
|
|
|
9
9
|
|
|
10
10
|
prepareFastaSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.prepare-fasta:main")
|
|
11
11
|
fastaToTsvSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv:main")
|
|
12
|
-
addHeaderSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.add-header:main")
|
|
13
12
|
covModeCalcSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc:main")
|
|
14
13
|
xlsxToCsvSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.xlsx-to-csv:main")
|
|
15
14
|
checkContentEmptySw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.check-content-empty:main")
|
|
15
|
+
splitFastaSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.split-fasta:main")
|
|
16
|
+
mergeResultsSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.merge-results:main")
|
|
16
17
|
|
|
17
18
|
runAlignmentTpl := assets.importTemplate(":run-alignment")
|
|
18
19
|
checkContentEmptyTpl := assets.importTemplate(":check-content-empty")
|
|
@@ -232,6 +233,27 @@ self.body(func(args) {
|
|
|
232
233
|
|
|
233
234
|
covMode := coverageModeRun.getFileContent("coverage_mode.txt")
|
|
234
235
|
|
|
236
|
+
// Split clone FASTA into 2 equal chunks to limit mmseqs2 index disk usage.
|
|
237
|
+
// Running two searches against 25M sequences each uses half the peak disk
|
|
238
|
+
// of a single 50M search. E-values are normalized to the full database size.
|
|
239
|
+
splitRun := exec.builder().
|
|
240
|
+
software(splitFastaSw).
|
|
241
|
+
mem("8GiB").
|
|
242
|
+
cpu(1).
|
|
243
|
+
addFile("clones.fasta", clonesFasta).
|
|
244
|
+
arg("-i").arg("clones.fasta").
|
|
245
|
+
arg("--chunk1").arg("chunk_1.fasta").
|
|
246
|
+
arg("--chunk2").arg("chunk_2.fasta").
|
|
247
|
+
arg("--counts").arg("counts.json").
|
|
248
|
+
saveFile("chunk_1.fasta").
|
|
249
|
+
saveFile("chunk_2.fasta").
|
|
250
|
+
saveFile("counts.json").
|
|
251
|
+
run()
|
|
252
|
+
|
|
253
|
+
chunk1Fasta := splitRun.getFile("chunk_1.fasta")
|
|
254
|
+
chunk2Fasta := splitRun.getFile("chunk_2.fasta")
|
|
255
|
+
splitCounts := splitRun.getFile("counts.json")
|
|
256
|
+
|
|
235
257
|
// MMseqs2 Alignment
|
|
236
258
|
mmseqsSearchType := "0"
|
|
237
259
|
if targetSequenceType == "aminoacid" && assaySequenceType == "aminoacid" {
|
|
@@ -248,13 +270,28 @@ self.body(func(args) {
|
|
|
248
270
|
mmseqsSearchType = "2"
|
|
249
271
|
}
|
|
250
272
|
|
|
251
|
-
|
|
273
|
+
runMmseqs1 := render.create(runAlignmentTpl, {
|
|
274
|
+
covMode: covMode,
|
|
275
|
+
mmseqsSearchType: mmseqsSearchType,
|
|
276
|
+
coverageThreshold: coverageThreshold,
|
|
277
|
+
identityThreshold: identityThreshold,
|
|
278
|
+
similarityType: similarityType,
|
|
279
|
+
clonesFasta: chunk1Fasta,
|
|
280
|
+
assayFasta: assayFasta,
|
|
281
|
+
lessSensitive: lessSensitive
|
|
282
|
+
}, {
|
|
283
|
+
metaInputs: {
|
|
284
|
+
mem: mem,
|
|
285
|
+
cpu: cpu
|
|
286
|
+
}
|
|
287
|
+
})
|
|
288
|
+
runMmseqs2 := render.create(runAlignmentTpl, {
|
|
252
289
|
covMode: covMode,
|
|
253
290
|
mmseqsSearchType: mmseqsSearchType,
|
|
254
291
|
coverageThreshold: coverageThreshold,
|
|
255
292
|
identityThreshold: identityThreshold,
|
|
256
293
|
similarityType: similarityType,
|
|
257
|
-
clonesFasta:
|
|
294
|
+
clonesFasta: chunk2Fasta,
|
|
258
295
|
assayFasta: assayFasta,
|
|
259
296
|
lessSensitive: lessSensitive
|
|
260
297
|
}, {
|
|
@@ -264,20 +301,25 @@ self.body(func(args) {
|
|
|
264
301
|
}
|
|
265
302
|
})
|
|
266
303
|
|
|
267
|
-
|
|
304
|
+
mmseqsOutput1 := runMmseqs1.output("mmseqsOutput")
|
|
305
|
+
mmseqsOutput2 := runMmseqs2.output("mmseqsOutput")
|
|
268
306
|
|
|
269
|
-
//
|
|
270
|
-
|
|
271
|
-
software(
|
|
307
|
+
// Merge both raw results, add header, and normalize e-values to full database size
|
|
308
|
+
mergeRun := exec.builder().
|
|
309
|
+
software(mergeResultsSw).
|
|
272
310
|
mem("16GiB").
|
|
273
311
|
cpu(1).
|
|
274
|
-
|
|
312
|
+
addFile("results_1.tsv", mmseqsOutput1).
|
|
313
|
+
addFile("results_2.tsv", mmseqsOutput2).
|
|
314
|
+
addFile("counts.json", splitCounts).
|
|
315
|
+
arg("-i1").arg("results_1.tsv").
|
|
316
|
+
arg("-i2").arg("results_2.tsv").
|
|
317
|
+
arg("--counts").arg("counts.json").
|
|
275
318
|
arg("-o").arg("results_with_header.tsv").
|
|
276
|
-
addFile("results.tsv", mmseqsOutput).
|
|
277
319
|
saveFile("results_with_header.tsv").
|
|
278
320
|
run()
|
|
279
321
|
|
|
280
|
-
mmseqsResultTsv :=
|
|
322
|
+
mmseqsResultTsv := mergeRun.getFile("results_with_header.tsv")
|
|
281
323
|
|
|
282
324
|
// Check if results are empty (only header line or nothing)
|
|
283
325
|
checkResultsRun := exec.builder().
|
|
@@ -297,7 +339,7 @@ self.body(func(args) {
|
|
|
297
339
|
emptyResults := checkResult.output("result")
|
|
298
340
|
|
|
299
341
|
result := {
|
|
300
|
-
mmseqsOutput:
|
|
342
|
+
mmseqsOutput: mmseqsResultTsv,
|
|
301
343
|
emptyResults: emptyResults
|
|
302
344
|
}
|
|
303
345
|
|
|
@@ -315,7 +357,7 @@ self.body(func(args) {
|
|
|
315
357
|
)
|
|
316
358
|
|
|
317
359
|
cols := []
|
|
318
|
-
for _, col in ["bits", "evalue", "
|
|
360
|
+
for _, col in ["bits", "evalue", "query", "pident", "alnlen", "mismatch",
|
|
319
361
|
"gapopen", "qstart", "qend", "tstart", "tend"] {
|
|
320
362
|
cols = append(cols,
|
|
321
363
|
pt.col(col).maxBy(
|
|
@@ -325,7 +367,7 @@ self.body(func(args) {
|
|
|
325
367
|
)
|
|
326
368
|
}
|
|
327
369
|
|
|
328
|
-
dfRes = dfRes.groupBy("
|
|
370
|
+
dfRes = dfRes.groupBy("target").agg(cols...)
|
|
329
371
|
// Add link column for linker pFrame (assayLinkerPframe)
|
|
330
372
|
dfRes = dfRes.withColumns(
|
|
331
373
|
pt.lit(1).cast("Int64").alias("link")
|
|
@@ -340,13 +382,13 @@ self.body(func(args) {
|
|
|
340
382
|
|
|
341
383
|
// import how many matches per assay sequence found
|
|
342
384
|
assayDf = assayDf.join(
|
|
343
|
-
dfRes.groupBy("
|
|
344
|
-
pt.col("
|
|
385
|
+
dfRes.groupBy("query").agg(
|
|
386
|
+
pt.col("target").count().alias("queryCount")
|
|
345
387
|
),
|
|
346
388
|
{
|
|
347
389
|
how: "left",
|
|
348
390
|
leftOn: "seqId",
|
|
349
|
-
rightOn: "
|
|
391
|
+
rightOn: "query"
|
|
350
392
|
}
|
|
351
393
|
)
|
|
352
394
|
assayDf.save("assay_data.tsv")
|
|
@@ -355,7 +397,7 @@ self.body(func(args) {
|
|
|
355
397
|
clonesDf := dfRes.join(assayDf,
|
|
356
398
|
{
|
|
357
399
|
how: "left",
|
|
358
|
-
leftOn: "
|
|
400
|
+
leftOn: "query",
|
|
359
401
|
rightOn: "seqId"
|
|
360
402
|
}
|
|
361
403
|
)
|
package/src/build-outputs.tpl.tengo
CHANGED
|
@@ -174,7 +174,7 @@ self.body(func(inputs) {
|
|
|
174
174
|
cloneImportResults := xsv.importFile(
|
|
175
175
|
inputs.clonesDataTsv, "tsv", {
|
|
176
176
|
axes: [{
|
|
177
|
-
column: "
|
|
177
|
+
column: "target",
|
|
178
178
|
spec: inputs.datasetSpec.axesSpec[1]
|
|
179
179
|
}],
|
|
180
180
|
columns: cloneColumns,
|
|
@@ -189,11 +189,11 @@ self.body(func(inputs) {
|
|
|
189
189
|
inputs.bestAlignmentTsv, "tsv", {
|
|
190
190
|
axes: [
|
|
191
191
|
{
|
|
192
|
-
column: "
|
|
192
|
+
column: "target",
|
|
193
193
|
spec: inputs.datasetSpec.axesSpec[1]
|
|
194
194
|
},
|
|
195
195
|
{
|
|
196
|
-
column: "
|
|
196
|
+
column: "query",
|
|
197
197
|
spec: {
|
|
198
198
|
name: "pl7.app/vdj/assay/sequenceId",
|
|
199
199
|
type: "String",
|