@platforma-open/milaboratories.immune-assay-data.workflow 1.10.0 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +19 -0
- package/dist/tengo/tpl/analysis.plj.gz +0 -0
- package/dist/tengo/tpl/build-outputs.plj.gz +0 -0
- package/dist/tengo/tpl/check-content-empty.plj.gz +0 -0
- package/dist/tengo/tpl/extract-unique-values.plj.gz +0 -0
- package/dist/tengo/tpl/get-unique-values.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/dist/tengo/tpl/process-outputs.plj.gz +0 -0
- package/dist/tengo/tpl/run-alignment.plj.gz +0 -0
- package/package.json +4 -2
- package/src/analysis.tpl.tengo +53 -11
- package/src/run-alignment.tpl.tengo +0 -5
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/immune-assay-data/immune-assay-data/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.immune-assay-data.workflow@1.10.0 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
|
|
3
|
+
> @platforma-open/milaboratories.immune-assay-data.workflow@1.12.0 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
|
|
4
4
|
> rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
Processing "src/analysis.tpl.tengo"...
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,24 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.immune-assay-data.workflow
|
|
2
2
|
|
|
3
|
+
## 1.12.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 5f43c2b: Improved scalability for large datasets
|
|
8
|
+
|
|
9
|
+
### Patch Changes
|
|
10
|
+
|
|
11
|
+
- Updated dependencies [5f43c2b]
|
|
12
|
+
- @platforma-open/milaboratories.immune-assay-data.coverage-mode-calc@1.3.0
|
|
13
|
+
- @platforma-open/milaboratories.immune-assay-data.merge-results@1.1.0
|
|
14
|
+
- @platforma-open/milaboratories.immune-assay-data.split-fasta@1.1.0
|
|
15
|
+
|
|
16
|
+
## 1.11.0
|
|
17
|
+
|
|
18
|
+
### Minor Changes
|
|
19
|
+
|
|
20
|
+
- ac74170: Improved performance on large datasets, eliminating disk and memory pressure
|
|
21
|
+
|
|
3
22
|
## 1.10.0
|
|
4
23
|
|
|
5
24
|
### Minor Changes
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.immune-assay-data.workflow",
|
|
3
|
-
"version": "1.10.0",
|
|
3
|
+
"version": "1.12.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Tengo-based template",
|
|
6
6
|
"dependencies": {
|
|
@@ -8,8 +8,10 @@
|
|
|
8
8
|
"@platforma-open/soedinglab.software-mmseqs2": "1.18.3",
|
|
9
9
|
"@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.1.3",
|
|
10
10
|
"@platforma-open/milaboratories.immune-assay-data.add-header": "1.1.3",
|
|
11
|
-
"@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.
|
|
11
|
+
"@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.3.0",
|
|
12
12
|
"@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv": "1.1.3",
|
|
13
|
+
"@platforma-open/milaboratories.immune-assay-data.merge-results": "1.1.0",
|
|
14
|
+
"@platforma-open/milaboratories.immune-assay-data.split-fasta": "1.1.0",
|
|
13
15
|
"@platforma-open/milaboratories.immune-assay-data.xlsx-to-csv": "1.1.0",
|
|
14
16
|
"@platforma-open/milaboratories.immune-assay-data.check-content-empty": "1.0.1"
|
|
15
17
|
},
|
package/src/analysis.tpl.tengo
CHANGED
|
@@ -9,10 +9,11 @@ render := import("@platforma-sdk/workflow-tengo:render")
|
|
|
9
9
|
|
|
10
10
|
prepareFastaSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.prepare-fasta:main")
|
|
11
11
|
fastaToTsvSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv:main")
|
|
12
|
-
addHeaderSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.add-header:main")
|
|
13
12
|
covModeCalcSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc:main")
|
|
14
13
|
xlsxToCsvSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.xlsx-to-csv:main")
|
|
15
14
|
checkContentEmptySw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.check-content-empty:main")
|
|
15
|
+
splitFastaSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.split-fasta:main")
|
|
16
|
+
mergeResultsSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.merge-results:main")
|
|
16
17
|
|
|
17
18
|
runAlignmentTpl := assets.importTemplate(":run-alignment")
|
|
18
19
|
checkContentEmptyTpl := assets.importTemplate(":check-content-empty")
|
|
@@ -232,6 +233,27 @@ self.body(func(args) {
|
|
|
232
233
|
|
|
233
234
|
covMode := coverageModeRun.getFileContent("coverage_mode.txt")
|
|
234
235
|
|
|
236
|
+
// Split clone FASTA into 2 equal chunks to limit mmseqs2 index disk usage.
|
|
237
|
+
// Running two searches against 25M sequences each uses half the peak disk
|
|
238
|
+
// of a single 50M search. E-values are normalized to the full database size.
|
|
239
|
+
splitRun := exec.builder().
|
|
240
|
+
software(splitFastaSw).
|
|
241
|
+
mem("8GiB").
|
|
242
|
+
cpu(1).
|
|
243
|
+
addFile("clones.fasta", clonesFasta).
|
|
244
|
+
arg("-i").arg("clones.fasta").
|
|
245
|
+
arg("--chunk1").arg("chunk_1.fasta").
|
|
246
|
+
arg("--chunk2").arg("chunk_2.fasta").
|
|
247
|
+
arg("--counts").arg("counts.json").
|
|
248
|
+
saveFile("chunk_1.fasta").
|
|
249
|
+
saveFile("chunk_2.fasta").
|
|
250
|
+
saveFile("counts.json").
|
|
251
|
+
run()
|
|
252
|
+
|
|
253
|
+
chunk1Fasta := splitRun.getFile("chunk_1.fasta")
|
|
254
|
+
chunk2Fasta := splitRun.getFile("chunk_2.fasta")
|
|
255
|
+
splitCounts := splitRun.getFile("counts.json")
|
|
256
|
+
|
|
235
257
|
// MMseqs2 Alignment
|
|
236
258
|
mmseqsSearchType := "0"
|
|
237
259
|
if targetSequenceType == "aminoacid" && assaySequenceType == "aminoacid" {
|
|
@@ -248,13 +270,28 @@ self.body(func(args) {
|
|
|
248
270
|
mmseqsSearchType = "2"
|
|
249
271
|
}
|
|
250
272
|
|
|
251
|
-
|
|
273
|
+
runMmseqs1 := render.create(runAlignmentTpl, {
|
|
274
|
+
covMode: covMode,
|
|
275
|
+
mmseqsSearchType: mmseqsSearchType,
|
|
276
|
+
coverageThreshold: coverageThreshold,
|
|
277
|
+
identityThreshold: identityThreshold,
|
|
278
|
+
similarityType: similarityType,
|
|
279
|
+
clonesFasta: chunk1Fasta,
|
|
280
|
+
assayFasta: assayFasta,
|
|
281
|
+
lessSensitive: lessSensitive
|
|
282
|
+
}, {
|
|
283
|
+
metaInputs: {
|
|
284
|
+
mem: mem,
|
|
285
|
+
cpu: cpu
|
|
286
|
+
}
|
|
287
|
+
})
|
|
288
|
+
runMmseqs2 := render.create(runAlignmentTpl, {
|
|
252
289
|
covMode: covMode,
|
|
253
290
|
mmseqsSearchType: mmseqsSearchType,
|
|
254
291
|
coverageThreshold: coverageThreshold,
|
|
255
292
|
identityThreshold: identityThreshold,
|
|
256
293
|
similarityType: similarityType,
|
|
257
|
-
clonesFasta: clonesFasta,
|
|
294
|
+
clonesFasta: chunk2Fasta,
|
|
258
295
|
assayFasta: assayFasta,
|
|
259
296
|
lessSensitive: lessSensitive
|
|
260
297
|
}, {
|
|
@@ -264,20 +301,25 @@ self.body(func(args) {
|
|
|
264
301
|
}
|
|
265
302
|
})
|
|
266
303
|
|
|
267
|
-
|
|
304
|
+
mmseqsOutput1 := runMmseqs1.output("mmseqsOutput")
|
|
305
|
+
mmseqsOutput2 := runMmseqs2.output("mmseqsOutput")
|
|
268
306
|
|
|
269
|
-
//
|
|
270
|
-
|
|
271
|
-
software(
|
|
307
|
+
// Merge both raw results, add header, and normalize e-values to full database size
|
|
308
|
+
mergeRun := exec.builder().
|
|
309
|
+
software(mergeResultsSw).
|
|
272
310
|
mem("16GiB").
|
|
273
311
|
cpu(1).
|
|
274
|
-
|
|
312
|
+
addFile("results_1.tsv", mmseqsOutput1).
|
|
313
|
+
addFile("results_2.tsv", mmseqsOutput2).
|
|
314
|
+
addFile("counts.json", splitCounts).
|
|
315
|
+
arg("-i1").arg("results_1.tsv").
|
|
316
|
+
arg("-i2").arg("results_2.tsv").
|
|
317
|
+
arg("--counts").arg("counts.json").
|
|
275
318
|
arg("-o").arg("results_with_header.tsv").
|
|
276
|
-
addFile("results.tsv", mmseqsOutput).
|
|
277
319
|
saveFile("results_with_header.tsv").
|
|
278
320
|
run()
|
|
279
321
|
|
|
280
|
-
mmseqsResultTsv :=
|
|
322
|
+
mmseqsResultTsv := mergeRun.getFile("results_with_header.tsv")
|
|
281
323
|
|
|
282
324
|
// Check if results are empty (only header line or nothing)
|
|
283
325
|
checkResultsRun := exec.builder().
|
|
@@ -297,7 +339,7 @@ self.body(func(args) {
|
|
|
297
339
|
emptyResults := checkResult.output("result")
|
|
298
340
|
|
|
299
341
|
result := {
|
|
300
|
-
mmseqsOutput:
|
|
342
|
+
mmseqsOutput: mmseqsResultTsv,
|
|
301
343
|
emptyResults: emptyResults
|
|
302
344
|
}
|
|
303
345
|
|
|
package/src/run-alignment.tpl.tengo
CHANGED
|
@@ -27,10 +27,6 @@ self.body(func(args) {
|
|
|
27
27
|
cpu = args.metaInputs.cpu
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
// Cap mmseqs2 in-RAM usage to 80% of allocated memory so it splits to disk
|
|
31
|
-
// rather than getting OOM-killed by the kernel on large datasets.
|
|
32
|
-
memLimit := "{int(ceil(system.ram.gb * 0.8))}" + "G"
|
|
33
|
-
|
|
34
30
|
mmseqs := exec.builder().
|
|
35
31
|
software(mmseqsSw).
|
|
36
32
|
mem(mem).
|
|
@@ -40,7 +36,6 @@ self.body(func(args) {
|
|
|
40
36
|
arg("clones.fasta").
|
|
41
37
|
arg("results.tsv").
|
|
42
38
|
arg("tmp").
|
|
43
|
-
arg("--split-memory-limit").argWithVar(memLimit).
|
|
44
39
|
arg("--threads").arg(string(cpu)).
|
|
45
40
|
arg("--max-seqs").arg("10000").
|
|
46
41
|
arg("--search-type").arg(mmseqsSearchType).
|