@platforma-open/milaboratories.immune-assay-data.workflow 1.6.3 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +19 -0
- package/dist/tengo/tpl/build-outputs.plj.gz +0 -0
- package/dist/tengo/tpl/extract-unique-values.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/run-alignment.plj.gz +0 -0
- package/package.json +5 -4
- package/src/build-outputs.tpl.tengo +3 -3
- package/src/main.tpl.tengo +43 -9
- package/src/run-alignment.tpl.tengo +23 -3
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/immune-assay-data/immune-assay-data/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.immune-assay-data.workflow@1.6.3 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
|
|
3
|
+
> @platforma-open/milaboratories.immune-assay-data.workflow@1.7.1 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
|
|
4
4
|
> rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
Processing "src/build-outputs.tpl.tengo"...
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,24 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.immune-assay-data.workflow
|
|
2
2
|
|
|
3
|
+
## 1.7.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- cc7794e: Update mmseqs binary
|
|
8
|
+
|
|
9
|
+
## 1.7.0
|
|
10
|
+
|
|
11
|
+
### Minor Changes
|
|
12
|
+
|
|
13
|
+
- c98d8b5: - Introduce fast mode for sequence match
|
|
14
|
+
- Support XLSX file as assay data input
|
|
15
|
+
|
|
16
|
+
### Patch Changes
|
|
17
|
+
|
|
18
|
+
- Updated dependencies [c98d8b5]
|
|
19
|
+
- @platforma-open/milaboratories.immune-assay-data.coverage-mode-calc@1.2.0
|
|
20
|
+
- @platforma-open/milaboratories.immune-assay-data.xlsx-to-csv@1.1.0
|
|
21
|
+
|
|
3
22
|
## 1.6.3
|
|
4
23
|
|
|
5
24
|
### Patch Changes
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.immune-assay-data.workflow",
|
|
3
|
-
"version": "1.6.3",
|
|
3
|
+
"version": "1.7.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Tengo-based template",
|
|
6
6
|
"dependencies": {
|
|
7
7
|
"@platforma-sdk/workflow-tengo": "5.9.0",
|
|
8
|
-
"@platforma-open/soedinglab.software-mmseqs2": "
|
|
8
|
+
"@platforma-open/soedinglab.software-mmseqs2": "1.18.3",
|
|
9
9
|
"@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.1.3",
|
|
10
10
|
"@platforma-open/milaboratories.immune-assay-data.add-header": "1.1.3",
|
|
11
|
-
"@platforma-open/milaboratories.immune-assay-data.
|
|
12
|
-
"@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.
|
|
11
|
+
"@platforma-open/milaboratories.immune-assay-data.xlsx-to-csv": "1.1.0",
|
|
12
|
+
"@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.2.0",
|
|
13
|
+
"@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv": "1.1.3"
|
|
13
14
|
},
|
|
14
15
|
"devDependencies": {
|
|
15
16
|
"@platforma-sdk/tengo-builder": "2.4.25"
|
|
@@ -191,7 +191,7 @@ self.body(func(inputs) {
|
|
|
191
191
|
cloneImportResults := xsv.importFile(
|
|
192
192
|
inputs.clonesDataTsv, "tsv", {
|
|
193
193
|
axes: [{
|
|
194
|
-
column: "
|
|
194
|
+
column: "target",
|
|
195
195
|
spec: inputs.datasetSpec.axesSpec[1]
|
|
196
196
|
}],
|
|
197
197
|
columns: cloneColumns,
|
|
@@ -206,11 +206,11 @@ self.body(func(inputs) {
|
|
|
206
206
|
inputs.bestAlignmentTsv, "tsv", {
|
|
207
207
|
axes: [
|
|
208
208
|
{
|
|
209
|
-
column: "
|
|
209
|
+
column: "target",
|
|
210
210
|
spec: inputs.datasetSpec.axesSpec[1]
|
|
211
211
|
},
|
|
212
212
|
{
|
|
213
|
-
column: "
|
|
213
|
+
column: "query",
|
|
214
214
|
spec: {
|
|
215
215
|
name: "pl7.app/vdj/assay/sequenceId",
|
|
216
216
|
type: "String",
|
package/src/main.tpl.tengo
CHANGED
|
@@ -21,6 +21,7 @@ prepareFastaSw := assets.importSoftware("@platforma-open/milaboratories.immune-a
|
|
|
21
21
|
fastaToTsvSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv:main")
|
|
22
22
|
addHeaderSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.add-header:main")
|
|
23
23
|
covModeCalcSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc:main")
|
|
24
|
+
xlsxToCsvSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.xlsx-to-csv:main")
|
|
24
25
|
|
|
25
26
|
wf.prepare(func(args){
|
|
26
27
|
bundleBuilder := wf.createPBundleBuilder()
|
|
@@ -112,7 +113,7 @@ prepareClonesTsv := func(args) {
|
|
|
112
113
|
runTsvToFasta := func(fileTsv) {
|
|
113
114
|
e := exec.builder().
|
|
114
115
|
software(prepareFastaSw).
|
|
115
|
-
mem("
|
|
116
|
+
mem("8GiB").
|
|
116
117
|
cpu(1).
|
|
117
118
|
addFile("input.tsv", fileTsv).
|
|
118
119
|
arg("-i").arg("input.tsv").
|
|
@@ -132,7 +133,7 @@ runTsvToFasta := func(fileTsv) {
|
|
|
132
133
|
runFastaToTsv := func(fileFasta) {
|
|
133
134
|
e := exec.builder().
|
|
134
135
|
software(fastaToTsvSw).
|
|
135
|
-
mem("
|
|
136
|
+
mem("8GiB").
|
|
136
137
|
cpu(1).
|
|
137
138
|
addFile("input.fasta", fileFasta).
|
|
138
139
|
arg("-i").arg("input.fasta").
|
|
@@ -142,6 +143,24 @@ runFastaToTsv := func(fileFasta) {
|
|
|
142
143
|
return e.run()
|
|
143
144
|
}
|
|
144
145
|
|
|
146
|
+
/**
|
|
147
|
+
* Convert xlsx file to csv file
|
|
148
|
+
* @param fileXlsx - xlsx file
|
|
149
|
+
* @return csv file run result
|
|
150
|
+
*/
|
|
151
|
+
runXlsxToCsv := func(fileXlsx) {
|
|
152
|
+
e := exec.builder().
|
|
153
|
+
software(xlsxToCsvSw).
|
|
154
|
+
mem("16GiB").
|
|
155
|
+
cpu(1).
|
|
156
|
+
addFile("input.xlsx", fileXlsx).
|
|
157
|
+
arg("-i").arg("input.xlsx").
|
|
158
|
+
arg("-o").arg("output.csv").
|
|
159
|
+
saveFile("output.csv")
|
|
160
|
+
|
|
161
|
+
return e.run()
|
|
162
|
+
}
|
|
163
|
+
|
|
145
164
|
wf.body(func(args) {
|
|
146
165
|
importFile := file.importFile(args.fileHandle)
|
|
147
166
|
datasetSpec := args.columns.getSpec(args.datasetRef)
|
|
@@ -187,6 +206,18 @@ wf.body(func(args) {
|
|
|
187
206
|
xsvType = fileNameParts[len(fileNameParts)-1]
|
|
188
207
|
}
|
|
189
208
|
|
|
209
|
+
// Use detected delimiter from UI if available (file extension may not match actual delimiter)
|
|
210
|
+
if args.detectedXsvType != undefined {
|
|
211
|
+
xsvType = args.detectedXsvType
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// Handle XLSX files by converting to CSV first
|
|
215
|
+
if xsvType == "xlsx" {
|
|
216
|
+
xlsxToCsvRun := runXlsxToCsv(importFile.file)
|
|
217
|
+
importFile.file = xlsxToCsvRun.getFile("output.csv")
|
|
218
|
+
xsvType = "csv"
|
|
219
|
+
}
|
|
220
|
+
|
|
190
221
|
// Handle FASTA files by converting to TSV first
|
|
191
222
|
if xsvType == "fasta" || xsvType == "fa" {
|
|
192
223
|
fastaToTsvRun := runFastaToTsv(importFile.file)
|
|
@@ -240,7 +271,10 @@ wf.body(func(args) {
|
|
|
240
271
|
identityThreshold: args.settings.identity,
|
|
241
272
|
similarityType: args.settings.similarityType,
|
|
242
273
|
clonesFasta: clonesFasta,
|
|
243
|
-
assayFasta: assayFasta
|
|
274
|
+
assayFasta: assayFasta,
|
|
275
|
+
lessSensitive: args.lessSensitive,
|
|
276
|
+
mem: args.mem,
|
|
277
|
+
cpu: args.cpu
|
|
244
278
|
})
|
|
245
279
|
|
|
246
280
|
mmseqsOutput := runMmseqs.output("mmseqsOutput")
|
|
@@ -287,7 +321,7 @@ wf.body(func(args) {
|
|
|
287
321
|
)
|
|
288
322
|
|
|
289
323
|
cols := []
|
|
290
|
-
for _, col in ["bits", "evalue", "
|
|
324
|
+
for _, col in ["bits", "evalue", "query", "pident", "alnlen", "mismatch", "gapopen", "qstart", "qend", "tstart", "tend"] {
|
|
291
325
|
cols = append(cols,
|
|
292
326
|
pt.col(col).maxBy(
|
|
293
327
|
pt.col("evalue").multiply(-1),
|
|
@@ -296,7 +330,7 @@ wf.body(func(args) {
|
|
|
296
330
|
)
|
|
297
331
|
}
|
|
298
332
|
|
|
299
|
-
df = df.groupBy("
|
|
333
|
+
df = df.groupBy("target").agg(cols...)
|
|
300
334
|
|
|
301
335
|
// Add link column for linker pFrame (assayLinkerPframe)
|
|
302
336
|
df = df.withColumns(
|
|
@@ -312,13 +346,13 @@ wf.body(func(args) {
|
|
|
312
346
|
})
|
|
313
347
|
// import how many matches per assay sequence found
|
|
314
348
|
assayDf = assayDf.join(
|
|
315
|
-
df.groupBy("
|
|
316
|
-
pt.col("
|
|
349
|
+
df.groupBy("query").agg(
|
|
350
|
+
pt.col("target").count().alias("queryCount")
|
|
317
351
|
),
|
|
318
352
|
{
|
|
319
353
|
how: "left",
|
|
320
354
|
leftOn: "seqId",
|
|
321
|
-
rightOn: "
|
|
355
|
+
rightOn: "query"
|
|
322
356
|
}
|
|
323
357
|
)
|
|
324
358
|
assayDf.save("assayData.tsv")
|
|
@@ -327,7 +361,7 @@ wf.body(func(args) {
|
|
|
327
361
|
clonesDf := df.join(assayDf,
|
|
328
362
|
{
|
|
329
363
|
how: "left",
|
|
330
|
-
leftOn: "
|
|
364
|
+
leftOn: "query",
|
|
331
365
|
rightOn: "seqId"
|
|
332
366
|
}
|
|
333
367
|
)
|
|
@@ -16,15 +16,26 @@ self.body(func(args) {
|
|
|
16
16
|
clonesFasta := args.clonesFasta
|
|
17
17
|
assayFasta := args.assayFasta
|
|
18
18
|
|
|
19
|
+
mem := "8GiB"
|
|
20
|
+
cpu := 1
|
|
21
|
+
if !is_undefined(args.mem) {
|
|
22
|
+
mem = string(args.mem) + "GiB"
|
|
23
|
+
}
|
|
24
|
+
if !is_undefined(args.cpu) {
|
|
25
|
+
cpu = args.cpu
|
|
26
|
+
}
|
|
27
|
+
|
|
19
28
|
mmseqs := exec.builder().
|
|
20
29
|
software(mmseqsSw).
|
|
21
|
-
mem(
|
|
22
|
-
cpu(
|
|
30
|
+
mem(mem).
|
|
31
|
+
cpu(cpu).
|
|
23
32
|
arg("easy-search").
|
|
24
|
-
arg("clones.fasta").
|
|
25
33
|
arg("assay.fasta").
|
|
34
|
+
arg("clones.fasta").
|
|
26
35
|
arg("results.tsv").
|
|
27
36
|
arg("tmp").
|
|
37
|
+
arg("--threads").arg(string(cpu)).
|
|
38
|
+
arg("--max-seqs").arg("10000").
|
|
28
39
|
arg("--search-type").arg(mmseqsSearchType).
|
|
29
40
|
arg("--cov-mode").arg(string(covMode)).
|
|
30
41
|
arg("-c").arg(string(coverageThreshold)).
|
|
@@ -34,6 +45,15 @@ self.body(func(args) {
|
|
|
34
45
|
mmseqs = mmseqs.arg("--alignment-mode").arg("3")
|
|
35
46
|
}
|
|
36
47
|
|
|
48
|
+
lessSensitive := is_undefined(args.lessSensitive) ? false : args.lessSensitive
|
|
49
|
+
if lessSensitive {
|
|
50
|
+
mmseqs = mmseqs.
|
|
51
|
+
arg("--comp-bias-corr").arg("0").
|
|
52
|
+
arg("--mask").arg("0").
|
|
53
|
+
arg("--exact-kmer-matching").arg("1").
|
|
54
|
+
arg("-k").arg("7")
|
|
55
|
+
}
|
|
56
|
+
|
|
37
57
|
mmseqs = mmseqs.
|
|
38
58
|
addFile("clones.fasta", clonesFasta).
|
|
39
59
|
addFile("assay.fasta", assayFasta).
|