@platforma-open/milaboratories.immune-assay-data.workflow 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/immune-assay-data/immune-assay-data/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.immune-assay-data.workflow@1.2.0 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
|
|
3
|
+
> @platforma-open/milaboratories.immune-assay-data.workflow@1.4.0 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
|
|
4
4
|
> rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
Processing "src/main.tpl.tengo"...
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.immune-assay-data.workflow
|
|
2
2
|
|
|
3
|
+
## 1.4.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 8e515a8: Support fasta file as assay data
|
|
8
|
+
|
|
9
|
+
### Patch Changes
|
|
10
|
+
|
|
11
|
+
- Updated dependencies [8e515a8]
|
|
12
|
+
- @platforma-open/milaboratories.immune-assay-data.coverage-mode-calc@1.1.0
|
|
13
|
+
- @platforma-open/milaboratories.immune-assay-data.prepare-fasta@1.1.0
|
|
14
|
+
- @platforma-open/milaboratories.immune-assay-data.fasta-to-tsv@1.1.0
|
|
15
|
+
- @platforma-open/milaboratories.immune-assay-data.add-header@1.1.0
|
|
16
|
+
|
|
17
|
+
## 1.3.0
|
|
18
|
+
|
|
19
|
+
### Minor Changes
|
|
20
|
+
|
|
21
|
+
- b18f925: Update trace label and importance
|
|
22
|
+
|
|
3
23
|
## 1.2.0
|
|
4
24
|
|
|
5
25
|
### Minor Changes
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.immune-assay-data.workflow",
|
|
3
|
-
"version": "1.2.0",
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Block Workflow",
|
|
6
6
|
"dependencies": {
|
|
7
|
-
"@platforma-sdk/workflow-tengo": "
|
|
7
|
+
"@platforma-sdk/workflow-tengo": "4.8.0",
|
|
8
8
|
"@platforma-open/soedinglab.software-mmseqs2": "^1.0.0",
|
|
9
|
-
"@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.0
|
|
10
|
-
"@platforma-open/milaboratories.immune-assay-data.add-header": "1.0
|
|
11
|
-
"@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.
|
|
9
|
+
"@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.1.0",
|
|
10
|
+
"@platforma-open/milaboratories.immune-assay-data.add-header": "1.1.0",
|
|
11
|
+
"@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.1.0",
|
|
12
|
+
"@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv": "1.1.0"
|
|
12
13
|
},
|
|
13
14
|
"devDependencies": {
|
|
14
15
|
"@platforma-sdk/tengo-builder": "^2.1.11",
|
|
15
|
-
"@platforma-sdk/test": "^1.37.
|
|
16
|
+
"@platforma-sdk/test": "^1.37.9",
|
|
16
17
|
"vitest": "^2.1.8"
|
|
17
18
|
},
|
|
18
19
|
"scripts": {
|
package/src/main.tpl.tengo
CHANGED
|
@@ -16,6 +16,7 @@ strings := import("@platforma-sdk/workflow-tengo:strings")
|
|
|
16
16
|
runAlignmentTpl := assets.importTemplate(":run-alignment")
|
|
17
17
|
|
|
18
18
|
prepareFastaSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.prepare-fasta:main")
|
|
19
|
+
fastaToTsvSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv:main")
|
|
19
20
|
addHeaderSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.add-header:main")
|
|
20
21
|
covModeCalcSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc:main")
|
|
21
22
|
|
|
@@ -37,9 +38,26 @@ prepareAssayFile := func(args, file, xsvType) {
|
|
|
37
38
|
})
|
|
38
39
|
|
|
39
40
|
//////// calculate sequence id ////////
|
|
41
|
+
// Create unique seqId for each row by combining sequence with row index
|
|
42
|
+
// First add row index using ordinal rank
|
|
43
|
+
df = df.withColumns(
|
|
44
|
+
pt.rank(pt.col(args.sequenceColumnHeader)).
|
|
45
|
+
over(pt.col(args.sequenceColumnHeader)).
|
|
46
|
+
alias("rowIndex")
|
|
47
|
+
)
|
|
48
|
+
// Concatenate sequence with row index and then hash
|
|
49
|
+
df = df.withColumns(
|
|
50
|
+
pt.when(pt.col("rowIndex").gt(pt.lit(1))).
|
|
51
|
+
then(pt.concatStr([pt.col(args.sequenceColumnHeader), pt.col("rowIndex").cast("String")], {delimiter: "_"})).
|
|
52
|
+
otherwise(pt.col(args.sequenceColumnHeader)).
|
|
53
|
+
alias("uniqueKey")
|
|
54
|
+
)
|
|
55
|
+
// Create hash from the unique key
|
|
40
56
|
df = df.addColumns(
|
|
41
|
-
pt.col(
|
|
57
|
+
pt.col("uniqueKey").hash("sha256", "base64_alphanumeric", 120).alias("seqId")
|
|
42
58
|
)
|
|
59
|
+
// Remove the temporary columns
|
|
60
|
+
//df = df.withoutColumns("uniqueKey", "rowIndex")
|
|
43
61
|
//////// add label to ids ////////
|
|
44
62
|
df = df.withColumns(
|
|
45
63
|
pt.col("seqId").
|
|
@@ -104,6 +122,24 @@ runTsvToFasta := func(fileTsv) {
|
|
|
104
122
|
return e.run()
|
|
105
123
|
}
|
|
106
124
|
|
|
125
|
+
/**
|
|
126
|
+
* Convert fasta file to tsv file
|
|
127
|
+
* @param fileFasta - fasta file
|
|
128
|
+
* @return tsv file run result
|
|
129
|
+
*/
|
|
130
|
+
runFastaToTsv := func(fileFasta) {
|
|
131
|
+
e := exec.builder().
|
|
132
|
+
software(fastaToTsvSw).
|
|
133
|
+
mem("16GiB").
|
|
134
|
+
cpu(1).
|
|
135
|
+
addFile("input.fasta", fileFasta).
|
|
136
|
+
arg("-i").arg("input.fasta").
|
|
137
|
+
arg("-o").arg("output.tsv").
|
|
138
|
+
saveFile("output.tsv")
|
|
139
|
+
|
|
140
|
+
return e.run()
|
|
141
|
+
}
|
|
142
|
+
|
|
107
143
|
assayColumnName := func(header) {
|
|
108
144
|
return "pl7.app/vdj/assay-data/" + strings.substituteSpecialCharacters(header)
|
|
109
145
|
}
|
|
@@ -147,12 +183,19 @@ wf.body(func(args) {
|
|
|
147
183
|
ll.panic("Could not determine filename from file handle: ", args.fileHandle)
|
|
148
184
|
}
|
|
149
185
|
|
|
150
|
-
fileNameParts := path.split(fileName, ".")
|
|
186
|
+
fileNameParts := path.split(text.to_lower(fileName), ".")
|
|
151
187
|
xsvType := "tsv"
|
|
152
188
|
if len(fileNameParts) > 1 {
|
|
153
189
|
xsvType = fileNameParts[len(fileNameParts)-1]
|
|
154
190
|
}
|
|
155
191
|
|
|
192
|
+
// Handle FASTA files by converting to TSV first
|
|
193
|
+
if xsvType == "fasta" || xsvType == "fa" {
|
|
194
|
+
fastaToTsvRun := runFastaToTsv(importFile.file)
|
|
195
|
+
importFile.file = fastaToTsvRun.getFile("output.tsv")
|
|
196
|
+
xsvType = "tsv"
|
|
197
|
+
}
|
|
198
|
+
|
|
156
199
|
assayTsv := prepareAssayFile(args, importFile.file, xsvType)
|
|
157
200
|
clonesTsv := prepareClonesTsv(args)
|
|
158
201
|
|
|
@@ -463,11 +506,18 @@ wf.body(func(args) {
|
|
|
463
506
|
{ splitDataAndSpec: true, cpu: 1, mem: "16GiB" }
|
|
464
507
|
)
|
|
465
508
|
|
|
509
|
+
// Create informative label with relevant matching parameters
|
|
510
|
+
identityStr := string(args.settings.identity)
|
|
511
|
+
coverageStr := string(args.settings.coverageThreshold)
|
|
512
|
+
similarityTypeStr := args.settings.similarityType == "sequence-identity" ? "Exact Match" : "BLOSUM"
|
|
513
|
+
|
|
514
|
+
traceLabel := "Assay Data (sim:" + similarityTypeStr + ", ident:" + identityStr + ", cov:" + coverageStr + ")"
|
|
515
|
+
|
|
466
516
|
trace := pSpec.makeTrace(datasetSpec,
|
|
467
517
|
{
|
|
468
518
|
type: "milaboratories.immune-assay-data",
|
|
469
|
-
importance:
|
|
470
|
-
label:
|
|
519
|
+
importance: 40,
|
|
520
|
+
label: traceLabel
|
|
471
521
|
})
|
|
472
522
|
|
|
473
523
|
epfB := pframes.pFrameBuilder()
|