@platforma-open/milaboratories.immune-assay-data.workflow 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/immune-assay-data/immune-assay-data/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.immune-assay-data.workflow@1.2.0 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
3
+ > @platforma-open/milaboratories.immune-assay-data.workflow@1.4.0 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/main.tpl.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # @platforma-open/milaboratories.immune-assay-data.workflow
2
2
 
3
+ ## 1.4.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 8e515a8: Support fasta file as assay data
8
+
9
+ ### Patch Changes
10
+
11
+ - Updated dependencies [8e515a8]
12
+ - @platforma-open/milaboratories.immune-assay-data.coverage-mode-calc@1.1.0
13
+ - @platforma-open/milaboratories.immune-assay-data.prepare-fasta@1.1.0
14
+ - @platforma-open/milaboratories.immune-assay-data.fasta-to-tsv@1.1.0
15
+ - @platforma-open/milaboratories.immune-assay-data.add-header@1.1.0
16
+
17
+ ## 1.3.0
18
+
19
+ ### Minor Changes
20
+
21
+ - b18f925: Update trace label and importance
22
+
3
23
  ## 1.2.0
4
24
 
5
25
  ### Minor Changes
Binary file
Binary file
package/package.json CHANGED
@@ -1,18 +1,19 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.immune-assay-data.workflow",
3
- "version": "1.2.0",
3
+ "version": "1.4.0",
4
4
  "type": "module",
5
5
  "description": "Block Workflow",
6
6
  "dependencies": {
7
- "@platforma-sdk/workflow-tengo": "^4.9.0",
7
+ "@platforma-sdk/workflow-tengo": "4.8.0",
8
8
  "@platforma-open/soedinglab.software-mmseqs2": "^1.0.0",
9
- "@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.0.3",
10
- "@platforma-open/milaboratories.immune-assay-data.add-header": "1.0.2",
11
- "@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.0.0"
9
+ "@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.1.0",
10
+ "@platforma-open/milaboratories.immune-assay-data.add-header": "1.1.0",
11
+ "@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc": "1.1.0",
12
+ "@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv": "1.1.0"
12
13
  },
13
14
  "devDependencies": {
14
15
  "@platforma-sdk/tengo-builder": "^2.1.11",
15
- "@platforma-sdk/test": "^1.37.8",
16
+ "@platforma-sdk/test": "^1.37.9",
16
17
  "vitest": "^2.1.8"
17
18
  },
18
19
  "scripts": {
@@ -16,6 +16,7 @@ strings := import("@platforma-sdk/workflow-tengo:strings")
16
16
  runAlignmentTpl := assets.importTemplate(":run-alignment")
17
17
 
18
18
  prepareFastaSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.prepare-fasta:main")
19
+ fastaToTsvSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.fasta-to-tsv:main")
19
20
  addHeaderSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.add-header:main")
20
21
  covModeCalcSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.coverage-mode-calc:main")
21
22
 
@@ -37,9 +38,26 @@ prepareAssayFile := func(args, file, xsvType) {
37
38
  })
38
39
 
39
40
  //////// calculate sequence id ////////
41
+ // Create unique seqId for each row by combining sequence with row index
42
+ // First add row index using ordinal rank
43
+ df = df.withColumns(
44
+ pt.rank(pt.col(args.sequenceColumnHeader)).
45
+ over(pt.col(args.sequenceColumnHeader)).
46
+ alias("rowIndex")
47
+ )
48
+ // Concatenate sequence with row index and then hash
49
+ df = df.withColumns(
50
+ pt.when(pt.col("rowIndex").gt(pt.lit(1))).
51
+ then(pt.concatStr([pt.col(args.sequenceColumnHeader), pt.col("rowIndex").cast("String")], {delimiter: "_"})).
52
+ otherwise(pt.col(args.sequenceColumnHeader)).
53
+ alias("uniqueKey")
54
+ )
55
+ // Create hash from the unique key
40
56
  df = df.addColumns(
41
- pt.col(args.sequenceColumnHeader).hash("sha256", "base64_alphanumeric", 120).alias("seqId")
57
+ pt.col("uniqueKey").hash("sha256", "base64_alphanumeric", 120).alias("seqId")
42
58
  )
59
+ // Remove the temporary columns
60
+ //df = df.withoutColumns("uniqueKey", "rowIndex")
43
61
  //////// add label to ids ////////
44
62
  df = df.withColumns(
45
63
  pt.col("seqId").
@@ -104,6 +122,24 @@ runTsvToFasta := func(fileTsv) {
104
122
  return e.run()
105
123
  }
106
124
 
125
+ /**
126
+ * Convert fasta file to tsv file
127
+ * @param fileFasta - fasta file
128
+ * @return tsv file run result
129
+ */
130
+ runFastaToTsv := func(fileFasta) {
131
+ e := exec.builder().
132
+ software(fastaToTsvSw).
133
+ mem("16GiB").
134
+ cpu(1).
135
+ addFile("input.fasta", fileFasta).
136
+ arg("-i").arg("input.fasta").
137
+ arg("-o").arg("output.tsv").
138
+ saveFile("output.tsv")
139
+
140
+ return e.run()
141
+ }
142
+
107
143
  assayColumnName := func(header) {
108
144
  return "pl7.app/vdj/assay-data/" + strings.substituteSpecialCharacters(header)
109
145
  }
@@ -147,12 +183,19 @@ wf.body(func(args) {
147
183
  ll.panic("Could not determine filename from file handle: ", args.fileHandle)
148
184
  }
149
185
 
150
- fileNameParts := path.split(fileName, ".")
186
+ fileNameParts := path.split(text.to_lower(fileName), ".")
151
187
  xsvType := "tsv"
152
188
  if len(fileNameParts) > 1 {
153
189
  xsvType = fileNameParts[len(fileNameParts)-1]
154
190
  }
155
191
 
192
+ // Handle FASTA files by converting to TSV first
193
+ if xsvType == "fasta" || xsvType == "fa" {
194
+ fastaToTsvRun := runFastaToTsv(importFile.file)
195
+ importFile.file = fastaToTsvRun.getFile("output.tsv")
196
+ xsvType = "tsv"
197
+ }
198
+
156
199
  assayTsv := prepareAssayFile(args, importFile.file, xsvType)
157
200
  clonesTsv := prepareClonesTsv(args)
158
201
 
@@ -463,11 +506,18 @@ wf.body(func(args) {
463
506
  { splitDataAndSpec: true, cpu: 1, mem: "16GiB" }
464
507
  )
465
508
 
509
+ // Create informative label with relevant matching parameters
510
+ identityStr := string(args.settings.identity)
511
+ coverageStr := string(args.settings.coverageThreshold)
512
+ similarityTypeStr := args.settings.similarityType == "sequence-identity" ? "Exact Match" : "BLOSUM"
513
+
514
+ traceLabel := "Assay Data (sim:" + similarityTypeStr + ", ident:" + identityStr + ", cov:" + coverageStr + ")"
515
+
466
516
  trace := pSpec.makeTrace(datasetSpec,
467
517
  {
468
518
  type: "milaboratories.immune-assay-data",
469
- importance: 30,
470
- label: "Assay Data"
519
+ importance: 40,
520
+ label: traceLabel
471
521
  })
472
522
 
473
523
  epfB := pframes.pFrameBuilder()