@platforma-open/milaboratories.immune-assay-data.workflow 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+  WARN  Issue while reading "/home/runner/work/immune-assay-data/immune-assay-data/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
+
3
+ > @platforma-open/milaboratories.immune-assay-data.workflow@1.0.1 build /home/runner/work/immune-assay-data/immune-assay-data/workflow
4
+ > rm -rf dist && pl-tengo check && pl-tengo build
5
+
6
+ Processing "src/main.tpl.tengo"...
7
+ No syntax errors found.
8
+ info: Compiling 'dist'...
9
+ info: - writing /home/runner/work/immune-assay-data/immune-assay-data/workflow/dist/tengo/tpl/main.plj.gz
10
+ info: Template Pack build done.
11
+ info: Template Pack build done.
package/CHANGELOG.md ADDED
@@ -0,0 +1,9 @@
1
+ # @platforma-open/milaboratories.immune-assay-data.workflow
2
+
3
+ ## 1.0.1
4
+
5
+ ### Patch Changes
6
+
7
+ - f59e17e: Initial release
8
+ - Updated dependencies [f59e17e]
9
+ - @platforma-open/milaboratories.immune-assay-data.prepare-fasta@1.0.2
package/dist/index.cjs ADDED
@@ -0,0 +1,3 @@
1
+ module.exports = { Templates: { // CommonJS entry: registry of the compiled workflow templates
2
+ 'main': { type: 'from-file', path: require.resolve('./tengo/tpl/main.plj.gz') } // the only template, resolved next to this file
3
+ }};
@@ -0,0 +1,4 @@
1
+ declare type TemplateFromFile = { readonly type: "from-file"; readonly path: string; }; // descriptor of a template stored in a file
2
+ declare type TplName = "main"; // names of the templates exposed by this package
3
+ declare const Templates: Record<TplName, TemplateFromFile>; // one descriptor per template name
4
+ export { Templates };
package/dist/index.js ADDED
@@ -0,0 +1,4 @@
1
+ import { resolve } from 'node:path';
2
+ export const Templates = { // ES module entry: registry of the compiled workflow templates
3
+ 'main': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/main.plj.gz') } // path built from the directory of this module
4
+ };
Binary file
package/format.el ADDED
@@ -0,0 +1,43 @@
1
+ ;; This program formats all files inside src directory. Usage: emacs --script ./format.el
2
+
3
+ (defun install-go-mode ()
4
+ "Installs go-mode"
5
+ (require 'package)
6
+ (add-to-list 'package-archives
7
+ '("melpa-stable" . "https://stable.melpa.org/packages/")) ;; register the MELPA Stable archive
8
+ (package-initialize)
9
+ (unless package-archive-contents
10
+ (package-refresh-contents)) ;; refresh only when no archive contents are known yet
11
+
12
+ (package-install 'go-mode t)
13
+ (require 'go-mode)) ;; load go-mode so that format-file can enable it
14
+
15
+ ;; spaces -> tabs only at the beginning of lines
16
+ (setq tabify-regexp "^\t* [ \t]+")
17
+
18
+ (defun format-file (file)
19
+ "Formats a file according to slightly changed Go rules"
20
+ (message "Format %s" file)
21
+ (save-excursion
22
+ (find-file file)
23
+ (delete-trailing-whitespace) ;; strip trailing whitespace
24
+ (go-mode) ;; switch the buffer to go-mode so its indentation rules apply
25
+ (tabify (point-min) (point-max)) ;; spaces -> tabs over the whole buffer, limited by the tabify-regexp set at top level
26
+ (indent-region (point-min) (point-max)) ;; re-indent the whole buffer
27
+ (save-buffer))) ;; write the formatted buffer back to FILE
28
+
29
+ (install-go-mode)
30
+
31
+ ;; tweak standard go-mode: advise go--in-composite-literal-p so that it always returns t
32
+ (advice-add
33
+ 'go--in-composite-literal-p
34
+ :filter-return
35
+ (lambda (&rest r) t))
36
+
37
+ ;; find all files in src
38
+ (setq files (directory-files-recursively "src" "\\.tengo\\'"))
39
+
40
+ ;; call format on every file.
41
+ (dolist (file files)
42
+ (format-file file))
43
+
package/index.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ declare type TemplateFromFile = { readonly type: "from-file"; readonly path: string; }; // descriptor of a template stored in a file
2
+ declare type TplName = "main"; // names of the templates exposed by this package
3
+ declare const Templates: Record<TplName, TemplateFromFile>; // one descriptor per template name
4
+ export { Templates };
package/index.js ADDED
@@ -0,0 +1,3 @@
1
+ module.exports = { Templates: { // NOTE(review): CommonJS syntax, while package.json declares type module; confirm how this entry gets loaded
2
+ 'main': { type: 'from-file', path: require.resolve('./dist/tengo/tpl/main.plj.gz') } // points at the compiled template inside ./dist
3
+ }}
package/package.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "name": "@platforma-open/milaboratories.immune-assay-data.workflow",
3
+ "version": "1.0.1",
4
+ "type": "module",
5
+ "description": "Block Workflow",
6
+ "dependencies": {
7
+ "@platforma-sdk/workflow-tengo": "^4.6.1",
8
+ "@platforma-open/soedinglab.software-mmseqs2": "^1.0.0",
9
+ "@platforma-open/milaboratories.immune-assay-data.prepare-fasta": "1.0.2",
10
+ "@platforma-open/milaboratories.immune-assay-data.add-header": "1.0.1"
11
+ },
12
+ "devDependencies": {
13
+ "@platforma-sdk/tengo-builder": "^2.1.5",
14
+ "@platforma-sdk/test": "^1.31.16",
15
+ "vitest": "^2.1.8"
16
+ },
17
+ "scripts": {
18
+ "build": "rm -rf dist && pl-tengo check && pl-tengo build",
19
+ "test": "vitest",
20
+ "format": "/usr/bin/env emacs --script ./format.el"
21
+ }
22
+ }
@@ -0,0 +1,408 @@
1
+ // Workflow that matches dataset sequences against imported assay sequences using MMseqs2
2
+ wf := import("@platforma-sdk/workflow-tengo:workflow")
3
+ ll := import("@platforma-sdk/workflow-tengo:ll")
4
+ file := import("@platforma-sdk/workflow-tengo:file")
5
+ exec := import("@platforma-sdk/workflow-tengo:exec")
6
+ assets:= import("@platforma-sdk/workflow-tengo:assets")
7
+ maps:= import("@platforma-sdk/workflow-tengo:maps")
8
+ xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
9
+ pframes := import("@platforma-sdk/workflow-tengo:pframes")
10
+ pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
11
+ slices := import("@platforma-sdk/workflow-tengo:slices")
12
+ pt := import("@platforma-sdk/workflow-tengo:pt")
13
+
14
+ prepareFastaSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.prepare-fasta:main")
15
+ mmseqsSw := assets.importSoftware("@platforma-open/soedinglab.software-mmseqs2:main")
16
+ addHeaderSw := assets.importSoftware("@platforma-open/milaboratories.immune-assay-data.add-header:main")
17
+
18
+ wf.prepare(func(args){ // prepare step: declares the columns requested for the workflow
19
+ bundleBuilder := wf.createPBundleBuilder()
20
+ bundleBuilder.addAnchor("main", args.datasetRef) // dataset reference is registered as the main anchor
21
+ bundleBuilder.addSingle(args.targetRef) // column with the dataset sequences to be matched
22
+ return {
23
+ columns: bundleBuilder.build() // presumably surfaces as args.columns in wf.body; confirm against the SDK
24
+ }
25
+ })
26
+
27
+ prepareAssayTsv := func(args, file) { // NOTE: the file parameter shadows the imported file module inside this function
28
+ // read the imported assay table, assign an id and a readable label to every row, save it as output.tsv
29
+ ptw := pt.workflow()
30
+ df := ptw.frame({
31
+ file: file,
32
+ xsvType: "tsv" // @TODO (!!!) input format is hard-coded to tsv
33
+ })
34
+
35
+ //////// calculate sequence id ////////
36
+ df = df.addColumns(
37
+ pt.col(args.sequenceColumnHeader).hash("sha256", "base64_alphanumeric", 120).alias("seqId") // id is a sha256-based hash of the sequence column
38
+ )
39
+ //////// add label to ids ////////
40
+ df = df.withColumns(
41
+ pt.col("seqId").
42
+ strReplace("\\d", "", { replaceAll: true }). // Drop every digit from the id
43
+ strSlice(0, 5). // Take first 5 characters
44
+ strToUpper(). // Convert to uppercase
45
+ alias("tmpLabel")
46
+ )
47
+ df = df.withColumns(
48
+ pt.rank(pt.col("seqId")). // Rank by seqId (default ascending)
49
+ over(pt.col("tmpLabel")). // Partition by tmpLabel, so ranking is done separately for every label
50
+ alias("rank")
51
+ )
52
+ df = df.withColumns(
53
+ pt.when(pt.col("rank").gt(pt.lit(1))).
54
+ then(pt.concatStr([pt.lit("A"), pt.col("tmpLabel"), pt.col("rank").cast("String")], { delimiter: "-" })). // rank above 1: A-LABEL-RANK
55
+ otherwise(pt.concatStr([pt.lit("A"), pt.col("tmpLabel")], { delimiter: "-" })). // otherwise: A-LABEL
56
+ alias("seqIdLabel")
57
+ )
58
+ df = df.withoutColumns("rank", "tmpLabel") // helper columns are not written to the output
59
+
60
+ //////// add sequence column ////////
61
+ df = df.addColumns(
62
+ pt.col(args.sequenceColumnHeader).alias("sequence") // copy of the sequence column under the fixed name that tsv2Fasta reads
63
+ )
64
+ df.save("output.tsv")
65
+
66
+ return ptw.run().getFile("output.tsv")
67
+ }
68
+
69
+ prepareClonesTsv := func(args) { // builds a TSV of the dataset sequences with seqId and sequence headers
70
+ columns := args.columns
71
+ datasetSpec := columns.getSpec(args.datasetRef)
72
+
73
+ cloneTable := pframes.tsvFileBuilder()
74
+
75
+ cloneTable.setAxisHeader(datasetSpec.axesSpec[1].name, "seqId") // the second axis of the dataset spec is written under the seqId header
76
+ cloneTable.add(columns.getColumn(args.targetRef), {header: "sequence"}) // the target column is written under the sequence header
77
+
78
+ return cloneTable.build()
79
+ }
80
+
81
+ /**
82
+ * Convert a tsv file to a fasta file by running the prepare-fasta software
83
+ * @param fileTsv - tsv file containing seqId and sequence columns
84
+ * @return fasta file (output.fasta saved by the run)
85
+ */
86
+ tsv2Fasta := func(fileTsv) {
87
+ e := exec.builder().
88
+ software(prepareFastaSw).
89
+ addFile("input.tsv", fileTsv).
90
+ arg("-i").arg("input.tsv").
91
+ arg("-o").arg("output.fasta").
92
+ arg("--seq_col").arg("sequence").
93
+ arg("--id_col").arg("seqId").
94
+ saveFile("output.fasta")
95
+
96
+ return e.run().getFile("output.fasta")
97
+ }
98
+
99
+ wf.body(func(args) { // main step: import assay file, run the MMseqs2 search, build outputs and exports
100
+ importFile := file.importFile(args.fileHandle)
101
+ datasetSpec := args.columns.getSpec(args.datasetRef)
102
+ targetSpec := args.columns.getSpec(args.targetRef)
103
+
104
+ // locate the import column that holds the assay sequences
105
+ sequenceColumnInfo := undefined
106
+ for col in args.importColumns {
107
+ if col.header == args.sequenceColumnHeader {
108
+ sequenceColumnInfo = col
109
+ break
110
+ }
111
+ }
112
+
113
+ // alphabets of both sides: aminoacid or nucleotide
114
+ targetSequenceType := targetSpec.domain["pl7.app/alphabet"]
115
+ assaySequenceType := sequenceColumnInfo.sequenceType // NOTE(review): sequenceColumnInfo is still undefined here if no import column matched; confirm the panic below is the intended report
116
+
117
+ if targetSequenceType == undefined {
118
+ ll.panic("Target sequence type is undefined")
119
+ }
120
+
121
+ if assaySequenceType == undefined {
122
+ ll.panic("Assay sequence type is undefined")
123
+ }
124
+
125
+ assayTsv := prepareAssayTsv(args, importFile.file)
126
+ clonesTsv := prepareClonesTsv(args)
127
+
128
+ // prepare fasta
129
+ clonesFasta := tsv2Fasta(clonesTsv)
130
+ assayFasta := tsv2Fasta(assayTsv)
131
+
132
+ mmseqsSearchType := "0" // fallback when none of the combinations below match
133
+ if targetSequenceType == "aminoacid" && assaySequenceType == "aminoacid" {
134
+ //1: amino acid
135
+ mmseqsSearchType = "1"
136
+ } else if targetSequenceType == "nucleotide" && assaySequenceType == "nucleotide" {
137
+ // 3: nucleotide
138
+ mmseqsSearchType = "3"
139
+ } else if targetSequenceType == "nucleotide" && assaySequenceType == "aminoacid" {
140
+ // 4: translated nucleotide alignment
141
+ mmseqsSearchType = "4"
142
+ } else if targetSequenceType == "aminoacid" && assaySequenceType == "nucleotide" {
143
+ // 2: translated (per the MMseqs2 --search-type help)
144
+ mmseqsSearchType = "2"
145
+ }
146
+ // run search
147
+ mmseqs := exec.builder().
148
+ software(mmseqsSw).
149
+ dontSaveStdoutOrStderr(). // important to avoid CID conflict problems coming from different stdout output on same datasets
150
+ arg("easy-search").
151
+ arg("clones.fasta").
152
+ arg("assay.fasta").
153
+ arg("results.tsv").
154
+ arg("tmp").
155
+ arg("--search-type").arg(mmseqsSearchType).
156
+ arg("--cov-mode").arg(string(args.settings.coverageMode)).
157
+ arg("-c").arg(string(args.settings.coverageThreshold)).
158
+ addFile("clones.fasta", clonesFasta).
159
+ addFile("assay.fasta", assayFasta).
160
+ saveFile("results.tsv").
161
+ run()
162
+
163
+ mmseqsOutput := mmseqs.getFile("results.tsv")
164
+ // @TODO remove header stuff and replace with pt when available (!)
165
+ mmseqsResultTsv := exec.builder().
166
+ software(addHeaderSw).
167
+ arg("-i").arg("results.tsv").
168
+ arg("-o").arg("results_with_header.tsv").
169
+ addFile("results.tsv", mmseqsOutput).
170
+ saveFile("results_with_header.tsv").
171
+ run().
172
+ getFile("results_with_header.tsv")
173
+
174
+
175
+ //////// Process tables ////////
176
+
177
+ ptw := pt.workflow()
178
+ df := ptw.frame({
179
+ file: mmseqsResultTsv,
180
+ xsvType: "tsv"
181
+ })
182
+
183
+ cols := []
184
+ for _, col in ["bits", "evalue", "target", "pident", "alnlen", "mismatch", "gapopen", "qstart", "qend", "tstart", "tend"] {
185
+ cols = append(cols,
186
+ pt.col(col).maxBy( // presumably picks the value from the best hit: lowest evalue, then highest bits; confirm maxBy semantics
187
+ pt.col("evalue").multiply(-1),
188
+ pt.col("bits")
189
+ ).alias(col)
190
+ )
191
+ }
192
+
193
+ df = df.groupBy("query").agg(cols...) // one aggregated row per query
194
+ df.save("results.tsv")
195
+
196
+ // assay data import summary
197
+ assayDf := ptw.frame({
198
+ file: assayTsv,
199
+ xsvType: "tsv"
200
+ })
201
+ // count, per assay sequence, the queries whose aggregated hit targets it
202
+ assayDf = assayDf.join(
203
+ df.groupBy("target").agg(
204
+ pt.col("query").count().alias("queryCount")
205
+ ),
206
+ {
207
+ how: "left",
208
+ leftOn: "seqId",
209
+ rightOn: "target"
210
+ }
211
+ )
212
+ assayDf.save("assayData.tsv")
213
+
214
+ // clones: aggregated hit rows joined with the assay table columns
215
+ clonesDf := df.join(assayDf,
216
+ {
217
+ how: "left",
218
+ leftOn: "target",
219
+ rightOn: "seqId"
220
+ }
221
+ )
222
+
223
+ clonesDf.save("clonesData.tsv")
224
+ ptw = ptw.run() // ptw is rebound to the run result, which the getFile calls below read from
225
+
226
+ //////// Building outputs & exports ////////
227
+ blockId := wf.blockId().getDataAsJson()
228
+
229
+ assayColumns := [
230
+ {
231
+ column: "seqIdLabel",
232
+ spec: {
233
+ name: "pl7.app/label",
234
+ valueType: "String",
235
+ annotations: {
236
+ "pl7.app/label": "Sequence Id",
237
+ "pl7.app/table/fontFamily": "monospace"
238
+ }
239
+ }
240
+ },
241
+ {
242
+ column: "queryCount",
243
+ spec: {
244
+ name: "pl7.app/vdj/assay/queryCount",
245
+ valueType: "Int",
246
+ annotations: {
247
+ "pl7.app/label": "Matched Clones",
248
+ "pl7.app/table/orderPriority": "9000"
249
+ }
250
+ }
251
+ },
252
+ {
253
+ column: sequenceColumnInfo.header,
254
+ spec: {
255
+ name: "pl7.app/vdj/sequence",
256
+ valueType: "String",
257
+ domain: {
258
+ "pl7.app/alphabet": assaySequenceType
259
+ },
260
+ annotations: {
261
+ "pl7.app/label": sequenceColumnInfo.header,
262
+ "pl7.app/table/fontFamily": "monospace",
263
+ "pl7.app/table/orderPriority": "10000"
264
+ }
265
+ }
266
+ }
267
+ ]
268
+ for h in args.importColumns { // remaining import columns; the sequence column is already declared above
269
+ if h.header == args.sequenceColumnHeader {
270
+ continue
271
+ }
272
+ assayColumns = append(assayColumns, {
273
+ column: h.header,
274
+ spec: {
275
+ name: h.header,
276
+ valueType: h.type,
277
+ annotations: {
278
+ "pl7.app/label": h.header,
279
+ "pl7.app/table/orderPriority": "1000"
280
+ }
281
+ }
282
+ })
283
+ }
284
+
285
+ assayImportResults := xsv.importFile(ptw.getFile("assayData.tsv"), "tsv", {
286
+ axes: [{
287
+ column: "seqId",
288
+ spec: {
289
+ name: "pl7.app/vdj/assay/sequenceId",
290
+ type: "String",
291
+ domain: {
292
+ "pl7.app/blockId": blockId
293
+ },
294
+ annotations: {
295
+ "pl7.app/label": "Sequence Id",
296
+ "pl7.app/table/fontFamily": "monospace"
297
+ }
298
+ }
299
+ }],
300
+ columns: assayColumns,
301
+ annotations: {
302
+ "pl7.app/isAnchor": "true"
303
+ }
304
+ })
305
+
306
+ // alignment columns exported per clone: target id, bits, evalue, pident
307
+ cloneColumns := [
308
+ {
309
+ column: "target",
310
+ spec: {
311
+ name: "pl7.app/vdj/assay/sequenceId",
312
+ valueType: "String",
313
+ annotations: {
314
+ "pl7.app/label": "Assay Sequence Id",
315
+ "pl7.app/table/defaultVisibility": "optional"
316
+ }
317
+ }
318
+ },
319
+ {
320
+ column: "bits",
321
+ spec: {
322
+ name: "pl7.app/alignment/bitScore",
323
+ valueType: "Float",
324
+ annotations: {
325
+ "pl7.app/label": "Bit Score",
326
+ "pl7.app/table/defaultVisibility": "optional"
327
+ }
328
+ }
329
+ },
330
+ {
331
+ column: "evalue",
332
+ spec: {
333
+ name: "pl7.app/alignment/evalue",
334
+ valueType: "Float",
335
+ annotations: {
336
+ "pl7.app/label": "E-value",
337
+ "pl7.app/table/defaultVisibility": "optional"
338
+ }
339
+ }
340
+ },
341
+ {
342
+ column: "pident",
343
+ spec: {
344
+ name: "pl7.app/alignment/pident",
345
+ valueType: "Float",
346
+ annotations: {
347
+ "pl7.app/label": "Percentage of identical matches",
348
+ "pl7.app/table/defaultVisibility": "optional"
349
+ }
350
+ }
351
+ }]
352
+
353
+ for h in args.importColumns { // every import column, including the sequence column, is attached to the clones
354
+ cloneColumns = append(cloneColumns, {
355
+ column: h.header,
356
+ spec: {
357
+ name: h.header,
358
+ valueType: h.type,
359
+ annotations: {
360
+ "pl7.app/label": h.header,
361
+ "pl7.app/table/defaultVisibility": h.header == args.sequenceColumnHeader ? "optional" : "default"
362
+ }
363
+ }
364
+ })
365
+ }
366
+
367
+ // add the id of this block to the domain of every clone column
368
+ for col in cloneColumns {
369
+ col.spec.domain = maps.deepMerge(col.spec.domain, {
370
+ "pl7.app/blockId": blockId
371
+ })
372
+ }
373
+
374
+ cloneImportResults := xsv.importFile(
375
+ ptw.getFile("clonesData.tsv"), "tsv", {
376
+ axes: [{
377
+ column: "query",
378
+ spec: datasetSpec.axesSpec[1]
379
+ }],
380
+ columns: cloneColumns
381
+ },
382
+ { splitDataAndSpec: true }
383
+ )
384
+
385
+ trace := pSpec.makeTrace(datasetSpec,
386
+ {
387
+ type: "milaboratories.immune-assay-data",
388
+ importance: 30,
389
+ label: "Assay Data"
390
+ })
391
+
392
+ epf := pframes.pFrameBuilder()
393
+ for k, v in cloneImportResults {
394
+ epf.add(k, trace.inject(v.spec), v.data) // every exported column spec gets the trace injected
395
+ }
396
+ epf = epf.build()
397
+
398
+ return {
399
+ outputs: {
400
+ dataImportHandle: importFile.handle,
401
+ table: pframes.exportFrame(assayImportResults),
402
+ mmseqsOutput: mmseqsOutput // @TODO tmp fix to resolve CID conflicts
403
+ },
404
+ exports: {
405
+ epf: epf
406
+ }
407
+ }
408
+ })
package/tsconfig.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "es2022",
4
+ "module": "commonjs",
5
+ "moduleResolution": "node",
6
+ "esModuleInterop": true,
7
+ "strict": true,
8
+ "outDir": "./dist",
9
+ "rootDir": "./src",
10
+ "sourceMap": true,
11
+ "declaration": true
12
+ },
13
+ "types": [],
14
+ "include": ["src/**/*"],
15
+ "exclude": ["node_modules", "dist"]
16
+ }
@@ -0,0 +1,9 @@
1
+ import { defineConfig } from 'vitest/config';
2
+
3
+ export default defineConfig({ // Vitest settings for the test script declared in package.json
4
+ test: {
5
+ watch: false,
6
+ maxConcurrency: 3,
7
+ testTimeout: 5000 // per-test timeout in milliseconds
8
+ }
9
+ });