@platforma-open/milaboratories.mixcr-shm-trees.workflow 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ wf := import("@platforma-sdk/workflow-tengo:workflow")
2
+
3
+ render := import("@platforma-sdk/workflow-tengo:render")
4
+ assets := import("@platforma-sdk/workflow-tengo:assets")
5
+ ll := import("@platforma-sdk/workflow-tengo:ll")
6
+ pframes := import("@platforma-sdk/workflow-tengo:pframes")
7
+
8
+ processTpl := assets.importTemplate(":process")
9
+
10
+ wf.body(func(args) {
11
+ if is_undefined(args.donorColumn) {
12
+ ll.panic("No donor column")
13
+ }
14
+
15
+ if (len(args.datasetColumns) == 0) {
16
+ ll.panic("No datasets to process")
17
+ }
18
+
19
+ // we cannot use an array as a request for waiting (see below), so we store datasets in a dictionary
20
+ datasets := {}
21
+ for datasetRef in args.datasetColumns {
22
+ if is_undefined(datasetRef) {
23
+ ll.panic("Dataset is undefined")
24
+ }
25
+ // it's the blockId of a MiXCR block; we assume that it produces only one clns column
26
+ datasets[datasetRef.blockId] = wf.resolve(datasetRef)
27
+ }
28
+
29
+ donorColumn := wf.resolve(args.donorColumn)
30
+
31
+ // The problem is that the refs for the data are not resolved.
32
+ // To deal with it, we call resolve, which will return a feature.
33
+ // Then, to resolve the feature, we call another template where we describe what to wait for
34
+ results := render.createEphemeral(processTpl, {
35
+ datasets: datasets,
36
+ donorColumn: donorColumn,
37
+ params: {
38
+ seed: args.seed
39
+ }
40
+ })
41
+
42
+ return {
43
+ outputs: {
44
+ "trees": results.output("trees"),
45
+ "treeNodes": results.output("treeNodes"),
46
+ "treeNodesWithClones": results.output("treeNodesWithClones"),
47
+
48
+ "tsvs": results.output("tsvs"),
49
+ "allelesLogs": results.output("allelesLogs"),
50
+ "treesLogs": results.output("treesLogs"),
51
+
52
+ // files should be explicitly published, otherwise they will not be accessible from the GUI
53
+ // that's why pframes.exportColumnData
54
+ "allelesReports": pframes.exportColumnData(results.output("allelesReports")),
55
+ "treesReports": pframes.exportColumnData(results.output("treesReports")),
56
+ "allelesReportsJson": pframes.exportColumnData(results.output("allelesReportsJson")),
57
+ "treesReportsJson": pframes.exportColumnData(results.output("treesReportsJson"))
58
+ },
59
+ exports: {}
60
+ }
61
+ })
@@ -0,0 +1,78 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+ smart := import("@platforma-sdk/workflow-tengo:smart")
3
+
4
+ json := import("json")
5
+
6
+ _P_COLUMN_DATA_RESOURCE_MAP := { Name: "PColumnData/ResourceMap", Version: "1" }
7
+
8
+ groupDataByDonorId := func(donorColumn, datasets) {
9
+ // we need to form a pColumn with two axes:
10
+ // axes[0]: donorId
11
+ // axes[1]: sampleId
12
+ // axes[2]: mixcrBlockId
13
+ // value: fileRef resource
14
+
15
+ // we have:
16
+ // column of donorIds:
17
+ // axes[0]: sampleId
18
+ // value: donorId
19
+ // several columns of clns:
20
+ // axes[0]: sampleId
21
+ // value: fileRef resource
22
+
23
+ donorColumnSpec := donorColumn.get("spec").getDataAsJson()
24
+
25
+ resultSpec := {
26
+ // annotations and domain could differ between datasets
27
+ "axesSpec": [
28
+ {
29
+ "annotations": donorColumnSpec["annotations"],
30
+ "domain": donorColumnSpec["domain"],
31
+ "name": donorColumnSpec["name"],
32
+ "type": donorColumnSpec["valueType"]
33
+ },
34
+ donorColumnSpec["axesSpec"][0],
35
+ {
36
+ "annotations": {
37
+ "pl7.app/label": "Clonotyping block id"
38
+ },
39
+ "name": "pl7.app/blockId",
40
+ "type": "String"
41
+ }
42
+ ],
43
+ "kind": "PColumn",
44
+ "name": "mixcr.com/clns",
45
+ "valueType": "File"
46
+ }
47
+
48
+
49
+ sampleToDonor := {}
50
+
51
+ // columns with meta can be fetched as data directly
52
+ for k, v in donorColumn.get("data").getDataAsJson()["data"] {
53
+ sampleId := json.decode(k)[0]
54
+ sampleToDonor[sampleId] = v
55
+ }
56
+
57
+ // build pColumn by hand
58
+ dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
59
+
60
+ // collect all the clns files that we have into pColumn
61
+ for blockId, dataset in datasets {
62
+ for sKey, fileRef in dataset.get("data").inputs() {
63
+ sampleId := json.decode(sKey)[0]
64
+ donor := sampleToDonor[sampleId]
65
+ dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
66
+ }
67
+ }
68
+
69
+ return {
70
+ spec: resultSpec,
71
+ data: dataBuilder.lockAndBuild()
72
+ }
73
+ }
74
+
75
+ // to use the file as a library, we should explicitly export functions
76
+ export ll.toStrict({
77
+ groupDataByDonorId: groupDataByDonorId
78
+ })
@@ -0,0 +1,155 @@
1
+ self := import("@platforma-sdk/workflow-tengo:tpl")
2
+
3
+ llPFrames := import("@platforma-sdk/workflow-tengo:pframes.ll")
4
+ ll := import("@platforma-sdk/workflow-tengo:ll")
5
+ assets := import("@platforma-sdk/workflow-tengo:assets")
6
+ xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
7
+ text := import("text")
8
+ exportSettings := import(":export-settings")
9
+ prepareDonorColumn := import(":prepare-donor-column")
10
+ pframes := import("@platforma-sdk/workflow-tengo:pframes")
11
+
12
+ reconstructShmTreesTpl := assets.importTemplate(":reconstruct-shm-trees")
13
+
14
+ // this template should run only after all inputs have been resolved
15
+ // we don't need to wait for file content, just refs
16
+ self.awaitState("datasets", { wildcard: "*" }, "data", "InputsLocked")
17
+ // but we need spec already
18
+ self.awaitState("datasets", { wildcard: "*" }, "spec", "ResourceReady")
19
+ self.awaitState("donorColumn", "ResourceReady")
20
+ self.awaitState("params", "ResourceReady")
21
+
22
+ self.body(func(inputs) {
23
+ // overall description of data that we have.
24
+ dataDescription := {
25
+ "hasUmiTags": false,
26
+ "hasCellTags": false,
27
+ // will be filled
28
+ "coveredFeatures": []
29
+ }
30
+
31
+ assemblingFeature := ""
32
+ for _, dataset in inputs.datasets {
33
+ presetAnnotations := dataset.get("spec").getDataAsJson()["annotations"]
34
+
35
+ if presetAnnotations["mixcr.com/cellTags"] != "" {
36
+ dataDescription["hasCellTags"] = true
37
+ }
38
+ if presetAnnotations["mixcr.com/umiTags"] != "" {
39
+ dataDescription["hasUmiTags"] = true
40
+ }
41
+ dataDescription["coveredFeatures"] = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
42
+ // check that the assembling feature is the same. If so, coveredFeatures will be the same too
43
+ if (assemblingFeature == "") {
44
+ assemblingFeature = dataDescription["mixcr.com/assemblingFeature"]
45
+ } else if (assemblingFeature != dataDescription["mixcr.com/assemblingFeature"]) {
46
+ ll.panic("Assmble features should be the same for process tress. Got " + assemblingFeature + " and " + dataDescription["mixcr.com/assemblingFeature"])
47
+ }
48
+ }
49
+
50
+ // there should be a call to join on pFrames, but it's not implemented, so we do it by hand
51
+ dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
52
+
53
+ // collect params for running export commands and to parse result tsv files into pColumns
54
+ shmTreeTableOptions := exportSettings.shmTreeTableOptions(dataDescription, false)
55
+ shmTreeNodesTableOptions := exportSettings.shmTreeNodesTableOptions(dataDescription, false)
56
+ shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClonesTableOptions(dataDescription, inputs.donorColumn, false)
57
+
58
+ // TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
59
+ mixcrResults := llPFrames.aggregate(
60
+ // files to iterate through
61
+ dataGroupedByDonorId["data"],
62
+ // columns not to combine - sampleId and mixcrBlockId
63
+ [1, 2],
64
+ reconstructShmTreesTpl,
65
+ // all the outputs that should be gathered
66
+ [
67
+ {
68
+ "name": "trees",
69
+ "type": "Resource"
70
+ }, {
71
+ "name": "treeNodes",
72
+ "type": "Resource"
73
+ }, {
74
+ "name": "treeNodesWithClones",
75
+ "type": "Resource"
76
+ }, {
77
+ "name": "tsvs",
78
+ "type": "Resource"
79
+ }, {
80
+ "name": "allelesLog",
81
+ "type": "Resource"
82
+ }, {
83
+ "name": "treesLog",
84
+ "type": "Resource"
85
+ }, {
86
+ "name": "allelesReport",
87
+ "type": "Resource"
88
+ }, {
89
+ "name": "treesReport",
90
+ "type": "Resource"
91
+ }, {
92
+ "name": "allelesReportJson",
93
+ "type": "Resource"
94
+ }, {
95
+ "name": "treesReportJson",
96
+ "type": "Resource"
97
+ }
98
+ ],
99
+ false,
100
+ // inputs
101
+ {
102
+ "shmTreeTableOptions": shmTreeTableOptions["cmdArgs"],
103
+ "shmTreeNodesTableOptions": shmTreeNodesTableOptions["cmdArgs"],
104
+ "shmTreeNodesWithClonesTableOptions": shmTreeNodesWithClonesTableOptions["cmdArgs"],
105
+ "globalParams": inputs.params
106
+ }
107
+ )
108
+
109
+ // donorId axis is inherited from dataGroupedByDonorId and we should specify it explicitly (other axes will be supplied by pfconvParams)
110
+ additionalArgsForImportTsv := {
111
+ additionalAxesSpec: dataGroupedByDonorId["spec"]["axesSpec"][:1]
112
+ }
113
+
114
+ trees := xsv.importFileMap(
115
+ mixcrResults.output("trees"),
116
+ "tsv",
117
+ shmTreeTableOptions["pfconvParams"],
118
+ additionalArgsForImportTsv
119
+ )
120
+
121
+ treeNodes := xsv.importFileMap(
122
+ mixcrResults.output("treeNodes"),
123
+ "tsv",
124
+ shmTreeNodesTableOptions["pfconvParams"],
125
+ additionalArgsForImportTsv
126
+ )
127
+
128
+ treeNodesWithClones := xsv.importFileMap(
129
+ mixcrResults.output("treeNodesWithClones"),
130
+ "tsv",
131
+ shmTreeNodesWithClonesTableOptions["pfconvParams"],
132
+ additionalArgsForImportTsv
133
+ )
134
+
135
+ tsvs := mixcrResults.output("tsvs")
136
+
137
+ return {
138
+ // combine columns into pFrame
139
+ "trees": pframes.exportFrame(trees),
140
+ // combine columns into pFrame
141
+ "treeNodes": pframes.exportFrame(treeNodes),
142
+ // combine columns into pFrame
143
+ "treeNodesWithClones": pframes.exportFrame(treeNodesWithClones),
144
+
145
+ "tsvs": tsvs,
146
+
147
+ "allelesLogs": mixcrResults.output("allelesLog"),
148
+ "treesLogs": mixcrResults.output("treesLog"),
149
+
150
+ "allelesReports": mixcrResults.output("allelesReport"),
151
+ "treesReports": mixcrResults.output("treesReport"),
152
+ "allelesReportsJson": mixcrResults.output("allelesReportJson"),
153
+ "treesReportsJson": mixcrResults.output("treesReportJson")
154
+ }
155
+ })
@@ -0,0 +1,182 @@
1
+ self := import("@platforma-sdk/workflow-tengo:tpl")
2
+ pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
3
+ ll := import("@platforma-sdk/workflow-tengo:ll")
4
+ assets := import("@platforma-sdk/workflow-tengo:assets")
5
+ exec := import("@platforma-sdk/workflow-tengo:exec")
6
+
7
+ json := import("json")
8
+
9
+ // for use in the aggregate function, we must specify all outputs that will be used
10
+ self.defineOutputs(
11
+ "trees", "treeNodes", "treeNodesWithClones",
12
+ "tsvs",
13
+ "allelesLog", "treesLog",
14
+ "allelesReport", "treesReport",
15
+ "allelesReportJson", "treesReportJson"
16
+ )
17
+
18
+ // import MiXCR as a software to use
19
+ mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:main")
20
+
21
+ // env for MiXCR to format progress messages
22
+ progressPrefix := "[==PROGRESS==]"
23
+
24
+ self.body(func(inputs) {
25
+ inputData := inputs[pConstants.VALUE_FIELD_NAME]
26
+ globalParams := inputs.globalParams
27
+
28
+ allelesCmdBuilder := exec.builder().
29
+ printErrStreamToStdout().
30
+ secret("MI_LICENSE", "MI_LICENSE").
31
+ env("MI_PROGRESS_PREFIX", progressPrefix).
32
+ software(mixcrSw).
33
+ arg("findAlleles").
34
+ arg("--report").arg("report.txt").
35
+ saveFile("report.txt").
36
+ arg("--json-report").arg("report.json").
37
+ saveFile("report.json").
38
+ // template specifies where result files will be written
39
+ arg("--output-template").arg("alleles/{file_name}.clns")
40
+
41
+ if !is_undefined(globalParams.seed) {
42
+ allelesCmdBuilder.env("SEED", globalParams.seed)
43
+ }
44
+
45
+ toProcess := []
46
+ for sKey, inputFile in inputData.inputs() {
47
+ key := json.decode(sKey)
48
+ sampleId := key[0]
49
+ clonotypingBlockId := key[1]
50
+ // file name should encode axis values. It will be parsed by xsv.importFileMap afterwards to restore axis for clones data
51
+ fileName := sampleId + "___" + clonotypingBlockId + ".clns"
52
+ toProcess = append(toProcess, {
53
+ "fileName": fileName,
54
+ "input": inputFile
55
+ })
56
+ }
57
+
58
+ for input in toProcess {
59
+ allelesCmdBuilder.addFile(input.fileName, input.input).
60
+ arg(input.fileName).
61
+ saveFile("alleles/" + input.fileName)
62
+ }
63
+
64
+ alleles := allelesCmdBuilder.run()
65
+
66
+ shmTreesCmdBuilder := exec.builder().
67
+ printErrStreamToStdout().
68
+ secret("MI_LICENSE", "MI_LICENSE").
69
+ env("MI_PROGRESS_PREFIX", progressPrefix).
70
+ software(mixcrSw).
71
+ arg("findShmTrees").
72
+ arg("--report").arg("report.txt").
73
+ saveFile("report.txt").
74
+ arg("--json-report").arg("report.json").
75
+ saveFile("report.json").
76
+ // TODO support single cell trees
77
+ arg("--dont-combine-tree-by-cells")
78
+
79
+ if !is_undefined(globalParams.seed) {
80
+ shmTreesCmdBuilder.env("SEED", globalParams.seed)
81
+ }
82
+
83
+ for input in toProcess {
84
+ shmTreesCmdBuilder.
85
+ addFile(input.fileName, alleles.getFile("alleles/" + input.fileName)).
86
+ arg(input.fileName)
87
+ }
88
+
89
+ shmTreesCmdBuilder.arg("output.shmt").saveFile("output.shmt")
90
+
91
+ shmTrees := shmTreesCmdBuilder.run()
92
+
93
+
94
+ // export trees without nodes
95
+ shmTreeExportsCmdBuilder := exec.builder().
96
+ printErrStreamToStdout().
97
+ inLightQueue().
98
+ secret("MI_LICENSE", "MI_LICENSE").
99
+ env("MI_PROGRESS_PREFIX", progressPrefix).
100
+ software(mixcrSw).
101
+ arg("exportShmTrees")
102
+
103
+ for arg in inputs.shmTreeTableOptions {
104
+ shmTreeExportsCmdBuilder = shmTreeExportsCmdBuilder.arg(arg)
105
+ }
106
+
107
+ shmTreeExportsCmdBuilder = shmTreeExportsCmdBuilder.
108
+ arg("input.shmt").
109
+ arg("output.tsv").
110
+ addFile("input.shmt", shmTrees.getFile("output.shmt")).
111
+ saveFile("output.tsv")
112
+
113
+ shmTreeExports := shmTreeExportsCmdBuilder.run()
114
+
115
+
116
+ // export tree nodes with data uniq for the node
117
+ shmTreeNodesExportsCmdBuilder := exec.builder().
118
+ printErrStreamToStdout().
119
+ inLightQueue().
120
+ secret("MI_LICENSE", "MI_LICENSE").
121
+ env("MI_PROGRESS_PREFIX", progressPrefix).
122
+ software(mixcrSw).
123
+ arg("exportShmTreesWithNodes")
124
+
125
+ for arg in inputs.shmTreeNodesTableOptions {
126
+ shmTreeNodesExportsCmdBuilder = shmTreeNodesExportsCmdBuilder.arg(arg)
127
+ }
128
+
129
+ shmTreeNodesExportsCmdBuilder = shmTreeNodesExportsCmdBuilder.
130
+ arg("input.shmt").
131
+ arg("output.tsv").
132
+ addFile("input.shmt", shmTrees.getFile("output.shmt")).
133
+ saveFile("output.tsv")
134
+
135
+ shmTreeNodesExports := shmTreeNodesExportsCmdBuilder.run()
136
+
137
+
138
+
139
+ // export nodes with clones. For each node could be several clones
140
+ shmTreeNodesWithClonesExportsCmdBuilder := exec.builder().
141
+ printErrStreamToStdout().
142
+ inLightQueue().
143
+ secret("MI_LICENSE", "MI_LICENSE").
144
+ env("MI_PROGRESS_PREFIX", progressPrefix).
145
+ software(mixcrSw).
146
+ arg("exportShmTreesWithNodes").
147
+ // don't export nodes that don't have clones
148
+ arg("--only-observed")
149
+
150
+ for arg in inputs.shmTreeNodesWithClonesTableOptions {
151
+ shmTreeNodesWithClonesExportsCmdBuilder = shmTreeNodesWithClonesExportsCmdBuilder.arg(arg)
152
+ }
153
+
154
+ shmTreeNodesWithClonesExportsCmdBuilder = shmTreeNodesWithClonesExportsCmdBuilder.
155
+ arg("input.shmt").
156
+ arg("output.tsv").
157
+ addFile("input.shmt", shmTrees.getFile("output.shmt")).
158
+ saveFile("output.tsv")
159
+
160
+ shmTreeNodesWithClonesExports := shmTreeNodesWithClonesExportsCmdBuilder.run()
161
+
162
+ return {
163
+ "trees": shmTreeExports.getFile("output.tsv"),
164
+ "treeNodes": shmTreeNodesExports.getFile("output.tsv"),
165
+ "treeNodesWithClones": shmTreeNodesWithClonesExports.getFile("output.tsv"),
166
+
167
+ "tsvs": {
168
+ "trees": shmTreeExports.getFile("output.tsv"),
169
+ "treeNodes": shmTreeNodesExports.getFile("output.tsv"),
170
+ "treeNodesWithClones": shmTreeNodesWithClonesExports.getFile("output.tsv")
171
+ },
172
+
173
+ "allelesLog": alleles.getStdoutStream(),
174
+ "treesLog": shmTrees.getStdoutStream(),
175
+
176
+ "allelesReport": alleles.getFile("report.txt"),
177
+ "treesReport": shmTrees.getFile("report.txt"),
178
+
179
+ "allelesReportJson": alleles.getFile("report.json"),
180
+ "treesReportJson": shmTrees.getFile("report.json")
181
+ }
182
+ })
package/tsconfig.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "es2022",
4
+ "module": "commonjs",
5
+ "moduleResolution": "node",
6
+ "esModuleInterop": true,
7
+ "strict": true,
8
+ "outDir": "./dist",
9
+ "rootDir": "./src",
10
+ "sourceMap": true,
11
+ "declaration": true
12
+ },
13
+ "types": [],
14
+ "include": ["src/**/*"],
15
+ "exclude": ["node_modules", "dist"]
16
+ }
@@ -0,0 +1,9 @@
1
+ import { defineConfig } from 'vitest/config';
2
+
3
+ export default defineConfig({
4
+ test: {
5
+ watch: false,
6
+ maxConcurrency: 3,
7
+ testTimeout: 5000
8
+ }
9
+ });