@platforma-open/milaboratories.mixcr-shm-trees.workflow 3.3.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +3 -1
- package/CHANGELOG.md +12 -0
- package/dist/tengo/lib/export-settings.lib.tengo +534 -92
- package/dist/tengo/lib/tablesAggregation.lib.tengo +75 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/process.plj.gz +0 -0
- package/dist/tengo/tpl/reconstruct-shm-trees.plj.gz +0 -0
- package/dist/tengo/tpl/soi.plj.gz +0 -0
- package/package.json +5 -5
- package/src/export-settings.lib.tengo +534 -92
- package/src/main.tpl.tengo +1 -0
- package/src/process.tpl.tengo +19 -2
- package/src/reconstruct-shm-trees.tpl.tengo +16 -39
- package/src/soi.tpl.tengo +1 -1
- package/src/tablesAggregation.lib.tengo +75 -0
package/src/main.tpl.tengo
CHANGED
|
@@ -45,6 +45,7 @@ wf.body(func(args) {
|
|
|
45
45
|
trees: results.output("trees"),
|
|
46
46
|
treeNodes: results.output("treeNodes"),
|
|
47
47
|
treeNodesWithClones: results.output("treeNodesWithClones"),
|
|
48
|
+
treeNodesUniqueIsotype: results.output("treeNodesUniqueIsotype"),
|
|
48
49
|
|
|
49
50
|
soiNodesResults: results.output("soiNodesResults"),
|
|
50
51
|
soiTreesResults: results.output("soiTreesResults"),
|
package/src/process.tpl.tengo
CHANGED
|
@@ -32,6 +32,7 @@ self.body(func(inputs) {
|
|
|
32
32
|
hasCellTags: false,
|
|
33
33
|
// will be filled
|
|
34
34
|
coveredFeatures: [],
|
|
35
|
+
assemblingFeature: undefined,
|
|
35
36
|
cellsAssembled: false
|
|
36
37
|
}
|
|
37
38
|
|
|
@@ -65,8 +66,8 @@ self.body(func(inputs) {
|
|
|
65
66
|
}
|
|
66
67
|
}
|
|
67
68
|
|
|
68
|
-
// adding assembling feature
|
|
69
|
-
dataDescription.
|
|
69
|
+
// adding assembling feature
|
|
70
|
+
dataDescription.assemblingFeature = assemblingFeature
|
|
70
71
|
|
|
71
72
|
// there should be a join call on pfFrames, but it's not implemented, so we will do it by hand
|
|
72
73
|
dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
|
|
@@ -75,6 +76,7 @@ self.body(func(inputs) {
|
|
|
75
76
|
shmTreeTableOptions := exportSettings.shmTreeTableOptions(dataDescription)
|
|
76
77
|
shmTreeNodesTableOptions := exportSettings.shmTreeNodesTableOptions(dataDescription)
|
|
77
78
|
shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClonesTableOptions(dataDescription, inputs.donorColumn)
|
|
79
|
+
shmTreeNodesUniqueIsotypeTableOptions := exportSettings.shmTreeNodesUniqueIsotypeTableOptions(dataDescription)
|
|
78
80
|
|
|
79
81
|
// TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
|
|
80
82
|
mixcrResults := llPFrames.aggregate(
|
|
@@ -94,6 +96,9 @@ self.body(func(inputs) {
|
|
|
94
96
|
}, {
|
|
95
97
|
name: "treeNodesWithClones",
|
|
96
98
|
type: "Resource"
|
|
99
|
+
}, {
|
|
100
|
+
name: "treeNodesUniqueIsotype",
|
|
101
|
+
type: "Resource"
|
|
97
102
|
}, {
|
|
98
103
|
name: "tsvs",
|
|
99
104
|
type: "Resource"
|
|
@@ -120,9 +125,12 @@ self.body(func(inputs) {
|
|
|
120
125
|
false,
|
|
121
126
|
// inputs
|
|
122
127
|
{
|
|
128
|
+
shmTreeTableOptions: shmTreeTableOptions,
|
|
123
129
|
shmTreeTableArgs: shmTreeTableOptions.cmdArgs,
|
|
124
130
|
shmTreeNodesTableOptions: shmTreeNodesTableOptions,
|
|
131
|
+
shmTreeNodesWithClonesTableOptions: shmTreeNodesWithClonesTableOptions,
|
|
125
132
|
shmTreeNodesWithClonesTableArgs: shmTreeNodesWithClonesTableOptions.cmdArgs,
|
|
133
|
+
shmTreeNodesUniqueIsotypeTableOptions: shmTreeNodesUniqueIsotypeTableOptions,
|
|
126
134
|
globalParams: maps.merge(
|
|
127
135
|
inputs.params,
|
|
128
136
|
{ datasetTypes: datasetTypes }
|
|
@@ -158,6 +166,13 @@ self.body(func(inputs) {
|
|
|
158
166
|
additionalArgsForImportTsv
|
|
159
167
|
)
|
|
160
168
|
|
|
169
|
+
treeNodesUniqueIsotype := xsv.importFileMap(
|
|
170
|
+
mixcrResults.output("treeNodesUniqueIsotype"),
|
|
171
|
+
"tsv",
|
|
172
|
+
shmTreeNodesUniqueIsotypeTableOptions.pfconvParams,
|
|
173
|
+
additionalArgsForImportTsv
|
|
174
|
+
)
|
|
175
|
+
|
|
161
176
|
// Running SOI search for the data
|
|
162
177
|
soiNodesResults := {}
|
|
163
178
|
soiTreesResults := {}
|
|
@@ -206,6 +221,8 @@ self.body(func(inputs) {
|
|
|
206
221
|
treeNodes: pframes.exportFrame(treeNodes),
|
|
207
222
|
// combine columns into pFrame
|
|
208
223
|
treeNodesWithClones: pframes.exportFrame(treeNodesWithClones),
|
|
224
|
+
// combine columns into pFrame
|
|
225
|
+
treeNodesUniqueIsotype: pframes.exportFrame(treeNodesUniqueIsotype),
|
|
209
226
|
|
|
210
227
|
soiNodesResults: maps.mapValues(soiNodesResults, pframes.exportFrame),
|
|
211
228
|
soiTreesResults: maps.mapValues(soiTreesResults, pframes.exportFrame),
|
|
@@ -9,18 +9,21 @@ json := import("json")
|
|
|
9
9
|
|
|
10
10
|
// for usage in aggregate function, we should specify all outputs that will be used
|
|
11
11
|
self.defineOutputs(
|
|
12
|
-
"trees", "treeNodes", "treeNodesWithClones",
|
|
12
|
+
"trees", "treeNodes", "treeNodesWithClones", "treeNodesUniqueIsotype",
|
|
13
13
|
"tsvs",
|
|
14
14
|
"allelesLog", "treesLog",
|
|
15
15
|
"allelesReport", "treesReport",
|
|
16
16
|
"allelesReportJson", "treesReportJson"
|
|
17
17
|
)
|
|
18
18
|
|
|
19
|
+
//// import function for aggregating by-nodes output to make it uniquely addressable by its native key
|
|
20
|
+
tablesAggregation := import(":tablesAggregation")
|
|
21
|
+
|
|
19
22
|
// import MiXCR as a software to use
|
|
20
23
|
mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:main")
|
|
21
24
|
|
|
22
25
|
// used to postprocess some tables
|
|
23
|
-
paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-
|
|
26
|
+
paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
|
|
24
27
|
|
|
25
28
|
// env for MiXCR to format progress messages
|
|
26
29
|
progressPrefix := "[==PROGRESS==]"
|
|
@@ -148,7 +151,7 @@ self.body(func(inputs) {
|
|
|
148
151
|
|
|
149
152
|
shmTrees := shmTreesCmdBuilder.run()
|
|
150
153
|
outputShmt := shmTrees.getFile("output.shmt")
|
|
151
|
-
|
|
154
|
+
|
|
152
155
|
// export trees without nodes
|
|
153
156
|
shmTreeExportsCmdBuilder := exec.builder().
|
|
154
157
|
printErrStreamToStdout().
|
|
@@ -168,8 +171,9 @@ self.body(func(inputs) {
|
|
|
168
171
|
saveFile("output.tsv")
|
|
169
172
|
|
|
170
173
|
shmTreeExports := shmTreeExportsCmdBuilder.run()
|
|
171
|
-
|
|
174
|
+
shmTreeTsvRaw := shmTreeExports.getFile("output.tsv")
|
|
172
175
|
|
|
176
|
+
shmTreeTsv := tablesAggregation.ensureUniqueness(shmTreeTsvRaw, inputs.shmTreeTableOptions.pfconvParams, "max_by", "totalReadsCountInTree")
|
|
173
177
|
|
|
174
178
|
// export tree nodes with data unique to the node
|
|
175
179
|
shmTreeNodesExportsCmdBuilder := exec.builder().
|
|
@@ -192,39 +196,7 @@ self.body(func(inputs) {
|
|
|
192
196
|
shmTreeNodesExports := shmTreeNodesExportsCmdBuilder.run()
|
|
193
197
|
shmTreeNodesTsvRaw := shmTreeNodesExports.getFile("output.tsv")
|
|
194
198
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
aggregations := []
|
|
198
|
-
for col in inputs.shmTreeNodesTableOptions.pfconvParams.columns {
|
|
199
|
-
aggregations = append(aggregations, {
|
|
200
|
-
type: "first",
|
|
201
|
-
src: col.column,
|
|
202
|
-
dst: col.column
|
|
203
|
-
})
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
keyColumns := []
|
|
207
|
-
for axis in inputs.shmTreeNodesTableOptions.pfconvParams.axes {
|
|
208
|
-
keyColumns = append(keyColumns, axis.column)
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
aggregationWorkflow := { steps: [ {
|
|
212
|
-
type: "aggregate",
|
|
213
|
-
groupBy: keyColumns,
|
|
214
|
-
aggregations: aggregations
|
|
215
|
-
} ] }
|
|
216
|
-
|
|
217
|
-
aggregateCmd := exec.builder().
|
|
218
|
-
printErrStreamToStdout().
|
|
219
|
-
software(paggregateSw).
|
|
220
|
-
arg("--workflow").arg("wf.json").
|
|
221
|
-
writeFile("wf.json", json.encode(aggregationWorkflow)).
|
|
222
|
-
arg("input.tsv").addFile("input.tsv", shmTreeNodesTsvRaw).
|
|
223
|
-
arg("output.tsv").saveFile("output.tsv").
|
|
224
|
-
run()
|
|
225
|
-
|
|
226
|
-
shmTreeNodesTsv := aggregateCmd.getFile("output.tsv")
|
|
227
|
-
|
|
199
|
+
shmTreeNodesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesTsvRaw, inputs.shmTreeNodesTableOptions.pfconvParams, "first")
|
|
228
200
|
|
|
229
201
|
// export nodes with clones. For each node could be several clones
|
|
230
202
|
shmTreeNodesWithClonesExportsCmdBuilder := exec.builder().
|
|
@@ -248,17 +220,22 @@ self.body(func(inputs) {
|
|
|
248
220
|
saveFile("output.tsv")
|
|
249
221
|
|
|
250
222
|
shmTreeNodesWithClonesExports := shmTreeNodesWithClonesExportsCmdBuilder.run()
|
|
251
|
-
|
|
223
|
+
shmTreeNodesWithClonesTsvRaw := shmTreeNodesWithClonesExports.getFile("output.tsv")
|
|
224
|
+
|
|
225
|
+
shmTreeNodesWithClonesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsvRaw, inputs.shmTreeNodesWithClonesTableOptions.pfconvParams, "max_by", "readCount")
|
|
226
|
+
shmTreeNodesUniqueIsotypeTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsv, inputs.shmTreeNodesUniqueIsotypeTableOptions.pfconvParams, "max_by", "readCount")
|
|
252
227
|
|
|
253
228
|
return {
|
|
254
229
|
trees: shmTreeTsv,
|
|
255
230
|
treeNodes: shmTreeNodesTsv,
|
|
256
231
|
treeNodesWithClones: shmTreeNodesWithClonesTsv,
|
|
232
|
+
treeNodesUniqueIsotype : shmTreeNodesUniqueIsotypeTsv,
|
|
257
233
|
|
|
258
234
|
tsvs: {
|
|
259
235
|
trees: shmTreeTsv,
|
|
260
236
|
treeNodes: shmTreeNodesTsv,
|
|
261
|
-
treeNodesWithClones: shmTreeNodesWithClonesTsv
|
|
237
|
+
treeNodesWithClones: shmTreeNodesWithClonesTsv,
|
|
238
|
+
treeNodesUniqueIsotype : shmTreeNodesUniqueIsotypeTsv
|
|
262
239
|
},
|
|
263
240
|
|
|
264
241
|
allelesLog: alleles.getStdoutStream(),
|
package/src/soi.tpl.tengo
CHANGED
|
@@ -12,7 +12,7 @@ self.defineOutputs("nodesResult", "treesResult")
|
|
|
12
12
|
|
|
13
13
|
// import MiXCR as a software to use
|
|
14
14
|
mitoolSw := assets.importSoftware("@platforma-open/milaboratories.software-mitool:main")
|
|
15
|
-
paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-
|
|
15
|
+
paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
|
|
16
16
|
|
|
17
17
|
inferPartitionKeyLength := func(data) {
|
|
18
18
|
rType := data.info().Type.Name
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
2
|
+
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
3
|
+
assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
4
|
+
json := import("json")
|
|
5
|
+
paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
|
|
6
|
+
|
|
7
|
+
//// aggregating by-nodes output to make it uniquely addressable by its native key
|
|
8
|
+
ensureUniqueness := func(inputTsv, pfConvParams, ...aggParams) {
|
|
9
|
+
keyColumns := []
|
|
10
|
+
for axis in pfConvParams.axes {
|
|
11
|
+
keyColumns = append(keyColumns, axis.column)
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
aggregationWorkflow := undefined
|
|
15
|
+
if len(aggParams) > 1 {
|
|
16
|
+
pickCols := []
|
|
17
|
+
for col in pfConvParams.columns {
|
|
18
|
+
pickCols = append(pickCols, [
|
|
19
|
+
col.column,
|
|
20
|
+
col.column
|
|
21
|
+
]
|
|
22
|
+
)
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
rankingCol := aggParams[1]
|
|
26
|
+
aggregationWorkflow = {
|
|
27
|
+
steps: [
|
|
28
|
+
{
|
|
29
|
+
type: "aggregate",
|
|
30
|
+
groupBy: keyColumns,
|
|
31
|
+
aggregations: [
|
|
32
|
+
{
|
|
33
|
+
type: aggParams[0],
|
|
34
|
+
rankingCol: rankingCol,
|
|
35
|
+
pickCols: pickCols
|
|
36
|
+
}
|
|
37
|
+
]
|
|
38
|
+
}
|
|
39
|
+
]
|
|
40
|
+
}
|
|
41
|
+
} else {
|
|
42
|
+
aggregations := []
|
|
43
|
+
for col in pfConvParams.columns {
|
|
44
|
+
aggregations = append(aggregations, {
|
|
45
|
+
type: aggParams[0],
|
|
46
|
+
src: col.column,
|
|
47
|
+
dst: col.column
|
|
48
|
+
})
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
aggregationWorkflow = { steps: [
|
|
52
|
+
{
|
|
53
|
+
type: "aggregate",
|
|
54
|
+
groupBy: keyColumns,
|
|
55
|
+
aggregations: aggregations
|
|
56
|
+
}
|
|
57
|
+
]
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
aggregateCmd := exec.builder().
|
|
62
|
+
printErrStreamToStdout().
|
|
63
|
+
software(paggregateSw).
|
|
64
|
+
arg("--workflow").arg("wf.json").
|
|
65
|
+
writeFile("wf.json", json.encode(aggregationWorkflow)).
|
|
66
|
+
arg("input.tsv").addFile("input.tsv", inputTsv).
|
|
67
|
+
arg("output.tsv").saveFile("output.tsv").
|
|
68
|
+
run()
|
|
69
|
+
|
|
70
|
+
return aggregateCmd.getFile("output.tsv")
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
export ll.toStrict({
|
|
74
|
+
ensureUniqueness: ensureUniqueness
|
|
75
|
+
})
|