@platforma-open/milaboratories.mixcr-shm-trees.workflow 3.3.1 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +3 -1
- package/CHANGELOG.md +6 -0
- package/dist/tengo/lib/export-settings.lib.tengo +534 -92
- package/dist/tengo/lib/tablesAggregation.lib.tengo +75 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/process.plj.gz +0 -0
- package/dist/tengo/tpl/reconstruct-shm-trees.plj.gz +0 -0
- package/dist/tengo/tpl/soi.plj.gz +0 -0
- package/package.json +5 -5
- package/src/export-settings.lib.tengo +534 -92
- package/src/main.tpl.tengo +1 -0
- package/src/process.tpl.tengo +18 -2
- package/src/reconstruct-shm-trees.tpl.tengo +14 -42
- package/src/soi.tpl.tengo +1 -1
- package/src/tablesAggregation.lib.tengo +75 -0
package/src/main.tpl.tengo
CHANGED
|
@@ -45,6 +45,7 @@ wf.body(func(args) {
|
|
|
45
45
|
trees: results.output("trees"),
|
|
46
46
|
treeNodes: results.output("treeNodes"),
|
|
47
47
|
treeNodesWithClones: results.output("treeNodesWithClones"),
|
|
48
|
+
treeNodesUniqueIsotype: results.output("treeNodesUniqueIsotype"),
|
|
48
49
|
|
|
49
50
|
soiNodesResults: results.output("soiNodesResults"),
|
|
50
51
|
soiTreesResults: results.output("soiTreesResults"),
|
package/src/process.tpl.tengo
CHANGED
|
@@ -32,6 +32,7 @@ self.body(func(inputs) {
|
|
|
32
32
|
hasCellTags: false,
|
|
33
33
|
// will be filled
|
|
34
34
|
coveredFeatures: [],
|
|
35
|
+
assemblingFeature: undefined,
|
|
35
36
|
cellsAssembled: false
|
|
36
37
|
}
|
|
37
38
|
|
|
@@ -65,8 +66,8 @@ self.body(func(inputs) {
|
|
|
65
66
|
}
|
|
66
67
|
}
|
|
67
68
|
|
|
68
|
-
// adding assembling feature
|
|
69
|
-
dataDescription.
|
|
69
|
+
// adding assembling feature
|
|
70
|
+
dataDescription.assemblingFeature = assemblingFeature
|
|
70
71
|
|
|
71
72
|
// there should be a join call on pfFrames, but it's not implemented, so we will do it by hand
|
|
72
73
|
dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
|
|
@@ -75,6 +76,7 @@ self.body(func(inputs) {
|
|
|
75
76
|
shmTreeTableOptions := exportSettings.shmTreeTableOptions(dataDescription)
|
|
76
77
|
shmTreeNodesTableOptions := exportSettings.shmTreeNodesTableOptions(dataDescription)
|
|
77
78
|
shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClonesTableOptions(dataDescription, inputs.donorColumn)
|
|
79
|
+
shmTreeNodesUniqueIsotypeTableOptions := exportSettings.shmTreeNodesUniqueIsotypeTableOptions(dataDescription)
|
|
78
80
|
|
|
79
81
|
// TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
|
|
80
82
|
mixcrResults := llPFrames.aggregate(
|
|
@@ -94,6 +96,9 @@ self.body(func(inputs) {
|
|
|
94
96
|
}, {
|
|
95
97
|
name: "treeNodesWithClones",
|
|
96
98
|
type: "Resource"
|
|
99
|
+
}, {
|
|
100
|
+
name: "treeNodesUniqueIsotype",
|
|
101
|
+
type: "Resource"
|
|
97
102
|
}, {
|
|
98
103
|
name: "tsvs",
|
|
99
104
|
type: "Resource"
|
|
@@ -120,10 +125,12 @@ self.body(func(inputs) {
|
|
|
120
125
|
false,
|
|
121
126
|
// inputs
|
|
122
127
|
{
|
|
128
|
+
shmTreeTableOptions: shmTreeTableOptions,
|
|
123
129
|
shmTreeTableArgs: shmTreeTableOptions.cmdArgs,
|
|
124
130
|
shmTreeNodesTableOptions: shmTreeNodesTableOptions,
|
|
125
131
|
shmTreeNodesWithClonesTableOptions: shmTreeNodesWithClonesTableOptions,
|
|
126
132
|
shmTreeNodesWithClonesTableArgs: shmTreeNodesWithClonesTableOptions.cmdArgs,
|
|
133
|
+
shmTreeNodesUniqueIsotypeTableOptions: shmTreeNodesUniqueIsotypeTableOptions,
|
|
127
134
|
globalParams: maps.merge(
|
|
128
135
|
inputs.params,
|
|
129
136
|
{ datasetTypes: datasetTypes }
|
|
@@ -159,6 +166,13 @@ self.body(func(inputs) {
|
|
|
159
166
|
additionalArgsForImportTsv
|
|
160
167
|
)
|
|
161
168
|
|
|
169
|
+
treeNodesUniqueIsotype := xsv.importFileMap(
|
|
170
|
+
mixcrResults.output("treeNodesUniqueIsotype"),
|
|
171
|
+
"tsv",
|
|
172
|
+
shmTreeNodesUniqueIsotypeTableOptions.pfconvParams,
|
|
173
|
+
additionalArgsForImportTsv
|
|
174
|
+
)
|
|
175
|
+
|
|
162
176
|
// Running SOI search for the data
|
|
163
177
|
soiNodesResults := {}
|
|
164
178
|
soiTreesResults := {}
|
|
@@ -207,6 +221,8 @@ self.body(func(inputs) {
|
|
|
207
221
|
treeNodes: pframes.exportFrame(treeNodes),
|
|
208
222
|
// combine columns into pFrame
|
|
209
223
|
treeNodesWithClones: pframes.exportFrame(treeNodesWithClones),
|
|
224
|
+
// combine columns into pFrame
|
|
225
|
+
treeNodesUniqueIsotype: pframes.exportFrame(treeNodesUniqueIsotype),
|
|
210
226
|
|
|
211
227
|
soiNodesResults: maps.mapValues(soiNodesResults, pframes.exportFrame),
|
|
212
228
|
soiTreesResults: maps.mapValues(soiTreesResults, pframes.exportFrame),
|
|
@@ -9,18 +9,21 @@ json := import("json")
|
|
|
9
9
|
|
|
10
10
|
// for usage in aggregate function, we should specify all outputs that will be used
|
|
11
11
|
self.defineOutputs(
|
|
12
|
-
"trees", "treeNodes", "treeNodesWithClones",
|
|
12
|
+
"trees", "treeNodes", "treeNodesWithClones", "treeNodesUniqueIsotype",
|
|
13
13
|
"tsvs",
|
|
14
14
|
"allelesLog", "treesLog",
|
|
15
15
|
"allelesReport", "treesReport",
|
|
16
16
|
"allelesReportJson", "treesReportJson"
|
|
17
17
|
)
|
|
18
18
|
|
|
19
|
+
//// import function for aggregating by-nodes output to make it uniquely addressable by its native key
|
|
20
|
+
tablesAggregation := import(":tablesAggregation")
|
|
21
|
+
|
|
19
22
|
// import MiXCR as a software to use
|
|
20
23
|
mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:main")
|
|
21
24
|
|
|
22
25
|
// used to postprocess some tables
|
|
23
|
-
paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-
|
|
26
|
+
paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
|
|
24
27
|
|
|
25
28
|
// env for MiXCR to format progress messages
|
|
26
29
|
progressPrefix := "[==PROGRESS==]"
|
|
@@ -148,7 +151,7 @@ self.body(func(inputs) {
|
|
|
148
151
|
|
|
149
152
|
shmTrees := shmTreesCmdBuilder.run()
|
|
150
153
|
outputShmt := shmTrees.getFile("output.shmt")
|
|
151
|
-
|
|
154
|
+
|
|
152
155
|
// export trees without nodes
|
|
153
156
|
shmTreeExportsCmdBuilder := exec.builder().
|
|
154
157
|
printErrStreamToStdout().
|
|
@@ -168,8 +171,9 @@ self.body(func(inputs) {
|
|
|
168
171
|
saveFile("output.tsv")
|
|
169
172
|
|
|
170
173
|
shmTreeExports := shmTreeExportsCmdBuilder.run()
|
|
171
|
-
|
|
174
|
+
shmTreeTsvRaw := shmTreeExports.getFile("output.tsv")
|
|
172
175
|
|
|
176
|
+
shmTreeTsv := tablesAggregation.ensureUniqueness(shmTreeTsvRaw, inputs.shmTreeTableOptions.pfconvParams, "max_by", "totalReadsCountInTree")
|
|
173
177
|
|
|
174
178
|
// export tree nodes with data unique to the node
|
|
175
179
|
shmTreeNodesExportsCmdBuilder := exec.builder().
|
|
@@ -192,42 +196,7 @@ self.body(func(inputs) {
|
|
|
192
196
|
shmTreeNodesExports := shmTreeNodesExportsCmdBuilder.run()
|
|
193
197
|
shmTreeNodesTsvRaw := shmTreeNodesExports.getFile("output.tsv")
|
|
194
198
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
ensureUniqueness := func(inputTsv, pfConvParams) {
|
|
198
|
-
aggregations := []
|
|
199
|
-
for col in pfConvParams.columns {
|
|
200
|
-
aggregations = append(aggregations, {
|
|
201
|
-
type: "first",
|
|
202
|
-
src: col.column,
|
|
203
|
-
dst: col.column
|
|
204
|
-
})
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
keyColumns := []
|
|
208
|
-
for axis in pfConvParams.axes {
|
|
209
|
-
keyColumns = append(keyColumns, axis.column)
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
aggregationWorkflow := { steps: [ {
|
|
213
|
-
type: "aggregate",
|
|
214
|
-
groupBy: keyColumns,
|
|
215
|
-
aggregations: aggregations
|
|
216
|
-
} ] }
|
|
217
|
-
|
|
218
|
-
aggregateCmd := exec.builder().
|
|
219
|
-
printErrStreamToStdout().
|
|
220
|
-
software(paggregateSw).
|
|
221
|
-
arg("--workflow").arg("wf.json").
|
|
222
|
-
writeFile("wf.json", json.encode(aggregationWorkflow)).
|
|
223
|
-
arg("input.tsv").addFile("input.tsv", inputTsv).
|
|
224
|
-
arg("output.tsv").saveFile("output.tsv").
|
|
225
|
-
run()
|
|
226
|
-
|
|
227
|
-
return aggregateCmd.getFile("output.tsv")
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
shmTreeNodesTsv := ensureUniqueness(shmTreeNodesTsvRaw, inputs.shmTreeNodesTableOptions.pfconvParams)
|
|
199
|
+
shmTreeNodesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesTsvRaw, inputs.shmTreeNodesTableOptions.pfconvParams, "first")
|
|
231
200
|
|
|
232
201
|
// export nodes with clones. Each node may have several clones
|
|
233
202
|
shmTreeNodesWithClonesExportsCmdBuilder := exec.builder().
|
|
@@ -253,17 +222,20 @@ self.body(func(inputs) {
|
|
|
253
222
|
shmTreeNodesWithClonesExports := shmTreeNodesWithClonesExportsCmdBuilder.run()
|
|
254
223
|
shmTreeNodesWithClonesTsvRaw := shmTreeNodesWithClonesExports.getFile("output.tsv")
|
|
255
224
|
|
|
256
|
-
shmTreeNodesWithClonesTsv := ensureUniqueness(shmTreeNodesWithClonesTsvRaw, inputs.shmTreeNodesWithClonesTableOptions.pfconvParams)
|
|
225
|
+
shmTreeNodesWithClonesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsvRaw, inputs.shmTreeNodesWithClonesTableOptions.pfconvParams, "max_by", "readCount")
|
|
226
|
+
shmTreeNodesUniqueIsotypeTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsv, inputs.shmTreeNodesUniqueIsotypeTableOptions.pfconvParams, "max_by", "readCount")
|
|
257
227
|
|
|
258
228
|
return {
|
|
259
229
|
trees: shmTreeTsv,
|
|
260
230
|
treeNodes: shmTreeNodesTsv,
|
|
261
231
|
treeNodesWithClones: shmTreeNodesWithClonesTsv,
|
|
232
|
+
treeNodesUniqueIsotype : shmTreeNodesUniqueIsotypeTsv,
|
|
262
233
|
|
|
263
234
|
tsvs: {
|
|
264
235
|
trees: shmTreeTsv,
|
|
265
236
|
treeNodes: shmTreeNodesTsv,
|
|
266
|
-
treeNodesWithClones: shmTreeNodesWithClonesTsv
|
|
237
|
+
treeNodesWithClones: shmTreeNodesWithClonesTsv,
|
|
238
|
+
treeNodesUniqueIsotype : shmTreeNodesUniqueIsotypeTsv
|
|
267
239
|
},
|
|
268
240
|
|
|
269
241
|
allelesLog: alleles.getStdoutStream(),
|
package/src/soi.tpl.tengo
CHANGED
|
@@ -12,7 +12,7 @@ self.defineOutputs("nodesResult", "treesResult")
|
|
|
12
12
|
|
|
13
13
|
// import MiTool as the software to use
|
|
14
14
|
mitoolSw := assets.importSoftware("@platforma-open/milaboratories.software-mitool:main")
|
|
15
|
-
paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-
|
|
15
|
+
paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
|
|
16
16
|
|
|
17
17
|
inferPartitionKeyLength := func(data) {
|
|
18
18
|
rType := data.info().Type.Name
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
2
|
+
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
3
|
+
assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
4
|
+
json := import("json")
|
|
5
|
+
paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
|
|
6
|
+
|
|
7
|
+
//// aggregating by-nodes output to make it uniquely addressable by its native key
|
|
8
|
+
ensureUniqueness := func(inputTsv, pfConvParams, ...aggParams) {
|
|
9
|
+
keyColumns := []
|
|
10
|
+
for axis in pfConvParams.axes {
|
|
11
|
+
keyColumns = append(keyColumns, axis.column)
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
aggregationWorkflow := undefined
|
|
15
|
+
if len(aggParams) > 1 {
|
|
16
|
+
pickCols := []
|
|
17
|
+
for col in pfConvParams.columns {
|
|
18
|
+
pickCols = append(pickCols, [
|
|
19
|
+
col.column,
|
|
20
|
+
col.column
|
|
21
|
+
]
|
|
22
|
+
)
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
rankingCol := aggParams[1]
|
|
26
|
+
aggregationWorkflow = {
|
|
27
|
+
steps: [
|
|
28
|
+
{
|
|
29
|
+
type: "aggregate",
|
|
30
|
+
groupBy: keyColumns,
|
|
31
|
+
aggregations: [
|
|
32
|
+
{
|
|
33
|
+
type: aggParams[0],
|
|
34
|
+
rankingCol: rankingCol,
|
|
35
|
+
pickCols: pickCols
|
|
36
|
+
}
|
|
37
|
+
]
|
|
38
|
+
}
|
|
39
|
+
]
|
|
40
|
+
}
|
|
41
|
+
} else {
|
|
42
|
+
aggregations := []
|
|
43
|
+
for col in pfConvParams.columns {
|
|
44
|
+
aggregations = append(aggregations, {
|
|
45
|
+
type: aggParams[0],
|
|
46
|
+
src: col.column,
|
|
47
|
+
dst: col.column
|
|
48
|
+
})
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
aggregationWorkflow = { steps: [
|
|
52
|
+
{
|
|
53
|
+
type: "aggregate",
|
|
54
|
+
groupBy: keyColumns,
|
|
55
|
+
aggregations: aggregations
|
|
56
|
+
}
|
|
57
|
+
]
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
aggregateCmd := exec.builder().
|
|
62
|
+
printErrStreamToStdout().
|
|
63
|
+
software(paggregateSw).
|
|
64
|
+
arg("--workflow").arg("wf.json").
|
|
65
|
+
writeFile("wf.json", json.encode(aggregationWorkflow)).
|
|
66
|
+
arg("input.tsv").addFile("input.tsv", inputTsv).
|
|
67
|
+
arg("output.tsv").saveFile("output.tsv").
|
|
68
|
+
run()
|
|
69
|
+
|
|
70
|
+
return aggregateCmd.getFile("output.tsv")
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
export ll.toStrict({
|
|
74
|
+
ensureUniqueness: ensureUniqueness
|
|
75
|
+
})
|