@platforma-open/milaboratories.mixcr-shm-trees.workflow 3.3.1 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,6 +45,7 @@ wf.body(func(args) {
45
45
  trees: results.output("trees"),
46
46
  treeNodes: results.output("treeNodes"),
47
47
  treeNodesWithClones: results.output("treeNodesWithClones"),
48
+ treeNodesUniqueIsotype: results.output("treeNodesUniqueIsotype"),
48
49
 
49
50
  soiNodesResults: results.output("soiNodesResults"),
50
51
  soiTreesResults: results.output("soiTreesResults"),
@@ -32,6 +32,7 @@ self.body(func(inputs) {
32
32
  hasCellTags: false,
33
33
  // will be filled
34
34
  coveredFeatures: [],
35
+ assemblingFeature: undefined,
35
36
  cellsAssembled: false
36
37
  }
37
38
 
@@ -65,8 +66,8 @@ self.body(func(inputs) {
65
66
  }
66
67
  }
67
68
 
68
- // adding assembling feature to the list of covered features
69
- dataDescription.coveredFeatures = append(dataDescription.coveredFeatures, assemblingFeature)
69
+ // adding assembling feature
70
+ dataDescription.assemblingFeature = assemblingFeature
70
71
 
71
72
  // there should be call join on pfFrames, but it's not implements, so we will do it by hand
72
73
  dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
@@ -75,6 +76,7 @@ self.body(func(inputs) {
75
76
  shmTreeTableOptions := exportSettings.shmTreeTableOptions(dataDescription)
76
77
  shmTreeNodesTableOptions := exportSettings.shmTreeNodesTableOptions(dataDescription)
77
78
  shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClonesTableOptions(dataDescription, inputs.donorColumn)
79
+ shmTreeNodesUniqueIsotypeTableOptions := exportSettings.shmTreeNodesUniqueIsotypeTableOptions(dataDescription)
78
80
 
79
81
  // TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
80
82
  mixcrResults := llPFrames.aggregate(
@@ -94,6 +96,9 @@ self.body(func(inputs) {
94
96
  }, {
95
97
  name: "treeNodesWithClones",
96
98
  type: "Resource"
99
+ }, {
100
+ name: "treeNodesUniqueIsotype",
101
+ type: "Resource"
97
102
  }, {
98
103
  name: "tsvs",
99
104
  type: "Resource"
@@ -120,10 +125,12 @@ self.body(func(inputs) {
120
125
  false,
121
126
  // inputs
122
127
  {
128
+ shmTreeTableOptions: shmTreeTableOptions,
123
129
  shmTreeTableArgs: shmTreeTableOptions.cmdArgs,
124
130
  shmTreeNodesTableOptions: shmTreeNodesTableOptions,
125
131
  shmTreeNodesWithClonesTableOptions: shmTreeNodesWithClonesTableOptions,
126
132
  shmTreeNodesWithClonesTableArgs: shmTreeNodesWithClonesTableOptions.cmdArgs,
133
+ shmTreeNodesUniqueIsotypeTableOptions: shmTreeNodesUniqueIsotypeTableOptions,
127
134
  globalParams: maps.merge(
128
135
  inputs.params,
129
136
  { datasetTypes: datasetTypes }
@@ -159,6 +166,13 @@ self.body(func(inputs) {
159
166
  additionalArgsForImportTsv
160
167
  )
161
168
 
169
+ treeNodesUniqueIsotype := xsv.importFileMap(
170
+ mixcrResults.output("treeNodesUniqueIsotype"),
171
+ "tsv",
172
+ shmTreeNodesUniqueIsotypeTableOptions.pfconvParams,
173
+ additionalArgsForImportTsv
174
+ )
175
+
162
176
  // Running SOI search for the data
163
177
  soiNodesResults := {}
164
178
  soiTreesResults := {}
@@ -207,6 +221,8 @@ self.body(func(inputs) {
207
221
  treeNodes: pframes.exportFrame(treeNodes),
208
222
  // combine columns into pFrame
209
223
  treeNodesWithClones: pframes.exportFrame(treeNodesWithClones),
224
+ // combine columns into pFrame
225
+ treeNodesUniqueIsotype: pframes.exportFrame(treeNodesUniqueIsotype),
210
226
 
211
227
  soiNodesResults: maps.mapValues(soiNodesResults, pframes.exportFrame),
212
228
  soiTreesResults: maps.mapValues(soiTreesResults, pframes.exportFrame),
@@ -9,18 +9,21 @@ json := import("json")
9
9
 
10
10
  // for usage in aggregate function, we should specify all outputs that will be used
11
11
  self.defineOutputs(
12
- "trees", "treeNodes", "treeNodesWithClones",
12
+ "trees", "treeNodes", "treeNodesWithClones", "treeNodesUniqueIsotype",
13
13
  "tsvs",
14
14
  "allelesLog", "treesLog",
15
15
  "allelesReport", "treesReport",
16
16
  "allelesReportJson", "treesReportJson"
17
17
  )
18
18
 
19
+ //// import function for aggregating by-nodes output to make it uniquely addressable by its native key
20
+ tablesAggregation := import(":tablesAggregation")
21
+
19
22
  // import MiXCR as a software to use
20
23
  mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:main")
21
24
 
22
25
  // used to postprocess some tables
23
- paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-paggregate:main")
26
+ paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
24
27
 
25
28
  // env for MiXCR to format progress messages
26
29
  progressPrefix := "[==PROGRESS==]"
@@ -148,7 +151,7 @@ self.body(func(inputs) {
148
151
 
149
152
  shmTrees := shmTreesCmdBuilder.run()
150
153
  outputShmt := shmTrees.getFile("output.shmt")
151
-
154
+
152
155
  // export trees without nodes
153
156
  shmTreeExportsCmdBuilder := exec.builder().
154
157
  printErrStreamToStdout().
@@ -168,8 +171,9 @@ self.body(func(inputs) {
168
171
  saveFile("output.tsv")
169
172
 
170
173
  shmTreeExports := shmTreeExportsCmdBuilder.run()
171
- shmTreeTsv := shmTreeExports.getFile("output.tsv")
174
+ shmTreeTsvRaw := shmTreeExports.getFile("output.tsv")
172
175
 
176
+ shmTreeTsv := tablesAggregation.ensureUniqueness(shmTreeTsvRaw, inputs.shmTreeTableOptions.pfconvParams, "max_by", "totalReadsCountInTree")
173
177
 
174
178
  // export tree nodes with data uniq for the node
175
179
  shmTreeNodesExportsCmdBuilder := exec.builder().
@@ -192,42 +196,7 @@ self.body(func(inputs) {
192
196
  shmTreeNodesExports := shmTreeNodesExportsCmdBuilder.run()
193
197
  shmTreeNodesTsvRaw := shmTreeNodesExports.getFile("output.tsv")
194
198
 
195
- // aggregating by-nodes output to make it uniquely addressable by it's native key
196
-
197
- ensureUniqueness := func(inputTsv, pfConvParams) {
198
- aggregations := []
199
- for col in pfConvParams.columns {
200
- aggregations = append(aggregations, {
201
- type: "first",
202
- src: col.column,
203
- dst: col.column
204
- })
205
- }
206
-
207
- keyColumns := []
208
- for axis in pfConvParams.axes {
209
- keyColumns = append(keyColumns, axis.column)
210
- }
211
-
212
- aggregationWorkflow := { steps: [ {
213
- type: "aggregate",
214
- groupBy: keyColumns,
215
- aggregations: aggregations
216
- } ] }
217
-
218
- aggregateCmd := exec.builder().
219
- printErrStreamToStdout().
220
- software(paggregateSw).
221
- arg("--workflow").arg("wf.json").
222
- writeFile("wf.json", json.encode(aggregationWorkflow)).
223
- arg("input.tsv").addFile("input.tsv", inputTsv).
224
- arg("output.tsv").saveFile("output.tsv").
225
- run()
226
-
227
- return aggregateCmd.getFile("output.tsv")
228
- }
229
-
230
- shmTreeNodesTsv := ensureUniqueness(shmTreeNodesTsvRaw, inputs.shmTreeNodesTableOptions.pfconvParams)
199
+ shmTreeNodesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesTsvRaw, inputs.shmTreeNodesTableOptions.pfconvParams, "first")
231
200
 
232
201
  // export nodes with clones. For each node could be several clones
233
202
  shmTreeNodesWithClonesExportsCmdBuilder := exec.builder().
@@ -253,17 +222,20 @@ self.body(func(inputs) {
253
222
  shmTreeNodesWithClonesExports := shmTreeNodesWithClonesExportsCmdBuilder.run()
254
223
  shmTreeNodesWithClonesTsvRaw := shmTreeNodesWithClonesExports.getFile("output.tsv")
255
224
 
256
- shmTreeNodesWithClonesTsv := ensureUniqueness(shmTreeNodesWithClonesTsvRaw, inputs.shmTreeNodesWithClonesTableOptions.pfconvParams)
225
+ shmTreeNodesWithClonesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsvRaw, inputs.shmTreeNodesWithClonesTableOptions.pfconvParams, "max_by", "readCount")
226
+ shmTreeNodesUniqueIsotypeTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsv, inputs.shmTreeNodesUniqueIsotypeTableOptions.pfconvParams, "max_by", "readCount")
257
227
 
258
228
  return {
259
229
  trees: shmTreeTsv,
260
230
  treeNodes: shmTreeNodesTsv,
261
231
  treeNodesWithClones: shmTreeNodesWithClonesTsv,
232
+ treeNodesUniqueIsotype : shmTreeNodesUniqueIsotypeTsv,
262
233
 
263
234
  tsvs: {
264
235
  trees: shmTreeTsv,
265
236
  treeNodes: shmTreeNodesTsv,
266
- treeNodesWithClones: shmTreeNodesWithClonesTsv
237
+ treeNodesWithClones: shmTreeNodesWithClonesTsv,
238
+ treeNodesUniqueIsotype : shmTreeNodesUniqueIsotypeTsv
267
239
  },
268
240
 
269
241
  allelesLog: alleles.getStdoutStream(),
package/src/soi.tpl.tengo CHANGED
@@ -12,7 +12,7 @@ self.defineOutputs("nodesResult", "treesResult")
12
12
 
13
13
  // import MiXCR as a software to use
14
14
  mitoolSw := assets.importSoftware("@platforma-open/milaboratories.software-mitool:main")
15
- paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-paggregate:main")
15
+ paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
16
16
 
17
17
  inferPartitionKeyLength := func(data) {
18
18
  rType := data.info().Type.Name
@@ -0,0 +1,75 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+ exec := import("@platforma-sdk/workflow-tengo:exec")
3
+ assets := import("@platforma-sdk/workflow-tengo:assets")
4
+ json := import("json")
5
+ paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
6
+
7
+ //// aggregating by-nodes output to make it uniquely addressable by its native key
8
+ ensureUniqueness := func(inputTsv, pfConvParams, ...aggParams) {
9
+ keyColumns := []
10
+ for axis in pfConvParams.axes {
11
+ keyColumns = append(keyColumns, axis.column)
12
+ }
13
+
14
+ aggregationWorkflow := undefined
15
+ if len(aggParams) > 1 {
16
+ pickCols := []
17
+ for col in pfConvParams.columns {
18
+ pickCols = append(pickCols, [
19
+ col.column,
20
+ col.column
21
+ ]
22
+ )
23
+ }
24
+
25
+ rankingCol := aggParams[1]
26
+ aggregationWorkflow = {
27
+ steps: [
28
+ {
29
+ type: "aggregate",
30
+ groupBy: keyColumns,
31
+ aggregations: [
32
+ {
33
+ type: aggParams[0],
34
+ rankingCol: rankingCol,
35
+ pickCols: pickCols
36
+ }
37
+ ]
38
+ }
39
+ ]
40
+ }
41
+ } else {
42
+ aggregations := []
43
+ for col in pfConvParams.columns {
44
+ aggregations = append(aggregations, {
45
+ type: aggParams[0],
46
+ src: col.column,
47
+ dst: col.column
48
+ })
49
+ }
50
+
51
+ aggregationWorkflow = { steps: [
52
+ {
53
+ type: "aggregate",
54
+ groupBy: keyColumns,
55
+ aggregations: aggregations
56
+ }
57
+ ]
58
+ }
59
+ }
60
+
61
+ aggregateCmd := exec.builder().
62
+ printErrStreamToStdout().
63
+ software(paggregateSw).
64
+ arg("--workflow").arg("wf.json").
65
+ writeFile("wf.json", json.encode(aggregationWorkflow)).
66
+ arg("input.tsv").addFile("input.tsv", inputTsv).
67
+ arg("output.tsv").saveFile("output.tsv").
68
+ run()
69
+
70
+ return aggregateCmd.getFile("output.tsv")
71
+ }
72
+
73
+ export ll.toStrict({
74
+ ensureUniqueness: ensureUniqueness
75
+ })