@platforma-open/milaboratories.mixcr-shm-trees.workflow 3.3.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,6 +45,7 @@ wf.body(func(args) {
45
45
  trees: results.output("trees"),
46
46
  treeNodes: results.output("treeNodes"),
47
47
  treeNodesWithClones: results.output("treeNodesWithClones"),
48
+ treeNodesUniqueIsotype: results.output("treeNodesUniqueIsotype"),
48
49
 
49
50
  soiNodesResults: results.output("soiNodesResults"),
50
51
  soiTreesResults: results.output("soiTreesResults"),
@@ -32,6 +32,7 @@ self.body(func(inputs) {
32
32
  hasCellTags: false,
33
33
  // will be filled
34
34
  coveredFeatures: [],
35
+ assemblingFeature: undefined,
35
36
  cellsAssembled: false
36
37
  }
37
38
 
@@ -65,8 +66,8 @@ self.body(func(inputs) {
65
66
  }
66
67
  }
67
68
 
68
- // adding assembling feature to the list of covered features
69
- dataDescription.coveredFeatures = append(dataDescription.coveredFeatures, assemblingFeature)
69
+ // adding assembling feature
70
+ dataDescription.assemblingFeature = assemblingFeature
70
71
 
71
72
  // there should be a call to join on pfFrames, but it's not implemented, so we will do it by hand
72
73
  dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
@@ -75,6 +76,7 @@ self.body(func(inputs) {
75
76
  shmTreeTableOptions := exportSettings.shmTreeTableOptions(dataDescription)
76
77
  shmTreeNodesTableOptions := exportSettings.shmTreeNodesTableOptions(dataDescription)
77
78
  shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClonesTableOptions(dataDescription, inputs.donorColumn)
79
+ shmTreeNodesUniqueIsotypeTableOptions := exportSettings.shmTreeNodesUniqueIsotypeTableOptions(dataDescription)
78
80
 
79
81
  // TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
80
82
  mixcrResults := llPFrames.aggregate(
@@ -94,6 +96,9 @@ self.body(func(inputs) {
94
96
  }, {
95
97
  name: "treeNodesWithClones",
96
98
  type: "Resource"
99
+ }, {
100
+ name: "treeNodesUniqueIsotype",
101
+ type: "Resource"
97
102
  }, {
98
103
  name: "tsvs",
99
104
  type: "Resource"
@@ -120,9 +125,12 @@ self.body(func(inputs) {
120
125
  false,
121
126
  // inputs
122
127
  {
128
+ shmTreeTableOptions: shmTreeTableOptions,
123
129
  shmTreeTableArgs: shmTreeTableOptions.cmdArgs,
124
130
  shmTreeNodesTableOptions: shmTreeNodesTableOptions,
131
+ shmTreeNodesWithClonesTableOptions: shmTreeNodesWithClonesTableOptions,
125
132
  shmTreeNodesWithClonesTableArgs: shmTreeNodesWithClonesTableOptions.cmdArgs,
133
+ shmTreeNodesUniqueIsotypeTableOptions: shmTreeNodesUniqueIsotypeTableOptions,
126
134
  globalParams: maps.merge(
127
135
  inputs.params,
128
136
  { datasetTypes: datasetTypes }
@@ -158,6 +166,13 @@ self.body(func(inputs) {
158
166
  additionalArgsForImportTsv
159
167
  )
160
168
 
169
+ treeNodesUniqueIsotype := xsv.importFileMap(
170
+ mixcrResults.output("treeNodesUniqueIsotype"),
171
+ "tsv",
172
+ shmTreeNodesUniqueIsotypeTableOptions.pfconvParams,
173
+ additionalArgsForImportTsv
174
+ )
175
+
161
176
  // Running SOI search for the data
162
177
  soiNodesResults := {}
163
178
  soiTreesResults := {}
@@ -206,6 +221,8 @@ self.body(func(inputs) {
206
221
  treeNodes: pframes.exportFrame(treeNodes),
207
222
  // combine columns into pFrame
208
223
  treeNodesWithClones: pframes.exportFrame(treeNodesWithClones),
224
+ // combine columns into pFrame
225
+ treeNodesUniqueIsotype: pframes.exportFrame(treeNodesUniqueIsotype),
209
226
 
210
227
  soiNodesResults: maps.mapValues(soiNodesResults, pframes.exportFrame),
211
228
  soiTreesResults: maps.mapValues(soiTreesResults, pframes.exportFrame),
@@ -9,18 +9,21 @@ json := import("json")
9
9
 
10
10
  // for usage in aggregate function, we should specify all outputs that will be used
11
11
  self.defineOutputs(
12
- "trees", "treeNodes", "treeNodesWithClones",
12
+ "trees", "treeNodes", "treeNodesWithClones", "treeNodesUniqueIsotype",
13
13
  "tsvs",
14
14
  "allelesLog", "treesLog",
15
15
  "allelesReport", "treesReport",
16
16
  "allelesReportJson", "treesReportJson"
17
17
  )
18
18
 
19
+ //// import function for aggregating by-nodes output to make it uniquely addressable by its native key
20
+ tablesAggregation := import(":tablesAggregation")
21
+
19
22
  // import MiXCR as a software to use
20
23
  mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:main")
21
24
 
22
25
  // used to postprocess some tables
23
- paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-paggregate:main")
26
+ paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
24
27
 
25
28
  // env for MiXCR to format progress messages
26
29
  progressPrefix := "[==PROGRESS==]"
@@ -148,7 +151,7 @@ self.body(func(inputs) {
148
151
 
149
152
  shmTrees := shmTreesCmdBuilder.run()
150
153
  outputShmt := shmTrees.getFile("output.shmt")
151
-
154
+
152
155
  // export trees without nodes
153
156
  shmTreeExportsCmdBuilder := exec.builder().
154
157
  printErrStreamToStdout().
@@ -168,8 +171,9 @@ self.body(func(inputs) {
168
171
  saveFile("output.tsv")
169
172
 
170
173
  shmTreeExports := shmTreeExportsCmdBuilder.run()
171
- shmTreeTsv := shmTreeExports.getFile("output.tsv")
174
+ shmTreeTsvRaw := shmTreeExports.getFile("output.tsv")
172
175
 
176
+ shmTreeTsv := tablesAggregation.ensureUniqueness(shmTreeTsvRaw, inputs.shmTreeTableOptions.pfconvParams, "max_by", "totalReadsCountInTree")
173
177
 
174
178
  // export tree nodes with data uniq for the node
175
179
  shmTreeNodesExportsCmdBuilder := exec.builder().
@@ -192,39 +196,7 @@ self.body(func(inputs) {
192
196
  shmTreeNodesExports := shmTreeNodesExportsCmdBuilder.run()
193
197
  shmTreeNodesTsvRaw := shmTreeNodesExports.getFile("output.tsv")
194
198
 
195
- // aggregating by-nodes output to make it uniquely addressable by it's native key
196
-
197
- aggregations := []
198
- for col in inputs.shmTreeNodesTableOptions.pfconvParams.columns {
199
- aggregations = append(aggregations, {
200
- type: "first",
201
- src: col.column,
202
- dst: col.column
203
- })
204
- }
205
-
206
- keyColumns := []
207
- for axis in inputs.shmTreeNodesTableOptions.pfconvParams.axes {
208
- keyColumns = append(keyColumns, axis.column)
209
- }
210
-
211
- aggregationWorkflow := { steps: [ {
212
- type: "aggregate",
213
- groupBy: keyColumns,
214
- aggregations: aggregations
215
- } ] }
216
-
217
- aggregateCmd := exec.builder().
218
- printErrStreamToStdout().
219
- software(paggregateSw).
220
- arg("--workflow").arg("wf.json").
221
- writeFile("wf.json", json.encode(aggregationWorkflow)).
222
- arg("input.tsv").addFile("input.tsv", shmTreeNodesTsvRaw).
223
- arg("output.tsv").saveFile("output.tsv").
224
- run()
225
-
226
- shmTreeNodesTsv := aggregateCmd.getFile("output.tsv")
227
-
199
+ shmTreeNodesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesTsvRaw, inputs.shmTreeNodesTableOptions.pfconvParams, "first")
228
200
 
229
201
  // export nodes with clones. For each node could be several clones
230
202
  shmTreeNodesWithClonesExportsCmdBuilder := exec.builder().
@@ -248,17 +220,22 @@ self.body(func(inputs) {
248
220
  saveFile("output.tsv")
249
221
 
250
222
  shmTreeNodesWithClonesExports := shmTreeNodesWithClonesExportsCmdBuilder.run()
251
- shmTreeNodesWithClonesTsv := shmTreeNodesWithClonesExports.getFile("output.tsv")
223
+ shmTreeNodesWithClonesTsvRaw := shmTreeNodesWithClonesExports.getFile("output.tsv")
224
+
225
+ shmTreeNodesWithClonesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsvRaw, inputs.shmTreeNodesWithClonesTableOptions.pfconvParams, "max_by", "readCount")
226
+ shmTreeNodesUniqueIsotypeTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsv, inputs.shmTreeNodesUniqueIsotypeTableOptions.pfconvParams, "max_by", "readCount")
252
227
 
253
228
  return {
254
229
  trees: shmTreeTsv,
255
230
  treeNodes: shmTreeNodesTsv,
256
231
  treeNodesWithClones: shmTreeNodesWithClonesTsv,
232
+ treeNodesUniqueIsotype : shmTreeNodesUniqueIsotypeTsv,
257
233
 
258
234
  tsvs: {
259
235
  trees: shmTreeTsv,
260
236
  treeNodes: shmTreeNodesTsv,
261
- treeNodesWithClones: shmTreeNodesWithClonesTsv
237
+ treeNodesWithClones: shmTreeNodesWithClonesTsv,
238
+ treeNodesUniqueIsotype : shmTreeNodesUniqueIsotypeTsv
262
239
  },
263
240
 
264
241
  allelesLog: alleles.getStdoutStream(),
package/src/soi.tpl.tengo CHANGED
@@ -12,7 +12,7 @@ self.defineOutputs("nodesResult", "treesResult")
12
12
 
13
13
  // import MiXCR as a software to use
14
14
  mitoolSw := assets.importSoftware("@platforma-open/milaboratories.software-mitool:main")
15
- paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-paggregate:main")
15
+ paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
16
16
 
17
17
  inferPartitionKeyLength := func(data) {
18
18
  rType := data.info().Type.Name
@@ -0,0 +1,75 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+ exec := import("@platforma-sdk/workflow-tengo:exec")
3
+ assets := import("@platforma-sdk/workflow-tengo:assets")
4
+ json := import("json")
5
+ paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
6
+
7
+ //// aggregating by-nodes output to make it uniquely addressable by its native key
8
+ ensureUniqueness := func(inputTsv, pfConvParams, ...aggParams) {
9
+ keyColumns := []
10
+ for axis in pfConvParams.axes {
11
+ keyColumns = append(keyColumns, axis.column)
12
+ }
13
+
14
+ aggregationWorkflow := undefined
15
+ if len(aggParams) > 1 {
16
+ pickCols := []
17
+ for col in pfConvParams.columns {
18
+ pickCols = append(pickCols, [
19
+ col.column,
20
+ col.column
21
+ ]
22
+ )
23
+ }
24
+
25
+ rankingCol := aggParams[1]
26
+ aggregationWorkflow = {
27
+ steps: [
28
+ {
29
+ type: "aggregate",
30
+ groupBy: keyColumns,
31
+ aggregations: [
32
+ {
33
+ type: aggParams[0],
34
+ rankingCol: rankingCol,
35
+ pickCols: pickCols
36
+ }
37
+ ]
38
+ }
39
+ ]
40
+ }
41
+ } else {
42
+ aggregations := []
43
+ for col in pfConvParams.columns {
44
+ aggregations = append(aggregations, {
45
+ type: aggParams[0],
46
+ src: col.column,
47
+ dst: col.column
48
+ })
49
+ }
50
+
51
+ aggregationWorkflow = { steps: [
52
+ {
53
+ type: "aggregate",
54
+ groupBy: keyColumns,
55
+ aggregations: aggregations
56
+ }
57
+ ]
58
+ }
59
+ }
60
+
61
+ aggregateCmd := exec.builder().
62
+ printErrStreamToStdout().
63
+ software(paggregateSw).
64
+ arg("--workflow").arg("wf.json").
65
+ writeFile("wf.json", json.encode(aggregationWorkflow)).
66
+ arg("input.tsv").addFile("input.tsv", inputTsv).
67
+ arg("output.tsv").saveFile("output.tsv").
68
+ run()
69
+
70
+ return aggregateCmd.getFile("output.tsv")
71
+ }
72
+
73
+ export ll.toStrict({
74
+ ensureUniqueness: ensureUniqueness
75
+ })