@platforma-open/milaboratories.mixcr-shm-trees.workflow 3.7.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,8 +12,12 @@ json := import("json")
12
12
  exportSettings := import(":export-settings")
13
13
  prepareDonorColumn := import(":prepare-donor-column")
14
14
  pframes := import("@platforma-sdk/workflow-tengo:pframes")
15
+ tablesAggregation := import(":tables-aggregation")
15
16
 
16
- reconstructShmTreesTpl := assets.importTemplate(":reconstruct-shm-trees")
17
+ // reconstructShmTreesTpl := assets.importTemplate(":reconstruct-shm-trees")
18
+
19
+ mixcrShmTreesTpl := assets.importTemplate(":mixcr-shm-trees")
20
+ mixcrExportTpl := assets.importTemplate(":mixcr-export")
17
21
 
18
22
  self.awaitState("datasets", { wildcard: "*" }, "AllInputsSet")
19
23
  // this template should run only after all inputs are resolved
@@ -31,6 +35,7 @@ self.awaitState("library", { match: "^data$" }, "AllInputsSet") // change to Inp
31
35
 
32
36
  self.awaitState("donorColumn", "ResourceReady")
33
37
  self.awaitState("params", "ResourceReady")
38
+ self.awaitState("etc", "ResourceReady")
34
39
 
35
40
  soiTpl := assets.importTemplate(":soi")
36
41
 
@@ -45,7 +50,11 @@ self.body(func(inputs) {
45
50
  cellsAssembled: false
46
51
  }
47
52
 
53
+ blockId := inputs.etc.blockId
54
+ firstDatasetSpec := inputs.etc.firstDatasetSpec
55
+
48
56
  library := inputs.library
57
+ libraryFormat := library.spec.annotations["pl7.app/vdj/libraryFormat"]
49
58
 
50
59
  // clonotypingBlockId -> "bulk" | "sc"
51
60
  datasetTypes := {}
@@ -81,77 +90,142 @@ self.body(func(inputs) {
81
90
  dataDescription.assemblingFeature = assemblingFeature
82
91
 
83
92
  // there should be a join call on pfFrames, but it's not implemented, so we do it by hand
84
- dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
85
-
86
- // collect params for running export commands and to parse result tsv files into pColumns
87
- shmTreeTableOptions := exportSettings.shmTreeTableOptions(dataDescription)
88
- shmTreeNodesTableOptions := exportSettings.shmTreeNodesTableOptions(dataDescription)
89
- shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClonesTableOptions(dataDescription, inputs.donorColumn)
90
- shmTreeNodesUniqueIsotypeTableOptions := exportSettings.shmTreeNodesUniqueIsotypeTableOptions(dataDescription)
91
-
92
- libraryFormat := library.spec.annotations["pl7.app/vdj/libraryFormat"]
93
-
94
- // TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
95
- mixcrResults := llPFrames.aggregate(
96
- // files to iterate through
97
- dataGroupedByDonorId.data,
98
- // columns not to combine - sampleId and mixcrBlockId
99
- [1, 2],
100
- reconstructShmTreesTpl,
101
- // all the outputs that should be gethered
102
- [
103
- {
104
- name: "trees",
105
- type: "Resource"
106
- }, {
107
- name: "treeNodes",
108
- type: "Resource"
109
- }, {
110
- name: "treeNodesWithClones",
111
- type: "Resource"
112
- }, {
113
- name: "treeNodesUniqueIsotype",
114
- type: "Resource"
115
- }, {
116
- name: "tsvs",
117
- type: "Resource"
118
- }, {
119
- name: "allelesLog",
120
- type: "Resource"
121
- }, {
122
- name: "treesLog",
123
- type: "Resource"
124
- }, {
125
- name: "allelesReport",
126
- type: "Resource"
127
- }, {
128
- name: "treesReport",
129
- type: "Resource"
130
- }, {
131
- name: "allelesReportJson",
132
- type: "Resource"
133
- }, {
134
- name: "treesReportJson",
135
- type: "Resource"
93
+ dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets, firstDatasetSpec)
94
+
95
+ shmtResults := pframes.processColumn(
96
+ dataGroupedByDonorId,
97
+ mixcrShmTreesTpl,
98
+ [ {
99
+ name: "alleles",
100
+ type: "Resource",
101
+ spec: {
102
+ kind: "PColumn",
103
+ name: "mixcr.com/shmt/alleles",
104
+ valueType: "FileMap"
105
+ }
106
+ }, {
107
+ name: "downsampled",
108
+ type: "Resource",
109
+ spec: {
110
+ kind: "PColumn",
111
+ name: "mixcr.com/shmt/downsampled",
112
+ valueType: "FileMap"
113
+ }
114
+ }, {
115
+ name: "shmt",
116
+ type: "Resource",
117
+ spec: {
118
+ kind: "PColumn",
119
+ name: "mixcr.com/shmt",
120
+ valueType: "File"
121
+ }
122
+ }, {
123
+ name: "allelesLog",
124
+ type: "Resource",
125
+ spec: {
126
+ kind: "PColumn",
127
+ name: "mixcr.com/shmt/allelesLog",
128
+ valueType: "Log"
129
+ }
130
+ }, {
131
+ name: "treesLog",
132
+ type: "Resource",
133
+ spec: {
134
+ kind: "PColumn",
135
+ name: "mixcr.com/shmt/treesLog",
136
+ valueType: "Log"
137
+ }
138
+ }, {
139
+ name: "allelesReport",
140
+ type: "Resource",
141
+ spec: {
142
+ kind: "PColumn",
143
+ name: "mixcr.com/shmt/allelesReport",
144
+ valueType: "File"
145
+ }
146
+ }, {
147
+ name: "treesReport",
148
+ type: "Resource",
149
+ spec: {
150
+ kind: "PColumn",
151
+ name: "mixcr.com/shmt/treesReport",
152
+ valueType: "File"
153
+ }
154
+ }, {
155
+ name: "allelesReportJson",
156
+ type: "Resource",
157
+ spec: {
158
+ kind: "PColumn",
159
+ name: "mixcr.com/shmt/allelesReportJson",
160
+ valueType: "File"
161
+ }
162
+ }, {
163
+ name: "treesReportJson",
164
+ type: "Resource",
165
+ spec: {
166
+ kind: "PColumn",
167
+ name: "mixcr.com/shmt/treesReportJson",
168
+ valueType: "File"
136
169
  }
137
- ],
138
- false,
139
- // inputs
170
+ } ],
140
171
  {
141
- shmTreeTableOptions: shmTreeTableOptions,
142
- shmTreeTableArgs: shmTreeTableOptions.cmdArgs,
143
- shmTreeNodesTableOptions: shmTreeNodesTableOptions,
144
- shmTreeNodesWithClonesTableOptions: shmTreeNodesWithClonesTableOptions,
145
- shmTreeNodesWithClonesTableArgs: shmTreeNodesWithClonesTableOptions.cmdArgs,
146
- shmTreeNodesUniqueIsotypeTableOptions: shmTreeNodesUniqueIsotypeTableOptions,
147
- library: is_undefined(library) ? smart.createNullResource() : library.data,
148
- globalParams: maps.merge(
149
- inputs.params,
150
- {
172
+ aggregate: [1, 2],
173
+ traceSteps: [{type: "milaboratories.mixcr-shm-trees", id: blockId, importance: 19, label: "SHM Trees"}],
174
+ extra: {
175
+ library: is_undefined(library) ? smart.createNullResource() : library.data,
176
+ globalParams: maps.merge(inputs.params, {
151
177
  datasetTypes: datasetTypes,
152
178
  libraryFormat: libraryFormat
179
+ })
180
+ }
181
+ }
182
+ )
183
+
184
+ // collect params for running export commands and to parse result tsv files into pColumns
185
+ shmTreeTableOptions := exportSettings.shmTree(dataDescription)
186
+ shmTreeNodesTableOptions := exportSettings.shmTreeNodes(dataDescription)
187
+ shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClones(dataDescription, inputs.donorColumn)
188
+ shmTreeNodesUniqueIsotypeTableOptions := exportSettings.shmTreeNodesUniqueIsotype(dataDescription)
189
+
190
+ exportResults := pframes.processColumn(
191
+ shmtResults.output("shmt"),
192
+ mixcrExportTpl,
193
+ [ {
194
+ type: "Xsv",
195
+ xsvType: "tsv",
196
+ settings: shmTreeTableOptions.pfconvParams,
197
+ name: "trees"
198
+ }, {
199
+ type: "Xsv",
200
+ xsvType: "tsv",
201
+ settings: shmTreeNodesTableOptions.pfconvParams,
202
+ name: "treeNodes"
203
+ }, {
204
+ type: "Xsv",
205
+ xsvType: "tsv",
206
+ settings: shmTreeNodesWithClonesTableOptions.pfconvParams,
207
+ name: "treeNodesWithClones"
208
+ }, {
209
+ type: "Xsv",
210
+ xsvType: "tsv",
211
+ settings: shmTreeNodesUniqueIsotypeTableOptions.pfconvParams,
212
+ name: "treeNodesUniqueIsotype"
213
+ } ],
214
+ {
215
+ extra: {
216
+ library: is_undefined(library) ? smart.createNullResource() : library.data,
217
+ params:{
218
+ libraryFormat: libraryFormat,
219
+ shmTreeArgs: shmTreeTableOptions.cmdArgs,
220
+ shmTreeNodesArgs: shmTreeNodesTableOptions.cmdArgs,
221
+ shmTreeNodesWithClonesArgs: shmTreeNodesWithClonesTableOptions.cmdArgs,
222
+ shmTreeNodesUniqueIsotypeArgs: shmTreeNodesUniqueIsotypeTableOptions.cmdArgs,
223
+ shmTreeEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeTableOptions.pfconvParams),
224
+ shmTreeNodesEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeNodesTableOptions.pfconvParams),
225
+ shmTreeNodesWithClonesEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeNodesWithClonesTableOptions.pfconvParams),
226
+ shmTreeNodesUniqueIsotypeEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeNodesUniqueIsotypeTableOptions.pfconvParams)
153
227
  }
154
- )
228
+ }
155
229
  }
156
230
  )
157
231
 
@@ -162,34 +236,6 @@ self.body(func(inputs) {
162
236
 
163
237
  // ll.print("__THE_LOG__ " + string(json.encode(additionalArgsForImportTsv)))
164
238
 
165
- trees := xsv.importFileMap(
166
- mixcrResults.output("trees"),
167
- "tsv",
168
- shmTreeTableOptions.pfconvParams,
169
- additionalArgsForImportTsv
170
- )
171
-
172
- treeNodes := xsv.importFileMap(
173
- mixcrResults.output("treeNodes"),
174
- "tsv",
175
- shmTreeNodesTableOptions.pfconvParams,
176
- additionalArgsForImportTsv
177
- )
178
-
179
- treeNodesWithClones := xsv.importFileMap(
180
- mixcrResults.output("treeNodesWithClones"),
181
- "tsv",
182
- shmTreeNodesWithClonesTableOptions.pfconvParams,
183
- additionalArgsForImportTsv
184
- )
185
-
186
- treeNodesUniqueIsotype := xsv.importFileMap(
187
- mixcrResults.output("treeNodesUniqueIsotype"),
188
- "tsv",
189
- shmTreeNodesUniqueIsotypeTableOptions.pfconvParams,
190
- additionalArgsForImportTsv
191
- )
192
-
193
239
  // Running SOI search for the data
194
240
  soiNodesResults := {}
195
241
  soiTreesResults := {}
@@ -216,8 +262,8 @@ self.body(func(inputs) {
216
262
  ll.panic("unknown target feature: " + soiDb.parameters.targetFeature)
217
263
  }
218
264
 
219
- querySpec := treeNodes[columnId + ".spec"]
220
- queryData := treeNodes[columnId + ".data"]
265
+ querySpec := exportResults.outputSpec("treeNodes", columnId)
266
+ queryData := exportResults.outputData("treeNodes", columnId)
221
267
 
222
268
  soiResult := render.create(soiTpl, {
223
269
  querySpec: querySpec,
@@ -229,29 +275,34 @@ self.body(func(inputs) {
229
275
  soiTreesResults[soiDb.parameters.id] = soiResult.output("treesResult")
230
276
  }
231
277
 
232
- tsvs := mixcrResults.output("tsvs")
278
+
279
+ trees := exportResults.xsvOutputFrame("trees")
280
+
281
+ treeNodes := exportResults.xsvOutputFrame("treeNodes")
282
+
283
+ treeNodesWithClones := exportResults.xsvOutputFrame("treeNodesWithClones")
284
+
285
+ treeNodesUniqueIsotype := exportResults.xsvOutputFrame("treeNodesUniqueIsotype")
233
286
 
234
287
  return {
235
- // combine columns into pFrame
236
- trees: pframes.exportFrame(trees),
237
- // combine columns into pFrame
238
- treeNodes: pframes.exportFrame(treeNodes),
239
- // combine columns into pFrame
240
- treeNodesWithClones: pframes.exportFrame(treeNodesWithClones),
241
- // combine columns into pFrame
242
- treeNodesUniqueIsotype: pframes.exportFrame(treeNodesUniqueIsotype),
288
+ alleles: shmtResults.output("alleles"),
289
+ downsampled: shmtResults.output("downsampled"),
290
+ shmt: shmtResults.output("shmt"),
291
+
292
+ trees: trees,
293
+ treeNodes: treeNodes,
294
+ treeNodesWithClones: treeNodesWithClones,
295
+ treeNodesUniqueIsotype: treeNodesUniqueIsotype,
243
296
 
244
297
  soiNodesResults: maps.mapValues(soiNodesResults, pframes.exportFrame),
245
298
  soiTreesResults: maps.mapValues(soiTreesResults, pframes.exportFrame),
246
299
 
247
- tsvs: tsvs,
248
-
249
- allelesLogs: mixcrResults.output("allelesLog"),
250
- treesLogs: mixcrResults.output("treesLog"),
300
+ allelesLogs: shmtResults.outputData("allelesLog"),
301
+ treesLogs: shmtResults.outputData("treesLog"),
251
302
 
252
- allelesReports: mixcrResults.output("allelesReport"),
253
- treesReports: mixcrResults.output("treesReport"),
254
- allelesReportsJson: mixcrResults.output("allelesReportJson"),
255
- treesReportsJson: mixcrResults.output("treesReportJson")
303
+ allelesReports: shmtResults.outputData("allelesReport"),
304
+ treesReports: shmtResults.outputData("treesReport"),
305
+ allelesReportsJson: shmtResults.outputData("allelesReportJson"),
306
+ treesReportsJson: shmtResults.outputData("treesReportJson")
256
307
  }
257
308
  })
@@ -0,0 +1,81 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+ exec := import("@platforma-sdk/workflow-tengo:exec")
3
+ assets := import("@platforma-sdk/workflow-tengo:assets")
4
+ slices := import("@platforma-sdk/workflow-tengo:slices")
5
+ json := import("json")
6
+
7
+ paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
8
+
9
+ ensureUniquenessParamsFromPconvParams := func(pfConvParams) {
10
+     // Reduces pfconv params to plain column-name lists: { axes: [...], columns: [...] }.
11
+     columnOf := func(entry) {
12
+         return entry.column
13
+     }
14
+     return {
15
+         axes: slices.map(pfConvParams.axes, columnOf),
16
+         columns: slices.map(pfConvParams.columns, columnOf)
17
+     }
18
+ }
19
+
20
+ /**
21
+  * Aggregates rows of a TSV by its key columns so that every row becomes uniquely addressable by its native key.
22
+  * @param inputTsv - TSV file added to the aggregation command as "input.tsv"
23
+  * @param params - { axes, columns } lists of column names: axes are the groupBy keys, columns are the
24
+  *   value columns to aggregate (ensureUniquenessParamsFromPconvParams produces this shape)
25
+  * @param aggParams - aggParams[0] is the aggregation type; if aggParams[1] is also given, it is passed as
26
+  *   rankingCol of a single aggregation whose pickCols covers every value column
27
+  * @return the "output.tsv" file saved by the aggregation command
28
+  */
29
+ ensureUniqueness := func(inputTsv, params, ...aggParams) {
30
+     keyColumns := params.axes
31
+     valueColumns := params.columns
32
+
33
+     aggregationWorkflow := undefined
34
+     if len(aggParams) > 1 {
35
+         // Separate names for source and result: re-declaring the source name with := here would shadow it and the loop would walk an empty array.
36
+         pickCols := []
37
+         for col in valueColumns {
38
+             pickCols = append(pickCols, [col, col])
39
+         }
40
+         aggregationWorkflow = {
41
+             steps: [ {
42
+                 type: "aggregate",
43
+                 groupBy: keyColumns,
44
+                 aggregations: [ {
45
+                     type: aggParams[0],
46
+                     rankingCol: aggParams[1],
47
+                     pickCols: pickCols
48
+                 } ]
49
+             } ]
50
+         }
51
+     } else {
52
+         // No ranking column: aggregate each value column independently, keeping its name.
53
+         aggregations := []
54
+         for col in valueColumns {
55
+             aggregations = append(aggregations, { type: aggParams[0], src: col, dst: col })
56
+         }
57
+         aggregationWorkflow = {
58
+             steps: [ {
59
+                 type: "aggregate",
60
+                 groupBy: keyColumns,
61
+                 aggregations: aggregations
62
+             } ]
63
+         }
64
+     }
65
+
66
+     aggregateCmd := exec.builder().
67
+         printErrStreamToStdout().
68
+         software(paggregateSw).
69
+         arg("--workflow").arg("wf.json").
70
+         writeFile("wf.json", json.encode(aggregationWorkflow)).
71
+         arg("input.tsv").addFile("input.tsv", inputTsv).
72
+         arg("output.tsv").saveFile("output.tsv").
73
+         run()
74
+
75
+     return aggregateCmd.getFile("output.tsv")
76
+ }
77
+
78
+ export ll.toStrict({
79
+ ensureUniqueness: ensureUniqueness,
80
+ ensureUniquenessParamsFromPconvParams: ensureUniquenessParamsFromPconvParams
81
+ })
@@ -1,75 +0,0 @@
1
- ll := import("@platforma-sdk/workflow-tengo:ll")
2
- exec := import("@platforma-sdk/workflow-tengo:exec")
3
- assets := import("@platforma-sdk/workflow-tengo:assets")
4
- json := import("json")
5
- paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
6
-
7
- //// aggregating by-nodes output to make it uniquely addressable by it's native key
8
- ensureUniqueness := func(inputTsv, pfConvParams, ...aggParams) {
9
- keyColumns := []
10
- for axis in pfConvParams.axes {
11
- keyColumns = append(keyColumns, axis.column)
12
- }
13
-
14
- aggregationWorkflow := undefined
15
- if len(aggParams) > 1 {
16
- pickCols := []
17
- for col in pfConvParams.columns {
18
- pickCols = append(pickCols, [
19
- col.column,
20
- col.column
21
- ]
22
- )
23
- }
24
-
25
- rankingCol := aggParams[1]
26
- aggregationWorkflow = {
27
- steps: [
28
- {
29
- type: "aggregate",
30
- groupBy: keyColumns,
31
- aggregations: [
32
- {
33
- type: aggParams[0],
34
- rankingCol: rankingCol,
35
- pickCols: pickCols
36
- }
37
- ]
38
- }
39
- ]
40
- }
41
- } else {
42
- aggregations := []
43
- for col in pfConvParams.columns {
44
- aggregations = append(aggregations, {
45
- type: aggParams[0],
46
- src: col.column,
47
- dst: col.column
48
- })
49
- }
50
-
51
- aggregationWorkflow = { steps: [
52
- {
53
- type: "aggregate",
54
- groupBy: keyColumns,
55
- aggregations: aggregations
56
- }
57
- ]
58
- }
59
- }
60
-
61
- aggregateCmd := exec.builder().
62
- printErrStreamToStdout().
63
- software(paggregateSw).
64
- arg("--workflow").arg("wf.json").
65
- writeFile("wf.json", json.encode(aggregationWorkflow)).
66
- arg("input.tsv").addFile("input.tsv", inputTsv).
67
- arg("output.tsv").saveFile("output.tsv").
68
- run()
69
-
70
- return aggregateCmd.getFile("output.tsv")
71
- }
72
-
73
- export ll.toStrict({
74
- ensureUniqueness: ensureUniqueness
75
- })