@platforma-open/milaboratories.mixcr-shm-trees.workflow 3.7.0 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,8 +12,12 @@ json := import("json")
12
12
  exportSettings := import(":export-settings")
13
13
  prepareDonorColumn := import(":prepare-donor-column")
14
14
  pframes := import("@platforma-sdk/workflow-tengo:pframes")
15
+ tablesAggregation := import(":tables-aggregation")
15
16
 
16
- reconstructShmTreesTpl := assets.importTemplate(":reconstruct-shm-trees")
17
+ // reconstructShmTreesTpl := assets.importTemplate(":reconstruct-shm-trees")
18
+
19
+ mixcrShmTreesTpl := assets.importTemplate(":mixcr-shm-trees")
20
+ mixcrExportTpl := assets.importTemplate(":mixcr-export")
17
21
 
18
22
  self.awaitState("datasets", { wildcard: "*" }, "AllInputsSet")
19
23
  // this template should run only after resolving of all inputs
@@ -31,6 +35,7 @@ self.awaitState("library", { match: "^data$" }, "AllInputsSet") // change to Inp
31
35
 
32
36
  self.awaitState("donorColumn", "ResourceReady")
33
37
  self.awaitState("params", "ResourceReady")
38
+ self.awaitState("etc", "ResourceReady")
34
39
 
35
40
  soiTpl := assets.importTemplate(":soi")
36
41
 
@@ -45,7 +50,11 @@ self.body(func(inputs) {
45
50
  cellsAssembled: false
46
51
  }
47
52
 
53
+ blockId := inputs.etc.blockId
54
+ firstDatasetSpec := inputs.etc.firstDatasetSpec
55
+
48
56
  library := inputs.library
57
+ libraryFormat := library.spec.annotations["pl7.app/vdj/libraryFormat"]
49
58
 
50
59
  // clonotypingBlockId -> "bulk" | "sc"
51
60
  datasetTypes := {}
@@ -81,77 +90,142 @@ self.body(func(inputs) {
81
90
  dataDescription.assemblingFeature = assemblingFeature
82
91
 
83
92
  // there should be call join on pfFrames, but it's not implemented, so we will do it by hand
84
- dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
85
-
86
- // collect params for running export commands and to parse result tsv files into pColumns
87
- shmTreeTableOptions := exportSettings.shmTreeTableOptions(dataDescription)
88
- shmTreeNodesTableOptions := exportSettings.shmTreeNodesTableOptions(dataDescription)
89
- shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClonesTableOptions(dataDescription, inputs.donorColumn)
90
- shmTreeNodesUniqueIsotypeTableOptions := exportSettings.shmTreeNodesUniqueIsotypeTableOptions(dataDescription)
91
-
92
- libraryFormat := library.spec.annotations["pl7.app/vdj/libraryFormat"]
93
-
94
- // TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
95
- mixcrResults := llPFrames.aggregate(
96
- // files to iterate through
97
- dataGroupedByDonorId.data,
98
- // columns not to combine - sampleId and mixcrBlockId
99
- [1, 2],
100
- reconstructShmTreesTpl,
101
- // all the outputs that should be gethered
102
- [
103
- {
104
- name: "trees",
105
- type: "Resource"
106
- }, {
107
- name: "treeNodes",
108
- type: "Resource"
109
- }, {
110
- name: "treeNodesWithClones",
111
- type: "Resource"
112
- }, {
113
- name: "treeNodesUniqueIsotype",
114
- type: "Resource"
115
- }, {
116
- name: "tsvs",
117
- type: "Resource"
118
- }, {
119
- name: "allelesLog",
120
- type: "Resource"
121
- }, {
122
- name: "treesLog",
123
- type: "Resource"
124
- }, {
125
- name: "allelesReport",
126
- type: "Resource"
127
- }, {
128
- name: "treesReport",
129
- type: "Resource"
130
- }, {
131
- name: "allelesReportJson",
132
- type: "Resource"
133
- }, {
134
- name: "treesReportJson",
135
- type: "Resource"
93
+ dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets, firstDatasetSpec)
94
+
95
+ shmtResults := pframes.processColumn(
96
+ dataGroupedByDonorId,
97
+ mixcrShmTreesTpl,
98
+ [ {
99
+ name: "alleles",
100
+ type: "Resource",
101
+ spec: {
102
+ kind: "PColumn",
103
+ name: "mixcr.com/shmt/alleles",
104
+ valueType: "FileMap"
105
+ }
106
+ }, {
107
+ name: "downsampled",
108
+ type: "Resource",
109
+ spec: {
110
+ kind: "PColumn",
111
+ name: "mixcr.com/shmt/downsampled",
112
+ valueType: "FileMap"
113
+ }
114
+ }, {
115
+ name: "shmt",
116
+ type: "Resource",
117
+ spec: {
118
+ kind: "PColumn",
119
+ name: "mixcr.com/shmt",
120
+ valueType: "File"
121
+ }
122
+ }, {
123
+ name: "allelesLog",
124
+ type: "Resource",
125
+ spec: {
126
+ kind: "PColumn",
127
+ name: "mixcr.com/shmt/allelesLog",
128
+ valueType: "Log"
129
+ }
130
+ }, {
131
+ name: "treesLog",
132
+ type: "Resource",
133
+ spec: {
134
+ kind: "PColumn",
135
+ name: "mixcr.com/shmt/treesLog",
136
+ valueType: "Log"
137
+ }
138
+ }, {
139
+ name: "allelesReport",
140
+ type: "Resource",
141
+ spec: {
142
+ kind: "PColumn",
143
+ name: "mixcr.com/shmt/allelesReport",
144
+ valueType: "File"
145
+ }
146
+ }, {
147
+ name: "treesReport",
148
+ type: "Resource",
149
+ spec: {
150
+ kind: "PColumn",
151
+ name: "mixcr.com/shmt/treesReport",
152
+ valueType: "File"
153
+ }
154
+ }, {
155
+ name: "allelesReportJson",
156
+ type: "Resource",
157
+ spec: {
158
+ kind: "PColumn",
159
+ name: "mixcr.com/shmt/allelesReportJson",
160
+ valueType: "File"
136
161
  }
137
- ],
138
- false,
139
- // inputs
162
+ }, {
163
+ name: "treesReportJson",
164
+ type: "Resource",
165
+ spec: {
166
+ kind: "PColumn",
167
+ name: "mixcr.com/shmt/treesReportJson",
168
+ valueType: "File"
169
+ }
170
+ } ],
140
171
  {
141
- shmTreeTableOptions: shmTreeTableOptions,
142
- shmTreeTableArgs: shmTreeTableOptions.cmdArgs,
143
- shmTreeNodesTableOptions: shmTreeNodesTableOptions,
144
- shmTreeNodesWithClonesTableOptions: shmTreeNodesWithClonesTableOptions,
145
- shmTreeNodesWithClonesTableArgs: shmTreeNodesWithClonesTableOptions.cmdArgs,
146
- shmTreeNodesUniqueIsotypeTableOptions: shmTreeNodesUniqueIsotypeTableOptions,
147
- library: is_undefined(library) ? smart.createNullResource() : library.data,
148
- globalParams: maps.merge(
149
- inputs.params,
150
- {
172
+ aggregate: [1, 2],
173
+ traceSteps: [{type: "milaboratories.mixcr-shm-trees", id: blockId, importance: 19, label: "SHM Trees"}],
174
+ extra: {
175
+ library: is_undefined(library) ? smart.createNullResource() : library.data,
176
+ globalParams: maps.merge(inputs.params, {
151
177
  datasetTypes: datasetTypes,
152
178
  libraryFormat: libraryFormat
179
+ })
180
+ }
181
+ }
182
+ )
183
+
184
+ // collect params for running export commands and to parse result tsv files into pColumns
185
+ shmTreeTableOptions := exportSettings.shmTree(dataDescription)
186
+ shmTreeNodesTableOptions := exportSettings.shmTreeNodes(dataDescription)
187
+ shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClones(dataDescription, inputs.donorColumn)
188
+ shmTreeNodesUniqueIsotypeTableOptions := exportSettings.shmTreeNodesUniqueIsotype(dataDescription)
189
+
190
+ exportResults := pframes.processColumn(
191
+ shmtResults.output("shmt"),
192
+ mixcrExportTpl,
193
+ [ {
194
+ type: "Xsv",
195
+ xsvType: "tsv",
196
+ settings: shmTreeTableOptions.pfconvParams,
197
+ name: "trees"
198
+ }, {
199
+ type: "Xsv",
200
+ xsvType: "tsv",
201
+ settings: shmTreeNodesTableOptions.pfconvParams,
202
+ name: "treeNodes"
203
+ }, {
204
+ type: "Xsv",
205
+ xsvType: "tsv",
206
+ settings: shmTreeNodesWithClonesTableOptions.pfconvParams,
207
+ name: "treeNodesWithClones"
208
+ }, {
209
+ type: "Xsv",
210
+ xsvType: "tsv",
211
+ settings: shmTreeNodesUniqueIsotypeTableOptions.pfconvParams,
212
+ name: "treeNodesUniqueIsotype"
213
+ } ],
214
+ {
215
+ extra: {
216
+ library: is_undefined(library) ? smart.createNullResource() : library.data,
217
+ params:{
218
+ libraryFormat: libraryFormat,
219
+ shmTreeArgs: shmTreeTableOptions.cmdArgs,
220
+ shmTreeNodesArgs: shmTreeNodesTableOptions.cmdArgs,
221
+ shmTreeNodesWithClonesArgs: shmTreeNodesWithClonesTableOptions.cmdArgs,
222
+ shmTreeNodesUniqueIsotypeArgs: shmTreeNodesUniqueIsotypeTableOptions.cmdArgs,
223
+ shmTreeEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeTableOptions.pfconvParams),
224
+ shmTreeNodesEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeNodesTableOptions.pfconvParams),
225
+ shmTreeNodesWithClonesEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeNodesWithClonesTableOptions.pfconvParams),
226
+ shmTreeNodesUniqueIsotypeEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeNodesUniqueIsotypeTableOptions.pfconvParams)
153
227
  }
154
- )
228
+ }
155
229
  }
156
230
  )
157
231
 
@@ -162,33 +236,13 @@ self.body(func(inputs) {
162
236
 
163
237
  // ll.print("__THE_LOG__ " + string(json.encode(additionalArgsForImportTsv)))
164
238
 
165
- trees := xsv.importFileMap(
166
- mixcrResults.output("trees"),
167
- "tsv",
168
- shmTreeTableOptions.pfconvParams,
169
- additionalArgsForImportTsv
170
- )
171
-
172
- treeNodes := xsv.importFileMap(
173
- mixcrResults.output("treeNodes"),
174
- "tsv",
175
- shmTreeNodesTableOptions.pfconvParams,
176
- additionalArgsForImportTsv
177
- )
178
-
179
- treeNodesWithClones := xsv.importFileMap(
180
- mixcrResults.output("treeNodesWithClones"),
181
- "tsv",
182
- shmTreeNodesWithClonesTableOptions.pfconvParams,
183
- additionalArgsForImportTsv
184
- )
185
-
186
- treeNodesUniqueIsotype := xsv.importFileMap(
187
- mixcrResults.output("treeNodesUniqueIsotype"),
188
- "tsv",
189
- shmTreeNodesUniqueIsotypeTableOptions.pfconvParams,
190
- additionalArgsForImportTsv
191
- )
239
+ trees := exportResults.xsvOutputFrame("trees")
240
+
241
+ treeNodes := exportResults.xsvOutputFrame("treeNodes")
242
+
243
+ treeNodesWithClones := exportResults.xsvOutputFrame("treeNodesWithClones")
244
+
245
+ treeNodesUniqueIsotype := exportResults.xsvOutputFrame("treeNodesUniqueIsotype")
192
246
 
193
247
  // Running SOI search for the data
194
248
  soiNodesResults := {}
@@ -229,29 +283,25 @@ self.body(func(inputs) {
229
283
  soiTreesResults[soiDb.parameters.id] = soiResult.output("treesResult")
230
284
  }
231
285
 
232
- tsvs := mixcrResults.output("tsvs")
233
-
234
286
  return {
235
- // combine columns into pFrame
236
- trees: pframes.exportFrame(trees),
237
- // combine columns into pFrame
238
- treeNodes: pframes.exportFrame(treeNodes),
239
- // combine columns into pFrame
240
- treeNodesWithClones: pframes.exportFrame(treeNodesWithClones),
241
- // combine columns into pFrame
242
- treeNodesUniqueIsotype: pframes.exportFrame(treeNodesUniqueIsotype),
287
+ alleles: shmtResults.output("alleles"),
288
+ downsampled: shmtResults.output("downsampled"),
289
+ shmt: shmtResults.output("shmt"),
290
+
291
+ trees: trees,
292
+ treeNodes: treeNodes,
293
+ treeNodesWithClones: treeNodesWithClones,
294
+ treeNodesUniqueIsotype: treeNodesUniqueIsotype,
243
295
 
244
296
  soiNodesResults: maps.mapValues(soiNodesResults, pframes.exportFrame),
245
297
  soiTreesResults: maps.mapValues(soiTreesResults, pframes.exportFrame),
246
298
 
247
- tsvs: tsvs,
248
-
249
- allelesLogs: mixcrResults.output("allelesLog"),
250
- treesLogs: mixcrResults.output("treesLog"),
299
+ allelesLogs: shmtResults.outputData("allelesLog"),
300
+ treesLogs: shmtResults.outputData("treesLog"),
251
301
 
252
- allelesReports: mixcrResults.output("allelesReport"),
253
- treesReports: mixcrResults.output("treesReport"),
254
- allelesReportsJson: mixcrResults.output("allelesReportJson"),
255
- treesReportsJson: mixcrResults.output("treesReportJson")
302
+ allelesReports: shmtResults.outputData("allelesReport"),
303
+ treesReports: shmtResults.outputData("treesReport"),
304
+ allelesReportsJson: shmtResults.outputData("allelesReportJson"),
305
+ treesReportsJson: shmtResults.outputData("treesReportJson")
256
306
  }
257
307
  })
@@ -0,0 +1,81 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+ exec := import("@platforma-sdk/workflow-tengo:exec")
3
+ assets := import("@platforma-sdk/workflow-tengo:assets")
4
+ slices := import("@platforma-sdk/workflow-tengo:slices")
5
+ json := import("json")
6
+
7
+ paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
8
+
9
+ ensureUniquenessParamsFromPconvParams := func(pfConvParams) {
10
+ return {
11
+ axes: slices.map(pfConvParams.axes, func(axis) {
12
+ return axis.column
13
+ }),
14
+ columns: slices.map(pfConvParams.columns, func(col) {
15
+ return col.column
16
+ })
17
+ }
18
+ }
19
+
20
+ /** Aggregating by-nodes output to make it uniquely addressable by its native key */
21
+ ensureUniqueness := func(inputTsv, params, ...aggParams) {
22
+ keyColumns := params.axes
23
+ pickCols := params.columns
24
+
25
+ aggregationWorkflow := undefined
26
+ if len(aggParams) > 1 {
27
+ pickCols := []
28
+ for col in pickCols {
29
+ pickCols = append(pickCols, [
30
+ col,
31
+ col
32
+ ]
33
+ )
34
+ }
35
+
36
+ rankingCol := aggParams[1]
37
+ aggregationWorkflow = {
38
+ steps: [ {
39
+ type: "aggregate",
40
+ groupBy: keyColumns,
41
+ aggregations: [ {
42
+ type: aggParams[0],
43
+ rankingCol: rankingCol,
44
+ pickCols: pickCols
45
+ } ]
46
+ } ]
47
+ }
48
+ } else {
49
+ aggregations := []
50
+ for col in pickCols {
51
+ aggregations = append(aggregations, {
52
+ type: aggParams[0],
53
+ src: col,
54
+ dst: col
55
+ })
56
+ }
57
+
58
+ aggregationWorkflow = { steps: [ {
59
+ type: "aggregate",
60
+ groupBy: keyColumns,
61
+ aggregations: aggregations
62
+ } ]
63
+ }
64
+ }
65
+
66
+ aggregateCmd := exec.builder().
67
+ printErrStreamToStdout().
68
+ software(paggregateSw).
69
+ arg("--workflow").arg("wf.json").
70
+ writeFile("wf.json", json.encode(aggregationWorkflow)).
71
+ arg("input.tsv").addFile("input.tsv", inputTsv).
72
+ arg("output.tsv").saveFile("output.tsv").
73
+ run()
74
+
75
+ return aggregateCmd.getFile("output.tsv")
76
+ }
77
+
78
+ export ll.toStrict({
79
+ ensureUniqueness: ensureUniqueness,
80
+ ensureUniquenessParamsFromPconvParams: ensureUniquenessParamsFromPconvParams
81
+ })
@@ -1,75 +0,0 @@
1
- ll := import("@platforma-sdk/workflow-tengo:ll")
2
- exec := import("@platforma-sdk/workflow-tengo:exec")
3
- assets := import("@platforma-sdk/workflow-tengo:assets")
4
- json := import("json")
5
- paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
6
-
7
- //// aggregating by-nodes output to make it uniquely addressable by it's native key
8
- ensureUniqueness := func(inputTsv, pfConvParams, ...aggParams) {
9
- keyColumns := []
10
- for axis in pfConvParams.axes {
11
- keyColumns = append(keyColumns, axis.column)
12
- }
13
-
14
- aggregationWorkflow := undefined
15
- if len(aggParams) > 1 {
16
- pickCols := []
17
- for col in pfConvParams.columns {
18
- pickCols = append(pickCols, [
19
- col.column,
20
- col.column
21
- ]
22
- )
23
- }
24
-
25
- rankingCol := aggParams[1]
26
- aggregationWorkflow = {
27
- steps: [
28
- {
29
- type: "aggregate",
30
- groupBy: keyColumns,
31
- aggregations: [
32
- {
33
- type: aggParams[0],
34
- rankingCol: rankingCol,
35
- pickCols: pickCols
36
- }
37
- ]
38
- }
39
- ]
40
- }
41
- } else {
42
- aggregations := []
43
- for col in pfConvParams.columns {
44
- aggregations = append(aggregations, {
45
- type: aggParams[0],
46
- src: col.column,
47
- dst: col.column
48
- })
49
- }
50
-
51
- aggregationWorkflow = { steps: [
52
- {
53
- type: "aggregate",
54
- groupBy: keyColumns,
55
- aggregations: aggregations
56
- }
57
- ]
58
- }
59
- }
60
-
61
- aggregateCmd := exec.builder().
62
- printErrStreamToStdout().
63
- software(paggregateSw).
64
- arg("--workflow").arg("wf.json").
65
- writeFile("wf.json", json.encode(aggregationWorkflow)).
66
- arg("input.tsv").addFile("input.tsv", inputTsv).
67
- arg("output.tsv").saveFile("output.tsv").
68
- run()
69
-
70
- return aggregateCmd.getFile("output.tsv")
71
- }
72
-
73
- export ll.toStrict({
74
- ensureUniqueness: ensureUniqueness
75
- })