@platforma-open/milaboratories.mixcr-shm-trees.workflow 3.6.1 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,8 +12,12 @@ json := import("json")
12
12
  exportSettings := import(":export-settings")
13
13
  prepareDonorColumn := import(":prepare-donor-column")
14
14
  pframes := import("@platforma-sdk/workflow-tengo:pframes")
15
+ tablesAggregation := import(":tables-aggregation")
15
16
 
16
- reconstructShmTreesTpl := assets.importTemplate(":reconstruct-shm-trees")
17
+ // reconstructShmTreesTpl := assets.importTemplate(":reconstruct-shm-trees")
18
+
19
+ mixcrShmTreesTpl := assets.importTemplate(":mixcr-shm-trees")
20
+ mixcrExportTpl := assets.importTemplate(":mixcr-export")
17
21
 
18
22
  self.awaitState("datasets", { wildcard: "*" }, "AllInputsSet")
19
23
  // this template should run only after all inputs have been resolved
@@ -21,10 +25,17 @@ self.awaitState("datasets", { wildcard: "*" }, "AllInputsSet")
21
25
  self.awaitState("datasets", { wildcard: "*" }, "data", "InputsLocked")
22
26
  // but we need spec already
23
27
  self.awaitState("datasets", { wildcard: "*" }, "spec", "ResourceReady")
24
- self.awaitState("library", "spec", "ResourceReady")
25
- self.awaitState("library", "data", "AllInputsSet") // change to InputsLocked after fix
28
+
29
+ // Use a match pattern instead of an exact field name to make these fields effectively optional:
30
+ // when the built-in library is used, a null resource is passed as the library.
31
+ // Unlike an exact name, a match does not throw an error if no fields matched.
32
+ self.awaitState("library", { match: "^spec$" }, "ResourceReady")
33
+ self.awaitState("library", { match: "^data$" }, "AllInputsSet") // change to InputsLocked after fix
34
+ // self.awaitState({ match: "^library$" }, "BQueryResultMulti")
35
+
26
36
  self.awaitState("donorColumn", "ResourceReady")
27
37
  self.awaitState("params", "ResourceReady")
38
+ self.awaitState("etc", "ResourceReady")
28
39
 
29
40
  soiTpl := assets.importTemplate(":soi")
30
41
 
@@ -39,7 +50,11 @@ self.body(func(inputs) {
39
50
  cellsAssembled: false
40
51
  }
41
52
 
53
+ blockId := inputs.etc.blockId
54
+ firstDatasetSpec := inputs.etc.firstDatasetSpec
55
+
42
56
  library := inputs.library
57
+ libraryFormat := library.spec.annotations["pl7.app/vdj/libraryFormat"]
43
58
 
44
59
  // clonotypingBlockId -> "bulk" | "sc"
45
60
  datasetTypes := {}
@@ -75,77 +90,142 @@ self.body(func(inputs) {
75
90
  dataDescription.assemblingFeature = assemblingFeature
76
91
 
77
92
  // this should be a join call on pfFrames, but that is not implemented yet, so we do it by hand
78
- dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
79
-
80
- // collect params for running export commands and to parse result tsv files into pColumns
81
- shmTreeTableOptions := exportSettings.shmTreeTableOptions(dataDescription)
82
- shmTreeNodesTableOptions := exportSettings.shmTreeNodesTableOptions(dataDescription)
83
- shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClonesTableOptions(dataDescription, inputs.donorColumn)
84
- shmTreeNodesUniqueIsotypeTableOptions := exportSettings.shmTreeNodesUniqueIsotypeTableOptions(dataDescription)
85
-
86
- libraryFormat := library.spec.annotations["pl7.app/vdj/libraryFormat"]
87
-
88
- // TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
89
- mixcrResults := llPFrames.aggregate(
90
- // files to iterate through
91
- dataGroupedByDonorId.data,
92
- // columns not to combine - sampleId and mixcrBlockId
93
- [1, 2],
94
- reconstructShmTreesTpl,
95
- // all the outputs that should be gethered
96
- [
97
- {
98
- name: "trees",
99
- type: "Resource"
100
- }, {
101
- name: "treeNodes",
102
- type: "Resource"
103
- }, {
104
- name: "treeNodesWithClones",
105
- type: "Resource"
106
- }, {
107
- name: "treeNodesUniqueIsotype",
108
- type: "Resource"
109
- }, {
110
- name: "tsvs",
111
- type: "Resource"
112
- }, {
113
- name: "allelesLog",
114
- type: "Resource"
115
- }, {
116
- name: "treesLog",
117
- type: "Resource"
118
- }, {
119
- name: "allelesReport",
120
- type: "Resource"
121
- }, {
122
- name: "treesReport",
123
- type: "Resource"
124
- }, {
125
- name: "allelesReportJson",
126
- type: "Resource"
127
- }, {
128
- name: "treesReportJson",
129
- type: "Resource"
93
+ dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets, firstDatasetSpec)
94
+
95
+ shmtResults := pframes.processColumn(
96
+ dataGroupedByDonorId,
97
+ mixcrShmTreesTpl,
98
+ [ {
99
+ name: "alleles",
100
+ type: "Resource",
101
+ spec: {
102
+ kind: "PColumn",
103
+ name: "mixcr.com/shmt/alleles",
104
+ valueType: "FileMap"
105
+ }
106
+ }, {
107
+ name: "downsampled",
108
+ type: "Resource",
109
+ spec: {
110
+ kind: "PColumn",
111
+ name: "mixcr.com/shmt/downsampled",
112
+ valueType: "FileMap"
113
+ }
114
+ }, {
115
+ name: "shmt",
116
+ type: "Resource",
117
+ spec: {
118
+ kind: "PColumn",
119
+ name: "mixcr.com/shmt",
120
+ valueType: "File"
121
+ }
122
+ }, {
123
+ name: "allelesLog",
124
+ type: "Resource",
125
+ spec: {
126
+ kind: "PColumn",
127
+ name: "mixcr.com/shmt/allelesLog",
128
+ valueType: "Log"
129
+ }
130
+ }, {
131
+ name: "treesLog",
132
+ type: "Resource",
133
+ spec: {
134
+ kind: "PColumn",
135
+ name: "mixcr.com/shmt/treesLog",
136
+ valueType: "Log"
137
+ }
138
+ }, {
139
+ name: "allelesReport",
140
+ type: "Resource",
141
+ spec: {
142
+ kind: "PColumn",
143
+ name: "mixcr.com/shmt/allelesReport",
144
+ valueType: "File"
145
+ }
146
+ }, {
147
+ name: "treesReport",
148
+ type: "Resource",
149
+ spec: {
150
+ kind: "PColumn",
151
+ name: "mixcr.com/shmt/treesReport",
152
+ valueType: "File"
130
153
  }
131
- ],
132
- false,
133
- // inputs
154
+ }, {
155
+ name: "allelesReportJson",
156
+ type: "Resource",
157
+ spec: {
158
+ kind: "PColumn",
159
+ name: "mixcr.com/shmt/allelesReportJson",
160
+ valueType: "File"
161
+ }
162
+ }, {
163
+ name: "treesReportJson",
164
+ type: "Resource",
165
+ spec: {
166
+ kind: "PColumn",
167
+ name: "mixcr.com/shmt/treesReportJson",
168
+ valueType: "File"
169
+ }
170
+ } ],
134
171
  {
135
- shmTreeTableOptions: shmTreeTableOptions,
136
- shmTreeTableArgs: shmTreeTableOptions.cmdArgs,
137
- shmTreeNodesTableOptions: shmTreeNodesTableOptions,
138
- shmTreeNodesWithClonesTableOptions: shmTreeNodesWithClonesTableOptions,
139
- shmTreeNodesWithClonesTableArgs: shmTreeNodesWithClonesTableOptions.cmdArgs,
140
- shmTreeNodesUniqueIsotypeTableOptions: shmTreeNodesUniqueIsotypeTableOptions,
141
- library: is_undefined(library) ? smart.createNullResource() : library.data,
142
- globalParams: maps.merge(
143
- inputs.params,
144
- {
172
+ aggregate: [1, 2],
173
+ traceSteps: [{type: "milaboratories.mixcr-shm-trees", id: blockId, importance: 19, label: "SHM Trees"}],
174
+ extra: {
175
+ library: is_undefined(library) ? smart.createNullResource() : library.data,
176
+ globalParams: maps.merge(inputs.params, {
145
177
  datasetTypes: datasetTypes,
146
178
  libraryFormat: libraryFormat
179
+ })
180
+ }
181
+ }
182
+ )
183
+
184
+ // collect params for running export commands and to parse result tsv files into pColumns
185
+ shmTreeTableOptions := exportSettings.shmTree(dataDescription)
186
+ shmTreeNodesTableOptions := exportSettings.shmTreeNodes(dataDescription)
187
+ shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClones(dataDescription, inputs.donorColumn)
188
+ shmTreeNodesUniqueIsotypeTableOptions := exportSettings.shmTreeNodesUniqueIsotype(dataDescription)
189
+
190
+ exportResults := pframes.processColumn(
191
+ shmtResults.output("shmt"),
192
+ mixcrExportTpl,
193
+ [ {
194
+ type: "Xsv",
195
+ xsvType: "tsv",
196
+ settings: shmTreeTableOptions.pfconvParams,
197
+ name: "trees"
198
+ }, {
199
+ type: "Xsv",
200
+ xsvType: "tsv",
201
+ settings: shmTreeNodesTableOptions.pfconvParams,
202
+ name: "treeNodes"
203
+ }, {
204
+ type: "Xsv",
205
+ xsvType: "tsv",
206
+ settings: shmTreeNodesWithClonesTableOptions.pfconvParams,
207
+ name: "treeNodesWithClones"
208
+ }, {
209
+ type: "Xsv",
210
+ xsvType: "tsv",
211
+ settings: shmTreeNodesUniqueIsotypeTableOptions.pfconvParams,
212
+ name: "treeNodesUniqueIsotype"
213
+ } ],
214
+ {
215
+ extra: {
216
+ library: is_undefined(library) ? smart.createNullResource() : library.data,
217
+ params:{
218
+ libraryFormat: libraryFormat,
219
+ shmTreeArgs: shmTreeTableOptions.cmdArgs,
220
+ shmTreeNodesArgs: shmTreeNodesTableOptions.cmdArgs,
221
+ shmTreeNodesWithClonesArgs: shmTreeNodesWithClonesTableOptions.cmdArgs,
222
+ shmTreeNodesUniqueIsotypeArgs: shmTreeNodesUniqueIsotypeTableOptions.cmdArgs,
223
+ shmTreeEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeTableOptions.pfconvParams),
224
+ shmTreeNodesEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeNodesTableOptions.pfconvParams),
225
+ shmTreeNodesWithClonesEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeNodesWithClonesTableOptions.pfconvParams),
226
+ shmTreeNodesUniqueIsotypeEnsureUniquenessParams: tablesAggregation.ensureUniquenessParamsFromPconvParams(shmTreeNodesUniqueIsotypeTableOptions.pfconvParams)
147
227
  }
148
- )
228
+ }
149
229
  }
150
230
  )
151
231
 
@@ -156,33 +236,13 @@ self.body(func(inputs) {
156
236
 
157
237
  // ll.print("__THE_LOG__ " + string(json.encode(additionalArgsForImportTsv)))
158
238
 
159
- trees := xsv.importFileMap(
160
- mixcrResults.output("trees"),
161
- "tsv",
162
- shmTreeTableOptions.pfconvParams,
163
- additionalArgsForImportTsv
164
- )
165
-
166
- treeNodes := xsv.importFileMap(
167
- mixcrResults.output("treeNodes"),
168
- "tsv",
169
- shmTreeNodesTableOptions.pfconvParams,
170
- additionalArgsForImportTsv
171
- )
172
-
173
- treeNodesWithClones := xsv.importFileMap(
174
- mixcrResults.output("treeNodesWithClones"),
175
- "tsv",
176
- shmTreeNodesWithClonesTableOptions.pfconvParams,
177
- additionalArgsForImportTsv
178
- )
179
-
180
- treeNodesUniqueIsotype := xsv.importFileMap(
181
- mixcrResults.output("treeNodesUniqueIsotype"),
182
- "tsv",
183
- shmTreeNodesUniqueIsotypeTableOptions.pfconvParams,
184
- additionalArgsForImportTsv
185
- )
239
+ trees := exportResults.xsvOutputFrame("trees")
240
+
241
+ treeNodes := exportResults.xsvOutputFrame("treeNodes")
242
+
243
+ treeNodesWithClones := exportResults.xsvOutputFrame("treeNodesWithClones")
244
+
245
+ treeNodesUniqueIsotype := exportResults.xsvOutputFrame("treeNodesUniqueIsotype")
186
246
 
187
247
  // Running SOI search for the data
188
248
  soiNodesResults := {}
@@ -223,29 +283,25 @@ self.body(func(inputs) {
223
283
  soiTreesResults[soiDb.parameters.id] = soiResult.output("treesResult")
224
284
  }
225
285
 
226
- tsvs := mixcrResults.output("tsvs")
227
-
228
286
  return {
229
- // combine columns into pFrame
230
- trees: pframes.exportFrame(trees),
231
- // combine columns into pFrame
232
- treeNodes: pframes.exportFrame(treeNodes),
233
- // combine columns into pFrame
234
- treeNodesWithClones: pframes.exportFrame(treeNodesWithClones),
235
- // combine columns into pFrame
236
- treeNodesUniqueIsotype: pframes.exportFrame(treeNodesUniqueIsotype),
287
+ alleles: shmtResults.output("alleles"),
288
+ downsampled: shmtResults.output("downsampled"),
289
+ shmt: shmtResults.output("shmt"),
290
+
291
+ trees: trees,
292
+ treeNodes: treeNodes,
293
+ treeNodesWithClones: treeNodesWithClones,
294
+ treeNodesUniqueIsotype: treeNodesUniqueIsotype,
237
295
 
238
296
  soiNodesResults: maps.mapValues(soiNodesResults, pframes.exportFrame),
239
297
  soiTreesResults: maps.mapValues(soiTreesResults, pframes.exportFrame),
240
298
 
241
- tsvs: tsvs,
242
-
243
- allelesLogs: mixcrResults.output("allelesLog"),
244
- treesLogs: mixcrResults.output("treesLog"),
299
+ allelesLogs: shmtResults.outputData("allelesLog"),
300
+ treesLogs: shmtResults.outputData("treesLog"),
245
301
 
246
- allelesReports: mixcrResults.output("allelesReport"),
247
- treesReports: mixcrResults.output("treesReport"),
248
- allelesReportsJson: mixcrResults.output("allelesReportJson"),
249
- treesReportsJson: mixcrResults.output("treesReportJson")
302
+ allelesReports: shmtResults.outputData("allelesReport"),
303
+ treesReports: shmtResults.outputData("treesReport"),
304
+ allelesReportsJson: shmtResults.outputData("allelesReportJson"),
305
+ treesReportsJson: shmtResults.outputData("treesReportJson")
250
306
  }
251
307
  })
@@ -17,8 +17,11 @@ self.body(func(args) {
17
17
 
18
18
  for _, dataset in datasets {
19
19
  dsLibraryId := dataset.spec.annotations["pl7.app/vdj/libraryId"]
20
+ if is_undefined(dsLibraryId) {
21
+ dsLibraryId = ""
22
+ }
20
23
  if is_undefined(libraryId) {
21
- libraryId = !is_undefined(dsLibraryId) ? dsLibraryId : ""
24
+ libraryId = dsLibraryId
22
25
  } else {
23
26
  if libraryId != dsLibraryId {
24
27
  ll.panic("All datasets should have the same libraryId. Got " + libraryId + " and " + dsLibraryId)
@@ -0,0 +1,81 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+ exec := import("@platforma-sdk/workflow-tengo:exec")
3
+ assets := import("@platforma-sdk/workflow-tengo:assets")
4
+ slices := import("@platforma-sdk/workflow-tengo:slices")
5
+ json := import("json")
6
+
7
+ paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
8
+
9
+ ensureUniquenessParamsFromPconvParams := func(pfConvParams) { // Builds { axes, columns } where each list holds the `.column` value of every entry of pfConvParams.axes / pfConvParams.columns
10
+ return {
11
+ axes: slices.map(pfConvParams.axes, func(axis) {
12
+ return axis.column
13
+ }),
14
+ columns: slices.map(pfConvParams.columns, func(col) {
15
+ return col.column
16
+ })
17
+ }
18
+ }
19
+
20
+ /** Aggregates the by-nodes output so that it is uniquely addressable by its native key. `params.axes` are the group-by key columns and `params.columns` the columns to carry over; aggParams[0] is the aggregation type and the optional aggParams[1] is a ranking column. Returns the "output.tsv" file produced by the aggregation software. */
21
+ ensureUniqueness := func(inputTsv, params, ...aggParams) {
22
+ keyColumns := params.axes
23
+ pickCols := params.columns
24
+
25
+ aggregationWorkflow := undefined
26
+ if len(aggParams) > 1 { // ranked mode: one aggregation picking all columns by the ranking column
27
+ pickColPairs := [] // distinct name on purpose: re-declaring pickCols here would shadow the outer list and leave the loop below iterating an empty array
28
+ for col in pickCols {
29
+ pickColPairs = append(pickColPairs, [
30
+ col,
31
+ col
32
+ ]
33
+ )
34
+ }
35
+
36
+ rankingCol := aggParams[1]
37
+ aggregationWorkflow = {
38
+ steps: [ {
39
+ type: "aggregate",
40
+ groupBy: keyColumns,
41
+ aggregations: [ {
42
+ type: aggParams[0],
43
+ rankingCol: rankingCol,
44
+ pickCols: pickColPairs
45
+ } ]
46
+ } ]
47
+ }
48
+ } else { // plain mode: one aggregation per column, written back under the same column name
49
+ aggregations := []
50
+ for col in pickCols {
51
+ aggregations = append(aggregations, {
52
+ type: aggParams[0],
53
+ src: col,
54
+ dst: col
55
+ })
56
+ }
57
+
58
+ aggregationWorkflow = { steps: [ {
59
+ type: "aggregate",
60
+ groupBy: keyColumns,
61
+ aggregations: aggregations
62
+ } ]
63
+ }
64
+ }
65
+
66
+ aggregateCmd := exec.builder().
67
+ printErrStreamToStdout().
68
+ software(paggregateSw).
69
+ arg("--workflow").arg("wf.json").
70
+ writeFile("wf.json", json.encode(aggregationWorkflow)).
71
+ arg("input.tsv").addFile("input.tsv", inputTsv).
72
+ arg("output.tsv").saveFile("output.tsv").
73
+ run()
74
+
75
+ return aggregateCmd.getFile("output.tsv")
76
+ }
77
+
78
+ export ll.toStrict({
79
+ ensureUniqueness: ensureUniqueness,
80
+ ensureUniquenessParamsFromPconvParams: ensureUniquenessParamsFromPconvParams
81
+ })
@@ -1,75 +0,0 @@
1
- ll := import("@platforma-sdk/workflow-tengo:ll")
2
- exec := import("@platforma-sdk/workflow-tengo:exec")
3
- assets := import("@platforma-sdk/workflow-tengo:assets")
4
- json := import("json")
5
- paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
6
-
7
- //// aggregating by-nodes output to make it uniquely addressable by it's native key
8
- ensureUniqueness := func(inputTsv, pfConvParams, ...aggParams) {
9
- keyColumns := []
10
- for axis in pfConvParams.axes {
11
- keyColumns = append(keyColumns, axis.column)
12
- }
13
-
14
- aggregationWorkflow := undefined
15
- if len(aggParams) > 1 {
16
- pickCols := []
17
- for col in pfConvParams.columns {
18
- pickCols = append(pickCols, [
19
- col.column,
20
- col.column
21
- ]
22
- )
23
- }
24
-
25
- rankingCol := aggParams[1]
26
- aggregationWorkflow = {
27
- steps: [
28
- {
29
- type: "aggregate",
30
- groupBy: keyColumns,
31
- aggregations: [
32
- {
33
- type: aggParams[0],
34
- rankingCol: rankingCol,
35
- pickCols: pickCols
36
- }
37
- ]
38
- }
39
- ]
40
- }
41
- } else {
42
- aggregations := []
43
- for col in pfConvParams.columns {
44
- aggregations = append(aggregations, {
45
- type: aggParams[0],
46
- src: col.column,
47
- dst: col.column
48
- })
49
- }
50
-
51
- aggregationWorkflow = { steps: [
52
- {
53
- type: "aggregate",
54
- groupBy: keyColumns,
55
- aggregations: aggregations
56
- }
57
- ]
58
- }
59
- }
60
-
61
- aggregateCmd := exec.builder().
62
- printErrStreamToStdout().
63
- software(paggregateSw).
64
- arg("--workflow").arg("wf.json").
65
- writeFile("wf.json", json.encode(aggregationWorkflow)).
66
- arg("input.tsv").addFile("input.tsv", inputTsv).
67
- arg("output.tsv").saveFile("output.tsv").
68
- run()
69
-
70
- return aggregateCmd.getFile("output.tsv")
71
- }
72
-
73
- export ll.toStrict({
74
- ensureUniqueness: ensureUniqueness
75
- })