@platforma-open/milaboratories.mixcr-shm-trees.workflow 3.7.0 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,9 +19,14 @@ wf.body(func(args) {
19
19
 
20
20
  // we cannot use an array as a request for waiting (see below), so we store datasets in a dictionary
21
21
  datasets := {}
22
+ firstDataset := undefined
22
23
  for datasetRef in args.datasetColumns {
23
24
  // we assume that each MiXCR block produces exactly one clns column
24
- datasets[datasetRef.blockId] = wf.resolve(datasetRef)
25
+ ds := wf.resolve(datasetRef)
26
+ datasets[datasetRef.blockId] = ds
27
+ if is_undefined(firstDataset) {
28
+ firstDataset = ds
29
+ }
25
30
  }
26
31
 
27
32
  library := render.createEphemeral(requestLibraryTpl, {
@@ -35,6 +40,10 @@ wf.body(func(args) {
35
40
  datasets: datasets,
36
41
  donorColumn: donorColumn,
37
42
  library: library,
43
+ etc: {
44
+ firstDatasetSpec: firstDataset.spec,
45
+ blockId: wf.getBlockId()
46
+ },
38
47
  params: {
39
48
  downsampling: args.downsampling,
40
49
  sequencesOfInterest: args.sequencesOfInterest
@@ -43,15 +52,18 @@ wf.body(func(args) {
43
52
 
44
53
  return {
45
54
  outputs: {
46
- trees: results.output("trees"),
47
- treeNodes: results.output("treeNodes"),
48
- treeNodesWithClones: results.output("treeNodesWithClones"),
49
- treeNodesUniqueIsotype: results.output("treeNodesUniqueIsotype"),
55
+ trees: pframes.exportFrame(results.output("trees")),
56
+ treeNodes: pframes.exportFrame(results.output("treeNodes")),
57
+ treeNodesWithClones: pframes.exportFrame(results.output("treeNodesWithClones")),
58
+ treeNodesUniqueIsotype: pframes.exportFrame(results.output("treeNodesUniqueIsotype")),
59
+
60
+ alleles: results.output("alleles"),
61
+ downsampled: results.output("downsampled"),
62
+ shmt: results.output("shmt"),
50
63
 
51
64
  soiNodesResults: results.output("soiNodesResults"),
52
65
  soiTreesResults: results.output("soiTreesResults"),
53
66
 
54
- tsvs: results.output("tsvs"),
55
67
  allelesLogs: results.output("allelesLogs"),
56
68
  treesLogs: results.output("treesLogs"),
57
69
 
@@ -0,0 +1,132 @@
1
+ //tengo:hash_override 994D9444-D460-429A-9004-EADA3D49D52E
2
+
3
+ self := import("@platforma-sdk/workflow-tengo:tpl")
4
+ pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
5
+ ll := import("@platforma-sdk/workflow-tengo:ll")
6
+ assets := import("@platforma-sdk/workflow-tengo:assets")
7
+ exec := import("@platforma-sdk/workflow-tengo:exec")
8
+ times := import("times")
9
+
10
+ json := import("json")
11
+
12
+ self.defineOutputs(
13
+ "trees", "treeNodes", "treeNodesWithClones", "treeNodesUniqueIsotype"
14
+ )
15
+
16
+ tablesAggregation := import(":tables-aggregation")
17
+
18
+ // import MiXCR as a software to use
19
+ mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:main")
20
+
21
+ self.body(func(inputs) {
22
+ shmtFile := inputs[pConstants.VALUE_FIELD_NAME]
23
+ params := inputs.params
24
+ library := inputs.library
25
+ libraryFormat := params.libraryFormat
26
+ shmTreeArgs := params.shmTreeArgs
27
+ shmTreeNodesArgs := params.shmTreeNodesArgs
28
+ shmTreeNodesWithClonesArgs := params.shmTreeNodesWithClonesArgs
29
+ shmTreeNodesUniqueIsotypeArgs := params.shmTreeNodesUniqueIsotypeArgs
30
+ shmTreeEnsureUniquenessParams := params.shmTreeEnsureUniquenessParams
31
+ shmTreeNodesEnsureUniquenessParams := params.shmTreeNodesEnsureUniquenessParams
32
+ shmTreeNodesWithClonesEnsureUniquenessParams := params.shmTreeNodesWithClonesEnsureUniquenessParams
33
+ shmTreeNodesUniqueIsotypeEnsureUniquenessParams := params.shmTreeNodesUniqueIsotypeEnsureUniquenessParams
34
+
35
+ addLibraryFile := func(cmdBuilder) {
36
+ if !is_undefined(library) {
37
+ if libraryFormat == "repseqio.json.gz" {
38
+ cmdBuilder.addFile("library.json.gz", library)
39
+ } else {
40
+ cmdBuilder.addFile("library.json", library)
41
+ }
42
+ }
43
+ }
44
+
45
+ // export trees without nodes
46
+ shmTreeExportsCmdBuilder := exec.builder().
47
+ printErrStreamToStdout().
48
+ dontSaveStdoutOrStderr().
49
+ inLightQueue().
50
+ secret("MI_LICENSE", "MI_LICENSE").
51
+ software(mixcrSw).
52
+ arg("exportShmTrees")
53
+
54
+ addLibraryFile(shmTreeExportsCmdBuilder)
55
+
56
+ for arg in shmTreeArgs {
57
+ shmTreeExportsCmdBuilder = shmTreeExportsCmdBuilder.arg(arg)
58
+ }
59
+
60
+ shmTreeExportsCmdBuilder = shmTreeExportsCmdBuilder.
61
+ arg("input.shmt").
62
+ arg("output.tsv").
63
+ addFile("input.shmt", shmtFile).
64
+ saveFile("output.tsv")
65
+
66
+ shmTreeExports := shmTreeExportsCmdBuilder.run()
67
+ shmTreeTsvRaw := shmTreeExports.getFile("output.tsv")
68
+
69
+ shmTreeTsv := tablesAggregation.ensureUniqueness(shmTreeTsvRaw, shmTreeEnsureUniquenessParams, "max_by", "totalReadsCountInTree")
70
+
71
+ // export tree nodes with data unique to the node
72
+ shmTreeNodesExportsCmdBuilder := exec.builder().
73
+ printErrStreamToStdout().
74
+ dontSaveStdoutOrStderr().
75
+ inLightQueue().
76
+ secret("MI_LICENSE", "MI_LICENSE").
77
+ software(mixcrSw).
78
+ arg("exportShmTreesWithNodes")
79
+
80
+ addLibraryFile(shmTreeNodesExportsCmdBuilder)
81
+
82
+ for arg in shmTreeNodesArgs {
83
+ shmTreeNodesExportsCmdBuilder = shmTreeNodesExportsCmdBuilder.arg(arg)
84
+ }
85
+
86
+ shmTreeNodesExportsCmdBuilder = shmTreeNodesExportsCmdBuilder.
87
+ arg("input.shmt").
88
+ arg("output.tsv").
89
+ addFile("input.shmt", shmtFile).
90
+ saveFile("output.tsv")
91
+
92
+ shmTreeNodesExports := shmTreeNodesExportsCmdBuilder.run()
93
+ shmTreeNodesTsvRaw := shmTreeNodesExports.getFile("output.tsv")
94
+
95
+ shmTreeNodesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesTsvRaw, shmTreeNodesEnsureUniquenessParams, "first")
96
+
97
+ // export nodes with clones. Each node may have several clones
98
+ shmTreeNodesWithClonesExportsCmdBuilder := exec.builder().
99
+ printErrStreamToStdout().
100
+ dontSaveStdoutOrStderr().
101
+ inLightQueue().
102
+ secret("MI_LICENSE", "MI_LICENSE").
103
+ software(mixcrSw).
104
+ arg("exportShmTreesWithNodes").
105
+ // don't export nodes that don't have clones
106
+ arg("--only-observed")
107
+
108
+ addLibraryFile(shmTreeNodesWithClonesExportsCmdBuilder)
109
+
110
+ for arg in shmTreeNodesWithClonesArgs {
111
+ shmTreeNodesWithClonesExportsCmdBuilder = shmTreeNodesWithClonesExportsCmdBuilder.arg(arg)
112
+ }
113
+
114
+ shmTreeNodesWithClonesExportsCmdBuilder = shmTreeNodesWithClonesExportsCmdBuilder.
115
+ arg("input.shmt").
116
+ arg("output.tsv").
117
+ addFile("input.shmt", shmtFile).
118
+ saveFile("output.tsv")
119
+
120
+ shmTreeNodesWithClonesExports := shmTreeNodesWithClonesExportsCmdBuilder.run()
121
+ shmTreeNodesWithClonesTsvRaw := shmTreeNodesWithClonesExports.getFile("output.tsv")
122
+
123
+ shmTreeNodesWithClonesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsvRaw, shmTreeNodesWithClonesEnsureUniquenessParams, "max_by", "readCount")
124
+ shmTreeNodesUniqueIsotypeTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsv, shmTreeNodesUniqueIsotypeEnsureUniquenessParams, "max_by", "readCount")
125
+
126
+ return {
127
+ trees: shmTreeTsv,
128
+ treeNodes: shmTreeNodesTsv,
129
+ treeNodesWithClones: shmTreeNodesWithClonesTsv,
130
+ treeNodesUniqueIsotype : shmTreeNodesUniqueIsotypeTsv
131
+ }
132
+ })
@@ -1,30 +1,27 @@
1
+ //tengo:hash_override 33125CD9-6F67-46B3-AA68-0FCD825B51BA
2
+
1
3
  self := import("@platforma-sdk/workflow-tengo:tpl")
2
4
  pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
3
5
  ll := import("@platforma-sdk/workflow-tengo:ll")
4
6
  assets := import("@platforma-sdk/workflow-tengo:assets")
5
7
  exec := import("@platforma-sdk/workflow-tengo:exec")
8
+ smart := import("@platforma-sdk/workflow-tengo:smart")
9
+ maps := import("@platforma-sdk/workflow-tengo:maps")
6
10
  times := import("times")
7
11
 
8
12
  json := import("json")
9
13
 
10
14
  // for usage in aggregate function, we should specify all outputs that will be used
11
15
  self.defineOutputs(
12
- "trees", "treeNodes", "treeNodesWithClones", "treeNodesUniqueIsotype",
13
- "tsvs",
16
+ "alleles", "downsampled", "shmt",
14
17
  "allelesLog", "treesLog",
15
18
  "allelesReport", "treesReport",
16
19
  "allelesReportJson", "treesReportJson"
17
20
  )
18
21
 
19
- //// import function for aggregating by-nodes output to make it uniquely addressable by it's native key
20
- tablesAggregation := import(":tablesAggregation")
21
-
22
22
  // import MiXCR as a software to use
23
23
  mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:main")
24
24
 
25
- // used to postprocess some tables
26
- paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-ptransform:main")
27
-
28
25
  // env for MiXCR to format progress messages
29
26
  progressPrefix := "[==PROGRESS==]"
30
27
 
@@ -32,9 +29,9 @@ self.body(func(inputs) {
32
29
  inputData := inputs[pConstants.VALUE_FIELD_NAME]
33
30
  globalParams := inputs.globalParams
34
31
  datasetTypes := globalParams.datasetTypes
35
- downsampling := globalParams.downsampling
36
32
  library := inputs.library
37
33
  libraryFormat := globalParams.libraryFormat
34
+ downsampling := globalParams.downsampling
38
35
 
39
36
  ll.assert(!is_undefined(datasetTypes), "datasetTypes undefined")
40
37
 
@@ -50,6 +47,10 @@ self.body(func(inputs) {
50
47
 
51
48
  // seed := times.time_string(times.now())
52
49
 
50
+ //
51
+ // Alleles inference
52
+ //
53
+
53
54
  allelesCmdBuilder := exec.builder().
54
55
  printErrStreamToStdout().
55
56
  secret("MI_LICENSE", "MI_LICENSE").
@@ -70,14 +71,14 @@ self.body(func(inputs) {
70
71
  allelesCmdBuilder.env("SEED", globalParams.seed)
71
72
  }
72
73
 
73
- toProcess := []
74
+ entries := []
74
75
  for sKey, inputFile in inputData.inputs() {
75
76
  key := json.decode(sKey)
76
77
  sampleId := key[0]
77
78
  clonotypingBlockId := key[1]
78
79
  // file name should encode axis values. It will be parsed by xsv.importFileMap afterwards to restore axis for clones data
79
80
  fileName := sampleId + "___" + clonotypingBlockId + ".clns"
80
- toProcess = append(toProcess, {
81
+ entries = append(entries, {
81
82
  clonotypingBlockId: clonotypingBlockId,
82
83
  sampleId: sampleId,
83
84
  fileName: fileName,
@@ -85,18 +86,23 @@ self.body(func(inputs) {
85
86
  })
86
87
  }
87
88
 
88
- for input in toProcess {
89
- allelesCmdBuilder.addFile(input.fileName, input.input).
90
- arg(input.fileName).
91
- saveFile("alleles/" + input.fileName)
89
+ for entry in entries {
90
+ allelesCmdBuilder.addFile(entry.fileName, entry.input).
91
+ arg(entry.fileName).
92
+ saveFile("alleles/" + entry.fileName)
92
93
  }
93
94
 
94
95
  alleles := allelesCmdBuilder.run()
95
96
 
96
- for input in toProcess {
97
- input.alleles = alleles.getFile("alleles/" + input.fileName)
97
+ for entry in entries {
98
+ entry.afterAlleles = alleles.getFile("alleles/" + entry.fileName)
99
+ entry.treesInput = entry.afterAlleles
98
100
  }
99
101
 
102
+ //
103
+ // Optional downsampling
104
+ //
105
+
100
106
  if !is_undefined(downsampling) {
101
107
  downsamplingParam := ""
102
108
  if downsampling.type == "CountReadsFixed" {
@@ -115,8 +121,8 @@ self.body(func(inputs) {
115
121
  ll.panic("Unknown downsampling type: " + downsampling.type)
116
122
  }
117
123
 
118
- for input in toProcess {
119
- if datasetTypes[input.clonotypingBlockId] == "bulk" {
124
+ for entry in entries {
125
+ if datasetTypes[entry.clonotypingBlockId] == "bulk" {
120
126
  downsamplingCmdBuilder := exec.builder().
121
127
  printErrStreamToStdout().
122
128
  secret("MI_LICENSE", "MI_LICENSE").
@@ -126,15 +132,21 @@ self.body(func(inputs) {
126
132
  arg("--downsampling").
127
133
  arg(downsamplingParam).
128
134
  arg("clones.clns").
129
- addFile("clones.clns", input.alleles).
135
+ addFile("clones.clns", entry.afterAlleles).
130
136
  saveFile("clones.downsampled.clns")
131
137
  addLibraryFile(downsamplingCmdBuilder)
132
138
  downsamplingCmd := downsamplingCmdBuilder.run()
133
- input.alleles = downsamplingCmd.getFile("clones.downsampled.clns")
139
+
140
+ // overriding trees input with downsampled file
141
+ entry.treesInput = downsamplingCmd.getFile("clones.downsampled.clns")
134
142
  }
135
143
  }
136
144
  }
137
145
 
146
+ //
147
+ // SHM trees inference
148
+ //
149
+
138
150
  shmTreesCmdBuilder := exec.builder().
139
151
  printErrStreamToStdout().
140
152
  secret("MI_LICENSE", "MI_LICENSE").
@@ -152,10 +164,10 @@ self.body(func(inputs) {
152
164
 
153
165
  addLibraryFile(shmTreesCmdBuilder)
154
166
 
155
- for input in toProcess {
167
+ for entry in entries {
156
168
  shmTreesCmdBuilder.
157
- addFile(input.fileName, input.alleles).
158
- arg(input.fileName)
169
+ addFile(entry.fileName, entry.treesInput).
170
+ arg(entry.fileName)
159
171
  }
160
172
 
161
173
  shmTreesCmdBuilder.arg("output.shmt").saveFile("output.shmt")
@@ -163,107 +175,29 @@ self.body(func(inputs) {
163
175
  shmTrees := shmTreesCmdBuilder.run()
164
176
  outputShmt := shmTrees.getFile("output.shmt")
165
177
 
166
- // export trees without nodes
167
- shmTreeExportsCmdBuilder := exec.builder().
168
- printErrStreamToStdout().
169
- dontSaveStdoutOrStderr().
170
- inLightQueue().
171
- secret("MI_LICENSE", "MI_LICENSE").
172
- software(mixcrSw).
173
- arg("exportShmTrees")
174
-
175
- addLibraryFile(shmTreeExportsCmdBuilder)
176
-
177
- for arg in inputs.shmTreeTableArgs {
178
- shmTreeExportsCmdBuilder = shmTreeExportsCmdBuilder.arg(arg)
179
- }
180
-
181
- shmTreeExportsCmdBuilder = shmTreeExportsCmdBuilder.
182
- arg("input.shmt").
183
- arg("output.tsv").
184
- addFile("input.shmt", outputShmt).
185
- saveFile("output.tsv")
186
-
187
- shmTreeExports := shmTreeExportsCmdBuilder.run()
188
- shmTreeTsvRaw := shmTreeExports.getFile("output.tsv")
189
-
190
- shmTreeTsv := tablesAggregation.ensureUniqueness(shmTreeTsvRaw, inputs.shmTreeTableOptions.pfconvParams, "max_by", "totalReadsCountInTree")
191
-
192
- // export tree nodes with data uniq for the node
193
- shmTreeNodesExportsCmdBuilder := exec.builder().
194
- printErrStreamToStdout().
195
- dontSaveStdoutOrStderr().
196
- inLightQueue().
197
- secret("MI_LICENSE", "MI_LICENSE").
198
- software(mixcrSw).
199
- arg("exportShmTreesWithNodes")
200
-
201
- addLibraryFile(shmTreeNodesExportsCmdBuilder)
202
-
203
- for arg in inputs.shmTreeNodesTableOptions.cmdArgs {
204
- shmTreeNodesExportsCmdBuilder = shmTreeNodesExportsCmdBuilder.arg(arg)
205
- }
206
-
207
- shmTreeNodesExportsCmdBuilder = shmTreeNodesExportsCmdBuilder.
208
- arg("input.shmt").
209
- arg("output.tsv").
210
- addFile("input.shmt", outputShmt).
211
- saveFile("output.tsv")
212
-
213
- shmTreeNodesExports := shmTreeNodesExportsCmdBuilder.run()
214
- shmTreeNodesTsvRaw := shmTreeNodesExports.getFile("output.tsv")
215
-
216
- shmTreeNodesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesTsvRaw, inputs.shmTreeNodesTableOptions.pfconvParams, "first")
217
-
218
- // export nodes with clones. For each node could be several clones
219
- shmTreeNodesWithClonesExportsCmdBuilder := exec.builder().
220
- printErrStreamToStdout().
221
- dontSaveStdoutOrStderr().
222
- inLightQueue().
223
- secret("MI_LICENSE", "MI_LICENSE").
224
- env("MI_PROGRESS_PREFIX", progressPrefix).
225
- software(mixcrSw).
226
- arg("exportShmTreesWithNodes").
227
- // don't export nodes that don't have clones
228
- arg("--only-observed")
229
-
230
- addLibraryFile(shmTreeNodesWithClonesExportsCmdBuilder)
231
-
232
- for arg in inputs.shmTreeNodesWithClonesTableArgs {
233
- shmTreeNodesWithClonesExportsCmdBuilder = shmTreeNodesWithClonesExportsCmdBuilder.arg(arg)
234
- }
235
-
236
- shmTreeNodesWithClonesExportsCmdBuilder = shmTreeNodesWithClonesExportsCmdBuilder.
237
- arg("input.shmt").
238
- arg("output.tsv").
239
- addFile("input.shmt", outputShmt).
240
- saveFile("output.tsv")
241
-
242
- shmTreeNodesWithClonesExports := shmTreeNodesWithClonesExportsCmdBuilder.run()
243
- shmTreeNodesWithClonesTsvRaw := shmTreeNodesWithClonesExports.getFile("output.tsv")
178
+ return {
179
+ // returning to be cached
180
+ alleles: smart.createMapResource(maps.mapValues(entries, func(entry) {
181
+ return entry.afterAlleles
182
+ })),
244
183
 
245
- shmTreeNodesWithClonesTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsvRaw, inputs.shmTreeNodesWithClonesTableOptions.pfconvParams, "max_by", "readCount")
246
- shmTreeNodesUniqueIsotypeTsv := tablesAggregation.ensureUniqueness(shmTreeNodesWithClonesTsv, inputs.shmTreeNodesUniqueIsotypeTableOptions.pfconvParams, "max_by", "readCount")
184
+ // returning to be cached
185
+ downsampled: smart.createMapResource(maps.mapValues(entries, func(entry) {
186
+ return entry.treesInput
187
+ })),
247
188
 
248
- return {
249
- trees: shmTreeTsv,
250
- treeNodes: shmTreeNodesTsv,
251
- treeNodesWithClones: shmTreeNodesWithClonesTsv,
252
- treeNodesUniqueIsotype : shmTreeNodesUniqueIsotypeTsv,
253
-
254
- tsvs: {
255
- trees: shmTreeTsv,
256
- treeNodes: shmTreeNodesTsv,
257
- treeNodesWithClones: shmTreeNodesWithClonesTsv,
258
- treeNodesUniqueIsotype : shmTreeNodesUniqueIsotypeTsv
259
- },
189
+ // main output
190
+ shmt: outputShmt,
260
191
 
192
+ // logs
261
193
  allelesLog: alleles.getStdoutStream(),
262
194
  treesLog: shmTrees.getStdoutStream(),
263
195
 
196
+ // reports
264
197
  allelesReport: alleles.getFile("report.txt"),
265
198
  treesReport: shmTrees.getFile("report.txt"),
266
199
 
200
+ // json reports
267
201
  allelesReportJson: alleles.getFile("report.json"),
268
202
  treesReportJson: shmTrees.getFile("report.json")
269
203
  }
@@ -1,16 +1,18 @@
1
1
  ll := import("@platforma-sdk/workflow-tengo:ll")
2
2
  smart := import("@platforma-sdk/workflow-tengo:smart")
3
3
  maps := import("@platforma-sdk/workflow-tengo:maps")
4
+ slices := import("@platforma-sdk/workflow-tengo:slices")
5
+ pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
4
6
 
5
7
  json := import("json")
6
8
 
7
9
  _P_COLUMN_DATA_RESOURCE_MAP := { Name: "PColumnData/ResourceMap", Version: "1" }
8
10
 
9
- groupDataByDonorId := func(donorColumn, datasets) {
11
+ groupDataByDonorId := func(donorColumn, datasets, firstDatasetSpec) {
10
12
  // we need to form a pColumn with three axes:
11
13
  // axes[0]: donorId
12
14
  // axes[1]: sampleId
13
- // axes[2]: mixcrclonotypingBlockId
15
+ // axes[2]: mixcrClonotypingBlockId
14
16
  // value: fileRef resource
15
17
 
16
18
  // we have:
@@ -23,9 +25,11 @@ groupDataByDonorId := func(donorColumn, datasets) {
23
25
 
24
26
  donorColumnSpec := donorColumn.spec
25
27
 
28
+ trace := pSpec.makeTrace(firstDatasetSpec)
29
+
26
30
  sampleIdAxis := donorColumnSpec.axesSpec[0]
27
31
 
28
- resultSpec := maps.clone({
32
+ resultSpec := trace.inject(maps.clone({
29
33
  kind: "PColumn",
30
34
  name: "mixcr.com/clns",
31
35
  valueType: "File",
@@ -47,7 +51,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
47
51
  }
48
52
  }
49
53
  ]
50
- }, { removeUndefs: true })
54
+ }, { removeUndefs: true }))
51
55
 
52
56
  // creating sample to donor map
53
57