@platforma-open/milaboratories.mixcr-shm-trees.workflow 3.2.1 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @platforma-open/milaboratories.mixcr-shm-trees.workflow@3.2.1 build /home/runner/work/mixcr-shm-trees/mixcr-shm-trees/workflow
2
+ > @platforma-open/milaboratories.mixcr-shm-trees.workflow@3.3.1 build /home/runner/work/mixcr-shm-trees/mixcr-shm-trees/workflow
3
3
  > rm -rf dist && pl-tengo check && pl-tengo build
4
4
 
5
5
  Processing "src/export-settings.lib.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.mixcr-shm-trees.workflow
2
2
 
3
+ ## 3.3.1
4
+
5
+ ### Patch Changes
6
+
7
+ - d8d8f49: Additional uniqueness-enforcement step in the tree reconstruction workflow
8
+
9
+ ## 3.3.0
10
+
11
+ ### Minor Changes
12
+
13
+ - b8c818f: Alignment-based sequence search for sequence collections, multiple other improvements for this subsystem
14
+
3
15
  ## 3.2.1
4
16
 
5
17
  ### Patch Changes
@@ -1,6 +1,9 @@
1
1
  ll := import("@platforma-sdk/workflow-tengo:ll")
2
2
 
3
3
  soiResultImportColumns := func(dbParameters) {
4
+ commonDomain := {
5
+ "pl7.app/list": dbParameters.id
6
+ }
4
7
  if dbParameters.searchParameters.type == "tree_search_top" {
5
8
  return [ {
6
9
  column: "topHit",
@@ -12,9 +15,7 @@ soiResultImportColumns := func(dbParameters) {
12
15
  "pl7.app/label": "Top Hit " + dbParameters.name
13
16
  }
14
17
  },
15
- domain: {
16
- "pl7.app/list": dbParameters.id
17
- }
18
+ domain: commonDomain
18
19
  }, {
19
20
  column: "mutations",
20
21
  id: "mutations",
@@ -25,9 +26,42 @@ soiResultImportColumns := func(dbParameters) {
25
26
  "pl7.app/label": "Number of mutations " + dbParameters.name
26
27
  }
27
28
  },
28
- domain: {
29
- "pl7.app/list": dbParameters.id
30
- }
29
+ domain: commonDomain
30
+ } ]
31
+ } else if dbParameters.searchParameters.type == "preset_alignment_search_top" {
32
+ return [ {
33
+ column: "topHit",
34
+ id: "topHit",
35
+ spec: {
36
+ name: "pl7.app/search/topHit",
37
+ valueType: "String",
38
+ annotations: {
39
+ "pl7.app/label": "Top Hit " + dbParameters.name
40
+ }
41
+ },
42
+ domain: commonDomain
43
+ }, {
44
+ column: "penalty",
45
+ id: "penalty",
46
+ spec: {
47
+ name: "pl7.app/search/alignmentPenalty",
48
+ valueType: "Int",
49
+ annotations: {
50
+ "pl7.app/label": "Alignment penalty " + dbParameters.name
51
+ }
52
+ },
53
+ domain: commonDomain
54
+ }, {
55
+ column: "score",
56
+ id: "score",
57
+ spec: {
58
+ name: "pl7.app/search/alignmentScore",
59
+ valueType: "Int",
60
+ annotations: {
61
+ "pl7.app/label": "Alignment score " + dbParameters.name
62
+ }
63
+ },
64
+ domain: commonDomain
31
65
  } ]
32
66
  } else {
33
67
  ll.panic("Unknown search mode: " + dbParameters.searchParameters.type)
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-shm-trees.workflow",
3
- "version": "3.2.1",
3
+ "version": "3.3.1",
4
4
  "type": "module",
5
5
  "description": "Tengo-based template",
6
6
  "//": {
@@ -8,13 +8,13 @@
8
8
  },
9
9
  "devDependencies": {
10
10
  "@platforma-sdk/tengo-builder": "^1.17.5",
11
- "@platforma-sdk/workflow-tengo": "^2.9.2",
12
- "@milaboratories/software-pframes-conv": "^2.1.3",
11
+ "@platforma-sdk/workflow-tengo": "^2.9.6",
12
+ "@milaboratories/software-pframes-conv": "^2.1.5",
13
13
  "@platforma-open/milaboratories.software-small-binaries": "^1.15.6",
14
14
  "@platforma-open/milaboratories.software-mixcr": "4.7.0-139-develop",
15
- "@platforma-open/milaboratories.software-mitool": "2.3.1-2-main",
15
+ "@platforma-open/milaboratories.software-mitool": "2.3.1-5-main",
16
16
  "@platforma-open/milaboratories.software-paggregate": "^1.0.1",
17
- "@platforma-sdk/test": "^1.21.9",
17
+ "@platforma-sdk/test": "^1.21.20",
18
18
  "vitest": "^2.1.8",
19
19
  "typescript": "~5.6.3"
20
20
  },
@@ -122,6 +122,7 @@ self.body(func(inputs) {
122
122
  {
123
123
  shmTreeTableArgs: shmTreeTableOptions.cmdArgs,
124
124
  shmTreeNodesTableOptions: shmTreeNodesTableOptions,
125
+ shmTreeNodesWithClonesTableOptions: shmTreeNodesWithClonesTableOptions,
125
126
  shmTreeNodesWithClonesTableArgs: shmTreeNodesWithClonesTableOptions.cmdArgs,
126
127
  globalParams: maps.merge(
127
128
  inputs.params,
@@ -194,37 +194,40 @@ self.body(func(inputs) {
194
194
 
195
195
  // aggregating by-nodes output to make it uniquely addressable by its native key
196
196
 
197
- aggregations := []
198
- for col in inputs.shmTreeNodesTableOptions.pfconvParams.columns {
199
- aggregations = append(aggregations, {
200
- type: "first",
201
- src: col.column,
202
- dst: col.column
203
- })
204
- }
205
-
206
- keyColumns := []
207
- for axis in inputs.shmTreeNodesTableOptions.pfconvParams.axes {
208
- keyColumns = append(keyColumns, axis.column)
209
- }
210
-
211
- aggregationWorkflow := { steps: [ {
212
- type: "aggregate",
213
- groupBy: keyColumns,
214
- aggregations: aggregations
215
- } ] }
216
-
217
- aggregateCmd := exec.builder().
218
- printErrStreamToStdout().
219
- software(paggregateSw).
220
- arg("--workflow").arg("wf.json").
221
- writeFile("wf.json", json.encode(aggregationWorkflow)).
222
- arg("input.tsv").addFile("input.tsv", shmTreeNodesTsvRaw).
223
- arg("output.tsv").saveFile("output.tsv").
224
- run()
197
+ ensureUniqueness := func(inputTsv, pfConvParams) {
198
+ aggregations := []
199
+ for col in pfConvParams.columns {
200
+ aggregations = append(aggregations, {
201
+ type: "first",
202
+ src: col.column,
203
+ dst: col.column
204
+ })
205
+ }
206
+
207
+ keyColumns := []
208
+ for axis in pfConvParams.axes {
209
+ keyColumns = append(keyColumns, axis.column)
210
+ }
225
211
 
226
- shmTreeNodesTsv := aggregateCmd.getFile("output.tsv")
212
+ aggregationWorkflow := { steps: [ {
213
+ type: "aggregate",
214
+ groupBy: keyColumns,
215
+ aggregations: aggregations
216
+ } ] }
217
+
218
+ aggregateCmd := exec.builder().
219
+ printErrStreamToStdout().
220
+ software(paggregateSw).
221
+ arg("--workflow").arg("wf.json").
222
+ writeFile("wf.json", json.encode(aggregationWorkflow)).
223
+ arg("input.tsv").addFile("input.tsv", inputTsv).
224
+ arg("output.tsv").saveFile("output.tsv").
225
+ run()
226
+
227
+ return aggregateCmd.getFile("output.tsv")
228
+ }
227
229
 
230
+ shmTreeNodesTsv := ensureUniqueness(shmTreeNodesTsvRaw, inputs.shmTreeNodesTableOptions.pfconvParams)
228
231
 
229
232
  // export nodes with clones. For each node there could be several clones
230
233
  shmTreeNodesWithClonesExportsCmdBuilder := exec.builder().
@@ -248,7 +251,9 @@ self.body(func(inputs) {
248
251
  saveFile("output.tsv")
249
252
 
250
253
  shmTreeNodesWithClonesExports := shmTreeNodesWithClonesExportsCmdBuilder.run()
251
- shmTreeNodesWithClonesTsv := shmTreeNodesWithClonesExports.getFile("output.tsv")
254
+ shmTreeNodesWithClonesTsvRaw := shmTreeNodesWithClonesExports.getFile("output.tsv")
255
+
256
+ shmTreeNodesWithClonesTsv := ensureUniqueness(shmTreeNodesWithClonesTsvRaw, inputs.shmTreeNodesWithClonesTableOptions.pfconvParams)
252
257
 
253
258
  return {
254
259
  trees: shmTreeTsv,
@@ -1,6 +1,9 @@
1
1
  ll := import("@platforma-sdk/workflow-tengo:ll")
2
2
 
3
3
  soiResultImportColumns := func(dbParameters) {
4
+ commonDomain := {
5
+ "pl7.app/list": dbParameters.id
6
+ }
4
7
  if dbParameters.searchParameters.type == "tree_search_top" {
5
8
  return [ {
6
9
  column: "topHit",
@@ -12,9 +15,7 @@ soiResultImportColumns := func(dbParameters) {
12
15
  "pl7.app/label": "Top Hit " + dbParameters.name
13
16
  }
14
17
  },
15
- domain: {
16
- "pl7.app/list": dbParameters.id
17
- }
18
+ domain: commonDomain
18
19
  }, {
19
20
  column: "mutations",
20
21
  id: "mutations",
@@ -25,9 +26,42 @@ soiResultImportColumns := func(dbParameters) {
25
26
  "pl7.app/label": "Number of mutations " + dbParameters.name
26
27
  }
27
28
  },
28
- domain: {
29
- "pl7.app/list": dbParameters.id
30
- }
29
+ domain: commonDomain
30
+ } ]
31
+ } else if dbParameters.searchParameters.type == "preset_alignment_search_top" {
32
+ return [ {
33
+ column: "topHit",
34
+ id: "topHit",
35
+ spec: {
36
+ name: "pl7.app/search/topHit",
37
+ valueType: "String",
38
+ annotations: {
39
+ "pl7.app/label": "Top Hit " + dbParameters.name
40
+ }
41
+ },
42
+ domain: commonDomain
43
+ }, {
44
+ column: "penalty",
45
+ id: "penalty",
46
+ spec: {
47
+ name: "pl7.app/search/alignmentPenalty",
48
+ valueType: "Int",
49
+ annotations: {
50
+ "pl7.app/label": "Alignment penalty " + dbParameters.name
51
+ }
52
+ },
53
+ domain: commonDomain
54
+ }, {
55
+ column: "score",
56
+ id: "score",
57
+ spec: {
58
+ name: "pl7.app/search/alignmentScore",
59
+ valueType: "Int",
60
+ annotations: {
61
+ "pl7.app/label": "Alignment score " + dbParameters.name
62
+ }
63
+ },
64
+ domain: commonDomain
31
65
  } ]
32
66
  } else {
33
67
  ll.panic("Unknown search mode: " + dbParameters.searchParameters.type)
package/src/soi.tpl.tengo CHANGED
@@ -1,4 +1,5 @@
1
1
  self := import("@platforma-sdk/workflow-tengo:tpl")
2
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
3
  assets := import("@platforma-sdk/workflow-tengo:assets")
3
4
  maps := import("@platforma-sdk/workflow-tengo:maps")
4
5
  exec := import("@platforma-sdk/workflow-tengo:exec")
@@ -74,6 +75,58 @@ self.body(func(inputs) {
74
75
 
75
76
  inputTsv := xsv.exportFrame([{spec: modifiedQuerySpec, data: queryData}], "tsv", {})
76
77
 
78
+ actualSearchParameters := db.parameters.searchParameters
79
+ if actualSearchParameters.type == "preset_alignment_search_top" {
80
+ // nVDJRegion: 369
81
+ // nCDR3: 51
82
+ // aaVDJRegion: 641
83
+ // aaCDR3: 93
84
+ if db.parameters.type == "nucleotide" {
85
+ actualSearchParameters = {
86
+ type: "banded_alignment_search_top",
87
+ scoring: {
88
+ match: 1,
89
+ mismatch: -2,
90
+ gap: {
91
+ type: "affine",
92
+ openPenalty: -3,
93
+ extensionPenalty: -1
94
+ }
95
+ },
96
+ band: 9
97
+ }
98
+ if db.parameters.targetFeature == "CDR3" {
99
+ actualSearchParameters.maxPenalty = 1 + int(51 * db.parameters.searchParameters.dissimilarityPercent / 100)
100
+ } else if db.parameters.targetFeature == "VDJRegion" {
101
+ actualSearchParameters.maxPenalty = 1 + int(369 * db.parameters.searchParameters.dissimilarityPercent / 100)
102
+ } else {
103
+ ll.panic("Unknown target feature: " + db.parameters.targetFeature)
104
+ }
105
+ } else if db.parameters.type == "amino-acid" {
106
+ actualSearchParameters = {
107
+ type: "banded_alignment_search_top",
108
+ scoring: {
109
+ matrix: "BLOSUM62",
110
+ gap: {
111
+ type: "affine",
112
+ openPenalty: -10,
113
+ extensionPenalty: -1
114
+ }
115
+ },
116
+ band: 4
117
+ }
118
+ if db.parameters.targetFeature == "CDR3" {
119
+ actualSearchParameters.maxPenalty = 1 + int(93 * db.parameters.searchParameters.dissimilarityPercent / 100)
120
+ } else if db.parameters.targetFeature == "VDJRegion" {
121
+ actualSearchParameters.maxPenalty = 1 + int(641 * db.parameters.searchParameters.dissimilarityPercent / 100)
122
+ } else {
123
+ ll.panic("Unknown target feature: " + db.parameters.targetFeature)
124
+ }
125
+ } else {
126
+ ll.panic("Unknown alphabet: " + db.parameters.type)
127
+ }
128
+ }
129
+
77
130
  searchCmd := exec.builder().
78
131
  printErrStreamToStdout().
79
132
  secret("MI_LICENSE", "MI_LICENSE").
@@ -83,7 +136,7 @@ self.body(func(inputs) {
83
136
  arg("--database").arg("database.tsv").
84
137
  writeFile("database.tsv", dbData).
85
138
  arg("--parameters").arg("params.json").
86
- writeFile("params.json", json.encode(db.parameters.searchParameters)).
139
+ writeFile("params.json", json.encode(actualSearchParameters)).
87
140
  arg("--hits-only").
88
141
  arg("--target-column").arg("query").
89
142
  arg("input.tsv").addFile("input.tsv", inputTsv).