@platforma-open/milaboratories.mixcr-shm-trees.workflow 2.3.3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+
2
+ > @platforma-open/milaboratories.mixcr-shm-trees.workflow@3.0.0 build /home/runner/work/mixcr-shm-trees/mixcr-shm-trees/workflow
3
+ > rm -rf dist && pl-tengo check && pl-tengo build
4
+
5
+ Processing "src/export-settings.lib.tengo"...
6
+ Processing "src/main.tpl.tengo"...
7
+ Processing "src/prepare-donor-column.lib.tengo"...
8
+ Processing "src/process.tpl.tengo"...
9
+ Processing "src/reconstruct-shm-trees.tpl.tengo"...
10
+ Processing "src/soi-export.lib.tengo"...
11
+ Processing "src/soi.tpl.tengo"...
12
+ No syntax errors found.
13
+ info: Compiling 'dist'...
14
+ info: - writing /home/runner/work/mixcr-shm-trees/mixcr-shm-trees/workflow/dist/tengo/lib/export-settings.lib.tengo
15
+ info: - writing /home/runner/work/mixcr-shm-trees/mixcr-shm-trees/workflow/dist/tengo/lib/prepare-donor-column.lib.tengo
16
+ info: - writing /home/runner/work/mixcr-shm-trees/mixcr-shm-trees/workflow/dist/tengo/lib/soi-export.lib.tengo
17
+ info: - writing /home/runner/work/mixcr-shm-trees/mixcr-shm-trees/workflow/dist/tengo/tpl/reconstruct-shm-trees.plj.gz
18
+ info: - writing /home/runner/work/mixcr-shm-trees/mixcr-shm-trees/workflow/dist/tengo/tpl/soi.plj.gz
19
+ info: - writing /home/runner/work/mixcr-shm-trees/mixcr-shm-trees/workflow/dist/tengo/tpl/process.plj.gz
20
+ info: - writing /home/runner/work/mixcr-shm-trees/mixcr-shm-trees/workflow/dist/tengo/tpl/main.plj.gz
21
+ info:
22
+ info: Template Pack build done.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.mixcr-shm-trees.workflow
2
2
 
3
+ ## 3.0.0
4
+
5
+ ### Major Changes
6
+
7
+ - 84fec04: Sequence Search Support
8
+
9
+ ## 2.3.4
10
+
11
+ ### Patch Changes
12
+
13
+ - ef7df65: SDK upgrade
14
+
3
15
  ## 2.3.3
4
16
 
5
17
  ### Patch Changes
package/dist/index.cjs CHANGED
@@ -1,5 +1,6 @@
1
1
  module.exports = { Templates: {
2
2
  'reconstruct-shm-trees': { type: 'from-file', path: require.resolve('./tengo/tpl/reconstruct-shm-trees.plj.gz') },
3
+ 'soi': { type: 'from-file', path: require.resolve('./tengo/tpl/soi.plj.gz') },
3
4
  'process': { type: 'from-file', path: require.resolve('./tengo/tpl/process.plj.gz') },
4
5
  'main': { type: 'from-file', path: require.resolve('./tengo/tpl/main.plj.gz') }
5
6
  }};
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  declare type TemplateFromFile = { readonly type: "from-file"; readonly path: string; };
2
- declare type TplName = "reconstruct-shm-trees" | "process" | "main";
2
+ declare type TplName = "reconstruct-shm-trees" | "soi" | "process" | "main";
3
3
  declare const Templates: Record<TplName, TemplateFromFile>;
4
4
  export { Templates };
package/dist/index.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import { resolve } from 'node:path';
2
2
  export const Templates = {
3
3
  'reconstruct-shm-trees': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/reconstruct-shm-trees.plj.gz') },
4
+ 'soi': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/soi.plj.gz') },
4
5
  'process': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/process.plj.gz') },
5
6
  'main': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/main.plj.gz') }
6
7
  };
@@ -444,7 +444,7 @@ shmTreeNodesTableOptions := func(dataDescription) {
444
444
  // export data that is unique for clones, but not unique for a node
445
445
  // (different clones could be in the same topology node, for example, different time points)
446
446
  shmTreeNodesWithClonesTableOptions := func(dataDescription, donorColumn) {
447
- donorColumnSpec := donorColumn.get("spec").getDataAsJson()
447
+ donorColumnSpec := donorColumn.spec
448
448
 
449
449
  axes := []
450
450
  columns := []
@@ -20,7 +20,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
20
20
  // axes[0]: sampleId
21
21
  // value: fileRef resource
22
22
 
23
- donorColumnSpec := donorColumn.get("spec").getDataAsJson()
23
+ donorColumnSpec := donorColumn.spec
24
24
 
25
25
  sampleIdAxis := donorColumnSpec.axesSpec[0]
26
26
 
@@ -53,7 +53,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
53
53
  sampleToDonor := {}
54
54
 
55
55
  // columns with meta could be fetched as data directly
56
- for k, v in donorColumn.get("data").getDataAsJson()["data"] {
56
+ for k, v in donorColumn.data.getDataAsJson()["data"] {
57
57
  sampleId := json.decode(k)[0]
58
58
  sampleToDonor[sampleId] = v
59
59
  }
@@ -63,7 +63,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
63
63
 
64
64
  // collect all the clns files that we have into pColumn
65
65
  for clonotypingBlockId, dataset in datasets {
66
- for sKey, fileRef in dataset.get("data").inputs() {
66
+ for sKey, fileRef in dataset.data.inputs() {
67
67
  sampleId := json.decode(sKey)[0]
68
68
  donor := sampleToDonor[sampleId]
69
69
  dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
@@ -0,0 +1,39 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+
3
+ soiResultImportColumns := func(dbParameters) {
4
+ if dbParameters.searchParameters.type == "tree_search_top" {
5
+ return [ {
6
+ "column": "topHit",
7
+ "id": "topHit",
8
+ "spec": {
9
+ "name": "pl7.app/search/topHit",
10
+ "valueType": "String",
11
+ "annotations": {
12
+ "pl7.app/label": "Top Hit " + dbParameters.name
13
+ }
14
+ },
15
+ "domain": {
16
+ "pl7.app/list": dbParameters.id
17
+ }
18
+ }, {
19
+ "column": "mutations",
20
+ "id": "mutations",
21
+ "spec": {
22
+ "name": "pl7.app/search/numberOfMutations",
23
+ "valueType": "Int",
24
+ "annotations": {
25
+ "pl7.app/label": "Number of mutations " + dbParameters.name
26
+ }
27
+ },
28
+ "domain": {
29
+ "pl7.app/list": dbParameters.id
30
+ }
31
+ } ]
32
+ } else {
33
+ ll.panic("Unknown search mode: " + dbParameters.searchParameters.type)
34
+ }
35
+ }
36
+
37
+ export ll.toStrict({
38
+ soiResultImportColumns: soiResultImportColumns
39
+ })
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,18 +1,20 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-shm-trees.workflow",
3
- "version": "2.3.3",
3
+ "version": "3.0.0",
4
4
  "type": "module",
5
5
  "description": "Tengo-based template",
6
6
  "//": {
7
7
  "build": "node ./scripts/build-static.mjs src/pfconv_params.json src/pfconv_params.lib.tengo && rm -rf dist && pl-tengo check && pl-tengo build && ./create_tags.sh"
8
8
  },
9
9
  "devDependencies": {
10
- "@platforma-sdk/tengo-builder": "^1.17.3",
11
- "@platforma-sdk/workflow-tengo": "^2.6.0",
10
+ "@platforma-sdk/tengo-builder": "^1.17.4",
11
+ "@platforma-sdk/workflow-tengo": "^2.8.2",
12
12
  "@milaboratories/software-pframes-conv": "^2.1.2",
13
- "@platforma-open/milaboratories.software-small-binaries": "^1.14.6",
13
+ "@platforma-open/milaboratories.software-small-binaries": "^1.15.0",
14
14
  "@platforma-open/milaboratories.software-mixcr": "4.7.0-133-develop",
15
- "@platforma-sdk/test": "^1.17.0",
15
+ "@platforma-open/milaboratories.software-mitool": "2.3.1-2-main",
16
+ "@platforma-open/milaboratories.software-paggregate": "^1.0.1",
17
+ "@platforma-sdk/test": "^1.20.9",
16
18
  "vitest": "^2.1.8",
17
19
  "typescript": "~5.6.3"
18
20
  },
@@ -444,7 +444,7 @@ shmTreeNodesTableOptions := func(dataDescription) {
444
444
  // export data that is unique for clones, but not unique for a node
445
445
  // (different clones could be in the same topology node, for example, different time points)
446
446
  shmTreeNodesWithClonesTableOptions := func(dataDescription, donorColumn) {
447
- donorColumnSpec := donorColumn.get("spec").getDataAsJson()
447
+ donorColumnSpec := donorColumn.spec
448
448
 
449
449
  axes := []
450
450
  columns := []
@@ -35,7 +35,8 @@ wf.body(func(args) {
35
35
  datasets: datasets,
36
36
  donorColumn: donorColumn,
37
37
  params: {
38
- downsampling: args.downsampling
38
+ downsampling: args.downsampling,
39
+ sequencesOfInterest: args.sequencesOfInterest
39
40
  }
40
41
  })
41
42
 
@@ -45,6 +46,9 @@ wf.body(func(args) {
45
46
  "treeNodes": results.output("treeNodes"),
46
47
  "treeNodesWithClones": results.output("treeNodesWithClones"),
47
48
 
49
+ "soiNodesResults": results.output("soiNodesResults"),
50
+ "soiTreesResults": results.output("soiTreesResults"),
51
+
48
52
  "tsvs": results.output("tsvs"),
49
53
  "allelesLogs": results.output("allelesLogs"),
50
54
  "treesLogs": results.output("treesLogs"),
@@ -20,7 +20,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
20
20
  // axes[0]: sampleId
21
21
  // value: fileRef resource
22
22
 
23
- donorColumnSpec := donorColumn.get("spec").getDataAsJson()
23
+ donorColumnSpec := donorColumn.spec
24
24
 
25
25
  sampleIdAxis := donorColumnSpec.axesSpec[0]
26
26
 
@@ -53,7 +53,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
53
53
  sampleToDonor := {}
54
54
 
55
55
  // columns with meta could be fetched as data directly
56
- for k, v in donorColumn.get("data").getDataAsJson()["data"] {
56
+ for k, v in donorColumn.data.getDataAsJson()["data"] {
57
57
  sampleId := json.decode(k)[0]
58
58
  sampleToDonor[sampleId] = v
59
59
  }
@@ -63,7 +63,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
63
63
 
64
64
  // collect all the clns files that we have into pColumn
65
65
  for clonotypingBlockId, dataset in datasets {
66
- for sKey, fileRef in dataset.get("data").inputs() {
66
+ for sKey, fileRef in dataset.data.inputs() {
67
67
  sampleId := json.decode(sKey)[0]
68
68
  donor := sampleToDonor[sampleId]
69
69
  dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
@@ -4,6 +4,7 @@ llPFrames := import("@platforma-sdk/workflow-tengo:pframes.ll")
4
4
  ll := import("@platforma-sdk/workflow-tengo:ll")
5
5
  maps := import("@platforma-sdk/workflow-tengo:maps")
6
6
  assets := import("@platforma-sdk/workflow-tengo:assets")
7
+ render := import("@platforma-sdk/workflow-tengo:render")
7
8
  xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
8
9
  text := import("text")
9
10
  exportSettings := import(":export-settings")
@@ -20,6 +21,8 @@ self.awaitState("datasets", { wildcard: "*" }, "spec", "ResourceReady")
20
21
  self.awaitState("donorColumn", "ResourceReady")
21
22
  self.awaitState("params", "ResourceReady")
22
23
 
24
+ soiTpl := assets.importTemplate(":soi")
25
+
23
26
  self.body(func(inputs) {
24
27
  // overall description of data that we have.
25
28
  dataDescription := {
@@ -35,7 +38,9 @@ self.body(func(inputs) {
35
38
 
36
39
  assemblingFeature := ""
37
40
  for clonotypingBlockId, dataset in inputs.datasets {
38
- presetAnnotations := dataset.get("spec").getDataAsJson()["annotations"]
41
+ presetAnnotations := dataset.spec.annotations
42
+
43
+ ll.assert(!is_undefined(presetAnnotations), "No annotations in dataset specs")
39
44
 
40
45
  datasetTypes[clonotypingBlockId] = "bulk"
41
46
 
@@ -149,6 +154,41 @@ self.body(func(inputs) {
149
154
  additionalArgsForImportTsv
150
155
  )
151
156
 
157
+ // Running SOI search for the data
158
+ soiNodesResults := {}
159
+ soiTreesResults := {}
160
+ for soiDb in inputs.params.sequencesOfInterest {
161
+
162
+ columnId := ""
163
+ if soiDb.parameters.type == "nucleotide" {
164
+ columnId = "n-seq-"
165
+ } else if soiDb.parameters.type == "amino-acid" {
166
+ columnId = "aa-seq-"
167
+ } else {
168
+ ll.panic("unknown alphabet: " + soiDb.parameters.type)
169
+ }
170
+
171
+ if soiDb.parameters.targetFeature == "CDR3" {
172
+ columnId = columnId + "CDR3"
173
+ } else if soiDb.parameters.targetFeature == "VDJRegion" {
174
+ columnId = columnId + "VDJRegion"
175
+ } else {
176
+ ll.panic("unknown target feature: " + soiDb.parameters.targetFeature)
177
+ }
178
+
179
+ querySpec := treeNodes[columnId + ".spec"]
180
+ queryData := treeNodes[columnId + ".data"]
181
+
182
+ soiResult := render.create(soiTpl, {
183
+ querySpec: querySpec,
184
+ queryData: queryData,
185
+ db: soiDb
186
+ })
187
+
188
+ soiNodesResults[soiDb.parameters.id] = soiResult.output("nodesResult")
189
+ soiTreesResults[soiDb.parameters.id] = soiResult.output("treesResult")
190
+ }
191
+
152
192
  tsvs := mixcrResults.output("tsvs")
153
193
 
154
194
  return {
@@ -159,6 +199,9 @@ self.body(func(inputs) {
159
199
  // combine columns into pFrame
160
200
  "treeNodesWithClones": pframes.exportFrame(treeNodesWithClones),
161
201
 
202
+ "soiNodesResults": maps.mapValues(soiNodesResults, pframes.exportFrame),
203
+ "soiTreesResults": maps.mapValues(soiTreesResults, pframes.exportFrame),
204
+
162
205
  "tsvs": tsvs,
163
206
 
164
207
  "allelesLogs": mixcrResults.output("allelesLog"),
@@ -0,0 +1,39 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+
3
+ soiResultImportColumns := func(dbParameters) {
4
+ if dbParameters.searchParameters.type == "tree_search_top" {
5
+ return [ {
6
+ "column": "topHit",
7
+ "id": "topHit",
8
+ "spec": {
9
+ "name": "pl7.app/search/topHit",
10
+ "valueType": "String",
11
+ "annotations": {
12
+ "pl7.app/label": "Top Hit " + dbParameters.name
13
+ }
14
+ },
15
+ "domain": {
16
+ "pl7.app/list": dbParameters.id
17
+ }
18
+ }, {
19
+ "column": "mutations",
20
+ "id": "mutations",
21
+ "spec": {
22
+ "name": "pl7.app/search/numberOfMutations",
23
+ "valueType": "Int",
24
+ "annotations": {
25
+ "pl7.app/label": "Number of mutations " + dbParameters.name
26
+ }
27
+ },
28
+ "domain": {
29
+ "pl7.app/list": dbParameters.id
30
+ }
31
+ } ]
32
+ } else {
33
+ ll.panic("Unknown search mode: " + dbParameters.searchParameters.type)
34
+ }
35
+ }
36
+
37
+ export ll.toStrict({
38
+ soiResultImportColumns: soiResultImportColumns
39
+ })
@@ -0,0 +1,152 @@
1
+ self := import("@platforma-sdk/workflow-tengo:tpl")
2
+ assets := import("@platforma-sdk/workflow-tengo:assets")
3
+ maps := import("@platforma-sdk/workflow-tengo:maps")
4
+ exec := import("@platforma-sdk/workflow-tengo:exec")
5
+ xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
6
+ json := import("json")
7
+ pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
8
+ soiExport := import(":soi-export")
9
+
10
+ self.defineOutputs("nodesResult", "treesResult")
11
+
12
+ // import mitool and paggregate as software to use
13
+ mitoolSw := assets.importSoftware("@platforma-open/milaboratories.software-mitool:main")
14
+ paggregateSw := assets.importSoftware("@platforma-open/milaboratories.software-paggregate:main")
15
+
16
+ inferPartitionKeyLength := func(data) {
17
+ rType := data.info().Type.Name
18
+ if (rType == pConstants.RTYPE_P_COLUMN_DATA_JSON_PARTITIONED.Name) {
19
+ return data.getDataAsJson().partitionKeyLength
20
+ } else if (rType == pConstants.RTYPE_P_COLUMN_DATA_BINARY_PARTITIONED.Name) {
21
+ return data.getDataAsJson().partitionKeyLength
22
+ } else if (rType == pConstants.RTYPE_P_COLUMN_DATA_JSON_SUPER_PARTITIONED.Name) {
23
+ superPartitionKeyLength := data.getDataAsJson().superPartitionKeyLength
24
+ partitionKeyLength := data.getDataAsJson().partitionKeyLength
25
+ return superPartitionKeyLength + partitionKeyLength
26
+ } else if (rType == pConstants.RTYPE_P_COLUMN_DATA_BINARY_SUPER_PARTITIONED.Name) {
27
+ superPartitionKeyLength := data.getDataAsJson().superPartitionKeyLength
28
+ partitionKeyLength := data.getDataAsJson().partitionKeyLength
29
+ return superPartitionKeyLength + partitionKeyLength
30
+ }
31
+ return 0
32
+ }
33
+
34
+ self.body(func(inputs) {
35
+ querySpec := inputs.querySpec
36
+ queryData := inputs.queryData
37
+ db := inputs.db
38
+
39
+ dbData := "sequence\tname\n"
40
+ for entry in db.sequences {
41
+ dbData = dbData + entry.sequence + "\t" + entry.name + "\n"
42
+ }
43
+
44
+ originalAxesSpecs := querySpec.axesSpec
45
+ modifiedAxesSpecs := []
46
+ importAxesSpec := []
47
+
48
+ aggregationGroupByTargets := []
49
+ aggregationImportAxesSpec := []
50
+
51
+ for idx, spec in originalAxesSpecs {
52
+ colName := "key" + idx
53
+ modifiedAxesSpecs = append(modifiedAxesSpecs, maps.deepMerge(
54
+ spec,
55
+ { annotations: { "pl7.app/label": colName } }
56
+ ))
57
+ importAxesSpec = append(importAxesSpec, {
58
+ "column": colName,
59
+ "spec": spec
60
+ })
61
+
62
+ // aggregating clonal and subtree axes away
63
+ if spec.name != "pl7.app/dendrogram/subtreeId" && spec.name != "pl7.app/dendrogram/nodeId" {
64
+ aggregationGroupByTargets = append(aggregationGroupByTargets, colName)
65
+ aggregationImportAxesSpec = append(aggregationImportAxesSpec, {
66
+ "column": colName,
67
+ "spec": spec
68
+ })
69
+ }
70
+ }
71
+ modifiedQuerySpec := maps.deepMerge(querySpec, {
72
+ axesSpec: modifiedAxesSpecs,
73
+ annotations: {"pl7.app/label": "query"} })
74
+
75
+ inputTsv := xsv.exportFrame([{spec: modifiedQuerySpec, data: queryData}], "tsv", {})
76
+
77
+ searchCmd := exec.builder().
78
+ printErrStreamToStdout().
79
+ secret("MI_LICENSE", "MI_LICENSE").
80
+ software(mitoolSw).
81
+ arg("search").
82
+ arg("--alphabet").arg(db.parameters.type).
83
+ arg("--database").arg("database.tsv").
84
+ writeFile("database.tsv", dbData).
85
+ arg("--parameters").arg("params.json").
86
+ writeFile("params.json", json.encode(db.parameters.searchParameters)).
87
+ arg("--hits-only").
88
+ arg("--target-column").arg("query").
89
+ arg("input.tsv").addFile("input.tsv", inputTsv).
90
+ arg("output.tsv").saveFile("output.tsv").
91
+ run()
92
+
93
+ resultCsv := searchCmd.getFile("output.tsv")
94
+
95
+ resultColumns := soiExport.soiResultImportColumns(db.parameters)
96
+ resultConvParams := {
97
+ "axes": importAxesSpec,
98
+ "columns": resultColumns,
99
+ "storageFormat": "Binary",
100
+ "partitionKeyLength": 0 // inferPartitionKeyLength(queryData)
101
+ }
102
+
103
+ aggregatedConvParams := {
104
+ "axes": aggregationImportAxesSpec,
105
+ "columns": resultColumns,
106
+ "storageFormat": "Binary",
107
+ "partitionKeyLength": 0 // inferPartitionKeyLength(queryData)
108
+ }
109
+
110
+ aggregations := []
111
+ for col in resultColumns {
112
+ aggregations = append(aggregations, {
113
+ type: "first",
114
+ src: col.column,
115
+ dst: col.column
116
+ })
117
+ }
118
+
119
+ aggregationWorkflow := { steps: [ {
120
+ type: "aggregate",
121
+ groupBy: aggregationGroupByTargets,
122
+ aggregations: aggregations
123
+ } ] }
124
+
125
+ aggregateCmd := exec.builder().
126
+ printErrStreamToStdout().
127
+ software(paggregateSw).
128
+ arg("--workflow").arg("wf.json").
129
+ writeFile("wf.json", json.encode(aggregationWorkflow)).
130
+ arg("input.tsv").addFile("input.tsv", resultCsv).
131
+ arg("output.tsv").saveFile("output.tsv").
132
+ run()
133
+
134
+ aggregatedCsv := aggregateCmd.getFile("output.tsv")
135
+
136
+ nodesResult := xsv.importFile(
137
+ resultCsv,
138
+ "tsv",
139
+ resultConvParams
140
+ )
141
+
142
+ treesResult := xsv.importFile(
143
+ aggregatedCsv,
144
+ "tsv",
145
+ aggregatedConvParams
146
+ )
147
+
148
+ return {
149
+ nodesResult: nodesResult,
150
+ treesResult: treesResult
151
+ }
152
+ })