@platforma-open/milaboratories.mixcr-shm-trees.workflow 2.2.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # @platforma-open/milaboratories.mixcr-shm-trees.workflow
2
2
 
3
+ ## 2.3.1
4
+
5
+ ### Patch Changes
6
+
7
+ - 1edb8af: - Fix for column name "Number of clones" -> "Number of nodes"
8
+ - Added "Distance to parent" column
9
+ - "Distance to parent" column is now used as distance measure for tree visualization by default
10
+ - Assembling feature (i.e. VDJRegion) is added as individual export column
11
+
12
+ ## 2.3.0
13
+
14
+ ### Minor Changes
15
+
16
+ - 34af50b: Optional downsampling of clonesets before building SHM trees
17
+
18
+ ### Patch Changes
19
+
20
+ - 61c4b6b: SDK upgrade
21
+
3
22
  ## 2.2.0
4
23
 
5
24
  ### Minor Changes
@@ -101,7 +101,7 @@ shmTreeTableOptions := func(dataDescription) {
101
101
  "name": "pl7.app/vdj/numberOfNodesWithClones",
102
102
  "valueType": "Long",
103
103
  "annotations": {
104
- "pl7.app/label": "Number of clones"
104
+ "pl7.app/label": "Number of nodes"
105
105
  }
106
106
  }
107
107
  })
@@ -306,8 +306,31 @@ shmTreeNodesTableOptions := func(dataDescription) {
306
306
  "spec": {
307
307
  "name": "pl7.app/dendrogram/distance",
308
308
  "valueType": "Double",
309
+ "domain": {
310
+ "pl7.app/dendrogram/distance/from": "germline"
311
+ },
309
312
  "annotations": {
310
313
  "pl7.app/label": "Distanse from germline",
314
+ "pl7.app/dendrogram/distance/from": "germline", // change to domain only, once can be selected in graphmaker
315
+ "pl7.app/dendrogram/isDistance": "true"
316
+ }
317
+ }
318
+ })
319
+
320
+ cmdArgs = append(cmdArgs, "-distance", "parent")
321
+ columns = append(columns, {
322
+ "column": "DistanceFromParent",
323
+ "id": "distance-from-parent",
324
+ "allowNA": true,
325
+ "spec": {
326
+ "name": "pl7.app/dendrogram/distance",
327
+ "valueType": "Double",
328
+ "domain": {
329
+ "pl7.app/dendrogram/distance/from": "parent"
330
+ },
331
+ "annotations": {
332
+ "pl7.app/label": "Distanse from parent",
333
+ "pl7.app/dendrogram/distance/from": "parent", // change to domain only, once can be selected in graphmaker
311
334
  "pl7.app/dendrogram/isDistance": "true"
312
335
  }
313
336
  }
@@ -9,7 +9,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
9
9
  // we need to form a pColumn with three axes:
10
10
  // axes[0]: donorId
11
11
  // axes[1]: sampleId
12
- // axes[2]: mixcrBlockId
12
+ // axes[2]: mixcrclonotypingBlockId
13
13
  // value: fileRef resource
14
14
 
15
15
  // we have:
@@ -21,34 +21,34 @@ groupDataByDonorId := func(donorColumn, datasets) {
21
21
  // value: fileRef resource
22
22
 
23
23
  donorColumnSpec := donorColumn.get("spec").getDataAsJson()
24
-
25
- domain := {}
26
- if !is_undefined(donorColumnSpec["domain"]) {
27
- domain = donorColumnSpec["domain"]
28
- }
24
+
25
+ sampleIdAxis := donorColumnSpec.axesSpec[0]
26
+
29
27
  resultSpec := {
28
+ kind: "PColumn",
29
+ name: "mixcr.com/clns",
30
+ valueType: "File",
31
+
30
32
  // annotations and domain could differ between datasets
31
- "axesSpec": [
33
+ axesSpec: [
32
34
  {
33
- "annotations": donorColumnSpec["annotations"],
34
- "domain": domain,
35
- "name": donorColumnSpec["name"],
36
- "type": donorColumnSpec["valueType"]
35
+ name: donorColumnSpec.name,
36
+ type: donorColumnSpec.valueType,
37
+ domain: donorColumnSpec.domain,
38
+ annotations: donorColumnSpec.annotations
37
39
  },
38
- donorColumnSpec["axesSpec"][0],
40
+ sampleIdAxis,
39
41
  {
40
- "annotations": {
42
+ name: "pl7.app/block",
43
+ type: "String",
44
+ annotations: {
41
45
  "pl7.app/label": "Clonotyping block id"
42
- },
43
- "name": "pl7.app/blockId",
44
- "type": "String"
46
+ }
45
47
  }
46
- ],
47
- "kind": "PColumn",
48
- "name": "mixcr.com/clns",
49
- "valueType": "File"
48
+ ]
50
49
  }
51
-
50
+
51
+ // creating sample to donor map
52
52
 
53
53
  sampleToDonor := {}
54
54
 
@@ -62,11 +62,11 @@ groupDataByDonorId := func(donorColumn, datasets) {
62
62
  dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
63
63
 
64
64
  // collect all the clns files that we have into pColumn
65
- for blockId, dataset in datasets {
65
+ for clonotypingBlockId, dataset in datasets {
66
66
  for sKey, fileRef in dataset.get("data").inputs() {
67
67
  sampleId := json.decode(sKey)[0]
68
68
  donor := sampleToDonor[sampleId]
69
- dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
69
+ dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
70
70
  }
71
71
  }
72
72
 
Binary file
Binary file
package/package.json CHANGED
@@ -1,20 +1,20 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-shm-trees.workflow",
3
- "version": "2.2.0",
3
+ "version": "2.3.1",
4
4
  "type": "module",
5
5
  "description": "Tengo-based template",
6
6
  "//": {
7
7
  "build": "node ./scripts/build-static.mjs src/pfconv_params.json src/pfconv_params.lib.tengo && rm -rf dist && pl-tengo check && pl-tengo build && ./create_tags.sh"
8
8
  },
9
9
  "devDependencies": {
10
- "@platforma-sdk/tengo-builder": "^1.16.1",
11
- "@platforma-sdk/workflow-tengo": "^2.2.0",
12
- "@milaboratories/software-pframes-conv": "^2.0.1",
10
+ "@platforma-sdk/tengo-builder": "^1.17.3",
11
+ "@platforma-sdk/workflow-tengo": "^2.6.0",
12
+ "@milaboratories/software-pframes-conv": "^2.1.2",
13
13
  "@platforma-open/milaboratories.software-small-binaries": "^1.14.6",
14
14
  "@platforma-open/milaboratories.software-mixcr": "4.7.0-133-develop",
15
- "@platforma-sdk/test": "^1.9.0",
16
- "vitest": "^2.1.5",
17
- "typescript": "~5.5.4"
15
+ "@platforma-sdk/test": "^1.17.0",
16
+ "vitest": "^2.1.8",
17
+ "typescript": "~5.6.3"
18
18
  },
19
19
  "scripts": {
20
20
  "build": "rm -rf dist && pl-tengo check && pl-tengo build",
@@ -101,7 +101,7 @@ shmTreeTableOptions := func(dataDescription) {
101
101
  "name": "pl7.app/vdj/numberOfNodesWithClones",
102
102
  "valueType": "Long",
103
103
  "annotations": {
104
- "pl7.app/label": "Number of clones"
104
+ "pl7.app/label": "Number of nodes"
105
105
  }
106
106
  }
107
107
  })
@@ -306,8 +306,31 @@ shmTreeNodesTableOptions := func(dataDescription) {
306
306
  "spec": {
307
307
  "name": "pl7.app/dendrogram/distance",
308
308
  "valueType": "Double",
309
+ "domain": {
310
+ "pl7.app/dendrogram/distance/from": "germline"
311
+ },
309
312
  "annotations": {
310
313
  "pl7.app/label": "Distanse from germline",
314
+ "pl7.app/dendrogram/distance/from": "germline", // change to domain only, once can be selected in graphmaker
315
+ "pl7.app/dendrogram/isDistance": "true"
316
+ }
317
+ }
318
+ })
319
+
320
+ cmdArgs = append(cmdArgs, "-distance", "parent")
321
+ columns = append(columns, {
322
+ "column": "DistanceFromParent",
323
+ "id": "distance-from-parent",
324
+ "allowNA": true,
325
+ "spec": {
326
+ "name": "pl7.app/dendrogram/distance",
327
+ "valueType": "Double",
328
+ "domain": {
329
+ "pl7.app/dendrogram/distance/from": "parent"
330
+ },
331
+ "annotations": {
332
+ "pl7.app/label": "Distanse from parent",
333
+ "pl7.app/dendrogram/distance/from": "parent", // change to domain only, once can be selected in graphmaker
311
334
  "pl7.app/dendrogram/isDistance": "true"
312
335
  }
313
336
  }
@@ -35,7 +35,7 @@ wf.body(func(args) {
35
35
  datasets: datasets,
36
36
  donorColumn: donorColumn,
37
37
  params: {
38
- seed: args.seed
38
+ downsampling: args.downsampling
39
39
  }
40
40
  })
41
41
 
@@ -9,7 +9,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
9
9
  // we need to form a pColumn with three axes:
10
10
  // axes[0]: donorId
11
11
  // axes[1]: sampleId
12
- // axes[2]: mixcrBlockId
12
+ // axes[2]: mixcrclonotypingBlockId
13
13
  // value: fileRef resource
14
14
 
15
15
  // we have:
@@ -21,34 +21,34 @@ groupDataByDonorId := func(donorColumn, datasets) {
21
21
  // value: fileRef resource
22
22
 
23
23
  donorColumnSpec := donorColumn.get("spec").getDataAsJson()
24
-
25
- domain := {}
26
- if !is_undefined(donorColumnSpec["domain"]) {
27
- domain = donorColumnSpec["domain"]
28
- }
24
+
25
+ sampleIdAxis := donorColumnSpec.axesSpec[0]
26
+
29
27
  resultSpec := {
28
+ kind: "PColumn",
29
+ name: "mixcr.com/clns",
30
+ valueType: "File",
31
+
30
32
  // annotations and domain could differ between datasets
31
- "axesSpec": [
33
+ axesSpec: [
32
34
  {
33
- "annotations": donorColumnSpec["annotations"],
34
- "domain": domain,
35
- "name": donorColumnSpec["name"],
36
- "type": donorColumnSpec["valueType"]
35
+ name: donorColumnSpec.name,
36
+ type: donorColumnSpec.valueType,
37
+ domain: donorColumnSpec.domain,
38
+ annotations: donorColumnSpec.annotations
37
39
  },
38
- donorColumnSpec["axesSpec"][0],
40
+ sampleIdAxis,
39
41
  {
40
- "annotations": {
42
+ name: "pl7.app/block",
43
+ type: "String",
44
+ annotations: {
41
45
  "pl7.app/label": "Clonotyping block id"
42
- },
43
- "name": "pl7.app/blockId",
44
- "type": "String"
46
+ }
45
47
  }
46
- ],
47
- "kind": "PColumn",
48
- "name": "mixcr.com/clns",
49
- "valueType": "File"
48
+ ]
50
49
  }
51
-
50
+
51
+ // creating sample to donor map
52
52
 
53
53
  sampleToDonor := {}
54
54
 
@@ -62,11 +62,11 @@ groupDataByDonorId := func(donorColumn, datasets) {
62
62
  dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
63
63
 
64
64
  // collect all the clns files that we have into pColumn
65
- for blockId, dataset in datasets {
65
+ for clonotypingBlockId, dataset in datasets {
66
66
  for sKey, fileRef in dataset.get("data").inputs() {
67
67
  sampleId := json.decode(sKey)[0]
68
68
  donor := sampleToDonor[sampleId]
69
- dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
69
+ dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
70
70
  }
71
71
  }
72
72
 
@@ -2,6 +2,7 @@ self := import("@platforma-sdk/workflow-tengo:tpl")
2
2
 
3
3
  llPFrames := import("@platforma-sdk/workflow-tengo:pframes.ll")
4
4
  ll := import("@platforma-sdk/workflow-tengo:ll")
5
+ maps := import("@platforma-sdk/workflow-tengo:maps")
5
6
  assets := import("@platforma-sdk/workflow-tengo:assets")
6
7
  xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
7
8
  text := import("text")
@@ -22,35 +23,44 @@ self.awaitState("params", "ResourceReady")
22
23
  self.body(func(inputs) {
23
24
  // overall description of data that we have.
24
25
  dataDescription := {
25
- "hasUmiTags": false,
26
- "hasCellTags": false,
26
+ hasUmiTags: false,
27
+ hasCellTags: false,
27
28
  // will be filled
28
- "coveredFeatures": [],
29
- "cellsAssembled": false
29
+ coveredFeatures: [],
30
+ cellsAssembled: false
30
31
  }
31
32
 
33
+ // clonotypingBlockId -> "bulk" | "sc"
34
+ datasetTypes := {}
35
+
32
36
  assemblingFeature := ""
33
- for _, dataset in inputs.datasets {
37
+ for clonotypingBlockId, dataset in inputs.datasets {
34
38
  presetAnnotations := dataset.get("spec").getDataAsJson()["annotations"]
35
39
 
40
+ datasetTypes[clonotypingBlockId] = "bulk"
41
+
36
42
  if presetAnnotations["mixcr.com/cellTags"] != "" {
37
- dataDescription["hasCellTags"] = true
43
+ dataDescription.hasCellTags = true
38
44
  }
39
45
  if presetAnnotations["mixcr.com/umiTags"] != "" {
40
- dataDescription["hasUmiTags"] = true
46
+ dataDescription.hasUmiTags = true
41
47
  }
42
48
  if presetAnnotations["mixcr.com/cellsAssembled"] == "true" {
43
- dataDescription["cellsAssembled"] = true
49
+ dataDescription.cellsAssembled = true
50
+ datasetTypes[clonotypingBlockId] = "sc"
44
51
  }
45
- dataDescription["coveredFeatures"] = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
52
+ dataDescription.coveredFeatures = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
46
53
  // check that assemblingFeature feature is the same. If so, coveredFeatures will be the same too
47
54
  if (assemblingFeature == "") {
48
- assemblingFeature = dataDescription["mixcr.com/assemblingFeature"]
49
- } else if (assemblingFeature != dataDescription["mixcr.com/assemblingFeature"]) {
50
- ll.panic("Assmble features should be the same for process tress. Got " + assemblingFeature + " and " + dataDescription["mixcr.com/assemblingFeature"])
55
+ assemblingFeature = presetAnnotations["mixcr.com/assemblingFeature"]
56
+ } else if (assemblingFeature != presetAnnotations["mixcr.com/assemblingFeature"]) {
57
+ ll.panic("Assmble features should be the same to process tress. Got " + assemblingFeature + " and " + presetAnnotations["mixcr.com/assemblingFeature"])
51
58
  }
52
59
  }
53
60
 
61
+ // adding assembling feature to the list of covered features
62
+ dataDescription.coveredFeatures = append(dataDescription.coveredFeatures, assemblingFeature)
63
+
54
64
  // there should be a join call on pfFrames, but it's not implemented, so we will do it by hand
55
65
  dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
56
66
 
@@ -62,7 +72,7 @@ self.body(func(inputs) {
62
72
  // TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
63
73
  mixcrResults := llPFrames.aggregate(
64
74
  // files to iterate through
65
- dataGroupedByDonorId["data"],
75
+ dataGroupedByDonorId.data,
66
76
  // columns not to combine - sampleId and mixcrBlockId
67
77
  [1, 2],
68
78
  reconstructShmTreesTpl,
@@ -106,7 +116,10 @@ self.body(func(inputs) {
106
116
  "shmTreeTableOptions": shmTreeTableOptions["cmdArgs"],
107
117
  "shmTreeNodesTableOptions": shmTreeNodesTableOptions["cmdArgs"],
108
118
  "shmTreeNodesWithClonesTableOptions": shmTreeNodesWithClonesTableOptions["cmdArgs"],
109
- "globalParams": inputs.params
119
+ "globalParams": maps.merge(
120
+ inputs.params,
121
+ { datasetTypes: datasetTypes }
122
+ )
110
123
  }
111
124
  )
112
125
 
@@ -24,6 +24,13 @@ progressPrefix := "[==PROGRESS==]"
24
24
  self.body(func(inputs) {
25
25
  inputData := inputs[pConstants.VALUE_FIELD_NAME]
26
26
  globalParams := inputs.globalParams
27
+ datasetTypes := globalParams.datasetTypes
28
+ downsampling := globalParams.downsampling
29
+
30
+ ll.print("__THE_LOG__ " + json.encode(datasetTypes))
31
+ ll.print("__THE_LOG__ " + json.encode(downsampling))
32
+
33
+ ll.assert(!is_undefined(datasetTypes), "datasetTypes undefined")
27
34
 
28
35
  allelesCmdBuilder := exec.builder().
29
36
  printErrStreamToStdout().
@@ -50,8 +57,10 @@ self.body(func(inputs) {
50
57
  // file name should encode axis values. It will be parsed by xsv.importFileMap afterwards to restore axis for clones data
51
58
  fileName := sampleId + "___" + clonotypingBlockId + ".clns"
52
59
  toProcess = append(toProcess, {
53
- "fileName": fileName,
54
- "input": inputFile
60
+ clonotypingBlockId: clonotypingBlockId,
61
+ sampleId: sampleId,
62
+ fileName: fileName,
63
+ input: inputFile
55
64
  })
56
65
  }
57
66
 
@@ -63,6 +72,49 @@ self.body(func(inputs) {
63
72
 
64
73
  alleles := allelesCmdBuilder.run()
65
74
 
75
+ for input in toProcess {
76
+ input.alleles = alleles.getFile("alleles/" + input.fileName)
77
+ }
78
+
79
+ if !is_undefined(downsampling) {
80
+ downsamplingParam := ""
81
+ if downsampling.type == "CountReadsFixed" {
82
+ downsamplingParam = "count-reads-fixed-" + string(downsampling.number)
83
+ } else if downsampling.type == "CountMoleculesFixed" {
84
+ downsamplingParam = "count-molecule-fixed-" + string(downsampling.number)
85
+ } else if downsampling.type == "TopClonotypesByReads" {
86
+ downsamplingParam = "top-reads-" + string(downsampling.number)
87
+ } else if downsampling.type == "TopClonotypesByMolecules" {
88
+ downsamplingParam = "top-molecule-" + string(downsampling.number)
89
+ } else if downsampling.type == "CumulativeTopClonotypesByReads" {
90
+ downsamplingParam = "cumtop-reads-" + string(downsampling.percent)
91
+ } else if downsampling.type == "CumulativeTopClonotypesByMolecules" {
92
+ downsamplingParam = "cumtop-molecule-" + string(downsampling.percent)
93
+ } else {
94
+ ll.panic("Unknown downsampling type: " + downsampling.type)
95
+ }
96
+
97
+ ll.print("__THE_LOG__ " + downsamplingParam)
98
+
99
+ for input in toProcess {
100
+ if datasetTypes[input.clonotypingBlockId] == "bulk" {
101
+ downsamplingCmd := exec.builder().
102
+ printErrStreamToStdout().
103
+ secret("MI_LICENSE", "MI_LICENSE").
104
+ env("MI_PROGRESS_PREFIX", progressPrefix).
105
+ software(mixcrSw).
106
+ arg("downsample").
107
+ arg("--downsampling").
108
+ arg(downsamplingParam).
109
+ arg("clones.clns").
110
+ addFile("clones.clns", input.alleles).
111
+ saveFile("clones.downsampled.clns").
112
+ run()
113
+ input.alleles = downsamplingCmd.getFile("clones.downsampled.clns")
114
+ }
115
+ }
116
+ }
117
+
66
118
  shmTreesCmdBuilder := exec.builder().
67
119
  printErrStreamToStdout().
68
120
  secret("MI_LICENSE", "MI_LICENSE").
@@ -80,7 +132,7 @@ self.body(func(inputs) {
80
132
 
81
133
  for input in toProcess {
82
134
  shmTreesCmdBuilder.
83
- addFile(input.fileName, alleles.getFile("alleles/" + input.fileName)).
135
+ addFile(input.fileName, input.alleles).
84
136
  arg(input.fileName)
85
137
  }
86
138