@platforma-open/milaboratories.mixcr-shm-trees.workflow 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # @platforma-open/milaboratories.mixcr-shm-trees.workflow
2
2
 
3
+ ## 2.3.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 34af50b: Optional downsampling of clonesets before building SHM trees
8
+
9
+ ### Patch Changes
10
+
11
+ - 61c4b6b: SDK upgrade
12
+
3
13
  ## 2.2.0
4
14
 
5
15
  ### Minor Changes
@@ -9,7 +9,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
9
9
  // we need to form a pColumn with three axes:
10
10
  // axes[0]: donorId
11
11
  // axes[1]: sampleId
12
- // axes[2]: mixcrBlockId
12
+ // axes[2]: mixcrclonotypingBlockId
13
13
  // value: fileRef resource
14
14
 
15
15
  // we have:
@@ -21,34 +21,34 @@ groupDataByDonorId := func(donorColumn, datasets) {
21
21
  // value: fileRef resource
22
22
 
23
23
  donorColumnSpec := donorColumn.get("spec").getDataAsJson()
24
-
25
- domain := {}
26
- if !is_undefined(donorColumnSpec["domain"]) {
27
- domain = donorColumnSpec["domain"]
28
- }
24
+
25
+ sampleIdAxis := donorColumnSpec.axesSpec[0]
26
+
29
27
  resultSpec := {
28
+ kind: "PColumn",
29
+ name: "mixcr.com/clns",
30
+ valueType: "File",
31
+
30
32
  // annotations and domain could differ between datasets
31
- "axesSpec": [
33
+ axesSpec: [
32
34
  {
33
- "annotations": donorColumnSpec["annotations"],
34
- "domain": domain,
35
- "name": donorColumnSpec["name"],
36
- "type": donorColumnSpec["valueType"]
35
+ name: donorColumnSpec.name,
36
+ type: donorColumnSpec.valueType,
37
+ domain: donorColumnSpec.domain,
38
+ annotations: donorColumnSpec.annotations
37
39
  },
38
- donorColumnSpec["axesSpec"][0],
40
+ sampleIdAxis,
39
41
  {
40
- "annotations": {
42
+ name: "pl7.app/block",
43
+ type: "String",
44
+ annotations: {
41
45
  "pl7.app/label": "Clonotyping block id"
42
- },
43
- "name": "pl7.app/blockId",
44
- "type": "String"
46
+ }
45
47
  }
46
- ],
47
- "kind": "PColumn",
48
- "name": "mixcr.com/clns",
49
- "valueType": "File"
48
+ ]
50
49
  }
51
-
50
+
51
+ // creating sample to donor map
52
52
 
53
53
  sampleToDonor := {}
54
54
 
@@ -62,11 +62,11 @@ groupDataByDonorId := func(donorColumn, datasets) {
62
62
  dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
63
63
 
64
64
  // collect all the clns files that we have into pColumn
65
- for blockId, dataset in datasets {
65
+ for clonotypingBlockId, dataset in datasets {
66
66
  for sKey, fileRef in dataset.get("data").inputs() {
67
67
  sampleId := json.decode(sKey)[0]
68
68
  donor := sampleToDonor[sampleId]
69
- dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
69
+ dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
70
70
  }
71
71
  }
72
72
 
Binary file
Binary file
package/package.json CHANGED
@@ -1,20 +1,20 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-shm-trees.workflow",
3
- "version": "2.2.0",
3
+ "version": "2.3.0",
4
4
  "type": "module",
5
5
  "description": "Tengo-based template",
6
6
  "//": {
7
7
  "build": "node ./scripts/build-static.mjs src/pfconv_params.json src/pfconv_params.lib.tengo && rm -rf dist && pl-tengo check && pl-tengo build && ./create_tags.sh"
8
8
  },
9
9
  "devDependencies": {
10
- "@platforma-sdk/tengo-builder": "^1.16.1",
11
- "@platforma-sdk/workflow-tengo": "^2.2.0",
12
- "@milaboratories/software-pframes-conv": "^2.0.1",
10
+ "@platforma-sdk/tengo-builder": "^1.17.3",
11
+ "@platforma-sdk/workflow-tengo": "^2.6.0",
12
+ "@milaboratories/software-pframes-conv": "^2.1.2",
13
13
  "@platforma-open/milaboratories.software-small-binaries": "^1.14.6",
14
14
  "@platforma-open/milaboratories.software-mixcr": "4.7.0-133-develop",
15
- "@platforma-sdk/test": "^1.9.0",
16
- "vitest": "^2.1.5",
17
- "typescript": "~5.5.4"
15
+ "@platforma-sdk/test": "^1.17.0",
16
+ "vitest": "^2.1.8",
17
+ "typescript": "~5.6.3"
18
18
  },
19
19
  "scripts": {
20
20
  "build": "rm -rf dist && pl-tengo check && pl-tengo build",
@@ -35,7 +35,7 @@ wf.body(func(args) {
35
35
  datasets: datasets,
36
36
  donorColumn: donorColumn,
37
37
  params: {
38
- seed: args.seed
38
+ downsampling: args.downsampling
39
39
  }
40
40
  })
41
41
 
@@ -9,7 +9,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
9
9
  // we need to form a pColumn with three axes:
10
10
  // axes[0]: donorId
11
11
  // axes[1]: sampleId
12
- // axes[2]: mixcrBlockId
12
+ // axes[2]: mixcrclonotypingBlockId
13
13
  // value: fileRef resource
14
14
 
15
15
  // we have:
@@ -21,34 +21,34 @@ groupDataByDonorId := func(donorColumn, datasets) {
21
21
  // value: fileRef resource
22
22
 
23
23
  donorColumnSpec := donorColumn.get("spec").getDataAsJson()
24
-
25
- domain := {}
26
- if !is_undefined(donorColumnSpec["domain"]) {
27
- domain = donorColumnSpec["domain"]
28
- }
24
+
25
+ sampleIdAxis := donorColumnSpec.axesSpec[0]
26
+
29
27
  resultSpec := {
28
+ kind: "PColumn",
29
+ name: "mixcr.com/clns",
30
+ valueType: "File",
31
+
30
32
  // annotations and domain could differ between datasets
31
- "axesSpec": [
33
+ axesSpec: [
32
34
  {
33
- "annotations": donorColumnSpec["annotations"],
34
- "domain": domain,
35
- "name": donorColumnSpec["name"],
36
- "type": donorColumnSpec["valueType"]
35
+ name: donorColumnSpec.name,
36
+ type: donorColumnSpec.valueType,
37
+ domain: donorColumnSpec.domain,
38
+ annotations: donorColumnSpec.annotations
37
39
  },
38
- donorColumnSpec["axesSpec"][0],
40
+ sampleIdAxis,
39
41
  {
40
- "annotations": {
42
+ name: "pl7.app/block",
43
+ type: "String",
44
+ annotations: {
41
45
  "pl7.app/label": "Clonotyping block id"
42
- },
43
- "name": "pl7.app/blockId",
44
- "type": "String"
46
+ }
45
47
  }
46
- ],
47
- "kind": "PColumn",
48
- "name": "mixcr.com/clns",
49
- "valueType": "File"
48
+ ]
50
49
  }
51
-
50
+
51
+ // creating sample to donor map
52
52
 
53
53
  sampleToDonor := {}
54
54
 
@@ -62,11 +62,11 @@ groupDataByDonorId := func(donorColumn, datasets) {
62
62
  dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
63
63
 
64
64
  // collect all the clns files that we have into pColumn
65
- for blockId, dataset in datasets {
65
+ for clonotypingBlockId, dataset in datasets {
66
66
  for sKey, fileRef in dataset.get("data").inputs() {
67
67
  sampleId := json.decode(sKey)[0]
68
68
  donor := sampleToDonor[sampleId]
69
- dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
69
+ dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
70
70
  }
71
71
  }
72
72
 
@@ -2,6 +2,7 @@ self := import("@platforma-sdk/workflow-tengo:tpl")
2
2
 
3
3
  llPFrames := import("@platforma-sdk/workflow-tengo:pframes.ll")
4
4
  ll := import("@platforma-sdk/workflow-tengo:ll")
5
+ maps := import("@platforma-sdk/workflow-tengo:maps")
5
6
  assets := import("@platforma-sdk/workflow-tengo:assets")
6
7
  xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
7
8
  text := import("text")
@@ -22,32 +23,38 @@ self.awaitState("params", "ResourceReady")
22
23
  self.body(func(inputs) {
23
24
  // overall description of data that we have.
24
25
  dataDescription := {
25
- "hasUmiTags": false,
26
- "hasCellTags": false,
26
+ hasUmiTags: false,
27
+ hasCellTags: false,
27
28
  // will be filled
28
- "coveredFeatures": [],
29
- "cellsAssembled": false
29
+ coveredFeatures: [],
30
+ cellsAssembled: false
30
31
  }
31
32
 
33
+ // clonotypingBlockId -> "bulk" | "sc"
34
+ datasetTypes := {}
35
+
32
36
  assemblingFeature := ""
33
- for _, dataset in inputs.datasets {
37
+ for clonotypingBlockId, dataset in inputs.datasets {
34
38
  presetAnnotations := dataset.get("spec").getDataAsJson()["annotations"]
35
39
 
40
+ datasetTypes[clonotypingBlockId] = "bulk"
41
+
36
42
  if presetAnnotations["mixcr.com/cellTags"] != "" {
37
- dataDescription["hasCellTags"] = true
43
+ dataDescription.hasCellTags = true
38
44
  }
39
45
  if presetAnnotations["mixcr.com/umiTags"] != "" {
40
- dataDescription["hasUmiTags"] = true
46
+ dataDescription.hasUmiTags = true
41
47
  }
42
48
  if presetAnnotations["mixcr.com/cellsAssembled"] == "true" {
43
- dataDescription["cellsAssembled"] = true
49
+ dataDescription.cellsAssembled = true
50
+ datasetTypes[clonotypingBlockId] = "sc"
44
51
  }
45
- dataDescription["coveredFeatures"] = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
52
+ dataDescription.coveredFeatures = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
46
53
  // check that assemblingFeature feature is the same. If so, coveredFeatures will be the same too
47
54
  if (assemblingFeature == "") {
48
55
  assemblingFeature = dataDescription["mixcr.com/assemblingFeature"]
49
56
  } else if (assemblingFeature != dataDescription["mixcr.com/assemblingFeature"]) {
50
- ll.panic("Assmble features should be the same for process tress. Got " + assemblingFeature + " and " + dataDescription["mixcr.com/assemblingFeature"])
57
+ ll.panic("Assmble features should be the same to process tress. Got " + assemblingFeature + " and " + dataDescription["mixcr.com/assemblingFeature"])
51
58
  }
52
59
  }
53
60
 
@@ -62,7 +69,7 @@ self.body(func(inputs) {
62
69
  // TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
63
70
  mixcrResults := llPFrames.aggregate(
64
71
  // files to iterate through
65
- dataGroupedByDonorId["data"],
72
+ dataGroupedByDonorId.data,
66
73
  // columns not to combine - sampleId and mixcrBlockId
67
74
  [1, 2],
68
75
  reconstructShmTreesTpl,
@@ -106,7 +113,10 @@ self.body(func(inputs) {
106
113
  "shmTreeTableOptions": shmTreeTableOptions["cmdArgs"],
107
114
  "shmTreeNodesTableOptions": shmTreeNodesTableOptions["cmdArgs"],
108
115
  "shmTreeNodesWithClonesTableOptions": shmTreeNodesWithClonesTableOptions["cmdArgs"],
109
- "globalParams": inputs.params
116
+ "globalParams": maps.merge(
117
+ inputs.params,
118
+ { datasetTypes: datasetTypes }
119
+ )
110
120
  }
111
121
  )
112
122
 
@@ -24,6 +24,13 @@ progressPrefix := "[==PROGRESS==]"
24
24
  self.body(func(inputs) {
25
25
  inputData := inputs[pConstants.VALUE_FIELD_NAME]
26
26
  globalParams := inputs.globalParams
27
+ datasetTypes := globalParams.datasetTypes
28
+ downsampling := globalParams.downsampling
29
+
30
+ ll.print("__THE_LOG__ " + json.encode(datasetTypes))
31
+ ll.print("__THE_LOG__ " + json.encode(downsampling))
32
+
33
+ ll.assert(!is_undefined(datasetTypes), "datasetTypes undefined")
27
34
 
28
35
  allelesCmdBuilder := exec.builder().
29
36
  printErrStreamToStdout().
@@ -50,8 +57,10 @@ self.body(func(inputs) {
50
57
  // file name should encode axis values. It will be parsed by xsv.importFileMap afterwards to restore axis for clones data
51
58
  fileName := sampleId + "___" + clonotypingBlockId + ".clns"
52
59
  toProcess = append(toProcess, {
53
- "fileName": fileName,
54
- "input": inputFile
60
+ clonotypingBlockId: clonotypingBlockId,
61
+ sampleId: sampleId,
62
+ fileName: fileName,
63
+ input: inputFile
55
64
  })
56
65
  }
57
66
 
@@ -63,6 +72,49 @@ self.body(func(inputs) {
63
72
 
64
73
  alleles := allelesCmdBuilder.run()
65
74
 
75
+ for input in toProcess {
76
+ input.alleles = alleles.getFile("alleles/" + input.fileName)
77
+ }
78
+
79
+ if !is_undefined(downsampling) {
80
+ downsamplingParam := ""
81
+ if downsampling.type == "CountReadsFixed" {
82
+ downsamplingParam = "count-reads-fixed-" + string(downsampling.number)
83
+ } else if downsampling.type == "CountMoleculesFixed" {
84
+ downsamplingParam = "count-molecule-fixed-" + string(downsampling.number)
85
+ } else if downsampling.type == "TopClonotypesByReads" {
86
+ downsamplingParam = "top-reads-" + string(downsampling.number)
87
+ } else if downsampling.type == "TopClonotypesByMolecules" {
88
+ downsamplingParam = "top-molecule-" + string(downsampling.number)
89
+ } else if downsampling.type == "CumulativeTopClonotypesByReads" {
90
+ downsamplingParam = "cumtop-reads-" + string(downsampling.percent)
91
+ } else if downsampling.type == "CumulativeTopClonotypesByMolecules" {
92
+ downsamplingParam = "cumtop-molecule-" + string(downsampling.percent)
93
+ } else {
94
+ ll.panic("Unknown downsampling type: " + downsampling.type)
95
+ }
96
+
97
+ ll.print("__THE_LOG__ " + downsamplingParam)
98
+
99
+ for input in toProcess {
100
+ if datasetTypes[input.clonotypingBlockId] == "bulk" {
101
+ downsamplingCmd := exec.builder().
102
+ printErrStreamToStdout().
103
+ secret("MI_LICENSE", "MI_LICENSE").
104
+ env("MI_PROGRESS_PREFIX", progressPrefix).
105
+ software(mixcrSw).
106
+ arg("downsample").
107
+ arg("--downsampling").
108
+ arg(downsamplingParam).
109
+ arg("clones.clns").
110
+ addFile("clones.clns", input.alleles).
111
+ saveFile("clones.downsampled.clns").
112
+ run()
113
+ input.alleles = downsamplingCmd.getFile("clones.downsampled.clns")
114
+ }
115
+ }
116
+ }
117
+
66
118
  shmTreesCmdBuilder := exec.builder().
67
119
  printErrStreamToStdout().
68
120
  secret("MI_LICENSE", "MI_LICENSE").
@@ -80,7 +132,7 @@ self.body(func(inputs) {
80
132
 
81
133
  for input in toProcess {
82
134
  shmTreesCmdBuilder.
83
- addFile(input.fileName, alleles.getFile("alleles/" + input.fileName)).
135
+ addFile(input.fileName, input.alleles).
84
136
  arg(input.fileName)
85
137
  }
86
138