@platforma-open/milaboratories.mixcr-shm-trees.workflow 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/tengo/lib/prepare-donor-column.lib.tengo +23 -19
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/process.plj.gz +0 -0
- package/dist/tengo/tpl/reconstruct-shm-trees.plj.gz +0 -0
- package/package.json +8 -8
- package/src/main.tpl.tengo +1 -1
- package/src/prepare-donor-column.lib.tengo +23 -19
- package/src/process.tpl.tengo +22 -12
- package/src/reconstruct-shm-trees.tpl.tengo +55 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.mixcr-shm-trees.workflow
|
|
2
2
|
|
|
3
|
+
## 2.3.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 34af50b: Optional downsampling of clonesets before building SHM trees
|
|
8
|
+
|
|
9
|
+
### Patch Changes
|
|
10
|
+
|
|
11
|
+
- 61c4b6b: SDK upgrade
|
|
12
|
+
|
|
13
|
+
## 2.2.0
|
|
14
|
+
|
|
15
|
+
### Minor Changes
|
|
16
|
+
|
|
17
|
+
- 898579f: Major SDK upgrade, minor UX fixes.
|
|
18
|
+
|
|
3
19
|
## 2.1.1
|
|
4
20
|
|
|
5
21
|
### Patch Changes
|
|
@@ -9,7 +9,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
|
|
|
9
9
|
// we need to form a pColumn with three axes:
|
|
10
10
|
// axes[0]: donorId
|
|
11
11
|
// axes[1]: sampleId
|
|
12
|
-
// axes[2]:
|
|
12
|
+
// axes[2]: mixcrclonotypingBlockId
|
|
13
13
|
// value: fileRef resource
|
|
14
14
|
|
|
15
15
|
// we have:
|
|
@@ -21,30 +21,34 @@ groupDataByDonorId := func(donorColumn, datasets) {
|
|
|
21
21
|
// value: fileRef resource
|
|
22
22
|
|
|
23
23
|
donorColumnSpec := donorColumn.get("spec").getDataAsJson()
|
|
24
|
-
|
|
24
|
+
|
|
25
|
+
sampleIdAxis := donorColumnSpec.axesSpec[0]
|
|
26
|
+
|
|
25
27
|
resultSpec := {
|
|
28
|
+
kind: "PColumn",
|
|
29
|
+
name: "mixcr.com/clns",
|
|
30
|
+
valueType: "File",
|
|
31
|
+
|
|
26
32
|
// annotations and domain could differ between datasets
|
|
27
|
-
|
|
33
|
+
axesSpec: [
|
|
28
34
|
{
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
35
|
+
name: donorColumnSpec.name,
|
|
36
|
+
type: donorColumnSpec.valueType,
|
|
37
|
+
domain: donorColumnSpec.domain,
|
|
38
|
+
annotations: donorColumnSpec.annotations
|
|
33
39
|
},
|
|
34
|
-
|
|
40
|
+
sampleIdAxis,
|
|
35
41
|
{
|
|
36
|
-
"
|
|
42
|
+
name: "pl7.app/block",
|
|
43
|
+
type: "String",
|
|
44
|
+
annotations: {
|
|
37
45
|
"pl7.app/label": "Clonotyping block id"
|
|
38
|
-
}
|
|
39
|
-
"name": "pl7.app/blockId",
|
|
40
|
-
"type": "String"
|
|
46
|
+
}
|
|
41
47
|
}
|
|
42
|
-
]
|
|
43
|
-
"kind": "PColumn",
|
|
44
|
-
"name": "mixcr.com/clns",
|
|
45
|
-
"valueType": "File"
|
|
48
|
+
]
|
|
46
49
|
}
|
|
47
|
-
|
|
50
|
+
|
|
51
|
+
// creating sample to donor map
|
|
48
52
|
|
|
49
53
|
sampleToDonor := {}
|
|
50
54
|
|
|
@@ -58,11 +62,11 @@ groupDataByDonorId := func(donorColumn, datasets) {
|
|
|
58
62
|
dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
|
|
59
63
|
|
|
60
64
|
// collect all the clns files that we have into pColumn
|
|
61
|
-
for
|
|
65
|
+
for clonotypingBlockId, dataset in datasets {
|
|
62
66
|
for sKey, fileRef in dataset.get("data").inputs() {
|
|
63
67
|
sampleId := json.decode(sKey)[0]
|
|
64
68
|
donor := sampleToDonor[sampleId]
|
|
65
|
-
dataBuilder.createInputField(json.encode([donor, sampleId,
|
|
69
|
+
dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
|
|
66
70
|
}
|
|
67
71
|
}
|
|
68
72
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.mixcr-shm-trees.workflow",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.3.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Tengo-based template",
|
|
6
6
|
"//": {
|
|
7
7
|
"build": "node ./scripts/build-static.mjs src/pfconv_params.json src/pfconv_params.lib.tengo && rm -rf dist && pl-tengo check && pl-tengo build && ./create_tags.sh"
|
|
8
8
|
},
|
|
9
9
|
"devDependencies": {
|
|
10
|
-
"@platforma-sdk/tengo-builder": "^1.
|
|
11
|
-
"@platforma-sdk/workflow-tengo": "^
|
|
12
|
-
"@milaboratories/software-pframes-conv": "^1.
|
|
13
|
-
"@platforma-open/milaboratories.software-small-binaries": "^1.14.
|
|
10
|
+
"@platforma-sdk/tengo-builder": "^1.17.3",
|
|
11
|
+
"@platforma-sdk/workflow-tengo": "^2.6.0",
|
|
12
|
+
"@milaboratories/software-pframes-conv": "^2.1.2",
|
|
13
|
+
"@platforma-open/milaboratories.software-small-binaries": "^1.14.6",
|
|
14
14
|
"@platforma-open/milaboratories.software-mixcr": "4.7.0-133-develop",
|
|
15
|
-
"@platforma-sdk/test": "^1.
|
|
16
|
-
"vitest": "^2.1.
|
|
17
|
-
"typescript": "~5.
|
|
15
|
+
"@platforma-sdk/test": "^1.17.0",
|
|
16
|
+
"vitest": "^2.1.8",
|
|
17
|
+
"typescript": "~5.6.3"
|
|
18
18
|
},
|
|
19
19
|
"scripts": {
|
|
20
20
|
"build": "rm -rf dist && pl-tengo check && pl-tengo build",
|
package/src/main.tpl.tengo
CHANGED
|
@@ -9,7 +9,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
|
|
|
9
9
|
// we need to form a pColumn with three axes:
|
|
10
10
|
// axes[0]: donorId
|
|
11
11
|
// axes[1]: sampleId
|
|
12
|
-
// axes[2]:
|
|
12
|
+
// axes[2]: mixcrclonotypingBlockId
|
|
13
13
|
// value: fileRef resource
|
|
14
14
|
|
|
15
15
|
// we have:
|
|
@@ -21,30 +21,34 @@ groupDataByDonorId := func(donorColumn, datasets) {
|
|
|
21
21
|
// value: fileRef resource
|
|
22
22
|
|
|
23
23
|
donorColumnSpec := donorColumn.get("spec").getDataAsJson()
|
|
24
|
-
|
|
24
|
+
|
|
25
|
+
sampleIdAxis := donorColumnSpec.axesSpec[0]
|
|
26
|
+
|
|
25
27
|
resultSpec := {
|
|
28
|
+
kind: "PColumn",
|
|
29
|
+
name: "mixcr.com/clns",
|
|
30
|
+
valueType: "File",
|
|
31
|
+
|
|
26
32
|
// annotations and domain could differ between datasets
|
|
27
|
-
|
|
33
|
+
axesSpec: [
|
|
28
34
|
{
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
35
|
+
name: donorColumnSpec.name,
|
|
36
|
+
type: donorColumnSpec.valueType,
|
|
37
|
+
domain: donorColumnSpec.domain,
|
|
38
|
+
annotations: donorColumnSpec.annotations
|
|
33
39
|
},
|
|
34
|
-
|
|
40
|
+
sampleIdAxis,
|
|
35
41
|
{
|
|
36
|
-
"
|
|
42
|
+
name: "pl7.app/block",
|
|
43
|
+
type: "String",
|
|
44
|
+
annotations: {
|
|
37
45
|
"pl7.app/label": "Clonotyping block id"
|
|
38
|
-
}
|
|
39
|
-
"name": "pl7.app/blockId",
|
|
40
|
-
"type": "String"
|
|
46
|
+
}
|
|
41
47
|
}
|
|
42
|
-
]
|
|
43
|
-
"kind": "PColumn",
|
|
44
|
-
"name": "mixcr.com/clns",
|
|
45
|
-
"valueType": "File"
|
|
48
|
+
]
|
|
46
49
|
}
|
|
47
|
-
|
|
50
|
+
|
|
51
|
+
// creating sample to donor map
|
|
48
52
|
|
|
49
53
|
sampleToDonor := {}
|
|
50
54
|
|
|
@@ -58,11 +62,11 @@ groupDataByDonorId := func(donorColumn, datasets) {
|
|
|
58
62
|
dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
|
|
59
63
|
|
|
60
64
|
// collect all the clns files that we have into pColumn
|
|
61
|
-
for
|
|
65
|
+
for clonotypingBlockId, dataset in datasets {
|
|
62
66
|
for sKey, fileRef in dataset.get("data").inputs() {
|
|
63
67
|
sampleId := json.decode(sKey)[0]
|
|
64
68
|
donor := sampleToDonor[sampleId]
|
|
65
|
-
dataBuilder.createInputField(json.encode([donor, sampleId,
|
|
69
|
+
dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
|
|
66
70
|
}
|
|
67
71
|
}
|
|
68
72
|
|
package/src/process.tpl.tengo
CHANGED
|
@@ -2,6 +2,7 @@ self := import("@platforma-sdk/workflow-tengo:tpl")
|
|
|
2
2
|
|
|
3
3
|
llPFrames := import("@platforma-sdk/workflow-tengo:pframes.ll")
|
|
4
4
|
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
5
|
+
maps := import("@platforma-sdk/workflow-tengo:maps")
|
|
5
6
|
assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
6
7
|
xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
|
|
7
8
|
text := import("text")
|
|
@@ -22,32 +23,38 @@ self.awaitState("params", "ResourceReady")
|
|
|
22
23
|
self.body(func(inputs) {
|
|
23
24
|
// overall description of data that we have.
|
|
24
25
|
dataDescription := {
|
|
25
|
-
|
|
26
|
-
|
|
26
|
+
hasUmiTags: false,
|
|
27
|
+
hasCellTags: false,
|
|
27
28
|
// will be filled
|
|
28
|
-
|
|
29
|
-
|
|
29
|
+
coveredFeatures: [],
|
|
30
|
+
cellsAssembled: false
|
|
30
31
|
}
|
|
31
32
|
|
|
33
|
+
// clonotypingBlockId -> "bulk" | "sc"
|
|
34
|
+
datasetTypes := {}
|
|
35
|
+
|
|
32
36
|
assemblingFeature := ""
|
|
33
|
-
for
|
|
37
|
+
for clonotypingBlockId, dataset in inputs.datasets {
|
|
34
38
|
presetAnnotations := dataset.get("spec").getDataAsJson()["annotations"]
|
|
35
39
|
|
|
40
|
+
datasetTypes[clonotypingBlockId] = "bulk"
|
|
41
|
+
|
|
36
42
|
if presetAnnotations["mixcr.com/cellTags"] != "" {
|
|
37
|
-
dataDescription
|
|
43
|
+
dataDescription.hasCellTags = true
|
|
38
44
|
}
|
|
39
45
|
if presetAnnotations["mixcr.com/umiTags"] != "" {
|
|
40
|
-
dataDescription
|
|
46
|
+
dataDescription.hasUmiTags = true
|
|
41
47
|
}
|
|
42
48
|
if presetAnnotations["mixcr.com/cellsAssembled"] == "true" {
|
|
43
|
-
dataDescription
|
|
49
|
+
dataDescription.cellsAssembled = true
|
|
50
|
+
datasetTypes[clonotypingBlockId] = "sc"
|
|
44
51
|
}
|
|
45
|
-
dataDescription
|
|
52
|
+
dataDescription.coveredFeatures = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
|
|
46
53
|
// check that assemblingFeature feature is the same. If so, coveredFeatures will be the same too
|
|
47
54
|
if (assemblingFeature == "") {
|
|
48
55
|
assemblingFeature = dataDescription["mixcr.com/assemblingFeature"]
|
|
49
56
|
} else if (assemblingFeature != dataDescription["mixcr.com/assemblingFeature"]) {
|
|
50
|
-
ll.panic("Assmble features should be the same
|
|
57
|
+
ll.panic("Assmble features should be the same to process tress. Got " + assemblingFeature + " and " + dataDescription["mixcr.com/assemblingFeature"])
|
|
51
58
|
}
|
|
52
59
|
}
|
|
53
60
|
|
|
@@ -62,7 +69,7 @@ self.body(func(inputs) {
|
|
|
62
69
|
// TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
|
|
63
70
|
mixcrResults := llPFrames.aggregate(
|
|
64
71
|
// files to iterate through
|
|
65
|
-
dataGroupedByDonorId
|
|
72
|
+
dataGroupedByDonorId.data,
|
|
66
73
|
// columns not to combine - sampleId and mixcrBlockId
|
|
67
74
|
[1, 2],
|
|
68
75
|
reconstructShmTreesTpl,
|
|
@@ -106,7 +113,10 @@ self.body(func(inputs) {
|
|
|
106
113
|
"shmTreeTableOptions": shmTreeTableOptions["cmdArgs"],
|
|
107
114
|
"shmTreeNodesTableOptions": shmTreeNodesTableOptions["cmdArgs"],
|
|
108
115
|
"shmTreeNodesWithClonesTableOptions": shmTreeNodesWithClonesTableOptions["cmdArgs"],
|
|
109
|
-
"globalParams":
|
|
116
|
+
"globalParams": maps.merge(
|
|
117
|
+
inputs.params,
|
|
118
|
+
{ datasetTypes: datasetTypes }
|
|
119
|
+
)
|
|
110
120
|
}
|
|
111
121
|
)
|
|
112
122
|
|
|
@@ -24,6 +24,13 @@ progressPrefix := "[==PROGRESS==]"
|
|
|
24
24
|
self.body(func(inputs) {
|
|
25
25
|
inputData := inputs[pConstants.VALUE_FIELD_NAME]
|
|
26
26
|
globalParams := inputs.globalParams
|
|
27
|
+
datasetTypes := globalParams.datasetTypes
|
|
28
|
+
downsampling := globalParams.downsampling
|
|
29
|
+
|
|
30
|
+
ll.print("__THE_LOG__ " + json.encode(datasetTypes))
|
|
31
|
+
ll.print("__THE_LOG__ " + json.encode(downsampling))
|
|
32
|
+
|
|
33
|
+
ll.assert(!is_undefined(datasetTypes), "datasetTypes undefined")
|
|
27
34
|
|
|
28
35
|
allelesCmdBuilder := exec.builder().
|
|
29
36
|
printErrStreamToStdout().
|
|
@@ -50,8 +57,10 @@ self.body(func(inputs) {
|
|
|
50
57
|
// file name should encode axis values. It will be parsed by xsv.importFileMap afterwards to restore axis for clones data
|
|
51
58
|
fileName := sampleId + "___" + clonotypingBlockId + ".clns"
|
|
52
59
|
toProcess = append(toProcess, {
|
|
53
|
-
|
|
54
|
-
|
|
60
|
+
clonotypingBlockId: clonotypingBlockId,
|
|
61
|
+
sampleId: sampleId,
|
|
62
|
+
fileName: fileName,
|
|
63
|
+
input: inputFile
|
|
55
64
|
})
|
|
56
65
|
}
|
|
57
66
|
|
|
@@ -63,6 +72,49 @@ self.body(func(inputs) {
|
|
|
63
72
|
|
|
64
73
|
alleles := allelesCmdBuilder.run()
|
|
65
74
|
|
|
75
|
+
for input in toProcess {
|
|
76
|
+
input.alleles = alleles.getFile("alleles/" + input.fileName)
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
if !is_undefined(downsampling) {
|
|
80
|
+
downsamplingParam := ""
|
|
81
|
+
if downsampling.type == "CountReadsFixed" {
|
|
82
|
+
downsamplingParam = "count-reads-fixed-" + string(downsampling.number)
|
|
83
|
+
} else if downsampling.type == "CountMoleculesFixed" {
|
|
84
|
+
downsamplingParam = "count-molecule-fixed-" + string(downsampling.number)
|
|
85
|
+
} else if downsampling.type == "TopClonotypesByReads" {
|
|
86
|
+
downsamplingParam = "top-reads-" + string(downsampling.number)
|
|
87
|
+
} else if downsampling.type == "TopClonotypesByMolecules" {
|
|
88
|
+
downsamplingParam = "top-molecule-" + string(downsampling.number)
|
|
89
|
+
} else if downsampling.type == "CumulativeTopClonotypesByReads" {
|
|
90
|
+
downsamplingParam = "cumtop-reads-" + string(downsampling.percent)
|
|
91
|
+
} else if downsampling.type == "CumulativeTopClonotypesByMolecules" {
|
|
92
|
+
downsamplingParam = "cumtop-molecule-" + string(downsampling.percent)
|
|
93
|
+
} else {
|
|
94
|
+
ll.panic("Unknown downsampling type: " + downsampling.type)
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
ll.print("__THE_LOG__ " + downsamplingParam)
|
|
98
|
+
|
|
99
|
+
for input in toProcess {
|
|
100
|
+
if datasetTypes[input.clonotypingBlockId] == "bulk" {
|
|
101
|
+
downsamplingCmd := exec.builder().
|
|
102
|
+
printErrStreamToStdout().
|
|
103
|
+
secret("MI_LICENSE", "MI_LICENSE").
|
|
104
|
+
env("MI_PROGRESS_PREFIX", progressPrefix).
|
|
105
|
+
software(mixcrSw).
|
|
106
|
+
arg("downsample").
|
|
107
|
+
arg("--downsampling").
|
|
108
|
+
arg(downsamplingParam).
|
|
109
|
+
arg("clones.clns").
|
|
110
|
+
addFile("clones.clns", input.alleles).
|
|
111
|
+
saveFile("clones.downsampled.clns").
|
|
112
|
+
run()
|
|
113
|
+
input.alleles = downsamplingCmd.getFile("clones.downsampled.clns")
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
|
|
66
118
|
shmTreesCmdBuilder := exec.builder().
|
|
67
119
|
printErrStreamToStdout().
|
|
68
120
|
secret("MI_LICENSE", "MI_LICENSE").
|
|
@@ -80,7 +132,7 @@ self.body(func(inputs) {
|
|
|
80
132
|
|
|
81
133
|
for input in toProcess {
|
|
82
134
|
shmTreesCmdBuilder.
|
|
83
|
-
addFile(input.fileName,
|
|
135
|
+
addFile(input.fileName, input.alleles).
|
|
84
136
|
arg(input.fileName)
|
|
85
137
|
}
|
|
86
138
|
|