@platforma-open/milaboratories.3d-structure-prediction.workflow 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +3 -1
- package/CHANGELOG.md +26 -0
- package/dist/tengo/lib/structure-cols-conv.lib.tengo +204 -0
- package/dist/tengo/tpl/build-pdbs-map.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/predict-batch.plj.gz +0 -0
- package/package.json +4 -4
- package/src/main.tpl.tengo +17 -184
- package/src/structure-cols-conv.lib.tengo +204 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/3d-structure-prediction/3d-structure-prediction/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.3d-structure-prediction.workflow@1.0.3 build /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow
|
|
3
|
+
> @platforma-open/milaboratories.3d-structure-prediction.workflow@1.0.5 build /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow
|
|
4
4
|
> shx rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
info: Skipping unknown file type: wf.test.ts
|
|
7
7
|
Processing "src/build-pdbs-map.tpl.tengo"...
|
|
8
8
|
Processing "src/main.tpl.tengo"...
|
|
9
9
|
Processing "src/predict-batch.tpl.tengo"...
|
|
10
|
+
Processing "src/structure-cols-conv.lib.tengo"...
|
|
10
11
|
No syntax errors found.
|
|
11
12
|
info: Skipping unknown file type: wf.test.ts
|
|
12
13
|
info: Compiling 'dist'...
|
|
14
|
+
info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/lib/structure-cols-conv.lib.tengo
|
|
13
15
|
info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/build-pdbs-map.plj.gz
|
|
14
16
|
info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/predict-batch.plj.gz
|
|
15
17
|
info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/main.plj.gz
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,31 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.3d-structure-prediction.workflow
|
|
2
2
|
|
|
3
|
+
## 1.0.5
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 4438d9d: Clarify NanoBodyBuilder2 usage in the settings panel: relabel the light chain dropdown as optional with a tooltip, and expand the mode-info alert to note that NanoBodyBuilder2 is camelid-VHH-trained, so for conventional heavy-only inputs (e.g. human bulk IGH-only) the produced structure has VHH-biased framework geometry.
|
|
8
|
+
|
|
9
|
+
Drop the user-facing CPU and memory inputs (per-batch resources are fixed). Each prediction batch now requests 4 CPU cores and 4 GiB of memory.
|
|
10
|
+
|
|
11
|
+
Stop writing a wall-clock `prediction-date` REMARK into emitted PDBs. The timestamp made every PDB byte-different on every run, breaking the platforma backend's content-addressed caching — downstream nodes that consumed merged PDB ResourceMaps hit `CIDConflictError`. The other provenance REMARKs (immunebuilder version, torch seed, block version, numbering scheme) plus the seeded ensemble fully determine the prediction.
|
|
12
|
+
|
|
13
|
+
Make the saved Python wrapper log byte-stable for identical inputs: drop the per-line UTC timestamp prefix and remove every wall-clock duration printed by `_log` (`predictor ready in Xs`, `predicted in Xs`, `elapsed=Xs`). The exec template saves stdout via `saveStdoutStream()` into the regular file output set, so its content hash flows into the resource CID; timestamped logs would re-introduce the same `CIDConflictError` failure mode as the PDB date.
|
|
14
|
+
|
|
15
|
+
Set `stepCache: 30 * times.minute` on the `processColumn` call so per-batch outputs stay reachable for the dedup/recovery path across project re-renders, matching the convention used by mixcr-clonotyping and miltenyi-tcr-bcr-clonotyping.
|
|
16
|
+
|
|
17
|
+
Add the species selector (spec R44): `human | mouse | camelid | rat | rabbit | other`, default `human`. Species is included in the block subtitle (R56) alongside the engine. The mode-info banner now splits into two cases: when the species is `camelid` and the light chain is unset we treat it as a true VHH input (informational); for any other species combined with heavy-only input we surface a warning that NanoBodyBuilder2's framework geometry is biased away from conventional VH. A separate warning fires for ABodyBuilder2 runs on species outside the training distribution (anything other than human or mouse). Species is held in `BlockData` only — the workflow does not consume it yet, so switching species does not invalidate cached predictions. Upstream clonotyping blocks do not propagate species through PColumn specs today, so the selector is user-supplied; once upstream wires `pl7.app/species` onto the clonotype-axis domain, this block can pre-fill the default.
|
|
18
|
+
|
|
19
|
+
- Updated dependencies [4438d9d]
|
|
20
|
+
- @platforma-open/milaboratories.3d-structure-prediction.software@1.0.5
|
|
21
|
+
|
|
22
|
+
## 1.0.4
|
|
23
|
+
|
|
24
|
+
### Patch Changes
|
|
25
|
+
|
|
26
|
+
- Updated dependencies [7a0fedd]
|
|
27
|
+
- @platforma-open/milaboratories.3d-structure-prediction.software@1.0.4
|
|
28
|
+
|
|
3
29
|
## 1.0.3
|
|
4
30
|
|
|
5
31
|
### Patch Changes
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
2
|
+
|
|
3
|
+
confidenceColumn := func(columnId, pName, label, extraAnnotations) {
|
|
4
|
+
annotations := {
|
|
5
|
+
"pl7.app/label": label,
|
|
6
|
+
"pl7.app/structure/confidenceMetric": "predictedErrorAngstroms"
|
|
7
|
+
}
|
|
8
|
+
for k, v in extraAnnotations { annotations[k] = v }
|
|
9
|
+
return {
|
|
10
|
+
column: columnId,
|
|
11
|
+
id: columnId,
|
|
12
|
+
spec: {
|
|
13
|
+
valueType: "Double",
|
|
14
|
+
name: pName,
|
|
15
|
+
annotations: annotations
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
getConfidenceColumns := func(isPaired) {
|
|
35
|
+
cols := [
|
|
36
|
+
{
|
|
37
|
+
column: "clonotypeLabel",
|
|
38
|
+
id: "clonotypeLabel",
|
|
39
|
+
spec: {
|
|
40
|
+
valueType: "String",
|
|
41
|
+
name: "pl7.app/label",
|
|
42
|
+
annotations: {
|
|
43
|
+
"pl7.app/label": "Clone",
|
|
44
|
+
"pl7.app/table/orderPriority": "100000"
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
},
|
|
48
|
+
confidenceColumn("meanError", "pl7.app/structure/confidence/mean",
|
|
49
|
+
"Mean error (Å)", { "pl7.app/isScore": "true", "pl7.app/table/orderPriority": "90000" }),
|
|
50
|
+
confidenceColumn("cdrh1Error", "pl7.app/structure/confidence/cdrh1", "CDRH1 error (Å)", {}),
|
|
51
|
+
confidenceColumn("cdrh2Error", "pl7.app/structure/confidence/cdrh2", "CDRH2 error (Å)", {}),
|
|
52
|
+
confidenceColumn("cdrh3Error", "pl7.app/structure/confidence/cdrh3", "CDRH3 error (Å)",
|
|
53
|
+
{
|
|
54
|
+
"pl7.app/isScore": "true",
|
|
55
|
+
"pl7.app/table/orderPriority": "89000",
|
|
56
|
+
"pl7.app/description": "ABodyBuilder2 accuracy degrades for CDRH3 ≥ ~20 aa (Abanades et al., 2023, Fig. 4); long-CDRH3 predictions should be treated as lower-confidence regardless of the error value."
|
|
57
|
+
})
|
|
58
|
+
]
|
|
59
|
+
if isPaired {
|
|
60
|
+
cols = append(cols,
|
|
61
|
+
confidenceColumn("cdrl1Error", "pl7.app/structure/confidence/cdrl1", "CDRL1 error (Å)", {}))
|
|
62
|
+
cols = append(cols,
|
|
63
|
+
confidenceColumn("cdrl2Error", "pl7.app/structure/confidence/cdrl2", "CDRL2 error (Å)", {}))
|
|
64
|
+
cols = append(cols,
|
|
65
|
+
confidenceColumn("cdrl3Error", "pl7.app/structure/confidence/cdrl3", "CDRL3 error (Å)", {}))
|
|
66
|
+
}
|
|
67
|
+
cols = append(cols, {
|
|
68
|
+
column: "perResidueError",
|
|
69
|
+
id: "perResidueError",
|
|
70
|
+
spec: {
|
|
71
|
+
valueType: "String",
|
|
72
|
+
name: "pl7.app/structure/confidence/perResidue",
|
|
73
|
+
annotations: {
|
|
74
|
+
"pl7.app/label": "Per-residue error (JSON)",
|
|
75
|
+
"pl7.app/structure/confidenceMetric": "predictedErrorAngstroms",
|
|
76
|
+
"pl7.app/structure/perResidueSchema": "json_pos_chain_err_v1",
|
|
77
|
+
"pl7.app/table/visibility": "hidden"
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
})
|
|
81
|
+
cols = append(cols, {
|
|
82
|
+
column: "cdrh3Length",
|
|
83
|
+
id: "cdrh3Length",
|
|
84
|
+
spec: {
|
|
85
|
+
valueType: "Long",
|
|
86
|
+
name: "pl7.app/structure/cdrh3Length",
|
|
87
|
+
annotations: { "pl7.app/label": "CDRH3 length (aa)" }
|
|
88
|
+
}
|
|
89
|
+
})
|
|
90
|
+
cols = append(cols, {
|
|
91
|
+
column: "failureReasonText",
|
|
92
|
+
id: "failureReasonText",
|
|
93
|
+
spec: {
|
|
94
|
+
valueType: "String",
|
|
95
|
+
name: "pl7.app/structure/failureReason/text",
|
|
96
|
+
annotations: {
|
|
97
|
+
"pl7.app/label": "Failure reason",
|
|
98
|
+
"pl7.app/table/visibility": "optional"
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
})
|
|
102
|
+
cols = append(cols, {
|
|
103
|
+
column: "failureReason",
|
|
104
|
+
id: "failureReason",
|
|
105
|
+
spec: {
|
|
106
|
+
valueType: "String",
|
|
107
|
+
name: "pl7.app/structure/failureReason",
|
|
108
|
+
annotations: {
|
|
109
|
+
"pl7.app/label": "Failure reason (code)",
|
|
110
|
+
"pl7.app/table/visibility": "hidden"
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
})
|
|
114
|
+
cols = append(cols, {
|
|
115
|
+
column: "warningText",
|
|
116
|
+
id: "warningText",
|
|
117
|
+
spec: {
|
|
118
|
+
valueType: "String",
|
|
119
|
+
name: "pl7.app/structure/warning/text",
|
|
120
|
+
annotations: {
|
|
121
|
+
"pl7.app/label": "Warnings",
|
|
122
|
+
"pl7.app/table/visibility": "optional"
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
})
|
|
126
|
+
cols = append(cols, {
|
|
127
|
+
column: "warning",
|
|
128
|
+
id: "warning",
|
|
129
|
+
spec: {
|
|
130
|
+
valueType: "String",
|
|
131
|
+
name: "pl7.app/structure/warning",
|
|
132
|
+
annotations: {
|
|
133
|
+
"pl7.app/label": "Warnings (codes)",
|
|
134
|
+
"pl7.app/table/visibility": "hidden"
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
})
|
|
138
|
+
return cols
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
getPdbsMapSpec := func() {
|
|
144
|
+
return {
|
|
145
|
+
kind: "PColumn",
|
|
146
|
+
name: "pl7.app/structure/pdb",
|
|
147
|
+
domain: { "pl7.app/structure/numbering": "imgt" },
|
|
148
|
+
valueType: "File",
|
|
149
|
+
axesSpec: [],
|
|
150
|
+
annotations: {
|
|
151
|
+
"pl7.app/label": "Predicted PDB structure",
|
|
152
|
+
"pl7.app/structure/numbering": "imgt"
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
getPredictionSuccessfulSettings := func(clonotypeAxisSpec, clonotypeAxisName) {
|
|
160
|
+
return {
|
|
161
|
+
axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
|
|
162
|
+
columns: [{
|
|
163
|
+
column: "predictionSuccessful",
|
|
164
|
+
spec: {
|
|
165
|
+
valueType: "Int",
|
|
166
|
+
name: "pl7.app/structure/predictionSuccessful",
|
|
167
|
+
annotations: {
|
|
168
|
+
"pl7.app/label": "Structure predicted",
|
|
169
|
+
"pl7.app/isSubset": "true",
|
|
170
|
+
"pl7.app/table/visibility": "hidden"
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
}],
|
|
174
|
+
storageFormat: "Parquet"
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
getConfidentSettings := func(clonotypeAxisSpec, clonotypeAxisName, confidenceMetric, threshold) {
|
|
181
|
+
return {
|
|
182
|
+
axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
|
|
183
|
+
columns: [{
|
|
184
|
+
column: "confident",
|
|
185
|
+
spec: {
|
|
186
|
+
valueType: "Int",
|
|
187
|
+
name: "pl7.app/structure/confident",
|
|
188
|
+
annotations: {
|
|
189
|
+
"pl7.app/label": "Confident structure (" + confidenceMetric + " ≤ " + string(threshold) + " Å)",
|
|
190
|
+
"pl7.app/isSubset": "true",
|
|
191
|
+
"pl7.app/table/visibility": "hidden"
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}],
|
|
195
|
+
storageFormat: "Parquet"
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
export ll.toStrict({
|
|
200
|
+
getConfidenceColumns: getConfidenceColumns,
|
|
201
|
+
getPdbsMapSpec: getPdbsMapSpec,
|
|
202
|
+
getPredictionSuccessfulSettings: getPredictionSuccessfulSettings,
|
|
203
|
+
getConfidentSettings: getConfidentSettings
|
|
204
|
+
})
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.3d-structure-prediction.workflow",
|
|
3
|
-
"version": "1.0.3",
|
|
3
|
+
"version": "1.0.5",
|
|
4
4
|
"description": "Block Workflow",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"dependencies": {
|
|
7
7
|
"@platforma-sdk/workflow-tengo": "5.21.0",
|
|
8
|
-
"@platforma-open/milaboratories.3d-structure-prediction.software": "1.0.
|
|
8
|
+
"@platforma-open/milaboratories.3d-structure-prediction.software": "1.0.5"
|
|
9
9
|
},
|
|
10
10
|
"devDependencies": {
|
|
11
|
-
"@platforma-sdk/tengo-builder": "2.5.
|
|
12
|
-
"@platforma-sdk/test": "1.
|
|
11
|
+
"@platforma-sdk/tengo-builder": "2.5.26",
|
|
12
|
+
"@platforma-sdk/test": "1.75.6"
|
|
13
13
|
},
|
|
14
14
|
"peerDependencies": {
|
|
15
15
|
"vitest": "*"
|
package/src/main.tpl.tengo
CHANGED
|
@@ -7,29 +7,13 @@ pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
|
7
7
|
smart := import("@platforma-sdk/workflow-tengo:smart")
|
|
8
8
|
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
9
9
|
units := import("@platforma-sdk/workflow-tengo:units")
|
|
10
|
+
times := import("times")
|
|
11
|
+
|
|
12
|
+
structureColsConv := import(":structure-cols-conv")
|
|
10
13
|
|
|
11
14
|
predictBatchTpl := assets.importTemplate(":predict-batch")
|
|
12
15
|
immuneBuilderSw := assets.importSoftware("@platforma-open/milaboratories.3d-structure-prediction.software:immunebuilder-predict")
|
|
13
16
|
|
|
14
|
-
CONFIDENCE_METRIC_ANNOTATION := { "pl7.app/structure/confidenceMetric": "predictedErrorAngstroms" }
|
|
15
|
-
|
|
16
|
-
confidenceColumn := func(columnId, pName, label, extraAnnotations) {
|
|
17
|
-
annotations := {
|
|
18
|
-
"pl7.app/label": label,
|
|
19
|
-
"pl7.app/structure/confidenceMetric": "predictedErrorAngstroms"
|
|
20
|
-
}
|
|
21
|
-
for k, v in extraAnnotations { annotations[k] = v }
|
|
22
|
-
return {
|
|
23
|
-
column: columnId,
|
|
24
|
-
id: columnId,
|
|
25
|
-
spec: {
|
|
26
|
-
valueType: "Double",
|
|
27
|
-
name: pName,
|
|
28
|
-
annotations: annotations
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
|
|
33
17
|
wf.prepare(func(args) {
|
|
34
18
|
// args.dataset is a PrimaryRef { __isPrimaryRef: "v1", column: PlRef, filter?: PlRef }.
|
|
35
19
|
// `addAnchor` resolves the dataset column (also gives us its spec for the
|
|
@@ -74,8 +58,8 @@ wf.body(func(args) {
|
|
|
74
58
|
labelEntry := len(labelColumns) > 0 ? labelColumns[0] : undefined
|
|
75
59
|
|
|
76
60
|
seed := is_undefined(args.torchSeed) ? 42 : args.torchSeed
|
|
77
|
-
cpu :=
|
|
78
|
-
memGiB :=
|
|
61
|
+
cpu := 4
|
|
62
|
+
memGiB := 4
|
|
79
63
|
confidenceMetric := is_undefined(args.confidenceMetric) ? "cdrh3Mean" : args.confidenceMetric
|
|
80
64
|
threshold := is_undefined(args.confidenceThresholdAngstroms) ? 2.5 : args.confidenceThresholdAngstroms
|
|
81
65
|
batchSize := is_undefined(args.batchSize) ? 50 : args.batchSize
|
|
@@ -110,120 +94,7 @@ wf.body(func(args) {
|
|
|
110
94
|
})
|
|
111
95
|
}
|
|
112
96
|
|
|
113
|
-
|
|
114
|
-
// batch-key axis (clonotype) automatically; `batchKeyColumns` names it.
|
|
115
|
-
// `clonotypeLabel` is echoed by the python wrapper so the V3 structures
|
|
116
|
-
// table substitutes it into the row-axis cells (single-axis pl7.app/label
|
|
117
|
-
// PColumn → recognised by PlAgDataTable's `isLabelColumn`).
|
|
118
|
-
confidenceXsvColumns := [
|
|
119
|
-
{
|
|
120
|
-
column: "clonotypeLabel",
|
|
121
|
-
id: "clonotypeLabel",
|
|
122
|
-
spec: {
|
|
123
|
-
valueType: "String",
|
|
124
|
-
name: "pl7.app/label",
|
|
125
|
-
annotations: {
|
|
126
|
-
"pl7.app/label": "Clone",
|
|
127
|
-
"pl7.app/table/orderPriority": "100000"
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
},
|
|
131
|
-
confidenceColumn("meanError", "pl7.app/structure/confidence/mean",
|
|
132
|
-
"Mean error (Å)", { "pl7.app/isScore": "true", "pl7.app/table/orderPriority": "90000" }),
|
|
133
|
-
confidenceColumn("cdrh1Error", "pl7.app/structure/confidence/cdrh1", "CDRH1 error (Å)", {}),
|
|
134
|
-
confidenceColumn("cdrh2Error", "pl7.app/structure/confidence/cdrh2", "CDRH2 error (Å)", {}),
|
|
135
|
-
confidenceColumn("cdrh3Error", "pl7.app/structure/confidence/cdrh3", "CDRH3 error (Å)",
|
|
136
|
-
{
|
|
137
|
-
"pl7.app/isScore": "true",
|
|
138
|
-
"pl7.app/table/orderPriority": "89000",
|
|
139
|
-
"pl7.app/description": "ABodyBuilder2 accuracy degrades for CDRH3 ≥ ~20 aa (Abanades et al., 2023, Fig. 4); long-CDRH3 predictions should be treated as lower-confidence regardless of the error value."
|
|
140
|
-
})
|
|
141
|
-
]
|
|
142
|
-
if isPaired {
|
|
143
|
-
confidenceXsvColumns = append(confidenceXsvColumns,
|
|
144
|
-
confidenceColumn("cdrl1Error", "pl7.app/structure/confidence/cdrl1", "CDRL1 error (Å)", {}))
|
|
145
|
-
confidenceXsvColumns = append(confidenceXsvColumns,
|
|
146
|
-
confidenceColumn("cdrl2Error", "pl7.app/structure/confidence/cdrl2", "CDRL2 error (Å)", {}))
|
|
147
|
-
confidenceXsvColumns = append(confidenceXsvColumns,
|
|
148
|
-
confidenceColumn("cdrl3Error", "pl7.app/structure/confidence/cdrl3", "CDRL3 error (Å)", {}))
|
|
149
|
-
}
|
|
150
|
-
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
151
|
-
column: "perResidueError",
|
|
152
|
-
id: "perResidueError",
|
|
153
|
-
spec: {
|
|
154
|
-
valueType: "String",
|
|
155
|
-
name: "pl7.app/structure/confidence/perResidue",
|
|
156
|
-
annotations: {
|
|
157
|
-
"pl7.app/label": "Per-residue error (JSON)",
|
|
158
|
-
"pl7.app/structure/confidenceMetric": "predictedErrorAngstroms",
|
|
159
|
-
"pl7.app/structure/perResidueSchema": "json_pos_chain_err_v1",
|
|
160
|
-
"pl7.app/table/visibility": "hidden"
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
})
|
|
164
|
-
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
165
|
-
column: "cdrh3Length",
|
|
166
|
-
id: "cdrh3Length",
|
|
167
|
-
spec: {
|
|
168
|
-
valueType: "Long",
|
|
169
|
-
name: "pl7.app/structure/cdrh3Length",
|
|
170
|
-
annotations: { "pl7.app/label": "CDRH3 length (aa)" }
|
|
171
|
-
}
|
|
172
|
-
})
|
|
173
|
-
// failureReason / warning come in two flavours:
|
|
174
|
-
// - the *Text columns carry human-readable strings (translated python-side)
|
|
175
|
-
// and are surfaced in the table.
|
|
176
|
-
// - the code columns retain the raw enum value, hidden by default; they
|
|
177
|
-
// stay in the schema so downstream blocks / future failure-stats logic
|
|
178
|
-
// can group on a stable identifier.
|
|
179
|
-
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
180
|
-
column: "failureReasonText",
|
|
181
|
-
id: "failureReasonText",
|
|
182
|
-
spec: {
|
|
183
|
-
valueType: "String",
|
|
184
|
-
name: "pl7.app/structure/failureReason/text",
|
|
185
|
-
annotations: {
|
|
186
|
-
"pl7.app/label": "Failure reason",
|
|
187
|
-
"pl7.app/table/visibility": "optional"
|
|
188
|
-
}
|
|
189
|
-
}
|
|
190
|
-
})
|
|
191
|
-
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
192
|
-
column: "failureReason",
|
|
193
|
-
id: "failureReason",
|
|
194
|
-
spec: {
|
|
195
|
-
valueType: "String",
|
|
196
|
-
name: "pl7.app/structure/failureReason",
|
|
197
|
-
annotations: {
|
|
198
|
-
"pl7.app/label": "Failure reason (code)",
|
|
199
|
-
"pl7.app/table/visibility": "hidden"
|
|
200
|
-
}
|
|
201
|
-
}
|
|
202
|
-
})
|
|
203
|
-
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
204
|
-
column: "warningText",
|
|
205
|
-
id: "warningText",
|
|
206
|
-
spec: {
|
|
207
|
-
valueType: "String",
|
|
208
|
-
name: "pl7.app/structure/warning/text",
|
|
209
|
-
annotations: {
|
|
210
|
-
"pl7.app/label": "Warnings",
|
|
211
|
-
"pl7.app/table/visibility": "optional"
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
})
|
|
215
|
-
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
216
|
-
column: "warning",
|
|
217
|
-
id: "warning",
|
|
218
|
-
spec: {
|
|
219
|
-
valueType: "String",
|
|
220
|
-
name: "pl7.app/structure/warning",
|
|
221
|
-
annotations: {
|
|
222
|
-
"pl7.app/label": "Warnings (codes)",
|
|
223
|
-
"pl7.app/table/visibility": "hidden"
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
})
|
|
97
|
+
confidenceXsvColumns := structureColsConv.getConfidenceColumns(isPaired)
|
|
227
98
|
|
|
228
99
|
// Pre-download model weights once, before the batch fan-out. ImmuneBuilder
|
|
229
100
|
// fetches weights on first predictor construction into a shared on-disk
|
|
@@ -278,17 +149,7 @@ wf.body(func(args) {
|
|
|
278
149
|
{
|
|
279
150
|
type: "ResourceMap",
|
|
280
151
|
name: "pdbsMap",
|
|
281
|
-
spec:
|
|
282
|
-
kind: "PColumn",
|
|
283
|
-
name: "pl7.app/structure/pdb",
|
|
284
|
-
domain: { "pl7.app/structure/numbering": "imgt" },
|
|
285
|
-
valueType: "File",
|
|
286
|
-
axesSpec: [],
|
|
287
|
-
annotations: {
|
|
288
|
-
"pl7.app/label": "Predicted PDB structure",
|
|
289
|
-
"pl7.app/structure/numbering": "imgt"
|
|
290
|
-
}
|
|
291
|
-
}
|
|
152
|
+
spec: structureColsConv.getPdbsMapSpec()
|
|
292
153
|
}
|
|
293
154
|
],
|
|
294
155
|
{
|
|
@@ -296,14 +157,10 @@ wf.body(func(args) {
|
|
|
296
157
|
size: batchSize,
|
|
297
158
|
keyColumns: [clonotypeAxisName],
|
|
298
159
|
format: "tsv",
|
|
299
|
-
// passContent=false: orchestrator writes per-batch blob files
|
|
300
|
-
// (one per slice) and hands each as a file reference to the body.
|
|
301
|
-
// passContent=true would ship the entire per-scope joined TSV as
|
|
302
|
-
// a single value resource — that's capped at 3MiB and overflows
|
|
303
|
-
// at ~22MiB on real datasets.
|
|
304
160
|
passContent: false
|
|
305
161
|
},
|
|
306
|
-
extra: bodyExtra
|
|
162
|
+
extra: bodyExtra,
|
|
163
|
+
stepCache: 30 * times.minute
|
|
307
164
|
}
|
|
308
165
|
)
|
|
309
166
|
|
|
@@ -344,39 +201,15 @@ wf.body(func(args) {
|
|
|
344
201
|
|
|
345
202
|
subsetRun := subsetWf.run()
|
|
346
203
|
|
|
347
|
-
predictionSuccessfulPf := xsv.importFile(
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
spec: {
|
|
352
|
-
valueType: "Int",
|
|
353
|
-
name: "pl7.app/structure/predictionSuccessful",
|
|
354
|
-
annotations: {
|
|
355
|
-
"pl7.app/label": "Structure predicted",
|
|
356
|
-
"pl7.app/isSubset": "true",
|
|
357
|
-
"pl7.app/table/visibility": "hidden"
|
|
358
|
-
}
|
|
359
|
-
}
|
|
360
|
-
}],
|
|
361
|
-
storageFormat: "Parquet"
|
|
362
|
-
}, { splitDataAndSpec: true })
|
|
204
|
+
predictionSuccessfulPf := xsv.importFile(
|
|
205
|
+
subsetRun.getFile("subsets.tsv"), "tsv",
|
|
206
|
+
structureColsConv.getPredictionSuccessfulSettings(clonotypeAxisSpec, clonotypeAxisName),
|
|
207
|
+
{ splitDataAndSpec: true })
|
|
363
208
|
|
|
364
|
-
confidentPf := xsv.importFile(
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
spec: {
|
|
369
|
-
valueType: "Int",
|
|
370
|
-
name: "pl7.app/structure/confident",
|
|
371
|
-
annotations: {
|
|
372
|
-
"pl7.app/label": "Confident structure (" + confidenceMetric + " ≤ " + string(threshold) + " Å)",
|
|
373
|
-
"pl7.app/isSubset": "true",
|
|
374
|
-
"pl7.app/table/visibility": "hidden"
|
|
375
|
-
}
|
|
376
|
-
}
|
|
377
|
-
}],
|
|
378
|
-
storageFormat: "Parquet"
|
|
379
|
-
}, { splitDataAndSpec: true })
|
|
209
|
+
confidentPf := xsv.importFile(
|
|
210
|
+
subsetRun.getFile("confident.tsv"), "tsv",
|
|
211
|
+
structureColsConv.getConfidentSettings(clonotypeAxisSpec, clonotypeAxisName, confidenceMetric, threshold),
|
|
212
|
+
{ splitDataAndSpec: true })
|
|
380
213
|
|
|
381
214
|
// Final structures pframe: confidence + subset columns. The label column
|
|
382
215
|
// (clonotypeLabel) is part of confidence.
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
2
|
+
|
|
3
|
+
confidenceColumn := func(columnId, pName, label, extraAnnotations) {
|
|
4
|
+
annotations := {
|
|
5
|
+
"pl7.app/label": label,
|
|
6
|
+
"pl7.app/structure/confidenceMetric": "predictedErrorAngstroms"
|
|
7
|
+
}
|
|
8
|
+
for k, v in extraAnnotations { annotations[k] = v }
|
|
9
|
+
return {
|
|
10
|
+
column: columnId,
|
|
11
|
+
id: columnId,
|
|
12
|
+
spec: {
|
|
13
|
+
valueType: "Double",
|
|
14
|
+
name: pName,
|
|
15
|
+
annotations: annotations
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
// Per-batch confidence.tsv → typed PColumns. The processColumn orchestrator
|
|
21
|
+
// adds the batch-key axis (clonotype) automatically; the caller passes
|
|
22
|
+
// `batchKeyColumns: [clonotypeAxisName]` alongside this column list.
|
|
23
|
+
//
|
|
24
|
+
// `clonotypeLabel` is echoed by the python wrapper so the V3 structures
|
|
25
|
+
// table substitutes it into the row-axis cells (single-axis pl7.app/label
|
|
26
|
+
// PColumn → recognised by PlAgDataTable's `isLabelColumn`).
|
|
27
|
+
//
|
|
28
|
+
// failureReason / warning come in two flavours:
|
|
29
|
+
// - the *Text columns carry human-readable strings (translated python-side)
|
|
30
|
+
// and are surfaced in the table.
|
|
31
|
+
// - the code columns retain the raw enum value, hidden by default; they
|
|
32
|
+
// stay in the schema so downstream blocks / future failure-stats logic
|
|
33
|
+
// can group on a stable identifier.
|
|
34
|
+
getConfidenceColumns := func(isPaired) {
|
|
35
|
+
cols := [
|
|
36
|
+
{
|
|
37
|
+
column: "clonotypeLabel",
|
|
38
|
+
id: "clonotypeLabel",
|
|
39
|
+
spec: {
|
|
40
|
+
valueType: "String",
|
|
41
|
+
name: "pl7.app/label",
|
|
42
|
+
annotations: {
|
|
43
|
+
"pl7.app/label": "Clone",
|
|
44
|
+
"pl7.app/table/orderPriority": "100000"
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
},
|
|
48
|
+
confidenceColumn("meanError", "pl7.app/structure/confidence/mean",
|
|
49
|
+
"Mean error (Å)", { "pl7.app/isScore": "true", "pl7.app/table/orderPriority": "90000" }),
|
|
50
|
+
confidenceColumn("cdrh1Error", "pl7.app/structure/confidence/cdrh1", "CDRH1 error (Å)", {}),
|
|
51
|
+
confidenceColumn("cdrh2Error", "pl7.app/structure/confidence/cdrh2", "CDRH2 error (Å)", {}),
|
|
52
|
+
confidenceColumn("cdrh3Error", "pl7.app/structure/confidence/cdrh3", "CDRH3 error (Å)",
|
|
53
|
+
{
|
|
54
|
+
"pl7.app/isScore": "true",
|
|
55
|
+
"pl7.app/table/orderPriority": "89000",
|
|
56
|
+
"pl7.app/description": "ABodyBuilder2 accuracy degrades for CDRH3 ≥ ~20 aa (Abanades et al., 2023, Fig. 4); long-CDRH3 predictions should be treated as lower-confidence regardless of the error value."
|
|
57
|
+
})
|
|
58
|
+
]
|
|
59
|
+
if isPaired {
|
|
60
|
+
cols = append(cols,
|
|
61
|
+
confidenceColumn("cdrl1Error", "pl7.app/structure/confidence/cdrl1", "CDRL1 error (Å)", {}))
|
|
62
|
+
cols = append(cols,
|
|
63
|
+
confidenceColumn("cdrl2Error", "pl7.app/structure/confidence/cdrl2", "CDRL2 error (Å)", {}))
|
|
64
|
+
cols = append(cols,
|
|
65
|
+
confidenceColumn("cdrl3Error", "pl7.app/structure/confidence/cdrl3", "CDRL3 error (Å)", {}))
|
|
66
|
+
}
|
|
67
|
+
cols = append(cols, {
|
|
68
|
+
column: "perResidueError",
|
|
69
|
+
id: "perResidueError",
|
|
70
|
+
spec: {
|
|
71
|
+
valueType: "String",
|
|
72
|
+
name: "pl7.app/structure/confidence/perResidue",
|
|
73
|
+
annotations: {
|
|
74
|
+
"pl7.app/label": "Per-residue error (JSON)",
|
|
75
|
+
"pl7.app/structure/confidenceMetric": "predictedErrorAngstroms",
|
|
76
|
+
"pl7.app/structure/perResidueSchema": "json_pos_chain_err_v1",
|
|
77
|
+
"pl7.app/table/visibility": "hidden"
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
})
|
|
81
|
+
cols = append(cols, {
|
|
82
|
+
column: "cdrh3Length",
|
|
83
|
+
id: "cdrh3Length",
|
|
84
|
+
spec: {
|
|
85
|
+
valueType: "Long",
|
|
86
|
+
name: "pl7.app/structure/cdrh3Length",
|
|
87
|
+
annotations: { "pl7.app/label": "CDRH3 length (aa)" }
|
|
88
|
+
}
|
|
89
|
+
})
|
|
90
|
+
cols = append(cols, {
|
|
91
|
+
column: "failureReasonText",
|
|
92
|
+
id: "failureReasonText",
|
|
93
|
+
spec: {
|
|
94
|
+
valueType: "String",
|
|
95
|
+
name: "pl7.app/structure/failureReason/text",
|
|
96
|
+
annotations: {
|
|
97
|
+
"pl7.app/label": "Failure reason",
|
|
98
|
+
"pl7.app/table/visibility": "optional"
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
})
|
|
102
|
+
cols = append(cols, {
|
|
103
|
+
column: "failureReason",
|
|
104
|
+
id: "failureReason",
|
|
105
|
+
spec: {
|
|
106
|
+
valueType: "String",
|
|
107
|
+
name: "pl7.app/structure/failureReason",
|
|
108
|
+
annotations: {
|
|
109
|
+
"pl7.app/label": "Failure reason (code)",
|
|
110
|
+
"pl7.app/table/visibility": "hidden"
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
})
|
|
114
|
+
cols = append(cols, {
|
|
115
|
+
column: "warningText",
|
|
116
|
+
id: "warningText",
|
|
117
|
+
spec: {
|
|
118
|
+
valueType: "String",
|
|
119
|
+
name: "pl7.app/structure/warning/text",
|
|
120
|
+
annotations: {
|
|
121
|
+
"pl7.app/label": "Warnings",
|
|
122
|
+
"pl7.app/table/visibility": "optional"
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
})
|
|
126
|
+
cols = append(cols, {
|
|
127
|
+
column: "warning",
|
|
128
|
+
id: "warning",
|
|
129
|
+
spec: {
|
|
130
|
+
valueType: "String",
|
|
131
|
+
name: "pl7.app/structure/warning",
|
|
132
|
+
annotations: {
|
|
133
|
+
"pl7.app/label": "Warnings (codes)",
|
|
134
|
+
"pl7.app/table/visibility": "hidden"
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
})
|
|
138
|
+
return cols
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// PDB ResourceMap spec — produced by the batch orchestrator (merged across
|
|
142
|
+
// batches via merge-resource-maps). Failed clonotypes have no entry.
|
|
143
|
+
getPdbsMapSpec := func() {
|
|
144
|
+
return {
|
|
145
|
+
kind: "PColumn",
|
|
146
|
+
name: "pl7.app/structure/pdb",
|
|
147
|
+
domain: { "pl7.app/structure/numbering": "imgt" },
|
|
148
|
+
valueType: "File",
|
|
149
|
+
axesSpec: [],
|
|
150
|
+
annotations: {
|
|
151
|
+
"pl7.app/label": "Predicted PDB structure",
|
|
152
|
+
"pl7.app/structure/numbering": "imgt"
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
// xsv.importFile settings for the predictionSuccessful subset PColumn —
|
|
158
|
+
// 1 = prediction succeeded (failureReason was null), 0 = failed.
|
|
159
|
+
getPredictionSuccessfulSettings := func(clonotypeAxisSpec, clonotypeAxisName) {
|
|
160
|
+
return {
|
|
161
|
+
axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
|
|
162
|
+
columns: [{
|
|
163
|
+
column: "predictionSuccessful",
|
|
164
|
+
spec: {
|
|
165
|
+
valueType: "Int",
|
|
166
|
+
name: "pl7.app/structure/predictionSuccessful",
|
|
167
|
+
annotations: {
|
|
168
|
+
"pl7.app/label": "Structure predicted",
|
|
169
|
+
"pl7.app/isSubset": "true",
|
|
170
|
+
"pl7.app/table/visibility": "hidden"
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
}],
|
|
174
|
+
storageFormat: "Parquet"
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
// xsv.importFile settings for the `confident` subset PColumn — 1 when the
|
|
179
|
+
// user-selected confidence metric is ≤ threshold AND prediction succeeded.
|
|
180
|
+
getConfidentSettings := func(clonotypeAxisSpec, clonotypeAxisName, confidenceMetric, threshold) {
|
|
181
|
+
return {
|
|
182
|
+
axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
|
|
183
|
+
columns: [{
|
|
184
|
+
column: "confident",
|
|
185
|
+
spec: {
|
|
186
|
+
valueType: "Int",
|
|
187
|
+
name: "pl7.app/structure/confident",
|
|
188
|
+
annotations: {
|
|
189
|
+
"pl7.app/label": "Confident structure (" + confidenceMetric + " ≤ " + string(threshold) + " Å)",
|
|
190
|
+
"pl7.app/isSubset": "true",
|
|
191
|
+
"pl7.app/table/visibility": "hidden"
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}],
|
|
195
|
+
storageFormat: "Parquet"
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
export ll.toStrict({
|
|
200
|
+
getConfidenceColumns: getConfidenceColumns,
|
|
201
|
+
getPdbsMapSpec: getPdbsMapSpec,
|
|
202
|
+
getPredictionSuccessfulSettings: getPredictionSuccessfulSettings,
|
|
203
|
+
getConfidentSettings: getConfidentSettings
|
|
204
|
+
})
|