@platforma-open/milaboratories.3d-structure-prediction.workflow 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,17 @@
1
1
   WARN  Issue while reading "/home/runner/work/3d-structure-prediction/3d-structure-prediction/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.3d-structure-prediction.workflow@1.0.3 build /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow
3
+ > @platforma-open/milaboratories.3d-structure-prediction.workflow@1.0.5 build /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow
4
4
  > shx rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  info: Skipping unknown file type: wf.test.ts
7
7
  Processing "src/build-pdbs-map.tpl.tengo"...
8
8
  Processing "src/main.tpl.tengo"...
9
9
  Processing "src/predict-batch.tpl.tengo"...
10
+ Processing "src/structure-cols-conv.lib.tengo"...
10
11
  No syntax errors found.
11
12
  info: Skipping unknown file type: wf.test.ts
12
13
  info: Compiling 'dist'...
14
+ info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/lib/structure-cols-conv.lib.tengo
13
15
  info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/build-pdbs-map.plj.gz
14
16
  info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/predict-batch.plj.gz
15
17
  info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/main.plj.gz
package/CHANGELOG.md CHANGED
@@ -1,5 +1,31 @@
1
1
  # @platforma-open/milaboratories.3d-structure-prediction.workflow
2
2
 
3
+ ## 1.0.5
4
+
5
+ ### Patch Changes
6
+
7
+ - 4438d9d: Clarify NanoBodyBuilder2 usage in the settings panel: relabel the light chain dropdown as optional with a tooltip, and expand the mode-info alert to note that NanoBodyBuilder2 is camelid-VHH-trained, so for conventional heavy-only inputs (e.g. human bulk IGH-only) the produced structure has VHH-biased framework geometry.
8
+
9
+ Drop the user-facing CPU and memory inputs (per-batch resources are fixed). Each prediction batch now requests 4 CPU cores and 4 GiB of memory; previously, when these inputs were left unset, the defaults were 4 CPU cores and 16 GiB.
10
+
11
+ Stop writing a wall-clock `prediction-date` REMARK into emitted PDBs. The timestamp made every PDB byte-different on every run, breaking the platforma backend's content-addressed caching — downstream nodes that consumed merged PDB ResourceMaps hit `CIDConflictError`. The other provenance REMARKs (immunebuilder version, torch seed, block version, numbering scheme) plus the seeded ensemble fully determine the prediction.
12
+
13
+ Make the saved Python wrapper log byte-stable for identical inputs: drop the per-line UTC timestamp prefix and remove every wall-clock duration printed by `_log` (`predictor ready in Xs`, `predicted in Xs`, `elapsed=Xs`). The exec template saves stdout via `saveStdoutStream()` into the regular file output set, so its content hash flows into the resource CID; timestamped logs would re-introduce the same `CIDConflictError` failure mode as the PDB date.
14
+
15
+ Set `stepCache: 30 * times.minute` on the `processColumn` call so per-batch outputs stay reachable for the dedup/recovery path across project re-renders, matching the convention used by mixcr-clonotyping and miltenyi-tcr-bcr-clonotyping.
16
+
17
+ Add the species selector (spec R44): `human | mouse | camelid | rat | rabbit | other`, default `human`. Species is included in the block subtitle (R56) alongside the engine. The mode-info banner now splits into two cases: when the species is `camelid` and the light chain is unset we treat it as a true VHH input (informational); for any other species combined with heavy-only input we surface a warning that NanoBodyBuilder2's framework geometry is biased away from conventional VH. A separate warning fires for ABodyBuilder2 runs on species outside the training distribution (anything other than human or mouse). Species is held in `BlockData` only — the workflow does not consume it yet, so switching species does not invalidate cached predictions. Upstream clonotyping blocks do not propagate species through PColumn specs today, so the selector is user-supplied; once upstream wires `pl7.app/species` onto the clonotype-axis domain, this block can pre-fill the default.
18
+
19
+ - Updated dependencies [4438d9d]
20
+ - @platforma-open/milaboratories.3d-structure-prediction.software@1.0.5
21
+
22
+ ## 1.0.4
23
+
24
+ ### Patch Changes
25
+
26
+ - Updated dependencies [7a0fedd]
27
+ - @platforma-open/milaboratories.3d-structure-prediction.software@1.0.4
28
+
3
29
  ## 1.0.3
4
30
 
5
31
  ### Patch Changes
@@ -0,0 +1,204 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+
3
+ confidenceColumn := func(columnId, pName, label, extraAnnotations) {
4
+ annotations := {
5
+ "pl7.app/label": label,
6
+ "pl7.app/structure/confidenceMetric": "predictedErrorAngstroms"
7
+ }
8
+ for k, v in extraAnnotations { annotations[k] = v }
9
+ return {
10
+ column: columnId,
11
+ id: columnId,
12
+ spec: {
13
+ valueType: "Double",
14
+ name: pName,
15
+ annotations: annotations
16
+ }
17
+ }
18
+ }
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+ getConfidenceColumns := func(isPaired) {
35
+ cols := [
36
+ {
37
+ column: "clonotypeLabel",
38
+ id: "clonotypeLabel",
39
+ spec: {
40
+ valueType: "String",
41
+ name: "pl7.app/label",
42
+ annotations: {
43
+ "pl7.app/label": "Clone",
44
+ "pl7.app/table/orderPriority": "100000"
45
+ }
46
+ }
47
+ },
48
+ confidenceColumn("meanError", "pl7.app/structure/confidence/mean",
49
+ "Mean error (Å)", { "pl7.app/isScore": "true", "pl7.app/table/orderPriority": "90000" }),
50
+ confidenceColumn("cdrh1Error", "pl7.app/structure/confidence/cdrh1", "CDRH1 error (Å)", {}),
51
+ confidenceColumn("cdrh2Error", "pl7.app/structure/confidence/cdrh2", "CDRH2 error (Å)", {}),
52
+ confidenceColumn("cdrh3Error", "pl7.app/structure/confidence/cdrh3", "CDRH3 error (Å)",
53
+ {
54
+ "pl7.app/isScore": "true",
55
+ "pl7.app/table/orderPriority": "89000",
56
+ "pl7.app/description": "ABodyBuilder2 accuracy degrades for CDRH3 ≥ ~20 aa (Abanades et al., 2023, Fig. 4); long-CDRH3 predictions should be treated as lower-confidence regardless of the error value."
57
+ })
58
+ ]
59
+ if isPaired {
60
+ cols = append(cols,
61
+ confidenceColumn("cdrl1Error", "pl7.app/structure/confidence/cdrl1", "CDRL1 error (Å)", {}))
62
+ cols = append(cols,
63
+ confidenceColumn("cdrl2Error", "pl7.app/structure/confidence/cdrl2", "CDRL2 error (Å)", {}))
64
+ cols = append(cols,
65
+ confidenceColumn("cdrl3Error", "pl7.app/structure/confidence/cdrl3", "CDRL3 error (Å)", {}))
66
+ }
67
+ cols = append(cols, {
68
+ column: "perResidueError",
69
+ id: "perResidueError",
70
+ spec: {
71
+ valueType: "String",
72
+ name: "pl7.app/structure/confidence/perResidue",
73
+ annotations: {
74
+ "pl7.app/label": "Per-residue error (JSON)",
75
+ "pl7.app/structure/confidenceMetric": "predictedErrorAngstroms",
76
+ "pl7.app/structure/perResidueSchema": "json_pos_chain_err_v1",
77
+ "pl7.app/table/visibility": "hidden"
78
+ }
79
+ }
80
+ })
81
+ cols = append(cols, {
82
+ column: "cdrh3Length",
83
+ id: "cdrh3Length",
84
+ spec: {
85
+ valueType: "Long",
86
+ name: "pl7.app/structure/cdrh3Length",
87
+ annotations: { "pl7.app/label": "CDRH3 length (aa)" }
88
+ }
89
+ })
90
+ cols = append(cols, {
91
+ column: "failureReasonText",
92
+ id: "failureReasonText",
93
+ spec: {
94
+ valueType: "String",
95
+ name: "pl7.app/structure/failureReason/text",
96
+ annotations: {
97
+ "pl7.app/label": "Failure reason",
98
+ "pl7.app/table/visibility": "optional"
99
+ }
100
+ }
101
+ })
102
+ cols = append(cols, {
103
+ column: "failureReason",
104
+ id: "failureReason",
105
+ spec: {
106
+ valueType: "String",
107
+ name: "pl7.app/structure/failureReason",
108
+ annotations: {
109
+ "pl7.app/label": "Failure reason (code)",
110
+ "pl7.app/table/visibility": "hidden"
111
+ }
112
+ }
113
+ })
114
+ cols = append(cols, {
115
+ column: "warningText",
116
+ id: "warningText",
117
+ spec: {
118
+ valueType: "String",
119
+ name: "pl7.app/structure/warning/text",
120
+ annotations: {
121
+ "pl7.app/label": "Warnings",
122
+ "pl7.app/table/visibility": "optional"
123
+ }
124
+ }
125
+ })
126
+ cols = append(cols, {
127
+ column: "warning",
128
+ id: "warning",
129
+ spec: {
130
+ valueType: "String",
131
+ name: "pl7.app/structure/warning",
132
+ annotations: {
133
+ "pl7.app/label": "Warnings (codes)",
134
+ "pl7.app/table/visibility": "hidden"
135
+ }
136
+ }
137
+ })
138
+ return cols
139
+ }
140
+
141
+
142
+
143
+ getPdbsMapSpec := func() {
144
+ return {
145
+ kind: "PColumn",
146
+ name: "pl7.app/structure/pdb",
147
+ domain: { "pl7.app/structure/numbering": "imgt" },
148
+ valueType: "File",
149
+ axesSpec: [],
150
+ annotations: {
151
+ "pl7.app/label": "Predicted PDB structure",
152
+ "pl7.app/structure/numbering": "imgt"
153
+ }
154
+ }
155
+ }
156
+
157
+
158
+
159
+ getPredictionSuccessfulSettings := func(clonotypeAxisSpec, clonotypeAxisName) {
160
+ return {
161
+ axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
162
+ columns: [{
163
+ column: "predictionSuccessful",
164
+ spec: {
165
+ valueType: "Int",
166
+ name: "pl7.app/structure/predictionSuccessful",
167
+ annotations: {
168
+ "pl7.app/label": "Structure predicted",
169
+ "pl7.app/isSubset": "true",
170
+ "pl7.app/table/visibility": "hidden"
171
+ }
172
+ }
173
+ }],
174
+ storageFormat: "Parquet"
175
+ }
176
+ }
177
+
178
+
179
+
180
+ getConfidentSettings := func(clonotypeAxisSpec, clonotypeAxisName, confidenceMetric, threshold) {
181
+ return {
182
+ axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
183
+ columns: [{
184
+ column: "confident",
185
+ spec: {
186
+ valueType: "Int",
187
+ name: "pl7.app/structure/confident",
188
+ annotations: {
189
+ "pl7.app/label": "Confident structure (" + confidenceMetric + " ≤ " + string(threshold) + " Å)",
190
+ "pl7.app/isSubset": "true",
191
+ "pl7.app/table/visibility": "hidden"
192
+ }
193
+ }
194
+ }],
195
+ storageFormat: "Parquet"
196
+ }
197
+ }
198
+
199
+ export ll.toStrict({
200
+ getConfidenceColumns: getConfidenceColumns,
201
+ getPdbsMapSpec: getPdbsMapSpec,
202
+ getPredictionSuccessfulSettings: getPredictionSuccessfulSettings,
203
+ getConfidentSettings: getConfidentSettings
204
+ })
Binary file
Binary file
package/package.json CHANGED
@@ -1,15 +1,15 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.3d-structure-prediction.workflow",
3
- "version": "1.0.3",
3
+ "version": "1.0.5",
4
4
  "description": "Block Workflow",
5
5
  "type": "module",
6
6
  "dependencies": {
7
7
  "@platforma-sdk/workflow-tengo": "5.21.0",
8
- "@platforma-open/milaboratories.3d-structure-prediction.software": "1.0.3"
8
+ "@platforma-open/milaboratories.3d-structure-prediction.software": "1.0.5"
9
9
  },
10
10
  "devDependencies": {
11
- "@platforma-sdk/tengo-builder": "2.5.21",
12
- "@platforma-sdk/test": "1.73.0"
11
+ "@platforma-sdk/tengo-builder": "2.5.26",
12
+ "@platforma-sdk/test": "1.75.6"
13
13
  },
14
14
  "peerDependencies": {
15
15
  "vitest": "*"
@@ -7,29 +7,13 @@ pt := import("@platforma-sdk/workflow-tengo:pt")
7
7
  smart := import("@platforma-sdk/workflow-tengo:smart")
8
8
  exec := import("@platforma-sdk/workflow-tengo:exec")
9
9
  units := import("@platforma-sdk/workflow-tengo:units")
10
+ times := import("times")
11
+
12
+ structureColsConv := import(":structure-cols-conv")
10
13
 
11
14
  predictBatchTpl := assets.importTemplate(":predict-batch")
12
15
  immuneBuilderSw := assets.importSoftware("@platforma-open/milaboratories.3d-structure-prediction.software:immunebuilder-predict")
13
16
 
14
- CONFIDENCE_METRIC_ANNOTATION := { "pl7.app/structure/confidenceMetric": "predictedErrorAngstroms" }
15
-
16
- confidenceColumn := func(columnId, pName, label, extraAnnotations) {
17
- annotations := {
18
- "pl7.app/label": label,
19
- "pl7.app/structure/confidenceMetric": "predictedErrorAngstroms"
20
- }
21
- for k, v in extraAnnotations { annotations[k] = v }
22
- return {
23
- column: columnId,
24
- id: columnId,
25
- spec: {
26
- valueType: "Double",
27
- name: pName,
28
- annotations: annotations
29
- }
30
- }
31
- }
32
-
33
17
  wf.prepare(func(args) {
34
18
  // args.dataset is a PrimaryRef { __isPrimaryRef: "v1", column: PlRef, filter?: PlRef }.
35
19
  // `addAnchor` resolves the dataset column (also gives us its spec for the
@@ -74,8 +58,8 @@ wf.body(func(args) {
74
58
  labelEntry := len(labelColumns) > 0 ? labelColumns[0] : undefined
75
59
 
76
60
  seed := is_undefined(args.torchSeed) ? 42 : args.torchSeed
77
- cpu := is_undefined(args.cpu) ? 4 : args.cpu
78
- memGiB := is_undefined(args.mem) ? 16 : args.mem
61
+ cpu := 4
62
+ memGiB := 4
79
63
  confidenceMetric := is_undefined(args.confidenceMetric) ? "cdrh3Mean" : args.confidenceMetric
80
64
  threshold := is_undefined(args.confidenceThresholdAngstroms) ? 2.5 : args.confidenceThresholdAngstroms
81
65
  batchSize := is_undefined(args.batchSize) ? 50 : args.batchSize
@@ -110,120 +94,7 @@ wf.body(func(args) {
110
94
  })
111
95
  }
112
96
 
113
- // Per-batch confidence.tsv → typed PColumns. The orchestrator adds the
114
- // batch-key axis (clonotype) automatically; `batchKeyColumns` names it.
115
- // `clonotypeLabel` is echoed by the python wrapper so the V3 structures
116
- // table substitutes it into the row-axis cells (single-axis pl7.app/label
117
- // PColumn → recognised by PlAgDataTable's `isLabelColumn`).
118
- confidenceXsvColumns := [
119
- {
120
- column: "clonotypeLabel",
121
- id: "clonotypeLabel",
122
- spec: {
123
- valueType: "String",
124
- name: "pl7.app/label",
125
- annotations: {
126
- "pl7.app/label": "Clone",
127
- "pl7.app/table/orderPriority": "100000"
128
- }
129
- }
130
- },
131
- confidenceColumn("meanError", "pl7.app/structure/confidence/mean",
132
- "Mean error (Å)", { "pl7.app/isScore": "true", "pl7.app/table/orderPriority": "90000" }),
133
- confidenceColumn("cdrh1Error", "pl7.app/structure/confidence/cdrh1", "CDRH1 error (Å)", {}),
134
- confidenceColumn("cdrh2Error", "pl7.app/structure/confidence/cdrh2", "CDRH2 error (Å)", {}),
135
- confidenceColumn("cdrh3Error", "pl7.app/structure/confidence/cdrh3", "CDRH3 error (Å)",
136
- {
137
- "pl7.app/isScore": "true",
138
- "pl7.app/table/orderPriority": "89000",
139
- "pl7.app/description": "ABodyBuilder2 accuracy degrades for CDRH3 ≥ ~20 aa (Abanades et al., 2023, Fig. 4); long-CDRH3 predictions should be treated as lower-confidence regardless of the error value."
140
- })
141
- ]
142
- if isPaired {
143
- confidenceXsvColumns = append(confidenceXsvColumns,
144
- confidenceColumn("cdrl1Error", "pl7.app/structure/confidence/cdrl1", "CDRL1 error (Å)", {}))
145
- confidenceXsvColumns = append(confidenceXsvColumns,
146
- confidenceColumn("cdrl2Error", "pl7.app/structure/confidence/cdrl2", "CDRL2 error (Å)", {}))
147
- confidenceXsvColumns = append(confidenceXsvColumns,
148
- confidenceColumn("cdrl3Error", "pl7.app/structure/confidence/cdrl3", "CDRL3 error (Å)", {}))
149
- }
150
- confidenceXsvColumns = append(confidenceXsvColumns, {
151
- column: "perResidueError",
152
- id: "perResidueError",
153
- spec: {
154
- valueType: "String",
155
- name: "pl7.app/structure/confidence/perResidue",
156
- annotations: {
157
- "pl7.app/label": "Per-residue error (JSON)",
158
- "pl7.app/structure/confidenceMetric": "predictedErrorAngstroms",
159
- "pl7.app/structure/perResidueSchema": "json_pos_chain_err_v1",
160
- "pl7.app/table/visibility": "hidden"
161
- }
162
- }
163
- })
164
- confidenceXsvColumns = append(confidenceXsvColumns, {
165
- column: "cdrh3Length",
166
- id: "cdrh3Length",
167
- spec: {
168
- valueType: "Long",
169
- name: "pl7.app/structure/cdrh3Length",
170
- annotations: { "pl7.app/label": "CDRH3 length (aa)" }
171
- }
172
- })
173
- // failureReason / warning come in two flavours:
174
- // - the *Text columns carry human-readable strings (translated python-side)
175
- // and are surfaced in the table.
176
- // - the code columns retain the raw enum value, hidden by default; they
177
- // stay in the schema so downstream blocks / future failure-stats logic
178
- // can group on a stable identifier.
179
- confidenceXsvColumns = append(confidenceXsvColumns, {
180
- column: "failureReasonText",
181
- id: "failureReasonText",
182
- spec: {
183
- valueType: "String",
184
- name: "pl7.app/structure/failureReason/text",
185
- annotations: {
186
- "pl7.app/label": "Failure reason",
187
- "pl7.app/table/visibility": "optional"
188
- }
189
- }
190
- })
191
- confidenceXsvColumns = append(confidenceXsvColumns, {
192
- column: "failureReason",
193
- id: "failureReason",
194
- spec: {
195
- valueType: "String",
196
- name: "pl7.app/structure/failureReason",
197
- annotations: {
198
- "pl7.app/label": "Failure reason (code)",
199
- "pl7.app/table/visibility": "hidden"
200
- }
201
- }
202
- })
203
- confidenceXsvColumns = append(confidenceXsvColumns, {
204
- column: "warningText",
205
- id: "warningText",
206
- spec: {
207
- valueType: "String",
208
- name: "pl7.app/structure/warning/text",
209
- annotations: {
210
- "pl7.app/label": "Warnings",
211
- "pl7.app/table/visibility": "optional"
212
- }
213
- }
214
- })
215
- confidenceXsvColumns = append(confidenceXsvColumns, {
216
- column: "warning",
217
- id: "warning",
218
- spec: {
219
- valueType: "String",
220
- name: "pl7.app/structure/warning",
221
- annotations: {
222
- "pl7.app/label": "Warnings (codes)",
223
- "pl7.app/table/visibility": "hidden"
224
- }
225
- }
226
- })
97
+ confidenceXsvColumns := structureColsConv.getConfidenceColumns(isPaired)
227
98
 
228
99
  // Pre-download model weights once, before the batch fan-out. ImmuneBuilder
229
100
  // fetches weights on first predictor construction into a shared on-disk
@@ -278,17 +149,7 @@ wf.body(func(args) {
278
149
  {
279
150
  type: "ResourceMap",
280
151
  name: "pdbsMap",
281
- spec: {
282
- kind: "PColumn",
283
- name: "pl7.app/structure/pdb",
284
- domain: { "pl7.app/structure/numbering": "imgt" },
285
- valueType: "File",
286
- axesSpec: [],
287
- annotations: {
288
- "pl7.app/label": "Predicted PDB structure",
289
- "pl7.app/structure/numbering": "imgt"
290
- }
291
- }
152
+ spec: structureColsConv.getPdbsMapSpec()
292
153
  }
293
154
  ],
294
155
  {
@@ -296,14 +157,10 @@ wf.body(func(args) {
296
157
  size: batchSize,
297
158
  keyColumns: [clonotypeAxisName],
298
159
  format: "tsv",
299
- // passContent=false: orchestrator writes per-batch blob files
300
- // (one per slice) and hands each as a file reference to the body.
301
- // passContent=true would ship the entire per-scope joined TSV as
302
- // a single value resource — that's capped at 3MiB and overflows
303
- // at ~22MiB on real datasets.
304
160
  passContent: false
305
161
  },
306
- extra: bodyExtra
162
+ extra: bodyExtra,
163
+ stepCache: 30 * times.minute
307
164
  }
308
165
  )
309
166
 
@@ -344,39 +201,15 @@ wf.body(func(args) {
344
201
 
345
202
  subsetRun := subsetWf.run()
346
203
 
347
- predictionSuccessfulPf := xsv.importFile(subsetRun.getFile("subsets.tsv"), "tsv", {
348
- axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
349
- columns: [{
350
- column: "predictionSuccessful",
351
- spec: {
352
- valueType: "Int",
353
- name: "pl7.app/structure/predictionSuccessful",
354
- annotations: {
355
- "pl7.app/label": "Structure predicted",
356
- "pl7.app/isSubset": "true",
357
- "pl7.app/table/visibility": "hidden"
358
- }
359
- }
360
- }],
361
- storageFormat: "Parquet"
362
- }, { splitDataAndSpec: true })
204
+ predictionSuccessfulPf := xsv.importFile(
205
+ subsetRun.getFile("subsets.tsv"), "tsv",
206
+ structureColsConv.getPredictionSuccessfulSettings(clonotypeAxisSpec, clonotypeAxisName),
207
+ { splitDataAndSpec: true })
363
208
 
364
- confidentPf := xsv.importFile(subsetRun.getFile("confident.tsv"), "tsv", {
365
- axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
366
- columns: [{
367
- column: "confident",
368
- spec: {
369
- valueType: "Int",
370
- name: "pl7.app/structure/confident",
371
- annotations: {
372
- "pl7.app/label": "Confident structure (" + confidenceMetric + " ≤ " + string(threshold) + " Å)",
373
- "pl7.app/isSubset": "true",
374
- "pl7.app/table/visibility": "hidden"
375
- }
376
- }
377
- }],
378
- storageFormat: "Parquet"
379
- }, { splitDataAndSpec: true })
209
+ confidentPf := xsv.importFile(
210
+ subsetRun.getFile("confident.tsv"), "tsv",
211
+ structureColsConv.getConfidentSettings(clonotypeAxisSpec, clonotypeAxisName, confidenceMetric, threshold),
212
+ { splitDataAndSpec: true })
380
213
 
381
214
  // Final structures pframe: confidence + subset columns. The label column
382
215
  // (clonotypeLabel) is part of confidence.
@@ -0,0 +1,204 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+
3
+ confidenceColumn := func(columnId, pName, label, extraAnnotations) {
4
+ annotations := {
5
+ "pl7.app/label": label,
6
+ "pl7.app/structure/confidenceMetric": "predictedErrorAngstroms"
7
+ }
8
+ for k, v in extraAnnotations { annotations[k] = v }
9
+ return {
10
+ column: columnId,
11
+ id: columnId,
12
+ spec: {
13
+ valueType: "Double",
14
+ name: pName,
15
+ annotations: annotations
16
+ }
17
+ }
18
+ }
19
+
20
+ // Per-batch confidence.tsv → typed PColumns. The processColumn orchestrator
21
+ // adds the batch-key axis (clonotype) automatically; the caller passes
22
+ // `batchKeyColumns: [clonotypeAxisName]` alongside this column list.
23
+ //
24
+ // `clonotypeLabel` is echoed by the python wrapper so the V3 structures
25
+ // table substitutes it into the row-axis cells (single-axis pl7.app/label
26
+ // PColumn → recognised by PlAgDataTable's `isLabelColumn`).
27
+ //
28
+ // failureReason / warning come in two flavours:
29
+ // - the *Text columns carry human-readable strings (translated python-side)
30
+ // and are surfaced in the table.
31
+ // - the code columns retain the raw enum value, hidden by default; they
32
+ // stay in the schema so downstream blocks / future failure-stats logic
33
+ // can group on a stable identifier.
34
+ getConfidenceColumns := func(isPaired) {
35
+ cols := [
36
+ {
37
+ column: "clonotypeLabel",
38
+ id: "clonotypeLabel",
39
+ spec: {
40
+ valueType: "String",
41
+ name: "pl7.app/label",
42
+ annotations: {
43
+ "pl7.app/label": "Clone",
44
+ "pl7.app/table/orderPriority": "100000"
45
+ }
46
+ }
47
+ },
48
+ confidenceColumn("meanError", "pl7.app/structure/confidence/mean",
49
+ "Mean error (Å)", { "pl7.app/isScore": "true", "pl7.app/table/orderPriority": "90000" }),
50
+ confidenceColumn("cdrh1Error", "pl7.app/structure/confidence/cdrh1", "CDRH1 error (Å)", {}),
51
+ confidenceColumn("cdrh2Error", "pl7.app/structure/confidence/cdrh2", "CDRH2 error (Å)", {}),
52
+ confidenceColumn("cdrh3Error", "pl7.app/structure/confidence/cdrh3", "CDRH3 error (Å)",
53
+ {
54
+ "pl7.app/isScore": "true",
55
+ "pl7.app/table/orderPriority": "89000",
56
+ "pl7.app/description": "ABodyBuilder2 accuracy degrades for CDRH3 ≥ ~20 aa (Abanades et al., 2023, Fig. 4); long-CDRH3 predictions should be treated as lower-confidence regardless of the error value."
57
+ })
58
+ ]
59
+ if isPaired {
60
+ cols = append(cols,
61
+ confidenceColumn("cdrl1Error", "pl7.app/structure/confidence/cdrl1", "CDRL1 error (Å)", {}))
62
+ cols = append(cols,
63
+ confidenceColumn("cdrl2Error", "pl7.app/structure/confidence/cdrl2", "CDRL2 error (Å)", {}))
64
+ cols = append(cols,
65
+ confidenceColumn("cdrl3Error", "pl7.app/structure/confidence/cdrl3", "CDRL3 error (Å)", {}))
66
+ }
67
+ cols = append(cols, {
68
+ column: "perResidueError",
69
+ id: "perResidueError",
70
+ spec: {
71
+ valueType: "String",
72
+ name: "pl7.app/structure/confidence/perResidue",
73
+ annotations: {
74
+ "pl7.app/label": "Per-residue error (JSON)",
75
+ "pl7.app/structure/confidenceMetric": "predictedErrorAngstroms",
76
+ "pl7.app/structure/perResidueSchema": "json_pos_chain_err_v1",
77
+ "pl7.app/table/visibility": "hidden"
78
+ }
79
+ }
80
+ })
81
+ cols = append(cols, {
82
+ column: "cdrh3Length",
83
+ id: "cdrh3Length",
84
+ spec: {
85
+ valueType: "Long",
86
+ name: "pl7.app/structure/cdrh3Length",
87
+ annotations: { "pl7.app/label": "CDRH3 length (aa)" }
88
+ }
89
+ })
90
+ cols = append(cols, {
91
+ column: "failureReasonText",
92
+ id: "failureReasonText",
93
+ spec: {
94
+ valueType: "String",
95
+ name: "pl7.app/structure/failureReason/text",
96
+ annotations: {
97
+ "pl7.app/label": "Failure reason",
98
+ "pl7.app/table/visibility": "optional"
99
+ }
100
+ }
101
+ })
102
+ cols = append(cols, {
103
+ column: "failureReason",
104
+ id: "failureReason",
105
+ spec: {
106
+ valueType: "String",
107
+ name: "pl7.app/structure/failureReason",
108
+ annotations: {
109
+ "pl7.app/label": "Failure reason (code)",
110
+ "pl7.app/table/visibility": "hidden"
111
+ }
112
+ }
113
+ })
114
+ cols = append(cols, {
115
+ column: "warningText",
116
+ id: "warningText",
117
+ spec: {
118
+ valueType: "String",
119
+ name: "pl7.app/structure/warning/text",
120
+ annotations: {
121
+ "pl7.app/label": "Warnings",
122
+ "pl7.app/table/visibility": "optional"
123
+ }
124
+ }
125
+ })
126
+ cols = append(cols, {
127
+ column: "warning",
128
+ id: "warning",
129
+ spec: {
130
+ valueType: "String",
131
+ name: "pl7.app/structure/warning",
132
+ annotations: {
133
+ "pl7.app/label": "Warnings (codes)",
134
+ "pl7.app/table/visibility": "hidden"
135
+ }
136
+ }
137
+ })
138
+ return cols
139
+ }
140
+
141
+ // PDB ResourceMap spec — produced by the batch orchestrator (merged across
142
+ // batches via merge-resource-maps). Failed clonotypes have no entry.
143
+ getPdbsMapSpec := func() {
144
+ return {
145
+ kind: "PColumn",
146
+ name: "pl7.app/structure/pdb",
147
+ domain: { "pl7.app/structure/numbering": "imgt" },
148
+ valueType: "File",
149
+ axesSpec: [],
150
+ annotations: {
151
+ "pl7.app/label": "Predicted PDB structure",
152
+ "pl7.app/structure/numbering": "imgt"
153
+ }
154
+ }
155
+ }
156
+
157
+ // xsv.importFile settings for the predictionSuccessful subset PColumn —
158
+ // 1 = prediction succeeded (failureReason was null), 0 = failed.
159
+ getPredictionSuccessfulSettings := func(clonotypeAxisSpec, clonotypeAxisName) {
160
+ return {
161
+ axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
162
+ columns: [{
163
+ column: "predictionSuccessful",
164
+ spec: {
165
+ valueType: "Int",
166
+ name: "pl7.app/structure/predictionSuccessful",
167
+ annotations: {
168
+ "pl7.app/label": "Structure predicted",
169
+ "pl7.app/isSubset": "true",
170
+ "pl7.app/table/visibility": "hidden"
171
+ }
172
+ }
173
+ }],
174
+ storageFormat: "Parquet"
175
+ }
176
+ }
177
+
178
+ // xsv.importFile settings for the `confident` subset PColumn — 1 when the
179
+ // user-selected confidence metric is ≤ threshold AND prediction succeeded.
180
+ getConfidentSettings := func(clonotypeAxisSpec, clonotypeAxisName, confidenceMetric, threshold) {
181
+ return {
182
+ axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
183
+ columns: [{
184
+ column: "confident",
185
+ spec: {
186
+ valueType: "Int",
187
+ name: "pl7.app/structure/confident",
188
+ annotations: {
189
+ "pl7.app/label": "Confident structure (" + confidenceMetric + " ≤ " + string(threshold) + " Å)",
190
+ "pl7.app/isSubset": "true",
191
+ "pl7.app/table/visibility": "hidden"
192
+ }
193
+ }
194
+ }],
195
+ storageFormat: "Parquet"
196
+ }
197
+ }
198
+
199
+ export ll.toStrict({
200
+ getConfidenceColumns: getConfidenceColumns,
201
+ getPdbsMapSpec: getPdbsMapSpec,
202
+ getPredictionSuccessfulSettings: getPredictionSuccessfulSettings,
203
+ getConfidentSettings: getConfidentSettings
204
+ })