@platforma-open/milaboratories.3d-structure-prediction.workflow 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+  WARN  Issue while reading "/home/runner/work/3d-structure-prediction/3d-structure-prediction/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
+
3
+ > @platforma-open/milaboratories.3d-structure-prediction.workflow@1.0.0 build /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow
4
+ > shx rm -rf dist && pl-tengo check && pl-tengo build
5
+
6
+ info: Skipping unknown file type: wf.test.ts
7
+ Processing "src/build-pdbs-map.tpl.tengo"...
8
+ Processing "src/main.tpl.tengo"...
9
+ Processing "src/predict-batch.tpl.tengo"...
10
+ No syntax errors found.
11
+ info: Skipping unknown file type: wf.test.ts
12
+ info: Compiling 'dist'...
13
+ info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/build-pdbs-map.plj.gz
14
+ info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/predict-batch.plj.gz
15
+ info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/main.plj.gz
16
+ info: Template Pack build done.
17
+ info: Template Pack build done.
package/dist/index.cjs ADDED
@@ -0,0 +1,5 @@
1
+ module.exports = { Templates: {
2
+ 'build-pdbs-map': { type: 'from-file', path: require.resolve('./tengo/tpl/build-pdbs-map.plj.gz') },
3
+ 'predict-batch': { type: 'from-file', path: require.resolve('./tengo/tpl/predict-batch.plj.gz') },
4
+ 'main': { type: 'from-file', path: require.resolve('./tengo/tpl/main.plj.gz') }
5
+ }};
@@ -0,0 +1,4 @@
1
+ declare type TemplateFromFile = { readonly type: "from-file"; readonly path: string; };
2
+ declare type TplName = "build-pdbs-map" | "predict-batch" | "main";
3
+ declare const Templates: Record<TplName, TemplateFromFile>;
4
+ export { Templates };
package/dist/index.js ADDED
@@ -0,0 +1,6 @@
1
+ import { resolve } from 'node:path';
2
+ export const Templates = {
3
+ 'build-pdbs-map': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/build-pdbs-map.plj.gz') },
4
+ 'predict-batch': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/predict-batch.plj.gz') },
5
+ 'main': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/main.plj.gz') }
6
+ };
Binary file
package/format.el ADDED
@@ -0,0 +1,43 @@
1
+ ;; This program formats all files inside src directory. Usage: emacs --script ./format.el
2
+
3
+ (defun install-go-mode () ;; bootstrap go-mode, which format-file later switches each buffer into
4
+ "Installs go-mode"
5
+ (require 'package)
6
+ (add-to-list 'package-archives ;; register the MELPA Stable archive as a package source
7
+ '("melpa-stable" . "https://stable.melpa.org/packages/"))
8
+ (package-initialize)
9
+ (unless package-archive-contents ;; fetch the archive index only when none is loaded yet
10
+ (package-refresh-contents))
11
+
12
+ (package-install 'go-mode t) ;; NOTE(review): second argument is presumably DONT-SELECT — confirm against package.el
13
+ (require 'go-mode))
14
+
15
+ ;; spaces -> tabs only at the beginning of lines
16
+ (setq tabify-regexp "^\t* [ \t]+")
17
+
18
+ (defun format-file (file) ;; FILE is a path; the file is reformatted and saved in place
19
+ "Formats a file according to slightly changed Go rules"
20
+ (message "Format %s" file) ;; progress line, one per file
21
+ (save-excursion
22
+ (find-file file) ;; visit FILE; the steps below act on its buffer
23
+ (delete-trailing-whitespace) ;; strip whitespace at line ends
24
+ (go-mode) ;; switch to go-mode so indent-region applies Go indentation
25
+ (tabify (point-min) (point-max)) ;; spaces -> tabs across the buffer; tabify-regexp (set above) limits this to leading whitespace
26
+ (indent-region (point-min) (point-max)) ;; re-indent the whole buffer
27
+ (save-buffer))) ;; write the result back to FILE
28
+
29
+ (install-go-mode)
30
+
31
+ ;; change syntax of a standard go-mode a bit
32
+ (advice-add
33
+ 'go--in-composite-literal-p
34
+ :filter-return
35
+ (lambda (&rest r) t))
36
+
37
+ ;; find all files in src
38
+ (setq files (directory-files-recursively "src" "\\.tengo\\'"))
39
+
40
+ ;; call format on every file.
41
+ (dolist (file files)
42
+ (format-file file))
43
+
package/index.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ declare type TemplateFromFile = { readonly type: "from-file"; readonly path: string; };
2
+ declare type TplName = "main";
3
+ declare const Templates: Record<TplName, TemplateFromFile>;
4
+ export { Templates };
package/index.js ADDED
@@ -0,0 +1,3 @@
1
+ module.exports = { Templates: {
2
+ 'main': { type: 'from-file', path: require.resolve('./dist/tengo/tpl/main.plj.gz') }
3
+ }}
package/package.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "name": "@platforma-open/milaboratories.3d-structure-prediction.workflow",
3
+ "version": "1.0.0",
4
+ "description": "Block Workflow",
5
+ "type": "module",
6
+ "dependencies": {
7
+ "@platforma-sdk/workflow-tengo": "5.21.0",
8
+ "@platforma-open/milaboratories.3d-structure-prediction.software": "1.0.0"
9
+ },
10
+ "devDependencies": {
11
+ "@platforma-sdk/tengo-builder": "2.5.21",
12
+ "@platforma-sdk/test": "1.73.0"
13
+ },
14
+ "peerDependencies": {
15
+ "vitest": "*"
16
+ },
17
+ "scripts": {
18
+ "build": "shx rm -rf dist && pl-tengo check && pl-tengo build",
19
+ "test": "vitest",
20
+ "format": "/usr/bin/env emacs --script ./format.el"
21
+ }
22
+ }
@@ -0,0 +1,43 @@
1
+ // Workdir processor template: builds a File-typed ResourceMap PColumn
2
+ // keyed by clonotypeKey from the post-prediction workdir.
3
+ //
4
+ // Reads `manifest.tsv` (written by run_immunebuilder.py — header
5
+ // `clonotypeKey\tpdb_filename`, one row per successful prediction),
6
+ // then saves each referenced PDB file from `pdbs/` and adds it to the
7
+ // ResourceMap under its clonotypeKey. Only successful rows appear; failed
8
+ // rows have no manifest entry and therefore no ResourceMap entry.
9
+
10
+ self := import("@platforma-sdk/workflow-tengo:workdir.proc")
11
+
12
+ text := import("text")
13
+ path := import("@platforma-sdk/workflow-tengo:path")
14
+ pcolumn := import("@platforma-sdk/workflow-tengo:pframes.pcolumn")
15
+
16
+ self.readFiles(func(inputs) {
17
+ return { manifest: "manifest.tsv" }
18
+ })
19
+
20
+ self.body(func(inputs) {
21
+ 	manifestText := text.trim_space(string(inputs.manifest.getData()))
22
+
23
+ 	pdbsByKey := pcolumn.resourceMapBuilder(/* keyLength */ 1)
24
+
25
+ 	// rows[0] is the header, so iteration starts at the second row. Every row
26
+ 	// and every cell is trimmed, which also removes the CR left behind when the
27
+ 	// manifest was written with CRLF line endings.
28
+ 	rows := text.split(manifestText, "\n")
29
+ 	for row in rows[1:] {
30
+ 		record := text.trim_space(row)
31
+ 		if record == "" { continue }
32
+ 		cells := text.split(record, "\t")
33
+ 		if len(cells) < 2 { continue }
34
+ 		key := text.trim_space(cells[0])
35
+ 		pdbName := text.trim_space(cells[1])
36
+ 		if pdbName == "" { continue }
37
+ 		// Save the PDB from the pdbs/ directory and register it under its key.
38
+ 		savedPdb := self.saveFile(path.join("pdbs", pdbName))
39
+ 		pdbsByKey.add([key], savedPdb)
40
+ 	}
41
+
42
+ 	return pdbsByKey.build()
43
+ })
@@ -0,0 +1,414 @@
1
+ wf := import("@platforma-sdk/workflow-tengo:workflow")
2
+ assets := import("@platforma-sdk/workflow-tengo:assets")
3
+ pframes := import("@platforma-sdk/workflow-tengo:pframes")
4
+ xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
5
+ pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
6
+ pt := import("@platforma-sdk/workflow-tengo:pt")
7
+ smart := import("@platforma-sdk/workflow-tengo:smart")
8
+ exec := import("@platforma-sdk/workflow-tengo:exec")
9
+ units := import("@platforma-sdk/workflow-tengo:units")
10
+
11
+ predictBatchTpl := assets.importTemplate(":predict-batch")
12
+ immuneBuilderSw := assets.importSoftware("@platforma-open/milaboratories.3d-structure-prediction.software:immunebuilder-predict")
13
+
14
+ CONFIDENCE_METRIC_ANNOTATION := { "pl7.app/structure/confidenceMetric": "predictedErrorAngstroms" }
15
+
16
+ // Builds the Xsv column descriptor for one Double-valued confidence column.
17
+ // columnId is used as both the TSV column name and the column id; pName is
18
+ // the spec name; label fills "pl7.app/label"; extraAnnotations entries are
19
+ // applied last and therefore override the defaults. The confidence-metric
20
+ // annotation comes from CONFIDENCE_METRIC_ANNOTATION, not a repeated literal.
21
+ confidenceColumn := func(columnId, pName, label, extraAnnotations) {
22
+ 	annotations := { "pl7.app/label": label }
23
+ 	// Copy entry by entry: assigning the shared map itself would let later writes leak into it.
24
+ 	for k, v in CONFIDENCE_METRIC_ANNOTATION { annotations[k] = v }
25
+ 	for k, v in extraAnnotations { annotations[k] = v }
26
+ 	return {
27
+ 		column: columnId,
28
+ 		id: columnId,
29
+ 		spec: { valueType: "Double", name: pName, annotations: annotations }
30
+ 	}
31
+ }
32
+
33
+ wf.prepare(func(args) {
34
+ // args.dataset is a PrimaryRef { __isPrimaryRef: "v1", column: PlRef, filter?: PlRef }.
35
+ // `addAnchor` resolves the dataset column (also gives us its spec for the
36
+ // row-axis lookup); `addRef` is the no-anchor form for the optional filter
37
+ // PlRef so the bundle resolves both spec and data — needed when we wrap
38
+ // heavy chain as a ResolvedPrimaryRef for processColumn batch.
39
+ bb := wf.createPBundleBuilder() // collects every column wf.body later reads from args.columns
40
+ bb.ignoreMissingDomains()
41
+ bb.addAnchor("main", args.dataset.column)
42
+ bb.addSingle(args.heavyChainRef)
43
+ if args.mode == "ABodyBuilder2" && !is_undefined(args.lightChainRef) { // NOTE(review): wf.body fetches lightChainRef whenever mode is "ABodyBuilder2", without this undefined check — confirm the ref is validated upstream
44
+ bb.addSingle(args.lightChainRef)
45
+ }
46
+ if !is_undefined(args.dataset.filter) {
47
+ bb.addRef(args.dataset.filter)
48
+ }
49
+ // Clonotype-axis label column — passed as a secondary `columns` entry to
50
+ // processColumn so the python wrapper sees `clonotypeLabel` per row and
51
+ // echoes it into confidence.tsv (used as the row-axis label substitution
52
+ // in the V3 structures table).
53
+ bb.addMulti(
54
+ {
55
+ axes: [{ anchor: "main", idx: 1 }], // idx 1: the same axis wf.body reads as datasetSpec.axesSpec[1] (clonotype axis)
56
+ name: "pl7.app/label"
57
+ },
58
+ "clonotypeKeyLabels")
59
+ return { columns: bb.build() } // surfaced to wf.body as args.columns
60
+ })
61
+
62
+ wf.body(func(args) {
63
+ columns := args.columns
64
+ datasetCol := args.dataset.column
65
+ datasetSpec := columns.getSpec(datasetCol)
66
+ clonotypeAxisSpec := datasetSpec.axesSpec[1]
67
+ clonotypeAxisName := clonotypeAxisSpec.name
68
+
69
+ heavy := columns.getColumn(args.heavyChainRef)
70
+ isPaired := args.mode == "ABodyBuilder2"
71
+ light := undefined
72
+ if isPaired { light = columns.getColumn(args.lightChainRef) }
73
+ labelColumns := columns.getColumns("clonotypeKeyLabels")
74
+ labelEntry := len(labelColumns) > 0 ? labelColumns[0] : undefined
75
+
76
+ seed := is_undefined(args.torchSeed) ? 42 : args.torchSeed
77
+ cpu := is_undefined(args.cpu) ? 4 : args.cpu
78
+ memGiB := is_undefined(args.mem) ? 16 : args.mem
79
+ confidenceMetric := is_undefined(args.confidenceMetric) ? "cdrh3Mean" : args.confidenceMetric
80
+ threshold := is_undefined(args.confidenceThresholdAngstroms) ? 2.5 : args.confidenceThresholdAngstroms
81
+ batchSize := is_undefined(args.batchSize) ? 50 : args.batchSize
82
+
83
+ blockId := wf.blockId().getDataAsJson()
84
+
85
+ // Heavy chain is the keyspace primary. When the dataset PrimaryRef carries
86
+ // a filter, wrap heavy as a ResolvedPrimaryRef so the batch orchestrator
87
+ // inner-joins the filter on each batch's clonotype keys.
88
+ primarySrc := { spec: heavy.spec, data: heavy.data }
89
+ if !is_undefined(args.dataset.filter) {
90
+ filterCol := columns.getColumn(args.dataset.filter)
91
+ primarySrc = {
92
+ __isPrimaryRef: "v1",
93
+ column: { spec: heavy.spec, data: heavy.data },
94
+ filter: { spec: filterCol.spec, data: filterCol.data }
95
+ }
96
+ }
97
+
98
+ // Secondary entries — outer-joined onto each batch's keyspace.
99
+ secondaryEntries := []
100
+ if isPaired {
101
+ secondaryEntries = append(secondaryEntries, {
102
+ src: { spec: light.spec, data: light.data },
103
+ header: "lightChain"
104
+ })
105
+ }
106
+ if !is_undefined(labelEntry) {
107
+ secondaryEntries = append(secondaryEntries, {
108
+ src: { spec: labelEntry.spec, data: labelEntry.data },
109
+ header: "clonotypeLabel"
110
+ })
111
+ }
112
+
113
+ // Per-batch confidence.tsv → typed PColumns. The orchestrator adds the
114
+ // batch-key axis (clonotype) automatically; `batchKeyColumns` names it.
115
+ // `clonotypeLabel` is echoed by the python wrapper so the V3 structures
116
+ // table substitutes it into the row-axis cells (single-axis pl7.app/label
117
+ // PColumn → recognised by PlAgDataTable's `isLabelColumn`).
118
+ confidenceXsvColumns := [
119
+ {
120
+ column: "clonotypeLabel",
121
+ id: "clonotypeLabel",
122
+ spec: {
123
+ valueType: "String",
124
+ name: "pl7.app/label",
125
+ annotations: {
126
+ "pl7.app/label": "Clone",
127
+ "pl7.app/table/orderPriority": "100000"
128
+ }
129
+ }
130
+ },
131
+ confidenceColumn("meanError", "pl7.app/structure/confidence/mean",
132
+ "Mean error (Å)", { "pl7.app/isScore": "true", "pl7.app/table/orderPriority": "90000" }),
133
+ confidenceColumn("cdrh1Error", "pl7.app/structure/confidence/cdrh1", "CDRH1 error (Å)", {}),
134
+ confidenceColumn("cdrh2Error", "pl7.app/structure/confidence/cdrh2", "CDRH2 error (Å)", {}),
135
+ confidenceColumn("cdrh3Error", "pl7.app/structure/confidence/cdrh3", "CDRH3 error (Å)",
136
+ {
137
+ "pl7.app/isScore": "true",
138
+ "pl7.app/table/orderPriority": "89000",
139
+ "pl7.app/description": "ABodyBuilder2 accuracy degrades for CDRH3 ≥ ~20 aa (Abanades et al., 2023, Fig. 4); long-CDRH3 predictions should be treated as lower-confidence regardless of the error value."
140
+ })
141
+ ]
142
+ if isPaired {
143
+ confidenceXsvColumns = append(confidenceXsvColumns,
144
+ confidenceColumn("cdrl1Error", "pl7.app/structure/confidence/cdrl1", "CDRL1 error (Å)", {}))
145
+ confidenceXsvColumns = append(confidenceXsvColumns,
146
+ confidenceColumn("cdrl2Error", "pl7.app/structure/confidence/cdrl2", "CDRL2 error (Å)", {}))
147
+ confidenceXsvColumns = append(confidenceXsvColumns,
148
+ confidenceColumn("cdrl3Error", "pl7.app/structure/confidence/cdrl3", "CDRL3 error (Å)", {}))
149
+ }
150
+ confidenceXsvColumns = append(confidenceXsvColumns, {
151
+ column: "perResidueError",
152
+ id: "perResidueError",
153
+ spec: {
154
+ valueType: "String",
155
+ name: "pl7.app/structure/confidence/perResidue",
156
+ annotations: {
157
+ "pl7.app/label": "Per-residue error (JSON)",
158
+ "pl7.app/structure/confidenceMetric": "predictedErrorAngstroms",
159
+ "pl7.app/structure/perResidueSchema": "json_pos_chain_err_v1",
160
+ "pl7.app/table/visibility": "hidden"
161
+ }
162
+ }
163
+ })
164
+ confidenceXsvColumns = append(confidenceXsvColumns, {
165
+ column: "cdrh3Length",
166
+ id: "cdrh3Length",
167
+ spec: {
168
+ valueType: "Long",
169
+ name: "pl7.app/structure/cdrh3Length",
170
+ annotations: { "pl7.app/label": "CDRH3 length (aa)" }
171
+ }
172
+ })
173
+ // failureReason / warning come in two flavours:
174
+ // - the *Text columns carry human-readable strings (translated python-side)
175
+ // and are surfaced in the table.
176
+ // - the code columns retain the raw enum value, hidden by default; they
177
+ // stay in the schema so downstream blocks / future failure-stats logic
178
+ // can group on a stable identifier.
179
+ confidenceXsvColumns = append(confidenceXsvColumns, {
180
+ column: "failureReasonText",
181
+ id: "failureReasonText",
182
+ spec: {
183
+ valueType: "String",
184
+ name: "pl7.app/structure/failureReason/text",
185
+ annotations: {
186
+ "pl7.app/label": "Failure reason",
187
+ "pl7.app/table/visibility": "optional"
188
+ }
189
+ }
190
+ })
191
+ confidenceXsvColumns = append(confidenceXsvColumns, {
192
+ column: "failureReason",
193
+ id: "failureReason",
194
+ spec: {
195
+ valueType: "String",
196
+ name: "pl7.app/structure/failureReason",
197
+ annotations: {
198
+ "pl7.app/label": "Failure reason (code)",
199
+ "pl7.app/table/visibility": "hidden"
200
+ }
201
+ }
202
+ })
203
+ confidenceXsvColumns = append(confidenceXsvColumns, {
204
+ column: "warningText",
205
+ id: "warningText",
206
+ spec: {
207
+ valueType: "String",
208
+ name: "pl7.app/structure/warning/text",
209
+ annotations: {
210
+ "pl7.app/label": "Warnings",
211
+ "pl7.app/table/visibility": "optional"
212
+ }
213
+ }
214
+ })
215
+ confidenceXsvColumns = append(confidenceXsvColumns, {
216
+ column: "warning",
217
+ id: "warning",
218
+ spec: {
219
+ valueType: "String",
220
+ name: "pl7.app/structure/warning",
221
+ annotations: {
222
+ "pl7.app/label": "Warnings (codes)",
223
+ "pl7.app/table/visibility": "hidden"
224
+ }
225
+ }
226
+ })
227
+
228
+ // Pre-download model weights once, before the batch fan-out. ImmuneBuilder
229
+ // fetches weights on first predictor construction into a shared on-disk
230
+ // cache; without this step, N parallel batch containers race to write the
231
+ // same files and produce partial/corrupt caches that fail with no retry.
232
+ // The sentinel file is forwarded as a body input — each batch awaits it
233
+ // (via addFile in predict-batch.tpl.tengo) before its own exec runs, so
234
+ // every batch starts against an already-warm cache.
235
+ warmupRun := exec.builder().
236
+ software(immuneBuilderSw).
237
+ cpu(1).mem(2 * units.GiB).
238
+ env("BLOCK_VERSION", blockId).
239
+ arg("--warmup").
240
+ arg("--mode").arg(args.mode).
241
+ arg("--sentinel").arg("warmup.done").
242
+ saveFile("warmup.done").
243
+ printErrStreamToStdout().
244
+ saveStdoutStream().
245
+ run()
246
+ warmupSentinel := warmupRun.getFile("warmup.done")
247
+
248
+ // Per-batch scalar settings forwarded to predict-batch as `inputs.<key>`.
249
+ bodyExtra := {
250
+ mode: smart.createJsonResource(args.mode),
251
+ seed: smart.createJsonResource(seed),
252
+ cpu: smart.createJsonResource(cpu),
253
+ memGiB: smart.createJsonResource(memGiB),
254
+ confidenceMetric: smart.createJsonResource(confidenceMetric),
255
+ threshold: smart.createJsonResource(threshold),
256
+ blockId: smart.createJsonResource(blockId),
257
+ warmupSentinel: warmupSentinel
258
+ }
259
+
260
+ processResult := pframes.processColumn(
261
+ {
262
+ primary: { src: primarySrc, header: "heavyChain" },
263
+ columns: secondaryEntries,
264
+ primaryJoin: "inner"
265
+ },
266
+ predictBatchTpl,
267
+ [
268
+ {
269
+ type: "Xsv",
270
+ name: "confidence",
271
+ xsvType: "tsv",
272
+ settings: {
273
+ batchKeyColumns: [clonotypeAxisName],
274
+ columns: confidenceXsvColumns,
275
+ storageFormat: "Parquet"
276
+ }
277
+ },
278
+ {
279
+ type: "ResourceMap",
280
+ name: "pdbsMap",
281
+ spec: {
282
+ kind: "PColumn",
283
+ name: "pl7.app/structure/pdb",
284
+ domain: { "pl7.app/structure/numbering": "imgt" },
285
+ valueType: "File",
286
+ axesSpec: [],
287
+ annotations: {
288
+ "pl7.app/label": "Predicted PDB structure",
289
+ "pl7.app/structure/numbering": "imgt"
290
+ }
291
+ }
292
+ }
293
+ ],
294
+ {
295
+ batch: {
296
+ size: batchSize,
297
+ keyColumns: [clonotypeAxisName],
298
+ format: "tsv",
299
+ // passContent=false: orchestrator writes per-batch blob files
300
+ // (one per slice) and hands each as a file reference to the body.
301
+ // passContent=true would ship the entire per-scope joined TSV as
302
+ // a single value resource — that's capped at 3MiB and overflows
303
+ // at ~22MiB on real datasets.
304
+ passContent: false
305
+ },
306
+ extra: bodyExtra
307
+ }
308
+ )
309
+
310
+ // Trace stamp for every confidence + subset column.
311
+ trace := pSpec.makeTrace(datasetSpec, {
312
+ type: "milaboratories.3d-structure-prediction",
313
+ id: blockId,
314
+ importance: 20,
315
+ label: "3D Structure Prediction"
316
+ })
317
+
318
+ // Confidence pframe — one PColumn per Xsv column declared above.
319
+ confidencePfb := pframes.pFrameBuilder()
320
+ for col in confidenceXsvColumns {
321
+ out := processResult.output("confidence", col.id)
322
+ confidencePfb.add(col.id, trace.inject(out.spec), out.data)
323
+ }
324
+ confidencePfHandle := confidencePfb.build()
325
+
326
+ // Subset columns (predictionSuccessful + confident) computed via pt over
327
+ // the assembled confidence pframe. pt addresses columns by xsv id, not by
328
+ // spec.name.
329
+ metricColumn := "cdrh3Error"
330
+ if confidenceMetric == "overallMean" {
331
+ metricColumn = "meanError"
332
+ }
333
+
334
+ subsetWf := pt.workflow().mem("2GiB").cpu(1)
335
+ df := subsetWf.frame(pt.p.full(confidencePfHandle))
336
+
337
+ successExpr := pt.col("failureReason").isNull().cast("Int").alias("predictionSuccessful")
338
+ successDf := df.select(pt.axis(clonotypeAxisName), successExpr)
339
+ successDf.save("subsets.tsv")
340
+
341
+ confidentExpr := pt.col("failureReason").isNull().and(pt.col(metricColumn).le(threshold)).fillNull(false).cast("Int").alias("confident")
342
+ confidentDf := df.select(pt.axis(clonotypeAxisName), confidentExpr)
343
+ confidentDf.save("confident.tsv")
344
+
345
+ subsetRun := subsetWf.run()
346
+
347
+ predictionSuccessfulPf := xsv.importFile(subsetRun.getFile("subsets.tsv"), "tsv", {
348
+ axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
349
+ columns: [{
350
+ column: "predictionSuccessful",
351
+ spec: {
352
+ valueType: "Int",
353
+ name: "pl7.app/structure/predictionSuccessful",
354
+ annotations: {
355
+ "pl7.app/label": "Structure predicted",
356
+ "pl7.app/isSubset": "true",
357
+ "pl7.app/table/visibility": "hidden"
358
+ }
359
+ }
360
+ }],
361
+ storageFormat: "Parquet"
362
+ }, { splitDataAndSpec: true })
363
+
364
+ confidentPf := xsv.importFile(subsetRun.getFile("confident.tsv"), "tsv", {
365
+ axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
366
+ columns: [{
367
+ column: "confident",
368
+ spec: {
369
+ valueType: "Int",
370
+ name: "pl7.app/structure/confident",
371
+ annotations: {
372
+ "pl7.app/label": "Confident structure (" + confidenceMetric + " ≤ " + string(threshold) + " Å)",
373
+ "pl7.app/isSubset": "true",
374
+ "pl7.app/table/visibility": "hidden"
375
+ }
376
+ }
377
+ }],
378
+ storageFormat: "Parquet"
379
+ }, { splitDataAndSpec: true })
380
+
381
+ // Final structures pframe: confidence + subset columns. The label column
382
+ // (clonotypeLabel) is part of confidence.
383
+ finalPfb := pframes.pFrameBuilder()
384
+ for col in confidenceXsvColumns {
385
+ out := processResult.output("confidence", col.id)
386
+ finalPfb.add(col.id, trace.inject(out.spec), out.data)
387
+ }
388
+ for k, v in predictionSuccessfulPf {
389
+ finalPfb.add("subset/" + k, trace.inject(v.spec), v.data)
390
+ }
391
+ for k, v in confidentPf {
392
+ finalPfb.add("subset/" + k, trace.inject(v.spec), v.data)
393
+ }
394
+ finalPf := finalPfb.build()
395
+
396
+ // PDB ResourceMap pframe — produced directly by the batch orchestrator
397
+ // (merged across batches via merge-resource-maps). Failed clonotypes have
398
+ // no entry.
399
+ pdbsOut := processResult.output("pdbsMap")
400
+ pdbsPfb := pframes.pFrameBuilder()
401
+ pdbsPfb.add("pdb", trace.inject(pdbsOut.spec), pdbsOut.data)
402
+ pdbsPf := pdbsPfb.build()
403
+
404
+ return {
405
+ outputs: {
406
+ structuresTable: pframes.exportFrame(finalPf),
407
+ pdbsMap: pframes.exportFrame(pdbsPf)
408
+ },
409
+ exports: {
410
+ structures: finalPf,
411
+ pdbs: pdbsPf
412
+ }
413
+ }
414
+ })
@@ -0,0 +1,73 @@
1
+ // predict-batch — body template for `pframes.processColumn` batch mode.
2
+ // Receives one batch's TSV as a Blob file reference at `__value__`
3
+ // (passContent=false), runs the ImmuneBuilder Python wrapper on it, and
4
+ // returns:
5
+ // - "confidence" — confidence.tsv file ref (Xsv-imported by the orchestrator)
6
+ // - "pdbsMap" — ResourceMap of per-clonotype PDB files (built via the
7
+ // same workdir processor used by the legacy single-batch
8
+ // flow)
9
+ //
10
+ // Aggregate `summary.json` is intentionally NOT emitted — in batch mode each
11
+ // batch has its own summary; aggregation across batches is a separate concern
12
+ // the model can compute from the confidence PFrame post-hoc.
13
+
14
+ self := import("@platforma-sdk/workflow-tengo:tpl")
15
+ ll := import("@platforma-sdk/workflow-tengo:ll")
16
+ smart := import("@platforma-sdk/workflow-tengo:smart")
17
+ exec := import("@platforma-sdk/workflow-tengo:exec")
18
+ assets := import("@platforma-sdk/workflow-tengo:assets")
19
+ pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
20
+ units := import("@platforma-sdk/workflow-tengo:units")
21
+
22
+ immuneBuilderSw := assets.importSoftware("@platforma-open/milaboratories.3d-structure-prediction.software:immunebuilder-predict")
23
+ buildPdbsMapTpl := assets.importTemplate(":build-pdbs-map")
24
+
25
+ self.defineOutputs(
26
+ "confidence",
27
+ "pdbsMap"
28
+ )
29
+
30
+ self.body(func(inputs) {
31
+ 	// passContent=false means the batch must arrive as a file reference.
32
+ 	batchTsv := inputs[pConstants.VALUE_FIELD_NAME]
33
+ 	if !smart.isReference(batchTsv) {
34
+ 		ll.panic("predict-batch: expected __value__ to be a file reference (passContent=false), got: %v", batchTsv)
35
+ 	}
36
+
37
+ 	// Command-line flags for the wrapper, in the order they are passed.
38
+ 	cliArgs := [
39
+ 		"--mode", inputs.mode,
40
+ 		"--input", "batch.tsv",
41
+ 		"--output-dir", "pdbs",
42
+ 		"--manifest", "manifest.tsv",
43
+ 		"--confidence", "confidence.tsv",
44
+ 		"--metric", inputs.confidenceMetric,
45
+ 		"--threshold", string(inputs.threshold),
46
+ 		"--seed", string(inputs.seed)
47
+ 	]
48
+
49
+ 	// The warmup sentinel's content is never read; mounting it as a workdir
50
+ 	// file is what makes this batch wait for the pre-warmup step, so the
51
+ 	// shared model cache is populated before prediction starts.
52
+ 	cmd := exec.builder().
53
+ 		software(immuneBuilderSw).
54
+ 		cpu(inputs.cpu).mem(inputs.memGiB * units.GiB).
55
+ 		env("BLOCK_VERSION", inputs.blockId).
56
+ 		addFile("warmup.done", inputs.warmupSentinel).
57
+ 		addFile("batch.tsv", batchTsv)
58
+ 	for flag in cliArgs { cmd = cmd.arg(flag) }
59
+
60
+ 	// confidence.tsv is saved as-is; the workdir processor turns pdbs/ plus
61
+ 	// manifest.tsv into the per-clonotype ResourceMap.
62
+ 	predictRun := cmd.
63
+ 		saveFile("confidence.tsv").
64
+ 		processWorkdir("pdbsMap", buildPdbsMapTpl, {}).
65
+ 		printErrStreamToStdout().
66
+ 		saveStdoutStream().
67
+ 		run()
68
+
69
+ 	return {
70
+ 		confidence: predictRun.getFile("confidence.tsv"),
71
+ 		pdbsMap: predictRun.getProcessorResult("pdbsMap")
72
+ 	}
73
+ })
package/src/wf.test.ts ADDED
@@ -0,0 +1,16 @@
1
+ import { tplTest } from "@platforma-sdk/test";
2
+
3
+ tplTest( // NOTE(review): reads like unmodified scaffold — main.tpl.tengo reads dataset/heavyChainRef/mode args and returns structuresTable/pdbsMap outputs; confirm and replace
4
+ 'should return a concatenated string',
5
+ async ({ helper, expect }) => {
6
+ const results = await helper.renderWorkflow("main", false, {
7
+ name: 'World'
8
+ });
9
+ // Reads an output named "pythonMessage" as a string and compares it to a fixed greeting.
10
+ const pythonMessage = results.output("pythonMessage", (a) => a?.getDataAsString());
11
+ expect(await pythonMessage.awaitStableValue()).eq('Hello from Python, World!\n');
12
+ // Reads an output named "tengoMessage" as JSON and compares it to a fixed greeting.
13
+ const tengoMessage = results.output("tengoMessage", (a) => a?.getDataAsJson<string>());
14
+ expect(await tengoMessage.awaitStableValue()).eq('Hello from Tengo, World!');
15
+ }
16
+ );
package/tsconfig.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+   "compilerOptions": {
3
+     "target": "es2022",
4
+     "module": "commonjs",
5
+     "moduleResolution": "node",
6
+     "esModuleInterop": true,
7
+     "strict": true,
8
+     "outDir": "./dist",
9
+     "rootDir": "./src",
10
+     "sourceMap": true,
11
+     "declaration": true,
12
+     "types": []
13
+   },
14
+   "include": ["src/**/*"],
15
+   "exclude": ["node_modules", "dist"]
16
+ }
@@ -0,0 +1,9 @@
1
+ import { defineConfig } from 'vitest/config';
2
+
3
+ export default defineConfig({
4
+ test: {
5
+ watch: false,
6
+ maxConcurrency: 3,
7
+ testTimeout: 5000
8
+ }
9
+ });