@platforma-open/milaboratories.3d-structure-prediction.workflow 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +17 -0
- package/dist/index.cjs +5 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +6 -0
- package/dist/tengo/tpl/build-pdbs-map.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/predict-batch.plj.gz +0 -0
- package/format.el +43 -0
- package/index.d.ts +4 -0
- package/index.js +3 -0
- package/package.json +22 -0
- package/src/build-pdbs-map.tpl.tengo +43 -0
- package/src/main.tpl.tengo +414 -0
- package/src/predict-batch.tpl.tengo +73 -0
- package/src/wf.test.ts +16 -0
- package/tsconfig.json +16 -0
- package/vitest.config.mts +9 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
WARN Issue while reading "/home/runner/work/3d-structure-prediction/3d-structure-prediction/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
|
+
|
|
3
|
+
> @platforma-open/milaboratories.3d-structure-prediction.workflow@1.0.0 build /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow
|
|
4
|
+
> shx rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
|
+
|
|
6
|
+
info: Skipping unknown file type: wf.test.ts
|
|
7
|
+
Processing "src/build-pdbs-map.tpl.tengo"...
|
|
8
|
+
Processing "src/main.tpl.tengo"...
|
|
9
|
+
Processing "src/predict-batch.tpl.tengo"...
|
|
10
|
+
No syntax errors found.
|
|
11
|
+
info: Skipping unknown file type: wf.test.ts
|
|
12
|
+
info: Compiling 'dist'...
|
|
13
|
+
info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/build-pdbs-map.plj.gz
|
|
14
|
+
info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/predict-batch.plj.gz
|
|
15
|
+
info: - writing /home/runner/work/3d-structure-prediction/3d-structure-prediction/workflow/dist/tengo/tpl/main.plj.gz
|
|
16
|
+
info: Template Pack build done.
|
|
17
|
+
info: Template Pack build done.
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
module.exports = { Templates: {
|
|
2
|
+
'build-pdbs-map': { type: 'from-file', path: require.resolve('./tengo/tpl/build-pdbs-map.plj.gz') },
|
|
3
|
+
'predict-batch': { type: 'from-file', path: require.resolve('./tengo/tpl/predict-batch.plj.gz') },
|
|
4
|
+
'main': { type: 'from-file', path: require.resolve('./tengo/tpl/main.plj.gz') }
|
|
5
|
+
}};
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { resolve } from 'node:path';
|
|
2
|
+
export const Templates = {
|
|
3
|
+
'build-pdbs-map': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/build-pdbs-map.plj.gz') },
|
|
4
|
+
'predict-batch': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/predict-batch.plj.gz') },
|
|
5
|
+
'main': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/main.plj.gz') }
|
|
6
|
+
};
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/format.el
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
;; This program formats all files inside src directory. Usage: emacs --script ./format.el
|
|
2
|
+
|
|
3
|
+
(defun install-go-mode ()
|
|
4
|
+
"Installs go-mode"
|
|
5
|
+
(require 'package)
|
|
6
|
+
(add-to-list 'package-archives
|
|
7
|
+
'("melpa-stable" . "https://stable.melpa.org/packages/"))
|
|
8
|
+
(package-initialize)
|
|
9
|
+
(unless package-archive-contents
|
|
10
|
+
(package-refresh-contents))
|
|
11
|
+
|
|
12
|
+
(package-install 'go-mode t)
|
|
13
|
+
(require 'go-mode))
|
|
14
|
+
|
|
15
|
+
;; spaces -> tabs only at the beginning of lines
|
|
16
|
+
(setq tabify-regexp "^\t* [ \t]+")
|
|
17
|
+
|
|
18
|
+
(defun format-file (file)
|
|
19
|
+
"Formats a file according to slightly changed Go rules"
|
|
20
|
+
(message "Format %s" file)
|
|
21
|
+
(save-excursion
|
|
22
|
+
(find-file file)
|
|
23
|
+
(delete-trailing-whitespace) ;; deletes whitespaces
|
|
24
|
+
(go-mode) ;; sets golang rules for indentation
|
|
25
|
+
(tabify (point-min) (point-max)) ;; spaces -> tabs in the whole file
|
|
26
|
+
(indent-region (point-min) (point-max)) ;; indentation in the whole file
|
|
27
|
+
(save-buffer))) ;; save file
|
|
28
|
+
|
|
29
|
+
(install-go-mode)
|
|
30
|
+
|
|
31
|
+
;; change syntax of a standard go-mode a bit
|
|
32
|
+
(advice-add
|
|
33
|
+
'go--in-composite-literal-p
|
|
34
|
+
:filter-return
|
|
35
|
+
(lambda (&rest r) t))
|
|
36
|
+
|
|
37
|
+
;; find all files in src
|
|
38
|
+
(setq files (directory-files-recursively "src" "\\.tengo\\'"))
|
|
39
|
+
|
|
40
|
+
;; call format on every file.
|
|
41
|
+
(dolist (file files)
|
|
42
|
+
(format-file file))
|
|
43
|
+
|
package/index.d.ts
ADDED
package/index.js
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@platforma-open/milaboratories.3d-structure-prediction.workflow",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Block Workflow",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"dependencies": {
|
|
7
|
+
"@platforma-sdk/workflow-tengo": "5.21.0",
|
|
8
|
+
"@platforma-open/milaboratories.3d-structure-prediction.software": "1.0.0"
|
|
9
|
+
},
|
|
10
|
+
"devDependencies": {
|
|
11
|
+
"@platforma-sdk/tengo-builder": "2.5.21",
|
|
12
|
+
"@platforma-sdk/test": "1.73.0"
|
|
13
|
+
},
|
|
14
|
+
"peerDependencies": {
|
|
15
|
+
"vitest": "*"
|
|
16
|
+
},
|
|
17
|
+
"scripts": {
|
|
18
|
+
"build": "shx rm -rf dist && pl-tengo check && pl-tengo build",
|
|
19
|
+
"test": "vitest",
|
|
20
|
+
"format": "/usr/bin/env emacs --script ./format.el"
|
|
21
|
+
}
|
|
22
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
// Workdir processor template: builds a File-typed ResourceMap PColumn
|
|
2
|
+
// keyed by clonotypeKey from the post-prediction workdir.
|
|
3
|
+
//
|
|
4
|
+
// Reads `manifest.tsv` (written by run_immunebuilder.py — header
|
|
5
|
+
// `clonotypeKey\tpdb_filename`, one row per successful prediction),
|
|
6
|
+
// then saves each referenced PDB file from `pdbs/` and adds it to the
|
|
7
|
+
// ResourceMap under its clonotypeKey. Only successful rows appear; failed
|
|
8
|
+
// rows have no manifest entry and therefore no ResourceMap entry.
|
|
9
|
+
|
|
10
|
+
self := import("@platforma-sdk/workflow-tengo:workdir.proc")
|
|
11
|
+
|
|
12
|
+
text := import("text")
|
|
13
|
+
path := import("@platforma-sdk/workflow-tengo:path")
|
|
14
|
+
pcolumn := import("@platforma-sdk/workflow-tengo:pframes.pcolumn")
|
|
15
|
+
|
|
16
|
+
self.readFiles(func(inputs) {
|
|
17
|
+
return { manifest: "manifest.tsv" }
|
|
18
|
+
})
|
|
19
|
+
|
|
20
|
+
self.body(func(inputs) {
|
|
21
|
+
manifestRaw := string(inputs.manifest.getData())
|
|
22
|
+
|
|
23
|
+
data := pcolumn.resourceMapBuilder(/* keyLength */ 1)
|
|
24
|
+
|
|
25
|
+
lines := text.split(text.trim_space(manifestRaw), "\n")
|
|
26
|
+
// Skip header; iterate data rows. trim_space on each line + each field
|
|
27
|
+
// strips stray CR introduced when csv writes manifest with CRLF endings.
|
|
28
|
+
if len(lines) > 1 {
|
|
29
|
+
for i := 1; i < len(lines); i++ {
|
|
30
|
+
line := text.trim_space(lines[i])
|
|
31
|
+
if line == "" { continue }
|
|
32
|
+
fields := text.split(line, "\t")
|
|
33
|
+
if len(fields) < 2 { continue }
|
|
34
|
+
clonotypeKey := text.trim_space(fields[0])
|
|
35
|
+
pdbFilename := text.trim_space(fields[1])
|
|
36
|
+
if pdbFilename == "" { continue }
|
|
37
|
+
pdbFile := self.saveFile(path.join("pdbs", pdbFilename))
|
|
38
|
+
data.add([clonotypeKey], pdbFile)
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
return data.build()
|
|
43
|
+
})
|
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
wf := import("@platforma-sdk/workflow-tengo:workflow")
|
|
2
|
+
assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
3
|
+
pframes := import("@platforma-sdk/workflow-tengo:pframes")
|
|
4
|
+
xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
|
|
5
|
+
pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
|
|
6
|
+
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
7
|
+
smart := import("@platforma-sdk/workflow-tengo:smart")
|
|
8
|
+
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
9
|
+
units := import("@platforma-sdk/workflow-tengo:units")
|
|
10
|
+
|
|
11
|
+
predictBatchTpl := assets.importTemplate(":predict-batch")
|
|
12
|
+
immuneBuilderSw := assets.importSoftware("@platforma-open/milaboratories.3d-structure-prediction.software:immunebuilder-predict")
|
|
13
|
+
|
|
14
|
+
CONFIDENCE_METRIC_ANNOTATION := { "pl7.app/structure/confidenceMetric": "predictedErrorAngstroms" }
|
|
15
|
+
|
|
16
|
+
confidenceColumn := func(columnId, pName, label, extraAnnotations) {
|
|
17
|
+
annotations := {
|
|
18
|
+
"pl7.app/label": label,
|
|
19
|
+
"pl7.app/structure/confidenceMetric": "predictedErrorAngstroms"
|
|
20
|
+
}
|
|
21
|
+
for k, v in extraAnnotations { annotations[k] = v }
|
|
22
|
+
return {
|
|
23
|
+
column: columnId,
|
|
24
|
+
id: columnId,
|
|
25
|
+
spec: {
|
|
26
|
+
valueType: "Double",
|
|
27
|
+
name: pName,
|
|
28
|
+
annotations: annotations
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
wf.prepare(func(args) {
|
|
34
|
+
// args.dataset is a PrimaryRef { __isPrimaryRef: "v1", column: PlRef, filter?: PlRef }.
|
|
35
|
+
// `addAnchor` resolves the dataset column (also gives us its spec for the
|
|
36
|
+
// row-axis lookup); `addRef` is the no-anchor form for the optional filter
|
|
37
|
+
// PlRef so the bundle resolves both spec and data — needed when we wrap
|
|
38
|
+
// heavy chain as a ResolvedPrimaryRef for processColumn batch.
|
|
39
|
+
bb := wf.createPBundleBuilder()
|
|
40
|
+
bb.ignoreMissingDomains()
|
|
41
|
+
bb.addAnchor("main", args.dataset.column)
|
|
42
|
+
bb.addSingle(args.heavyChainRef)
|
|
43
|
+
if args.mode == "ABodyBuilder2" && !is_undefined(args.lightChainRef) {
|
|
44
|
+
bb.addSingle(args.lightChainRef)
|
|
45
|
+
}
|
|
46
|
+
if !is_undefined(args.dataset.filter) {
|
|
47
|
+
bb.addRef(args.dataset.filter)
|
|
48
|
+
}
|
|
49
|
+
// Clonotype-axis label column — passed as a secondary `columns` entry to
|
|
50
|
+
// processColumn so the python wrapper sees `clonotypeLabel` per row and
|
|
51
|
+
// echoes it into confidence.tsv (used as the row-axis label substitution
|
|
52
|
+
// in the V3 structures table).
|
|
53
|
+
bb.addMulti(
|
|
54
|
+
{
|
|
55
|
+
axes: [{ anchor: "main", idx: 1 }],
|
|
56
|
+
name: "pl7.app/label"
|
|
57
|
+
},
|
|
58
|
+
"clonotypeKeyLabels")
|
|
59
|
+
return { columns: bb.build() }
|
|
60
|
+
})
|
|
61
|
+
|
|
62
|
+
wf.body(func(args) {
|
|
63
|
+
columns := args.columns
|
|
64
|
+
datasetCol := args.dataset.column
|
|
65
|
+
datasetSpec := columns.getSpec(datasetCol)
|
|
66
|
+
clonotypeAxisSpec := datasetSpec.axesSpec[1]
|
|
67
|
+
clonotypeAxisName := clonotypeAxisSpec.name
|
|
68
|
+
|
|
69
|
+
heavy := columns.getColumn(args.heavyChainRef)
|
|
70
|
+
isPaired := args.mode == "ABodyBuilder2"
|
|
71
|
+
light := undefined
|
|
72
|
+
if isPaired { light = columns.getColumn(args.lightChainRef) }
|
|
73
|
+
labelColumns := columns.getColumns("clonotypeKeyLabels")
|
|
74
|
+
labelEntry := len(labelColumns) > 0 ? labelColumns[0] : undefined
|
|
75
|
+
|
|
76
|
+
seed := is_undefined(args.torchSeed) ? 42 : args.torchSeed
|
|
77
|
+
cpu := is_undefined(args.cpu) ? 4 : args.cpu
|
|
78
|
+
memGiB := is_undefined(args.mem) ? 16 : args.mem
|
|
79
|
+
confidenceMetric := is_undefined(args.confidenceMetric) ? "cdrh3Mean" : args.confidenceMetric
|
|
80
|
+
threshold := is_undefined(args.confidenceThresholdAngstroms) ? 2.5 : args.confidenceThresholdAngstroms
|
|
81
|
+
batchSize := is_undefined(args.batchSize) ? 50 : args.batchSize
|
|
82
|
+
|
|
83
|
+
blockId := wf.blockId().getDataAsJson()
|
|
84
|
+
|
|
85
|
+
// Heavy chain is the keyspace primary. When the dataset PrimaryRef carries
|
|
86
|
+
// a filter, wrap heavy as a ResolvedPrimaryRef so the batch orchestrator
|
|
87
|
+
// inner-joins the filter on each batch's clonotype keys.
|
|
88
|
+
primarySrc := { spec: heavy.spec, data: heavy.data }
|
|
89
|
+
if !is_undefined(args.dataset.filter) {
|
|
90
|
+
filterCol := columns.getColumn(args.dataset.filter)
|
|
91
|
+
primarySrc = {
|
|
92
|
+
__isPrimaryRef: "v1",
|
|
93
|
+
column: { spec: heavy.spec, data: heavy.data },
|
|
94
|
+
filter: { spec: filterCol.spec, data: filterCol.data }
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Secondary entries — outer-joined onto each batch's keyspace.
|
|
99
|
+
secondaryEntries := []
|
|
100
|
+
if isPaired {
|
|
101
|
+
secondaryEntries = append(secondaryEntries, {
|
|
102
|
+
src: { spec: light.spec, data: light.data },
|
|
103
|
+
header: "lightChain"
|
|
104
|
+
})
|
|
105
|
+
}
|
|
106
|
+
if !is_undefined(labelEntry) {
|
|
107
|
+
secondaryEntries = append(secondaryEntries, {
|
|
108
|
+
src: { spec: labelEntry.spec, data: labelEntry.data },
|
|
109
|
+
header: "clonotypeLabel"
|
|
110
|
+
})
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Per-batch confidence.tsv → typed PColumns. The orchestrator adds the
|
|
114
|
+
// batch-key axis (clonotype) automatically; `batchKeyColumns` names it.
|
|
115
|
+
// `clonotypeLabel` is echoed by the python wrapper so the V3 structures
|
|
116
|
+
// table substitutes it into the row-axis cells (single-axis pl7.app/label
|
|
117
|
+
// PColumn → recognised by PlAgDataTable's `isLabelColumn`).
|
|
118
|
+
confidenceXsvColumns := [
|
|
119
|
+
{
|
|
120
|
+
column: "clonotypeLabel",
|
|
121
|
+
id: "clonotypeLabel",
|
|
122
|
+
spec: {
|
|
123
|
+
valueType: "String",
|
|
124
|
+
name: "pl7.app/label",
|
|
125
|
+
annotations: {
|
|
126
|
+
"pl7.app/label": "Clone",
|
|
127
|
+
"pl7.app/table/orderPriority": "100000"
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
},
|
|
131
|
+
confidenceColumn("meanError", "pl7.app/structure/confidence/mean",
|
|
132
|
+
"Mean error (Å)", { "pl7.app/isScore": "true", "pl7.app/table/orderPriority": "90000" }),
|
|
133
|
+
confidenceColumn("cdrh1Error", "pl7.app/structure/confidence/cdrh1", "CDRH1 error (Å)", {}),
|
|
134
|
+
confidenceColumn("cdrh2Error", "pl7.app/structure/confidence/cdrh2", "CDRH2 error (Å)", {}),
|
|
135
|
+
confidenceColumn("cdrh3Error", "pl7.app/structure/confidence/cdrh3", "CDRH3 error (Å)",
|
|
136
|
+
{
|
|
137
|
+
"pl7.app/isScore": "true",
|
|
138
|
+
"pl7.app/table/orderPriority": "89000",
|
|
139
|
+
"pl7.app/description": "ABodyBuilder2 accuracy degrades for CDRH3 ≥ ~20 aa (Abanades et al., 2023, Fig. 4); long-CDRH3 predictions should be treated as lower-confidence regardless of the error value."
|
|
140
|
+
})
|
|
141
|
+
]
|
|
142
|
+
if isPaired {
|
|
143
|
+
confidenceXsvColumns = append(confidenceXsvColumns,
|
|
144
|
+
confidenceColumn("cdrl1Error", "pl7.app/structure/confidence/cdrl1", "CDRL1 error (Å)", {}))
|
|
145
|
+
confidenceXsvColumns = append(confidenceXsvColumns,
|
|
146
|
+
confidenceColumn("cdrl2Error", "pl7.app/structure/confidence/cdrl2", "CDRL2 error (Å)", {}))
|
|
147
|
+
confidenceXsvColumns = append(confidenceXsvColumns,
|
|
148
|
+
confidenceColumn("cdrl3Error", "pl7.app/structure/confidence/cdrl3", "CDRL3 error (Å)", {}))
|
|
149
|
+
}
|
|
150
|
+
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
151
|
+
column: "perResidueError",
|
|
152
|
+
id: "perResidueError",
|
|
153
|
+
spec: {
|
|
154
|
+
valueType: "String",
|
|
155
|
+
name: "pl7.app/structure/confidence/perResidue",
|
|
156
|
+
annotations: {
|
|
157
|
+
"pl7.app/label": "Per-residue error (JSON)",
|
|
158
|
+
"pl7.app/structure/confidenceMetric": "predictedErrorAngstroms",
|
|
159
|
+
"pl7.app/structure/perResidueSchema": "json_pos_chain_err_v1",
|
|
160
|
+
"pl7.app/table/visibility": "hidden"
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
})
|
|
164
|
+
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
165
|
+
column: "cdrh3Length",
|
|
166
|
+
id: "cdrh3Length",
|
|
167
|
+
spec: {
|
|
168
|
+
valueType: "Long",
|
|
169
|
+
name: "pl7.app/structure/cdrh3Length",
|
|
170
|
+
annotations: { "pl7.app/label": "CDRH3 length (aa)" }
|
|
171
|
+
}
|
|
172
|
+
})
|
|
173
|
+
// failureReason / warning come in two flavours:
|
|
174
|
+
// - the *Text columns carry human-readable strings (translated python-side)
|
|
175
|
+
// and are surfaced in the table.
|
|
176
|
+
// - the code columns retain the raw enum value, hidden by default; they
|
|
177
|
+
// stay in the schema so downstream blocks / future failure-stats logic
|
|
178
|
+
// can group on a stable identifier.
|
|
179
|
+
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
180
|
+
column: "failureReasonText",
|
|
181
|
+
id: "failureReasonText",
|
|
182
|
+
spec: {
|
|
183
|
+
valueType: "String",
|
|
184
|
+
name: "pl7.app/structure/failureReason/text",
|
|
185
|
+
annotations: {
|
|
186
|
+
"pl7.app/label": "Failure reason",
|
|
187
|
+
"pl7.app/table/visibility": "optional"
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
})
|
|
191
|
+
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
192
|
+
column: "failureReason",
|
|
193
|
+
id: "failureReason",
|
|
194
|
+
spec: {
|
|
195
|
+
valueType: "String",
|
|
196
|
+
name: "pl7.app/structure/failureReason",
|
|
197
|
+
annotations: {
|
|
198
|
+
"pl7.app/label": "Failure reason (code)",
|
|
199
|
+
"pl7.app/table/visibility": "hidden"
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
})
|
|
203
|
+
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
204
|
+
column: "warningText",
|
|
205
|
+
id: "warningText",
|
|
206
|
+
spec: {
|
|
207
|
+
valueType: "String",
|
|
208
|
+
name: "pl7.app/structure/warning/text",
|
|
209
|
+
annotations: {
|
|
210
|
+
"pl7.app/label": "Warnings",
|
|
211
|
+
"pl7.app/table/visibility": "optional"
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
})
|
|
215
|
+
confidenceXsvColumns = append(confidenceXsvColumns, {
|
|
216
|
+
column: "warning",
|
|
217
|
+
id: "warning",
|
|
218
|
+
spec: {
|
|
219
|
+
valueType: "String",
|
|
220
|
+
name: "pl7.app/structure/warning",
|
|
221
|
+
annotations: {
|
|
222
|
+
"pl7.app/label": "Warnings (codes)",
|
|
223
|
+
"pl7.app/table/visibility": "hidden"
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
})
|
|
227
|
+
|
|
228
|
+
// Pre-download model weights once, before the batch fan-out. ImmuneBuilder
|
|
229
|
+
// fetches weights on first predictor construction into a shared on-disk
|
|
230
|
+
// cache; without this step, N parallel batch containers race to write the
|
|
231
|
+
// same files and produce partial/corrupt caches that fail with no retry.
|
|
232
|
+
// The sentinel file is forwarded as a body input — each batch awaits it
|
|
233
|
+
// (via addFile in predict-batch.tpl.tengo) before its own exec runs, so
|
|
234
|
+
// every batch starts against an already-warm cache.
|
|
235
|
+
warmupRun := exec.builder().
|
|
236
|
+
software(immuneBuilderSw).
|
|
237
|
+
cpu(1).mem(2 * units.GiB).
|
|
238
|
+
env("BLOCK_VERSION", blockId).
|
|
239
|
+
arg("--warmup").
|
|
240
|
+
arg("--mode").arg(args.mode).
|
|
241
|
+
arg("--sentinel").arg("warmup.done").
|
|
242
|
+
saveFile("warmup.done").
|
|
243
|
+
printErrStreamToStdout().
|
|
244
|
+
saveStdoutStream().
|
|
245
|
+
run()
|
|
246
|
+
warmupSentinel := warmupRun.getFile("warmup.done")
|
|
247
|
+
|
|
248
|
+
// Per-batch scalar settings forwarded to predict-batch as `inputs.<key>`.
|
|
249
|
+
bodyExtra := {
|
|
250
|
+
mode: smart.createJsonResource(args.mode),
|
|
251
|
+
seed: smart.createJsonResource(seed),
|
|
252
|
+
cpu: smart.createJsonResource(cpu),
|
|
253
|
+
memGiB: smart.createJsonResource(memGiB),
|
|
254
|
+
confidenceMetric: smart.createJsonResource(confidenceMetric),
|
|
255
|
+
threshold: smart.createJsonResource(threshold),
|
|
256
|
+
blockId: smart.createJsonResource(blockId),
|
|
257
|
+
warmupSentinel: warmupSentinel
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
processResult := pframes.processColumn(
|
|
261
|
+
{
|
|
262
|
+
primary: { src: primarySrc, header: "heavyChain" },
|
|
263
|
+
columns: secondaryEntries,
|
|
264
|
+
primaryJoin: "inner"
|
|
265
|
+
},
|
|
266
|
+
predictBatchTpl,
|
|
267
|
+
[
|
|
268
|
+
{
|
|
269
|
+
type: "Xsv",
|
|
270
|
+
name: "confidence",
|
|
271
|
+
xsvType: "tsv",
|
|
272
|
+
settings: {
|
|
273
|
+
batchKeyColumns: [clonotypeAxisName],
|
|
274
|
+
columns: confidenceXsvColumns,
|
|
275
|
+
storageFormat: "Parquet"
|
|
276
|
+
}
|
|
277
|
+
},
|
|
278
|
+
{
|
|
279
|
+
type: "ResourceMap",
|
|
280
|
+
name: "pdbsMap",
|
|
281
|
+
spec: {
|
|
282
|
+
kind: "PColumn",
|
|
283
|
+
name: "pl7.app/structure/pdb",
|
|
284
|
+
domain: { "pl7.app/structure/numbering": "imgt" },
|
|
285
|
+
valueType: "File",
|
|
286
|
+
axesSpec: [],
|
|
287
|
+
annotations: {
|
|
288
|
+
"pl7.app/label": "Predicted PDB structure",
|
|
289
|
+
"pl7.app/structure/numbering": "imgt"
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
],
|
|
294
|
+
{
|
|
295
|
+
batch: {
|
|
296
|
+
size: batchSize,
|
|
297
|
+
keyColumns: [clonotypeAxisName],
|
|
298
|
+
format: "tsv",
|
|
299
|
+
// passContent=false: orchestrator writes per-batch blob files
|
|
300
|
+
// (one per slice) and hands each as a file reference to the body.
|
|
301
|
+
// passContent=true would ship the entire per-scope joined TSV as
|
|
302
|
+
// a single value resource — that's capped at 3MiB and overflows
|
|
303
|
+
// at ~22MiB on real datasets.
|
|
304
|
+
passContent: false
|
|
305
|
+
},
|
|
306
|
+
extra: bodyExtra
|
|
307
|
+
}
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
// Trace stamp for every confidence + subset column.
|
|
311
|
+
trace := pSpec.makeTrace(datasetSpec, {
|
|
312
|
+
type: "milaboratories.3d-structure-prediction",
|
|
313
|
+
id: blockId,
|
|
314
|
+
importance: 20,
|
|
315
|
+
label: "3D Structure Prediction"
|
|
316
|
+
})
|
|
317
|
+
|
|
318
|
+
// Confidence pframe — one PColumn per Xsv column declared above.
|
|
319
|
+
confidencePfb := pframes.pFrameBuilder()
|
|
320
|
+
for col in confidenceXsvColumns {
|
|
321
|
+
out := processResult.output("confidence", col.id)
|
|
322
|
+
confidencePfb.add(col.id, trace.inject(out.spec), out.data)
|
|
323
|
+
}
|
|
324
|
+
confidencePfHandle := confidencePfb.build()
|
|
325
|
+
|
|
326
|
+
// Subset columns (predictionSuccessful + confident) computed via pt over
|
|
327
|
+
// the assembled confidence pframe. pt addresses columns by xsv id, not by
|
|
328
|
+
// spec.name.
|
|
329
|
+
metricColumn := "cdrh3Error"
|
|
330
|
+
if confidenceMetric == "overallMean" {
|
|
331
|
+
metricColumn = "meanError"
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
subsetWf := pt.workflow().mem("2GiB").cpu(1)
|
|
335
|
+
df := subsetWf.frame(pt.p.full(confidencePfHandle))
|
|
336
|
+
|
|
337
|
+
successExpr := pt.col("failureReason").isNull().cast("Int").alias("predictionSuccessful")
|
|
338
|
+
successDf := df.select(pt.axis(clonotypeAxisName), successExpr)
|
|
339
|
+
successDf.save("subsets.tsv")
|
|
340
|
+
|
|
341
|
+
confidentExpr := pt.col("failureReason").isNull().and(pt.col(metricColumn).le(threshold)).fillNull(false).cast("Int").alias("confident")
|
|
342
|
+
confidentDf := df.select(pt.axis(clonotypeAxisName), confidentExpr)
|
|
343
|
+
confidentDf.save("confident.tsv")
|
|
344
|
+
|
|
345
|
+
subsetRun := subsetWf.run()
|
|
346
|
+
|
|
347
|
+
predictionSuccessfulPf := xsv.importFile(subsetRun.getFile("subsets.tsv"), "tsv", {
|
|
348
|
+
axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
|
|
349
|
+
columns: [{
|
|
350
|
+
column: "predictionSuccessful",
|
|
351
|
+
spec: {
|
|
352
|
+
valueType: "Int",
|
|
353
|
+
name: "pl7.app/structure/predictionSuccessful",
|
|
354
|
+
annotations: {
|
|
355
|
+
"pl7.app/label": "Structure predicted",
|
|
356
|
+
"pl7.app/isSubset": "true",
|
|
357
|
+
"pl7.app/table/visibility": "hidden"
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
}],
|
|
361
|
+
storageFormat: "Parquet"
|
|
362
|
+
}, { splitDataAndSpec: true })
|
|
363
|
+
|
|
364
|
+
confidentPf := xsv.importFile(subsetRun.getFile("confident.tsv"), "tsv", {
|
|
365
|
+
axes: [{ column: clonotypeAxisName, spec: clonotypeAxisSpec }],
|
|
366
|
+
columns: [{
|
|
367
|
+
column: "confident",
|
|
368
|
+
spec: {
|
|
369
|
+
valueType: "Int",
|
|
370
|
+
name: "pl7.app/structure/confident",
|
|
371
|
+
annotations: {
|
|
372
|
+
"pl7.app/label": "Confident structure (" + confidenceMetric + " ≤ " + string(threshold) + " Å)",
|
|
373
|
+
"pl7.app/isSubset": "true",
|
|
374
|
+
"pl7.app/table/visibility": "hidden"
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
}],
|
|
378
|
+
storageFormat: "Parquet"
|
|
379
|
+
}, { splitDataAndSpec: true })
|
|
380
|
+
|
|
381
|
+
// Final structures pframe: confidence + subset columns. The label column
|
|
382
|
+
// (clonotypeLabel) is part of confidence.
|
|
383
|
+
finalPfb := pframes.pFrameBuilder()
|
|
384
|
+
for col in confidenceXsvColumns {
|
|
385
|
+
out := processResult.output("confidence", col.id)
|
|
386
|
+
finalPfb.add(col.id, trace.inject(out.spec), out.data)
|
|
387
|
+
}
|
|
388
|
+
for k, v in predictionSuccessfulPf {
|
|
389
|
+
finalPfb.add("subset/" + k, trace.inject(v.spec), v.data)
|
|
390
|
+
}
|
|
391
|
+
for k, v in confidentPf {
|
|
392
|
+
finalPfb.add("subset/" + k, trace.inject(v.spec), v.data)
|
|
393
|
+
}
|
|
394
|
+
finalPf := finalPfb.build()
|
|
395
|
+
|
|
396
|
+
// PDB ResourceMap pframe — produced directly by the batch orchestrator
|
|
397
|
+
// (merged across batches via merge-resource-maps). Failed clonotypes have
|
|
398
|
+
// no entry.
|
|
399
|
+
pdbsOut := processResult.output("pdbsMap")
|
|
400
|
+
pdbsPfb := pframes.pFrameBuilder()
|
|
401
|
+
pdbsPfb.add("pdb", trace.inject(pdbsOut.spec), pdbsOut.data)
|
|
402
|
+
pdbsPf := pdbsPfb.build()
|
|
403
|
+
|
|
404
|
+
return {
|
|
405
|
+
outputs: {
|
|
406
|
+
structuresTable: pframes.exportFrame(finalPf),
|
|
407
|
+
pdbsMap: pframes.exportFrame(pdbsPf)
|
|
408
|
+
},
|
|
409
|
+
exports: {
|
|
410
|
+
structures: finalPf,
|
|
411
|
+
pdbs: pdbsPf
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
})
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
// predict-batch — body template for `pframes.processColumn` batch mode.
|
|
2
|
+
// Receives one batch's TSV as a Blob file reference at `__value__`
|
|
3
|
+
// (passContent=false), runs the ImmuneBuilder Python wrapper on it, and
|
|
4
|
+
// returns:
|
|
5
|
+
// - "confidence" — confidence.tsv file ref (Xsv-imported by the orchestrator)
|
|
6
|
+
// - "pdbsMap" — ResourceMap of per-clonotype PDB files (built via the
|
|
7
|
+
// same workdir processor used by the legacy single-batch
|
|
8
|
+
// flow)
|
|
9
|
+
//
|
|
10
|
+
// Aggregate `summary.json` is intentionally NOT emitted — in batch mode each
|
|
11
|
+
// batch has its own summary; aggregation across batches is a separate concern
|
|
12
|
+
// the model can compute from the confidence PFrame post-hoc.
|
|
13
|
+
|
|
14
|
+
self := import("@platforma-sdk/workflow-tengo:tpl")
|
|
15
|
+
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
16
|
+
smart := import("@platforma-sdk/workflow-tengo:smart")
|
|
17
|
+
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
18
|
+
assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
19
|
+
pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
|
|
20
|
+
units := import("@platforma-sdk/workflow-tengo:units")
|
|
21
|
+
|
|
22
|
+
immuneBuilderSw := assets.importSoftware("@platforma-open/milaboratories.3d-structure-prediction.software:immunebuilder-predict")
|
|
23
|
+
buildPdbsMapTpl := assets.importTemplate(":build-pdbs-map")
|
|
24
|
+
|
|
25
|
+
self.defineOutputs(
|
|
26
|
+
"confidence",
|
|
27
|
+
"pdbsMap"
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
self.body(func(inputs) {
|
|
31
|
+
batchFile := inputs[pConstants.VALUE_FIELD_NAME]
|
|
32
|
+
if !smart.isReference(batchFile) {
|
|
33
|
+
ll.panic("predict-batch: expected __value__ to be a file reference (passContent=false), got: %v", batchFile)
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
mode := inputs.mode
|
|
37
|
+
seed := inputs.seed
|
|
38
|
+
cpu := inputs.cpu
|
|
39
|
+
memGiB := inputs.memGiB
|
|
40
|
+
confidenceMetric := inputs.confidenceMetric
|
|
41
|
+
threshold := inputs.threshold
|
|
42
|
+
blockId := inputs.blockId
|
|
43
|
+
// Sentinel produced by the workflow's pre-warmup step. We don't consume
|
|
44
|
+
// the file content — adding it as a workdir input is what wires this batch
|
|
45
|
+
// to await the warmup resource, so every batch only starts after the
|
|
46
|
+
// shared model cache is populated.
|
|
47
|
+
warmupSentinel := inputs.warmupSentinel
|
|
48
|
+
|
|
49
|
+
predictRun := exec.builder().
|
|
50
|
+
software(immuneBuilderSw).
|
|
51
|
+
cpu(cpu).mem(memGiB * units.GiB).
|
|
52
|
+
env("BLOCK_VERSION", blockId).
|
|
53
|
+
addFile("warmup.done", warmupSentinel).
|
|
54
|
+
addFile("batch.tsv", batchFile).
|
|
55
|
+
arg("--mode").arg(mode).
|
|
56
|
+
arg("--input").arg("batch.tsv").
|
|
57
|
+
arg("--output-dir").arg("pdbs").
|
|
58
|
+
arg("--manifest").arg("manifest.tsv").
|
|
59
|
+
arg("--confidence").arg("confidence.tsv").
|
|
60
|
+
arg("--metric").arg(confidenceMetric).
|
|
61
|
+
arg("--threshold").arg(string(threshold)).
|
|
62
|
+
arg("--seed").arg(string(seed)).
|
|
63
|
+
saveFile("confidence.tsv").
|
|
64
|
+
processWorkdir("pdbsMap", buildPdbsMapTpl, {}).
|
|
65
|
+
printErrStreamToStdout().
|
|
66
|
+
saveStdoutStream().
|
|
67
|
+
run()
|
|
68
|
+
|
|
69
|
+
return {
|
|
70
|
+
confidence: predictRun.getFile("confidence.tsv"),
|
|
71
|
+
pdbsMap: predictRun.getProcessorResult("pdbsMap")
|
|
72
|
+
}
|
|
73
|
+
})
|
package/src/wf.test.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { tplTest } from "@platforma-sdk/test";
|
|
2
|
+
|
|
3
|
+
tplTest(
|
|
4
|
+
'should return a concatenated string',
|
|
5
|
+
async ({ helper, expect }) => {
|
|
6
|
+
const results = await helper.renderWorkflow("main", false, {
|
|
7
|
+
name: 'World'
|
|
8
|
+
});
|
|
9
|
+
|
|
10
|
+
const pythonMessage = results.output("pythonMessage", (a) => a?.getDataAsString());
|
|
11
|
+
expect(await pythonMessage.awaitStableValue()).eq('Hello from Python, World!\n');
|
|
12
|
+
|
|
13
|
+
const tengoMessage = results.output("tengoMessage", (a) => a?.getDataAsJson<string>());
|
|
14
|
+
expect(await tengoMessage.awaitStableValue()).eq('Hello from Tengo, World!');
|
|
15
|
+
}
|
|
16
|
+
);
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "es2022",
|
|
4
|
+
"module": "commonjs",
|
|
5
|
+
"moduleResolution": "node",
|
|
6
|
+
"esModuleInterop": true,
|
|
7
|
+
"strict": true,
|
|
8
|
+
"outDir": "./dist",
|
|
9
|
+
"rootDir": "./src",
|
|
10
|
+
"sourceMap": true,
|
|
11
|
+
"declaration": true
|
|
12
|
+
},
|
|
13
|
+
"types": [],
|
|
14
|
+
"include": ["src/**/*"],
|
|
15
|
+
"exclude": ["node_modules", "dist"]
|
|
16
|
+
}
|