@platforma-open/milaboratories.sequence-properties.workflow 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+  WARN  Issue while reading "/home/runner/work/sequence-properties/sequence-properties/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
+
3
+ > @platforma-open/milaboratories.sequence-properties.workflow@1.1.1 build /home/runner/work/sequence-properties/sequence-properties/workflow
4
+ > shx rm -rf dist && pl-tengo check && pl-tengo build
5
+
6
+ info: Skipping unknown file type: wf.test.ts
7
+ Processing "src/main.tpl.tengo"...
8
+ Processing "src/messages.lib.tengo"...
9
+ Processing "src/process.tpl.tengo"...
10
+ No syntax errors found.
11
+ info: Skipping unknown file type: wf.test.ts
12
+ info: Compiling 'dist'...
13
+ info: - writing /home/runner/work/sequence-properties/sequence-properties/workflow/dist/tengo/lib/messages.lib.tengo
14
+ info: - writing /home/runner/work/sequence-properties/sequence-properties/workflow/dist/tengo/tpl/process.plj.gz
15
+ info: - writing /home/runner/work/sequence-properties/sequence-properties/workflow/dist/tengo/tpl/main.plj.gz
16
+ info: Template Pack build done.
17
+ info: Template Pack build done.
package/CHANGELOG.md ADDED
@@ -0,0 +1,40 @@
1
+ # @platforma-open/milaboratories.sequence-properties.workflow
2
+
3
+ ## 1.1.1
4
+
5
+ ### Patch Changes
6
+
7
+ - bb07f98: Rename all package scopes from `MiLaboratories.sequence-properties` to `milaboratories.sequence-properties`. npm registry rejects new package names with uppercase letters, which blocked the first publish. Lowercase form aligns with the existing `@platforma-open/milaboratories.*` convention used by sibling blocks. Also corrects the GitHub URL in the block manifest to point at the actual repo (`platforma-open/sequence-properties`).
8
+
9
+ ## 1.1.0
10
+
11
+ ### Minor Changes
12
+
13
+ - 1059d80: Initial release of the Sequence Properties block.
14
+
15
+ Computes physico-chemical properties (charge, pI, GRAVY, MW, extinction
16
+ coefficients, instability and aliphatic indices, aromaticity, AA composition)
17
+ for peptide and antibody/TCR sequence inputs. The block auto-detects modality
18
+ from the input axes and degrades gracefully on partial coverage: CDR3
19
+ properties when CDR3 is present, full-chain VH/VL when all seven IMGT regions
20
+ are exported, and Fv-level properties when both chains reconstruct. An R11c
21
+ heuristic flags likely VHH/single-domain inputs.
22
+
23
+ Property math uses BioPython ProtParam + IsoelectricPoint with IPC 2.0 pKa
24
+ overrides — peptide set for peptide and CDR3 inputs, protein set for full
25
+ VH/VL. Charge and pI round to 3 decimals at the output boundary; combined
26
+ with sorted Tengo iteration, canonical-JSON resources, and sorted TSV writes,
27
+ output bytes hash identically across runs so the block joins the dedup path.
28
+
29
+ M3 validation is locked down by `tests/unit/test_m3_validation.py` (38 cases:
30
+ ≥5 VH pI, ≥2 VL pI, Fv on ≥2 paired chains, ≥10 CDR-H3 charge, ≥3 CDR-L3
31
+ charge, ≥3 VH aliphatic) against pinned IPC 2.0 webserver values and an
32
+ independent Henderson-Hasselbalch reference.
33
+
34
+ Block title is the static "Sequence Properties"; the selected input dataset
35
+ appears as the subtitle.
36
+
37
+ ### Patch Changes
38
+
39
+ - Updated dependencies [1059d80]
40
+ - @platforma-open/MiLaboratories.sequence-properties.software@1.1.0
package/dist/index.cjs ADDED
@@ -0,0 +1,4 @@
1
+ module.exports = { Templates: {
2
+ 'process': { type: 'from-file', path: require.resolve('./tengo/tpl/process.plj.gz') },
3
+ 'main': { type: 'from-file', path: require.resolve('./tengo/tpl/main.plj.gz') }
4
+ }};
@@ -0,0 +1,4 @@
1
+ declare type TemplateFromFile = { readonly type: "from-file"; readonly path: string; };
2
+ declare type TplName = "process" | "main";
3
+ declare const Templates: Record<TplName, TemplateFromFile>;
4
+ export { Templates };
package/dist/index.js ADDED
@@ -0,0 +1,5 @@
1
+ import { resolve } from 'node:path';
2
+ export const Templates = {
3
+ 'process': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/process.plj.gz') },
4
+ 'main': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/main.plj.gz') }
5
+ };
@@ -0,0 +1,58 @@
1
+
2
+
3
+
4
+
5
+
6
+
7
+
8
+ ll := import("@platforma-sdk/workflow-tengo:ll")
9
+
10
+ partialChainMissingFullChain := func(present, chainLabel) {
11
+ return "Partial-region input: " + string(present) + " of 7 required regions found for " +
12
+ chainLabel + " chain — full-chain properties not computed. " +
13
+ "All seven regions (FR1, CDR1, FR2, CDR2, FR3, CDR3, FR4) are required."
14
+ }
15
+
16
+ partialChainNoCdr3 := func(present, chainLabel) {
17
+ return "Partial-region input: " + string(present) + " of 7 required regions found for " +
18
+ chainLabel + " chain (CDR3 absent) — no per-chain properties computed. " +
19
+ "CDR3 is required for per-chain charge / hydrophobicity; " +
20
+ "all seven regions are required for full-chain properties."
21
+ }
22
+
23
+ noRecognizedColumns := func() {
24
+ return "No recognized VDJ region columns found in the input dataset."
25
+ }
26
+
27
+ cdr3OnlyInput := func() {
28
+ return "CDR3-only input detected — full-chain properties not computed. " +
29
+ "To enable them, use a MiXCR preset that exports all VDJ regions."
30
+ }
31
+
32
+ gammaDeltaTcr := func() {
33
+ return "γδ TCR input detected — displaying with γδ-specific labels; " +
34
+ "Fv columns are not computed for TCR inputs."
35
+ }
36
+
37
+ receptorNotDetected := func() {
38
+ return "Receptor type not detected on the input dataset; defaulting to antibody labels. " +
39
+ "Use a MiXCR preset that emits the receptor annotation if this is a TCR dataset."
40
+ }
41
+
42
+
43
+ vhh := func() {
44
+ return "Possible VHH/single-domain antibody input detected (heavy chain only; " +
45
+ "CDR-H3 length distribution consistent with VHH). IgG-calibrated CDR-H3 length " +
46
+ "thresholds (>15 aa elevated risk, >20 aa high risk) do not apply to VHH — " +
47
+ "disregard these thresholds for nanobody libraries."
48
+ }
49
+
50
+ export ll.toStrict({
51
+ partialChainMissingFullChain: partialChainMissingFullChain,
52
+ partialChainNoCdr3: partialChainNoCdr3,
53
+ noRecognizedColumns: noRecognizedColumns,
54
+ cdr3OnlyInput: cdr3OnlyInput,
55
+ gammaDeltaTcr: gammaDeltaTcr,
56
+ receptorNotDetected: receptorNotDetected,
57
+ vhh: vhh
58
+ })
Binary file
Binary file
package/format.el ADDED
@@ -0,0 +1,43 @@
1
+ ;; This program formats every .tengo file under the src directory. Usage: emacs --script ./format.el
2
+
3
+ (defun install-go-mode ()
4
+ "Install go-mode from MELPA Stable, refreshing the archive list if it is empty, then load it."
5
+ (require 'package)
6
+ (add-to-list 'package-archives
7
+ '("melpa-stable" . "https://stable.melpa.org/packages/"))
8
+ (package-initialize)
9
+ (unless package-archive-contents
10
+ (package-refresh-contents))
11
+
12
+ (package-install 'go-mode t)
13
+ (require 'go-mode))
14
+
15
+ ;; spaces -> tabs only at the beginning of lines
16
+ (setq tabify-regexp "^\t* [ \t]+")
17
+
18
+ (defun format-file (file)
19
+ "Open FILE, re-indent it with slightly changed Go rules, and save it in place."
20
+ (message "Format %s" file)
21
+ (save-excursion
22
+ (find-file file)
23
+ (delete-trailing-whitespace) ;; strip trailing whitespace
24
+ (go-mode) ;; switch to go-mode so its indentation rules apply
25
+ (tabify (point-min) (point-max)) ;; spaces -> tabs across the whole buffer
26
+ (indent-region (point-min) (point-max)) ;; re-indent the whole buffer
27
+ (save-buffer))) ;; write the buffer back to FILE
28
+
29
+ (install-go-mode)
30
+
31
+ ;; change syntax of a standard go-mode a bit
32
+ (advice-add
33
+ 'go--in-composite-literal-p
34
+ :filter-return
35
+ (lambda (&rest r) t))
36
+
37
+ ;; find all files in src
38
+ (setq files (directory-files-recursively "src" "\\.tengo\\'"))
39
+
40
+ ;; call format on every file.
41
+ (dolist (file files)
42
+ (format-file file))
43
+
package/index.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ declare type TemplateFromFile = { readonly type: "from-file"; readonly path: string; };
2
+ declare type TplName = "main";
3
+ declare const Templates: Record<TplName, TemplateFromFile>;
4
+ export { Templates };
package/index.js ADDED
@@ -0,0 +1,3 @@
1
+ module.exports = { Templates: {
2
+ 'main': { type: 'from-file', path: require.resolve('./dist/tengo/tpl/main.plj.gz') }
3
+ }}
package/package.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "name": "@platforma-open/milaboratories.sequence-properties.workflow",
3
+ "version": "1.1.1",
4
+ "description": "Block Workflow",
5
+ "type": "module",
6
+ "dependencies": {
7
+ "@platforma-sdk/workflow-tengo": "5.16.0",
8
+ "@platforma-open/milaboratories.sequence-properties.software": "1.1.0"
9
+ },
10
+ "devDependencies": {
11
+ "@platforma-sdk/tengo-builder": "2.5.17",
12
+ "@platforma-sdk/test": "1.69.0"
13
+ },
14
+ "peerDependencies": {
15
+ "vitest": "*"
16
+ },
17
+ "scripts": {
18
+ "build": "shx rm -rf dist && pl-tengo check && pl-tengo build",
19
+ "test": "vitest",
20
+ "format": "/usr/bin/env emacs --script ./format.el"
21
+ }
22
+ }
@@ -0,0 +1,365 @@
1
+ // Sequence Properties — workflow root.
2
+ //
3
+ // Detects modality (peptide vs antibody/TCR), collects amino-acid sequence columns,
4
+ // builds a per-entity TSV, runs the Python computation step, and hands off to
5
+ // process.tpl.tengo for output PColumn construction.
6
+
7
+ wf := import("@platforma-sdk/workflow-tengo:workflow")
8
+ exec := import("@platforma-sdk/workflow-tengo:exec")
9
+ assets := import("@platforma-sdk/workflow-tengo:assets")
10
+ render := import("@platforma-sdk/workflow-tengo:render")
11
+ smart := import("@platforma-sdk/workflow-tengo:smart")
12
+ ll := import("@platforma-sdk/workflow-tengo:ll")
13
+ pframes := import("@platforma-sdk/workflow-tengo:pframes")
14
+ canonical := import("@platforma-sdk/workflow-tengo:canonical")
15
+ maps := import("@platforma-sdk/workflow-tengo:maps")
16
+ constants := import("@platforma-sdk/workflow-tengo:constants")
17
+ messages := import(":messages")
18
+
19
+ processTpl := assets.importTemplate(":process")
20
+
21
+ // Wraps `value` in a JSON value resource whose bytes are canonical: keys are
22
+ // sorted at every level by canonical.encode. smart.createJsonResource is avoided
23
+ // because it relies on Tengo's stdlib json.encode, which follows Go's randomized
24
+ // map iteration — bytes and CID would then differ between runs and defeat dedup.
25
+ canonicalJsonResource := func(value) {
26
+ sortedBytes := canonical.encode(value)
27
+ return smart.createValueResource(constants.RTYPE_JSON, sortedBytes)
28
+ }
29
+
30
+ REQUIRED_FEATURES := ["FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"]
31
+
32
+ detectMode := func(axisSpec) {
33
+ // Maps an axis spec to a modality tag; "" means the axis is not a recognised key axis.
34
+ axisName := axisSpec.name
35
+ runIds := axisSpec.domain
36
+ if axisName == "pl7.app/variantKey" && runIds != undefined {
37
+ // Universal naming: the run-id domain key tells peptide and VDJ producers apart.
38
+ if runIds["pl7.app/peptide/extractionRunId"] != undefined {
39
+ return "peptide"
40
+ }
41
+ if runIds["pl7.app/vdj/clonotypingRunId"] != undefined {
42
+ return "antibody_tcr_universal"
43
+ }
44
+ }
45
+ if axisName == "pl7.app/vdj/cloneId" || axisName == "pl7.app/vdj/clonotypeKey" {
46
+ return "antibody_tcr_legacy_bulk"
47
+ }
48
+ if axisName == "pl7.app/vdj/scClonotypeKey" {
49
+ return "antibody_tcr_legacy_sc"
50
+ }
51
+ return ""
52
+ }
53
+
54
+ contains := func(arr, x) { // linear membership test; false for an empty array
55
+ for _, candidate in arr {
56
+ if candidate == x { return true }
57
+ }
58
+ return false
59
+ }
60
+
61
+ wf.prepare(func(args) {
62
+ bb := wf.createPBundleBuilder()
63
+ bb.ignoreMissingDomains()
64
+ bb.addAnchor("main", args.inputAnchor)
65
+
66
+ // Peptide sequence column (universal naming, post-peptide-extraction).
67
+ bb.addMulti({
68
+ axes: [{ anchor: "main", idx: 1 }],
69
+ name: "pl7.app/sequence",
70
+ domain: {
71
+ "pl7.app/feature": "peptide",
72
+ "pl7.app/alphabet": "aminoacid"
73
+ }
74
+ }, "peptideSequences")
75
+
76
+ // VDJ region columns (legacy MiXCR path).
77
+ // Filter on alphabet only — NOT on pl7.app/vdj/isAssemblingFeature, per spec R4.
78
+ bb.addMulti({
79
+ axes: [{ anchor: "main", idx: 1 }],
80
+ name: "pl7.app/vdj/sequence",
81
+ domain: {
82
+ "pl7.app/alphabet": "aminoacid"
83
+ }
84
+ }, "vdjSequences")
85
+
86
+ // Universal-naming VDJ region columns (forward compatibility, post-MiXCR migration).
87
+ bb.addMulti({
88
+ axes: [{ anchor: "main", idx: 1 }],
89
+ name: "pl7.app/sequence",
90
+ domain: {
91
+ "pl7.app/alphabet": "aminoacid"
92
+ }
93
+ }, "universalSequences")
94
+
95
+ return { columns: bb.build() }
96
+ })
97
+
98
+ wf.body(func(args) {
99
+ blockId := wf.blockId().getDataAsJson()
100
+ bundle := args.columns
101
+ datasetSpec := bundle.getSpec(args.inputAnchor)
102
+
103
+ axes := datasetSpec.axesSpec
104
+ if len(axes) == 0 {
105
+ ll.panic("input anchor has no axes")
106
+ }
107
+
108
+ // R1a: first axis matching a recognized name + domain. Equivalent to
109
+ // `axes[len-1]` on every observed `[sampleId, key]` input.
110
+ keyAxisIdx := -1
111
+ keyAxisSpec := undefined
112
+ mode := ""
113
+ for i, axisSpec in axes {
114
+ m := detectMode(axisSpec)
115
+ if m != "" {
116
+ keyAxisIdx = i
117
+ keyAxisSpec = axisSpec
118
+ mode = m
119
+ break
120
+ }
121
+ }
122
+ if mode == "" {
123
+ ll.panic("no recognized sequence key axis found; connect a peptide extraction or MiXCR clonotyping dataset")
124
+ }
125
+
126
+ infoMessages := []
127
+ receptor := "IG" // R13b: default when receptor key absent
128
+ receptorSeen := false
129
+
130
+ // Spec deviation SD-003 — see docs/spec-deviations.md.
131
+ // MiXCR places the receptor key on the clonotypeKey AXIS domain (the input
132
+ // anchor's secondary axis), not on per-region sequence column domains. Read
133
+ // from the axis first; the per-column check inside the loop stays as a
134
+ // fallback for non-MiXCR producers.
135
+ if keyAxisSpec.domain != undefined {
136
+ axisR := keyAxisSpec.domain["pl7.app/vdj/receptor"]
137
+ if axisR == "IG" || axisR == "TCRAB" || axisR == "TCRGD" {
138
+ receptor = axisR
139
+ receptorSeen = true
140
+ }
141
+ }
142
+
143
+ chainsFound := {} // chain -> { feature -> 1 }
144
+
145
+ seqTb := pframes.tsvFileBuilder()
146
+ seqTb.setAxisHeader(keyAxisSpec, "entity_key")
147
+
148
+ if mode == "peptide" {
149
+ peptideCols := bundle.getColumns("peptideSequences")
150
+ if len(peptideCols) == 0 {
151
+ ll.panic("peptide mode detected but no peptide amino-acid sequence column was found in the input dataset")
152
+ }
153
+ seqTb.add(bundle.getColumn(peptideCols[0].key), { header: "sequence" })
154
+
155
+ } else {
156
+ // Legacy MiXCR sequences first; fall back to universal naming for forward
157
+ // compatibility with the post-MiXCR-migration column shape.
158
+ vdjCols := bundle.getColumns("vdjSequences")
159
+ if len(vdjCols) == 0 {
160
+ vdjCols = bundle.getColumns("universalSequences")
161
+ }
162
+ if len(vdjCols) == 0 {
163
+ ll.panic("antibody/TCR mode detected but no amino-acid VDJ sequence columns found")
164
+ }
165
+
166
+ for s in vdjCols {
167
+ d := s.spec.domain
168
+ if d == undefined { continue }
169
+
170
+ feat := d["pl7.app/vdj/feature"]
171
+ if d["pl7.app/feature"] != undefined { feat = d["pl7.app/feature"] }
172
+
173
+ // Spec deviation SD-002 — see docs/spec-deviations.md.
174
+ // MiXCR emits FR4 only as "FR4InFrame" (in-frame-filtered translation).
175
+ // Normalise to "FR4" so the REQUIRED_FEATURES check and downstream
176
+ // header naming treat it as the canonical FR4 region.
177
+ if feat == "FR4InFrame" { feat = "FR4" }
178
+
179
+ if !contains(REQUIRED_FEATURES, feat) { continue }
180
+
181
+ // Spec deviation SD-001 — see docs/spec-deviations.md.
182
+ // MiXCR single-cell emits primary + secondary alleles per chain. Spec assumes
183
+ // one allele per chain slot; secondary alleles would collide on the TSV header.
184
+ // Keep primary only.
185
+ idx := d["pl7.app/vdj/scClonotypeChain/index"]
186
+ if idx != undefined && idx != "primary" { continue }
187
+
188
+ chain := d["pl7.app/vdj/scClonotypeChain"]
189
+ if chain == undefined || chain == "" {
190
+ // Bulk MiXCR data without chain annotation — assume primary chain "A".
191
+ chain = "A"
192
+ }
193
+
194
+ // Same receptor value is expected on every input column; last seen wins.
195
+ r := d["pl7.app/vdj/receptor"]
196
+ if r == "IG" || r == "TCRAB" || r == "TCRGD" {
197
+ receptor = r
198
+ receptorSeen = true
199
+ }
200
+
201
+ header := chain + "_" + feat
202
+ seqTb.add(bundle.getColumn(s.key), { header: header })
203
+
204
+ if chainsFound[chain] == undefined { chainsFound[chain] = {} }
205
+ chainsFound[chain][feat] = 1
206
+ }
207
+ }
208
+
209
+ // Receptor-aware chain label for user-facing messages (R11b).
210
+ chainLabel := func(ch) {
211
+ if receptor == "TCRAB" {
212
+ if ch == "A" { return "alpha" }
213
+ if ch == "B" { return "beta" }
214
+ }
215
+ if receptor == "TCRGD" {
216
+ if ch == "A" { return "gamma" }
217
+ if ch == "B" { return "delta" }
218
+ }
219
+ // IG / unknown — antibody convention.
220
+ if ch == "A" { return "heavy" }
221
+ if ch == "B" { return "light" }
222
+ return ch
223
+ }
224
+
225
+ fullChain := {}
226
+ cdr3Only := {}
227
+ chainsWithCdr3 := []
228
+ if mode != "peptide" {
229
+ // Sorted iteration so partial-region messages append in a stable order.
230
+ chainKeys := maps.getKeys(chainsFound)
231
+ for _, chain in chainKeys {
232
+ feats := chainsFound[chain]
233
+ present := 0
234
+ for rf in REQUIRED_FEATURES {
235
+ if feats[rf] { present += 1 }
236
+ }
237
+ if feats["CDR3"] {
238
+ chainsWithCdr3 += [chain]
239
+ }
240
+ if present == len(REQUIRED_FEATURES) {
241
+ fullChain[chain] = true
242
+ } else if feats["CDR3"] && present == 1 {
243
+ cdr3Only[chain] = true
244
+ } else if feats["CDR3"] {
245
+ infoMessages += [messages.partialChainMissingFullChain(present, chainLabel(chain))]
246
+ } else {
247
+ // Chain has 1-6 of 7 regions but lacks CDR3 — neither CDR3-mode nor
248
+ // full-chain mode applies. Without R11b's surfaced silent fallthrough
249
+ // the user would see neither full-chain nor CDR3 columns and no
250
+ // explanation why.
251
+ infoMessages += [messages.partialChainNoCdr3(present, chainLabel(chain))]
252
+ }
253
+ }
254
+ anyChain := len(chainKeys) > 0
255
+ if !anyChain {
256
+ infoMessages += [messages.noRecognizedColumns()]
257
+ }
258
+
259
+ if len(cdr3Only) > 0 && len(fullChain) == 0 {
260
+ infoMessages += [messages.cdr3OnlyInput()]
261
+ }
262
+ if receptor == "TCRGD" {
263
+ infoMessages += [messages.gammaDeltaTcr()]
264
+ }
265
+ // R13b: warn when no recognised receptor was seen — defaults to IG.
266
+ if !receptorSeen && anyChain {
267
+ infoMessages += [messages.receptorNotDetected()]
268
+ }
269
+ }
270
+
271
+ seqTb.mem("4GiB")
272
+ seqTb.cpu(1)
273
+ seqTable := seqTb.build()
274
+
275
+ // Sorted lists feed both plan.json (Python step input) and the params
276
+ // resource (process template input) — they must hash deterministically so
277
+ // the CIDs land on the dedup path across runs of identical input.
278
+ hasFv := mode != "peptide" && receptor == "IG" && fullChain["A"] && fullChain["B"]
279
+ chainList := maps.getKeys(chainsFound)
280
+ fullChainList := maps.getKeys(fullChain)
281
+
282
+ plan := {
283
+ mode: mode,
284
+ receptor: receptor,
285
+ chains: chainList,
286
+ fullChains: fullChainList,
287
+ hasFv: hasFv
288
+ }
289
+
290
+ // Python step contract: reads input.tsv + plan.json; writes properties.tsv,
291
+ // plus aa_fraction.tsv in peptide mode (empty body in antibody/TCR mode), plus
292
+ // stats.json (dataset-level scalars consumed by the info layer — e.g. R11c
293
+ // median CDR-H3 length per chain).
294
+ soft := assets.importSoftware("@platforma-open/milaboratories.sequence-properties.software:compute-properties")
295
+ pyRun := exec.builder().
296
+ software(soft).
297
+ mem("4GiB").
298
+ cpu(1).
299
+ addFile("input.tsv", seqTable).
300
+ writeFile("plan.json", canonical.encode(plan)).
301
+ arg("--input").arg("input.tsv").
302
+ arg("--plan").arg("plan.json").
303
+ arg("--output").arg("properties.tsv").
304
+ arg("--aa-fraction").arg("aa_fraction.tsv").
305
+ arg("--stats").arg("stats.json").
306
+ saveFile("properties.tsv").
307
+ saveFile("aa_fraction.tsv").
308
+ saveFileContent("stats.json").
309
+ saveStderrStream().
310
+ run()
311
+
312
+ propertiesTsv := pyRun.getFile("properties.tsv")
313
+ aaFractionTsv := pyRun.getFile("aa_fraction.tsv")
314
+ statsResource := pyRun.getFileContent("stats.json")
315
+ processingLog := pyRun.getStderrStream()
316
+
317
+ coverageTier := "peptide" // tier reflects which regions were found, not whether messages exist
318
+ if mode != "peptide" {
319
+ if len(fullChainList) > 0 {
320
+ coverageTier = "full_chain"
321
+ } else if len(chainsWithCdr3) > 0 { // no complete chain, but at least one chain supplied CDR3
322
+ coverageTier = "cdr3_only"
323
+ } else { // no CDR3 on any chain, or no usable region columns at all
324
+ coverageTier = "partial"
325
+ }
326
+ }
327
+
328
+ // Hand off to process template for column specs, pFrame export, and
329
+ // info-blob assembly. The info blob depends on Python's stats output, so
330
+ // it builds inside the render template rather than at workflow body time.
331
+ processResult := render.create(processTpl, {
332
+ blockId: blockId,
333
+ propertiesTsv: propertiesTsv,
334
+ aaFractionTsv: aaFractionTsv,
335
+ stats: statsResource,
336
+ params: canonicalJsonResource({
337
+ datasetSpec: datasetSpec,
338
+ keyAxisIdx: keyAxisIdx,
339
+ mode: mode,
340
+ receptor: receptor,
341
+ chains: chainList,
342
+ chainsWithCdr3: chainsWithCdr3,
343
+ fullChains: fullChainList,
344
+ hasFv: hasFv,
345
+ coverageTier: coverageTier,
346
+ infoMessages: infoMessages
347
+ })
348
+ })
349
+
350
+ // Cache outputs for 24 hours (ms) to skip re-running identical work.
351
+ propertiesPf := processResult.output("propertiesPf", 24 * 60 * 60 * 1000)
352
+ exportPframe := processResult.output("exportPframe", 24 * 60 * 60 * 1000)
353
+ infoBlob := processResult.output("info", 24 * 60 * 60 * 1000)
354
+
355
+ return {
356
+ outputs: {
357
+ propertiesPf: propertiesPf,
358
+ info: infoBlob,
359
+ processingLog: processingLog
360
+ },
361
+ exports: {
362
+ properties: exportPframe
363
+ }
364
+ }
365
+ })
@@ -0,0 +1,58 @@
1
+ // User-facing info messages emitted by the workflow.
2
+ //
3
+ // Centralised so the inventory of UX strings is scannable in one place.
4
+ // Each helper returns a single message string; callers append it to the
5
+ // running info-message list. Receptor/chain rendering is the caller's job —
6
+ // helpers accept already-rendered chain labels.
7
+
8
+ ll := import("@platforma-sdk/workflow-tengo:ll")
9
+
10
+ partialChainMissingFullChain := func(present, chainLabel) { // chainLabel is an already-rendered chain name
11
+ subject := "Partial-region input: " + string(present) + " of 7 required regions found for " + chainLabel
12
+ verdict := " chain — full-chain properties not computed. "
13
+ return subject + verdict + "All seven regions (FR1, CDR1, FR2, CDR2, FR3, CDR3, FR4) are required."
14
+ }
15
+
16
+ partialChainNoCdr3 := func(present, chainLabel) { // chainLabel is an already-rendered chain name
17
+ subject := "Partial-region input: " + string(present) + " of 7 required regions found for " + chainLabel
18
+ verdict := " chain (CDR3 absent) — no per-chain properties computed. "
19
+ needs := "CDR3 is required for per-chain charge / hydrophobicity; all seven regions are required for full-chain properties."
20
+ return subject + verdict + needs
21
+ }
22
+
23
+ noRecognizedColumns := func() {
24
+ return "No recognized VDJ region columns found in the input dataset."
25
+ }
26
+
27
+ cdr3OnlyInput := func() {
28
+ return "CDR3-only input detected — full-chain properties not computed. " +
29
+ "To enable them, use a MiXCR preset that exports all VDJ regions."
30
+ }
31
+
32
+ gammaDeltaTcr := func() {
33
+ return "γδ TCR input detected — displaying with γδ-specific labels; " +
34
+ "Fv columns are not computed for TCR inputs."
35
+ }
36
+
37
+ receptorNotDetected := func() {
38
+ return "Receptor type not detected on the input dataset; defaulting to antibody labels. " +
39
+ "Use a MiXCR preset that emits the receptor annotation if this is a TCR dataset."
40
+ }
41
+
42
+ // R11c — single-domain antibody (VHH / nanobody) heuristic.
43
+ vhh := func() {
44
+ return "Possible VHH/single-domain antibody input detected (heavy chain only; " +
45
+ "CDR-H3 length distribution consistent with VHH). IgG-calibrated CDR-H3 length " +
46
+ "thresholds (>15 aa elevated risk, >20 aa high risk) do not apply to VHH — " +
47
+ "disregard these thresholds for nanobody libraries."
48
+ }
49
+
50
+ export ll.toStrict({
51
+ partialChainMissingFullChain: partialChainMissingFullChain,
52
+ partialChainNoCdr3: partialChainNoCdr3,
53
+ noRecognizedColumns: noRecognizedColumns,
54
+ cdr3OnlyInput: cdr3OnlyInput,
55
+ gammaDeltaTcr: gammaDeltaTcr,
56
+ receptorNotDetected: receptorNotDetected,
57
+ vhh: vhh
58
+ })
@@ -0,0 +1,474 @@
1
+ // Process — receives the property TSV(s) from the Python step and the column-emission
2
+ // plan from main.tpl.tengo, builds output PColumn specs per the project's pcolumn-spec.md,
3
+ // imports the file as a pFrame, and returns the result and a sliced export pFrame.
4
+
5
+ self := import("@platforma-sdk/workflow-tengo:tpl")
6
+ xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
7
+ pframes := import("@platforma-sdk/workflow-tengo:pframes")
8
+ pSpec := import("@platforma-sdk/workflow-tengo:pframes.spec")
9
+ maps := import("@platforma-sdk/workflow-tengo:maps")
10
+ smart := import("@platforma-sdk/workflow-tengo:smart")
11
+ canonical := import("@platforma-sdk/workflow-tengo:canonical")
12
+ constants := import("@platforma-sdk/workflow-tengo:constants")
13
+ messages := import(":messages")
14
+
15
+ self.defineOutputs("propertiesPf", "exportPframe", "info")
16
+
17
+ // Receptor + chain → human label fragments (CDR3 / full-chain).
18
+ // Spec R13a: PColumn name and chain domain are unchanged; only the label varies.
19
+ labelFragments := func(receptor, chain) {
20
+ // Flat guard list: one line per receptor/chain pair, antibody labels as the fallback.
21
+ isA := chain == "A"
22
+ isB := chain == "B"
23
+ if receptor == "TCRAB" && isA { return { cdr3: "CDR-α3", fullChain: "Vα" } }
24
+ if receptor == "TCRAB" && isB { return { cdr3: "CDR-β3", fullChain: "Vβ" } }
25
+ if receptor == "TCRGD" && isA { return { cdr3: "CDR-γ3", fullChain: "Vγ" } }
26
+ if receptor == "TCRGD" && isB { return { cdr3: "CDR-δ3", fullChain: "Vδ" } }
27
+ // IG, an unknown receptor, or a TCR chain other than A/B: antibody convention,
28
+ // where any chain that is not "A" gets the light-chain labels.
29
+ if isA { return { cdr3: "CDR-H3", fullChain: "VH" } }
30
+ return { cdr3: "CDR-L3", fullChain: "VL" }
31
+ }
32
+
33
+ // Build a single output column descriptor consumed by xsv.importFile.
34
+ // `tsvCol` is the TSV column header emitted by Python (e.g. "charge_peptide", "charge_A_CDR3").
35
+ // Clones the caller's `annotations` dict — mutating it in place would stamp
36
+ // the label into shared references if any caller ever reused the literal,
37
+ // the same aliasing footgun that the export-domain clone below already guards.
38
+ makeCol := func(tsvCol, valName, valueType, label, domain, annotations) {
39
+ // Work on a private copy so the caller's annotations map is never mutated.
40
+ labelled := {}
41
+ if annotations {
42
+ for key, val in annotations { labelled[key] = val }
43
+ }
44
+ labelled["pl7.app/label"] = label
45
+ // The TSV header doubles as the column id; allowNA is on with an empty naRegex.
46
+ return {
47
+ column: tsvCol,
48
+ id: tsvCol,
49
+ naRegex: "",
50
+ allowNA: true,
51
+ spec: { name: valName, valueType: valueType, domain: domain, annotations: labelled }
52
+ }
53
+ }
54
+
55
+ self.body(func(args) {
56
+ blockId := args.blockId
57
+ propertiesTsv := args.propertiesTsv
58
+ params := args.params
59
+
60
+ datasetSpec := params.datasetSpec
61
+ keyAxisIdx := params.keyAxisIdx
62
+ mode := params.mode
63
+ receptor := params.receptor
64
+ chains := params.chains
65
+ chainsWithCdr3 := params.chainsWithCdr3
66
+ fullChains := params.fullChains
67
+ hasFv := params.hasFv
68
+ coverageTier := params.coverageTier
69
+ infoMessages := params.infoMessages
70
+
71
+ stats := args.stats.getDataAsJson()
72
+ medians := stats.medianCdr3Length
73
+
74
+ // R11c — single-domain antibodies (nanobodies / VHH) miss the IgG-calibrated
75
+ // CDR-H3 length risk thresholds. Surface an info message when the dataset
76
+ // looks like VHH (heavy chain only, long median CDR-H3 ≥ 16 aa).
77
+ if receptor == "IG" && len(chainsWithCdr3) == 1 && chainsWithCdr3[0] == "A" {
78
+ medA := medians["A"]
79
+ if medA != undefined && medA >= 16 {
80
+ infoMessages += [messages.vhh()]
81
+ }
82
+ }
83
+
84
+ infoBlob := smart.createValueResource(constants.RTYPE_JSON, canonical.encode({
85
+ mode: mode,
86
+ receptor: receptor,
87
+ coverageTier: coverageTier,
88
+ messages: infoMessages
89
+ }))
90
+
91
+ keyAxisSpec := datasetSpec.axesSpec[keyAxisIdx]
92
+
93
+ axes := [{ column: "entity_key", spec: keyAxisSpec }]
94
+ columns := []
95
+
96
+ if mode == "peptide" {
97
+ // Peptide mode — 9 scalar properties on `pl7.app/feature: "peptide"`.
98
+ dom := { "pl7.app/feature": "peptide" }
99
+
100
+ columns += [makeCol("charge_peptide", "pl7.app/charge", "Double",
101
+ "Net Charge (pH 7)", dom, {
102
+ "pl7.app/format": ".2f",
103
+ "pl7.app/isScore": "true",
104
+ "pl7.app/description": "Net charge at pH 7 (Henderson-Hasselbalch, IPC 2.0 peptide pKa set). Positive = net basic (Arg, Lys, His dominate); negative = net acidic (Asp, Glu dominate). No universal preferred direction.",
105
+ "pl7.app/table/visibility": "default",
106
+ "pl7.app/table/orderPriority": "70000"
107
+ })]
108
+
109
+ columns += [makeCol("gravy_peptide", "pl7.app/hydrophobicity", "Double",
110
+ "Hydrophobicity (GRAVY)", dom, {
111
+ "pl7.app/format": ".3f",
112
+ "pl7.app/isScore": "true",
113
+ "pl7.app/score/rankingOrder": "increasing",
114
+ "pl7.app/description": "rankingOrder: increasing reflects preference for lower hydrophobicity. Invert direction in Lead Selection for hydrophobic-target applications.",
115
+ "pl7.app/table/visibility": "default",
116
+ "pl7.app/table/orderPriority": "69900"
117
+ })]
118
+
119
+ columns += [makeCol("mw_peptide", "pl7.app/molecularWeight", "Double",
120
+ "Molecular Weight (Da, average masses)", dom, {
121
+ "pl7.app/format": ".1f",
122
+ "pl7.app/min": "0",
123
+ "pl7.app/table/visibility": "default",
124
+ "pl7.app/table/orderPriority": "69800"
125
+ })]
126
+
127
+ columns += [makeCol("pi_peptide", "pl7.app/isoelectricPoint", "Double",
128
+ "Isoelectric Point (pI)", dom, {
129
+ "pl7.app/format": ".2f",
130
+ "pl7.app/min": "0",
131
+ "pl7.app/max": "14",
132
+ "pl7.app/table/visibility": "default",
133
+ "pl7.app/table/orderPriority": "69700"
134
+ })]
135
+
136
+ columns += [makeCol("eox_peptide", "pl7.app/extinctionCoefficientOx", "Double",
137
+ "Extinction Coeff., Oxidized (M⁻¹cm⁻¹)", dom, {
138
+ "pl7.app/format": ".0f",
139
+ "pl7.app/min": "0",
140
+ "pl7.app/description": "Assumes all Cys are in disulfide bonds. For unprotected linear peptides use the reduced form.",
141
+ "pl7.app/table/visibility": "optional",
142
+ "pl7.app/table/orderPriority": "69600"
143
+ })]
144
+
145
+ columns += [makeCol("ered_peptide", "pl7.app/extinctionCoefficientRed", "Double",
146
+ "Extinction Coeff., Reduced (M⁻¹cm⁻¹)", dom, {
147
+ "pl7.app/format": ".0f",
148
+ "pl7.app/min": "0",
149
+ "pl7.app/description": "Extinction coefficient at 280 nm, disulfide bonds reduced (Cys contribution omitted). A value of 0 means no Tyr or Trp — A280-based quantification is not possible.",
150
+ "pl7.app/table/visibility": "optional",
151
+ "pl7.app/table/orderPriority": "69500"
152
+ })]
153
+
154
+ columns += [makeCol("instability_peptide", "pl7.app/instabilityIndex", "Double",
155
+ "Instability Index", dom, {
156
+ "pl7.app/format": ".2f",
157
+ "pl7.app/description": "Guruprasad index — derived from globular proteins. The II > 40 threshold does not apply to short linear peptides; use as a relative composition ranking aid only.",
158
+ "pl7.app/table/visibility": "default",
159
+ "pl7.app/table/orderPriority": "69400"
160
+ })]
161
+
162
+ columns += [makeCol("aliphatic_peptide", "pl7.app/aliphaticIndex", "Double",
163
+ "Aliphatic Index", dom, {
164
+ "pl7.app/format": ".1f",
165
+ "pl7.app/min": "0",
166
+ "pl7.app/description": "Measures fraction of nonpolar aliphatic residues (Ala, Val, Ile, Leu). For short linear peptides, thermostability interpretation does not apply — the Ikai index was derived for globular mesophilic enzymes, and thermostability is not a meaningful concept for unstructured peptides. Useful as a composition indicator and a proxy for hydrophobic character alongside GRAVY — both metrics increase with Ala/Val/Ile/Leu content, but neither has a universal preferred direction for therapeutic peptides.",
167
+ "pl7.app/table/visibility": "optional",
168
+ "pl7.app/table/orderPriority": "69300"
169
+ })]
170
+
171
+ columns += [makeCol("aromaticity_peptide", "pl7.app/aromaticity", "Double",
172
+ "Aromaticity", dom, {
173
+ "pl7.app/format": ".3f",
174
+ "pl7.app/min": "0",
175
+ "pl7.app/max": "1",
176
+ "pl7.app/description": "Fraction of aromatic residues (Phe, Trp, Tyr).",
177
+ "pl7.app/table/visibility": "optional",
178
+ "pl7.app/table/orderPriority": "69200"
179
+ })]
180
+
181
+ } else {
182
+ // Antibody/TCR mode — CDR3 columns per chain, full-chain when present, Fv when paired.
183
+ // CDR-H3 (chain A) and CDR-L3 (chain B) carry different descriptions per
184
+ // pcolumn-spec.md — different developability signals.
185
+ cdr3ChargeDesc := {
186
+ A: "Strongly positive CDR3 charge correlates with polyreactivity via electrostatic interactions. No universal preferred direction in Lead Selection. IPC 2.0 peptide pKa set.",
187
+ B: "Strongly positive CDR-L3 charge contributes to paratope polyreactivity. Strongly negative charge is primarily a PK concern. No universal preferred direction. IPC 2.0 peptide pKa set."
188
+ }
189
+ cdr3GravyDesc := {
190
+ A: "Lower hydrophobicity preferred for developability. CDR3 GRAVY > 0 is an informal aggregation/polyreactivity heuristic.",
191
+ B: "Same aggregation and polyreactivity signal as CDR-H3 hydrophobicity; lower independent predictive weight. The TAP score uses combined 6-CDR GRAVY — CDR-L3 alone has limited independent validation."
192
+ }
193
+ cdr3OrderA := 68000
194
+ cdr3OrderB := 67700
195
+ for chain in chains {
196
+ frag := labelFragments(receptor, chain)
197
+ cdr3Dom := { "pl7.app/feature": "CDR3", "pl7.app/vdj/scClonotypeChain": chain }
198
+ chargeOrder := (chain == "A" ? cdr3OrderA : cdr3OrderB)
199
+ gravyOrder := chargeOrder - 100
200
+
201
+ columns += [makeCol("charge_" + chain + "_CDR3", "pl7.app/charge", "Double",
202
+ frag.cdr3 + " Net Charge (pH 7)", cdr3Dom, {
203
+ "pl7.app/format": ".2f",
204
+ "pl7.app/isScore": "true",
205
+ "pl7.app/description": cdr3ChargeDesc[chain],
206
+ "pl7.app/table/visibility": "default",
207
+ "pl7.app/table/orderPriority": string(chargeOrder)
208
+ })]
209
+ columns += [makeCol("gravy_" + chain + "_CDR3", "pl7.app/hydrophobicity", "Double",
210
+ frag.cdr3 + " Hydrophobicity (GRAVY)", cdr3Dom, {
211
+ "pl7.app/format": ".3f",
212
+ "pl7.app/isScore": "true",
213
+ "pl7.app/score/rankingOrder": "increasing",
214
+ "pl7.app/description": cdr3GravyDesc[chain],
215
+ "pl7.app/table/visibility": "default",
216
+ "pl7.app/table/orderPriority": string(gravyOrder)
217
+ })]
218
+ }
219
+
220
+ // Full-chain columns (9 per chain when reconstructed).
221
+ fcOrderBaseA := 67000
222
+ fcOrderBaseB := 66000
223
+ for chain in fullChains {
224
+ frag := labelFragments(receptor, chain)
225
+ fcDom := { "pl7.app/feature": "VDJRegion", "pl7.app/vdj/scClonotypeChain": chain }
226
+ base := (chain == "A" ? fcOrderBaseA : fcOrderBaseB)
227
+ fcLabel := frag.fullChain
228
+
229
+ columns += [makeCol("charge_" + chain + "_VDJRegion", "pl7.app/charge", "Double",
230
+ fcLabel + " Net Charge (pH 7)", fcDom, {
231
+ "pl7.app/format": ".2f",
232
+ "pl7.app/isScore": "true",
233
+ "pl7.app/description": "Non-monotonic vs developability: strongly positive correlates with polyreactivity; strongly negative with rapid clearance.",
234
+ "pl7.app/table/visibility": "default",
235
+ "pl7.app/table/orderPriority": string(base)
236
+ })]
237
+ columns += [makeCol("pi_" + chain + "_VDJRegion", "pl7.app/isoelectricPoint", "Double",
238
+ fcLabel + " Isoelectric Point (pI)", fcDom, {
239
+ "pl7.app/format": ".2f",
240
+ "pl7.app/isScore": "true",
241
+ "pl7.app/min": "0",
242
+ "pl7.app/max": "14",
243
+ "pl7.app/table/visibility": "default",
244
+ "pl7.app/table/orderPriority": string(base - 100)
245
+ })]
246
+ columns += [makeCol("gravy_" + chain + "_VDJRegion", "pl7.app/hydrophobicity", "Double",
247
+ fcLabel + " Hydrophobicity (GRAVY)", fcDom, {
248
+ "pl7.app/format": ".3f",
249
+ "pl7.app/description": "Framework regions dominate; weak developability signal at chain level — CDR3 hydrophobicity is more discriminating.",
250
+ "pl7.app/table/visibility": "default",
251
+ "pl7.app/table/orderPriority": string(base - 200)
252
+ })]
253
+ columns += [makeCol("mw_" + chain + "_VDJRegion", "pl7.app/molecularWeight", "Double",
254
+ fcLabel + " Molecular Weight (Da, average masses)", fcDom, {
255
+ "pl7.app/format": ".1f",
256
+ "pl7.app/min": "0",
257
+ "pl7.app/description": "Unglycosylated sequence mass — does not include N-glycan contributions from any NXS/NXT sequons in the variable region.",
258
+ "pl7.app/table/visibility": "optional",
259
+ "pl7.app/table/orderPriority": string(base - 300)
260
+ })]
261
+ columns += [makeCol("eox_" + chain + "_VDJRegion", "pl7.app/extinctionCoefficientOx", "Double",
262
+ fcLabel + " Extinction Coeff., Oxidized (M⁻¹cm⁻¹)", fcDom, {
263
+ "pl7.app/format": ".0f",
264
+ "pl7.app/min": "0",
265
+ "pl7.app/table/visibility": "optional",
266
+ "pl7.app/table/orderPriority": string(base - 400)
267
+ })]
268
+ columns += [makeCol("ered_" + chain + "_VDJRegion", "pl7.app/extinctionCoefficientRed", "Double",
269
+ fcLabel + " Extinction Coeff., Reduced (M⁻¹cm⁻¹)", fcDom, {
270
+ "pl7.app/format": ".0f",
271
+ "pl7.app/min": "0",
272
+ "pl7.app/table/visibility": "optional",
273
+ "pl7.app/table/orderPriority": string(base - 500)
274
+ })]
275
+ columns += [makeCol("instability_" + chain + "_VDJRegion", "pl7.app/instabilityIndex", "Double",
276
+ fcLabel + " Instability Index", fcDom, {
277
+ "pl7.app/format": ".2f",
278
+ "pl7.app/description": "Guruprasad index, calibrated for in-vitro stability of soluble globular proteins via dipeptide composition. Weak predictor of antibody Tm — use as supplementary ranking aid.",
279
+ "pl7.app/table/visibility": "optional",
280
+ "pl7.app/table/orderPriority": string(base - 600)
281
+ })]
282
+ columns += [makeCol("aliphatic_" + chain + "_VDJRegion", "pl7.app/aliphaticIndex", "Double",
283
+ fcLabel + " Aliphatic Index", fcDom, {
284
+ "pl7.app/format": ".1f",
285
+ "pl7.app/min": "0",
286
+ "pl7.app/description": "Ikai aliphatic index, derived from globular mesophilic enzymes. Weak correlation with antibody Tm. No rankingOrder — high values can correlate with aggregation propensity.",
287
+ "pl7.app/table/visibility": "optional",
288
+ "pl7.app/table/orderPriority": string(base - 700)
289
+ })]
290
+ columns += [makeCol("aromaticity_" + chain + "_VDJRegion", "pl7.app/aromaticity", "Double",
291
+ fcLabel + " Aromaticity", fcDom, {
292
+ "pl7.app/format": ".3f",
293
+ "pl7.app/min": "0",
294
+ "pl7.app/max": "1",
295
+ "pl7.app/description": "Fraction of aromatic residues (Phe, Trp, Tyr) over the full chain. Framework dominates; CDR-specific aromaticity is a stronger predictor (Phase 2).",
296
+ "pl7.app/table/visibility": "optional",
297
+ "pl7.app/table/orderPriority": string(base - 800)
298
+ })]
299
+ }
300
+
301
+ // Fv columns — only when both VH and VL full chains reconstructed (antibody only).
302
+ if hasFv {
303
+ fvDom := { "pl7.app/feature": "Fv" }
304
+ columns += [makeCol("charge_Fv", "pl7.app/charge", "Double",
305
+ "Fv Net Charge (pH 7)", fvDom, {
306
+ "pl7.app/format": ".2f",
307
+ "pl7.app/isScore": "true",
308
+ "pl7.app/table/visibility": "default",
309
+ "pl7.app/table/orderPriority": "65100"
310
+ })]
311
+ columns += [makeCol("pi_Fv", "pl7.app/isoelectricPoint", "Double",
312
+ "Fv Isoelectric Point (pI)", fvDom, {
313
+ "pl7.app/format": ".2f",
314
+ "pl7.app/isScore": "true",
315
+ "pl7.app/min": "0",
316
+ "pl7.app/max": "14",
317
+ "pl7.app/description": "Variable region (VH+VL) only. Fv pI is typically 2–4 pH units higher than whole-IgG cIEF measurements, which include constant regions (IgG1 Fc pI ≈ 5–6).",
318
+ "pl7.app/table/visibility": "default",
319
+ "pl7.app/table/orderPriority": "65000"
320
+ })]
321
+ columns += [makeCol("eox_Fv", "pl7.app/extinctionCoefficientOx", "Double",
322
+ "Fv Extinction Coeff., Oxidized (M⁻¹cm⁻¹)", fvDom, {
323
+ "pl7.app/format": ".0f",
324
+ "pl7.app/min": "0",
325
+ "pl7.app/description": "Variable region (VH+VL) only — does not include constant regions. For whole-IgG A280 quantification, use the full-antibody ε.",
326
+ "pl7.app/table/visibility": "optional",
327
+ "pl7.app/table/orderPriority": "64900"
328
+ })]
329
+ columns += [makeCol("ered_Fv", "pl7.app/extinctionCoefficientRed", "Double",
330
+ "Fv Extinction Coeff., Reduced (M⁻¹cm⁻¹)", fvDom, {
331
+ "pl7.app/format": ".0f",
332
+ "pl7.app/min": "0",
333
+ "pl7.app/description": "Variable region (VH+VL) only, disulfide bonds reduced (Cys contribution omitted). A value of 0 means no Tyr or Trp — A280-based quantification is not possible.",
334
+ "pl7.app/table/visibility": "optional",
335
+ "pl7.app/table/orderPriority": "64800"
336
+ })]
337
+ columns += [makeCol("mw_Fv", "pl7.app/molecularWeight", "Double",
338
+ "Fv Molecular Weight (Da, average masses)", fvDom, {
339
+ "pl7.app/format": ".1f",
340
+ "pl7.app/min": "0",
341
+ "pl7.app/description": "Unglycosylated sequence mass (VH + VL).",
342
+ "pl7.app/table/visibility": "optional",
343
+ "pl7.app/table/orderPriority": "64700"
344
+ })]
345
+ }
346
+ }
347
+
348
+ outputSpecs := {
349
+ axes: axes,
350
+ columns: columns,
351
+ storageFormat: "Parquet",
352
+ partitionKeyLength: 0
353
+ }
354
+
355
+ // Stamp blockId on the export-only columns (lets downstream blocks distinguish runs).
356
+ // Build a fresh column with a cloned domain dict — mutating col.spec.domain
357
+ // in place would also stamp blockId on outputSpecs.columns since both lists
358
+ // share the same column references.
359
+ exportColumns := []
360
+ for col in columns {
361
+ if col.spec.annotations && col.spec.annotations["pl7.app/isScore"] == "true" {
362
+ newDomain := {}
363
+ if col.spec.domain {
364
+ for k, v in col.spec.domain {
365
+ newDomain[k] = v
366
+ }
367
+ }
368
+ newDomain["pl7.app/blockId"] = blockId
369
+ exportColumns += [{
370
+ column: col.column,
371
+ id: col.id,
372
+ naRegex: col.naRegex,
373
+ allowNA: col.allowNA,
374
+ spec: {
375
+ name: col.spec.name,
376
+ valueType: col.spec.valueType,
377
+ domain: newDomain,
378
+ annotations: col.spec.annotations
379
+ }
380
+ }]
381
+ }
382
+ }
383
+
384
+ exportSpecs := {
385
+ axes: axes,
386
+ columns: exportColumns,
387
+ storageFormat: "Parquet",
388
+ partitionKeyLength: 0
389
+ }
390
+
391
+ scalarOut := xsv.importFile(propertiesTsv, "tsv", outputSpecs, { splitDataAndSpec: true, cpu: 1, mem: "4GiB" })
392
+ exportOut := xsv.importFile(propertiesTsv, "tsv", exportSpecs, { splitDataAndSpec: true, cpu: 1, mem: "4GiB" })
393
+
394
+ trace := pSpec.makeTrace(datasetSpec, {
395
+ type: "milaboratories.sequence-properties",
396
+ importance: 30,
397
+ label: "Sequence Properties",
398
+ id: blockId
399
+ })
400
+
401
+ // AA fraction — peptide mode only (R7). 2-axis: [variantKey, aminoAcid].
402
+ // The TSV is long format: entity_key, aminoAcid, value. The aminoAcid axis
403
+ // values are the 20 standard single-letter codes (R7).
404
+ aaOut := undefined
405
+ if mode == "peptide" {
406
+ aaAxes := [
407
+ { column: "entity_key", spec: keyAxisSpec },
408
+ {
409
+ column: "aminoAcid",
410
+ spec: {
411
+ name: "pl7.app/aminoAcid",
412
+ type: "String",
413
+ annotations: { "pl7.app/label": "Amino Acid" }
414
+ }
415
+ }
416
+ ]
417
+ aaCols := [
418
+ {
419
+ column: "value",
420
+ id: "aaFraction",
421
+ naRegex: "",
422
+ allowNA: true,
423
+ spec: {
424
+ name: "pl7.app/aaFraction",
425
+ valueType: "Double",
426
+ domain: { "pl7.app/feature": "peptide" },
427
+ annotations: {
428
+ "pl7.app/label": "AA Fraction",
429
+ "pl7.app/format": ".3f",
430
+ "pl7.app/min": "0",
431
+ "pl7.app/max": "1",
432
+ "pl7.app/table/visibility": "optional",
433
+ "pl7.app/table/orderPriority": "69000"
434
+ }
435
+ }
436
+ }
437
+ ]
438
+ aaSpecs := {
439
+ axes: aaAxes,
440
+ columns: aaCols,
441
+ storageFormat: "Parquet",
442
+ partitionKeyLength: 0
443
+ }
444
+ aaOut = xsv.importFile(args.aaFractionTsv, "tsv", aaSpecs, { splitDataAndSpec: true, cpu: 1, mem: "4GiB" })
445
+ }
446
+
447
+ // Combined output pFrame — scalar properties + (peptide mode only) AA fraction.
448
+ // Sorted .add() order keeps the pframe resource bytes stable across runs (dedup).
449
+ resultPframe := pframes.pFrameBuilder()
450
+ for _, k in maps.getKeys(scalarOut) {
451
+ v := scalarOut[k]
452
+ resultPframe.add(k, trace.inject(v.spec), v.data)
453
+ }
454
+ if aaOut != undefined {
455
+ for _, k in maps.getKeys(aaOut) {
456
+ v := aaOut[k]
457
+ resultPframe.add(k, trace.inject(v.spec), v.data)
458
+ }
459
+ }
460
+ resultPframe = resultPframe.build()
461
+
462
+ exportPframe := pframes.pFrameBuilder()
463
+ for _, k in maps.getKeys(exportOut) {
464
+ v := exportOut[k]
465
+ exportPframe.add(k, trace.inject(v.spec), v.data)
466
+ }
467
+ exportPframe = exportPframe.build()
468
+
469
+ return {
470
+ propertiesPf: pframes.exportFrame(resultPframe),
471
+ exportPframe: exportPframe,
472
+ info: infoBlob
473
+ }
474
+ })
package/src/wf.test.ts ADDED
@@ -0,0 +1,9 @@
1
+ // Workflow integration tests will be added once Python property computation lands.
2
+ // Placeholder kept so vitest discovers the suite without failures.
3
+ import { describe, it } from "vitest";
4
+
5
+ describe("sequence-properties workflow", function () {
6
+ it.skip("computes peptide properties end-to-end", function () {
7
+ // TODO: fill in the end-to-end assertions once compute_properties.py is implemented.
8
+ });
9
+ });
package/tsconfig.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "es2022",
4
+ "module": "commonjs",
5
+ "moduleResolution": "node",
6
+ "esModuleInterop": true,
7
+ "strict": true,
8
+ "outDir": "./dist",
9
+ "rootDir": "./src",
10
+ "sourceMap": true,
11
+ "declaration": true,
12
+ "types": []
13
+ },
14
+ "include": ["src/**/*"],
15
+ "exclude": ["node_modules", "dist"]
16
+ }
@@ -0,0 +1,9 @@
1
+ import { defineConfig } from 'vitest/config';
2
+
3
+ export default defineConfig({
4
+ test: {
5
+ testTimeout: 5000, // per-test time limit
6
+ maxConcurrency: 3, // cap on concurrently running tests
7
+ watch: false // single run; do not start watch mode
8
+ }
9
+ });