@platforma-open/milaboratories.sequence-properties.workflow 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,58 +11,10 @@ smart := import("@platforma-sdk/workflow-tengo:smart")
11
11
  canonical := import("@platforma-sdk/workflow-tengo:canonical")
12
12
  constants := import("@platforma-sdk/workflow-tengo:constants")
13
13
  messages := import(":messages")
14
+ columnSpecs := import(":columns")
14
15
 
15
16
  self.defineOutputs("propertiesPf", "exportPframe", "info")
16
17
 
17
- // ΔCharge (pH 7.4 → 6.0) endpoints — fixed in v1 per spec; schema accepts
18
- // additional pH pairs without breaking existing column identities.
19
- CHARGE_SHIFT_PH_FROM := "7.4"
20
- CHARGE_SHIFT_PH_TO := "6.0"
21
-
22
- // Spec R6b — same description on every chargeShift column (peptide / CDR3 / Fv).
23
- // Trimmed to ~30 words for the column-header tooltip; PlAgDataTableV2 clips the
24
- // top edge against the page header. Magnitude rule and scope-exclusion caveats
25
- // live in pcolumn-spec.md / the spec, not in the tooltip.
26
- CHARGE_SHIFT_DESC := "Net charge change from pH 7.4 (blood) to pH 6.0 (endosome). Negative values mean the molecule gains positive charge on acidification — the productive direction for histidine-driven pH switching. Histidine dominates (~−0.46 per His)."
27
-
28
- // Receptor + chain → human label fragments (CDR3 / full-chain).
29
- // Spec R13a: PColumn name and chain domain are unchanged; only the label varies.
30
- labelFragments := func(receptor, chain) {
31
- if receptor == "TCRAB" {
32
- if chain == "A" { return { cdr3: "CDR-α3", fullChain: "Vα" } }
33
- if chain == "B" { return { cdr3: "CDR-β3", fullChain: "Vβ" } }
34
- }
35
- if receptor == "TCRGD" {
36
- if chain == "A" { return { cdr3: "CDR-γ3", fullChain: "Vγ" } }
37
- if chain == "B" { return { cdr3: "CDR-δ3", fullChain: "Vδ" } }
38
- }
39
- // IG / unknown — antibody convention.
40
- if chain == "A" { return { cdr3: "CDR-H3", fullChain: "VH" } }
41
- return { cdr3: "CDR-L3", fullChain: "VL" }
42
- }
43
-
44
- // Build a single output column descriptor consumed by xsv.importFile.
45
- // `tsvCol` is the TSV column header emitted by Python (e.g. "charge_peptide", "charge_A_CDR3").
46
- // Clones the caller's `annotations` dict — mutating it in place would stamp
47
- // the label into shared references if any caller ever reused the literal,
48
- // the same aliasing footgun that the export-domain clone below already guards.
49
- makeCol := func(tsvCol, valName, valueType, label, domain, annotations) {
50
- newAnnotations := {}
51
- if annotations {
52
- for k, v in annotations {
53
- newAnnotations[k] = v
54
- }
55
- }
56
- newAnnotations["pl7.app/label"] = label
57
- spec := {
58
- name: valName,
59
- valueType: valueType,
60
- domain: domain,
61
- annotations: newAnnotations
62
- }
63
- return { column: tsvCol, id: tsvCol, naRegex: "", allowNA: true, spec: spec }
64
- }
65
-
66
18
  self.body(func(args) {
67
19
  blockId := args.blockId
68
20
  propertiesTsv := args.propertiesTsv
@@ -109,294 +61,16 @@ self.body(func(args) {
109
61
  keyAxisSpec := datasetSpec.axesSpec[keyAxisIdx]
110
62
 
111
63
  axes := [{ column: "entity_key", spec: keyAxisSpec }]
112
- columns := []
113
-
114
- if mode == "peptide" {
115
- // Peptide mode — 9 scalar properties on `pl7.app/feature: "peptide"`.
116
- dom := { "pl7.app/feature": "peptide" }
117
-
118
- columns += [makeCol("charge_peptide", "pl7.app/charge", "Double",
119
- "Net Charge (pH 7)", dom, {
120
- "pl7.app/format": ".2f",
121
- "pl7.app/isScore": "true",
122
- "pl7.app/description": "Net charge at pH 7 (Henderson-Hasselbalch, IPC 2.0 peptide pKa set). Positive = net basic (Arg, Lys, His dominate); negative = net acidic (Asp, Glu dominate). No universal preferred direction.",
123
- "pl7.app/table/visibility": "default",
124
- "pl7.app/table/orderPriority": "70000"
125
- })]
126
-
127
- columns += [makeCol("chargeShift_peptide", "pl7.app/chargeShift", "Double",
128
- "Peptide ΔCharge (pH 7.4 → 6.0)", {
129
- "pl7.app/feature": "peptide",
130
- "pl7.app/pH/from": CHARGE_SHIFT_PH_FROM,
131
- "pl7.app/pH/to": CHARGE_SHIFT_PH_TO
132
- }, {
133
- "pl7.app/format": ".2f",
134
- "pl7.app/description": CHARGE_SHIFT_DESC,
135
- "pl7.app/table/visibility": "default",
136
- "pl7.app/table/orderPriority": "69950"
137
- })]
138
-
139
- columns += [makeCol("gravy_peptide", "pl7.app/hydrophobicity", "Double",
140
- "Hydrophobicity (GRAVY)", dom, {
141
- "pl7.app/format": ".3f",
142
- "pl7.app/isScore": "true",
143
- "pl7.app/score/rankingOrder": "increasing",
144
- "pl7.app/description": "rankingOrder: increasing reflects preference for lower hydrophobicity. Invert direction in Lead Selection for hydrophobic-target applications.",
145
- "pl7.app/table/visibility": "default",
146
- "pl7.app/table/orderPriority": "69900"
147
- })]
148
-
149
- columns += [makeCol("mw_peptide", "pl7.app/molecularWeight", "Double",
150
- "Molecular Weight (Da, average masses)", dom, {
151
- "pl7.app/format": ".1f",
152
- "pl7.app/min": "0",
153
- "pl7.app/table/visibility": "default",
154
- "pl7.app/table/orderPriority": "69800"
155
- })]
156
-
157
- columns += [makeCol("pi_peptide", "pl7.app/isoelectricPoint", "Double",
158
- "Isoelectric Point (pI)", dom, {
159
- "pl7.app/format": ".2f",
160
- "pl7.app/min": "0",
161
- "pl7.app/max": "14",
162
- "pl7.app/table/visibility": "default",
163
- "pl7.app/table/orderPriority": "69700"
164
- })]
165
-
166
- columns += [makeCol("eox_peptide", "pl7.app/extinctionCoefficientOx", "Double",
167
- "Extinction Coeff., Oxidized (M⁻¹cm⁻¹)", dom, {
168
- "pl7.app/format": ".0f",
169
- "pl7.app/min": "0",
170
- "pl7.app/description": "Assumes all Cys are in disulfide bonds. For unprotected linear peptides use the reduced form.",
171
- "pl7.app/table/visibility": "optional",
172
- "pl7.app/table/orderPriority": "69600"
173
- })]
174
-
175
- columns += [makeCol("ered_peptide", "pl7.app/extinctionCoefficientRed", "Double",
176
- "Extinction Coeff., Reduced (M⁻¹cm⁻¹)", dom, {
177
- "pl7.app/format": ".0f",
178
- "pl7.app/min": "0",
179
- "pl7.app/description": "Extinction coefficient at 280 nm, disulfide bonds reduced (Cys contribution omitted). A value of 0 means no Tyr or Trp — A280-based quantification is not possible.",
180
- "pl7.app/table/visibility": "optional",
181
- "pl7.app/table/orderPriority": "69500"
182
- })]
183
-
184
- columns += [makeCol("instability_peptide", "pl7.app/instabilityIndex", "Double",
185
- "Instability Index", dom, {
186
- "pl7.app/format": ".2f",
187
- "pl7.app/description": "Guruprasad index — derived from globular proteins. The II > 40 threshold does not apply to short linear peptides; use as a relative composition ranking aid only.",
188
- "pl7.app/table/visibility": "default",
189
- "pl7.app/table/orderPriority": "69400"
190
- })]
191
-
192
- columns += [makeCol("aliphatic_peptide", "pl7.app/aliphaticIndex", "Double",
193
- "Aliphatic Index", dom, {
194
- "pl7.app/format": ".1f",
195
- "pl7.app/min": "0",
196
- "pl7.app/description": "Measures fraction of nonpolar aliphatic residues (Ala, Val, Ile, Leu). For short linear peptides, thermostability interpretation does not apply — the Ikai index was derived for globular mesophilic enzymes, and thermostability is not a meaningful concept for unstructured peptides. Useful as a composition indicator and a proxy for hydrophobic character alongside GRAVY — both metrics increase with Ala/Val/Ile/Leu content, but neither has a universal preferred direction for therapeutic peptides.",
197
- "pl7.app/table/visibility": "optional",
198
- "pl7.app/table/orderPriority": "69300"
199
- })]
200
64
 
201
- columns += [makeCol("aromaticity_peptide", "pl7.app/aromaticity", "Double",
202
- "Aromaticity", dom, {
203
- "pl7.app/format": ".3f",
204
- "pl7.app/min": "0",
205
- "pl7.app/max": "1",
206
- "pl7.app/description": "Fraction of aromatic residues (Phe, Trp, Tyr).",
207
- "pl7.app/table/visibility": "optional",
208
- "pl7.app/table/orderPriority": "69200"
209
- })]
210
-
211
- } else {
212
- // Antibody/TCR mode — CDR3 columns per chain, full-chain when present, Fv when paired.
213
- // CDR-H3 (chain A) and CDR-L3 (chain B) carry different descriptions per
214
- // pcolumn-spec.md — different developability signals.
215
- cdr3ChargeDesc := {
216
- A: "Strongly positive CDR3 charge correlates with polyreactivity via electrostatic interactions. No universal preferred direction in Lead Selection. IPC 2.0 peptide pKa set.",
217
- B: "Strongly positive CDR-L3 charge contributes to paratope polyreactivity. Strongly negative charge is primarily a PK concern. No universal preferred direction. IPC 2.0 peptide pKa set."
218
- }
219
- cdr3GravyDesc := {
220
- A: "Lower hydrophobicity preferred for developability. CDR3 GRAVY > 0 is an informal aggregation/polyreactivity heuristic.",
221
- B: "Same aggregation and polyreactivity signal as CDR-H3 hydrophobicity; lower independent predictive weight. The TAP score uses combined 6-CDR GRAVY — CDR-L3 alone has limited independent validation."
222
- }
223
- cdr3OrderA := 68000
224
- cdr3OrderB := 67700
225
- for chain in chains {
226
- frag := labelFragments(receptor, chain)
227
- cdr3Dom := { "pl7.app/feature": "CDR3", "pl7.app/vdj/scClonotypeChain": chain }
228
- chargeOrder := (chain == "A" ? cdr3OrderA : cdr3OrderB)
229
- gravyOrder := chargeOrder - 100
230
-
231
- columns += [makeCol("charge_" + chain + "_CDR3", "pl7.app/charge", "Double",
232
- frag.cdr3 + " Net Charge (pH 7)", cdr3Dom, {
233
- "pl7.app/format": ".2f",
234
- "pl7.app/isScore": "true",
235
- "pl7.app/description": cdr3ChargeDesc[chain],
236
- "pl7.app/table/visibility": "default",
237
- "pl7.app/table/orderPriority": string(chargeOrder)
238
- })]
239
- columns += [makeCol("chargeShift_" + chain + "_CDR3", "pl7.app/chargeShift", "Double",
240
- frag.cdr3 + " ΔCharge (pH 7.4 → 6.0)", {
241
- "pl7.app/feature": "CDR3",
242
- "pl7.app/vdj/scClonotypeChain": chain,
243
- "pl7.app/pH/from": CHARGE_SHIFT_PH_FROM,
244
- "pl7.app/pH/to": CHARGE_SHIFT_PH_TO
245
- }, {
246
- "pl7.app/format": ".2f",
247
- "pl7.app/description": CHARGE_SHIFT_DESC,
248
- "pl7.app/table/visibility": "default",
249
- "pl7.app/table/orderPriority": string(chargeOrder - 50)
250
- })]
251
- columns += [makeCol("gravy_" + chain + "_CDR3", "pl7.app/hydrophobicity", "Double",
252
- frag.cdr3 + " Hydrophobicity (GRAVY)", cdr3Dom, {
253
- "pl7.app/format": ".3f",
254
- "pl7.app/isScore": "true",
255
- "pl7.app/score/rankingOrder": "increasing",
256
- "pl7.app/description": cdr3GravyDesc[chain],
257
- "pl7.app/table/visibility": "default",
258
- "pl7.app/table/orderPriority": string(gravyOrder)
259
- })]
260
- }
261
-
262
- // Full-chain columns (9 per chain when reconstructed).
263
- fcOrderBaseA := 67000
264
- fcOrderBaseB := 66000
265
- for chain in fullChains {
266
- frag := labelFragments(receptor, chain)
267
- fcDom := { "pl7.app/feature": "VDJRegion", "pl7.app/vdj/scClonotypeChain": chain }
268
- base := (chain == "A" ? fcOrderBaseA : fcOrderBaseB)
269
- fcLabel := frag.fullChain
270
-
271
- columns += [makeCol("charge_" + chain + "_VDJRegion", "pl7.app/charge", "Double",
272
- fcLabel + " Net Charge (pH 7)", fcDom, {
273
- "pl7.app/format": ".2f",
274
- "pl7.app/isScore": "true",
275
- "pl7.app/description": "Non-monotonic vs developability: strongly positive correlates with polyreactivity; strongly negative with rapid clearance.",
276
- "pl7.app/table/visibility": "default",
277
- "pl7.app/table/orderPriority": string(base)
278
- })]
279
- columns += [makeCol("pi_" + chain + "_VDJRegion", "pl7.app/isoelectricPoint", "Double",
280
- fcLabel + " Isoelectric Point (pI)", fcDom, {
281
- "pl7.app/format": ".2f",
282
- "pl7.app/isScore": "true",
283
- "pl7.app/min": "0",
284
- "pl7.app/max": "14",
285
- "pl7.app/table/visibility": "default",
286
- "pl7.app/table/orderPriority": string(base - 100)
287
- })]
288
- columns += [makeCol("gravy_" + chain + "_VDJRegion", "pl7.app/hydrophobicity", "Double",
289
- fcLabel + " Hydrophobicity (GRAVY)", fcDom, {
290
- "pl7.app/format": ".3f",
291
- "pl7.app/description": "Framework regions dominate; weak developability signal at chain level — CDR3 hydrophobicity is more discriminating.",
292
- "pl7.app/table/visibility": "default",
293
- "pl7.app/table/orderPriority": string(base - 200)
294
- })]
295
- columns += [makeCol("mw_" + chain + "_VDJRegion", "pl7.app/molecularWeight", "Double",
296
- fcLabel + " Molecular Weight (Da, average masses)", fcDom, {
297
- "pl7.app/format": ".1f",
298
- "pl7.app/min": "0",
299
- "pl7.app/description": "Unglycosylated sequence mass — does not include N-glycan contributions from any NXS/NXT sequons in the variable region.",
300
- "pl7.app/table/visibility": "optional",
301
- "pl7.app/table/orderPriority": string(base - 300)
302
- })]
303
- columns += [makeCol("eox_" + chain + "_VDJRegion", "pl7.app/extinctionCoefficientOx", "Double",
304
- fcLabel + " Extinction Coeff., Oxidized (M⁻¹cm⁻¹)", fcDom, {
305
- "pl7.app/format": ".0f",
306
- "pl7.app/min": "0",
307
- "pl7.app/table/visibility": "optional",
308
- "pl7.app/table/orderPriority": string(base - 400)
309
- })]
310
- columns += [makeCol("ered_" + chain + "_VDJRegion", "pl7.app/extinctionCoefficientRed", "Double",
311
- fcLabel + " Extinction Coeff., Reduced (M⁻¹cm⁻¹)", fcDom, {
312
- "pl7.app/format": ".0f",
313
- "pl7.app/min": "0",
314
- "pl7.app/table/visibility": "optional",
315
- "pl7.app/table/orderPriority": string(base - 500)
316
- })]
317
- columns += [makeCol("instability_" + chain + "_VDJRegion", "pl7.app/instabilityIndex", "Double",
318
- fcLabel + " Instability Index", fcDom, {
319
- "pl7.app/format": ".2f",
320
- "pl7.app/description": "Guruprasad index, calibrated for in-vitro stability of soluble globular proteins via dipeptide composition. Weak predictor of antibody Tm — use as supplementary ranking aid.",
321
- "pl7.app/table/visibility": "optional",
322
- "pl7.app/table/orderPriority": string(base - 600)
323
- })]
324
- columns += [makeCol("aliphatic_" + chain + "_VDJRegion", "pl7.app/aliphaticIndex", "Double",
325
- fcLabel + " Aliphatic Index", fcDom, {
326
- "pl7.app/format": ".1f",
327
- "pl7.app/min": "0",
328
- "pl7.app/description": "Ikai aliphatic index, derived from globular mesophilic enzymes. Weak correlation with antibody Tm. No rankingOrder — high values can correlate with aggregation propensity.",
329
- "pl7.app/table/visibility": "optional",
330
- "pl7.app/table/orderPriority": string(base - 700)
331
- })]
332
- columns += [makeCol("aromaticity_" + chain + "_VDJRegion", "pl7.app/aromaticity", "Double",
333
- fcLabel + " Aromaticity", fcDom, {
334
- "pl7.app/format": ".3f",
335
- "pl7.app/min": "0",
336
- "pl7.app/max": "1",
337
- "pl7.app/description": "Fraction of aromatic residues (Phe, Trp, Tyr) over the full chain. Framework dominates; CDR-specific aromaticity is a stronger predictor (Phase 2).",
338
- "pl7.app/table/visibility": "optional",
339
- "pl7.app/table/orderPriority": string(base - 800)
340
- })]
341
- }
342
-
343
- // Fv columns — only when both VH and VL full chains reconstructed (antibody only).
344
- if hasFv {
345
- fvDom := { "pl7.app/feature": "Fv" }
346
- columns += [makeCol("charge_Fv", "pl7.app/charge", "Double",
347
- "Fv Net Charge (pH 7)", fvDom, {
348
- "pl7.app/format": ".2f",
349
- "pl7.app/isScore": "true",
350
- "pl7.app/table/visibility": "default",
351
- "pl7.app/table/orderPriority": "65100"
352
- })]
353
- columns += [makeCol("chargeShift_Fv", "pl7.app/chargeShift", "Double",
354
- "Fv ΔCharge (pH 7.4 → 6.0)", {
355
- "pl7.app/feature": "Fv",
356
- "pl7.app/pH/from": CHARGE_SHIFT_PH_FROM,
357
- "pl7.app/pH/to": CHARGE_SHIFT_PH_TO
358
- }, {
359
- "pl7.app/format": ".2f",
360
- "pl7.app/description": CHARGE_SHIFT_DESC,
361
- "pl7.app/table/visibility": "default",
362
- "pl7.app/table/orderPriority": "65050"
363
- })]
364
- columns += [makeCol("pi_Fv", "pl7.app/isoelectricPoint", "Double",
365
- "Fv Isoelectric Point (pI)", fvDom, {
366
- "pl7.app/format": ".2f",
367
- "pl7.app/isScore": "true",
368
- "pl7.app/min": "0",
369
- "pl7.app/max": "14",
370
- "pl7.app/description": "Variable region (VH+VL) only. Fv pI is typically 2–4 pH units higher than whole-IgG cIEF measurements, which include constant regions (IgG1 Fc pI ≈ 5–6).",
371
- "pl7.app/table/visibility": "default",
372
- "pl7.app/table/orderPriority": "65000"
373
- })]
374
- columns += [makeCol("eox_Fv", "pl7.app/extinctionCoefficientOx", "Double",
375
- "Fv Extinction Coeff., Oxidized (M⁻¹cm⁻¹)", fvDom, {
376
- "pl7.app/format": ".0f",
377
- "pl7.app/min": "0",
378
- "pl7.app/description": "Variable region (VH+VL) only — does not include constant regions. For whole-IgG A280 quantification, use the full-antibody ε.",
379
- "pl7.app/table/visibility": "optional",
380
- "pl7.app/table/orderPriority": "64900"
381
- })]
382
- columns += [makeCol("ered_Fv", "pl7.app/extinctionCoefficientRed", "Double",
383
- "Fv Extinction Coeff., Reduced (M⁻¹cm⁻¹)", fvDom, {
384
- "pl7.app/format": ".0f",
385
- "pl7.app/min": "0",
386
- "pl7.app/description": "Variable region (VH+VL) only, disulfide bonds reduced (Cys contribution omitted). A value of 0 means no Tyr or Trp — A280-based quantification is not possible.",
387
- "pl7.app/table/visibility": "optional",
388
- "pl7.app/table/orderPriority": "64800"
389
- })]
390
- columns += [makeCol("mw_Fv", "pl7.app/molecularWeight", "Double",
391
- "Fv Molecular Weight (Da, average masses)", fvDom, {
392
- "pl7.app/format": ".1f",
393
- "pl7.app/min": "0",
394
- "pl7.app/description": "Unglycosylated sequence mass (VH + VL).",
395
- "pl7.app/table/visibility": "optional",
396
- "pl7.app/table/orderPriority": "64700"
397
- })]
398
- }
65
+ colArgs := {
66
+ mode: mode,
67
+ receptor: receptor,
68
+ chains: chains,
69
+ fullChains: fullChains,
70
+ hasFv: hasFv
399
71
  }
72
+ columns := columnSpecs.forPropertiesPf(colArgs)
73
+ exportColumns := columnSpecs.forExport(colArgs, blockId)
400
74
 
401
75
  outputSpecs := {
402
76
  axes: axes,
@@ -405,35 +79,6 @@ self.body(func(args) {
405
79
  partitionKeyLength: 0
406
80
  }
407
81
 
408
- // Stamp blockId on the export-only columns (lets downstream blocks distinguish runs).
409
- // Build a fresh column with a cloned domain dict — mutating col.spec.domain
410
- // in place would also stamp blockId on outputSpecs.columns since both lists
411
- // share the same column references.
412
- exportColumns := []
413
- for col in columns {
414
- if col.spec.annotations && col.spec.annotations["pl7.app/isScore"] == "true" {
415
- newDomain := {}
416
- if col.spec.domain {
417
- for k, v in col.spec.domain {
418
- newDomain[k] = v
419
- }
420
- }
421
- newDomain["pl7.app/blockId"] = blockId
422
- exportColumns += [{
423
- column: col.column,
424
- id: col.id,
425
- naRegex: col.naRegex,
426
- allowNA: col.allowNA,
427
- spec: {
428
- name: col.spec.name,
429
- valueType: col.spec.valueType,
430
- domain: newDomain,
431
- annotations: col.spec.annotations
432
- }
433
- }]
434
- }
435
- }
436
-
437
82
  exportSpecs := {
438
83
  axes: axes,
439
84
  columns: exportColumns,
@@ -502,12 +147,12 @@ self.body(func(args) {
502
147
  resultPframe := pframes.pFrameBuilder()
503
148
  for _, k in maps.getKeys(scalarOut) {
504
149
  v := scalarOut[k]
505
- resultPframe.add(k, trace.inject(v.spec), v.data)
150
+ resultPframe.add(k, v.spec, v.data)
506
151
  }
507
152
  if aaOut != undefined {
508
153
  for _, k in maps.getKeys(aaOut) {
509
154
  v := aaOut[k]
510
- resultPframe.add(k, trace.inject(v.spec), v.data)
155
+ resultPframe.add(k, v.spec, v.data)
511
156
  }
512
157
  }
513
158
  resultPframe = resultPframe.build()