@platforma-open/milaboratories.sequence-properties.workflow 1.1.2 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,47 +11,10 @@ smart := import("@platforma-sdk/workflow-tengo:smart")
11
11
  canonical := import("@platforma-sdk/workflow-tengo:canonical")
12
12
  constants := import("@platforma-sdk/workflow-tengo:constants")
13
13
  messages := import(":messages")
14
+ columnSpecs := import(":columns")
14
15
 
15
16
  self.defineOutputs("propertiesPf", "exportPframe", "info")
16
17
 
17
- // Receptor + chain → human label fragments (CDR3 / full-chain).
18
- // Spec R13a: PColumn name and chain domain are unchanged; only the label varies.
19
- labelFragments := func(receptor, chain) {
20
- if receptor == "TCRAB" {
21
- if chain == "A" { return { cdr3: "CDR-α3", fullChain: "Vα" } }
22
- if chain == "B" { return { cdr3: "CDR-β3", fullChain: "Vβ" } }
23
- }
24
- if receptor == "TCRGD" {
25
- if chain == "A" { return { cdr3: "CDR-γ3", fullChain: "Vγ" } }
26
- if chain == "B" { return { cdr3: "CDR-δ3", fullChain: "Vδ" } }
27
- }
28
- // IG / unknown — antibody convention.
29
- if chain == "A" { return { cdr3: "CDR-H3", fullChain: "VH" } }
30
- return { cdr3: "CDR-L3", fullChain: "VL" }
31
- }
32
-
33
- // Build a single output column descriptor consumed by xsv.importFile.
34
- // `tsvCol` is the TSV column header emitted by Python (e.g. "charge_peptide", "charge_A_CDR3").
35
- // Clones the caller's `annotations` dict — mutating it in place would stamp
36
- // the label into shared references if any caller ever reused the literal,
37
- // the same aliasing footgun that the export-domain clone below already guards.
38
- makeCol := func(tsvCol, valName, valueType, label, domain, annotations) {
39
- newAnnotations := {}
40
- if annotations {
41
- for k, v in annotations {
42
- newAnnotations[k] = v
43
- }
44
- }
45
- newAnnotations["pl7.app/label"] = label
46
- spec := {
47
- name: valName,
48
- valueType: valueType,
49
- domain: domain,
50
- annotations: newAnnotations
51
- }
52
- return { column: tsvCol, id: tsvCol, naRegex: "", allowNA: true, spec: spec }
53
- }
54
-
55
18
  self.body(func(args) {
56
19
  blockId := args.blockId
57
20
  propertiesTsv := args.propertiesTsv
@@ -81,6 +44,13 @@ self.body(func(args) {
81
44
  }
82
45
  }
83
46
 
47
+ // R9 — Instability Index is NA for peptides shorter than 10 aa. Surface a
48
+ // banner in peptide mode whenever any row falls below the floor so the
49
+ // user understands why the Instability Index column is blank.
50
+ if mode == "peptide" && stats.hasPeptideBelowInstabilityFloor == true {
51
+ infoMessages += [messages.peptidesShortInstability()]
52
+ }
53
+
84
54
  infoBlob := smart.createValueResource(constants.RTYPE_JSON, canonical.encode({
85
55
  mode: mode,
86
56
  receptor: receptor,
@@ -91,259 +61,16 @@ self.body(func(args) {
91
61
  keyAxisSpec := datasetSpec.axesSpec[keyAxisIdx]
92
62
 
93
63
  axes := [{ column: "entity_key", spec: keyAxisSpec }]
94
- columns := []
95
-
96
- if mode == "peptide" {
97
- // Peptide mode — 9 scalar properties on `pl7.app/feature: "peptide"`.
98
- dom := { "pl7.app/feature": "peptide" }
99
-
100
- columns += [makeCol("charge_peptide", "pl7.app/charge", "Double",
101
- "Net Charge (pH 7)", dom, {
102
- "pl7.app/format": ".2f",
103
- "pl7.app/isScore": "true",
104
- "pl7.app/description": "Net charge at pH 7 (Henderson-Hasselbalch, IPC 2.0 peptide pKa set). Positive = net basic (Arg, Lys, His dominate); negative = net acidic (Asp, Glu dominate). No universal preferred direction.",
105
- "pl7.app/table/visibility": "default",
106
- "pl7.app/table/orderPriority": "70000"
107
- })]
108
-
109
- columns += [makeCol("gravy_peptide", "pl7.app/hydrophobicity", "Double",
110
- "Hydrophobicity (GRAVY)", dom, {
111
- "pl7.app/format": ".3f",
112
- "pl7.app/isScore": "true",
113
- "pl7.app/score/rankingOrder": "increasing",
114
- "pl7.app/description": "rankingOrder: increasing reflects preference for lower hydrophobicity. Invert direction in Lead Selection for hydrophobic-target applications.",
115
- "pl7.app/table/visibility": "default",
116
- "pl7.app/table/orderPriority": "69900"
117
- })]
118
-
119
- columns += [makeCol("mw_peptide", "pl7.app/molecularWeight", "Double",
120
- "Molecular Weight (Da, average masses)", dom, {
121
- "pl7.app/format": ".1f",
122
- "pl7.app/min": "0",
123
- "pl7.app/table/visibility": "default",
124
- "pl7.app/table/orderPriority": "69800"
125
- })]
126
-
127
- columns += [makeCol("pi_peptide", "pl7.app/isoelectricPoint", "Double",
128
- "Isoelectric Point (pI)", dom, {
129
- "pl7.app/format": ".2f",
130
- "pl7.app/min": "0",
131
- "pl7.app/max": "14",
132
- "pl7.app/table/visibility": "default",
133
- "pl7.app/table/orderPriority": "69700"
134
- })]
135
-
136
- columns += [makeCol("eox_peptide", "pl7.app/extinctionCoefficientOx", "Double",
137
- "Extinction Coeff., Oxidized (M⁻¹cm⁻¹)", dom, {
138
- "pl7.app/format": ".0f",
139
- "pl7.app/min": "0",
140
- "pl7.app/description": "Assumes all Cys are in disulfide bonds. For unprotected linear peptides use the reduced form.",
141
- "pl7.app/table/visibility": "optional",
142
- "pl7.app/table/orderPriority": "69600"
143
- })]
144
-
145
- columns += [makeCol("ered_peptide", "pl7.app/extinctionCoefficientRed", "Double",
146
- "Extinction Coeff., Reduced (M⁻¹cm⁻¹)", dom, {
147
- "pl7.app/format": ".0f",
148
- "pl7.app/min": "0",
149
- "pl7.app/description": "Extinction coefficient at 280 nm, disulfide bonds reduced (Cys contribution omitted). A value of 0 means no Tyr or Trp — A280-based quantification is not possible.",
150
- "pl7.app/table/visibility": "optional",
151
- "pl7.app/table/orderPriority": "69500"
152
- })]
153
-
154
- columns += [makeCol("instability_peptide", "pl7.app/instabilityIndex", "Double",
155
- "Instability Index", dom, {
156
- "pl7.app/format": ".2f",
157
- "pl7.app/description": "Guruprasad index — derived from globular proteins. The II > 40 threshold does not apply to short linear peptides; use as a relative composition ranking aid only.",
158
- "pl7.app/table/visibility": "default",
159
- "pl7.app/table/orderPriority": "69400"
160
- })]
161
-
162
- columns += [makeCol("aliphatic_peptide", "pl7.app/aliphaticIndex", "Double",
163
- "Aliphatic Index", dom, {
164
- "pl7.app/format": ".1f",
165
- "pl7.app/min": "0",
166
- "pl7.app/description": "Measures fraction of nonpolar aliphatic residues (Ala, Val, Ile, Leu). For short linear peptides, thermostability interpretation does not apply — the Ikai index was derived for globular mesophilic enzymes, and thermostability is not a meaningful concept for unstructured peptides. Useful as a composition indicator and a proxy for hydrophobic character alongside GRAVY — both metrics increase with Ala/Val/Ile/Leu content, but neither has a universal preferred direction for therapeutic peptides.",
167
- "pl7.app/table/visibility": "optional",
168
- "pl7.app/table/orderPriority": "69300"
169
- })]
170
64
 
171
- columns += [makeCol("aromaticity_peptide", "pl7.app/aromaticity", "Double",
172
- "Aromaticity", dom, {
173
- "pl7.app/format": ".3f",
174
- "pl7.app/min": "0",
175
- "pl7.app/max": "1",
176
- "pl7.app/description": "Fraction of aromatic residues (Phe, Trp, Tyr).",
177
- "pl7.app/table/visibility": "optional",
178
- "pl7.app/table/orderPriority": "69200"
179
- })]
180
-
181
- } else {
182
- // Antibody/TCR mode — CDR3 columns per chain, full-chain when present, Fv when paired.
183
- // CDR-H3 (chain A) and CDR-L3 (chain B) carry different descriptions per
184
- // pcolumn-spec.md — different developability signals.
185
- cdr3ChargeDesc := {
186
- A: "Strongly positive CDR3 charge correlates with polyreactivity via electrostatic interactions. No universal preferred direction in Lead Selection. IPC 2.0 peptide pKa set.",
187
- B: "Strongly positive CDR-L3 charge contributes to paratope polyreactivity. Strongly negative charge is primarily a PK concern. No universal preferred direction. IPC 2.0 peptide pKa set."
188
- }
189
- cdr3GravyDesc := {
190
- A: "Lower hydrophobicity preferred for developability. CDR3 GRAVY > 0 is an informal aggregation/polyreactivity heuristic.",
191
- B: "Same aggregation and polyreactivity signal as CDR-H3 hydrophobicity; lower independent predictive weight. The TAP score uses combined 6-CDR GRAVY — CDR-L3 alone has limited independent validation."
192
- }
193
- cdr3OrderA := 68000
194
- cdr3OrderB := 67700
195
- for chain in chains {
196
- frag := labelFragments(receptor, chain)
197
- cdr3Dom := { "pl7.app/feature": "CDR3", "pl7.app/vdj/scClonotypeChain": chain }
198
- chargeOrder := (chain == "A" ? cdr3OrderA : cdr3OrderB)
199
- gravyOrder := chargeOrder - 100
200
-
201
- columns += [makeCol("charge_" + chain + "_CDR3", "pl7.app/charge", "Double",
202
- frag.cdr3 + " Net Charge (pH 7)", cdr3Dom, {
203
- "pl7.app/format": ".2f",
204
- "pl7.app/isScore": "true",
205
- "pl7.app/description": cdr3ChargeDesc[chain],
206
- "pl7.app/table/visibility": "default",
207
- "pl7.app/table/orderPriority": string(chargeOrder)
208
- })]
209
- columns += [makeCol("gravy_" + chain + "_CDR3", "pl7.app/hydrophobicity", "Double",
210
- frag.cdr3 + " Hydrophobicity (GRAVY)", cdr3Dom, {
211
- "pl7.app/format": ".3f",
212
- "pl7.app/isScore": "true",
213
- "pl7.app/score/rankingOrder": "increasing",
214
- "pl7.app/description": cdr3GravyDesc[chain],
215
- "pl7.app/table/visibility": "default",
216
- "pl7.app/table/orderPriority": string(gravyOrder)
217
- })]
218
- }
219
-
220
- // Full-chain columns (9 per chain when reconstructed).
221
- fcOrderBaseA := 67000
222
- fcOrderBaseB := 66000
223
- for chain in fullChains {
224
- frag := labelFragments(receptor, chain)
225
- fcDom := { "pl7.app/feature": "VDJRegion", "pl7.app/vdj/scClonotypeChain": chain }
226
- base := (chain == "A" ? fcOrderBaseA : fcOrderBaseB)
227
- fcLabel := frag.fullChain
228
-
229
- columns += [makeCol("charge_" + chain + "_VDJRegion", "pl7.app/charge", "Double",
230
- fcLabel + " Net Charge (pH 7)", fcDom, {
231
- "pl7.app/format": ".2f",
232
- "pl7.app/isScore": "true",
233
- "pl7.app/description": "Non-monotonic vs developability: strongly positive correlates with polyreactivity; strongly negative with rapid clearance.",
234
- "pl7.app/table/visibility": "default",
235
- "pl7.app/table/orderPriority": string(base)
236
- })]
237
- columns += [makeCol("pi_" + chain + "_VDJRegion", "pl7.app/isoelectricPoint", "Double",
238
- fcLabel + " Isoelectric Point (pI)", fcDom, {
239
- "pl7.app/format": ".2f",
240
- "pl7.app/isScore": "true",
241
- "pl7.app/min": "0",
242
- "pl7.app/max": "14",
243
- "pl7.app/table/visibility": "default",
244
- "pl7.app/table/orderPriority": string(base - 100)
245
- })]
246
- columns += [makeCol("gravy_" + chain + "_VDJRegion", "pl7.app/hydrophobicity", "Double",
247
- fcLabel + " Hydrophobicity (GRAVY)", fcDom, {
248
- "pl7.app/format": ".3f",
249
- "pl7.app/description": "Framework regions dominate; weak developability signal at chain level — CDR3 hydrophobicity is more discriminating.",
250
- "pl7.app/table/visibility": "default",
251
- "pl7.app/table/orderPriority": string(base - 200)
252
- })]
253
- columns += [makeCol("mw_" + chain + "_VDJRegion", "pl7.app/molecularWeight", "Double",
254
- fcLabel + " Molecular Weight (Da, average masses)", fcDom, {
255
- "pl7.app/format": ".1f",
256
- "pl7.app/min": "0",
257
- "pl7.app/description": "Unglycosylated sequence mass — does not include N-glycan contributions from any NXS/NXT sequons in the variable region.",
258
- "pl7.app/table/visibility": "optional",
259
- "pl7.app/table/orderPriority": string(base - 300)
260
- })]
261
- columns += [makeCol("eox_" + chain + "_VDJRegion", "pl7.app/extinctionCoefficientOx", "Double",
262
- fcLabel + " Extinction Coeff., Oxidized (M⁻¹cm⁻¹)", fcDom, {
263
- "pl7.app/format": ".0f",
264
- "pl7.app/min": "0",
265
- "pl7.app/table/visibility": "optional",
266
- "pl7.app/table/orderPriority": string(base - 400)
267
- })]
268
- columns += [makeCol("ered_" + chain + "_VDJRegion", "pl7.app/extinctionCoefficientRed", "Double",
269
- fcLabel + " Extinction Coeff., Reduced (M⁻¹cm⁻¹)", fcDom, {
270
- "pl7.app/format": ".0f",
271
- "pl7.app/min": "0",
272
- "pl7.app/table/visibility": "optional",
273
- "pl7.app/table/orderPriority": string(base - 500)
274
- })]
275
- columns += [makeCol("instability_" + chain + "_VDJRegion", "pl7.app/instabilityIndex", "Double",
276
- fcLabel + " Instability Index", fcDom, {
277
- "pl7.app/format": ".2f",
278
- "pl7.app/description": "Guruprasad index, calibrated for in-vitro stability of soluble globular proteins via dipeptide composition. Weak predictor of antibody Tm — use as supplementary ranking aid.",
279
- "pl7.app/table/visibility": "optional",
280
- "pl7.app/table/orderPriority": string(base - 600)
281
- })]
282
- columns += [makeCol("aliphatic_" + chain + "_VDJRegion", "pl7.app/aliphaticIndex", "Double",
283
- fcLabel + " Aliphatic Index", fcDom, {
284
- "pl7.app/format": ".1f",
285
- "pl7.app/min": "0",
286
- "pl7.app/description": "Ikai aliphatic index, derived from globular mesophilic enzymes. Weak correlation with antibody Tm. No rankingOrder — high values can correlate with aggregation propensity.",
287
- "pl7.app/table/visibility": "optional",
288
- "pl7.app/table/orderPriority": string(base - 700)
289
- })]
290
- columns += [makeCol("aromaticity_" + chain + "_VDJRegion", "pl7.app/aromaticity", "Double",
291
- fcLabel + " Aromaticity", fcDom, {
292
- "pl7.app/format": ".3f",
293
- "pl7.app/min": "0",
294
- "pl7.app/max": "1",
295
- "pl7.app/description": "Fraction of aromatic residues (Phe, Trp, Tyr) over the full chain. Framework dominates; CDR-specific aromaticity is a stronger predictor (Phase 2).",
296
- "pl7.app/table/visibility": "optional",
297
- "pl7.app/table/orderPriority": string(base - 800)
298
- })]
299
- }
300
-
301
- // Fv columns — only when both VH and VL full chains reconstructed (antibody only).
302
- if hasFv {
303
- fvDom := { "pl7.app/feature": "Fv" }
304
- columns += [makeCol("charge_Fv", "pl7.app/charge", "Double",
305
- "Fv Net Charge (pH 7)", fvDom, {
306
- "pl7.app/format": ".2f",
307
- "pl7.app/isScore": "true",
308
- "pl7.app/table/visibility": "default",
309
- "pl7.app/table/orderPriority": "65100"
310
- })]
311
- columns += [makeCol("pi_Fv", "pl7.app/isoelectricPoint", "Double",
312
- "Fv Isoelectric Point (pI)", fvDom, {
313
- "pl7.app/format": ".2f",
314
- "pl7.app/isScore": "true",
315
- "pl7.app/min": "0",
316
- "pl7.app/max": "14",
317
- "pl7.app/description": "Variable region (VH+VL) only. Fv pI is typically 2–4 pH units higher than whole-IgG cIEF measurements, which include constant regions (IgG1 Fc pI ≈ 5–6).",
318
- "pl7.app/table/visibility": "default",
319
- "pl7.app/table/orderPriority": "65000"
320
- })]
321
- columns += [makeCol("eox_Fv", "pl7.app/extinctionCoefficientOx", "Double",
322
- "Fv Extinction Coeff., Oxidized (M⁻¹cm⁻¹)", fvDom, {
323
- "pl7.app/format": ".0f",
324
- "pl7.app/min": "0",
325
- "pl7.app/description": "Variable region (VH+VL) only — does not include constant regions. For whole-IgG A280 quantification, use the full-antibody ε.",
326
- "pl7.app/table/visibility": "optional",
327
- "pl7.app/table/orderPriority": "64900"
328
- })]
329
- columns += [makeCol("ered_Fv", "pl7.app/extinctionCoefficientRed", "Double",
330
- "Fv Extinction Coeff., Reduced (M⁻¹cm⁻¹)", fvDom, {
331
- "pl7.app/format": ".0f",
332
- "pl7.app/min": "0",
333
- "pl7.app/description": "Variable region (VH+VL) only, disulfide bonds reduced (Cys contribution omitted). A value of 0 means no Tyr or Trp — A280-based quantification is not possible.",
334
- "pl7.app/table/visibility": "optional",
335
- "pl7.app/table/orderPriority": "64800"
336
- })]
337
- columns += [makeCol("mw_Fv", "pl7.app/molecularWeight", "Double",
338
- "Fv Molecular Weight (Da, average masses)", fvDom, {
339
- "pl7.app/format": ".1f",
340
- "pl7.app/min": "0",
341
- "pl7.app/description": "Unglycosylated sequence mass (VH + VL).",
342
- "pl7.app/table/visibility": "optional",
343
- "pl7.app/table/orderPriority": "64700"
344
- })]
345
- }
65
+ colArgs := {
66
+ mode: mode,
67
+ receptor: receptor,
68
+ chains: chains,
69
+ fullChains: fullChains,
70
+ hasFv: hasFv
346
71
  }
72
+ columns := columnSpecs.forPropertiesPf(colArgs)
73
+ exportColumns := columnSpecs.forExport(colArgs, blockId)
347
74
 
348
75
  outputSpecs := {
349
76
  axes: axes,
@@ -352,35 +79,6 @@ self.body(func(args) {
352
79
  partitionKeyLength: 0
353
80
  }
354
81
 
355
- // Stamp blockId on the export-only columns (lets downstream blocks distinguish runs).
356
- // Build a fresh column with a cloned domain dict — mutating col.spec.domain
357
- // in place would also stamp blockId on outputSpecs.columns since both lists
358
- // share the same column references.
359
- exportColumns := []
360
- for col in columns {
361
- if col.spec.annotations && col.spec.annotations["pl7.app/isScore"] == "true" {
362
- newDomain := {}
363
- if col.spec.domain {
364
- for k, v in col.spec.domain {
365
- newDomain[k] = v
366
- }
367
- }
368
- newDomain["pl7.app/blockId"] = blockId
369
- exportColumns += [{
370
- column: col.column,
371
- id: col.id,
372
- naRegex: col.naRegex,
373
- allowNA: col.allowNA,
374
- spec: {
375
- name: col.spec.name,
376
- valueType: col.spec.valueType,
377
- domain: newDomain,
378
- annotations: col.spec.annotations
379
- }
380
- }]
381
- }
382
- }
383
-
384
82
  exportSpecs := {
385
83
  axes: axes,
386
84
  columns: exportColumns,
@@ -449,12 +147,12 @@ self.body(func(args) {
449
147
  resultPframe := pframes.pFrameBuilder()
450
148
  for _, k in maps.getKeys(scalarOut) {
451
149
  v := scalarOut[k]
452
- resultPframe.add(k, trace.inject(v.spec), v.data)
150
+ resultPframe.add(k, v.spec, v.data)
453
151
  }
454
152
  if aaOut != undefined {
455
153
  for _, k in maps.getKeys(aaOut) {
456
154
  v := aaOut[k]
457
- resultPframe.add(k, trace.inject(v.spec), v.data)
155
+ resultPframe.add(k, v.spec, v.data)
458
156
  }
459
157
  }
460
158
  resultPframe = resultPframe.build()