openms-insight 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openms_insight/__init__.py +11 -7
- openms_insight/components/__init__.py +2 -2
- openms_insight/components/heatmap.py +163 -101
- openms_insight/components/lineplot.py +377 -82
- openms_insight/components/sequenceview.py +677 -213
- openms_insight/components/table.py +86 -58
- openms_insight/core/__init__.py +2 -2
- openms_insight/core/base.py +102 -47
- openms_insight/core/registry.py +6 -5
- openms_insight/core/state.py +33 -31
- openms_insight/core/subprocess_preprocess.py +1 -3
- openms_insight/js-component/dist/assets/index.css +1 -1
- openms_insight/js-component/dist/assets/index.js +105 -105
- openms_insight/preprocessing/__init__.py +5 -6
- openms_insight/preprocessing/compression.py +68 -66
- openms_insight/preprocessing/filtering.py +39 -13
- openms_insight/rendering/__init__.py +1 -1
- openms_insight/rendering/bridge.py +192 -42
- {openms_insight-0.1.2.dist-info → openms_insight-0.1.3.dist-info}/METADATA +163 -20
- openms_insight-0.1.3.dist-info/RECORD +28 -0
- openms_insight-0.1.2.dist-info/RECORD +0 -28
- {openms_insight-0.1.2.dist-info → openms_insight-0.1.3.dist-info}/WHEEL +0 -0
- {openms_insight-0.1.2.dist-info → openms_insight-0.1.3.dist-info}/licenses/LICENSE +0 -0
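
The largest change in this release rewrites `openms_insight/components/sequenceview.py`, whose diff is shown below: `SequenceView` no longer derives from `BaseComponent`, caches sequence and peak data as zstd-compressed parquet under `<cache_path>/<cache_id>/`, computes theoretical fragment masses via pyOpenMS (with a pure-Python fallback), and returns a `SequenceViewResult` whose `annotations` DataFrame is filled in by the Vue frontend. Below is a minimal usage sketch based on the new constructor and `__call__` signatures; the parquet paths and column names are illustrative, and the import path assumes the class is exposed from `openms_insight.components.sequenceview`.

```python
# Sketch only: paths, column names, and the Streamlit context are illustrative.
import polars as pl
from openms_insight.components.sequenceview import SequenceView  # assumed import path

view = SequenceView(
    cache_id="peptide_view",
    sequence_data=pl.scan_parquet("sequences.parquet"),  # sequence_id, sequence, precursor_charge
    peaks_data=pl.scan_parquet("peaks.parquet"),         # scan_id, peak_id, mass, intensity
    filters={"spectrum": "scan_id", "sequence": "sequence_id"},
    annotation_config={"ion_types": ["b", "y"], "tolerance": 20.0, "tolerance_ppm": True},
)

result = view(key="sv")  # renders the component inside a Streamlit app
if result.annotations is not None:
    print(result.annotations)  # peak_id, highlight_color, annotation
```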
@@ -1,11 +1,21 @@
 """SequenceView component for peptide/protein sequence visualization with fragment matching."""

-
+import hashlib
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

 import polars as pl

-from ..core.base import BaseComponent
 from ..core.registry import register_component
+from ..preprocessing.filtering import optimize_for_transfer
+
+# Proton mass for m/z calculations
+PROTON_MASS = 1.007276
+
+# Cache version - increment when cache format changes
+CACHE_VERSION = 1


 def parse_openms_sequence(sequence_str: str) -> Tuple[List[str], List[Optional[float]]]:
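
The new module constant `PROTON_MASS` is used later in this diff to convert the singly charged m/z values produced by pyOpenMS back to neutral fragment masses (`neutral_mass = mz_charge1 - PROTON_MASS`). For reference, the general relation for charge z is sketched below; `neutral_mass_from_mz` is a hypothetical helper, not part of the package.

```python
PROTON_MASS = 1.007276  # matches the constant added above

def neutral_mass_from_mz(mz: float, charge: int) -> float:
    # [M + zH]^z+  ->  M = z * mz - z * PROTON_MASS; the diff applies the z = 1 case.
    return charge * mz - charge * PROTON_MASS

assert abs(neutral_mass_from_mz(501.0, 1) - (501.0 - PROTON_MASS)) < 1e-9
```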
@@ -51,9 +61,9 @@ def parse_openms_sequence(sequence_str: str) -> Tuple[List[str], List[Optional[f
             residues.append(sequence_str[i])
             modifications.append(None)
             i += 1
-        elif sequence_str[i] == '(':
+        elif sequence_str[i] == "(":
             # Skip modification name in parentheses
-            end = sequence_str.find(')', i)
+            end = sequence_str.find(")", i)
             if end > i:
                 i = end + 1
             else:
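
The hunk above only normalizes quote style, but it sits in the branch of `parse_openms_sequence` that skips modification names written in parentheses. Below is a simplified, self-contained sketch of that behaviour; the real parser also handles other OpenMS sequence syntax not visible in this hunk.

```python
from typing import List, Optional, Tuple

def parse_sequence_sketch(seq: str) -> Tuple[List[str], List[Optional[float]]]:
    """Simplified sketch: letters become residues, "(...)" modification names are skipped."""
    residues: List[str] = []
    modifications: List[Optional[float]] = []
    i = 0
    while i < len(seq):
        if seq[i].isalpha():
            residues.append(seq[i])
            modifications.append(None)
            i += 1
        elif seq[i] == "(":
            end = seq.find(")", i)
            i = end + 1 if end > i else i + 1
        else:
            i += 1
    return residues, modifications

assert parse_sequence_sketch("PEPC(Carbamidomethyl)K")[0] == list("PEPCK")
```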
@@ -66,116 +76,281 @@ def parse_openms_sequence(sequence_str: str) -> Tuple[List[str], List[Optional[f
         return list(sequence_str), [None] * len(sequence_str)


-
-
-
-
-    'H': 137.058912, 'I': 113.084064, 'L': 113.084064, 'K': 128.094963,
-    'M': 131.040485, 'F': 147.068414, 'P': 97.052764, 'S': 87.032028,
-    'T': 101.047679, 'U': 150.953633, 'W': 186.079313, 'Y': 163.063329,
-    'V': 99.068414, 'X': 0, 'Z': 0,
-}
-
-# Ion type mass adjustments
-# These are approximate - for precise values, use pyOpenMS
-H2O = 18.010565
-NH3 = 17.026549
-PROTON = 1.007276
-
-# Ion type offsets (from N-terminus for prefix, C-terminus for suffix)
-ION_OFFSETS = {
-    'a': -27.994915,  # CO loss from b
-    'b': 0.0,
-    'c': 17.026549,  # NH3 addition to b
-    'x': 43.989829,  # CO + CO addition to y
-    'y': 18.010565,  # H2O addition (protonated)
-    'z': 1.991841,  # NH loss from y
-}
-
+def calculate_fragment_masses_pyopenms(
+    sequence_str: str,
+) -> Dict[str, List[List[float]]]:
+    """Calculate theoretical fragment masses using pyOpenMS TheoreticalSpectrumGenerator.

-
-
-    mass = 0.0
-    for i in range(position + 1):
-        mass += AA_MASSES.get(sequence[i], 0.0)
-    return mass
+    Args:
+        sequence_str: Peptide sequence string (can include modifications)

+    Returns:
+        Dict with fragment_masses_a, fragment_masses_b, etc.
+        Each is a list of lists (one per position, supporting multiple masses).
+    """
+    try:
+        from pyopenms import AASequence, MSSpectrum, TheoreticalSpectrumGenerator

-
-
-
-
-
-
+        aa_seq = AASequence.fromString(sequence_str)
+        n = aa_seq.size()
+
+        # Configure TheoreticalSpectrumGenerator
+        tsg = TheoreticalSpectrumGenerator()
+        params = tsg.getParameters()
+
+        params.setValue("add_a_ions", "true")
+        params.setValue("add_b_ions", "true")
+        params.setValue("add_c_ions", "true")
+        params.setValue("add_x_ions", "true")
+        params.setValue("add_y_ions", "true")
+        params.setValue("add_z_ions", "true")
+        params.setValue("add_first_prefix_ion", "true")  # Include b1/a1/c1 ions
+        params.setValue("add_metainfo", "true")
+
+        tsg.setParameters(params)
+
+        # Generate spectrum for charge 1, then convert to neutral masses
+        spec = MSSpectrum()
+        tsg.getSpectrum(spec, aa_seq, 1, 1)
+
+        ion_types = ["a", "b", "c", "x", "y", "z"]
+        result = {f"fragment_masses_{ion}": [[] for _ in range(n)] for ion in ion_types}
+
+        # Get ion names from StringDataArrays
+        ion_names = []
+        sdas = spec.getStringDataArrays()
+        for sda in sdas:
+            if sda.getName() == "IonNames":
+                for i in range(sda.size()):
+                    name = sda[i]
+                    if isinstance(name, bytes):
+                        name = name.decode("utf-8")
+                    ion_names.append(name)
+                break
+
+        # Parse peaks and organize by ion type and position
+        for i in range(spec.size()):
+            peak = spec[i]
+            # Convert singly-charged m/z to neutral mass
+            mz_charge1 = peak.getMZ()
+            neutral_mass = mz_charge1 - PROTON_MASS
+            ion_name = ion_names[i] if i < len(ion_names) else ""
+
+            if not ion_name:
+                continue
+
+            # Parse ion name (e.g., "b3+", "y5++")
+            ion_type = None
+            ion_number = None
+
+            for t in ion_types:
+                if ion_name.lower().startswith(t):
+                    ion_type = t
+                    try:
+                        num_str = ""
+                        for c in ion_name[1:]:
+                            if c.isdigit():
+                                num_str += c
+                            else:
+                                break
+                        if num_str:
+                            ion_number = int(num_str)
+                    except (ValueError, IndexError):
+                        pass
+                    break
+
+            if ion_type and ion_number and 1 <= ion_number <= n:
+                idx = ion_number - 1
+                key = f"fragment_masses_{ion_type}"
+                if idx < len(result[key]):
+                    result[key][idx].append(neutral_mass)

+        return result

-
-
-
+    except ImportError:
+        # Fallback to simple calculation without pyOpenMS
+        return _calculate_fragment_masses_simple(sequence_str)
+    except Exception as e:
+        print(f"Error calculating fragments for {sequence_str}: {e}")
+        return {f"fragment_masses_{ion}": [] for ion in ["a", "b", "c", "x", "y", "z"]}
+
+
+def _calculate_fragment_masses_simple(
+    sequence_str: str,
+) -> Dict[str, List[List[float]]]:
+    """Fallback fragment calculation without pyOpenMS."""
+    # Amino acid monoisotopic masses
+    AA_MASSES = {
+        "A": 71.037114,
+        "R": 156.101111,
+        "N": 114.042927,
+        "D": 115.026943,
+        "C": 103.009185,
+        "E": 129.042593,
+        "Q": 128.058578,
+        "G": 57.021464,
+        "H": 137.058912,
+        "I": 113.084064,
+        "L": 113.084064,
+        "K": 128.094963,
+        "M": 131.040485,
+        "F": 147.068414,
+        "P": 97.052764,
+        "S": 87.032028,
+        "T": 101.047679,
+        "U": 150.953633,
+        "W": 186.079313,
+        "Y": 163.063329,
+        "V": 99.068414,
+    }
+
+    # Ion type offsets
+    ION_OFFSETS = {
+        "a": -27.994915,
+        "b": 0.0,
+        "c": 17.026549,
+        "x": 43.989829,
+        "y": 18.010565,
+        "z": 1.991841,
+    }
+
+    # Extract plain sequence
+    residues, _ = parse_openms_sequence(sequence_str)
+    n = len(residues)
+    result = {}

-
-
+    # Calculate prefix masses
+    prefix_masses = []
+    mass = 0.0
+    for aa in residues:
+        mass += AA_MASSES.get(aa, 0.0)
+        prefix_masses.append(mass)

-
-
-
-
-
-
+    # Calculate suffix masses
+    suffix_masses = []
+    mass = 0.0
+    for aa in reversed(residues):
+        mass += AA_MASSES.get(aa, 0.0)
+        suffix_masses.append(mass)
+    suffix_masses = list(reversed(suffix_masses))

-    # Prefix ions (a, b, c)
-    for ion_type in ['a', 'b', 'c']:
+    # Prefix ions (a, b, c)
+    for ion_type in ["a", "b", "c"]:
         masses = []
         for i in range(n):
-
-            ion_mass = prefix_mass + ION_OFFSETS[ion_type]
+            ion_mass = prefix_masses[i] + ION_OFFSETS[ion_type]
             masses.append([ion_mass])
-        result[f'fragment_masses_{ion_type}'] = masses
+        result[f"fragment_masses_{ion_type}"] = masses

-    # Suffix ions (x, y, z)
-    for ion_type in ['x', 'y', 'z']:
+    # Suffix ions (x, y, z)
+    for ion_type in ["x", "y", "z"]:
         masses = []
         for i in range(n):
-
-
-            ion_mass = suffix_mass + ION_OFFSETS[ion_type]
+            idx = n - i - 1
+            ion_mass = suffix_masses[idx] + ION_OFFSETS[ion_type]
             masses.append([ion_mass])
-        result[f'fragment_masses_{ion_type}'] = masses

     return result
+        result[f"fragment_masses_{ion_type}"] = masses


-def
-    """Calculate monoisotopic mass of
-
-
-
-
+def get_theoretical_mass(sequence_str: str) -> float:
+    """Calculate monoisotopic mass of a peptide sequence."""
+    try:
+        from pyopenms import AASequence
+
+        aa_seq = AASequence.fromString(sequence_str)
+        return aa_seq.getMonoWeight()
+    except ImportError:
+        # Fallback
+        H2O = 18.010565
+        AA_MASSES = {
+            "A": 71.037114,
+            "R": 156.101111,
+            "N": 114.042927,
+            "D": 115.026943,
+            "C": 103.009185,
+            "E": 129.042593,
+            "Q": 128.058578,
+            "G": 57.021464,
+            "H": 137.058912,
+            "I": 113.084064,
+            "L": 113.084064,
+            "K": 128.094963,
+            "M": 131.040485,
+            "F": 147.068414,
+            "P": 97.052764,
+            "S": 87.032028,
+            "T": 101.047679,
+            "U": 150.953633,
+            "W": 186.079313,
+            "Y": 163.063329,
+            "V": 99.068414,
+        }
+        residues, _ = parse_openms_sequence(sequence_str)
+        mass = H2O
+        for aa in residues:
+            mass += AA_MASSES.get(aa, 0.0)
+        return mass
+    except Exception:
+        return 0.0
+
+
+# Default annotation configuration
+DEFAULT_ANNOTATION_CONFIG = {
+    "ion_types": ["b", "y"],
+    "neutral_losses": True,
+    "proton_loss_addition": False,
+    "tolerance": 20.0,
+    "tolerance_ppm": True,
+    "colors": {
+        "a": "#9B59B6",
+        "b": "#E74C3C",
+        "c": "#E67E22",
+        "x": "#1ABC9C",
+        "y": "#3498DB",
+        "z": "#2ECC71",
+    },
+}
+
+
+@dataclass
+class SequenceViewResult:
+    """Result returned by SequenceView.__call__().
+
+    Attributes:
+        annotations: DataFrame with columns (peak_id, highlight_color, annotation)
+            containing fragment annotations computed by Vue. None if not yet available.
+    """
+
+    annotations: Optional[pl.DataFrame] = None


 @register_component("sequence_view")
-class SequenceView(BaseComponent):
+class SequenceView:
     """
     Interactive sequence view component for peptide/protein visualization.

     Displays amino acid sequence with fragment ion markers. When provided with
-
+    peaks data, performs fragment matching on the Vue side and returns annotations.

     Features:
     - Amino acid grid display with configurable row width
-    - Fragment ion markers (a, b, c, x, y, z)
-    - Tolerance-based fragment matching
-    -
-    -
+    - Fragment ion markers (a, b, c, x, y, z) with configurable colors
+    - Tolerance-based fragment matching (done in Vue)
+    - Returns annotation dataframe for linked components
+    - Supports filtering by spectrum and sequence identifiers

     Example:
         sequence_view = SequenceView(
             cache_id="peptide_view",
-
-
-
+            sequence_data=pl.scan_parquet("sequences.parquet"),
+            peaks_data=pl.scan_parquet("peaks.parquet"),
+            filters={"spectrum": "scan_id", "sequence": "sequence_id"},
+            annotation_config={"ion_types": ["b", "y"], "tolerance": 20.0},
         )
-        sequence_view(state_manager=state_manager)
+        result = sequence_view(key="sv", state_manager=state_manager)
+        # result.annotations contains the matched fragment annotations
     """

     _component_type: str = "sequence_view"
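
In the new code, `calculate_fragment_masses_pyopenms` asks pyOpenMS for a charge-1 theoretical spectrum and subtracts `PROTON_MASS` to store neutral fragment masses, while `_calculate_fragment_masses_simple` reproduces the same quantities as running residue sums plus a constant ion-type offset. Below is a tiny worked example of the fallback arithmetic for the dipeptide "AG"; the mass values are copied from the `AA_MASSES`/`ION_OFFSETS` tables above, and the peptide is just an illustration.

```python
AA = {"A": 71.037114, "G": 57.021464}   # from AA_MASSES above
B_OFFSET, Y_OFFSET = 0.0, 18.010565     # from ION_OFFSETS above (b and y ions)

b_base = [AA["A"], AA["A"] + AA["G"]]   # prefix sums -> b1, b2
y_base = [AA["G"], AA["G"] + AA["A"]]   # suffix sums -> y1, y2

b_ions = [m + B_OFFSET for m in b_base]  # neutral b-ion masses
y_ions = [m + Y_OFFSET for m in y_base]  # neutral y-ion masses

# Neutral y1 of "AG" is glycine plus H2O, i.e. monoisotopic glycine (~75.0320 Da).
assert abs(y_ions[0] - 75.032029) < 1e-6
```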
@@ -183,202 +358,491 @@ class SequenceView(BaseComponent):
     def __init__(
         self,
         cache_id: str,
-
-
-
-
-        data: Optional[pl.LazyFrame] = None,  # Not used but required by base
+        sequence_data: Optional[Union[pl.LazyFrame, Tuple[str, int], str]] = None,
+        sequence_data_path: Optional[str] = None,
+        peaks_data: Optional[pl.LazyFrame] = None,
+        peaks_data_path: Optional[str] = None,
         filters: Optional[Dict[str, str]] = None,
         interactivity: Optional[Dict[str, str]] = None,
+        deconvolved: bool = False,
+        annotation_config: Optional[Dict[str, Any]] = None,
         cache_path: str = ".",
-        regenerate_cache: bool = False,
-        fixed_modifications: Optional[List[str]] = None,
         title: Optional[str] = None,
         height: int = 400,
-
-        precursor_charge: int = 1,
-        _precomputed_sequence_data: Optional[Dict[str, Any]] = None,
-        **kwargs
+        **kwargs,
     ):
         """
         Initialize the SequenceView component.

         Args:
-            cache_id: Unique identifier for this component
-
-
-
-
-
+            cache_id: Unique identifier for this component instance.
+            sequence_data: Sequence information in one of three formats:
+                - LazyFrame with columns: sequence_id (if filtered), sequence, precursor_charge
+                - Tuple of (sequence_string, precursor_charge)
+                - String with just the sequence (charge defaults to 1)
+            sequence_data_path: Path to parquet file with sequence data.
+            peaks_data: LazyFrame with columns: scan_id (if filtered), peak_id, mass, intensity
+            peaks_data_path: Path to parquet file with peaks data.
             filters: Mapping of identifier names to column names for filtering.
+                Example: {"spectrum": "scan_id", "sequence": "sequence_id"}
             interactivity: Mapping of identifier names to column names for clicks.
-                Example: {
+                Example: {"peak": "peak_id"} sets 'peak' selection to clicked peak's ID.
+            deconvolved: If False (default), peaks are m/z values and matching considers
+                charge states 1 to precursor_charge. If True, peaks are neutral masses.
+            annotation_config: Configuration for fragment matching:
+                - ion_types: List of ion types to consider (default: ["b", "y"])
+                - neutral_losses: Whether to consider -H2O, -NH3 losses (default: True)
+                - tolerance: Mass tolerance value (default: 20.0)
+                - tolerance_ppm: True for ppm, False for Da (default: True)
+                - colors: Dict mapping ion types to hex colors
             cache_path: Base path for cache storage.
-            regenerate_cache: If True, regenerate cache even if valid.
-            fixed_modifications: List of amino acids with fixed modifications (e.g., ['C']).
            title: Optional title displayed above the sequence.
            height: Component height in pixels.
-            deconvolved: If True (default), observed_masses are neutral masses.
-                If False, observed_masses are m/z values and fragment matching
-                considers charge states 1 to precursor_charge.
-            precursor_charge: Maximum charge state to consider for fragment matching
-                when deconvolved=False. Fragments can have charge 1 to this value.
-            _precomputed_sequence_data: Optional pre-computed sequence data dict.
-                If provided, skips fragment mass calculation (used when fragment
-                masses are already cached externally, e.g., in identification preprocessing).
             **kwargs: Additional configuration options.
         """
-        self.
-        self.
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        if data is None:
-            if self._observed_masses:
-                ids = self._peak_ids if self._peak_ids is not None else list(range(len(self._observed_masses)))
-                data = pl.LazyFrame({
-                    'peak_id': ids,
-                    'mass': self._observed_masses,
-                })
-            else:
-                # Empty peaks - use schema so validation still passes
-                data = pl.LazyFrame(schema={'peak_id': pl.Int64, 'mass': pl.Float64})
-
-        super().__init__(
-            cache_id=cache_id,
-            data=data,
-            filters=filters,
-            interactivity=interactivity,
-            cache_path=cache_path,
-            regenerate_cache=regenerate_cache,
-            **kwargs
+        self._cache_id = cache_id
+        self._cache_path = Path(cache_path)
+        self._cache_dir = self._cache_path / cache_id
+
+        # Determine if data is provided (creation mode vs reconstruction mode)
+        has_sequence_data = sequence_data is not None or sequence_data_path is not None
+
+        # Check if any configuration arguments were provided
+        has_config = (
+            peaks_data is not None
+            or peaks_data_path is not None
+            or filters is not None
+            or interactivity is not None
+            or deconvolved is not False
+            or annotation_config is not None
+            or title is not None
+            or height != 400
+            or bool(kwargs)
         )

+        if not has_sequence_data:
+            # Reconstruction mode - only cache_id and cache_path allowed
+            if has_config:
+                raise ValueError(
+                    "Configuration arguments require sequence_data= or sequence_data_path= to be provided. "
+                    "For reconstruction from cache, use only cache_id and cache_path."
+                )
+            if not self._cache_exists():
+                raise ValueError(
+                    f"Cache not found at '{self._cache_dir}'. "
+                    f"Provide sequence_data= or sequence_data_path= to create the cache."
+                )
+            self._load_from_cache()
+        else:
+            # Creation mode - use provided config
+            self._title = title
+            self._height = height
+            self._deconvolved = deconvolved
+            self._config = kwargs
+            self._filters = filters or {}
+            self._interactivity = interactivity or {}
+
+            # Store annotation config with defaults
+            self._annotation_config = {**DEFAULT_ANNOTATION_CONFIG}
+            if annotation_config:
+                self._annotation_config.update(annotation_config)
+
+            # Parse sequence data input
+            if sequence_data is not None and sequence_data_path is not None:
+                raise ValueError(
+                    "Provide either 'sequence_data' or 'sequence_data_path', not both"
+                )
+
+            self._source_sequence_data: Optional[pl.LazyFrame] = None
+            self._source_static_sequence: Optional[str] = None
+            self._source_static_charge: int = 1
+
+            if sequence_data_path is not None:
+                self._source_sequence_data = pl.scan_parquet(sequence_data_path)
+            elif isinstance(sequence_data, pl.LazyFrame):
+                self._source_sequence_data = sequence_data
+            elif isinstance(sequence_data, tuple):
+                self._source_static_sequence = sequence_data[0]
+                self._source_static_charge = sequence_data[1]
+            elif isinstance(sequence_data, str):
+                self._source_static_sequence = sequence_data
+                self._source_static_charge = 1
+
+            # Parse peaks data input
+            if peaks_data is not None and peaks_data_path is not None:
+                raise ValueError(
+                    "Provide either 'peaks_data' or 'peaks_data_path', not both"
+                )
+
+            self._source_peaks_data: Optional[pl.LazyFrame] = None
+            if peaks_data_path is not None:
+                self._source_peaks_data = pl.scan_parquet(peaks_data_path)
+            elif peaks_data is not None:
+                self._source_peaks_data = peaks_data
+
+            # Create and save cache
+            self._create_cache()
+
+            # Discard source references - only cache is used from now on
+            self._source_sequence_data = None
+            self._source_static_sequence = None
+            self._source_peaks_data = None
+
+            # Load cached LazyFrames for reading
+            self._cached_sequences = pl.scan_parquet(
+                self._cache_dir / "sequences.parquet"
+            )
+            peaks_path = self._cache_dir / "peaks.parquet"
+            self._cached_peaks = (
+                pl.scan_parquet(peaks_path) if peaks_path.exists() else None
+            )
+
     def _get_cache_config(self) -> Dict[str, Any]:
-        """Get configuration
+        """Get all configuration to store in cache."""
         return {
-
-
+            "version": CACHE_VERSION,
+            "filters": self._filters,
+            "interactivity": self._interactivity,
+            "title": self._title,
+            "height": self._height,
+            "deconvolved": self._deconvolved,
+            "annotation_config": self._annotation_config,
         }

-    def
-        """
-
+    def _cache_exists(self) -> bool:
+        """Check if a valid cache exists that can be loaded."""
+        config_file = self._cache_dir / ".cache_config.json"
+        sequences_file = self._cache_dir / "sequences.parquet"
+
+        if not config_file.exists() or not sequences_file.exists():
+            return False
+
+        try:
+            with open(config_file, "r") as f:
+                cached_config = json.load(f)
+            # Just check version matches
+            return cached_config.get("version") == CACHE_VERSION
+        except Exception:
+            return False
+
+    def _load_from_cache(self) -> None:
+        """Load all configuration and data from cache."""
+        config_file = self._cache_dir / ".cache_config.json"
+
+        with open(config_file, "r") as f:
+            config = json.load(f)
+
+        # Restore all configuration
+        self._filters = config.get("filters", {})
+        self._interactivity = config.get("interactivity", {})
+        self._title = config.get("title")
+        self._height = config.get("height", 400)
+        self._deconvolved = config.get("deconvolved", False)
+        self._annotation_config = config.get(
+            "annotation_config", {**DEFAULT_ANNOTATION_CONFIG}
+        )
+        self._config = {}
+
+        # Load cached LazyFrames
+        self._cached_sequences = pl.scan_parquet(self._cache_dir / "sequences.parquet")
+        peaks_path = self._cache_dir / "peaks.parquet"
+        self._cached_peaks = (
+            pl.scan_parquet(peaks_path) if peaks_path.exists() else None
+        )
+
+    def _create_cache(self) -> None:
+        """Create cache from source data."""
+        # Create cache directory
+        self._cache_dir.mkdir(parents=True, exist_ok=True)
+
+        # Preprocess and write caches
+        self._preprocess_sequences()
+        self._preprocess_peaks()
+
+        # Write config
+        config_file = self._cache_dir / ".cache_config.json"
+        with open(config_file, "w") as f:
+            json.dump(self._get_cache_config(), f, indent=2)
+
+    def _preprocess_sequences(self) -> None:
+        """Preprocess and cache sequence data."""
+        output_path = self._cache_dir / "sequences.parquet"
+
+        if self._source_sequence_data is not None:
+            # LazyFrame input - select required columns, sort by filters
+            schema = self._source_sequence_data.collect_schema()
+            filter_cols = [c for c in self._filters.values() if c in schema.names()]
+
+            # Build column list: filter columns + required columns
+            required = ["sequence", "precursor_charge"]
+            cols = list(
+                dict.fromkeys(
+                    filter_cols + [c for c in required if c in schema.names()]
+                )
+            )
+
+            lf = self._source_sequence_data.select(cols)
+
+            # Sort by filter columns for predicate pushdown
+            if filter_cols:
+                lf = lf.sort(filter_cols)
+
+            df = lf.collect()
+        else:
+            # Static input (string or tuple) - create single-row DataFrame
+            df = pl.DataFrame(
+                {
+                    "sequence": [self._source_static_sequence or ""],
+                    "precursor_charge": [self._source_static_charge],
+                }
+            )
+
+        # Optimize types and write
+        df = optimize_for_transfer(df)
+        df.write_parquet(output_path, compression="zstd")
+
+    def _preprocess_peaks(self) -> None:
+        """Preprocess and cache peaks data."""
+        if self._source_peaks_data is None:
+            return  # No peaks to cache
+
+        output_path = self._cache_dir / "peaks.parquet"
+        schema = self._source_peaks_data.collect_schema()
+        filter_cols = [c for c in self._filters.values() if c in schema.names()]
+
+        # Build column list: filter columns + required columns
+        required = ["peak_id", "mass"]
+        optional = ["intensity"]
+        cols = list(
+            dict.fromkeys(
+                filter_cols
+                + [c for c in required if c in schema.names()]
+                + [c for c in optional if c in schema.names()]
+            )
+        )
+
+        lf = self._source_peaks_data.select(cols)

-
-
+        # Sort by filter columns for predicate pushdown
+        if filter_cols:
+            lf = lf.sort(filter_cols)
+
+        df = lf.collect()
+
+        # Optimize types and write
+        df = optimize_for_transfer(df)
+        df.write_parquet(output_path, compression="zstd")
+
+    def _get_sequence_for_state(self, state: Dict[str, Any]) -> Tuple[str, int]:
+        """Get sequence and charge for current state.
+
+        Reads from cached sequences.parquet with predicate pushdown.
+
+        Returns:
+            Tuple of (sequence_string, precursor_charge)
         """
-
-        plain_sequence = ''.join(self._parsed_residues)
-        fragment_masses = calculate_fragment_masses(plain_sequence)
+        filtered = self._cached_sequences

-        #
-
+        # Apply filters for columns that exist in cached data
+        schema = filtered.collect_schema()
+        for identifier, column in self._filters.items():
+            if column in schema.names():
+                filter_value = state.get(identifier)
+                if filter_value is not None:
+                    filtered = filtered.filter(pl.col(column) == filter_value)

-        #
-
-
-
-
-
-
-        }
+        # Collect and get first row
+        try:
+            df = filtered.select(["sequence", "precursor_charge"]).head(1).collect()
+            if df.height > 0:
+                return df["sequence"][0], df["precursor_charge"][0]
+        except Exception:
+            pass

-
+        return "", 1

-    def
-        """
-        return 'SequenceView'
+    def _get_peaks_for_state(self, state: Dict[str, Any]) -> pl.DataFrame:
+        """Get filtered peaks data for current state.

-
-
-
+        Reads from cached peaks.parquet with predicate pushdown.
+
+        Returns:
+            DataFrame with columns: peak_id, mass, (intensity if available)
+        """
+        if self._cached_peaks is None:
+            return pl.DataFrame(schema={"peak_id": pl.Int64, "mass": pl.Float64})
+
+        filtered = self._cached_peaks
+
+        # Apply filters for columns that exist in cached data
+        schema = filtered.collect_schema()
+        for identifier, column in self._filters.items():
+            if column in schema.names():
+                filter_value = state.get(identifier)
+                if filter_value is not None:
+                    filtered = filtered.filter(pl.col(column) == filter_value)
+
+        # Select available columns
+        cols = ["peak_id", "mass"]
+        if "intensity" in schema.names():
+            cols.append("intensity")
+
+        try:
+            return filtered.select(cols).collect()
+        except Exception:
+            return pl.DataFrame(schema={"peak_id": pl.Int64, "mass": pl.Float64})

     def _prepare_vue_data(self, state: Dict[str, Any]) -> Dict[str, Any]:
         """
-        Prepare
+        Prepare data for Vue component.

         Args:
             state: Current selection state from StateManager

         Returns:
-            Dict with sequenceData,
+            Dict with sequenceData, peaksData, annotationConfig, etc.
         """
-        #
-
-
-
-
+        # Get sequence for current state
+        sequence_str, precursor_charge = self._get_sequence_for_state(state)
+
+        # Parse sequence
+        residues, modifications = parse_openms_sequence(sequence_str)

-        #
-
-
+        # Calculate theoretical fragment masses
+        fragment_masses = calculate_fragment_masses_pyopenms(sequence_str)
+
+        # Calculate theoretical mass
+        theoretical_mass = get_theoretical_mass(sequence_str)
+
+        # Build sequence data structure
+        sequence_data = {
+            "sequence": residues,
+            "modifications": modifications,
+            "theoretical_mass": theoretical_mass,
+            "fixed_modifications": [],
+            # Include settings for Vue initialization
+            "fragment_tolerance": self._annotation_config.get("tolerance"),
+            "fragment_tolerance_ppm": self._annotation_config.get("tolerance_ppm"),
+            "neutral_losses": self._annotation_config.get("neutral_losses"),
+            "proton_loss_addition": self._annotation_config.get("proton_loss_addition"),
+            **fragment_masses,
+        }
+
+        # Get filtered peaks
+        peaks_df = self._get_peaks_for_state(state)
+
+        # Extract arrays from peaks DataFrame for Vue
+        # Vue expects observedMasses and peakIds as separate arrays
+        observed_masses: List[float] = []
+        peak_ids: List[int] = []
+        precursor_mass: float = 0.0
+
+        if peaks_df.height > 0:
+            observed_masses = peaks_df["mass"].to_list()
+            peak_ids = peaks_df["peak_id"].to_list()
+
+        # Create hash for change detection
+        hash_input = f"{sequence_str}:{peaks_df.height}:{precursor_charge}"
         data_hash = hashlib.md5(hash_input.encode()).hexdigest()[:8]

         result = {
-
-
-
-
+            "sequenceData": sequence_data,
+            "observedMasses": observed_masses,
+            "peakIds": peak_ids,
+            "precursorMass": precursor_mass,
+            "annotationConfig": self._annotation_config,
+            "precursorCharge": precursor_charge,
+            "_hash": data_hash,
         }

-        # Include peak_ids if provided (for interactivity linking)
-        if self._peak_ids is not None:
-            result['peakIds'] = self._peak_ids
-
         return result

+    def _get_vue_component_name(self) -> str:
+        """Return the Vue component name."""
+        return "SequenceView"
+
+    def _get_data_key(self) -> str:
+        """Return the key used to send primary data to Vue."""
+        return "sequenceData"
+
     def _get_component_args(self) -> Dict[str, Any]:
         """Get component arguments to send to Vue."""
         args: Dict[str, Any] = {
-
-
-
-            'precursorCharge': self._precursor_charge,
+            "componentType": self._get_vue_component_name(),
+            "height": self._height,
+            "deconvolved": self._deconvolved,
         }

         if self._title:
-            args['title'] = self._title
+            args["title"] = self._title

-        # Pass interactivity mapping to Vue (similar to other components)
         if self._interactivity:
-            args['interactivity'] = self._interactivity
+            args["interactivity"] = self._interactivity

         args.update(self._config)
         return args

-
+    @property
+    def peaks_data(self) -> Optional[pl.LazyFrame]:
+        """Return the cached peaks LazyFrame for linked components."""
+        return self._cached_peaks
+
+    def get_filters_mapping(self) -> Dict[str, str]:
+        """Return the filters identifier-to-column mapping."""
+        return self._filters.copy()
+
+    def get_interactivity_mapping(self) -> Dict[str, str]:
+        """Return the interactivity identifier-to-column mapping."""
+        return self._interactivity.copy()
+
+    def get_state_dependencies(self) -> List[str]:
+        """Return list of state keys that affect this component's data."""
+        return list(self._filters.keys())
+
+    def __call__(
         self,
-
-
-
+        key: Optional[str] = None,
+        state_manager: Optional["StateManager"] = None,
+        height: Optional[int] = None,
+    ) -> SequenceViewResult:
         """
-
-
-        This allows reusing the same cached sequence data with different
-        spectra for matching.
+        Render the component in Streamlit.

         Args:
-
-
+            key: Optional unique key for the Streamlit component
+            state_manager: Optional StateManager for cross-component state.
+                If not provided, uses a default shared StateManager.
+            height: Optional height in pixels for the component

         Returns:
-
+            SequenceViewResult with annotations DataFrame (if available)
         """
-
-
-
-
+        from ..core.state import get_default_state_manager
+        from ..rendering.bridge import get_component_annotations, render_component
+
+        if state_manager is None:
+            state_manager = get_default_state_manager()
+
+        # Use provided height or default
+        render_height = height if height is not None else self._height
+
+        render_component(
+            component=self, state_manager=state_manager, key=key, height=render_height
+        )
+
+        # Get annotations from session state (set by Vue)
+        annotations = get_component_annotations(key) if key else None
+
+        return SequenceViewResult(annotations=annotations)
+
+    def __repr__(self) -> str:
+        return (
+            f"SequenceView("
+            f"cache_id='{self._cache_id}', "
+            f"filters={self._filters}, "
+            f"interactivity={self._interactivity})"
+        )
+
+
+if TYPE_CHECKING:
+    from ..core.state import StateManager
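
The rewritten `__init__` above supports two modes: a creation mode that writes `sequences.parquet`, `peaks.parquet`, and `.cache_config.json` under `<cache_path>/<cache_id>/`, and a reconstruction mode that accepts only `cache_id` and `cache_path` and reloads everything from that cache. A hedged sketch of both modes follows; the import path and file locations are assumptions based on the code above.

```python
from openms_insight.components.sequenceview import SequenceView  # assumed import path

# Creation mode: providing sequence_data (here the static tuple form) writes the cache.
view = SequenceView(
    cache_id="peptide_view",
    sequence_data=("PEPTIDER", 2),   # (sequence, precursor_charge)
    cache_path=".cache",
)

# Reconstruction mode: later runs pass only cache_id and cache_path;
# any other configuration argument now raises ValueError.
view_again = SequenceView(cache_id="peptide_view", cache_path=".cache")

result = view_again(key="sv")        # renders in Streamlit
print(result.annotations)            # None until the Vue side reports annotations
```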