openms-insight 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openms_insight/__init__.py +32 -0
- openms_insight/components/__init__.py +11 -0
- openms_insight/components/heatmap.py +823 -0
- openms_insight/components/lineplot.py +492 -0
- openms_insight/components/sequenceview.py +384 -0
- openms_insight/components/table.py +400 -0
- openms_insight/core/__init__.py +14 -0
- openms_insight/core/base.py +413 -0
- openms_insight/core/cache.py +39 -0
- openms_insight/core/registry.py +82 -0
- openms_insight/core/state.py +215 -0
- openms_insight/js-component/dist/assets/index.css +5 -0
- openms_insight/js-component/dist/assets/index.js +4220 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.eot +0 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.ttf +0 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff +0 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff2 +0 -0
- openms_insight/js-component/dist/index.html +14 -0
- openms_insight/preprocessing/__init__.py +22 -0
- openms_insight/preprocessing/compression.py +338 -0
- openms_insight/preprocessing/filtering.py +316 -0
- openms_insight/rendering/__init__.py +8 -0
- openms_insight/rendering/bridge.py +312 -0
- openms_insight-0.1.0.dist-info/METADATA +256 -0
- openms_insight-0.1.0.dist-info/RECORD +27 -0
- openms_insight-0.1.0.dist-info/WHEEL +4 -0
- openms_insight-0.1.0.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
"""SequenceView component for peptide/protein sequence visualization with fragment matching."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
|
|
7
|
+
from ..core.base import BaseComponent
|
|
8
|
+
from ..core.registry import register_component
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _parse_sequence_naive(sequence_str: str) -> Tuple[List[str], List[Optional[float]]]:
    """Extract residues from an OpenMS-style sequence without using pyOpenMS.

    Every uppercase character found outside of a ``(...)`` or ``[...]``
    annotation is taken as a residue; the annotation contents are skipped.
    Mass shifts cannot be resolved here, so all modifications are None.
    """
    closers = {'(': ')', '[': ']'}
    residues: List[str] = []
    i = 0
    length = len(sequence_str)
    while i < length:
        char = sequence_str[i]
        if char.isupper():
            residues.append(char)
        elif char in closers:
            end = sequence_str.find(closers[char], i)
            if end > i:
                # Jump onto the closing bracket; the increment below steps past it.
                i = end
            # An unclosed bracket is simply skipped as a single character.
        i += 1
    return residues, [None] * len(residues)


def parse_openms_sequence(sequence_str: str) -> Tuple[List[str], List[Optional[float]]]:
    """Parse OpenMS sequence format to extract residues and modification mass shifts.

    Converts e.g. 'SHC(Carbamidomethyl)IAEVEK' to:
    - residues: ['S', 'H', 'C', 'I', 'A', 'E', 'V', 'E', 'K']
    - modifications: [None, None, 57.02, None, None, None, None, None, None]

    Mass shifts are only available when pyOpenMS can be imported and parses the
    string successfully. Otherwise a naive parser is used that drops the
    annotations and reports every residue as unmodified.

    Args:
        sequence_str: Peptide sequence in OpenMS format with modifications in
            parentheses (bracketed annotations are skipped by the naive parser too).

    Returns:
        Tuple of (residues list, modifications list where None means unmodified).
        Both lists always have the same length.
    """
    try:
        from pyopenms import AASequence
    except ImportError:
        # pyOpenMS is optional: fall back to plain one-letter-code extraction.
        return _parse_sequence_naive(sequence_str)

    try:
        aa_seq = AASequence.fromString(sequence_str)
        residues: List[str] = []
        modifications: List[Optional[float]] = []

        for i in range(aa_seq.size()):
            residue = aa_seq.getResidue(i)
            residues.append(residue.getOneLetterCode())

            mod = residue.getModification()
            # Mass shifts are rounded to two decimals.
            modifications.append(round(mod.getDiffMonoMass(), 2) if mod else None)

        return residues, modifications
    except Exception:
        # Deliberate best-effort: a string pyOpenMS rejects must not break the
        # component. Prefer the naive parse so annotation characters such as
        # '(' or the letters of a modification name do not end up as residues.
        residues, modifications = _parse_sequence_naive(sequence_str)
        if residues:
            return residues, modifications
        # Last resort (e.g. an all-lowercase string): one entry per raw character.
        return list(sequence_str), [None] * len(sequence_str)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Monoisotopic residue masses in Da, keyed by one-letter code.
# 'X' and 'Z' are placeholders that contribute no mass.
AA_MASSES = {
    'A': 71.037114,
    'R': 156.101111,
    'N': 114.042927,
    'D': 115.026943,
    'C': 103.009185,
    'E': 129.042593,
    'Q': 128.058578,
    'G': 57.021464,
    'H': 137.058912,
    'I': 113.084064,
    'L': 113.084064,
    'K': 128.094963,
    'M': 131.040485,
    'F': 147.068414,
    'P': 97.052764,
    'S': 87.032028,
    'T': 101.047679,
    'U': 150.953633,
    'W': 186.079313,
    'Y': 163.063329,
    'V': 99.068414,
    'X': 0,
    'Z': 0,
}

# Small-molecule / particle masses (Da).
# These are approximate - for precise values, use pyOpenMS.
H2O = 18.010565
NH3 = 17.026549
PROTON = 1.007276

# Offset added to the summed residue masses of a fragment, per ion type.
# a/b/c apply to N-terminal (prefix) sums, x/y/z to C-terminal (suffix) sums.
ION_OFFSETS = {
    'a': -27.994915,  # b minus CO
    'b': 0.0,         # plain residue sum
    'c': NH3,         # b plus NH3
    'x': 43.989829,   # y plus CO minus 2 H
    'y': H2O,         # residue sum plus H2O
    'z': 1.991841,    # y minus 16.018724 (NH2) - presumably the z-dot convention; confirm
}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def calculate_prefix_mass(sequence: str, position: int) -> float:
    """Sum the residue masses of the N-terminal stretch ending at ``position``.

    Residues at indices 0 through ``position`` (inclusive) are included.
    Codes missing from ``AA_MASSES`` count as 0.0. A negative ``position``
    yields 0.0; a ``position`` past the end raises IndexError.
    """
    mass_of = AA_MASSES.get
    total = 0.0
    index = 0
    while index <= position:
        total += mass_of(sequence[index], 0.0)
        index += 1
    return total
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def calculate_suffix_mass(sequence: str, position: int) -> float:
    """Sum the residue masses of the C-terminal stretch starting at ``position``.

    Residues at indices ``position`` through the last one are included.
    Codes missing from ``AA_MASSES`` count as 0.0. A ``position`` at or past
    the end yields 0.0.
    """
    mass_of = AA_MASSES.get
    stop = len(sequence)
    total = 0.0
    index = position
    while index < stop:
        total += mass_of(sequence[index], 0.0)
        index += 1
    return total
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def calculate_fragment_masses(sequence: str) -> Dict[str, List[List[float]]]:
    """
    Calculate theoretical fragment masses for all ion types.

    Residue masses are looked up once and accumulated into running prefix and
    suffix sums, so the work is linear in the sequence length instead of
    re-summing the fragment for every position and ion type.

    Args:
        sequence: Amino acid sequence string (one-letter codes; codes missing
            from AA_MASSES contribute 0.0).

    Returns:
        Dict with keys fragment_masses_a, fragment_masses_b, fragment_masses_c,
        fragment_masses_x, fragment_masses_y and fragment_masses_z.
        Each value is a list of lists (to support ambiguous modifications) with
        one single-element list per position. For a/b/c, entry i covers
        residues 0..i; for x/y/z, entry i covers the last i + 1 residues.
        An empty sequence yields empty lists.
    """
    from itertools import accumulate

    residue_masses = [AA_MASSES.get(aa, 0.0) for aa in sequence]

    # prefix_sums[i]: summed mass of residues 0..i (N-terminal fragments).
    prefix_sums = list(accumulate(residue_masses))
    # suffix_sums[i]: summed mass of the last i + 1 residues (C-terminal fragments).
    suffix_sums = list(accumulate(reversed(residue_masses)))

    result: Dict[str, List[List[float]]] = {}

    # Prefix ions (a, b, c) - from N-terminus
    for ion_type in ('a', 'b', 'c'):
        offset = ION_OFFSETS[ion_type]
        result[f'fragment_masses_{ion_type}'] = [[mass + offset] for mass in prefix_sums]

    # Suffix ions (x, y, z) - from C-terminus
    for ion_type in ('x', 'y', 'z'):
        offset = ION_OFFSETS[ion_type]
        result[f'fragment_masses_{ion_type}'] = [[mass + offset] for mass in suffix_sums]

    return result
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def calculate_theoretical_mass(sequence: str) -> float:
    """Return the monoisotopic mass of the whole sequence.

    The result is one water (``H2O``) plus the ``AA_MASSES`` entry of every
    residue; codes missing from ``AA_MASSES`` count as 0.0.
    """
    mass_of = AA_MASSES.get
    total = H2O  # the intact peptide carries one water on top of its residues
    for code in sequence:
        total = total + mass_of(code, 0.0)
    return total
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@register_component("sequence_view")
class SequenceView(BaseComponent):
    """
    Interactive sequence view component for peptide/protein visualization.

    Displays amino acid sequence with fragment ion markers. When provided with
    observed masses from a spectrum, highlights matched theoretical fragments.

    Features:
    - Amino acid grid display with configurable row width
    - Fragment ion markers (a, b, c, x, y, z)
    - Tolerance-based fragment matching
    - Fragment table showing matches
    - Residue cleavage percentage calculation

    Example:
        sequence_view = SequenceView(
            cache_id="peptide_view",
            sequence="PEPTIDEK",
            observed_masses=[147.1, 244.2, 359.3, ...],
            precursor_mass=944.5,
        )
        sequence_view(state_manager=state_manager)
    """

    _component_type: str = "sequence_view"

    def __init__(
        self,
        cache_id: str,
        sequence: str,
        observed_masses: Optional[List[float]] = None,
        peak_ids: Optional[List[int]] = None,
        precursor_mass: Optional[float] = None,
        data: Optional[pl.LazyFrame] = None,  # Not used but required by base
        filters: Optional[Dict[str, str]] = None,
        interactivity: Optional[Dict[str, str]] = None,
        cache_path: str = ".",
        regenerate_cache: bool = False,
        fixed_modifications: Optional[List[str]] = None,
        title: Optional[str] = None,
        height: int = 400,
        deconvolved: bool = True,
        precursor_charge: int = 1,
        _precomputed_sequence_data: Optional[Dict[str, Any]] = None,
        **kwargs
    ):
        """
        Initialize the SequenceView component.

        Args:
            cache_id: Unique identifier for this component's cache.
            sequence: Amino acid sequence string (single-letter codes).
            observed_masses: List of observed peak masses from spectrum.
            peak_ids: List of peak IDs corresponding to observed_masses (for interactivity).
            precursor_mass: Observed precursor mass.
            data: Not used for SequenceView, but required by base class.
                When omitted, a frame with 'peak_id' and 'mass' columns is built
                from observed_masses / peak_ids.
            filters: Mapping of identifier names to column names for filtering.
            interactivity: Mapping of identifier names to column names for clicks.
                Example: {'peak': 'peak_id'} sets 'peak' selection to 'peak_id' value on click.
            cache_path: Base path for cache storage.
            regenerate_cache: If True, regenerate cache even if valid.
            fixed_modifications: List of amino acids with fixed modifications (e.g., ['C']).
            title: Optional title displayed above the sequence.
            height: Component height in pixels.
            deconvolved: If True (default), observed_masses are neutral masses.
                If False, observed_masses are m/z values and fragment matching
                considers charge states 1 to precursor_charge.
            precursor_charge: Maximum charge state to consider for fragment matching
                when deconvolved=False. Fragments can have charge 1 to this value.
                Values below 1 are clamped to 1.
            _precomputed_sequence_data: Optional pre-computed sequence data dict.
                If provided, it is sent to Vue instead of the cached fragment data
                (used when fragment masses are already cached externally, e.g.,
                in identification preprocessing).
            **kwargs: Additional configuration options.
        """
        self._sequence_raw = sequence  # Keep original for calculations
        # NOTE(review): upper-casing also upper-cases modification names inside
        # parentheses (e.g. "(Carbamidomethyl)") before they are parsed below -
        # confirm that pyOpenMS resolves modification names case-insensitively.
        self._sequence = sequence.upper().replace(' ', '').replace('\n', '')
        self._observed_masses = observed_masses or []
        self._peak_ids = peak_ids  # peak_ids corresponding to observed_masses
        self._precursor_mass = precursor_mass or 0.0
        self._fixed_modifications = fixed_modifications or []
        self._title = title
        self._height = height
        self._deconvolved = deconvolved
        self._precursor_charge = max(1, precursor_charge)
        self._precomputed_sequence_data = _precomputed_sequence_data

        # Parse sequence to extract residues and modifications
        self._parsed_residues, self._parsed_modifications = parse_openms_sequence(self._sequence)

        # Build peaks DataFrame for interactivity validation
        # This allows interactivity={'peak': 'peak_id'} to validate naturally
        # Note: Cache validity is based on sequence (via _get_cache_config), not peaks data
        if data is None:
            if self._observed_masses:
                # Without explicit ids, peaks are identified by their list index.
                ids = self._peak_ids if self._peak_ids is not None else list(range(len(self._observed_masses)))
                data = pl.LazyFrame({
                    'peak_id': ids,
                    'mass': self._observed_masses,
                })
            else:
                # Empty peaks - use schema so validation still passes
                data = pl.LazyFrame(schema={'peak_id': pl.Int64, 'mass': pl.Float64})

        super().__init__(
            cache_id=cache_id,
            data=data,
            filters=filters,
            interactivity=interactivity,
            cache_path=cache_path,
            regenerate_cache=regenerate_cache,
            **kwargs
        )

    def _get_cache_config(self) -> Dict[str, Any]:
        """Get configuration that affects cache validity.

        Only the normalized sequence and the fixed modifications are included;
        observed masses are deliberately not part of the cache key.
        """
        return {
            'sequence': self._sequence,
            'fixed_modifications': self._fixed_modifications,
        }

    def _preprocess(self) -> None:
        """
        Preprocess sequence data.

        Calculates theoretical fragment masses for all ion types and the
        theoretical mass of the full sequence from the parsed (unmodified)
        residues, and stores them under ``self._preprocessed_data['sequence_data']``.
        This is cached so subsequent renders are fast.
        """
        # Calculate fragment masses using plain residues
        plain_sequence = ''.join(self._parsed_residues)
        fragment_masses = calculate_fragment_masses(plain_sequence)

        # Calculate theoretical mass
        theoretical_mass = calculate_theoretical_mass(plain_sequence)

        # Build sequence data structure
        sequence_data = {
            'sequence': self._parsed_residues,
            'modifications': self._parsed_modifications,  # list of mass shifts per position
            'theoretical_mass': theoretical_mass,
            'fixed_modifications': self._fixed_modifications,
            **fragment_masses,
        }

        self._preprocessed_data['sequence_data'] = sequence_data

    def _get_vue_component_name(self) -> str:
        """Return the Vue component name."""
        return 'SequenceView'

    def _get_data_key(self) -> str:
        """Return the key used to send primary data to Vue."""
        return 'sequenceData'

    def _prepare_vue_data(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """
        Prepare sequence data for Vue component.

        Args:
            state: Current selection state from StateManager (not read here).

        Returns:
            Dict with sequenceData, observedMasses, precursorMass and _hash,
            plus peakIds when peak ids are set. ``_hash`` changes whenever the
            sequence, precursor mass, observed mass values or peak ids change.
        """
        # Use precomputed data if available, otherwise use cached/computed data
        if self._precomputed_sequence_data is not None:
            sequence_data = self._precomputed_sequence_data
        else:
            sequence_data = self._preprocessed_data.get('sequence_data', {})

        # Create a hash based on sequence and observed masses.
        # The mass values themselves (not just their count) are hashed so that
        # a different spectrum with the same number of peaks gets a new hash.
        # md5 is used as a cheap fingerprint only, not for security.
        import hashlib
        hasher = hashlib.md5()
        hasher.update(f"{self._sequence}:{self._precursor_mass}:".encode())
        hasher.update(repr(list(self._observed_masses)).encode())
        hasher.update(b":")
        hasher.update(repr(self._peak_ids).encode())
        data_hash = hasher.hexdigest()[:8]

        result = {
            'sequenceData': sequence_data,
            'observedMasses': self._observed_masses,
            'precursorMass': self._precursor_mass,
            '_hash': data_hash,
        }

        # Include peak_ids if provided (for interactivity linking)
        if self._peak_ids is not None:
            result['peakIds'] = self._peak_ids

        return result

    def _get_component_args(self) -> Dict[str, Any]:
        """Get component arguments to send to Vue.

        Entries of ``self._config`` are merged last and therefore override the
        keys set here when names collide.
        """
        args: Dict[str, Any] = {
            'componentType': self._get_vue_component_name(),
            'height': self._height,
            'deconvolved': self._deconvolved,
            'precursorCharge': self._precursor_charge,
        }

        if self._title:
            args['title'] = self._title

        # Pass interactivity mapping to Vue (similar to other components)
        if self._interactivity:
            args['interactivity'] = self._interactivity

        args.update(self._config)
        return args

    def update_observed_masses(
        self,
        observed_masses: List[float],
        precursor_mass: Optional[float] = None,
        peak_ids: Optional[List[int]] = None
    ) -> 'SequenceView':
        """
        Update the observed masses for fragment matching.

        This allows reusing the same cached sequence data with different
        spectra for matching.

        Args:
            observed_masses: New list of observed peak masses. None is treated
                as an empty list.
            precursor_mass: Optional new precursor mass.
            peak_ids: Optional new peak IDs corresponding to observed_masses.
                When omitted, previously set ids are kept only if their count
                still matches the new masses; otherwise they are dropped.

        Returns:
            Self for method chaining.

        Raises:
            ValueError: If peak_ids is given and its length differs from
                observed_masses.
        """
        new_masses = observed_masses if observed_masses is not None else []

        if peak_ids is not None:
            if len(peak_ids) != len(new_masses):
                raise ValueError(
                    f"peak_ids has {len(peak_ids)} entries but observed_masses "
                    f"has {len(new_masses)}"
                )
            self._peak_ids = peak_ids
        elif self._peak_ids is not None and len(self._peak_ids) != len(new_masses):
            # Ids from the previous spectrum no longer line up with the peaks.
            self._peak_ids = None

        self._observed_masses = new_masses
        if precursor_mass is not None:
            self._precursor_mass = precursor_mass
        return self
|