openms-insight 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,492 @@
1
+ """Line plot component using Plotly.js."""
2
+
3
+ from typing import Any, Dict, Optional
4
+
5
+ import polars as pl
6
+
7
+ from ..core.base import BaseComponent
8
+ from ..core.registry import register_component
9
+ from ..preprocessing.filtering import filter_and_collect_cached
10
+
11
+
12
@register_component("lineplot")
class LinePlot(BaseComponent):
    """
    Interactive stick plot component using Plotly.js.

    Features:
    - Stick-style peak visualization (vertical lines from baseline)
    - Highlighting of selected data points
    - Annotations with labels (mass labels, charge states, etc.)
    - Zoom controls with auto-fit to highlighted data
    - SVG export
    - Click-to-select peaks with gold highlighting
    - Cross-component linking via filters and interactivity

    LinePlots can have separate `filters` and `interactivity` mappings:
    - `filters`: Which selections filter this plot's data
    - `interactivity`: What selection is set when a peak is clicked

    Example:
        # Plot filters by spectrum, clicking selects a peak
        plot = LinePlot(
            cache_id="peaks_plot",
            data=peaks_df,
            filters={'spectrum': 'scan_id'},
            interactivity={'my_selection': 'mass'},
            x_column='mass',
            y_column='intensity',
        )
    """

    _component_type: str = "lineplot"

    def __init__(
        self,
        cache_id: str,
        data: Optional[pl.LazyFrame] = None,
        filters: Optional[Dict[str, str]] = None,
        filter_defaults: Optional[Dict[str, Any]] = None,
        interactivity: Optional[Dict[str, str]] = None,
        cache_path: str = ".",
        regenerate_cache: bool = False,
        x_column: str = 'x',
        y_column: str = 'y',
        title: Optional[str] = None,
        x_label: Optional[str] = None,
        y_label: Optional[str] = None,
        highlight_column: Optional[str] = None,
        annotation_column: Optional[str] = None,
        styling: Optional[Dict[str, Any]] = None,
        config: Optional[Dict[str, Any]] = None,
        **kwargs
    ):
        """
        Initialize the LinePlot component.

        Args:
            cache_id: Unique identifier for this component's cache (MANDATORY).
                Creates a folder {cache_path}/{cache_id}/ for cached data.
            data: Polars LazyFrame with plot data. Optional if cache exists.
            filters: Mapping of identifier names to column names for filtering.
                Example: {'spectrum': 'scan_id'}
                When 'spectrum' selection exists, plot shows only data where
                scan_id equals the selected value.
            filter_defaults: Default values for filters when state is None.
                Example: {'identification': -1}
                When 'identification' selection is None, filter uses -1 instead.
                This enables showing unannotated data when no identification selected.
            interactivity: Mapping of identifier names to column names for clicks.
                Example: {'my_selection': 'mass'}
                When a peak is clicked, sets 'my_selection' to that peak's mass.
                The selected peak is highlighted in gold (selectedColor).
            cache_path: Base path for cache storage. Default "." (current dir).
            regenerate_cache: If True, regenerate cache even if valid cache exists.
            x_column: Column name for x-axis values
            y_column: Column name for y-axis values
            title: Plot title
            x_label: X-axis label (defaults to x_column)
            y_label: Y-axis label (defaults to y_column)
            highlight_column: Optional column name containing boolean/int
                indicating which points to highlight
            annotation_column: Optional column name containing text annotations
                to display on highlighted points
            styling: Style configuration dict with keys:
                - highlightColor: Color for highlighted points (default: '#E4572E')
                - selectedColor: Color for clicked/selected peak (default: '#F3A712')
                - unhighlightedColor: Color for normal points (default: 'lightblue')
                - annotationBackground: Background color for annotations
                The dict is shallow-copied, so later `with_styling` calls
                do not modify the dict passed in by the caller.
            config: Additional Plotly config options
            **kwargs: Additional configuration options
        """
        # NOTE(review): these attributes are assigned before calling
        # super().__init__ — presumably the base constructor invokes hooks
        # (_validate_mappings / _preprocess) that read them; confirm in
        # BaseComponent before reordering.
        self._x_column = x_column
        self._y_column = y_column
        self._title = title
        self._x_label = x_label or x_column
        self._y_label = y_label or y_column
        self._highlight_column = highlight_column
        self._annotation_column = annotation_column
        # Copy so the fluent setters below never mutate the caller's dict.
        self._styling = dict(styling) if styling else {}
        self._plot_config = config or {}

        # Dynamic annotations set at render time (not cached)
        self._dynamic_annotations: Optional[Dict[Any, Dict[str, Any]]] = None
        self._dynamic_title: Optional[str] = None

        super().__init__(
            cache_id=cache_id,
            data=data,
            filters=filters,
            filter_defaults=filter_defaults,
            interactivity=interactivity,
            cache_path=cache_path,
            regenerate_cache=regenerate_cache,
            **kwargs
        )

    def _get_cache_config(self) -> Dict[str, Any]:
        """
        Get configuration that affects cache validity.

        Returns:
            Dict of config values that affect preprocessing
        """
        return {
            'x_column': self._x_column,
            'y_column': self._y_column,
            'highlight_column': self._highlight_column,
            'annotation_column': self._annotation_column,
        }

    def _get_row_group_size(self) -> int:
        """
        Get optimal row group size for parquet writing.

        Filtered plots use smaller row groups (10K) for better predicate
        pushdown granularity - this allows Polars to skip row groups that
        don't contain the filter value. Unfiltered plots use larger groups
        (50K) since we read all data anyway.

        Returns:
            Number of rows per row group
        """
        if self._filters:
            return 10_000  # Smaller groups for better filter performance
        return 50_000  # Larger groups for unfiltered plots

    def _validate_mappings(self) -> None:
        """
        Validate columns exist in data schema.

        Raises:
            ValueError: If x_column or y_column, or a configured
                highlight_column / annotation_column, is missing from the
                raw data schema.
        """
        super()._validate_mappings()

        schema = self._raw_data.collect_schema()
        column_names = schema.names()

        # x and y are always checked; the optional columns only when set.
        to_check = [
            (self._x_column, 'x_column'),
            (self._y_column, 'y_column'),
        ]
        optional = [
            (self._highlight_column, 'highlight_column'),
            (self._annotation_column, 'annotation_column'),
        ]
        to_check.extend((name, label) for name, label in optional if name)

        for col_name, col_label in to_check:
            if col_name not in column_names:
                raise ValueError(
                    f"{col_label} '{col_name}' not found in data. "
                    f"Available columns: {column_names}"
                )

    def _preprocess(self) -> None:
        """
        Preprocess plot data.

        Sorts by filter columns for efficient predicate pushdown, then
        collects the LazyFrame for caching by base class.
        """
        data = self._raw_data

        # Sort by filter columns for efficient predicate pushdown.
        # This clusters identical filter values together, enabling Polars
        # to skip row groups that don't contain the target value when
        # filtering by selection state.
        if self._filters:
            sort_columns = list(self._filters.values())
            data = data.sort(sort_columns)

        # Store configuration in preprocessed data for serialization
        self._preprocessed_data['plot_config'] = {
            'x_column': self._x_column,
            'y_column': self._y_column,
            'highlight_column': self._highlight_column,
            'annotation_column': self._annotation_column,
        }

        # Collect data for caching (filter happens at render time)
        # Base class will serialize this to parquet
        self._preprocessed_data['data'] = data.collect()

    def _get_vue_component_name(self) -> str:
        """Return the Vue component name."""
        return 'PlotlyLineplotUnified'

    def _get_data_key(self) -> str:
        """Return the key used to send primary data to Vue."""
        return 'plotData'

    def _columns_to_select(self) -> list:
        """
        Build the ordered, duplicate-free list of columns to project.

        Covers x/y, the optional highlight/annotation columns, and every
        interactivity and filter column. Duplicates are dropped so that a
        column used in two roles (e.g. annotating with the x column) is
        requested only once.
        """
        columns = [self._x_column, self._y_column]
        optional = (self._highlight_column, self._annotation_column)
        columns.extend(col for col in optional if col)
        # Include columns needed for interactivity (e.g., peak_id)
        if self._interactivity:
            columns.extend(self._interactivity.values())
        # Include filter columns for filtering to work
        if self._filters:
            columns.extend(self._filters.values())
        # dict.fromkeys keeps first-seen order while removing repeats.
        return list(dict.fromkeys(columns))

    def _build_dynamic_columns(self, df_pandas) -> tuple:
        """
        Compute per-row highlight flags and labels from dynamic annotations.

        Rows are matched by the value of the first interactivity column
        when that column is present in ``df_pandas``; otherwise integer
        annotation keys are treated as row indices (legacy behavior).

        Returns:
            Tuple ``(highlights, annotations)`` of lists with one entry
            per row of ``df_pandas``.
        """
        dynamic = self._dynamic_annotations
        num_rows = len(df_pandas)
        highlights = [False] * num_rows
        annotations = [''] * num_rows

        # Use the first interactivity column as the ID column for lookup
        id_column = None
        if self._interactivity:
            id_column = next(iter(self._interactivity.values()), None)

        if id_column and id_column in df_pandas.columns:
            # Apply annotations by peak_id lookup
            for row_idx, peak_id in enumerate(df_pandas[id_column].tolist()):
                if peak_id in dynamic:
                    ann_data = dynamic[peak_id]
                    highlights[row_idx] = ann_data.get('highlight', False)
                    annotations[row_idx] = ann_data.get('annotation', '')
        else:
            # Fallback: use row index as key (legacy behavior)
            for idx, ann_data in dynamic.items():
                if isinstance(idx, int) and 0 <= idx < num_rows:
                    highlights[idx] = ann_data.get('highlight', False)
                    annotations[idx] = ann_data.get('annotation', '')

        return highlights, annotations

    @staticmethod
    def _dynamic_annotation_digest(annotations) -> str:
        """
        Return a short digest of the annotation keys AND their contents.

        Hashing the values as well as the keys ensures the change-detection
        hash moves when only a label or highlight flag changes. Entries are
        ordered by ``repr`` so mixed-type keys cannot break sorting.
        """
        import hashlib

        canonical = sorted(
            (repr(key), sorted((repr(k), repr(v)) for k, v in entry.items()))
            for key, entry in annotations.items()
        )
        return hashlib.md5(repr(canonical).encode()).hexdigest()[:8]

    def _prepare_vue_data(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """
        Prepare plot data for Vue component.

        LinePlots filter based on filters mapping if provided.

        Sends data as a pandas DataFrame for efficient Arrow serialization.
        Vue parses the Arrow table and extracts column arrays for rendering.

        Args:
            state: Current selection state from StateManager

        Returns:
            Dict with plotData (pandas DataFrame), _hash for change detection
            and _plotConfig describing which columns map to x, y, etc.
        """
        # Build list of columns to select (projection pushdown for efficiency)
        columns_to_select = self._columns_to_select()

        # Get cached data (DataFrame or LazyFrame)
        data = self._preprocessed_data.get('data')
        if data is None:
            # Fallback to raw data if available
            data = self._raw_data

        # Ensure we have a LazyFrame for filtering
        if isinstance(data, pl.DataFrame):
            data = data.lazy()

        # Use cached filter+collect - returns (pandas DataFrame, hash)
        df_pandas, data_hash = filter_and_collect_cached(
            data,
            self._filters,
            state,
            columns=columns_to_select,
            filter_defaults=self._filter_defaults,
        )

        # Determine which highlight/annotation columns to use
        highlight_col = self._highlight_column
        annotation_col = self._annotation_column

        # Apply dynamic annotations if set
        if self._dynamic_annotations and len(df_pandas) > 0:
            highlights, annotations = self._build_dynamic_columns(df_pandas)

            # Copy before adding columns so the frame returned by
            # filter_and_collect_cached is left untouched.
            df_pandas = df_pandas.copy()
            df_pandas['_dynamic_highlight'] = highlights
            df_pandas['_dynamic_annotation'] = annotations

            # Update column names to use dynamic columns
            highlight_col = '_dynamic_highlight'
            annotation_col = '_dynamic_annotation'

            # Update hash to include dynamic annotation state
            ann_hash = self._dynamic_annotation_digest(self._dynamic_annotations)
            data_hash = f"{data_hash}_{ann_hash}"

        interactivity_columns = (
            self._interactivity.values() if self._interactivity else []
        )

        # Send as DataFrame for Arrow serialization (efficient binary transfer)
        # Vue will parse and extract columns using the config
        return {
            'plotData': df_pandas,
            '_hash': data_hash,
            # Config tells Vue which columns map to x, y, etc.
            '_plotConfig': {
                'xColumn': self._x_column,
                'yColumn': self._y_column,
                'highlightColumn': highlight_col,
                'annotationColumn': annotation_col,
                'interactivityColumns': {
                    col: col for col in interactivity_columns
                },
            }
        }

    def _get_component_args(self) -> Dict[str, Any]:
        """
        Get component arguments to send to Vue.

        Returns:
            Dict with all plot configuration for Vue
        """
        # Default styling
        default_styling = {
            'highlightColor': '#E4572E',
            'selectedColor': '#F3A712',
            'unhighlightedColor': 'lightblue',
            'highlightHiddenColor': '#1f77b4',
            'annotationColors': {
                'massButton': '#E4572E',
                'selectedMassButton': '#F3A712',
                'sequenceArrow': '#E4572E',
                'selectedSequenceArrow': '#F3A712',
                'background': '#f0f0f0',
                'buttonHover': '#e0e0e0',
            }
        }

        # Merge user styling with defaults; annotationColors is merged one
        # level deeper so a partial override keeps the remaining defaults.
        styling = {**default_styling, **self._styling}
        if 'annotationColors' in self._styling:
            styling['annotationColors'] = {
                **default_styling['annotationColors'],
                **self._styling['annotationColors'],
            }

        # Use dynamic title if set, otherwise static title
        title = self._dynamic_title or self._title or ''

        args: Dict[str, Any] = {
            'componentType': self._get_vue_component_name(),
            'title': title,
            'xLabel': self._x_label,
            'yLabel': self._y_label,
            'styling': styling,
            'config': self._plot_config,
            # Pass interactivity for click handling (sets selection on peak click)
            'interactivity': self._interactivity,
            # Column mappings for Arrow data parsing in Vue
            'xColumn': self._x_column,
            'yColumn': self._y_column,
            'highlightColumn': self._highlight_column,
            'annotationColumn': self._annotation_column,
        }

        # Add any extra config options
        # NOTE(review): self._config is not set in this class — presumably
        # populated by BaseComponent from **kwargs; confirm.
        args.update(self._config)

        return args

    def with_styling(
        self,
        highlight_color: Optional[str] = None,
        selected_color: Optional[str] = None,
        unhighlighted_color: Optional[str] = None,
    ) -> 'LinePlot':
        """
        Update plot styling.

        Args:
            highlight_color: Color for highlighted points
            selected_color: Color for selected points
            unhighlighted_color: Color for unhighlighted points

        Returns:
            Self for method chaining
        """
        overrides = {
            'highlightColor': highlight_color,
            'selectedColor': selected_color,
            'unhighlightedColor': unhighlighted_color,
        }
        # Only non-empty values replace the current setting.
        self._styling.update(
            {key: value for key, value in overrides.items() if value}
        )
        return self

    def with_annotations(
        self,
        background_color: Optional[str] = None,
        button_color: Optional[str] = None,
        selected_button_color: Optional[str] = None,
    ) -> 'LinePlot':
        """
        Configure annotation styling.

        Args:
            background_color: Background color for annotation boxes
            button_color: Color for annotation buttons
            selected_button_color: Color for selected annotation buttons

        Returns:
            Self for method chaining
        """
        # Work on a copy of the nested dict: it may still be the object the
        # caller supplied inside `styling`, which must not be mutated.
        colors = dict(self._styling.get('annotationColors') or {})

        if background_color:
            colors['background'] = background_color
        if button_color:
            colors['massButton'] = button_color
        if selected_button_color:
            colors['selectedMassButton'] = selected_button_color

        self._styling['annotationColors'] = colors
        return self

    def set_dynamic_annotations(
        self,
        annotations: Optional[Dict[Any, Dict[str, Any]]] = None,
        title: Optional[str] = None,
    ) -> 'LinePlot':
        """
        Set dynamic annotations to be applied at render time.

        This allows updating peak annotations without recreating the component.
        Annotations are keyed by the interactivity column value (e.g., peak_id),
        which provides a stable identifier independent of row order.

        Args:
            annotations: Dict mapping peak IDs to annotation data.
                Keys should match values in the first interactivity column.
                Each entry should have:
                - 'highlight': bool - whether to highlight this peak
                - 'annotation': str - label text (e.g., "b3¹⁺")
                Example: {123: {'highlight': True, 'annotation': 'b2¹⁺'}}
            title: Optional dynamic title override

        Returns:
            Self for method chaining

        Example:
            # Compute annotations for current identification (keyed by peak_id)
            annotations = {
                123: {'highlight': True, 'annotation': 'b2¹⁺'},
                456: {'highlight': True, 'annotation': 'b3¹⁺'},
            }
            spectrum_plot.set_dynamic_annotations(annotations, title="PEPTIDER")
            spectrum_plot(key="plot", state_manager=sm)
        """
        self._dynamic_annotations = annotations
        self._dynamic_title = title
        return self

    def clear_dynamic_annotations(self) -> 'LinePlot':
        """
        Clear any dynamic annotations.

        Returns:
            Self for method chaining
        """
        self._dynamic_annotations = None
        self._dynamic_title = None
        return self