openms-insight 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,413 @@
1
+ """Base component class for all visualization components."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from datetime import datetime
5
+ import hashlib
6
+ import json
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Optional, TYPE_CHECKING
9
+ import polars as pl
10
+
11
+ from .cache import CacheMissError, get_cache_dir
12
+
13
+ if TYPE_CHECKING:
14
+ from .state import StateManager
15
+
16
# Cache format version - increment when cache structure changes
# Version 2: Added sorting by filter columns + smaller row groups for predicate pushdown
CACHE_VERSION = 2


class BaseComponent(ABC):
    """
    Abstract base class for all visualization components.

    Components are created with a mandatory cache_id and optional data,
    filters, and interactivity mappings. Preprocessed data is automatically
    cached to disk for fast subsequent loads.

    Attributes:
        _cache_id: Unique identifier for this component's cache
        _cache_dir: Path to cache directory
        _raw_data: Original polars LazyFrame (None if loaded from cache)
        _filters: Dict mapping identifiers to column names for filtering
        _filter_defaults: Dict mapping identifiers to fallback filter values
        _interactivity: Dict mapping identifiers to column names for click actions
        _preprocessed_data: Dict of preprocessed data structures
        _config: Component configuration options
        _component_type: Class-level component type identifier
    """

    _component_type: str = ""

    def __init__(
        self,
        cache_id: str,
        data: Optional["pl.LazyFrame"] = None,
        filters: Optional[Dict[str, str]] = None,
        filter_defaults: Optional[Dict[str, Any]] = None,
        interactivity: Optional[Dict[str, str]] = None,
        cache_path: str = ".",
        regenerate_cache: bool = False,
        **kwargs
    ):
        """
        Initialize the component.

        Either loads preprocessed data from a valid cache, or validates the
        mappings against ``data``, runs ``_preprocess()`` and writes a new
        cache.

        Args:
            cache_id: Unique identifier for this component's cache (MANDATORY).
                Creates a folder {cache_path}/{cache_id}/ for cached data.
            data: Polars LazyFrame with source data. Optional if cache exists.
            filters: Mapping of identifier names to column names for filtering.
                Example: {'spectrum': 'scan_id'}
                When 'spectrum' selection exists, component filters data where
                scan_id equals the selected value.
            filter_defaults: Default values for filters when state is None.
                Example: {'identification': -1}
                When 'identification' selection is None, filter uses -1 instead.
                This enables showing default/unannotated data when no selection.
            interactivity: Mapping of identifier names to column names for clicks.
                Example: {'my_selection': 'mass'}
                When user clicks/selects, sets 'my_selection' to the clicked
                row's mass value.
            cache_path: Base path for cache storage. Default "." (current dir).
            regenerate_cache: If True, regenerate cache even if valid cache exists.
            **kwargs: Component-specific configuration options

        Raises:
            CacheMissError: If the cache must be (re)built but no data was given.
            ValueError: If a filter or interactivity column is not in the data.
        """
        self._cache_id = cache_id
        self._cache_dir = get_cache_dir(cache_path, cache_id)
        # Copy the mappings so later mutation of the caller's dicts cannot
        # silently change this component's behaviour or its config hash.
        self._filters = dict(filters) if filters else {}
        self._filter_defaults = dict(filter_defaults) if filter_defaults else {}
        self._interactivity = dict(interactivity) if interactivity else {}
        self._preprocessed_data: Dict[str, Any] = {}
        self._config = kwargs

        # Check if we should load from cache or preprocess
        if regenerate_cache or not self._is_cache_valid():
            if data is None:
                if regenerate_cache:
                    # A cache may well exist here; the problem is that a
                    # rebuild was requested without anything to rebuild from.
                    raise CacheMissError(
                        f"regenerate_cache=True requires data= to rebuild the "
                        f"cache at '{self._cache_dir}'."
                    )
                raise CacheMissError(
                    f"Cache not found at '{self._cache_dir}' and no data provided. "
                    f"Either provide data= or ensure cache exists from a previous run."
                )
            self._raw_data = data
            # Validate columns exist in data
            self._validate_mappings()
            # Run component-specific preprocessing
            self._preprocess()
            # Save to cache for next time
            self._save_to_cache()
        else:
            # Load from valid cache
            self._raw_data = None
            self._load_from_cache()

    def _validate_mappings(self) -> None:
        """
        Validate that filter and interactivity columns exist in the data schema.

        Raises:
            ValueError: If a mapped column is missing from the raw data.
        """
        if self._raw_data is None:
            return  # Skip validation when loaded from cache

        column_names = self._raw_data.collect_schema().names()

        # Both mappings are checked the same way; only the label in the
        # error message differs.
        checks = (
            ("Filter", self._filters),
            ("Interactivity", self._interactivity),
        )
        for label, mapping in checks:
            for identifier, column in mapping.items():
                if column not in column_names:
                    raise ValueError(
                        f"{label} column '{column}' for identifier '{identifier}' not found in data. "
                        f"Available columns: {column_names}"
                    )

    def _get_cache_config(self) -> Dict[str, Any]:
        """
        Get configuration that affects cache validity.

        Override in subclasses to include component-specific config.
        Config changes will invalidate the cache.

        Returns:
            Dict of config values that affect preprocessing
        """
        return {}

    def _compute_config_hash(self) -> str:
        """
        Compute hash of configuration for cache validation.

        Returns:
            SHA-256 hex digest of the JSON-encoded filters, interactivity
            mapping and ``_get_cache_config()`` values.
        """
        config_dict = {
            "filters": self._filters,
            "interactivity": self._interactivity,
            **self._get_cache_config()
        }
        # sort_keys makes the hash independent of dict insertion order;
        # default=str stringifies values json cannot encode natively.
        config_str = json.dumps(config_dict, sort_keys=True, default=str)
        return hashlib.sha256(config_str.encode()).hexdigest()

    def _get_manifest_path(self) -> Path:
        """Get path to cache manifest file."""
        return self._cache_dir / "manifest.json"

    def _get_preprocessed_dir(self) -> Path:
        """Get path to preprocessed data directory."""
        return self._cache_dir / "preprocessed"

    def _is_cache_valid(self) -> bool:
        """
        Check if cache is valid and can be loaded.

        Cache is valid when:
        1. manifest.json exists and contains a JSON object
        2. version matches current CACHE_VERSION
        3. component_type matches
        4. config_hash matches current config
        5. every data file listed in the manifest exists on disk

        Returns:
            True if the cache can be loaded, False otherwise.
        """
        manifest_path = self._get_manifest_path()
        if not manifest_path.exists():
            return False

        try:
            with open(manifest_path, encoding="utf-8") as f:
                manifest = json.load(f)
        except (ValueError, OSError):
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError; an unreadable manifest is a cache miss.
            return False

        # Valid JSON that is not an object (e.g. [] or null) is a corrupt
        # manifest, not a reason to crash on .get() below.
        if not isinstance(manifest, dict):
            return False

        # Check version
        if manifest.get("version") != CACHE_VERSION:
            return False

        # Check component type
        if manifest.get("component_type") != self._component_type:
            return False

        # Check config hash
        if manifest.get("config_hash") != self._compute_config_hash():
            return False

        data_files = manifest.get("data_files", {})
        data_values = manifest.get("data_values", {})
        if not isinstance(data_files, dict) or not isinstance(data_values, dict):
            return False

        # A manifest pointing at deleted files would otherwise load as a
        # component with silently missing data.
        preprocessed_dir = self._get_preprocessed_dir()
        for filename in data_files.values():
            if not isinstance(filename, str):
                return False
            if not (preprocessed_dir / filename).exists():
                return False

        return True

    def _load_from_cache(self) -> None:
        """
        Load preprocessed data from cache.

        Restores the filters and interactivity mappings from the manifest,
        registers each cached parquet file as a lazy scan, and copies the
        stored plain values into ``_preprocessed_data``.
        """
        preprocessed_dir = self._get_preprocessed_dir()

        with open(self._get_manifest_path(), encoding="utf-8") as f:
            manifest = json.load(f)

        # Load filters and interactivity from manifest
        self._filters = manifest.get("filters", {})
        self._interactivity = manifest.get("interactivity", {})

        # Load preprocessed data files
        for key, filename in manifest.get("data_files", {}).items():
            filepath = preprocessed_dir / filename
            # Guard kept in case a file disappears after validation.
            if filepath.exists():
                self._preprocessed_data[key] = pl.scan_parquet(filepath)

        # Load simple values
        for key, value in manifest.get("data_values", {}).items():
            self._preprocessed_data[key] = value

    def _save_to_cache(self) -> None:
        """
        Save preprocessed data to cache.

        Frames are written as zstd-compressed parquet files, JSON-serializable
        values are stored inside the manifest. Values that are neither are
        not persisted and will be absent after a later cache load.
        """
        # Create directories
        self._cache_dir.mkdir(parents=True, exist_ok=True)
        preprocessed_dir = self._get_preprocessed_dir()
        preprocessed_dir.mkdir(parents=True, exist_ok=True)

        # Prepare manifest
        manifest = {
            "version": CACHE_VERSION,
            "component_type": self._component_type,
            "created_at": datetime.now().isoformat(),
            "config_hash": self._compute_config_hash(),
            "config": self._get_cache_config(),
            "filters": self._filters,
            "interactivity": self._interactivity,
            "data_files": {},
            "data_values": {},
        }

        # Save preprocessed data
        row_group_size = self._get_row_group_size()
        for key, value in self._preprocessed_data.items():
            # Lazy frames are materialized so both frame kinds share one
            # write path.
            frame = value.collect() if isinstance(value, pl.LazyFrame) else value
            if isinstance(frame, pl.DataFrame):
                filename = f"{key}.parquet"
                frame.write_parquet(
                    preprocessed_dir / filename,
                    compression='zstd',
                    statistics=True,
                    row_group_size=row_group_size,
                )
                manifest["data_files"][key] = filename
            elif self._is_json_serializable(value):
                manifest["data_values"][key] = value

        # Write manifest last so it only ever describes files already written
        with open(self._get_manifest_path(), "w", encoding="utf-8") as f:
            json.dump(manifest, f, indent=2)

    def _is_json_serializable(self, value: Any) -> bool:
        """
        Check if value can be JSON serialized.

        Args:
            value: Any Python object.

        Returns:
            True if ``json.dumps(value)`` succeeds, False if it raises
            TypeError or ValueError.
        """
        try:
            json.dumps(value)
        except (TypeError, ValueError):
            return False
        return True

    def _get_row_group_size(self) -> int:
        """
        Get optimal row group size for parquet writing.

        Smaller row groups enable better predicate pushdown when filtering,
        but increase metadata overhead. Override in subclasses for
        component-specific tuning.

        Returns:
            Number of rows per row group (default: 50,000)
        """
        return 50_000

    @abstractmethod
    def _preprocess(self) -> None:
        """
        Run component-specific preprocessing.

        This method should populate self._preprocessed_data with any
        preprocessed data structures needed for rendering.
        """
        pass

    @abstractmethod
    def _get_vue_component_name(self) -> str:
        """Return the Vue component name to render (e.g., 'TabulatorTable')."""
        pass

    @abstractmethod
    def _get_data_key(self) -> str:
        """Return the key used to send primary data to Vue."""
        pass

    @abstractmethod
    def _prepare_vue_data(
        self,
        state: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Prepare data payload for Vue component.

        Args:
            state: Current selection state from StateManager

        Returns:
            Dict with data to send to Vue component
        """
        pass

    @abstractmethod
    def _get_component_args(self) -> Dict[str, Any]:
        """
        Get component arguments to send to Vue.

        Returns:
            Dict with component configuration for Vue
        """
        pass

    def get_filters_mapping(self) -> Dict[str, str]:
        """Return a copy of the filters identifier-to-column mapping."""
        return self._filters.copy()

    def get_filter_defaults(self) -> Dict[str, Any]:
        """Return a copy of the filter defaults mapping."""
        return self._filter_defaults.copy()

    def get_interactivity_mapping(self) -> Dict[str, str]:
        """Return a copy of the interactivity identifier-to-column mapping."""
        return self._interactivity.copy()

    def get_filter_identifiers(self) -> List[str]:
        """Return list of filter identifiers this component uses."""
        return list(self._filters.keys())

    def get_interactivity_identifiers(self) -> List[str]:
        """Return list of interactivity identifiers this component sets."""
        return list(self._interactivity.keys())

    def get_state_dependencies(self) -> List[str]:
        """
        Return list of state keys that affect this component's data.

        By default, returns filter identifiers. Override in subclasses
        to include additional state keys (e.g., zoom state for heatmaps).

        The returned keys are used in the cache key calculation, so
        changes to any of these state values will trigger data recomputation.

        Returns:
            List of state identifier keys
        """
        return list(self._filters.keys())

    def _get_primary_data(self) -> Optional["pl.LazyFrame"]:
        """
        Get the primary data for operations.

        Override in subclasses for complex data structures.
        Returns None if component was loaded from cache without raw data.
        """
        return self._raw_data

    def __call__(
        self,
        key: Optional[str] = None,
        state_manager: Optional['StateManager'] = None,
        height: Optional[int] = None
    ) -> Any:
        """
        Render the component in Streamlit.

        Args:
            key: Optional unique key for the Streamlit component
            state_manager: Optional StateManager for cross-component state.
                If not provided, uses a default shared StateManager.
            height: Optional height in pixels for the component

        Returns:
            The value returned by the Vue component (usually selection state)
        """
        # Imported here rather than at module level, as in the original.
        from .state import get_default_state_manager
        from ..rendering.bridge import render_component

        if state_manager is None:
            state_manager = get_default_state_manager()

        return render_component(
            component=self,
            state_manager=state_manager,
            key=key,
            height=height
        )

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"cache_id='{self._cache_id}', "
            f"filters={self._filters}, "
            f"interactivity={self._interactivity}, "
            f"config={self._config})"
        )
@@ -0,0 +1,39 @@
1
+ """Caching utilities for components.
2
+
3
+ This module provides the infrastructure for automatic component caching:
4
+ - CacheMissError: Exception raised when cache not found and data not provided
5
+ - get_cache_dir: Utility to compute cache directory path
6
+ """
7
+
8
+ from pathlib import Path
9
+
10
+
11
class CacheMissError(Exception):
    """Signal that a component has neither a usable cache nor source data.

    Raised when both of the following hold:
    1. The component was created without the `data` parameter
    2. No valid cache exists at the specified cache_path/cache_id location

    Ways to resolve it:
    - Pass the `data` parameter so the cache can be created
    - Make sure a cache from a previous run is present
    - Pass `regenerate_cache=True` together with `data` to rebuild
    """
25
+
26
+
27
def get_cache_dir(cache_path: str, cache_id: str) -> Path:
    """Build the directory path that holds one component's cache.

    The layout is ``{cache_path}/{cache_id}/``. The path is only computed
    here; nothing is created on disk.

    Args:
        cache_path: Base path for cache storage
        cache_id: Unique identifier for this component's cache

    Returns:
        Path object pointing to the component's cache directory
    """
    base_dir = Path(cache_path)
    return base_dir.joinpath(cache_id)
@@ -0,0 +1,82 @@
1
+ """Component type registry for serialization and deserialization."""
2
+
3
+ from typing import Dict, Type, TYPE_CHECKING
4
+
5
+ if TYPE_CHECKING:
6
+ from .base import BaseComponent
7
+
8
# Global registry mapping component type names to their classes
_COMPONENT_REGISTRY: Dict[str, Type['BaseComponent']] = {}


def register_component(name: str):
    """
    Decorator to register a component class in the registry.

    The decorated class is stored under ``name`` and its ``_component_type``
    attribute is set to ``name``. Registering the same class again, or a
    class with the same module and qualified name (as happens when its
    module is re-imported or reloaded), replaces the existing entry instead
    of failing.

    Args:
        name: Unique name for the component type (e.g., 'table', 'lineplot')

    Returns:
        Decorator function

    Raises:
        ValueError: If ``name`` is already registered to a different class.

    Example:
        @register_component("table")
        class Table(BaseComponent):
            ...
    """
    def decorator(cls: Type['BaseComponent']) -> Type['BaseComponent']:
        existing = _COMPONENT_REGISTRY.get(name)
        if existing is not None and existing is not cls:
            # A reloaded module produces a new class object with the same
            # module and qualified name; that is the same definition, not
            # a naming conflict.
            same_definition = (
                getattr(existing, "__module__", None) == getattr(cls, "__module__", None)
                and getattr(existing, "__qualname__", None) == getattr(cls, "__qualname__", None)
            )
            if not same_definition:
                raise ValueError(
                    f"Component type '{name}' is already registered to "
                    f"{existing.__name__}"
                )
        _COMPONENT_REGISTRY[name] = cls
        cls._component_type = name
        return cls

    return decorator
38
+
39
+
40
def get_component_class(name: str) -> Type['BaseComponent']:
    """
    Look up the component class registered under a type name.

    Args:
        name: The registered component type name

    Returns:
        The component class

    Raises:
        KeyError: If no component is registered with that name
    """
    if name in _COMPONENT_REGISTRY:
        return _COMPONENT_REGISTRY[name]

    # Unknown name: list what is registered to help the caller.
    available = list(_COMPONENT_REGISTRY)
    raise KeyError(
        f"No component registered with name '{name}'. "
        f"Available components: {available}"
    )
60
+
61
+
62
def list_registered_components() -> Dict[str, Type['BaseComponent']]:
    """
    Return a snapshot of every registered component type.

    The result is a shallow copy, so changing it does not affect the
    registry itself.

    Returns:
        Dict mapping component names to their classes
    """
    snapshot = dict(_COMPONENT_REGISTRY)
    return snapshot
70
+
71
+
72
def is_registered(name: str) -> bool:
    """
    Tell whether a component type name is present in the registry.

    Args:
        name: The component type name to check

    Returns:
        True if registered, False otherwise
    """
    found = name in _COMPONENT_REGISTRY
    return found