openms-insight 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,7 @@ def _make_hashable(value: Any) -> Any:
29
29
  return json.dumps(value)
30
30
  return value
31
31
 
32
+
32
33
  if TYPE_CHECKING:
33
34
  from ..core.base import BaseComponent
34
35
  from ..core.state import StateManager
@@ -46,6 +47,10 @@ _VUE_ECHOED_HASH_KEY = "_svc_vue_echoed_hashes"
46
47
  # Each component stores exactly one entry (current filter state)
47
48
  _COMPONENT_DATA_CACHE_KEY = "_svc_component_data_cache"
48
49
 
50
+ # Session state key for component annotations (from Vue)
51
+ # Stores the raw annotation dicts returned by components like SequenceView (converted to DataFrames on read)
52
+ _COMPONENT_ANNOTATIONS_KEY = "_svc_component_annotations"
53
+
49
54
 
50
55
  def _get_component_cache() -> Dict[str, Any]:
51
56
  """Get per-component data cache from session state."""
@@ -64,6 +69,55 @@ def clear_component_cache() -> None:
64
69
  st.session_state[_COMPONENT_DATA_CACHE_KEY].clear()
65
70
 
66
71
 
72
def _store_component_annotations(
    component_key: str, annotations: Dict[str, Any]
) -> None:
    """
    Record the annotation payload sent back by a Vue component.

    The payload is kept unchanged in Streamlit session state, inside a dict
    keyed by component key. A previous payload for the same key is overwritten.

    Args:
        component_key: Unique key for the component
        annotations: Dict with annotation arrays (e.g., peak_id, highlight_color, annotation)
    """
    session = st.session_state
    # Create the per-session store lazily, the first time anything is saved
    if _COMPONENT_ANNOTATIONS_KEY not in session:
        session[_COMPONENT_ANNOTATIONS_KEY] = {}
    store = session[_COMPONENT_ANNOTATIONS_KEY]
    store[component_key] = annotations
85
+
86
+
87
def get_component_annotations(component_key: Optional[str]) -> Optional[pl.DataFrame]:
    """
    Look up the annotations a Vue component stored and return them as a DataFrame.

    Args:
        component_key: Unique key for the component

    Returns:
        Polars DataFrame built from the stored annotation dict, or None when
        no key is given, nothing is stored for the key, or the stored payload
        cannot be turned into a DataFrame
    """
    # Nothing to look up without a key or before any annotation was stored
    if component_key is None or _COMPONENT_ANNOTATIONS_KEY not in st.session_state:
        return None

    stored = st.session_state[_COMPONENT_ANNOTATIONS_KEY].get(component_key)
    if stored is None:
        return None

    # Expected shape: {peak_id: [...], highlight_color: [...], annotation: [...]}
    try:
        frame = pl.DataFrame(stored)
    except Exception:
        # Any construction failure is reported as "not available"
        return None
    return frame
113
+
114
+
115
def clear_component_annotations() -> None:
    """Remove every stored component annotation; no-op if the store was never created."""
    if _COMPONENT_ANNOTATIONS_KEY not in st.session_state:
        return
    # Empty the existing dict in place rather than replacing it
    st.session_state[_COMPONENT_ANNOTATIONS_KEY].clear()
119
+
120
+
67
121
  def _get_cached_vue_data(
68
122
  component_id: str,
69
123
  filter_state_hashable: Tuple[Tuple[str, Any], ...],
@@ -111,7 +165,7 @@ def _set_cached_vue_data(
111
165
 
112
166
 
113
167
  def _prepare_vue_data_cached(
114
- component: 'BaseComponent',
168
+ component: "BaseComponent",
115
169
  component_id: str,
116
170
  filter_state_hashable: Tuple[Tuple[str, Any], ...],
117
171
  state_dict: Dict[str, Any],
@@ -122,6 +176,11 @@ def _prepare_vue_data_cached(
122
176
  Each component caches exactly one entry (its current filter state).
123
177
  When filter state changes, old entry is replaced - memory stays bounded.
124
178
 
179
+ For components with dynamic annotations (e.g., LinePlot linked to SequenceView):
180
+ - Cache stores BASE data (without annotation columns)
181
+ - Annotations are re-applied fresh each render (cheap operation)
182
+ - Final hash reflects current annotation state
183
+
125
184
  Args:
126
185
  component: The component to prepare data for
127
186
  component_id: Unique identifier for this component
@@ -131,19 +190,56 @@ def _prepare_vue_data_cached(
131
190
  Returns:
132
191
  Tuple of (vue_data dict, data_hash string)
133
192
  """
134
- # Check cache first
193
+ # Check if component has dynamic annotations (e.g., LinePlot linked to SequenceView)
194
+ has_dynamic_annotations = (
195
+ getattr(component, "_dynamic_annotations", None) is not None
196
+ )
197
+
198
+ # Try cache first (works for ALL components now)
135
199
  cached = _get_cached_vue_data(component_id, filter_state_hashable)
200
+
136
201
  if cached is not None:
137
- return cached
202
+ cached_data, cached_hash = cached
203
+
204
+ if has_dynamic_annotations:
205
+ # Cache hit but need to re-apply annotations (they may have changed)
206
+ # Use component method to apply fresh annotations to cached base data
207
+ if hasattr(component, "_apply_fresh_annotations"):
208
+ # Shallow copy to avoid mutating cache
209
+ vue_data = component._apply_fresh_annotations(dict(cached_data))
210
+ # Hash final data (includes new annotations)
211
+ data_hash = _hash_data(vue_data)
212
+ return vue_data, data_hash
213
+ else:
214
+ # Fallback: recompute if component doesn't support fresh annotations
215
+ vue_data = component._prepare_vue_data(state_dict)
216
+ data_hash = _hash_data(vue_data)
217
+ return vue_data, data_hash
218
+ else:
219
+ # No dynamic annotations - return cached as-is
220
+ return cached_data, cached_hash
138
221
 
139
222
  # Cache miss - compute data
140
223
  vue_data = component._prepare_vue_data(state_dict)
141
- data_hash = _hash_data(vue_data)
142
224
 
143
- # Store in cache (replaces any previous entry for this component)
144
- _set_cached_vue_data(component_id, filter_state_hashable, vue_data, data_hash)
145
-
146
- return vue_data, data_hash
225
+ if has_dynamic_annotations:
226
+ # Store BASE data (without dynamic annotation columns) in cache
227
+ if hasattr(component, "_strip_dynamic_columns"):
228
+ base_data = component._strip_dynamic_columns(vue_data)
229
+ else:
230
+ # Fallback: store without _plotConfig (may have stale column refs)
231
+ base_data = {k: v for k, v in vue_data.items() if k != "_plotConfig"}
232
+ base_hash = _hash_data(base_data)
233
+ _set_cached_vue_data(component_id, filter_state_hashable, base_data, base_hash)
234
+
235
+ # Return full data with annotations
236
+ data_hash = _hash_data(vue_data)
237
+ return vue_data, data_hash
238
+ else:
239
+ # Store complete data in cache
240
+ data_hash = _hash_data(vue_data)
241
+ _set_cached_vue_data(component_id, filter_state_hashable, vue_data, data_hash)
242
+ return vue_data, data_hash
147
243
 
148
244
 
149
245
  def get_vue_component_function():
@@ -159,11 +255,11 @@ def get_vue_component_function():
159
255
  import streamlit.components.v1 as st_components
160
256
 
161
257
  # Check for development mode
162
- dev_mode = os.environ.get('SVC_DEV_MODE', 'false').lower() == 'true'
258
+ dev_mode = os.environ.get("SVC_DEV_MODE", "false").lower() == "true"
163
259
 
164
260
  if dev_mode:
165
261
  # Development mode: connect to Vite dev server
166
- dev_url = os.environ.get('SVC_DEV_URL', 'http://localhost:5173')
262
+ dev_url = os.environ.get("SVC_DEV_URL", "http://localhost:5173")
167
263
  _vue_component_func = st_components.declare_component(
168
264
  "streamlit_vue_component",
169
265
  url=dev_url,
@@ -171,7 +267,7 @@ def get_vue_component_function():
171
267
  else:
172
268
  # Production mode: use built component
173
269
  parent_dir = os.path.dirname(os.path.abspath(__file__))
174
- build_dir = os.path.join(parent_dir, '..', 'js-component', 'dist')
270
+ build_dir = os.path.join(parent_dir, "..", "js-component", "dist")
175
271
 
176
272
  if not os.path.exists(build_dir):
177
273
  raise RuntimeError(
@@ -189,8 +285,8 @@ def get_vue_component_function():
189
285
 
190
286
 
191
287
  def render_component(
192
- component: 'BaseComponent',
193
- state_manager: 'StateManager',
288
+ component: "BaseComponent",
289
+ state_manager: "StateManager",
194
290
  key: Optional[str] = None,
195
291
  height: Optional[int] = None,
196
292
  ) -> Any:
@@ -223,15 +319,30 @@ def render_component(
223
319
  if key is None:
224
320
  key = f"svc_{component._cache_id}_{hash(str(component._interactivity))}"
225
321
 
322
+ # Check if component has required filters without values
323
+ # Don't send potentially huge unfiltered datasets - wait for filter selection
324
+ filters = getattr(component, "_filters", None) or {}
325
+ filter_defaults = getattr(component, "_filter_defaults", None) or {}
326
+
327
+ awaiting_filter = False
328
+ if filters:
329
+ # Check each filter - if no value AND no default, we're waiting
330
+ for identifier in filters.keys():
331
+ filter_value = state.get(identifier)
332
+ has_default = identifier in filter_defaults
333
+ if filter_value is None and not has_default:
334
+ awaiting_filter = True
335
+ break
336
+
226
337
  # Extract state keys that affect this component's data for cache key
227
338
  # This includes filters and any additional dependencies (e.g., zoom for heatmaps)
228
339
  # Uses get_state_dependencies() which can be overridden by subclasses
229
340
  state_keys = set(component.get_state_dependencies())
230
341
 
231
342
  # Build hashable version for cache key (converts dicts/lists to JSON strings)
232
- filter_state_hashable = tuple(sorted(
233
- (k, _make_hashable(state.get(k))) for k in state_keys
234
- ))
343
+ filter_state_hashable = tuple(
344
+ sorted((k, _make_hashable(state.get(k))) for k in state_keys)
345
+ )
235
346
 
236
347
  # Build original state dict for passing to _prepare_vue_data
237
348
  # (contains actual values, not JSON strings)
@@ -241,13 +352,19 @@ def render_component(
241
352
  component_type = component._get_vue_component_name()
242
353
  component_id = f"{component_type}:{key}"
243
354
 
244
- # Get component data using per-component cache
245
- # Each component stores exactly one entry (current filter state)
246
- # - Filterless components: filter_state=() always → always cache hit
247
- # - Filtered components: cache hit when filter values unchanged
248
- vue_data, data_hash = _prepare_vue_data_cached(
249
- component, component_id, filter_state_hashable, relevant_state
250
- )
355
+ # Skip data preparation if awaiting required filter selection
356
+ # This prevents sending huge unfiltered datasets
357
+ if awaiting_filter:
358
+ vue_data = {}
359
+ data_hash = "awaiting_filter"
360
+ else:
361
+ # Get component data using per-component cache
362
+ # Each component stores exactly one entry (current filter state)
363
+ # - Filterless components: filter_state=() always → always cache hit
364
+ # - Filtered components: cache hit when filter values unchanged
365
+ vue_data, data_hash = _prepare_vue_data_cached(
366
+ component, component_id, filter_state_hashable, relevant_state
367
+ )
251
368
 
252
369
  component_args = component._get_component_args()
253
370
 
@@ -268,17 +385,19 @@ def render_component(
268
385
  # Vue echoes null/None if it has no data, so mismatch triggers send
269
386
  # IMPORTANT: Also send data if vue_echoed_hash is None - this means Vue
270
387
  # hasn't confirmed receipt yet (e.g., after page navigation destroys Vue component)
388
+ # NOTE: Hash now correctly reflects annotation state (annotations included in hash),
389
+ # so normal comparison works for all components including those with dynamic annotations
271
390
  data_changed = (vue_echoed_hash is None) or (vue_echoed_hash != data_hash)
272
391
 
273
392
  # Only include full data if hash changed
274
393
  if data_changed:
275
394
  # Convert any non-pandas data to pandas for Arrow serialization
276
395
  # pandas DataFrames are passed through (already optimal for Arrow)
277
- # Also filter out internal keys (starting with _)
396
+ # Filter out _hash (internal metadata) but keep _plotConfig (needed by Vue)
278
397
  converted_data = {}
279
398
  for data_key, value in vue_data.items():
280
- if data_key.startswith('_'):
281
- # Skip metadata keys like _hash, _plotConfig
399
+ if data_key == "_hash":
400
+ # Skip internal hash metadata
282
401
  continue
283
402
  if isinstance(value, pl.LazyFrame):
284
403
  converted_data[data_key] = value.collect().to_pandas()
@@ -289,9 +408,10 @@ def render_component(
289
408
  # pandas DataFrames pass through unchanged (optimal for Arrow)
290
409
  data_payload = {
291
410
  **converted_data,
292
- 'selection_store': state,
293
- 'hash': data_hash,
294
- 'dataChanged': True,
411
+ "selection_store": state,
412
+ "hash": data_hash,
413
+ "dataChanged": True,
414
+ "awaitingFilter": awaiting_filter,
295
415
  }
296
416
  # Note: We don't pre-set the hash here anymore. We trust Vue's echo
297
417
  # at the end of the render cycle. This ensures we detect when Vue
@@ -299,28 +419,29 @@ def render_component(
299
419
  else:
300
420
  # Data unchanged - only send hash and state, Vue will use cached data
301
421
  data_payload = {
302
- 'selection_store': state,
303
- 'hash': data_hash,
304
- 'dataChanged': False,
422
+ "selection_store": state,
423
+ "hash": data_hash,
424
+ "dataChanged": False,
425
+ "awaitingFilter": awaiting_filter,
305
426
  }
306
427
 
307
428
  # Add height to component args if specified
308
429
  if height is not None:
309
- component_args['height'] = height
430
+ component_args["height"] = height
310
431
 
311
432
  # Component layout: [[{componentArgs: {...}}]]
312
- components = [[{'componentArgs': component_args}]]
433
+ components = [[{"componentArgs": component_args}]]
313
434
 
314
435
  # Call Vue component
315
436
  vue_func = get_vue_component_function()
316
437
 
317
438
  kwargs = {
318
- 'components': components,
319
- 'key': key,
439
+ "components": components,
440
+ "key": key,
320
441
  **data_payload,
321
442
  }
322
443
  if height is not None:
323
- kwargs['height'] = height
444
+ kwargs["height"] = height
324
445
 
325
446
  result = vue_func(**kwargs)
326
447
 
@@ -329,11 +450,39 @@ def render_component(
329
450
  # Store Vue's echoed hash for next render comparison
330
451
  # ALWAYS update from Vue's echo - if Vue lost its data (page navigation),
331
452
  # it echoes None, and we need to know that to resend data next time
332
- vue_hash = result.get('_vueDataHash')
453
+ vue_hash = result.get("_vueDataHash")
333
454
  st.session_state[_VUE_ECHOED_HASH_KEY][hash_tracking_key] = vue_hash
334
455
 
335
- # Update state and rerun if state changed
336
- if state_manager.update_from_vue(result):
456
+ # Capture annotations from Vue (e.g., from SequenceView)
457
+ # Use hash-based change detection for robustness
458
+ annotations = result.get("_annotations")
459
+ annotations_changed = False
460
+
461
+ if annotations is not None:
462
+ # Compute hash of new annotations
463
+ peak_ids = annotations.get("peak_id", [])
464
+ new_hash = hash(tuple(peak_ids)) if peak_ids else 0
465
+
466
+ # Compare with stored hash
467
+ ann_hash_key = f"_svc_ann_hash_{key}"
468
+ old_hash = st.session_state.get(ann_hash_key)
469
+
470
+ if old_hash != new_hash:
471
+ annotations_changed = True
472
+ st.session_state[ann_hash_key] = new_hash
473
+
474
+ _store_component_annotations(key, annotations)
475
+ else:
476
+ # Annotations cleared - check if we had annotations before
477
+ ann_hash_key = f"_svc_ann_hash_{key}"
478
+ if st.session_state.get(ann_hash_key) is not None:
479
+ annotations_changed = True
480
+ st.session_state[ann_hash_key] = None
481
+
482
+ # Update state and rerun if state changed OR annotations changed
483
+ # Hash comparison will naturally detect changes on the next render
484
+ state_changed = state_manager.update_from_vue(result)
485
+ if state_changed or annotations_changed:
337
486
  st.rerun()
338
487
 
339
488
  return result
@@ -356,8 +505,9 @@ def _hash_data(data: Dict[str, Any]) -> str:
356
505
 
357
506
  hash_parts = []
358
507
  for key, value in sorted(data.items()):
359
- if key.startswith('_'):
360
- continue # Skip metadata
508
+ # Skip internal metadata but NOT dynamic annotation columns
509
+ if key.startswith("_") and not key.startswith("_dynamic"):
510
+ continue
361
511
  if isinstance(value, pd.DataFrame):
362
512
  # Efficient hash for DataFrames
363
513
  df_polars = pl.from_pandas(value)
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openms-insight
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Interactive visualization components for mass spectrometry data in Streamlit
5
5
  Project-URL: Homepage, https://github.com/t0mdavid-m/OpenMS-Insight
6
6
  Project-URL: Documentation, https://github.com/t0mdavid-m/OpenMS-Insight#readme
7
7
  Project-URL: Repository, https://github.com/t0mdavid-m/OpenMS-Insight
8
8
  Project-URL: Issues, https://github.com/t0mdavid-m/OpenMS-Insight/issues
9
- Author: Kohlbacher Lab
9
+ Author: Tom David Müller
10
10
  License-Expression: BSD-3-Clause
11
11
  License-File: LICENSE
12
12
  Keywords: mass-spectrometry,openms,plotly,proteomics,streamlit,tabulator,visualization,vue
@@ -37,6 +37,7 @@ Description-Content-Type: text/markdown
37
37
 
38
38
  [![PyPI version](https://badge.fury.io/py/openms-insight.svg)](https://badge.fury.io/py/openms-insight)
39
39
  [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
40
+ [![Tests](https://github.com/t0mdavid-m/OpenMS-Insight/actions/workflows/tests.yml/badge.svg)](https://github.com/t0mdavid-m/OpenMS-Insight/actions/workflows/tests.yml)
40
41
 
41
42
  Interactive visualization components for mass spectrometry data in Streamlit, backed by Vue.js.
42
43
 
@@ -45,10 +46,11 @@ Interactive visualization components for mass spectrometry data in Streamlit, ba
45
46
  - **Cross-component selection linking** via shared identifiers
46
47
  - **Memory-efficient preprocessing** via subprocess isolation
47
48
  - **Automatic disk caching** with config-based invalidation
48
- - **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination
49
+ - **Cache reconstruction** - components can be restored from cache without re-specifying configuration
50
+ - **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination, CSV export
49
51
  - **Line plot component** (Plotly.js) with highlighting, annotations, zoom
50
- - **Heatmap component** (Plotly scattergl) with multi-resolution downsampling
51
- - **Sequence view component** for peptide/protein visualization
52
+ - **Heatmap component** (Plotly scattergl) with multi-resolution downsampling for millions of points
53
+ - **Sequence view component** for peptide visualization with fragment ion matching and auto-zoom
52
54
 
53
55
  ## Installation
54
56
 
@@ -90,9 +92,10 @@ plot(state_manager=state_manager)
90
92
 
91
93
  ## Cross-Component Linking
92
94
 
93
- Components communicate through **identifiers** using two mechanisms:
95
+ Components communicate through **identifiers** using three mechanisms:
94
96
 
95
97
  - **`filters`**: INPUT - filter this component's data by the selection
98
+ - **`filter_defaults`**: INPUT - default value when selection is None
96
99
  - **`interactivity`**: OUTPUT - set a selection when user clicks
97
100
 
98
101
  ```python
@@ -120,6 +123,14 @@ plot = LinePlot(
120
123
  x_column='mass',
121
124
  y_column='intensity',
122
125
  )
126
+
127
+ # Table with filter defaults - shows unannotated data when no identification selected
128
+ annotations = Table(
129
+ cache_id="annotations",
130
+ data_path="annotations.parquet",
131
+ filters={'identification': 'id_idx'},
132
+ filter_defaults={'identification': -1}, # Use -1 when identification is None
133
+ )
123
134
  ```
124
135
 
125
136
  ---
@@ -137,17 +148,27 @@ Table(
137
148
  interactivity={'spectrum': 'scan_id'},
138
149
  column_definitions=[
139
150
  {'field': 'scan_id', 'title': 'Scan', 'sorter': 'number'},
140
- {'field': 'rt', 'title': 'RT (min)', 'sorter': 'number', 'hozAlign': 'right'},
151
+ {'field': 'rt', 'title': 'RT (min)', 'sorter': 'number', 'hozAlign': 'right',
152
+ 'formatter': 'money', 'formatterParams': {'precision': 2, 'symbol': ''}},
141
153
  {'field': 'precursor_mz', 'title': 'm/z', 'sorter': 'number'},
142
154
  ],
143
155
  index_field='scan_id',
144
156
  go_to_fields=['scan_id'],
157
+ initial_sort=[{'column': 'scan_id', 'dir': 'asc'}],
145
158
  default_row=0,
146
159
  pagination=True,
147
- page_size=50,
160
+ page_size=100,
148
161
  )
149
162
  ```
150
163
 
164
+ **Key parameters:**
165
+ - `column_definitions`: List of Tabulator column configs (field, title, sorter, formatter, etc.)
166
+ - `index_field`: Column used as unique row identifier (default: 'id')
167
+ - `go_to_fields`: Columns available in "Go to" navigation
168
+ - `initial_sort`: Default sort configuration
169
+ - `pagination`: Enable pagination for large tables (default: True)
170
+ - `page_size`: Rows per page (default: 100)
171
+
151
172
  ### LinePlot
152
173
 
153
174
  Stick-style line plot using Plotly.js for mass spectra visualization.
@@ -165,9 +186,20 @@ LinePlot(
165
186
  title="MS/MS Spectrum",
166
187
  x_label="m/z",
167
188
  y_label="Intensity",
189
+ styling={
190
+ 'highlightColor': '#E4572E',
191
+ 'selectedColor': '#F3A712',
192
+ 'unhighlightedColor': 'lightblue',
193
+ },
168
194
  )
169
195
  ```
170
196
 
197
+ **Key parameters:**
198
+ - `x_column`, `y_column`: Column names for x/y values
199
+ - `highlight_column`: Boolean/int column indicating which points to highlight
200
+ - `annotation_column`: Text column for labels on highlighted points
201
+ - `styling`: Color configuration dict
202
+
171
203
  ### Heatmap
172
204
 
173
205
  2D scatter heatmap using Plotly scattergl with multi-resolution downsampling for large datasets (millions of points).
@@ -181,28 +213,67 @@ Heatmap(
181
213
  intensity_column='intensity',
182
214
  interactivity={'spectrum': 'scan_id', 'peak': 'peak_id'},
183
215
  min_points=30000,
216
+ x_bins=400,
217
+ y_bins=50,
184
218
  title="Peak Map",
185
219
  x_label="Retention Time (min)",
186
220
  y_label="m/z",
221
+ colorscale='Portland',
187
222
  )
188
223
  ```
189
224
 
225
+ **Key parameters:**
226
+ - `x_column`, `y_column`, `intensity_column`: Column names for axes and color
227
+ - `min_points`: Target size for downsampling (default: 20000)
228
+ - `x_bins`, `y_bins`: Grid resolution for spatial binning
229
+ - `colorscale`: Plotly colorscale name (default: 'Portland')
230
+
190
231
  ### SequenceView
191
232
 
192
- Peptide/protein sequence visualization with fragment ion matching.
233
+ Peptide sequence visualization with fragment ion matching. Supports both dynamic (filtered by selection) and static sequences.
193
234
 
194
235
  ```python
236
+ # Dynamic: sequence from DataFrame filtered by selection
195
237
  SequenceView(
196
238
  cache_id="peptide_view",
197
- sequence="PEPTIDEK",
198
- observed_masses=[147.1, 244.2, 359.3, 456.4],
199
- peak_ids=[0, 1, 2, 3],
200
- precursor_mass=944.5,
239
+ sequence_data_path="sequences.parquet", # columns: scan_id, sequence, precursor_charge
240
+ peaks_data_path="peaks.parquet", # columns: scan_id, peak_id, mass, intensity
241
+ filters={'spectrum': 'scan_id'},
201
242
  interactivity={'peak': 'peak_id'},
243
+ deconvolved=False, # peaks are m/z values, consider charge states
202
244
  title="Fragment Coverage",
203
245
  )
246
+
247
+ # Static: single sequence with optional peaks
248
+ SequenceView(
249
+ cache_id="static_peptide",
250
+ sequence_data=("PEPTIDEK", 2), # (sequence, charge) tuple
251
+ peaks_data=peaks_df, # Optional: LazyFrame with mass, intensity columns
252
+ deconvolved=True, # peaks are neutral masses
253
+ )
254
+
255
+ # Simplest: just a sequence string
256
+ SequenceView(
257
+ cache_id="simple_seq",
258
+ sequence_data="PEPTIDEK", # charge defaults to 1
259
+ )
204
260
  ```
205
261
 
262
+ **Key parameters:**
263
+ - `sequence_data`: LazyFrame, (sequence, charge) tuple, or sequence string
264
+ - `sequence_data_path`: Path to parquet with sequence data
265
+ - `peaks_data` / `peaks_data_path`: Optional peak data for fragment matching
266
+ - `deconvolved`: If False (default), peaks are m/z and matching considers charge states
267
+ - `annotation_config`: Dict with ion_types, tolerance, neutral_losses settings
268
+
269
+ **Features:**
270
+ - Automatic fragment ion matching (a/b/c/x/y/z ions)
271
+ - Configurable mass tolerance (ppm or Da)
272
+ - Neutral loss support (-H2O, -NH3)
273
+ - Auto-zoom for short sequences (≤20 amino acids)
274
+ - Fragment coverage statistics
275
+ - Click-to-select peaks with cross-component linking
276
+
206
277
  ---
207
278
 
208
279
  ## Shared Component Arguments
@@ -212,11 +283,59 @@ All components accept these common arguments:
212
283
  | Argument | Type | Default | Description |
213
284
  |----------|------|---------|-------------|
214
285
  | `cache_id` | `str` | **Required** | Unique identifier for disk cache |
215
- | `data_path` | `str` | `None` | Path to parquet file (preferred - uses subprocess for memory efficiency) |
216
- | `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path, in-process preprocessing) |
286
+ | `data_path` | `str` | `None` | Path to parquet file (preferred for memory efficiency) |
287
+ | `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path) |
217
288
  | `filters` | `Dict[str, str]` | `None` | Map identifier -> column for filtering |
289
+ | `filter_defaults` | `Dict[str, Any]` | `None` | Default values when selection is None |
218
290
  | `interactivity` | `Dict[str, str]` | `None` | Map identifier -> column for click actions |
219
291
  | `cache_path` | `str` | `"."` | Base directory for cache storage |
292
+ | `regenerate_cache` | `bool` | `False` | Force cache regeneration |
293
+
294
+ ## Memory-Efficient Preprocessing
295
+
296
+ When working with large datasets (especially heatmaps with millions of points), use `data_path` instead of `data` to enable subprocess preprocessing:
297
+
298
+ ```python
299
+ # Subprocess preprocessing (recommended for large datasets)
300
+ # Memory is fully released after cache creation
301
+ heatmap = Heatmap(
302
+ data_path="large_peaks.parquet", # triggers subprocess
303
+ cache_id="peaks_heatmap",
304
+ ...
305
+ )
306
+
307
+ # In-process preprocessing (for smaller datasets or debugging)
308
+ # Memory may be retained by allocator after preprocessing
309
+ heatmap = Heatmap(
310
+ data=pl.scan_parquet("large_peaks.parquet"), # runs in main process
311
+ cache_id="peaks_heatmap",
312
+ ...
313
+ )
314
+ ```
315
+
316
+ **Why this matters:** Memory allocators like mimalloc (used by Polars) retain freed memory for performance. For large datasets, this can cause memory usage to stay high even after preprocessing completes. Running preprocessing in a subprocess guarantees all memory is returned to the OS when the subprocess exits.
317
+
318
+ ## Cache Reconstruction
319
+
320
+ Components can be reconstructed from cache using only `cache_id` and `cache_path`. All configuration is restored from the cached manifest:
321
+
322
+ ```python
323
+ # First run: create component with data and config
324
+ table = Table(
325
+ cache_id="my_table",
326
+ data_path="data.parquet",
327
+ filters={'spectrum': 'scan_id'},
328
+ column_definitions=[...],
329
+ cache_path="./cache",
330
+ )
331
+
332
+ # Subsequent runs: reconstruct from cache only
333
+ table = Table(
334
+ cache_id="my_table",
335
+ cache_path="./cache",
336
+ )
337
+ # All config (filters, column_definitions, etc.) restored from cache
338
+ ```
220
339
 
221
340
  ## Rendering
222
341
 
@@ -243,14 +362,38 @@ npm install
243
362
  npm run build
244
363
  ```
245
364
 
246
- ### Development Mode
365
+ ### Development Mode (Hot Reload)
247
366
 
248
367
  ```bash
368
+ # Terminal 1: Vue dev server
249
369
  cd js-component
250
370
  npm run dev
251
371
 
252
- # In another terminal:
253
- export SVC_DEV_MODE=true
254
- export SVC_DEV_URL=http://localhost:5173
255
- streamlit run app.py
372
+ # Terminal 2: Streamlit with dev mode
373
+ SVC_DEV_MODE=true SVC_DEV_URL=http://localhost:5173 streamlit run app.py
374
+ ```
375
+
376
+ ### Running Tests
377
+
378
+ ```bash
379
+ # Python tests
380
+ pip install -e ".[dev]"
381
+ pytest tests/ -v
382
+
383
+ # TypeScript type checking
384
+ cd js-component
385
+ npm run type-check
386
+ ```
387
+
388
+ ### Linting and Formatting
389
+
390
+ ```bash
391
+ # Python
392
+ ruff check .
393
+ ruff format .
394
+
395
+ # JavaScript/TypeScript
396
+ cd js-component
397
+ npm run lint
398
+ npm run format
256
399
  ```