openms-insight 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -81,35 +81,30 @@ def compute_dataframe_hash(df: pl.DataFrame) -> str:
81
81
  return hashlib.sha256(hash_input).hexdigest()
82
82
 
83
83
 
84
- @st.cache_data(ttl=300, max_entries=100)
85
- def _cached_filter_and_collect(
86
- _data: pl.LazyFrame,
84
+ def _filter_and_collect(
85
+ data: pl.LazyFrame,
87
86
  filters_tuple: Tuple[Tuple[str, str], ...],
88
87
  state_tuple: Tuple[Tuple[str, Any], ...],
89
88
  columns_tuple: Optional[Tuple[str, ...]] = None,
90
- filter_defaults_tuple: Optional[Tuple[Tuple[str, Any], ...]] = None,
91
89
  ) -> Tuple[pd.DataFrame, str]:
92
90
  """
93
- Filter data and collect with caching.
91
+ Filter data and collect.
94
92
 
95
- This function is cached by Streamlit, so repeated calls with the same
96
- filter state will return cached results without re-executing the query.
93
+ This function executes the filter query. Caching is handled at a higher
94
+ level (per-component in bridge.py) to ensure memory = O(num_components).
97
95
 
98
96
  Returns pandas DataFrame for efficient Arrow serialization to frontend.
99
97
 
100
98
  Args:
101
- _data: LazyFrame to filter (underscore prefix tells st.cache_data to hash by id)
99
+ data: LazyFrame to filter
102
100
  filters_tuple: Tuple of (identifier, column) pairs from filters dict
103
101
  state_tuple: Tuple of (identifier, value) pairs for current selection state
104
102
  (already has defaults applied from _make_cache_key)
105
103
  columns_tuple: Optional tuple of column names to select (projection)
106
- filter_defaults_tuple: Optional tuple of (identifier, default_value) pairs
107
- (included for cache key differentiation)
108
104
 
109
105
  Returns:
110
106
  Tuple of (pandas DataFrame, hash string)
111
107
  """
112
- data = _data
113
108
  filters = dict(filters_tuple)
114
109
  state = dict(state_tuple) # Already has defaults applied
115
110
 
@@ -182,19 +177,17 @@ def filter_and_collect_cached(
182
177
  if isinstance(data, pl.DataFrame):
183
178
  data = data.lazy()
184
179
 
185
- # Convert to tuples for caching (dicts aren't hashable)
180
+ # Convert to tuples for consistent processing
186
181
  filters_tuple = tuple(sorted(filters.items()))
187
182
  # Pass filter_defaults to _make_cache_key so defaults are applied to state
188
183
  state_tuple = _make_cache_key(filters, state, filter_defaults)
189
184
  columns_tuple = tuple(columns) if columns else None
190
- filter_defaults_tuple = tuple(sorted(filter_defaults.items())) if filter_defaults else None
191
185
 
192
- return _cached_filter_and_collect(
186
+ return _filter_and_collect(
193
187
  data,
194
188
  filters_tuple,
195
189
  state_tuple,
196
190
  columns_tuple,
197
- filter_defaults_tuple,
198
191
  )
199
192
 
200
193
 
@@ -42,43 +42,107 @@ _vue_component_func = None
42
42
  # Vue's echoed hash doesn't match the current data hash
43
43
  _VUE_ECHOED_HASH_KEY = "_svc_vue_echoed_hashes"
44
44
 
45
+ # Session state key for per-component data cache
46
+ # Each component stores exactly one entry (current filter state)
47
+ _COMPONENT_DATA_CACHE_KEY = "_svc_component_data_cache"
45
48
 
46
49
 
47
- @st.cache_data(max_entries=10, show_spinner=False)
48
- def _cached_prepare_vue_data(
49
- _component: 'BaseComponent',
50
+ def _get_component_cache() -> Dict[str, Any]:
51
+ """Get per-component data cache from session state."""
52
+ if _COMPONENT_DATA_CACHE_KEY not in st.session_state:
53
+ st.session_state[_COMPONENT_DATA_CACHE_KEY] = {}
54
+ return st.session_state[_COMPONENT_DATA_CACHE_KEY]
55
+
56
+
57
+ def clear_component_cache() -> None:
58
+ """
59
+ Clear all per-component cached data.
60
+
61
+ Call this when loading a new file to ensure fresh data is sent to Vue.
62
+ """
63
+ if _COMPONENT_DATA_CACHE_KEY in st.session_state:
64
+ st.session_state[_COMPONENT_DATA_CACHE_KEY].clear()
65
+
66
+
67
+ def _get_cached_vue_data(
50
68
  component_id: str,
51
69
  filter_state_hashable: Tuple[Tuple[str, Any], ...],
52
- _data_id: int,
53
- _state_dict: Dict[str, Any],
54
- ) -> Tuple[Dict[str, Any], str]:
70
+ ) -> Optional[Tuple[Dict[str, Any], str]]:
55
71
  """
56
- Cached wrapper for _prepare_vue_data.
72
+ Get cached Vue data for component if filter state matches.
57
73
 
58
- Cache key is based on:
59
- - component_id: unique key for this component instance
60
- - filter_state_hashable: hashable version of state values (for cache key only)
74
+ Each component has at most one cached entry. If no entry exists or the
75
+ filter state has changed, returns None (cache miss).
61
76
 
62
- The _component, _data_id, and _state_dict parameters are prefixed with underscore
63
- so they are not hashed (component instances are not hashable, and state_dict
64
- may contain unhashable values like dicts).
77
+ Args:
78
+ component_id: Unique identifier for this component
79
+ filter_state_hashable: Current filter state (for cache validation)
80
+
81
+ Returns:
82
+ Tuple of (vue_data, data_hash) if cache hit, None otherwise
83
+ """
84
+ cache = _get_component_cache()
85
+ if component_id in cache:
86
+ cached_state, vue_data, data_hash = cache[component_id]
87
+ if cached_state == filter_state_hashable:
88
+ return (vue_data, data_hash)
89
+ return None
90
+
91
+
92
+ def _set_cached_vue_data(
93
+ component_id: str,
94
+ filter_state_hashable: Tuple[Tuple[str, Any], ...],
95
+ vue_data: Dict[str, Any],
96
+ data_hash: str,
97
+ ) -> None:
98
+ """
99
+ Cache Vue data for component, replacing any previous entry.
100
+
101
+ Each component stores exactly one entry, so memory = O(num_components).
102
+
103
+ Args:
104
+ component_id: Unique identifier for this component
105
+ filter_state_hashable: Current filter state
106
+ vue_data: Data to cache
107
+ data_hash: Hash of the data
108
+ """
109
+ cache = _get_component_cache()
110
+ cache[component_id] = (filter_state_hashable, vue_data, data_hash)
111
+
112
+
113
+ def _prepare_vue_data_cached(
114
+ component: 'BaseComponent',
115
+ component_id: str,
116
+ filter_state_hashable: Tuple[Tuple[str, Any], ...],
117
+ state_dict: Dict[str, Any],
118
+ ) -> Tuple[Dict[str, Any], str]:
119
+ """
120
+ Prepare Vue data with per-component caching.
121
+
122
+ Each component caches exactly one entry (its current filter state).
123
+ When filter state changes, old entry is replaced - memory stays bounded.
65
124
 
66
125
  Args:
67
- _component: The component to prepare data for (not hashed)
126
+ component: The component to prepare data for
68
127
  component_id: Unique identifier for this component
69
- filter_state_hashable: Tuple of (identifier, hashable_value) for cache key
70
- _data_id: id() of the raw data object (not used in cache key)
71
- _state_dict: Original state dict with actual values (not hashed)
128
+ filter_state_hashable: Hashable version of filter state (for cache key)
129
+ state_dict: Original state dict with actual values
72
130
 
73
131
  Returns:
74
132
  Tuple of (vue_data dict, data_hash string)
75
133
  """
76
- # Use the original state dict (not the hashable version)
77
- vue_data = _component._prepare_vue_data(_state_dict)
134
+ # Check cache first
135
+ cached = _get_cached_vue_data(component_id, filter_state_hashable)
136
+ if cached is not None:
137
+ return cached
78
138
 
79
- # Compute hash before any conversion
139
+ # Cache miss - compute data
140
+ vue_data = component._prepare_vue_data(state_dict)
80
141
  data_hash = _hash_data(vue_data)
81
142
 
143
+ # Store in cache (replaces any previous entry for this component)
144
+ _set_cached_vue_data(component_id, filter_state_hashable, vue_data, data_hash)
145
+
82
146
  return vue_data, data_hash
83
147
 
84
148
 
@@ -177,15 +241,12 @@ def render_component(
177
241
  component_type = component._get_vue_component_name()
178
242
  component_id = f"{component_type}:{key}"
179
243
 
180
- # Get data identity - cache invalidates if raw data object changes
181
- data_id = id(component._raw_data)
182
-
183
- # Get component data using cached function
184
- # Cache key: (component_id, filter_state_hashable, data_id)
244
+ # Get component data using per-component cache
245
+ # Each component stores exactly one entry (current filter state)
185
246
  # - Filterless components: filter_state=() always → cache hit after the first render (until the cache is cleared)
186
247
  # - Filtered components: cache hit when filter values unchanged
187
- vue_data, data_hash = _cached_prepare_vue_data(
188
- component, component_id, filter_state_hashable, data_id, relevant_state
248
+ vue_data, data_hash = _prepare_vue_data_cached(
249
+ component, component_id, filter_state_hashable, relevant_state
189
250
  )
190
251
 
191
252
  component_args = component._get_component_args()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openms-insight
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Interactive visualization components for mass spectrometry data in Streamlit
5
5
  Project-URL: Homepage, https://github.com/t0mdavid-m/OpenMS-Insight
6
6
  Project-URL: Documentation, https://github.com/t0mdavid-m/OpenMS-Insight#readme
@@ -43,7 +43,7 @@ Interactive visualization components for mass spectrometry data in Streamlit, ba
43
43
  ## Features
44
44
 
45
45
  - **Cross-component selection linking** via shared identifiers
46
- - **Polars LazyFrame support** for efficient data handling
46
+ - **Memory-efficient preprocessing** via subprocess isolation
47
47
  - **Automatic disk caching** with config-based invalidation
48
48
  - **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination
49
49
  - **Line plot component** (Plotly.js) with highlighting, annotations, zoom
@@ -60,7 +60,6 @@ pip install openms-insight
60
60
 
61
61
  ```python
62
62
  import streamlit as st
63
- import polars as pl
64
63
  from openms_insight import Table, LinePlot, StateManager
65
64
 
66
65
  # Create state manager for cross-component linking
@@ -69,7 +68,7 @@ state_manager = StateManager()
69
68
  # Create a table - clicking a row sets the 'item' selection
70
69
  table = Table(
71
70
  cache_id="items_table",
72
- data=pl.scan_parquet("items.parquet"),
71
+ data_path="items.parquet",
73
72
  interactivity={'item': 'item_id'},
74
73
  column_definitions=[
75
74
  {'field': 'item_id', 'title': 'ID', 'sorter': 'number'},
@@ -81,7 +80,7 @@ table(state_manager=state_manager)
81
80
  # Create a linked plot - filters by the selected 'item'
82
81
  plot = LinePlot(
83
82
  cache_id="values_plot",
84
- data=pl.scan_parquet("values.parquet"),
83
+ data_path="values.parquet",
85
84
  filters={'item': 'item_id'},
86
85
  x_column='x',
87
86
  y_column='y',
@@ -100,14 +99,14 @@ Components communicate through **identifiers** using two mechanisms:
100
99
  # Master table: no filters, sets 'spectrum' on click
101
100
  master = Table(
102
101
  cache_id="spectra",
103
- data=spectra_data,
102
+ data_path="spectra.parquet",
104
103
  interactivity={'spectrum': 'scan_id'}, # Click -> sets spectrum=scan_id
105
104
  )
106
105
 
107
106
  # Detail table: filters by 'spectrum', sets 'peak' on click
108
107
  detail = Table(
109
108
  cache_id="peaks",
110
- data=peaks_data,
109
+ data_path="peaks.parquet",
111
110
  filters={'spectrum': 'scan_id'}, # Filters where scan_id = selected spectrum
112
111
  interactivity={'peak': 'peak_id'}, # Click -> sets peak=peak_id
113
112
  )
@@ -115,7 +114,7 @@ detail = Table(
115
114
  # Plot: filters by 'spectrum', highlights selected 'peak'
116
115
  plot = LinePlot(
117
116
  cache_id="plot",
118
- data=peaks_data,
117
+ data_path="peaks.parquet",
119
118
  filters={'spectrum': 'scan_id'},
120
119
  interactivity={'peak': 'peak_id'},
121
120
  x_column='mass',
@@ -134,7 +133,7 @@ Interactive table using Tabulator.js with filtering dialogs, sorting, pagination
134
133
  ```python
135
134
  Table(
136
135
  cache_id="spectra_table",
137
- data=pl.scan_parquet("spectra.parquet"),
136
+ data_path="spectra.parquet",
138
137
  interactivity={'spectrum': 'scan_id'},
139
138
  column_definitions=[
140
139
  {'field': 'scan_id', 'title': 'Scan', 'sorter': 'number'},
@@ -156,7 +155,7 @@ Stick-style line plot using Plotly.js for mass spectra visualization.
156
155
  ```python
157
156
  LinePlot(
158
157
  cache_id="spectrum_plot",
159
- data=pl.scan_parquet("peaks.parquet"),
158
+ data_path="peaks.parquet",
160
159
  filters={'spectrum': 'scan_id'},
161
160
  interactivity={'peak': 'peak_id'},
162
161
  x_column='mass',
@@ -176,7 +175,7 @@ LinePlot(
176
175
  ```python
177
176
  Heatmap(
178
177
  cache_id="peaks_heatmap",
179
- data=pl.scan_parquet("all_peaks.parquet"),
178
+ data_path="all_peaks.parquet",
180
179
  x_column='retention_time',
181
180
  y_column='mass',
182
181
  intensity_column='intensity',
@@ -213,7 +212,8 @@ All components accept these common arguments:
213
212
  | Argument | Type | Default | Description |
214
213
  |----------|------|---------|-------------|
215
214
  | `cache_id` | `str` | **Required** | Unique identifier for disk cache |
216
- | `data` | `pl.LazyFrame` | `None` | Polars LazyFrame with source data |
215
+ | `data_path` | `str` | `None` | Path to parquet file (preferred - uses subprocess for memory efficiency) |
216
+ | `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path, in-process preprocessing) |
217
217
  | `filters` | `Dict[str, str]` | `None` | Map identifier -> column for filtering |
218
218
  | `interactivity` | `Dict[str, str]` | `None` | Map identifier -> column for click actions |
219
219
  | `cache_path` | `str` | `"."` | Base directory for cache storage |
@@ -1,27 +1,28 @@
1
1
  openms_insight/__init__.py,sha256=-QMPUfCk1DwwlQKW6NFSZodRz-r82R4rJPOiWDf_mwA,830
2
2
  openms_insight/components/__init__.py,sha256=aNy1E8ttqQiiA5Ka9sviRy17gQT8lfc_EHsl1v1o-UQ,177
3
- openms_insight/components/heatmap.py,sha256=wqNgmccxI5PeTC08kVf06UUtVEg8YhUf_eY9CN4wAMw,33125
4
- openms_insight/components/lineplot.py,sha256=klGzpGO-gi76sIocr9T4TYUTV-ztfIIAtdd5k-U-aGE,19382
3
+ openms_insight/components/heatmap.py,sha256=7nkNqdogEj6vnm90FeiTOyDtcy_vlb-HydcgZjmwSd4,34612
4
+ openms_insight/components/lineplot.py,sha256=fcul7W2RZlTQBse6pDbTAS14_HE14gLDdsUZ-YLjgAQ,19948
5
5
  openms_insight/components/sequenceview.py,sha256=IPLyUiXTLYwdZ79B3pZDH1intUsjBIknTSLUQ2ftdD4,14359
6
- openms_insight/components/table.py,sha256=VaUbn2lckeT3shqn9quDvw1HGMsiQrIPPK4w1XQFzzc,14795
6
+ openms_insight/components/table.py,sha256=P2VIGcPf2iuVJ07rl1el_kTjUKBjlJ_PQeFQAO9sgbw,15361
7
7
  openms_insight/core/__init__.py,sha256=yKRgLfRm1CQm6ZGqQiyDwblzw2Qh3jw57U-ZSd63PYk,338
8
- openms_insight/core/base.py,sha256=e939N2cu0TEllj09Uw7AKS0FqxE2n_XecwF3ckM_YM0,14746
8
+ openms_insight/core/base.py,sha256=IUtnLnoLz6yFRsyGrYaCRzmoh6Eq6dRdnpGj2owxnd4,15754
9
9
  openms_insight/core/cache.py,sha256=3fnPDWjuWUnxazK2XflcUIeRZZPQ3N45kAKYu-xGBKw,1197
10
10
  openms_insight/core/registry.py,sha256=iBb9xWvgOisxjVX74wtCvqWO20eBW5xH-XcNcVuW-Dg,2107
11
11
  openms_insight/core/state.py,sha256=VhY_3Yg-zbx9VsdqHElsNyD2UbJj6ApE4TRcUywDrjQ,6871
12
+ openms_insight/core/subprocess_preprocess.py,sha256=E08qEyVinwRM_PXq0bZk-lHJcYeDHmdr3ok4yTRC63I,3155
12
13
  openms_insight/preprocessing/__init__.py,sha256=XFnxlvG-VRMrFbreGmFSIDqfYhev8WPUyZluadMmCgY,462
13
14
  openms_insight/preprocessing/compression.py,sha256=8oe-vqeSX-Xf1okPX0C0km2DRnl6Iz53Y1UgsLdMUPI,10499
14
- openms_insight/preprocessing/filtering.py,sha256=chvmCqLLsMe5mXwG_S4KUagNqJZFLI_iLKKr9g-MkLM,10907
15
+ openms_insight/preprocessing/filtering.py,sha256=DuVdi7tofZyWbGqdRXJy8G1OYptmf1zxnPgsSoYU_zw,10423
15
16
  openms_insight/rendering/__init__.py,sha256=i9MRFrAEAX5kjbOnMjw2woP_6mEaI6HcxtbrNwUxNaM,198
16
- openms_insight/rendering/bridge.py,sha256=yzXPyK_oKVGBymeDWxgzXeI_jOP7ntJ4p_npOQ1-d1A,11279
17
+ openms_insight/rendering/bridge.py,sha256=cPZWMz07jKOcWkkL7FdCY8w1lNk8bME_1xuulCO3VgY,13146
17
18
  openms_insight/js-component/dist/index.html,sha256=LSJ3B_YmGUrCCdZ1UaZO2p6Wqsih6nTH62Z_0uZxpD8,430
18
- openms_insight/js-component/dist/assets/index.css,sha256=h0LrX503uebeJrJB0LtoPCVwA4QfDBMmOOR_tU2Wkic,883995
19
- openms_insight/js-component/dist/assets/index.js,sha256=pakZe7BdeEjiDCPhzGYqEFumB3WtbdnnXYnBuTDVfWo,6060653
19
+ openms_insight/js-component/dist/assets/index.css,sha256=ACP0LGYUGQusvfDX9igw5DfoQr60P4IFgSqWhACY46A,884228
20
+ openms_insight/js-component/dist/assets/index.js,sha256=DEqCo6eSi4pFuTafgsFg6imO1xe1g-Vx1KQsEAir3t4,6061023
20
21
  openms_insight/js-component/dist/assets/materialdesignicons-webfont.eot,sha256=CxgxBNL8XyYZbnc8d72vLgVQn9QlnS0V7O3Kebh-hPk,1307880
21
22
  openms_insight/js-component/dist/assets/materialdesignicons-webfont.ttf,sha256=YeirpaTpgf4iz3yOi82-oAR251xiw38Bv37jM2HWhCg,1307660
22
23
  openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff,sha256=pZKKDVwvYk5G-Y2bFcL2AEU3f3xZTdeKF1kTLqO0Y-s,587984
23
24
  openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff2,sha256=Zi_vqPL4qVwYWI0hd0eJwQfGTnccvmWmmvRikcQxGvw,403216
24
- openms_insight-0.1.0.dist-info/METADATA,sha256=S7rH1PAk9nJDomrQSDnJFmiGC7v8JcwnfrGcBqe55iM,7288
25
- openms_insight-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
26
- openms_insight-0.1.0.dist-info/licenses/LICENSE,sha256=INFF4rOMmpah7Oi14hLqu7NTOsx56KRRNChAAUcfh2E,1823
27
- openms_insight-0.1.0.dist-info/RECORD,,
25
+ openms_insight-0.1.1.dist-info/METADATA,sha256=_PtgWhRmLvKT0VsXjT9b8vQ7ZkSomdHGeD4sDmaUq_s,7385
26
+ openms_insight-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
27
+ openms_insight-0.1.1.dist-info/licenses/LICENSE,sha256=INFF4rOMmpah7Oi14hLqu7NTOsx56KRRNChAAUcfh2E,1823
28
+ openms_insight-0.1.1.dist-info/RECORD,,