PyPI - openms-insight - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

openms-insight 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

openms_insight/__init__.py +11 -7
openms_insight/components/__init__.py +2 -2
openms_insight/components/heatmap.py +163 -101
openms_insight/components/lineplot.py +377 -82
openms_insight/components/sequenceview.py +677 -213
openms_insight/components/table.py +86 -58
openms_insight/core/__init__.py +2 -2
openms_insight/core/base.py +102 -47
openms_insight/core/registry.py +6 -5
openms_insight/core/state.py +33 -31
openms_insight/core/subprocess_preprocess.py +1 -3
openms_insight/js-component/dist/assets/index.css +1 -1
openms_insight/js-component/dist/assets/index.js +105 -105
openms_insight/preprocessing/__init__.py +5 -6
openms_insight/preprocessing/compression.py +68 -66
openms_insight/preprocessing/filtering.py +39 -13
openms_insight/rendering/__init__.py +1 -1
openms_insight/rendering/bridge.py +192 -42
{openms_insight-0.1.2.dist-info → openms_insight-0.1.3.dist-info}/METADATA +163 -20
openms_insight-0.1.3.dist-info/RECORD +28 -0
openms_insight-0.1.2.dist-info/RECORD +0 -28
{openms_insight-0.1.2.dist-info → openms_insight-0.1.3.dist-info}/WHEEL +0 -0
{openms_insight-0.1.2.dist-info → openms_insight-0.1.3.dist-info}/licenses/LICENSE +0 -0

openms_insight/rendering/bridge.py CHANGED Viewed

@@ -29,6 +29,7 @@ def _make_hashable(value: Any) -> Any:
         return json.dumps(value)
     return value
 if TYPE_CHECKING:
     from ..core.base import BaseComponent
     from ..core.state import StateManager
@@ -46,6 +47,10 @@ _VUE_ECHOED_HASH_KEY = "_svc_vue_echoed_hashes"
 # Each component stores exactly one entry (current filter state)
 _COMPONENT_DATA_CACHE_KEY = "_svc_component_data_cache"
+# Session state key for component annotations (from Vue)
+# Stores annotation dataframes returned by components like SequenceView
+_COMPONENT_ANNOTATIONS_KEY = "_svc_component_annotations"
 def _get_component_cache() -> Dict[str, Any]:
     """Get per-component data cache from session state."""
@@ -64,6 +69,55 @@ def clear_component_cache() -> None:
         st.session_state[_COMPONENT_DATA_CACHE_KEY].clear()
+def _store_component_annotations(
+    component_key: str, annotations: Dict[str, Any]
+) -> None:
+    """
+    Store annotations returned by a Vue component.
+    Args:
+        component_key: Unique key for the component
+        annotations: Dict with annotation arrays (e.g., peak_id, highlight_color, annotation)
+    """
+    if _COMPONENT_ANNOTATIONS_KEY not in st.session_state:
+        st.session_state[_COMPONENT_ANNOTATIONS_KEY] = {}
+    st.session_state[_COMPONENT_ANNOTATIONS_KEY][component_key] = annotations
+def get_component_annotations(component_key: Optional[str]) -> Optional[pl.DataFrame]:
+    """
+    Get annotations stored by a Vue component.
+    Args:
+        component_key: Unique key for the component
+    Returns:
+        Polars DataFrame with annotations, or None if not available
+    """
+    if component_key is None:
+        return None
+    if _COMPONENT_ANNOTATIONS_KEY not in st.session_state:
+        return None
+    annotations = st.session_state[_COMPONENT_ANNOTATIONS_KEY].get(component_key)
+    if annotations is None:
+        return None
+    # Convert to DataFrame
+    try:
+        # annotations should be a dict with arrays: {peak_id: [...], highlight_color: [...], annotation: [...]}
+        return pl.DataFrame(annotations)
+    except Exception:
+        return None
+def clear_component_annotations() -> None:
+    """Clear all component annotations."""
+    if _COMPONENT_ANNOTATIONS_KEY in st.session_state:
+        st.session_state[_COMPONENT_ANNOTATIONS_KEY].clear()
 def _get_cached_vue_data(
     component_id: str,
     filter_state_hashable: Tuple[Tuple[str, Any], ...],
@@ -111,7 +165,7 @@ def _set_cached_vue_data(
 def _prepare_vue_data_cached(
-    component: 'BaseComponent',
+    component: "BaseComponent",
     component_id: str,
     filter_state_hashable: Tuple[Tuple[str, Any], ...],
     state_dict: Dict[str, Any],
@@ -122,6 +176,11 @@ def _prepare_vue_data_cached(
     Each component caches exactly one entry (its current filter state).
     When filter state changes, old entry is replaced - memory stays bounded.
+    For components with dynamic annotations (e.g., LinePlot linked to SequenceView):
+    - Cache stores BASE data (without annotation columns)
+    - Annotations are re-applied fresh each render (cheap operation)
+    - Final hash reflects current annotation state
     Args:
         component: The component to prepare data for
         component_id: Unique identifier for this component
@@ -131,19 +190,56 @@ def _prepare_vue_data_cached(
     Returns:
         Tuple of (vue_data dict, data_hash string)
     """
-    # Check cache first
+    # Check if component has dynamic annotations (e.g., LinePlot linked to SequenceView)
+    has_dynamic_annotations = (
+        getattr(component, "_dynamic_annotations", None) is not None
+    )
+    # Try cache first (works for ALL components now)
     cached = _get_cached_vue_data(component_id, filter_state_hashable)
     if cached is not None:
-        return cached
+        cached_data, cached_hash = cached
+        if has_dynamic_annotations:
+            # Cache hit but need to re-apply annotations (they may have changed)
+            # Use component method to apply fresh annotations to cached base data
+            if hasattr(component, "_apply_fresh_annotations"):
+                # Shallow copy to avoid mutating cache
+                vue_data = component._apply_fresh_annotations(dict(cached_data))
+                # Hash final data (includes new annotations)
+                data_hash = _hash_data(vue_data)
+                return vue_data, data_hash
+            else:
+                # Fallback: recompute if component doesn't support fresh annotations
+                vue_data = component._prepare_vue_data(state_dict)
+                data_hash = _hash_data(vue_data)
+                return vue_data, data_hash
+        else:
+            # No dynamic annotations - return cached as-is
+            return cached_data, cached_hash
     # Cache miss - compute data
     vue_data = component._prepare_vue_data(state_dict)
-    data_hash = _hash_data(vue_data)
-    # Store in cache (replaces any previous entry for this component)
-    _set_cached_vue_data(component_id, filter_state_hashable, vue_data, data_hash)
-    return vue_data, data_hash
+    if has_dynamic_annotations:
+        # Store BASE data (without dynamic annotation columns) in cache
+        if hasattr(component, "_strip_dynamic_columns"):
+            base_data = component._strip_dynamic_columns(vue_data)
+        else:
+            # Fallback: store without _plotConfig (may have stale column refs)
+            base_data = {k: v for k, v in vue_data.items() if k != "_plotConfig"}
+        base_hash = _hash_data(base_data)
+        _set_cached_vue_data(component_id, filter_state_hashable, base_data, base_hash)
+        # Return full data with annotations
+        data_hash = _hash_data(vue_data)
+        return vue_data, data_hash
+    else:
+        # Store complete data in cache
+        data_hash = _hash_data(vue_data)
+        _set_cached_vue_data(component_id, filter_state_hashable, vue_data, data_hash)
+        return vue_data, data_hash
 def get_vue_component_function():
@@ -159,11 +255,11 @@ def get_vue_component_function():
         import streamlit.components.v1 as st_components
         # Check for development mode
-        dev_mode = os.environ.get('SVC_DEV_MODE', 'false').lower() == 'true'
+        dev_mode = os.environ.get("SVC_DEV_MODE", "false").lower() == "true"
         if dev_mode:
             # Development mode: connect to Vite dev server
-            dev_url = os.environ.get('SVC_DEV_URL', 'http://localhost:5173')
+            dev_url = os.environ.get("SVC_DEV_URL", "http://localhost:5173")
             _vue_component_func = st_components.declare_component(
                 "streamlit_vue_component",
                 url=dev_url,
@@ -171,7 +267,7 @@ def get_vue_component_function():
         else:
             # Production mode: use built component
             parent_dir = os.path.dirname(os.path.abspath(__file__))
-            build_dir = os.path.join(parent_dir, '..', 'js-component', 'dist')
+            build_dir = os.path.join(parent_dir, "..", "js-component", "dist")
             if not os.path.exists(build_dir):
                 raise RuntimeError(
@@ -189,8 +285,8 @@ def get_vue_component_function():
 def render_component(
-    component: 'BaseComponent',
-    state_manager: 'StateManager',
+    component: "BaseComponent",
+    state_manager: "StateManager",
     key: Optional[str] = None,
     height: Optional[int] = None,
 ) -> Any:
@@ -223,15 +319,30 @@ def render_component(
     if key is None:
         key = f"svc_{component._cache_id}_{hash(str(component._interactivity))}"
+    # Check if component has required filters without values
+    # Don't send potentially huge unfiltered datasets - wait for filter selection
+    filters = getattr(component, "_filters", None) or {}
+    filter_defaults = getattr(component, "_filter_defaults", None) or {}
+    awaiting_filter = False
+    if filters:
+        # Check each filter - if no value AND no default, we're waiting
+        for identifier in filters.keys():
+            filter_value = state.get(identifier)
+            has_default = identifier in filter_defaults
+            if filter_value is None and not has_default:
+                awaiting_filter = True
+                break
     # Extract state keys that affect this component's data for cache key
     # This includes filters and any additional dependencies (e.g., zoom for heatmaps)
     # Uses get_state_dependencies() which can be overridden by subclasses
     state_keys = set(component.get_state_dependencies())
     # Build hashable version for cache key (converts dicts/lists to JSON strings)
-    filter_state_hashable = tuple(sorted(
-        (k, _make_hashable(state.get(k))) for k in state_keys
-    ))
+    filter_state_hashable = tuple(
+        sorted((k, _make_hashable(state.get(k))) for k in state_keys)
+    )
     # Build original state dict for passing to _prepare_vue_data
     # (contains actual values, not JSON strings)
@@ -241,13 +352,19 @@ def render_component(
     component_type = component._get_vue_component_name()
     component_id = f"{component_type}:{key}"
-    # Get component data using per-component cache
-    # Each component stores exactly one entry (current filter state)
-    # - Filterless components: filter_state=() always → always cache hit
-    # - Filtered components: cache hit when filter values unchanged
-    vue_data, data_hash = _prepare_vue_data_cached(
-        component, component_id, filter_state_hashable, relevant_state
-    )
+    # Skip data preparation if awaiting required filter selection
+    # This prevents sending huge unfiltered datasets
+    if awaiting_filter:
+        vue_data = {}
+        data_hash = "awaiting_filter"
+    else:
+        # Get component data using per-component cache
+        # Each component stores exactly one entry (current filter state)
+        # - Filterless components: filter_state=() always → always cache hit
+        # - Filtered components: cache hit when filter values unchanged
+        vue_data, data_hash = _prepare_vue_data_cached(
+            component, component_id, filter_state_hashable, relevant_state
+        )
     component_args = component._get_component_args()
@@ -268,17 +385,19 @@ def render_component(
     # Vue echoes null/None if it has no data, so mismatch triggers send
     # IMPORTANT: Also send data if vue_echoed_hash is None - this means Vue
     # hasn't confirmed receipt yet (e.g., after page navigation destroys Vue component)
+    # NOTE: Hash now correctly reflects annotation state (annotations included in hash),
+    # so normal comparison works for all components including those with dynamic annotations
     data_changed = (vue_echoed_hash is None) or (vue_echoed_hash != data_hash)
     # Only include full data if hash changed
     if data_changed:
         # Convert any non-pandas data to pandas for Arrow serialization
         # pandas DataFrames are passed through (already optimal for Arrow)
-        # Also filter out internal keys (starting with _)
+        # Filter out _hash (internal metadata) but keep _plotConfig (needed by Vue)
         converted_data = {}
         for data_key, value in vue_data.items():
-            if data_key.startswith('_'):
-                # Skip metadata keys like _hash, _plotConfig
+            if data_key == "_hash":
+                # Skip internal hash metadata
                 continue
             if isinstance(value, pl.LazyFrame):
                 converted_data[data_key] = value.collect().to_pandas()
@@ -289,9 +408,10 @@ def render_component(
             # pandas DataFrames pass through unchanged (optimal for Arrow)
         data_payload = {
             **converted_data,
-            'selection_store': state,
-            'hash': data_hash,
-            'dataChanged': True,
+            "selection_store": state,
+            "hash": data_hash,
+            "dataChanged": True,
+            "awaitingFilter": awaiting_filter,
         }
         # Note: We don't pre-set the hash here anymore. We trust Vue's echo
         # at the end of the render cycle. This ensures we detect when Vue
@@ -299,28 +419,29 @@ def render_component(
     else:
         # Data unchanged - only send hash and state, Vue will use cached data
         data_payload = {
-            'selection_store': state,
-            'hash': data_hash,
-            'dataChanged': False,
+            "selection_store": state,
+            "hash": data_hash,
+            "dataChanged": False,
+            "awaitingFilter": awaiting_filter,
         }
     # Add height to component args if specified
     if height is not None:
-        component_args['height'] = height
+        component_args["height"] = height
     # Component layout: [[{componentArgs: {...}}]]
-    components = [[{'componentArgs': component_args}]]
+    components = [[{"componentArgs": component_args}]]
     # Call Vue component
     vue_func = get_vue_component_function()
     kwargs = {
-        'components': components,
-        'key': key,
+        "components": components,
+        "key": key,
         **data_payload,
     }
     if height is not None:
-        kwargs['height'] = height
+        kwargs["height"] = height
     result = vue_func(**kwargs)
@@ -329,11 +450,39 @@ def render_component(
         # Store Vue's echoed hash for next render comparison
         # ALWAYS update from Vue's echo - if Vue lost its data (page navigation),
         # it echoes None, and we need to know that to resend data next time
-        vue_hash = result.get('_vueDataHash')
+        vue_hash = result.get("_vueDataHash")
         st.session_state[_VUE_ECHOED_HASH_KEY][hash_tracking_key] = vue_hash
-        # Update state and rerun if state changed
-        if state_manager.update_from_vue(result):
+        # Capture annotations from Vue (e.g., from SequenceView)
+        # Use hash-based change detection for robustness
+        annotations = result.get("_annotations")
+        annotations_changed = False
+        if annotations is not None:
+            # Compute hash of new annotations
+            peak_ids = annotations.get("peak_id", [])
+            new_hash = hash(tuple(peak_ids)) if peak_ids else 0
+            # Compare with stored hash
+            ann_hash_key = f"_svc_ann_hash_{key}"
+            old_hash = st.session_state.get(ann_hash_key)
+            if old_hash != new_hash:
+                annotations_changed = True
+                st.session_state[ann_hash_key] = new_hash
+            _store_component_annotations(key, annotations)
+        else:
+            # Annotations cleared - check if we had annotations before
+            ann_hash_key = f"_svc_ann_hash_{key}"
+            if st.session_state.get(ann_hash_key) is not None:
+                annotations_changed = True
+                st.session_state[ann_hash_key] = None
+        # Update state and rerun if state changed OR annotations changed
+        # Hash comparison will naturally detect changes on the next render
+        state_changed = state_manager.update_from_vue(result)
+        if state_changed or annotations_changed:
             st.rerun()
     return result
@@ -356,8 +505,9 @@ def _hash_data(data: Dict[str, Any]) -> str:
     hash_parts = []
     for key, value in sorted(data.items()):
-        if key.startswith('_'):
-            continue  # Skip metadata
+        # Skip internal metadata but NOT dynamic annotation columns
+        if key.startswith("_") and not key.startswith("_dynamic"):
+            continue
         if isinstance(value, pd.DataFrame):
             # Efficient hash for DataFrames
             df_polars = pl.from_pandas(value)

{openms_insight-0.1.2.dist-info → openms_insight-0.1.3.dist-info}/METADATA RENAMED Viewed

@@ -1,12 +1,12 @@
 Metadata-Version: 2.4
 Name: openms-insight
-Version: 0.1.2
+Version: 0.1.3
 Summary: Interactive visualization components for mass spectrometry data in Streamlit
 Project-URL: Homepage, https://github.com/t0mdavid-m/OpenMS-Insight
 Project-URL: Documentation, https://github.com/t0mdavid-m/OpenMS-Insight#readme
 Project-URL: Repository, https://github.com/t0mdavid-m/OpenMS-Insight
 Project-URL: Issues, https://github.com/t0mdavid-m/OpenMS-Insight/issues
-Author: Kohlbacher Lab
+Author: Tom David Müller
 License-Expression: BSD-3-Clause
 License-File: LICENSE
 Keywords: mass-spectrometry,openms,plotly,proteomics,streamlit,tabulator,visualization,vue
@@ -37,6 +37,7 @@ Description-Content-Type: text/markdown
 [![PyPI version](https://badge.fury.io/py/openms-insight.svg)](https://badge.fury.io/py/openms-insight)
 [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
+[![Tests](https://github.com/t0mdavid-m/OpenMS-Insight/actions/workflows/tests.yml/badge.svg)](https://github.com/t0mdavid-m/OpenMS-Insight/actions/workflows/tests.yml)
 Interactive visualization components for mass spectrometry data in Streamlit, backed by Vue.js.
@@ -45,10 +46,11 @@ Interactive visualization components for mass spectrometry data in Streamlit, ba
 - **Cross-component selection linking** via shared identifiers
 - **Memory-efficient preprocessing** via subprocess isolation
 - **Automatic disk caching** with config-based invalidation
-- **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination
+- **Cache reconstruction** - components can be restored from cache without re-specifying configuration
+- **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination, CSV export
 - **Line plot component** (Plotly.js) with highlighting, annotations, zoom
-- **Heatmap component** (Plotly scattergl) with multi-resolution downsampling
-- **Sequence view component** for peptide/protein visualization
+- **Heatmap component** (Plotly scattergl) with multi-resolution downsampling for millions of points
+- **Sequence view component** for peptide visualization with fragment ion matching and auto-zoom
 ## Installation
@@ -90,9 +92,10 @@ plot(state_manager=state_manager)
 ## Cross-Component Linking
-Components communicate through **identifiers** using two mechanisms:
+Components communicate through **identifiers** using three mechanisms:
 - **`filters`**: INPUT - filter this component's data by the selection
+- **`filter_defaults`**: INPUT - default value when selection is None
 - **`interactivity`**: OUTPUT - set a selection when user clicks
 ```python
@@ -120,6 +123,14 @@ plot = LinePlot(
     x_column='mass',
     y_column='intensity',
 )
+# Table with filter defaults - shows unannotated data when no identification selected
+annotations = Table(
+    cache_id="annotations",
+    data_path="annotations.parquet",
+    filters={'identification': 'id_idx'},
+    filter_defaults={'identification': -1},  # Use -1 when identification is None
+)
 ```
 ---
@@ -137,17 +148,27 @@ Table(
     interactivity={'spectrum': 'scan_id'},
     column_definitions=[
         {'field': 'scan_id', 'title': 'Scan', 'sorter': 'number'},
-        {'field': 'rt', 'title': 'RT (min)', 'sorter': 'number', 'hozAlign': 'right'},
+        {'field': 'rt', 'title': 'RT (min)', 'sorter': 'number', 'hozAlign': 'right',
+         'formatter': 'money', 'formatterParams': {'precision': 2, 'symbol': ''}},
         {'field': 'precursor_mz', 'title': 'm/z', 'sorter': 'number'},
     ],
     index_field='scan_id',
     go_to_fields=['scan_id'],
+    initial_sort=[{'column': 'scan_id', 'dir': 'asc'}],
     default_row=0,
     pagination=True,
-    page_size=50,
+    page_size=100,
 )
 ```
+**Key parameters:**
+- `column_definitions`: List of Tabulator column configs (field, title, sorter, formatter, etc.)
+- `index_field`: Column used as unique row identifier (default: 'id')
+- `go_to_fields`: Columns available in "Go to" navigation
+- `initial_sort`: Default sort configuration
+- `pagination`: Enable pagination for large tables (default: True)
+- `page_size`: Rows per page (default: 100)
 ### LinePlot
 Stick-style line plot using Plotly.js for mass spectra visualization.
@@ -165,9 +186,20 @@ LinePlot(
     title="MS/MS Spectrum",
     x_label="m/z",
     y_label="Intensity",
+    styling={
+        'highlightColor': '#E4572E',
+        'selectedColor': '#F3A712',
+        'unhighlightedColor': 'lightblue',
+    },
 )
 ```
+**Key parameters:**
+- `x_column`, `y_column`: Column names for x/y values
+- `highlight_column`: Boolean/int column indicating which points to highlight
+- `annotation_column`: Text column for labels on highlighted points
+- `styling`: Color configuration dict
 ### Heatmap
 2D scatter heatmap using Plotly scattergl with multi-resolution downsampling for large datasets (millions of points).
@@ -181,28 +213,67 @@ Heatmap(
     intensity_column='intensity',
     interactivity={'spectrum': 'scan_id', 'peak': 'peak_id'},
     min_points=30000,
+    x_bins=400,
+    y_bins=50,
     title="Peak Map",
     x_label="Retention Time (min)",
     y_label="m/z",
+    colorscale='Portland',
 )
 ```
+**Key parameters:**
+- `x_column`, `y_column`, `intensity_column`: Column names for axes and color
+- `min_points`: Target size for downsampling (default: 20000)
+- `x_bins`, `y_bins`: Grid resolution for spatial binning
+- `colorscale`: Plotly colorscale name (default: 'Portland')
 ### SequenceView
-Peptide/protein sequence visualization with fragment ion matching.
+Peptide sequence visualization with fragment ion matching. Supports both dynamic (filtered by selection) and static sequences.
 ```python
+# Dynamic: sequence from DataFrame filtered by selection
 SequenceView(
     cache_id="peptide_view",
-    sequence="PEPTIDEK",
-    observed_masses=[147.1, 244.2, 359.3, 456.4],
-    peak_ids=[0, 1, 2, 3],
-    precursor_mass=944.5,
+    sequence_data_path="sequences.parquet",  # columns: scan_id, sequence, precursor_charge
+    peaks_data_path="peaks.parquet",         # columns: scan_id, peak_id, mass, intensity
+    filters={'spectrum': 'scan_id'},
     interactivity={'peak': 'peak_id'},
+    deconvolved=False,  # peaks are m/z values, consider charge states
     title="Fragment Coverage",
 )
+# Static: single sequence with optional peaks
+SequenceView(
+    cache_id="static_peptide",
+    sequence_data=("PEPTIDEK", 2),  # (sequence, charge) tuple
+    peaks_data=peaks_df,            # Optional: LazyFrame with mass, intensity columns
+    deconvolved=True,               # peaks are neutral masses
+)
+# Simplest: just a sequence string
+SequenceView(
+    cache_id="simple_seq",
+    sequence_data="PEPTIDEK",  # charge defaults to 1
+)
 ```
+**Key parameters:**
+- `sequence_data`: LazyFrame, (sequence, charge) tuple, or sequence string
+- `sequence_data_path`: Path to parquet with sequence data
+- `peaks_data` / `peaks_data_path`: Optional peak data for fragment matching
+- `deconvolved`: If False (default), peaks are m/z and matching considers charge states
+- `annotation_config`: Dict with ion_types, tolerance, neutral_losses settings
+**Features:**
+- Automatic fragment ion matching (a/b/c/x/y/z ions)
+- Configurable mass tolerance (ppm or Da)
+- Neutral loss support (-H2O, -NH3)
+- Auto-zoom for short sequences (≤20 amino acids)
+- Fragment coverage statistics
+- Click-to-select peaks with cross-component linking
 ---
 ## Shared Component Arguments
@@ -212,11 +283,59 @@ All components accept these common arguments:
 | Argument | Type | Default | Description |
 |----------|------|---------|-------------|
 | `cache_id` | `str` | **Required** | Unique identifier for disk cache |
-| `data_path` | `str` | `None` | Path to parquet file (preferred - uses subprocess for memory efficiency) |
-| `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path, in-process preprocessing) |
+| `data_path` | `str` | `None` | Path to parquet file (preferred for memory efficiency) |
+| `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path) |
 | `filters` | `Dict[str, str]` | `None` | Map identifier -> column for filtering |
+| `filter_defaults` | `Dict[str, Any]` | `None` | Default values when selection is None |
 | `interactivity` | `Dict[str, str]` | `None` | Map identifier -> column for click actions |
 | `cache_path` | `str` | `"."` | Base directory for cache storage |
+| `regenerate_cache` | `bool` | `False` | Force cache regeneration |
+## Memory-Efficient Preprocessing
+When working with large datasets (especially heatmaps with millions of points), use `data_path` instead of `data` to enable subprocess preprocessing:
+```python
+# Subprocess preprocessing (recommended for large datasets)
+# Memory is fully released after cache creation
+heatmap = Heatmap(
+    data_path="large_peaks.parquet",  # triggers subprocess
+    cache_id="peaks_heatmap",
+    ...
+)
+# In-process preprocessing (for smaller datasets or debugging)
+# Memory may be retained by allocator after preprocessing
+heatmap = Heatmap(
+    data=pl.scan_parquet("large_peaks.parquet"),  # runs in main process
+    cache_id="peaks_heatmap",
+    ...
+)
+```
+**Why this matters:** Memory allocators like mimalloc (used by Polars) retain freed memory for performance. For large datasets, this can cause memory usage to stay high even after preprocessing completes. Running preprocessing in a subprocess guarantees all memory is returned to the OS when the subprocess exits.
+## Cache Reconstruction
+Components can be reconstructed from cache using only `cache_id` and `cache_path`. All configuration is restored from the cached manifest:
+```python
+# First run: create component with data and config
+table = Table(
+    cache_id="my_table",
+    data_path="data.parquet",
+    filters={'spectrum': 'scan_id'},
+    column_definitions=[...],
+    cache_path="./cache",
+)
+# Subsequent runs: reconstruct from cache only
+table = Table(
+    cache_id="my_table",
+    cache_path="./cache",
+)
+# All config (filters, column_definitions, etc.) restored from cache
+```
 ## Rendering
@@ -243,14 +362,38 @@ npm install
 npm run build
 ```
-### Development Mode
+### Development Mode (Hot Reload)
 ```bash
+# Terminal 1: Vue dev server
 cd js-component
 npm run dev
-# In another terminal:
-export SVC_DEV_MODE=true
-export SVC_DEV_URL=http://localhost:5173
-streamlit run app.py
+# Terminal 2: Streamlit with dev mode
+SVC_DEV_MODE=true SVC_DEV_URL=http://localhost:5173 streamlit run app.py
+```
+### Running Tests
+```bash
+# Python tests
+pip install -e ".[dev]"
+pytest tests/ -v
+# TypeScript type checking
+cd js-component
+npm run type-check
+```
+### Linting and Formatting
+```bash
+# Python
+ruff check .
+ruff format .
+# JavaScript/TypeScript
+cd js-component
+npm run lint
+npm run format
 ```

openms-insight 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

openms-insight 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl