PyPI - openms-insight - Versions diffs - 0.1.0__tar.gz → 0.1.2__tar.gz - Mend

openms-insight 0.1.0.tar.gz → 0.1.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{openms_insight-0.1.0 → openms_insight-0.1.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openms-insight
-Version: 0.1.0
+Version: 0.1.2
 Summary: Interactive visualization components for mass spectrometry data in Streamlit
 Project-URL: Homepage, https://github.com/t0mdavid-m/OpenMS-Insight
 Project-URL: Documentation, https://github.com/t0mdavid-m/OpenMS-Insight#readme
@@ -43,7 +43,7 @@ Interactive visualization components for mass spectrometry data in Streamlit, ba
 ## Features
 - **Cross-component selection linking** via shared identifiers
-- **Polars LazyFrame support** for efficient data handling
+- **Memory-efficient preprocessing** via subprocess isolation
 - **Automatic disk caching** with config-based invalidation
 - **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination
 - **Line plot component** (Plotly.js) with highlighting, annotations, zoom
@@ -60,7 +60,6 @@ pip install openms-insight
 ```python
 import streamlit as st
-import polars as pl
 from openms_insight import Table, LinePlot, StateManager
 # Create state manager for cross-component linking
@@ -69,7 +68,7 @@ state_manager = StateManager()
 # Create a table - clicking a row sets the 'item' selection
 table = Table(
     cache_id="items_table",
-    data=pl.scan_parquet("items.parquet"),
+    data_path="items.parquet",
     interactivity={'item': 'item_id'},
     column_definitions=[
         {'field': 'item_id', 'title': 'ID', 'sorter': 'number'},
@@ -81,7 +80,7 @@ table(state_manager=state_manager)
 # Create a linked plot - filters by the selected 'item'
 plot = LinePlot(
     cache_id="values_plot",
-    data=pl.scan_parquet("values.parquet"),
+    data_path="values.parquet",
     filters={'item': 'item_id'},
     x_column='x',
     y_column='y',
@@ -100,14 +99,14 @@ Components communicate through **identifiers** using two mechanisms:
 # Master table: no filters, sets 'spectrum' on click
 master = Table(
     cache_id="spectra",
-    data=spectra_data,
+    data_path="spectra.parquet",
     interactivity={'spectrum': 'scan_id'},  # Click -> sets spectrum=scan_id
 )
 # Detail table: filters by 'spectrum', sets 'peak' on click
 detail = Table(
     cache_id="peaks",
-    data=peaks_data,
+    data_path="peaks.parquet",
     filters={'spectrum': 'scan_id'},        # Filters where scan_id = selected spectrum
     interactivity={'peak': 'peak_id'},      # Click -> sets peak=peak_id
 )
@@ -115,7 +114,7 @@ detail = Table(
 # Plot: filters by 'spectrum', highlights selected 'peak'
 plot = LinePlot(
     cache_id="plot",
-    data=peaks_data,
+    data_path="peaks.parquet",
     filters={'spectrum': 'scan_id'},
     interactivity={'peak': 'peak_id'},
     x_column='mass',
@@ -134,7 +133,7 @@ Interactive table using Tabulator.js with filtering dialogs, sorting, pagination
 ```python
 Table(
     cache_id="spectra_table",
-    data=pl.scan_parquet("spectra.parquet"),
+    data_path="spectra.parquet",
     interactivity={'spectrum': 'scan_id'},
     column_definitions=[
         {'field': 'scan_id', 'title': 'Scan', 'sorter': 'number'},
@@ -156,7 +155,7 @@ Stick-style line plot using Plotly.js for mass spectra visualization.
 ```python
 LinePlot(
     cache_id="spectrum_plot",
-    data=pl.scan_parquet("peaks.parquet"),
+    data_path="peaks.parquet",
     filters={'spectrum': 'scan_id'},
     interactivity={'peak': 'peak_id'},
     x_column='mass',
@@ -176,7 +175,7 @@ LinePlot(
 ```python
 Heatmap(
     cache_id="peaks_heatmap",
-    data=pl.scan_parquet("all_peaks.parquet"),
+    data_path="all_peaks.parquet",
     x_column='retention_time',
     y_column='mass',
     intensity_column='intensity',
@@ -213,7 +212,8 @@ All components accept these common arguments:
 | Argument | Type | Default | Description |
 |----------|------|---------|-------------|
 | `cache_id` | `str` | **Required** | Unique identifier for disk cache |
-| `data` | `pl.LazyFrame` | `None` | Polars LazyFrame with source data |
+| `data_path` | `str` | `None` | Path to parquet file (preferred - uses subprocess for memory efficiency) |
+| `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path, in-process preprocessing) |
 | `filters` | `Dict[str, str]` | `None` | Map identifier -> column for filtering |
 | `interactivity` | `Dict[str, str]` | `None` | Map identifier -> column for click actions |
 | `cache_path` | `str` | `"."` | Base directory for cache storage |

{openms_insight-0.1.0 → openms_insight-0.1.2}/README.md RENAMED Viewed

@@ -8,7 +8,7 @@ Interactive visualization components for mass spectrometry data in Streamlit, ba
 ## Features
 - **Cross-component selection linking** via shared identifiers
-- **Polars LazyFrame support** for efficient data handling
+- **Memory-efficient preprocessing** via subprocess isolation
 - **Automatic disk caching** with config-based invalidation
 - **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination
 - **Line plot component** (Plotly.js) with highlighting, annotations, zoom
@@ -25,7 +25,6 @@ pip install openms-insight
 ```python
 import streamlit as st
-import polars as pl
 from openms_insight import Table, LinePlot, StateManager
 # Create state manager for cross-component linking
@@ -34,7 +33,7 @@ state_manager = StateManager()
 # Create a table - clicking a row sets the 'item' selection
 table = Table(
     cache_id="items_table",
-    data=pl.scan_parquet("items.parquet"),
+    data_path="items.parquet",
     interactivity={'item': 'item_id'},
     column_definitions=[
         {'field': 'item_id', 'title': 'ID', 'sorter': 'number'},
@@ -46,7 +45,7 @@ table(state_manager=state_manager)
 # Create a linked plot - filters by the selected 'item'
 plot = LinePlot(
     cache_id="values_plot",
-    data=pl.scan_parquet("values.parquet"),
+    data_path="values.parquet",
     filters={'item': 'item_id'},
     x_column='x',
     y_column='y',
@@ -65,14 +64,14 @@ Components communicate through **identifiers** using two mechanisms:
 # Master table: no filters, sets 'spectrum' on click
 master = Table(
     cache_id="spectra",
-    data=spectra_data,
+    data_path="spectra.parquet",
     interactivity={'spectrum': 'scan_id'},  # Click -> sets spectrum=scan_id
 )
 # Detail table: filters by 'spectrum', sets 'peak' on click
 detail = Table(
     cache_id="peaks",
-    data=peaks_data,
+    data_path="peaks.parquet",
     filters={'spectrum': 'scan_id'},        # Filters where scan_id = selected spectrum
     interactivity={'peak': 'peak_id'},      # Click -> sets peak=peak_id
 )
@@ -80,7 +79,7 @@ detail = Table(
 # Plot: filters by 'spectrum', highlights selected 'peak'
 plot = LinePlot(
     cache_id="plot",
-    data=peaks_data,
+    data_path="peaks.parquet",
     filters={'spectrum': 'scan_id'},
     interactivity={'peak': 'peak_id'},
     x_column='mass',
@@ -99,7 +98,7 @@ Interactive table using Tabulator.js with filtering dialogs, sorting, pagination
 ```python
 Table(
     cache_id="spectra_table",
-    data=pl.scan_parquet("spectra.parquet"),
+    data_path="spectra.parquet",
     interactivity={'spectrum': 'scan_id'},
     column_definitions=[
         {'field': 'scan_id', 'title': 'Scan', 'sorter': 'number'},
@@ -121,7 +120,7 @@ Stick-style line plot using Plotly.js for mass spectra visualization.
 ```python
 LinePlot(
     cache_id="spectrum_plot",
-    data=pl.scan_parquet("peaks.parquet"),
+    data_path="peaks.parquet",
     filters={'spectrum': 'scan_id'},
     interactivity={'peak': 'peak_id'},
     x_column='mass',
@@ -141,7 +140,7 @@ LinePlot(
 ```python
 Heatmap(
     cache_id="peaks_heatmap",
-    data=pl.scan_parquet("all_peaks.parquet"),
+    data_path="all_peaks.parquet",
     x_column='retention_time',
     y_column='mass',
     intensity_column='intensity',
@@ -178,7 +177,8 @@ All components accept these common arguments:
 | Argument | Type | Default | Description |
 |----------|------|---------|-------------|
 | `cache_id` | `str` | **Required** | Unique identifier for disk cache |
-| `data` | `pl.LazyFrame` | `None` | Polars LazyFrame with source data |
+| `data_path` | `str` | `None` | Path to parquet file (preferred - uses subprocess for memory efficiency) |
+| `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path, in-process preprocessing) |
 | `filters` | `Dict[str, str]` | `None` | Map identifier -> column for filtering |
 | `interactivity` | `Dict[str, str]` | `None` | Map identifier -> column for click actions |
 | `cache_path` | `str` | `"."` | Base directory for cache storage |

{openms_insight-0.1.0 → openms_insight-0.1.2}/openms_insight/components/heatmap.py RENAMED Viewed

@@ -69,6 +69,7 @@ class Heatmap(BaseComponent):
         x_column: str,
         y_column: str,
         data: Optional[pl.LazyFrame] = None,
+        data_path: Optional[str] = None,
         intensity_column: str = 'intensity',
         filters: Optional[Dict[str, str]] = None,
         filter_defaults: Optional[Dict[str, Any]] = None,
@@ -97,6 +98,7 @@ class Heatmap(BaseComponent):
             x_column: Name of column for x-axis values
             y_column: Name of column for y-axis values
             data: Polars LazyFrame with heatmap data. Optional if cache exists.
+            data_path: Path to parquet file (preferred for large datasets).
             intensity_column: Name of column for intensity/color values
             filters: Mapping of identifier names to column names for filtering
             interactivity: Mapping of identifier names to column names for clicks.
@@ -142,11 +144,27 @@ class Heatmap(BaseComponent):
         super().__init__(
             cache_id=cache_id,
             data=data,
+            data_path=data_path,
             filters=filters,
             filter_defaults=filter_defaults,
             interactivity=interactivity,
             cache_path=cache_path,
             regenerate_cache=regenerate_cache,
+            # Pass component-specific params for subprocess recreation
+            x_column=x_column,
+            y_column=y_column,
+            intensity_column=intensity_column,
+            min_points=min_points,
+            x_bins=x_bins,
+            y_bins=y_bins,
+            zoom_identifier=zoom_identifier,
+            title=title,
+            x_label=x_label,
+            y_label=y_label,
+            colorscale=colorscale,
+            use_simple_downsample=use_simple_downsample,
+            use_streaming=use_streaming,
+            categorical_filters=categorical_filters,
             **kwargs
         )
@@ -211,6 +229,8 @@ class Heatmap(BaseComponent):
         render time, the resulting data has ~min_points regardless of the
         filter value selected.
+        Data is sorted by x, y columns for efficient range query predicate pushdown.
         Example: For im_dimension with values [0, 1, 2, 3], creates:
         - cat_level_im_dimension_0_0: 20K points with im_id=0
         - cat_level_im_dimension_0_1: 20K points with im_id=1
@@ -271,9 +291,8 @@ class Heatmap(BaseComponent):
                 # Store level sizes for this filter value
                 self._preprocessed_data[f'cat_level_sizes_{filter_id}_{filter_value}'] = level_sizes
-                self._preprocessed_data[f'cat_num_levels_{filter_id}_{filter_value}'] = len(level_sizes)
-                # Build each level
+                # Build each compressed level
                 for level_idx, target_size in enumerate(level_sizes):
                     # If target size equals total, skip downsampling - use all data
                     if target_size >= filtered_total:
@@ -297,15 +316,25 @@ class Heatmap(BaseComponent):
                             y_range=y_range,
                         )
-                    # Collect and store
+                    # Sort by x, y for efficient range query predicate pushdown
+                    level = level.sort([self._x_column, self._y_column])
+                    # Store LazyFrame for streaming to disk
                     level_key = f'cat_level_{filter_id}_{filter_value}_{level_idx}'
-                    self._preprocessed_data[level_key] = level.collect()
+                    self._preprocessed_data[level_key] = level  # Keep lazy
+                # Add full resolution as final level (for zoom fallback)
+                # Also sorted for consistent predicate pushdown behavior
+                num_compressed = len(level_sizes)
+                full_res_key = f'cat_level_{filter_id}_{filter_value}_{num_compressed}'
+                self._preprocessed_data[full_res_key] = filtered_data.sort(
+                    [self._x_column, self._y_column]
+                )
+                self._preprocessed_data[f'cat_num_levels_{filter_id}_{filter_value}'] = num_compressed + 1
         # Also create global levels for when no categorical filter is selected
         # (fallback to standard behavior)
         level_sizes = compute_compression_levels(self._min_points, total)
         self._preprocessed_data['level_sizes'] = level_sizes
-        self._preprocessed_data['num_levels'] = len(level_sizes)
         for i, size in enumerate(level_sizes):
             # If target size equals total, skip downsampling - use all data
@@ -329,13 +358,24 @@ class Heatmap(BaseComponent):
                     x_range=x_range,
                     y_range=y_range,
                 )
-            self._preprocessed_data[f'level_{i}'] = level.collect()
+            # Sort by x, y for efficient range query predicate pushdown
+            level = level.sort([self._x_column, self._y_column])
+            self._preprocessed_data[f'level_{i}'] = level  # Keep lazy
+        # Add full resolution as final level (for zoom fallback)
+        # Also sorted for consistent predicate pushdown behavior
+        num_compressed = len(level_sizes)
+        self._preprocessed_data[f'level_{num_compressed}'] = self._raw_data.sort(
+            [self._x_column, self._y_column]
+        )
+        self._preprocessed_data['num_levels'] = num_compressed + 1
     def _preprocess_streaming(self) -> None:
         """
-        Streaming preprocessing - levels stay lazy until render.
+        Streaming preprocessing - levels stay lazy through caching.
-        Builds lazy query plans and collects them for caching.
+        Builds lazy query plans that are streamed to disk via sink_parquet().
+        Data is sorted by x, y columns for efficient range query predicate pushdown.
         """
         # Get data ranges (minimal collect - just 4 values)
         x_range, y_range = get_data_range(
@@ -379,12 +419,22 @@ class Heatmap(BaseComponent):
                     x_range=x_range,
                     y_range=y_range,
                 )
-            # Collect and store as DataFrame for caching
-            # Base class will serialize these to parquet
-            self._preprocessed_data[f'level_{i}'] = level.collect()
+            # Sort by x, y for efficient range query predicate pushdown
+            # This clusters spatially close points together in row groups
+            level = level.sort([self._x_column, self._y_column])
+            # Store LazyFrame for streaming to disk
+            # Base class will use sink_parquet() to stream without full materialization
+            self._preprocessed_data[f'level_{i}'] = level  # Keep lazy
+        # Add full resolution as final level (for zoom fallback)
+        # Also sorted for consistent predicate pushdown behavior
+        num_compressed = len(level_sizes)
+        self._preprocessed_data[f'level_{num_compressed}'] = self._raw_data.sort(
+            [self._x_column, self._y_column]
+        )
-        # Store number of levels for reconstruction
-        self._preprocessed_data['num_levels'] = len(level_sizes)
+        # Store number of levels for reconstruction (includes full resolution)
+        self._preprocessed_data['num_levels'] = num_compressed + 1
     def _preprocess_eager(self) -> None:
         """
@@ -392,6 +442,7 @@ class Heatmap(BaseComponent):
         Uses more memory at init but faster rendering. Uses scipy-based
         downsampling for better spatial distribution.
+        Data is sorted by x, y columns for efficient range query predicate pushdown.
         """
         # Get data ranges
         x_range, y_range = get_data_range(
@@ -434,16 +485,29 @@ class Heatmap(BaseComponent):
                         x_bins=self._x_bins,
                         y_bins=self._y_bins,
                     )
-                # Collect for caching - store with reversed index
+                # Sort by x, y for efficient range query predicate pushdown
+                if isinstance(downsampled, pl.LazyFrame):
+                    downsampled = downsampled.sort([self._x_column, self._y_column])
+                else:
+                    downsampled = downsampled.sort([self._x_column, self._y_column])
+                # Store LazyFrame for streaming to disk
                 level_idx = len(level_sizes) - 1 - i
                 if isinstance(downsampled, pl.LazyFrame):
-                    self._preprocessed_data[f'level_{level_idx}'] = downsampled.collect()
+                    self._preprocessed_data[f'level_{level_idx}'] = downsampled  # Keep lazy
                 else:
-                    self._preprocessed_data[f'level_{level_idx}'] = downsampled
+                    # DataFrame from downsample_2d - convert back to lazy
+                    self._preprocessed_data[f'level_{level_idx}'] = downsampled.lazy()
                 current = downsampled
-        # Store number of levels for reconstruction
-        self._preprocessed_data['num_levels'] = len(level_sizes)
+        # Add full resolution as final level (for zoom fallback)
+        # Also sorted for consistent predicate pushdown behavior
+        num_compressed = len(level_sizes)
+        self._preprocessed_data[f'level_{num_compressed}'] = self._raw_data.sort(
+            [self._x_column, self._y_column]
+        )
+        # Store number of levels for reconstruction (includes full resolution)
+        self._preprocessed_data['num_levels'] = num_compressed + 1
     def _get_levels(self) -> list:
         """
@@ -460,10 +524,6 @@ class Heatmap(BaseComponent):
             if level_data is not None:
                 levels.append(level_data)
-        # Add full resolution at end (if raw data available)
-        if self._raw_data is not None:
-            levels.append(self._raw_data)
         return levels
     def _get_categorical_levels(
@@ -496,13 +556,7 @@ class Heatmap(BaseComponent):
             if level_data is not None:
                 levels.append(level_data)
-        # Get filtered raw data for full resolution (if available)
-        filtered_raw = None
-        if self._raw_data is not None and filter_id in self._filters:
-            column_name = self._filters[filter_id]
-            filtered_raw = self._raw_data.filter(pl.col(column_name) == filter_value)
-        return levels, filtered_raw
+        return levels, None  # Full resolution included in cached levels
     def _get_levels_for_state(self, state: Dict[str, Any]) -> Tuple[list, Optional[pl.LazyFrame]]:
         """
@@ -630,10 +684,11 @@ class Heatmap(BaseComponent):
             if count >= self._min_points:
                 # This level has enough detail
-                if count > self._min_points * 2:
-                    # Still too many - downsample further
-                    x_range = self._preprocessed_data.get('x_range')
-                    y_range = self._preprocessed_data.get('y_range')
+                if count > self._min_points:
+                    # Over limit - downsample to stay at/under max
+                    # Use ZOOM range for binning (not global) to avoid sparse bins
+                    zoom_x_range = (x0, x1)
+                    zoom_y_range = (y0, y1)
                     if self._use_streaming or self._use_simple_downsample:
                         if self._use_simple_downsample:
                             return downsample_2d_simple(
@@ -650,8 +705,8 @@ class Heatmap(BaseComponent):
                                 intensity_column=self._intensity_column,
                                 x_bins=self._x_bins,
                                 y_bins=self._y_bins,
-                                x_range=x_range,
-                                y_range=y_range,
+                                x_range=zoom_x_range,
+                                y_range=zoom_y_range,
                             ).collect()
                     else:
                         return downsample_2d(
@@ -744,7 +799,8 @@ class Heatmap(BaseComponent):
                 df_pandas = df_pandas.sort_values(self._intensity_column).reset_index(drop=True)
             else:
                 # No filters to apply - levels already filtered by categorical filter
-                available_cols = [c for c in columns_to_select if c in data.columns]
+                schema_names = data.collect_schema().names()
+                available_cols = [c for c in columns_to_select if c in schema_names]
                 df_polars = data.select(available_cols).collect()
                 # Sort by intensity ascending so high-intensity points are drawn on top
                 df_polars = df_polars.sort(self._intensity_column)

{openms_insight-0.1.0 → openms_insight-0.1.2}/openms_insight/components/lineplot.py RENAMED Viewed

@@ -45,6 +45,7 @@ class LinePlot(BaseComponent):
         self,
         cache_id: str,
         data: Optional[pl.LazyFrame] = None,
+        data_path: Optional[str] = None,
         filters: Optional[Dict[str, str]] = None,
         filter_defaults: Optional[Dict[str, Any]] = None,
         interactivity: Optional[Dict[str, str]] = None,
@@ -68,6 +69,7 @@ class LinePlot(BaseComponent):
             cache_id: Unique identifier for this component's cache (MANDATORY).
                 Creates a folder {cache_path}/{cache_id}/ for cached data.
             data: Polars LazyFrame with plot data. Optional if cache exists.
+            data_path: Path to parquet file (preferred for large datasets).
             filters: Mapping of identifier names to column names for filtering.
                 Example: {'spectrum': 'scan_id'}
                 When 'spectrum' selection exists, plot shows only data where
@@ -116,11 +118,22 @@ class LinePlot(BaseComponent):
         super().__init__(
             cache_id=cache_id,
             data=data,
+            data_path=data_path,
             filters=filters,
             filter_defaults=filter_defaults,
             interactivity=interactivity,
             cache_path=cache_path,
             regenerate_cache=regenerate_cache,
+            # Pass component-specific params for subprocess recreation
+            x_column=x_column,
+            y_column=y_column,
+            title=title,
+            x_label=x_label,
+            y_label=y_label,
+            highlight_column=highlight_column,
+            annotation_column=annotation_column,
+            styling=styling,
+            config=config,
             **kwargs
         )
@@ -208,9 +221,9 @@ class LinePlot(BaseComponent):
             'annotation_column': self._annotation_column,
         }
-        # Collect data for caching (filter happens at render time)
-        # Base class will serialize this to parquet
-        self._preprocessed_data['data'] = data.collect()
+        # Store LazyFrame for streaming to disk (filter happens at render time)
+        # Base class will use sink_parquet() to stream without full materialization
+        self._preprocessed_data['data'] = data  # Keep lazy
     def _get_vue_component_name(self) -> str:
         """Return the Vue component name."""

{openms_insight-0.1.0 → openms_insight-0.1.2}/openms_insight/components/table.py RENAMED Viewed

@@ -49,6 +49,7 @@ class Table(BaseComponent):
         self,
         cache_id: str,
         data: Optional[pl.LazyFrame] = None,
+        data_path: Optional[str] = None,
         filters: Optional[Dict[str, str]] = None,
         filter_defaults: Optional[Dict[str, Any]] = None,
         interactivity: Optional[Dict[str, str]] = None,
@@ -72,6 +73,7 @@ class Table(BaseComponent):
             cache_id: Unique identifier for this component's cache (MANDATORY).
                 Creates a folder {cache_path}/{cache_id}/ for cached data.
             data: Polars LazyFrame with table data. Optional if cache exists.
+            data_path: Path to parquet file (preferred for large datasets).
             filters: Mapping of identifier names to column names for filtering.
                 Example: {'spectrum': 'scan_id'}
                 When 'spectrum' selection exists, table shows only rows where
@@ -120,11 +122,22 @@ class Table(BaseComponent):
         super().__init__(
             cache_id=cache_id,
             data=data,
+            data_path=data_path,
             filters=filters,
             filter_defaults=filter_defaults,
             interactivity=interactivity,
             cache_path=cache_path,
             regenerate_cache=regenerate_cache,
+            # Pass component-specific params for subprocess recreation
+            column_definitions=column_definitions,
+            title=title,
+            index_field=index_field,
+            go_to_fields=go_to_fields,
+            layout=layout,
+            default_row=default_row,
+            initial_sort=initial_sort,
+            pagination=pagination,
+            page_size=page_size,
             **kwargs
         )
@@ -204,9 +217,9 @@ class Table(BaseComponent):
         # Store column definitions in preprocessed data for serialization
         self._preprocessed_data['column_definitions'] = self._column_definitions
-        # Collect data for caching (filter happens at render time)
-        # Base class will serialize this to parquet with optimized row groups
-        self._preprocessed_data['data'] = data.collect()
+        # Store LazyFrame for streaming to disk (filter happens at render time)
+        # Base class will use sink_parquet() to stream without full materialization
+        self._preprocessed_data['data'] = data  # Keep lazy
     def _get_columns_to_select(self) -> Optional[List[str]]:
         """Get list of columns needed for this table."""

openms-insight 0.1.0__tar.gz → 0.1.2__tar.gz

openms-insight 0.1.0.tar.gz → 0.1.2.tar.gz