PyPI - openms-insight - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl - Mend

openms-insight 0.1.3 (py3-none-any.whl) → 0.1.5 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

openms_insight/__init__.py +3 -1
openms_insight/components/__init__.py +2 -0
openms_insight/components/heatmap.py +348 -151
openms_insight/components/volcanoplot.py +374 -0
openms_insight/core/base.py +32 -10
openms_insight/js-component/dist/assets/index.css +1 -1
openms_insight/js-component/dist/assets/index.js +137 -128
openms_insight/preprocessing/__init__.py +6 -0
openms_insight/preprocessing/compression.py +55 -1
openms_insight/preprocessing/scatter.py +136 -0
openms_insight/rendering/bridge.py +23 -4
{openms_insight-0.1.3.dist-info → openms_insight-0.1.5.dist-info}/METADATA +101 -2
{openms_insight-0.1.3.dist-info → openms_insight-0.1.5.dist-info}/RECORD +15 -13
{openms_insight-0.1.3.dist-info → openms_insight-0.1.5.dist-info}/WHEEL +0 -0
{openms_insight-0.1.3.dist-info → openms_insight-0.1.5.dist-info}/licenses/LICENSE +0 -0

openms_insight/components/heatmap.py CHANGED Viewed

@@ -8,6 +8,7 @@ from ..core.base import BaseComponent
 from ..core.registry import register_component
 from ..preprocessing.compression import (
     compute_compression_levels,
+    compute_optimal_bins,
     downsample_2d,
     downsample_2d_simple,
     downsample_2d_streaming,
@@ -76,17 +77,23 @@ class Heatmap(BaseComponent):
         interactivity: Optional[Dict[str, str]] = None,
         cache_path: str = ".",
         regenerate_cache: bool = False,
-        min_points: int = 20000,
-        x_bins: int = 400,
-        y_bins: int = 50,
+        min_points: int = 10000,
+        display_aspect_ratio: float = 16 / 9,
+        x_bins: Optional[int] = None,
+        y_bins: Optional[int] = None,
         zoom_identifier: str = "heatmap_zoom",
         title: Optional[str] = None,
         x_label: Optional[str] = None,
         y_label: Optional[str] = None,
         colorscale: str = "Portland",
+        reversescale: bool = False,
         use_simple_downsample: bool = False,
         use_streaming: bool = True,
         categorical_filters: Optional[List[str]] = None,
+        category_column: Optional[str] = None,
+        category_colors: Optional[Dict[str, str]] = None,
+        log_scale: bool = True,
+        intensity_label: Optional[str] = None,
         **kwargs,
     ):
         """
@@ -106,10 +113,17 @@ class Heatmap(BaseComponent):
                 point's value in the corresponding column.
             cache_path: Base path for cache storage. Default "." (current dir).
             regenerate_cache: If True, regenerate cache even if valid cache exists.
-            min_points: Target size for smallest compression level and
-                threshold for level selection (default: 20000)
-            x_bins: Number of bins along x-axis for downsampling (default: 400)
-            y_bins: Number of bins along y-axis for downsampling (default: 50)
+            min_points: Target number of points to display (default: 10000).
+                Cache levels are built at 2× this value; final downsample
+                at render time reduces to exactly min_points.
+            display_aspect_ratio: Expected display width/height ratio for
+                optimal bin computation during caching (default: 16/9).
+                At render time, the actual zoom region's aspect ratio is used.
+            x_bins: Number of bins along x-axis for downsampling. If None
+                (default), auto-computed from display_aspect_ratio such that
+                x_bins × y_bins ≈ 2×min_points with even spatial distribution.
+            y_bins: Number of bins along y-axis for downsampling. If None
+                (default), auto-computed from display_aspect_ratio.
             zoom_identifier: State key for storing zoom range (default: 'heatmap_zoom')
             title: Heatmap title displayed above the plot
             x_label: X-axis label (defaults to x_column)
@@ -124,12 +138,25 @@ class Heatmap(BaseComponent):
                 are sent to the client regardless of filter selection. Should be
                 used for filters with a small number of unique values (<20).
                 Example: ['im_dimension'] for ion mobility filtering.
+            category_column: Optional column name for categorical coloring.
+                When provided, points are colored by discrete category values
+                instead of the continuous intensity colorscale. Useful for
+                condition-based heatmaps (e.g., coloring by sample group).
+            category_colors: Optional mapping of category values to colors.
+                Keys should match values in category_column.
+                Values should be CSS color strings (e.g., '#FF0000', 'red').
+                If not provided, default Plotly colors will be used.
+            log_scale: If True (default), apply log10 transformation to intensity
+                values for color mapping. Set to False for linear color mapping.
+            intensity_label: Custom label for the colorbar. Default is "Intensity".
+                Useful when displaying non-intensity values like scores or counts.
             **kwargs: Additional configuration options
         """
         self._x_column = x_column
         self._y_column = y_column
         self._intensity_column = intensity_column
         self._min_points = min_points
+        self._display_aspect_ratio = display_aspect_ratio
         self._x_bins = x_bins
         self._y_bins = y_bins
         self._zoom_identifier = zoom_identifier
@@ -137,7 +164,12 @@ class Heatmap(BaseComponent):
         self._x_label = x_label or x_column
         self._y_label = y_label or y_column
         self._colorscale = colorscale
+        self._reversescale = reversescale
         self._use_simple_downsample = use_simple_downsample
+        self._category_column = category_column
+        self._category_colors = category_colors or {}
+        self._log_scale = log_scale
+        self._intensity_label = intensity_label
         self._use_streaming = use_streaming
         self._categorical_filters = categorical_filters or []
@@ -155,6 +187,7 @@ class Heatmap(BaseComponent):
             y_column=y_column,
             intensity_column=intensity_column,
             min_points=min_points,
+            display_aspect_ratio=display_aspect_ratio,
             x_bins=x_bins,
             y_bins=y_bins,
             zoom_identifier=zoom_identifier,
@@ -165,6 +198,8 @@ class Heatmap(BaseComponent):
             use_simple_downsample=use_simple_downsample,
             use_streaming=use_streaming,
             categorical_filters=categorical_filters,
+            category_column=category_column,
+            category_colors=category_colors,
             **kwargs,
         )
@@ -180,6 +215,7 @@ class Heatmap(BaseComponent):
             "y_column": self._y_column,
             "intensity_column": self._intensity_column,
             "min_points": self._min_points,
+            "display_aspect_ratio": self._display_aspect_ratio,
             "x_bins": self._x_bins,
             "y_bins": self._y_bins,
             "use_simple_downsample": self._use_simple_downsample,
@@ -190,6 +226,10 @@ class Heatmap(BaseComponent):
             "x_label": self._x_label,
             "y_label": self._y_label,
             "colorscale": self._colorscale,
+            "category_column": self._category_column,
+            "log_scale": self._log_scale,
+            "intensity_label": self._intensity_label,
+            # Note: category_colors is render-time styling, doesn't affect cache
         }
     def _restore_cache_config(self, config: Dict[str, Any]) -> None:
@@ -197,7 +237,10 @@ class Heatmap(BaseComponent):
         self._x_column = config.get("x_column")
         self._y_column = config.get("y_column")
         self._intensity_column = config.get("intensity_column", "intensity")
-        self._min_points = config.get("min_points", 20000)
+        self._min_points = config.get("min_points", 10000)
+        self._display_aspect_ratio = config.get("display_aspect_ratio", 16 / 9)
+        # x_bins/y_bins are computed during preprocessing and stored in cache
+        # Fallback to old defaults for backward compatibility with old caches
         self._x_bins = config.get("x_bins", 400)
         self._y_bins = config.get("y_bins", 50)
         self._use_simple_downsample = config.get("use_simple_downsample", False)
@@ -208,6 +251,10 @@ class Heatmap(BaseComponent):
         self._x_label = config.get("x_label", self._x_column)
         self._y_label = config.get("y_label", self._y_column)
         self._colorscale = config.get("colorscale", "Portland")
+        self._category_column = config.get("category_column")
+        self._log_scale = config.get("log_scale", True)
+        self._intensity_label = config.get("intensity_label")
+        # category_colors is not stored in cache (render-time styling)
     def get_state_dependencies(self) -> list:
         """
@@ -242,14 +289,116 @@ class Heatmap(BaseComponent):
         else:
             self._preprocess_eager()
+    def _build_cascading_levels(
+        self,
+        source_data: pl.LazyFrame,
+        level_sizes: list,
+        x_range: tuple,
+        y_range: tuple,
+        cache_dir,
+        prefix: str = "level",
+    ) -> dict:
+        """
+        Build cascading compression levels from source data.
+        Each level is built from the previous larger level rather than from
+        raw data. This is efficient (raw data read once) and produces identical
+        results because the downsampling keeps top N highest-intensity points
+        per bin - points surviving at larger levels will also be selected at
+        smaller levels.
+        Args:
+            source_data: LazyFrame with raw/filtered data
+            level_sizes: List of target sizes for compressed levels (smallest first)
+            x_range: (x_min, x_max) for consistent bin boundaries
+            y_range: (y_min, y_max) for consistent bin boundaries
+            cache_dir: Path to save parquet files
+            prefix: Filename prefix (e.g., "level" or "cat_level_im_0")
+        Returns:
+            Dict with level LazyFrames keyed by "{prefix}_{idx}" and "num_levels"
+        """
+        import sys
+        result = {}
+        num_compressed = len(level_sizes)
+        # Get total count
+        total = source_data.select(pl.len()).collect().item()
+        # First: save full resolution as the largest level
+        full_res_path = cache_dir / f"{prefix}_{num_compressed}.parquet"
+        full_res = source_data.sort([self._x_column, self._y_column])
+        full_res.sink_parquet(full_res_path, compression="zstd")
+        print(
+            f"[HEATMAP] Saved {prefix}_{num_compressed} ({total:,} pts)",
+            file=sys.stderr,
+        )
+        # Start cascading from full resolution
+        current_source = pl.scan_parquet(full_res_path)
+        current_size = total
+        # Build compressed levels from largest to smallest
+        for i, target_size in enumerate(reversed(level_sizes)):
+            level_idx = num_compressed - 1 - i
+            level_path = cache_dir / f"{prefix}_{level_idx}.parquet"
+            # If target size equals or exceeds current, just copy reference
+            if target_size >= current_size:
+                level = current_source
+            elif self._use_simple_downsample:
+                level = downsample_2d_simple(
+                    current_source,
+                    max_points=target_size,
+                    intensity_column=self._intensity_column,
+                )
+            else:
+                level = downsample_2d_streaming(
+                    current_source,
+                    max_points=target_size,
+                    x_column=self._x_column,
+                    y_column=self._y_column,
+                    intensity_column=self._intensity_column,
+                    x_bins=self._x_bins,
+                    y_bins=self._y_bins,
+                    x_range=x_range,
+                    y_range=y_range,
+                )
+            # Sort and save immediately
+            level = level.sort([self._x_column, self._y_column])
+            level.sink_parquet(level_path, compression="zstd")
+            print(
+                f"[HEATMAP] Saved {prefix}_{level_idx} (target {target_size:,} pts)",
+                file=sys.stderr,
+            )
+            # Next iteration uses this level as source (cascading)
+            current_source = pl.scan_parquet(level_path)
+            current_size = target_size
+        # Load all levels back as LazyFrames
+        for i in range(num_compressed + 1):
+            level_path = cache_dir / f"{prefix}_{i}.parquet"
+            result[f"{prefix}_{i}"] = pl.scan_parquet(level_path)
+        result["num_levels"] = num_compressed + 1
+        return result
     def _preprocess_with_categorical_filters(self) -> None:
         """
-        Preprocess with per-filter-value compression levels.
+        Preprocess with per-filter-value compression levels using cascading.
         For each unique value of each categorical filter, creates separate
-        compression levels. This ensures that when a filter is applied at
-        render time, the resulting data has ~min_points regardless of the
-        filter value selected.
+        compression levels using cascading (building smaller levels from larger).
+        This ensures that when a filter is applied at render time, the resulting
+        data has ~min_points regardless of the filter value selected.
+        Uses cascading downsampling for efficiency - each level is built from
+        the previous larger level rather than from raw data.
         Data is sorted by x, y columns for efficient range query predicate pushdown.
@@ -261,6 +410,7 @@ class Heatmap(BaseComponent):
         import sys
         # Get data ranges (for the full dataset)
+        # These ranges are used for ALL levels to ensure consistent binning
         x_range, y_range = get_data_range(
             self._raw_data,
             self._x_column,
@@ -269,10 +419,31 @@ class Heatmap(BaseComponent):
         self._preprocessed_data["x_range"] = x_range
         self._preprocessed_data["y_range"] = y_range
+        # Compute optimal bins if not provided
+        # Cache at 2×min_points, use display_aspect_ratio for bin computation
+        cache_target = 2 * self._min_points
+        if self._x_bins is None or self._y_bins is None:
+            # Use display aspect ratio (not data aspect ratio) for optimal bins
+            self._x_bins, self._y_bins = compute_optimal_bins(
+                cache_target,
+                (0, self._display_aspect_ratio),  # Fake x_range matching aspect
+                (0, 1.0),  # Fake y_range
+            )
+            print(
+                f"[HEATMAP] Auto-computed bins: {self._x_bins}x{self._y_bins} "
+                f"= {self._x_bins * self._y_bins:,} (cache target: {cache_target:,}, "
+                f"display aspect: {self._display_aspect_ratio:.2f})",
+                file=sys.stderr,
+            )
         # Get total count
         total = self._raw_data.select(pl.len()).collect().item()
         self._preprocessed_data["total"] = total
+        # Create cache directory for immediate level saving
+        cache_dir = self._cache_dir / "preprocessed"
+        cache_dir.mkdir(parents=True, exist_ok=True)
         # Store metadata about categorical filters
         self._preprocessed_data["has_categorical_filters"] = True
         self._preprocessed_data["categorical_filter_values"] = {}
@@ -309,7 +480,7 @@ class Heatmap(BaseComponent):
                 unique_values
             )
-            # Create compression levels for each filter value
+            # Create compression levels for each filter value using cascading
             for filter_value in unique_values:
                 # Filter data to this value
                 filtered_data = self._raw_data.filter(
@@ -317,10 +488,8 @@ class Heatmap(BaseComponent):
                 )
                 filtered_total = filtered_data.select(pl.len()).collect().item()
-                # Compute level sizes for this filtered subset
-                level_sizes = compute_compression_levels(
-                    self._min_points, filtered_total
-                )
+                # Compute level sizes for this filtered subset (2× for cache buffer)
+                level_sizes = compute_compression_levels(cache_target, filtered_total)
                 print(
                     f"[HEATMAP]   Value {filter_value}: {filtered_total:,} pts → levels {level_sizes}",
@@ -332,94 +501,71 @@ class Heatmap(BaseComponent):
                     f"cat_level_sizes_{filter_id}_{filter_value}"
                 ] = level_sizes
-                # Build each compressed level
-                for level_idx, target_size in enumerate(level_sizes):
-                    # If target size equals total, skip downsampling - use all data
-                    if target_size >= filtered_total:
-                        level = filtered_data
-                    elif self._use_simple_downsample:
-                        level = downsample_2d_simple(
-                            filtered_data,
-                            max_points=target_size,
-                            intensity_column=self._intensity_column,
-                        )
-                    else:
-                        level = downsample_2d_streaming(
-                            filtered_data,
-                            max_points=target_size,
-                            x_column=self._x_column,
-                            y_column=self._y_column,
-                            intensity_column=self._intensity_column,
-                            x_bins=self._x_bins,
-                            y_bins=self._y_bins,
-                            x_range=x_range,
-                            y_range=y_range,
-                        )
-                    # Sort by x, y for efficient range query predicate pushdown
-                    level = level.sort([self._x_column, self._y_column])
-                    # Store LazyFrame for streaming to disk
-                    level_key = f"cat_level_{filter_id}_{filter_value}_{level_idx}"
-                    self._preprocessed_data[level_key] = level  # Keep lazy
-                # Add full resolution as final level (for zoom fallback)
-                # Also sorted for consistent predicate pushdown behavior
-                num_compressed = len(level_sizes)
-                full_res_key = f"cat_level_{filter_id}_{filter_value}_{num_compressed}"
-                self._preprocessed_data[full_res_key] = filtered_data.sort(
-                    [self._x_column, self._y_column]
+                # Build cascading levels using helper
+                prefix = f"cat_level_{filter_id}_{filter_value}"
+                levels_result = self._build_cascading_levels(
+                    source_data=filtered_data,
+                    level_sizes=level_sizes,
+                    x_range=x_range,
+                    y_range=y_range,
+                    cache_dir=cache_dir,
+                    prefix=prefix,
                 )
-                self._preprocessed_data[
-                    f"cat_num_levels_{filter_id}_{filter_value}"
-                ] = num_compressed + 1
+                # Copy results to preprocessed_data
+                for key, value in levels_result.items():
+                    if key == "num_levels":
+                        self._preprocessed_data[
+                            f"cat_num_levels_{filter_id}_{filter_value}"
+                        ] = value
+                    else:
+                        self._preprocessed_data[key] = value
         # Also create global levels for when no categorical filter is selected
-        # (fallback to standard behavior)
-        level_sizes = compute_compression_levels(self._min_points, total)
+        # (fallback to standard behavior) - using cascading with 2× cache buffer
+        level_sizes = compute_compression_levels(cache_target, total)
         self._preprocessed_data["level_sizes"] = level_sizes
-        for i, size in enumerate(level_sizes):
-            # If target size equals total, skip downsampling - use all data
-            if size >= total:
-                level = self._raw_data
-            elif self._use_simple_downsample:
-                level = downsample_2d_simple(
-                    self._raw_data,
-                    max_points=size,
-                    intensity_column=self._intensity_column,
-                )
+        # Build global cascading levels using helper
+        levels_result = self._build_cascading_levels(
+            source_data=self._raw_data,
+            level_sizes=level_sizes,
+            x_range=x_range,
+            y_range=y_range,
+            cache_dir=cache_dir,
+            prefix="level",
+        )
+        # Copy results to preprocessed_data
+        for key, value in levels_result.items():
+            if key == "num_levels":
+                self._preprocessed_data["num_levels"] = value
             else:
-                level = downsample_2d_streaming(
-                    self._raw_data,
-                    max_points=size,
-                    x_column=self._x_column,
-                    y_column=self._y_column,
-                    intensity_column=self._intensity_column,
-                    x_bins=self._x_bins,
-                    y_bins=self._y_bins,
-                    x_range=x_range,
-                    y_range=y_range,
-                )
-            # Sort by x, y for efficient range query predicate pushdown
-            level = level.sort([self._x_column, self._y_column])
-            self._preprocessed_data[f"level_{i}"] = level  # Keep lazy
+                self._preprocessed_data[key] = value
-        # Add full resolution as final level (for zoom fallback)
-        # Also sorted for consistent predicate pushdown behavior
-        num_compressed = len(level_sizes)
-        self._preprocessed_data[f"level_{num_compressed}"] = self._raw_data.sort(
-            [self._x_column, self._y_column]
-        )
-        self._preprocessed_data["num_levels"] = num_compressed + 1
+        # Mark that files are already saved
+        self._preprocessed_data["_files_already_saved"] = True
     def _preprocess_streaming(self) -> None:
         """
-        Streaming preprocessing - levels stay lazy through caching.
+        Streaming preprocessing with cascading - builds smaller levels from larger.
+        Uses cascading downsampling: each level is built from the previous larger
+        level rather than from raw data. This is more efficient (raw data read once)
+        and produces identical results because the downsampling algorithm keeps
+        the TOP N highest-intensity points per bin - points that survive at a larger
+        level will also be selected at smaller levels.
+        Levels are saved to disk immediately after creation, then read back as the
+        source for the next smaller level. This keeps memory low while enabling
+        cascading.
-        Builds lazy query plans that are streamed to disk via sink_parquet().
         Data is sorted by x, y columns for efficient range query predicate pushdown.
         """
+        import sys
         # Get data ranges (minimal collect - just 4 values)
+        # These ranges are used for ALL levels to ensure consistent binning
         x_range, y_range = get_data_range(
             self._raw_data,
             self._x_column,
@@ -428,55 +574,55 @@ class Heatmap(BaseComponent):
         self._preprocessed_data["x_range"] = x_range
         self._preprocessed_data["y_range"] = y_range
+        # Compute optimal bins if not provided
+        # Cache at 2×min_points, use display_aspect_ratio for bin computation
+        cache_target = 2 * self._min_points
+        if self._x_bins is None or self._y_bins is None:
+            # Use display aspect ratio (not data aspect ratio) for optimal bins
+            # This ensures even distribution in the expected display dimensions
+            self._x_bins, self._y_bins = compute_optimal_bins(
+                cache_target,
+                (0, self._display_aspect_ratio),  # Fake x_range matching aspect
+                (0, 1.0),  # Fake y_range
+            )
+            print(
+                f"[HEATMAP] Auto-computed bins: {self._x_bins}x{self._y_bins} "
+                f"= {self._x_bins * self._y_bins:,} (cache target: {cache_target:,}, "
+                f"display aspect: {self._display_aspect_ratio:.2f})",
+                file=sys.stderr,
+            )
         # Get total count
         total = self._raw_data.select(pl.len()).collect().item()
         self._preprocessed_data["total"] = total
-        # Compute target sizes for levels
-        level_sizes = compute_compression_levels(self._min_points, total)
+        # Compute target sizes for levels (use 2×min_points for smallest cache level)
+        level_sizes = compute_compression_levels(cache_target, total)
         self._preprocessed_data["level_sizes"] = level_sizes
-        # Build and collect each level
-        self._preprocessed_data["levels"] = []
+        # Create cache directory for immediate level saving
+        cache_dir = self._cache_dir / "preprocessed"
+        cache_dir.mkdir(parents=True, exist_ok=True)
+        # Build cascading levels using helper
+        levels_result = self._build_cascading_levels(
+            source_data=self._raw_data,
+            level_sizes=level_sizes,
+            x_range=x_range,
+            y_range=y_range,
+            cache_dir=cache_dir,
+            prefix="level",
+        )
-        for i, size in enumerate(level_sizes):
-            # If target size equals total, skip downsampling - use all data
-            if size >= total:
-                level = self._raw_data
-            elif self._use_simple_downsample:
-                level = downsample_2d_simple(
-                    self._raw_data,
-                    max_points=size,
-                    intensity_column=self._intensity_column,
-                )
+        # Copy results to preprocessed_data
+        for key, value in levels_result.items():
+            if key == "num_levels":
+                self._preprocessed_data["num_levels"] = value
             else:
-                level = downsample_2d_streaming(
-                    self._raw_data,
-                    max_points=size,
-                    x_column=self._x_column,
-                    y_column=self._y_column,
-                    intensity_column=self._intensity_column,
-                    x_bins=self._x_bins,
-                    y_bins=self._y_bins,
-                    x_range=x_range,
-                    y_range=y_range,
-                )
-            # Sort by x, y for efficient range query predicate pushdown
-            # This clusters spatially close points together in row groups
-            level = level.sort([self._x_column, self._y_column])
-            # Store LazyFrame for streaming to disk
-            # Base class will use sink_parquet() to stream without full materialization
-            self._preprocessed_data[f"level_{i}"] = level  # Keep lazy
+                self._preprocessed_data[key] = value
-        # Add full resolution as final level (for zoom fallback)
-        # Also sorted for consistent predicate pushdown behavior
-        num_compressed = len(level_sizes)
-        self._preprocessed_data[f"level_{num_compressed}"] = self._raw_data.sort(
-            [self._x_column, self._y_column]
-        )
-        # Store number of levels for reconstruction (includes full resolution)
-        self._preprocessed_data["num_levels"] = num_compressed + 1
+        # Mark that files are already saved (base class should skip saving)
+        self._preprocessed_data["_files_already_saved"] = True
     def _preprocess_eager(self) -> None:
         """
@@ -486,6 +632,8 @@ class Heatmap(BaseComponent):
         downsampling for better spatial distribution.
         Data is sorted by x, y columns for efficient range query predicate pushdown.
         """
+        import sys
         # Get data ranges
         x_range, y_range = get_data_range(
             self._raw_data,
@@ -495,12 +643,29 @@ class Heatmap(BaseComponent):
         self._preprocessed_data["x_range"] = x_range
         self._preprocessed_data["y_range"] = y_range
+        # Compute optimal bins if not provided
+        # Cache at 2×min_points, use display_aspect_ratio for bin computation
+        cache_target = 2 * self._min_points
+        if self._x_bins is None or self._y_bins is None:
+            # Use display aspect ratio (not data aspect ratio) for optimal bins
+            self._x_bins, self._y_bins = compute_optimal_bins(
+                cache_target,
+                (0, self._display_aspect_ratio),  # Fake x_range matching aspect
+                (0, 1.0),  # Fake y_range
+            )
+            print(
+                f"[HEATMAP] Auto-computed bins: {self._x_bins}x{self._y_bins} "
+                f"= {self._x_bins * self._y_bins:,} (cache target: {cache_target:,}, "
+                f"display aspect: {self._display_aspect_ratio:.2f})",
+                file=sys.stderr,
+            )
         # Get total count
         total = self._raw_data.select(pl.len()).collect().item()
         self._preprocessed_data["total"] = total
-        # Compute compression level target sizes
-        level_sizes = compute_compression_levels(self._min_points, total)
+        # Compute compression level target sizes (2× for cache buffer)
+        level_sizes = compute_compression_levels(cache_target, total)
         self._preprocessed_data["level_sizes"] = level_sizes
         # Build levels from largest to smallest
@@ -736,10 +901,18 @@ class Heatmap(BaseComponent):
             if count >= self._min_points:
                 # This level has enough detail
                 if count > self._min_points:
-                    # Over limit - downsample to stay at/under max
-                    # Use ZOOM range for binning (not global) to avoid sparse bins
+                    # Over limit - downsample to exactly min_points
+                    # Compute optimal bins from ACTUAL zoom region aspect ratio
                     zoom_x_range = (x0, x1)
                     zoom_y_range = (y0, y1)
+                    render_x_bins, render_y_bins = compute_optimal_bins(
+                        self._min_points, zoom_x_range, zoom_y_range
+                    )
+                    print(
+                        f"[HEATMAP] Render downsample: {count:,} → {self._min_points:,} pts "
+                        f"(bins: {render_x_bins}x{render_y_bins})",
+                        file=sys.stderr,
+                    )
                     if self._use_streaming or self._use_simple_downsample:
                         if self._use_simple_downsample:
                             return downsample_2d_simple(
@@ -754,8 +927,8 @@ class Heatmap(BaseComponent):
                                 x_column=self._x_column,
                                 y_column=self._y_column,
                                 intensity_column=self._intensity_column,
-                                x_bins=self._x_bins,
-                                y_bins=self._y_bins,
+                                x_bins=render_x_bins,
+                                y_bins=render_y_bins,
                                 x_range=zoom_x_range,
                                 y_range=zoom_y_range,
                             ).collect()
@@ -766,8 +939,8 @@ class Heatmap(BaseComponent):
                             x_column=self._x_column,
                             y_column=self._y_column,
                             intensity_column=self._intensity_column,
-                            x_bins=self._x_bins,
-                            y_bins=self._y_bins,
+                            x_bins=render_x_bins,
+                            y_bins=render_y_bins,
                         ).collect()
                 return filtered
@@ -794,12 +967,15 @@ class Heatmap(BaseComponent):
         zoom = state.get(self._zoom_identifier)
-        # Build columns to select
+        # Build columns to select (filter out None values)
         columns_to_select = [
-            self._x_column,
-            self._y_column,
-            self._intensity_column,
+            col
+            for col in [self._x_column, self._y_column, self._intensity_column]
+            if col is not None
         ]
+        # Include category column if specified
+        if self._category_column and self._category_column not in columns_to_select:
+            columns_to_select.append(self._category_column)
         # Include columns needed for interactivity
         if self._interactivity:
             for col in self._interactivity.values():
@@ -852,17 +1028,25 @@ class Heatmap(BaseComponent):
                     columns=columns_to_select,
                     filter_defaults=self._filter_defaults,
                 )
-                # Sort by intensity ascending so high-intensity points are drawn on top
-                df_pandas = df_pandas.sort_values(self._intensity_column).reset_index(
-                    drop=True
-                )
+                # Sort by intensity ascending so high-intensity points are drawn on top (scattergl)
+                if (
+                    self._intensity_column
+                    and self._intensity_column in df_pandas.columns
+                ):
+                    df_pandas = df_pandas.sort_values(
+                        self._intensity_column, ascending=True
+                    ).reset_index(drop=True)
             else:
                 # No filters to apply - levels already filtered by categorical filter
                 schema_names = data.collect_schema().names()
                 available_cols = [c for c in columns_to_select if c in schema_names]
                 df_polars = data.select(available_cols).collect()
-                # Sort by intensity ascending so high-intensity points are drawn on top
-                df_polars = df_polars.sort(self._intensity_column)
+                # Sort by intensity ascending so high-intensity points are drawn on top (scattergl)
+                if (
+                    self._intensity_column
+                    and self._intensity_column in df_polars.columns
+                ):
+                    df_polars = df_polars.sort(self._intensity_column)
                 data_hash = compute_dataframe_hash(df_polars)
                 df_pandas = df_polars.to_pandas()
         else:
@@ -874,8 +1058,9 @@ class Heatmap(BaseComponent):
             # Select only needed columns
             available_cols = [c for c in columns_to_select if c in df_polars.columns]
             df_polars = df_polars.select(available_cols)
-            # Sort by intensity ascending so high-intensity points are drawn on top
-            df_polars = df_polars.sort(self._intensity_column)
+            # Sort by intensity ascending so high-intensity points are drawn on top (scattergl)
+            if self._intensity_column and self._intensity_column in df_polars.columns:
+                df_polars = df_polars.sort(self._intensity_column)
             print(
                 f"[HEATMAP] Selected {len(df_polars)} pts for zoom, levels={level_sizes}",
                 file=sys.stderr,
@@ -903,6 +1088,7 @@ class Heatmap(BaseComponent):
             "xLabel": self._x_label,
             "yLabel": self._y_label,
             "colorscale": self._colorscale,
+            "reversescale": self._reversescale,
             "zoomIdentifier": self._zoom_identifier,
             "interactivity": self._interactivity,
         }
@@ -910,6 +1096,17 @@ class Heatmap(BaseComponent):
         if self._title:
             args["title"] = self._title
+        # Add category column configuration for categorical coloring mode
+        if self._category_column:
+            args["categoryColumn"] = self._category_column
+            if self._category_colors:
+                args["categoryColors"] = self._category_colors
+        # Add log scale and intensity label configuration
+        args["logScale"] = self._log_scale
+        if self._intensity_label:
+            args["intensityLabel"] = self._intensity_label
         # Add any extra config options
         args.update(self._config)

openms-insight 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

openms-insight 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl