openms-insight 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,823 @@
1
+ """Heatmap component using Plotly scattergl."""
2
+
3
+ from typing import Any, Dict, List, Optional, Tuple
4
+
5
+ import polars as pl
6
+
7
+ from ..core.base import BaseComponent
8
+ from ..core.registry import register_component
9
+ from ..preprocessing.compression import (
10
+ compute_compression_levels,
11
+ downsample_2d,
12
+ downsample_2d_simple,
13
+ downsample_2d_streaming,
14
+ get_data_range,
15
+ )
16
+ from ..preprocessing.filtering import compute_dataframe_hash, filter_and_collect_cached
17
+
18
+
19
+ # Cache key only includes zoom state (not other selections)
20
+ def _make_zoom_cache_key(zoom: Optional[Dict[str, Any]]) -> tuple:
21
+ """Create hashable cache key from zoom state."""
22
+ if zoom is None:
23
+ return (None,)
24
+ return (
25
+ ('x0', zoom.get('xRange', [-1, -1])[0]),
26
+ ('x1', zoom.get('xRange', [-1, -1])[1]),
27
+ ('y0', zoom.get('yRange', [-1, -1])[0]),
28
+ ('y1', zoom.get('yRange', [-1, -1])[1]),
29
+ )
30
+
31
+
32
+ @register_component("heatmap")
33
+ class Heatmap(BaseComponent):
34
+ """
35
+ Interactive 2D scatter/heatmap component using Plotly scattergl.
36
+
37
+ Designed for large datasets (millions of points) using multi-resolution
38
+ preprocessing with zoom-based level selection. Points are colored by
39
+ intensity using a log-scale colormap.
40
+
41
+ Features:
42
+ - Multi-resolution downsampling for large datasets
43
+ - Zoom-based automatic level selection
44
+ - Click-to-select with cross-component linking
45
+ - Log-scale intensity colormap
46
+ - SVG export
47
+
48
+ Example:
49
+ heatmap = Heatmap(
50
+ cache_id="peaks_heatmap",
51
+ data=peaks_df,
52
+ x_column='retention_time',
53
+ y_column='mass',
54
+ intensity_column='intensity',
55
+ interactivity={
56
+ 'spectrum': 'scan_id',
57
+ 'peak': 'mass',
58
+ },
59
+ title="Peak Heatmap",
60
+ )
61
+ heatmap(state_manager=state_manager)
62
+ """
63
+
64
+ _component_type: str = "heatmap"
65
+
66
    def __init__(
        self,
        cache_id: str,
        x_column: str,
        y_column: str,
        data: Optional[pl.LazyFrame] = None,
        intensity_column: str = 'intensity',
        filters: Optional[Dict[str, str]] = None,
        filter_defaults: Optional[Dict[str, Any]] = None,
        interactivity: Optional[Dict[str, str]] = None,
        cache_path: str = ".",
        regenerate_cache: bool = False,
        min_points: int = 20000,
        x_bins: int = 400,
        y_bins: int = 50,
        zoom_identifier: str = 'heatmap_zoom',
        title: Optional[str] = None,
        x_label: Optional[str] = None,
        y_label: Optional[str] = None,
        colorscale: str = 'Portland',
        use_simple_downsample: bool = False,
        use_streaming: bool = True,
        categorical_filters: Optional[List[str]] = None,
        **kwargs
    ):
        """
        Initialize the Heatmap component.

        Args:
            cache_id: Unique identifier for this component's cache (MANDATORY).
                Creates a folder {cache_path}/{cache_id}/ for cached data.
            x_column: Name of column for x-axis values
            y_column: Name of column for y-axis values
            data: Polars LazyFrame with heatmap data. Optional if cache exists.
            intensity_column: Name of column for intensity/color values
            filters: Mapping of identifier names to column names for filtering
            filter_defaults: Mapping of filter identifiers to default values.
                Forwarded to the base component and passed to
                filter_and_collect_cached() when filters are applied at
                render time.
            interactivity: Mapping of identifier names to column names for clicks.
                When a point is clicked, sets each identifier to the clicked
                point's value in the corresponding column.
            cache_path: Base path for cache storage. Default "." (current dir).
            regenerate_cache: If True, regenerate cache even if valid cache exists.
            min_points: Target size for smallest compression level and
                threshold for level selection (default: 20000)
            x_bins: Number of bins along x-axis for downsampling (default: 400)
            y_bins: Number of bins along y-axis for downsampling (default: 50)
            zoom_identifier: State key for storing zoom range (default: 'heatmap_zoom')
            title: Heatmap title displayed above the plot
            x_label: X-axis label (defaults to x_column)
            y_label: Y-axis label (defaults to y_column)
            colorscale: Plotly colorscale name (default: 'Portland')
            use_simple_downsample: If True, use simple top-N downsampling instead
                of spatial binning (doesn't require scipy)
            use_streaming: If True (default), use streaming downsampling that
                stays lazy until render time. Reduces memory on init.
            categorical_filters: List of filter identifiers that should have
                per-value compression levels. This ensures constant point counts
                are sent to the client regardless of filter selection. Should be
                used for filters with a small number of unique values (<20).
                Example: ['im_dimension'] for ion mobility filtering.
            **kwargs: Additional configuration options forwarded to the base
                component.
        """
        # Heatmap-specific settings are assigned BEFORE super().__init__()
        # because the base class may run preprocessing / cache validation
        # (which reads these attributes via _preprocess/_get_cache_config).
        self._x_column = x_column
        self._y_column = y_column
        self._intensity_column = intensity_column
        self._min_points = min_points
        self._x_bins = x_bins
        self._y_bins = y_bins
        self._zoom_identifier = zoom_identifier
        self._title = title
        # Axis labels fall back to the column names when not given.
        self._x_label = x_label or x_column
        self._y_label = y_label or y_column
        self._colorscale = colorscale
        self._use_simple_downsample = use_simple_downsample
        self._use_streaming = use_streaming
        # Normalize None to an empty list so membership tests are safe.
        self._categorical_filters = categorical_filters or []

        super().__init__(
            cache_id=cache_id,
            data=data,
            filters=filters,
            filter_defaults=filter_defaults,
            interactivity=interactivity,
            cache_path=cache_path,
            regenerate_cache=regenerate_cache,
            **kwargs
        )
152
+
153
+ def _get_cache_config(self) -> Dict[str, Any]:
154
+ """
155
+ Get configuration that affects cache validity.
156
+
157
+ Returns:
158
+ Dict of config values that affect preprocessing
159
+ """
160
+ return {
161
+ 'x_column': self._x_column,
162
+ 'y_column': self._y_column,
163
+ 'intensity_column': self._intensity_column,
164
+ 'min_points': self._min_points,
165
+ 'x_bins': self._x_bins,
166
+ 'y_bins': self._y_bins,
167
+ 'use_simple_downsample': self._use_simple_downsample,
168
+ 'use_streaming': self._use_streaming,
169
+ 'categorical_filters': sorted(self._categorical_filters),
170
+ }
171
+
172
+ def get_state_dependencies(self) -> list:
173
+ """
174
+ Return list of state keys that affect this component's data.
175
+
176
+ Heatmaps depend on both filters (like other components) and
177
+ the zoom state, which determines which resolution level is used.
178
+
179
+ Returns:
180
+ List of state identifier keys including zoom_identifier
181
+ """
182
+ deps = list(self._filters.keys()) if self._filters else []
183
+ deps.append(self._zoom_identifier)
184
+ return deps
185
+
186
+ def _preprocess(self) -> None:
187
+ """
188
+ Preprocess heatmap data by computing multi-resolution levels.
189
+
190
+ This is STAGE 1 processing. In streaming mode (default), levels stay
191
+ as lazy LazyFrames and are only collected at render time. In non-streaming
192
+ mode, levels are eagerly computed for faster rendering but higher memory.
193
+
194
+ If categorical_filters is specified, creates separate compression levels
195
+ for each unique value of those filters, ensuring constant point counts
196
+ regardless of filter selection.
197
+ """
198
+ if self._categorical_filters:
199
+ self._preprocess_with_categorical_filters()
200
+ elif self._use_streaming:
201
+ self._preprocess_streaming()
202
+ else:
203
+ self._preprocess_eager()
204
+
205
    def _preprocess_with_categorical_filters(self) -> None:
        """
        Preprocess with per-filter-value compression levels.

        For each unique value of each categorical filter, creates separate
        compression levels. This ensures that when a filter is applied at
        render time, the resulting data has ~min_points regardless of the
        filter value selected.

        Keys written into self._preprocessed_data per filter value:
            cat_level_sizes_{filter_id}_{value}: list of target sizes
            cat_num_levels_{filter_id}_{value}:  number of levels
            cat_level_{filter_id}_{value}_{idx}: collected level DataFrame

        Global levels (level_{i}, level_sizes, num_levels) are also built
        as a fallback for when no categorical value is selected.

        Example: For im_dimension with values [0, 1, 2, 3], creates:
        - cat_level_im_dimension_0_0: 20K points with im_id=0
        - cat_level_im_dimension_0_1: 20K points with im_id=1
        - etc.
        """
        import sys

        # Get data ranges for the FULL dataset. Per-value levels reuse
        # these ranges so every value is binned over the same grid.
        x_range, y_range = get_data_range(
            self._raw_data,
            self._x_column,
            self._y_column,
        )
        self._preprocessed_data['x_range'] = x_range
        self._preprocessed_data['y_range'] = y_range

        # Total row count (one small collect).
        total = self._raw_data.select(pl.len()).collect().item()
        self._preprocessed_data['total'] = total

        # Metadata consumed by _get_levels_for_state at render time.
        self._preprocessed_data['has_categorical_filters'] = True
        self._preprocessed_data['categorical_filter_values'] = {}

        # Process each categorical filter independently.
        for filter_id in self._categorical_filters:
            if filter_id not in self._filters:
                print(f"[HEATMAP] Warning: categorical_filter '{filter_id}' not in filters, skipping", file=sys.stderr)
                continue

            column_name = self._filters[filter_id]

            # Distinct values present for this filter column.
            unique_values = (
                self._raw_data
                .select(pl.col(column_name))
                .unique()
                .collect()
                .to_series()
                .to_list()
            )
            # Drop nulls and negative values (presumably "no value"
            # sentinels — NOTE(review): confirm negative values are never
            # legitimate filter values).
            unique_values = sorted([v for v in unique_values if v is not None and v >= 0])

            print(f"[HEATMAP] Categorical filter '{filter_id}' ({column_name}): {len(unique_values)} unique values", file=sys.stderr)

            self._preprocessed_data['categorical_filter_values'][filter_id] = unique_values

            # Create compression levels for each filter value.
            for filter_value in unique_values:
                # Restrict the data to this single value.
                filtered_data = self._raw_data.filter(pl.col(column_name) == filter_value)
                filtered_total = filtered_data.select(pl.len()).collect().item()

                # Level target sizes are computed from this subset's count,
                # so each value ends up with ~min_points at the smallest level.
                level_sizes = compute_compression_levels(self._min_points, filtered_total)

                print(f"[HEATMAP] Value {filter_value}: {filtered_total:,} pts → levels {level_sizes}", file=sys.stderr)

                # Store level metadata for this filter value.
                self._preprocessed_data[f'cat_level_sizes_{filter_id}_{filter_value}'] = level_sizes
                self._preprocessed_data[f'cat_num_levels_{filter_id}_{filter_value}'] = len(level_sizes)

                # Build each level.
                for level_idx, target_size in enumerate(level_sizes):
                    # Target at or above total: no downsampling needed.
                    if target_size >= filtered_total:
                        level = filtered_data
                    elif self._use_simple_downsample:
                        level = downsample_2d_simple(
                            filtered_data,
                            max_points=target_size,
                            intensity_column=self._intensity_column,
                        )
                    else:
                        level = downsample_2d_streaming(
                            filtered_data,
                            max_points=target_size,
                            x_column=self._x_column,
                            y_column=self._y_column,
                            intensity_column=self._intensity_column,
                            x_bins=self._x_bins,
                            y_bins=self._y_bins,
                            x_range=x_range,
                            y_range=y_range,
                        )

                    # Collect eagerly so the base class can serialize it.
                    level_key = f'cat_level_{filter_id}_{filter_value}_{level_idx}'
                    self._preprocessed_data[level_key] = level.collect()

        # Also create global levels for when no categorical filter is selected
        # (fallback to standard behavior).
        level_sizes = compute_compression_levels(self._min_points, total)
        self._preprocessed_data['level_sizes'] = level_sizes
        self._preprocessed_data['num_levels'] = len(level_sizes)

        for i, size in enumerate(level_sizes):
            # Target at or above total: no downsampling needed.
            if size >= total:
                level = self._raw_data
            elif self._use_simple_downsample:
                level = downsample_2d_simple(
                    self._raw_data,
                    max_points=size,
                    intensity_column=self._intensity_column,
                )
            else:
                level = downsample_2d_streaming(
                    self._raw_data,
                    max_points=size,
                    x_column=self._x_column,
                    y_column=self._y_column,
                    intensity_column=self._intensity_column,
                    x_bins=self._x_bins,
                    y_bins=self._y_bins,
                    x_range=x_range,
                    y_range=y_range,
                )
            self._preprocessed_data[f'level_{i}'] = level.collect()
333
+
334
+ def _preprocess_streaming(self) -> None:
335
+ """
336
+ Streaming preprocessing - levels stay lazy until render.
337
+
338
+ Builds lazy query plans and collects them for caching.
339
+ """
340
+ # Get data ranges (minimal collect - just 4 values)
341
+ x_range, y_range = get_data_range(
342
+ self._raw_data,
343
+ self._x_column,
344
+ self._y_column,
345
+ )
346
+ self._preprocessed_data['x_range'] = x_range
347
+ self._preprocessed_data['y_range'] = y_range
348
+
349
+ # Get total count
350
+ total = self._raw_data.select(pl.len()).collect().item()
351
+ self._preprocessed_data['total'] = total
352
+
353
+ # Compute target sizes for levels
354
+ level_sizes = compute_compression_levels(self._min_points, total)
355
+ self._preprocessed_data['level_sizes'] = level_sizes
356
+
357
+ # Build and collect each level
358
+ self._preprocessed_data['levels'] = []
359
+
360
+ for i, size in enumerate(level_sizes):
361
+ # If target size equals total, skip downsampling - use all data
362
+ if size >= total:
363
+ level = self._raw_data
364
+ elif self._use_simple_downsample:
365
+ level = downsample_2d_simple(
366
+ self._raw_data,
367
+ max_points=size,
368
+ intensity_column=self._intensity_column,
369
+ )
370
+ else:
371
+ level = downsample_2d_streaming(
372
+ self._raw_data,
373
+ max_points=size,
374
+ x_column=self._x_column,
375
+ y_column=self._y_column,
376
+ intensity_column=self._intensity_column,
377
+ x_bins=self._x_bins,
378
+ y_bins=self._y_bins,
379
+ x_range=x_range,
380
+ y_range=y_range,
381
+ )
382
+ # Collect and store as DataFrame for caching
383
+ # Base class will serialize these to parquet
384
+ self._preprocessed_data[f'level_{i}'] = level.collect()
385
+
386
+ # Store number of levels for reconstruction
387
+ self._preprocessed_data['num_levels'] = len(level_sizes)
388
+
389
    def _preprocess_eager(self) -> None:
        """
        Eager preprocessing - levels are computed upfront.

        Uses more memory at init but faster rendering. Uses scipy-based
        downsampling (downsample_2d) for better spatial distribution.

        Levels are built from largest to smallest, each downsampled from
        the PREVIOUS level rather than from the raw data, so smaller levels
        are strict refinements of larger ones.
        """
        # Get data ranges.
        x_range, y_range = get_data_range(
            self._raw_data,
            self._x_column,
            self._y_column,
        )
        self._preprocessed_data['x_range'] = x_range
        self._preprocessed_data['y_range'] = y_range

        # Total row count (one small collect).
        total = self._raw_data.select(pl.len()).collect().item()
        self._preprocessed_data['total'] = total

        # Compute compression level target sizes (smallest first).
        level_sizes = compute_compression_levels(self._min_points, total)
        self._preprocessed_data['level_sizes'] = level_sizes

        # Build levels from largest to smallest, chaining each off the last.
        if level_sizes:
            current = self._raw_data

            for i, size in enumerate(reversed(level_sizes)):
                # Target at or above total: skip downsampling, use all data.
                if size >= total:
                    downsampled = current
                elif self._use_simple_downsample:
                    downsampled = downsample_2d_simple(
                        current,
                        max_points=size,
                        intensity_column=self._intensity_column,
                    )
                else:
                    downsampled = downsample_2d(
                        current,
                        max_points=size,
                        x_column=self._x_column,
                        y_column=self._y_column,
                        intensity_column=self._intensity_column,
                        x_bins=self._x_bins,
                        y_bins=self._y_bins,
                    )
                # Collect for caching - iteration runs largest-first, so map
                # back to the ascending 'level_{idx}' index (0 = smallest).
                level_idx = len(level_sizes) - 1 - i
                if isinstance(downsampled, pl.LazyFrame):
                    self._preprocessed_data[f'level_{level_idx}'] = downsampled.collect()
                else:
                    self._preprocessed_data[f'level_{level_idx}'] = downsampled
                current = downsampled

        # Store number of levels for reconstruction at render time.
        self._preprocessed_data['num_levels'] = len(level_sizes)
447
+
448
+ def _get_levels(self) -> list:
449
+ """
450
+ Get compression levels list for rendering.
451
+
452
+ Reconstructs the levels list from preprocessed data,
453
+ adding full resolution at the end.
454
+ """
455
+ num_levels = self._preprocessed_data.get('num_levels', 0)
456
+ levels = []
457
+
458
+ for i in range(num_levels):
459
+ level_data = self._preprocessed_data.get(f'level_{i}')
460
+ if level_data is not None:
461
+ levels.append(level_data)
462
+
463
+ # Add full resolution at end (if raw data available)
464
+ if self._raw_data is not None:
465
+ levels.append(self._raw_data)
466
+
467
+ return levels
468
+
469
+ def _get_categorical_levels(
470
+ self,
471
+ filter_id: str,
472
+ filter_value: Any,
473
+ ) -> Tuple[list, Optional[pl.LazyFrame]]:
474
+ """
475
+ Get compression levels for a specific categorical filter value.
476
+
477
+ Args:
478
+ filter_id: The filter identifier (e.g., 'im_dimension')
479
+ filter_value: The filter value to get levels for (e.g., 0)
480
+
481
+ Returns:
482
+ Tuple of (levels list, filtered raw data for full resolution)
483
+ Returns ([], None) if no categorical levels exist for this filter
484
+ """
485
+ # Check if we have categorical levels for this filter/value
486
+ num_levels_key = f'cat_num_levels_{filter_id}_{filter_value}'
487
+ num_levels = self._preprocessed_data.get(num_levels_key, 0)
488
+
489
+ if num_levels == 0:
490
+ return [], None
491
+
492
+ levels = []
493
+ for i in range(num_levels):
494
+ level_key = f'cat_level_{filter_id}_{filter_value}_{i}'
495
+ level_data = self._preprocessed_data.get(level_key)
496
+ if level_data is not None:
497
+ levels.append(level_data)
498
+
499
+ # Get filtered raw data for full resolution (if available)
500
+ filtered_raw = None
501
+ if self._raw_data is not None and filter_id in self._filters:
502
+ column_name = self._filters[filter_id]
503
+ filtered_raw = self._raw_data.filter(pl.col(column_name) == filter_value)
504
+
505
+ return levels, filtered_raw
506
+
507
+ def _get_levels_for_state(self, state: Dict[str, Any]) -> Tuple[list, Optional[pl.LazyFrame]]:
508
+ """
509
+ Get appropriate compression levels based on current filter state.
510
+
511
+ If categorical_filters are configured and a matching filter value is
512
+ selected in state, returns the per-value levels. Otherwise returns
513
+ the global levels.
514
+
515
+ Args:
516
+ state: Current selection state
517
+
518
+ Returns:
519
+ Tuple of (levels list, raw data for full resolution)
520
+ """
521
+ # Check if we have categorical filters and a selected value
522
+ if self._preprocessed_data.get('has_categorical_filters'):
523
+ cat_filter_values = self._preprocessed_data.get('categorical_filter_values', {})
524
+
525
+ for filter_id in self._categorical_filters:
526
+ if filter_id not in cat_filter_values:
527
+ continue
528
+
529
+ selected_value = state.get(filter_id)
530
+ if selected_value is None:
531
+ continue
532
+
533
+ # Convert float to int if needed (JS numbers come as floats)
534
+ if isinstance(selected_value, float) and selected_value.is_integer():
535
+ selected_value = int(selected_value)
536
+
537
+ # Check if this value has per-filter levels
538
+ if selected_value in cat_filter_values[filter_id]:
539
+ levels, filtered_raw = self._get_categorical_levels(filter_id, selected_value)
540
+ if levels:
541
+ return levels, filtered_raw
542
+
543
+ # Fall back to global levels
544
+ return self._get_levels(), self._raw_data
545
+
546
+ def _get_vue_component_name(self) -> str:
547
+ """Return the Vue component name."""
548
+ return 'PlotlyHeatmap'
549
+
550
+ def _get_data_key(self) -> str:
551
+ """Return the key used to send primary data to Vue."""
552
+ return 'heatmapData'
553
+
554
+ def _is_no_zoom(self, zoom: Optional[Dict[str, Any]]) -> bool:
555
+ """Check if zoom state represents no zoom (full view)."""
556
+ if zoom is None:
557
+ return True
558
+ x_range = zoom.get('xRange', [-1, -1])
559
+ y_range = zoom.get('yRange', [-1, -1])
560
+ return (
561
+ x_range[0] < 0 and x_range[1] < 0 and
562
+ y_range[0] < 0 and y_range[1] < 0
563
+ )
564
+
565
    def _select_level_for_zoom(
        self,
        zoom: Dict[str, Any],
        state: Dict[str, Any],
        levels: list,
        filtered_raw: Optional[pl.LazyFrame],
        non_categorical_filters: Dict[str, str],
    ) -> pl.DataFrame:
        """
        Select appropriate resolution level based on zoom range.

        Iterates from smallest to largest resolution, returning the first
        level that has at least min_points inside the zoomed view; when a
        level yields more than 2*min_points, it is downsampled back to
        ~min_points before being returned.

        Args:
            zoom: Zoom state with xRange and yRange ([min, max] each)
            state: Full selection state for applying filters
            levels: List of compression levels to use
            filtered_raw: Filtered raw data for full resolution (optional)
            non_categorical_filters: Filters to apply (excluding categorical ones)

        Returns:
            Filtered Polars DataFrame at appropriate resolution. When even
            the largest level has fewer than min_points in view, the last
            (largest) filtered level is returned as-is.
        """
        import sys
        x0, x1 = zoom['xRange']
        y0, y1 = zoom['yRange']

        # Append raw data as the final, full-resolution level if available.
        all_levels = list(levels)
        if filtered_raw is not None:
            all_levels.append(filtered_raw)

        last_filtered = None

        for level_idx, level_data in enumerate(all_levels):
            # Ensure we have a LazyFrame for filtering.
            if isinstance(level_data, pl.DataFrame):
                level_data = level_data.lazy()

            # Restrict to the zoom window (inclusive on both bounds).
            filtered_lazy = level_data.filter(
                (pl.col(self._x_column) >= x0) &
                (pl.col(self._x_column) <= x1) &
                (pl.col(self._y_column) >= y0) &
                (pl.col(self._y_column) <= y1)
            )

            # Apply remaining (non-categorical) filters, if any.
            if non_categorical_filters:
                # filter_and_collect_cached returns (pandas DataFrame, hash);
                # convert back to Polars for further processing.
                df_pandas, _ = filter_and_collect_cached(
                    filtered_lazy,
                    non_categorical_filters,
                    state,
                    filter_defaults=self._filter_defaults,
                )
                filtered = pl.from_pandas(df_pandas)
            else:
                filtered = filtered_lazy.collect()

            count = len(filtered)
            last_filtered = filtered
            print(f"[HEATMAP] Level {level_idx}: {count} pts in zoom range", file=sys.stderr)

            if count >= self._min_points:
                # This level has enough detail.
                if count > self._min_points * 2:
                    # Still too many points - downsample down to ~min_points.
                    # NOTE(review): binning uses the FULL-data x/y ranges,
                    # not the zoom window - confirm this is intentional.
                    x_range = self._preprocessed_data.get('x_range')
                    y_range = self._preprocessed_data.get('y_range')
                    if self._use_streaming or self._use_simple_downsample:
                        if self._use_simple_downsample:
                            return downsample_2d_simple(
                                filtered.lazy(),
                                max_points=self._min_points,
                                intensity_column=self._intensity_column,
                            ).collect()
                        else:
                            return downsample_2d_streaming(
                                filtered.lazy(),
                                max_points=self._min_points,
                                x_column=self._x_column,
                                y_column=self._y_column,
                                intensity_column=self._intensity_column,
                                x_bins=self._x_bins,
                                y_bins=self._y_bins,
                                x_range=x_range,
                                y_range=y_range,
                            ).collect()
                    else:
                        return downsample_2d(
                            filtered.lazy(),
                            max_points=self._min_points,
                            x_column=self._x_column,
                            y_column=self._y_column,
                            intensity_column=self._intensity_column,
                            x_bins=self._x_bins,
                            y_bins=self._y_bins,
                        ).collect()
                return filtered

        # Even the largest level has fewer points than the threshold -
        # return whatever the last level produced (or empty if none).
        return last_filtered if last_filtered is not None else pl.DataFrame()
670
+
671
    def _prepare_vue_data(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """
        Prepare heatmap data for the Vue component.

        Selects the appropriate resolution level based on zoom state.
        If categorical_filters are configured, uses per-filter-value levels
        to ensure constant point counts regardless of filter selection.
        Rows are always sorted by intensity ascending so high-intensity
        points are drawn last (on top) by Plotly.

        Returns a pandas DataFrame for efficient Arrow serialization.

        Args:
            state: Current selection state from StateManager

        Returns:
            Dict with 'heatmapData' (pandas DataFrame) and '_hash' for
            change detection on the frontend.
        """
        import sys
        zoom = state.get(self._zoom_identifier)

        # Columns the frontend needs: axes + intensity, plus any columns
        # referenced by interactivity (click payloads) or filters.
        columns_to_select = [
            self._x_column,
            self._y_column,
            self._intensity_column,
        ]
        # Include columns needed for interactivity.
        if self._interactivity:
            for col in self._interactivity.values():
                if col not in columns_to_select:
                    columns_to_select.append(col)
        # Include filter columns.
        if self._filters:
            for col in self._filters.values():
                if col not in columns_to_select:
                    columns_to_select.append(col)

        # Get levels based on current state (may use per-filter levels).
        levels, filtered_raw = self._get_levels_for_state(state)
        # '?' marks lazy levels whose size is unknown without collecting.
        level_sizes = [len(l) if isinstance(l, pl.DataFrame) else '?' for l in levels]

        # Determine which filters still need to be applied at render time
        # (categorical filters were already baked into the levels).
        non_categorical_filters = {}
        if self._filters:
            for filter_id, column in self._filters.items():
                if filter_id not in self._categorical_filters:
                    non_categorical_filters[filter_id] = column

        if self._is_no_zoom(zoom):
            # No zoom: the smallest level is enough for the full view.
            if not levels:
                # No levels available - ship an empty frame.
                print(f"[HEATMAP] No levels available", file=sys.stderr)
                return {'heatmapData': pl.DataFrame().to_pandas(), '_hash': ''}

            data = levels[0]
            using_cat = self._preprocessed_data.get('has_categorical_filters', False)
            print(f"[HEATMAP] No zoom → level 0 ({level_sizes[0]} pts), levels={level_sizes}, categorical={using_cat}", file=sys.stderr)

            # Ensure we have a LazyFrame.
            if isinstance(data, pl.DataFrame):
                data = data.lazy()

            # Apply non-categorical filters if any - returns (pandas DataFrame, hash).
            if non_categorical_filters:
                df_pandas, data_hash = filter_and_collect_cached(
                    data,
                    non_categorical_filters,
                    state,
                    columns=columns_to_select,
                    filter_defaults=self._filter_defaults,
                )
                # Sort by intensity ascending so high-intensity points are drawn on top.
                df_pandas = df_pandas.sort_values(self._intensity_column).reset_index(drop=True)
            else:
                # No filters to apply - levels already filtered by categorical filter.
                available_cols = [c for c in columns_to_select if c in data.columns]
                df_polars = data.select(available_cols).collect()
                # Sort by intensity ascending so high-intensity points are drawn on top.
                df_polars = df_polars.sort(self._intensity_column)
                data_hash = compute_dataframe_hash(df_polars)
                df_pandas = df_polars.to_pandas()
        else:
            # Zoomed - pick the smallest level with enough points in view.
            print(f"[HEATMAP] Zoom {zoom} → selecting level...", file=sys.stderr)
            df_polars = self._select_level_for_zoom(
                zoom, state, levels, filtered_raw, non_categorical_filters
            )
            # Keep only the columns the frontend needs.
            available_cols = [c for c in columns_to_select if c in df_polars.columns]
            df_polars = df_polars.select(available_cols)
            # Sort by intensity ascending so high-intensity points are drawn on top.
            df_polars = df_polars.sort(self._intensity_column)
            print(f"[HEATMAP] Selected {len(df_polars)} pts for zoom, levels={level_sizes}", file=sys.stderr)
            data_hash = compute_dataframe_hash(df_polars)
            df_pandas = df_polars.to_pandas()

        return {
            'heatmapData': df_pandas,
            '_hash': data_hash,
        }
772
+
773
+ def _get_component_args(self) -> Dict[str, Any]:
774
+ """
775
+ Get component arguments to send to Vue.
776
+
777
+ Returns:
778
+ Dict with all heatmap configuration for Vue
779
+ """
780
+ args: Dict[str, Any] = {
781
+ 'componentType': self._get_vue_component_name(),
782
+ 'xColumn': self._x_column,
783
+ 'yColumn': self._y_column,
784
+ 'intensityColumn': self._intensity_column,
785
+ 'xLabel': self._x_label,
786
+ 'yLabel': self._y_label,
787
+ 'colorscale': self._colorscale,
788
+ 'zoomIdentifier': self._zoom_identifier,
789
+ 'interactivity': self._interactivity,
790
+ }
791
+
792
+ if self._title:
793
+ args['title'] = self._title
794
+
795
+ # Add any extra config options
796
+ args.update(self._config)
797
+
798
+ return args
799
+
800
+ def with_styling(
801
+ self,
802
+ colorscale: Optional[str] = None,
803
+ x_label: Optional[str] = None,
804
+ y_label: Optional[str] = None,
805
+ ) -> 'Heatmap':
806
+ """
807
+ Update heatmap styling.
808
+
809
+ Args:
810
+ colorscale: Plotly colorscale name
811
+ x_label: X-axis label
812
+ y_label: Y-axis label
813
+
814
+ Returns:
815
+ Self for method chaining
816
+ """
817
+ if colorscale is not None:
818
+ self._colorscale = colorscale
819
+ if x_label is not None:
820
+ self._x_label = x_label
821
+ if y_label is not None:
822
+ self._y_label = y_label
823
+ return self