openms-insight 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openms_insight/__init__.py +32 -0
- openms_insight/components/__init__.py +11 -0
- openms_insight/components/heatmap.py +823 -0
- openms_insight/components/lineplot.py +492 -0
- openms_insight/components/sequenceview.py +384 -0
- openms_insight/components/table.py +400 -0
- openms_insight/core/__init__.py +14 -0
- openms_insight/core/base.py +413 -0
- openms_insight/core/cache.py +39 -0
- openms_insight/core/registry.py +82 -0
- openms_insight/core/state.py +215 -0
- openms_insight/js-component/dist/assets/index.css +5 -0
- openms_insight/js-component/dist/assets/index.js +4220 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.eot +0 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.ttf +0 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff +0 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff2 +0 -0
- openms_insight/js-component/dist/index.html +14 -0
- openms_insight/preprocessing/__init__.py +22 -0
- openms_insight/preprocessing/compression.py +338 -0
- openms_insight/preprocessing/filtering.py +316 -0
- openms_insight/rendering/__init__.py +8 -0
- openms_insight/rendering/bridge.py +312 -0
- openms_insight-0.1.0.dist-info/METADATA +256 -0
- openms_insight-0.1.0.dist-info/RECORD +27 -0
- openms_insight-0.1.0.dist-info/WHEEL +4 -0
- openms_insight-0.1.0.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,823 @@
|
|
|
1
|
+
"""Heatmap component using Plotly scattergl."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
|
|
7
|
+
from ..core.base import BaseComponent
|
|
8
|
+
from ..core.registry import register_component
|
|
9
|
+
from ..preprocessing.compression import (
|
|
10
|
+
compute_compression_levels,
|
|
11
|
+
downsample_2d,
|
|
12
|
+
downsample_2d_simple,
|
|
13
|
+
downsample_2d_streaming,
|
|
14
|
+
get_data_range,
|
|
15
|
+
)
|
|
16
|
+
from ..preprocessing.filtering import compute_dataframe_hash, filter_and_collect_cached
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Cache key only includes zoom state (not other selections)
|
|
20
|
+
def _make_zoom_cache_key(zoom: Optional[Dict[str, Any]]) -> tuple:
|
|
21
|
+
"""Create hashable cache key from zoom state."""
|
|
22
|
+
if zoom is None:
|
|
23
|
+
return (None,)
|
|
24
|
+
return (
|
|
25
|
+
('x0', zoom.get('xRange', [-1, -1])[0]),
|
|
26
|
+
('x1', zoom.get('xRange', [-1, -1])[1]),
|
|
27
|
+
('y0', zoom.get('yRange', [-1, -1])[0]),
|
|
28
|
+
('y1', zoom.get('yRange', [-1, -1])[1]),
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@register_component("heatmap")
|
|
33
|
+
class Heatmap(BaseComponent):
|
|
34
|
+
"""
|
|
35
|
+
Interactive 2D scatter/heatmap component using Plotly scattergl.
|
|
36
|
+
|
|
37
|
+
Designed for large datasets (millions of points) using multi-resolution
|
|
38
|
+
preprocessing with zoom-based level selection. Points are colored by
|
|
39
|
+
intensity using a log-scale colormap.
|
|
40
|
+
|
|
41
|
+
Features:
|
|
42
|
+
- Multi-resolution downsampling for large datasets
|
|
43
|
+
- Zoom-based automatic level selection
|
|
44
|
+
- Click-to-select with cross-component linking
|
|
45
|
+
- Log-scale intensity colormap
|
|
46
|
+
- SVG export
|
|
47
|
+
|
|
48
|
+
Example:
|
|
49
|
+
heatmap = Heatmap(
|
|
50
|
+
cache_id="peaks_heatmap",
|
|
51
|
+
data=peaks_df,
|
|
52
|
+
x_column='retention_time',
|
|
53
|
+
y_column='mass',
|
|
54
|
+
intensity_column='intensity',
|
|
55
|
+
interactivity={
|
|
56
|
+
'spectrum': 'scan_id',
|
|
57
|
+
'peak': 'mass',
|
|
58
|
+
},
|
|
59
|
+
title="Peak Heatmap",
|
|
60
|
+
)
|
|
61
|
+
heatmap(state_manager=state_manager)
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
_component_type: str = "heatmap"
|
|
65
|
+
|
|
66
|
+
    def __init__(
        self,
        cache_id: str,
        x_column: str,
        y_column: str,
        data: Optional[pl.LazyFrame] = None,
        intensity_column: str = 'intensity',
        filters: Optional[Dict[str, str]] = None,
        filter_defaults: Optional[Dict[str, Any]] = None,
        interactivity: Optional[Dict[str, str]] = None,
        cache_path: str = ".",
        regenerate_cache: bool = False,
        min_points: int = 20000,
        x_bins: int = 400,
        y_bins: int = 50,
        zoom_identifier: str = 'heatmap_zoom',
        title: Optional[str] = None,
        x_label: Optional[str] = None,
        y_label: Optional[str] = None,
        colorscale: str = 'Portland',
        use_simple_downsample: bool = False,
        use_streaming: bool = True,
        categorical_filters: Optional[List[str]] = None,
        **kwargs
    ) -> None:
        """
        Initialize the Heatmap component.

        Args:
            cache_id: Unique identifier for this component's cache (MANDATORY).
                Creates a folder {cache_path}/{cache_id}/ for cached data.
            x_column: Name of column for x-axis values
            y_column: Name of column for y-axis values
            data: Polars LazyFrame with heatmap data. Optional if cache exists.
            intensity_column: Name of column for intensity/color values
            filters: Mapping of identifier names to column names for filtering
            filter_defaults: Default values per filter identifier, forwarded to
                the filtering layer (presumably applied when no selection is
                present in state - confirm against BaseComponent/filtering).
            interactivity: Mapping of identifier names to column names for clicks.
                When a point is clicked, sets each identifier to the clicked
                point's value in the corresponding column.
            cache_path: Base path for cache storage. Default "." (current dir).
            regenerate_cache: If True, regenerate cache even if valid cache exists.
            min_points: Target size for smallest compression level and
                threshold for level selection (default: 20000)
            x_bins: Number of bins along x-axis for downsampling (default: 400)
            y_bins: Number of bins along y-axis for downsampling (default: 50)
            zoom_identifier: State key for storing zoom range (default: 'heatmap_zoom')
            title: Heatmap title displayed above the plot
            x_label: X-axis label (defaults to x_column)
            y_label: Y-axis label (defaults to y_column)
            colorscale: Plotly colorscale name (default: 'Portland')
            use_simple_downsample: If True, use simple top-N downsampling instead
                of spatial binning (doesn't require scipy)
            use_streaming: If True (default), use streaming downsampling that
                stays lazy until render time. Reduces memory on init.
            categorical_filters: List of filter identifiers that should have
                per-value compression levels. This ensures constant point counts
                are sent to the client regardless of filter selection. Should be
                used for filters with a small number of unique values (<20).
                Example: ['im_dimension'] for ion mobility filtering.
            **kwargs: Additional configuration options
        """
        # Heatmap-specific settings must be assigned BEFORE the base class
        # __init__ runs, because preprocessing reads these attributes.
        self._x_column = x_column
        self._y_column = y_column
        self._intensity_column = intensity_column
        self._min_points = min_points
        self._x_bins = x_bins
        self._y_bins = y_bins
        self._zoom_identifier = zoom_identifier
        self._title = title
        # Axis labels fall back to the raw column names.
        self._x_label = x_label or x_column
        self._y_label = y_label or y_column
        self._colorscale = colorscale
        self._use_simple_downsample = use_simple_downsample
        self._use_streaming = use_streaming
        self._categorical_filters = categorical_filters or []

        super().__init__(
            cache_id=cache_id,
            data=data,
            filters=filters,
            filter_defaults=filter_defaults,
            interactivity=interactivity,
            cache_path=cache_path,
            regenerate_cache=regenerate_cache,
            **kwargs
        )
|
|
152
|
+
|
|
153
|
+
def _get_cache_config(self) -> Dict[str, Any]:
|
|
154
|
+
"""
|
|
155
|
+
Get configuration that affects cache validity.
|
|
156
|
+
|
|
157
|
+
Returns:
|
|
158
|
+
Dict of config values that affect preprocessing
|
|
159
|
+
"""
|
|
160
|
+
return {
|
|
161
|
+
'x_column': self._x_column,
|
|
162
|
+
'y_column': self._y_column,
|
|
163
|
+
'intensity_column': self._intensity_column,
|
|
164
|
+
'min_points': self._min_points,
|
|
165
|
+
'x_bins': self._x_bins,
|
|
166
|
+
'y_bins': self._y_bins,
|
|
167
|
+
'use_simple_downsample': self._use_simple_downsample,
|
|
168
|
+
'use_streaming': self._use_streaming,
|
|
169
|
+
'categorical_filters': sorted(self._categorical_filters),
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
def get_state_dependencies(self) -> list:
|
|
173
|
+
"""
|
|
174
|
+
Return list of state keys that affect this component's data.
|
|
175
|
+
|
|
176
|
+
Heatmaps depend on both filters (like other components) and
|
|
177
|
+
the zoom state, which determines which resolution level is used.
|
|
178
|
+
|
|
179
|
+
Returns:
|
|
180
|
+
List of state identifier keys including zoom_identifier
|
|
181
|
+
"""
|
|
182
|
+
deps = list(self._filters.keys()) if self._filters else []
|
|
183
|
+
deps.append(self._zoom_identifier)
|
|
184
|
+
return deps
|
|
185
|
+
|
|
186
|
+
def _preprocess(self) -> None:
|
|
187
|
+
"""
|
|
188
|
+
Preprocess heatmap data by computing multi-resolution levels.
|
|
189
|
+
|
|
190
|
+
This is STAGE 1 processing. In streaming mode (default), levels stay
|
|
191
|
+
as lazy LazyFrames and are only collected at render time. In non-streaming
|
|
192
|
+
mode, levels are eagerly computed for faster rendering but higher memory.
|
|
193
|
+
|
|
194
|
+
If categorical_filters is specified, creates separate compression levels
|
|
195
|
+
for each unique value of those filters, ensuring constant point counts
|
|
196
|
+
regardless of filter selection.
|
|
197
|
+
"""
|
|
198
|
+
if self._categorical_filters:
|
|
199
|
+
self._preprocess_with_categorical_filters()
|
|
200
|
+
elif self._use_streaming:
|
|
201
|
+
self._preprocess_streaming()
|
|
202
|
+
else:
|
|
203
|
+
self._preprocess_eager()
|
|
204
|
+
|
|
205
|
+
def _preprocess_with_categorical_filters(self) -> None:
|
|
206
|
+
"""
|
|
207
|
+
Preprocess with per-filter-value compression levels.
|
|
208
|
+
|
|
209
|
+
For each unique value of each categorical filter, creates separate
|
|
210
|
+
compression levels. This ensures that when a filter is applied at
|
|
211
|
+
render time, the resulting data has ~min_points regardless of the
|
|
212
|
+
filter value selected.
|
|
213
|
+
|
|
214
|
+
Example: For im_dimension with values [0, 1, 2, 3], creates:
|
|
215
|
+
- cat_level_im_dimension_0_0: 20K points with im_id=0
|
|
216
|
+
- cat_level_im_dimension_0_1: 20K points with im_id=1
|
|
217
|
+
- etc.
|
|
218
|
+
"""
|
|
219
|
+
import sys
|
|
220
|
+
|
|
221
|
+
# Get data ranges (for the full dataset)
|
|
222
|
+
x_range, y_range = get_data_range(
|
|
223
|
+
self._raw_data,
|
|
224
|
+
self._x_column,
|
|
225
|
+
self._y_column,
|
|
226
|
+
)
|
|
227
|
+
self._preprocessed_data['x_range'] = x_range
|
|
228
|
+
self._preprocessed_data['y_range'] = y_range
|
|
229
|
+
|
|
230
|
+
# Get total count
|
|
231
|
+
total = self._raw_data.select(pl.len()).collect().item()
|
|
232
|
+
self._preprocessed_data['total'] = total
|
|
233
|
+
|
|
234
|
+
# Store metadata about categorical filters
|
|
235
|
+
self._preprocessed_data['has_categorical_filters'] = True
|
|
236
|
+
self._preprocessed_data['categorical_filter_values'] = {}
|
|
237
|
+
|
|
238
|
+
# Process each categorical filter
|
|
239
|
+
for filter_id in self._categorical_filters:
|
|
240
|
+
if filter_id not in self._filters:
|
|
241
|
+
print(f"[HEATMAP] Warning: categorical_filter '{filter_id}' not in filters, skipping", file=sys.stderr)
|
|
242
|
+
continue
|
|
243
|
+
|
|
244
|
+
column_name = self._filters[filter_id]
|
|
245
|
+
|
|
246
|
+
# Get unique values for this filter
|
|
247
|
+
unique_values = (
|
|
248
|
+
self._raw_data
|
|
249
|
+
.select(pl.col(column_name))
|
|
250
|
+
.unique()
|
|
251
|
+
.collect()
|
|
252
|
+
.to_series()
|
|
253
|
+
.to_list()
|
|
254
|
+
)
|
|
255
|
+
unique_values = sorted([v for v in unique_values if v is not None and v >= 0])
|
|
256
|
+
|
|
257
|
+
print(f"[HEATMAP] Categorical filter '{filter_id}' ({column_name}): {len(unique_values)} unique values", file=sys.stderr)
|
|
258
|
+
|
|
259
|
+
self._preprocessed_data['categorical_filter_values'][filter_id] = unique_values
|
|
260
|
+
|
|
261
|
+
# Create compression levels for each filter value
|
|
262
|
+
for filter_value in unique_values:
|
|
263
|
+
# Filter data to this value
|
|
264
|
+
filtered_data = self._raw_data.filter(pl.col(column_name) == filter_value)
|
|
265
|
+
filtered_total = filtered_data.select(pl.len()).collect().item()
|
|
266
|
+
|
|
267
|
+
# Compute level sizes for this filtered subset
|
|
268
|
+
level_sizes = compute_compression_levels(self._min_points, filtered_total)
|
|
269
|
+
|
|
270
|
+
print(f"[HEATMAP] Value {filter_value}: {filtered_total:,} pts → levels {level_sizes}", file=sys.stderr)
|
|
271
|
+
|
|
272
|
+
# Store level sizes for this filter value
|
|
273
|
+
self._preprocessed_data[f'cat_level_sizes_{filter_id}_{filter_value}'] = level_sizes
|
|
274
|
+
self._preprocessed_data[f'cat_num_levels_{filter_id}_{filter_value}'] = len(level_sizes)
|
|
275
|
+
|
|
276
|
+
# Build each level
|
|
277
|
+
for level_idx, target_size in enumerate(level_sizes):
|
|
278
|
+
# If target size equals total, skip downsampling - use all data
|
|
279
|
+
if target_size >= filtered_total:
|
|
280
|
+
level = filtered_data
|
|
281
|
+
elif self._use_simple_downsample:
|
|
282
|
+
level = downsample_2d_simple(
|
|
283
|
+
filtered_data,
|
|
284
|
+
max_points=target_size,
|
|
285
|
+
intensity_column=self._intensity_column,
|
|
286
|
+
)
|
|
287
|
+
else:
|
|
288
|
+
level = downsample_2d_streaming(
|
|
289
|
+
filtered_data,
|
|
290
|
+
max_points=target_size,
|
|
291
|
+
x_column=self._x_column,
|
|
292
|
+
y_column=self._y_column,
|
|
293
|
+
intensity_column=self._intensity_column,
|
|
294
|
+
x_bins=self._x_bins,
|
|
295
|
+
y_bins=self._y_bins,
|
|
296
|
+
x_range=x_range,
|
|
297
|
+
y_range=y_range,
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
# Collect and store
|
|
301
|
+
level_key = f'cat_level_{filter_id}_{filter_value}_{level_idx}'
|
|
302
|
+
self._preprocessed_data[level_key] = level.collect()
|
|
303
|
+
|
|
304
|
+
# Also create global levels for when no categorical filter is selected
|
|
305
|
+
# (fallback to standard behavior)
|
|
306
|
+
level_sizes = compute_compression_levels(self._min_points, total)
|
|
307
|
+
self._preprocessed_data['level_sizes'] = level_sizes
|
|
308
|
+
self._preprocessed_data['num_levels'] = len(level_sizes)
|
|
309
|
+
|
|
310
|
+
for i, size in enumerate(level_sizes):
|
|
311
|
+
# If target size equals total, skip downsampling - use all data
|
|
312
|
+
if size >= total:
|
|
313
|
+
level = self._raw_data
|
|
314
|
+
elif self._use_simple_downsample:
|
|
315
|
+
level = downsample_2d_simple(
|
|
316
|
+
self._raw_data,
|
|
317
|
+
max_points=size,
|
|
318
|
+
intensity_column=self._intensity_column,
|
|
319
|
+
)
|
|
320
|
+
else:
|
|
321
|
+
level = downsample_2d_streaming(
|
|
322
|
+
self._raw_data,
|
|
323
|
+
max_points=size,
|
|
324
|
+
x_column=self._x_column,
|
|
325
|
+
y_column=self._y_column,
|
|
326
|
+
intensity_column=self._intensity_column,
|
|
327
|
+
x_bins=self._x_bins,
|
|
328
|
+
y_bins=self._y_bins,
|
|
329
|
+
x_range=x_range,
|
|
330
|
+
y_range=y_range,
|
|
331
|
+
)
|
|
332
|
+
self._preprocessed_data[f'level_{i}'] = level.collect()
|
|
333
|
+
|
|
334
|
+
def _preprocess_streaming(self) -> None:
|
|
335
|
+
"""
|
|
336
|
+
Streaming preprocessing - levels stay lazy until render.
|
|
337
|
+
|
|
338
|
+
Builds lazy query plans and collects them for caching.
|
|
339
|
+
"""
|
|
340
|
+
# Get data ranges (minimal collect - just 4 values)
|
|
341
|
+
x_range, y_range = get_data_range(
|
|
342
|
+
self._raw_data,
|
|
343
|
+
self._x_column,
|
|
344
|
+
self._y_column,
|
|
345
|
+
)
|
|
346
|
+
self._preprocessed_data['x_range'] = x_range
|
|
347
|
+
self._preprocessed_data['y_range'] = y_range
|
|
348
|
+
|
|
349
|
+
# Get total count
|
|
350
|
+
total = self._raw_data.select(pl.len()).collect().item()
|
|
351
|
+
self._preprocessed_data['total'] = total
|
|
352
|
+
|
|
353
|
+
# Compute target sizes for levels
|
|
354
|
+
level_sizes = compute_compression_levels(self._min_points, total)
|
|
355
|
+
self._preprocessed_data['level_sizes'] = level_sizes
|
|
356
|
+
|
|
357
|
+
# Build and collect each level
|
|
358
|
+
self._preprocessed_data['levels'] = []
|
|
359
|
+
|
|
360
|
+
for i, size in enumerate(level_sizes):
|
|
361
|
+
# If target size equals total, skip downsampling - use all data
|
|
362
|
+
if size >= total:
|
|
363
|
+
level = self._raw_data
|
|
364
|
+
elif self._use_simple_downsample:
|
|
365
|
+
level = downsample_2d_simple(
|
|
366
|
+
self._raw_data,
|
|
367
|
+
max_points=size,
|
|
368
|
+
intensity_column=self._intensity_column,
|
|
369
|
+
)
|
|
370
|
+
else:
|
|
371
|
+
level = downsample_2d_streaming(
|
|
372
|
+
self._raw_data,
|
|
373
|
+
max_points=size,
|
|
374
|
+
x_column=self._x_column,
|
|
375
|
+
y_column=self._y_column,
|
|
376
|
+
intensity_column=self._intensity_column,
|
|
377
|
+
x_bins=self._x_bins,
|
|
378
|
+
y_bins=self._y_bins,
|
|
379
|
+
x_range=x_range,
|
|
380
|
+
y_range=y_range,
|
|
381
|
+
)
|
|
382
|
+
# Collect and store as DataFrame for caching
|
|
383
|
+
# Base class will serialize these to parquet
|
|
384
|
+
self._preprocessed_data[f'level_{i}'] = level.collect()
|
|
385
|
+
|
|
386
|
+
# Store number of levels for reconstruction
|
|
387
|
+
self._preprocessed_data['num_levels'] = len(level_sizes)
|
|
388
|
+
|
|
389
|
+
def _preprocess_eager(self) -> None:
|
|
390
|
+
"""
|
|
391
|
+
Eager preprocessing - levels are computed upfront.
|
|
392
|
+
|
|
393
|
+
Uses more memory at init but faster rendering. Uses scipy-based
|
|
394
|
+
downsampling for better spatial distribution.
|
|
395
|
+
"""
|
|
396
|
+
# Get data ranges
|
|
397
|
+
x_range, y_range = get_data_range(
|
|
398
|
+
self._raw_data,
|
|
399
|
+
self._x_column,
|
|
400
|
+
self._y_column,
|
|
401
|
+
)
|
|
402
|
+
self._preprocessed_data['x_range'] = x_range
|
|
403
|
+
self._preprocessed_data['y_range'] = y_range
|
|
404
|
+
|
|
405
|
+
# Get total count
|
|
406
|
+
total = self._raw_data.select(pl.len()).collect().item()
|
|
407
|
+
self._preprocessed_data['total'] = total
|
|
408
|
+
|
|
409
|
+
# Compute compression level target sizes
|
|
410
|
+
level_sizes = compute_compression_levels(self._min_points, total)
|
|
411
|
+
self._preprocessed_data['level_sizes'] = level_sizes
|
|
412
|
+
|
|
413
|
+
# Build levels from largest to smallest
|
|
414
|
+
if level_sizes:
|
|
415
|
+
current = self._raw_data
|
|
416
|
+
|
|
417
|
+
for i, size in enumerate(reversed(level_sizes)):
|
|
418
|
+
# If target size equals total, skip downsampling - use all data
|
|
419
|
+
if size >= total:
|
|
420
|
+
downsampled = current
|
|
421
|
+
elif self._use_simple_downsample:
|
|
422
|
+
downsampled = downsample_2d_simple(
|
|
423
|
+
current,
|
|
424
|
+
max_points=size,
|
|
425
|
+
intensity_column=self._intensity_column,
|
|
426
|
+
)
|
|
427
|
+
else:
|
|
428
|
+
downsampled = downsample_2d(
|
|
429
|
+
current,
|
|
430
|
+
max_points=size,
|
|
431
|
+
x_column=self._x_column,
|
|
432
|
+
y_column=self._y_column,
|
|
433
|
+
intensity_column=self._intensity_column,
|
|
434
|
+
x_bins=self._x_bins,
|
|
435
|
+
y_bins=self._y_bins,
|
|
436
|
+
)
|
|
437
|
+
# Collect for caching - store with reversed index
|
|
438
|
+
level_idx = len(level_sizes) - 1 - i
|
|
439
|
+
if isinstance(downsampled, pl.LazyFrame):
|
|
440
|
+
self._preprocessed_data[f'level_{level_idx}'] = downsampled.collect()
|
|
441
|
+
else:
|
|
442
|
+
self._preprocessed_data[f'level_{level_idx}'] = downsampled
|
|
443
|
+
current = downsampled
|
|
444
|
+
|
|
445
|
+
# Store number of levels for reconstruction
|
|
446
|
+
self._preprocessed_data['num_levels'] = len(level_sizes)
|
|
447
|
+
|
|
448
|
+
def _get_levels(self) -> list:
|
|
449
|
+
"""
|
|
450
|
+
Get compression levels list for rendering.
|
|
451
|
+
|
|
452
|
+
Reconstructs the levels list from preprocessed data,
|
|
453
|
+
adding full resolution at the end.
|
|
454
|
+
"""
|
|
455
|
+
num_levels = self._preprocessed_data.get('num_levels', 0)
|
|
456
|
+
levels = []
|
|
457
|
+
|
|
458
|
+
for i in range(num_levels):
|
|
459
|
+
level_data = self._preprocessed_data.get(f'level_{i}')
|
|
460
|
+
if level_data is not None:
|
|
461
|
+
levels.append(level_data)
|
|
462
|
+
|
|
463
|
+
# Add full resolution at end (if raw data available)
|
|
464
|
+
if self._raw_data is not None:
|
|
465
|
+
levels.append(self._raw_data)
|
|
466
|
+
|
|
467
|
+
return levels
|
|
468
|
+
|
|
469
|
+
def _get_categorical_levels(
|
|
470
|
+
self,
|
|
471
|
+
filter_id: str,
|
|
472
|
+
filter_value: Any,
|
|
473
|
+
) -> Tuple[list, Optional[pl.LazyFrame]]:
|
|
474
|
+
"""
|
|
475
|
+
Get compression levels for a specific categorical filter value.
|
|
476
|
+
|
|
477
|
+
Args:
|
|
478
|
+
filter_id: The filter identifier (e.g., 'im_dimension')
|
|
479
|
+
filter_value: The filter value to get levels for (e.g., 0)
|
|
480
|
+
|
|
481
|
+
Returns:
|
|
482
|
+
Tuple of (levels list, filtered raw data for full resolution)
|
|
483
|
+
Returns ([], None) if no categorical levels exist for this filter
|
|
484
|
+
"""
|
|
485
|
+
# Check if we have categorical levels for this filter/value
|
|
486
|
+
num_levels_key = f'cat_num_levels_{filter_id}_{filter_value}'
|
|
487
|
+
num_levels = self._preprocessed_data.get(num_levels_key, 0)
|
|
488
|
+
|
|
489
|
+
if num_levels == 0:
|
|
490
|
+
return [], None
|
|
491
|
+
|
|
492
|
+
levels = []
|
|
493
|
+
for i in range(num_levels):
|
|
494
|
+
level_key = f'cat_level_{filter_id}_{filter_value}_{i}'
|
|
495
|
+
level_data = self._preprocessed_data.get(level_key)
|
|
496
|
+
if level_data is not None:
|
|
497
|
+
levels.append(level_data)
|
|
498
|
+
|
|
499
|
+
# Get filtered raw data for full resolution (if available)
|
|
500
|
+
filtered_raw = None
|
|
501
|
+
if self._raw_data is not None and filter_id in self._filters:
|
|
502
|
+
column_name = self._filters[filter_id]
|
|
503
|
+
filtered_raw = self._raw_data.filter(pl.col(column_name) == filter_value)
|
|
504
|
+
|
|
505
|
+
return levels, filtered_raw
|
|
506
|
+
|
|
507
|
+
def _get_levels_for_state(self, state: Dict[str, Any]) -> Tuple[list, Optional[pl.LazyFrame]]:
|
|
508
|
+
"""
|
|
509
|
+
Get appropriate compression levels based on current filter state.
|
|
510
|
+
|
|
511
|
+
If categorical_filters are configured and a matching filter value is
|
|
512
|
+
selected in state, returns the per-value levels. Otherwise returns
|
|
513
|
+
the global levels.
|
|
514
|
+
|
|
515
|
+
Args:
|
|
516
|
+
state: Current selection state
|
|
517
|
+
|
|
518
|
+
Returns:
|
|
519
|
+
Tuple of (levels list, raw data for full resolution)
|
|
520
|
+
"""
|
|
521
|
+
# Check if we have categorical filters and a selected value
|
|
522
|
+
if self._preprocessed_data.get('has_categorical_filters'):
|
|
523
|
+
cat_filter_values = self._preprocessed_data.get('categorical_filter_values', {})
|
|
524
|
+
|
|
525
|
+
for filter_id in self._categorical_filters:
|
|
526
|
+
if filter_id not in cat_filter_values:
|
|
527
|
+
continue
|
|
528
|
+
|
|
529
|
+
selected_value = state.get(filter_id)
|
|
530
|
+
if selected_value is None:
|
|
531
|
+
continue
|
|
532
|
+
|
|
533
|
+
# Convert float to int if needed (JS numbers come as floats)
|
|
534
|
+
if isinstance(selected_value, float) and selected_value.is_integer():
|
|
535
|
+
selected_value = int(selected_value)
|
|
536
|
+
|
|
537
|
+
# Check if this value has per-filter levels
|
|
538
|
+
if selected_value in cat_filter_values[filter_id]:
|
|
539
|
+
levels, filtered_raw = self._get_categorical_levels(filter_id, selected_value)
|
|
540
|
+
if levels:
|
|
541
|
+
return levels, filtered_raw
|
|
542
|
+
|
|
543
|
+
# Fall back to global levels
|
|
544
|
+
return self._get_levels(), self._raw_data
|
|
545
|
+
|
|
546
|
+
    def _get_vue_component_name(self) -> str:
        """Return the name of the Vue component that renders this heatmap."""
        return 'PlotlyHeatmap'
|
|
549
|
+
|
|
550
|
+
    def _get_data_key(self) -> str:
        """Return the payload key under which primary data is sent to Vue."""
        return 'heatmapData'
|
|
553
|
+
|
|
554
|
+
def _is_no_zoom(self, zoom: Optional[Dict[str, Any]]) -> bool:
|
|
555
|
+
"""Check if zoom state represents no zoom (full view)."""
|
|
556
|
+
if zoom is None:
|
|
557
|
+
return True
|
|
558
|
+
x_range = zoom.get('xRange', [-1, -1])
|
|
559
|
+
y_range = zoom.get('yRange', [-1, -1])
|
|
560
|
+
return (
|
|
561
|
+
x_range[0] < 0 and x_range[1] < 0 and
|
|
562
|
+
y_range[0] < 0 and y_range[1] < 0
|
|
563
|
+
)
|
|
564
|
+
|
|
565
|
+
def _select_level_for_zoom(
|
|
566
|
+
self,
|
|
567
|
+
zoom: Dict[str, Any],
|
|
568
|
+
state: Dict[str, Any],
|
|
569
|
+
levels: list,
|
|
570
|
+
filtered_raw: Optional[pl.LazyFrame],
|
|
571
|
+
non_categorical_filters: Dict[str, str],
|
|
572
|
+
) -> pl.DataFrame:
|
|
573
|
+
"""
|
|
574
|
+
Select appropriate resolution level based on zoom range.
|
|
575
|
+
|
|
576
|
+
Iterates from smallest to largest resolution, finding the smallest
|
|
577
|
+
level that has at least min_points in the zoomed view.
|
|
578
|
+
|
|
579
|
+
Args:
|
|
580
|
+
zoom: Zoom state with xRange and yRange
|
|
581
|
+
state: Full selection state for applying filters
|
|
582
|
+
levels: List of compression levels to use
|
|
583
|
+
filtered_raw: Filtered raw data for full resolution (optional)
|
|
584
|
+
non_categorical_filters: Filters to apply (excluding categorical ones)
|
|
585
|
+
|
|
586
|
+
Returns:
|
|
587
|
+
Filtered Polars DataFrame at appropriate resolution
|
|
588
|
+
"""
|
|
589
|
+
import sys
|
|
590
|
+
x0, x1 = zoom['xRange']
|
|
591
|
+
y0, y1 = zoom['yRange']
|
|
592
|
+
|
|
593
|
+
# Add raw data as final level if available
|
|
594
|
+
all_levels = list(levels)
|
|
595
|
+
if filtered_raw is not None:
|
|
596
|
+
all_levels.append(filtered_raw)
|
|
597
|
+
|
|
598
|
+
last_filtered = None
|
|
599
|
+
|
|
600
|
+
for level_idx, level_data in enumerate(all_levels):
|
|
601
|
+
# Ensure we have a LazyFrame for filtering
|
|
602
|
+
if isinstance(level_data, pl.DataFrame):
|
|
603
|
+
level_data = level_data.lazy()
|
|
604
|
+
|
|
605
|
+
# Filter to zoom range
|
|
606
|
+
filtered_lazy = level_data.filter(
|
|
607
|
+
(pl.col(self._x_column) >= x0) &
|
|
608
|
+
(pl.col(self._x_column) <= x1) &
|
|
609
|
+
(pl.col(self._y_column) >= y0) &
|
|
610
|
+
(pl.col(self._y_column) <= y1)
|
|
611
|
+
)
|
|
612
|
+
|
|
613
|
+
# Apply non-categorical filters if any
|
|
614
|
+
if non_categorical_filters:
|
|
615
|
+
# filter_and_collect_cached returns (pandas DataFrame, hash)
|
|
616
|
+
# We need Polars DataFrame for further processing
|
|
617
|
+
df_pandas, _ = filter_and_collect_cached(
|
|
618
|
+
filtered_lazy,
|
|
619
|
+
non_categorical_filters,
|
|
620
|
+
state,
|
|
621
|
+
filter_defaults=self._filter_defaults,
|
|
622
|
+
)
|
|
623
|
+
filtered = pl.from_pandas(df_pandas)
|
|
624
|
+
else:
|
|
625
|
+
filtered = filtered_lazy.collect()
|
|
626
|
+
|
|
627
|
+
count = len(filtered)
|
|
628
|
+
last_filtered = filtered
|
|
629
|
+
print(f"[HEATMAP] Level {level_idx}: {count} pts in zoom range", file=sys.stderr)
|
|
630
|
+
|
|
631
|
+
if count >= self._min_points:
|
|
632
|
+
# This level has enough detail
|
|
633
|
+
if count > self._min_points * 2:
|
|
634
|
+
# Still too many - downsample further
|
|
635
|
+
x_range = self._preprocessed_data.get('x_range')
|
|
636
|
+
y_range = self._preprocessed_data.get('y_range')
|
|
637
|
+
if self._use_streaming or self._use_simple_downsample:
|
|
638
|
+
if self._use_simple_downsample:
|
|
639
|
+
return downsample_2d_simple(
|
|
640
|
+
filtered.lazy(),
|
|
641
|
+
max_points=self._min_points,
|
|
642
|
+
intensity_column=self._intensity_column,
|
|
643
|
+
).collect()
|
|
644
|
+
else:
|
|
645
|
+
return downsample_2d_streaming(
|
|
646
|
+
filtered.lazy(),
|
|
647
|
+
max_points=self._min_points,
|
|
648
|
+
x_column=self._x_column,
|
|
649
|
+
y_column=self._y_column,
|
|
650
|
+
intensity_column=self._intensity_column,
|
|
651
|
+
x_bins=self._x_bins,
|
|
652
|
+
y_bins=self._y_bins,
|
|
653
|
+
x_range=x_range,
|
|
654
|
+
y_range=y_range,
|
|
655
|
+
).collect()
|
|
656
|
+
else:
|
|
657
|
+
return downsample_2d(
|
|
658
|
+
filtered.lazy(),
|
|
659
|
+
max_points=self._min_points,
|
|
660
|
+
x_column=self._x_column,
|
|
661
|
+
y_column=self._y_column,
|
|
662
|
+
intensity_column=self._intensity_column,
|
|
663
|
+
x_bins=self._x_bins,
|
|
664
|
+
y_bins=self._y_bins,
|
|
665
|
+
).collect()
|
|
666
|
+
return filtered
|
|
667
|
+
|
|
668
|
+
# Even largest level has fewer points than threshold
|
|
669
|
+
return last_filtered if last_filtered is not None else pl.DataFrame()
|
|
670
|
+
|
|
671
|
+
    def _prepare_vue_data(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """
        Prepare heatmap data for Vue component.

        Selects appropriate resolution level based on zoom state.
        If categorical_filters are configured, uses per-filter-value levels
        to ensure constant point counts regardless of filter selection.

        Returns pandas DataFrame for efficient Arrow serialization.

        Args:
            state: Current selection state from StateManager

        Returns:
            Dict with heatmapData (pandas DataFrame) and _hash for change detection
        """
        import sys
        zoom = state.get(self._zoom_identifier)

        # Build columns to select: axes + intensity are always needed.
        columns_to_select = [
            self._x_column,
            self._y_column,
            self._intensity_column,
        ]
        # Include columns needed for interactivity (click payloads)
        if self._interactivity:
            for col in self._interactivity.values():
                if col not in columns_to_select:
                    columns_to_select.append(col)
        # Include filter columns
        if self._filters:
            for col in self._filters.values():
                if col not in columns_to_select:
                    columns_to_select.append(col)

        # Get levels based on current state (may use per-filter levels)
        levels, filtered_raw = self._get_levels_for_state(state)
        # Level sizes are for logging only; lazy levels show up as '?'.
        level_sizes = [len(l) if isinstance(l, pl.DataFrame) else '?' for l in levels]

        # Determine which filters still need to be applied at render time
        # (filters not in categorical_filters need runtime application)
        non_categorical_filters = {}
        if self._filters:
            for filter_id, column in self._filters.items():
                if filter_id not in self._categorical_filters:
                    non_categorical_filters[filter_id] = column

        if self._is_no_zoom(zoom):
            # No zoom - use smallest level
            if not levels:
                # No levels available
                print(f"[HEATMAP] No levels available", file=sys.stderr)
                return {'heatmapData': pl.DataFrame().to_pandas(), '_hash': ''}

            data = levels[0]
            using_cat = self._preprocessed_data.get('has_categorical_filters', False)
            print(f"[HEATMAP] No zoom → level 0 ({level_sizes[0]} pts), levels={level_sizes}, categorical={using_cat}", file=sys.stderr)

            # Ensure we have a LazyFrame
            if isinstance(data, pl.DataFrame):
                data = data.lazy()

            # Apply non-categorical filters if any - returns (pandas DataFrame, hash)
            if non_categorical_filters:
                df_pandas, data_hash = filter_and_collect_cached(
                    data,
                    non_categorical_filters,
                    state,
                    columns=columns_to_select,
                    filter_defaults=self._filter_defaults,
                )
                # Sort by intensity ascending so high-intensity points are drawn on top
                df_pandas = df_pandas.sort_values(self._intensity_column).reset_index(drop=True)
            else:
                # No filters to apply - levels already filtered by categorical filter
                # NOTE(review): `data` is a LazyFrame here; `.columns` works on
                # LazyFrame but may warn on newer polars versions - confirm.
                available_cols = [c for c in columns_to_select if c in data.columns]
                df_polars = data.select(available_cols).collect()
                # Sort by intensity ascending so high-intensity points are drawn on top
                df_polars = df_polars.sort(self._intensity_column)
                data_hash = compute_dataframe_hash(df_polars)
                df_pandas = df_polars.to_pandas()
        else:
            # Zoomed - select appropriate level
            print(f"[HEATMAP] Zoom {zoom} → selecting level...", file=sys.stderr)
            df_polars = self._select_level_for_zoom(
                zoom, state, levels, filtered_raw, non_categorical_filters
            )
            # Select only needed columns
            available_cols = [c for c in columns_to_select if c in df_polars.columns]
            df_polars = df_polars.select(available_cols)
            # Sort by intensity ascending so high-intensity points are drawn on top
            df_polars = df_polars.sort(self._intensity_column)
            print(f"[HEATMAP] Selected {len(df_polars)} pts for zoom, levels={level_sizes}", file=sys.stderr)
            data_hash = compute_dataframe_hash(df_polars)
            df_pandas = df_polars.to_pandas()

        return {
            'heatmapData': df_pandas,
            '_hash': data_hash,
        }
|
|
772
|
+
|
|
773
|
+
def _get_component_args(self) -> Dict[str, Any]:
|
|
774
|
+
"""
|
|
775
|
+
Get component arguments to send to Vue.
|
|
776
|
+
|
|
777
|
+
Returns:
|
|
778
|
+
Dict with all heatmap configuration for Vue
|
|
779
|
+
"""
|
|
780
|
+
args: Dict[str, Any] = {
|
|
781
|
+
'componentType': self._get_vue_component_name(),
|
|
782
|
+
'xColumn': self._x_column,
|
|
783
|
+
'yColumn': self._y_column,
|
|
784
|
+
'intensityColumn': self._intensity_column,
|
|
785
|
+
'xLabel': self._x_label,
|
|
786
|
+
'yLabel': self._y_label,
|
|
787
|
+
'colorscale': self._colorscale,
|
|
788
|
+
'zoomIdentifier': self._zoom_identifier,
|
|
789
|
+
'interactivity': self._interactivity,
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
if self._title:
|
|
793
|
+
args['title'] = self._title
|
|
794
|
+
|
|
795
|
+
# Add any extra config options
|
|
796
|
+
args.update(self._config)
|
|
797
|
+
|
|
798
|
+
return args
|
|
799
|
+
|
|
800
|
+
def with_styling(
|
|
801
|
+
self,
|
|
802
|
+
colorscale: Optional[str] = None,
|
|
803
|
+
x_label: Optional[str] = None,
|
|
804
|
+
y_label: Optional[str] = None,
|
|
805
|
+
) -> 'Heatmap':
|
|
806
|
+
"""
|
|
807
|
+
Update heatmap styling.
|
|
808
|
+
|
|
809
|
+
Args:
|
|
810
|
+
colorscale: Plotly colorscale name
|
|
811
|
+
x_label: X-axis label
|
|
812
|
+
y_label: Y-axis label
|
|
813
|
+
|
|
814
|
+
Returns:
|
|
815
|
+
Self for method chaining
|
|
816
|
+
"""
|
|
817
|
+
if colorscale is not None:
|
|
818
|
+
self._colorscale = colorscale
|
|
819
|
+
if x_label is not None:
|
|
820
|
+
self._x_label = x_label
|
|
821
|
+
if y_label is not None:
|
|
822
|
+
self._y_label = y_label
|
|
823
|
+
return self
|