openms-insight 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openms_insight/__init__.py +11 -7
- openms_insight/components/__init__.py +2 -2
- openms_insight/components/heatmap.py +433 -228
- openms_insight/components/lineplot.py +377 -82
- openms_insight/components/sequenceview.py +677 -213
- openms_insight/components/table.py +86 -58
- openms_insight/core/__init__.py +2 -2
- openms_insight/core/base.py +122 -54
- openms_insight/core/registry.py +6 -5
- openms_insight/core/state.py +33 -31
- openms_insight/core/subprocess_preprocess.py +1 -3
- openms_insight/js-component/dist/assets/index.css +1 -1
- openms_insight/js-component/dist/assets/index.js +105 -105
- openms_insight/preprocessing/__init__.py +5 -6
- openms_insight/preprocessing/compression.py +123 -67
- openms_insight/preprocessing/filtering.py +39 -13
- openms_insight/rendering/__init__.py +1 -1
- openms_insight/rendering/bridge.py +192 -42
- {openms_insight-0.1.2.dist-info → openms_insight-0.1.4.dist-info}/METADATA +163 -20
- openms_insight-0.1.4.dist-info/RECORD +28 -0
- openms_insight-0.1.2.dist-info/RECORD +0 -28
- {openms_insight-0.1.2.dist-info → openms_insight-0.1.4.dist-info}/WHEEL +0 -0
- {openms_insight-0.1.2.dist-info → openms_insight-0.1.4.dist-info}/licenses/LICENSE +0 -0
@@ -8,6 +8,7 @@ from ..core.base import BaseComponent
 from ..core.registry import register_component
 from ..preprocessing.compression import (
     compute_compression_levels,
+    compute_optimal_bins,
     downsample_2d,
     downsample_2d_simple,
     downsample_2d_streaming,
@@ -22,10 +23,10 @@ def _make_zoom_cache_key(zoom: Optional[Dict[str, Any]]) -> tuple:
     if zoom is None:
         return (None,)
     return (
-        ('x0', zoom.get('xRange', [-1, -1])[0]),
-        ('x1', zoom.get('xRange', [-1, -1])[1]),
-        ('y0', zoom.get('yRange', [-1, -1])[0]),
-        ('y1', zoom.get('yRange', [-1, -1])[1]),
+        ("x0", zoom.get("xRange", [-1, -1])[0]),
+        ("x1", zoom.get("xRange", [-1, -1])[1]),
+        ("y0", zoom.get("yRange", [-1, -1])[0]),
+        ("y1", zoom.get("yRange", [-1, -1])[1]),
     )
 
 
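The labeled pairs replace bare value tuples, so the cache key stays hashable but now reads as x0/x1/y0/y1. A minimal usage sketch for a concrete zoom state (values invented; per `_is_no_zoom` further down, `[-1, -1]` ranges are the "no zoom" sentinel):

    zoom = {"xRange": [100.0, 250.0], "yRange": [0.2, 0.8]}
    key = _make_zoom_cache_key(zoom)
    # (("x0", 100.0), ("x1", 250.0), ("y0", 0.2), ("y1", 0.8))
    cache = {key: "cached level selection"}  # a tuple of pairs is a valid dict key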
@@ -66,28 +67,29 @@ class Heatmap(BaseComponent):
     def __init__(
         self,
         cache_id: str,
-        x_column: str,
-        y_column: str,
+        x_column: Optional[str] = None,
+        y_column: Optional[str] = None,
         data: Optional[pl.LazyFrame] = None,
         data_path: Optional[str] = None,
-        intensity_column: str = 'intensity',
+        intensity_column: Optional[str] = None,
         filters: Optional[Dict[str, str]] = None,
         filter_defaults: Optional[Dict[str, Any]] = None,
         interactivity: Optional[Dict[str, str]] = None,
         cache_path: str = ".",
         regenerate_cache: bool = False,
-        min_points: int = …,
-        x_bins: int = 400,
-        y_bins: int = 50,
-        zoom_identifier: str = 'heatmap_zoom',
+        min_points: int = 10000,
+        display_aspect_ratio: float = 16 / 9,
+        x_bins: Optional[int] = None,
+        y_bins: Optional[int] = None,
+        zoom_identifier: str = "heatmap_zoom",
         title: Optional[str] = None,
         x_label: Optional[str] = None,
         y_label: Optional[str] = None,
-        colorscale: str = 'Portland',
+        colorscale: str = "Portland",
         use_simple_downsample: bool = False,
         use_streaming: bool = True,
         categorical_filters: Optional[List[str]] = None,
-        **kwargs
+        **kwargs,
     ):
         """
         Initialize the Heatmap component.
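For orientation, a hedged construction sketch against the 0.1.4 signature; the column names and file path are invented, not taken from this diff:

    hm = Heatmap(
        cache_id="peakmap",
        x_column="rt",                # hypothetical column names
        y_column="mz",
        intensity_column="intensity",
        data_path="peaks.parquet",    # hypothetical path
        min_points=10000,             # render budget; cache levels hold 2x this
        display_aspect_ratio=16 / 9,  # used only while x_bins/y_bins stay None
    )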
@@ -106,10 +108,17 @@ class Heatmap(BaseComponent):
                 point's value in the corresponding column.
             cache_path: Base path for cache storage. Default "." (current dir).
             regenerate_cache: If True, regenerate cache even if valid cache exists.
-            min_points: Target …
-            …
-            …
-            …
+            min_points: Target number of points to display (default: 10000).
+                Cache levels are built at 2× this value; final downsample
+                at render time reduces to exactly min_points.
+            display_aspect_ratio: Expected display width/height ratio for
+                optimal bin computation during caching (default: 16/9).
+                At render time, the actual zoom region's aspect ratio is used.
+            x_bins: Number of bins along x-axis for downsampling. If None
+                (default), auto-computed from display_aspect_ratio such that
+                x_bins × y_bins ≈ 2×min_points with even spatial distribution.
+            y_bins: Number of bins along y-axis for downsampling. If None
+                (default), auto-computed from display_aspect_ratio.
             zoom_identifier: State key for storing zoom range (default: 'heatmap_zoom')
             title: Heatmap title displayed above the plot
             x_label: X-axis label (defaults to x_column)
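A plausible reading of what compute_optimal_bins has to do to satisfy this docstring; this is a sketch of the stated contract, not the library's implementation:

    import math

    def optimal_bins_sketch(target_points: int, x_range, y_range):
        # Pick a bin grid whose cell count is ~target_points and whose
        # x:y bin ratio matches the region's aspect ratio.
        aspect = (x_range[1] - x_range[0]) / (y_range[1] - y_range[0])
        x_bins = max(1, round(math.sqrt(target_points * aspect)))
        y_bins = max(1, round(math.sqrt(target_points / aspect)))
        return x_bins, y_bins

    # min_points=10000 gives a cache target of 20000; a 16/9 display yields
    # roughly 189 x 106 bins (189 * 106 = 20034 ≈ 2 * min_points).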
@@ -130,6 +139,7 @@ class Heatmap(BaseComponent):
         self._y_column = y_column
         self._intensity_column = intensity_column
         self._min_points = min_points
+        self._display_aspect_ratio = display_aspect_ratio
         self._x_bins = x_bins
         self._y_bins = y_bins
         self._zoom_identifier = zoom_identifier
@@ -155,6 +165,7 @@ class Heatmap(BaseComponent):
             y_column=y_column,
             intensity_column=intensity_column,
             min_points=min_points,
+            display_aspect_ratio=display_aspect_ratio,
             x_bins=x_bins,
             y_bins=y_bins,
             zoom_identifier=zoom_identifier,
@@ -165,7 +176,7 @@ class Heatmap(BaseComponent):
             use_simple_downsample=use_simple_downsample,
             use_streaming=use_streaming,
             categorical_filters=categorical_filters,
-            **kwargs
+            **kwargs,
         )
 
     def _get_cache_config(self) -> Dict[str, Any]:
@@ -176,17 +187,43 @@ class Heatmap(BaseComponent):
             Dict of config values that affect preprocessing
         """
         return {
-            'x_column': self._x_column,
-            'y_column': self._y_column,
-            'intensity_column': self._intensity_column,
-            'min_points': self._min_points,
-            'x_bins': self._x_bins,
-            'y_bins': self._y_bins,
-            'use_simple_downsample': self._use_simple_downsample,
-            'use_streaming': self._use_streaming,
-            'categorical_filters': sorted(self._categorical_filters),
+            "x_column": self._x_column,
+            "y_column": self._y_column,
+            "intensity_column": self._intensity_column,
+            "min_points": self._min_points,
+            "display_aspect_ratio": self._display_aspect_ratio,
+            "x_bins": self._x_bins,
+            "y_bins": self._y_bins,
+            "use_simple_downsample": self._use_simple_downsample,
+            "use_streaming": self._use_streaming,
+            "categorical_filters": sorted(self._categorical_filters),
+            "zoom_identifier": self._zoom_identifier,
+            "title": self._title,
+            "x_label": self._x_label,
+            "y_label": self._y_label,
+            "colorscale": self._colorscale,
         }
 
+    def _restore_cache_config(self, config: Dict[str, Any]) -> None:
+        """Restore component-specific configuration from cached config."""
+        self._x_column = config.get("x_column")
+        self._y_column = config.get("y_column")
+        self._intensity_column = config.get("intensity_column", "intensity")
+        self._min_points = config.get("min_points", 10000)
+        self._display_aspect_ratio = config.get("display_aspect_ratio", 16 / 9)
+        # x_bins/y_bins are computed during preprocessing and stored in cache
+        # Fallback to old defaults for backward compatibility with old caches
+        self._x_bins = config.get("x_bins", 400)
+        self._y_bins = config.get("y_bins", 50)
+        self._use_simple_downsample = config.get("use_simple_downsample", False)
+        self._use_streaming = config.get("use_streaming", True)
+        self._categorical_filters = config.get("categorical_filters", [])
+        self._zoom_identifier = config.get("zoom_identifier", "heatmap_zoom")
+        self._title = config.get("title")
+        self._x_label = config.get("x_label", self._x_column)
+        self._y_label = config.get("y_label", self._y_column)
+        self._colorscale = config.get("colorscale", "Portland")
+
     def get_state_dependencies(self) -> list:
         """
         Return list of state keys that affect this component's data.
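The two methods above form a round trip: the dict from _get_cache_config is persisted next to the preprocessed levels, and _restore_cache_config reads the same keys back when a valid cache is found, so bins computed at cache time survive a restart. Assumed flow, condensed:

    cfg = heatmap._get_cache_config()   # snapshot stored with the cache
    # ... new process, valid cache detected ...
    heatmap._restore_cache_config(cfg)  # x_bins/y_bins come back as cached, not recomputed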
@@ -220,14 +257,116 @@ class Heatmap(BaseComponent):
         else:
             self._preprocess_eager()
 
+    def _build_cascading_levels(
+        self,
+        source_data: pl.LazyFrame,
+        level_sizes: list,
+        x_range: tuple,
+        y_range: tuple,
+        cache_dir,
+        prefix: str = "level",
+    ) -> dict:
+        """
+        Build cascading compression levels from source data.
+
+        Each level is built from the previous larger level rather than from
+        raw data. This is efficient (raw data read once) and produces identical
+        results because the downsampling keeps top N highest-intensity points
+        per bin - points surviving at larger levels will also be selected at
+        smaller levels.
+
+        Args:
+            source_data: LazyFrame with raw/filtered data
+            level_sizes: List of target sizes for compressed levels (smallest first)
+            x_range: (x_min, x_max) for consistent bin boundaries
+            y_range: (y_min, y_max) for consistent bin boundaries
+            cache_dir: Path to save parquet files
+            prefix: Filename prefix (e.g., "level" or "cat_level_im_0")
+
+        Returns:
+            Dict with level LazyFrames keyed by "{prefix}_{idx}" and "num_levels"
+        """
+        import sys
+
+        result = {}
+        num_compressed = len(level_sizes)
+
+        # Get total count
+        total = source_data.select(pl.len()).collect().item()
+
+        # First: save full resolution as the largest level
+        full_res_path = cache_dir / f"{prefix}_{num_compressed}.parquet"
+        full_res = source_data.sort([self._x_column, self._y_column])
+        full_res.sink_parquet(full_res_path, compression="zstd")
+        print(
+            f"[HEATMAP] Saved {prefix}_{num_compressed} ({total:,} pts)",
+            file=sys.stderr,
+        )
+
+        # Start cascading from full resolution
+        current_source = pl.scan_parquet(full_res_path)
+        current_size = total
+
+        # Build compressed levels from largest to smallest
+        for i, target_size in enumerate(reversed(level_sizes)):
+            level_idx = num_compressed - 1 - i
+            level_path = cache_dir / f"{prefix}_{level_idx}.parquet"
+
+            # If target size equals or exceeds current, just copy reference
+            if target_size >= current_size:
+                level = current_source
+            elif self._use_simple_downsample:
+                level = downsample_2d_simple(
+                    current_source,
+                    max_points=target_size,
+                    intensity_column=self._intensity_column,
+                )
+            else:
+                level = downsample_2d_streaming(
+                    current_source,
+                    max_points=target_size,
+                    x_column=self._x_column,
+                    y_column=self._y_column,
+                    intensity_column=self._intensity_column,
+                    x_bins=self._x_bins,
+                    y_bins=self._y_bins,
+                    x_range=x_range,
+                    y_range=y_range,
+                )
+
+            # Sort and save immediately
+            level = level.sort([self._x_column, self._y_column])
+            level.sink_parquet(level_path, compression="zstd")
+
+            print(
+                f"[HEATMAP] Saved {prefix}_{level_idx} (target {target_size:,} pts)",
+                file=sys.stderr,
+            )
+
+            # Next iteration uses this level as source (cascading)
+            current_source = pl.scan_parquet(level_path)
+            current_size = target_size
+
+        # Load all levels back as LazyFrames
+        for i in range(num_compressed + 1):
+            level_path = cache_dir / f"{prefix}_{i}.parquet"
+            result[f"{prefix}_{i}"] = pl.scan_parquet(level_path)
+
+        result["num_levels"] = num_compressed + 1
+
+        return result
+
     def _preprocess_with_categorical_filters(self) -> None:
         """
-        Preprocess with per-filter-value compression levels.
+        Preprocess with per-filter-value compression levels using cascading.
 
         For each unique value of each categorical filter, creates separate
-        compression levels …
-        …
-        filter value selected.
+        compression levels using cascading (building smaller levels from larger).
+        This ensures that when a filter is applied at render time, the resulting
+        data has ~min_points regardless of the filter value selected.
+
+        Uses cascading downsampling for efficiency - each level is built from
+        the previous larger level rather than from raw data.
 
         Data is sorted by x, y columns for efficient range query predicate pushdown.
 
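The docstring's cascading claim can be checked on a toy case. Assuming the downsampler keeps the top-N highest-intensity points per bin (as stated), taking top-2 of a top-4 subset equals taking top-2 of the raw data; a single-bin polars demo:

    import polars as pl

    df = pl.DataFrame({"x": [0] * 6, "y": [0] * 6, "intensity": [9, 3, 7, 1, 8, 5]})
    top4 = df.sort("intensity", descending=True).head(4)      # larger cached level
    via_cascade = top4.sort("intensity", descending=True).head(2)
    direct = df.sort("intensity", descending=True).head(2)
    assert via_cascade.sort("intensity").equals(direct.sort("intensity"))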
@@ -239,202 +378,221 @@ class Heatmap(BaseComponent):
         import sys
 
         # Get data ranges (for the full dataset)
+        # These ranges are used for ALL levels to ensure consistent binning
         x_range, y_range = get_data_range(
             self._raw_data,
             self._x_column,
             self._y_column,
         )
-        self._preprocessed_data['x_range'] = x_range
-        self._preprocessed_data['y_range'] = y_range
+        self._preprocessed_data["x_range"] = x_range
+        self._preprocessed_data["y_range"] = y_range
+
+        # Compute optimal bins if not provided
+        # Cache at 2×min_points, use display_aspect_ratio for bin computation
+        cache_target = 2 * self._min_points
+        if self._x_bins is None or self._y_bins is None:
+            # Use display aspect ratio (not data aspect ratio) for optimal bins
+            self._x_bins, self._y_bins = compute_optimal_bins(
+                cache_target,
+                (0, self._display_aspect_ratio),  # Fake x_range matching aspect
+                (0, 1.0),  # Fake y_range
+            )
+            print(
+                f"[HEATMAP] Auto-computed bins: {self._x_bins}x{self._y_bins} "
+                f"= {self._x_bins * self._y_bins:,} (cache target: {cache_target:,}, "
+                f"display aspect: {self._display_aspect_ratio:.2f})",
+                file=sys.stderr,
+            )
 
         # Get total count
         total = self._raw_data.select(pl.len()).collect().item()
-        self._preprocessed_data['total'] = total
+        self._preprocessed_data["total"] = total
+
+        # Create cache directory for immediate level saving
+        cache_dir = self._cache_dir / "preprocessed"
+        cache_dir.mkdir(parents=True, exist_ok=True)
 
         # Store metadata about categorical filters
-        self._preprocessed_data['has_categorical_filters'] = True
-        self._preprocessed_data['categorical_filter_values'] = {}
+        self._preprocessed_data["has_categorical_filters"] = True
+        self._preprocessed_data["categorical_filter_values"] = {}
 
         # Process each categorical filter
         for filter_id in self._categorical_filters:
             if filter_id not in self._filters:
-                print(f"[HEATMAP] Warning: categorical_filter '{filter_id}' not in filters, skipping", file=sys.stderr)
+                print(
+                    f"[HEATMAP] Warning: categorical_filter '{filter_id}' not in filters, skipping",
+                    file=sys.stderr,
+                )
                 continue
 
             column_name = self._filters[filter_id]
 
             # Get unique values for this filter
             unique_values = (
-                self._raw_data
-                .select(pl.col(column_name))
+                self._raw_data.select(pl.col(column_name))
                 .unique()
                 .collect()
                 .to_series()
                 .to_list()
             )
-            unique_values = sorted([v for v in unique_values if v is not None and v >= 0])
+            unique_values = sorted(
+                [v for v in unique_values if v is not None and v >= 0]
+            )
 
-            print(f"[HEATMAP] Categorical filter '{filter_id}' ({column_name}): {len(unique_values)} unique values", file=sys.stderr)
+            print(
+                f"[HEATMAP] Categorical filter '{filter_id}' ({column_name}): {len(unique_values)} unique values",
+                file=sys.stderr,
+            )
 
-            self._preprocessed_data['categorical_filter_values'][filter_id] = unique_values
+            self._preprocessed_data["categorical_filter_values"][filter_id] = (
+                unique_values
+            )
 
-            # Create compression levels for each filter value
+            # Create compression levels for each filter value using cascading
             for filter_value in unique_values:
                 # Filter data to this value
-                filtered_data = self._raw_data.filter(pl.col(column_name) == filter_value)
+                filtered_data = self._raw_data.filter(
+                    pl.col(column_name) == filter_value
+                )
                 filtered_total = filtered_data.select(pl.len()).collect().item()
 
-                # Compute level sizes for this filtered subset
-                level_sizes = compute_compression_levels(self._min_points, filtered_total)
-
-                print(f"[HEATMAP] Value {filter_value}: {filtered_total:,} pts → levels {level_sizes}", file=sys.stderr)
-
-                # Store level sizes for this filter value
-                self._preprocessed_data[f'cat_level_sizes_{filter_id}_{filter_value}'] = level_sizes
-
-                # Build each compressed level
-                for level_idx, target_size in enumerate(level_sizes):
-                    # If target size equals total, skip downsampling - use all data
-                    if target_size >= filtered_total:
-                        level = filtered_data
-                    elif self._use_simple_downsample:
-                        level = downsample_2d_simple(
-                            filtered_data,
-                            max_points=target_size,
-                            intensity_column=self._intensity_column,
-                        )
-                    else:
-                        level = downsample_2d_streaming(
-                            filtered_data,
-                            max_points=target_size,
-                            x_column=self._x_column,
-                            y_column=self._y_column,
-                            intensity_column=self._intensity_column,
-                            x_bins=self._x_bins,
-                            y_bins=self._y_bins,
-                            x_range=x_range,
-                            y_range=y_range,
-                        )
-
-                    # Sort by x, y for efficient range query predicate pushdown
-                    level = level.sort([self._x_column, self._y_column])
-                    # Store LazyFrame for streaming to disk
-                    level_key = f'cat_level_{filter_id}_{filter_value}_{level_idx}'
-                    self._preprocessed_data[level_key] = level  # Keep lazy
-
-                # Add full resolution as final level (for zoom fallback)
-                # Also sorted for consistent predicate pushdown behavior
-                num_compressed = len(level_sizes)
-                full_res_key = f'cat_level_{filter_id}_{filter_value}_{num_compressed}'
-                self._preprocessed_data[full_res_key] = filtered_data.sort(
-                    [self._x_column, self._y_column]
+                # Compute level sizes for this filtered subset (2× for cache buffer)
+                level_sizes = compute_compression_levels(
+                    cache_target, filtered_total
                 )
-                self._preprocessed_data[f'cat_num_levels_{filter_id}_{filter_value}'] = num_compressed + 1
 
-        # Also create global levels for when no categorical filter is selected
-        # (fallback to standard behavior)
-        level_sizes = compute_compression_levels(self._min_points, total)
-        self._preprocessed_data['level_sizes'] = level_sizes
-
-        for i, size in enumerate(level_sizes):
-            # If target size equals total, skip downsampling - use all data
-            if size >= total:
-                level = self._raw_data
-            elif self._use_simple_downsample:
-                level = downsample_2d_simple(
-                    self._raw_data,
-                    max_points=size,
-                    intensity_column=self._intensity_column,
+                print(
+                    f"[HEATMAP] Value {filter_value}: {filtered_total:,} pts → levels {level_sizes}",
+                    file=sys.stderr,
                 )
-            else:
-                level = downsample_2d_streaming(
-                    self._raw_data,
-                    max_points=size,
-                    x_column=self._x_column,
-                    y_column=self._y_column,
-                    intensity_column=self._intensity_column,
-                    x_bins=self._x_bins,
-                    y_bins=self._y_bins,
+
+                # Store level sizes for this filter value
+                self._preprocessed_data[
+                    f"cat_level_sizes_{filter_id}_{filter_value}"
+                ] = level_sizes
+
+                # Build cascading levels using helper
+                prefix = f"cat_level_{filter_id}_{filter_value}"
+                levels_result = self._build_cascading_levels(
+                    source_data=filtered_data,
+                    level_sizes=level_sizes,
                     x_range=x_range,
                     y_range=y_range,
+                    cache_dir=cache_dir,
+                    prefix=prefix,
                 )
-            # Sort by x, y for efficient range query predicate pushdown
-            level = level.sort([self._x_column, self._y_column])
-            self._preprocessed_data[f'level_{i}'] = level  # Keep lazy
 
-        # Add full resolution as final level (for zoom fallback)
-        # Also sorted for consistent predicate pushdown behavior
-        num_compressed = len(level_sizes)
-        self._preprocessed_data[f'level_{num_compressed}'] = self._raw_data.sort(
-            [self._x_column, self._y_column]
+                # Copy results to preprocessed_data
+                for key, value in levels_result.items():
+                    if key == "num_levels":
+                        self._preprocessed_data[
+                            f"cat_num_levels_{filter_id}_{filter_value}"
+                        ] = value
+                    else:
+                        self._preprocessed_data[key] = value
+
+        # Also create global levels for when no categorical filter is selected
+        # (fallback to standard behavior) - using cascading with 2× cache buffer
+        level_sizes = compute_compression_levels(cache_target, total)
+        self._preprocessed_data["level_sizes"] = level_sizes
+
+        # Build global cascading levels using helper
+        levels_result = self._build_cascading_levels(
+            source_data=self._raw_data,
+            level_sizes=level_sizes,
+            x_range=x_range,
+            y_range=y_range,
+            cache_dir=cache_dir,
+            prefix="level",
         )
-        self._preprocessed_data['num_levels'] = num_compressed + 1
+
+        # Copy results to preprocessed_data
+        for key, value in levels_result.items():
+            if key == "num_levels":
+                self._preprocessed_data["num_levels"] = value
+            else:
+                self._preprocessed_data[key] = value
+
+        # Mark that files are already saved
+        self._preprocessed_data["_files_already_saved"] = True
 
     def _preprocess_streaming(self) -> None:
         """
-        Streaming preprocessing - …
+        Streaming preprocessing with cascading - builds smaller levels from larger.
+
+        Uses cascading downsampling: each level is built from the previous larger
+        level rather than from raw data. This is more efficient (raw data read once)
+        and produces identical results because the downsampling algorithm keeps
+        the TOP N highest-intensity points per bin - points that survive at a larger
+        level will also be selected at smaller levels.
+
+        Levels are saved to disk immediately after creation, then read back as the
+        source for the next smaller level. This keeps memory low while enabling
+        cascading.
 
-        Builds lazy query plans that are streamed to disk via sink_parquet().
         Data is sorted by x, y columns for efficient range query predicate pushdown.
         """
+        import sys
+
         # Get data ranges (minimal collect - just 4 values)
+        # These ranges are used for ALL levels to ensure consistent binning
         x_range, y_range = get_data_range(
             self._raw_data,
             self._x_column,
             self._y_column,
         )
-        self._preprocessed_data['x_range'] = x_range
-        self._preprocessed_data['y_range'] = y_range
+        self._preprocessed_data["x_range"] = x_range
+        self._preprocessed_data["y_range"] = y_range
+
+        # Compute optimal bins if not provided
+        # Cache at 2×min_points, use display_aspect_ratio for bin computation
+        cache_target = 2 * self._min_points
+        if self._x_bins is None or self._y_bins is None:
+            # Use display aspect ratio (not data aspect ratio) for optimal bins
+            # This ensures even distribution in the expected display dimensions
+            self._x_bins, self._y_bins = compute_optimal_bins(
+                cache_target,
+                (0, self._display_aspect_ratio),  # Fake x_range matching aspect
+                (0, 1.0),  # Fake y_range
+            )
+            print(
+                f"[HEATMAP] Auto-computed bins: {self._x_bins}x{self._y_bins} "
+                f"= {self._x_bins * self._y_bins:,} (cache target: {cache_target:,}, "
+                f"display aspect: {self._display_aspect_ratio:.2f})",
+                file=sys.stderr,
+            )
 
         # Get total count
         total = self._raw_data.select(pl.len()).collect().item()
-        self._preprocessed_data['total'] = total
-
-        # Compute target sizes for levels
-        level_sizes = compute_compression_levels(self._min_points, total)
-        self._preprocessed_data['level_sizes'] = level_sizes
-
-        # …
-        self.…
+        self._preprocessed_data["total"] = total
+
+        # Compute target sizes for levels (use 2×min_points for smallest cache level)
+        level_sizes = compute_compression_levels(cache_target, total)
+        self._preprocessed_data["level_sizes"] = level_sizes
+
+        # Create cache directory for immediate level saving
+        cache_dir = self._cache_dir / "preprocessed"
+        cache_dir.mkdir(parents=True, exist_ok=True)
+
+        # Build cascading levels using helper
+        levels_result = self._build_cascading_levels(
+            source_data=self._raw_data,
+            level_sizes=level_sizes,
+            x_range=x_range,
+            y_range=y_range,
+            cache_dir=cache_dir,
+            prefix="level",
+        )
 
-        for i, size in enumerate(level_sizes):
-            # If target size equals total, skip downsampling - use all data
-            if size >= total:
-                level = self._raw_data
-            elif self._use_simple_downsample:
-                level = downsample_2d_simple(
-                    self._raw_data,
-                    max_points=size,
-                    intensity_column=self._intensity_column,
-                )
+        # Copy results to preprocessed_data
+        for key, value in levels_result.items():
+            if key == "num_levels":
+                self._preprocessed_data["num_levels"] = value
             else:
-                level = downsample_2d_streaming(
-                    self._raw_data,
-                    max_points=size,
-                    x_column=self._x_column,
-                    y_column=self._y_column,
-                    intensity_column=self._intensity_column,
-                    x_bins=self._x_bins,
-                    y_bins=self._y_bins,
-                    x_range=x_range,
-                    y_range=y_range,
-                )
-            # Sort by x, y for efficient range query predicate pushdown
-            # This clusters spatially close points together in row groups
-            level = level.sort([self._x_column, self._y_column])
-            # Store LazyFrame for streaming to disk
-            # Base class will use sink_parquet() to stream without full materialization
-            self._preprocessed_data[f'level_{i}'] = level  # Keep lazy
-
-        # Add full resolution as final level (for zoom fallback)
-        # Also sorted for consistent predicate pushdown behavior
-        num_compressed = len(level_sizes)
-        self._preprocessed_data[f'level_{num_compressed}'] = self._raw_data.sort(
-            [self._x_column, self._y_column]
-        )
+                self._preprocessed_data[key] = value
 
-        # Store number of levels for reconstruction (includes full resolution)
-        self._preprocessed_data['num_levels'] = num_compressed + 1
+        # Mark that files are already saved (base class should skip saving)
+        self._preprocessed_data["_files_already_saved"] = True
 
     def _preprocess_eager(self) -> None:
         """
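Putting the pieces together, this is the on-disk layout one cascading pass would produce for the docstring's example prefix cat_level_im_0, assuming three compressed levels (the level count is illustrative):

    <cache>/preprocessed/cat_level_im_0_3.parquet   full resolution, written first
    <cache>/preprocessed/cat_level_im_0_2.parquet   downsampled from level 3
    <cache>/preprocessed/cat_level_im_0_1.parquet   downsampled from level 2
    <cache>/preprocessed/cat_level_im_0_0.parquet   smallest, ~2x min_points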
@@ -444,22 +602,41 @@ class Heatmap(BaseComponent):
         downsampling for better spatial distribution.
         Data is sorted by x, y columns for efficient range query predicate pushdown.
         """
+        import sys
+
         # Get data ranges
         x_range, y_range = get_data_range(
             self._raw_data,
             self._x_column,
             self._y_column,
         )
-        self._preprocessed_data['x_range'] = x_range
-        self._preprocessed_data['y_range'] = y_range
+        self._preprocessed_data["x_range"] = x_range
+        self._preprocessed_data["y_range"] = y_range
+
+        # Compute optimal bins if not provided
+        # Cache at 2×min_points, use display_aspect_ratio for bin computation
+        cache_target = 2 * self._min_points
+        if self._x_bins is None or self._y_bins is None:
+            # Use display aspect ratio (not data aspect ratio) for optimal bins
+            self._x_bins, self._y_bins = compute_optimal_bins(
+                cache_target,
+                (0, self._display_aspect_ratio),  # Fake x_range matching aspect
+                (0, 1.0),  # Fake y_range
+            )
+            print(
+                f"[HEATMAP] Auto-computed bins: {self._x_bins}x{self._y_bins} "
+                f"= {self._x_bins * self._y_bins:,} (cache target: {cache_target:,}, "
+                f"display aspect: {self._display_aspect_ratio:.2f})",
+                file=sys.stderr,
+            )
 
         # Get total count
         total = self._raw_data.select(pl.len()).collect().item()
-        self._preprocessed_data['total'] = total
+        self._preprocessed_data["total"] = total
 
-        # Compute compression level target sizes
-        level_sizes = compute_compression_levels(self._min_points, total)
-        self._preprocessed_data['level_sizes'] = level_sizes
+        # Compute compression level target sizes (2× for cache buffer)
+        level_sizes = compute_compression_levels(cache_target, total)
+        self._preprocessed_data["level_sizes"] = level_sizes
 
         # Build levels from largest to smallest
         if level_sizes:
@@ -493,21 +670,23 @@ class Heatmap(BaseComponent):
                 # Store LazyFrame for streaming to disk
                 level_idx = len(level_sizes) - 1 - i
                 if isinstance(downsampled, pl.LazyFrame):
-                    self._preprocessed_data[f'level_{level_idx}'] = downsampled  # Keep lazy
+                    self._preprocessed_data[f"level_{level_idx}"] = (
+                        downsampled  # Keep lazy
+                    )
                 else:
                     # DataFrame from downsample_2d - convert back to lazy
-                    self._preprocessed_data[f'level_{level_idx}'] = downsampled.lazy()
+                    self._preprocessed_data[f"level_{level_idx}"] = downsampled.lazy()
                 current = downsampled
 
         # Add full resolution as final level (for zoom fallback)
         # Also sorted for consistent predicate pushdown behavior
         num_compressed = len(level_sizes)
-        self._preprocessed_data[f'level_{num_compressed}'] = self._raw_data.sort(
+        self._preprocessed_data[f"level_{num_compressed}"] = self._raw_data.sort(
             [self._x_column, self._y_column]
         )
 
         # Store number of levels for reconstruction (includes full resolution)
-        self._preprocessed_data['num_levels'] = num_compressed + 1
+        self._preprocessed_data["num_levels"] = num_compressed + 1
 
     def _get_levels(self) -> list:
         """
@@ -516,11 +695,11 @@ class Heatmap(BaseComponent):
         Reconstructs the levels list from preprocessed data,
         adding full resolution at the end.
         """
-        num_levels = self._preprocessed_data.get('num_levels', 0)
+        num_levels = self._preprocessed_data.get("num_levels", 0)
         levels = []
 
         for i in range(num_levels):
-            level_data = self._preprocessed_data.get(f'level_{i}')
+            level_data = self._preprocessed_data.get(f"level_{i}")
             if level_data is not None:
                 levels.append(level_data)
 
@@ -543,7 +722,7 @@ class Heatmap(BaseComponent):
         Returns ([], None) if no categorical levels exist for this filter
         """
         # Check if we have categorical levels for this filter/value
-        num_levels_key = f'cat_num_levels_{filter_id}_{filter_value}'
+        num_levels_key = f"cat_num_levels_{filter_id}_{filter_value}"
         num_levels = self._preprocessed_data.get(num_levels_key, 0)
 
         if num_levels == 0:
@@ -551,14 +730,16 @@ class Heatmap(BaseComponent):
 
         levels = []
         for i in range(num_levels):
-            level_key = f'cat_level_{filter_id}_{filter_value}_{i}'
+            level_key = f"cat_level_{filter_id}_{filter_value}_{i}"
             level_data = self._preprocessed_data.get(level_key)
             if level_data is not None:
                 levels.append(level_data)
 
         return levels, None  # Full resolution included in cached levels
 
-    def _get_levels_for_state(self, state: Dict[str, Any]) -> Tuple[list, Optional[pl.LazyFrame]]:
+    def _get_levels_for_state(
+        self, state: Dict[str, Any]
+    ) -> Tuple[list, Optional[pl.LazyFrame]]:
         """
         Get appropriate compression levels based on current filter state.
 
@@ -573,8 +754,10 @@ class Heatmap(BaseComponent):
             Tuple of (levels list, raw data for full resolution)
         """
         # Check if we have categorical filters and a selected value
-        if self._preprocessed_data.get('has_categorical_filters'):
-            cat_filter_values = self._preprocessed_data.get('categorical_filter_values', {})
+        if self._preprocessed_data.get("has_categorical_filters"):
+            cat_filter_values = self._preprocessed_data.get(
+                "categorical_filter_values", {}
+            )
 
             for filter_id in self._categorical_filters:
                 if filter_id not in cat_filter_values:
@@ -590,7 +773,9 @@ class Heatmap(BaseComponent):
 
                 # Check if this value has per-filter levels
                 if selected_value in cat_filter_values[filter_id]:
-                    levels, filtered_raw = self._get_categorical_levels(filter_id, selected_value)
+                    levels, filtered_raw = self._get_categorical_levels(
+                        filter_id, selected_value
+                    )
                     if levels:
                         return levels, filtered_raw
 
@@ -599,22 +784,19 @@ class Heatmap(BaseComponent):
 
     def _get_vue_component_name(self) -> str:
         """Return the Vue component name."""
-        return 'PlotlyHeatmap'
+        return "PlotlyHeatmap"
 
     def _get_data_key(self) -> str:
         """Return the key used to send primary data to Vue."""
-        return 'heatmapData'
+        return "heatmapData"
 
     def _is_no_zoom(self, zoom: Optional[Dict[str, Any]]) -> bool:
         """Check if zoom state represents no zoom (full view)."""
         if zoom is None:
             return True
-        x_range = zoom.get('xRange', [-1, -1])
-        y_range = zoom.get('yRange', [-1, -1])
-        return (
-            x_range[0] < 0 and x_range[1] < 0 and
-            y_range[0] < 0 and y_range[1] < 0
-        )
+        x_range = zoom.get("xRange", [-1, -1])
+        y_range = zoom.get("yRange", [-1, -1])
+        return x_range[0] < 0 and x_range[1] < 0 and y_range[0] < 0 and y_range[1] < 0
 
     def _select_level_for_zoom(
         self,
@@ -641,8 +823,9 @@ class Heatmap(BaseComponent):
             Filtered Polars DataFrame at appropriate resolution
         """
         import sys
-        x0, x1 = zoom['xRange']
-        y0, y1 = zoom['yRange']
+
+        x0, x1 = zoom["xRange"]
+        y0, y1 = zoom["yRange"]
 
         # Add raw data as final level if available
         all_levels = list(levels)
@@ -658,10 +841,10 @@ class Heatmap(BaseComponent):
 
             # Filter to zoom range
             filtered_lazy = level_data.filter(
-                (pl.col(self._x_column) >= x0) &
-                (pl.col(self._x_column) <= x1) &
-                (pl.col(self._y_column) >= y0) &
-                (pl.col(self._y_column) <= y1)
+                (pl.col(self._x_column) >= x0)
+                & (pl.col(self._x_column) <= x1)
+                & (pl.col(self._y_column) >= y0)
+                & (pl.col(self._y_column) <= y1)
             )
 
             # Apply non-categorical filters if any
@@ -680,15 +863,26 @@ class Heatmap(BaseComponent):
 
             count = len(filtered)
             last_filtered = filtered
-            print(f"[HEATMAP] Level {level_idx}: {count} pts in zoom range", file=sys.stderr)
+            print(
+                f"[HEATMAP] Level {level_idx}: {count} pts in zoom range",
+                file=sys.stderr,
+            )
 
             if count >= self._min_points:
                 # This level has enough detail
                 if count > self._min_points:
-                    # Over limit - downsample to …
-                    # …
+                    # Over limit - downsample to exactly min_points
+                    # Compute optimal bins from ACTUAL zoom region aspect ratio
                     zoom_x_range = (x0, x1)
                     zoom_y_range = (y0, y1)
+                    render_x_bins, render_y_bins = compute_optimal_bins(
+                        self._min_points, zoom_x_range, zoom_y_range
+                    )
+                    print(
+                        f"[HEATMAP] Render downsample: {count:,} → {self._min_points:,} pts "
+                        f"(bins: {render_x_bins}x{render_y_bins})",
+                        file=sys.stderr,
+                    )
                     if self._use_streaming or self._use_simple_downsample:
                         if self._use_simple_downsample:
                             return downsample_2d_simple(
@@ -703,8 +897,8 @@ class Heatmap(BaseComponent):
                                 x_column=self._x_column,
                                 y_column=self._y_column,
                                 intensity_column=self._intensity_column,
-                                x_bins=self._x_bins,
-                                y_bins=self._y_bins,
+                                x_bins=render_x_bins,
+                                y_bins=render_y_bins,
                                 x_range=zoom_x_range,
                                 y_range=zoom_y_range,
                             ).collect()
@@ -715,8 +909,8 @@ class Heatmap(BaseComponent):
                         x_column=self._x_column,
                         y_column=self._y_column,
                         intensity_column=self._intensity_column,
-                        x_bins=self._x_bins,
-                        y_bins=self._y_bins,
+                        x_bins=render_x_bins,
+                        y_bins=render_y_bins,
                     ).collect()
                 return filtered
 
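Condensed restatement of the selection logic spread across the hunks above (a sketch with injected callables, not the actual method):

    from typing import Callable, Sequence, Tuple

    def select_level_sketch(
        levels: Sequence,        # LazyFrames, smallest/coarsest level first
        in_zoom,                 # polars predicate for the zoom window
        min_points: int,
        zoom_x: Tuple[float, float],
        zoom_y: Tuple[float, float],
        optimal_bins: Callable,
        downsample: Callable,
    ):
        last = None
        for level in levels:
            filtered = level.filter(in_zoom).collect()
            last = filtered
            if len(filtered) >= min_points:
                if len(filtered) > min_points:
                    # bins now follow the zoom region's own aspect ratio
                    x_bins, y_bins = optimal_bins(min_points, zoom_x, zoom_y)
                    return downsample(filtered, min_points, x_bins, y_bins)
                return filtered
        return last  # even the finest level is under budget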
@@ -740,6 +934,7 @@ class Heatmap(BaseComponent):
             Dict with heatmapData (pandas DataFrame) and _hash for change detection
         """
         import sys
+
         zoom = state.get(self._zoom_identifier)
 
         # Build columns to select
@@ -761,7 +956,9 @@ class Heatmap(BaseComponent):
 
         # Get levels based on current state (may use per-filter levels)
         levels, filtered_raw = self._get_levels_for_state(state)
-        level_sizes = [len(lvl) if isinstance(lvl, pl.DataFrame) else '?' for lvl in levels]
+        level_sizes = [
+            len(lvl) if isinstance(lvl, pl.DataFrame) else "?" for lvl in levels
+        ]
 
         # Determine which filters still need to be applied at render time
         # (filters not in categorical_filters need runtime application)
@@ -775,12 +972,15 @@ class Heatmap(BaseComponent):
             # No zoom - use smallest level
             if not levels:
                 # No levels available
-                print('[HEATMAP] No levels available', file=sys.stderr)
-                return {'heatmapData': pl.DataFrame().to_pandas(), '_hash': ''}
+                print("[HEATMAP] No levels available", file=sys.stderr)
+                return {"heatmapData": pl.DataFrame().to_pandas(), "_hash": ""}
 
             data = levels[0]
-            using_cat = self._preprocessed_data.get('has_categorical_filters', False)
-            print(f"[HEATMAP] No zoom → level 0 ({level_sizes[0]} pts), levels={level_sizes}, categorical={using_cat}", file=sys.stderr)
+            using_cat = self._preprocessed_data.get("has_categorical_filters", False)
+            print(
+                f"[HEATMAP] No zoom → level 0 ({level_sizes[0]} pts), levels={level_sizes}, categorical={using_cat}",
+                file=sys.stderr,
+            )
 
             # Ensure we have a LazyFrame
             if isinstance(data, pl.DataFrame):
@@ -796,7 +996,9 @@ class Heatmap(BaseComponent):
                 filter_defaults=self._filter_defaults,
             )
             # Sort by intensity ascending so high-intensity points are drawn on top
-            df_pandas = df_pandas.sort_values(self._intensity_column).reset_index(drop=True)
+            df_pandas = df_pandas.sort_values(self._intensity_column).reset_index(
+                drop=True
+            )
         else:
             # No filters to apply - levels already filtered by categorical filter
             schema_names = data.collect_schema().names()
@@ -817,13 +1019,16 @@ class Heatmap(BaseComponent):
             df_polars = df_polars.select(available_cols)
             # Sort by intensity ascending so high-intensity points are drawn on top
             df_polars = df_polars.sort(self._intensity_column)
-            print(f"[HEATMAP] Selected {len(df_polars)} pts for zoom, levels={level_sizes}", file=sys.stderr)
+            print(
+                f"[HEATMAP] Selected {len(df_polars)} pts for zoom, levels={level_sizes}",
+                file=sys.stderr,
+            )
         data_hash = compute_dataframe_hash(df_polars)
         df_pandas = df_polars.to_pandas()
 
         return {
-            'heatmapData': df_pandas,
-            '_hash': data_hash,
+            "heatmapData": df_pandas,
+            "_hash": data_hash,
         }
 
     def _get_component_args(self) -> Dict[str, Any]:
@@ -834,19 +1039,19 @@ class Heatmap(BaseComponent):
             Dict with all heatmap configuration for Vue
         """
         args: Dict[str, Any] = {
-            'componentType': self._get_vue_component_name(),
-            'xColumn': self._x_column,
-            'yColumn': self._y_column,
-            'intensityColumn': self._intensity_column,
-            'xLabel': self._x_label,
-            'yLabel': self._y_label,
-            'colorscale': self._colorscale,
-            'zoomIdentifier': self._zoom_identifier,
-            'interactivity': self._interactivity,
+            "componentType": self._get_vue_component_name(),
+            "xColumn": self._x_column,
+            "yColumn": self._y_column,
+            "intensityColumn": self._intensity_column,
+            "xLabel": self._x_label,
+            "yLabel": self._y_label,
+            "colorscale": self._colorscale,
+            "zoomIdentifier": self._zoom_identifier,
+            "interactivity": self._interactivity,
        }
 
         if self._title:
-            args['title'] = self._title
+            args["title"] = self._title
 
         # Add any extra config options
         args.update(self._config)
@@ -858,7 +1063,7 @@ class Heatmap(BaseComponent):
         colorscale: Optional[str] = None,
         x_label: Optional[str] = None,
         y_label: Optional[str] = None,
-    ) -> 'Heatmap':
+    ) -> "Heatmap":
         """
         Update heatmap styling.
 
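The quoted "Heatmap" return annotation documents that update_style presumably returns the instance, so calls can be chained; a usage sketch with invented values:

    hm.update_style(colorscale="Viridis", x_label="RT [s]", y_label="m/z")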