openms-insight 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,7 @@ from .core.registry import get_component_class, register_component
16
16
  from .core.state import StateManager
17
17
  from .rendering.bridge import clear_component_annotations, get_component_annotations
18
18
 
19
- __version__ = "0.1.7"
19
+ __version__ = "0.1.11"
20
20
 
21
21
  __all__ = [
22
22
  # Core
@@ -93,6 +93,7 @@ class Heatmap(BaseComponent):
93
93
  category_column: Optional[str] = None,
94
94
  category_colors: Optional[Dict[str, str]] = None,
95
95
  log_scale: bool = True,
96
+ low_values_on_top: bool = False,
96
97
  intensity_label: Optional[str] = None,
97
98
  **kwargs,
98
99
  ):
@@ -148,6 +149,11 @@ class Heatmap(BaseComponent):
148
149
  If not provided, default Plotly colors will be used.
149
150
  log_scale: If True (default), apply log10 transformation to intensity
150
151
  values for color mapping. Set to False for linear color mapping.
152
+ low_values_on_top: If True, invert the intensity priority for both downsampling
153
+ and display order. Default False keeps high-intensity points during
154
+ downsampling and draws them on top. Set to True when lower values are
155
+ "better" (e.g., e-values, PEP scores, q-values) so that low values are
156
+ preserved during downsampling and appear on top of high values.
151
157
  intensity_label: Custom label for the colorbar. Default is "Intensity".
152
158
  Useful when displaying non-intensity values like scores or counts.
153
159
  **kwargs: Additional configuration options
@@ -169,6 +175,7 @@ class Heatmap(BaseComponent):
169
175
  self._category_column = category_column
170
176
  self._category_colors = category_colors or {}
171
177
  self._log_scale = log_scale
178
+ self._low_values_on_top = low_values_on_top
172
179
  self._intensity_label = intensity_label
173
180
  self._use_streaming = use_streaming
174
181
  self._categorical_filters = categorical_filters or []
@@ -228,6 +235,7 @@ class Heatmap(BaseComponent):
228
235
  "colorscale": self._colorscale,
229
236
  "category_column": self._category_column,
230
237
  "log_scale": self._log_scale,
238
+ "low_values_on_top": self._low_values_on_top,
231
239
  "intensity_label": self._intensity_label,
232
240
  # Note: category_colors is render-time styling, doesn't affect cache
233
241
  }
@@ -253,6 +261,7 @@ class Heatmap(BaseComponent):
253
261
  self._colorscale = config.get("colorscale", "Portland")
254
262
  self._category_column = config.get("category_column")
255
263
  self._log_scale = config.get("log_scale", True)
264
+ self._low_values_on_top = config.get("low_values_on_top", False)
256
265
  self._intensity_label = config.get("intensity_label")
257
266
  # category_colors is not stored in cache (render-time styling)
258
267
 
@@ -352,6 +361,7 @@ class Heatmap(BaseComponent):
352
361
  current_source,
353
362
  max_points=target_size,
354
363
  intensity_column=self._intensity_column,
364
+ descending=not self._low_values_on_top,
355
365
  )
356
366
  else:
357
367
  level = downsample_2d_streaming(
@@ -364,6 +374,7 @@ class Heatmap(BaseComponent):
364
374
  y_bins=self._y_bins,
365
375
  x_range=x_range,
366
376
  y_range=y_range,
377
+ descending=not self._low_values_on_top,
367
378
  )
368
379
 
369
380
  # Sort and save immediately
@@ -1028,25 +1039,29 @@ class Heatmap(BaseComponent):
1028
1039
  columns=columns_to_select,
1029
1040
  filter_defaults=self._filter_defaults,
1030
1041
  )
1031
- # Sort by intensity ascending so high-intensity points are drawn on top (scattergl)
1042
+ # Sort for render order (last drawn = on top in scattergl)
1043
+ # Default: ascending (high on top). low_values_on_top: descending (low on top)
1032
1044
  if (
1033
1045
  self._intensity_column
1034
1046
  and self._intensity_column in df_pandas.columns
1035
1047
  ):
1036
1048
  df_pandas = df_pandas.sort_values(
1037
- self._intensity_column, ascending=True
1049
+ self._intensity_column, ascending=not self._low_values_on_top
1038
1050
  ).reset_index(drop=True)
1039
1051
  else:
1040
1052
  # No filters to apply - levels already filtered by categorical filter
1041
1053
  schema_names = data.collect_schema().names()
1042
1054
  available_cols = [c for c in columns_to_select if c in schema_names]
1043
1055
  df_polars = data.select(available_cols).collect()
1044
- # Sort by intensity ascending so high-intensity points are drawn on top (scattergl)
1056
+ # Sort for render order (last drawn = on top in scattergl)
1057
+ # Default: ascending (high on top). low_values_on_top: descending (low on top)
1045
1058
  if (
1046
1059
  self._intensity_column
1047
1060
  and self._intensity_column in df_polars.columns
1048
1061
  ):
1049
- df_polars = df_polars.sort(self._intensity_column)
1062
+ df_polars = df_polars.sort(
1063
+ self._intensity_column, descending=self._low_values_on_top
1064
+ )
1050
1065
  data_hash = compute_dataframe_hash(df_polars)
1051
1066
  df_pandas = df_polars.to_pandas()
1052
1067
  else:
@@ -1058,9 +1073,12 @@ class Heatmap(BaseComponent):
1058
1073
  # Select only needed columns
1059
1074
  available_cols = [c for c in columns_to_select if c in df_polars.columns]
1060
1075
  df_polars = df_polars.select(available_cols)
1061
- # Sort by intensity ascending so high-intensity points are drawn on top (scattergl)
1076
+ # Sort for render order (last drawn = on top in scattergl)
1077
+ # Default: ascending (high on top). low_values_on_top: descending (low on top)
1062
1078
  if self._intensity_column and self._intensity_column in df_polars.columns:
1063
- df_polars = df_polars.sort(self._intensity_column)
1079
+ df_polars = df_polars.sort(
1080
+ self._intensity_column, descending=self._low_values_on_top
1081
+ )
1064
1082
  print(
1065
1083
  f"[HEATMAP] Selected {len(df_polars)} pts for zoom, levels={level_sizes}",
1066
1084
  file=sys.stderr,
@@ -440,6 +440,9 @@ class SequenceView:
440
440
  self._deconvolved = deconvolved
441
441
  self._config = kwargs
442
442
  self._filters = filters or {}
443
+ self._filter_defaults = {}
444
+ for identifier in self._filters.keys():
445
+ self._filter_defaults[identifier] = None
443
446
  self._interactivity = interactivity or {}
444
447
 
445
448
  # Store annotation config with defaults
@@ -534,6 +537,9 @@ class SequenceView:
534
537
 
535
538
  # Restore all configuration
536
539
  self._filters = config.get("filters", {})
540
+ self._filter_defaults = {}
541
+ for identifier in self._filters.keys():
542
+ self._filter_defaults[identifier] = None
537
543
  self._interactivity = config.get("interactivity", {})
538
544
  self._title = config.get("title")
539
545
  self._height = config.get("height", 400)
@@ -650,6 +656,12 @@ class SequenceView:
650
656
  filter_value = state.get(identifier)
651
657
  if filter_value is not None:
652
658
  filtered = filtered.filter(pl.col(column) == filter_value)
659
+ elif (
660
+ identifier in self._filter_defaults
661
+ and self._filter_defaults[identifier] is None
662
+ ):
663
+ # Filter has None default and state is None - return empty intentionally
664
+ return "", 1
653
665
 
654
666
  # Collect and get first row
655
667
  try:
@@ -681,6 +693,14 @@ class SequenceView:
681
693
  filter_value = state.get(identifier)
682
694
  if filter_value is not None:
683
695
  filtered = filtered.filter(pl.col(column) == filter_value)
696
+ elif (
697
+ identifier in self._filter_defaults
698
+ and self._filter_defaults[identifier] is None
699
+ ):
700
+ # Filter has None default and state is None - return empty intentionally
701
+ return pl.DataFrame(
702
+ schema={"peak_id": pl.Int64, "mass": pl.Float64}
703
+ )
684
704
 
685
705
  # Select available columns
686
706
  cols = ["peak_id", "mass"]
@@ -1,6 +1,7 @@
1
1
  """Table component using Tabulator.js."""
2
2
 
3
3
  import logging
4
+ import re
4
5
  from typing import Any, Dict, List, Optional
5
6
 
6
7
  import polars as pl
@@ -11,6 +12,20 @@ from ..preprocessing.filtering import compute_dataframe_hash
11
12
 
12
13
  logger = logging.getLogger(__name__)
13
14
 
15
+ # Numeric data types for dtype checking
16
+ NUMERIC_DTYPES = (
17
+ pl.Int8,
18
+ pl.Int16,
19
+ pl.Int32,
20
+ pl.Int64,
21
+ pl.UInt8,
22
+ pl.UInt16,
23
+ pl.UInt32,
24
+ pl.UInt64,
25
+ pl.Float32,
26
+ pl.Float64,
27
+ )
28
+
14
29
  # Session state key for tracking last rendered selection per table component
15
30
  _LAST_SELECTION_KEY = "_svc_table_last_selection"
16
31
  # Session state key for tracking last sort/filter state per table component
@@ -419,10 +434,71 @@ class Table(BaseComponent):
419
434
 
420
435
  self._preprocessed_data["column_metadata"] = column_metadata
421
436
 
437
+ # Auto-detect go-to fields if not explicitly provided
438
+ if self._go_to_fields is None:
439
+ self._go_to_fields = self._auto_detect_go_to_fields(data)
440
+ elif self._go_to_fields == []:
441
+ # Explicitly disabled - keep empty list
442
+ pass
443
+ # else: use user-provided list as-is
444
+
422
445
  # Store LazyFrame for streaming to disk (filter happens at render time)
423
446
  # Base class will use sink_parquet() to stream without full materialization
424
447
  self._preprocessed_data["data"] = data # Keep lazy
425
448
 
449
+ def _auto_detect_go_to_fields(self, data: pl.LazyFrame) -> List[str]:
450
+ """
451
+ Auto-detect columns suitable for go-to navigation.
452
+
453
+ Criteria:
454
+ - Integer or String (Utf8) type only (excludes Float)
455
+ - 100% unique values (no duplicates)
456
+ - Samples first 10,000 rows for performance
457
+
458
+ Args:
459
+ data: LazyFrame to analyze for unique columns
460
+
461
+ Returns:
462
+ List of column names in original schema order
463
+ """
464
+ schema = data.collect_schema()
465
+ sample = data.head(10000)
466
+
467
+ candidates = []
468
+ for col_name in schema.names():
469
+ dtype = schema[col_name]
470
+
471
+ # Only Integer and String types (exclude Float)
472
+ if dtype not in (
473
+ pl.Int8,
474
+ pl.Int16,
475
+ pl.Int32,
476
+ pl.Int64,
477
+ pl.UInt8,
478
+ pl.UInt16,
479
+ pl.UInt32,
480
+ pl.UInt64,
481
+ pl.Utf8,
482
+ ):
483
+ continue
484
+
485
+ # Check 100% uniqueness in sample
486
+ stats = sample.select(
487
+ [
488
+ pl.col(col_name).len().alias("count"),
489
+ pl.col(col_name).n_unique().alias("n_unique"),
490
+ ]
491
+ ).collect()
492
+
493
+ count = stats["count"][0]
494
+ n_unique = stats["n_unique"][0]
495
+
496
+ # Must be 100% unique (count == n_unique)
497
+ if count > 0 and count == n_unique:
498
+ candidates.append(col_name)
499
+
500
+ return candidates
501
+
426
502
  def _get_columns_to_select(self) -> Optional[List[str]]:
427
503
  """Get list of columns needed for this table."""
428
504
  if not self._column_definitions:
@@ -527,6 +603,7 @@ class Table(BaseComponent):
527
603
  "total_rows": 0,
528
604
  "total_pages": 0,
529
605
  },
606
+ "_auto_selection": {}, # No data = no auto-selection
530
607
  }
531
608
 
532
609
  # Convert float to int for integer columns (JS numbers come as floats)
@@ -563,13 +640,29 @@ class Table(BaseComponent):
563
640
  elif filter_type == "<=":
564
641
  data = data.filter(pl.col(field) <= value)
565
642
  elif filter_type == "regex":
566
- # Text search with regex
567
- data = data.filter(pl.col(field).str.contains(value, literal=False))
643
+ # Text search with regex - invalid patterns match nothing
644
+ try:
645
+ re.compile(value)
646
+ data = data.filter(pl.col(field).str.contains(value, literal=False))
647
+ except re.error:
648
+ # Invalid regex pattern - filter to empty result
649
+ data = data.filter(pl.lit(False))
568
650
 
569
651
  # Apply server-side sort
570
652
  if sort_column:
653
+ # User-applied sort from pagination state takes precedence
571
654
  descending = sort_dir == "desc"
572
- data = data.sort(sort_column, descending=descending)
655
+ data = data.sort(sort_column, descending=descending, maintain_order=True)
656
+ elif self._initial_sort:
657
+ # Fall back to initial_sort configuration on initial load
658
+ # initial_sort is a list of dicts: [{"column": "mass", "dir": "desc"}, ...]
659
+ sort_columns = [s["column"] for s in self._initial_sort]
660
+ sort_descending = [
661
+ s.get("dir", "asc") == "desc" for s in self._initial_sort
662
+ ]
663
+ data = data.sort(
664
+ sort_columns, descending=sort_descending, maintain_order=True
665
+ )
573
666
 
574
667
  # Get total row count (after filters, before pagination)
575
668
  total_rows = data.select(pl.len()).collect().item()
@@ -578,34 +671,44 @@ class Table(BaseComponent):
578
671
  # Handle go-to request (server-side search for row by field value)
579
672
  navigate_to_page = None
580
673
  target_row_index = None
674
+ go_to_not_found = False
581
675
 
582
676
  if go_to_request:
583
677
  go_to_field = go_to_request.get("field")
584
678
  go_to_value = go_to_request.get("value")
585
679
  if go_to_field and go_to_value is not None:
586
- # Try to convert to number if applicable
587
- try:
588
- go_to_value = float(go_to_value)
589
- if go_to_value.is_integer():
590
- go_to_value = int(go_to_value)
591
- except (ValueError, TypeError):
592
- pass
593
-
594
- # Find the row with row_number
595
- search_result = (
596
- data.with_row_index("_row_num")
597
- .filter(pl.col(go_to_field) == go_to_value)
598
- .select("_row_num")
599
- .head(1)
600
- .collect()
601
- )
602
-
603
- if len(search_result) > 0:
604
- row_num = search_result["_row_num"][0]
605
- target_page = (row_num // page_size) + 1
606
- navigate_to_page = target_page
607
- target_row_index = row_num % page_size
608
- page = target_page # Jump to target page
680
+ # Only convert to numeric if the target column is numeric
681
+ schema = data.collect_schema()
682
+ if go_to_field in schema and schema[go_to_field] in NUMERIC_DTYPES:
683
+ try:
684
+ go_to_value = float(go_to_value)
685
+ if go_to_value.is_integer():
686
+ go_to_value = int(go_to_value)
687
+ except (ValueError, TypeError):
688
+ # Non-numeric string for numeric column - mark as not found
689
+ go_to_not_found = True
690
+ # If column is string (Utf8), keep go_to_value as-is
691
+
692
+ # Only search if we have a valid value (not already marked as not found)
693
+ if not go_to_not_found:
694
+ # Find the row with row_number
695
+ search_result = (
696
+ data.with_row_index("_row_num")
697
+ .filter(pl.col(go_to_field) == go_to_value)
698
+ .select("_row_num")
699
+ .head(1)
700
+ .collect()
701
+ )
702
+
703
+ if len(search_result) > 0:
704
+ row_num = search_result["_row_num"][0]
705
+ target_page = (row_num // page_size) + 1
706
+ navigate_to_page = target_page
707
+ target_row_index = row_num % page_size
708
+ page = target_page # Jump to target page
709
+ else:
710
+ # Row not found - set flag for Vue to show "not found" feedback
711
+ go_to_not_found = True
609
712
 
610
713
  # === Selection and Sort/Filter based navigation ===
611
714
  # PURPOSE: When user sorts/filters, find where the selected row ended up and navigate there
@@ -669,12 +772,28 @@ class Table(BaseComponent):
669
772
  for identifier, column in self._interactivity.items():
670
773
  selected_value = state.get(identifier)
671
774
  if selected_value is not None:
672
- # Convert float to int if needed (JS numbers come as floats)
673
- if (
674
- isinstance(selected_value, float)
675
- and selected_value.is_integer()
676
- ):
677
- selected_value = int(selected_value)
775
+ # Type conversion based on column dtype (same logic as go-to)
776
+ schema = data.collect_schema()
777
+ if column in schema:
778
+ col_dtype = schema[column]
779
+ if col_dtype in NUMERIC_DTYPES:
780
+ # Column is numeric - convert value to numeric if possible
781
+ if isinstance(selected_value, str):
782
+ try:
783
+ selected_value = float(selected_value)
784
+ if selected_value.is_integer():
785
+ selected_value = int(selected_value)
786
+ except (ValueError, TypeError):
787
+ pass
788
+ elif (
789
+ isinstance(selected_value, float)
790
+ and selected_value.is_integer()
791
+ ):
792
+ selected_value = int(selected_value)
793
+ else:
794
+ # Column is string - convert value to string
795
+ if not isinstance(selected_value, str):
796
+ selected_value = str(selected_value)
678
797
 
679
798
  # SEARCH for the selected row in the sorted/filtered data
680
799
  # with_row_index adds position so we know which page it's on
@@ -742,6 +861,22 @@ class Table(BaseComponent):
742
861
  # Clamp page to valid range
743
862
  page = max(1, min(page, total_pages))
744
863
 
864
+ # Compute auto-selection from first row (before pagination)
865
+ # This provides the first row's values for interactivity columns
866
+ # so downstream components can receive initial data when filters change
867
+ auto_selection: Dict[str, Any] = {}
868
+ if self._interactivity and total_rows > 0:
869
+ # Get the first row of sorted/filtered data
870
+ first_row = data.head(1).collect()
871
+ if first_row.height > 0:
872
+ for identifier, column in self._interactivity.items():
873
+ if column in first_row.columns:
874
+ value = first_row[column][0]
875
+ # Convert numpy/polars types to Python types for JSON
876
+ if hasattr(value, "item"):
877
+ value = value.item()
878
+ auto_selection[identifier] = value
879
+
745
880
  # Slice to current page
746
881
  offset = (page - 1) * page_size
747
882
  df_polars = data.slice(offset, page_size).collect()
@@ -761,12 +896,15 @@ class Table(BaseComponent):
761
896
  "sort_column": sort_column,
762
897
  "sort_dir": sort_dir,
763
898
  },
899
+ "_auto_selection": auto_selection,
764
900
  }
765
901
 
766
902
  if navigate_to_page is not None:
767
903
  result["_navigate_to_page"] = navigate_to_page
768
904
  if target_row_index is not None:
769
905
  result["_target_row_index"] = target_row_index
906
+ if go_to_not_found:
907
+ result["_go_to_not_found"] = True
770
908
 
771
909
  logger.info(
772
910
  f"[Table._prepare_vue_data] Returning: page={page}, total_rows={total_rows}, data_rows={len(df_polars)}"