openms-insight 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openms_insight/__init__.py +3 -1
- openms_insight/components/__init__.py +2 -0
- openms_insight/components/heatmap.py +69 -15
- openms_insight/components/volcanoplot.py +374 -0
- openms_insight/core/base.py +10 -1
- openms_insight/js-component/dist/assets/index.css +1 -1
- openms_insight/js-component/dist/assets/index.js +137 -128
- openms_insight/preprocessing/__init__.py +6 -0
- openms_insight/preprocessing/scatter.py +136 -0
- openms_insight/rendering/bridge.py +23 -4
- {openms_insight-0.1.4.dist-info → openms_insight-0.1.5.dist-info}/METADATA +101 -2
- {openms_insight-0.1.4.dist-info → openms_insight-0.1.5.dist-info}/RECORD +14 -12
- {openms_insight-0.1.4.dist-info → openms_insight-0.1.5.dist-info}/WHEEL +0 -0
- {openms_insight-0.1.4.dist-info → openms_insight-0.1.5.dist-info}/licenses/LICENSE +0 -0
openms_insight/__init__.py
CHANGED
|
@@ -9,13 +9,14 @@ from .components.heatmap import Heatmap
|
|
|
9
9
|
from .components.lineplot import LinePlot
|
|
10
10
|
from .components.sequenceview import SequenceView, SequenceViewResult
|
|
11
11
|
from .components.table import Table
|
|
12
|
+
from .components.volcanoplot import VolcanoPlot
|
|
12
13
|
from .core.base import BaseComponent
|
|
13
14
|
from .core.cache import CacheMissError
|
|
14
15
|
from .core.registry import get_component_class, register_component
|
|
15
16
|
from .core.state import StateManager
|
|
16
17
|
from .rendering.bridge import clear_component_annotations, get_component_annotations
|
|
17
18
|
|
|
18
|
-
__version__ = "0.1.4"
|
|
19
|
+
__version__ = "0.1.5"
|
|
19
20
|
|
|
20
21
|
__all__ = [
|
|
21
22
|
# Core
|
|
@@ -28,6 +29,7 @@ __all__ = [
|
|
|
28
29
|
"Table",
|
|
29
30
|
"LinePlot",
|
|
30
31
|
"Heatmap",
|
|
32
|
+
"VolcanoPlot",
|
|
31
33
|
"SequenceView",
|
|
32
34
|
"SequenceViewResult",
|
|
33
35
|
# Utilities
|
|
@@ -86,9 +86,14 @@ class Heatmap(BaseComponent):
|
|
|
86
86
|
x_label: Optional[str] = None,
|
|
87
87
|
y_label: Optional[str] = None,
|
|
88
88
|
colorscale: str = "Portland",
|
|
89
|
+
reversescale: bool = False,
|
|
89
90
|
use_simple_downsample: bool = False,
|
|
90
91
|
use_streaming: bool = True,
|
|
91
92
|
categorical_filters: Optional[List[str]] = None,
|
|
93
|
+
category_column: Optional[str] = None,
|
|
94
|
+
category_colors: Optional[Dict[str, str]] = None,
|
|
95
|
+
log_scale: bool = True,
|
|
96
|
+
intensity_label: Optional[str] = None,
|
|
92
97
|
**kwargs,
|
|
93
98
|
):
|
|
94
99
|
"""
|
|
@@ -133,6 +138,18 @@ class Heatmap(BaseComponent):
|
|
|
133
138
|
are sent to the client regardless of filter selection. Should be
|
|
134
139
|
used for filters with a small number of unique values (<20).
|
|
135
140
|
Example: ['im_dimension'] for ion mobility filtering.
|
|
141
|
+
category_column: Optional column name for categorical coloring.
|
|
142
|
+
When provided, points are colored by discrete category values
|
|
143
|
+
instead of the continuous intensity colorscale. Useful for
|
|
144
|
+
condition-based heatmaps (e.g., coloring by sample group).
|
|
145
|
+
category_colors: Optional mapping of category values to colors.
|
|
146
|
+
Keys should match values in category_column.
|
|
147
|
+
Values should be CSS color strings (e.g., '#FF0000', 'red').
|
|
148
|
+
If not provided, default Plotly colors will be used.
|
|
149
|
+
log_scale: If True (default), apply log10 transformation to intensity
|
|
150
|
+
values for color mapping. Set to False for linear color mapping.
|
|
151
|
+
intensity_label: Custom label for the colorbar. Default is "Intensity".
|
|
152
|
+
Useful when displaying non-intensity values like scores or counts.
|
|
136
153
|
**kwargs: Additional configuration options
|
|
137
154
|
"""
|
|
138
155
|
self._x_column = x_column
|
|
@@ -147,7 +164,12 @@ class Heatmap(BaseComponent):
|
|
|
147
164
|
self._x_label = x_label or x_column
|
|
148
165
|
self._y_label = y_label or y_column
|
|
149
166
|
self._colorscale = colorscale
|
|
167
|
+
self._reversescale = reversescale
|
|
150
168
|
self._use_simple_downsample = use_simple_downsample
|
|
169
|
+
self._category_column = category_column
|
|
170
|
+
self._category_colors = category_colors or {}
|
|
171
|
+
self._log_scale = log_scale
|
|
172
|
+
self._intensity_label = intensity_label
|
|
151
173
|
self._use_streaming = use_streaming
|
|
152
174
|
self._categorical_filters = categorical_filters or []
|
|
153
175
|
|
|
@@ -176,6 +198,8 @@ class Heatmap(BaseComponent):
|
|
|
176
198
|
use_simple_downsample=use_simple_downsample,
|
|
177
199
|
use_streaming=use_streaming,
|
|
178
200
|
categorical_filters=categorical_filters,
|
|
201
|
+
category_column=category_column,
|
|
202
|
+
category_colors=category_colors,
|
|
179
203
|
**kwargs,
|
|
180
204
|
)
|
|
181
205
|
|
|
@@ -202,6 +226,10 @@ class Heatmap(BaseComponent):
|
|
|
202
226
|
"x_label": self._x_label,
|
|
203
227
|
"y_label": self._y_label,
|
|
204
228
|
"colorscale": self._colorscale,
|
|
229
|
+
"category_column": self._category_column,
|
|
230
|
+
"log_scale": self._log_scale,
|
|
231
|
+
"intensity_label": self._intensity_label,
|
|
232
|
+
# Note: category_colors is render-time styling, doesn't affect cache
|
|
205
233
|
}
|
|
206
234
|
|
|
207
235
|
def _restore_cache_config(self, config: Dict[str, Any]) -> None:
|
|
@@ -223,6 +251,10 @@ class Heatmap(BaseComponent):
|
|
|
223
251
|
self._x_label = config.get("x_label", self._x_column)
|
|
224
252
|
self._y_label = config.get("y_label", self._y_column)
|
|
225
253
|
self._colorscale = config.get("colorscale", "Portland")
|
|
254
|
+
self._category_column = config.get("category_column")
|
|
255
|
+
self._log_scale = config.get("log_scale", True)
|
|
256
|
+
self._intensity_label = config.get("intensity_label")
|
|
257
|
+
# category_colors is not stored in cache (render-time styling)
|
|
226
258
|
|
|
227
259
|
def get_state_dependencies(self) -> list:
|
|
228
260
|
"""
|
|
@@ -457,9 +489,7 @@ class Heatmap(BaseComponent):
|
|
|
457
489
|
filtered_total = filtered_data.select(pl.len()).collect().item()
|
|
458
490
|
|
|
459
491
|
# Compute level sizes for this filtered subset (2× for cache buffer)
|
|
460
|
-
level_sizes = compute_compression_levels(
|
|
461
|
-
cache_target, filtered_total
|
|
462
|
-
)
|
|
492
|
+
level_sizes = compute_compression_levels(cache_target, filtered_total)
|
|
463
493
|
|
|
464
494
|
print(
|
|
465
495
|
f"[HEATMAP] Value {filter_value}: {filtered_total:,} pts → levels {level_sizes}",
|
|
@@ -937,12 +967,15 @@ class Heatmap(BaseComponent):
|
|
|
937
967
|
|
|
938
968
|
zoom = state.get(self._zoom_identifier)
|
|
939
969
|
|
|
940
|
-
# Build columns to select
|
|
970
|
+
# Build columns to select (filter out None values)
|
|
941
971
|
columns_to_select = [
|
|
942
|
-
|
|
943
|
-
self._y_column,
|
|
944
|
-
|
|
972
|
+
col
|
|
973
|
+
for col in [self._x_column, self._y_column, self._intensity_column]
|
|
974
|
+
if col is not None
|
|
945
975
|
]
|
|
976
|
+
# Include category column if specified
|
|
977
|
+
if self._category_column and self._category_column not in columns_to_select:
|
|
978
|
+
columns_to_select.append(self._category_column)
|
|
946
979
|
# Include columns needed for interactivity
|
|
947
980
|
if self._interactivity:
|
|
948
981
|
for col in self._interactivity.values():
|
|
@@ -995,17 +1028,25 @@ class Heatmap(BaseComponent):
|
|
|
995
1028
|
columns=columns_to_select,
|
|
996
1029
|
filter_defaults=self._filter_defaults,
|
|
997
1030
|
)
|
|
998
|
-
# Sort by intensity ascending so high-intensity points are drawn on top
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1031
|
+
# Sort by intensity ascending so high-intensity points are drawn on top (scattergl)
|
|
1032
|
+
if (
|
|
1033
|
+
self._intensity_column
|
|
1034
|
+
and self._intensity_column in df_pandas.columns
|
|
1035
|
+
):
|
|
1036
|
+
df_pandas = df_pandas.sort_values(
|
|
1037
|
+
self._intensity_column, ascending=True
|
|
1038
|
+
).reset_index(drop=True)
|
|
1002
1039
|
else:
|
|
1003
1040
|
# No filters to apply - levels already filtered by categorical filter
|
|
1004
1041
|
schema_names = data.collect_schema().names()
|
|
1005
1042
|
available_cols = [c for c in columns_to_select if c in schema_names]
|
|
1006
1043
|
df_polars = data.select(available_cols).collect()
|
|
1007
|
-
# Sort by intensity ascending so high-intensity points are drawn on top
|
|
1008
|
-
|
|
1044
|
+
# Sort by intensity ascending so high-intensity points are drawn on top (scattergl)
|
|
1045
|
+
if (
|
|
1046
|
+
self._intensity_column
|
|
1047
|
+
and self._intensity_column in df_polars.columns
|
|
1048
|
+
):
|
|
1049
|
+
df_polars = df_polars.sort(self._intensity_column)
|
|
1009
1050
|
data_hash = compute_dataframe_hash(df_polars)
|
|
1010
1051
|
df_pandas = df_polars.to_pandas()
|
|
1011
1052
|
else:
|
|
@@ -1017,8 +1058,9 @@ class Heatmap(BaseComponent):
|
|
|
1017
1058
|
# Select only needed columns
|
|
1018
1059
|
available_cols = [c for c in columns_to_select if c in df_polars.columns]
|
|
1019
1060
|
df_polars = df_polars.select(available_cols)
|
|
1020
|
-
# Sort by intensity ascending so high-intensity points are drawn on top
|
|
1021
|
-
|
|
1061
|
+
# Sort by intensity ascending so high-intensity points are drawn on top (scattergl)
|
|
1062
|
+
if self._intensity_column and self._intensity_column in df_polars.columns:
|
|
1063
|
+
df_polars = df_polars.sort(self._intensity_column)
|
|
1022
1064
|
print(
|
|
1023
1065
|
f"[HEATMAP] Selected {len(df_polars)} pts for zoom, levels={level_sizes}",
|
|
1024
1066
|
file=sys.stderr,
|
|
@@ -1046,6 +1088,7 @@ class Heatmap(BaseComponent):
|
|
|
1046
1088
|
"xLabel": self._x_label,
|
|
1047
1089
|
"yLabel": self._y_label,
|
|
1048
1090
|
"colorscale": self._colorscale,
|
|
1091
|
+
"reversescale": self._reversescale,
|
|
1049
1092
|
"zoomIdentifier": self._zoom_identifier,
|
|
1050
1093
|
"interactivity": self._interactivity,
|
|
1051
1094
|
}
|
|
@@ -1053,6 +1096,17 @@ class Heatmap(BaseComponent):
|
|
|
1053
1096
|
if self._title:
|
|
1054
1097
|
args["title"] = self._title
|
|
1055
1098
|
|
|
1099
|
+
# Add category column configuration for categorical coloring mode
|
|
1100
|
+
if self._category_column:
|
|
1101
|
+
args["categoryColumn"] = self._category_column
|
|
1102
|
+
if self._category_colors:
|
|
1103
|
+
args["categoryColors"] = self._category_colors
|
|
1104
|
+
|
|
1105
|
+
# Add log scale and intensity label configuration
|
|
1106
|
+
args["logScale"] = self._log_scale
|
|
1107
|
+
if self._intensity_label:
|
|
1108
|
+
args["intensityLabel"] = self._intensity_label
|
|
1109
|
+
|
|
1056
1110
|
# Add any extra config options
|
|
1057
1111
|
args.update(self._config)
|
|
1058
1112
|
|
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
"""VolcanoPlot component for differential expression visualization."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
|
|
7
|
+
from ..core.base import BaseComponent
|
|
8
|
+
from ..core.registry import register_component
|
|
9
|
+
from ..preprocessing.scatter import build_scatter_columns
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@register_component("volcanoplot")
class VolcanoPlot(BaseComponent):
    """
    Interactive volcano plot for differential expression analysis.

    Displays log2 fold change (x-axis) vs -log10(p-value) (y-axis) with
    three-category coloring based on significance thresholds. Thresholds
    are passed at render time to avoid cache invalidation when adjusting
    sliders.

    Features:
        - Client-side significance computation (instant threshold updates)
        - Three-category coloring (up-regulated, down-regulated, not significant)
        - Threshold lines at ±fc_threshold and -log10(p_threshold)
        - Optional labels on significant points
        - Click-to-select with cross-component linking
        - SVG export

    Example:
        volcano = VolcanoPlot(
            cache_id="protein_volcano",
            data_path="proteins.parquet",
            log2fc_column="log2FC",
            pvalue_column="pvalue",
            label_column="protein_name",
            interactivity={'protein': 'protein_id'},
            filters={'comparison': 'comparison_id'},
        )

        # Thresholds passed at render time - no cache impact
        volcano(
            state_manager=state,
            fc_threshold=1.0,
            p_threshold=0.05,
            height=500,
        )
    """

    _component_type: str = "volcanoplot"

    def __init__(
        self,
        cache_id: str,
        log2fc_column: str = "log2FC",
        pvalue_column: str = "pvalue",
        data: Optional[pl.LazyFrame] = None,
        data_path: Optional[str] = None,
        label_column: Optional[str] = None,
        filters: Optional[Dict[str, str]] = None,
        filter_defaults: Optional[Dict[str, Any]] = None,
        interactivity: Optional[Dict[str, str]] = None,
        cache_path: str = ".",
        regenerate_cache: bool = False,
        title: Optional[str] = None,
        x_label: Optional[str] = None,
        y_label: Optional[str] = None,
        up_color: str = "#E74C3C",
        down_color: str = "#3498DB",
        ns_color: str = "#95A5A6",
        show_threshold_lines: bool = True,
        threshold_line_style: str = "dash",
        **kwargs,
    ):
        """
        Initialize the VolcanoPlot component.

        Args:
            cache_id: Unique identifier for this component's cache (MANDATORY).
                Creates a folder {cache_path}/{cache_id}/ for cached data.
            log2fc_column: Name of column for log2 fold change (x-axis).
            pvalue_column: Name of column for p-value. Will be transformed
                to -log10(pvalue) for display on y-axis.
            data: Polars LazyFrame with volcano data. Optional if cache exists.
            data_path: Path to parquet file (preferred for large datasets).
            label_column: Name of column for point labels (shown on hover and
                optionally as annotations on significant points).
            filters: Mapping of identifier names to column names for filtering.
                Example: {'comparison': 'comparison_id'} filters by comparison.
            filter_defaults: Default values for filter identifiers when no
                selection is present in state.
            interactivity: Mapping of identifier names to column names for clicks.
                When a point is clicked, sets each identifier to the clicked
                point's value in the corresponding column.
            cache_path: Base path for cache storage. Default "." (current dir).
            regenerate_cache: If True, regenerate cache even if valid cache exists.
            title: Plot title displayed above the volcano plot.
            x_label: X-axis label (default: "log2 Fold Change").
            y_label: Y-axis label (default: "-log10(p-value)").
            up_color: Color for up-regulated points (default: red #E74C3C).
            down_color: Color for down-regulated points (default: blue #3498DB).
            ns_color: Color for not significant points (default: gray #95A5A6).
            show_threshold_lines: Show threshold lines on plot (default: True).
            threshold_line_style: Line style for thresholds (default: "dash").
            **kwargs: Additional configuration options.
        """
        # All component-specific attributes are assigned BEFORE calling
        # super().__init__() so they exist by the time the base class runs.
        self._log2fc_column = log2fc_column
        self._pvalue_column = pvalue_column
        self._label_column = label_column
        self._title = title
        self._x_label = x_label or "log2 Fold Change"
        self._y_label = y_label or "-log10(p-value)"
        self._up_color = up_color
        self._down_color = down_color
        self._ns_color = ns_color
        self._show_threshold_lines = show_threshold_lines
        self._threshold_line_style = threshold_line_style

        # Render-time threshold values (overwritten on every __call__)
        self._current_fc_threshold: float = 1.0
        self._current_p_threshold: float = 0.05
        self._current_max_labels: int = 10

        # Name of the derived -log10(pvalue) column added in _preprocess
        self._neglog10p_column = "_neglog10_pvalue"

        super().__init__(
            cache_id=cache_id,
            data=data,
            data_path=data_path,
            filters=filters,
            filter_defaults=filter_defaults,
            interactivity=interactivity,
            cache_path=cache_path,
            regenerate_cache=regenerate_cache,
            **kwargs,
        )

    def _validate_columns(self, schema: pl.Schema) -> None:
        """Validate that required columns exist in the data schema.

        Args:
            schema: Schema of the input data.

        Raises:
            ValueError: If the log2FC or p-value column is missing, or if a
                label column was configured but is not present.
        """
        available = set(schema.names())

        required = [self._log2fc_column, self._pvalue_column]
        missing = [col for col in required if col not in available]
        if missing:
            raise ValueError(
                f"Missing required columns: {missing}. "
                f"Available columns: {sorted(available)}"
            )

        if self._label_column and self._label_column not in available:
            raise ValueError(
                f"Label column '{self._label_column}' not found. "
                f"Available columns: {sorted(available)}"
            )

    def _get_component_config_hash_inputs(self) -> Dict[str, Any]:
        """Get inputs for component config hash (cache invalidation).

        Only the column names are included; thresholds are render-time
        parameters and are deliberately left out.
        """
        return {
            "log2fc_column": self._log2fc_column,
            "pvalue_column": self._pvalue_column,
            "label_column": self._label_column,
            # Note: thresholds are NOT included - they're render-time params
        }

    def _get_cache_config(self) -> Dict[str, Any]:
        """Get the configuration stored alongside the cache.

        The keys mirror those read back by ``_restore_cache_config``.
        """
        return {
            "log2fc_column": self._log2fc_column,
            "pvalue_column": self._pvalue_column,
            "label_column": self._label_column,
            "title": self._title,
            "x_label": self._x_label,
            "y_label": self._y_label,
            "up_color": self._up_color,
            "down_color": self._down_color,
            "ns_color": self._ns_color,
            "show_threshold_lines": self._show_threshold_lines,
            "threshold_line_style": self._threshold_line_style,
        }

    def _restore_cache_config(self, config: Dict[str, Any]) -> None:
        """Restore component-specific configuration from cached config.

        Missing keys fall back to the same defaults used by ``__init__``.

        Args:
            config: Mapping previously produced by ``_get_cache_config``.
        """
        self._log2fc_column = config.get("log2fc_column", "log2FC")
        self._pvalue_column = config.get("pvalue_column", "pvalue")
        self._label_column = config.get("label_column")
        self._title = config.get("title")
        self._x_label = config.get("x_label", "log2 Fold Change")
        self._y_label = config.get("y_label", "-log10(p-value)")
        self._up_color = config.get("up_color", "#E74C3C")
        self._down_color = config.get("down_color", "#3498DB")
        self._ns_color = config.get("ns_color", "#95A5A6")
        self._show_threshold_lines = config.get("show_threshold_lines", True)
        self._threshold_line_style = config.get("threshold_line_style", "dash")

    def _preprocess(self) -> None:
        """Preprocess data for the volcano plot.

        Selects the columns the plot needs, adds a derived
        ``-log10(pvalue)`` column and stores the collected frame in
        ``self._preprocessed_data["volcanoData"]``. No downsampling is
        performed here.

        Raises:
            ValueError: If no raw data is available.
        """
        if self._raw_data is None:
            raise ValueError("No data provided and no cache exists")

        # The raw p-value column is requested as both y_column and
        # value_column; the resulting duplicate is removed just below.
        extra_cols = [self._label_column] if self._label_column else []
        columns = build_scatter_columns(
            x_column=self._log2fc_column,
            y_column=self._pvalue_column,
            value_column=self._pvalue_column,
            interactivity=self._interactivity,
            filters=self._filters,
            extra_columns=extra_cols or None,
        )
        # Remove duplicates while preserving order
        columns = list(dict.fromkeys(columns))

        # Keep only columns that actually exist in the input
        schema_names = set(self._raw_data.collect_schema().names())
        available_cols = [c for c in columns if c in schema_names]

        # p-values that are not strictly positive get 0.0 rather than an
        # infinite/undefined -log10.
        # NOTE(review): this places p == 0 at the bottom of the plot
        # (least significant) - confirm that is intended.
        neglog10p = (
            pl.when(pl.col(self._pvalue_column) > 0)
            .then(-pl.col(self._pvalue_column).log(10))
            .otherwise(0.0)
            .alias(self._neglog10p_column)
        )
        df = self._raw_data.select(available_cols).with_columns(neglog10p).collect()

        self._preprocessed_data = {"volcanoData": df}

    def _get_vue_component_name(self) -> str:
        """Return the Vue component name."""
        return "PlotlyVolcano"

    def _get_data_key(self) -> str:
        """Return the key for the primary data in Vue payload."""
        return "volcanoData"

    def _prepare_vue_data(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Prepare the (optionally filtered) data for the Vue component.

        Loads the preprocessed data if necessary, restricts it to the
        columns the client needs, applies the configured filters when
        there are any, and sorts rows by ascending ``-log10(pvalue)``.

        Args:
            state: Current selection state used to resolve filter values.

        Returns:
            Dict with ``"volcanoData"`` (pandas DataFrame) and ``"_hash"``
            (hash of the data being sent).
        """
        # Local import, kept inside the method as in the original code.
        from ..preprocessing.filtering import (
            compute_dataframe_hash,
            filter_and_collect_cached,
        )

        if not self._preprocessed_data:
            self._load_preprocessed_data()

        data = self._preprocessed_data["volcanoData"]
        # Handle both LazyFrame (from cache) and DataFrame
        df_polars = data.collect() if isinstance(data, pl.LazyFrame) else data

        # The raw p-value is always sent along; the label goes first when set.
        extra_cols = [self._pvalue_column]
        if self._label_column:
            extra_cols.insert(0, self._label_column)

        columns = build_scatter_columns(
            x_column=self._log2fc_column,
            y_column=self._neglog10p_column,
            value_column=self._neglog10p_column,
            interactivity=self._interactivity,
            filters=self._filters,
            extra_columns=extra_cols,
        )
        # Remove duplicates while preserving order
        columns = list(dict.fromkeys(columns))

        if self._filters:
            df_pandas, data_hash = filter_and_collect_cached(
                df_polars.lazy(),
                self._filters,
                state,
                columns=columns,
                filter_defaults=self._filter_defaults,
            )

            # Ascending sort puts the most significant points last
            # (most significant on top for rendering).
            if len(df_pandas) > 0 and self._neglog10p_column in df_pandas.columns:
                df_pandas = df_pandas.sort_values(
                    self._neglog10p_column, ascending=True
                ).reset_index(drop=True)
        else:
            # No filters - select columns and convert to pandas
            available_cols = [c for c in columns if c in df_polars.columns]
            df_filtered = df_polars.select(available_cols)

            # Same ascending-significance ordering as the filtered branch
            if self._neglog10p_column in df_filtered.columns:
                df_filtered = df_filtered.sort(
                    self._neglog10p_column, descending=False
                )

            data_hash = compute_dataframe_hash(df_filtered)
            df_pandas = df_filtered.to_pandas()

        return {"volcanoData": df_pandas, "_hash": data_hash}

    def _get_component_args(self) -> Dict[str, Any]:
        """Return configuration for the Vue component.

        Includes the static styling/column settings plus the thresholds
        recorded by the most recent ``__call__``.
        """
        return {
            "componentType": self._get_vue_component_name(),
            "log2fcColumn": self._log2fc_column,
            "neglog10pColumn": self._neglog10p_column,
            "pvalueColumn": self._pvalue_column,
            "labelColumn": self._label_column,
            "title": self._title,
            "xLabel": self._x_label,
            "yLabel": self._y_label,
            "upColor": self._up_color,
            "downColor": self._down_color,
            "nsColor": self._ns_color,
            "showThresholdLines": self._show_threshold_lines,
            "thresholdLineStyle": self._threshold_line_style,
            # Render-time threshold values
            "fcThreshold": self._current_fc_threshold,
            "pThreshold": self._current_p_threshold,
            "maxLabels": self._current_max_labels,
            "interactivity": self._interactivity or {},
        }

    def __call__(
        self,
        key: Optional[str] = None,
        state_manager: Optional[Any] = None,
        height: Optional[int] = None,
        fc_threshold: float = 1.0,
        p_threshold: float = 0.05,
        max_labels: int = 10,
    ) -> Any:
        """
        Render the volcano plot component.

        Args:
            key: Optional unique key for this component instance.
            state_manager: StateManager for cross-component linking.
            height: Optional height override in pixels.
            fc_threshold: Fold change threshold for significance
                (default: 1.0, meaning |log2FC| >= 1).
            p_threshold: P-value threshold for significance
                (default: 0.05). Points with p < threshold are significant.
            max_labels: Maximum number of labels to show on significant
                points (default: 10). Labels are shown for top N by
                significance.

        Returns:
            Component result for Streamlit rendering.
        """
        # Store render-time threshold values; _get_component_args reads them
        self._current_fc_threshold = fc_threshold
        self._current_p_threshold = p_threshold
        self._current_max_labels = max_labels

        # Update height if provided
        if height is not None:
            self._height = height

        return super().__call__(key=key, state_manager=state_manager, height=height)
|
openms_insight/core/base.py
CHANGED
|
@@ -19,6 +19,10 @@ if TYPE_CHECKING:
|
|
|
19
19
|
# Version 3: Downcast numeric types (Int64→Int32, Float64→Float32) for efficient transfer
|
|
20
20
|
CACHE_VERSION = 3
|
|
21
21
|
|
|
22
|
+
# Default height for components when not specified
|
|
23
|
+
# This is the single source of truth for component height
|
|
24
|
+
DEFAULT_COMPONENT_HEIGHT = 400
|
|
25
|
+
|
|
22
26
|
|
|
23
27
|
class BaseComponent(ABC):
|
|
24
28
|
"""
|
|
@@ -485,7 +489,8 @@ class BaseComponent(ABC):
|
|
|
485
489
|
key: Optional unique key for the Streamlit component
|
|
486
490
|
state_manager: Optional StateManager for cross-component state.
|
|
487
491
|
If not provided, uses a default shared StateManager.
|
|
488
|
-
height: Optional height in pixels for the component
|
|
492
|
+
height: Optional height in pixels for the component.
|
|
493
|
+
If not provided, uses DEFAULT_COMPONENT_HEIGHT (400px).
|
|
489
494
|
|
|
490
495
|
Returns:
|
|
491
496
|
The value returned by the Vue component (usually selection state)
|
|
@@ -496,6 +501,10 @@ class BaseComponent(ABC):
|
|
|
496
501
|
if state_manager is None:
|
|
497
502
|
state_manager = get_default_state_manager()
|
|
498
503
|
|
|
504
|
+
# Use default height if not specified
|
|
505
|
+
if height is None:
|
|
506
|
+
height = DEFAULT_COMPONENT_HEIGHT
|
|
507
|
+
|
|
499
508
|
return render_component(
|
|
500
509
|
component=self, state_manager=state_manager, key=key, height=height
|
|
501
510
|
)
|