openms-insight 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,10 @@ from .filtering import (
10
10
  filter_by_index,
11
11
  filter_by_selection,
12
12
  )
13
+ from .scatter import (
14
+ build_scatter_columns,
15
+ prepare_scatter_data,
16
+ )
13
17
 
14
18
  __all__ = [
15
19
  "filter_by_selection",
@@ -18,4 +22,6 @@ __all__ = [
18
22
  "compute_compression_levels",
19
23
  "downsample_2d",
20
24
  "downsample_2d_simple",
25
+ "build_scatter_columns",
26
+ "prepare_scatter_data",
21
27
  ]
@@ -0,0 +1,136 @@
1
+ """Shared utilities for scatter-based components (Heatmap, VolcanoPlot)."""
2
+
3
+ from typing import Any, Dict, List, Optional, Tuple
4
+
5
+ import pandas as pd
6
+ import polars as pl
7
+
8
+ from .filtering import compute_dataframe_hash, filter_and_collect_cached
9
+
10
+
11
def build_scatter_columns(
    x_column: str,
    y_column: str,
    value_column: str,
    interactivity: Optional[Dict[str, str]] = None,
    filters: Optional[Dict[str, str]] = None,
    extra_columns: Optional[List[str]] = None,
) -> List[str]:
    """
    Build list of columns needed for scatter-based component.

    Includes x, y, value columns plus any columns needed for
    interactivity and filtering. Every name appears at most once in the
    result, in first-seen order, even when the same column is used for
    several roles (e.g. ``value_column == y_column``).

    Args:
        x_column: Name of x-axis column
        y_column: Name of y-axis column
        value_column: Name of value column (intensity, -log10(pvalue), etc.)
        interactivity: Mapping of identifier names to column names for clicks
        filters: Mapping of identifier names to column names for filtering
        extra_columns: Additional columns to include (e.g., label_column).
            Falsy entries (``None``, ``""``) are ignored.

    Returns:
        List of unique column names to select
    """
    # Collect candidates in priority order: axes/value first, then the
    # columns referenced by interactivity, filters and extras.
    candidates: List[str] = [x_column, y_column, value_column]

    if interactivity:
        candidates.extend(interactivity.values())

    if filters:
        candidates.extend(filters.values())

    if extra_columns:
        # Skip empty placeholders such as an unset optional label column.
        candidates.extend(col for col in extra_columns if col)

    # dict.fromkeys drops repeats while keeping first-seen order, so the
    # base x/y/value columns are de-duplicated as well.
    return list(dict.fromkeys(candidates))
57
+
58
+
59
def prepare_scatter_data(
    data: pl.LazyFrame,
    x_column: str,
    y_column: str,
    value_column: str,
    filters: Optional[Dict[str, str]],
    state: Dict[str, Any],
    filter_defaults: Optional[Dict[str, Any]] = None,
    interactivity: Optional[Dict[str, str]] = None,
    extra_columns: Optional[List[str]] = None,
    sort_by_value: bool = True,
    sort_ascending: bool = True,
) -> Tuple[pd.DataFrame, str]:
    """
    Prepare scatter data for Vue component.

    Shared preparation step for scatter-based components (Heatmap,
    VolcanoPlot): work out which columns are needed, apply filters when
    any are configured, optionally sort by the value column, and hand back
    a pandas DataFrame together with a hash used for change detection.

    Args:
        data: LazyFrame with scatter data
        x_column: Name of x-axis column
        y_column: Name of y-axis column
        value_column: Name of value column (intensity, -log10(pvalue), etc.)
        filters: Mapping of identifier names to column names for filtering
        state: Current selection state from StateManager
        filter_defaults: Optional default values for filters when state is None
        interactivity: Mapping of identifier names to column names for clicks
        extra_columns: Additional columns to include (e.g., label_column)
        sort_by_value: If True, sort by value_column (default: True)
        sort_ascending: Sort order - True for ascending (default: True).
            Ascending puts high values on top in scatter plots.

    Returns:
        Tuple of (pandas DataFrame, hash string for change detection)
    """
    needed = build_scatter_columns(
        x_column,
        y_column,
        value_column,
        interactivity=interactivity,
        filters=filters,
        extra_columns=extra_columns,
    )

    if not filters:
        # Unfiltered path: keep only the requested columns that actually
        # exist in the frame's schema, then materialize.
        present = data.collect_schema().names()
        selectable = [name for name in needed if name in present]
        frame = data.select(selectable).collect()

        can_sort = len(frame) > 0 and value_column in frame.columns
        if sort_by_value and can_sort:
            frame = frame.sort(value_column, descending=not sort_ascending)

        # Hash is taken from the (possibly sorted) polars frame before the
        # pandas conversion.
        digest = compute_dataframe_hash(frame)
        return frame.to_pandas(), digest

    # Filtered path: the cached helper applies the filters and returns the
    # collected frame together with its hash.
    result, digest = filter_and_collect_cached(
        data,
        filters,
        state,
        columns=needed,
        filter_defaults=filter_defaults,
    )

    # Sort by value column so high-value points are drawn on top
    if sort_by_value and len(result) > 0 and value_column in result.columns:
        ordered = result.sort_values(value_column, ascending=sort_ascending)
        result = ordered.reset_index(drop=True)

    return result, digest
@@ -2,6 +2,7 @@
2
2
 
3
3
  import hashlib
4
4
  import json
5
+ import logging
5
6
  import os
6
7
  from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
7
8
 
@@ -9,6 +10,10 @@ import pandas as pd
9
10
  import polars as pl
10
11
  import streamlit as st
11
12
 
13
+ # Configure debug logging for hash tracking
14
+ _DEBUG_HASH_TRACKING = os.environ.get("SVC_DEBUG_HASH", "").lower() == "true"
15
+ _logger = logging.getLogger(__name__)
16
+
12
17
 
13
18
  def _make_hashable(value: Any) -> Any:
14
19
  """
@@ -316,8 +321,10 @@ def render_component(
316
321
 
317
322
  # Generate unique key if not provided (needed for cache)
318
323
  # Use cache_id instead of id(component) since components are recreated each rerun
324
+ # Use JSON serialization + MD5 for deterministic key generation (built-in hash() of a str is randomized per process)
319
325
  if key is None:
320
- key = f"svc_{component._cache_id}_{hash(str(component._interactivity))}"
326
+ interactivity_str = json.dumps(component._interactivity or {}, sort_keys=True)
327
+ key = f"svc_{component._cache_id}_{hashlib.md5(interactivity_str.encode()).hexdigest()[:8]}"
321
328
 
322
329
  # Check if component has required filters without values
323
330
  # Don't send potentially huge unfiltered datasets - wait for filter selection
@@ -389,6 +396,16 @@ def render_component(
389
396
  # so normal comparison works for all components including those with dynamic annotations
390
397
  data_changed = (vue_echoed_hash is None) or (vue_echoed_hash != data_hash)
391
398
 
399
+ # Debug logging for hash tracking issues
400
+ if _DEBUG_HASH_TRACKING:
401
+ _logger.warning(
402
+ f"[HashTrack] {component._cache_id}: "
403
+ f"data_changed={data_changed}, "
404
+ f"vue_echoed={vue_echoed_hash[:8] if vue_echoed_hash else 'None'}, "
405
+ f"data_hash={data_hash[:8] if data_hash else 'None'}, "
406
+ f"key={hash_tracking_key[:50]}..."
407
+ )
408
+
392
409
  # Only include full data if hash changed
393
410
  if data_changed:
394
411
  # Convert any non-pandas data to pandas for Arrow serialization
@@ -448,10 +465,12 @@ def render_component(
448
465
  # Update state from Vue response
449
466
  if result is not None:
450
467
  # Store Vue's echoed hash for next render comparison
451
- # ALWAYS update from Vue's echo - if Vue lost its data (page navigation),
452
- # it echoes None, and we need to know that to resend data next time
468
+ # Only store non-None hashes - Vue echoes None during initialization
469
+ # before receiving data, which would corrupt our tracking. Preserving
470
+ # the previous valid hash prevents unnecessary data resends.
453
471
  vue_hash = result.get("_vueDataHash")
454
- st.session_state[_VUE_ECHOED_HASH_KEY][hash_tracking_key] = vue_hash
472
+ if vue_hash is not None:
473
+ st.session_state[_VUE_ECHOED_HASH_KEY][hash_tracking_key] = vue_hash
455
474
 
456
475
  # Capture annotations from Vue (e.g., from SequenceView)
457
476
  # Use hash-based change detection for robustness
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openms-insight
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Interactive visualization components for mass spectrometry data in Streamlit
5
5
  Project-URL: Homepage, https://github.com/t0mdavid-m/OpenMS-Insight
6
6
  Project-URL: Documentation, https://github.com/t0mdavid-m/OpenMS-Insight#readme
@@ -50,6 +50,7 @@ Interactive visualization components for mass spectrometry data in Streamlit, ba
50
50
  - **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination, CSV export
51
51
  - **Line plot component** (Plotly.js) with highlighting, annotations, zoom
52
52
  - **Heatmap component** (Plotly scattergl) with multi-resolution downsampling for millions of points
53
+ - **Volcano plot component** for differential expression visualization with significance thresholds
53
54
  - **Sequence view component** for peptide visualization with fragment ion matching and auto-zoom
54
55
 
55
56
  ## Installation
@@ -62,7 +63,7 @@ pip install openms-insight
62
63
 
63
64
  ```python
64
65
  import streamlit as st
65
- from openms_insight import Table, LinePlot, StateManager
66
+ from openms_insight import Table, LinePlot, Heatmap, VolcanoPlot, StateManager
66
67
 
67
68
  # Create state manager for cross-component linking
68
69
  state_manager = StateManager()
@@ -169,6 +170,21 @@ Table(
169
170
  - `pagination`: Enable pagination for large tables (default: True)
170
171
  - `page_size`: Rows per page (default: 100)
171
172
 
173
+ **Custom formatters:**
174
+ In addition to Tabulator's built-in formatters, these custom formatters are available:
175
+ - `scientific`: Exponential notation (e.g., "1.23e-05") - use `formatterParams: {precision: 3}`
176
+ - `signed`: Explicit +/- prefix (e.g., "+1.234") - use `formatterParams: {precision: 3, showPositive: true}`
177
+ - `badge`: Colored pill/badge for categorical values - use `formatterParams: {colorMap: {"Up": "#FF0000"}, defaultColor: "#888"}`
178
+
179
+ ```python
180
+ column_definitions=[
181
+ {'field': 'pvalue', 'title': 'P-value', 'formatter': 'scientific', 'formatterParams': {'precision': 2}},
182
+ {'field': 'log2fc', 'title': 'Log2 FC', 'formatter': 'signed', 'formatterParams': {'precision': 3}},
183
+ {'field': 'regulation', 'title': 'Status', 'formatter': 'badge',
184
+ 'formatterParams': {'colorMap': {'Up': '#d62728', 'Down': '#1f77b4', 'NS': '#888888'}}},
185
+ ]
186
+ ```
187
+
172
188
  ### LinePlot
173
189
 
174
190
  Stick-style line plot using Plotly.js for mass spectra visualization.
@@ -227,6 +243,80 @@ Heatmap(
227
243
  - `min_points`: Target size for downsampling (default: 20000)
228
244
  - `x_bins`, `y_bins`: Grid resolution for spatial binning
229
245
  - `colorscale`: Plotly colorscale name (default: 'Portland')
246
+ - `log_scale`: Use log10 color mapping (default: True). Set to False for linear.
247
+ - `intensity_label`: Custom colorbar label (default: 'Intensity')
248
+
249
+ **Linear scale example:**
250
+ ```python
251
+ Heatmap(
252
+ cache_id="psm_scores",
253
+ data_path="psm_data.parquet",
254
+ x_column='rt',
255
+ y_column='mz',
256
+ intensity_column='score',
257
+ log_scale=False, # Linear color mapping
258
+ intensity_label='Score', # Custom colorbar label
259
+ colorscale='Blues',
260
+ )
261
+ ```
262
+
263
+ **Categorical mode:**
264
+ Use `category_column` to color points by discrete category instead of by a continuous intensity colorscale:
265
+
266
+ ```python
267
+ Heatmap(
268
+ cache_id="samples_heatmap",
269
+ data_path="samples.parquet",
270
+ x_column='retention_time',
271
+ y_column='mass',
272
+ intensity_column='intensity',
273
+ category_column='sample_group', # Color by category instead of intensity
274
+ category_colors={ # Optional custom colors
275
+ 'Control': '#1f77b4',
276
+ 'Treatment_A': '#ff7f0e',
277
+ 'Treatment_B': '#2ca02c',
278
+ },
279
+ )
280
+ ```
281
+
282
+ ### VolcanoPlot
283
+
284
+ Interactive volcano plot for differential expression analysis with significance thresholds.
285
+
286
+ ```python
287
+ from openms_insight import VolcanoPlot
288
+
289
+ VolcanoPlot(
290
+ cache_id="de_volcano",
291
+ data_path="differential_expression.parquet",
292
+ log2fc_column='log2FC',
293
+ pvalue_column='pvalue',
294
+ label_column='protein_name', # Optional: labels for significant points
295
+ filters={'comparison': 'comparison_id'},
296
+ interactivity={'protein': 'protein_id'},
297
+ title="Differential Expression",
298
+ x_label="Log2 Fold Change",
299
+ y_label="-log10(p-value)",
300
+ up_color='#d62728', # Color for up-regulated
301
+ down_color='#1f77b4', # Color for down-regulated
302
+ ns_color='#888888', # Color for not significant
303
+ )(
304
+ state_manager=state_manager,
305
+ fc_threshold=1.0, # Fold change threshold (render-time)
306
+ p_threshold=0.05, # P-value threshold (render-time)
307
+ max_labels=20, # Max labels to show
308
+ )
309
+ ```
310
+
311
+ **Key parameters:**
312
+ - `log2fc_column`: Column with log2 fold change values
313
+ - `pvalue_column`: Column with p-values (automatically converted to -log10)
314
+ - `label_column`: Optional column for point labels
315
+ - `up_color`, `down_color`, `ns_color`: Colors for significance categories
316
+ - `fc_threshold`, `p_threshold`: Significance thresholds (passed at render time, not cached)
317
+ - `max_labels`: Maximum number of labels to display on significant points (passed at render time)
318
+
319
+ **Render-time thresholds:** `fc_threshold` and `p_threshold` are passed via `__call__()`, not `__init__()`, so thresholds can be adjusted instantly without invalidating the cache.
230
320
 
231
321
  ### SequenceView
232
322
 
@@ -290,6 +380,7 @@ All components accept these common arguments:
290
380
  | `interactivity` | `Dict[str, str]` | `None` | Map identifier -> column for click actions |
291
381
  | `cache_path` | `str` | `"."` | Base directory for cache storage |
292
382
  | `regenerate_cache` | `bool` | `False` | Force cache regeneration |
383
+ | `height` | `int` | `400` | Component height in pixels (render-time parameter) |
293
384
 
294
385
  ## Memory-Efficient Preprocessing
295
386
 
@@ -373,6 +464,14 @@ npm run dev
373
464
  SVC_DEV_MODE=true SVC_DEV_URL=http://localhost:5173 streamlit run app.py
374
465
  ```
375
466
 
467
+ ### Debug Mode
468
+
469
+ Set `SVC_DEBUG_HASH=true` to enable hash-tracking logs (emitted at WARNING level) when debugging data synchronization issues:
470
+
471
+ ```bash
472
+ SVC_DEBUG_HASH=true streamlit run app.py
473
+ ```
474
+
376
475
  ### Running Tests
377
476
 
378
477
  ```bash
@@ -1,28 +1,30 @@
1
- openms_insight/__init__.py,sha256=Iv9w0J_7J3pMsyvM4xaYDMWt6IvrtAt6WqOmJ-_tUxk,1044
2
- openms_insight/components/__init__.py,sha256=T9mUxfgFUiHILmXh1VjcGVlnRvuxRMqi_GJJYOmJKwY,177
3
- openms_insight/components/heatmap.py,sha256=LigtpPbAPQpfjFljMWoEPPAc3t27Bl1ekr5uaR1Ctuk,44090
1
+ openms_insight/__init__.py,sha256=gOX7oIlGQ7TJmd5BfyL5SCQhV6IaPPsOGXOaWOCnCN8,1111
2
+ openms_insight/components/__init__.py,sha256=Lcg-D0FILta-YVgMJBlWMKLKC5G5kXOqdy9hBOENABw,233
3
+ openms_insight/components/heatmap.py,sha256=psrdW4gNzZR1jAIox9YS9EHaZaTRrDHFR0t2_0APU9Y,47214
4
4
  openms_insight/components/lineplot.py,sha256=I-JPvDzCr3Nu8Boc1V4D8QQ1bHgTqvM6CbeoIe7zJ-s,30896
5
5
  openms_insight/components/sequenceview.py,sha256=0pDOE0xeoc1-85QZNGdNwwoBwXi-5MFfeb9pCcOi6rc,30274
6
6
  openms_insight/components/table.py,sha256=wmq1rjGVe4Ef0SAf5p85pfVCeyLlVevZnxBc9EIg2uk,16458
7
+ openms_insight/components/volcanoplot.py,sha256=F-cmYxJMKXVK-aYJpifp8be7nB8hkQd2kLi9DrBElD8,15155
7
8
  openms_insight/core/__init__.py,sha256=EPjKX_FFQRgO8mWHs59I-o0BiuzEMzEU1Pfu9YOfLC4,338
8
- openms_insight/core/base.py,sha256=P2cOrPvPIzxfYQ7xMn9e0BlyKEMrhOCgD9FAtyxTiCc,19408
9
+ openms_insight/core/base.py,sha256=6hxmII90LlbwTy06BJvUdJjXpd8Oqjqdt452AJixJSs,19742
9
10
  openms_insight/core/cache.py,sha256=3fnPDWjuWUnxazK2XflcUIeRZZPQ3N45kAKYu-xGBKw,1197
10
11
  openms_insight/core/registry.py,sha256=Hak80Jqhx0qa4gbd1YolNZnM6xBrS8I4U_X7zC0bQ8Y,2108
11
12
  openms_insight/core/state.py,sha256=_vNYxYHYFgIigbkqYwkIO6cBGFJyF2VN9dr7CBEAQbY,6873
12
13
  openms_insight/core/subprocess_preprocess.py,sha256=m9FbAAFy9Do1Exlh-m4Wo-LDwv6yHlEI4klz5OVwemc,3133
13
- openms_insight/preprocessing/__init__.py,sha256=hXKTI9zHtMtHojqXq_0V62xfNokozpnpRAwEnxs81fM,461
14
+ openms_insight/preprocessing/__init__.py,sha256=xoGdhNVrX8Ty3ywmyaCcWAO3a6QlKceO1xxsy1C8ZTI,596
14
15
  openms_insight/preprocessing/compression.py,sha256=T4YbX9PUlfTfPit_kpuLZn8hYpqLYu3xtTme_CG2ymc,12241
15
16
  openms_insight/preprocessing/filtering.py,sha256=fkmaIXfR5hfjyWfaMYqaeybMHaZjvUZYaKCqvxPOWMQ,14152
17
+ openms_insight/preprocessing/scatter.py,sha256=2ifCNTUKHEW9UVpv4z9c5GaLnz5zw9o1119IenzAe9s,4703
16
18
  openms_insight/rendering/__init__.py,sha256=ApHvKeh87yY4GTIEai-tCeIXpNbwOXWlmcmIwMMRZYc,198
17
- openms_insight/rendering/bridge.py,sha256=i8cZq_ra13XpuV1KT0qC6Jf4VCAe4BGrLE-ybrFHwZE,19408
19
+ openms_insight/rendering/bridge.py,sha256=Y5ckR0Gb7s88bJJpVnM_fEZzN2MppN_PfDnsceAr3Mc,20276
18
20
  openms_insight/js-component/dist/index.html,sha256=LSJ3B_YmGUrCCdZ1UaZO2p6Wqsih6nTH62Z_0uZxpD8,430
19
- openms_insight/js-component/dist/assets/index.css,sha256=PBjVykbu3UhFmwMAFJEcMjQYPEU0IpGuaQWwt0c9r7E,884228
20
- openms_insight/js-component/dist/assets/index.js,sha256=AMODoRTGWDTNhCjNUMgTrvicm7ctOdrkohM8NIC9xtM,6063763
21
+ openms_insight/js-component/dist/assets/index.css,sha256=wFvo7FbG132LL7uw0slXfrL_oSQ8u2RKL1DW9hD9-kk,884264
22
+ openms_insight/js-component/dist/assets/index.js,sha256=EGp5VXlZWHp0jIsOg5jJeYaCz_eDN2R0KDT_btyc6ww,6074760
21
23
  openms_insight/js-component/dist/assets/materialdesignicons-webfont.eot,sha256=CxgxBNL8XyYZbnc8d72vLgVQn9QlnS0V7O3Kebh-hPk,1307880
22
24
  openms_insight/js-component/dist/assets/materialdesignicons-webfont.ttf,sha256=YeirpaTpgf4iz3yOi82-oAR251xiw38Bv37jM2HWhCg,1307660
23
25
  openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff,sha256=pZKKDVwvYk5G-Y2bFcL2AEU3f3xZTdeKF1kTLqO0Y-s,587984
24
26
  openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff2,sha256=Zi_vqPL4qVwYWI0hd0eJwQfGTnccvmWmmvRikcQxGvw,403216
25
- openms_insight-0.1.4.dist-info/METADATA,sha256=_c_eGoMj7wCxAWE5CHC6T2Emri6DEZRXwrQJ-RNptrI,12807
26
- openms_insight-0.1.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
27
- openms_insight-0.1.4.dist-info/licenses/LICENSE,sha256=INFF4rOMmpah7Oi14hLqu7NTOsx56KRRNChAAUcfh2E,1823
28
- openms_insight-0.1.4.dist-info/RECORD,,
27
+ openms_insight-0.1.5.dist-info/METADATA,sha256=ccUljk6Ixse_tMFpebzWg0Vj_qo4ZGa328flIrBzMKc,16670
28
+ openms_insight-0.1.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
29
+ openms_insight-0.1.5.dist-info/licenses/LICENSE,sha256=INFF4rOMmpah7Oi14hLqu7NTOsx56KRRNChAAUcfh2E,1823
30
+ openms_insight-0.1.5.dist-info/RECORD,,