openms-insight 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {openms_insight-0.1.2 → openms_insight-0.1.4}/PKG-INFO +163 -20
  2. openms_insight-0.1.4/README.md +364 -0
  3. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/__init__.py +11 -7
  4. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/components/heatmap.py +433 -228
  5. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/components/lineplot.py +377 -82
  6. openms_insight-0.1.4/openms_insight/components/sequenceview.py +848 -0
  7. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/components/table.py +86 -58
  8. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/core/__init__.py +2 -2
  9. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/core/base.py +122 -54
  10. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/core/registry.py +6 -5
  11. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/core/state.py +33 -31
  12. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/core/subprocess_preprocess.py +1 -3
  13. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/js-component/dist/assets/index.css +1 -1
  14. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/js-component/dist/assets/index.js +105 -105
  15. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/preprocessing/__init__.py +5 -6
  16. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/preprocessing/compression.py +123 -67
  17. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/preprocessing/filtering.py +39 -13
  18. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/rendering/__init__.py +1 -1
  19. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/rendering/bridge.py +192 -42
  20. {openms_insight-0.1.2 → openms_insight-0.1.4}/pyproject.toml +2 -2
  21. openms_insight-0.1.2/README.md +0 -221
  22. openms_insight-0.1.2/openms_insight/components/sequenceview.py +0 -384
  23. {openms_insight-0.1.2 → openms_insight-0.1.4}/.gitignore +0 -0
  24. {openms_insight-0.1.2 → openms_insight-0.1.4}/LICENSE +0 -0
  25. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/components/__init__.py +2 -2
  26. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/core/cache.py +0 -0
  27. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/js-component/dist/assets/materialdesignicons-webfont.eot +0 -0
  28. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/js-component/dist/assets/materialdesignicons-webfont.ttf +0 -0
  29. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff +0 -0
  30. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff2 +0 -0
  31. {openms_insight-0.1.2 → openms_insight-0.1.4}/openms_insight/js-component/dist/index.html +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openms-insight
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Interactive visualization components for mass spectrometry data in Streamlit
5
5
  Project-URL: Homepage, https://github.com/t0mdavid-m/OpenMS-Insight
6
6
  Project-URL: Documentation, https://github.com/t0mdavid-m/OpenMS-Insight#readme
7
7
  Project-URL: Repository, https://github.com/t0mdavid-m/OpenMS-Insight
8
8
  Project-URL: Issues, https://github.com/t0mdavid-m/OpenMS-Insight/issues
9
- Author: Kohlbacher Lab
9
+ Author: Tom David Müller
10
10
  License-Expression: BSD-3-Clause
11
11
  License-File: LICENSE
12
12
  Keywords: mass-spectrometry,openms,plotly,proteomics,streamlit,tabulator,visualization,vue
@@ -37,6 +37,7 @@ Description-Content-Type: text/markdown
37
37
 
38
38
  [![PyPI version](https://badge.fury.io/py/openms-insight.svg)](https://badge.fury.io/py/openms-insight)
39
39
  [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
40
+ [![Tests](https://github.com/t0mdavid-m/OpenMS-Insight/actions/workflows/tests.yml/badge.svg)](https://github.com/t0mdavid-m/OpenMS-Insight/actions/workflows/tests.yml)
40
41
 
41
42
  Interactive visualization components for mass spectrometry data in Streamlit, backed by Vue.js.
42
43
 
@@ -45,10 +46,11 @@ Interactive visualization components for mass spectrometry data in Streamlit, ba
45
46
  - **Cross-component selection linking** via shared identifiers
46
47
  - **Memory-efficient preprocessing** via subprocess isolation
47
48
  - **Automatic disk caching** with config-based invalidation
48
- - **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination
49
+ - **Cache reconstruction** - components can be restored from cache without re-specifying configuration
50
+ - **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination, CSV export
49
51
  - **Line plot component** (Plotly.js) with highlighting, annotations, zoom
50
- - **Heatmap component** (Plotly scattergl) with multi-resolution downsampling
51
- - **Sequence view component** for peptide/protein visualization
52
+ - **Heatmap component** (Plotly scattergl) with multi-resolution downsampling for millions of points
53
+ - **Sequence view component** for peptide visualization with fragment ion matching and auto-zoom
52
54
 
53
55
  ## Installation
54
56
 
@@ -90,9 +92,10 @@ plot(state_manager=state_manager)
90
92
 
91
93
  ## Cross-Component Linking
92
94
 
93
- Components communicate through **identifiers** using two mechanisms:
95
+ Components communicate through **identifiers** using three mechanisms:
94
96
 
95
97
  - **`filters`**: INPUT - filter this component's data by the selection
98
+ - **`filter_defaults`**: INPUT - fallback value to filter by when the selection is None
96
99
  - **`interactivity`**: OUTPUT - set a selection when user clicks
97
100
 
98
101
  ```python
@@ -120,6 +123,14 @@ plot = LinePlot(
120
123
  x_column='mass',
121
124
  y_column='intensity',
122
125
  )
126
+
127
+ # Table with filter defaults - shows unannotated data when no identification selected
128
+ annotations = Table(
129
+ cache_id="annotations",
130
+ data_path="annotations.parquet",
131
+ filters={'identification': 'id_idx'},
132
+ filter_defaults={'identification': -1}, # Use -1 when identification is None
133
+ )
123
134
  ```
124
135
 
125
136
  ---
@@ -137,17 +148,27 @@ Table(
137
148
  interactivity={'spectrum': 'scan_id'},
138
149
  column_definitions=[
139
150
  {'field': 'scan_id', 'title': 'Scan', 'sorter': 'number'},
140
- {'field': 'rt', 'title': 'RT (min)', 'sorter': 'number', 'hozAlign': 'right'},
151
+ {'field': 'rt', 'title': 'RT (min)', 'sorter': 'number', 'hozAlign': 'right',
152
+ 'formatter': 'money', 'formatterParams': {'precision': 2, 'symbol': ''}},
141
153
  {'field': 'precursor_mz', 'title': 'm/z', 'sorter': 'number'},
142
154
  ],
143
155
  index_field='scan_id',
144
156
  go_to_fields=['scan_id'],
157
+ initial_sort=[{'column': 'scan_id', 'dir': 'asc'}],
145
158
  default_row=0,
146
159
  pagination=True,
147
- page_size=50,
160
+ page_size=100,
148
161
  )
149
162
  ```
150
163
 
164
+ **Key parameters:**
165
+ - `column_definitions`: List of Tabulator column configs (field, title, sorter, formatter, etc.)
166
+ - `index_field`: Column used as unique row identifier (default: 'id')
167
+ - `go_to_fields`: Columns available in "Go to" navigation
168
+ - `initial_sort`: Default sort configuration
169
+ - `pagination`: Enable pagination for large tables (default: True)
170
+ - `page_size`: Rows per page (default: 100)
171
+
151
172
  ### LinePlot
152
173
 
153
174
  Stick-style line plot using Plotly.js for mass spectra visualization.
@@ -165,9 +186,20 @@ LinePlot(
165
186
  title="MS/MS Spectrum",
166
187
  x_label="m/z",
167
188
  y_label="Intensity",
189
+ styling={
190
+ 'highlightColor': '#E4572E',
191
+ 'selectedColor': '#F3A712',
192
+ 'unhighlightedColor': 'lightblue',
193
+ },
168
194
  )
169
195
  ```
170
196
 
197
+ **Key parameters:**
198
+ - `x_column`, `y_column`: Column names for x/y values
199
+ - `highlight_column`: Boolean/int column indicating which points to highlight
200
+ - `annotation_column`: Text column for labels on highlighted points
201
+ - `styling`: Color configuration dict
202
+
171
203
  ### Heatmap
172
204
 
173
205
  2D scatter heatmap using Plotly scattergl with multi-resolution downsampling for large datasets (millions of points).
@@ -181,28 +213,67 @@ Heatmap(
181
213
  intensity_column='intensity',
182
214
  interactivity={'spectrum': 'scan_id', 'peak': 'peak_id'},
183
215
  min_points=30000,
216
+ x_bins=400,
217
+ y_bins=50,
184
218
  title="Peak Map",
185
219
  x_label="Retention Time (min)",
186
220
  y_label="m/z",
221
+ colorscale='Portland',
187
222
  )
188
223
  ```
189
224
 
225
+ **Key parameters:**
226
+ - `x_column`, `y_column`, `intensity_column`: Column names for axes and color
227
+ - `min_points`: Target size for downsampling (default: 20000)
228
+ - `x_bins`, `y_bins`: Grid resolution for spatial binning
229
+ - `colorscale`: Plotly colorscale name (default: 'Portland')
230
+
190
231
  ### SequenceView
191
232
 
192
- Peptide/protein sequence visualization with fragment ion matching.
233
+ Peptide sequence visualization with fragment ion matching. Supports both dynamic (filtered by selection) and static sequences.
193
234
 
194
235
  ```python
236
+ # Dynamic: sequence from DataFrame filtered by selection
195
237
  SequenceView(
196
238
  cache_id="peptide_view",
197
- sequence="PEPTIDEK",
198
- observed_masses=[147.1, 244.2, 359.3, 456.4],
199
- peak_ids=[0, 1, 2, 3],
200
- precursor_mass=944.5,
239
+ sequence_data_path="sequences.parquet", # columns: scan_id, sequence, precursor_charge
240
+ peaks_data_path="peaks.parquet", # columns: scan_id, peak_id, mass, intensity
241
+ filters={'spectrum': 'scan_id'},
201
242
  interactivity={'peak': 'peak_id'},
243
+ deconvolved=False, # peaks are m/z values, consider charge states
202
244
  title="Fragment Coverage",
203
245
  )
246
+
247
+ # Static: single sequence with optional peaks
248
+ SequenceView(
249
+ cache_id="static_peptide",
250
+ sequence_data=("PEPTIDEK", 2), # (sequence, charge) tuple
251
+ peaks_data=peaks_df, # Optional: LazyFrame with mass, intensity columns
252
+ deconvolved=True, # peaks are neutral masses
253
+ )
254
+
255
+ # Simplest: just a sequence string
256
+ SequenceView(
257
+ cache_id="simple_seq",
258
+ sequence_data="PEPTIDEK", # charge defaults to 1
259
+ )
204
260
  ```
205
261
 
262
+ **Key parameters:**
263
+ - `sequence_data`: LazyFrame, (sequence, charge) tuple, or sequence string
264
+ - `sequence_data_path`: Path to parquet with sequence data
265
+ - `peaks_data` / `peaks_data_path`: Optional peak data for fragment matching
266
+ - `deconvolved`: If False (default), peaks are m/z values and matching considers charge states; if True, peaks are neutral masses
267
+ - `annotation_config`: Dict with ion_types, tolerance, neutral_losses settings
268
+
269
+ **Features:**
270
+ - Automatic fragment ion matching (a/b/c/x/y/z ions)
271
+ - Configurable mass tolerance (ppm or Da)
272
+ - Neutral loss support (-H2O, -NH3)
273
+ - Auto-zoom for short sequences (≤20 amino acids)
274
+ - Fragment coverage statistics
275
+ - Click-to-select peaks with cross-component linking
276
+
206
277
  ---
207
278
 
208
279
  ## Shared Component Arguments
@@ -212,11 +283,59 @@ All components accept these common arguments:
212
283
  | Argument | Type | Default | Description |
213
284
  |----------|------|---------|-------------|
214
285
  | `cache_id` | `str` | **Required** | Unique identifier for disk cache |
215
- | `data_path` | `str` | `None` | Path to parquet file (preferred - uses subprocess for memory efficiency) |
216
- | `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path, in-process preprocessing) |
286
+ | `data_path` | `str` | `None` | Path to parquet file (preferred for memory efficiency) |
287
+ | `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path) |
217
288
  | `filters` | `Dict[str, str]` | `None` | Map identifier -> column for filtering |
289
+ | `filter_defaults` | `Dict[str, Any]` | `None` | Map identifier -> default value used for filtering when the selection is None |
218
290
  | `interactivity` | `Dict[str, str]` | `None` | Map identifier -> column for click actions |
219
291
  | `cache_path` | `str` | `"."` | Base directory for cache storage |
292
+ | `regenerate_cache` | `bool` | `False` | Force cache regeneration |
293
+
294
+ ## Memory-Efficient Preprocessing
295
+
296
+ When working with large datasets (especially heatmaps with millions of points), use `data_path` instead of `data` to enable subprocess preprocessing:
297
+
298
+ ```python
299
+ # Subprocess preprocessing (recommended for large datasets)
300
+ # Memory is fully released after cache creation
301
+ heatmap = Heatmap(
302
+ data_path="large_peaks.parquet", # triggers subprocess
303
+ cache_id="peaks_heatmap",
304
+ ...
305
+ )
306
+
307
+ # In-process preprocessing (for smaller datasets or debugging)
308
+ # Memory may be retained by allocator after preprocessing
309
+ heatmap = Heatmap(
310
+ data=pl.scan_parquet("large_peaks.parquet"), # runs in main process
311
+ cache_id="peaks_heatmap",
312
+ ...
313
+ )
314
+ ```
315
+
316
+ **Why this matters:** Memory allocators like mimalloc (used by Polars) retain freed memory for performance. For large datasets, this can cause memory usage to stay high even after preprocessing completes. Running preprocessing in a subprocess guarantees all memory is returned to the OS when the subprocess exits.
317
+
318
+ ## Cache Reconstruction
319
+
320
+ Components can be reconstructed from cache using only `cache_id` and `cache_path`. All configuration is restored from the cached manifest:
321
+
322
+ ```python
323
+ # First run: create component with data and config
324
+ table = Table(
325
+ cache_id="my_table",
326
+ data_path="data.parquet",
327
+ filters={'spectrum': 'scan_id'},
328
+ column_definitions=[...],
329
+ cache_path="./cache",
330
+ )
331
+
332
+ # Subsequent runs: reconstruct from cache only
333
+ table = Table(
334
+ cache_id="my_table",
335
+ cache_path="./cache",
336
+ )
337
+ # All config (filters, column_definitions, etc.) restored from cache
338
+ ```
220
339
 
221
340
  ## Rendering
222
341
 
@@ -243,14 +362,38 @@ npm install
243
362
  npm run build
244
363
  ```
245
364
 
246
- ### Development Mode
365
+ ### Development Mode (Hot Reload)
247
366
 
248
367
  ```bash
368
+ # Terminal 1: Vue dev server
249
369
  cd js-component
250
370
  npm run dev
251
371
 
252
- # In another terminal:
253
- export SVC_DEV_MODE=true
254
- export SVC_DEV_URL=http://localhost:5173
255
- streamlit run app.py
372
+ # Terminal 2: Streamlit with dev mode
373
+ SVC_DEV_MODE=true SVC_DEV_URL=http://localhost:5173 streamlit run app.py
374
+ ```
375
+
376
+ ### Running Tests
377
+
378
+ ```bash
379
+ # Python tests
380
+ pip install -e ".[dev]"
381
+ pytest tests/ -v
382
+
383
+ # TypeScript type checking
384
+ cd js-component
385
+ npm run type-check
386
+ ```
387
+
388
+ ### Linting and Formatting
389
+
390
+ ```bash
391
+ # Python
392
+ ruff check .
393
+ ruff format .
394
+
395
+ # JavaScript/TypeScript
396
+ cd js-component
397
+ npm run lint
398
+ npm run format
256
399
  ```
@@ -0,0 +1,364 @@
1
+ # OpenMS-Insight
2
+
3
+ [![PyPI version](https://badge.fury.io/py/openms-insight.svg)](https://badge.fury.io/py/openms-insight)
4
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
5
+ [![Tests](https://github.com/t0mdavid-m/OpenMS-Insight/actions/workflows/tests.yml/badge.svg)](https://github.com/t0mdavid-m/OpenMS-Insight/actions/workflows/tests.yml)
6
+
7
+ Interactive visualization components for mass spectrometry data in Streamlit, backed by Vue.js.
8
+
9
+ ## Features
10
+
11
+ - **Cross-component selection linking** via shared identifiers
12
+ - **Memory-efficient preprocessing** via subprocess isolation
13
+ - **Automatic disk caching** with config-based invalidation
14
+ - **Cache reconstruction** - components can be restored from cache without re-specifying configuration
15
+ - **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination, CSV export
16
+ - **Line plot component** (Plotly.js) with highlighting, annotations, zoom
17
+ - **Heatmap component** (Plotly scattergl) with multi-resolution downsampling for millions of points
18
+ - **Sequence view component** for peptide visualization with fragment ion matching and auto-zoom
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ pip install openms-insight
24
+ ```
25
+
26
+ ## Quick Start
27
+
28
+ ```python
29
+ import streamlit as st
30
+ from openms_insight import Table, LinePlot, StateManager
31
+
32
+ # Create state manager for cross-component linking
33
+ state_manager = StateManager()
34
+
35
+ # Create a table - clicking a row sets the 'item' selection
36
+ table = Table(
37
+ cache_id="items_table",
38
+ data_path="items.parquet",
39
+ interactivity={'item': 'item_id'},
40
+ column_definitions=[
41
+ {'field': 'item_id', 'title': 'ID', 'sorter': 'number'},
42
+ {'field': 'name', 'title': 'Name'},
43
+ ],
44
+ )
45
+ table(state_manager=state_manager)
46
+
47
+ # Create a linked plot - filters by the selected 'item'
48
+ plot = LinePlot(
49
+ cache_id="values_plot",
50
+ data_path="values.parquet",
51
+ filters={'item': 'item_id'},
52
+ x_column='x',
53
+ y_column='y',
54
+ )
55
+ plot(state_manager=state_manager)
56
+ ```
57
+
58
+ ## Cross-Component Linking
59
+
60
+ Components communicate through **identifiers** using three mechanisms:
61
+
62
+ - **`filters`**: INPUT - filter this component's data by the selection
63
+ - **`filter_defaults`**: INPUT - fallback value to filter by when the selection is None
64
+ - **`interactivity`**: OUTPUT - set a selection when user clicks
65
+
66
+ ```python
67
+ # Master table: no filters, sets 'spectrum' on click
68
+ master = Table(
69
+ cache_id="spectra",
70
+ data_path="spectra.parquet",
71
+ interactivity={'spectrum': 'scan_id'}, # Click -> sets spectrum=scan_id
72
+ )
73
+
74
+ # Detail table: filters by 'spectrum', sets 'peak' on click
75
+ detail = Table(
76
+ cache_id="peaks",
77
+ data_path="peaks.parquet",
78
+ filters={'spectrum': 'scan_id'}, # Filters where scan_id = selected spectrum
79
+ interactivity={'peak': 'peak_id'}, # Click -> sets peak=peak_id
80
+ )
81
+
82
+ # Plot: filters by 'spectrum', highlights selected 'peak'
83
+ plot = LinePlot(
84
+ cache_id="plot",
85
+ data_path="peaks.parquet",
86
+ filters={'spectrum': 'scan_id'},
87
+ interactivity={'peak': 'peak_id'},
88
+ x_column='mass',
89
+ y_column='intensity',
90
+ )
91
+
92
+ # Table with filter defaults - shows unannotated data when no identification selected
93
+ annotations = Table(
94
+ cache_id="annotations",
95
+ data_path="annotations.parquet",
96
+ filters={'identification': 'id_idx'},
97
+ filter_defaults={'identification': -1}, # Use -1 when identification is None
98
+ )
99
+ ```
100
+
101
+ ---
102
+
103
+ ## Components
104
+
105
+ ### Table
106
+
107
+ Interactive table using Tabulator.js with filtering dialogs, sorting, pagination, and CSV export.
108
+
109
+ ```python
110
+ Table(
111
+ cache_id="spectra_table",
112
+ data_path="spectra.parquet",
113
+ interactivity={'spectrum': 'scan_id'},
114
+ column_definitions=[
115
+ {'field': 'scan_id', 'title': 'Scan', 'sorter': 'number'},
116
+ {'field': 'rt', 'title': 'RT (min)', 'sorter': 'number', 'hozAlign': 'right',
117
+ 'formatter': 'money', 'formatterParams': {'precision': 2, 'symbol': ''}},
118
+ {'field': 'precursor_mz', 'title': 'm/z', 'sorter': 'number'},
119
+ ],
120
+ index_field='scan_id',
121
+ go_to_fields=['scan_id'],
122
+ initial_sort=[{'column': 'scan_id', 'dir': 'asc'}],
123
+ default_row=0,
124
+ pagination=True,
125
+ page_size=100,
126
+ )
127
+ ```
128
+
129
+ **Key parameters:**
130
+ - `column_definitions`: List of Tabulator column configs (field, title, sorter, formatter, etc.)
131
+ - `index_field`: Column used as unique row identifier (default: 'id')
132
+ - `go_to_fields`: Columns available in "Go to" navigation
133
+ - `initial_sort`: Default sort configuration
134
+ - `pagination`: Enable pagination for large tables (default: True)
135
+ - `page_size`: Rows per page (default: 100)
136
+
137
+ ### LinePlot
138
+
139
+ Stick-style line plot using Plotly.js for mass spectra visualization.
140
+
141
+ ```python
142
+ LinePlot(
143
+ cache_id="spectrum_plot",
144
+ data_path="peaks.parquet",
145
+ filters={'spectrum': 'scan_id'},
146
+ interactivity={'peak': 'peak_id'},
147
+ x_column='mass',
148
+ y_column='intensity',
149
+ highlight_column='is_annotated',
150
+ annotation_column='ion_label',
151
+ title="MS/MS Spectrum",
152
+ x_label="m/z",
153
+ y_label="Intensity",
154
+ styling={
155
+ 'highlightColor': '#E4572E',
156
+ 'selectedColor': '#F3A712',
157
+ 'unhighlightedColor': 'lightblue',
158
+ },
159
+ )
160
+ ```
161
+
162
+ **Key parameters:**
163
+ - `x_column`, `y_column`: Column names for x/y values
164
+ - `highlight_column`: Boolean/int column indicating which points to highlight
165
+ - `annotation_column`: Text column for labels on highlighted points
166
+ - `styling`: Color configuration dict
167
+
168
+ ### Heatmap
169
+
170
+ 2D scatter heatmap using Plotly scattergl with multi-resolution downsampling for large datasets (millions of points).
171
+
172
+ ```python
173
+ Heatmap(
174
+ cache_id="peaks_heatmap",
175
+ data_path="all_peaks.parquet",
176
+ x_column='retention_time',
177
+ y_column='mass',
178
+ intensity_column='intensity',
179
+ interactivity={'spectrum': 'scan_id', 'peak': 'peak_id'},
180
+ min_points=30000,
181
+ x_bins=400,
182
+ y_bins=50,
183
+ title="Peak Map",
184
+ x_label="Retention Time (min)",
185
+ y_label="m/z",
186
+ colorscale='Portland',
187
+ )
188
+ ```
189
+
190
+ **Key parameters:**
191
+ - `x_column`, `y_column`, `intensity_column`: Column names for axes and color
192
+ - `min_points`: Target size for downsampling (default: 20000)
193
+ - `x_bins`, `y_bins`: Grid resolution for spatial binning
194
+ - `colorscale`: Plotly colorscale name (default: 'Portland')
195
+
196
+ ### SequenceView
197
+
198
+ Peptide sequence visualization with fragment ion matching. Supports both dynamic (filtered by selection) and static sequences.
199
+
200
+ ```python
201
+ # Dynamic: sequence from DataFrame filtered by selection
202
+ SequenceView(
203
+ cache_id="peptide_view",
204
+ sequence_data_path="sequences.parquet", # columns: scan_id, sequence, precursor_charge
205
+ peaks_data_path="peaks.parquet", # columns: scan_id, peak_id, mass, intensity
206
+ filters={'spectrum': 'scan_id'},
207
+ interactivity={'peak': 'peak_id'},
208
+ deconvolved=False, # peaks are m/z values, consider charge states
209
+ title="Fragment Coverage",
210
+ )
211
+
212
+ # Static: single sequence with optional peaks
213
+ SequenceView(
214
+ cache_id="static_peptide",
215
+ sequence_data=("PEPTIDEK", 2), # (sequence, charge) tuple
216
+ peaks_data=peaks_df, # Optional: LazyFrame with mass, intensity columns
217
+ deconvolved=True, # peaks are neutral masses
218
+ )
219
+
220
+ # Simplest: just a sequence string
221
+ SequenceView(
222
+ cache_id="simple_seq",
223
+ sequence_data="PEPTIDEK", # charge defaults to 1
224
+ )
225
+ ```
226
+
227
+ **Key parameters:**
228
+ - `sequence_data`: LazyFrame, (sequence, charge) tuple, or sequence string
229
+ - `sequence_data_path`: Path to parquet with sequence data
230
+ - `peaks_data` / `peaks_data_path`: Optional peak data for fragment matching
231
+ - `deconvolved`: If False (default), peaks are m/z values and matching considers charge states; if True, peaks are neutral masses
232
+ - `annotation_config`: Dict with ion_types, tolerance, neutral_losses settings
233
+
234
+ **Features:**
235
+ - Automatic fragment ion matching (a/b/c/x/y/z ions)
236
+ - Configurable mass tolerance (ppm or Da)
237
+ - Neutral loss support (-H2O, -NH3)
238
+ - Auto-zoom for short sequences (≤20 amino acids)
239
+ - Fragment coverage statistics
240
+ - Click-to-select peaks with cross-component linking
241
+
242
+ ---
243
+
244
+ ## Shared Component Arguments
245
+
246
+ All components accept these common arguments:
247
+
248
+ | Argument | Type | Default | Description |
249
+ |----------|------|---------|-------------|
250
+ | `cache_id` | `str` | **Required** | Unique identifier for disk cache |
251
+ | `data_path` | `str` | `None` | Path to parquet file (preferred for memory efficiency) |
252
+ | `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path) |
253
+ | `filters` | `Dict[str, str]` | `None` | Map identifier -> column for filtering |
254
+ | `filter_defaults` | `Dict[str, Any]` | `None` | Map identifier -> default value used for filtering when the selection is None |
255
+ | `interactivity` | `Dict[str, str]` | `None` | Map identifier -> column for click actions |
256
+ | `cache_path` | `str` | `"."` | Base directory for cache storage |
257
+ | `regenerate_cache` | `bool` | `False` | Force cache regeneration |
258
+
259
+ ## Memory-Efficient Preprocessing
260
+
261
+ When working with large datasets (especially heatmaps with millions of points), use `data_path` instead of `data` to enable subprocess preprocessing:
262
+
263
+ ```python
264
+ # Subprocess preprocessing (recommended for large datasets)
265
+ # Memory is fully released after cache creation
266
+ heatmap = Heatmap(
267
+ data_path="large_peaks.parquet", # triggers subprocess
268
+ cache_id="peaks_heatmap",
269
+ ...
270
+ )
271
+
272
+ # In-process preprocessing (for smaller datasets or debugging)
273
+ # Memory may be retained by allocator after preprocessing
274
+ heatmap = Heatmap(
275
+ data=pl.scan_parquet("large_peaks.parquet"), # runs in main process
276
+ cache_id="peaks_heatmap",
277
+ ...
278
+ )
279
+ ```
280
+
281
+ **Why this matters:** Memory allocators like mimalloc (used by Polars) retain freed memory for performance. For large datasets, this can cause memory usage to stay high even after preprocessing completes. Running preprocessing in a subprocess guarantees all memory is returned to the OS when the subprocess exits.
282
+
283
+ ## Cache Reconstruction
284
+
285
+ Components can be reconstructed from cache using only `cache_id` and `cache_path`. All configuration is restored from the cached manifest:
286
+
287
+ ```python
288
+ # First run: create component with data and config
289
+ table = Table(
290
+ cache_id="my_table",
291
+ data_path="data.parquet",
292
+ filters={'spectrum': 'scan_id'},
293
+ column_definitions=[...],
294
+ cache_path="./cache",
295
+ )
296
+
297
+ # Subsequent runs: reconstruct from cache only
298
+ table = Table(
299
+ cache_id="my_table",
300
+ cache_path="./cache",
301
+ )
302
+ # All config (filters, column_definitions, etc.) restored from cache
303
+ ```
304
+
305
+ ## Rendering
306
+
307
+ All components are callable. Pass a `StateManager` to enable cross-component linking:
308
+
309
+ ```python
310
+ from openms_insight import StateManager
311
+
312
+ state_manager = StateManager()
313
+
314
+ table(state_manager=state_manager, height=300)
315
+ plot(state_manager=state_manager, height=400)
316
+ ```
317
+
318
+ ---
319
+
320
+ ## Development
321
+
322
+ ### Building the Vue Component
323
+
324
+ ```bash
325
+ cd js-component
326
+ npm install
327
+ npm run build
328
+ ```
329
+
330
+ ### Development Mode (Hot Reload)
331
+
332
+ ```bash
333
+ # Terminal 1: Vue dev server
334
+ cd js-component
335
+ npm run dev
336
+
337
+ # Terminal 2: Streamlit with dev mode
338
+ SVC_DEV_MODE=true SVC_DEV_URL=http://localhost:5173 streamlit run app.py
339
+ ```
340
+
341
+ ### Running Tests
342
+
343
+ ```bash
344
+ # Python tests
345
+ pip install -e ".[dev]"
346
+ pytest tests/ -v
347
+
348
+ # TypeScript type checking
349
+ cd js-component
350
+ npm run type-check
351
+ ```
352
+
353
+ ### Linting and Formatting
354
+
355
+ ```bash
356
+ # Python
357
+ ruff check .
358
+ ruff format .
359
+
360
+ # JavaScript/TypeScript
361
+ cd js-component
362
+ npm run lint
363
+ npm run format
364
+ ```
@@ -5,15 +5,15 @@ This package provides reusable, interactive Streamlit components backed by Vue.j
5
5
  visualizations with cross-component selection state management.
6
6
  """
7
7
 
8
+ from .components.heatmap import Heatmap
9
+ from .components.lineplot import LinePlot
10
+ from .components.sequenceview import SequenceView, SequenceViewResult
11
+ from .components.table import Table
8
12
  from .core.base import BaseComponent
9
- from .core.state import StateManager
10
- from .core.registry import register_component, get_component_class
11
13
  from .core.cache import CacheMissError
12
-
13
- from .components.table import Table
14
- from .components.lineplot import LinePlot
15
- from .components.heatmap import Heatmap
16
- from .components.sequenceview import SequenceView
14
+ from .core.registry import get_component_class, register_component
15
+ from .core.state import StateManager
16
+ from .rendering.bridge import clear_component_annotations, get_component_annotations
17
17
 
18
18
  __version__ = "0.1.0"
19
19
 
@@ -29,4 +29,8 @@ __all__ = [
29
29
  "LinePlot",
30
30
  "Heatmap",
31
31
  "SequenceView",
32
+ "SequenceViewResult",
33
+ # Utilities
34
+ "get_component_annotations",
35
+ "clear_component_annotations",
32
36
  ]