openms-insight 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openms_insight-0.1.1 → openms_insight-0.1.3}/PKG-INFO +163 -20
- openms_insight-0.1.3/README.md +364 -0
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/__init__.py +11 -7
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/components/heatmap.py +192 -102
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/components/lineplot.py +377 -82
- openms_insight-0.1.3/openms_insight/components/sequenceview.py +848 -0
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/components/table.py +86 -58
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/core/__init__.py +2 -2
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/core/base.py +113 -49
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/core/registry.py +6 -5
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/core/state.py +33 -31
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/core/subprocess_preprocess.py +1 -3
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/js-component/dist/assets/index.css +1 -1
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/js-component/dist/assets/index.js +113 -113
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/preprocessing/__init__.py +5 -6
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/preprocessing/compression.py +68 -66
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/preprocessing/filtering.py +119 -9
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/rendering/__init__.py +1 -1
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/rendering/bridge.py +192 -42
- {openms_insight-0.1.1 → openms_insight-0.1.3}/pyproject.toml +2 -2
- openms_insight-0.1.1/README.md +0 -221
- openms_insight-0.1.1/openms_insight/components/sequenceview.py +0 -384
- {openms_insight-0.1.1 → openms_insight-0.1.3}/.gitignore +0 -0
- {openms_insight-0.1.1 → openms_insight-0.1.3}/LICENSE +0 -0
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/components/__init__.py +2 -2
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/core/cache.py +0 -0
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/js-component/dist/assets/materialdesignicons-webfont.eot +0 -0
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/js-component/dist/assets/materialdesignicons-webfont.ttf +0 -0
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff +0 -0
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff2 +0 -0
- {openms_insight-0.1.1 → openms_insight-0.1.3}/openms_insight/js-component/dist/index.html +0 -0
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openms-insight
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: Interactive visualization components for mass spectrometry data in Streamlit
|
|
5
5
|
Project-URL: Homepage, https://github.com/t0mdavid-m/OpenMS-Insight
|
|
6
6
|
Project-URL: Documentation, https://github.com/t0mdavid-m/OpenMS-Insight#readme
|
|
7
7
|
Project-URL: Repository, https://github.com/t0mdavid-m/OpenMS-Insight
|
|
8
8
|
Project-URL: Issues, https://github.com/t0mdavid-m/OpenMS-Insight/issues
|
|
9
|
-
Author:
|
|
9
|
+
Author: Tom David Müller
|
|
10
10
|
License-Expression: BSD-3-Clause
|
|
11
11
|
License-File: LICENSE
|
|
12
12
|
Keywords: mass-spectrometry,openms,plotly,proteomics,streamlit,tabulator,visualization,vue
|
|
@@ -37,6 +37,7 @@ Description-Content-Type: text/markdown
|
|
|
37
37
|
|
|
38
38
|
[PyPI version](https://badge.fury.io/py/openms-insight)
|
|
39
39
|
[Python](https://www.python.org/downloads/)
|
|
40
|
+
[Tests](https://github.com/t0mdavid-m/OpenMS-Insight/actions/workflows/tests.yml)
|
|
40
41
|
|
|
41
42
|
Interactive visualization components for mass spectrometry data in Streamlit, backed by Vue.js.
|
|
42
43
|
|
|
@@ -45,10 +46,11 @@ Interactive visualization components for mass spectrometry data in Streamlit, ba
|
|
|
45
46
|
- **Cross-component selection linking** via shared identifiers
|
|
46
47
|
- **Memory-efficient preprocessing** via subprocess isolation
|
|
47
48
|
- **Automatic disk caching** with config-based invalidation
|
|
48
|
-
- **
|
|
49
|
+
- **Cache reconstruction** - components can be restored from cache without re-specifying configuration
|
|
50
|
+
- **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination, CSV export
|
|
49
51
|
- **Line plot component** (Plotly.js) with highlighting, annotations, zoom
|
|
50
|
-
- **Heatmap component** (Plotly scattergl) with multi-resolution downsampling
|
|
51
|
-
- **Sequence view component** for peptide
|
|
52
|
+
- **Heatmap component** (Plotly scattergl) with multi-resolution downsampling for millions of points
|
|
53
|
+
- **Sequence view component** for peptide visualization with fragment ion matching and auto-zoom
|
|
52
54
|
|
|
53
55
|
## Installation
|
|
54
56
|
|
|
@@ -90,9 +92,10 @@ plot(state_manager=state_manager)
|
|
|
90
92
|
|
|
91
93
|
## Cross-Component Linking
|
|
92
94
|
|
|
93
|
-
Components communicate through **identifiers** using
|
|
95
|
+
Components communicate through **identifiers** using three mechanisms:
|
|
94
96
|
|
|
95
97
|
- **`filters`**: INPUT - filter this component's data by the selection
|
|
98
|
+
- **`filter_defaults`**: INPUT - default value when selection is None
|
|
96
99
|
- **`interactivity`**: OUTPUT - set a selection when user clicks
|
|
97
100
|
|
|
98
101
|
```python
|
|
@@ -120,6 +123,14 @@ plot = LinePlot(
|
|
|
120
123
|
x_column='mass',
|
|
121
124
|
y_column='intensity',
|
|
122
125
|
)
|
|
126
|
+
|
|
127
|
+
# Table with filter defaults - shows unannotated data when no identification selected
|
|
128
|
+
annotations = Table(
|
|
129
|
+
cache_id="annotations",
|
|
130
|
+
data_path="annotations.parquet",
|
|
131
|
+
filters={'identification': 'id_idx'},
|
|
132
|
+
filter_defaults={'identification': -1}, # Use -1 when identification is None
|
|
133
|
+
)
|
|
123
134
|
```
|
|
124
135
|
|
|
125
136
|
---
|
|
@@ -137,17 +148,27 @@ Table(
|
|
|
137
148
|
interactivity={'spectrum': 'scan_id'},
|
|
138
149
|
column_definitions=[
|
|
139
150
|
{'field': 'scan_id', 'title': 'Scan', 'sorter': 'number'},
|
|
140
|
-
{'field': 'rt', 'title': 'RT (min)', 'sorter': 'number', 'hozAlign': 'right'},
|
|
151
|
+
{'field': 'rt', 'title': 'RT (min)', 'sorter': 'number', 'hozAlign': 'right',
|
|
152
|
+
'formatter': 'money', 'formatterParams': {'precision': 2, 'symbol': ''}},
|
|
141
153
|
{'field': 'precursor_mz', 'title': 'm/z', 'sorter': 'number'},
|
|
142
154
|
],
|
|
143
155
|
index_field='scan_id',
|
|
144
156
|
go_to_fields=['scan_id'],
|
|
157
|
+
initial_sort=[{'column': 'scan_id', 'dir': 'asc'}],
|
|
145
158
|
default_row=0,
|
|
146
159
|
pagination=True,
|
|
147
|
-
page_size=
|
|
160
|
+
page_size=100,
|
|
148
161
|
)
|
|
149
162
|
```
|
|
150
163
|
|
|
164
|
+
**Key parameters:**
|
|
165
|
+
- `column_definitions`: List of Tabulator column configs (field, title, sorter, formatter, etc.)
|
|
166
|
+
- `index_field`: Column used as unique row identifier (default: 'id')
|
|
167
|
+
- `go_to_fields`: Columns available in "Go to" navigation
|
|
168
|
+
- `initial_sort`: Default sort configuration
|
|
169
|
+
- `pagination`: Enable pagination for large tables (default: True)
|
|
170
|
+
- `page_size`: Rows per page (default: 100)
|
|
171
|
+
|
|
151
172
|
### LinePlot
|
|
152
173
|
|
|
153
174
|
Stick-style line plot using Plotly.js for mass spectra visualization.
|
|
@@ -165,9 +186,20 @@ LinePlot(
|
|
|
165
186
|
title="MS/MS Spectrum",
|
|
166
187
|
x_label="m/z",
|
|
167
188
|
y_label="Intensity",
|
|
189
|
+
styling={
|
|
190
|
+
'highlightColor': '#E4572E',
|
|
191
|
+
'selectedColor': '#F3A712',
|
|
192
|
+
'unhighlightedColor': 'lightblue',
|
|
193
|
+
},
|
|
168
194
|
)
|
|
169
195
|
```
|
|
170
196
|
|
|
197
|
+
**Key parameters:**
|
|
198
|
+
- `x_column`, `y_column`: Column names for x/y values
|
|
199
|
+
- `highlight_column`: Boolean/int column indicating which points to highlight
|
|
200
|
+
- `annotation_column`: Text column for labels on highlighted points
|
|
201
|
+
- `styling`: Color configuration dict
|
|
202
|
+
|
|
171
203
|
### Heatmap
|
|
172
204
|
|
|
173
205
|
2D scatter heatmap using Plotly scattergl with multi-resolution downsampling for large datasets (millions of points).
|
|
@@ -181,28 +213,67 @@ Heatmap(
|
|
|
181
213
|
intensity_column='intensity',
|
|
182
214
|
interactivity={'spectrum': 'scan_id', 'peak': 'peak_id'},
|
|
183
215
|
min_points=30000,
|
|
216
|
+
x_bins=400,
|
|
217
|
+
y_bins=50,
|
|
184
218
|
title="Peak Map",
|
|
185
219
|
x_label="Retention Time (min)",
|
|
186
220
|
y_label="m/z",
|
|
221
|
+
colorscale='Portland',
|
|
187
222
|
)
|
|
188
223
|
```
|
|
189
224
|
|
|
225
|
+
**Key parameters:**
|
|
226
|
+
- `x_column`, `y_column`, `intensity_column`: Column names for axes and color
|
|
227
|
+
- `min_points`: Target size for downsampling (default: 20000)
|
|
228
|
+
- `x_bins`, `y_bins`: Grid resolution for spatial binning
|
|
229
|
+
- `colorscale`: Plotly colorscale name (default: 'Portland')
|
|
230
|
+
|
|
190
231
|
### SequenceView
|
|
191
232
|
|
|
192
|
-
Peptide
|
|
233
|
+
Peptide sequence visualization with fragment ion matching. Supports both dynamic (filtered by selection) and static sequences.
|
|
193
234
|
|
|
194
235
|
```python
|
|
236
|
+
# Dynamic: sequence from DataFrame filtered by selection
|
|
195
237
|
SequenceView(
|
|
196
238
|
cache_id="peptide_view",
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
precursor_mass=944.5,
|
|
239
|
+
sequence_data_path="sequences.parquet", # columns: scan_id, sequence, precursor_charge
|
|
240
|
+
peaks_data_path="peaks.parquet", # columns: scan_id, peak_id, mass, intensity
|
|
241
|
+
filters={'spectrum': 'scan_id'},
|
|
201
242
|
interactivity={'peak': 'peak_id'},
|
|
243
|
+
deconvolved=False, # peaks are m/z values, consider charge states
|
|
202
244
|
title="Fragment Coverage",
|
|
203
245
|
)
|
|
246
|
+
|
|
247
|
+
# Static: single sequence with optional peaks
|
|
248
|
+
SequenceView(
|
|
249
|
+
cache_id="static_peptide",
|
|
250
|
+
sequence_data=("PEPTIDEK", 2), # (sequence, charge) tuple
|
|
251
|
+
peaks_data=peaks_df, # Optional: LazyFrame with mass, intensity columns
|
|
252
|
+
deconvolved=True, # peaks are neutral masses
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
# Simplest: just a sequence string
|
|
256
|
+
SequenceView(
|
|
257
|
+
cache_id="simple_seq",
|
|
258
|
+
sequence_data="PEPTIDEK", # charge defaults to 1
|
|
259
|
+
)
|
|
204
260
|
```
|
|
205
261
|
|
|
262
|
+
**Key parameters:**
|
|
263
|
+
- `sequence_data`: LazyFrame, (sequence, charge) tuple, or sequence string
|
|
264
|
+
- `sequence_data_path`: Path to parquet with sequence data
|
|
265
|
+
- `peaks_data` / `peaks_data_path`: Optional peak data for fragment matching
|
|
266
|
+
- `deconvolved`: If False (default), peaks are m/z and matching considers charge states
|
|
267
|
+
- `annotation_config`: Dict with ion_types, tolerance, neutral_losses settings
|
|
268
|
+
|
|
269
|
+
**Features:**
|
|
270
|
+
- Automatic fragment ion matching (a/b/c/x/y/z ions)
|
|
271
|
+
- Configurable mass tolerance (ppm or Da)
|
|
272
|
+
- Neutral loss support (-H2O, -NH3)
|
|
273
|
+
- Auto-zoom for short sequences (≤20 amino acids)
|
|
274
|
+
- Fragment coverage statistics
|
|
275
|
+
- Click-to-select peaks with cross-component linking
|
|
276
|
+
|
|
206
277
|
---
|
|
207
278
|
|
|
208
279
|
## Shared Component Arguments
|
|
@@ -212,11 +283,59 @@ All components accept these common arguments:
|
|
|
212
283
|
| Argument | Type | Default | Description |
|
|
213
284
|
|----------|------|---------|-------------|
|
|
214
285
|
| `cache_id` | `str` | **Required** | Unique identifier for disk cache |
|
|
215
|
-
| `data_path` | `str` | `None` | Path to parquet file (preferred
|
|
216
|
-
| `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path
|
|
286
|
+
| `data_path` | `str` | `None` | Path to parquet file (preferred for memory efficiency) |
|
|
287
|
+
| `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path) |
|
|
217
288
|
| `filters` | `Dict[str, str]` | `None` | Map identifier -> column for filtering |
|
|
289
|
+
| `filter_defaults` | `Dict[str, Any]` | `None` | Default values when selection is None |
|
|
218
290
|
| `interactivity` | `Dict[str, str]` | `None` | Map identifier -> column for click actions |
|
|
219
291
|
| `cache_path` | `str` | `"."` | Base directory for cache storage |
|
|
292
|
+
| `regenerate_cache` | `bool` | `False` | Force cache regeneration |
|
|
293
|
+
|
|
294
|
+
## Memory-Efficient Preprocessing
|
|
295
|
+
|
|
296
|
+
When working with large datasets (especially heatmaps with millions of points), use `data_path` instead of `data` to enable subprocess preprocessing:
|
|
297
|
+
|
|
298
|
+
```python
|
|
299
|
+
# Subprocess preprocessing (recommended for large datasets)
|
|
300
|
+
# Memory is fully released after cache creation
|
|
301
|
+
heatmap = Heatmap(
|
|
302
|
+
data_path="large_peaks.parquet", # triggers subprocess
|
|
303
|
+
cache_id="peaks_heatmap",
|
|
304
|
+
...
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
# In-process preprocessing (for smaller datasets or debugging)
|
|
308
|
+
# Memory may be retained by allocator after preprocessing
|
|
309
|
+
heatmap = Heatmap(
|
|
310
|
+
data=pl.scan_parquet("large_peaks.parquet"), # runs in main process
|
|
311
|
+
cache_id="peaks_heatmap",
|
|
312
|
+
...
|
|
313
|
+
)
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
**Why this matters:** Memory allocators such as jemalloc or mimalloc (used by Polars, depending on the platform) retain freed memory for performance. For large datasets, this can cause memory usage to stay high even after preprocessing completes. Running preprocessing in a subprocess guarantees all memory is returned to the OS when the subprocess exits.
|
|
317
|
+
|
|
318
|
+
## Cache Reconstruction
|
|
319
|
+
|
|
320
|
+
Components can be reconstructed from cache using only `cache_id` and `cache_path`. All configuration is restored from the cached manifest:
|
|
321
|
+
|
|
322
|
+
```python
|
|
323
|
+
# First run: create component with data and config
|
|
324
|
+
table = Table(
|
|
325
|
+
cache_id="my_table",
|
|
326
|
+
data_path="data.parquet",
|
|
327
|
+
filters={'spectrum': 'scan_id'},
|
|
328
|
+
column_definitions=[...],
|
|
329
|
+
cache_path="./cache",
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
# Subsequent runs: reconstruct from cache only
|
|
333
|
+
table = Table(
|
|
334
|
+
cache_id="my_table",
|
|
335
|
+
cache_path="./cache",
|
|
336
|
+
)
|
|
337
|
+
# All config (filters, column_definitions, etc.) restored from cache
|
|
338
|
+
```
|
|
220
339
|
|
|
221
340
|
## Rendering
|
|
222
341
|
|
|
@@ -243,14 +362,38 @@ npm install
|
|
|
243
362
|
npm run build
|
|
244
363
|
```
|
|
245
364
|
|
|
246
|
-
### Development Mode
|
|
365
|
+
### Development Mode (Hot Reload)
|
|
247
366
|
|
|
248
367
|
```bash
|
|
368
|
+
# Terminal 1: Vue dev server
|
|
249
369
|
cd js-component
|
|
250
370
|
npm run dev
|
|
251
371
|
|
|
252
|
-
#
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
372
|
+
# Terminal 2: Streamlit with dev mode
|
|
373
|
+
SVC_DEV_MODE=true SVC_DEV_URL=http://localhost:5173 streamlit run app.py
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
### Running Tests
|
|
377
|
+
|
|
378
|
+
```bash
|
|
379
|
+
# Python tests
|
|
380
|
+
pip install -e ".[dev]"
|
|
381
|
+
pytest tests/ -v
|
|
382
|
+
|
|
383
|
+
# TypeScript type checking
|
|
384
|
+
cd js-component
|
|
385
|
+
npm run type-check
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
### Linting and Formatting
|
|
389
|
+
|
|
390
|
+
```bash
|
|
391
|
+
# Python
|
|
392
|
+
ruff check .
|
|
393
|
+
ruff format .
|
|
394
|
+
|
|
395
|
+
# JavaScript/TypeScript
|
|
396
|
+
cd js-component
|
|
397
|
+
npm run lint
|
|
398
|
+
npm run format
|
|
256
399
|
```
|
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
# OpenMS-Insight
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://badge.fury.io/py/openms-insight)
|
|
4
|
+
[Python](https://www.python.org/downloads/)
|
|
5
|
+
[Tests](https://github.com/t0mdavid-m/OpenMS-Insight/actions/workflows/tests.yml)
|
|
6
|
+
|
|
7
|
+
Interactive visualization components for mass spectrometry data in Streamlit, backed by Vue.js.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- **Cross-component selection linking** via shared identifiers
|
|
12
|
+
- **Memory-efficient preprocessing** via subprocess isolation
|
|
13
|
+
- **Automatic disk caching** with config-based invalidation
|
|
14
|
+
- **Cache reconstruction** - components can be restored from cache without re-specifying configuration
|
|
15
|
+
- **Table component** (Tabulator.js) with filtering, sorting, go-to, pagination, CSV export
|
|
16
|
+
- **Line plot component** (Plotly.js) with highlighting, annotations, zoom
|
|
17
|
+
- **Heatmap component** (Plotly scattergl) with multi-resolution downsampling for millions of points
|
|
18
|
+
- **Sequence view component** for peptide visualization with fragment ion matching and auto-zoom
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install openms-insight
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import streamlit as st
|
|
30
|
+
from openms_insight import Table, LinePlot, StateManager
|
|
31
|
+
|
|
32
|
+
# Create state manager for cross-component linking
|
|
33
|
+
state_manager = StateManager()
|
|
34
|
+
|
|
35
|
+
# Create a table - clicking a row sets the 'item' selection
|
|
36
|
+
table = Table(
|
|
37
|
+
cache_id="items_table",
|
|
38
|
+
data_path="items.parquet",
|
|
39
|
+
interactivity={'item': 'item_id'},
|
|
40
|
+
column_definitions=[
|
|
41
|
+
{'field': 'item_id', 'title': 'ID', 'sorter': 'number'},
|
|
42
|
+
{'field': 'name', 'title': 'Name'},
|
|
43
|
+
],
|
|
44
|
+
)
|
|
45
|
+
table(state_manager=state_manager)
|
|
46
|
+
|
|
47
|
+
# Create a linked plot - filters by the selected 'item'
|
|
48
|
+
plot = LinePlot(
|
|
49
|
+
cache_id="values_plot",
|
|
50
|
+
data_path="values.parquet",
|
|
51
|
+
filters={'item': 'item_id'},
|
|
52
|
+
x_column='x',
|
|
53
|
+
y_column='y',
|
|
54
|
+
)
|
|
55
|
+
plot(state_manager=state_manager)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Cross-Component Linking
|
|
59
|
+
|
|
60
|
+
Components communicate through **identifiers** using three mechanisms:
|
|
61
|
+
|
|
62
|
+
- **`filters`**: INPUT - filter this component's data by the selection
|
|
63
|
+
- **`filter_defaults`**: INPUT - default value when selection is None
|
|
64
|
+
- **`interactivity`**: OUTPUT - set a selection when user clicks
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
# Master table: no filters, sets 'spectrum' on click
|
|
68
|
+
master = Table(
|
|
69
|
+
cache_id="spectra",
|
|
70
|
+
data_path="spectra.parquet",
|
|
71
|
+
interactivity={'spectrum': 'scan_id'}, # Click -> sets spectrum=scan_id
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
# Detail table: filters by 'spectrum', sets 'peak' on click
|
|
75
|
+
detail = Table(
|
|
76
|
+
cache_id="peaks",
|
|
77
|
+
data_path="peaks.parquet",
|
|
78
|
+
filters={'spectrum': 'scan_id'}, # Filters where scan_id = selected spectrum
|
|
79
|
+
interactivity={'peak': 'peak_id'}, # Click -> sets peak=peak_id
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# Plot: filters by 'spectrum', highlights selected 'peak'
|
|
83
|
+
plot = LinePlot(
|
|
84
|
+
cache_id="plot",
|
|
85
|
+
data_path="peaks.parquet",
|
|
86
|
+
filters={'spectrum': 'scan_id'},
|
|
87
|
+
interactivity={'peak': 'peak_id'},
|
|
88
|
+
x_column='mass',
|
|
89
|
+
y_column='intensity',
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Table with filter defaults - shows unannotated data when no identification selected
|
|
93
|
+
annotations = Table(
|
|
94
|
+
cache_id="annotations",
|
|
95
|
+
data_path="annotations.parquet",
|
|
96
|
+
filters={'identification': 'id_idx'},
|
|
97
|
+
filter_defaults={'identification': -1}, # Use -1 when identification is None
|
|
98
|
+
)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## Components
|
|
104
|
+
|
|
105
|
+
### Table
|
|
106
|
+
|
|
107
|
+
Interactive table using Tabulator.js with filtering dialogs, sorting, pagination, and CSV export.
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
Table(
|
|
111
|
+
cache_id="spectra_table",
|
|
112
|
+
data_path="spectra.parquet",
|
|
113
|
+
interactivity={'spectrum': 'scan_id'},
|
|
114
|
+
column_definitions=[
|
|
115
|
+
{'field': 'scan_id', 'title': 'Scan', 'sorter': 'number'},
|
|
116
|
+
{'field': 'rt', 'title': 'RT (min)', 'sorter': 'number', 'hozAlign': 'right',
|
|
117
|
+
'formatter': 'money', 'formatterParams': {'precision': 2, 'symbol': ''}},
|
|
118
|
+
{'field': 'precursor_mz', 'title': 'm/z', 'sorter': 'number'},
|
|
119
|
+
],
|
|
120
|
+
index_field='scan_id',
|
|
121
|
+
go_to_fields=['scan_id'],
|
|
122
|
+
initial_sort=[{'column': 'scan_id', 'dir': 'asc'}],
|
|
123
|
+
default_row=0,
|
|
124
|
+
pagination=True,
|
|
125
|
+
page_size=100,
|
|
126
|
+
)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
**Key parameters:**
|
|
130
|
+
- `column_definitions`: List of Tabulator column configs (field, title, sorter, formatter, etc.)
|
|
131
|
+
- `index_field`: Column used as unique row identifier (default: 'id')
|
|
132
|
+
- `go_to_fields`: Columns available in "Go to" navigation
|
|
133
|
+
- `initial_sort`: Default sort configuration
|
|
134
|
+
- `pagination`: Enable pagination for large tables (default: True)
|
|
135
|
+
- `page_size`: Rows per page (default: 100)
|
|
136
|
+
|
|
137
|
+
### LinePlot
|
|
138
|
+
|
|
139
|
+
Stick-style line plot using Plotly.js for mass spectra visualization.
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
LinePlot(
|
|
143
|
+
cache_id="spectrum_plot",
|
|
144
|
+
data_path="peaks.parquet",
|
|
145
|
+
filters={'spectrum': 'scan_id'},
|
|
146
|
+
interactivity={'peak': 'peak_id'},
|
|
147
|
+
x_column='mass',
|
|
148
|
+
y_column='intensity',
|
|
149
|
+
highlight_column='is_annotated',
|
|
150
|
+
annotation_column='ion_label',
|
|
151
|
+
title="MS/MS Spectrum",
|
|
152
|
+
x_label="m/z",
|
|
153
|
+
y_label="Intensity",
|
|
154
|
+
styling={
|
|
155
|
+
'highlightColor': '#E4572E',
|
|
156
|
+
'selectedColor': '#F3A712',
|
|
157
|
+
'unhighlightedColor': 'lightblue',
|
|
158
|
+
},
|
|
159
|
+
)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
**Key parameters:**
|
|
163
|
+
- `x_column`, `y_column`: Column names for x/y values
|
|
164
|
+
- `highlight_column`: Boolean/int column indicating which points to highlight
|
|
165
|
+
- `annotation_column`: Text column for labels on highlighted points
|
|
166
|
+
- `styling`: Color configuration dict
|
|
167
|
+
|
|
168
|
+
### Heatmap
|
|
169
|
+
|
|
170
|
+
2D scatter heatmap using Plotly scattergl with multi-resolution downsampling for large datasets (millions of points).
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
Heatmap(
|
|
174
|
+
cache_id="peaks_heatmap",
|
|
175
|
+
data_path="all_peaks.parquet",
|
|
176
|
+
x_column='retention_time',
|
|
177
|
+
y_column='mass',
|
|
178
|
+
intensity_column='intensity',
|
|
179
|
+
interactivity={'spectrum': 'scan_id', 'peak': 'peak_id'},
|
|
180
|
+
min_points=30000,
|
|
181
|
+
x_bins=400,
|
|
182
|
+
y_bins=50,
|
|
183
|
+
title="Peak Map",
|
|
184
|
+
x_label="Retention Time (min)",
|
|
185
|
+
y_label="m/z",
|
|
186
|
+
colorscale='Portland',
|
|
187
|
+
)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
**Key parameters:**
|
|
191
|
+
- `x_column`, `y_column`, `intensity_column`: Column names for axes and color
|
|
192
|
+
- `min_points`: Target size for downsampling (default: 20000)
|
|
193
|
+
- `x_bins`, `y_bins`: Grid resolution for spatial binning
|
|
194
|
+
- `colorscale`: Plotly colorscale name (default: 'Portland')
|
|
195
|
+
|
|
196
|
+
### SequenceView
|
|
197
|
+
|
|
198
|
+
Peptide sequence visualization with fragment ion matching. Supports both dynamic (filtered by selection) and static sequences.
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
# Dynamic: sequence from DataFrame filtered by selection
|
|
202
|
+
SequenceView(
|
|
203
|
+
cache_id="peptide_view",
|
|
204
|
+
sequence_data_path="sequences.parquet", # columns: scan_id, sequence, precursor_charge
|
|
205
|
+
peaks_data_path="peaks.parquet", # columns: scan_id, peak_id, mass, intensity
|
|
206
|
+
filters={'spectrum': 'scan_id'},
|
|
207
|
+
interactivity={'peak': 'peak_id'},
|
|
208
|
+
deconvolved=False, # peaks are m/z values, consider charge states
|
|
209
|
+
title="Fragment Coverage",
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
# Static: single sequence with optional peaks
|
|
213
|
+
SequenceView(
|
|
214
|
+
cache_id="static_peptide",
|
|
215
|
+
sequence_data=("PEPTIDEK", 2), # (sequence, charge) tuple
|
|
216
|
+
peaks_data=peaks_df, # Optional: LazyFrame with mass, intensity columns
|
|
217
|
+
deconvolved=True, # peaks are neutral masses
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
# Simplest: just a sequence string
|
|
221
|
+
SequenceView(
|
|
222
|
+
cache_id="simple_seq",
|
|
223
|
+
sequence_data="PEPTIDEK", # charge defaults to 1
|
|
224
|
+
)
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
**Key parameters:**
|
|
228
|
+
- `sequence_data`: LazyFrame, (sequence, charge) tuple, or sequence string
|
|
229
|
+
- `sequence_data_path`: Path to parquet with sequence data
|
|
230
|
+
- `peaks_data` / `peaks_data_path`: Optional peak data for fragment matching
|
|
231
|
+
- `deconvolved`: If False (default), peaks are m/z and matching considers charge states
|
|
232
|
+
- `annotation_config`: Dict with ion_types, tolerance, neutral_losses settings
|
|
233
|
+
|
|
234
|
+
**Features:**
|
|
235
|
+
- Automatic fragment ion matching (a/b/c/x/y/z ions)
|
|
236
|
+
- Configurable mass tolerance (ppm or Da)
|
|
237
|
+
- Neutral loss support (-H2O, -NH3)
|
|
238
|
+
- Auto-zoom for short sequences (≤20 amino acids)
|
|
239
|
+
- Fragment coverage statistics
|
|
240
|
+
- Click-to-select peaks with cross-component linking
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
## Shared Component Arguments
|
|
245
|
+
|
|
246
|
+
All components accept these common arguments:
|
|
247
|
+
|
|
248
|
+
| Argument | Type | Default | Description |
|
|
249
|
+
|----------|------|---------|-------------|
|
|
250
|
+
| `cache_id` | `str` | **Required** | Unique identifier for disk cache |
|
|
251
|
+
| `data_path` | `str` | `None` | Path to parquet file (preferred for memory efficiency) |
|
|
252
|
+
| `data` | `pl.LazyFrame` | `None` | Polars LazyFrame (alternative to data_path) |
|
|
253
|
+
| `filters` | `Dict[str, str]` | `None` | Map identifier -> column for filtering |
|
|
254
|
+
| `filter_defaults` | `Dict[str, Any]` | `None` | Default values when selection is None |
|
|
255
|
+
| `interactivity` | `Dict[str, str]` | `None` | Map identifier -> column for click actions |
|
|
256
|
+
| `cache_path` | `str` | `"."` | Base directory for cache storage |
|
|
257
|
+
| `regenerate_cache` | `bool` | `False` | Force cache regeneration |
|
|
258
|
+
|
|
259
|
+
## Memory-Efficient Preprocessing
|
|
260
|
+
|
|
261
|
+
When working with large datasets (especially heatmaps with millions of points), use `data_path` instead of `data` to enable subprocess preprocessing:
|
|
262
|
+
|
|
263
|
+
```python
|
|
264
|
+
# Subprocess preprocessing (recommended for large datasets)
|
|
265
|
+
# Memory is fully released after cache creation
|
|
266
|
+
heatmap = Heatmap(
|
|
267
|
+
data_path="large_peaks.parquet", # triggers subprocess
|
|
268
|
+
cache_id="peaks_heatmap",
|
|
269
|
+
...
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
# In-process preprocessing (for smaller datasets or debugging)
|
|
273
|
+
# Memory may be retained by allocator after preprocessing
|
|
274
|
+
heatmap = Heatmap(
|
|
275
|
+
data=pl.scan_parquet("large_peaks.parquet"), # runs in main process
|
|
276
|
+
cache_id="peaks_heatmap",
|
|
277
|
+
...
|
|
278
|
+
)
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
**Why this matters:** Memory allocators such as jemalloc or mimalloc (used by Polars, depending on the platform) retain freed memory for performance. For large datasets, this can cause memory usage to stay high even after preprocessing completes. Running preprocessing in a subprocess guarantees all memory is returned to the OS when the subprocess exits.
|
|
282
|
+
|
|
283
|
+
## Cache Reconstruction
|
|
284
|
+
|
|
285
|
+
Components can be reconstructed from cache using only `cache_id` and `cache_path`. All configuration is restored from the cached manifest:
|
|
286
|
+
|
|
287
|
+
```python
|
|
288
|
+
# First run: create component with data and config
|
|
289
|
+
table = Table(
|
|
290
|
+
cache_id="my_table",
|
|
291
|
+
data_path="data.parquet",
|
|
292
|
+
filters={'spectrum': 'scan_id'},
|
|
293
|
+
column_definitions=[...],
|
|
294
|
+
cache_path="./cache",
|
|
295
|
+
)
|
|
296
|
+
|
|
297
|
+
# Subsequent runs: reconstruct from cache only
|
|
298
|
+
table = Table(
|
|
299
|
+
cache_id="my_table",
|
|
300
|
+
cache_path="./cache",
|
|
301
|
+
)
|
|
302
|
+
# All config (filters, column_definitions, etc.) restored from cache
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
## Rendering
|
|
306
|
+
|
|
307
|
+
All components are callable. Pass a `StateManager` to enable cross-component linking:
|
|
308
|
+
|
|
309
|
+
```python
|
|
310
|
+
from openms_insight import StateManager
|
|
311
|
+
|
|
312
|
+
state_manager = StateManager()
|
|
313
|
+
|
|
314
|
+
table(state_manager=state_manager, height=300)
|
|
315
|
+
plot(state_manager=state_manager, height=400)
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
---
|
|
319
|
+
|
|
320
|
+
## Development
|
|
321
|
+
|
|
322
|
+
### Building the Vue Component
|
|
323
|
+
|
|
324
|
+
```bash
|
|
325
|
+
cd js-component
|
|
326
|
+
npm install
|
|
327
|
+
npm run build
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
### Development Mode (Hot Reload)
|
|
331
|
+
|
|
332
|
+
```bash
|
|
333
|
+
# Terminal 1: Vue dev server
|
|
334
|
+
cd js-component
|
|
335
|
+
npm run dev
|
|
336
|
+
|
|
337
|
+
# Terminal 2: Streamlit with dev mode
|
|
338
|
+
SVC_DEV_MODE=true SVC_DEV_URL=http://localhost:5173 streamlit run app.py
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
### Running Tests
|
|
342
|
+
|
|
343
|
+
```bash
|
|
344
|
+
# Python tests
|
|
345
|
+
pip install -e ".[dev]"
|
|
346
|
+
pytest tests/ -v
|
|
347
|
+
|
|
348
|
+
# TypeScript type checking
|
|
349
|
+
cd js-component
|
|
350
|
+
npm run type-check
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
### Linting and Formatting
|
|
354
|
+
|
|
355
|
+
```bash
|
|
356
|
+
# Python
|
|
357
|
+
ruff check .
|
|
358
|
+
ruff format .
|
|
359
|
+
|
|
360
|
+
# JavaScript/TypeScript
|
|
361
|
+
cd js-component
|
|
362
|
+
npm run lint
|
|
363
|
+
npm run format
|
|
364
|
+
```
|
|
@@ -5,15 +5,15 @@ This package provides reusable, interactive Streamlit components backed by Vue.j
|
|
|
5
5
|
visualizations with cross-component selection state management.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
from .components.heatmap import Heatmap
|
|
9
|
+
from .components.lineplot import LinePlot
|
|
10
|
+
from .components.sequenceview import SequenceView, SequenceViewResult
|
|
11
|
+
from .components.table import Table
|
|
8
12
|
from .core.base import BaseComponent
|
|
9
|
-
from .core.state import StateManager
|
|
10
|
-
from .core.registry import register_component, get_component_class
|
|
11
13
|
from .core.cache import CacheMissError
|
|
12
|
-
|
|
13
|
-
from .
|
|
14
|
-
from .
|
|
15
|
-
from .components.heatmap import Heatmap
|
|
16
|
-
from .components.sequenceview import SequenceView
|
|
14
|
+
from .core.registry import get_component_class, register_component
|
|
15
|
+
from .core.state import StateManager
|
|
16
|
+
from .rendering.bridge import clear_component_annotations, get_component_annotations
|
|
17
17
|
|
|
18
18
|
__version__ = "0.1.0"
|
|
19
19
|
|
|
@@ -29,4 +29,8 @@ __all__ = [
|
|
|
29
29
|
"LinePlot",
|
|
30
30
|
"Heatmap",
|
|
31
31
|
"SequenceView",
|
|
32
|
+
"SequenceViewResult",
|
|
33
|
+
# Utilities
|
|
34
|
+
"get_component_annotations",
|
|
35
|
+
"clear_component_annotations",
|
|
32
36
|
]
|