openms-insight 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openms_insight/__init__.py +32 -0
- openms_insight/components/__init__.py +11 -0
- openms_insight/components/heatmap.py +823 -0
- openms_insight/components/lineplot.py +492 -0
- openms_insight/components/sequenceview.py +384 -0
- openms_insight/components/table.py +400 -0
- openms_insight/core/__init__.py +14 -0
- openms_insight/core/base.py +413 -0
- openms_insight/core/cache.py +39 -0
- openms_insight/core/registry.py +82 -0
- openms_insight/core/state.py +215 -0
- openms_insight/js-component/dist/assets/index.css +5 -0
- openms_insight/js-component/dist/assets/index.js +4220 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.eot +0 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.ttf +0 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff +0 -0
- openms_insight/js-component/dist/assets/materialdesignicons-webfont.woff2 +0 -0
- openms_insight/js-component/dist/index.html +14 -0
- openms_insight/preprocessing/__init__.py +22 -0
- openms_insight/preprocessing/compression.py +338 -0
- openms_insight/preprocessing/filtering.py +316 -0
- openms_insight/rendering/__init__.py +8 -0
- openms_insight/rendering/bridge.py +312 -0
- openms_insight-0.1.0.dist-info/METADATA +256 -0
- openms_insight-0.1.0.dist-info/RECORD +27 -0
- openms_insight-0.1.0.dist-info/WHEEL +4 -0
- openms_insight-0.1.0.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
"""Table component using Tabulator.js."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
|
|
7
|
+
from ..core.base import BaseComponent
|
|
8
|
+
from ..core.registry import register_component
|
|
9
|
+
from ..preprocessing.filtering import compute_dataframe_hash, filter_and_collect_cached
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@register_component("table")
class Table(BaseComponent):
    """
    Interactive table component using Tabulator.js.

    Features:
    - Column definitions with formatters, sorters, tooltips
    - Row selection with cross-component linking
    - Filtering dialog (categorical, numeric, text)
    - Go-to functionality for navigation by field value
    - CSV download
    - Automatic column type detection

    Tables can have separate `filters` and `interactivity` mappings:
    - `filters`: Which selections filter this table's data
    - `interactivity`: What selection is set when a row is clicked

    Example:
        # Master table - shows all data, clicking sets 'spectrum' selection
        master_table = Table(
            cache_id="spectra_table",
            data=spectra_df,
            interactivity={'spectrum': 'scan_id'},
        )

        # Detail table - filters by 'spectrum', clicking sets 'peak' selection
        detail_table = Table(
            cache_id="peaks_table",
            data=peaks_df,
            filters={'spectrum': 'scan_id'},
            interactivity={'peak': 'mass'},
        )
    """

    _component_type: str = "table"

    # Polars dtypes whose auto-generated columns get a numeric sorter and
    # right alignment.
    _NUMERIC_DTYPES = (
        pl.Int8, pl.Int16, pl.Int32, pl.Int64,
        pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
        pl.Float32, pl.Float64,
    )

    # Polars dtypes whose auto-generated columns get Tabulator's 'date' sorter.
    _TEMPORAL_DTYPES = (pl.Date, pl.Datetime, pl.Time)

    def __init__(
        self,
        cache_id: str,
        data: Optional[pl.LazyFrame] = None,
        filters: Optional[Dict[str, str]] = None,
        filter_defaults: Optional[Dict[str, Any]] = None,
        interactivity: Optional[Dict[str, str]] = None,
        cache_path: str = ".",
        regenerate_cache: bool = False,
        column_definitions: Optional[List[Dict[str, Any]]] = None,
        title: Optional[str] = None,
        index_field: str = 'id',
        go_to_fields: Optional[List[str]] = None,
        layout: str = 'fitDataFill',
        default_row: int = 0,
        initial_sort: Optional[List[Dict[str, Any]]] = None,
        pagination: bool = True,
        page_size: int = 100,
        **kwargs
    ):
        """
        Initialize the Table component.

        Args:
            cache_id: Unique identifier for this component's cache (MANDATORY).
                Creates a folder {cache_path}/{cache_id}/ for cached data.
            data: Polars LazyFrame with table data. Optional if cache exists.
            filters: Mapping of identifier names to column names for filtering.
                Example: {'spectrum': 'scan_id'}
                When 'spectrum' selection exists, table shows only rows where
                scan_id equals the selected value.
            filter_defaults: Default values for filters when state is None.
                Example: {'identification': -1}
                When 'identification' selection is None, filter uses -1 instead.
            interactivity: Mapping of identifier names to column names for clicks.
                Example: {'peak': 'mass'}
                When a row is clicked, sets 'peak' selection to that row's mass.
            cache_path: Base path for cache storage. Default "." (current dir).
            regenerate_cache: If True, regenerate cache even if valid cache exists.
            column_definitions: List of Tabulator column definition dicts.
                Each dict can contain:
                - field: Column field name (required)
                - title: Display title (defaults to field name)
                - sorter: 'number', 'string', 'alphanum', 'boolean', 'date', etc.
                - formatter: 'money', 'progress', 'star', 'tickCross', etc.
                - formatterParams: Dict of formatter parameters
                - headerTooltip: True or tooltip string
                - hozAlign: 'left', 'center', 'right'
                - width: Column width (number or string like '100px')
                If None, auto-generates from data schema.
                The dicts are shallow-copied, so later ``with_*`` calls on
                this table never modify the caller's objects.
            title: Table title displayed above the table
            index_field: Field used as row index for selection (default: 'id')
            go_to_fields: List of field names for "Go to" navigation feature
            layout: Tabulator layout mode ('fitData', 'fitDataFill', 'fitColumns', etc.)
            default_row: Default row to select on load (-1 for none)
            initial_sort: List of sort configurations like [{'column': 'field', 'dir': 'asc'}]
            pagination: Enable pagination for large tables (default: True).
                Pagination dramatically improves performance for tables with
                thousands of rows by only rendering one page at a time.
            page_size: Number of rows per page when pagination is enabled (default: 100)
            **kwargs: Additional configuration options
        """
        # Table-specific settings are assigned BEFORE calling the base
        # initializer, exactly as in the original ordering.
        # NOTE(review): presumably the base __init__ may invoke hooks such as
        # _get_cache_config()/_preprocess() that read these - confirm in
        # core.base before reordering.
        if column_definitions is None:
            self._column_definitions = None
        else:
            # Defensive shallow copy: with_column_formatter() edits these
            # dicts in place and must not leak changes back to the caller.
            self._column_definitions = [dict(col_def) for col_def in column_definitions]
        self._title = title
        self._index_field = index_field
        self._go_to_fields = go_to_fields
        self._layout = layout
        self._default_row = default_row
        self._initial_sort = initial_sort
        self._pagination = pagination
        self._page_size = page_size

        super().__init__(
            cache_id=cache_id,
            data=data,
            filters=filters,
            filter_defaults=filter_defaults,
            interactivity=interactivity,
            cache_path=cache_path,
            regenerate_cache=regenerate_cache,
            **kwargs
        )

    def _get_cache_config(self) -> Dict[str, Any]:
        """
        Get configuration that affects cache validity.

        Returns:
            Dict of config values that affect preprocessing
        """
        return {
            'column_definitions': self._column_definitions,
            'index_field': self._index_field,
        }

    def _get_row_group_size(self) -> int:
        """
        Get optimal row group size for parquet writing.

        Filtered tables use smaller row groups (10K) for better predicate
        pushdown granularity - this allows Polars to skip row groups that
        don't contain the filter value. Master tables (no filters) use
        larger groups (50K) since we read all data anyway.

        Returns:
            Number of rows per row group
        """
        if self._filters:
            return 10_000  # Smaller groups for better filter performance
        return 50_000  # Larger groups for master tables

    @staticmethod
    def _column_definition_from_dtype(name: str, dtype: Any) -> Dict[str, Any]:
        """Build a default Tabulator column definition for one schema column.

        Args:
            name: Column name; also used to derive a human-readable title.
            dtype: Polars dtype of the column, used to choose the sorter.

        Returns:
            Dict with 'field', 'title', 'headerTooltip' and 'sorter' keys,
            plus 'hozAlign' for numeric columns.
        """
        col_def: Dict[str, Any] = {
            'field': name,
            'title': name.replace('_', ' ').title(),
            'headerTooltip': True,
        }
        # Set sorter based on data type
        if dtype in Table._NUMERIC_DTYPES:
            col_def['sorter'] = 'number'
            col_def['hozAlign'] = 'right'
        elif dtype == pl.Boolean:
            col_def['sorter'] = 'boolean'
        elif dtype in Table._TEMPORAL_DTYPES:
            col_def['sorter'] = 'date'
        else:
            col_def['sorter'] = 'string'
        return col_def

    def _preprocess(self) -> None:
        """
        Preprocess table data.

        Sorts by filter columns for efficient predicate pushdown, then
        collects the LazyFrame and generates column definitions if needed.
        Results are stored in ``self._preprocessed_data`` under the keys
        'column_definitions' and 'data'.
        """
        data = self._raw_data

        # Sort by filter columns for efficient predicate pushdown.
        # This clusters identical filter values together, enabling Polars
        # to skip row groups that don't contain the target value when
        # filtering by selection state.
        if self._filters:
            # Several identifiers may map to the same column; sorting on it
            # once is enough, so drop repeats while keeping the order.
            sort_columns = list(dict.fromkeys(self._filters.values()))
            data = data.sort(sort_columns)

        if self._column_definitions is None:
            # Auto-generate column definitions from schema. The schema is
            # only resolved here because nothing else needs it.
            schema = data.collect_schema()
            self._column_definitions = [
                self._column_definition_from_dtype(name, dtype)
                for name, dtype in zip(schema.names(), schema.dtypes())
            ]

        # Store column definitions in preprocessed data for serialization
        self._preprocessed_data['column_definitions'] = self._column_definitions

        # Collect data for caching (filter happens at render time)
        # Base class will serialize this to parquet with optimized row groups
        self._preprocessed_data['data'] = data.collect()

    def _get_columns_to_select(self) -> Optional[List[str]]:
        """Get list of columns needed for this table.

        Returns:
            Ordered, duplicate-free list made of the column-definition
            fields, then the index field, then interactivity columns, then
            filter columns; or None when there are no column definitions
            (meaning: do not restrict the projection).
        """
        if not self._column_definitions:
            return None

        # A dict is used as an ordered set. This also collapses a field that
        # appears in more than one column definition, which would otherwise
        # yield a duplicate entry in the projection.
        selected: Dict[str, None] = dict.fromkeys(
            col_def['field']
            for col_def in self._column_definitions
            if 'field' in col_def
        )

        # Always include index field for row identification
        if self._index_field:
            selected.setdefault(self._index_field, None)

        # Include columns needed for interactivity, then for filtering
        for mapping in (self._interactivity, self._filters):
            if mapping:
                for col in mapping.values():
                    selected.setdefault(col, None)

        return list(selected) or None

    def _get_vue_component_name(self) -> str:
        """Return the Vue component name."""
        return 'TabulatorTable'

    def _get_data_key(self) -> str:
        """Return the key used to send primary data to Vue."""
        return 'tableData'

    def _prepare_vue_data(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """
        Prepare table data for Vue component.

        Data is filtered based on current selection state by
        ``filter_and_collect_cached``, which is handed the filter mapping,
        the state, the projected columns and the filter defaults.

        Args:
            state: Current selection state from StateManager

        Returns:
            Dict with 'tableData' (the filtered frame returned by
            ``filter_and_collect_cached``; a pandas DataFrame per the
            original author's notes) and '_hash' keys
        """
        # Get columns to select for projection pushdown
        columns = self._get_columns_to_select()

        # Get cached data (DataFrame or LazyFrame); fall back to the raw
        # input when nothing has been preprocessed.
        data = self._preprocessed_data.get('data')
        if data is None:
            data = self._raw_data

        # Ensure we have a LazyFrame for filtering
        if isinstance(data, pl.DataFrame):
            data = data.lazy()

        # Use cached filter+collect - returns (pandas DataFrame, hash)
        df_pandas, data_hash = filter_and_collect_cached(
            data,
            self._filters,
            state,
            columns=columns,
            filter_defaults=self._filter_defaults,
        )

        return {'tableData': df_pandas, '_hash': data_hash}

    def _get_component_args(self) -> Dict[str, Any]:
        """
        Get component arguments to send to Vue.

        Optional settings (title, go-to fields, initial sort) are only
        included when set. ``self._config`` is merged last, so its entries
        override any key built here.

        Returns:
            Dict with all table configuration for Vue
        """
        # Get column definitions (may have been loaded from cache)
        column_defs = self._column_definitions
        if column_defs is None:
            column_defs = self._preprocessed_data.get('column_definitions', [])

        args: Dict[str, Any] = {
            'componentType': self._get_vue_component_name(),
            'columnDefinitions': column_defs,
            'tableIndexField': self._index_field,
            'tableLayoutParam': self._layout,
            'defaultRow': self._default_row,
            # Pass interactivity so Vue knows which identifier to update on row click
            'interactivity': self._interactivity,
            # Pagination settings
            'pagination': self._pagination,
            'pageSize': self._page_size,
        }

        if self._title:
            args['title'] = self._title

        if self._go_to_fields:
            args['goToFields'] = self._go_to_fields

        if self._initial_sort:
            args['initialSort'] = self._initial_sort

        # Add any extra config options
        # NOTE(review): _config is presumably the extra **kwargs captured by
        # the base class - confirm in core.base.
        args.update(self._config)

        return args

    def with_column_formatter(
        self,
        field: str,
        formatter: str,
        formatter_params: Optional[Dict[str, Any]] = None
    ) -> 'Table':
        """
        Set the formatter of an existing column definition.

        Only the first definition whose 'field' equals ``field`` is updated.
        If no definition matches, or column definitions have not been
        provided/generated yet, the call is a no-op.

        Args:
            field: Column field name
            formatter: Tabulator formatter name
            formatter_params: Optional formatter parameters. When omitted
                (or empty) and the column previously used a *different*
                formatter, that formatter's parameters are removed so they
                are not applied to the new one.

        Returns:
            Self for method chaining
        """
        for col_def in self._column_definitions or []:
            if col_def.get('field') != field:
                continue

            previous = col_def.get('formatter')
            col_def['formatter'] = formatter
            if formatter_params:
                col_def['formatterParams'] = formatter_params
            elif previous is not None and previous != formatter:
                # Parameters belonging to the old formatter are meaningless
                # (or misleading) for the new one.
                col_def.pop('formatterParams', None)
            break
        return self

    def with_money_format(
        self,
        field: str,
        precision: int = 2,
        symbol: str = '',
        thousand: str = ',',
        decimal: str = '.'
    ) -> 'Table':
        """
        Format a column as currency/money.

        Args:
            field: Column field name
            precision: Number of decimal places
            symbol: Currency symbol (default: none)
            thousand: Thousands separator
            decimal: Decimal separator

        Returns:
            Self for method chaining
        """
        return self.with_column_formatter(
            field,
            'money',
            {
                'precision': precision,
                'symbol': symbol,
                'thousand': thousand,
                'decimal': decimal,
            }
        )

    def with_progress_bar(
        self,
        field: str,
        min_val: float = 0,
        max_val: float = 100,
        color: Optional[str] = None
    ) -> 'Table':
        """
        Format a column as a progress bar.

        Args:
            field: Column field name
            min_val: Minimum value
            max_val: Maximum value
            color: Bar color (CSS color string); omitted from the formatter
                parameters when not given

        Returns:
            Self for method chaining
        """
        params: Dict[str, Any] = {'min': min_val, 'max': max_val}
        if color:
            params['color'] = color
        return self.with_column_formatter(field, 'progress', params)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Core infrastructure for openms_insight."""

# Re-export the package's building blocks so they can be imported from this
# package directly. Import order is kept as-is.
from .base import BaseComponent
from .state import StateManager
from .registry import (
    register_component,
    get_component_class,
)
from .cache import CacheMissError

# Public names of this package (what ``import *`` exposes).
__all__ = [
    "BaseComponent",
    "StateManager",
    "register_component",
    "get_component_class",
    "CacheMissError",
]
|