openms-insight 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,400 @@
1
+ """Table component using Tabulator.js."""
2
+
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ import polars as pl
6
+
7
+ from ..core.base import BaseComponent
8
+ from ..core.registry import register_component
9
+ from ..preprocessing.filtering import compute_dataframe_hash, filter_and_collect_cached
10
+
11
+
12
@register_component("table")
class Table(BaseComponent):
    """
    Interactive table component using Tabulator.js.

    Features:
    - Column definitions with formatters, sorters, tooltips
    - Row selection with cross-component linking
    - Filtering dialog (categorical, numeric, text)
    - Go-to functionality for navigation by field value
    - CSV download
    - Automatic column type detection

    Tables can have separate `filters` and `interactivity` mappings:
    - `filters`: Which selections filter this table's data
    - `interactivity`: What selection is set when a row is clicked

    Example:
        # Master table - shows all data, clicking sets 'spectrum' selection
        master_table = Table(
            cache_id="spectra_table",
            data=spectra_df,
            interactivity={'spectrum': 'scan_id'},
        )

        # Detail table - filters by 'spectrum', clicking sets 'peak' selection
        detail_table = Table(
            cache_id="peaks_table",
            data=peaks_df,
            filters={'spectrum': 'scan_id'},
            interactivity={'peak': 'mass'},
        )
    """

    _component_type: str = "table"

    # Polars dtypes whose auto-generated column gets the 'number' sorter.
    _NUMBER_SORTER_DTYPES = (
        pl.Int8, pl.Int16, pl.Int32, pl.Int64,
        pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
        pl.Float32, pl.Float64,
    )
    # Polars dtypes whose auto-generated column gets the 'date' sorter.
    _DATE_SORTER_DTYPES = (pl.Date, pl.Datetime, pl.Time)

    def __init__(
        self,
        cache_id: str,
        data: Optional[pl.LazyFrame] = None,
        filters: Optional[Dict[str, str]] = None,
        filter_defaults: Optional[Dict[str, Any]] = None,
        interactivity: Optional[Dict[str, str]] = None,
        cache_path: str = ".",
        regenerate_cache: bool = False,
        column_definitions: Optional[List[Dict[str, Any]]] = None,
        title: Optional[str] = None,
        index_field: str = 'id',
        go_to_fields: Optional[List[str]] = None,
        layout: str = 'fitDataFill',
        default_row: int = 0,
        initial_sort: Optional[List[Dict[str, Any]]] = None,
        pagination: bool = True,
        page_size: int = 100,
        **kwargs
    ):
        """
        Initialize the Table component.

        Args:
            cache_id: Unique identifier for this component's cache (MANDATORY).
                Creates a folder {cache_path}/{cache_id}/ for cached data.
            data: Polars LazyFrame with table data. Optional if cache exists.
            filters: Mapping of identifier names to column names for filtering.
                Example: {'spectrum': 'scan_id'}
                When 'spectrum' selection exists, table shows only rows where
                scan_id equals the selected value.
            filter_defaults: Default values for filters when state is None.
                Example: {'identification': -1}
                When 'identification' selection is None, filter uses -1 instead.
            interactivity: Mapping of identifier names to column names for clicks.
                Example: {'peak': 'mass'}
                When a row is clicked, sets 'peak' selection to that row's mass.
            cache_path: Base path for cache storage. Default "." (current dir).
            regenerate_cache: If True, regenerate cache even if valid cache exists.
            column_definitions: List of Tabulator column definition dicts.
                Each dict can contain:
                - field: Column field name (required)
                - title: Display title (defaults to field name)
                - sorter: 'number', 'string', 'alphanum', 'boolean', 'date', etc.
                - formatter: 'money', 'progress', 'star', 'tickCross', etc.
                - formatterParams: Dict of formatter parameters
                - headerTooltip: True or tooltip string
                - hozAlign: 'left', 'center', 'right'
                - width: Column width (number or string like '100px')
                If None, auto-generates from data schema.
                Each dict is shallow-copied, so later `with_*` calls on this
                table do not modify the dicts passed in by the caller.
            title: Table title displayed above the table
            index_field: Field used as row index for selection (default: 'id')
            go_to_fields: List of field names for "Go to" navigation feature
            layout: Tabulator layout mode ('fitData', 'fitDataFill', 'fitColumns', etc.)
            default_row: Default row to select on load (-1 for none)
            initial_sort: List of sort configurations like [{'column': 'field', 'dir': 'asc'}]
            pagination: Enable pagination for large tables (default: True).
                Pagination dramatically improves performance for tables with
                thousands of rows by only rendering one page at a time.
            page_size: Number of rows per page when pagination is enabled (default: 100)
            **kwargs: Additional configuration options
        """
        # Copy every definition: with_column_formatter() edits these dicts in
        # place, and the caller may reuse the same list for another table.
        if column_definitions is None:
            self._column_definitions = None
        else:
            self._column_definitions = [
                dict(col_def) for col_def in column_definitions
            ]
        self._title = title
        self._index_field = index_field
        self._go_to_fields = go_to_fields
        self._layout = layout
        self._default_row = default_row
        self._initial_sort = initial_sort
        self._pagination = pagination
        self._page_size = page_size

        # Table-specific attributes are assigned before delegating to the base
        # initializer. NOTE(review): presumably the base class invokes hooks
        # such as _get_cache_config()/_preprocess() during construction -
        # confirm against BaseComponent before reordering.
        super().__init__(
            cache_id=cache_id,
            data=data,
            filters=filters,
            filter_defaults=filter_defaults,
            interactivity=interactivity,
            cache_path=cache_path,
            regenerate_cache=regenerate_cache,
            **kwargs
        )

    def _get_cache_config(self) -> Dict[str, Any]:
        """
        Get configuration that affects cache validity.

        Returns:
            Dict of config values that affect preprocessing
        """
        return {
            'column_definitions': self._column_definitions,
            'index_field': self._index_field,
        }

    def _get_row_group_size(self) -> int:
        """
        Get optimal row group size for parquet writing.

        Filtered tables use smaller row groups (10K) for better predicate
        pushdown granularity - this allows Polars to skip row groups that
        don't contain the filter value. Master tables (no filters) use
        larger groups (50K) since we read all data anyway.

        Returns:
            Number of rows per row group
        """
        if self._filters:
            return 10_000  # Smaller groups for better filter performance
        return 50_000  # Larger groups for master tables

    @classmethod
    def _build_column_definition(cls, name: str, dtype: Any) -> Dict[str, Any]:
        """
        Build the auto-generated Tabulator column definition for one column.

        Args:
            name: Column name taken from the data schema.
            dtype: Polars dtype of that column.

        Returns:
            Dict with 'field', 'title', 'headerTooltip' and a 'sorter' chosen
            from the dtype; numeric columns are additionally right-aligned.
        """
        col_def: Dict[str, Any] = {
            'field': name,
            'title': name.replace('_', ' ').title(),
            'headerTooltip': True,
        }
        # Set sorter based on data type
        if dtype in cls._NUMBER_SORTER_DTYPES:
            col_def['sorter'] = 'number'
            col_def['hozAlign'] = 'right'
        elif dtype == pl.Boolean:
            col_def['sorter'] = 'boolean'
        elif dtype in cls._DATE_SORTER_DTYPES:
            col_def['sorter'] = 'date'
        else:
            col_def['sorter'] = 'string'
        return col_def

    def _preprocess(self) -> None:
        """
        Preprocess table data.

        Sorts by filter columns for efficient predicate pushdown, then
        collects the LazyFrame and generates column definitions if needed.
        Data is cached by base class for fast subsequent loads.
        """
        data = self._raw_data

        # Sort by filter columns for efficient predicate pushdown.
        # This clusters identical filter values together, enabling Polars
        # to skip row groups that don't contain the target value when
        # filtering by selection state.
        if self._filters:
            # Two identifiers may map to the same column; keep each sort key
            # once, in first-seen order.
            sort_columns = list(dict.fromkeys(self._filters.values()))
            data = data.sort(sort_columns)

        if self._column_definitions is None:
            # Auto-generate column definitions from schema. The schema is only
            # resolved here because nothing else in this method needs it.
            schema = data.collect_schema()
            self._column_definitions = [
                self._build_column_definition(name, dtype)
                for name, dtype in zip(schema.names(), schema.dtypes())
            ]

        # Store column definitions in preprocessed data for serialization
        self._preprocessed_data['column_definitions'] = self._column_definitions

        # Collect data for caching (filter happens at render time)
        # Base class will serialize this to parquet with optimized row groups
        self._preprocessed_data['data'] = data.collect()

    def _resolve_column_definitions(self) -> Optional[List[Dict[str, Any]]]:
        """
        Return the column definitions currently in effect.

        Prefers the definitions held on the instance and otherwise falls back
        to the 'column_definitions' entry of the preprocessed data (the entry
        written by _preprocess()).

        Returns:
            The list of column definition dicts, or None if neither source
            has one.
        """
        if self._column_definitions is not None:
            return self._column_definitions
        return self._preprocessed_data.get('column_definitions')

    def _get_columns_to_select(self) -> Optional[List[str]]:
        """
        Get list of columns needed for this table.

        Returns:
            Column names in first-seen order without repeats: the fields of
            the column definitions, then the index field, then the columns
            used for interactivity and for filtering. None when no column
            definitions are set on the instance (or nothing would be
            selected), meaning no projection is requested.
        """
        if not self._column_definitions:
            return None

        candidates: List[str] = [
            col_def['field']
            for col_def in self._column_definitions
            if 'field' in col_def
        ]
        # Always include index field for row identification
        if self._index_field:
            candidates.append(self._index_field)
        # Include columns needed for interactivity
        if self._interactivity:
            candidates.extend(self._interactivity.values())
        # Include columns needed for filtering
        if self._filters:
            candidates.extend(self._filters.values())

        # Ordered de-duplication: several column definitions may show the same
        # field, and a projection must not name one column twice.
        columns_to_select = list(dict.fromkeys(candidates))
        return columns_to_select or None

    def _get_vue_component_name(self) -> str:
        """Return the Vue component name."""
        return 'TabulatorTable'

    def _get_data_key(self) -> str:
        """Return the key used to send primary data to Vue."""
        return 'tableData'

    def _prepare_vue_data(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """
        Prepare table data for Vue component.

        Returns pandas DataFrame for efficient Arrow serialization to frontend.
        Data is filtered based on current selection state.

        Args:
            state: Current selection state from StateManager

        Returns:
            Dict with tableData (pandas DataFrame) and _hash keys
        """
        # Get columns to select for projection pushdown
        columns = self._get_columns_to_select()

        # Get cached data (DataFrame or LazyFrame)
        data = self._preprocessed_data.get('data')
        if data is None:
            # Fallback to raw data if available
            data = self._raw_data

        # Ensure we have a LazyFrame for filtering
        if isinstance(data, pl.DataFrame):
            data = data.lazy()

        # Use cached filter+collect - returns (pandas DataFrame, hash)
        df_pandas, data_hash = filter_and_collect_cached(
            data,
            self._filters,
            state,
            columns=columns,
            filter_defaults=self._filter_defaults,
        )

        return {'tableData': df_pandas, '_hash': data_hash}

    def _get_component_args(self) -> Dict[str, Any]:
        """
        Get component arguments to send to Vue.

        Returns:
            Dict with all table configuration for Vue. Entries from the extra
            configuration (self._config) are merged last and therefore
            override the keys built here.
        """
        # Get column definitions (may have been loaded from cache)
        column_defs = self._resolve_column_definitions()
        if column_defs is None:
            column_defs = []

        args: Dict[str, Any] = {
            'componentType': self._get_vue_component_name(),
            'columnDefinitions': column_defs,
            'tableIndexField': self._index_field,
            'tableLayoutParam': self._layout,
            'defaultRow': self._default_row,
            # Pass interactivity so Vue knows which identifier to update on row click
            'interactivity': self._interactivity,
            # Pagination settings
            'pagination': self._pagination,
            'pageSize': self._page_size,
        }

        if self._title:
            args['title'] = self._title

        if self._go_to_fields:
            args['goToFields'] = self._go_to_fields

        if self._initial_sort:
            args['initialSort'] = self._initial_sort

        # Add any extra config options
        args.update(self._config)

        return args

    def with_column_formatter(
        self,
        field: str,
        formatter: str,
        formatter_params: Optional[Dict[str, Any]] = None
    ) -> 'Table':
        """
        Add or update a column formatter.

        Only the first column definition whose 'field' equals `field` is
        changed; if no definition matches, the call is a no-op. Definitions
        are looked up on the instance first and otherwise in the preprocessed
        data, so the formatter also applies when the definitions were not
        passed to the constructor.

        Args:
            field: Column field name
            formatter: Tabulator formatter name
            formatter_params: Optional formatter parameters. None leaves any
                existing 'formatterParams' untouched; a dict (including an
                empty one) replaces them.

        Returns:
            Self for method chaining
        """
        for col_def in self._resolve_column_definitions() or []:
            if col_def.get('field') == field:
                col_def['formatter'] = formatter
                # `is not None` (not truthiness) so an explicit {} can clear
                # parameters left behind by a previously set formatter.
                if formatter_params is not None:
                    col_def['formatterParams'] = formatter_params
                break
        return self

    def with_money_format(
        self,
        field: str,
        precision: int = 2,
        symbol: str = '',
        thousand: str = ',',
        decimal: str = '.'
    ) -> 'Table':
        """
        Format a column as currency/money.

        Args:
            field: Column field name
            precision: Number of decimal places
            symbol: Currency symbol (default: none)
            thousand: Thousands separator
            decimal: Decimal separator

        Returns:
            Self for method chaining
        """
        return self.with_column_formatter(
            field,
            'money',
            {
                'precision': precision,
                'symbol': symbol,
                'thousand': thousand,
                'decimal': decimal,
            }
        )

    def with_progress_bar(
        self,
        field: str,
        min_val: float = 0,
        max_val: float = 100,
        color: Optional[str] = None
    ) -> 'Table':
        """
        Format a column as a progress bar.

        Args:
            field: Column field name
            min_val: Minimum value
            max_val: Maximum value
            color: Bar color (CSS color string); omitted from the formatter
                parameters when not given

        Returns:
            Self for method chaining
        """
        params: Dict[str, Any] = {'min': min_val, 'max': max_val}
        if color:
            params['color'] = color
        return self.with_column_formatter(field, 'progress', params)
@@ -0,0 +1,14 @@
1
+ """Core infrastructure for openms_insight."""
2
+
3
+ from .base import BaseComponent
4
+ from .state import StateManager
5
+ from .registry import register_component, get_component_class
6
+ from .cache import CacheMissError
7
+
8
+ __all__ = [
9
+ "BaseComponent",
10
+ "StateManager",
11
+ "register_component",
12
+ "get_component_class",
13
+ "CacheMissError",
14
+ ]