dataframe-textual 0.1.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dataframe-textual might be problematic. Click here for more details.

@@ -0,0 +1,490 @@
1
+ """Modal screens for displaying data in tables (row details and frequency)."""
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ if TYPE_CHECKING:
6
+ from .data_frame_table import DataFrameTable
7
+
8
+ import polars as pl
9
+ from rich.text import Text
10
+ from textual.app import ComposeResult
11
+ from textual.coordinate import Coordinate
12
+ from textual.renderables.bar import Bar
13
+ from textual.screen import ModalScreen
14
+ from textual.widgets import DataTable
15
+
16
+ from .common import NULL, NULL_DISPLAY, RIDX, DtypeConfig, format_float, format_row
17
+
18
+
19
class TableScreen(ModalScreen):
    """Base class for modal screens that display data in a DataTable.

    Holds a reference to the main DataFrameTable widget, composes a single
    zebra-striped DataTable, and provides shared key handling plus the
    filter/highlight-by-value action used by subclasses.
    """

    DEFAULT_CSS = """
        TableScreen {
            align: center middle;
        }

        TableScreen > DataTable {
            width: auto;
            min-width: 20;
            border: solid $primary;
        }
    """

    # The annotation is quoted because ``DataFrameTable`` is imported only under
    # ``TYPE_CHECKING``; an unquoted name is evaluated when the ``def`` runs and
    # raises ``NameError`` on interpreters that evaluate annotations eagerly.
    def __init__(self, dftable: "DataFrameTable") -> None:
        """Initialize the table screen.

        Args:
            dftable: Reference to the parent DataFrameTable widget. Its ``df``
                attribute is stored as this screen's DataFrame.
        """
        super().__init__()
        self.df: pl.DataFrame = dftable.df  # Polars DataFrame
        self.dftable = dftable  # DataFrameTable
        self.thousand_separator = False  # Whether to use thousand separators in numbers

    def compose(self) -> ComposeResult:
        """Create and yield the DataTable widget shown by this screen.

        Yields:
            DataTable: The table widget, also stored as ``self.table``.
        """
        self.table = DataTable(zebra_stripes=True)
        yield self.table

    def build_table(self) -> None:
        """Populate ``self.table``; must be implemented by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement build_table method.")

    def on_key(self, event) -> None:
        """Handle the key bindings shared by all table screens.

        ``q`` / ``escape`` pop this screen; ``comma`` toggles thousand
        separators and rebuilds the table. Handled keys are stopped; other
        keys are left untouched.

        Args:
            event: The key event object.
        """
        if event.key in ("q", "escape"):
            self.app.pop_screen()
            event.stop()
        elif event.key == "comma":
            self.thousand_separator = not self.thousand_separator
            self.build_table()
            event.stop()

    def _filter_or_highlight_selected_value(
        self, col_name_value: tuple[str, Any] | None, action: str = "filter"
    ) -> None:
        """Filter or highlight rows of the main table by a selected value.

        Computes which rows of ``self.dftable.df`` match the value, writes the
        result into ``visible_rows`` (filter) or ``selected_rows`` (highlight)
        in place, rebuilds the main table, pops this screen and notifies the
        user.

        Args:
            col_name_value: Tuple of (column_name, column_value) to match, or
                None to do nothing.
            action: "filter" to update ``visible_rows``; any other value is
                treated as "highlight" and updates ``selected_rows``.
        """
        if col_name_value is None:
            return
        col_name, col_value = col_name_value

        # A raw ``None`` must be matched with ``is_null()`` as well: comparing a
        # Polars column to ``None`` with ``==`` evaluates to null for every row,
        # so the filter below would silently match nothing.
        if col_value is None or col_value == NULL:
            expr = pl.col(col_name).is_null()
            value_display = "[$success]NULL[/]"
        else:
            expr = pl.col(col_name) == col_value
            value_display = f"[$success]{col_value}[/]"

        matched_indices = set(self.dftable.df.with_row_index(RIDX).filter(expr)[RIDX].to_list())

        # Pick the per-row flag sequence to update and the user-facing wording.
        if action == "filter":
            row_flags = self.dftable.visible_rows
            title = "Filter"
            message = f"Filtered by [$accent]{col_name}[/] == [$success]{value_display}[/]"
        else:  # action == "highlight"
            row_flags = self.dftable.selected_rows
            title = "Highlight"
            message = f"Highlighted [$accent]{col_name}[/] == [$success]{value_display}[/]"

        # Update element by element so the sequence object owned by the main
        # table keeps its identity.
        for i in range(len(row_flags)):
            row_flags[i] = i in matched_indices

        # Recreate the table display with updated data in the main app
        self.dftable._setup_table()

        # Dismiss this modal screen before notifying
        self.app.pop_screen()

        self.notify(message, title=title)
151
+
152
+
153
class RowDetailScreen(TableScreen):
    """Modal screen listing every column name and value of one row."""

    CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "RowDetailScreen")

    def __init__(self, ridx: int, dftable):
        super().__init__(dftable)
        self.ridx = ridx  # index of the row being shown

    def on_mount(self) -> None:
        """Fill the table once the screen is mounted."""
        self.build_table()

    def build_table(self) -> None:
        """Rebuild the two-column (Column / Value) table for row ``self.ridx``."""
        table = self.table
        table.clear(columns=True)
        for header in ("Column", "Value"):
            table.add_column(header)

        # One table row per dataframe column: its name and the value in this row
        row_values = self.df.row(self.ridx)
        for name, value, dtype in zip(self.df.columns, row_values, self.df.dtypes):
            cells = format_row(
                [name, value], [None, dtype], apply_justify=False, thousand_separator=self.thousand_separator
            )
            table.add_row(*cells)

        table.cursor_type = "row"

    def on_key(self, event) -> None:
        """Handle keys specific to the row detail screen.

        ``v`` filters and ``"`` highlights the main table by the value under
        the cursor; ``comma`` is stopped here.

        Args:
            event: The key event object.
        """
        key = event.key
        if key in ("v", "quotation_mark"):  # quotation_mark is '"'
            chosen_action = "filter" if key == "v" else "highlight"
            self._filter_or_highlight_selected_value(self._get_col_name_value(), action=chosen_action)
            event.stop()
        elif key == "comma":
            event.stop()

    def _get_col_name_value(self) -> tuple[str, Any] | None:
        """Return (column name, value) for the cursor row, or None if out of range."""
        cursor = self.table.cursor_row
        names = self.df.columns
        if cursor >= len(names):
            return None  # Invalid row

        return names[cursor], self.df.item(self.ridx, cursor)
219
+
220
+
221
class StatisticsScreen(TableScreen):
    """Modal screen to display statistics for a column or entire dataframe."""

    CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "StatisticsScreen")

    # ``DataFrameTable`` is quoted because it is imported only under
    # ``TYPE_CHECKING`` and would otherwise be evaluated when the ``def`` runs.
    def __init__(self, dftable: "DataFrameTable", col_idx: int | None = None) -> None:
        """Initialize the statistics screen.

        Args:
            dftable: Reference to the parent DataFrameTable widget.
            col_idx: Index of the column to describe, or None to describe the
                whole dataframe.
        """
        super().__init__(dftable)
        self.col_idx = col_idx  # None for dataframe statistics, otherwise column index

    def on_mount(self) -> None:
        """Create the statistics table."""
        self.build_table()

    def build_table(self) -> None:
        """Clear the table and rebuild it for the column or the dataframe."""
        self.table.clear(columns=True)

        if self.col_idx is None:
            self._build_dataframe_stats()
        else:
            self._build_column_stats()

    def _visible_lazy_frame(self) -> pl.LazyFrame:
        """Return ``self.df`` as a LazyFrame restricted to the visible rows."""
        lf = self.df.lazy()
        # Only add the filter when at least one row is actually hidden
        if False in self.dftable.visible_rows:
            lf = lf.filter(self.dftable.visible_rows)
        return lf

    def _format_stat(self, stat_value: Any, dc: DtypeConfig) -> str:
        """Render one statistic value as display text for the given dtype config."""
        if stat_value is None:
            return NULL_DISPLAY
        if dc.gtype == "integer" and self.thousand_separator:
            return f"{stat_value:,}"
        if dc.gtype == "float":
            return format_float(stat_value, self.thousand_separator)
        return str(stat_value)

    def _build_column_stats(self) -> None:
        """Build statistics for a single column."""
        col_name = self.df.columns[self.col_idx]
        stats_df = self._visible_lazy_frame().select(pl.col(col_name)).collect().describe()
        # Explicit length check: a Polars DataFrame has no usable truth value
        if len(stats_df) == 0:
            return

        dc = DtypeConfig(stats_df.dtypes[1])  # dtype of the value column

        self.table.add_column(Text("Statistic", justify="left"), key="statistic")
        self.table.add_column(Text(col_name, justify=dc.justify), key=col_name)

        for stat_label, stat_value in stats_df.rows():
            self.table.add_row(
                Text(stat_label, justify="left"),
                Text(self._format_stat(stat_value, dc), style=dc.style, justify=dc.justify),
            )

    def _build_dataframe_stats(self) -> None:
        """Build statistics for the entire dataframe."""
        stats_df = self._visible_lazy_frame().collect().describe()

        # The first column of the describe() output holds the statistic labels;
        # the rest are value columns. Build each value column's config once
        # instead of once per cell (assumes DtypeConfig depends only on dtype).
        value_names = stats_df.columns[1:]
        value_configs = [DtypeConfig(dtype) for dtype in stats_df.dtypes[1:]]

        self.table.add_column("Statistic", key="statistic")
        for col_name, dc in zip(value_names, value_configs):
            self.table.add_column(Text(col_name, justify=dc.justify), key=col_name)

        for stat_label, *stat_values in stats_df.rows():
            cells = [
                Text(self._format_stat(stat_value, dc), style=dc.style, justify=dc.justify)
                for stat_value, dc in zip(stat_values, value_configs)
            ]
            # The label cell is added unstyled
            self.table.add_row(stat_label, *cells)
334
+
335
+
336
class FrequencyScreen(TableScreen):
    """Modal screen to display frequency of values in a column."""

    CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "FrequencyScreen")

    # ``DataFrameTable`` is quoted because it is imported only under
    # ``TYPE_CHECKING`` and would otherwise be evaluated when the ``def`` runs.
    def __init__(self, col_idx: int, dftable: "DataFrameTable") -> None:
        """Initialize the frequency screen.

        Args:
            col_idx: Index of the column in ``dftable.df`` to count values for.
            dftable: Reference to the parent DataFrameTable widget.
        """
        super().__init__(dftable)
        self.col_idx = col_idx
        # Maps frequency-table column index -> True if sorted descending
        self.sorted_columns = {
            1: True,  # Count
        }
        # Replaces the base-class ``self.df`` with the (value, count) table
        self.df: pl.DataFrame = (
            dftable.df[dftable.df.columns[self.col_idx]].value_counts(sort=True).sort("count", descending=True)
        )

    def on_mount(self) -> None:
        """Create the frequency table."""
        self.build_table()

    def on_key(self, event) -> None:
        """Handle keys specific to the frequency screen.

        ``[`` / ``]`` sort by the cursor column ascending / descending,
        ``v`` filters and ``"`` highlights the main table by the selected value.

        Args:
            event: The key event object.
        """
        if event.key == "left_square_bracket":  # '['
            self._sort_by_column(descending=False)
            event.stop()
        elif event.key == "right_square_bracket":  # ']'
            self._sort_by_column(descending=True)
            event.stop()
        elif event.key == "v":
            self._filter_or_highlight_selected_value(self._get_col_name_value(), action="filter")
            event.stop()
        elif event.key == "quotation_mark":  # '"'
            self._filter_or_highlight_selected_value(self._get_col_name_value(), action="highlight")
            event.stop()

    def build_table(self) -> None:
        """Build the frequency table: value, count, percentage and histogram bar."""
        self.table.clear(columns=True)

        col_name = self.dftable.df.columns[self.col_idx]
        dc = DtypeConfig(self.dftable.df.dtypes[self.col_idx])
        total_count = len(self.dftable.df)
        use_separator = self.thousand_separator

        # (header text, column key, frequency-table column index)
        columns = [
            (col_name, "Value", 0),
            ("Count", "Count", 1),
            ("%", "%", 2),
            ("Histogram", "Histogram", 3),
        ]

        for display_name, key, col_idx_num in columns:
            header_text = display_name
            # Append a sort indicator to the column currently sorted
            if col_idx_num in self.sorted_columns:
                header_text += " ▼" if self.sorted_columns[col_idx_num] else " ▲"

            if col_idx_num == 0:
                justify = dc.justify
            elif col_idx_num in (1, 2):
                justify = "right"
            else:
                justify = "left"
            self.table.add_column(Text(header_text, justify=justify), key=key)

        # Style config for the count (Int64) and percentage (Float64) cells
        ds_int = DtypeConfig(pl.Int64)
        ds_float = DtypeConfig(pl.Float64)

        # Row keys are 1-based positions; the data rows only exist when the
        # dataframe is non-empty, so total_count is never zero in the division.
        for row_number, (cell_value, count) in enumerate(self.df.rows(), start=1):
            percentage = (count / total_count) * 100

            if cell_value is None:
                value = NULL_DISPLAY
            elif dc.gtype == "integer" and use_separator:
                value = f"{cell_value:,}"
            elif dc.gtype == "float":
                value = format_float(cell_value, use_separator)
            else:
                value = str(cell_value)

            self.table.add_row(
                Text(value, style=dc.style, justify=dc.justify),
                Text(f"{count:,}" if use_separator else str(count), style=ds_int.style, justify=ds_int.justify),
                # A percentage never exceeds 100, so no thousand separator applies
                Text(f"{percentage:.3f}", style=ds_float.style, justify=ds_float.justify),
                Bar(
                    highlight_range=(0.0, percentage / 100 * 10),
                    width=10,
                ),
                key=str(row_number),
            )

        # Total row: formatted like the data rows (separator toggle honoured,
        # three decimals) so the columns line up.
        self.table.add_row(
            Text("Total", style="bold", justify=dc.justify),
            Text(f"{total_count:,}" if use_separator else str(total_count), style="bold", justify="right"),
            Text("100.000", style="bold", justify="right"),
            Bar(
                highlight_range=(0.0, 10),
                width=10,
            ),
            key="total",
        )

    def _sort_by_column(self, descending: bool) -> None:
        """Sort the frequency table by the cursor column and redraw it.

        The first column sorts by value; any other column sorts by count.

        Args:
            descending: True to sort descending, False for ascending.
        """
        row_idx, col_idx = self.table.cursor_coordinate
        col_sort = col_idx if col_idx == 0 else 1

        # Nothing to do if this column is already sorted in the requested order
        if self.sorted_columns.get(col_sort) == descending:
            self.notify("Already sorted in that order", title="Sort", severity="warning")
            return

        self.sorted_columns.clear()
        self.sorted_columns[col_sort] = descending

        sort_name = self.df.columns[col_sort]
        self.df = self.df.sort(sort_name, descending=descending, nulls_last=True)

        # build_table() clears the table itself before repopulating it
        self.build_table()

        # Put the cursor back where it was before the rebuild
        self.table.move_cursor(row=row_idx, column=col_idx)

    def _get_col_name_value(self) -> tuple[str, Any] | None:
        """Return (column name, value) for the cursor row, or None on the Total row.

        The value is read from ``self.df`` rather than parsed back from the
        rendered cell: the displayed text may carry thousand separators or
        float formatting, and a real string equal to the null placeholder
        would be indistinguishable from a null. Table rows are added in
        ``self.df`` row order, so the cursor row indexes ``self.df`` directly.
        """
        row_idx = self.table.cursor_row
        if row_idx >= len(self.df):
            return None  # Skip the last `Total` row

        col_name = self.dftable.df.columns[self.col_idx]
        raw_value = self.df.item(row_idx, 0)

        return col_name, NULL if raw_value is None else raw_value