dataframe-textual 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,527 @@
1
+ """Modal screens for displaying data in tables (row details and frequency)."""
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ if TYPE_CHECKING:
6
+ from .data_frame_table import DataFrameTable
7
+
8
+ import polars as pl
9
+ from rich.text import Text
10
+ from textual.app import ComposeResult
11
+ from textual.coordinate import Coordinate
12
+ from textual.renderables.bar import Bar
13
+ from textual.screen import ModalScreen
14
+ from textual.widgets import DataTable
15
+
16
+ from .common import NULL, NULL_DISPLAY, RID, DtypeConfig, format_float
17
+
18
+
19
class TableScreen(ModalScreen):
    """Base class for modal screens displaying data in a DataTable.

    Provides common functionality for screens that show tabular data with
    keyboard shortcuts and styling.
    """

    DEFAULT_CSS = """
        TableScreen {
            align: center middle;
        }

        TableScreen > DataTable {
            width: auto;
            height: auto;
            border: solid $primary;
            max-width: 100%;
            overflow: auto;
        }
    """

    def __init__(self, dftable: "DataFrameTable") -> None:
        """Initialize the table screen.

        Keeps a reference to the main DataFrameTable widget and to its
        DataFrame so subclasses can render from either.

        Args:
            dftable: Reference to the parent DataFrameTable widget.
        """
        super().__init__()
        self.dftable = dftable  # DataFrameTable
        self.df: pl.DataFrame = dftable.df  # Polars DataFrame
        self.thousand_separator = False  # Whether to use thousand separators in numbers

    def compose(self) -> ComposeResult:
        """Compose the table screen widget structure.

        Creates a zebra-striped DataTable, stores it on ``self.table`` and yields it.

        Yields:
            DataTable: The table widget for this screen.
        """
        self.table = DataTable(zebra_stripes=True)
        yield self.table

    def build_table(self) -> None:
        """Build the table content.

        Subclasses must implement this method to populate ``self.table``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement build_table method.")

    def on_key(self, event) -> None:
        """Handle key press events in the table screen.

        ``q`` / ``escape`` close the screen; ``comma`` toggles thousand
        separators and rebuilds the table. Handled keys are stopped so they
        do not propagate further; all other keys are left untouched.

        Args:
            event: The key event object.
        """
        if event.key in ("q", "escape"):
            self.app.pop_screen()
            event.stop()
        elif event.key == "comma":
            self.thousand_separator = not self.thousand_separator
            self.build_table()
            event.stop()

    def filter_or_view_selected_value(self, cidx_name_value: tuple[int, str, Any] | None, action: str = "view") -> None:
        """Apply a filter or view action on the main table by the selected value.

        Builds an equality (or is-null) expression for the selected value and
        checks that it matches at least one row of the main DataFrame. When it
        does, the action is delegated to the main DataFrameTable and this
        screen is popped; otherwise a warning notification is shown and the
        screen stays open.

        Args:
            cidx_name_value: Tuple of (column_index, column_name, column_value)
                to filter/view by, or None, in which case nothing happens.
            action: "filter" to call the main table's ``do_filter_rows``; any
                other value calls ``view_rows``. Defaults to "view".
        """
        if cidx_name_value is None:
            return
        cidx, col_name, col_value = cidx_name_value
        self.log(f"Filtering or viewing by `{col_name} == {col_value}`")

        # Handle NULL values
        if col_value == NULL:
            # Create expression for NULL values
            expr = pl.col(col_name).is_null()
            value_display = f"[$success]{NULL_DISPLAY}[/]"
        else:
            # Create expression for the selected value
            expr = pl.col(col_name) == col_value
            value_display = f"[$success]{col_value}[/]"

        df_filtered = self.dftable.df.lazy().filter(expr).collect()
        self.log(f"Filtered dataframe has {len(df_filtered)} rows")

        # Only emptiness matters here, so there is no need to pull the
        # matching row ids into a Python set.
        if df_filtered.is_empty():
            self.notify(
                f"No matches found for [$warning]{col_name}[/] == {value_display}",
                title="No Matches",
                severity="warning",
            )
            return

        if action == "filter":
            # NOTE(review): do_filter_rows() receives neither `expr` nor `cidx`;
            # confirm it really filters by the value selected in this screen.
            self.dftable.do_filter_rows()
        else:
            # (term, column index, False, True) -- the meaning of the two flags
            # is defined by DataFrameTable.view_rows; verify against it.
            self.dftable.view_rows((expr, cidx, False, True))

        # Dismiss this modal screen
        self.app.pop_screen()
139
+
140
+
141
class RowDetailScreen(TableScreen):
    """Modal screen to display a single row's details.

    Each table row shows one DataFrame column (name and value) for the
    DataFrame row ``self.ridx``. Hidden columns and the RID column are omitted.
    """

    CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "RowDetailScreen")

    def __init__(self, ridx: int, dftable: "DataFrameTable") -> None:
        """Initialize the row detail screen.

        Args:
            ridx: Index of the DataFrame row to display.
            dftable: Reference to the parent DataFrameTable widget.
        """
        super().__init__(dftable)
        self.ridx = ridx

    def on_mount(self) -> None:
        """Populate the table once the screen is mounted."""
        self.build_table()

    def _visible_column_indices(self) -> list[int]:
        """Return the DataFrame column indices shown in the detail table, in display order."""
        hidden = self.dftable.hidden_columns
        return [cidx for cidx, col in enumerate(self.df.columns) if col != RID and col not in hidden]

    def build_table(self) -> None:
        """Build the row detail table with one (Column, Value) row per visible column."""
        self.table.clear(columns=True)
        self.table.add_column("Column")
        self.table.add_column("Value")

        columns = self.df.columns
        dtypes = self.df.dtypes
        values = self.df.row(self.ridx)

        # Hidden columns and the RID column are skipped, so table row N
        # corresponds to the N-th *visible* column, not DataFrame column N.
        for cidx in self._visible_column_indices():
            dc = DtypeConfig(dtypes[cidx])
            self.table.add_row(
                columns[cidx],
                dc.format(values[cidx], justify="", thousand_separator=self.thousand_separator),
            )

        self.table.cursor_type = "row"

    def on_key(self, event) -> None:
        """Handle key press events in the row detail screen.

        Supports 'v' to view and '"' to filter the main table by the value in
        the selected table row, and '}' / '{' to step to the next / previous
        DataFrame row (moving the main table cursor along).

        Args:
            event: The key event object.
        """
        if event.key == "v":
            # View the main table by the selected value
            self.filter_or_view_selected_value(self.get_cidx_name_value(), action="view")
            event.stop()
        elif event.key == "quotation_mark":  # '"'
            # Filter the main table by the selected value
            self.filter_or_view_selected_value(self.get_cidx_name_value(), action="filter")
            event.stop()
        elif event.key == "right_curly_bracket":  # '}'
            # Move to the next row
            ridx = self.ridx + 1
            if ridx < len(self.df):
                self.ridx = ridx
                self.dftable.move_cursor_to(self.ridx)
                self.build_table()
            event.stop()
        elif event.key == "left_curly_bracket":  # '{'
            # Move to the previous row
            ridx = self.ridx - 1
            if ridx >= 0:
                self.ridx = ridx
                self.dftable.move_cursor_to(self.ridx)
                self.build_table()
            event.stop()

    def get_cidx_name_value(self) -> tuple[int, str, Any] | None:
        """Return (column index, column name, value) for the table row under the cursor.

        The cursor row is translated through the list of visible columns so
        the result refers to the right DataFrame column even when hidden
        columns or the RID column were skipped while building the table.

        Returns:
            The tuple described above, or None when the cursor is not on a
            valid table row.
        """
        visible = self._visible_column_indices()
        row_idx = self.table.cursor_row
        if not 0 <= row_idx < len(visible):
            return None  # Invalid row

        cidx = visible[row_idx]
        col_name = self.df.columns[cidx]
        col_value = self.df.item(self.ridx, cidx)

        return cidx, col_name, col_value
220
+
221
+
222
class StatisticsScreen(TableScreen):
    """Modal screen showing ``describe()`` statistics for one column or the whole dataframe."""

    CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "StatisticsScreen")

    def __init__(self, dftable: "DataFrameTable", col_idx: int | None = None):
        super().__init__(dftable)
        # None selects dataframe-wide statistics; an int selects that column.
        self.col_idx = col_idx

    def on_mount(self) -> None:
        """Fill the statistics table when the screen is mounted."""
        self.build_table()

    def build_table(self) -> None:
        """Clear the table and rebuild it for the configured scope.

        Dataframe-wide statistics use a column cursor; single-column
        statistics use a row cursor.
        """
        self.table.clear(columns=True)

        whole_frame = self.col_idx is None
        if whole_frame:
            self.build_dataframe_stats()
        else:
            self.build_column_stats()
        self.table.cursor_type = "column" if whole_frame else "row"

    def build_column_stats(self) -> None:
        """Render a two-column table: statistic label and its value for one column."""
        name = self.df.columns[self.col_idx]
        summary = self.df.lazy().select(pl.col(name)).describe()
        if len(summary) == 0:
            return

        # Index 1 is the value column of the describe() output.
        formatter = DtypeConfig(summary.dtypes[1])

        self.table.add_column(Text("Statistic", justify="left"), key="statistic")
        self.table.add_column(Text(name, justify=formatter.justify), key=name)

        for label, value in summary.rows():
            self.table.add_row(
                label,
                formatter.format(value, thousand_separator=self.thousand_separator),
            )

    def build_dataframe_stats(self) -> None:
        """Render one statistics column per non-hidden dataframe column (RID excluded)."""
        lazy = self.df.lazy().select(pl.exclude(RID))

        hidden = self.dftable.hidden_columns
        if hidden:
            lazy = lazy.select(pl.exclude(hidden))

        stats_df = lazy.describe()
        column_dtypes = stats_df.dtypes

        # The first describe() column holds the statistic labels and gets a
        # plain header; every other column is justified according to its dtype.
        headers = list(zip(stats_df.columns, column_dtypes))
        if headers:
            self.table.add_column("Statistic", key="statistic")
        for name, dtype in headers[1:]:
            self.table.add_column(Text(name, justify=DtypeConfig(dtype).justify), key=name)

        for record in stats_df.rows():
            cells = [
                value
                if position == 0  # statistic label is passed through unformatted
                else DtypeConfig(column_dtypes[position]).format(
                    value, thousand_separator=self.thousand_separator
                )
                for position, value in enumerate(record)
            ]
            self.table.add_row(*cells)
313
+
314
+
315
class FrequencyScreen(TableScreen):
    """Modal screen to display frequency of values in a column.

    ``self.df`` holds the value counts of the chosen column (value column
    first, count column second), not the main DataFrame; table rows are added
    in the same order as the rows of ``self.df``, followed by a "Total" row.
    """

    CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "FrequencyScreen")

    def __init__(self, cidx: int, dftable: "DataFrameTable") -> None:
        """Initialize the frequency screen.

        Args:
            cidx: Index of the main DataFrame column whose values are counted.
            dftable: Reference to the parent DataFrameTable widget.
        """
        super().__init__(dftable)
        self.cidx = cidx
        self.sorted_columns = {1: True}  # Count sort by default (index -> descending)
        self.total_count = len(dftable.df)

        col = dftable.df.columns[self.cidx]
        # Replace the base-class DataFrame with the value-counts frame.
        self.df: pl.DataFrame = dftable.df.lazy().select(pl.col(col).value_counts(sort=True)).unnest(col).collect()

    def on_mount(self) -> None:
        """Create the frequency table."""
        self.build_table()

    def on_key(self, event) -> None:
        """Handle key press events in the frequency screen.

        '[' / ']' sort ascending / descending by the column under the cursor,
        'v' views and '"' filters the main table by the selected value.

        Args:
            event: The key event object.
        """
        if event.key == "left_square_bracket":  # '['
            # Sort by current column in ascending order
            self.sort_by_column(descending=False)
            event.stop()
        elif event.key == "right_square_bracket":  # ']'
            # Sort by current column in descending order
            self.sort_by_column(descending=True)
            event.stop()
        elif event.key == "v":
            # View the main table by the selected value
            self.filter_or_view_selected_value(self.get_cidx_name_value(), action="view")
            event.stop()
        elif event.key == "quotation_mark":  # '"'
            # Filter the main table by the selected value
            self.filter_or_view_selected_value(self.get_cidx_name_value(), action="filter")
            event.stop()

    def build_table(self) -> None:
        """Build the frequency table: value, count, percentage and a histogram bar per row."""
        self.table.clear(columns=True)

        # Create frequency table
        column = self.dftable.df.columns[self.cidx]
        dtype = self.dftable.df.dtypes[self.cidx]
        dc = DtypeConfig(dtype)

        # Add column headers with sort indicators: (display name, key, index)
        columns = [
            (column, "Value", 0),
            ("Count", "Count", 1),
            ("%", "%", 2),
            ("Histogram", "Histogram", 3),
        ]

        for display_name, key, col_idx_num in columns:
            # Check if this column is sorted and add indicator
            if col_idx_num in self.sorted_columns:
                descending = self.sorted_columns[col_idx_num]
                sort_indicator = " ▼" if descending else " ▲"
                header_text = display_name + sort_indicator
            else:
                header_text = display_name

            justify = dc.justify if col_idx_num == 0 else ("right" if col_idx_num in (1, 2) else "left")
            self.table.add_column(Text(header_text, justify=justify), key=key)

        # Get style config for Int64 and Float64
        dc_int = DtypeConfig(pl.Int64)
        dc_float = DtypeConfig(pl.Float64)

        # Add rows to the frequency table; row order mirrors self.df so the
        # cursor row can be mapped back to self.df later.
        for row_idx, (value, count) in enumerate(self.df.rows()):
            percentage = (count / self.total_count) * 100

            self.table.add_row(
                dc.format(value),
                dc_int.format(count, thousand_separator=self.thousand_separator),
                dc_float.format(percentage, thousand_separator=self.thousand_separator),
                Bar(
                    # Bar is 10 cells wide, so scale the percentage to 0..10.
                    highlight_range=(0.0, percentage / 100 * 10),
                    width=10,
                ),
                key=str(row_idx + 1),
            )

        # Add a total row
        self.table.add_row(
            Text("Total", style="bold", justify=dc.justify),
            Text(
                f"{self.total_count:,}" if self.thousand_separator else str(self.total_count),
                style="bold",
                justify="right",
            ),
            Text(
                format_float(100.0, self.thousand_separator, precision=-2 if len(self.df) > 1 else 2),
                style="bold",
                justify="right",
            ),
            Bar(
                highlight_range=(0.0, 10),
                width=10,
            ),
            key="total",
        )

    def sort_by_column(self, descending: bool) -> None:
        """Sort the frequency data by the column under the cursor and rebuild this table.

        The value column (index 0) sorts by value; every other column sorts by
        count. Nothing happens if the data is already sorted that way.

        Args:
            descending: True for descending order, False for ascending.
        """
        row_idx, col_idx = self.table.cursor_coordinate
        col_sort = col_idx if col_idx == 0 else 1

        if self.sorted_columns.get(col_sort) == descending:
            # If already sorted in the same direction, do nothing
            return

        self.sorted_columns.clear()
        self.sorted_columns[col_sort] = descending

        col_name = self.df.columns[col_sort]
        self.df = self.df.sort(col_name, descending=descending, nulls_last=True)

        # Rebuild the frequency table (build_table clears it first)
        self.build_table()

        # Put the cursor back where it was before the rebuild
        self.table.move_cursor(row=row_idx, column=col_idx)

    def get_cidx_name_value(self) -> tuple[int, str, Any] | None:
        """Return (column index, column name, value) for the table row under the cursor.

        The value is read from ``self.df`` rather than parsed back from the
        rendered cell text, so it keeps its original type and precision; a
        missing value is reported as the ``NULL`` sentinel.

        Returns:
            The tuple described above, or None when the cursor is on the
            trailing "Total" row (or otherwise outside the data rows).
        """
        row_idx = self.table.cursor_row
        if not 0 <= row_idx < len(self.df):
            return None  # Skip the last `Total` row

        col_name = self.dftable.df.columns[self.cidx]

        raw_value = self.df.item(row_idx, 0)  # first column holds the values
        col_value = NULL if raw_value is None else raw_value

        return self.cidx, col_name, col_value
457
+
458
+
459
class MetaShape(TableScreen):
    """Modal screen to display the shape (row and column counts) of the dataframe."""

    # The selector must name this class, as in the sibling screens.
    CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "MetaShape")

    def on_mount(self) -> None:
        """Initialize the metadata screen.

        Populates the table with the row and column counts of the dataframe.
        """
        self.build_table()

    def build_table(self) -> None:
        """Build the metadata table with one "Row" and one "Column" count line."""
        self.table.clear(columns=True)
        self.table.add_column("")
        self.table.add_column(Text("Count", justify="right"))

        # Use the shape of the main table's df_view when one is set,
        # otherwise the shape of this screen's DataFrame.
        df_view = self.dftable.df_view
        num_rows, num_cols = self.df.shape if df_view is None else df_view.shape
        num_cols -= 1  # Exclude RID column
        dc_int = DtypeConfig(pl.Int64)

        # Add rows to the table
        self.table.add_row("Row", dc_int.format(num_rows, thousand_separator=self.thousand_separator))
        self.table.add_row("Column", dc_int.format(num_cols, thousand_separator=self.thousand_separator))

        self.table.cursor_type = "none"
488
+
489
+
490
class MetaColumnScreen(TableScreen):
    """Modal screen listing every dataframe column with its position, name and type."""

    CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "MetaColumnScreen")

    def on_mount(self) -> None:
        """Fill the column metadata table when the screen is mounted."""
        self.build_table()

    def build_table(self) -> None:
        """Rebuild the table with one line per schema entry, skipping the RID column.

        The first cell is the entry's 1-based position in the schema, the
        second its name and the third its type, styled per the column dtype.
        """
        self.table.clear(columns=True)
        for heading in ("Column", "Name", "Type"):
            self.table.add_column(heading)

        schema = self.df.schema
        position_fmt = DtypeConfig(pl.Int64)
        type_fmt = DtypeConfig(pl.String)

        for position, (name, dtype) in enumerate(schema.items(), start=1):
            if name == RID:
                continue  # RID is not listed

            dtype_cfg = DtypeConfig(dtype)

            # Every Datetime variant is shown under the plain label "Datetime".
            if str(dtype).startswith("Datetime"):
                type_label = "Datetime"
            else:
                type_label = dtype

            self.table.add_row(
                position_fmt.format(position, thousand_separator=self.thousand_separator),
                name,
                type_fmt.format(type_label, style=dtype_cfg.style),
            )

        self.table.cursor_type = "none"