dataframe-textual 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,9 @@
1
1
  """Modal screens for displaying data in tables (row details and frequency)."""
2
2
 
3
- from typing import Any
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ if TYPE_CHECKING:
6
+ from .data_frame_table import DataFrameTable
4
7
 
5
8
  import polars as pl
6
9
  from rich.text import Text
@@ -10,7 +13,7 @@ from textual.renderables.bar import Bar
10
13
  from textual.screen import ModalScreen
11
14
  from textual.widgets import DataTable
12
15
 
13
- from .common import BOOLS, DtypeConfig, _format_row
16
+ from .common import NULL, NULL_DISPLAY, RIDX, DtypeConfig, format_row
14
17
 
15
18
 
16
19
  class TableScreen(ModalScreen):
@@ -28,86 +31,118 @@ class TableScreen(ModalScreen):
28
31
  TableScreen > DataTable {
29
32
  width: auto;
30
33
  min-width: 30;
31
- height: auto;
32
34
  border: solid $primary;
33
35
  }
34
36
  """
35
37
 
36
- def __init__(self, df: pl.DataFrame, id: str | None = None):
38
+ def __init__(self, dftable: DataFrameTable) -> None:
39
+ """Initialize the table screen.
40
+
41
+ Sets up the base modal screen with reference to the main DataFrameTable widget
42
+ and stores the DataFrame for display.
43
+
44
+ Args:
45
+ dftable: Reference to the parent DataFrameTable widget.
46
+
47
+ Returns:
48
+ None
49
+ """
37
50
  super().__init__()
38
- self.df = df
39
- self.id = id
51
+ self.df: pl.DataFrame = dftable.df # Polars DataFrame
52
+ self.dftable = dftable # DataFrameTable
53
+ self.thousand_separator = False # Whether to use thousand separators in numbers
40
54
 
41
55
  def compose(self) -> ComposeResult:
42
- """Create the table. Must be overridden by subclasses."""
43
- self.table = DataTable(zebra_stripes=True, id=self.id)
56
+ """Compose the table screen widget structure.
57
+
58
+ Creates and yields a DataTable widget for displaying tabular data.
59
+ Subclasses should override to customize table configuration.
60
+
61
+ Yields:
62
+ DataTable: The table widget for this screen.
63
+ """
64
+ self.table = DataTable(zebra_stripes=True)
44
65
  yield self.table
45
66
 
46
- def on_key(self, event):
67
+ def build_table(self) -> None:
68
+ """Build the table content.
69
+
70
+ Subclasses should implement this method to populate the DataTable
71
+ with appropriate columns and rows based on the specific screen's purpose.
72
+
73
+ Returns:
74
+ None
75
+ """
76
+ raise NotImplementedError("Subclasses must implement build_table method.")
77
+
78
+ def on_key(self, event) -> None:
79
+ """Handle key press events in the table screen.
80
+
81
+ Provides keyboard shortcuts for navigation and interaction, including q/Escape to close.
82
+ Prevents propagation of non-navigation keys to parent screens.
83
+
84
+ Args:
85
+ event: The key event object.
86
+
87
+ Returns:
88
+ None
89
+ """
47
90
  if event.key in ("q", "escape"):
48
91
  self.app.pop_screen()
49
92
  event.stop()
50
- # Prevent key events from propagating to parent screen,
51
- # except for the following default key bindings for DataTable
52
- elif event.key not in (
53
- "up",
54
- "down",
55
- "right",
56
- "left",
57
- "pageup",
58
- "pagedown",
59
- "ctrl+home",
60
- "ctrl+end",
61
- "home",
62
- "end",
63
- ):
93
+ elif event.key == "comma":
94
+ self.thousand_separator = not self.thousand_separator
95
+ self.build_table()
64
96
  event.stop()
65
97
 
66
98
  def _filter_or_highlight_selected_value(
67
- self, col_name_value: tuple[str, str] | None, action: str = "filter"
99
+ self, col_name_value: tuple[str, Any] | None, action: str = "filter"
68
100
  ) -> None:
69
- """Apply filter or highlight action by the selected value from the frequency table.
101
+ """Apply filter or highlight action by the selected value.
102
+
103
+ Filters or highlights rows in the main table based on a selected value from
104
+ this table (typically frequency or row detail). Updates the main table's display
105
+ and notifies the user of the action.
70
106
 
71
107
  Args:
72
- col_name: The name of the column to filter/highlight.
73
- col_value: The value to filter/highlight by.
74
- action: Either "filter" to filter visible rows, or "highlight" to select matching rows.
108
+ col_name_value: Tuple of (column_name, column_value) to filter/highlight by, or None.
109
+ action: Either "filter" to hide non-matching rows, or "highlight" to select matching rows. Defaults to "filter".
110
+
111
+ Returns:
112
+ None
75
113
  """
76
114
  if col_name_value is None:
77
115
  return
78
116
  col_name, col_value = col_name_value
79
117
 
80
118
  # Handle NULL values
81
- if col_value == "-":
119
+ if col_value == NULL:
82
120
  # Create expression for NULL values
83
121
  expr = pl.col(col_name).is_null()
84
- value_display = "[on $primary]NULL[/]"
122
+ value_display = "[$success]NULL[/]"
85
123
  else:
86
124
  # Create expression for the selected value
87
125
  expr = pl.col(col_name) == col_value
88
- value_display = f"[on $primary]{col_value}[/]"
126
+ value_display = f"[$success]{col_value}[/]"
89
127
 
90
- app = self.app
91
- matched_indices = set(
92
- app.df.with_row_index("__rid__").filter(expr)["__rid__"].to_list()
93
- )
128
+ matched_indices = set(self.dftable.df.with_row_index(RIDX).filter(expr)[RIDX].to_list())
94
129
 
95
130
  # Apply the action
96
131
  if action == "filter":
97
132
  # Update visible_rows to reflect the filter
98
- for i in range(len(app.visible_rows)):
99
- app.visible_rows[i] = i in matched_indices
133
+ for i in range(len(self.dftable.visible_rows)):
134
+ self.dftable.visible_rows[i] = i in matched_indices
100
135
  title = "Filter"
101
- message = f"Filtered by [on $primary]{col_name}[/] = {value_display}"
136
+ message = f"Filtered by [$accent]{col_name}[/] == [$success]{value_display}[/]"
102
137
  else: # action == "highlight"
103
138
  # Update selected_rows to reflect the highlights
104
- for i in range(len(app.selected_rows)):
105
- app.selected_rows[i] = i in matched_indices
139
+ for i in range(len(self.dftable.selected_rows)):
140
+ self.dftable.selected_rows[i] = i in matched_indices
106
141
  title = "Highlight"
107
- message = f"Highlighted [on $primary]{col_name}[/] = {value_display}"
142
+ message = f"Highlighted [$accent]{col_name}[/] == [$success]{value_display}[/]"
108
143
 
109
144
  # Recreate the table display with updated data in the main app
110
- app._setup_table()
145
+ self.dftable._setup_table()
111
146
 
112
147
  # Dismiss the frequency screen
113
148
  self.app.pop_screen()
@@ -120,37 +155,56 @@ class RowDetailScreen(TableScreen):
120
155
 
121
156
  CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "RowDetailScreen")
122
157
 
123
- def __init__(self, row_idx: int, df: pl.DataFrame):
124
- super().__init__(df, id="row-detail-table")
125
- self.row_idx = row_idx
158
+ def __init__(self, ridx: int, dftable):
159
+ super().__init__(dftable)
160
+ self.ridx = ridx
126
161
 
127
162
  def on_mount(self) -> None:
128
- """Create the detail table."""
163
+ """Initialize the row detail screen.
164
+
165
+ Populates the table with column names and values from the selected row
166
+ of the main DataFrame. Sets the table cursor type to "row".
167
+
168
+ Returns:
169
+ None
170
+ """
171
+ self.build_table()
172
+
173
+ def build_table(self) -> None:
174
+ """Build the row detail table."""
175
+ self.table.clear(columns=True)
129
176
  self.table.add_column("Column")
130
177
  self.table.add_column("Value")
131
178
 
132
179
  # Get all columns and values from the dataframe row
133
- for col, val, dtype in zip(
134
- self.df.columns, self.df.row(self.row_idx), self.df.dtypes
135
- ):
180
+ for col, val, dtype in zip(self.df.columns, self.df.row(self.ridx), self.df.dtypes):
136
181
  self.table.add_row(
137
- *_format_row([col, val], [None, dtype], apply_justify=False)
182
+ *format_row([col, val], [None, dtype], apply_justify=False, thousand_separator=self.thousand_separator)
138
183
  )
139
184
 
140
185
  self.table.cursor_type = "row"
141
186
 
142
- def on_key(self, event):
187
+ def on_key(self, event) -> None:
188
+ """Handle key press events in the row detail screen.
189
+
190
+ Supports 'v' for filtering and '"' for highlighting the main table
191
+ by the value in the selected row.
192
+
193
+ Args:
194
+ event: The key event object.
195
+
196
+ Returns:
197
+ None
198
+ """
143
199
  if event.key == "v":
144
200
  # Filter the main table by the selected value
145
- self._filter_or_highlight_selected_value(
146
- self._get_col_name_value(), action="filter"
147
- )
201
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="filter")
148
202
  event.stop()
149
203
  elif event.key == "quotation_mark": # '"'
150
204
  # Highlight the main table by the selected value
151
- self._filter_or_highlight_selected_value(
152
- self._get_col_name_value(), action="highlight"
153
- )
205
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="highlight")
206
+ event.stop()
207
+ elif event.key == "comma":
154
208
  event.stop()
155
209
 
156
210
  def _get_col_name_value(self) -> tuple[str, Any] | None:
@@ -159,58 +213,220 @@ class RowDetailScreen(TableScreen):
159
213
  return None # Invalid row
160
214
 
161
215
  col_name = self.df.columns[row_idx]
162
- col_value = self.df.item(self.row_idx, row_idx)
216
+ col_value = self.df.item(self.ridx, row_idx)
163
217
 
164
218
  return col_name, col_value
165
219
 
166
220
 
221
+ class StatisticsScreen(TableScreen):
222
+ """Modal screen to display statistics for a column or entire dataframe."""
223
+
224
+ CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "StatisticsScreen")
225
+
226
+ def __init__(self, dftable: DataFrameTable, col_idx: int | None = None):
227
+ super().__init__(dftable)
228
+ self.col_idx = col_idx # None for dataframe statistics, otherwise column index
229
+
230
+ def on_mount(self) -> None:
231
+ """Create the statistics table."""
232
+ self.build_table()
233
+
234
+ def build_table(self) -> None:
235
+ """Build the statistics table."""
236
+ self.table.clear(columns=True)
237
+
238
+ if self.col_idx is None:
239
+ # Dataframe statistics
240
+ self._build_dataframe_stats()
241
+ else:
242
+ # Column statistics
243
+ self._build_column_stats()
244
+
245
+ def _build_column_stats(self) -> None:
246
+ """Build statistics for a single column."""
247
+ col_name = self.df.columns[self.col_idx]
248
+ lf = self.df.lazy()
249
+
250
+ # Apply only to visible rows
251
+ if False in self.dftable.visible_rows:
252
+ lf = lf.filter(self.dftable.visible_rows)
253
+
254
+ # Get column statistics
255
+ stats_df = lf.select(pl.col(col_name)).collect().describe()
256
+ if len(stats_df) == 0:
257
+ return
258
+
259
+ col_dtype = stats_df.dtypes[1] # 'value' column
260
+ dc = DtypeConfig(col_dtype)
261
+
262
+ # Add statistics label column
263
+ self.table.add_column(Text("Statistic", justify="left"), key="statistic")
264
+
265
+ # Add value column with appropriate styling
266
+ self.table.add_column(Text(col_name, justify=dc.justify), key=col_name)
267
+
268
+ # Add rows
269
+ for row in stats_df.rows():
270
+ stat_label, stat_value = row
271
+ value = stat_value
272
+ if stat_value is None:
273
+ value = NULL_DISPLAY
274
+ elif dc.gtype == "int" and self.thousand_separator:
275
+ value = f"{stat_value:,}"
276
+ elif dc.gtype == "float":
277
+ value = f"{stat_value:,.2f}" if self.thousand_separator else f"{stat_value:.2f}"
278
+ else:
279
+ value = str(stat_value)
280
+
281
+ self.table.add_row(
282
+ Text(stat_label, justify="left"),
283
+ Text(value, style=dc.style, justify=dc.justify),
284
+ )
285
+
286
+ def _build_dataframe_stats(self) -> None:
287
+ """Build statistics for the entire dataframe."""
288
+ lf = self.df.lazy()
289
+
290
+ # Apply only to visible rows
291
+ if False in self.dftable.visible_rows:
292
+ lf = lf.filter(self.dftable.visible_rows)
293
+
294
+ # Get dataframe statistics
295
+ stats_df = lf.collect().describe()
296
+
297
+ # Add columns for each dataframe column with appropriate styling
298
+ for idx, (col_name, col_dtype) in enumerate(zip(stats_df.columns, stats_df.dtypes), 0):
299
+ if idx == 0:
300
+ # Add statistics label column (first column, no styling)
301
+ self.table.add_column("Statistic", key="statistic")
302
+ continue
303
+
304
+ dc = DtypeConfig(col_dtype)
305
+ self.table.add_column(Text(col_name, justify=dc.justify), key=col_name)
306
+
307
+ # Add rows
308
+ for row in stats_df.rows():
309
+ formatted_row = []
310
+
311
+ # Format remaining values with appropriate styling
312
+ for idx, stat_value in enumerate(row):
313
+ # First element is the statistic label
314
+ if idx == 0:
315
+ formatted_row.append(stat_value)
316
+ continue
317
+
318
+ col_dtype = stats_df.dtypes[idx]
319
+ dc = DtypeConfig(col_dtype)
320
+
321
+ value = stat_value
322
+ if stat_value is None:
323
+ value = NULL_DISPLAY
324
+ elif dc.gtype == "int" and self.thousand_separator:
325
+ value = f"{stat_value:,}"
326
+ elif dc.gtype == "float":
327
+ value = f"{stat_value:,.2f}" if self.thousand_separator else f"{stat_value:.2f}"
328
+ else:
329
+ value = str(stat_value)
330
+
331
+ formatted_row.append(Text(value, style=dc.style, justify=dc.justify))
332
+
333
+ self.table.add_row(*formatted_row)
334
+
335
+
167
336
  class FrequencyScreen(TableScreen):
168
337
  """Modal screen to display frequency of values in a column."""
169
338
 
170
339
  CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "FrequencyScreen")
171
340
 
172
- def __init__(self, col_idx: int, df: pl.DataFrame):
173
- super().__init__(df, id="frequency-table")
341
+ def __init__(self, col_idx: int, dftable: DataFrameTable):
342
+ super().__init__(dftable)
174
343
  self.col_idx = col_idx
175
344
  self.sorted_columns = {
176
345
  1: True, # Count
177
- 2: True, # %
178
346
  }
347
+ self.df: pl.DataFrame = (
348
+ dftable.df[dftable.df.columns[self.col_idx]].value_counts(sort=True).sort("count", descending=True)
349
+ )
179
350
 
180
351
  def on_mount(self) -> None:
181
352
  """Create the frequency table."""
182
- column = self.df.columns[self.col_idx]
183
- dtype = str(self.df.dtypes[self.col_idx])
353
+ self.build_table()
354
+
355
+ def on_key(self, event):
356
+ if event.key == "left_square_bracket": # '['
357
+ # Sort by current column in ascending order
358
+ self._sort_by_column(descending=False)
359
+ event.stop()
360
+ elif event.key == "right_square_bracket": # ']'
361
+ # Sort by current column in descending order
362
+ self._sort_by_column(descending=True)
363
+ event.stop()
364
+ elif event.key == "v":
365
+ # Filter the main table by the selected value
366
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="filter")
367
+ event.stop()
368
+ elif event.key == "quotation_mark": # '"'
369
+ # Highlight the main table by the selected value
370
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="highlight")
371
+ event.stop()
372
+
373
+ def build_table(self) -> None:
374
+ """Build the frequency table."""
375
+ self.table.clear(columns=True)
376
+
377
+ # Create frequency table
378
+ column = self.dftable.df.columns[self.col_idx]
379
+ dtype = self.dftable.df.dtypes[self.col_idx]
184
380
  dc = DtypeConfig(dtype)
185
381
 
186
382
  # Calculate frequencies using Polars
187
- freq_df = self.df[column].value_counts(sort=True).sort("count", descending=True)
188
- total_count = len(self.df)
383
+ total_count = len(self.dftable.df)
384
+
385
+ # Add column headers with sort indicators
386
+ columns = [
387
+ (column, "Value", 0),
388
+ ("Count", "Count", 1),
389
+ ("%", "%", 2),
390
+ ("Histogram", "Histogram", 3),
391
+ ]
392
+
393
+ for display_name, key, col_idx_num in columns:
394
+ # Check if this column is sorted and add indicator
395
+ if col_idx_num in self.sorted_columns:
396
+ descending = self.sorted_columns[col_idx_num]
397
+ sort_indicator = " ▼" if descending else " ▲"
398
+ header_text = display_name + sort_indicator
399
+ else:
400
+ header_text = display_name
189
401
 
190
- # Create frequency table
191
- self.table.add_column(Text(column, justify=dc.justify), key=column)
192
- self.table.add_column(Text("Count", justify="right"), key="Count")
193
- self.table.add_column(Text("%", justify="right"), key="%")
194
- self.table.add_column(Text("Histogram", justify="left"), key="Histogram")
402
+ justify = dc.justify if col_idx_num == 0 else ("right" if col_idx_num in (1, 2) else "left")
403
+ self.table.add_column(Text(header_text, justify=justify), key=key)
195
404
 
196
405
  # Get style config for Int64 and Float64
197
- ds_int = DtypeConfig("Int64")
198
- ds_float = DtypeConfig("Float64")
406
+ ds_int = DtypeConfig(pl.Int64)
407
+ ds_float = DtypeConfig(pl.Float64)
199
408
 
200
409
  # Add rows to the frequency table
201
- for row_idx, row in enumerate(freq_df.rows()):
202
- value, count = row
410
+ for row_idx, row in enumerate(self.df.rows()):
411
+ column, count = row
203
412
  percentage = (count / total_count) * 100
204
413
 
414
+ if column is None:
415
+ value = NULL_DISPLAY
416
+ elif dc.gtype == "int" and self.thousand_separator:
417
+ value = f"{column:,}"
418
+ elif dc.gtype == "float":
419
+ value = f"{column:,.2f}" if self.thousand_separator else f"{column:.3f}"
420
+ else:
421
+ value = str(column)
422
+
205
423
  self.table.add_row(
424
+ Text(value, style=dc.style, justify=dc.justify),
206
425
  Text(
207
- "-" if value is None else str(value),
208
- style=dc.style,
209
- justify=dc.justify,
426
+ f"{count:,}" if self.thousand_separator else str(count), style=ds_int.style, justify=ds_int.justify
210
427
  ),
211
- Text(str(count), style=ds_int.style, justify=ds_int.justify),
212
428
  Text(
213
- f"{percentage:.2f}",
429
+ f"{percentage:,.3f}" if self.thousand_separator else f"{percentage:.3f}",
214
430
  style=ds_float.style,
215
431
  justify=ds_float.justify,
216
432
  ),
@@ -226,90 +442,49 @@ class FrequencyScreen(TableScreen):
226
442
  Text("Total", style="bold", justify=dc.justify),
227
443
  Text(f"{total_count:,}", style="bold", justify="right"),
228
444
  Text("100.00", style="bold", justify="right"),
445
+ Bar(
446
+ highlight_range=(0.0, 10),
447
+ width=10,
448
+ ),
229
449
  key="total",
230
450
  )
231
451
 
232
- def on_key(self, event):
233
- if event.key == "left_square_bracket": # '['
234
- # Sort by current column in ascending order
235
- self._sort_by_column(descending=False)
236
- event.stop()
237
- elif event.key == "right_square_bracket": # ']'
238
- # Sort by current column in descending order
239
- self._sort_by_column(descending=True)
240
- event.stop()
241
- elif event.key == "v":
242
- # Filter the main table by the selected value
243
- self._filter_or_highlight_selected_value(
244
- self._get_col_name_value(), action="filter"
245
- )
246
- event.stop()
247
- elif event.key == "quotation_mark": # '"'
248
- # Highlight the main table by the selected value
249
- self._filter_or_highlight_selected_value(
250
- self._get_col_name_value(), action="highlight"
251
- )
252
- event.stop()
253
-
254
452
  def _sort_by_column(self, descending: bool) -> None:
255
453
  """Sort the dataframe by the selected column and refresh the main table."""
256
- freq_table = self.query_one(DataTable)
257
-
258
- col_idx = freq_table.cursor_column
259
- col_dtype = "String"
454
+ row_idx, col_idx = self.table.cursor_coordinate
455
+ col_sort = col_idx if col_idx == 0 else 1
260
456
 
261
- sort_dir = self.sorted_columns.get(col_idx)
457
+ sort_dir = self.sorted_columns.get(col_sort)
262
458
  if sort_dir is not None:
263
459
  # If already sorted in the same direction, do nothing
264
460
  if sort_dir == descending:
265
- self.notify(
266
- "Already sorted in that order", title="Sort", severity="warning"
267
- )
461
+ self.notify("Already sorted in that order", title="Sort", severity="warning")
268
462
  return
269
463
 
270
464
  self.sorted_columns.clear()
271
- self.sorted_columns[col_idx] = descending
272
-
273
- if col_idx == 0:
274
- col_name = self.df.columns[self.col_idx]
275
- col_dtype = str(self.df.dtypes[self.col_idx])
276
- elif col_idx == 1:
277
- col_name = "Count"
278
- col_dtype = "Int64"
279
- elif col_idx == 2:
280
- col_name = "%"
281
- col_dtype = "Float64"
282
-
283
- def key_fun(freq_col):
284
- col_value = freq_col.plain
285
-
286
- if col_dtype == "Int64":
287
- return int(col_value)
288
- elif col_dtype == "Float64":
289
- return float(col_value)
290
- elif col_dtype == "Boolean":
291
- return BOOLS[col_value]
292
- else:
293
- return col_value
465
+ self.sorted_columns[col_sort] = descending
294
466
 
295
- # Sort the table
296
- freq_table.sort(
297
- col_name, key=lambda freq_col: key_fun(freq_col), reverse=descending
298
- )
467
+ col_name = self.df.columns[col_sort]
468
+ self.df = self.df.sort(col_name, descending=descending, nulls_last=True)
469
+
470
+ # Rebuild the frequency table
471
+ self.table.clear(columns=True)
472
+ self.build_table()
473
+
474
+ self.table.move_cursor(row=row_idx, column=col_idx)
299
475
 
300
- # Notify the user
301
- order = "desc" if descending else "asc"
302
- self.notify(f"Sorted by [on $primary]{col_name}[/] ({order})", title="Sort")
476
+ # order = "desc" if descending else "asc"
477
+ # self.notify(f"Sorted by [on $primary]{col_name}[/] ({order})", title="Sort")
303
478
 
304
479
  def _get_col_name_value(self) -> tuple[str, str] | None:
305
480
  row_idx = self.table.cursor_row
306
- if row_idx >= len(self.df.columns):
307
- return None # Skip total row
481
+ if row_idx >= len(self.df[:, 0]): # first column
482
+ return None # Skip the last `Total` row
308
483
 
309
- col_name = self.df.columns[self.col_idx]
310
- col_dtype = self.df.dtypes[self.col_idx]
484
+ col_name = self.dftable.df.columns[self.col_idx]
485
+ col_dtype = self.dftable.df.dtypes[self.col_idx]
311
486
 
312
487
  cell_value = self.table.get_cell_at(Coordinate(row_idx, 0))
313
- col_value = cell_value.plain
488
+ col_value = NULL if cell_value.plain == NULL_DISPLAY else DtypeConfig(col_dtype).convert(cell_value.plain)
314
489
 
315
- return col_name, DtypeConfig(col_dtype).convert(col_value)
490
+ return col_name, col_value