dataframe-textual 0.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as it appears in its respective public registry.
@@ -13,7 +13,7 @@ from textual.renderables.bar import Bar
13
13
  from textual.screen import ModalScreen
14
14
  from textual.widgets import DataTable
15
15
 
16
- from .common import DtypeConfig, _format_row
16
+ from .common import NULL, NULL_DISPLAY, RIDX, DtypeConfig, format_float, format_row
17
17
 
18
18
 
19
19
  class TableScreen(ModalScreen):
@@ -30,69 +30,104 @@ class TableScreen(ModalScreen):
30
30
 
31
31
  TableScreen > DataTable {
32
32
  width: auto;
33
- min-width: 30;
34
33
  height: auto;
35
34
  border: solid $primary;
35
+ max-width: 100%;
36
+ overflow: auto;
36
37
  }
37
38
  """
38
39
 
39
- def __init__(self, dftable: DataFrameTable):
40
+ def __init__(self, dftable: "DataFrameTable") -> None:
41
+ """Initialize the table screen.
42
+
43
+ Sets up the base modal screen with reference to the main DataFrameTable widget
44
+ and stores the DataFrame for display.
45
+
46
+ Args:
47
+ dftable: Reference to the parent DataFrameTable widget.
48
+
49
+ Returns:
50
+ None
51
+ """
40
52
  super().__init__()
41
- self.df: pl.DataFrame = dftable.df # Polars DataFrame
42
53
  self.dftable = dftable # DataFrameTable
54
+ self.df: pl.DataFrame = dftable.df # Polars DataFrame
55
+ self.thousand_separator = False # Whether to use thousand separators in numbers
43
56
 
44
57
  def compose(self) -> ComposeResult:
45
- """Create the table. Must be overridden by subclasses."""
58
+ """Compose the table screen widget structure.
59
+
60
+ Creates and yields a DataTable widget for displaying tabular data.
61
+ Subclasses should override to customize table configuration.
62
+
63
+ Yields:
64
+ DataTable: The table widget for this screen.
65
+ """
46
66
  self.table = DataTable(zebra_stripes=True)
47
67
  yield self.table
48
68
 
49
- def on_key(self, event):
69
+ def build_table(self) -> None:
70
+ """Build the table content.
71
+
72
+ Subclasses should implement this method to populate the DataTable
73
+ with appropriate columns and rows based on the specific screen's purpose.
74
+
75
+ Returns:
76
+ None
77
+ """
78
+ raise NotImplementedError("Subclasses must implement build_table method.")
79
+
80
+ def on_key(self, event) -> None:
81
+ """Handle key press events in the table screen.
82
+
83
+ Provides keyboard shortcuts for navigation and interaction, including q/Escape to close.
84
+ Prevents propagation of non-navigation keys to parent screens.
85
+
86
+ Args:
87
+ event: The key event object.
88
+
89
+ Returns:
90
+ None
91
+ """
50
92
  if event.key in ("q", "escape"):
51
93
  self.app.pop_screen()
52
94
  event.stop()
53
- # Prevent key events from propagating to parent screen,
54
- # except for the following default key bindings for DataTable
55
- elif event.key not in (
56
- "up",
57
- "down",
58
- "right",
59
- "left",
60
- "pageup",
61
- "pagedown",
62
- "ctrl+home",
63
- "ctrl+end",
64
- "home",
65
- "end",
66
- ):
95
+ elif event.key == "comma":
96
+ self.thousand_separator = not self.thousand_separator
97
+ self.build_table()
67
98
  event.stop()
68
99
 
69
100
  def _filter_or_highlight_selected_value(
70
- self, col_name_value: tuple[str, str] | None, action: str = "filter"
101
+ self, col_name_value: tuple[str, Any] | None, action: str = "filter"
71
102
  ) -> None:
72
- """Apply filter or highlight action by the selected value from the frequency table.
103
+ """Apply filter or highlight action by the selected value.
104
+
105
+ Filters or highlights rows in the main table based on a selected value from
106
+ this table (typically frequency or row detail). Updates the main table's display
107
+ and notifies the user of the action.
73
108
 
74
109
  Args:
75
- col_name: The name of the column to filter/highlight.
76
- col_value: The value to filter/highlight by.
77
- action: Either "filter" to filter visible rows, or "highlight" to select matching rows.
110
+ col_name_value: Tuple of (column_name, column_value) to filter/highlight by, or None.
111
+ action: Either "filter" to hide non-matching rows, or "highlight" to select matching rows. Defaults to "filter".
112
+
113
+ Returns:
114
+ None
78
115
  """
79
116
  if col_name_value is None:
80
117
  return
81
118
  col_name, col_value = col_name_value
82
119
 
83
120
  # Handle NULL values
84
- if col_value == "-":
121
+ if col_value == NULL:
85
122
  # Create expression for NULL values
86
123
  expr = pl.col(col_name).is_null()
87
- value_display = "[on $primary]NULL[/]"
124
+ value_display = "[$success]NULL[/]"
88
125
  else:
89
126
  # Create expression for the selected value
90
127
  expr = pl.col(col_name) == col_value
91
- value_display = f"[on $primary]{col_value}[/]"
128
+ value_display = f"[$success]{col_value}[/]"
92
129
 
93
- matched_indices = set(
94
- self.dftable.df.with_row_index("__rid__").filter(expr)["__rid__"].to_list()
95
- )
130
+ matched_indices = set(self.dftable.df.with_row_index(RIDX).filter(expr)[RIDX].to_list())
96
131
 
97
132
  # Apply the action
98
133
  if action == "filter":
@@ -100,13 +135,13 @@ class TableScreen(ModalScreen):
100
135
  for i in range(len(self.dftable.visible_rows)):
101
136
  self.dftable.visible_rows[i] = i in matched_indices
102
137
  title = "Filter"
103
- message = f"Filtered by [on $primary]{col_name}[/] = {value_display}"
138
+ message = f"Filtered by [$accent]{col_name}[/] == [$success]{value_display}[/]"
104
139
  else: # action == "highlight"
105
140
  # Update selected_rows to reflect the highlights
106
141
  for i in range(len(self.dftable.selected_rows)):
107
142
  self.dftable.selected_rows[i] = i in matched_indices
108
143
  title = "Highlight"
109
- message = f"Highlighted [on $primary]{col_name}[/] = {value_display}"
144
+ message = f"Highlighted [$accent]{col_name}[/] == [$success]{value_display}[/]"
110
145
 
111
146
  # Recreate the table display with updated data in the main app
112
147
  self.dftable._setup_table()
@@ -122,37 +157,61 @@ class RowDetailScreen(TableScreen):
122
157
 
123
158
  CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "RowDetailScreen")
124
159
 
125
- def __init__(self, row_idx: int, dftable):
160
+ def __init__(self, ridx: int, dftable):
126
161
  super().__init__(dftable)
127
- self.row_idx = row_idx
162
+ self.ridx = ridx
128
163
 
129
164
  def on_mount(self) -> None:
130
- """Create the detail table."""
165
+ """Initialize the row detail screen.
166
+
167
+ Populates the table with column names and values from the selected row
168
+ of the main DataFrame. Sets the table cursor type to "row".
169
+
170
+ Returns:
171
+ None
172
+ """
173
+ self.build_table()
174
+
175
+ def build_table(self) -> None:
176
+ """Build the row detail table."""
177
+ self.table.clear(columns=True)
131
178
  self.table.add_column("Column")
132
179
  self.table.add_column("Value")
133
180
 
134
181
  # Get all columns and values from the dataframe row
135
- for col, val, dtype in zip(
136
- self.df.columns, self.df.row(self.row_idx), self.df.dtypes
137
- ):
182
+ for col, val, dtype in zip(self.df.columns, self.df.row(self.ridx), self.df.dtypes):
138
183
  self.table.add_row(
139
- *_format_row([col, val], [None, dtype], apply_justify=False)
184
+ *format_row(
185
+ [col, val],
186
+ [None, dtype],
187
+ apply_justify=False,
188
+ thousand_separator=self.thousand_separator,
189
+ )
140
190
  )
141
191
 
142
192
  self.table.cursor_type = "row"
143
193
 
144
- def on_key(self, event):
194
+ def on_key(self, event) -> None:
195
+ """Handle key press events in the row detail screen.
196
+
197
+ Supports 'v' for filtering and '"' for highlighting the main table
198
+ by the value in the selected row.
199
+
200
+ Args:
201
+ event: The key event object.
202
+
203
+ Returns:
204
+ None
205
+ """
145
206
  if event.key == "v":
146
207
  # Filter the main table by the selected value
147
- self._filter_or_highlight_selected_value(
148
- self._get_col_name_value(), action="filter"
149
- )
208
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="filter")
150
209
  event.stop()
151
210
  elif event.key == "quotation_mark": # '"'
152
211
  # Highlight the main table by the selected value
153
- self._filter_or_highlight_selected_value(
154
- self._get_col_name_value(), action="highlight"
155
- )
212
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="highlight")
213
+ event.stop()
214
+ elif event.key == "comma":
156
215
  event.stop()
157
216
 
158
217
  def _get_col_name_value(self) -> tuple[str, Any] | None:
@@ -161,28 +220,147 @@ class RowDetailScreen(TableScreen):
161
220
  return None # Invalid row
162
221
 
163
222
  col_name = self.df.columns[row_idx]
164
- col_value = self.df.item(self.row_idx, row_idx)
223
+ col_value = self.df.item(self.ridx, row_idx)
165
224
 
166
225
  return col_name, col_value
167
226
 
168
227
 
228
+ class StatisticsScreen(TableScreen):
229
+ """Modal screen to display statistics for a column or entire dataframe."""
230
+
231
+ CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "StatisticsScreen")
232
+
233
+ def __init__(self, dftable: "DataFrameTable", col_idx: int | None = None):
234
+ super().__init__(dftable)
235
+ self.col_idx = col_idx # None for dataframe statistics, otherwise column index
236
+
237
+ def on_mount(self) -> None:
238
+ """Create the statistics table."""
239
+ self.build_table()
240
+
241
+ def build_table(self) -> None:
242
+ """Build the statistics table."""
243
+ self.table.clear(columns=True)
244
+
245
+ if self.col_idx is None:
246
+ # Dataframe statistics
247
+ self._build_dataframe_stats()
248
+ self.table.cursor_type = "column"
249
+ else:
250
+ # Column statistics
251
+ self._build_column_stats()
252
+ self.table.cursor_type = "row"
253
+
254
+ def _build_column_stats(self) -> None:
255
+ """Build statistics for a single column."""
256
+ col_name = self.df.columns[self.col_idx]
257
+ lf = self.df.lazy()
258
+
259
+ # Apply only to visible rows
260
+ if False in self.dftable.visible_rows:
261
+ lf = lf.filter(self.dftable.visible_rows)
262
+
263
+ # Get column statistics
264
+ stats_df = lf.select(pl.col(col_name)).collect().describe()
265
+ if len(stats_df) == 0:
266
+ return
267
+
268
+ col_dtype = stats_df.dtypes[1] # 'value' column
269
+ dc = DtypeConfig(col_dtype)
270
+
271
+ # Add statistics label column
272
+ self.table.add_column(Text("Statistic", justify="left"), key="statistic")
273
+
274
+ # Add value column with appropriate styling
275
+ self.table.add_column(Text(col_name, justify=dc.justify), key=col_name)
276
+
277
+ # Add rows
278
+ for row in stats_df.rows():
279
+ stat_label, stat_value = row
280
+ value = stat_value
281
+ if stat_value is None:
282
+ value = NULL_DISPLAY
283
+ elif dc.gtype == "integer" and self.thousand_separator:
284
+ value = f"{stat_value:,}"
285
+ elif dc.gtype == "float":
286
+ value = format_float(stat_value, self.thousand_separator)
287
+ else:
288
+ value = str(stat_value)
289
+
290
+ self.table.add_row(
291
+ Text(stat_label, justify="left"),
292
+ Text(value, style=dc.style, justify=dc.justify),
293
+ )
294
+
295
+ def _build_dataframe_stats(self) -> None:
296
+ """Build statistics for the entire dataframe."""
297
+ lf = self.df.lazy()
298
+
299
+ # Apply only to visible rows
300
+ if False in self.dftable.visible_rows:
301
+ lf = lf.filter(self.dftable.visible_rows)
302
+
303
+ # Apply only to non-hidden columns
304
+ if self.dftable.hidden_columns:
305
+ lf = lf.select(pl.exclude(self.dftable.hidden_columns))
306
+
307
+ # Get dataframe statistics
308
+ stats_df = lf.collect().describe()
309
+
310
+ # Add columns for each dataframe column with appropriate styling
311
+ for idx, (col_name, col_dtype) in enumerate(zip(stats_df.columns, stats_df.dtypes), 0):
312
+ if idx == 0:
313
+ # Add statistics label column (first column, no styling)
314
+ self.table.add_column("Statistic", key="statistic")
315
+ continue
316
+
317
+ dc = DtypeConfig(col_dtype)
318
+ self.table.add_column(Text(col_name, justify=dc.justify), key=col_name)
319
+
320
+ # Add rows
321
+ for row in stats_df.rows():
322
+ formatted_row = []
323
+
324
+ # Format remaining values with appropriate styling
325
+ for idx, stat_value in enumerate(row):
326
+ # First element is the statistic label
327
+ if idx == 0:
328
+ formatted_row.append(stat_value)
329
+ continue
330
+
331
+ col_dtype = stats_df.dtypes[idx]
332
+ dc = DtypeConfig(col_dtype)
333
+
334
+ value = stat_value
335
+ if stat_value is None:
336
+ value = NULL_DISPLAY
337
+ elif dc.gtype == "integer" and self.thousand_separator:
338
+ value = f"{stat_value:,}"
339
+ elif dc.gtype == "float":
340
+ value = format_float(stat_value, self.thousand_separator)
341
+ else:
342
+ value = str(stat_value)
343
+
344
+ formatted_row.append(Text(value, style=dc.style, justify=dc.justify))
345
+
346
+ self.table.add_row(*formatted_row)
347
+
348
+
169
349
  class FrequencyScreen(TableScreen):
170
350
  """Modal screen to display frequency of values in a column."""
171
351
 
172
352
  CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "FrequencyScreen")
173
353
 
174
- def __init__(self, col_idx: int, dftable: DataFrameTable):
354
+ def __init__(self, col_idx: int, dftable: "DataFrameTable") -> None:
175
355
  super().__init__(dftable)
176
356
  self.col_idx = col_idx
177
357
  self.sorted_columns = {
178
358
  1: True, # Count
179
- 2: True, # %
180
359
  }
181
- self.df: pl.DataFrame = (
182
- dftable.df[dftable.df.columns[self.col_idx]]
183
- .value_counts(sort=True)
184
- .sort("count", descending=True)
185
- )
360
+
361
+ df = dftable.df.filter(dftable.visible_rows) if False in dftable.visible_rows else dftable.df
362
+ self.total_count = len(df)
363
+ self.df: pl.DataFrame = df[df.columns[self.col_idx]].value_counts(sort=True).sort("count", descending=True)
186
364
 
187
365
  def on_mount(self) -> None:
188
366
  """Create the frequency table."""
@@ -199,49 +377,67 @@ class FrequencyScreen(TableScreen):
199
377
  event.stop()
200
378
  elif event.key == "v":
201
379
  # Filter the main table by the selected value
202
- self._filter_or_highlight_selected_value(
203
- self._get_col_name_value(), action="filter"
204
- )
380
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="filter")
205
381
  event.stop()
206
382
  elif event.key == "quotation_mark": # '"'
207
383
  # Highlight the main table by the selected value
208
- self._filter_or_highlight_selected_value(
209
- self._get_col_name_value(), action="highlight"
210
- )
384
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="highlight")
211
385
  event.stop()
212
386
 
213
387
  def build_table(self) -> None:
388
+ """Build the frequency table."""
389
+ self.table.clear(columns=True)
390
+
214
391
  # Create frequency table
215
392
  column = self.dftable.df.columns[self.col_idx]
216
- dtype = str(self.dftable.df.dtypes[self.col_idx])
393
+ dtype = self.dftable.df.dtypes[self.col_idx]
217
394
  dc = DtypeConfig(dtype)
218
395
 
219
- # Calculate frequencies using Polars
220
- total_count = len(self.dftable.df)
221
-
222
- self.table.add_column(Text(column, justify=dc.justify), key=column)
223
- self.table.add_column(Text("Count", justify="right"), key="Count")
224
- self.table.add_column(Text("%", justify="right"), key="%")
225
- self.table.add_column(Text("Histogram", justify="left"), key="Histogram")
396
+ # Add column headers with sort indicators
397
+ columns = [
398
+ (column, "Value", 0),
399
+ ("Count", "Count", 1),
400
+ ("%", "%", 2),
401
+ ("Histogram", "Histogram", 3),
402
+ ]
403
+
404
+ for display_name, key, col_idx_num in columns:
405
+ # Check if this column is sorted and add indicator
406
+ if col_idx_num in self.sorted_columns:
407
+ descending = self.sorted_columns[col_idx_num]
408
+ sort_indicator = " ▼" if descending else " ▲"
409
+ header_text = display_name + sort_indicator
410
+ else:
411
+ header_text = display_name
412
+
413
+ justify = dc.justify if col_idx_num == 0 else ("right" if col_idx_num in (1, 2) else "left")
414
+ self.table.add_column(Text(header_text, justify=justify), key=key)
226
415
 
227
416
  # Get style config for Int64 and Float64
228
- ds_int = DtypeConfig("Int64")
229
- ds_float = DtypeConfig("Float64")
417
+ ds_int = DtypeConfig(pl.Int64)
418
+ ds_float = DtypeConfig(pl.Float64)
230
419
 
231
420
  # Add rows to the frequency table
232
421
  for row_idx, row in enumerate(self.df.rows()):
233
- value, count = row
234
- percentage = (count / total_count) * 100
422
+ column, count = row
423
+ percentage = (count / self.total_count) * 100
424
+
425
+ if column is None:
426
+ value = NULL_DISPLAY
427
+ elif dc.gtype == "integer" and self.thousand_separator:
428
+ value = f"{column:,}"
429
+ elif dc.gtype == "float":
430
+ value = format_float(column, self.thousand_separator)
431
+ else:
432
+ value = str(column)
235
433
 
236
434
  self.table.add_row(
435
+ Text(value, style=dc.style, justify=dc.justify),
237
436
  Text(
238
- "-" if value is None else str(value),
239
- style=dc.style,
240
- justify=dc.justify,
437
+ f"{count:,}" if self.thousand_separator else str(count), style=ds_int.style, justify=ds_int.justify
241
438
  ),
242
- Text(str(count), style=ds_int.style, justify=ds_int.justify),
243
439
  Text(
244
- f"{percentage:.2f}",
440
+ format_float(percentage, self.thousand_separator),
245
441
  style=ds_float.style,
246
442
  justify=ds_float.justify,
247
443
  ),
@@ -255,8 +451,16 @@ class FrequencyScreen(TableScreen):
255
451
  # Add a total row
256
452
  self.table.add_row(
257
453
  Text("Total", style="bold", justify=dc.justify),
258
- Text(f"{total_count:,}", style="bold", justify="right"),
259
- Text("100.00", style="bold", justify="right"),
454
+ Text(
455
+ f"{self.total_count:,}" if self.thousand_separator else str(self.total_count),
456
+ style="bold",
457
+ justify="right",
458
+ ),
459
+ Text(
460
+ format_float(100.0, self.thousand_separator),
461
+ style="bold",
462
+ justify="right",
463
+ ),
260
464
  Bar(
261
465
  highlight_range=(0.0, 10),
262
466
  width=10,
@@ -266,26 +470,19 @@ class FrequencyScreen(TableScreen):
266
470
 
267
471
  def _sort_by_column(self, descending: bool) -> None:
268
472
  """Sort the dataframe by the selected column and refresh the main table."""
269
-
270
- self.log(self.df)
271
-
272
473
  row_idx, col_idx = self.table.cursor_coordinate
273
474
  col_sort = col_idx if col_idx == 0 else 1
274
475
 
275
- sort_dir = self.sorted_columns.get(col_sort)
276
- if sort_dir is not None:
476
+ if self.sorted_columns.get(col_sort) == descending:
277
477
  # If already sorted in the same direction, do nothing
278
- if sort_dir == descending:
279
- self.notify(
280
- "Already sorted in that order", title="Sort", severity="warning"
281
- )
282
- return
478
+ # self.notify("Already sorted in that order", title="Sort", severity="warning")
479
+ return
283
480
 
284
481
  self.sorted_columns.clear()
285
482
  self.sorted_columns[col_sort] = descending
286
483
 
287
484
  col_name = self.df.columns[col_sort]
288
- self.df = self.df.sort(col_name, descending=descending)
485
+ self.df = self.df.sort(col_name, descending=descending, nulls_last=True)
289
486
 
290
487
  # Rebuild the frequency table
291
488
  self.table.clear(columns=True)
@@ -293,19 +490,18 @@ class FrequencyScreen(TableScreen):
293
490
 
294
491
  self.table.move_cursor(row=row_idx, column=col_idx)
295
492
 
296
- # Notify the user
297
- order = "desc" if descending else "asc"
298
- self.notify(f"Sorted by [on $primary]{col_name}[/] ({order})", title="Sort")
493
+ # order = "desc" if descending else "asc"
494
+ # self.notify(f"Sorted by [on $primary]{col_name}[/] ({order})", title="Sort")
299
495
 
300
496
  def _get_col_name_value(self) -> tuple[str, str] | None:
301
497
  row_idx = self.table.cursor_row
302
- if row_idx >= len(self.df.columns):
303
- return None # Skip total row
498
+ if row_idx >= len(self.df[:, 0]): # first column
499
+ return None # Skip the last `Total` row
304
500
 
305
- col_name = self.df.columns[self.col_idx]
306
- col_dtype = self.df.dtypes[self.col_idx]
501
+ col_name = self.dftable.df.columns[self.col_idx]
502
+ col_dtype = self.dftable.df.dtypes[self.col_idx]
307
503
 
308
504
  cell_value = self.table.get_cell_at(Coordinate(row_idx, 0))
309
- col_value = cell_value.plain
505
+ col_value = NULL if cell_value.plain == NULL_DISPLAY else DtypeConfig(col_dtype).convert(cell_value.plain)
310
506
 
311
- return col_name, DtypeConfig(col_dtype).convert(col_value)
507
+ return col_name, col_value