dataframe-textual 0.3.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,7 +13,7 @@ from textual.renderables.bar import Bar
13
13
  from textual.screen import ModalScreen
14
14
  from textual.widgets import DataTable
15
15
 
16
- from .common import DtypeConfig, _format_row
16
+ from .common import NULL, NULL_DISPLAY, RIDX, DtypeConfig, format_row
17
17
 
18
18
 
19
19
  class TableScreen(ModalScreen):
@@ -31,68 +31,101 @@ class TableScreen(ModalScreen):
31
31
  TableScreen > DataTable {
32
32
  width: auto;
33
33
  min-width: 30;
34
- height: auto;
35
34
  border: solid $primary;
36
35
  }
37
36
  """
38
37
 
39
- def __init__(self, dftable: DataFrameTable):
38
+ def __init__(self, dftable: DataFrameTable) -> None:
39
+ """Initialize the table screen.
40
+
41
+ Sets up the base modal screen with reference to the main DataFrameTable widget
42
+ and stores the DataFrame for display.
43
+
44
+ Args:
45
+ dftable: Reference to the parent DataFrameTable widget.
46
+
47
+ Returns:
48
+ None
49
+ """
40
50
  super().__init__()
41
51
  self.df: pl.DataFrame = dftable.df # Polars DataFrame
42
52
  self.dftable = dftable # DataFrameTable
53
+ self.thousand_separator = False # Whether to use thousand separators in numbers
43
54
 
44
55
  def compose(self) -> ComposeResult:
45
- """Create the table. Must be overridden by subclasses."""
56
+ """Compose the table screen widget structure.
57
+
58
+ Creates and yields a DataTable widget for displaying tabular data.
59
+ Subclasses should override to customize table configuration.
60
+
61
+ Yields:
62
+ DataTable: The table widget for this screen.
63
+ """
46
64
  self.table = DataTable(zebra_stripes=True)
47
65
  yield self.table
48
66
 
49
- def on_key(self, event):
67
+ def build_table(self) -> None:
68
+ """Build the table content.
69
+
70
+ Subclasses should implement this method to populate the DataTable
71
+ with appropriate columns and rows based on the specific screen's purpose.
72
+
73
+ Returns:
74
+ None
75
+ """
76
+ raise NotImplementedError("Subclasses must implement build_table method.")
77
+
78
+ def on_key(self, event) -> None:
79
+ """Handle key press events in the table screen.
80
+
81
+ Provides keyboard shortcuts for navigation and interaction, including q/Escape to close.
82
+ Prevents propagation of non-navigation keys to parent screens.
83
+
84
+ Args:
85
+ event: The key event object.
86
+
87
+ Returns:
88
+ None
89
+ """
50
90
  if event.key in ("q", "escape"):
51
91
  self.app.pop_screen()
52
92
  event.stop()
53
- # Prevent key events from propagating to parent screen,
54
- # except for the following default key bindings for DataTable
55
- elif event.key not in (
56
- "up",
57
- "down",
58
- "right",
59
- "left",
60
- "pageup",
61
- "pagedown",
62
- "ctrl+home",
63
- "ctrl+end",
64
- "home",
65
- "end",
66
- ):
93
+ elif event.key == "comma":
94
+ self.thousand_separator = not self.thousand_separator
95
+ self.build_table()
67
96
  event.stop()
68
97
 
69
98
  def _filter_or_highlight_selected_value(
70
- self, col_name_value: tuple[str, str] | None, action: str = "filter"
99
+ self, col_name_value: tuple[str, Any] | None, action: str = "filter"
71
100
  ) -> None:
72
- """Apply filter or highlight action by the selected value from the frequency table.
101
+ """Apply filter or highlight action by the selected value.
102
+
103
+ Filters or highlights rows in the main table based on a selected value from
104
+ this table (typically frequency or row detail). Updates the main table's display
105
+ and notifies the user of the action.
73
106
 
74
107
  Args:
75
- col_name: The name of the column to filter/highlight.
76
- col_value: The value to filter/highlight by.
77
- action: Either "filter" to filter visible rows, or "highlight" to select matching rows.
108
+ col_name_value: Tuple of (column_name, column_value) to filter/highlight by, or None.
109
+ action: Either "filter" to hide non-matching rows, or "highlight" to select matching rows. Defaults to "filter".
110
+
111
+ Returns:
112
+ None
78
113
  """
79
114
  if col_name_value is None:
80
115
  return
81
116
  col_name, col_value = col_name_value
82
117
 
83
118
  # Handle NULL values
84
- if col_value == "-":
119
+ if col_value == NULL:
85
120
  # Create expression for NULL values
86
121
  expr = pl.col(col_name).is_null()
87
- value_display = "[on $primary]NULL[/]"
122
+ value_display = "[$success]NULL[/]"
88
123
  else:
89
124
  # Create expression for the selected value
90
125
  expr = pl.col(col_name) == col_value
91
- value_display = f"[on $primary]{col_value}[/]"
126
+ value_display = f"[$success]{col_value}[/]"
92
127
 
93
- matched_indices = set(
94
- self.dftable.df.with_row_index("__rid__").filter(expr)["__rid__"].to_list()
95
- )
128
+ matched_indices = set(self.dftable.df.with_row_index(RIDX).filter(expr)[RIDX].to_list())
96
129
 
97
130
  # Apply the action
98
131
  if action == "filter":
@@ -100,13 +133,13 @@ class TableScreen(ModalScreen):
100
133
  for i in range(len(self.dftable.visible_rows)):
101
134
  self.dftable.visible_rows[i] = i in matched_indices
102
135
  title = "Filter"
103
- message = f"Filtered by [on $primary]{col_name}[/] = {value_display}"
136
+ message = f"Filtered by [$accent]{col_name}[/] == [$success]{value_display}[/]"
104
137
  else: # action == "highlight"
105
138
  # Update selected_rows to reflect the highlights
106
139
  for i in range(len(self.dftable.selected_rows)):
107
140
  self.dftable.selected_rows[i] = i in matched_indices
108
141
  title = "Highlight"
109
- message = f"Highlighted [on $primary]{col_name}[/] = {value_display}"
142
+ message = f"Highlighted [$accent]{col_name}[/] == [$success]{value_display}[/]"
110
143
 
111
144
  # Recreate the table display with updated data in the main app
112
145
  self.dftable._setup_table()
@@ -122,37 +155,56 @@ class RowDetailScreen(TableScreen):
122
155
 
123
156
  CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "RowDetailScreen")
124
157
 
125
- def __init__(self, row_idx: int, dftable):
158
+ def __init__(self, ridx: int, dftable):
126
159
  super().__init__(dftable)
127
- self.row_idx = row_idx
160
+ self.ridx = ridx
128
161
 
129
162
  def on_mount(self) -> None:
130
- """Create the detail table."""
163
+ """Initialize the row detail screen.
164
+
165
+ Populates the table with column names and values from the selected row
166
+ of the main DataFrame. Sets the table cursor type to "row".
167
+
168
+ Returns:
169
+ None
170
+ """
171
+ self.build_table()
172
+
173
+ def build_table(self) -> None:
174
+ """Build the row detail table."""
175
+ self.table.clear(columns=True)
131
176
  self.table.add_column("Column")
132
177
  self.table.add_column("Value")
133
178
 
134
179
  # Get all columns and values from the dataframe row
135
- for col, val, dtype in zip(
136
- self.df.columns, self.df.row(self.row_idx), self.df.dtypes
137
- ):
180
+ for col, val, dtype in zip(self.df.columns, self.df.row(self.ridx), self.df.dtypes):
138
181
  self.table.add_row(
139
- *_format_row([col, val], [None, dtype], apply_justify=False)
182
+ *format_row([col, val], [None, dtype], apply_justify=False, thousand_separator=self.thousand_separator)
140
183
  )
141
184
 
142
185
  self.table.cursor_type = "row"
143
186
 
144
- def on_key(self, event):
187
+ def on_key(self, event) -> None:
188
+ """Handle key press events in the row detail screen.
189
+
190
+ Supports 'v' for filtering and '"' for highlighting the main table
191
+ by the value in the selected row.
192
+
193
+ Args:
194
+ event: The key event object.
195
+
196
+ Returns:
197
+ None
198
+ """
145
199
  if event.key == "v":
146
200
  # Filter the main table by the selected value
147
- self._filter_or_highlight_selected_value(
148
- self._get_col_name_value(), action="filter"
149
- )
201
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="filter")
150
202
  event.stop()
151
203
  elif event.key == "quotation_mark": # '"'
152
204
  # Highlight the main table by the selected value
153
- self._filter_or_highlight_selected_value(
154
- self._get_col_name_value(), action="highlight"
155
- )
205
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="highlight")
206
+ event.stop()
207
+ elif event.key == "comma":
156
208
  event.stop()
157
209
 
158
210
  def _get_col_name_value(self) -> tuple[str, Any] | None:
@@ -161,11 +213,126 @@ class RowDetailScreen(TableScreen):
161
213
  return None # Invalid row
162
214
 
163
215
  col_name = self.df.columns[row_idx]
164
- col_value = self.df.item(self.row_idx, row_idx)
216
+ col_value = self.df.item(self.ridx, row_idx)
165
217
 
166
218
  return col_name, col_value
167
219
 
168
220
 
221
+ class StatisticsScreen(TableScreen):
222
+ """Modal screen to display statistics for a column or entire dataframe."""
223
+
224
+ CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "StatisticsScreen")
225
+
226
+ def __init__(self, dftable: DataFrameTable, col_idx: int | None = None):
227
+ super().__init__(dftable)
228
+ self.col_idx = col_idx # None for dataframe statistics, otherwise column index
229
+
230
+ def on_mount(self) -> None:
231
+ """Create the statistics table."""
232
+ self.build_table()
233
+
234
+ def build_table(self) -> None:
235
+ """Build the statistics table."""
236
+ self.table.clear(columns=True)
237
+
238
+ if self.col_idx is None:
239
+ # Dataframe statistics
240
+ self._build_dataframe_stats()
241
+ else:
242
+ # Column statistics
243
+ self._build_column_stats()
244
+
245
+ def _build_column_stats(self) -> None:
246
+ """Build statistics for a single column."""
247
+ col_name = self.df.columns[self.col_idx]
248
+ lf = self.df.lazy()
249
+
250
+ # Apply only to visible rows
251
+ if False in self.dftable.visible_rows:
252
+ lf = lf.filter(self.dftable.visible_rows)
253
+
254
+ # Get column statistics
255
+ stats_df = lf.select(pl.col(col_name)).collect().describe()
256
+ if len(stats_df) == 0:
257
+ return
258
+
259
+ col_dtype = stats_df.dtypes[1] # 'value' column
260
+ dc = DtypeConfig(col_dtype)
261
+
262
+ # Add statistics label column
263
+ self.table.add_column(Text("Statistic", justify="left"), key="statistic")
264
+
265
+ # Add value column with appropriate styling
266
+ self.table.add_column(Text(col_name, justify=dc.justify), key=col_name)
267
+
268
+ # Add rows
269
+ for row in stats_df.rows():
270
+ stat_label, stat_value = row
271
+ value = stat_value
272
+ if stat_value is None:
273
+ value = NULL_DISPLAY
274
+ elif dc.gtype == "int" and self.thousand_separator:
275
+ value = f"{stat_value:,}"
276
+ elif dc.gtype == "float":
277
+ value = f"{stat_value:,.2f}" if self.thousand_separator else f"{stat_value:.2f}"
278
+ else:
279
+ value = str(stat_value)
280
+
281
+ self.table.add_row(
282
+ Text(stat_label, justify="left"),
283
+ Text(value, style=dc.style, justify=dc.justify),
284
+ )
285
+
286
+ def _build_dataframe_stats(self) -> None:
287
+ """Build statistics for the entire dataframe."""
288
+ lf = self.df.lazy()
289
+
290
+ # Apply only to visible rows
291
+ if False in self.dftable.visible_rows:
292
+ lf = lf.filter(self.dftable.visible_rows)
293
+
294
+ # Get dataframe statistics
295
+ stats_df = lf.collect().describe()
296
+
297
+ # Add columns for each dataframe column with appropriate styling
298
+ for idx, (col_name, col_dtype) in enumerate(zip(stats_df.columns, stats_df.dtypes), 0):
299
+ if idx == 0:
300
+ # Add statistics label column (first column, no styling)
301
+ self.table.add_column("Statistic", key="statistic")
302
+ continue
303
+
304
+ dc = DtypeConfig(col_dtype)
305
+ self.table.add_column(Text(col_name, justify=dc.justify), key=col_name)
306
+
307
+ # Add rows
308
+ for row in stats_df.rows():
309
+ formatted_row = []
310
+
311
+ # Format remaining values with appropriate styling
312
+ for idx, stat_value in enumerate(row):
313
+ # First element is the statistic label
314
+ if idx == 0:
315
+ formatted_row.append(stat_value)
316
+ continue
317
+
318
+ col_dtype = stats_df.dtypes[idx]
319
+ dc = DtypeConfig(col_dtype)
320
+
321
+ value = stat_value
322
+ if stat_value is None:
323
+ value = NULL_DISPLAY
324
+ elif dc.gtype == "int" and self.thousand_separator:
325
+ value = f"{stat_value:,}"
326
+ elif dc.gtype == "float":
327
+ value = f"{stat_value:,.2f}" if self.thousand_separator else f"{stat_value:.2f}"
328
+ else:
329
+ value = str(stat_value)
330
+
331
+ formatted_row.append(Text(value, style=dc.style, justify=dc.justify))
332
+
333
+ self.table.add_row(*formatted_row)
334
+
335
+
169
336
  class FrequencyScreen(TableScreen):
170
337
  """Modal screen to display frequency of values in a column."""
171
338
 
@@ -176,12 +343,9 @@ class FrequencyScreen(TableScreen):
176
343
  self.col_idx = col_idx
177
344
  self.sorted_columns = {
178
345
  1: True, # Count
179
- 2: True, # %
180
346
  }
181
347
  self.df: pl.DataFrame = (
182
- dftable.df[dftable.df.columns[self.col_idx]]
183
- .value_counts(sort=True)
184
- .sort("count", descending=True)
348
+ dftable.df[dftable.df.columns[self.col_idx]].value_counts(sort=True).sort("count", descending=True)
185
349
  )
186
350
 
187
351
  def on_mount(self) -> None:
@@ -199,49 +363,70 @@ class FrequencyScreen(TableScreen):
199
363
  event.stop()
200
364
  elif event.key == "v":
201
365
  # Filter the main table by the selected value
202
- self._filter_or_highlight_selected_value(
203
- self._get_col_name_value(), action="filter"
204
- )
366
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="filter")
205
367
  event.stop()
206
368
  elif event.key == "quotation_mark": # '"'
207
369
  # Highlight the main table by the selected value
208
- self._filter_or_highlight_selected_value(
209
- self._get_col_name_value(), action="highlight"
210
- )
370
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="highlight")
211
371
  event.stop()
212
372
 
213
373
  def build_table(self) -> None:
374
+ """Build the frequency table."""
375
+ self.table.clear(columns=True)
376
+
214
377
  # Create frequency table
215
378
  column = self.dftable.df.columns[self.col_idx]
216
- dtype = str(self.dftable.df.dtypes[self.col_idx])
379
+ dtype = self.dftable.df.dtypes[self.col_idx]
217
380
  dc = DtypeConfig(dtype)
218
381
 
219
382
  # Calculate frequencies using Polars
220
383
  total_count = len(self.dftable.df)
221
384
 
222
- self.table.add_column(Text(column, justify=dc.justify), key=column)
223
- self.table.add_column(Text("Count", justify="right"), key="Count")
224
- self.table.add_column(Text("%", justify="right"), key="%")
225
- self.table.add_column(Text("Histogram", justify="left"), key="Histogram")
385
+ # Add column headers with sort indicators
386
+ columns = [
387
+ (column, "Value", 0),
388
+ ("Count", "Count", 1),
389
+ ("%", "%", 2),
390
+ ("Histogram", "Histogram", 3),
391
+ ]
392
+
393
+ for display_name, key, col_idx_num in columns:
394
+ # Check if this column is sorted and add indicator
395
+ if col_idx_num in self.sorted_columns:
396
+ descending = self.sorted_columns[col_idx_num]
397
+ sort_indicator = " ▼" if descending else " ▲"
398
+ header_text = display_name + sort_indicator
399
+ else:
400
+ header_text = display_name
401
+
402
+ justify = dc.justify if col_idx_num == 0 else ("right" if col_idx_num in (1, 2) else "left")
403
+ self.table.add_column(Text(header_text, justify=justify), key=key)
226
404
 
227
405
  # Get style config for Int64 and Float64
228
- ds_int = DtypeConfig("Int64")
229
- ds_float = DtypeConfig("Float64")
406
+ ds_int = DtypeConfig(pl.Int64)
407
+ ds_float = DtypeConfig(pl.Float64)
230
408
 
231
409
  # Add rows to the frequency table
232
410
  for row_idx, row in enumerate(self.df.rows()):
233
- value, count = row
411
+ column, count = row
234
412
  percentage = (count / total_count) * 100
235
413
 
414
+ if column is None:
415
+ value = NULL_DISPLAY
416
+ elif dc.gtype == "int" and self.thousand_separator:
417
+ value = f"{column:,}"
418
+ elif dc.gtype == "float":
419
+ value = f"{column:,.2f}" if self.thousand_separator else f"{column:.3f}"
420
+ else:
421
+ value = str(column)
422
+
236
423
  self.table.add_row(
424
+ Text(value, style=dc.style, justify=dc.justify),
237
425
  Text(
238
- "-" if value is None else str(value),
239
- style=dc.style,
240
- justify=dc.justify,
426
+ f"{count:,}" if self.thousand_separator else str(count), style=ds_int.style, justify=ds_int.justify
241
427
  ),
242
- Text(str(count), style=ds_int.style, justify=ds_int.justify),
243
428
  Text(
244
- f"{percentage:.2f}",
429
+ f"{percentage:,.3f}" if self.thousand_separator else f"{percentage:.3f}",
245
430
  style=ds_float.style,
246
431
  justify=ds_float.justify,
247
432
  ),
@@ -266,9 +451,6 @@ class FrequencyScreen(TableScreen):
266
451
 
267
452
  def _sort_by_column(self, descending: bool) -> None:
268
453
  """Sort the dataframe by the selected column and refresh the main table."""
269
-
270
- self.log(self.df)
271
-
272
454
  row_idx, col_idx = self.table.cursor_coordinate
273
455
  col_sort = col_idx if col_idx == 0 else 1
274
456
 
@@ -276,16 +458,14 @@ class FrequencyScreen(TableScreen):
276
458
  if sort_dir is not None:
277
459
  # If already sorted in the same direction, do nothing
278
460
  if sort_dir == descending:
279
- self.notify(
280
- "Already sorted in that order", title="Sort", severity="warning"
281
- )
461
+ self.notify("Already sorted in that order", title="Sort", severity="warning")
282
462
  return
283
463
 
284
464
  self.sorted_columns.clear()
285
465
  self.sorted_columns[col_sort] = descending
286
466
 
287
467
  col_name = self.df.columns[col_sort]
288
- self.df = self.df.sort(col_name, descending=descending)
468
+ self.df = self.df.sort(col_name, descending=descending, nulls_last=True)
289
469
 
290
470
  # Rebuild the frequency table
291
471
  self.table.clear(columns=True)
@@ -293,19 +473,18 @@ class FrequencyScreen(TableScreen):
293
473
 
294
474
  self.table.move_cursor(row=row_idx, column=col_idx)
295
475
 
296
- # Notify the user
297
- order = "desc" if descending else "asc"
298
- self.notify(f"Sorted by [on $primary]{col_name}[/] ({order})", title="Sort")
476
+ # order = "desc" if descending else "asc"
477
+ # self.notify(f"Sorted by [on $primary]{col_name}[/] ({order})", title="Sort")
299
478
 
300
479
  def _get_col_name_value(self) -> tuple[str, str] | None:
301
480
  row_idx = self.table.cursor_row
302
- if row_idx >= len(self.df.columns):
303
- return None # Skip total row
481
+ if row_idx >= len(self.df[:, 0]): # first column
482
+ return None # Skip the last `Total` row
304
483
 
305
- col_name = self.df.columns[self.col_idx]
306
- col_dtype = self.df.dtypes[self.col_idx]
484
+ col_name = self.dftable.df.columns[self.col_idx]
485
+ col_dtype = self.dftable.df.dtypes[self.col_idx]
307
486
 
308
487
  cell_value = self.table.get_cell_at(Coordinate(row_idx, 0))
309
- col_value = cell_value.plain
488
+ col_value = NULL if cell_value.plain == NULL_DISPLAY else DtypeConfig(col_dtype).convert(cell_value.plain)
310
489
 
311
- return col_name, DtypeConfig(col_dtype).convert(col_value)
490
+ return col_name, col_value