dataframe-textual 0.3.2__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -13,7 +13,7 @@ from textual.renderables.bar import Bar
13
13
  from textual.screen import ModalScreen
14
14
  from textual.widgets import DataTable
15
15
 
16
- from .common import DtypeConfig, _format_row
16
+ from .common import NULL, NULL_DISPLAY, RIDX, DtypeConfig, format_float, format_row
17
17
 
18
18
 
19
19
  class TableScreen(ModalScreen):
@@ -30,69 +30,104 @@ class TableScreen(ModalScreen):
30
30
 
31
31
  TableScreen > DataTable {
32
32
  width: auto;
33
- min-width: 30;
34
33
  height: auto;
35
34
  border: solid $primary;
35
+ max-width: 100%;
36
+ overflow: auto;
36
37
  }
37
38
  """
38
39
 
39
- def __init__(self, dftable: DataFrameTable):
40
+ def __init__(self, dftable: DataFrameTable) -> None:
41
+ """Initialize the table screen.
42
+
43
+ Sets up the base modal screen with reference to the main DataFrameTable widget
44
+ and stores the DataFrame for display.
45
+
46
+ Args:
47
+ dftable: Reference to the parent DataFrameTable widget.
48
+
49
+ Returns:
50
+ None
51
+ """
40
52
  super().__init__()
41
53
  self.df: pl.DataFrame = dftable.df # Polars DataFrame
42
54
  self.dftable = dftable # DataFrameTable
55
+ self.thousand_separator = False # Whether to use thousand separators in numbers
43
56
 
44
57
  def compose(self) -> ComposeResult:
45
- """Create the table. Must be overridden by subclasses."""
58
+ """Compose the table screen widget structure.
59
+
60
+ Creates and yields a DataTable widget for displaying tabular data.
61
+ Subclasses should override to customize table configuration.
62
+
63
+ Yields:
64
+ DataTable: The table widget for this screen.
65
+ """
46
66
  self.table = DataTable(zebra_stripes=True)
47
67
  yield self.table
48
68
 
49
- def on_key(self, event):
69
+ def build_table(self) -> None:
70
+ """Build the table content.
71
+
72
+ Subclasses should implement this method to populate the DataTable
73
+ with appropriate columns and rows based on the specific screen's purpose.
74
+
75
+ Returns:
76
+ None
77
+ """
78
+ raise NotImplementedError("Subclasses must implement build_table method.")
79
+
80
+ def on_key(self, event) -> None:
81
+ """Handle key press events in the table screen.
82
+
83
+ Provides keyboard shortcuts for navigation and interaction, including q/Escape to close.
84
+ Prevents propagation of non-navigation keys to parent screens.
85
+
86
+ Args:
87
+ event: The key event object.
88
+
89
+ Returns:
90
+ None
91
+ """
50
92
  if event.key in ("q", "escape"):
51
93
  self.app.pop_screen()
52
94
  event.stop()
53
- # Prevent key events from propagating to parent screen,
54
- # except for the following default key bindings for DataTable
55
- elif event.key not in (
56
- "up",
57
- "down",
58
- "right",
59
- "left",
60
- "pageup",
61
- "pagedown",
62
- "ctrl+home",
63
- "ctrl+end",
64
- "home",
65
- "end",
66
- ):
95
+ elif event.key == "comma":
96
+ self.thousand_separator = not self.thousand_separator
97
+ self.build_table()
67
98
  event.stop()
68
99
 
69
100
  def _filter_or_highlight_selected_value(
70
- self, col_name_value: tuple[str, str] | None, action: str = "filter"
101
+ self, col_name_value: tuple[str, Any] | None, action: str = "filter"
71
102
  ) -> None:
72
- """Apply filter or highlight action by the selected value from the frequency table.
103
+ """Apply filter or highlight action by the selected value.
104
+
105
+ Filters or highlights rows in the main table based on a selected value from
106
+ this table (typically frequency or row detail). Updates the main table's display
107
+ and notifies the user of the action.
73
108
 
74
109
  Args:
75
- col_name: The name of the column to filter/highlight.
76
- col_value: The value to filter/highlight by.
77
- action: Either "filter" to filter visible rows, or "highlight" to select matching rows.
110
+ col_name_value: Tuple of (column_name, column_value) to filter/highlight by, or None.
111
+ action: Either "filter" to hide non-matching rows, or "highlight" to select matching rows. Defaults to "filter".
112
+
113
+ Returns:
114
+ None
78
115
  """
79
116
  if col_name_value is None:
80
117
  return
81
118
  col_name, col_value = col_name_value
82
119
 
83
120
  # Handle NULL values
84
- if col_value == "-":
121
+ if col_value == NULL:
85
122
  # Create expression for NULL values
86
123
  expr = pl.col(col_name).is_null()
87
- value_display = "[on $primary]NULL[/]"
124
+ value_display = "[$success]NULL[/]"
88
125
  else:
89
126
  # Create expression for the selected value
90
127
  expr = pl.col(col_name) == col_value
91
- value_display = f"[on $primary]{col_value}[/]"
128
+ value_display = f"[$success]{col_value}[/]"
92
129
 
93
- matched_indices = set(
94
- self.dftable.df.with_row_index("__rid__").filter(expr)["__rid__"].to_list()
95
- )
130
+ matched_indices = set(self.dftable.df.with_row_index(RIDX).filter(expr)[RIDX].to_list())
96
131
 
97
132
  # Apply the action
98
133
  if action == "filter":
@@ -100,13 +135,13 @@ class TableScreen(ModalScreen):
100
135
  for i in range(len(self.dftable.visible_rows)):
101
136
  self.dftable.visible_rows[i] = i in matched_indices
102
137
  title = "Filter"
103
- message = f"Filtered by [on $primary]{col_name}[/] = {value_display}"
138
+ message = f"Filtered by [$accent]{col_name}[/] == [$success]{value_display}[/]"
104
139
  else: # action == "highlight"
105
140
  # Update selected_rows to reflect the highlights
106
141
  for i in range(len(self.dftable.selected_rows)):
107
142
  self.dftable.selected_rows[i] = i in matched_indices
108
143
  title = "Highlight"
109
- message = f"Highlighted [on $primary]{col_name}[/] = {value_display}"
144
+ message = f"Highlighted [$accent]{col_name}[/] == [$success]{value_display}[/]"
110
145
 
111
146
  # Recreate the table display with updated data in the main app
112
147
  self.dftable._setup_table()
@@ -122,37 +157,56 @@ class RowDetailScreen(TableScreen):
122
157
 
123
158
  CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "RowDetailScreen")
124
159
 
125
- def __init__(self, row_idx: int, dftable):
160
+ def __init__(self, ridx: int, dftable):
126
161
  super().__init__(dftable)
127
- self.row_idx = row_idx
162
+ self.ridx = ridx
128
163
 
129
164
  def on_mount(self) -> None:
130
- """Create the detail table."""
165
+ """Initialize the row detail screen.
166
+
167
+ Populates the table with column names and values from the selected row
168
+ of the main DataFrame. Sets the table cursor type to "row".
169
+
170
+ Returns:
171
+ None
172
+ """
173
+ self.build_table()
174
+
175
+ def build_table(self) -> None:
176
+ """Build the row detail table."""
177
+ self.table.clear(columns=True)
131
178
  self.table.add_column("Column")
132
179
  self.table.add_column("Value")
133
180
 
134
181
  # Get all columns and values from the dataframe row
135
- for col, val, dtype in zip(
136
- self.df.columns, self.df.row(self.row_idx), self.df.dtypes
137
- ):
182
+ for col, val, dtype in zip(self.df.columns, self.df.row(self.ridx), self.df.dtypes):
138
183
  self.table.add_row(
139
- *_format_row([col, val], [None, dtype], apply_justify=False)
184
+ *format_row([col, val], [None, dtype], apply_justify=False, thousand_separator=self.thousand_separator)
140
185
  )
141
186
 
142
187
  self.table.cursor_type = "row"
143
188
 
144
- def on_key(self, event):
189
+ def on_key(self, event) -> None:
190
+ """Handle key press events in the row detail screen.
191
+
192
+ Supports 'v' for filtering and '"' for highlighting the main table
193
+ by the value in the selected row.
194
+
195
+ Args:
196
+ event: The key event object.
197
+
198
+ Returns:
199
+ None
200
+ """
145
201
  if event.key == "v":
146
202
  # Filter the main table by the selected value
147
- self._filter_or_highlight_selected_value(
148
- self._get_col_name_value(), action="filter"
149
- )
203
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="filter")
150
204
  event.stop()
151
205
  elif event.key == "quotation_mark": # '"'
152
206
  # Highlight the main table by the selected value
153
- self._filter_or_highlight_selected_value(
154
- self._get_col_name_value(), action="highlight"
155
- )
207
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="highlight")
208
+ event.stop()
209
+ elif event.key == "comma":
156
210
  event.stop()
157
211
 
158
212
  def _get_col_name_value(self) -> tuple[str, Any] | None:
@@ -161,11 +215,130 @@ class RowDetailScreen(TableScreen):
161
215
  return None # Invalid row
162
216
 
163
217
  col_name = self.df.columns[row_idx]
164
- col_value = self.df.item(self.row_idx, row_idx)
218
+ col_value = self.df.item(self.ridx, row_idx)
165
219
 
166
220
  return col_name, col_value
167
221
 
168
222
 
223
+ class StatisticsScreen(TableScreen):
224
+ """Modal screen to display statistics for a column or entire dataframe."""
225
+
226
+ CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "StatisticsScreen")
227
+
228
+ def __init__(self, dftable: DataFrameTable, col_idx: int | None = None):
229
+ super().__init__(dftable)
230
+ self.col_idx = col_idx # None for dataframe statistics, otherwise column index
231
+
232
+ def on_mount(self) -> None:
233
+ """Create the statistics table."""
234
+ self.build_table()
235
+
236
+ def build_table(self) -> None:
237
+ """Build the statistics table."""
238
+ self.table.clear(columns=True)
239
+
240
+ if self.col_idx is None:
241
+ # Dataframe statistics
242
+ self._build_dataframe_stats()
243
+ else:
244
+ # Column statistics
245
+ self._build_column_stats()
246
+
247
+ def _build_column_stats(self) -> None:
248
+ """Build statistics for a single column."""
249
+ col_name = self.df.columns[self.col_idx]
250
+ lf = self.df.lazy()
251
+
252
+ # Apply only to visible rows
253
+ if False in self.dftable.visible_rows:
254
+ lf = lf.filter(self.dftable.visible_rows)
255
+
256
+ # Get column statistics
257
+ stats_df = lf.select(pl.col(col_name)).collect().describe()
258
+ if len(stats_df) == 0:
259
+ return
260
+
261
+ col_dtype = stats_df.dtypes[1] # 'value' column
262
+ dc = DtypeConfig(col_dtype)
263
+
264
+ # Add statistics label column
265
+ self.table.add_column(Text("Statistic", justify="left"), key="statistic")
266
+
267
+ # Add value column with appropriate styling
268
+ self.table.add_column(Text(col_name, justify=dc.justify), key=col_name)
269
+
270
+ # Add rows
271
+ for row in stats_df.rows():
272
+ stat_label, stat_value = row
273
+ value = stat_value
274
+ if stat_value is None:
275
+ value = NULL_DISPLAY
276
+ elif dc.gtype == "integer" and self.thousand_separator:
277
+ value = f"{stat_value:,}"
278
+ elif dc.gtype == "float":
279
+ value = format_float(stat_value, self.thousand_separator)
280
+ else:
281
+ value = str(stat_value)
282
+
283
+ self.table.add_row(
284
+ Text(stat_label, justify="left"),
285
+ Text(value, style=dc.style, justify=dc.justify),
286
+ )
287
+
288
+ def _build_dataframe_stats(self) -> None:
289
+ """Build statistics for the entire dataframe."""
290
+ lf = self.df.lazy()
291
+
292
+ # Apply only to visible rows
293
+ if False in self.dftable.visible_rows:
294
+ lf = lf.filter(self.dftable.visible_rows)
295
+
296
+ # Apply only to non-hidden columns
297
+ if self.dftable.hidden_columns:
298
+ lf = lf.select(pl.exclude(self.dftable.hidden_columns))
299
+
300
+ # Get dataframe statistics
301
+ stats_df = lf.collect().describe()
302
+
303
+ # Add columns for each dataframe column with appropriate styling
304
+ for idx, (col_name, col_dtype) in enumerate(zip(stats_df.columns, stats_df.dtypes), 0):
305
+ if idx == 0:
306
+ # Add statistics label column (first column, no styling)
307
+ self.table.add_column("Statistic", key="statistic")
308
+ continue
309
+
310
+ dc = DtypeConfig(col_dtype)
311
+ self.table.add_column(Text(col_name, justify=dc.justify), key=col_name)
312
+
313
+ # Add rows
314
+ for row in stats_df.rows():
315
+ formatted_row = []
316
+
317
+ # Format remaining values with appropriate styling
318
+ for idx, stat_value in enumerate(row):
319
+ # First element is the statistic label
320
+ if idx == 0:
321
+ formatted_row.append(stat_value)
322
+ continue
323
+
324
+ col_dtype = stats_df.dtypes[idx]
325
+ dc = DtypeConfig(col_dtype)
326
+
327
+ value = stat_value
328
+ if stat_value is None:
329
+ value = NULL_DISPLAY
330
+ elif dc.gtype == "integer" and self.thousand_separator:
331
+ value = f"{stat_value:,}"
332
+ elif dc.gtype == "float":
333
+ value = format_float(stat_value, self.thousand_separator)
334
+ else:
335
+ value = str(stat_value)
336
+
337
+ formatted_row.append(Text(value, style=dc.style, justify=dc.justify))
338
+
339
+ self.table.add_row(*formatted_row)
340
+
341
+
169
342
  class FrequencyScreen(TableScreen):
170
343
  """Modal screen to display frequency of values in a column."""
171
344
 
@@ -176,12 +349,9 @@ class FrequencyScreen(TableScreen):
176
349
  self.col_idx = col_idx
177
350
  self.sorted_columns = {
178
351
  1: True, # Count
179
- 2: True, # %
180
352
  }
181
353
  self.df: pl.DataFrame = (
182
- dftable.df[dftable.df.columns[self.col_idx]]
183
- .value_counts(sort=True)
184
- .sort("count", descending=True)
354
+ dftable.df[dftable.df.columns[self.col_idx]].value_counts(sort=True).sort("count", descending=True)
185
355
  )
186
356
 
187
357
  def on_mount(self) -> None:
@@ -199,49 +369,70 @@ class FrequencyScreen(TableScreen):
199
369
  event.stop()
200
370
  elif event.key == "v":
201
371
  # Filter the main table by the selected value
202
- self._filter_or_highlight_selected_value(
203
- self._get_col_name_value(), action="filter"
204
- )
372
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="filter")
205
373
  event.stop()
206
374
  elif event.key == "quotation_mark": # '"'
207
375
  # Highlight the main table by the selected value
208
- self._filter_or_highlight_selected_value(
209
- self._get_col_name_value(), action="highlight"
210
- )
376
+ self._filter_or_highlight_selected_value(self._get_col_name_value(), action="highlight")
211
377
  event.stop()
212
378
 
213
379
  def build_table(self) -> None:
380
+ """Build the frequency table."""
381
+ self.table.clear(columns=True)
382
+
214
383
  # Create frequency table
215
384
  column = self.dftable.df.columns[self.col_idx]
216
- dtype = str(self.dftable.df.dtypes[self.col_idx])
385
+ dtype = self.dftable.df.dtypes[self.col_idx]
217
386
  dc = DtypeConfig(dtype)
218
387
 
219
388
  # Calculate frequencies using Polars
220
389
  total_count = len(self.dftable.df)
221
390
 
222
- self.table.add_column(Text(column, justify=dc.justify), key=column)
223
- self.table.add_column(Text("Count", justify="right"), key="Count")
224
- self.table.add_column(Text("%", justify="right"), key="%")
225
- self.table.add_column(Text("Histogram", justify="left"), key="Histogram")
391
+ # Add column headers with sort indicators
392
+ columns = [
393
+ (column, "Value", 0),
394
+ ("Count", "Count", 1),
395
+ ("%", "%", 2),
396
+ ("Histogram", "Histogram", 3),
397
+ ]
398
+
399
+ for display_name, key, col_idx_num in columns:
400
+ # Check if this column is sorted and add indicator
401
+ if col_idx_num in self.sorted_columns:
402
+ descending = self.sorted_columns[col_idx_num]
403
+ sort_indicator = " ▼" if descending else " ▲"
404
+ header_text = display_name + sort_indicator
405
+ else:
406
+ header_text = display_name
407
+
408
+ justify = dc.justify if col_idx_num == 0 else ("right" if col_idx_num in (1, 2) else "left")
409
+ self.table.add_column(Text(header_text, justify=justify), key=key)
226
410
 
227
411
  # Get style config for Int64 and Float64
228
- ds_int = DtypeConfig("Int64")
229
- ds_float = DtypeConfig("Float64")
412
+ ds_int = DtypeConfig(pl.Int64)
413
+ ds_float = DtypeConfig(pl.Float64)
230
414
 
231
415
  # Add rows to the frequency table
232
416
  for row_idx, row in enumerate(self.df.rows()):
233
- value, count = row
417
+ column, count = row
234
418
  percentage = (count / total_count) * 100
235
419
 
420
+ if column is None:
421
+ value = NULL_DISPLAY
422
+ elif dc.gtype == "integer" and self.thousand_separator:
423
+ value = f"{column:,}"
424
+ elif dc.gtype == "float":
425
+ value = format_float(column, self.thousand_separator)
426
+ else:
427
+ value = str(column)
428
+
236
429
  self.table.add_row(
430
+ Text(value, style=dc.style, justify=dc.justify),
237
431
  Text(
238
- "-" if value is None else str(value),
239
- style=dc.style,
240
- justify=dc.justify,
432
+ f"{count:,}" if self.thousand_separator else str(count), style=ds_int.style, justify=ds_int.justify
241
433
  ),
242
- Text(str(count), style=ds_int.style, justify=ds_int.justify),
243
434
  Text(
244
- f"{percentage:.2f}",
435
+ f"{percentage:,.3f}" if self.thousand_separator else f"{percentage:.3f}",
245
436
  style=ds_float.style,
246
437
  justify=ds_float.justify,
247
438
  ),
@@ -266,9 +457,6 @@ class FrequencyScreen(TableScreen):
266
457
 
267
458
  def _sort_by_column(self, descending: bool) -> None:
268
459
  """Sort the dataframe by the selected column and refresh the main table."""
269
-
270
- self.log(self.df)
271
-
272
460
  row_idx, col_idx = self.table.cursor_coordinate
273
461
  col_sort = col_idx if col_idx == 0 else 1
274
462
 
@@ -276,16 +464,14 @@ class FrequencyScreen(TableScreen):
276
464
  if sort_dir is not None:
277
465
  # If already sorted in the same direction, do nothing
278
466
  if sort_dir == descending:
279
- self.notify(
280
- "Already sorted in that order", title="Sort", severity="warning"
281
- )
467
+ self.notify("Already sorted in that order", title="Sort", severity="warning")
282
468
  return
283
469
 
284
470
  self.sorted_columns.clear()
285
471
  self.sorted_columns[col_sort] = descending
286
472
 
287
473
  col_name = self.df.columns[col_sort]
288
- self.df = self.df.sort(col_name, descending=descending)
474
+ self.df = self.df.sort(col_name, descending=descending, nulls_last=True)
289
475
 
290
476
  # Rebuild the frequency table
291
477
  self.table.clear(columns=True)
@@ -293,19 +479,18 @@ class FrequencyScreen(TableScreen):
293
479
 
294
480
  self.table.move_cursor(row=row_idx, column=col_idx)
295
481
 
296
- # Notify the user
297
- order = "desc" if descending else "asc"
298
- self.notify(f"Sorted by [on $primary]{col_name}[/] ({order})", title="Sort")
482
+ # order = "desc" if descending else "asc"
483
+ # self.notify(f"Sorted by [on $primary]{col_name}[/] ({order})", title="Sort")
299
484
 
300
485
  def _get_col_name_value(self) -> tuple[str, str] | None:
301
486
  row_idx = self.table.cursor_row
302
- if row_idx >= len(self.df.columns):
303
- return None # Skip total row
487
+ if row_idx >= len(self.df[:, 0]): # first column
488
+ return None # Skip the last `Total` row
304
489
 
305
- col_name = self.df.columns[self.col_idx]
306
- col_dtype = self.df.dtypes[self.col_idx]
490
+ col_name = self.dftable.df.columns[self.col_idx]
491
+ col_dtype = self.dftable.df.dtypes[self.col_idx]
307
492
 
308
493
  cell_value = self.table.get_cell_at(Coordinate(row_idx, 0))
309
- col_value = cell_value.plain
494
+ col_value = NULL if cell_value.plain == NULL_DISPLAY else DtypeConfig(col_dtype).convert(cell_value.plain)
310
495
 
311
- return col_name, DtypeConfig(col_dtype).convert(col_value)
496
+ return col_name, col_value