dataframe-textual 1.9.0__py3-none-any.whl → 2.2.3__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, both released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -13,7 +13,7 @@ from textual.renderables.bar import Bar
13
13
  from textual.screen import ModalScreen
14
14
  from textual.widgets import DataTable
15
15
 
16
- from .common import NULL, NULL_DISPLAY, RIDX, DtypeConfig, format_float, format_row
16
+ from .common import NULL, NULL_DISPLAY, RID, DtypeConfig, format_float
17
17
 
18
18
 
19
19
  class TableScreen(ModalScreen):
@@ -45,9 +45,6 @@ class TableScreen(ModalScreen):
45
45
 
46
46
  Args:
47
47
  dftable: Reference to the parent DataFrameTable widget.
48
-
49
- Returns:
50
- None
51
48
  """
52
49
  super().__init__()
53
50
  self.dftable = dftable # DataFrameTable
@@ -71,9 +68,6 @@ class TableScreen(ModalScreen):
71
68
 
72
69
  Subclasses should implement this method to populate the DataTable
73
70
  with appropriate columns and rows based on the specific screen's purpose.
74
-
75
- Returns:
76
- None
77
71
  """
78
72
  raise NotImplementedError("Subclasses must implement build_table method.")
79
73
 
@@ -85,9 +79,6 @@ class TableScreen(ModalScreen):
85
79
 
86
80
  Args:
87
81
  event: The key event object.
88
-
89
- Returns:
90
- None
91
82
  """
92
83
  if event.key in ("q", "escape"):
93
84
  self.app.pop_screen()
@@ -97,60 +88,55 @@ class TableScreen(ModalScreen):
97
88
  self.build_table()
98
89
  event.stop()
99
90
 
100
- def _filter_or_highlight_selected_value(
101
- self, col_name_value: tuple[str, Any] | None, action: str = "filter"
102
- ) -> None:
103
- """Apply filter or highlight action by the selected value.
91
+ def filter_or_view_selected_value(self, cidx_name_value: tuple[int, str, Any] | None, action: str = "view") -> None:
92
+ """Apply filter or view action by the selected value.
104
93
 
105
- Filters or highlights rows in the main table based on a selected value from
94
+ Filters or views rows in the main table based on a selected value from
106
95
  this table (typically frequency or row detail). Updates the main table's display
107
96
  and notifies the user of the action.
108
97
 
109
98
  Args:
110
- col_name_value: Tuple of (column_name, column_value) to filter/highlight by, or None.
111
- action: Either "filter" to hide non-matching rows, or "highlight" to select matching rows. Defaults to "filter".
112
-
113
- Returns:
114
- None
99
+ col_name_value: Tuple of (column_name, column_value) to filter/view by, or None.
100
+ action: Either "filter" to hide non-matching rows, or "view" to show matching rows. Defaults to "view".
115
101
  """
116
- if col_name_value is None:
102
+ if cidx_name_value is None:
117
103
  return
118
- col_name, col_value = col_name_value
104
+ cidx, col_name, col_value = cidx_name_value
105
+ self.log(f"Filtering or viewing by `{col_name} == {col_value}`")
119
106
 
120
107
  # Handle NULL values
121
108
  if col_value == NULL:
122
109
  # Create expression for NULL values
123
110
  expr = pl.col(col_name).is_null()
124
- value_display = "[$success]NULL[/]"
111
+ value_display = f"[$success]{NULL_DISPLAY}[/]"
125
112
  else:
126
113
  # Create expression for the selected value
127
114
  expr = pl.col(col_name) == col_value
128
115
  value_display = f"[$success]{col_value}[/]"
129
116
 
130
- matched_indices = set(self.dftable.df.with_row_index(RIDX).filter(expr)[RIDX].to_list())
117
+ df_filtered = self.dftable.df.lazy().filter(expr).collect()
118
+ self.log(f"Filtered dataframe has {len(df_filtered)} rows")
131
119
 
132
- # Apply the action
120
+ ok_rids = set(df_filtered[RID].to_list())
121
+ if not ok_rids:
122
+ self.notify(
123
+ f"No matches found for [$warning]{col_name}[/] == {value_display}",
124
+ title="No Matches",
125
+ severity="warning",
126
+ )
127
+ return
128
+
129
+ # Action filter
133
130
  if action == "filter":
134
- # Update visible_rows to reflect the filter
135
- for i in range(len(self.dftable.visible_rows)):
136
- self.dftable.visible_rows[i] = i in matched_indices
137
- title = "Filter"
138
- message = f"Filtered by [$accent]{col_name}[/] == [$success]{value_display}[/]"
139
- else: # action == "highlight"
140
- # Update selected_rows to reflect the highlights
141
- for i in range(len(self.dftable.selected_rows)):
142
- self.dftable.selected_rows[i] = i in matched_indices
143
- title = "Highlight"
144
- message = f"Highlighted [$accent]{col_name}[/] == [$success]{value_display}[/]"
145
-
146
- # Recreate the table display with updated data in the main app
147
- self.dftable.setup_table()
131
+ self.dftable.do_filter_rows()
132
+
133
+ # Action view
134
+ else:
135
+ self.dftable.view_rows((expr, cidx, False, True))
148
136
 
149
137
  # Dismiss the frequency screen
150
138
  self.app.pop_screen()
151
139
 
152
- self.notify(message, title=title)
153
-
154
140
 
155
141
  class RowDetailScreen(TableScreen):
156
142
  """Modal screen to display a single row's details."""
@@ -166,9 +152,6 @@ class RowDetailScreen(TableScreen):
166
152
 
167
153
  Populates the table with column names and values from the selected row
168
154
  of the main DataFrame. Sets the table cursor type to "row".
169
-
170
- Returns:
171
- None
172
155
  """
173
156
  self.build_table()
174
157
 
@@ -180,14 +163,14 @@ class RowDetailScreen(TableScreen):
180
163
 
181
164
  # Get all columns and values from the dataframe row
182
165
  for col, val, dtype in zip(self.df.columns, self.df.row(self.ridx), self.df.dtypes):
183
- self.table.add_row(
184
- *format_row(
185
- [col, val],
186
- [None, dtype],
187
- apply_justify=False,
188
- thousand_separator=self.thousand_separator,
189
- )
190
- )
166
+ if col in self.dftable.hidden_columns or col == RID:
167
+ continue # Skip RID column
168
+ formatted_row = []
169
+ formatted_row.append(col)
170
+
171
+ dc = DtypeConfig(dtype)
172
+ formatted_row.append(dc.format(val, justify="", thousand_separator=self.thousand_separator))
173
+ self.table.add_row(*formatted_row)
191
174
 
192
175
  self.table.cursor_type = "row"
193
176
 
@@ -199,30 +182,41 @@ class RowDetailScreen(TableScreen):
199
182
 
200
183
  Args:
201
184
  event: The key event object.
202
-
203
- Returns:
204
- None
205
185
  """
206
186
  if event.key == "v":
207
- # Filter the main table by the selected value
208
- self._filter_or_highlight_selected_value(self._get_col_name_value(), action="filter")
187
+ # View the main table by the selected value
188
+ self.filter_or_view_selected_value(self.get_cidx_name_value(), action="view")
209
189
  event.stop()
210
190
  elif event.key == "quotation_mark": # '"'
211
- # Highlight the main table by the selected value
212
- self._filter_or_highlight_selected_value(self._get_col_name_value(), action="highlight")
191
+ # Filter the main table by the selected value
192
+ self.filter_or_view_selected_value(self.get_cidx_name_value(), action="filter")
213
193
  event.stop()
214
- elif event.key == "comma":
194
+ elif event.key == "right_curly_bracket": # '}'
195
+ # Move to the next row
196
+ ridx = self.ridx + 1
197
+ if ridx < len(self.df):
198
+ self.ridx = ridx
199
+ self.dftable.move_cursor_to(self.ridx)
200
+ self.build_table()
201
+ event.stop()
202
+ elif event.key == "left_curly_bracket": # '{'
203
+ # Move to the previous row
204
+ ridx = self.ridx - 1
205
+ if ridx >= 0:
206
+ self.ridx = ridx
207
+ self.dftable.move_cursor_to(self.ridx)
208
+ self.build_table()
215
209
  event.stop()
216
210
 
217
- def _get_col_name_value(self) -> tuple[str, Any] | None:
218
- row_idx = self.table.cursor_row
219
- if row_idx >= len(self.df.columns):
211
+ def get_cidx_name_value(self) -> tuple[int, str, Any] | None:
212
+ cidx = self.table.cursor_row
213
+ if cidx >= len(self.df.columns):
220
214
  return None # Invalid row
221
215
 
222
- col_name = self.df.columns[row_idx]
223
- col_value = self.df.item(self.ridx, row_idx)
216
+ col_name = self.df.columns[cidx]
217
+ col_value = self.df.item(self.ridx, cidx)
224
218
 
225
- return col_name, col_value
219
+ return cidx, col_name, col_value
226
220
 
227
221
 
228
222
  class StatisticsScreen(TableScreen):
@@ -244,24 +238,20 @@ class StatisticsScreen(TableScreen):
244
238
 
245
239
  if self.col_idx is None:
246
240
  # Dataframe statistics
247
- self._build_dataframe_stats()
241
+ self.build_dataframe_stats()
248
242
  self.table.cursor_type = "column"
249
243
  else:
250
244
  # Column statistics
251
- self._build_column_stats()
245
+ self.build_column_stats()
252
246
  self.table.cursor_type = "row"
253
247
 
254
- def _build_column_stats(self) -> None:
248
+ def build_column_stats(self) -> None:
255
249
  """Build statistics for a single column."""
256
250
  col_name = self.df.columns[self.col_idx]
257
251
  lf = self.df.lazy()
258
252
 
259
- # Apply only to visible rows
260
- if False in self.dftable.visible_rows:
261
- lf = lf.filter(self.dftable.visible_rows)
262
-
263
253
  # Get column statistics
264
- stats_df = lf.select(pl.col(col_name)).collect().describe()
254
+ stats_df = lf.select(pl.col(col_name)).describe()
265
255
  if len(stats_df) == 0:
266
256
  return
267
257
 
@@ -277,35 +267,21 @@ class StatisticsScreen(TableScreen):
277
267
  # Add rows
278
268
  for row in stats_df.rows():
279
269
  stat_label, stat_value = row
280
- value = stat_value
281
- if stat_value is None:
282
- value = NULL_DISPLAY
283
- elif dc.gtype == "integer" and self.thousand_separator:
284
- value = f"{stat_value:,}"
285
- elif dc.gtype == "float":
286
- value = format_float(stat_value, self.thousand_separator)
287
- else:
288
- value = str(stat_value)
289
-
290
270
  self.table.add_row(
291
- Text(stat_label, justify="left"),
292
- Text(value, style=dc.style, justify=dc.justify),
271
+ stat_label,
272
+ dc.format(stat_value, thousand_separator=self.thousand_separator),
293
273
  )
294
274
 
295
- def _build_dataframe_stats(self) -> None:
275
+ def build_dataframe_stats(self) -> None:
296
276
  """Build statistics for the entire dataframe."""
297
- lf = self.df.lazy()
298
-
299
- # Apply only to visible rows
300
- if False in self.dftable.visible_rows:
301
- lf = lf.filter(self.dftable.visible_rows)
277
+ lf = self.df.lazy().select(pl.exclude(RID))
302
278
 
303
279
  # Apply only to non-hidden columns
304
280
  if self.dftable.hidden_columns:
305
281
  lf = lf.select(pl.exclude(self.dftable.hidden_columns))
306
282
 
307
283
  # Get dataframe statistics
308
- stats_df = lf.collect().describe()
284
+ stats_df = lf.describe()
309
285
 
310
286
  # Add columns for each dataframe column with appropriate styling
311
287
  for idx, (col_name, col_dtype) in enumerate(zip(stats_df.columns, stats_df.dtypes), 0):
@@ -331,17 +307,7 @@ class StatisticsScreen(TableScreen):
331
307
  col_dtype = stats_df.dtypes[idx]
332
308
  dc = DtypeConfig(col_dtype)
333
309
 
334
- value = stat_value
335
- if stat_value is None:
336
- value = NULL_DISPLAY
337
- elif dc.gtype == "integer" and self.thousand_separator:
338
- value = f"{stat_value:,}"
339
- elif dc.gtype == "float":
340
- value = format_float(stat_value, self.thousand_separator)
341
- else:
342
- value = str(stat_value)
343
-
344
- formatted_row.append(Text(value, style=dc.style, justify=dc.justify))
310
+ formatted_row.append(dc.format(stat_value, thousand_separator=self.thousand_separator))
345
311
 
346
312
  self.table.add_row(*formatted_row)
347
313
 
@@ -351,16 +317,14 @@ class FrequencyScreen(TableScreen):
351
317
 
352
318
  CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "FrequencyScreen")
353
319
 
354
- def __init__(self, col_idx: int, dftable: "DataFrameTable") -> None:
320
+ def __init__(self, cidx: int, dftable: "DataFrameTable") -> None:
355
321
  super().__init__(dftable)
356
- self.col_idx = col_idx
357
- self.sorted_columns = {
358
- 1: True, # Count
359
- }
322
+ self.cidx = cidx
323
+ self.sorted_columns = {1: True} # Count sort by default
324
+ self.total_count = len(dftable.df)
360
325
 
361
- df = dftable.df.filter(dftable.visible_rows) if False in dftable.visible_rows else dftable.df
362
- self.total_count = len(df)
363
- self.df: pl.DataFrame = df[df.columns[self.col_idx]].value_counts(sort=True).sort("count", descending=True)
326
+ col = dftable.df.columns[self.cidx]
327
+ self.df: pl.DataFrame = dftable.df.lazy().select(pl.col(col).value_counts(sort=True)).unnest(col).collect()
364
328
 
365
329
  def on_mount(self) -> None:
366
330
  """Create the frequency table."""
@@ -369,19 +333,19 @@ class FrequencyScreen(TableScreen):
369
333
  def on_key(self, event):
370
334
  if event.key == "left_square_bracket": # '['
371
335
  # Sort by current column in ascending order
372
- self._sort_by_column(descending=False)
336
+ self.sort_by_column(descending=False)
373
337
  event.stop()
374
338
  elif event.key == "right_square_bracket": # ']'
375
339
  # Sort by current column in descending order
376
- self._sort_by_column(descending=True)
340
+ self.sort_by_column(descending=True)
377
341
  event.stop()
378
342
  elif event.key == "v":
379
343
  # Filter the main table by the selected value
380
- self._filter_or_highlight_selected_value(self._get_col_name_value(), action="filter")
344
+ self.filter_or_view_selected_value(self.get_cidx_name_value(), action="view")
381
345
  event.stop()
382
346
  elif event.key == "quotation_mark": # '"'
383
347
  # Highlight the main table by the selected value
384
- self._filter_or_highlight_selected_value(self._get_col_name_value(), action="highlight")
348
+ self.filter_or_view_selected_value(self.get_cidx_name_value(), action="filter")
385
349
  event.stop()
386
350
 
387
351
  def build_table(self) -> None:
@@ -389,8 +353,8 @@ class FrequencyScreen(TableScreen):
389
353
  self.table.clear(columns=True)
390
354
 
391
355
  # Create frequency table
392
- column = self.dftable.df.columns[self.col_idx]
393
- dtype = self.dftable.df.dtypes[self.col_idx]
356
+ column = self.dftable.df.columns[self.cidx]
357
+ dtype = self.dftable.df.dtypes[self.cidx]
394
358
  dc = DtypeConfig(dtype)
395
359
 
396
360
  # Add column headers with sort indicators
@@ -414,33 +378,18 @@ class FrequencyScreen(TableScreen):
414
378
  self.table.add_column(Text(header_text, justify=justify), key=key)
415
379
 
416
380
  # Get style config for Int64 and Float64
417
- ds_int = DtypeConfig(pl.Int64)
418
- ds_float = DtypeConfig(pl.Float64)
381
+ dc_int = DtypeConfig(pl.Int64)
382
+ dc_float = DtypeConfig(pl.Float64)
419
383
 
420
384
  # Add rows to the frequency table
421
385
  for row_idx, row in enumerate(self.df.rows()):
422
386
  column, count = row
423
387
  percentage = (count / self.total_count) * 100
424
388
 
425
- if column is None:
426
- value = NULL_DISPLAY
427
- elif dc.gtype == "integer" and self.thousand_separator:
428
- value = f"{column:,}"
429
- elif dc.gtype == "float":
430
- value = format_float(column, self.thousand_separator)
431
- else:
432
- value = str(column)
433
-
434
389
  self.table.add_row(
435
- Text(value, style=dc.style, justify=dc.justify),
436
- Text(
437
- f"{count:,}" if self.thousand_separator else str(count), style=ds_int.style, justify=ds_int.justify
438
- ),
439
- Text(
440
- format_float(percentage, self.thousand_separator),
441
- style=ds_float.style,
442
- justify=ds_float.justify,
443
- ),
390
+ dc.format(column),
391
+ dc_int.format(count, thousand_separator=self.thousand_separator),
392
+ dc_float.format(percentage, thousand_separator=self.thousand_separator),
444
393
  Bar(
445
394
  highlight_range=(0.0, percentage / 100 * 10),
446
395
  width=10,
@@ -457,7 +406,7 @@ class FrequencyScreen(TableScreen):
457
406
  justify="right",
458
407
  ),
459
408
  Text(
460
- format_float(100.0, self.thousand_separator),
409
+ format_float(100.0, self.thousand_separator, precision=-2 if len(self.df) > 1 else 2),
461
410
  style="bold",
462
411
  justify="right",
463
412
  ),
@@ -468,7 +417,7 @@ class FrequencyScreen(TableScreen):
468
417
  key="total",
469
418
  )
470
419
 
471
- def _sort_by_column(self, descending: bool) -> None:
420
+ def sort_by_column(self, descending: bool) -> None:
472
421
  """Sort the dataframe by the selected column and refresh the main table."""
473
422
  row_idx, col_idx = self.table.cursor_coordinate
474
423
  col_sort = col_idx if col_idx == 0 else 1
@@ -493,15 +442,86 @@ class FrequencyScreen(TableScreen):
493
442
  # order = "desc" if descending else "asc"
494
443
  # self.notify(f"Sorted by [on $primary]{col_name}[/] ({order})", title="Sort")
495
444
 
496
- def _get_col_name_value(self) -> tuple[str, str] | None:
445
+ def get_cidx_name_value(self) -> tuple[str, str, str] | None:
497
446
  row_idx = self.table.cursor_row
498
447
  if row_idx >= len(self.df[:, 0]): # first column
499
448
  return None # Skip the last `Total` row
500
449
 
501
- col_name = self.dftable.df.columns[self.col_idx]
502
- col_dtype = self.dftable.df.dtypes[self.col_idx]
450
+ col_name = self.dftable.df.columns[self.cidx]
451
+ col_dtype = self.dftable.df.dtypes[self.cidx]
503
452
 
504
453
  cell_value = self.table.get_cell_at(Coordinate(row_idx, 0))
505
454
  col_value = NULL if cell_value.plain == NULL_DISPLAY else DtypeConfig(col_dtype).convert(cell_value.plain)
506
455
 
507
- return col_name, col_value
456
+ return self.cidx, col_name, col_value
457
+
458
+
459
+ class MetaShape(TableScreen):
460
+ """Modal screen to display metadata about the dataframe."""
461
+
462
+ CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "MetadataScreen")
463
+
464
+ def on_mount(self) -> None:
465
+ """Initialize the metadata screen.
466
+
467
+ Populates the table with metadata information about the dataframe,
468
+ including row and column counts.
469
+ """
470
+ self.build_table()
471
+
472
+ def build_table(self) -> None:
473
+ """Build the metadata table."""
474
+ self.table.clear(columns=True)
475
+ self.table.add_column("")
476
+ self.table.add_column(Text("Count", justify="right"))
477
+
478
+ # Get shape information
479
+ num_rows, num_cols = self.df.shape if self.dftable.df_view is None else self.dftable.df_view.shape
480
+ num_cols -= 1 # Exclude RID column
481
+ dc_int = DtypeConfig(pl.Int64)
482
+
483
+ # Add rows to the table
484
+ self.table.add_row("Row", dc_int.format(num_rows, thousand_separator=self.thousand_separator))
485
+ self.table.add_row("Column", dc_int.format(num_cols, thousand_separator=self.thousand_separator))
486
+
487
+ self.table.cursor_type = "none"
488
+
489
+
490
+ class MetaColumnScreen(TableScreen):
491
+ """Modal screen to display metadata about the columns in the dataframe."""
492
+
493
+ CSS = TableScreen.DEFAULT_CSS.replace("TableScreen", "MetaColumnScreen")
494
+
495
+ def on_mount(self) -> None:
496
+ """Initialize the column metadata screen.
497
+
498
+ Populates the table with information about each column in the dataframe,
499
+ including ID (1-based index), Name, and Type.
500
+ """
501
+ self.build_table()
502
+
503
+ def build_table(self) -> None:
504
+ """Build the column metadata table."""
505
+ self.table.clear(columns=True)
506
+ self.table.add_column("Column")
507
+ self.table.add_column("Name")
508
+ self.table.add_column("Type")
509
+
510
+ # Get schema information
511
+ schema = self.df.schema
512
+ dc_int = DtypeConfig(pl.Int64)
513
+ dc_str = DtypeConfig(pl.String)
514
+
515
+ # Add a row for each column
516
+ for idx, (col_name, col_type) in enumerate(schema.items(), 1):
517
+ if col_name == RID:
518
+ continue # Skip RID column
519
+
520
+ dc = DtypeConfig(col_type)
521
+ self.table.add_row(
522
+ dc_int.format(idx, thousand_separator=self.thousand_separator),
523
+ col_name,
524
+ dc_str.format("Datetime" if str(col_type).startswith("Datetime") else col_type, style=dc.style),
525
+ )
526
+
527
+ self.table.cursor_type = "none"