dataframe-textual 1.12.0__py3-none-any.whl → 1.16.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,13 +3,15 @@
3
3
  import sys
4
4
  from collections import defaultdict, deque
5
5
  from dataclasses import dataclass
6
+ from itertools import zip_longest
6
7
  from pathlib import Path
7
8
  from textwrap import dedent
8
9
  from typing import Any
9
10
 
10
11
  import polars as pl
11
- from rich.text import Text
12
+ from rich.text import Text, TextType
12
13
  from textual import work
14
+ from textual._two_way_dict import TwoWayDict
13
15
  from textual.coordinate import Coordinate
14
16
  from textual.events import Click
15
17
  from textual.reactive import reactive
@@ -18,8 +20,11 @@ from textual.widgets import DataTable, TabPane
18
20
  from textual.widgets._data_table import (
19
21
  CellDoesNotExist,
20
22
  CellKey,
23
+ CellType,
21
24
  ColumnKey,
22
25
  CursorType,
26
+ DuplicateKey,
27
+ Row,
23
28
  RowKey,
24
29
  )
25
30
 
@@ -34,12 +39,13 @@ from .common import (
34
39
  format_row,
35
40
  get_next_item,
36
41
  parse_placeholders,
42
+ round_to_nearest_hundreds,
37
43
  sleep_async,
38
44
  tentative_expr,
39
45
  validate_expr,
40
46
  )
41
47
  from .sql_screen import AdvancedSqlScreen, SimpleSqlScreen
42
- from .table_screen import FrequencyScreen, RowDetailScreen, StatisticsScreen
48
+ from .table_screen import FrequencyScreen, MetaColumnScreen, MetaShape, RowDetailScreen, StatisticsScreen
43
49
  from .yes_no_screen import (
44
50
  AddColumnScreen,
45
51
  AddLinkScreen,
@@ -57,6 +63,9 @@ from .yes_no_screen import (
57
63
  # Color for highlighting selections and matches
58
64
  HIGHLIGHT_COLOR = "red"
59
65
 
66
+ # Buffer size for loading rows
67
+ BUFFER_SIZE = 5
68
+
60
69
  # Warning threshold for loading rows
61
70
  WARN_ROWS_THRESHOLD = 50_000
62
71
 
@@ -115,7 +124,7 @@ class DataFrameTable(DataTable):
115
124
  - **g** - ⬆️ Jump to first row
116
125
  - **G** - ⬇️ Jump to last row
117
126
  - **HOME/END** - 🎯 Jump to first/last column
118
- - **Ctrl+HOME/END** - 🎯 Jump to page top/bottom
127
+ - **Ctrl+HOME/END** - 🎯 Jump to page top/bottom
119
128
  - **Ctrl+F** - 📜 Page down
120
129
  - **Ctrl+B** - 📜 Page up
121
130
  - **PgUp/PgDn** - 📜 Page up/down
@@ -125,11 +134,13 @@ class DataFrameTable(DataTable):
125
134
  - **U** - 🔄 Redo last undone action
126
135
  - **Ctrl+U** - 🔁 Reset to initial state
127
136
 
128
- ## 👁️ Viewing & Display
137
+ ## 👁️ Display
129
138
  - **Enter** - 📋 Show row details in modal
130
139
  - **F** - 📊 Show frequency distribution
131
140
  - **s** - 📈 Show statistics for current column
132
141
  - **S** - 📊 Show statistics for entire dataframe
142
+ - **m** - 📐 Show dataframe metadata (row/column counts)
143
+ - **M** - 📋 Show column metadata (ID, name, type)
133
144
  - **h** - 👁️ Hide current column
134
145
  - **H** - 👀 Show all hidden rows/columns
135
146
  - **_** - 📏 Expand column to full width
@@ -143,31 +154,31 @@ class DataFrameTable(DataTable):
143
154
  - **]** - 🔽 Sort column descending
144
155
  - *(Multi-column sort supported)*
145
156
 
146
- ## 🔍 Searching & Filtering
147
- - **|** - 🔎 Search in current column with expression
148
- - **\\\\** - 🔎 Search in current column using cursor value
157
+ ## Row Selection
158
+ - **\\\\** - Select rows in current column using cursor value
159
+ - **|** - Select rows with expression
160
+ - **'** - ✅ Select/deselect current row
161
+ - **t** - 💡 Toggle row selection (invert all)
162
+ - **T** - 🧹 Clear all selections and matches
163
+ - **{** - ⬆️ Go to previous selected row
164
+ - **}** - ⬇️ Go to next selected row
165
+ - *(Supports case-insensitive & whole-word matching)*
166
+
167
+ ## 🔎 Find & Replace
149
168
  - **/** - 🔎 Find in current column with cursor value
150
169
  - **?** - 🔎 Find in current column with expression
151
170
  - **;** - 🌐 Global find using cursor value
152
171
  - **:** - 🌐 Global find with expression
153
172
  - **n** - ⬇️ Go to next match
154
173
  - **N** - ⬆️ Go to previous match
155
- - **v** - 👁️ View/filter rows by cell or selected rows and hide others
156
- - **V** - 🔧 View/filter rows by expression and hide others
157
- - *(All search/find support case-insensitive & whole-word matching)*
158
-
159
- ## ✏️ Replace
160
174
  - **r** - 🔄 Replace in current column (interactive or all)
161
175
  - **R** - 🔄 Replace across all columns (interactive or all)
162
176
  - *(Supports case-insensitive & whole-word matching)*
163
177
 
164
- ## Selection & Filter
165
- - **'** - ✓️ Select/deselect current row
166
- - **t** - 💡 Toggle row selection (invert all)
167
- - **T** - 🧹 Clear all selections and matches
168
- - **{** - ⬆️ Go to previous selected row
169
- - **}** - ⬇️ Go to next selected row
170
- - **"** - 📍 Filter selected rows and remove others
178
+ ## 👁️ View & Filter
179
+ - **"** - 📍 Filter selected rows (removes others)
180
+ - **v** - 👁️ View rows that are selected or contain matching cells (hide others)
181
+ - **V** - 🔧 View rows by expression (hides others)
171
182
 
172
183
  ## 🔍 SQL Interface
173
184
  - **l** - 💬 Open simple SQL interface (select columns & where clause)
@@ -212,6 +223,8 @@ class DataFrameTable(DataTable):
212
223
  ("G", "jump_bottom", "Jump to bottom"),
213
224
  ("ctrl+f", "forward_page", "Page down"),
214
225
  ("ctrl+b", "backward_page", "Page up"),
226
+ ("pageup", "page_up", "Page up"),
227
+ ("pagedown", "page_down", "Page down"),
215
228
  # Undo/Redo/Reset
216
229
  ("u", "undo", "Undo"),
217
230
  ("U", "redo", "Redo"),
@@ -230,7 +243,9 @@ class DataFrameTable(DataTable):
230
243
  ("ctrl+r", "copy_row", "Copy row to clipboard"),
231
244
  # Save
232
245
  ("ctrl+s", "save_to_file", "Save to file"),
233
- # Detail, Frequency, and Statistics
246
+ # Metadata, Detail, Frequency, and Statistics
247
+ ("m", "metadata_shape", "Show metadata for row count and column count"),
248
+ ("M", "metadata_column", "Show metadata for column"),
234
249
  ("enter", "view_row_detail", "View row details"),
235
250
  ("F", "show_frequency", "Show frequency"),
236
251
  ("s", "show_statistics", "Show statistics for column"),
@@ -242,25 +257,23 @@ class DataFrameTable(DataTable):
242
257
  ("v", "view_rows", "View rows"),
243
258
  ("V", "view_rows_expr", "View rows by expression"),
244
259
  ("quotation_mark", "filter_rows", "Filter selected"), # `"`
245
- # Search
246
- ("backslash", "search_cursor_value", "Search column with cursor value"), # `\`
247
- ("vertical_line", "search_expr", "Search column with expression"), # `|`
260
+ # Row Selection
261
+ ("backslash", "select_row_cursor_value", "Select rows with cursor value in current column"), # `\`
262
+ ("vertical_line", "select_row_expr", "Select rows with expression"), # `|`
248
263
  ("right_curly_bracket", "next_selected_row", "Go to next selected row"), # `}`
249
264
  ("left_curly_bracket", "previous_selected_row", "Go to previous selected row"), # `{`
250
- # Find
265
+ ("apostrophe", "toggle_row_selection", "Toggle row selection"), # `'`
266
+ ("t", "toggle_selections", "Toggle all row selections"),
267
+ ("T", "clear_selections_and_matches", "Clear selections"),
268
+ # Find & Replace
251
269
  ("slash", "find_cursor_value", "Find in column with cursor value"), # `/`
252
270
  ("question_mark", "find_expr", "Find in column with expression"), # `?`
253
271
  ("semicolon", "find_cursor_value('global')", "Global find with cursor value"), # `;`
254
272
  ("colon", "find_expr('global')", "Global find with expression"), # `:`
255
273
  ("n", "next_match", "Go to next match"), # `n`
256
274
  ("N", "previous_match", "Go to previous match"), # `Shift+n`
257
- # Replace
258
275
  ("r", "replace", "Replace in column"), # `r`
259
276
  ("R", "replace_global", "Replace global"), # `Shift+R`
260
- # Selection
261
- ("apostrophe", "toggle_row_selection", "Toggle row selection"), # `'`
262
- ("t", "toggle_selections", "Toggle all row selections"),
263
- ("T", "clear_selections_and_matches", "Clear selections"),
264
277
  # Delete
265
278
  ("delete", "clear_cell", "Clear cell"),
266
279
  ("minus", "delete_column", "Delete column"), # `-`
@@ -316,9 +329,9 @@ class DataFrameTable(DataTable):
316
329
  self.filename = filename or "untitled.csv" # Current filename
317
330
  self.tabname = tabname or Path(filename).stem # Tab name
318
331
  # Pagination & Loading
319
- self.INITIAL_BATCH_SIZE = (self.app.size.height // 100 + 1) * 100
320
- self.BATCH_SIZE = self.INITIAL_BATCH_SIZE // 2
332
+ self.BATCH_SIZE = max((self.app.size.height // 100 + 1) * 100, 100)
321
333
  self.loaded_rows = 0 # Track how many rows are currently loaded
334
+ self.loaded_ranges: list[tuple[int, int]] = [] # List of (start, end) row indices that are loaded
322
335
 
323
336
  # State tracking (all 0-based indexing)
324
337
  self.sorted_columns: dict[str, bool] = {} # col_name -> descending
@@ -419,7 +432,7 @@ class DataFrameTable(DataTable):
419
432
  Returns:
420
433
  bool: True if there are hidden rows, False otherwise.
421
434
  """
422
- return any(v for v in self.visible_rows if v is False)
435
+ return any(1 for v in self.visible_rows if v is False)
423
436
 
424
437
  @property
425
438
  def ordered_selected_rows(self) -> list[int]:
@@ -452,6 +465,22 @@ class DataFrameTable(DataTable):
452
465
  """
453
466
  return self.histories[-1] if self.histories else None
454
467
 
468
def _round_to_nearest_hundreds(self, num: int):
    """Round ``num`` with ``round_to_nearest_hundreds`` using this table's batch size.

    Args:
        num: The number to round (callers in this class pass a dataframe row index).

    Returns:
        Whatever ``round_to_nearest_hundreds(num, N=self.BATCH_SIZE)`` returns.
        Callers in this class unpack the result as a ``(start, stop)`` pair.
    """
    batch_size = self.BATCH_SIZE
    return round_to_nearest_hundreds(num, N=batch_size)
475
+
476
def get_row_idx(self, row_key: RowKey) -> int:
    """Return the table row index for a table row key.

    Thin wrapper that delegates to the parent class's ``get_row_index``.

    Args:
        row_key: Key identifying the row in the table.

    Returns:
        The row index reported by the parent's ``get_row_index``.
    """
    row_idx = super().get_row_index(row_key)
    return row_idx
483
+
455
484
  def get_row_key(self, row_idx: int) -> RowKey:
456
485
  """Get the row key for a given table row index.
457
486
 
@@ -463,7 +492,18 @@ class DataFrameTable(DataTable):
463
492
  """
464
493
  return self._row_locations.get_key(row_idx)
465
494
 
466
- def get_column_key(self, col_idx: int) -> ColumnKey:
495
def get_col_idx(self, col_key: ColumnKey) -> int:
    """Return the table column index for a table column key.

    Thin wrapper that delegates to the parent class's ``get_column_index``.

    Args:
        col_key: Key identifying the column in the table.

    Returns:
        The column index reported by the parent's ``get_column_index``.
    """
    col_idx = super().get_column_index(col_key)
    return col_idx
505
+
506
+ def get_col_key(self, col_idx: int) -> ColumnKey:
467
507
  """Get the column key for a given table column index.
468
508
 
469
509
  Args:
@@ -474,11 +514,11 @@ class DataFrameTable(DataTable):
474
514
  """
475
515
  return self._column_locations.get_key(col_idx)
476
516
 
477
- def should_highlight(self, cursor: Coordinate, target_cell: Coordinate, type_of_cursor: CursorType) -> bool:
517
+ def _should_highlight(self, cursor: Coordinate, target_cell: Coordinate, type_of_cursor: CursorType) -> bool:
478
518
  """Determine if the given cell should be highlighted because of the cursor.
479
519
 
480
- In "cell" mode, also highlights the row and column headers. In "row" and "column"
481
- modes, highlights the entire row or column respectively.
520
+ In "cell" mode, also highlights the row and column headers. This overrides the default
521
+ behavior of DataTable which only highlights the exact cell under the cursor.
482
522
 
483
523
  Args:
484
524
  cursor: The current position of the cursor.
@@ -575,7 +615,7 @@ class DataFrameTable(DataTable):
575
615
  else:
576
616
  content_tab.remove_class("dirty")
577
617
 
578
- def move_cursor_to(self, ridx: int, cidx: int) -> None:
618
+ def move_cursor_to(self, ridx: int | None = None, cidx: int | None = None) -> None:
579
619
  """Move cursor based on the dataframe indices.
580
620
 
581
621
  Args:
@@ -583,11 +623,11 @@ class DataFrameTable(DataTable):
583
623
  cidx: Column index (0-based) in the dataframe.
584
624
  """
585
625
  # Ensure the target row is loaded
586
- if ridx >= self.loaded_rows:
587
- self.load_rows(stop=ridx + self.BATCH_SIZE)
626
+ start, stop = self._round_to_nearest_hundreds(ridx)
627
+ self.load_rows_range(start, stop)
588
628
 
589
- row_key = str(ridx)
590
- col_key = self.df.columns[cidx]
629
+ row_key = self.cursor_row_key if ridx is None else str(ridx)
630
+ col_key = self.cursor_col_key if cidx is None else self.df.columns[cidx]
591
631
  row_idx, col_idx = self.get_cell_coordinate(row_key, col_key)
592
632
  self.move_cursor(row=row_idx, column=col_idx)
593
633
 
@@ -603,15 +643,15 @@ class DataFrameTable(DataTable):
603
643
  def on_key(self, event) -> None:
604
644
  """Handle key press events for pagination.
605
645
 
606
- Currently handles "pagedown" and "down" keys to trigger lazy loading of additional rows
607
- when scrolling near the end of the loaded data.
608
-
609
646
  Args:
610
647
  event: The key event object.
611
648
  """
612
- if event.key in ("pagedown", "down"):
649
+ if event.key == "up":
650
+ # Let the table handle the navigation first
651
+ self.load_rows_up()
652
+ elif event.key == "down":
613
653
  # Let the table handle the navigation first
614
- self.check_and_load_more()
654
+ self.load_rows_down()
615
655
 
616
656
  def on_click(self, event: Click) -> None:
617
657
  """Handle mouse click events on the table.
@@ -624,13 +664,13 @@ class DataFrameTable(DataTable):
624
664
  if self.cursor_type == "cell" and event.chain > 1: # only on double-click or more
625
665
  try:
626
666
  row_idx = event.style.meta["row"]
627
- # col_idx = event.style.meta["column"]
667
+ col_idx = event.style.meta["column"]
628
668
  except (KeyError, TypeError):
629
669
  return # Unable to get row/column info
630
670
 
631
671
  # header row
632
672
  if row_idx == -1:
633
- self.do_rename_column()
673
+ self.do_rename_column(col_idx)
634
674
  else:
635
675
  self.do_edit_cell()
636
676
 
@@ -641,16 +681,38 @@ class DataFrameTable(DataTable):
641
681
 
642
682
def action_jump_bottom(self) -> None:
    """Jump to the bottom of the table.

    Loads the last batch-aligned window of the dataframe (if it is not loaded
    yet) and moves the cursor to the last row currently in the table.
    """
    stop = len(self.df)
    # Start of the batch that contains the last row (index ``stop - 1``).
    # Deriving it from ``stop - 1`` keeps the window non-empty when the
    # dataframe length is an exact multiple of BATCH_SIZE; max() covers an
    # empty dataframe, where the floor division goes negative.
    start = max(0, (stop - 1) // self.BATCH_SIZE * self.BATCH_SIZE)
    self.load_rows_range(start, stop)
    self.move_cursor(row=self.row_count - 1)
688
+
689
def action_page_up(self) -> None:
    """Move the cursor one page up, loading the target rows first.

    With a visible "cell" or "row" cursor, the target dataframe row is the
    current one minus the viewport height and ``BUFFER_SIZE`` (never below 0).
    The batch containing that row is loaded before the cursor moves.
    In every other cursor configuration the parent implementation is used.
    """
    self._set_hover_cursor(False)

    if not (self.show_cursor and self.cursor_type in ("cell", "row")):
        super().action_page_up()
        return

    # Rows that fit in the viewport, excluding the header when it is shown
    header_rows = self.header_height if self.show_header else 0
    height = self.scrollable_content_region.height - header_rows

    col_idx = self.cursor_column
    ridx = self.cursor_row_idx
    next_ridx = max(0, ridx - height - BUFFER_SIZE)

    # Make sure the batch holding the target row is in the table
    start, stop = self._round_to_nearest_hundreds(next_ridx)
    self.load_rows_range(start, stop)

    target_row = self.get_row_idx(str(next_ridx))
    self.move_cursor(row=target_row, column=col_idx)
704
+
705
def action_page_down(self) -> None:
    """Move one page down via the parent implementation, then load rows below."""
    # Let the parent class perform the navigation first
    super().action_page_down()
    # Then load rows in the downward direction if needed
    self.load_rows_down()
650
708
 
651
709
def action_backward_page(self) -> None:
    """Scroll up one page by delegating to ``action_page_up``."""
    self.action_page_up()
712
+
713
def action_forward_page(self) -> None:
    """Scroll down one page by delegating to ``action_page_down``."""
    self.action_page_down()
654
716
 
655
717
  def action_view_row_detail(self) -> None:
656
718
  """View details of the current row."""
@@ -696,6 +758,14 @@ class DataFrameTable(DataTable):
696
758
  """
697
759
  self.do_show_statistics(scope)
698
760
 
761
def action_metadata_shape(self) -> None:
    """Action handler: show dataframe metadata (row and column counts).

    Delegates to ``do_metadata_shape``.
    """
    self.do_metadata_shape()
764
+
765
def action_metadata_column(self) -> None:
    """Action handler: show column metadata.

    Delegates to ``do_metadata_column``.
    """
    self.do_metadata_column()
768
+
699
769
  def action_view_rows(self) -> None:
700
770
  """View rows by current cell value."""
701
771
  self.do_view_rows()
@@ -732,13 +802,13 @@ class DataFrameTable(DataTable):
732
802
  """Clear the current cell (set to None)."""
733
803
  self.do_clear_cell()
734
804
 
735
- def action_search_cursor_value(self) -> None:
736
- """Search cursor value in the current column."""
737
- self.do_search_cursor_value()
805
def action_select_row_cursor_value(self) -> None:
    """Action handler: select rows matching the cursor value in the current column.

    Delegates to ``do_select_row_cursor_value``.
    """
    self.do_select_row_cursor_value()
738
808
 
739
- def action_search_expr(self) -> None:
740
- """Search by expression in the current column."""
741
- self.do_search_expr()
809
def action_select_row_expr(self) -> None:
    """Action handler: select rows by expression.

    Delegates to ``do_select_row_expr``.
    """
    self.do_select_row_expr()
742
812
 
743
813
  def action_find_cursor_value(self, scope="column") -> None:
744
814
  """Find by cursor value.
@@ -925,9 +995,13 @@ class DataFrameTable(DataTable):
925
995
  """Open the advanced SQL interface screen."""
926
996
  self.do_advanced_sql()
927
997
 
998
def on_mouse_scroll_up(self, event) -> None:
    """Load rows in the upward direction when the mouse wheel scrolls up.

    Args:
        event: The mouse scroll event (not inspected).
    """
    self.load_rows_up()
1001
+
928
1002
def on_mouse_scroll_down(self, event) -> None:
    """Load rows in the downward direction when the mouse wheel scrolls down.

    Args:
        event: The mouse scroll event (not inspected).
    """
    self.load_rows_down()
931
1005
 
932
1006
  # Setup & Loading
933
1007
  def reset_df(self, new_df: pl.DataFrame, dirty: bool = True) -> None:
@@ -964,27 +1038,27 @@ class DataFrameTable(DataTable):
964
1038
  if reset:
965
1039
  self.reset_df(self.dataframe, dirty=False)
966
1040
 
967
- # Lazy load up to INITIAL_BATCH_SIZE visible rows
968
- stop, visible_count, row_idx = self.INITIAL_BATCH_SIZE, 0, 0
1041
+ # Lazy load up to BATCH_SIZE visible rows
1042
+ stop, visible_count, row_idx = self.BATCH_SIZE, 0, 0
969
1043
  for row_idx, visible in enumerate(self.visible_rows):
970
1044
  if not visible:
971
1045
  continue
972
1046
  visible_count += 1
973
- if visible_count > self.INITIAL_BATCH_SIZE:
974
- stop = row_idx + self.BATCH_SIZE
1047
+ if visible_count > self.BATCH_SIZE:
1048
+ stop = row_idx
975
1049
  break
976
1050
  else:
977
- stop = row_idx + self.BATCH_SIZE
1051
+ stop = row_idx
978
1052
 
979
- # # Ensure all selected rows or matches are loaded
980
- # stop = max(stop, rindex(self.selected_rows, True) + 1)
981
- # stop = max(stop, max(self.matches.keys(), default=0) + 1)
1053
+ # Round up to next hundreds
1054
+ if stop % self.BATCH_SIZE != 0:
1055
+ stop = (stop // self.BATCH_SIZE + 1) * self.BATCH_SIZE
982
1056
 
983
1057
  # Save current cursor position before clearing
984
1058
  row_idx, col_idx = self.cursor_coordinate
985
1059
 
986
1060
  self.setup_columns()
987
- self.load_rows(stop)
1061
+ self.load_rows_range(0, stop)
988
1062
 
989
1063
  # Restore cursor position
990
1064
  if row_idx < len(self.rows) and col_idx < len(self.columns):
@@ -1007,7 +1081,7 @@ class DataFrameTable(DataTable):
1007
1081
  column_widths = {}
1008
1082
 
1009
1083
  # Get available width for the table (with some padding for borders/scrollbar)
1010
- available_width = self.size.width - 4 # Account for borders and scrollbar
1084
+ available_width = self.scrollable_content_region.width
1011
1085
 
1012
1086
  # Calculate how much width we need for string columns first
1013
1087
  string_cols = [col for col, dtype in zip(self.df.columns, self.df.dtypes) if dtype == pl.String]
@@ -1017,7 +1091,7 @@ class DataFrameTable(DataTable):
1017
1091
  return column_widths
1018
1092
 
1019
1093
  # Sample a reasonable number of rows to calculate widths (don't scan entire dataframe)
1020
- sample_size = min(self.INITIAL_BATCH_SIZE, len(self.df))
1094
+ sample_size = min(self.BATCH_SIZE, len(self.df))
1021
1095
  sample_lf = self.df.lazy().slice(0, sample_size)
1022
1096
 
1023
1097
  # Determine widths for each column
@@ -1144,7 +1218,8 @@ class DataFrameTable(DataTable):
1144
1218
  # Load max BATCH_SIZE rows at a time
1145
1219
  chunk_size = min(self.BATCH_SIZE, stop - self.loaded_rows)
1146
1220
  next_stop = min(self.loaded_rows + chunk_size, stop)
1147
- self.load_rows_batch(next_stop)
1221
+ self.load_rows_range(self.loaded_rows, next_stop)
1222
+ self.loaded_rows = next_stop
1148
1223
 
1149
1224
  # If there's more to load, yield to event loop with delay
1150
1225
  if next_stop < stop:
@@ -1156,64 +1231,382 @@ class DataFrameTable(DataTable):
1156
1231
  if move_to_end:
1157
1232
  self.call_after_refresh(lambda: self.move_cursor(row=self.row_count - 1))
1158
1233
 
1159
- def load_rows_batch(self, stop: int) -> None:
1160
- """Load a batch of rows into the table.
1234
+ def _calculate_load_range(self, start: int, stop: int) -> list[tuple[int, int]]:
1235
+ """Calculate the actual ranges to load, accounting for already-loaded ranges.
1161
1236
 
1162
- Row keys are 0-based indices as strings, which map directly to dataframe row indices.
1163
- Row labels are 1-based indices as strings.
1237
+ Handles complex cases where a loaded range is fully contained within the requested
1238
+ range (creating head and tail segments to load). All overlapping/adjacent loaded
1239
+ ranges are merged first to minimize gaps.
1164
1240
 
1165
1241
  Args:
1166
- stop: Stop loading rows when this index is reached.
1242
+ start: Requested start index (0-based).
1243
+ stop: Requested stop index (0-based, exclusive).
1244
+
1245
+ Returns:
1246
+ List of (actual_start, actual_stop) tuples to load. Empty list if the entire
1247
+ requested range is already loaded.
1248
+
1249
+ Example:
1250
+ If loaded ranges are [(150, 250)] and requesting (100, 300):
1251
+ - Returns [(100, 150), (250, 300)] to load head and tail
1252
+ If loaded ranges are [(0, 100), (100, 200)] and requesting (50, 150):
1253
+ - After merging, loaded_ranges becomes [(0, 200)]
1254
+ - Returns [] (already fully loaded)
1167
1255
  """
1168
- try:
1169
- start = self.loaded_rows
1170
- df_slice = self.df.slice(start, stop - start)
1256
+ if not self.loaded_ranges:
1257
+ return [(start, stop)]
1258
+
1259
+ # Sort loaded ranges by start index
1260
+ sorted_ranges = sorted(self.loaded_ranges)
1261
+
1262
+ # Merge overlapping/adjacent ranges
1263
+ merged = []
1264
+ for range_start, range_stop in sorted_ranges:
1265
+ if merged and range_start <= merged[-1][1]:
1266
+ # Overlapping or adjacent: merge
1267
+ merged[-1] = (merged[-1][0], max(merged[-1][1], range_stop))
1268
+ else:
1269
+ merged.append((range_start, range_stop))
1270
+
1271
+ self.loaded_ranges = merged
1272
+
1273
+ # Calculate ranges to load by finding gaps in the merged ranges
1274
+ ranges_to_load = []
1275
+ current_pos = start
1276
+
1277
+ for range_start, range_stop in merged:
1278
+ # If there's a gap before this loaded range, add it to load list
1279
+ if current_pos < range_start and current_pos < stop:
1280
+ gap_end = min(range_start, stop)
1281
+ ranges_to_load.append((current_pos, gap_end))
1282
+ current_pos = range_stop
1283
+ elif current_pos >= range_stop:
1284
+ # Already moved past this loaded range
1285
+ continue
1286
+ else:
1287
+ # Current position is inside this loaded range, skip past it
1288
+ current_pos = max(current_pos, range_stop)
1171
1289
 
1172
- for ridx, row in enumerate(df_slice.rows(), start):
1173
- if not self.visible_rows[ridx]:
1174
- continue # Skip hidden rows
1290
+ # If there's remaining range after all loaded ranges, add it
1291
+ if current_pos < stop:
1292
+ ranges_to_load.append((current_pos, stop))
1293
+
1294
+ return ranges_to_load
1295
+
1296
+ def _merge_loaded_ranges(self) -> None:
1297
+ """Merge adjacent and overlapping ranges in self.loaded_ranges.
1298
+
1299
+ Ranges like (0, 100) and (100, 200) are merged into (0, 200).
1300
+ """
1301
+ if len(self.loaded_ranges) <= 1:
1302
+ return
1303
+
1304
+ # Sort by start index
1305
+ sorted_ranges = sorted(self.loaded_ranges)
1306
+
1307
+ # Merge overlapping/adjacent ranges
1308
+ merged = [sorted_ranges[0]]
1309
+ for range_start, range_stop in sorted_ranges[1:]:
1310
+ # Overlapping or adjacent: merge
1311
+ if range_start <= merged[-1][1]:
1312
+ merged[-1] = (merged[-1][0], max(merged[-1][1], range_stop))
1313
+ else:
1314
+ merged.append((range_start, range_stop))
1315
+
1316
+ self.loaded_ranges = merged
1317
+
1318
+ def _find_insert_position_for_row(self, ridx: int) -> int:
1319
+ """Find the correct table position to insert a row with the given dataframe index.
1320
+
1321
+ In the table display, rows are ordered by their dataframe index, regardless of
1322
+ the internal row keys. This method finds where a row should be inserted based on
1323
+ its dataframe index and the indices of already-loaded rows.
1324
+
1325
+ Args:
1326
+ ridx: The 0-based dataframe row index.
1327
+
1328
+ Returns:
1329
+ The 0-based table position where the row should be inserted.
1330
+ """
1331
+ # Count how many already-loaded rows have lower dataframe indices
1332
+ # Iterate through loaded rows instead of iterating 0..ridx for efficiency
1333
+ insert_pos = 0
1334
+ for row_key in self._row_locations:
1335
+ loaded_ridx = int(row_key.value)
1336
+ if loaded_ridx < ridx:
1337
+ insert_pos += 1
1338
+
1339
+ return insert_pos
1340
+
1341
def load_rows_segment(self, segment_start: int, segment_stop: int) -> int:
    """Load one contiguous segment of dataframe rows into the table.

    The segment is recorded in ``self.loaded_ranges`` first. Each visible row
    is then formatted (hidden columns dropped, selected rows and matched cells
    highlighted) and inserted at the position that keeps the table ordered by
    dataframe index. Row keys are the 0-based dataframe index as a string;
    row labels are the 1-based index.

    Args:
        segment_start: Start loading rows from this index (0-based).
        segment_stop: Stop loading rows when this index is reached (0-based, exclusive).

    Returns:
        The length of the dataframe slice, which also counts rows that were
        skipped because they are hidden.
    """
    # Record this range before loading
    self.loaded_ranges.append((segment_start, segment_stop))

    segment_len = segment_stop - segment_start
    df_slice = self.df.slice(segment_start, segment_len)

    for ridx, row in enumerate(df_slice.rows(), segment_start):
        if not self.visible_rows[ridx]:
            continue  # Hidden rows are never inserted

        is_selected = self.selected_rows[ridx]
        match_cols = self.matches.get(ridx, set())

        # Keep the dataframe column index so matches can be looked up after
        # hidden columns have been dropped
        shown = [
            (cidx, val, dtype)
            for cidx, (val, col, dtype) in enumerate(zip(row, self.df.columns, self.df.dtypes))
            if col not in self.hidden_columns
        ]
        vals = [val for _, val, _ in shown]
        dtypes = [dtype for _, _, dtype in shown]
        # Highlight the entire row when selected, otherwise only matched cells
        styles = [HIGHLIGHT_COLOR if is_selected or cidx in match_cols else None for cidx, _, _ in shown]

        formatted_row = format_row(vals, dtypes, styles=styles, thousand_separator=self.thousand_separator)

        # Insert at the position that preserves dataframe order
        insert_pos = self._find_insert_position_for_row(ridx)
        self.insert_row(*formatted_row, key=str(ridx), label=str(ridx + 1), position=insert_pos)

    # Advance the loaded-rows counter by the size of the slice
    segment_count = len(df_slice)
    self.loaded_rows += segment_count

    return segment_count
1189
1390
 
1190
- formatted_row = format_row(vals, dtypes, styles=styles, thousand_separator=self.thousand_separator)
1391
def load_rows_range(self, start: int, stop: int) -> int:
    """Load the rows of ``[start, stop)`` that are not in the table yet.

    The request is clamped to ``[0, len(self.df)]``. ``_calculate_load_range``
    then determines the missing segments, each of which is loaded with
    ``load_rows_segment``; afterwards the bookkeeping in ``self.loaded_ranges``
    is merged. Any exception raised while doing so is reported through
    ``notify``/``log`` and turned into a return value of 0.

    Args:
        start: Start loading rows from this index (0-based).
        stop: Stop loading rows when this index is reached (0-based, exclusive).

    Returns:
        Sum of the counts returned by ``load_rows_segment`` for the loaded
        segments; 0 when nothing needed loading or an error occurred.
    """
    # Clamp the request to the dataframe bounds
    start = max(0, start)
    stop = min(stop, len(self.df))

    try:
        pending = self._calculate_load_range(start, stop)

        if not pending:
            self.log(f"Range {start}-{stop} already loaded, skipping")
            return 0

        # Load every missing segment and total up the reported counts
        range_count = sum(self.load_rows_segment(seg_start, seg_stop) for seg_start, seg_stop in pending)

        # Keep loaded_ranges compact for future lookups
        self._merge_loaded_ranges()

        self.log(f"Loaded {range_count} rows for range {start}-{stop}/{len(self.df)}")
        return range_count

    except Exception as e:
        self.notify("Error loading rows", title="Load", severity="error", timeout=10)
        self.log(f"Error loading rows: {str(e)}")
        return 0
1204
1436
 
1205
- def check_and_load_more(self) -> None:
1437
def load_rows_up(self) -> None:
    """Load the batch of rows around the top of the viewport if it is missing.

    Does nothing once ``self.loaded_rows`` has reached the dataframe length.
    Otherwise the row key found ``BUFFER_SIZE`` rows below the scroll offset
    gives a dataframe index; the batch around that index minus twice
    ``BUFFER_SIZE`` is requested from ``load_rows_range``. When rows were
    actually loaded, the cursor is moved by the number of loaded rows.
    """
    # If we've loaded everything, no need to check
    if self.loaded_rows >= len(self.df):
        return

    top_row_index = int(self.scroll_y) + BUFFER_SIZE
    top_row_key = self.get_row_key(top_row_index)

    if not top_row_key:
        # No row at that table position: fall back to the first dataframe row
        top_ridx = 0
        self.log(f"No top row key at index {top_row_index}, defaulting to 0")
    else:
        top_ridx = int(top_row_key.value)

    # Load upward
    window = self._round_to_nearest_hundreds(top_ridx - BUFFER_SIZE * 2)
    start, stop = window
    range_count = self.load_rows_range(start, stop)

    if range_count <= 0:
        return

    # Adjust the cursor to maintain position, since rows were loaded above
    self.move_cursor(row=top_row_index + range_count)
    self.log(f"Loaded up: {range_count} rows in range {start}-{stop}/{len(self.df)}")
1473
+
1474
def load_rows_down(self) -> None:
    """Load the block of rows below the current scroll position, if needed.

    The table row ``BUFFER_SIZE`` rows above the bottom of the scrollable
    content region is used as the anchor. Its dataframe row index, moved down
    by ``2 * BUFFER_SIZE``, is handed to ``_round_to_nearest_hundreds`` to
    obtain the ``(start, stop)`` range that ``load_rows_range`` should load.
    """
    # If we've loaded everything, no need to check
    if self.loaded_rows >= len(self.df):
        return

    visible_row_count = self.scrollable_content_region.height - self.header_height

    # Truncate scroll_y exactly as load_rows_up does: a fractional scroll offset
    # would otherwise produce a non-integer row index for the key lookup below.
    bottom_row_index = int(self.scroll_y) + visible_row_count - BUFFER_SIZE

    bottom_row_key = self.get_row_key(bottom_row_index)
    if bottom_row_key:
        bottom_ridx = int(bottom_row_key.value)
    else:
        bottom_ridx = 0
        self.log(f"No bottom row key at index {bottom_row_index}, defaulting to 0")

    # Load downward
    start, stop = self._round_to_nearest_hundreds(bottom_ridx + BUFFER_SIZE * 2)
    range_count = self.load_rows_range(start, stop)

    if range_count > 0:
        self.log(f"Loaded down: {range_count} rows in range {start}-{stop}/{len(self.df)}")
1509
+
1510
def insert_row(
    self,
    *cells: CellType,
    height: int | None = 1,
    key: str | None = None,
    label: TextType | None = None,
    position: int | None = None,
) -> RowKey:
    """Add a row to the DataTable at a given row index.

    Every existing row located at ``position`` or below is pushed down by one,
    and ``self._row_locations`` is rebuilt so that it reflects the new indices.

    Args:
        *cells: The cell values of the new row, in column order.
        height: Row height in lines. Pass `None` to have the height detected
            automatically.
        key: Unique key for the row. When None, one is generated and returned.
        label: Optional row label, shown to the left of the row.
        position: 0-based index at which to insert. `None`, or an index at or
            past the current row count, appends the row (delegating to
            `add_row`). Negative values are clamped to 0.

    Returns:
        The key identifying the new row, valid wherever the row ends up later.

    Raises:
        DuplicateKey: If a row with the given key already exists.
        ValueError: If more cells are provided than there are columns.
    """
    row_count = self.row_count

    # No position, or a position at/past the end: this is a plain append
    if position is None or position >= row_count:
        return self.add_row(*cells, height=height, key=key, label=label)

    # Negative positions insert at the top
    position = max(0, position)

    row_key = RowKey(key)
    if row_key in self._row_locations:
        raise DuplicateKey(f"The row key {row_key!r} already exists.")

    if len(cells) > len(self.ordered_columns):
        raise ValueError("More values provided than there are columns.")

    # Rows at or below the insertion point move down by one; indices missing
    # from this mapping (rows above the insertion point) are left unchanged.
    shifted = {old_idx: old_idx + 1 for old_idx in range(position, row_count)}

    relocated = TwoWayDict({})
    for existing_key in self._row_locations:
        current_idx = self.get_row_idx(existing_key)
        relocated[existing_key] = shifted.get(current_idx, current_idx)
    self._row_locations = relocated

    # Register the new row at the index that was just freed
    self._row_locations[row_key] = position
    self._data[row_key] = {column.key: cell for column, cell in zip_longest(self.ordered_columns, cells)}

    if isinstance(label, str):
        label = Text.from_markup(label, end="")

    # An auto-height row is stored with height 0 for now: an integer is needed
    # for intermediate computations, and 0 has no effect on the table until the
    # real height is known.
    auto_height = height is None
    self.rows[row_key] = Row(row_key, height or 0, label, auto_height)
    self._new_rows.add(row_key)
    self._require_update_dimensions = True
    self.cursor_coordinate = self.cursor_coordinate

    # When this row is the first one in the table, a cell has just become
    # available for the cursor, so the highlight is refreshed if the cursor is
    # shown.
    cell_now_available = self.row_count == 1 and len(self.columns) > 0
    visible_cursor = self.show_cursor and self.cursor_type != "none"
    if cell_now_available and visible_cursor:
        self._highlight_cursor()

    self._update_count += 1
    self.check_idle()
    return row_key
1217
1610
 
1218
1611
  # History & Undo
1219
1612
  def create_history(self, description: str) -> None:
@@ -1354,6 +1747,14 @@ class DataFrameTable(DataTable):
1354
1747
  cidx = self.cursor_col_idx
1355
1748
  self.app.push_screen(StatisticsScreen(self, col_idx=cidx))
1356
1749
 
1750
def do_metadata_shape(self) -> None:
    """Push the MetaShape screen for this table onto the app's screen stack."""
    shape_screen = MetaShape(self)
    self.app.push_screen(shape_screen)
1753
+
1754
def do_metadata_column(self) -> None:
    """Push the MetaColumnScreen for this table onto the app's screen stack."""
    column_screen = MetaColumnScreen(self)
    self.app.push_screen(column_screen)
1757
+
1357
1758
  def do_freeze_row_column(self) -> None:
1358
1759
  """Open the freeze screen to set fixed rows and columns."""
1359
1760
  self.app.push_screen(FreezeScreen(), callback=self.freeze_row_column)
@@ -1486,6 +1887,7 @@ class DataFrameTable(DataTable):
1486
1887
 
1487
1888
  # Add to history
1488
1889
  self.add_history(f"Sorted on column [$success]{col_name}[/]", dirty=True)
1890
+
1489
1891
  if old_desc is None:
1490
1892
  # Add new column to sort
1491
1893
  self.sorted_columns[col_name] = descending
@@ -1497,18 +1899,27 @@ class DataFrameTable(DataTable):
1497
1899
  del self.sorted_columns[col_name]
1498
1900
  self.sorted_columns[col_name] = descending
1499
1901
 
1902
+ lf = self.df.lazy().with_row_index(RIDX)
1903
+
1500
1904
  # Apply multi-column sort
1501
1905
  if sort_cols := list(self.sorted_columns.keys()):
1502
1906
  descending_flags = list(self.sorted_columns.values())
1503
- df_sorted = self.df.with_row_index(RIDX).sort(sort_cols, descending=descending_flags, nulls_last=True)
1504
- else:
1505
- # No sort columns - restore original order
1506
- df_sorted = self.df.with_row_index(RIDX)
1907
+ lf = lf.sort(sort_cols, descending=descending_flags, nulls_last=True)
1908
+
1909
+ df_sorted = lf.collect()
1507
1910
 
1508
- # Updated selected_rows and visible_rows to match new order
1911
+ # Updated visible rows, selected rows, and cell matches to match new order
1509
1912
  old_row_indices = df_sorted[RIDX].to_list()
1510
- self.selected_rows = [self.selected_rows[i] for i in old_row_indices]
1511
- self.visible_rows = [self.visible_rows[i] for i in old_row_indices]
1913
+ if self.has_hidden_rows:
1914
+ self.visible_rows = [self.visible_rows[old_ridx] for old_ridx in old_row_indices]
1915
+ if any(self.selected_rows):
1916
+ self.selected_rows = [self.selected_rows[old_ridx] for old_ridx in old_row_indices]
1917
+ if any(self.matches):
1918
+ self.matches = {
1919
+ new_ridx: self.matches[old_ridx]
1920
+ for new_ridx, old_ridx in enumerate(old_row_indices)
1921
+ if old_ridx in self.matches
1922
+ }
1512
1923
 
1513
1924
  # Update the dataframe
1514
1925
  self.df = df_sorted.drop(RIDX)
@@ -1649,10 +2060,10 @@ class DataFrameTable(DataTable):
1649
2060
 
1650
2061
  # self.notify(f"Column [$accent]{col_name}[/] updated with [$success]{expr}[/]", title="Edit Column")
1651
2062
 
1652
- def do_rename_column(self) -> None:
2063
+ def do_rename_column(self, col_idx: int | None) -> None:
1653
2064
  """Open modal to rename the selected column."""
1654
- col_name = self.cursor_col_name
1655
- col_idx = self.cursor_column
2065
+ col_idx = self.cursor_column if col_idx is None else col_idx
2066
+ col_name = self.get_col_key(col_idx).value
1656
2067
 
1657
2068
  # Push the rename column modal screen
1658
2069
  self.app.push_screen(
@@ -1832,10 +2243,10 @@ class DataFrameTable(DataTable):
1832
2243
  def add_link_column(self, result: tuple[str, str] | None) -> None:
1833
2244
  """Handle result from AddLinkScreen.
1834
2245
 
1835
- Creates a new link column in the dataframe with clickable links based on a
1836
- user-provided template. Supports multiple placeholder types:
2246
+ Creates a new link column in the dataframe based on a user-provided template.
2247
+ Supports multiple placeholder types:
1837
2248
  - `$_` - Current column (based on cursor position)
1838
- - `$1`, `$2`, etc. - Column by 1-based position index
2249
+ - `$1`, `$2`, etc. - Column by index (1-based)
1839
2250
  - `$name` - Column by name (e.g., `$id`, `$product_name`)
1840
2251
 
1841
2252
  The template is evaluated for each row using Polars expressions with vectorized
@@ -1903,7 +2314,7 @@ class DataFrameTable(DataTable):
1903
2314
  # Remove all columns before the current column
1904
2315
  if more == "before":
1905
2316
  for i in range(col_idx + 1):
1906
- col_key = self.get_column_key(i)
2317
+ col_key = self.get_col_key(i)
1907
2318
  col_names_to_remove.append(col_key.value)
1908
2319
  col_keys_to_remove.append(col_key)
1909
2320
 
@@ -1912,7 +2323,7 @@ class DataFrameTable(DataTable):
1912
2323
  # Remove all columns after the current column
1913
2324
  elif more == "after":
1914
2325
  for i in range(col_idx, len(self.columns)):
1915
- col_key = self.get_column_key(i)
2326
+ col_key = self.get_col_key(i)
1916
2327
  col_names_to_remove.append(col_key.value)
1917
2328
  col_keys_to_remove.append(col_key)
1918
2329
 
@@ -2197,8 +2608,8 @@ class DataFrameTable(DataTable):
2197
2608
  self._row_locations[row_key],
2198
2609
  self._row_locations[swap_key],
2199
2610
  ) = (
2200
- self._row_locations.get(swap_key),
2201
- self._row_locations.get(row_key),
2611
+ self.get_row_idx(swap_key),
2612
+ self.get_row_idx(row_key),
2202
2613
  )
2203
2614
 
2204
2615
  self._update_count += 1
@@ -2272,18 +2683,23 @@ class DataFrameTable(DataTable):
2272
2683
  )
2273
2684
  self.log(f"Error casting column `{col_name}`: {str(e)}")
2274
2685
 
2275
- # Search
2276
- def do_search_cursor_value(self) -> None:
2686
+ # Row selection
2687
def do_select_row_cursor_value(self) -> None:
    """Select the rows whose value in the cursor column equals the cursor cell.

    A Polars expression is built for the comparison (``is_null()`` when the
    cursor value is None, equality otherwise) and passed to ``select_row``
    together with the cursor column index.
    """
    cidx = self.cursor_col_idx
    col_name = self.cursor_col_name
    value = self.cursor_value

    # Build the predicate directly; a null cell cannot be matched with `==`
    if value is None:
        term = pl.col(col_name).is_null()
    else:
        term = pl.col(col_name) == value

    self.select_row((term, cidx, False, True))
2284
2700
 
2285
- def do_search_expr(self) -> None:
2286
- """Search by expression."""
2701
+ def do_select_row_expr(self) -> None:
2702
+ """Select rows by expression."""
2287
2703
  cidx = self.cursor_col_idx
2288
2704
 
2289
2705
  # Use current cell value as default search term
@@ -2292,21 +2708,26 @@ class DataFrameTable(DataTable):
2292
2708
  # Push the search modal screen
2293
2709
  self.app.push_screen(
2294
2710
  SearchScreen("Search", term, self.df, cidx),
2295
- callback=self.search,
2711
+ callback=self.select_row,
2296
2712
  )
2297
2713
 
2298
- def search(self, result) -> None:
2299
- """Search for a term."""
2714
+ def select_row(self, result) -> None:
2715
+ """Select rows by value or expression."""
2300
2716
  if result is None:
2301
2717
  return
2302
2718
 
2303
2719
  term, cidx, match_nocase, match_whole = result
2304
2720
  col_name = self.df.columns[cidx]
2305
2721
 
2306
- if term == NULL:
2722
+ # Already a Polars expression
2723
+ if isinstance(term, pl.Expr):
2724
+ expr = term
2725
+
2726
+ # Null case
2727
+ elif term == NULL:
2307
2728
  expr = pl.col(col_name).is_null()
2308
2729
 
2309
- # Support for polars expressions
2730
+ # Expression in string form
2310
2731
  elif tentative_expr(term):
2311
2732
  try:
2312
2733
  expr = validate_expr(term, self.df.columns, cidx)
@@ -2351,33 +2772,107 @@ class DataFrameTable(DataTable):
2351
2772
  try:
2352
2773
  matches = set(lf.filter(expr).select(RIDX).collect().to_series().to_list())
2353
2774
  except Exception as e:
2354
- self.notify(f"Error applying search filter [$error]{term}[/]", title="Search", severity="error", timeout=10)
2775
+ self.notify(
2776
+ f"Error applying search filter `[$error]{term}[/]`", title="Search", severity="error", timeout=10
2777
+ )
2355
2778
  self.log(f"Error applying search filter `{term}`: {str(e)}")
2356
2779
  return
2357
2780
 
2358
2781
  match_count = len(matches)
2359
2782
  if match_count == 0:
2360
2783
  self.notify(
2361
- f"No matches found for [$warning]{term}[/]. Try [$accent](?i)abc[/] for case-insensitive search.",
2784
+ f"No matches found for `[$warning]{term}[/]`. Try [$accent](?i)abc[/] for case-insensitive search.",
2362
2785
  title="Search",
2363
2786
  severity="warning",
2364
2787
  )
2365
2788
  return
2366
2789
 
2790
+ message = f"Found [$success]{match_count}[/] matching row(s) for `[$accent]{term}[/]`"
2791
+
2367
2792
  # Add to history
2368
- self.add_history(f"Searched [$success]{term}[/] in column [$accent]{col_name}[/]")
2793
+ self.add_history(message)
2369
2794
 
2370
2795
  # Update selected rows to include new matches
2371
2796
  for m in matches:
2372
2797
  self.selected_rows[m] = True
2373
2798
 
2374
2799
  # Show notification immediately, then start highlighting
2375
- self.notify(f"Found [$success]{match_count}[/] matches for [$accent]{term}[/]", title="Search")
2800
+ self.notify(message, title="Select Row")
2801
+
2802
+ # Recreate table for display
2803
+ self.setup_table()
2804
+
2805
def do_toggle_selections(self) -> None:
    """Invert the row selection.

    When some rows are hidden, only visible rows are inverted and hidden rows
    are deselected; otherwise every selection flag is flipped.
    """
    # Add to history
    self.add_history("Toggled row selection")

    if not self.has_hidden_rows:
        # No hidden rows: flip every flag
        self.selected_rows = [not selected for selected in self.selected_rows]
    else:
        # Flip visible rows in place and clear the flag on hidden rows
        for idx, selected in enumerate(self.selected_rows):
            self.selected_rows[idx] = (not selected) if self.visible_rows[idx] else False

    # Only notify when at least one row ends up selected
    new_selected_count = self.selected_rows.count(True)
    if new_selected_count:
        self.notify(f"Toggled selection for [$success]{new_selected_count}[/] rows", title="Toggle")

    # Recreate table for display
    self.setup_table()
2827
+
2828
def do_toggle_row_selection(self) -> None:
    """Flip the selection flag of the cursor row and restyle each of its cells."""
    # Add to history
    self.add_history("Toggled row selection")

    ridx = self.cursor_row_idx
    self.selected_rows[ridx] = not self.selected_rows[ridx]

    row_key = str(ridx)
    match_cols = self.matches.get(ridx, set())

    # NOTE(review): col_idx is the position within ordered_columns but is also
    # used to index self.df.dtypes and match_cols - confirm these agree when
    # columns are hidden or reordered.
    for col_idx, column in enumerate(self.ordered_columns):
        cell_text: Text = self.get_cell(row_key, column.key)

        highlighted = self.selected_rows[ridx] or (col_idx in match_cols)
        # Highlight selected rows / matched cells, otherwise use the dtype's style
        cell_text.style = HIGHLIGHT_COLOR if highlighted else DtypeConfig(self.df.dtypes[col_idx]).style

        self.update_cell(row_key, column.key, cell_text)
2851
+
2852
def do_clear_selections_and_matches(self) -> None:
    """Reset all row selections and cell matches; the dataframe is not changed."""
    # Bail out with a warning when there is nothing to clear
    nothing_selected = not any(self.selected_rows)
    if nothing_selected and not self.matches:
        self.notify("No selections to clear", title="Clear", severity="warning")
        return

    # Count rows that are selected or have an entry in matches
    row_count = sum(1 for idx, selected in enumerate(self.selected_rows) if selected or idx in self.matches)

    # Add to history
    self.add_history("Cleared all selected rows")

    # Reset both structures
    self.selected_rows = [False] * len(self.df)
    self.matches = defaultdict(set)

    # Recreate table for display
    self.setup_table()

    self.notify(f"Cleared selections for [$success]{row_count}[/] rows", title="Clear")
2874
+
2875
+ # Find & Replace
2381
2876
  def find_matches(
2382
2877
  self, term: str, cidx: int | None = None, match_nocase: bool = False, match_whole: bool = False
2383
2878
  ) -> dict[int, set[int]]:
@@ -2386,6 +2881,8 @@ class DataFrameTable(DataTable):
2386
2881
  Args:
2387
2882
  term: The search term (can be NULL, expression, or plain text)
2388
2883
  cidx: Column index for column-specific search. If None, searches all columns.
2884
+ match_nocase: Whether to perform case-insensitive matching (for string terms)
2885
+ match_whole: Whether to match the whole cell content (for string terms)
2389
2886
 
2390
2887
  Returns:
2391
2888
  Dictionary mapping row indices to sets of column indices containing matches.
@@ -2484,27 +2981,27 @@ class DataFrameTable(DataTable):
2484
2981
  try:
2485
2982
  matches = self.find_matches(term, cidx, match_nocase, match_whole)
2486
2983
  except Exception as e:
2487
- self.notify(f"Error finding matches for [$error]{term}[/]", title="Find", severity="error", timeout=10)
2984
+ self.notify(f"Error finding matches for `[$error]{term}[/]`", title="Find", severity="error", timeout=10)
2488
2985
  self.log(f"Error finding matches for `{term}`: {str(e)}")
2489
2986
  return
2490
2987
 
2491
2988
  if not matches:
2492
2989
  self.notify(
2493
- f"No matches found for [$warning]{term}[/] in current column. Try [$accent](?i)abc[/] for case-insensitive search.",
2990
+ f"No matches found for `[$warning]{term}[/]` in current column. Try [$accent](?i)abc[/] for case-insensitive search.",
2494
2991
  title="Find",
2495
2992
  severity="warning",
2496
2993
  )
2497
2994
  return
2498
2995
 
2499
2996
  # Add to history
2500
- self.add_history(f"Found [$success]{term}[/] in column [$accent]{col_name}[/]")
2997
+ self.add_history(f"Found `[$success]{term}[/]` in column [$accent]{col_name}[/]")
2501
2998
 
2502
2999
  # Add to matches and count total
2503
3000
  match_count = sum(len(col_idxs) for col_idxs in matches.values())
2504
3001
  for ridx, col_idxs in matches.items():
2505
3002
  self.matches[ridx].update(col_idxs)
2506
3003
 
2507
- self.notify(f"Found [$success]{match_count}[/] matches for [$accent]{term}[/]", title="Find")
3004
+ self.notify(f"Found [$success]{match_count}[/] matches for `[$accent]{term}[/]`", title="Find")
2508
3005
 
2509
3006
  # Recreate table for display
2510
3007
  self.setup_table()
@@ -2518,20 +3015,20 @@ class DataFrameTable(DataTable):
2518
3015
  try:
2519
3016
  matches = self.find_matches(term, cidx=None, match_nocase=match_nocase, match_whole=match_whole)
2520
3017
  except Exception as e:
2521
- self.notify(f"Error finding matches for [$error]{term}[/]", title="Find", severity="error", timeout=10)
3018
+ self.notify(f"Error finding matches for `[$error]{term}[/]`", title="Find", severity="error", timeout=10)
2522
3019
  self.log(f"Error finding matches for `{term}`: {str(e)}")
2523
3020
  return
2524
3021
 
2525
3022
  if not matches:
2526
3023
  self.notify(
2527
- f"No matches found for [$warning]{term}[/] in any column. Try [$accent](?i)abc[/] for case-insensitive search.",
3024
+ f"No matches found for `[$warning]{term}[/]` in any column. Try [$accent](?i)abc[/] for case-insensitive search.",
2528
3025
  title="Global Find",
2529
3026
  severity="warning",
2530
3027
  )
2531
3028
  return
2532
3029
 
2533
3030
  # Add to history
2534
- self.add_history(f"Found [$success]{term}[/] across all columns")
3031
+ self.add_history(f"Found `[$success]{term}[/]` across all columns")
2535
3032
 
2536
3033
  # Add to matches and count total
2537
3034
  match_count = sum(len(col_idxs) for col_idxs in matches.values())
@@ -2539,7 +3036,8 @@ class DataFrameTable(DataTable):
2539
3036
  self.matches[ridx].update(col_idxs)
2540
3037
 
2541
3038
  self.notify(
2542
- f"Found [$success]{match_count}[/] matches for [$accent]{term}[/] across all columns", title="Global Find"
3039
+ f"Found [$success]{match_count}[/] matches for `[$accent]{term}[/]` across all columns",
3040
+ title="Global Find",
2543
3041
  )
2544
3042
 
2545
3043
  # Recreate table for display
@@ -2639,7 +3137,6 @@ class DataFrameTable(DataTable):
2639
3137
  last_ridx = selected_row_indices[-1]
2640
3138
  self.move_cursor_to(last_ridx, self.cursor_col_idx)
2641
3139
 
2642
- # Replace
2643
3140
  def do_replace(self) -> None:
2644
3141
  """Open replace screen for current column."""
2645
3142
  # Push the replace modal screen
@@ -2740,7 +3237,7 @@ class DataFrameTable(DataTable):
2740
3237
  self.app.push_screen(
2741
3238
  ConfirmScreen(
2742
3239
  "Replace All",
2743
- label=f"Replace [$success]{term_find}[/] with [$success]{term_replace or repr('')}[/] for all [$accent]{state.total_occurrence}[/] occurrences?",
3240
+ label=f"Replace `[$success]{term_find}[/]` with `[$success]{term_replace}[/]` for all [$accent]{state.total_occurrence}[/] occurrences?",
2744
3241
  ),
2745
3242
  callback=self.handle_replace_all_confirmation,
2746
3243
  )
@@ -2848,7 +3345,7 @@ class DataFrameTable(DataTable):
2848
3345
  state.current_occurrence += 1
2849
3346
 
2850
3347
  # Show confirmation
2851
- label = f"Replace [$warning]{state.term_find}[/] with [$success]{state.term_replace}[/] ({state.current_occurrence} of {state.total_occurrence})?"
3348
+ label = f"Replace `[$warning]{state.term_find}[/]` with `[$success]{state.term_replace}[/]` ({state.current_occurrence} of {state.total_occurrence})?"
2852
3349
 
2853
3350
  self.app.push_screen(
2854
3351
  ConfirmScreen("Replace", label=label, maybe="Skip"),
@@ -2923,112 +3420,7 @@ class DataFrameTable(DataTable):
2923
3420
  # Show next confirmation
2924
3421
  self.show_next_replace_confirmation()
2925
3422
 
2926
- # Selection & Match
2927
- def do_toggle_selections(self) -> None:
2928
- """Toggle selected rows highlighting on/off."""
2929
- # Add to history
2930
- self.add_history("Toggled row selection")
2931
-
2932
- if self.has_hidden_rows:
2933
- # Some rows are hidden - invert only selected visible rows and clear selections for hidden rows
2934
- for i in range(len(self.selected_rows)):
2935
- if self.visible_rows[i]:
2936
- self.selected_rows[i] = not self.selected_rows[i]
2937
- else:
2938
- self.selected_rows[i] = False
2939
- else:
2940
- # Invert all selected rows
2941
- self.selected_rows = [not selected for selected in self.selected_rows]
2942
-
2943
- # Check if we're highlighting or un-highlighting
2944
- if new_selected_count := self.selected_rows.count(True):
2945
- self.notify(f"Toggled selection for [$success]{new_selected_count}[/] rows", title="Toggle")
2946
-
2947
- # Recreate table for display
2948
- self.setup_table()
2949
-
2950
- def do_toggle_row_selection(self) -> None:
2951
- """Select/deselect current row."""
2952
- # Add to history
2953
- self.add_history("Toggled row selection")
2954
-
2955
- ridx = self.cursor_row_idx
2956
- self.selected_rows[ridx] = not self.selected_rows[ridx]
2957
-
2958
- row_key = str(ridx)
2959
- match_cols = self.matches.get(ridx, set())
2960
- for col_idx, col in enumerate(self.ordered_columns):
2961
- col_key = col.key
2962
- cell_text: Text = self.get_cell(row_key, col_key)
2963
-
2964
- if self.selected_rows[ridx] or (col_idx in match_cols):
2965
- cell_text.style = HIGHLIGHT_COLOR
2966
- else:
2967
- # Reset to default style based on dtype
2968
- dtype = self.df.dtypes[col_idx]
2969
- dc = DtypeConfig(dtype)
2970
- cell_text.style = dc.style
2971
-
2972
- self.update_cell(row_key, col_key, cell_text)
2973
-
2974
- def do_clear_selections_and_matches(self) -> None:
2975
- """Clear all selected rows and matches without removing them from the dataframe."""
2976
- # Check if any selected rows or matches
2977
- if not any(self.selected_rows) and not self.matches:
2978
- self.notify("No selections to clear", title="Clear", severity="warning")
2979
- return
2980
-
2981
- row_count = sum(
2982
- 1 if (selected or idx in self.matches) else 0 for idx, selected in enumerate(self.selected_rows)
2983
- )
2984
-
2985
- # Add to history
2986
- self.add_history("Cleared all selected rows")
2987
-
2988
- # Clear all selections
2989
- self.selected_rows = [False] * len(self.df)
2990
- self.matches = defaultdict(set)
2991
-
2992
- # Recreate table for display
2993
- self.setup_table()
2994
-
2995
- self.notify(f"Cleared selections for [$success]{row_count}[/] rows", title="Clear")
2996
-
2997
- # Filter & View
2998
- def do_filter_rows(self) -> None:
2999
- """Keep only the rows with selections and cell matches, and remove others."""
3000
- if any(self.selected_rows) or self.matches:
3001
- message = "Filter to rows with selection and cell matches (other rows removed)"
3002
- filter_expr = [
3003
- True if (selected or ridx in self.matches) else False
3004
- for ridx, selected in enumerate(self.selected_rows)
3005
- ]
3006
- else: # Search cursor value in current column
3007
- message = "Filter to rows matching cursor value (other rows removed)"
3008
- ridx = self.cursor_row_idx
3009
- cidx = self.cursor_col_idx
3010
- value = self.df.item(ridx, cidx)
3011
-
3012
- col_name = self.df.columns[cidx]
3013
- if value is None:
3014
- filter_expr = pl.col(col_name).is_null()
3015
- else:
3016
- filter_expr = pl.col(col_name) == value
3017
-
3018
- # Add to history
3019
- self.add_history(message, dirty=True)
3020
-
3021
- # Apply filter to dataframe with row indices
3022
- df_filtered = self.df.with_row_index(RIDX).filter(filter_expr)
3023
-
3024
- # Update dataframe
3025
- self.reset_df(df_filtered.drop(RIDX))
3026
-
3027
- # Recreate table for display
3028
- self.setup_table()
3029
-
3030
- self.notify(f"{message}. Now showing [$success]{len(self.df)}[/] rows", title="Filter")
3031
-
3423
+ # View & Filter
3032
3424
  def do_view_rows(self) -> None:
3033
3425
  """View rows.
3034
3426
 
@@ -3150,6 +3542,49 @@ class DataFrameTable(DataTable):
3150
3542
 
3151
3543
  self.notify(f"Filtered to [$success]{matched_count}[/] matching rows", title="Filter")
3152
3544
 
3545
def do_filter_rows(self) -> None:
    """Keep only the rows with selections or cell matches, and remove the others.

    When nothing is selected and there are no matches, the dataframe is instead
    filtered to the rows whose value in the cursor column equals the cursor
    cell. Selection flags and match entries of the surviving rows are carried
    over to their new row indices.
    """
    if any(self.selected_rows) or self.matches:
        message = "Filtered to rows with selection and cell matches (other rows removed)"
        filter_expr = [bool(selected or ridx in self.matches) for ridx, selected in enumerate(self.selected_rows)]
    else:  # Search cursor value in current column
        message = "Filtered to rows matching cursor value (other rows removed)"
        cidx = self.cursor_col_idx
        col_name = self.df.columns[cidx]
        value = self.cursor_value

        if value is None:
            filter_expr = pl.col(col_name).is_null()
        else:
            filter_expr = pl.col(col_name) == value

    # Add to history
    self.add_history(message, dirty=True)

    # Apply filter to dataframe with row indices
    df_filtered = self.df.with_row_index(RIDX).filter(filter_expr)

    # Original row index of each surviving row, in new row order
    kept_ridxs = df_filtered[RIDX].to_list()

    # Carry selection flags over to the new row order
    selected_rows = [self.selected_rows[old_ridx] for old_ridx in kept_ridxs]

    # Carry over only the match entries that exist. Looking up every surviving
    # row would give each of them an (empty) entry, which makes `self.matches`
    # truthy and `ridx in self.matches` true for all rows after the filter.
    matches = defaultdict(set)
    for new_ridx, old_ridx in enumerate(kept_ridxs):
        if old_ridx in self.matches:
            matches[new_ridx] = self.matches[old_ridx]

    # Update dataframe
    self.reset_df(df_filtered.drop(RIDX))

    # Restore selected rows and matches
    self.selected_rows = selected_rows
    self.matches = matches

    # Recreate table for display
    self.setup_table()

    self.notify(f"{message}. Now showing [$success]{len(self.df)}[/] rows", title="Filter")
3587
+
3153
3588
  # Copy & Save
3154
3589
  def do_copy_to_clipboard(self, content: str, message: str) -> None:
3155
3590
  """Copy content to clipboard using pbcopy (macOS) or xclip (Linux).
@@ -3195,13 +3630,13 @@ class DataFrameTable(DataTable):
3195
3630
  """Handle result from SaveFileScreen."""
3196
3631
  if result is None:
3197
3632
  return
3198
- filename, all_tabs = result
3633
+ filename, all_tabs, overwrite_prompt = result
3199
3634
 
3200
3635
  # Whether to save all tabs (for Excel files)
3201
3636
  self._all_tabs = all_tabs
3202
3637
 
3203
3638
  # Check if file exists
3204
- if Path(filename).exists():
3639
+ if overwrite_prompt and Path(filename).exists():
3205
3640
  self._pending_filename = filename
3206
3641
  self.app.push_screen(
3207
3642
  ConfirmScreen("File already exists. Overwrite?"),
@@ -3267,10 +3702,11 @@ class DataFrameTable(DataTable):
3267
3702
  else:
3268
3703
  self.dirty = False
3269
3704
 
3270
- if self._task_after_save == "close_tab":
3271
- self.app.do_close_tab()
3272
- elif self._task_after_save == "quit_app":
3273
- self.app.exit()
3705
+ if hasattr(self, "_task_after_save"):
3706
+ if self._task_after_save == "close_tab":
3707
+ self.app.do_close_tab()
3708
+ elif self._task_after_save == "quit_app":
3709
+ self.app.exit()
3274
3710
 
3275
3711
  # From ConfirmScreen callback, so notify accordingly
3276
3712
  if self._all_tabs:
@@ -3374,9 +3810,18 @@ class DataFrameTable(DataTable):
3374
3810
  col_name for col_name in self.df.columns if col_name not in filtered_col_names
3375
3811
  }
3376
3812
  else: # filter - modify the dataframe
3377
- self.df = df_filtered.drop(RIDX)
3378
- self.visible_rows = [True] * len(self.df)
3379
- self.hidden_columns.clear()
3813
+ # Update selected rows
3814
+ selected_rows = [self.selected_rows[df_filtered[RIDX][ridx]] for ridx in range(len(df_filtered))]
3815
+
3816
+ # Update matches
3817
+ matches = {ridx: self.matches[df_filtered[RIDX][ridx]] for ridx in range(len(df_filtered))}
3818
+
3819
+ # Update dataframe
3820
+ self.reset_df(df_filtered.drop(RIDX))
3821
+
3822
+ # Restore selected rows and matches
3823
+ self.selected_rows = selected_rows
3824
+ self.matches = matches
3380
3825
  except Exception as e:
3381
3826
  self.notify(f"Error executing SQL query [$error]{sql}[/]", title="SQL Query", severity="error", timeout=10)
3382
3827
  self.log(f"Error executing SQL query `{sql}`: {str(e)}")