dataframe-textual 1.16.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,6 @@ from typing import Any
10
10
 
11
11
  import polars as pl
12
12
  from rich.text import Text, TextType
13
- from textual import work
14
13
  from textual._two_way_dict import TwoWayDict
15
14
  from textual.coordinate import Coordinate
16
15
  from textual.events import Click
@@ -32,7 +31,7 @@ from .common import (
32
31
  CURSOR_TYPES,
33
32
  NULL,
34
33
  NULL_DISPLAY,
35
- RIDX,
34
+ RID,
36
35
  SUBSCRIPT_DIGITS,
37
36
  SUPPORTED_FORMATS,
38
37
  DtypeConfig,
@@ -40,7 +39,6 @@ from .common import (
40
39
  get_next_item,
41
40
  parse_placeholders,
42
41
  round_to_nearest_hundreds,
43
- sleep_async,
44
42
  tentative_expr,
45
43
  validate_expr,
46
44
  )
@@ -79,16 +77,16 @@ class History:
79
77
 
80
78
  description: str
81
79
  df: pl.DataFrame
80
+ df_view: pl.DataFrame | None
82
81
  filename: str
83
82
  loaded_rows: int
84
- sorted_columns: dict[str, bool]
85
83
  hidden_columns: set[str]
86
- selected_rows: list[bool]
87
- visible_rows: list[bool]
84
+ selected_rows: set[int]
85
+ sorted_columns: dict[str, bool] # col_name -> descending
88
86
  fixed_rows: int
89
87
  fixed_columns: int
90
88
  cursor_coordinate: Coordinate
91
- matches: dict[int, set[int]]
89
+ matches: dict[int, set[str]] # RID -> set of col names
92
90
  dirty: bool = False # Whether this history state has unsaved changes
93
91
 
94
92
 
@@ -155,7 +153,7 @@ class DataFrameTable(DataTable):
155
153
  - *(Multi-column sort supported)*
156
154
 
157
155
  ## ✅ Row Selection
158
- - **\\\\** - ✅ Select rows in current column using cursor value
156
+ - **\\\\** - ✅ Select rows with cell matches or those matching cursor value in current column
159
157
  - **|** - ✅ Select rows with expression
160
158
  - **'** - ✅ Select/deselect current row
161
159
  - **t** - 💡 Toggle row selection (invert all)
@@ -177,8 +175,8 @@ class DataFrameTable(DataTable):
177
175
 
178
176
  ## 👁️ View & Filter
179
177
  - **"** - 📍 Filter selected rows (removes others)
180
- - **v** - 👁️ View rows that are selected or contain matching cells (hide others)
181
- - **V** - 🔧 View rows by expression (hides others)
178
+ - **v** - 👁️ View selected rows (hides others)
179
+ - **V** - 🔧 View selected rows matching expression (hides others)
182
180
 
183
181
  ## 🔍 SQL Interface
184
182
  - **l** - 💬 Open simple SQL interface (select columns & where clause)
@@ -221,10 +219,8 @@ class DataFrameTable(DataTable):
221
219
  # Navigation
222
220
  ("g", "jump_top", "Jump to top"),
223
221
  ("G", "jump_bottom", "Jump to bottom"),
224
- ("ctrl+f", "forward_page", "Page down"),
225
- ("ctrl+b", "backward_page", "Page up"),
226
- ("pageup", "page_up", "Page up"),
227
- ("pagedown", "page_down", "Page down"),
222
+ ("pageup,ctrl+b", "page_up", "Page up"),
223
+ ("pagedown,ctrl+f", "page_down", "Page down"),
228
224
  # Undo/Redo/Reset
229
225
  ("u", "undo", "Undo"),
230
226
  ("U", "redo", "Redo"),
@@ -237,6 +233,7 @@ class DataFrameTable(DataTable):
237
233
  ("z", "freeze_row_column", "Freeze rows/columns"),
238
234
  ("comma", "show_thousand_separator", "Toggle thousand separator"), # `,`
239
235
  ("underscore", "expand_column", "Expand column to full width"), # `_`
236
+ ("circumflex_accent", "toggle_rid", "Toggle internal row index"), # `^`
240
237
  # Copy
241
238
  ("c", "copy_cell", "Copy cell to clipboard"),
242
239
  ("ctrl+c", "copy_column", "Copy column to clipboard"),
@@ -254,11 +251,11 @@ class DataFrameTable(DataTable):
254
251
  ("left_square_bracket", "sort_ascending", "Sort ascending"), # `[`
255
252
  ("right_square_bracket", "sort_descending", "Sort descending"), # `]`
256
253
  # View & Filter
257
- ("v", "view_rows", "View rows"),
258
- ("V", "view_rows_expr", "View rows by expression"),
259
- ("quotation_mark", "filter_rows", "Filter selected"), # `"`
254
+ ("v", "view_rows", "View selected rows"),
255
+ ("V", "view_rows_expr", "View selected rows matching expression"),
256
+ ("quotation_mark", "filter_rows", "Filter selected rows"), # `"`
260
257
  # Row Selection
261
- ("backslash", "select_row_cursor_value", "Select rows with cursor value in current column"), # `\`
258
+ ("backslash", "select_row", "Select rows with cell matches or those matching cursor value in current column"), # `\`
262
259
  ("vertical_line", "select_row_expr", "Select rows with expression"), # `|`
263
260
  ("right_curly_bracket", "next_selected_row", "Go to next selected row"), # `}`
264
261
  ("left_curly_bracket", "previous_selected_row", "Go to previous selected row"), # `{`
@@ -324,34 +321,40 @@ class DataFrameTable(DataTable):
324
321
  super().__init__(**kwargs)
325
322
 
326
323
  # DataFrame state
327
- self.dataframe = df # Original dataframe
328
- self.df = df # Internal/working dataframe
324
+ self.dataframe = df.lazy().with_row_index(RID).select(pl.exclude(RID), RID).collect() # Original dataframe
325
+ self.df = self.dataframe # Internal/working dataframe
329
326
  self.filename = filename or "untitled.csv" # Current filename
330
327
  self.tabname = tabname or Path(filename).stem # Tab name
328
+
329
+ # In view mode, this is the copy of self.df
330
+ self.df_view = None
331
+
331
332
  # Pagination & Loading
332
333
  self.BATCH_SIZE = max((self.app.size.height // 100 + 1) * 100, 100)
333
334
  self.loaded_rows = 0 # Track how many rows are currently loaded
334
335
  self.loaded_ranges: list[tuple[int, int]] = [] # List of (start, end) row indices that are loaded
335
336
 
336
337
  # State tracking (all 0-based indexing)
337
- self.sorted_columns: dict[str, bool] = {} # col_name -> descending
338
338
  self.hidden_columns: set[str] = set() # Set of hidden column names
339
- self.selected_rows: list[bool] = [False] * len(self.df) # Track selected rows
340
- self.visible_rows: list[bool] = [True] * len(self.df) # Track visible rows (for filtering)
341
- self.matches: dict[int, set[int]] = defaultdict(set) # Track search matches: row_idx -> set of col_idx
339
+ self.selected_rows: set[int] = set() # Track selected rows by RID
340
+ self.sorted_columns: dict[str, bool] = {} # col_name -> descending
341
+ self.matches: dict[int, set[str]] = defaultdict(set) # Track search matches: RID -> set of col_names
342
342
 
343
343
  # Freezing
344
344
  self.fixed_rows = 0 # Number of fixed rows
345
345
  self.fixed_columns = 0 # Number of fixed columns
346
346
 
347
347
  # History stack for undo
348
- self.histories: deque[History] = deque()
349
- # Current history state for redo
350
- self.history: History = None
348
+ self.histories_undo: deque[History] = deque()
349
+ # History stack for redo
350
+ self.histories_redo: deque[History] = deque()
351
351
 
352
352
  # Whether to use thousand separator for numeric display
353
353
  self.thousand_separator = False
354
354
 
355
+ # Whether to show internal row index column
356
+ self.show_rid = False
357
+
355
358
  @property
356
359
  def cursor_key(self) -> CellKey:
357
360
  """Get the current cursor position as a CellKey.
@@ -418,22 +421,13 @@ class DataFrameTable(DataTable):
418
421
 
419
422
  @property
420
423
  def cursor_value(self) -> Any:
421
- """Get the current cursor cell value.
424
+ """Get the current cursor cell value in the dataframe.
422
425
 
423
426
  Returns:
424
427
  Any: The value of the cell at the cursor position.
425
428
  """
426
429
  return self.df.item(self.cursor_row_idx, self.cursor_col_idx)
427
430
 
428
- @property
429
- def has_hidden_rows(self) -> bool:
430
- """Check if there are any hidden rows.
431
-
432
- Returns:
433
- bool: True if there are hidden rows, False otherwise.
434
- """
435
- return any(1 for v in self.visible_rows if v is False)
436
-
437
431
  @property
438
432
  def ordered_selected_rows(self) -> list[int]:
439
433
  """Get the list of selected row indices in order.
@@ -441,7 +435,7 @@ class DataFrameTable(DataTable):
441
435
  Returns:
442
436
  list[int]: A list of 0-based row indices that are currently selected.
443
437
  """
444
- return [ridx for ridx, selected in enumerate(self.selected_rows) if selected]
438
+ return [ridx for ridx, rid in enumerate(self.df[RID]) if rid in self.selected_rows]
445
439
 
446
440
  @property
447
441
  def ordered_matches(self) -> list[tuple[int, int]]:
@@ -451,19 +445,22 @@ class DataFrameTable(DataTable):
451
445
  list[tuple[int, int]]: A list of (row_idx, col_idx) tuples for matched cells.
452
446
  """
453
447
  matches = []
454
- for ridx in sorted(self.matches.keys()):
455
- for cidx in sorted(self.matches[ridx]):
456
- matches.append((ridx, cidx))
457
- return matches
458
448
 
459
- @property
460
- def last_history(self) -> History:
461
- """Get the last history state.
449
+ # Uniq columns
450
+ cols_to_check = set()
451
+ for cols in self.matches.values():
452
+ cols_to_check.update(cols)
462
453
 
463
- Returns:
464
- History: The most recent History object from the histories deque.
465
- """
466
- return self.histories[-1] if self.histories else None
454
+ # Ordered columns
455
+ cidx2col = {cidx: col for cidx, col in enumerate(self.df.columns) if col in cols_to_check}
456
+
457
+ for ridx, rid in enumerate(self.df[RID]):
458
+ if cols := self.matches.get(rid):
459
+ for cidx, col in cidx2col.items():
460
+ if col in cols:
461
+ matches.append((ridx, cidx))
462
+
463
+ return matches
467
464
 
468
465
  def _round_to_nearest_hundreds(self, num: int):
469
466
  """Round a number to the nearest hundreds.
@@ -677,42 +674,19 @@ class DataFrameTable(DataTable):
677
674
  # Action handlers for BINDINGS
678
675
  def action_jump_top(self) -> None:
679
676
  """Jump to the top of the table."""
680
- self.move_cursor(row=0)
677
+ self.do_jump_top()
681
678
 
682
679
  def action_jump_bottom(self) -> None:
683
680
  """Jump to the bottom of the table."""
684
- stop = len(self.df)
685
- start = max(0, ((stop - self.BATCH_SIZE) // self.BATCH_SIZE + 1) * self.BATCH_SIZE)
686
- self.load_rows_range(start, stop)
687
- self.move_cursor(row=self.row_count - 1)
681
+ self.do_jump_bottom()
688
682
 
689
683
  def action_page_up(self) -> None:
690
684
  """Move the cursor one page up."""
691
- self._set_hover_cursor(False)
692
- if self.show_cursor and self.cursor_type in ("cell", "row"):
693
- height = self.scrollable_content_region.height - (self.header_height if self.show_header else 0)
694
-
695
- col_idx = self.cursor_column
696
- ridx = self.cursor_row_idx
697
- next_ridx = max(0, ridx - height - BUFFER_SIZE)
698
- start, stop = self._round_to_nearest_hundreds(next_ridx)
699
- self.load_rows_range(start, stop)
700
-
701
- self.move_cursor(row=self.get_row_idx(str(next_ridx)), column=col_idx)
702
- else:
703
- super().action_page_up()
685
+ self.do_page_up()
704
686
 
705
687
  def action_page_down(self) -> None:
706
- super().action_page_down()
707
- self.load_rows_down()
708
-
709
- def action_backward_page(self) -> None:
710
- """Scroll up one page."""
711
- self.action_page_up()
712
-
713
- def action_forward_page(self) -> None:
714
- """Scroll down one page."""
715
- self.action_page_down()
688
+ """Move the cursor one page down."""
689
+ self.do_page_down()
716
690
 
717
691
  def action_view_row_detail(self) -> None:
718
692
  """View details of the current row."""
@@ -730,6 +704,10 @@ class DataFrameTable(DataTable):
730
704
  """Expand the current column to its full width."""
731
705
  self.do_expand_column()
732
706
 
707
+ def action_toggle_rid(self) -> None:
708
+ """Toggle the internal row index column visibility."""
709
+ self.do_toggle_rid()
710
+
733
711
  def action_show_hidden_rows_columns(self) -> None:
734
712
  """Show all hidden rows/columns."""
735
713
  self.do_show_hidden_rows_columns()
@@ -802,9 +780,9 @@ class DataFrameTable(DataTable):
802
780
  """Clear the current cell (set to None)."""
803
781
  self.do_clear_cell()
804
782
 
805
- def action_select_row_cursor_value(self) -> None:
783
+ def action_select_row(self) -> None:
806
784
  """Select rows with cursor value in the current column."""
807
- self.do_select_row_cursor_value()
785
+ self.do_select_row()
808
786
 
809
787
  def action_select_row_expr(self) -> None:
810
788
  """Select rows by expression."""
@@ -1014,51 +992,31 @@ class DataFrameTable(DataTable):
1014
992
  # Set new dataframe and reset table
1015
993
  self.df = new_df
1016
994
  self.loaded_rows = 0
1017
- self.sorted_columns = {}
1018
995
  self.hidden_columns = set()
1019
- self.selected_rows = [False] * len(self.df)
1020
- self.visible_rows = [True] * len(self.df)
996
+ self.selected_rows = set()
997
+ self.sorted_columns = {}
1021
998
  self.fixed_rows = 0
1022
999
  self.fixed_columns = 0
1023
1000
  self.matches = defaultdict(set)
1024
1001
  # self.histories.clear()
1025
- # self.history = None
1002
+ # self.histories2.clear()
1026
1003
  self.dirty = dirty # Mark as dirty since data changed
1027
1004
 
1028
- def setup_table(self, reset: bool = False) -> None:
1005
+ def setup_table(self) -> None:
1029
1006
  """Setup the table for display.
1030
1007
 
1031
1008
  Row keys are 0-based indices, which map directly to dataframe row indices.
1032
1009
  Column keys are header names from the dataframe.
1033
1010
  """
1034
1011
  self.loaded_rows = 0
1012
+ self.loaded_ranges.clear()
1035
1013
  self.show_row_labels = True
1036
1014
 
1037
- # Reset to original dataframe
1038
- if reset:
1039
- self.reset_df(self.dataframe, dirty=False)
1040
-
1041
- # Lazy load up to BATCH_SIZE visible rows
1042
- stop, visible_count, row_idx = self.BATCH_SIZE, 0, 0
1043
- for row_idx, visible in enumerate(self.visible_rows):
1044
- if not visible:
1045
- continue
1046
- visible_count += 1
1047
- if visible_count > self.BATCH_SIZE:
1048
- stop = row_idx
1049
- break
1050
- else:
1051
- stop = row_idx
1052
-
1053
- # Round up to next hundreds
1054
- if stop % self.BATCH_SIZE != 0:
1055
- stop = (stop // self.BATCH_SIZE + 1) * self.BATCH_SIZE
1056
-
1057
1015
  # Save current cursor position before clearing
1058
1016
  row_idx, col_idx = self.cursor_coordinate
1059
1017
 
1060
1018
  self.setup_columns()
1061
- self.load_rows_range(0, stop)
1019
+ self.load_rows_range(0, self.BATCH_SIZE) # Load initial rows
1062
1020
 
1063
1021
  # Restore cursor position
1064
1022
  if row_idx < len(self.rows) and col_idx < len(self.columns):
@@ -1102,28 +1060,30 @@ class DataFrameTable(DataTable):
1102
1060
  # Get column label width
1103
1061
  # Add padding for sort indicators if any
1104
1062
  label_width = measure(self.app.console, col, 1) + 2
1063
+ if dtype != pl.String:
1064
+ available_width -= label_width
1065
+ continue
1105
1066
 
1106
1067
  try:
1107
1068
  # Get sample values from the column
1108
- sample_values = sample_lf.select(col).collect().get_column(col).to_list()
1069
+ sample_values = sample_lf.select(col).collect().get_column(col).drop_nulls().to_list()
1109
1070
  if any(val.startswith(("https://", "http://")) for val in sample_values):
1110
1071
  continue # Skip link columns so they can auto-size and be clickable
1111
1072
 
1112
1073
  # Find maximum width in sample
1113
1074
  max_cell_width = max(
1114
- (measure(self.app.console, str(val), 1) for val in sample_values if val),
1075
+ (measure(self.app.console, val, 1) for val in sample_values),
1115
1076
  default=label_width,
1116
1077
  )
1117
1078
 
1118
1079
  # Set column width to max of label and sampled data (capped at reasonable max)
1119
1080
  max_width = max(label_width, max_cell_width)
1120
- except Exception:
1081
+ except Exception as e:
1121
1082
  # If any error, let Textual auto-size
1122
1083
  max_width = label_width
1084
+ self.log(f"Error determining width for column '{col}': {e}")
1123
1085
 
1124
- if dtype == pl.String:
1125
- column_widths[col] = max_width
1126
-
1086
+ column_widths[col] = max_width
1127
1087
  available_width -= max_width
1128
1088
 
1129
1089
  # If there's no more available width, auto-size remaining columns
@@ -1147,8 +1107,8 @@ class DataFrameTable(DataTable):
1147
1107
 
1148
1108
  # Add columns with justified headers
1149
1109
  for col, dtype in zip(self.df.columns, self.df.dtypes):
1150
- if col in self.hidden_columns:
1151
- continue # Skip hidden columns
1110
+ if col in self.hidden_columns or (col == RID and not self.show_rid):
1111
+ continue # Skip hidden columns and internal RID
1152
1112
  for idx, c in enumerate(self.sorted_columns, 1):
1153
1113
  if c == col:
1154
1114
  # Add sort indicator to column header
@@ -1166,71 +1126,6 @@ class DataFrameTable(DataTable):
1166
1126
 
1167
1127
  self.add_column(Text(cell_value, justify=DtypeConfig(dtype).justify), key=col, width=width)
1168
1128
 
1169
- def load_rows(self, stop: int | None = None, move_to_end: bool = False) -> None:
1170
- """Load a batch of rows into the table (synchronous wrapper).
1171
-
1172
- Args:
1173
- stop: Stop loading rows when this index is reached.
1174
- If None, load until the end of the dataframe.
1175
- """
1176
- if stop is None or stop > len(self.df):
1177
- stop = len(self.df)
1178
-
1179
- # If already loaded enough rows, just move cursor if needed
1180
- if stop <= self.loaded_rows:
1181
- if move_to_end:
1182
- self.move_cursor(row=self.row_count - 1)
1183
-
1184
- return
1185
-
1186
- # Warn user if loading a large number of rows
1187
- elif (nrows := stop - self.loaded_rows) >= WARN_ROWS_THRESHOLD:
1188
-
1189
- def _continue(result: bool) -> None:
1190
- if result:
1191
- self.load_rows_async(stop, move_to_end=move_to_end)
1192
-
1193
- self.app.push_screen(
1194
- ConfirmScreen(
1195
- f"Load {nrows} Rows",
1196
- label="Loading a large number of rows may cause the application to become unresponsive. Do you want to continue?",
1197
- ),
1198
- callback=_continue,
1199
- )
1200
-
1201
- return
1202
-
1203
- # Load rows asynchronously
1204
- self.load_rows_async(stop, move_to_end=move_to_end)
1205
-
1206
- @work(exclusive=True, description="Loading rows...")
1207
- async def load_rows_async(self, stop: int, move_to_end: bool = False) -> None:
1208
- """Perform loading with async to avoid blocking.
1209
-
1210
- Args:
1211
- stop: Stop loading rows when this index is reached.
1212
- move_to_end: If True, move cursor to the last loaded row after loading completes.
1213
- """
1214
- # Load rows in smaller chunks to avoid blocking
1215
- if stop > self.loaded_rows:
1216
- self.log(f"Async loading up to row {self.loaded_rows = }, {stop = }")
1217
- # Load incrementally to avoid one big block
1218
- # Load max BATCH_SIZE rows at a time
1219
- chunk_size = min(self.BATCH_SIZE, stop - self.loaded_rows)
1220
- next_stop = min(self.loaded_rows + chunk_size, stop)
1221
- self.load_rows_range(self.loaded_rows, next_stop)
1222
- self.loaded_rows = next_stop
1223
-
1224
- # If there's more to load, yield to event loop with delay
1225
- if next_stop < stop:
1226
- await sleep_async(0.05) # 50ms delay to allow UI updates
1227
- self.load_rows_async(stop, move_to_end=move_to_end)
1228
- return
1229
-
1230
- # After loading completes, move cursor to end if requested
1231
- if move_to_end:
1232
- self.call_after_refresh(lambda: self.move_cursor(row=self.row_count - 1))
1233
-
1234
1129
  def _calculate_load_range(self, start: int, stop: int) -> list[tuple[int, int]]:
1235
1130
  """Calculate the actual ranges to load, accounting for already-loaded ranges.
1236
1131
 
@@ -1262,8 +1157,11 @@ class DataFrameTable(DataTable):
1262
1157
  # Merge overlapping/adjacent ranges
1263
1158
  merged = []
1264
1159
  for range_start, range_stop in sorted_ranges:
1265
- if merged and range_start <= merged[-1][1]:
1266
- # Overlapping or adjacent: merge
1160
+ # Fully covered, no need to load anything
1161
+ if range_start <= start and range_stop >= stop:
1162
+ return []
1163
+ # Overlapping or adjacent: merge
1164
+ elif merged and range_start <= merged[-1][1]:
1267
1165
  merged[-1] = (merged[-1][0], max(merged[-1][1], range_stop))
1268
1166
  else:
1269
1167
  merged.append((range_start, range_stop))
@@ -1356,23 +1254,20 @@ class DataFrameTable(DataTable):
1356
1254
  df_slice = self.df.slice(segment_start, segment_stop - segment_start)
1357
1255
 
1358
1256
  # Load each row at the correct position
1359
- for ridx, row in enumerate(df_slice.rows(), segment_start):
1360
- if not self.visible_rows[ridx]:
1361
- continue # Skip hidden rows
1362
-
1363
- is_selected = self.selected_rows[ridx]
1364
- match_cols = self.matches.get(ridx, set())
1257
+ for (ridx, row), rid in zip(enumerate(df_slice.rows(), segment_start), df_slice[RID]):
1258
+ is_selected = rid in self.selected_rows
1259
+ match_cols = self.matches.get(rid, set())
1365
1260
 
1366
1261
  vals, dtypes, styles = [], [], []
1367
- for cidx, (val, col, dtype) in enumerate(zip(row, self.df.columns, self.df.dtypes)):
1368
- if col in self.hidden_columns:
1369
- continue # Skip hidden columns
1262
+ for val, col, dtype in zip(row, self.df.columns, self.df.dtypes, strict=True):
1263
+ if col in self.hidden_columns or (col == RID and not self.show_rid):
1264
+ continue # Skip hidden columns and internal RID
1370
1265
 
1371
1266
  vals.append(val)
1372
1267
  dtypes.append(dtype)
1373
1268
 
1374
1269
  # Highlight entire row with selection or cells with matches
1375
- styles.append(HIGHLIGHT_COLOR if is_selected or cidx in match_cols else None)
1270
+ styles.append(HIGHLIGHT_COLOR if is_selected or col in match_cols else None)
1376
1271
 
1377
1272
  formatted_row = format_row(vals, dtypes, styles=styles, thousand_separator=self.thousand_separator)
1378
1273
 
@@ -1413,8 +1308,7 @@ class DataFrameTable(DataTable):
1413
1308
 
1414
1309
  # If nothing needs loading, return early
1415
1310
  if not ranges_to_load:
1416
- self.log(f"Range {start}-{stop} already loaded, skipping")
1417
- return 0
1311
+ return 0 # Already loaded
1418
1312
 
1419
1313
  # Track the number of loaded rows in this range
1420
1314
  range_count = 0
@@ -1446,26 +1340,12 @@ class DataFrameTable(DataTable):
1446
1340
  if top_row_key:
1447
1341
  top_ridx = int(top_row_key.value)
1448
1342
  else:
1449
- top_ridx = 0
1450
- self.log(f"No top row key at index {top_row_index}, defaulting to 0")
1343
+ top_ridx = 0 # No top row key at index, default to 0
1451
1344
 
1452
1345
  # Load upward
1453
1346
  start, stop = self._round_to_nearest_hundreds(top_ridx - BUFFER_SIZE * 2)
1454
1347
  range_count = self.load_rows_range(start, stop)
1455
1348
 
1456
- # self.log(
1457
- # "========",
1458
- # f"{self.scrollable_content_region.height = },",
1459
- # f"{self.header_height = },",
1460
- # f"{self.scroll_y = },",
1461
- # f"{top_row_index = },",
1462
- # f"{top_ridx = },",
1463
- # f"{start = },",
1464
- # f"{stop = },",
1465
- # f"{range_count = },",
1466
- # f"{self.loaded_ranges = }",
1467
- # )
1468
-
1469
1349
  # Adjust scroll to maintain position if rows were loaded above
1470
1350
  if range_count > 0:
1471
1351
  self.move_cursor(row=top_row_index + range_count)
@@ -1477,33 +1357,19 @@ class DataFrameTable(DataTable):
1477
1357
  if self.loaded_rows >= len(self.df):
1478
1358
  return
1479
1359
 
1480
- visible_row_count = self.scrollable_content_region.height - self.header_height
1360
+ visible_row_count = self.scrollable_content_region.height - (self.header_height if self.show_header else 0)
1481
1361
  bottom_row_index = self.scroll_y + visible_row_count - BUFFER_SIZE
1482
1362
 
1483
1363
  bottom_row_key = self.get_row_key(bottom_row_index)
1484
1364
  if bottom_row_key:
1485
1365
  bottom_ridx = int(bottom_row_key.value)
1486
1366
  else:
1487
- bottom_ridx = 0
1488
- self.log(f"No bottom row key at index {bottom_row_index}, defaulting to 0")
1367
+ bottom_ridx = 0 # No bottom row key at index, default to 0
1489
1368
 
1490
1369
  # Load downward
1491
1370
  start, stop = self._round_to_nearest_hundreds(bottom_ridx + BUFFER_SIZE * 2)
1492
1371
  range_count = self.load_rows_range(start, stop)
1493
1372
 
1494
- # self.log(
1495
- # "========",
1496
- # f"{self.scrollable_content_region.height = },",
1497
- # f"{self.header_height = },",
1498
- # f"{self.scroll_y = },",
1499
- # f"{bottom_row_index = },",
1500
- # f"{bottom_ridx = },",
1501
- # f"{start = },",
1502
- # f"{stop = },",
1503
- # f"{range_count = },",
1504
- # f"{self.loaded_ranges = }",
1505
- # )
1506
-
1507
1373
  if range_count > 0:
1508
1374
  self.log(f"Loaded down: {range_count} rows in range {start}-{stop}/{len(self.df)}")
1509
1375
 
@@ -1608,18 +1474,55 @@ class DataFrameTable(DataTable):
1608
1474
  self.check_idle()
1609
1475
  return row_key
1610
1476
 
1477
+ # Navigation
1478
+ def do_jump_top(self) -> None:
1479
+ """Jump to the top of the table."""
1480
+ self.move_cursor(row=0)
1481
+
1482
+ def do_jump_bottom(self) -> None:
1483
+ """Jump to the bottom of the table."""
1484
+ stop = len(self.df)
1485
+ start = max(0, stop - self.BATCH_SIZE)
1486
+
1487
+ if start % self.BATCH_SIZE != 0:
1488
+ start = (start // self.BATCH_SIZE + 1) * self.BATCH_SIZE
1489
+
1490
+ self.load_rows_range(start, stop)
1491
+ self.move_cursor(row=self.row_count - 1)
1492
+
1493
+ def do_page_up(self) -> None:
1494
+ """Move the cursor one page up."""
1495
+ self._set_hover_cursor(False)
1496
+ if self.show_cursor and self.cursor_type in ("cell", "row"):
1497
+ height = self.scrollable_content_region.height - (self.header_height if self.show_header else 0)
1498
+
1499
+ col_idx = self.cursor_column
1500
+ ridx = self.cursor_row_idx
1501
+ next_ridx = max(0, ridx - height - BUFFER_SIZE)
1502
+ start, stop = self._round_to_nearest_hundreds(next_ridx)
1503
+ self.load_rows_range(start, stop)
1504
+
1505
+ self.move_cursor(row=self.get_row_idx(str(next_ridx)), column=col_idx)
1506
+ else:
1507
+ super().action_page_up()
1508
+
1509
+ def do_page_down(self) -> None:
1510
+ """Move the cursor one page down."""
1511
+ super().action_page_down()
1512
+ self.load_rows_down()
1513
+
1611
1514
  # History & Undo
1612
1515
  def create_history(self, description: str) -> None:
1613
1516
  """Create the initial history state."""
1614
1517
  return History(
1615
1518
  description=description,
1616
1519
  df=self.df,
1520
+ df_view=self.df_view,
1617
1521
  filename=self.filename,
1618
1522
  loaded_rows=self.loaded_rows,
1619
- sorted_columns=self.sorted_columns.copy(),
1620
1523
  hidden_columns=self.hidden_columns.copy(),
1621
1524
  selected_rows=self.selected_rows.copy(),
1622
- visible_rows=self.visible_rows.copy(),
1525
+ sorted_columns=self.sorted_columns.copy(),
1623
1526
  fixed_rows=self.fixed_rows,
1624
1527
  fixed_columns=self.fixed_columns,
1625
1528
  cursor_coordinate=self.cursor_coordinate,
@@ -1634,12 +1537,12 @@ class DataFrameTable(DataTable):
1634
1537
 
1635
1538
  # Restore state
1636
1539
  self.df = history.df
1540
+ self.df_view = history.df_view
1637
1541
  self.filename = history.filename
1638
1542
  self.loaded_rows = history.loaded_rows
1639
- self.sorted_columns = history.sorted_columns.copy()
1640
1543
  self.hidden_columns = history.hidden_columns.copy()
1641
1544
  self.selected_rows = history.selected_rows.copy()
1642
- self.visible_rows = history.visible_rows.copy()
1545
+ self.sorted_columns = history.sorted_columns.copy()
1643
1546
  self.fixed_rows = history.fixed_rows
1644
1547
  self.fixed_columns = history.fixed_columns
1645
1548
  self.cursor_coordinate = history.cursor_coordinate
@@ -1649,15 +1552,18 @@ class DataFrameTable(DataTable):
1649
1552
  # Recreate table for display
1650
1553
  self.setup_table()
1651
1554
 
1652
- def add_history(self, description: str, dirty: bool = False) -> None:
1555
+ def add_history(self, description: str, dirty: bool = False, clear_redo: bool = True) -> None:
1653
1556
  """Add the current state to the history stack.
1654
1557
 
1655
1558
  Args:
1656
1559
  description: Description of the action for this history entry.
1657
1560
  dirty: Whether this operation modifies the data (True) or just display state (False).
1658
1561
  """
1659
- history = self.create_history(description)
1660
- self.histories.append(history)
1562
+ self.histories_undo.append(self.create_history(description))
1563
+
1564
+ # Clear redo stack when a new action is performed
1565
+ if clear_redo:
1566
+ self.histories_redo.clear()
1661
1567
 
1662
1568
  # Mark table as dirty if this operation modifies data
1663
1569
  if dirty:
@@ -1665,52 +1571,43 @@ class DataFrameTable(DataTable):
1665
1571
 
1666
1572
  def do_undo(self) -> None:
1667
1573
  """Undo the last action."""
1668
- if not self.histories:
1574
+ if not self.histories_undo:
1669
1575
  self.notify("No actions to undo", title="Undo", severity="warning")
1670
1576
  return
1671
1577
 
1672
- # Pop the last history state for undo
1673
- history = self.histories.pop()
1674
-
1675
- # Save current state for redo
1676
- self.history = self.create_history(history.description)
1578
+ # Pop the last history state for undo and save to redo stack
1579
+ history = self.histories_undo.pop()
1580
+ self.histories_redo.append(self.create_history(history.description))
1677
1581
 
1678
1582
  # Restore state
1679
1583
  self.apply_history(history)
1680
1584
 
1681
- self.notify(f"Reverted: [$success]{history.description}[/]", title="Undo")
1585
+ self.notify(f"Reverted: {history.description}", title="Undo")
1682
1586
 
1683
1587
  def do_redo(self) -> None:
1684
1588
  """Redo the last undone action."""
1685
- if self.history is None:
1589
+ if not self.histories_redo:
1686
1590
  self.notify("No actions to redo", title="Redo", severity="warning")
1687
1591
  return
1688
1592
 
1689
- description = self.history.description
1593
+ # Pop the last undone state from redo stack
1594
+ history = self.histories_redo.pop()
1595
+ description = history.description
1690
1596
 
1691
1597
  # Save current state for undo
1692
- self.add_history(description)
1598
+ self.add_history(description, clear_redo=False)
1693
1599
 
1694
1600
  # Restore state
1695
- self.apply_history(self.history)
1696
-
1697
- # Clear redo state
1698
- self.history = None
1601
+ self.apply_history(history)
1699
1602
 
1700
- self.notify(f"Reapplied: [$success]{description}[/]", title="Redo")
1603
+ self.notify(f"Reapplied: {description}", title="Redo")
1701
1604
 
1702
1605
  def do_reset(self) -> None:
1703
1606
  """Reset the table to the initial state."""
1704
- self.setup_table(reset=True)
1607
+ self.reset_df(self.dataframe, dirty=False)
1608
+ self.setup_table()
1705
1609
  self.notify("Restored initial state", title="Reset")
1706
1610
 
1707
- def restore_dirty(self, default: bool | None = None) -> None:
1708
- """Restore the dirty state from the last history entry."""
1709
- if self.last_history:
1710
- self.dirty = self.last_history.dirty
1711
- elif default is not None:
1712
- self.dirty = default
1713
-
1714
1611
  # Display
1715
1612
  def do_cycle_cursor_type(self) -> None:
1716
1613
  """Cycle through cursor types: cell -> row -> column -> cell."""
@@ -1817,14 +1714,20 @@ class DataFrameTable(DataTable):
1817
1714
  max_width = len(col_name) + 2 # Start with column name width + padding
1818
1715
 
1819
1716
  try:
1717
+ need_expand = False
1718
+
1820
1719
  # Scan through all loaded rows that are visible to find max width
1821
1720
  for row_idx in range(self.loaded_rows):
1822
- if not self.visible_rows[row_idx]:
1823
- continue # Skip hidden rows
1824
1721
  cell_value = str(self.df.item(row_idx, col_idx))
1825
1722
  cell_width = measure(self.app.console, cell_value, 1)
1723
+
1724
+ if cell_width > max_width:
1725
+ need_expand = True
1826
1726
  max_width = max(max_width, cell_width)
1827
1727
 
1728
+ if not need_expand:
1729
+ return
1730
+
1828
1731
  # Update the column width
1829
1732
  col = self.columns[col_key]
1830
1733
  col.width = max_width
@@ -1841,32 +1744,34 @@ class DataFrameTable(DataTable):
1841
1744
  )
1842
1745
  self.log(f"Error expanding column `{col_name}`: {str(e)}")
1843
1746
 
1844
- def do_show_hidden_rows_columns(self) -> None:
1845
- """Show all hidden rows/columns by recreating the table."""
1846
- # Get currently visible columns
1847
- visible_cols = set(col.key for col in self.ordered_columns)
1747
+ def do_toggle_rid(self) -> None:
1748
+ """Toggle display of the internal RID column."""
1749
+ self.show_rid = not self.show_rid
1848
1750
 
1849
- hidden_row_count = sum(0 if visible else 1 for visible in self.visible_rows)
1850
- hidden_col_count = sum(0 if col in visible_cols else 1 for col in self.df.columns)
1751
+ # Recreate table for display
1752
+ self.setup_table()
1851
1753
 
1852
- if not hidden_row_count and not hidden_col_count:
1853
- self.notify("No hidden columns or rows to show", title="Show", severity="warning")
1754
+ def do_show_hidden_rows_columns(self) -> None:
1755
+ """Show all hidden rows/columns by recreating the table."""
1756
+ if not self.hidden_columns and self.df_view is None:
1757
+ self.notify("No hidden rows or columns to show", title="Show", severity="warning")
1854
1758
  return
1855
1759
 
1856
1760
  # Add to history
1857
1761
  self.add_history("Showed hidden rows/columns")
1858
1762
 
1763
+ # If in a filtered view, restore the full dataframe
1764
+ if self.df_view is not None:
1765
+ self.df = self.df_view
1766
+ self.df_view = None
1767
+
1859
1768
  # Clear hidden rows/columns tracking
1860
- self.visible_rows = [True] * len(self.df)
1861
1769
  self.hidden_columns.clear()
1862
1770
 
1863
1771
  # Recreate table for display
1864
1772
  self.setup_table()
1865
1773
 
1866
- self.notify(
1867
- f"Showed [$success]{hidden_row_count}[/] hidden row(s) and/or [$accent]{hidden_col_count}[/] column(s)",
1868
- title="Show",
1869
- )
1774
+ self.notify("Showed hidden row(s) and/or hidden column(s)", title="Show")
1870
1775
 
1871
1776
  # Sort
1872
1777
  def do_sort_by_column(self, descending: bool = False) -> None:
@@ -1888,41 +1793,39 @@ class DataFrameTable(DataTable):
1888
1793
  # Add to history
1889
1794
  self.add_history(f"Sorted on column [$success]{col_name}[/]", dirty=True)
1890
1795
 
1796
+ # New column - add to sort
1891
1797
  if old_desc is None:
1892
- # Add new column to sort
1893
1798
  self.sorted_columns[col_name] = descending
1799
+
1800
+ # Old column, same direction - remove from sort
1894
1801
  elif old_desc == descending:
1895
- # Same direction - remove from sort
1896
1802
  del self.sorted_columns[col_name]
1803
+
1804
+ # Old column, different direction - add to sort at end
1897
1805
  else:
1898
- # Move to end of sort order
1899
1806
  del self.sorted_columns[col_name]
1900
1807
  self.sorted_columns[col_name] = descending
1901
1808
 
1902
- lf = self.df.lazy().with_row_index(RIDX)
1809
+ lf = self.df.lazy()
1810
+ sort_by = {}
1903
1811
 
1904
1812
  # Apply multi-column sort
1905
1813
  if sort_cols := list(self.sorted_columns.keys()):
1906
1814
  descending_flags = list(self.sorted_columns.values())
1907
- lf = lf.sort(sort_cols, descending=descending_flags, nulls_last=True)
1908
-
1909
- df_sorted = lf.collect()
1910
-
1911
- # Updated visible rows, selected rows, and cell matches to match new order
1912
- old_row_indices = df_sorted[RIDX].to_list()
1913
- if self.has_hidden_rows:
1914
- self.visible_rows = [self.visible_rows[old_ridx] for old_ridx in old_row_indices]
1915
- if any(self.selected_rows):
1916
- self.selected_rows = [self.selected_rows[old_ridx] for old_ridx in old_row_indices]
1917
- if any(self.matches):
1918
- self.matches = {
1919
- new_ridx: self.matches[old_ridx]
1920
- for new_ridx, old_ridx in enumerate(old_row_indices)
1921
- if old_ridx in self.matches
1922
- }
1815
+ sort_by = {"by": sort_cols, "descending": descending_flags, "nulls_last": True}
1816
+ else:
1817
+ # No sort - restore original order by adding a temporary index column
1818
+ sort_by = {"by": RID}
1819
+
1820
+ # Perform the sort
1821
+ df_sorted = lf.sort(**sort_by).collect()
1822
+
1823
+ # Also update df_view if applicable
1824
+ if self.df_view is not None:
1825
+ self.df_view = self.df_view.lazy().sort(**sort_by).collect()
1923
1826
 
1924
1827
  # Update the dataframe
1925
- self.df = df_sorted.drop(RIDX)
1828
+ self.df = df_sorted
1926
1829
 
1927
1830
  # Recreate table for display
1928
1831
  self.setup_table()
@@ -1969,6 +1872,17 @@ class DataFrameTable(DataTable):
1969
1872
  .alias(col_name)
1970
1873
  )
1971
1874
 
1875
+ # Also update the view if applicable
1876
+ if self.df_view is not None:
1877
+ # Get the RID value for this row in df_view
1878
+ ridx_view = self.df.item(ridx, self.df.columns.index(RID))
1879
+ self.df_view = self.df_view.with_columns(
1880
+ pl.when(pl.col(RID) == ridx_view)
1881
+ .then(pl.lit(new_value))
1882
+ .otherwise(pl.col(col_name))
1883
+ .alias(col_name)
1884
+ )
1885
+
1972
1886
  # Update the display
1973
1887
  cell_value = self.df.item(ridx, cidx)
1974
1888
  if cell_value is None:
@@ -2044,11 +1958,26 @@ class DataFrameTable(DataTable):
2044
1958
 
2045
1959
  try:
2046
1960
  # Apply the expression to the column
2047
- self.df = self.df.with_columns(expr.alias(col_name))
1961
+ self.df = self.df.lazy().with_columns(expr.alias(col_name)).collect()
1962
+
1963
+ # Also update the view if applicable
1964
+ # Update the value of col_name in df_view using the value of col_name from df based on RID mapping between them
1965
+ if self.df_view is not None:
1966
+ # Get updated column from df for rows that exist in df_view
1967
+ col_updated = f"^_{col_name}_^"
1968
+ lf_updated = self.df.lazy().select(RID, pl.col(col_name).alias(col_updated))
1969
+ # Join and use coalesce to prefer updated value or keep original
1970
+ self.df_view = (
1971
+ self.df_view.lazy()
1972
+ .join(lf_updated, on=RID, how="left")
1973
+ .with_columns(pl.coalesce(pl.col(col_updated), pl.col(col_name)).alias(col_name))
1974
+ .drop(col_updated)
1975
+ .collect()
1976
+ )
2048
1977
  except Exception as e:
2049
1978
  self.notify(
2050
1979
  f"Error applying expression: [$error]{term}[/] to column [$accent]{col_name}[/]",
2051
- title="Edit",
1980
+ title="Edit Column",
2052
1981
  severity="error",
2053
1982
  timeout=10,
2054
1983
  )
@@ -2090,14 +2019,25 @@ class DataFrameTable(DataTable):
2090
2019
  # Rename the column in the dataframe
2091
2020
  self.df = self.df.rename({col_name: new_name})
2092
2021
 
2093
- # Update sorted_columns if this column was sorted
2022
+ # Also update the view if applicable
2023
+ if self.df_view is not None:
2024
+ self.df_view = self.df_view.rename({col_name: new_name})
2025
+
2026
+ # Update sorted_columns if this column was sorted and maintain order
2094
2027
  if col_name in self.sorted_columns:
2095
- self.sorted_columns[new_name] = self.sorted_columns.pop(col_name)
2028
+ sorted_columns = {}
2029
+ for col, order in self.sorted_columns.items():
2030
+ if col == col_name:
2031
+ sorted_columns[new_name] = order
2032
+ else:
2033
+ sorted_columns[col] = order
2034
+ self.sorted_columns = sorted_columns
2096
2035
 
2097
- # Update hidden_columns if this column was hidden
2098
- if col_name in self.hidden_columns:
2099
- self.hidden_columns.remove(col_name)
2100
- self.hidden_columns.add(new_name)
2036
+ # Update matches if this column had cell matches
2037
+ for cols in self.matches.values():
2038
+ if col_name in cols:
2039
+ cols.remove(col_name)
2040
+ cols.add(new_name)
2101
2041
 
2102
2042
  # Recreate table for display
2103
2043
  self.setup_table()
@@ -2126,6 +2066,13 @@ class DataFrameTable(DataTable):
2126
2066
  .alias(col_name)
2127
2067
  )
2128
2068
 
2069
+ # Also update the view if applicable
2070
+ if self.df_view is not None:
2071
+ ridx_view = self.df.item(ridx, self.df.columns.index(RID))
2072
+ self.df_view = self.df_view.with_columns(
2073
+ pl.when(pl.col(RID) == ridx_view).then(pl.lit(None)).otherwise(pl.col(col_name)).alias(col_name)
2074
+ )
2075
+
2129
2076
  # Update the display
2130
2077
  dtype = self.df.dtypes[cidx]
2131
2078
  dc = DtypeConfig(dtype)
@@ -2144,30 +2091,27 @@ class DataFrameTable(DataTable):
2144
2091
  self.log(f"Error clearing cell ({ridx}, {col_name}): {str(e)}")
2145
2092
  raise e
2146
2093
 
2147
- def do_add_column(self, col_name: str = None, col_value: pl.Expr = None) -> None:
2094
+ def do_add_column(self, col_name: str = None) -> None:
2148
2095
  """Add acolumn after the current column."""
2149
2096
  cidx = self.cursor_col_idx
2150
2097
 
2151
2098
  if not col_name:
2152
2099
  # Generate a unique column name
2153
2100
  base_name = "new_col"
2154
- new_name = base_name
2101
+ new_col_name = base_name
2155
2102
  counter = 1
2156
- while new_name in self.df.columns:
2157
- new_name = f"{base_name}_{counter}"
2103
+ while new_col_name in self.df.columns:
2104
+ new_col_name = f"{base_name}_{counter}"
2158
2105
  counter += 1
2159
2106
  else:
2160
- new_name = col_name
2107
+ new_col_name = col_name
2161
2108
 
2162
2109
  # Add to history
2163
- self.add_history(f"Added column [$success]{new_name}[/] after column [$accent]{cidx + 1}[/]", dirty=True)
2110
+ self.add_history(f"Added column [$success]{new_col_name}[/] after column [$accent]{cidx + 1}[/]", dirty=True)
2164
2111
 
2165
2112
  try:
2166
2113
  # Create an empty column (all None values)
2167
- if isinstance(col_value, pl.Expr):
2168
- new_col = col_value.alias(new_name)
2169
- else:
2170
- new_col = pl.lit(col_value).alias(new_name)
2114
+ new_col_name = pl.lit(None).alias(new_col_name)
2171
2115
 
2172
2116
  # Get columns up to current, the new column, then remaining columns
2173
2117
  cols = self.df.columns
@@ -2175,8 +2119,12 @@ class DataFrameTable(DataTable):
2175
2119
  cols_after = cols[cidx + 1 :]
2176
2120
 
2177
2121
  # Build the new dataframe with columns reordered
2178
- select_cols = cols_before + [new_name] + cols_after
2179
- self.df = self.df.with_columns(new_col).select(select_cols)
2122
+ select_cols = cols_before + [new_col_name] + cols_after
2123
+ self.df = self.df.lazy().with_columns(new_col_name).select(select_cols).collect()
2124
+
2125
+ # Also update the view if applicable
2126
+ if self.df_view is not None:
2127
+ self.df_view = self.df_view.lazy().with_columns(new_col_name).select(select_cols).collect()
2180
2128
 
2181
2129
  # Recreate table for display
2182
2130
  self.setup_table()
@@ -2186,8 +2134,10 @@ class DataFrameTable(DataTable):
2186
2134
 
2187
2135
  # self.notify(f"Added column [$success]{new_name}[/]", title="Add Column")
2188
2136
  except Exception as e:
2189
- self.notify(f"Error adding column [$error]{new_name}[/]", title="Add Column", severity="error", timeout=10)
2190
- self.log(f"Error adding column `{new_name}`: {str(e)}")
2137
+ self.notify(
2138
+ f"Error adding column [$error]{new_col_name}[/]", title="Add Column", severity="error", timeout=10
2139
+ )
2140
+ self.log(f"Error adding column `{new_col_name}`: {str(e)}")
2191
2141
  raise e
2192
2142
 
2193
2143
  def do_add_column_expr(self) -> None:
@@ -2219,7 +2169,14 @@ class DataFrameTable(DataTable):
2219
2169
 
2220
2170
  # Build the new dataframe with columns reordered
2221
2171
  select_cols = cols_before + [new_col_name] + cols_after
2222
- self.df = self.df.with_row_index(RIDX).with_columns(new_col).select(select_cols)
2172
+ self.df = self.df.lazy().with_columns(new_col).select(select_cols).collect()
2173
+
2174
+ # Also update the view if applicable
2175
+ if self.df_view is not None:
2176
+ # Get updated column from df for rows that exist in df_view
2177
+ lf_updated = self.df.lazy().select(RID, pl.col(new_col_name))
2178
+ # Join and use coalesce to prefer updated value or keep original
2179
+ self.df_view = self.df_view.lazy().join(lf_updated, on=RID, how="left").select(select_cols).collect()
2223
2180
 
2224
2181
  # Recreate table for display
2225
2182
  self.setup_table()
@@ -2285,7 +2242,14 @@ class DataFrameTable(DataTable):
2285
2242
 
2286
2243
  # Build the new dataframe with columns reordered
2287
2244
  select_cols = cols_before + [new_col_name] + cols_after
2288
- self.df = self.df.with_columns(new_col).select(select_cols)
2245
+ self.df = self.df.lazy().with_columns(new_col).select(select_cols).collect()
2246
+
2247
+ # Also update the view if applicable
2248
+ if self.df_view is not None:
2249
+ # Get updated column from df for rows that exist in df_view
2250
+ lf_updated = self.df.lazy().select(RID, pl.col(new_col_name))
2251
+ # Join and use coalesce to prefer updated value or keep original
2252
+ self.df_view = self.df_view.lazy().join(lf_updated, on=RID, how="left").select(select_cols).collect()
2289
2253
 
2290
2254
  # Recreate table for display
2291
2255
  self.setup_table()
@@ -2352,17 +2316,24 @@ class DataFrameTable(DataTable):
2352
2316
  if col_name in self.sorted_columns:
2353
2317
  del self.sorted_columns[col_name]
2354
2318
 
2319
+ # Remove from hidden columns if present
2320
+ for col_name in col_names_to_remove:
2321
+ self.hidden_columns.discard(col_name)
2322
+
2355
2323
  # Remove from matches
2356
- col_indices_to_remove = set(self.df.columns.index(name) for name in col_names_to_remove)
2357
- for row_idx in list(self.matches.keys()):
2358
- self.matches[row_idx].difference_update(col_indices_to_remove)
2324
+ for rid in list(self.matches.keys()):
2325
+ self.matches[rid].difference_update(col_names_to_remove)
2359
2326
  # Remove empty entries
2360
- if not self.matches[row_idx]:
2361
- del self.matches[row_idx]
2327
+ if not self.matches[rid]:
2328
+ del self.matches[rid]
2362
2329
 
2363
2330
  # Remove from dataframe
2364
2331
  self.df = self.df.drop(col_names_to_remove)
2365
2332
 
2333
+ # Also update the view if applicable
2334
+ if self.df_view is not None:
2335
+ self.df_view = self.df_view.drop(col_names_to_remove)
2336
+
2366
2337
  self.notify(message, title="Delete")
2367
2338
 
2368
2339
  def do_duplicate_column(self) -> None:
@@ -2373,29 +2344,28 @@ class DataFrameTable(DataTable):
2373
2344
  col_idx = self.cursor_column
2374
2345
  new_col_name = f"{col_name}_copy"
2375
2346
 
2347
+ # Ensure new column name is unique
2348
+ counter = 1
2349
+ while new_col_name in self.df.columns:
2350
+ new_col_name = f"{new_col_name}{counter}"
2351
+ counter += 1
2352
+
2376
2353
  # Add to history
2377
2354
  self.add_history(f"Duplicated column [$success]{col_name}[/]", dirty=True)
2378
2355
 
2379
2356
  # Create new column and reorder columns to insert after current column
2380
2357
  cols_before = self.df.columns[: cidx + 1]
2381
2358
  cols_after = self.df.columns[cidx + 1 :]
2359
+ cols_new = cols_before + [new_col_name] + cols_after
2382
2360
 
2383
2361
  # Add the new column and reorder columns for insertion after current column
2384
- self.df = self.df.with_columns(pl.col(col_name).alias(new_col_name)).select(
2385
- list(cols_before) + [new_col_name] + list(cols_after)
2386
- )
2362
+ self.df = self.df.lazy().with_columns(pl.col(col_name).alias(new_col_name)).select(cols_new).collect()
2387
2363
 
2388
- # Update matches to account for new column
2389
- new_matches = defaultdict(set)
2390
- for row_idx, cols in self.matches.items():
2391
- new_cols = set()
2392
- for col_idx_in_set in cols:
2393
- if col_idx_in_set <= cidx:
2394
- new_cols.add(col_idx_in_set)
2395
- else:
2396
- new_cols.add(col_idx_in_set + 1)
2397
- new_matches[row_idx] = new_cols
2398
- self.matches = new_matches
2364
+ # Also update the view if applicable
2365
+ if self.df_view is not None:
2366
+ self.df_view = (
2367
+ self.df_view.lazy().with_columns(pl.col(col_name).alias(new_col_name)).select(cols_new).collect()
2368
+ )
2399
2369
 
2400
2370
  # Recreate table for display
2401
2371
  self.setup_table()
@@ -2411,58 +2381,61 @@ class DataFrameTable(DataTable):
2411
2381
  Supports deleting multiple selected rows. If no rows are selected, deletes the row at the cursor.
2412
2382
  """
2413
2383
  old_count = len(self.df)
2414
- predicates = [True] * len(self.df)
2384
+ rids_to_delete = set()
2415
2385
 
2416
2386
  # Delete all selected rows
2417
- if selected_count := self.selected_rows.count(True):
2387
+ if selected_count := len(self.selected_rows):
2418
2388
  history_desc = f"Deleted {selected_count} selected row(s)"
2419
-
2420
- for ridx, selected in enumerate(self.selected_rows):
2421
- if selected:
2422
- predicates[ridx] = False
2389
+ rids_to_delete = self.selected_rows
2423
2390
 
2424
2391
  # Delete current row and those above
2425
2392
  elif more == "above":
2426
2393
  ridx = self.cursor_row_idx
2427
2394
  history_desc = f"Deleted current row [$success]{ridx + 1}[/] and those above"
2428
- for i in range(ridx + 1):
2429
- predicates[i] = False
2395
+ for rid in self.df[RID][: ridx + 1]:
2396
+ rids_to_delete.add(rid)
2430
2397
 
2431
2398
  # Delete current row and those below
2432
2399
  elif more == "below":
2433
2400
  ridx = self.cursor_row_idx
2434
2401
  history_desc = f"Deleted current row [$success]{ridx + 1}[/] and those below"
2435
- for i in range(ridx, len(self.df)):
2436
- if self.visible_rows[i]:
2437
- predicates[i] = False
2402
+ for rid in self.df[RID][ridx:]:
2403
+ rids_to_delete.add(rid)
2438
2404
 
2439
2405
  # Delete the row at the cursor
2440
2406
  else:
2441
2407
  ridx = self.cursor_row_idx
2442
2408
  history_desc = f"Deleted row [$success]{ridx + 1}[/]"
2443
- if self.visible_rows[ridx]:
2444
- predicates[ridx] = False
2409
+ rids_to_delete.add(self.df[RID][ridx])
2445
2410
 
2446
2411
  # Add to history
2447
2412
  self.add_history(history_desc, dirty=True)
2448
2413
 
2449
2414
  # Apply the filter to remove rows
2450
2415
  try:
2451
- df = self.df.with_row_index(RIDX).filter(predicates)
2416
+ df_filtered = self.df.lazy().filter(~pl.col(RID).is_in(rids_to_delete)).collect()
2452
2417
  except Exception as e:
2453
2418
  self.notify(f"Error deleting row(s): {e}", title="Delete", severity="error", timeout=10)
2454
- self.histories.pop() # Remove last history entry
2419
+ self.histories_undo.pop() # Remove last history entry
2455
2420
  return
2456
2421
 
2457
- self.df = df.drop(RIDX)
2422
+ # RIDs of remaining rows
2423
+ ok_rids = set(df_filtered[RID])
2458
2424
 
2459
- # Update selected and visible rows tracking
2460
- old_row_indices = set(df[RIDX].to_list())
2461
- self.selected_rows = [selected for i, selected in enumerate(self.selected_rows) if i in old_row_indices]
2462
- self.visible_rows = [visible for i, visible in enumerate(self.visible_rows) if i in old_row_indices]
2425
+ # Update selected rows tracking
2426
+ if self.selected_rows:
2427
+ self.selected_rows.intersection_update(ok_rids)
2463
2428
 
2464
- # Clear all matches since row indices have changed
2465
- self.matches = defaultdict(set)
2429
+ # Update the dataframe
2430
+ self.df = df_filtered
2431
+
2432
+ # Update matches since row indices have changed
2433
+ if self.matches:
2434
+ self.matches = {rid: cols for rid, cols in self.matches.items() if rid in ok_rids}
2435
+
2436
+ # Also update the view if applicable
2437
+ if self.df_view is not None:
2438
+ self.df_view = self.df_view.lazy().filter(~pl.col(RID).is_in(rids_to_delete)).collect()
2466
2439
 
2467
2440
  # Recreate table for display
2468
2441
  self.setup_table()
@@ -2474,34 +2447,29 @@ class DataFrameTable(DataTable):
2474
2447
  def do_duplicate_row(self) -> None:
2475
2448
  """Duplicate the currently selected row, inserting it right after the current row."""
2476
2449
  ridx = self.cursor_row_idx
2450
+ rid = self.df[RID][ridx]
2451
+
2452
+ lf = self.df.lazy()
2477
2453
 
2478
2454
  # Get the row to duplicate
2479
- row_to_duplicate = self.df.slice(ridx, 1)
2455
+ row_to_duplicate = lf.slice(ridx, 1).with_columns(pl.col(RID) + 1)
2480
2456
 
2481
2457
  # Add to history
2482
2458
  self.add_history(f"Duplicated row [$success]{ridx + 1}[/]", dirty=True)
2483
2459
 
2484
2460
  # Concatenate: rows before + duplicated row + rows after
2485
- df_before = self.df.slice(0, ridx + 1)
2486
- df_after = self.df.slice(ridx + 1)
2461
+ lf_before = lf.slice(0, ridx + 1)
2462
+ lf_after = lf.slice(ridx + 1).with_columns(pl.col(RID) + 1)
2487
2463
 
2488
2464
  # Combine the parts
2489
- self.df = pl.concat([df_before, row_to_duplicate, df_after])
2490
-
2491
- # Update selected and visible rows tracking to account for new row
2492
- new_selected_rows = self.selected_rows[: ridx + 1] + [self.selected_rows[ridx]] + self.selected_rows[ridx + 1 :]
2493
- new_visible_rows = self.visible_rows[: ridx + 1] + [self.visible_rows[ridx]] + self.visible_rows[ridx + 1 :]
2494
- self.selected_rows = new_selected_rows
2495
- self.visible_rows = new_visible_rows
2496
-
2497
- # Update matches to account for new row
2498
- new_matches = defaultdict(set)
2499
- for row_idx, cols in self.matches.items():
2500
- if row_idx <= ridx:
2501
- new_matches[row_idx] = cols
2502
- else:
2503
- new_matches[row_idx + 1] = cols
2504
- self.matches = new_matches
2465
+ self.df = pl.concat([lf_before, row_to_duplicate, lf_after]).collect()
2466
+
2467
+ # Also update the view if applicable
2468
+ if self.df_view is not None:
2469
+ lf_view = self.df_view.lazy()
2470
+ lf_view_before = lf_view.slice(0, rid + 1)
2471
+ lf_view_after = lf_view.slice(rid + 1).with_columns(pl.col(RID) + 1)
2472
+ self.df_view = pl.concat([lf_view_before, row_to_duplicate, lf_view_after]).collect()
2505
2473
 
2506
2474
  # Recreate table for display
2507
2475
  self.setup_table()
@@ -2567,6 +2535,10 @@ class DataFrameTable(DataTable):
2567
2535
  cols[cidx], cols[swap_cidx] = cols[swap_cidx], cols[cidx]
2568
2536
  self.df = self.df.select(cols)
2569
2537
 
2538
+ # Also update the view if applicable
2539
+ if self.df_view is not None:
2540
+ self.df_view = self.df_view.select(cols)
2541
+
2570
2542
  # self.notify(f"Moved column [$success]{col_name}[/] {direction}", title="Move")
2571
2543
 
2572
2544
  def do_move_row(self, direction: str) -> None:
@@ -2575,65 +2547,88 @@ class DataFrameTable(DataTable):
2575
2547
  Args:
2576
2548
  direction: "up" to move up, "down" to move down.
2577
2549
  """
2578
- row_idx, col_idx = self.cursor_coordinate
2550
+ curr_row_idx, col_idx = self.cursor_coordinate
2579
2551
 
2580
2552
  # Validate move is possible
2581
2553
  if direction == "up":
2582
- if row_idx <= 0:
2554
+ if curr_row_idx <= 0:
2583
2555
  self.notify("Cannot move row up", title="Move", severity="warning")
2584
2556
  return
2585
- swap_idx = row_idx - 1
2557
+ swap_row_idx = curr_row_idx - 1
2586
2558
  elif direction == "down":
2587
- if row_idx >= len(self.rows) - 1:
2559
+ if curr_row_idx >= len(self.rows) - 1:
2588
2560
  self.notify("Cannot move row down", title="Move", severity="warning")
2589
2561
  return
2590
- swap_idx = row_idx + 1
2562
+ swap_row_idx = curr_row_idx + 1
2591
2563
  else:
2592
2564
  # Invalid direction
2593
2565
  return
2594
2566
 
2595
- row_key = self.coordinate_to_cell_key((row_idx, 0)).row_key
2596
- swap_key = self.coordinate_to_cell_key((swap_idx, 0)).row_key
2597
-
2598
2567
  # Add to history
2599
2568
  self.add_history(
2600
- f"Moved row [$success]{row_key.value}[/] [$accent]{direction}[/] (swapped with row [$success]{swap_key.value}[/])",
2569
+ f"Moved row [$success]{curr_row_idx}[/] [$accent]{direction}[/] (swapped with row [$success]{swap_row_idx}[/])",
2601
2570
  dirty=True,
2602
2571
  )
2603
2572
 
2604
2573
  # Swap rows in the table's internal row locations
2574
+ curr_key = self.coordinate_to_cell_key((curr_row_idx, 0)).row_key
2575
+ swap_key = self.coordinate_to_cell_key((swap_row_idx, 0)).row_key
2576
+
2605
2577
  self.check_idle()
2606
2578
 
2607
2579
  (
2608
- self._row_locations[row_key],
2580
+ self._row_locations[curr_key],
2609
2581
  self._row_locations[swap_key],
2610
2582
  ) = (
2611
2583
  self.get_row_idx(swap_key),
2612
- self.get_row_idx(row_key),
2584
+ self.get_row_idx(curr_key),
2613
2585
  )
2614
2586
 
2615
2587
  self._update_count += 1
2616
2588
  self.refresh()
2617
2589
 
2618
2590
  # Restore cursor position on the moved row
2619
- self.move_cursor(row=swap_idx, column=col_idx)
2591
+ self.move_cursor(row=swap_row_idx, column=col_idx)
2620
2592
 
2621
- # Swap rows in the dataframe
2622
- ridx = int(row_key.value) # 0-based
2623
- swap_ridx = int(swap_key.value) # 0-based
2624
- first, second = sorted([ridx, swap_ridx])
2593
+ # Locate the rows to swap
2594
+ curr_ridx = curr_row_idx
2595
+ swap_ridx = swap_row_idx
2596
+ first, second = sorted([curr_ridx, swap_ridx])
2625
2597
 
2598
+ # Swap the rows in the dataframe
2626
2599
  self.df = pl.concat(
2627
2600
  [
2628
- self.df.slice(0, first),
2629
- self.df.slice(second, 1),
2630
- self.df.slice(first + 1, second - first - 1),
2631
- self.df.slice(first, 1),
2632
- self.df.slice(second + 1),
2601
+ self.df.slice(0, first).lazy(),
2602
+ self.df.slice(second, 1).lazy(),
2603
+ self.df.slice(first + 1, second - first - 1).lazy(),
2604
+ self.df.slice(first, 1).lazy(),
2605
+ self.df.slice(second + 1).lazy(),
2633
2606
  ]
2634
- )
2607
+ ).collect()
2608
+
2609
+ # Also update the view if applicable
2610
+ if self.df_view is not None:
2611
+ # Find RID values
2612
+ curr_rid = self.df[RID][curr_row_idx]
2613
+ swap_rid = self.df[RID][swap_row_idx]
2635
2614
 
2636
- # self.notify(f"Moved row [$success]{row_key.value}[/] {direction}", title="Move")
2615
+ # Locate the rows by RID in the view
2616
+ curr_ridx = self.df_view[RID].index_of(curr_rid)
2617
+ swap_ridx = self.df_view[RID].index_of(swap_rid)
2618
+ first, second = sorted([curr_ridx, swap_ridx])
2619
+
2620
+ # Swap the rows in the view
2621
+ self.df_view = pl.concat(
2622
+ [
2623
+ self.df_view.slice(0, first).lazy(),
2624
+ self.df_view.slice(second, 1).lazy(),
2625
+ self.df_view.slice(first + 1, second - first - 1).lazy(),
2626
+ self.df_view.slice(first, 1).lazy(),
2627
+ self.df_view.slice(second + 1).lazy(),
2628
+ ]
2629
+ ).collect()
2630
+
2631
+ # self.notify(f"Moved row [$success]{row_key.value}[/] {direction}", title="Move Row")
2637
2632
 
2638
2633
  # Type casting
2639
2634
  def do_cast_column_dtype(self, dtype: str) -> None:
@@ -2670,6 +2665,10 @@ class DataFrameTable(DataTable):
2670
2665
  # Cast the column using Polars
2671
2666
  self.df = self.df.with_columns(pl.col(col_name).cast(target_dtype))
2672
2667
 
2668
+ # Also update the view if applicable
2669
+ if self.df_view is not None:
2670
+ self.df_view = self.df_view.with_columns(pl.col(col_name).cast(target_dtype))
2671
+
2673
2672
  # Recreate table for display
2674
2673
  self.setup_table()
2675
2674
 
@@ -2684,17 +2683,26 @@ class DataFrameTable(DataTable):
2684
2683
  self.log(f"Error casting column `{col_name}`: {str(e)}")
2685
2684
 
2686
2685
  # Row selection
2687
- def do_select_row_cursor_value(self) -> None:
2688
- """Search with cursor value in current column."""
2686
+ def do_select_row(self) -> None:
2687
+ """Select rows.
2688
+
2689
+ If there are existing cell matches, use those to select rows.
2690
+ Otherwise, use the current cell value as the search term and select rows matching that value.
2691
+ """
2689
2692
  cidx = self.cursor_col_idx
2690
- col_name = self.cursor_col_name
2691
2693
 
2692
- # Get the value of the currently selected cell
2693
- term = NULL if self.cursor_value is None else str(self.cursor_value)
2694
- if self.cursor_value is None:
2695
- term = pl.col(col_name).is_null()
2694
+ # Use existing cell matches if present
2695
+ if self.matches:
2696
+ term = pl.col(RID).is_in(self.matches)
2696
2697
  else:
2697
- term = pl.col(col_name) == self.cursor_value
2698
+ col_name = self.cursor_col_name
2699
+
2700
+ # Get the value of the currently selected cell
2701
+ term = NULL if self.cursor_value is None else str(self.cursor_value)
2702
+ if self.cursor_value is None:
2703
+ term = pl.col(col_name).is_null()
2704
+ else:
2705
+ term = pl.col(col_name) == self.cursor_value
2698
2706
 
2699
2707
  self.select_row((term, cidx, False, True))
2700
2708
 
@@ -2707,7 +2715,7 @@ class DataFrameTable(DataTable):
2707
2715
 
2708
2716
  # Push the search modal screen
2709
2717
  self.app.push_screen(
2710
- SearchScreen("Search", term, self.df, cidx),
2718
+ SearchScreen("Select", term, self.df, cidx),
2711
2719
  callback=self.select_row,
2712
2720
  )
2713
2721
 
@@ -2717,12 +2725,16 @@ class DataFrameTable(DataTable):
2717
2725
  return
2718
2726
 
2719
2727
  term, cidx, match_nocase, match_whole = result
2720
- col_name = self.df.columns[cidx]
2728
+ col_name = "all columns" if cidx is None else self.df.columns[cidx]
2721
2729
 
2722
2730
  # Already a Polars expression
2723
2731
  if isinstance(term, pl.Expr):
2724
2732
  expr = term
2725
2733
 
2734
+ # bool list or Series
2735
+ elif isinstance(term, (list, pl.Series)):
2736
+ expr = term
2737
+
2726
2738
  # Null case
2727
2739
  elif term == NULL:
2728
2740
  expr = pl.col(col_name).is_null()
@@ -2764,13 +2776,11 @@ class DataFrameTable(DataTable):
2764
2776
  )
2765
2777
 
2766
2778
  # Lazyframe for filtering
2767
- lf = self.df.lazy().with_row_index(RIDX)
2768
- if self.has_hidden_rows:
2769
- lf = lf.filter(self.visible_rows)
2779
+ lf = self.df.lazy()
2770
2780
 
2771
2781
  # Apply filter to get matched row indices
2772
2782
  try:
2773
- matches = set(lf.filter(expr).select(RIDX).collect().to_series().to_list())
2783
+ ok_rids = set(lf.filter(expr).collect()[RID])
2774
2784
  except Exception as e:
2775
2785
  self.notify(
2776
2786
  f"Error applying search filter `[$error]{term}[/]`", title="Search", severity="error", timeout=10
@@ -2778,7 +2788,7 @@ class DataFrameTable(DataTable):
2778
2788
  self.log(f"Error applying search filter `{term}`: {str(e)}")
2779
2789
  return
2780
2790
 
2781
- match_count = len(matches)
2791
+ match_count = len(ok_rids)
2782
2792
  if match_count == 0:
2783
2793
  self.notify(
2784
2794
  f"No matches found for `[$warning]{term}[/]`. Try [$accent](?i)abc[/] for case-insensitive search.",
@@ -2787,14 +2797,13 @@ class DataFrameTable(DataTable):
2787
2797
  )
2788
2798
  return
2789
2799
 
2790
- message = f"Found [$success]{match_count}[/] matching row(s) for `[$accent]{term}[/]`"
2800
+ message = f"Found [$success]{match_count}[/] matching row(s)"
2791
2801
 
2792
2802
  # Add to history
2793
2803
  self.add_history(message)
2794
2804
 
2795
- # Update selected rows to include new matches
2796
- for m in matches:
2797
- self.selected_rows[m] = True
2805
+ # Update selected rows to include new selections
2806
+ self.selected_rows.update(ok_rids)
2798
2807
 
2799
2808
  # Show notification immediately, then start highlighting
2800
2809
  self.notify(message, title="Select Row")
@@ -2807,20 +2816,12 @@ class DataFrameTable(DataTable):
2807
2816
  # Add to history
2808
2817
  self.add_history("Toggled row selection")
2809
2818
 
2810
- if self.has_hidden_rows:
2811
- # Some rows are hidden - invert only selected visible rows and clear selections for hidden rows
2812
- for i in range(len(self.selected_rows)):
2813
- if self.visible_rows[i]:
2814
- self.selected_rows[i] = not self.selected_rows[i]
2815
- else:
2816
- self.selected_rows[i] = False
2817
- else:
2818
- # Invert all selected rows
2819
- self.selected_rows = [not selected for selected in self.selected_rows]
2819
+ # Invert all selected rows
2820
+ self.selected_rows = {rid for rid in self.df[RID] if rid not in self.selected_rows}
2820
2821
 
2821
2822
  # Check if we're highlighting or un-highlighting
2822
- if new_selected_count := self.selected_rows.count(True):
2823
- self.notify(f"Toggled selection for [$success]{new_selected_count}[/] rows", title="Toggle")
2823
+ if selected_count := len(self.selected_rows):
2824
+ self.notify(f"Toggled selection for [$success]{selected_count}[/] rows", title="Toggle")
2824
2825
 
2825
2826
  # Recreate table for display
2826
2827
  self.setup_table()
@@ -2830,16 +2831,25 @@ class DataFrameTable(DataTable):
2830
2831
  # Add to history
2831
2832
  self.add_history("Toggled row selection")
2832
2833
 
2834
+ # Get current row RID
2833
2835
  ridx = self.cursor_row_idx
2834
- self.selected_rows[ridx] = not self.selected_rows[ridx]
2836
+ rid = self.df[RID][ridx]
2837
+
2838
+ if rid in self.selected_rows:
2839
+ self.selected_rows.discard(rid)
2840
+ else:
2841
+ self.selected_rows.add(rid)
2842
+
2843
+ row_key = self.cursor_row_key
2844
+ is_selected = rid in self.selected_rows
2845
+ match_cols = self.matches.get(rid, set())
2835
2846
 
2836
- row_key = str(ridx)
2837
- match_cols = self.matches.get(ridx, set())
2838
2847
  for col_idx, col in enumerate(self.ordered_columns):
2839
2848
  col_key = col.key
2849
+ col_name = col_key.value
2840
2850
  cell_text: Text = self.get_cell(row_key, col_key)
2841
2851
 
2842
- if self.selected_rows[ridx] or (col_idx in match_cols):
2852
+ if is_selected or (col_name in match_cols):
2843
2853
  cell_text.style = HIGHLIGHT_COLOR
2844
2854
  else:
2845
2855
  # Reset to default style based on dtype
@@ -2852,19 +2862,17 @@ class DataFrameTable(DataTable):
2852
2862
  def do_clear_selections_and_matches(self) -> None:
2853
2863
  """Clear all selected rows and matches without removing them from the dataframe."""
2854
2864
  # Check if any selected rows or matches
2855
- if not any(self.selected_rows) and not self.matches:
2865
+ if not self.selected_rows and not self.matches:
2856
2866
  self.notify("No selections to clear", title="Clear", severity="warning")
2857
2867
  return
2858
2868
 
2859
- row_count = sum(
2860
- 1 if (selected or idx in self.matches) else 0 for idx, selected in enumerate(self.selected_rows)
2861
- )
2869
+ row_count = len(self.selected_rows | set(self.matches.keys()))
2862
2870
 
2863
2871
  # Add to history
2864
2872
  self.add_history("Cleared all selected rows")
2865
2873
 
2866
2874
  # Clear all selections
2867
- self.selected_rows = [False] * len(self.df)
2875
+ self.selected_rows = set()
2868
2876
  self.matches = defaultdict(set)
2869
2877
 
2870
2878
  # Recreate table for display
@@ -2875,7 +2883,7 @@ class DataFrameTable(DataTable):
2875
2883
  # Find & Replace
2876
2884
  def find_matches(
2877
2885
  self, term: str, cidx: int | None = None, match_nocase: bool = False, match_whole: bool = False
2878
- ) -> dict[int, set[int]]:
2886
+ ) -> dict[int, set[str]]:
2879
2887
  """Find matches for a term in the dataframe.
2880
2888
 
2881
2889
  Args:
@@ -2892,12 +2900,10 @@ class DataFrameTable(DataTable):
2892
2900
  Raises:
2893
2901
  Exception: If expression validation or filtering fails.
2894
2902
  """
2895
- matches: dict[int, set[int]] = defaultdict(set)
2903
+ matches: dict[int, set[str]] = defaultdict(set)
2896
2904
 
2897
2905
  # Lazyframe for filtering
2898
- lf = self.df.lazy().with_row_index(RIDX)
2899
- if self.has_hidden_rows:
2900
- lf = lf.filter(self.visible_rows)
2906
+ lf = self.df.lazy()
2901
2907
 
2902
2908
  # Determine which columns to search: single column or all columns
2903
2909
  if cidx is not None:
@@ -2928,14 +2934,14 @@ class DataFrameTable(DataTable):
2928
2934
 
2929
2935
  # Get matched row indices
2930
2936
  try:
2931
- matched_ridxs = lf.filter(expr).select(RIDX).collect().to_series().to_list()
2937
+ matched_ridxs = lf.filter(expr).collect()[RID]
2932
2938
  except Exception as e:
2933
2939
  self.notify(f"Error applying filter: [$error]{expr}[/]", title="Find", severity="error", timeout=10)
2934
2940
  self.log(f"Error applying filter: {str(e)}")
2935
2941
  return matches
2936
2942
 
2937
2943
  for ridx in matched_ridxs:
2938
- matches[ridx].add(col_idx)
2944
+ matches[ridx].add(col_name)
2939
2945
 
2940
2946
  return matches
2941
2947
 
@@ -2997,9 +3003,9 @@ class DataFrameTable(DataTable):
2997
3003
  self.add_history(f"Found `[$success]{term}[/]` in column [$accent]{col_name}[/]")
2998
3004
 
2999
3005
  # Add to matches and count total
3000
- match_count = sum(len(col_idxs) for col_idxs in matches.values())
3001
- for ridx, col_idxs in matches.items():
3002
- self.matches[ridx].update(col_idxs)
3006
+ match_count = sum(len(cols) for cols in matches.values())
3007
+ for rid, cols in matches.items():
3008
+ self.matches[rid].update(cols)
3003
3009
 
3004
3010
  self.notify(f"Found [$success]{match_count}[/] matches for `[$accent]{term}[/]`", title="Find")
3005
3011
 
@@ -3031,9 +3037,9 @@ class DataFrameTable(DataTable):
3031
3037
  self.add_history(f"Found `[$success]{term}[/]` across all columns")
3032
3038
 
3033
3039
  # Add to matches and count total
3034
- match_count = sum(len(col_idxs) for col_idxs in matches.values())
3035
- for ridx, col_idxs in matches.items():
3036
- self.matches[ridx].update(col_idxs)
3040
+ match_count = sum(len(cols) for cols in matches.values())
3041
+ for rid, cols in matches.items():
3042
+ self.matches[rid].update(cols)
3037
3043
 
3038
3044
  self.notify(
3039
3045
  f"Found [$success]{match_count}[/] matches for `[$accent]{term}[/]` across all columns",
@@ -3095,7 +3101,7 @@ class DataFrameTable(DataTable):
3095
3101
 
3096
3102
  def do_next_selected_row(self) -> None:
3097
3103
  """Move cursor to the next selected row."""
3098
- if not any(self.selected_rows):
3104
+ if not self.selected_rows:
3099
3105
  self.notify("No selected rows to navigate", title="Next Selected Row", severity="warning")
3100
3106
  return
3101
3107
 
@@ -3117,7 +3123,7 @@ class DataFrameTable(DataTable):
3117
3123
 
3118
3124
  def do_previous_selected_row(self) -> None:
3119
3125
  """Move cursor to the previous selected row."""
3120
- if not any(self.selected_rows):
3126
+ if not self.selected_rows:
3121
3127
  self.notify("No selected rows to navigate", title="Previous Selected Row", severity="warning")
3122
3128
  return
3123
3129
 
@@ -3190,25 +3196,34 @@ class DataFrameTable(DataTable):
3190
3196
  )
3191
3197
 
3192
3198
  # Update matches
3193
- self.matches = {ridx: col_idxs.copy() for ridx, col_idxs in matches.items()}
3199
+ self.matches = matches
3194
3200
 
3195
3201
  # Recreate table for display
3196
3202
  self.setup_table()
3197
3203
 
3198
3204
  # Store state for interactive replacement using dataclass
3199
- sorted_rows = sorted(self.matches.keys())
3205
+ rid2ridx = {rid: ridx for ridx, rid in enumerate(self.df[RID]) if rid in self.matches}
3206
+
3207
+ # Unique columns to replace
3208
+ cols_to_replace = set()
3209
+ for cols in self.matches.values():
3210
+ cols_to_replace.update(cols)
3211
+
3212
+ # Sorted column indices to replace
3213
+ cidx2col = {cidx: col for cidx, col in enumerate(self.df.columns) if col in cols_to_replace}
3214
+
3200
3215
  self.replace_state = ReplaceState(
3201
3216
  term_find=term_find,
3202
3217
  term_replace=term_replace,
3203
3218
  match_nocase=match_nocase,
3204
3219
  match_whole=match_whole,
3205
3220
  cidx=cidx,
3206
- rows=sorted_rows,
3207
- cols_per_row=[sorted(self.matches[ridx]) for ridx in sorted_rows],
3221
+ rows=list(rid2ridx.values()),
3222
+ cols_per_row=[[cidx for cidx, col in cidx2col.items() if col in self.matches[rid]] for rid in rid2ridx],
3208
3223
  current_rpos=0,
3209
3224
  current_cpos=0,
3210
3225
  current_occurrence=0,
3211
- total_occurrence=sum(len(col_idxs) for col_idxs in self.matches.values()),
3226
+ total_occurrence=sum(len(cols) for cols in self.matches.values()),
3212
3227
  replaced_occurrence=0,
3213
3228
  skipped_occurrence=0,
3214
3229
  done=False,
@@ -3292,6 +3307,18 @@ class DataFrameTable(DataTable):
3292
3307
  pl.when(mask).then(pl.lit(value)).otherwise(pl.col(col_name)).alias(col_name)
3293
3308
  )
3294
3309
 
3310
+ # Also update the view if applicable
3311
+ if self.df_view is not None:
3312
+ col_updated = f"^_{col_name}_^"
3313
+ lf_updated = self.df.lazy().filter(mask).select(pl.col(col_name).alias(col_updated), pl.col(RID))
3314
+ self.df_view = (
3315
+ self.df_view.lazy()
3316
+ .join(lf_updated, on=RID, how="left")
3317
+ .with_columns(pl.coalesce(pl.col(col_updated), pl.col(col_name)).alias(col_name))
3318
+ .drop(col_updated)
3319
+ .collect()
3320
+ )
3321
+
3295
3322
  state.replaced_occurrence += len(ridxs)
3296
3323
 
3297
3324
  # Recreate table for display
@@ -3303,7 +3330,7 @@ class DataFrameTable(DataTable):
3303
3330
 
3304
3331
  col_name = "all columns" if state.cidx is None else self.df.columns[state.cidx]
3305
3332
  self.notify(
3306
- f"Replaced [$success]{state.replaced_occurrence}[/] of [$accent]{state.total_occurrence}[/] in [$s]{col_name}[/]",
3333
+ f"Replaced [$success]{state.replaced_occurrence}[/] of [$success]{state.total_occurrence}[/] in [$accent]{col_name}[/]",
3307
3334
  title="Replace",
3308
3335
  )
3309
3336
 
@@ -3327,7 +3354,7 @@ class DataFrameTable(DataTable):
3327
3354
  if state.done:
3328
3355
  # All done - show final notification
3329
3356
  col_name = "all columns" if state.cidx is None else self.df.columns[state.cidx]
3330
- msg = f"Replaced [$success]{state.replaced_occurrence}[/] of [$accent]{state.total_occurrence}[/] in [$success]{col_name}[/]"
3357
+ msg = f"Replaced [$success]{state.replaced_occurrence}[/] of [$success]{state.total_occurrence}[/] in [$accent]{col_name}[/]"
3331
3358
  if state.skipped_occurrence > 0:
3332
3359
  msg += f", [$warning]{state.skipped_occurrence}[/] skipped"
3333
3360
  self.notify(msg, title="Replace")
@@ -3362,6 +3389,7 @@ class DataFrameTable(DataTable):
3362
3389
  cidx = state.cols_per_row[state.current_rpos][state.current_cpos]
3363
3390
  col_name = self.df.columns[cidx]
3364
3391
  dtype = self.df.dtypes[cidx]
3392
+ rid = self.df[RID][ridx]
3365
3393
 
3366
3394
  # Replace
3367
3395
  if result is True:
@@ -3374,6 +3402,15 @@ class DataFrameTable(DataTable):
3374
3402
  .otherwise(pl.col(col_name))
3375
3403
  .alias(col_name)
3376
3404
  )
3405
+
3406
+ # Also update the view if applicable
3407
+ if self.df_view is not None:
3408
+ self.df_view = self.df_view.with_columns(
3409
+ pl.when(pl.col(RID) == rid)
3410
+ .then(pl.col(col_name).str.replace_all(term_find, state.term_replace))
3411
+ .otherwise(pl.col(col_name))
3412
+ .alias(col_name)
3413
+ )
3377
3414
  else:
3378
3415
  # try to convert replacement value to column dtype
3379
3416
  try:
@@ -3388,6 +3425,12 @@ class DataFrameTable(DataTable):
3388
3425
  .alias(col_name)
3389
3426
  )
3390
3427
 
3428
+ # Also update the view if applicable
3429
+ if self.df_view is not None:
3430
+ self.df_view = self.df_view.with_columns(
3431
+ pl.when(pl.col(RID) == rid).then(pl.lit(value)).otherwise(pl.col(col_name)).alias(col_name)
3432
+ )
3433
+
3391
3434
  state.replaced_occurrence += 1
3392
3435
 
3393
3436
  # Skip
@@ -3424,18 +3467,16 @@ class DataFrameTable(DataTable):
3424
3467
  def do_view_rows(self) -> None:
3425
3468
  """View rows.
3426
3469
 
3427
- If there are selected rows or matches, view those rows.
3428
- Otherwise, view based on the value of the currently selected cell.
3470
+ If there are selected rows, view those.
3471
+ Otherwise, view based on the cursor value.
3429
3472
  """
3430
3473
 
3431
3474
  cidx = self.cursor_col_idx
3432
- col_name = self.df.columns[cidx]
3475
+ col_name = self.cursor_col_name
3433
3476
 
3434
- # If there are rows with selections or matches, use those
3435
- if any(self.selected_rows) or self.matches:
3436
- term = [
3437
- True if (selected or idx in self.matches) else False for idx, selected in enumerate(self.selected_rows)
3438
- ]
3477
+ # If there are selected rows, use those
3478
+ if self.selected_rows:
3479
+ term = pl.col(RID).is_in(self.selected_rows)
3439
3480
  # Otherwise, use the current cell value
3440
3481
  else:
3441
3482
  ridx = self.cursor_row_idx
@@ -3457,7 +3498,7 @@ class DataFrameTable(DataTable):
3457
3498
  )
3458
3499
 
3459
3500
  def view_rows(self, result) -> None:
3460
- """Show only rows with selections or matches, and do hide others. Do not modify the dataframe."""
3501
+ """View selected rows and hide others. Do not modify the dataframe."""
3461
3502
  if result is None:
3462
3503
  return
3463
3504
  term, cidx, match_nocase, match_whole = result
@@ -3467,13 +3508,17 @@ class DataFrameTable(DataTable):
3467
3508
  # Support for polars expression
3468
3509
  if isinstance(term, pl.Expr):
3469
3510
  expr = term
3511
+
3470
3512
  # Support for list of booleans (selected rows)
3471
3513
  elif isinstance(term, (list, pl.Series)):
3472
3514
  expr = term
3515
+
3516
+ # Null case
3473
3517
  elif term == NULL:
3474
3518
  expr = pl.col(col_name).is_null()
3519
+
3520
+ # Support for polars expression in string form
3475
3521
  elif tentative_expr(term):
3476
- # Support for polars expression in string form
3477
3522
  try:
3478
3523
  expr = validate_expr(term, self.df.columns, cidx)
3479
3524
  except Exception as e:
@@ -3482,6 +3527,8 @@ class DataFrameTable(DataTable):
3482
3527
  )
3483
3528
  self.log(f"Error validating expression `{term}`: {str(e)}")
3484
3529
  return
3530
+
3531
+ # Type-aware search based on column dtype
3485
3532
  else:
3486
3533
  dtype = self.df.dtypes[cidx]
3487
3534
  if dtype == pl.String:
@@ -3505,11 +3552,7 @@ class DataFrameTable(DataTable):
3505
3552
  )
3506
3553
 
3507
3554
  # Lazyframe with row indices
3508
- lf = self.df.lazy().with_row_index(RIDX)
3509
-
3510
- # Apply existing visibility filter first
3511
- if self.has_hidden_rows:
3512
- lf = lf.filter(self.visible_rows)
3555
+ lf = self.df.lazy()
3513
3556
 
3514
3557
  expr_str = "boolean list or series" if isinstance(expr, (list, pl.Series)) else str(expr)
3515
3558
 
@@ -3517,7 +3560,7 @@ class DataFrameTable(DataTable):
3517
3560
  try:
3518
3561
  df_filtered = lf.filter(expr).collect()
3519
3562
  except Exception as e:
3520
- self.histories.pop() # Remove last history entry
3563
+ self.histories_undo.pop() # Remove last history entry
3521
3564
  self.notify(f"Error applying filter [$error]{expr_str}[/]", title="Filter", severity="error", timeout=10)
3522
3565
  self.log(f"Error applying filter `{expr_str}`: {str(e)}")
3523
3566
  return
@@ -3530,26 +3573,37 @@ class DataFrameTable(DataTable):
3530
3573
  # Add to history
3531
3574
  self.add_history(f"Filtered by expression [$success]{expr_str}[/]")
3532
3575
 
3533
- # Mark unfiltered rows as invisible
3534
- filtered_row_indices = set(df_filtered[RIDX].to_list())
3535
- if filtered_row_indices:
3536
- for ridx in range(len(self.visible_rows)):
3537
- if ridx not in filtered_row_indices:
3538
- self.visible_rows[ridx] = False
3576
+ ok_rids = set(df_filtered[RID])
3577
+
3578
+ # Create a view of self.df as a copy
3579
+ if self.df_view is None:
3580
+ self.df_view = self.df
3581
+
3582
+ # Update dataframe
3583
+ self.df = df_filtered
3584
+
3585
+ # Update selected rows
3586
+ if self.selected_rows:
3587
+ self.selected_rows.intersection_update(ok_rids)
3588
+
3589
+ # Update matches
3590
+ if self.matches:
3591
+ self.matches = {rid: cols for rid, cols in self.matches.items() if rid in ok_rids}
3539
3592
 
3540
3593
  # Recreate table for display
3541
3594
  self.setup_table()
3542
3595
 
3543
- self.notify(f"Filtered to [$success]{matched_count}[/] matching rows", title="Filter")
3596
+ self.notify(f"Filtered to [$success]{matched_count}[/] matching row(s)", title="Filter")
3544
3597
 
3545
3598
  def do_filter_rows(self) -> None:
3546
- """Keep only the rows with selections and cell matches, and remove others."""
3547
- if any(self.selected_rows) or self.matches:
3548
- message = "Filtered to rows with selection and cell matches (other rows removed)"
3549
- filter_expr = [
3550
- True if (selected or ridx in self.matches) else False
3551
- for ridx, selected in enumerate(self.selected_rows)
3552
- ]
3599
+ """Filter rows.
3600
+
3601
+ If there are selected rows, use those.
3602
+ Otherwise, filter based on the cursor value.
3603
+ """
3604
+ if self.selected_rows:
3605
+ message = "Filtered to selected rows (other rows removed)"
3606
+ filter_expr = pl.col(RID).is_in(self.selected_rows)
3553
3607
  else: # Search cursor value in current column
3554
3608
  message = "Filtered to rows matching cursor value (other rows removed)"
3555
3609
  cidx = self.cursor_col_idx
@@ -3565,16 +3619,26 @@ class DataFrameTable(DataTable):
3565
3619
  self.add_history(message, dirty=True)
3566
3620
 
3567
3621
  # Apply filter to dataframe with row indices
3568
- df_filtered = self.df.with_row_index(RIDX).filter(filter_expr)
3622
+ df_filtered = self.df.lazy().filter(filter_expr).collect()
3623
+ ok_rids = set(df_filtered[RID])
3569
3624
 
3570
3625
  # Update selected rows
3571
- selected_rows = [self.selected_rows[df_filtered[RIDX][ridx]] for ridx in range(len(df_filtered))]
3626
+ if self.selected_rows:
3627
+ selected_rows = {rid for rid in self.selected_rows if rid in ok_rids}
3628
+ else:
3629
+ selected_rows = set()
3572
3630
 
3573
3631
  # Update matches
3574
- matches = {ridx: self.matches[df_filtered[RIDX][ridx]] for ridx in range(len(df_filtered))}
3632
+ if self.matches:
3633
+ matches = {rid: cols for rid, cols in self.matches.items() if rid in ok_rids}
3634
+ else:
3635
+ matches = defaultdict(set)
3575
3636
 
3576
3637
  # Update dataframe
3577
- self.reset_df(df_filtered.drop(RIDX))
3638
+ self.reset_df(df_filtered)
3639
+
3640
+ # Clear view for filter mode
3641
+ self.df_view = None
3578
3642
 
3579
3643
  # Restore selected rows and matches
3580
3644
  self.selected_rows = selected_rows
@@ -3583,7 +3647,7 @@ class DataFrameTable(DataTable):
3583
3647
  # Recreate table for display
3584
3648
  self.setup_table()
3585
3649
 
3586
- self.notify(f"{message}. Now showing [$success]{len(self.df)}[/] rows", title="Filter")
3650
+ self.notify(f"{message}. Now showing [$success]{len(self.df)}[/] rows.", title="Filter")
3587
3651
 
3588
3652
  # Copy & Save
3589
3653
  def do_copy_to_clipboard(self, content: str, message: str) -> None:
@@ -3609,20 +3673,24 @@ class DataFrameTable(DataTable):
3609
3673
  except FileNotFoundError:
3610
3674
  self.notify("Error copying to clipboard", title="Clipboard", severity="error", timeout=10)
3611
3675
 
3612
- def do_save_to_file(
3613
- self, title: str = "Save to File", all_tabs: bool | None = None, task_after_save: str | None = None
3614
- ) -> None:
3676
+ def do_save_to_file(self, all_tabs: bool | None = None, task_after_save: str | None = None) -> None:
3615
3677
  """Open screen to save file."""
3616
3678
  self._task_after_save = task_after_save
3679
+ tab_count = len(self.app.tabs)
3680
+ save_all = tab_count > 1 and all_tabs is not False
3681
+
3682
+ filepath = Path(self.filename)
3683
+ if save_all:
3684
+ ext = filepath.suffix.lower()
3685
+ if ext in (".xlsx", ".xls"):
3686
+ filename = self.filename
3687
+ else:
3688
+ filename = "all-tabs.xlsx"
3689
+ else:
3690
+ filename = str(filepath.with_stem(self.tabname))
3617
3691
 
3618
- multi_tab = len(self.app.tabs) > 1
3619
- filename = (
3620
- "all-tabs.xlsx"
3621
- if all_tabs or (all_tabs is None and multi_tab)
3622
- else str(Path(self.filename).with_stem(self.tabname))
3623
- )
3624
3692
  self.app.push_screen(
3625
- SaveFileScreen(filename, title=title, all_tabs=all_tabs, multi_tab=multi_tab),
3693
+ SaveFileScreen(filename, save_all=save_all, tab_count=tab_count),
3626
3694
  callback=self.save_to_file,
3627
3695
  )
3628
3696
 
@@ -3630,10 +3698,8 @@ class DataFrameTable(DataTable):
3630
3698
  """Handle result from SaveFileScreen."""
3631
3699
  if result is None:
3632
3700
  return
3633
- filename, all_tabs, overwrite_prompt = result
3634
-
3635
- # Whether to save all tabs (for Excel files)
3636
- self._all_tabs = all_tabs
3701
+ filename, save_all, overwrite_prompt = result
3702
+ self._save_all = save_all
3637
3703
 
3638
3704
  # Check if file exists
3639
3705
  if overwrite_prompt and Path(filename).exists():
@@ -3652,7 +3718,7 @@ class DataFrameTable(DataTable):
3652
3718
  else:
3653
3719
  # Go back to SaveFileScreen to allow user to enter a different name
3654
3720
  self.app.push_screen(
3655
- SaveFileScreen(self._pending_filename),
3721
+ SaveFileScreen(self._pending_filename, save_all=self._save_all),
3656
3722
  callback=self.save_to_file,
3657
3723
  )
3658
3724
 
@@ -3660,7 +3726,7 @@ class DataFrameTable(DataTable):
3660
3726
  """Actually save the dataframe to a file."""
3661
3727
  filepath = Path(filename)
3662
3728
  ext = filepath.suffix.lower()
3663
- if ext.endswith(".gz"):
3729
+ if ext == ".gz":
3664
3730
  ext = Path(filename).with_suffix("").suffix.lower()
3665
3731
 
3666
3732
  fmt = ext.removeprefix(".")
@@ -3672,30 +3738,28 @@ class DataFrameTable(DataTable):
3672
3738
  )
3673
3739
  fmt = "csv"
3674
3740
 
3675
- # Add to history
3676
- self.add_history(f"Saved dataframe to [$success]{filename}[/]")
3677
-
3741
+ df = (self.df if self.df_view is None else self.df_view).select(pl.exclude(RID))
3678
3742
  try:
3679
3743
  if fmt == "csv":
3680
- self.df.write_csv(filename)
3744
+ df.write_csv(filename)
3681
3745
  elif fmt in ("tsv", "tab"):
3682
- self.df.write_csv(filename, separator="\t")
3746
+ df.write_csv(filename, separator="\t")
3683
3747
  elif fmt in ("xlsx", "xls"):
3684
3748
  self.save_excel(filename)
3685
3749
  elif fmt == "json":
3686
- self.df.write_json(filename)
3750
+ df.write_json(filename)
3687
3751
  elif fmt == "ndjson":
3688
- self.df.write_ndjson(filename)
3752
+ df.write_ndjson(filename)
3689
3753
  elif fmt == "parquet":
3690
- self.df.write_parquet(filename)
3754
+ df.write_parquet(filename)
3691
3755
  else: # Fallback to CSV
3692
- self.df.write_csv(filename)
3756
+ df.write_csv(filename)
3693
3757
 
3694
3758
  # Update current filename
3695
3759
  self.filename = filename
3696
3760
 
3697
3761
  # Reset dirty flag after save
3698
- if self._all_tabs:
3762
+ if self._save_all:
3699
3763
  tabs: dict[TabPane, DataFrameTable] = self.app.tabs
3700
3764
  for table in tabs.values():
3701
3765
  table.dirty = False
@@ -3709,7 +3773,7 @@ class DataFrameTable(DataTable):
3709
3773
  self.app.exit()
3710
3774
 
3711
3775
  # From ConfirmScreen callback, so notify accordingly
3712
- if self._all_tabs:
3776
+ if self._save_all:
3713
3777
  self.notify(f"Saved all tabs to [$success]{filename}[/]", title="Save to File")
3714
3778
  else:
3715
3779
  self.notify(f"Saved current tab to [$success]{filename}[/]", title="Save to File")
@@ -3722,16 +3786,18 @@ class DataFrameTable(DataTable):
3722
3786
  """Save to an Excel file."""
3723
3787
  import xlsxwriter
3724
3788
 
3725
- if not self._all_tabs or len(self.app.tabs) == 1:
3789
+ if not self._save_all or len(self.app.tabs) == 1:
3726
3790
  # Single tab - save directly
3727
- self.df.write_excel(filename)
3791
+ df = (self.df if self.df_view is None else self.df_view).select(pl.exclude(RID))
3792
+ df.write_excel(filename, worksheet=self.tabname)
3728
3793
  else:
3729
3794
  # Multiple tabs - use xlsxwriter to create multiple sheets
3730
3795
  with xlsxwriter.Workbook(filename) as wb:
3731
3796
  tabs: dict[TabPane, DataFrameTable] = self.app.tabs
3732
3797
  for table in tabs.values():
3733
3798
  worksheet = wb.add_worksheet(table.tabname)
3734
- table.df.write_excel(workbook=wb, worksheet=worksheet)
3799
+ df = (table.df if table.df_view is None else table.df_view).select(pl.exclude(RID))
3800
+ df.write_excel(workbook=wb, worksheet=worksheet)
3735
3801
 
3736
3802
  # SQL Interface
3737
3803
  def do_simple_sql(self) -> None:
@@ -3775,19 +3841,17 @@ class DataFrameTable(DataTable):
3775
3841
  sql: The SQL query string to execute.
3776
3842
  """
3777
3843
 
3778
- import re
3779
-
3780
- RE_FROM_SELF = re.compile(r"\bfrom\s+self\b", re.IGNORECASE)
3844
+ sql = sql.replace("$#", f"(`{RID}` + 1)")
3845
+ if RID not in sql and "*" not in sql:
3846
+ # Ensure RID is selected
3847
+ import re
3781
3848
 
3782
- sql = RE_FROM_SELF.sub(f", `{RIDX}` FROM self", sql)
3849
+ RE_FROM_SELF = re.compile(r"\bFROM\s+self\b", re.IGNORECASE)
3850
+ sql = RE_FROM_SELF.sub(f", `{RID}` FROM self", sql)
3783
3851
 
3784
3852
  # Execute the SQL query
3785
3853
  try:
3786
- lf = self.df.lazy().with_row_index(RIDX)
3787
- if self.has_hidden_rows:
3788
- lf = lf.filter(self.visible_rows)
3789
-
3790
- df_filtered = lf.sql(sql).collect()
3854
+ df_filtered = self.df.lazy().sql(sql).collect()
3791
3855
 
3792
3856
  if not len(df_filtered):
3793
3857
  self.notify(
@@ -3795,38 +3859,34 @@ class DataFrameTable(DataTable):
3795
3859
  )
3796
3860
  return
3797
3861
 
3798
- # Add to history
3799
- self.add_history(f"SQL Query:\n[$success]{sql}[/]", dirty=not view)
3800
-
3801
- if view:
3802
- # Just view - do not modify the dataframe
3803
- filtered_row_indices = set(df_filtered[RIDX].to_list())
3804
- if filtered_row_indices:
3805
- self.visible_rows = [ridx in filtered_row_indices for ridx in range(len(self.visible_rows))]
3806
-
3807
- filtered_col_names = set(df_filtered.columns)
3808
- if filtered_col_names:
3809
- self.hidden_columns = {
3810
- col_name for col_name in self.df.columns if col_name not in filtered_col_names
3811
- }
3812
- else: # filter - modify the dataframe
3813
- # Update selected rows
3814
- selected_rows = [self.selected_rows[df_filtered[RIDX][ridx]] for ridx in range(len(df_filtered))]
3815
-
3816
- # Update matches
3817
- matches = {ridx: self.matches[df_filtered[RIDX][ridx]] for ridx in range(len(df_filtered))}
3818
-
3819
- # Update dataframe
3820
- self.reset_df(df_filtered.drop(RIDX))
3821
-
3822
- # Restore selected rows and matches
3823
- self.selected_rows = selected_rows
3824
- self.matches = matches
3825
3862
  except Exception as e:
3826
3863
  self.notify(f"Error executing SQL query [$error]{sql}[/]", title="SQL Query", severity="error", timeout=10)
3827
3864
  self.log(f"Error executing SQL query `{sql}`: {str(e)}")
3828
3865
  return
3829
3866
 
3867
+ # Add to history
3868
+ self.add_history(f"SQL Query:\n[$success]{sql}[/]", dirty=not view)
3869
+
3870
+ # Create a view of self.df as a copy
3871
+ if view and self.df_view is None:
3872
+ self.df_view = self.df
3873
+
3874
+ # Clear view for filter mode
3875
+ if not view:
3876
+ self.df_view = None
3877
+
3878
+ # Update dataframe
3879
+ self.df = df_filtered
3880
+ ok_rids = set(df_filtered[RID])
3881
+
3882
+ # Update selected rows
3883
+ if self.selected_rows:
3884
+ self.selected_rows.intersection_update(ok_rids)
3885
+
3886
+ # Update matches
3887
+ if self.matches:
3888
+ self.matches = {rid: cols for rid, cols in self.matches.items() if rid in ok_rids}
3889
+
3830
3890
  # Recreate table for display
3831
3891
  self.setup_table()
3832
3892