dataframe-textual 1.12.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,13 +3,14 @@
3
3
  import sys
4
4
  from collections import defaultdict, deque
5
5
  from dataclasses import dataclass
6
+ from itertools import zip_longest
6
7
  from pathlib import Path
7
8
  from textwrap import dedent
8
9
  from typing import Any
9
10
 
10
11
  import polars as pl
11
- from rich.text import Text
12
- from textual import work
12
+ from rich.text import Text, TextType
13
+ from textual._two_way_dict import TwoWayDict
13
14
  from textual.coordinate import Coordinate
14
15
  from textual.events import Click
15
16
  from textual.reactive import reactive
@@ -18,8 +19,11 @@ from textual.widgets import DataTable, TabPane
18
19
  from textual.widgets._data_table import (
19
20
  CellDoesNotExist,
20
21
  CellKey,
22
+ CellType,
21
23
  ColumnKey,
22
24
  CursorType,
25
+ DuplicateKey,
26
+ Row,
23
27
  RowKey,
24
28
  )
25
29
 
@@ -27,19 +31,19 @@ from .common import (
27
31
  CURSOR_TYPES,
28
32
  NULL,
29
33
  NULL_DISPLAY,
30
- RIDX,
34
+ RID,
31
35
  SUBSCRIPT_DIGITS,
32
36
  SUPPORTED_FORMATS,
33
37
  DtypeConfig,
34
38
  format_row,
35
39
  get_next_item,
36
40
  parse_placeholders,
37
- sleep_async,
41
+ round_to_nearest_hundreds,
38
42
  tentative_expr,
39
43
  validate_expr,
40
44
  )
41
45
  from .sql_screen import AdvancedSqlScreen, SimpleSqlScreen
42
- from .table_screen import FrequencyScreen, RowDetailScreen, StatisticsScreen
46
+ from .table_screen import FrequencyScreen, MetaColumnScreen, MetaShape, RowDetailScreen, StatisticsScreen
43
47
  from .yes_no_screen import (
44
48
  AddColumnScreen,
45
49
  AddLinkScreen,
@@ -57,6 +61,9 @@ from .yes_no_screen import (
57
61
  # Color for highlighting selections and matches
58
62
  HIGHLIGHT_COLOR = "red"
59
63
 
64
+ # Buffer size for loading rows
65
+ BUFFER_SIZE = 5
66
+
60
67
  # Warning threshold for loading rows
61
68
  WARN_ROWS_THRESHOLD = 50_000
62
69
 
@@ -70,16 +77,16 @@ class History:
70
77
 
71
78
  description: str
72
79
  df: pl.DataFrame
80
+ df_view: pl.DataFrame | None
73
81
  filename: str
74
82
  loaded_rows: int
75
- sorted_columns: dict[str, bool]
76
83
  hidden_columns: set[str]
77
- selected_rows: list[bool]
78
- visible_rows: list[bool]
84
+ selected_rows: set[int]
85
+ sorted_columns: dict[str, bool] # col_name -> descending
79
86
  fixed_rows: int
80
87
  fixed_columns: int
81
88
  cursor_coordinate: Coordinate
82
- matches: dict[int, set[int]]
89
+ matches: dict[int, set[str]] # RID -> set of col names
83
90
  dirty: bool = False # Whether this history state has unsaved changes
84
91
 
85
92
 
@@ -115,7 +122,7 @@ class DataFrameTable(DataTable):
115
122
  - **g** - ⬆️ Jump to first row
116
123
  - **G** - ⬇️ Jump to last row
117
124
  - **HOME/END** - 🎯 Jump to first/last column
118
- - **Ctrl+HOME/END** - 🎯 Jump to page top/bottom
125
+ - **Ctrl+HOME/END** - 🎯 Jump to page top/bottom
119
126
  - **Ctrl+F** - 📜 Page down
120
127
  - **Ctrl+B** - 📜 Page up
121
128
  - **PgUp/PgDn** - 📜 Page up/down
@@ -125,11 +132,13 @@ class DataFrameTable(DataTable):
125
132
  - **U** - 🔄 Redo last undone action
126
133
  - **Ctrl+U** - 🔁 Reset to initial state
127
134
 
128
- ## 👁️ Viewing & Display
135
+ ## 👁️ Display
129
136
  - **Enter** - 📋 Show row details in modal
130
137
  - **F** - 📊 Show frequency distribution
131
138
  - **s** - 📈 Show statistics for current column
132
139
  - **S** - 📊 Show statistics for entire dataframe
140
+ - **m** - 📐 Show dataframe metadata (row/column counts)
141
+ - **M** - 📋 Show column metadata (ID, name, type)
133
142
  - **h** - 👁️ Hide current column
134
143
  - **H** - 👀 Show all hidden rows/columns
135
144
  - **_** - 📏 Expand column to full width
@@ -143,31 +152,31 @@ class DataFrameTable(DataTable):
143
152
  - **]** - 🔽 Sort column descending
144
153
  - *(Multi-column sort supported)*
145
154
 
146
- ## 🔍 Searching & Filtering
147
- - **|** - 🔎 Search in current column with expression
148
- - **\\\\** - 🔎 Search in current column using cursor value
155
+ ## Row Selection
156
+ - **\\\\** - Select rows with cell matches or those matching cursor value in current column
157
+ - **|** - Select rows with expression
158
+ - **'** - ✅ Select/deselect current row
159
+ - **t** - 💡 Toggle row selection (invert all)
160
+ - **T** - 🧹 Clear all selections and matches
161
+ - **{** - ⬆️ Go to previous selected row
162
+ - **}** - ⬇️ Go to next selected row
163
+ - *(Supports case-insensitive & whole-word matching)*
164
+
165
+ ## 🔎 Find & Replace
149
166
  - **/** - 🔎 Find in current column with cursor value
150
167
  - **?** - 🔎 Find in current column with expression
151
168
  - **;** - 🌐 Global find using cursor value
152
169
  - **:** - 🌐 Global find with expression
153
170
  - **n** - ⬇️ Go to next match
154
171
  - **N** - ⬆️ Go to previous match
155
- - **v** - 👁️ View/filter rows by cell or selected rows and hide others
156
- - **V** - 🔧 View/filter rows by expression and hide others
157
- - *(All search/find support case-insensitive & whole-word matching)*
158
-
159
- ## ✏️ Replace
160
172
  - **r** - 🔄 Replace in current column (interactive or all)
161
173
  - **R** - 🔄 Replace across all columns (interactive or all)
162
174
  - *(Supports case-insensitive & whole-word matching)*
163
175
 
164
- ## Selection & Filter
165
- - **'** - ✓️ Select/deselect current row
166
- - **t** - 💡 Toggle row selection (invert all)
167
- - **T** - 🧹 Clear all selections and matches
168
- - **{** - ⬆️ Go to previous selected row
169
- - **}** - ⬇️ Go to next selected row
170
- - **"** - 📍 Filter selected rows and remove others
176
+ ## 👁️ View & Filter
177
+ - **"** - 📍 Filter selected rows (removes others)
178
+ - **v** - 👁️ View selected rows (hides others)
179
+ - **V** - 🔧 View selected rows matching expression (hides others)
171
180
 
172
181
  ## 🔍 SQL Interface
173
182
  - **l** - 💬 Open simple SQL interface (select columns & where clause)
@@ -210,8 +219,8 @@ class DataFrameTable(DataTable):
210
219
  # Navigation
211
220
  ("g", "jump_top", "Jump to top"),
212
221
  ("G", "jump_bottom", "Jump to bottom"),
213
- ("ctrl+f", "forward_page", "Page down"),
214
- ("ctrl+b", "backward_page", "Page up"),
222
+ ("pageup,ctrl+b", "page_up", "Page up"),
223
+ ("pagedown,ctrl+f", "page_down", "Page down"),
215
224
  # Undo/Redo/Reset
216
225
  ("u", "undo", "Undo"),
217
226
  ("U", "redo", "Redo"),
@@ -224,13 +233,16 @@ class DataFrameTable(DataTable):
224
233
  ("z", "freeze_row_column", "Freeze rows/columns"),
225
234
  ("comma", "show_thousand_separator", "Toggle thousand separator"), # `,`
226
235
  ("underscore", "expand_column", "Expand column to full width"), # `_`
236
+ ("circumflex_accent", "toggle_rid", "Toggle internal row index"), # `^`
227
237
  # Copy
228
238
  ("c", "copy_cell", "Copy cell to clipboard"),
229
239
  ("ctrl+c", "copy_column", "Copy column to clipboard"),
230
240
  ("ctrl+r", "copy_row", "Copy row to clipboard"),
231
241
  # Save
232
242
  ("ctrl+s", "save_to_file", "Save to file"),
233
- # Detail, Frequency, and Statistics
243
+ # Metadata, Detail, Frequency, and Statistics
244
+ ("m", "metadata_shape", "Show metadata for row count and column count"),
245
+ ("M", "metadata_column", "Show metadata for column"),
234
246
  ("enter", "view_row_detail", "View row details"),
235
247
  ("F", "show_frequency", "Show frequency"),
236
248
  ("s", "show_statistics", "Show statistics for column"),
@@ -239,28 +251,26 @@ class DataFrameTable(DataTable):
239
251
  ("left_square_bracket", "sort_ascending", "Sort ascending"), # `[`
240
252
  ("right_square_bracket", "sort_descending", "Sort descending"), # `]`
241
253
  # View & Filter
242
- ("v", "view_rows", "View rows"),
243
- ("V", "view_rows_expr", "View rows by expression"),
244
- ("quotation_mark", "filter_rows", "Filter selected"), # `"`
245
- # Search
246
- ("backslash", "search_cursor_value", "Search column with cursor value"), # `\`
247
- ("vertical_line", "search_expr", "Search column with expression"), # `|`
254
+ ("v", "view_rows", "View selected rows"),
255
+ ("V", "view_rows_expr", "View selected rows matching expression"),
256
+ ("quotation_mark", "filter_rows", "Filter selected rows"), # `"`
257
+ # Row Selection
258
+ ("backslash", "select_row", "Select rows with cell matches or those matching cursor value in current column"), # `\`
259
+ ("vertical_line", "select_row_expr", "Select rows with expression"), # `|`
248
260
  ("right_curly_bracket", "next_selected_row", "Go to next selected row"), # `}`
249
261
  ("left_curly_bracket", "previous_selected_row", "Go to previous selected row"), # `{`
250
- # Find
262
+ ("apostrophe", "toggle_row_selection", "Toggle row selection"), # `'`
263
+ ("t", "toggle_selections", "Toggle all row selections"),
264
+ ("T", "clear_selections_and_matches", "Clear selections"),
265
+ # Find & Replace
251
266
  ("slash", "find_cursor_value", "Find in column with cursor value"), # `/`
252
267
  ("question_mark", "find_expr", "Find in column with expression"), # `?`
253
268
  ("semicolon", "find_cursor_value('global')", "Global find with cursor value"), # `;`
254
269
  ("colon", "find_expr('global')", "Global find with expression"), # `:`
255
270
  ("n", "next_match", "Go to next match"), # `n`
256
271
  ("N", "previous_match", "Go to previous match"), # `Shift+n`
257
- # Replace
258
272
  ("r", "replace", "Replace in column"), # `r`
259
273
  ("R", "replace_global", "Replace global"), # `Shift+R`
260
- # Selection
261
- ("apostrophe", "toggle_row_selection", "Toggle row selection"), # `'`
262
- ("t", "toggle_selections", "Toggle all row selections"),
263
- ("T", "clear_selections_and_matches", "Clear selections"),
264
274
  # Delete
265
275
  ("delete", "clear_cell", "Clear cell"),
266
276
  ("minus", "delete_column", "Delete column"), # `-`
@@ -311,34 +321,40 @@ class DataFrameTable(DataTable):
311
321
  super().__init__(**kwargs)
312
322
 
313
323
  # DataFrame state
314
- self.dataframe = df # Original dataframe
315
- self.df = df # Internal/working dataframe
324
+ self.dataframe = df.lazy().with_row_index(RID).select(pl.exclude(RID), RID).collect() # Original dataframe
325
+ self.df = self.dataframe # Internal/working dataframe
316
326
  self.filename = filename or "untitled.csv" # Current filename
317
327
  self.tabname = tabname or Path(filename).stem # Tab name
328
+
329
+ # In view mode, this is the copy of self.df
330
+ self.df_view = None
331
+
318
332
  # Pagination & Loading
319
- self.INITIAL_BATCH_SIZE = (self.app.size.height // 100 + 1) * 100
320
- self.BATCH_SIZE = self.INITIAL_BATCH_SIZE // 2
333
+ self.BATCH_SIZE = max((self.app.size.height // 100 + 1) * 100, 100)
321
334
  self.loaded_rows = 0 # Track how many rows are currently loaded
335
+ self.loaded_ranges: list[tuple[int, int]] = [] # List of (start, end) row indices that are loaded
322
336
 
323
337
  # State tracking (all 0-based indexing)
324
- self.sorted_columns: dict[str, bool] = {} # col_name -> descending
325
338
  self.hidden_columns: set[str] = set() # Set of hidden column names
326
- self.selected_rows: list[bool] = [False] * len(self.df) # Track selected rows
327
- self.visible_rows: list[bool] = [True] * len(self.df) # Track visible rows (for filtering)
328
- self.matches: dict[int, set[int]] = defaultdict(set) # Track search matches: row_idx -> set of col_idx
339
+ self.selected_rows: set[int] = set() # Track selected rows by RID
340
+ self.sorted_columns: dict[str, bool] = {} # col_name -> descending
341
+ self.matches: dict[int, set[str]] = defaultdict(set) # Track search matches: RID -> set of col_names
329
342
 
330
343
  # Freezing
331
344
  self.fixed_rows = 0 # Number of fixed rows
332
345
  self.fixed_columns = 0 # Number of fixed columns
333
346
 
334
347
  # History stack for undo
335
- self.histories: deque[History] = deque()
336
- # Current history state for redo
337
- self.history: History = None
348
+ self.histories_undo: deque[History] = deque()
349
+ # History stack for redo
350
+ self.histories_redo: deque[History] = deque()
338
351
 
339
352
  # Whether to use thousand separator for numeric display
340
353
  self.thousand_separator = False
341
354
 
355
+ # Whether to show internal row index column
356
+ self.show_rid = False
357
+
342
358
  @property
343
359
  def cursor_key(self) -> CellKey:
344
360
  """Get the current cursor position as a CellKey.
@@ -405,22 +421,13 @@ class DataFrameTable(DataTable):
405
421
 
406
422
  @property
407
423
  def cursor_value(self) -> Any:
408
- """Get the current cursor cell value.
424
+ """Get the current cursor cell value in the dataframe.
409
425
 
410
426
  Returns:
411
427
  Any: The value of the cell at the cursor position.
412
428
  """
413
429
  return self.df.item(self.cursor_row_idx, self.cursor_col_idx)
414
430
 
415
- @property
416
- def has_hidden_rows(self) -> bool:
417
- """Check if there are any hidden rows.
418
-
419
- Returns:
420
- bool: True if there are hidden rows, False otherwise.
421
- """
422
- return any(v for v in self.visible_rows if v is False)
423
-
424
431
  @property
425
432
  def ordered_selected_rows(self) -> list[int]:
426
433
  """Get the list of selected row indices in order.
@@ -428,7 +435,7 @@ class DataFrameTable(DataTable):
428
435
  Returns:
429
436
  list[int]: A list of 0-based row indices that are currently selected.
430
437
  """
431
- return [ridx for ridx, selected in enumerate(self.selected_rows) if selected]
438
+ return [ridx for ridx, rid in enumerate(self.df[RID]) if rid in self.selected_rows]
432
439
 
433
440
  @property
434
441
  def ordered_matches(self) -> list[tuple[int, int]]:
@@ -438,19 +445,38 @@ class DataFrameTable(DataTable):
438
445
  list[tuple[int, int]]: A list of (row_idx, col_idx) tuples for matched cells.
439
446
  """
440
447
  matches = []
441
- for ridx in sorted(self.matches.keys()):
442
- for cidx in sorted(self.matches[ridx]):
443
- matches.append((ridx, cidx))
448
+
449
+ # Unique columns
450
+ cols_to_check = set()
451
+ for cols in self.matches.values():
452
+ cols_to_check.update(cols)
453
+
454
+ # Ordered columns
455
+ cidx2col = {cidx: col for cidx, col in enumerate(self.df.columns) if col in cols_to_check}
456
+
457
+ for ridx, rid in enumerate(self.df[RID]):
458
+ if cols := self.matches.get(rid):
459
+ for cidx, col in cidx2col.items():
460
+ if col in cols:
461
+ matches.append((ridx, cidx))
462
+
444
463
  return matches
445
464
 
446
- @property
447
- def last_history(self) -> History:
448
- """Get the last history state.
465
+ def _round_to_nearest_hundreds(self, num: int):
466
+ """Round a number to the nearest hundreds.
449
467
 
450
- Returns:
451
- History: The most recent History object from the histories deque.
468
+ Args:
469
+ num: The number to round.
452
470
  """
453
- return self.histories[-1] if self.histories else None
471
+ return round_to_nearest_hundreds(num, N=self.BATCH_SIZE)
472
+
473
+ def get_row_idx(self, row_key: RowKey) -> int:
474
+ """Get the row index for a given table row key.
475
+
476
+ Args:
477
+ row_key: Row key as string.
478
+ """
479
+ return super().get_row_index(row_key)
454
480
 
455
481
  def get_row_key(self, row_idx: int) -> RowKey:
456
482
  """Get the row key for a given table row index.
@@ -463,7 +489,18 @@ class DataFrameTable(DataTable):
463
489
  """
464
490
  return self._row_locations.get_key(row_idx)
465
491
 
466
- def get_column_key(self, col_idx: int) -> ColumnKey:
492
+ def get_col_idx(self, col_key: ColumnKey) -> int:
493
+ """Get the column index for a given table column key.
494
+
495
+ Args:
496
+ col_key: Column key as string.
497
+
498
+ Returns:
499
+ Corresponding column index as int.
500
+ """
501
+ return super().get_column_index(col_key)
502
+
503
+ def get_col_key(self, col_idx: int) -> ColumnKey:
467
504
  """Get the column key for a given table column index.
468
505
 
469
506
  Args:
@@ -474,11 +511,11 @@ class DataFrameTable(DataTable):
474
511
  """
475
512
  return self._column_locations.get_key(col_idx)
476
513
 
477
- def should_highlight(self, cursor: Coordinate, target_cell: Coordinate, type_of_cursor: CursorType) -> bool:
514
+ def _should_highlight(self, cursor: Coordinate, target_cell: Coordinate, type_of_cursor: CursorType) -> bool:
478
515
  """Determine if the given cell should be highlighted because of the cursor.
479
516
 
480
- In "cell" mode, also highlights the row and column headers. In "row" and "column"
481
- modes, highlights the entire row or column respectively.
517
+ In "cell" mode, also highlights the row and column headers. This overrides the default
518
+ behavior of DataTable which only highlights the exact cell under the cursor.
482
519
 
483
520
  Args:
484
521
  cursor: The current position of the cursor.
@@ -575,7 +612,7 @@ class DataFrameTable(DataTable):
575
612
  else:
576
613
  content_tab.remove_class("dirty")
577
614
 
578
- def move_cursor_to(self, ridx: int, cidx: int) -> None:
615
+ def move_cursor_to(self, ridx: int | None = None, cidx: int | None = None) -> None:
579
616
  """Move cursor based on the dataframe indices.
580
617
 
581
618
  Args:
@@ -583,11 +620,11 @@ class DataFrameTable(DataTable):
583
620
  cidx: Column index (0-based) in the dataframe.
584
621
  """
585
622
  # Ensure the target row is loaded
586
- if ridx >= self.loaded_rows:
587
- self.load_rows(stop=ridx + self.BATCH_SIZE)
623
+ start, stop = self._round_to_nearest_hundreds(ridx)
624
+ self.load_rows_range(start, stop)
588
625
 
589
- row_key = str(ridx)
590
- col_key = self.df.columns[cidx]
626
+ row_key = self.cursor_row_key if ridx is None else str(ridx)
627
+ col_key = self.cursor_col_key if cidx is None else self.df.columns[cidx]
591
628
  row_idx, col_idx = self.get_cell_coordinate(row_key, col_key)
592
629
  self.move_cursor(row=row_idx, column=col_idx)
593
630
 
@@ -603,15 +640,15 @@ class DataFrameTable(DataTable):
603
640
  def on_key(self, event) -> None:
604
641
  """Handle key press events for pagination.
605
642
 
606
- Currently handles "pagedown" and "down" keys to trigger lazy loading of additional rows
607
- when scrolling near the end of the loaded data.
608
-
609
643
  Args:
610
644
  event: The key event object.
611
645
  """
612
- if event.key in ("pagedown", "down"):
646
+ if event.key == "up":
647
+ # Let the table handle the navigation first
648
+ self.load_rows_up()
649
+ elif event.key == "down":
613
650
  # Let the table handle the navigation first
614
- self.check_and_load_more()
651
+ self.load_rows_down()
615
652
 
616
653
  def on_click(self, event: Click) -> None:
617
654
  """Handle mouse click events on the table.
@@ -624,33 +661,32 @@ class DataFrameTable(DataTable):
624
661
  if self.cursor_type == "cell" and event.chain > 1: # only on double-click or more
625
662
  try:
626
663
  row_idx = event.style.meta["row"]
627
- # col_idx = event.style.meta["column"]
664
+ col_idx = event.style.meta["column"]
628
665
  except (KeyError, TypeError):
629
666
  return # Unable to get row/column info
630
667
 
631
668
  # header row
632
669
  if row_idx == -1:
633
- self.do_rename_column()
670
+ self.do_rename_column(col_idx)
634
671
  else:
635
672
  self.do_edit_cell()
636
673
 
637
674
  # Action handlers for BINDINGS
638
675
  def action_jump_top(self) -> None:
639
676
  """Jump to the top of the table."""
640
- self.move_cursor(row=0)
677
+ self.do_jump_top()
641
678
 
642
679
  def action_jump_bottom(self) -> None:
643
680
  """Jump to the bottom of the table."""
644
- self.load_rows(move_to_end=True)
681
+ self.do_jump_bottom()
645
682
 
646
- def action_forward_page(self) -> None:
647
- """Scroll down one page."""
648
- super().action_page_down()
649
- self.check_and_load_more()
683
+ def action_page_up(self) -> None:
684
+ """Move the cursor one page up."""
685
+ self.do_page_up()
650
686
 
651
- def action_backward_page(self) -> None:
652
- """Scroll up one page."""
653
- super().action_page_up()
687
+ def action_page_down(self) -> None:
688
+ """Move the cursor one page down."""
689
+ self.do_page_down()
654
690
 
655
691
  def action_view_row_detail(self) -> None:
656
692
  """View details of the current row."""
@@ -668,6 +704,10 @@ class DataFrameTable(DataTable):
668
704
  """Expand the current column to its full width."""
669
705
  self.do_expand_column()
670
706
 
707
+ def action_toggle_rid(self) -> None:
708
+ """Toggle the internal row index column visibility."""
709
+ self.do_toggle_rid()
710
+
671
711
  def action_show_hidden_rows_columns(self) -> None:
672
712
  """Show all hidden rows/columns."""
673
713
  self.do_show_hidden_rows_columns()
@@ -696,6 +736,14 @@ class DataFrameTable(DataTable):
696
736
  """
697
737
  self.do_show_statistics(scope)
698
738
 
739
+ def action_metadata_shape(self) -> None:
740
+ """Show metadata about the dataframe (row and column counts)."""
741
+ self.do_metadata_shape()
742
+
743
+ def action_metadata_column(self) -> None:
744
+ """Show metadata for the current column."""
745
+ self.do_metadata_column()
746
+
699
747
  def action_view_rows(self) -> None:
700
748
  """View rows by current cell value."""
701
749
  self.do_view_rows()
@@ -732,13 +780,13 @@ class DataFrameTable(DataTable):
732
780
  """Clear the current cell (set to None)."""
733
781
  self.do_clear_cell()
734
782
 
735
- def action_search_cursor_value(self) -> None:
736
- """Search cursor value in the current column."""
737
- self.do_search_cursor_value()
783
+ def action_select_row(self) -> None:
784
+ """Select rows with cursor value in the current column."""
785
+ self.do_select_row()
738
786
 
739
- def action_search_expr(self) -> None:
740
- """Search by expression in the current column."""
741
- self.do_search_expr()
787
+ def action_select_row_expr(self) -> None:
788
+ """Select rows by expression."""
789
+ self.do_select_row_expr()
742
790
 
743
791
  def action_find_cursor_value(self, scope="column") -> None:
744
792
  """Find by cursor value.
@@ -925,9 +973,13 @@ class DataFrameTable(DataTable):
925
973
  """Open the advanced SQL interface screen."""
926
974
  self.do_advanced_sql()
927
975
 
976
+ def on_mouse_scroll_up(self, event) -> None:
977
+ """Load more rows when scrolling up with mouse."""
978
+ self.load_rows_up()
979
+
928
980
  def on_mouse_scroll_down(self, event) -> None:
929
981
  """Load more rows when scrolling down with mouse."""
930
- self.check_and_load_more()
982
+ self.load_rows_down()
931
983
 
932
984
  # Setup & Loading
933
985
  def reset_df(self, new_df: pl.DataFrame, dirty: bool = True) -> None:
@@ -940,51 +992,31 @@ class DataFrameTable(DataTable):
940
992
  # Set new dataframe and reset table
941
993
  self.df = new_df
942
994
  self.loaded_rows = 0
943
- self.sorted_columns = {}
944
995
  self.hidden_columns = set()
945
- self.selected_rows = [False] * len(self.df)
946
- self.visible_rows = [True] * len(self.df)
996
+ self.selected_rows = set()
997
+ self.sorted_columns = {}
947
998
  self.fixed_rows = 0
948
999
  self.fixed_columns = 0
949
1000
  self.matches = defaultdict(set)
950
1001
  # self.histories.clear()
951
- # self.history = None
1002
+ # self.histories2.clear()
952
1003
  self.dirty = dirty # Mark as dirty since data changed
953
1004
 
954
- def setup_table(self, reset: bool = False) -> None:
1005
+ def setup_table(self) -> None:
955
1006
  """Setup the table for display.
956
1007
 
957
1008
  Row keys are 0-based indices, which map directly to dataframe row indices.
958
1009
  Column keys are header names from the dataframe.
959
1010
  """
960
1011
  self.loaded_rows = 0
1012
+ self.loaded_ranges.clear()
961
1013
  self.show_row_labels = True
962
1014
 
963
- # Reset to original dataframe
964
- if reset:
965
- self.reset_df(self.dataframe, dirty=False)
966
-
967
- # Lazy load up to INITIAL_BATCH_SIZE visible rows
968
- stop, visible_count, row_idx = self.INITIAL_BATCH_SIZE, 0, 0
969
- for row_idx, visible in enumerate(self.visible_rows):
970
- if not visible:
971
- continue
972
- visible_count += 1
973
- if visible_count > self.INITIAL_BATCH_SIZE:
974
- stop = row_idx + self.BATCH_SIZE
975
- break
976
- else:
977
- stop = row_idx + self.BATCH_SIZE
978
-
979
- # # Ensure all selected rows or matches are loaded
980
- # stop = max(stop, rindex(self.selected_rows, True) + 1)
981
- # stop = max(stop, max(self.matches.keys(), default=0) + 1)
982
-
983
1015
  # Save current cursor position before clearing
984
1016
  row_idx, col_idx = self.cursor_coordinate
985
1017
 
986
1018
  self.setup_columns()
987
- self.load_rows(stop)
1019
+ self.load_rows_range(0, self.BATCH_SIZE) # Load initial rows
988
1020
 
989
1021
  # Restore cursor position
990
1022
  if row_idx < len(self.rows) and col_idx < len(self.columns):
@@ -1007,7 +1039,7 @@ class DataFrameTable(DataTable):
1007
1039
  column_widths = {}
1008
1040
 
1009
1041
  # Get available width for the table (with some padding for borders/scrollbar)
1010
- available_width = self.size.width - 4 # Account for borders and scrollbar
1042
+ available_width = self.scrollable_content_region.width
1011
1043
 
1012
1044
  # Calculate how much width we need for string columns first
1013
1045
  string_cols = [col for col, dtype in zip(self.df.columns, self.df.dtypes) if dtype == pl.String]
@@ -1017,7 +1049,7 @@ class DataFrameTable(DataTable):
1017
1049
  return column_widths
1018
1050
 
1019
1051
  # Sample a reasonable number of rows to calculate widths (don't scan entire dataframe)
1020
- sample_size = min(self.INITIAL_BATCH_SIZE, len(self.df))
1052
+ sample_size = min(self.BATCH_SIZE, len(self.df))
1021
1053
  sample_lf = self.df.lazy().slice(0, sample_size)
1022
1054
 
1023
1055
  # Determine widths for each column
@@ -1028,28 +1060,30 @@ class DataFrameTable(DataTable):
1028
1060
  # Get column label width
1029
1061
  # Add padding for sort indicators if any
1030
1062
  label_width = measure(self.app.console, col, 1) + 2
1063
+ if dtype != pl.String:
1064
+ available_width -= label_width
1065
+ continue
1031
1066
 
1032
1067
  try:
1033
1068
  # Get sample values from the column
1034
- sample_values = sample_lf.select(col).collect().get_column(col).to_list()
1069
+ sample_values = sample_lf.select(col).collect().get_column(col).drop_nulls().to_list()
1035
1070
  if any(val.startswith(("https://", "http://")) for val in sample_values):
1036
1071
  continue # Skip link columns so they can auto-size and be clickable
1037
1072
 
1038
1073
  # Find maximum width in sample
1039
1074
  max_cell_width = max(
1040
- (measure(self.app.console, str(val), 1) for val in sample_values if val),
1075
+ (measure(self.app.console, val, 1) for val in sample_values),
1041
1076
  default=label_width,
1042
1077
  )
1043
1078
 
1044
1079
  # Set column width to max of label and sampled data (capped at reasonable max)
1045
1080
  max_width = max(label_width, max_cell_width)
1046
- except Exception:
1081
+ except Exception as e:
1047
1082
  # If any error, let Textual auto-size
1048
1083
  max_width = label_width
1084
+ self.log(f"Error determining width for column '{col}': {e}")
1049
1085
 
1050
- if dtype == pl.String:
1051
- column_widths[col] = max_width
1052
-
1086
+ column_widths[col] = max_width
1053
1087
  available_width -= max_width
1054
1088
 
1055
1089
  # If there's no more available width, auto-size remaining columns
@@ -1073,8 +1107,8 @@ class DataFrameTable(DataTable):
1073
1107
 
1074
1108
  # Add columns with justified headers
1075
1109
  for col, dtype in zip(self.df.columns, self.df.dtypes):
1076
- if col in self.hidden_columns:
1077
- continue # Skip hidden columns
1110
+ if col in self.hidden_columns or (col == RID and not self.show_rid):
1111
+ continue # Skip hidden columns and internal RID
1078
1112
  for idx, c in enumerate(self.sorted_columns, 1):
1079
1113
  if c == col:
1080
1114
  # Add sort indicator to column header
@@ -1092,128 +1126,390 @@ class DataFrameTable(DataTable):
1092
1126
 
1093
1127
  self.add_column(Text(cell_value, justify=DtypeConfig(dtype).justify), key=col, width=width)
1094
1128
 
1095
- def load_rows(self, stop: int | None = None, move_to_end: bool = False) -> None:
1096
- """Load a batch of rows into the table (synchronous wrapper).
1129
def _calculate_load_range(self, start: int, stop: int) -> list[tuple[int, int]]:
    """Calculate the sub-ranges of ``[start, stop)`` that still need loading.

    The ranges recorded in ``self.loaded_ranges`` are sorted and merged
    (overlapping or adjacent ranges collapse into one), the merged list is
    written back to ``self.loaded_ranges``, and the gaps of the request that
    are not covered by any merged range are returned.

    Args:
        start: Requested start index (0-based).
        stop: Requested stop index (0-based, exclusive).

    Returns:
        List of (actual_start, actual_stop) tuples to load, in ascending order.
        Empty list if the request is empty/inverted or already fully loaded.

    Example:
        If loaded ranges are [(150, 250)] and requesting (100, 300):
            - Returns [(100, 150), (250, 300)] to load head and tail
        If loaded ranges are [(0, 100), (100, 200)] and requesting (50, 150):
            - After merging, loaded_ranges becomes [(0, 200)]
            - Returns [] (already fully loaded)
    """
    # An empty or inverted request never needs loading; without this guard the
    # "nothing loaded yet" shortcut below would hand back a degenerate segment.
    if start >= stop:
        return []

    if not self.loaded_ranges:
        return [(start, stop)]

    # Merge overlapping/adjacent loaded ranges (sorted by start index)
    merged: list[tuple[int, int]] = []
    for range_start, range_stop in sorted(self.loaded_ranges):
        if merged and range_start <= merged[-1][1]:
            merged[-1] = (merged[-1][0], max(merged[-1][1], range_stop))
        else:
            merged.append((range_start, range_stop))

    self.loaded_ranges = merged

    # Walk the merged ranges and collect the uncovered gaps of the request
    ranges_to_load: list[tuple[int, int]] = []
    current_pos = start

    for range_start, range_stop in merged:
        if range_stop <= current_pos:
            continue  # Loaded range lies entirely before the current position
        if range_start >= stop or current_pos >= stop:
            break  # Ranges are sorted: nothing further can intersect the request
        if current_pos < range_start:
            ranges_to_load.append((current_pos, range_start))
        current_pos = range_stop  # Skip past the loaded range

    # Remaining tail after the last loaded range
    if current_pos < stop:
        ranges_to_load.append((current_pos, stop))

    return ranges_to_load
1193
+
1194
def _merge_loaded_ranges(self) -> None:
    """Collapse ``self.loaded_ranges`` into sorted, non-touching ranges.

    Ranges that overlap or are adjacent, e.g. (0, 100) and (100, 200), are
    combined into a single range, here (0, 200). Nothing is done when there
    are fewer than two ranges.
    """
    if len(self.loaded_ranges) < 2:
        return

    ordered = iter(sorted(self.loaded_ranges))
    cur_start, cur_stop = next(ordered)
    coalesced = []

    for nxt_start, nxt_stop in ordered:
        if nxt_start > cur_stop:
            # Disjoint from the running range: flush it and start a new one
            coalesced.append((cur_start, cur_stop))
            cur_start, cur_stop = nxt_start, nxt_stop
        elif nxt_stop > cur_stop:
            # Overlapping or adjacent: extend the running range
            cur_stop = nxt_stop

    coalesced.append((cur_start, cur_stop))
    self.loaded_ranges = coalesced
1126
1215
 
1127
- return
1216
def _find_insert_position_for_row(self, ridx: int) -> int:
    """Return the table position at which dataframe row ``ridx`` belongs.

    Row keys hold the dataframe index as a string, so the position is the
    number of keys in ``self._row_locations`` whose integer value is lower
    than ``ridx``.

    Args:
        ridx: The 0-based dataframe row index.

    Returns:
        The 0-based table position where the row should be inserted.
    """
    # One pass over the loaded row keys; each smaller index pushes the row down by one
    return sum(1 for loaded_key in self._row_locations if int(loaded_key.value) < ridx)
1131
1238
 
1132
- @work(exclusive=True, description="Loading rows...")
1133
- async def load_rows_async(self, stop: int, move_to_end: bool = False) -> None:
1134
- """Perform loading with async to avoid blocking.
1239
def load_rows_segment(self, segment_start: int, segment_stop: int) -> int:
    """Insert one contiguous slice of dataframe rows into the table.

    The segment is recorded in ``self.loaded_ranges`` first. Each row of the
    slice is then formatted (skipping hidden columns, and the RID column when
    ``self.show_rid`` is off) and inserted at the position given by
    ``_find_insert_position_for_row``. Selected rows are highlighted entirely;
    otherwise only the cells listed in ``self.matches`` for that row's RID are.

    Args:
        segment_start: Start loading rows from this index (0-based).
        segment_stop: Stop loading rows when this index is reached (0-based, exclusive).

    Returns:
        Number of rows in the slice; the same amount is added to ``self.loaded_rows``.
    """
    # The range is registered before any row is inserted
    self.loaded_ranges.append((segment_start, segment_stop))

    chunk = self.df.slice(segment_start, segment_stop - segment_start)
    columns, col_dtypes = self.df.columns, self.df.dtypes

    for ridx, (row, rid) in enumerate(zip(chunk.rows(), chunk[RID]), segment_start):
        row_selected = rid in self.selected_rows
        matched_cols = self.matches.get(rid, set())

        # Keep only the cells of displayed columns
        shown = [
            (val, col, dtype)
            for val, col, dtype in zip(row, columns, col_dtypes, strict=True)
            if col not in self.hidden_columns and (col != RID or self.show_rid)
        ]
        vals = [val for val, _, _ in shown]
        dtypes = [dtype for _, _, dtype in shown]
        # Highlight the whole row when selected, otherwise only matched cells
        styles = [HIGHLIGHT_COLOR if row_selected or col in matched_cols else None for _, col, _ in shown]

        formatted_row = format_row(vals, dtypes, styles=styles, thousand_separator=self.thousand_separator)

        # Key is the 0-based dataframe index, label the 1-based one
        self.insert_row(
            *formatted_row,
            key=str(ridx),
            label=str(ridx + 1),
            position=self._find_insert_position_for_row(ridx),
        )

    segment_count = len(chunk)
    self.loaded_rows += segment_count
    return segment_count
1285
+
1286
def load_rows_range(self, start: int, stop: int) -> int:
    """Load a batch of rows into the table.

    Row keys are 0-based indices as strings, which map directly to dataframe row indices.
    Row labels are 1-based indices as strings.

    Steps:
        1. Clamp the request to ``[0, len(self.df)]``; an empty or inverted
           request loads nothing.
        2. Ask ``_calculate_load_range`` which segments are still missing.
        3. Load every missing segment with ``load_rows_segment``.
        4. Merge adjacent/overlapping loaded ranges.

    Any exception raised while loading is logged, reported to the user with a
    notification, and turned into a return value of 0.

    Args:
        start: Start loading rows from this index (0-based).
        stop: Stop loading rows when this index is reached (0-based, exclusive).

    Returns:
        Number of rows loaded by this call (0 if nothing was loaded or on error).
    """
    start = max(0, start)  # Clamp to non-negative
    stop = min(stop, len(self.df))  # Clamp to dataframe length

    # After clamping the request may be empty (e.g. start beyond the last row);
    # bail out so no degenerate range is ever recorded or sliced.
    if start >= stop:
        return 0

    try:
        # Calculate actual ranges to load, accounting for already-loaded ranges
        ranges_to_load = self._calculate_load_range(start, stop)
        if not ranges_to_load:
            return 0  # Already loaded

        range_count = sum(
            self.load_rows_segment(segment_start, segment_stop) for segment_start, segment_stop in ranges_to_load
        )

        # Merge adjacent/overlapping ranges to optimize storage
        self._merge_loaded_ranges()

        self.log(f"Loaded {range_count} rows for range {start}-{stop}/{len(self.df)}")
        return range_count

    except Exception as e:
        self.notify("Error loading rows", title="Load", severity="error", timeout=10)
        self.log(f"Error loading rows: {str(e)}")
        return 0
1191
1330
 
1192
- # Always add labels so they can be shown/hidden via CSS
1193
- self.add_row(*formatted_row, key=str(ridx), label=str(ridx + 1))
1331
def load_rows_up(self) -> None:
    """Load rows above the current scroll position when not everything is loaded yet.

    The row key found ``BUFFER_SIZE`` rows below the scroll offset provides the
    dataframe index to work from (0 when no key is found). The (start, stop)
    range returned by ``_round_to_nearest_hundreds`` for that index minus twice
    ``BUFFER_SIZE`` is loaded, and the cursor is moved down by the number of
    rows that were actually loaded.
    """
    # Everything is already in the table
    if self.loaded_rows >= len(self.df):
        return

    top_row_index = int(self.scroll_y) + BUFFER_SIZE
    top_row_key = self.get_row_key(top_row_index)
    # No row key at that index: fall back to dataframe index 0
    top_ridx = int(top_row_key.value) if top_row_key else 0

    # Load upward
    start, stop = self._round_to_nearest_hundreds(top_ridx - BUFFER_SIZE * 2)
    range_count = self.load_rows_range(start, stop)
    if range_count <= 0:
        return

    # Shift the cursor by the number of rows that were just loaded
    self.move_cursor(row=top_row_index + range_count)
    self.log(f"Loaded up: {range_count} rows in range {start}-{stop}/{len(self.df)}")
1204
1353
 
1205
- def check_and_load_more(self) -> None:
1354
def load_rows_down(self) -> None:
    """Load rows below the visible area when not everything is loaded yet.

    The row key found ``BUFFER_SIZE`` rows above the bottom of the visible
    area provides the dataframe index to work from (0 when no key is found).
    The (start, stop) range returned by ``_round_to_nearest_hundreds`` for
    that index plus twice ``BUFFER_SIZE`` is then loaded.
    """
    # If we've loaded everything, no need to check
    if self.loaded_rows >= len(self.df):
        return

    visible_row_count = self.scrollable_content_region.height - (self.header_height if self.show_header else 0)
    # scroll_y is truncated to an int, as load_rows_up does, so the row lookup
    # below always receives an integer index rather than a float.
    bottom_row_index = int(self.scroll_y) + visible_row_count - BUFFER_SIZE

    bottom_row_key = self.get_row_key(bottom_row_index)
    if bottom_row_key:
        bottom_ridx = int(bottom_row_key.value)
    else:
        bottom_ridx = 0  # No bottom row key at index, default to 0

    # Load downward
    start, stop = self._round_to_nearest_hundreds(bottom_ridx + BUFFER_SIZE * 2)
    range_count = self.load_rows_range(start, stop)

    if range_count > 0:
        self.log(f"Loaded down: {range_count} rows in range {start}-{stop}/{len(self.df)}")
1375
+
1376
def insert_row(
    self,
    *cells: CellType,
    height: int | None = 1,
    key: str | None = None,
    label: TextType | None = None,
    position: int | None = None,
) -> RowKey:
    """Insert a row at a specific position in the DataTable.

    When inserting, all rows at and after the insertion position are shifted down,
    and their entries in self._row_locations are updated accordingly.

    Args:
        *cells: Positional arguments should contain cell data.
        height: The height of a row (in lines). Use `None` to auto-detect the optimal
            height.
        key: A key which uniquely identifies this row. If None, it will be generated
            for you and returned.
        label: The label for the row. Will be displayed to the left if supplied.
        position: The 0-based row index where the new row should be inserted.
            If None or >= the current row count, the row is appended via add_row.
            Negative values are clamped to 0.

    Returns:
        Unique identifier for this row. Can be used to retrieve this row regardless
        of its current location in the DataTable (it could have moved after
        being added due to sorting or insertion/deletion of other rows).

    Raises:
        DuplicateKey: If a row with the given key already exists.
        ValueError: If more cells are provided than there are columns.
    """
    # Default to appending if position not specified or >= row_count
    row_count = self.row_count
    if position is None or position >= row_count:
        return self.add_row(*cells, height=height, key=key, label=label)

    # Negative positions insert at the top
    position = max(0, position)

    row_key = RowKey(key)
    if row_key in self._row_locations:
        raise DuplicateKey(f"The row key {row_key!r} already exists.")

    if len(cells) > len(self.ordered_columns):
        raise ValueError("More values provided than there are columns.")

    # Rebuild the key <-> index mapping: rows at or after `position` move down by one.
    # The shift is computed directly, without an intermediate old->new index dict.
    shifted_locations = TwoWayDict({})
    for existing_key in self._row_locations:
        old_idx = self.get_row_idx(existing_key)
        shifted_locations[existing_key] = old_idx + 1 if position <= old_idx < row_count else old_idx
    self._row_locations = shifted_locations

    # Map the key of this row to its index and store its cells per column key;
    # zip_longest leaves None in columns for which no cell was supplied.
    self._row_locations[row_key] = position
    self._data[row_key] = {column.key: cell for column, cell in zip_longest(self.ordered_columns, cells)}

    label = Text.from_markup(label, end="") if isinstance(label, str) else label

    # Rows with auto-height get a height of 0 because 1) we need an integer height
    # to do some intermediate computations and 2) because 0 doesn't impact the data
    # table while we don't figure out how tall this row is.
    self.rows[row_key] = Row(
        row_key,
        height or 0,
        label,
        height is None,
    )
    self._new_rows.add(row_key)
    self._require_update_dimensions = True
    # NOTE(review): self-assignment presumably re-runs the cursor's reactive validation - confirm
    self.cursor_coordinate = self.cursor_coordinate

    # If a position has opened for the cursor to appear, where it previously
    # could not (e.g. when there's no data in the table), then a highlighted
    # event is posted, since there's now a highlighted cell when there wasn't
    # before.
    cell_now_available = self.row_count == 1 and len(self.columns) > 0
    visible_cursor = self.show_cursor and self.cursor_type != "none"
    if cell_now_available and visible_cursor:
        self._highlight_cursor()

    self._update_count += 1
    self.check_idle()
    return row_key
1476
+
1477
+ # Navigation
1478
def do_jump_top(self) -> None:
    """Move the cursor to the first row of the table (row index 0)."""
    self.move_cursor(row=0)
1481
+
1482
def do_jump_bottom(self) -> None:
    """Load the tail of the dataframe and move the cursor to the last table row.

    The loaded range ends at ``len(self.df)``. Its start is one ``BATCH_SIZE``
    before the end (never below 0), rounded up to the next multiple of
    ``BATCH_SIZE``.
    """
    batch = self.BATCH_SIZE
    stop = len(self.df)

    # Ceiling-divide so the start lands on a multiple of the batch size
    tentative_start = max(0, stop - batch)
    start = -(-tentative_start // batch) * batch

    self.load_rows_range(start, stop)
    self.move_cursor(row=self.row_count - 1)
1492
+
1493
def do_page_up(self) -> None:
    """Move the cursor one page up, loading the target rows first.

    With a visible "cell" or "row" cursor, the target dataframe index is the
    cursor's row index minus the page height and ``BUFFER_SIZE`` (never below
    0). The range returned by ``_round_to_nearest_hundreds`` for that index is
    loaded before the cursor moves to the row keyed by it. In every other
    case the parent class's page-up action runs.
    """
    self._set_hover_cursor(False)

    if not (self.show_cursor and self.cursor_type in ("cell", "row")):
        super().action_page_up()
        return

    # Content height without the header row (when the header is shown)
    page_height = self.scrollable_content_region.height - (self.header_height if self.show_header else 0)

    column = self.cursor_column
    target_ridx = max(0, self.cursor_row_idx - page_height - BUFFER_SIZE)

    start, stop = self._round_to_nearest_hundreds(target_ridx)
    self.load_rows_range(start, stop)

    # Row keys are dataframe indices as strings
    self.move_cursor(row=self.get_row_idx(str(target_ridx)), column=column)
1508
+
1509
def do_page_down(self) -> None:
    """Run the parent class's page-down action, then load more rows below via load_rows_down()."""
    super().action_page_down()
    self.load_rows_down()
1217
1513
 
1218
1514
  # History & Undo
1219
1515
  def create_history(self, description: str) -> None:
@@ -1221,12 +1517,12 @@ class DataFrameTable(DataTable):
1221
1517
  return History(
1222
1518
  description=description,
1223
1519
  df=self.df,
1520
+ df_view=self.df_view,
1224
1521
  filename=self.filename,
1225
1522
  loaded_rows=self.loaded_rows,
1226
- sorted_columns=self.sorted_columns.copy(),
1227
1523
  hidden_columns=self.hidden_columns.copy(),
1228
1524
  selected_rows=self.selected_rows.copy(),
1229
- visible_rows=self.visible_rows.copy(),
1525
+ sorted_columns=self.sorted_columns.copy(),
1230
1526
  fixed_rows=self.fixed_rows,
1231
1527
  fixed_columns=self.fixed_columns,
1232
1528
  cursor_coordinate=self.cursor_coordinate,
@@ -1241,12 +1537,12 @@ class DataFrameTable(DataTable):
1241
1537
 
1242
1538
  # Restore state
1243
1539
  self.df = history.df
1540
+ self.df_view = history.df_view
1244
1541
  self.filename = history.filename
1245
1542
  self.loaded_rows = history.loaded_rows
1246
- self.sorted_columns = history.sorted_columns.copy()
1247
1543
  self.hidden_columns = history.hidden_columns.copy()
1248
1544
  self.selected_rows = history.selected_rows.copy()
1249
- self.visible_rows = history.visible_rows.copy()
1545
+ self.sorted_columns = history.sorted_columns.copy()
1250
1546
  self.fixed_rows = history.fixed_rows
1251
1547
  self.fixed_columns = history.fixed_columns
1252
1548
  self.cursor_coordinate = history.cursor_coordinate
@@ -1256,15 +1552,18 @@ class DataFrameTable(DataTable):
1256
1552
  # Recreate table for display
1257
1553
  self.setup_table()
1258
1554
 
1259
- def add_history(self, description: str, dirty: bool = False) -> None:
1555
+ def add_history(self, description: str, dirty: bool = False, clear_redo: bool = True) -> None:
1260
1556
  """Add the current state to the history stack.
1261
1557
 
1262
1558
  Args:
1263
1559
  description: Description of the action for this history entry.
1264
1560
  dirty: Whether this operation modifies the data (True) or just display state (False).
1265
1561
  """
1266
- history = self.create_history(description)
1267
- self.histories.append(history)
1562
+ self.histories_undo.append(self.create_history(description))
1563
+
1564
+ # Clear redo stack when a new action is performed
1565
+ if clear_redo:
1566
+ self.histories_redo.clear()
1268
1567
 
1269
1568
  # Mark table as dirty if this operation modifies data
1270
1569
  if dirty:
@@ -1272,52 +1571,43 @@ class DataFrameTable(DataTable):
1272
1571
 
1273
1572
  def do_undo(self) -> None:
1274
1573
  """Undo the last action."""
1275
- if not self.histories:
1574
+ if not self.histories_undo:
1276
1575
  self.notify("No actions to undo", title="Undo", severity="warning")
1277
1576
  return
1278
1577
 
1279
- # Pop the last history state for undo
1280
- history = self.histories.pop()
1281
-
1282
- # Save current state for redo
1283
- self.history = self.create_history(history.description)
1578
+ # Pop the last history state for undo and save to redo stack
1579
+ history = self.histories_undo.pop()
1580
+ self.histories_redo.append(self.create_history(history.description))
1284
1581
 
1285
1582
  # Restore state
1286
1583
  self.apply_history(history)
1287
1584
 
1288
- self.notify(f"Reverted: [$success]{history.description}[/]", title="Undo")
1585
+ self.notify(f"Reverted: {history.description}", title="Undo")
1289
1586
 
1290
1587
  def do_redo(self) -> None:
1291
1588
  """Redo the last undone action."""
1292
- if self.history is None:
1589
+ if not self.histories_redo:
1293
1590
  self.notify("No actions to redo", title="Redo", severity="warning")
1294
1591
  return
1295
1592
 
1296
- description = self.history.description
1593
+ # Pop the last undone state from redo stack
1594
+ history = self.histories_redo.pop()
1595
+ description = history.description
1297
1596
 
1298
1597
  # Save current state for undo
1299
- self.add_history(description)
1598
+ self.add_history(description, clear_redo=False)
1300
1599
 
1301
1600
  # Restore state
1302
- self.apply_history(self.history)
1303
-
1304
- # Clear redo state
1305
- self.history = None
1601
+ self.apply_history(history)
1306
1602
 
1307
- self.notify(f"Reapplied: [$success]{description}[/]", title="Redo")
1603
+ self.notify(f"Reapplied: {description}", title="Redo")
1308
1604
 
1309
1605
  def do_reset(self) -> None:
1310
1606
  """Reset the table to the initial state."""
1311
- self.setup_table(reset=True)
1607
+ self.reset_df(self.dataframe, dirty=False)
1608
+ self.setup_table()
1312
1609
  self.notify("Restored initial state", title="Reset")
1313
1610
 
1314
- def restore_dirty(self, default: bool | None = None) -> None:
1315
- """Restore the dirty state from the last history entry."""
1316
- if self.last_history:
1317
- self.dirty = self.last_history.dirty
1318
- elif default is not None:
1319
- self.dirty = default
1320
-
1321
1611
  # Display
1322
1612
  def do_cycle_cursor_type(self) -> None:
1323
1613
  """Cycle through cursor types: cell -> row -> column -> cell."""
@@ -1354,6 +1644,14 @@ class DataFrameTable(DataTable):
1354
1644
  cidx = self.cursor_col_idx
1355
1645
  self.app.push_screen(StatisticsScreen(self, col_idx=cidx))
1356
1646
 
1647
def do_metadata_shape(self) -> None:
    """Push a MetaShape screen for this table (dataframe row and column counts)."""
    shape_screen = MetaShape(self)
    self.app.push_screen(shape_screen)
1650
+
1651
def do_metadata_column(self) -> None:
    """Push a MetaColumnScreen for this table (metadata for all dataframe columns)."""
    column_screen = MetaColumnScreen(self)
    self.app.push_screen(column_screen)
1654
+
1357
1655
  def do_freeze_row_column(self) -> None:
1358
1656
  """Open the freeze screen to set fixed rows and columns."""
1359
1657
  self.app.push_screen(FreezeScreen(), callback=self.freeze_row_column)
@@ -1416,14 +1714,20 @@ class DataFrameTable(DataTable):
1416
1714
  max_width = len(col_name) + 2 # Start with column name width + padding
1417
1715
 
1418
1716
  try:
1717
+ need_expand = False
1718
+
1419
1719
  # Scan through all loaded rows that are visible to find max width
1420
1720
  for row_idx in range(self.loaded_rows):
1421
- if not self.visible_rows[row_idx]:
1422
- continue # Skip hidden rows
1423
1721
  cell_value = str(self.df.item(row_idx, col_idx))
1424
1722
  cell_width = measure(self.app.console, cell_value, 1)
1723
+
1724
+ if cell_width > max_width:
1725
+ need_expand = True
1425
1726
  max_width = max(max_width, cell_width)
1426
1727
 
1728
+ if not need_expand:
1729
+ return
1730
+
1427
1731
  # Update the column width
1428
1732
  col = self.columns[col_key]
1429
1733
  col.width = max_width
@@ -1440,32 +1744,34 @@ class DataFrameTable(DataTable):
1440
1744
  )
1441
1745
  self.log(f"Error expanding column `{col_name}`: {str(e)}")
1442
1746
 
1443
- def do_show_hidden_rows_columns(self) -> None:
1444
- """Show all hidden rows/columns by recreating the table."""
1445
- # Get currently visible columns
1446
- visible_cols = set(col.key for col in self.ordered_columns)
1747
def do_toggle_rid(self) -> None:
    """Flip ``self.show_rid`` (display of the internal RID column) and rebuild the table."""
    self.show_rid = not self.show_rid

    # The table has to be recreated for the change to show
    self.setup_table()
1450
1753
 
1451
- if not hidden_row_count and not hidden_col_count:
1452
- self.notify("No hidden columns or rows to show", title="Show", severity="warning")
1754
def do_show_hidden_rows_columns(self) -> None:
    """Reveal every hidden row and column, then rebuild the table.

    Hidden rows exist when ``self.df_view`` is set; in that case it replaces
    ``self.df`` and is cleared. Hidden columns are tracked in
    ``self.hidden_columns``. A warning is shown, and nothing else happens,
    when neither is present.
    """
    nothing_hidden = self.df_view is None and not self.hidden_columns
    if nothing_hidden:
        self.notify("No hidden rows or columns to show", title="Show", severity="warning")
        return

    # Add to history
    self.add_history("Showed hidden rows/columns")

    # A filtered view is active: df_view takes over as the displayed dataframe
    if self.df_view is not None:
        self.df, self.df_view = self.df_view, None

    # Forget which columns were hidden
    self.hidden_columns.clear()

    # Recreate table for display
    self.setup_table()

    self.notify("Showed hidden row(s) and/or hidden column(s)", title="Show")
1469
1775
 
1470
1776
  # Sort
1471
1777
  def do_sort_by_column(self, descending: bool = False) -> None:
@@ -1486,32 +1792,40 @@ class DataFrameTable(DataTable):
1486
1792
 
1487
1793
  # Add to history
1488
1794
  self.add_history(f"Sorted on column [$success]{col_name}[/]", dirty=True)
1795
+
1796
+ # New column - add to sort
1489
1797
  if old_desc is None:
1490
- # Add new column to sort
1491
1798
  self.sorted_columns[col_name] = descending
1799
+
1800
+ # Old column, same direction - remove from sort
1492
1801
  elif old_desc == descending:
1493
- # Same direction - remove from sort
1494
1802
  del self.sorted_columns[col_name]
1803
+
1804
+ # Old column, different direction - add to sort at end
1495
1805
  else:
1496
- # Move to end of sort order
1497
1806
  del self.sorted_columns[col_name]
1498
1807
  self.sorted_columns[col_name] = descending
1499
1808
 
1809
+ lf = self.df.lazy()
1810
+ sort_by = {}
1811
+
1500
1812
  # Apply multi-column sort
1501
1813
  if sort_cols := list(self.sorted_columns.keys()):
1502
1814
  descending_flags = list(self.sorted_columns.values())
1503
- df_sorted = self.df.with_row_index(RIDX).sort(sort_cols, descending=descending_flags, nulls_last=True)
1815
+ sort_by = {"by": sort_cols, "descending": descending_flags, "nulls_last": True}
1504
1816
  else:
1505
- # No sort columns - restore original order
1506
- df_sorted = self.df.with_row_index(RIDX)
1817
+ # No sort columns - restore original order by sorting on the RID column
1818
+ sort_by = {"by": RID}
1507
1819
 
1508
- # Updated selected_rows and visible_rows to match new order
1509
- old_row_indices = df_sorted[RIDX].to_list()
1510
- self.selected_rows = [self.selected_rows[i] for i in old_row_indices]
1511
- self.visible_rows = [self.visible_rows[i] for i in old_row_indices]
1820
+ # Perform the sort
1821
+ df_sorted = lf.sort(**sort_by).collect()
1822
+
1823
+ # Also update df_view if applicable
1824
+ if self.df_view is not None:
1825
+ self.df_view = self.df_view.lazy().sort(**sort_by).collect()
1512
1826
 
1513
1827
  # Update the dataframe
1514
- self.df = df_sorted.drop(RIDX)
1828
+ self.df = df_sorted
1515
1829
 
1516
1830
  # Recreate table for display
1517
1831
  self.setup_table()
@@ -1558,6 +1872,17 @@ class DataFrameTable(DataTable):
1558
1872
  .alias(col_name)
1559
1873
  )
1560
1874
 
1875
+ # Also update the view if applicable
1876
+ if self.df_view is not None:
1877
+ # Get the RID value for this row in df_view
1878
+ ridx_view = self.df.item(ridx, self.df.columns.index(RID))
1879
+ self.df_view = self.df_view.with_columns(
1880
+ pl.when(pl.col(RID) == ridx_view)
1881
+ .then(pl.lit(new_value))
1882
+ .otherwise(pl.col(col_name))
1883
+ .alias(col_name)
1884
+ )
1885
+
1561
1886
  # Update the display
1562
1887
  cell_value = self.df.item(ridx, cidx)
1563
1888
  if cell_value is None:
@@ -1633,11 +1958,26 @@ class DataFrameTable(DataTable):
1633
1958
 
1634
1959
  try:
1635
1960
  # Apply the expression to the column
1636
- self.df = self.df.with_columns(expr.alias(col_name))
1961
+ self.df = self.df.lazy().with_columns(expr.alias(col_name)).collect()
1962
+
1963
+ # Also update the view if applicable
1964
+ # Update the value of col_name in df_view using the value of col_name from df based on RID mapping between them
1965
+ if self.df_view is not None:
1966
+ # Get updated column from df for rows that exist in df_view
1967
+ col_updated = f"^_{col_name}_^"
1968
+ lf_updated = self.df.lazy().select(RID, pl.col(col_name).alias(col_updated))
1969
+ # Join and use coalesce to prefer updated value or keep original
1970
+ self.df_view = (
1971
+ self.df_view.lazy()
1972
+ .join(lf_updated, on=RID, how="left")
1973
+ .with_columns(pl.coalesce(pl.col(col_updated), pl.col(col_name)).alias(col_name))
1974
+ .drop(col_updated)
1975
+ .collect()
1976
+ )
1637
1977
  except Exception as e:
1638
1978
  self.notify(
1639
1979
  f"Error applying expression: [$error]{term}[/] to column [$accent]{col_name}[/]",
1640
- title="Edit",
1980
+ title="Edit Column",
1641
1981
  severity="error",
1642
1982
  timeout=10,
1643
1983
  )
@@ -1649,10 +1989,10 @@ class DataFrameTable(DataTable):
1649
1989
 
1650
1990
  # self.notify(f"Column [$accent]{col_name}[/] updated with [$success]{expr}[/]", title="Edit Column")
1651
1991
 
1652
- def do_rename_column(self) -> None:
1992
+ def do_rename_column(self, col_idx: int | None) -> None:
1653
1993
  """Open modal to rename the selected column."""
1654
- col_name = self.cursor_col_name
1655
- col_idx = self.cursor_column
1994
+ col_idx = self.cursor_column if col_idx is None else col_idx
1995
+ col_name = self.get_col_key(col_idx).value
1656
1996
 
1657
1997
  # Push the rename column modal screen
1658
1998
  self.app.push_screen(
@@ -1679,14 +2019,25 @@ class DataFrameTable(DataTable):
1679
2019
  # Rename the column in the dataframe
1680
2020
  self.df = self.df.rename({col_name: new_name})
1681
2021
 
1682
- # Update sorted_columns if this column was sorted
2022
+ # Also update the view if applicable
2023
+ if self.df_view is not None:
2024
+ self.df_view = self.df_view.rename({col_name: new_name})
2025
+
2026
+ # Update sorted_columns if this column was sorted and maintain order
1683
2027
  if col_name in self.sorted_columns:
1684
- self.sorted_columns[new_name] = self.sorted_columns.pop(col_name)
2028
+ sorted_columns = {}
2029
+ for col, order in self.sorted_columns.items():
2030
+ if col == col_name:
2031
+ sorted_columns[new_name] = order
2032
+ else:
2033
+ sorted_columns[col] = order
2034
+ self.sorted_columns = sorted_columns
1685
2035
 
1686
- # Update hidden_columns if this column was hidden
1687
- if col_name in self.hidden_columns:
1688
- self.hidden_columns.remove(col_name)
1689
- self.hidden_columns.add(new_name)
2036
+ # Update matches if this column had cell matches
2037
+ for cols in self.matches.values():
2038
+ if col_name in cols:
2039
+ cols.remove(col_name)
2040
+ cols.add(new_name)
1690
2041
 
1691
2042
  # Recreate table for display
1692
2043
  self.setup_table()
@@ -1715,6 +2066,13 @@ class DataFrameTable(DataTable):
1715
2066
  .alias(col_name)
1716
2067
  )
1717
2068
 
2069
+ # Also update the view if applicable
2070
+ if self.df_view is not None:
2071
+ ridx_view = self.df.item(ridx, self.df.columns.index(RID))
2072
+ self.df_view = self.df_view.with_columns(
2073
+ pl.when(pl.col(RID) == ridx_view).then(pl.lit(None)).otherwise(pl.col(col_name)).alias(col_name)
2074
+ )
2075
+
1718
2076
  # Update the display
1719
2077
  dtype = self.df.dtypes[cidx]
1720
2078
  dc = DtypeConfig(dtype)
@@ -1733,30 +2091,27 @@ class DataFrameTable(DataTable):
1733
2091
  self.log(f"Error clearing cell ({ridx}, {col_name}): {str(e)}")
1734
2092
  raise e
1735
2093
 
1736
- def do_add_column(self, col_name: str = None, col_value: pl.Expr = None) -> None:
2094
+ def do_add_column(self, col_name: str = None) -> None:
1737
2095
  """Add acolumn after the current column."""
1738
2096
  cidx = self.cursor_col_idx
1739
2097
 
1740
2098
  if not col_name:
1741
2099
  # Generate a unique column name
1742
2100
  base_name = "new_col"
1743
- new_name = base_name
2101
+ new_col_name = base_name
1744
2102
  counter = 1
1745
- while new_name in self.df.columns:
1746
- new_name = f"{base_name}_{counter}"
2103
+ while new_col_name in self.df.columns:
2104
+ new_col_name = f"{base_name}_{counter}"
1747
2105
  counter += 1
1748
2106
  else:
1749
- new_name = col_name
2107
+ new_col_name = col_name
1750
2108
 
1751
2109
  # Add to history
1752
- self.add_history(f"Added column [$success]{new_name}[/] after column [$accent]{cidx + 1}[/]", dirty=True)
2110
+ self.add_history(f"Added column [$success]{new_col_name}[/] after column [$accent]{cidx + 1}[/]", dirty=True)
1753
2111
 
1754
2112
  try:
1755
2113
  # Create an empty column (all None values)
1756
- if isinstance(col_value, pl.Expr):
1757
- new_col = col_value.alias(new_name)
1758
- else:
1759
- new_col = pl.lit(col_value).alias(new_name)
2114
+ new_col_name = pl.lit(None).alias(new_col_name)
1760
2115
 
1761
2116
  # Get columns up to current, the new column, then remaining columns
1762
2117
  cols = self.df.columns
@@ -1764,8 +2119,12 @@ class DataFrameTable(DataTable):
1764
2119
  cols_after = cols[cidx + 1 :]
1765
2120
 
1766
2121
  # Build the new dataframe with columns reordered
1767
- select_cols = cols_before + [new_name] + cols_after
1768
- self.df = self.df.with_columns(new_col).select(select_cols)
2122
+ select_cols = cols_before + [new_col_name] + cols_after
2123
+ self.df = self.df.lazy().with_columns(new_col_name).select(select_cols).collect()
2124
+
2125
+ # Also update the view if applicable
2126
+ if self.df_view is not None:
2127
+ self.df_view = self.df_view.lazy().with_columns(new_col_name).select(select_cols).collect()
1769
2128
 
1770
2129
  # Recreate table for display
1771
2130
  self.setup_table()
@@ -1775,8 +2134,10 @@ class DataFrameTable(DataTable):
1775
2134
 
1776
2135
  # self.notify(f"Added column [$success]{new_name}[/]", title="Add Column")
1777
2136
  except Exception as e:
1778
- self.notify(f"Error adding column [$error]{new_name}[/]", title="Add Column", severity="error", timeout=10)
1779
- self.log(f"Error adding column `{new_name}`: {str(e)}")
2137
+ self.notify(
2138
+ f"Error adding column [$error]{new_col_name}[/]", title="Add Column", severity="error", timeout=10
2139
+ )
2140
+ self.log(f"Error adding column `{new_col_name}`: {str(e)}")
1780
2141
  raise e
1781
2142
 
1782
2143
  def do_add_column_expr(self) -> None:
@@ -1808,7 +2169,14 @@ class DataFrameTable(DataTable):
1808
2169
 
1809
2170
  # Build the new dataframe with columns reordered
1810
2171
  select_cols = cols_before + [new_col_name] + cols_after
1811
- self.df = self.df.with_row_index(RIDX).with_columns(new_col).select(select_cols)
2172
+ self.df = self.df.lazy().with_columns(new_col).select(select_cols).collect()
2173
+
2174
+ # Also update the view if applicable
2175
+ if self.df_view is not None:
2176
+ # Get updated column from df for rows that exist in df_view
2177
+ lf_updated = self.df.lazy().select(RID, pl.col(new_col_name))
2178
+ # Join and use coalesce to prefer updated value or keep original
2179
+ self.df_view = self.df_view.lazy().join(lf_updated, on=RID, how="left").select(select_cols).collect()
1812
2180
 
1813
2181
  # Recreate table for display
1814
2182
  self.setup_table()
@@ -1832,10 +2200,10 @@ class DataFrameTable(DataTable):
1832
2200
  def add_link_column(self, result: tuple[str, str] | None) -> None:
1833
2201
  """Handle result from AddLinkScreen.
1834
2202
 
1835
- Creates a new link column in the dataframe with clickable links based on a
1836
- user-provided template. Supports multiple placeholder types:
2203
+ Creates a new link column in the dataframe based on a user-provided template.
2204
+ Supports multiple placeholder types:
1837
2205
  - `$_` - Current column (based on cursor position)
1838
- - `$1`, `$2`, etc. - Column by 1-based position index
2206
+ - `$1`, `$2`, etc. - Column by index (1-based)
1839
2207
  - `$name` - Column by name (e.g., `$id`, `$product_name`)
1840
2208
 
1841
2209
  The template is evaluated for each row using Polars expressions with vectorized
@@ -1874,7 +2242,14 @@ class DataFrameTable(DataTable):
1874
2242
 
1875
2243
  # Build the new dataframe with columns reordered
1876
2244
  select_cols = cols_before + [new_col_name] + cols_after
1877
- self.df = self.df.with_columns(new_col).select(select_cols)
2245
+ self.df = self.df.lazy().with_columns(new_col).select(select_cols).collect()
2246
+
2247
+ # Also update the view if applicable
2248
+ if self.df_view is not None:
2249
+ # Get updated column from df for rows that exist in df_view
2250
+ lf_updated = self.df.lazy().select(RID, pl.col(new_col_name))
2251
+ # Join and use coalesce to prefer updated value or keep original
2252
+ self.df_view = self.df_view.lazy().join(lf_updated, on=RID, how="left").select(select_cols).collect()
1878
2253
 
1879
2254
  # Recreate table for display
1880
2255
  self.setup_table()
@@ -1903,7 +2278,7 @@ class DataFrameTable(DataTable):
1903
2278
  # Remove all columns before the current column
1904
2279
  if more == "before":
1905
2280
  for i in range(col_idx + 1):
1906
- col_key = self.get_column_key(i)
2281
+ col_key = self.get_col_key(i)
1907
2282
  col_names_to_remove.append(col_key.value)
1908
2283
  col_keys_to_remove.append(col_key)
1909
2284
 
@@ -1912,7 +2287,7 @@ class DataFrameTable(DataTable):
1912
2287
  # Remove all columns after the current column
1913
2288
  elif more == "after":
1914
2289
  for i in range(col_idx, len(self.columns)):
1915
- col_key = self.get_column_key(i)
2290
+ col_key = self.get_col_key(i)
1916
2291
  col_names_to_remove.append(col_key.value)
1917
2292
  col_keys_to_remove.append(col_key)
1918
2293
 
@@ -1941,17 +2316,24 @@ class DataFrameTable(DataTable):
1941
2316
  if col_name in self.sorted_columns:
1942
2317
  del self.sorted_columns[col_name]
1943
2318
 
2319
+ # Remove from hidden columns if present
2320
+ for col_name in col_names_to_remove:
2321
+ self.hidden_columns.discard(col_name)
2322
+
1944
2323
  # Remove from matches
1945
- col_indices_to_remove = set(self.df.columns.index(name) for name in col_names_to_remove)
1946
- for row_idx in list(self.matches.keys()):
1947
- self.matches[row_idx].difference_update(col_indices_to_remove)
2324
+ for rid in list(self.matches.keys()):
2325
+ self.matches[rid].difference_update(col_names_to_remove)
1948
2326
  # Remove empty entries
1949
- if not self.matches[row_idx]:
1950
- del self.matches[row_idx]
2327
+ if not self.matches[rid]:
2328
+ del self.matches[rid]
1951
2329
 
1952
2330
  # Remove from dataframe
1953
2331
  self.df = self.df.drop(col_names_to_remove)
1954
2332
 
2333
+ # Also update the view if applicable
2334
+ if self.df_view is not None:
2335
+ self.df_view = self.df_view.drop(col_names_to_remove)
2336
+
1955
2337
  self.notify(message, title="Delete")
1956
2338
 
1957
2339
  def do_duplicate_column(self) -> None:
@@ -1962,29 +2344,28 @@ class DataFrameTable(DataTable):
1962
2344
  col_idx = self.cursor_column
1963
2345
  new_col_name = f"{col_name}_copy"
1964
2346
 
2347
+ # Ensure new column name is unique
2348
+ counter = 1
2349
+ while new_col_name in self.df.columns:
2350
+ new_col_name = f"{new_col_name}{counter}"
2351
+ counter += 1
2352
+
1965
2353
  # Add to history
1966
2354
  self.add_history(f"Duplicated column [$success]{col_name}[/]", dirty=True)
1967
2355
 
1968
2356
  # Create new column and reorder columns to insert after current column
1969
2357
  cols_before = self.df.columns[: cidx + 1]
1970
2358
  cols_after = self.df.columns[cidx + 1 :]
2359
+ cols_new = cols_before + [new_col_name] + cols_after
1971
2360
 
1972
2361
  # Add the new column and reorder columns for insertion after current column
1973
- self.df = self.df.with_columns(pl.col(col_name).alias(new_col_name)).select(
1974
- list(cols_before) + [new_col_name] + list(cols_after)
1975
- )
2362
+ self.df = self.df.lazy().with_columns(pl.col(col_name).alias(new_col_name)).select(cols_new).collect()
1976
2363
 
1977
- # Update matches to account for new column
1978
- new_matches = defaultdict(set)
1979
- for row_idx, cols in self.matches.items():
1980
- new_cols = set()
1981
- for col_idx_in_set in cols:
1982
- if col_idx_in_set <= cidx:
1983
- new_cols.add(col_idx_in_set)
1984
- else:
1985
- new_cols.add(col_idx_in_set + 1)
1986
- new_matches[row_idx] = new_cols
1987
- self.matches = new_matches
2364
+ # Also update the view if applicable
2365
+ if self.df_view is not None:
2366
+ self.df_view = (
2367
+ self.df_view.lazy().with_columns(pl.col(col_name).alias(new_col_name)).select(cols_new).collect()
2368
+ )
1988
2369
 
1989
2370
  # Recreate table for display
1990
2371
  self.setup_table()
@@ -2000,58 +2381,61 @@ class DataFrameTable(DataTable):
2000
2381
  Supports deleting multiple selected rows. If no rows are selected, deletes the row at the cursor.
2001
2382
  """
2002
2383
  old_count = len(self.df)
2003
- predicates = [True] * len(self.df)
2384
+ rids_to_delete = set()
2004
2385
 
2005
2386
  # Delete all selected rows
2006
- if selected_count := self.selected_rows.count(True):
2387
+ if selected_count := len(self.selected_rows):
2007
2388
  history_desc = f"Deleted {selected_count} selected row(s)"
2008
-
2009
- for ridx, selected in enumerate(self.selected_rows):
2010
- if selected:
2011
- predicates[ridx] = False
2389
+ rids_to_delete = self.selected_rows
2012
2390
 
2013
2391
  # Delete current row and those above
2014
2392
  elif more == "above":
2015
2393
  ridx = self.cursor_row_idx
2016
2394
  history_desc = f"Deleted current row [$success]{ridx + 1}[/] and those above"
2017
- for i in range(ridx + 1):
2018
- predicates[i] = False
2395
+ for rid in self.df[RID][: ridx + 1]:
2396
+ rids_to_delete.add(rid)
2019
2397
 
2020
2398
  # Delete current row and those below
2021
2399
  elif more == "below":
2022
2400
  ridx = self.cursor_row_idx
2023
2401
  history_desc = f"Deleted current row [$success]{ridx + 1}[/] and those below"
2024
- for i in range(ridx, len(self.df)):
2025
- if self.visible_rows[i]:
2026
- predicates[i] = False
2402
+ for rid in self.df[RID][ridx:]:
2403
+ rids_to_delete.add(rid)
2027
2404
 
2028
2405
  # Delete the row at the cursor
2029
2406
  else:
2030
2407
  ridx = self.cursor_row_idx
2031
2408
  history_desc = f"Deleted row [$success]{ridx + 1}[/]"
2032
- if self.visible_rows[ridx]:
2033
- predicates[ridx] = False
2409
+ rids_to_delete.add(self.df[RID][ridx])
2034
2410
 
2035
2411
  # Add to history
2036
2412
  self.add_history(history_desc, dirty=True)
2037
2413
 
2038
2414
  # Apply the filter to remove rows
2039
2415
  try:
2040
- df = self.df.with_row_index(RIDX).filter(predicates)
2416
+ df_filtered = self.df.lazy().filter(~pl.col(RID).is_in(rids_to_delete)).collect()
2041
2417
  except Exception as e:
2042
2418
  self.notify(f"Error deleting row(s): {e}", title="Delete", severity="error", timeout=10)
2043
- self.histories.pop() # Remove last history entry
2419
+ self.histories_undo.pop() # Remove last history entry
2044
2420
  return
2045
2421
 
2046
- self.df = df.drop(RIDX)
2422
+ # RIDs of remaining rows
2423
+ ok_rids = set(df_filtered[RID])
2047
2424
 
2048
- # Update selected and visible rows tracking
2049
- old_row_indices = set(df[RIDX].to_list())
2050
- self.selected_rows = [selected for i, selected in enumerate(self.selected_rows) if i in old_row_indices]
2051
- self.visible_rows = [visible for i, visible in enumerate(self.visible_rows) if i in old_row_indices]
2425
+ # Update selected rows tracking
2426
+ if self.selected_rows:
2427
+ self.selected_rows.intersection_update(ok_rids)
2052
2428
 
2053
- # Clear all matches since row indices have changed
2054
- self.matches = defaultdict(set)
2429
+ # Update the dataframe
2430
+ self.df = df_filtered
2431
+
2432
+ # Update matches since row indices have changed
2433
+ if self.matches:
2434
+ self.matches = {rid: cols for rid, cols in self.matches.items() if rid in ok_rids}
2435
+
2436
+ # Also update the view if applicable
2437
+ if self.df_view is not None:
2438
+ self.df_view = self.df_view.lazy().filter(~pl.col(RID).is_in(rids_to_delete)).collect()
2055
2439
 
2056
2440
  # Recreate table for display
2057
2441
  self.setup_table()
@@ -2063,34 +2447,29 @@ class DataFrameTable(DataTable):
2063
2447
  def do_duplicate_row(self) -> None:
2064
2448
  """Duplicate the currently selected row, inserting it right after the current row."""
2065
2449
  ridx = self.cursor_row_idx
2450
+ rid = self.df[RID][ridx]
2451
+
2452
+ lf = self.df.lazy()
2066
2453
 
2067
2454
  # Get the row to duplicate
2068
- row_to_duplicate = self.df.slice(ridx, 1)
2455
+ row_to_duplicate = lf.slice(ridx, 1).with_columns(pl.col(RID) + 1)
2069
2456
 
2070
2457
  # Add to history
2071
2458
  self.add_history(f"Duplicated row [$success]{ridx + 1}[/]", dirty=True)
2072
2459
 
2073
2460
  # Concatenate: rows before + duplicated row + rows after
2074
- df_before = self.df.slice(0, ridx + 1)
2075
- df_after = self.df.slice(ridx + 1)
2461
+ lf_before = lf.slice(0, ridx + 1)
2462
+ lf_after = lf.slice(ridx + 1).with_columns(pl.col(RID) + 1)
2076
2463
 
2077
2464
  # Combine the parts
2078
- self.df = pl.concat([df_before, row_to_duplicate, df_after])
2079
-
2080
- # Update selected and visible rows tracking to account for new row
2081
- new_selected_rows = self.selected_rows[: ridx + 1] + [self.selected_rows[ridx]] + self.selected_rows[ridx + 1 :]
2082
- new_visible_rows = self.visible_rows[: ridx + 1] + [self.visible_rows[ridx]] + self.visible_rows[ridx + 1 :]
2083
- self.selected_rows = new_selected_rows
2084
- self.visible_rows = new_visible_rows
2085
-
2086
- # Update matches to account for new row
2087
- new_matches = defaultdict(set)
2088
- for row_idx, cols in self.matches.items():
2089
- if row_idx <= ridx:
2090
- new_matches[row_idx] = cols
2091
- else:
2092
- new_matches[row_idx + 1] = cols
2093
- self.matches = new_matches
2465
+ self.df = pl.concat([lf_before, row_to_duplicate, lf_after]).collect()
2466
+
2467
+ # Also update the view if applicable
2468
+ if self.df_view is not None:
2469
+ lf_view = self.df_view.lazy()
2470
+ lf_view_before = lf_view.slice(0, rid + 1)
2471
+ lf_view_after = lf_view.slice(rid + 1).with_columns(pl.col(RID) + 1)
2472
+ self.df_view = pl.concat([lf_view_before, row_to_duplicate, lf_view_after]).collect()
2094
2473
 
2095
2474
  # Recreate table for display
2096
2475
  self.setup_table()
@@ -2156,6 +2535,10 @@ class DataFrameTable(DataTable):
2156
2535
  cols[cidx], cols[swap_cidx] = cols[swap_cidx], cols[cidx]
2157
2536
  self.df = self.df.select(cols)
2158
2537
 
2538
+ # Also update the view if applicable
2539
+ if self.df_view is not None:
2540
+ self.df_view = self.df_view.select(cols)
2541
+
2159
2542
  # self.notify(f"Moved column [$success]{col_name}[/] {direction}", title="Move")
2160
2543
 
2161
2544
  def do_move_row(self, direction: str) -> None:
@@ -2164,65 +2547,88 @@ class DataFrameTable(DataTable):
2164
2547
  Args:
2165
2548
  direction: "up" to move up, "down" to move down.
2166
2549
  """
2167
- row_idx, col_idx = self.cursor_coordinate
2550
+ curr_row_idx, col_idx = self.cursor_coordinate
2168
2551
 
2169
2552
  # Validate move is possible
2170
2553
  if direction == "up":
2171
- if row_idx <= 0:
2554
+ if curr_row_idx <= 0:
2172
2555
  self.notify("Cannot move row up", title="Move", severity="warning")
2173
2556
  return
2174
- swap_idx = row_idx - 1
2557
+ swap_row_idx = curr_row_idx - 1
2175
2558
  elif direction == "down":
2176
- if row_idx >= len(self.rows) - 1:
2559
+ if curr_row_idx >= len(self.rows) - 1:
2177
2560
  self.notify("Cannot move row down", title="Move", severity="warning")
2178
2561
  return
2179
- swap_idx = row_idx + 1
2562
+ swap_row_idx = curr_row_idx + 1
2180
2563
  else:
2181
2564
  # Invalid direction
2182
2565
  return
2183
2566
 
2184
- row_key = self.coordinate_to_cell_key((row_idx, 0)).row_key
2185
- swap_key = self.coordinate_to_cell_key((swap_idx, 0)).row_key
2186
-
2187
2567
  # Add to history
2188
2568
  self.add_history(
2189
- f"Moved row [$success]{row_key.value}[/] [$accent]{direction}[/] (swapped with row [$success]{swap_key.value}[/])",
2569
+ f"Moved row [$success]{curr_row_idx}[/] [$accent]{direction}[/] (swapped with row [$success]{swap_row_idx}[/])",
2190
2570
  dirty=True,
2191
2571
  )
2192
2572
 
2193
2573
  # Swap rows in the table's internal row locations
2574
+ curr_key = self.coordinate_to_cell_key((curr_row_idx, 0)).row_key
2575
+ swap_key = self.coordinate_to_cell_key((swap_row_idx, 0)).row_key
2576
+
2194
2577
  self.check_idle()
2195
2578
 
2196
2579
  (
2197
- self._row_locations[row_key],
2580
+ self._row_locations[curr_key],
2198
2581
  self._row_locations[swap_key],
2199
2582
  ) = (
2200
- self._row_locations.get(swap_key),
2201
- self._row_locations.get(row_key),
2583
+ self.get_row_idx(swap_key),
2584
+ self.get_row_idx(curr_key),
2202
2585
  )
2203
2586
 
2204
2587
  self._update_count += 1
2205
2588
  self.refresh()
2206
2589
 
2207
2590
  # Restore cursor position on the moved row
2208
- self.move_cursor(row=swap_idx, column=col_idx)
2591
+ self.move_cursor(row=swap_row_idx, column=col_idx)
2209
2592
 
2210
- # Swap rows in the dataframe
2211
- ridx = int(row_key.value) # 0-based
2212
- swap_ridx = int(swap_key.value) # 0-based
2213
- first, second = sorted([ridx, swap_ridx])
2593
+ # Locate the rows to swap
2594
+ curr_ridx = curr_row_idx
2595
+ swap_ridx = swap_row_idx
2596
+ first, second = sorted([curr_ridx, swap_ridx])
2214
2597
 
2598
+ # Swap the rows in the dataframe
2215
2599
  self.df = pl.concat(
2216
2600
  [
2217
- self.df.slice(0, first),
2218
- self.df.slice(second, 1),
2219
- self.df.slice(first + 1, second - first - 1),
2220
- self.df.slice(first, 1),
2221
- self.df.slice(second + 1),
2601
+ self.df.slice(0, first).lazy(),
2602
+ self.df.slice(second, 1).lazy(),
2603
+ self.df.slice(first + 1, second - first - 1).lazy(),
2604
+ self.df.slice(first, 1).lazy(),
2605
+ self.df.slice(second + 1).lazy(),
2222
2606
  ]
2223
- )
2607
+ ).collect()
2224
2608
 
2225
- # self.notify(f"Moved row [$success]{row_key.value}[/] {direction}", title="Move")
2609
+ # Also update the view if applicable
2610
+ if self.df_view is not None:
2611
+ # Find RID values
2612
+ curr_rid = self.df[RID][curr_row_idx]
2613
+ swap_rid = self.df[RID][swap_row_idx]
2614
+
2615
+ # Locate the rows by RID in the view
2616
+ curr_ridx = self.df_view[RID].index_of(curr_rid)
2617
+ swap_ridx = self.df_view[RID].index_of(swap_rid)
2618
+ first, second = sorted([curr_ridx, swap_ridx])
2619
+
2620
+ # Swap the rows in the view
2621
+ self.df_view = pl.concat(
2622
+ [
2623
+ self.df_view.slice(0, first).lazy(),
2624
+ self.df_view.slice(second, 1).lazy(),
2625
+ self.df_view.slice(first + 1, second - first - 1).lazy(),
2626
+ self.df_view.slice(first, 1).lazy(),
2627
+ self.df_view.slice(second + 1).lazy(),
2628
+ ]
2629
+ ).collect()
2630
+
2631
+ # self.notify(f"Moved row [$success]{row_key.value}[/] {direction}", title="Move Row")
2226
2632
 
2227
2633
  # Type casting
2228
2634
  def do_cast_column_dtype(self, dtype: str) -> None:
@@ -2259,6 +2665,10 @@ class DataFrameTable(DataTable):
2259
2665
  # Cast the column using Polars
2260
2666
  self.df = self.df.with_columns(pl.col(col_name).cast(target_dtype))
2261
2667
 
2668
+ # Also update the view if applicable
2669
+ if self.df_view is not None:
2670
+ self.df_view = self.df_view.with_columns(pl.col(col_name).cast(target_dtype))
2671
+
2262
2672
  # Recreate table for display
2263
2673
  self.setup_table()
2264
2674
 
@@ -2272,18 +2682,32 @@ class DataFrameTable(DataTable):
2272
2682
  )
2273
2683
  self.log(f"Error casting column `{col_name}`: {str(e)}")
2274
2684
 
2275
- # Search
2276
- def do_search_cursor_value(self) -> None:
2277
- """Search with cursor value in current column."""
2685
+ # Row selection
2686
+ def do_select_row(self) -> None:
2687
+ """Select rows.
2688
+
2689
+ If there are existing cell matches, use those to select rows.
2690
+ Otherwise, use the current cell value as the search term and select rows matching that value.
2691
+ """
2278
2692
  cidx = self.cursor_col_idx
2279
2693
 
2280
- # Get the value of the currently selected cell
2281
- term = NULL if self.cursor_value is None else str(self.cursor_value)
2694
+ # Use existing cell matches if present
2695
+ if self.matches:
2696
+ term = pl.col(RID).is_in(self.matches)
2697
+ else:
2698
+ col_name = self.cursor_col_name
2282
2699
 
2283
- self.search((term, cidx, False, True))
2700
+ # Get the value of the currently selected cell
2701
+ term = NULL if self.cursor_value is None else str(self.cursor_value)
2702
+ if self.cursor_value is None:
2703
+ term = pl.col(col_name).is_null()
2704
+ else:
2705
+ term = pl.col(col_name) == self.cursor_value
2706
+
2707
+ self.select_row((term, cidx, False, True))
2284
2708
 
2285
- def do_search_expr(self) -> None:
2286
- """Search by expression."""
2709
+ def do_select_row_expr(self) -> None:
2710
+ """Select rows by expression."""
2287
2711
  cidx = self.cursor_col_idx
2288
2712
 
2289
2713
  # Use current cell value as default search term
@@ -2291,22 +2715,31 @@ class DataFrameTable(DataTable):
2291
2715
 
2292
2716
  # Push the search modal screen
2293
2717
  self.app.push_screen(
2294
- SearchScreen("Search", term, self.df, cidx),
2295
- callback=self.search,
2718
+ SearchScreen("Select", term, self.df, cidx),
2719
+ callback=self.select_row,
2296
2720
  )
2297
2721
 
2298
- def search(self, result) -> None:
2299
- """Search for a term."""
2722
+ def select_row(self, result) -> None:
2723
+ """Select rows by value or expression."""
2300
2724
  if result is None:
2301
2725
  return
2302
2726
 
2303
2727
  term, cidx, match_nocase, match_whole = result
2304
- col_name = self.df.columns[cidx]
2728
+ col_name = "all columns" if cidx is None else self.df.columns[cidx]
2305
2729
 
2306
- if term == NULL:
2730
+ # Already a Polars expression
2731
+ if isinstance(term, pl.Expr):
2732
+ expr = term
2733
+
2734
+ # bool list or Series
2735
+ elif isinstance(term, (list, pl.Series)):
2736
+ expr = term
2737
+
2738
+ # Null case
2739
+ elif term == NULL:
2307
2740
  expr = pl.col(col_name).is_null()
2308
2741
 
2309
- # Support for polars expressions
2742
+ # Expression in string form
2310
2743
  elif tentative_expr(term):
2311
2744
  try:
2312
2745
  expr = validate_expr(term, self.df.columns, cidx)
@@ -2343,49 +2776,121 @@ class DataFrameTable(DataTable):
2343
2776
  )
2344
2777
 
2345
2778
  # Lazyframe for filtering
2346
- lf = self.df.lazy().with_row_index(RIDX)
2347
- if self.has_hidden_rows:
2348
- lf = lf.filter(self.visible_rows)
2779
+ lf = self.df.lazy()
2349
2780
 
2350
2781
  # Apply filter to get matched row indices
2351
2782
  try:
2352
- matches = set(lf.filter(expr).select(RIDX).collect().to_series().to_list())
2783
+ ok_rids = set(lf.filter(expr).collect()[RID])
2353
2784
  except Exception as e:
2354
- self.notify(f"Error applying search filter [$error]{term}[/]", title="Search", severity="error", timeout=10)
2785
+ self.notify(
2786
+ f"Error applying search filter `[$error]{term}[/]`", title="Search", severity="error", timeout=10
2787
+ )
2355
2788
  self.log(f"Error applying search filter `{term}`: {str(e)}")
2356
2789
  return
2357
2790
 
2358
- match_count = len(matches)
2791
+ match_count = len(ok_rids)
2359
2792
  if match_count == 0:
2360
2793
  self.notify(
2361
- f"No matches found for [$warning]{term}[/]. Try [$accent](?i)abc[/] for case-insensitive search.",
2794
+ f"No matches found for `[$warning]{term}[/]`. Try [$accent](?i)abc[/] for case-insensitive search.",
2362
2795
  title="Search",
2363
2796
  severity="warning",
2364
2797
  )
2365
2798
  return
2366
2799
 
2800
+ message = f"Found [$success]{match_count}[/] matching row(s)"
2801
+
2367
2802
  # Add to history
2368
- self.add_history(f"Searched [$success]{term}[/] in column [$accent]{col_name}[/]")
2803
+ self.add_history(message)
2369
2804
 
2370
- # Update selected rows to include new matches
2371
- for m in matches:
2372
- self.selected_rows[m] = True
2805
+ # Update selected rows to include new selections
2806
+ self.selected_rows.update(ok_rids)
2373
2807
 
2374
2808
  # Show notification immediately, then start highlighting
2375
- self.notify(f"Found [$success]{match_count}[/] matches for [$accent]{term}[/]", title="Search")
2809
+ self.notify(message, title="Select Row")
2376
2810
 
2377
2811
  # Recreate table for display
2378
2812
  self.setup_table()
2379
2813
 
2380
- # Find
2814
+ def do_toggle_selections(self) -> None:
2815
+ """Toggle selected rows highlighting on/off."""
2816
+ # Add to history
2817
+ self.add_history("Toggled row selection")
2818
+
2819
+ # Invert all selected rows
2820
+ self.selected_rows = {rid for rid in self.df[RID] if rid not in self.selected_rows}
2821
+
2822
+ # Check if we're highlighting or un-highlighting
2823
+ if selected_count := len(self.selected_rows):
2824
+ self.notify(f"Toggled selection for [$success]{selected_count}[/] rows", title="Toggle")
2825
+
2826
+ # Recreate table for display
2827
+ self.setup_table()
2828
+
2829
+ def do_toggle_row_selection(self) -> None:
2830
+ """Select/deselect current row."""
2831
+ # Add to history
2832
+ self.add_history("Toggled row selection")
2833
+
2834
+ # Get current row RID
2835
+ ridx = self.cursor_row_idx
2836
+ rid = self.df[RID][ridx]
2837
+
2838
+ if rid in self.selected_rows:
2839
+ self.selected_rows.discard(rid)
2840
+ else:
2841
+ self.selected_rows.add(rid)
2842
+
2843
+ row_key = self.cursor_row_key
2844
+ is_selected = rid in self.selected_rows
2845
+ match_cols = self.matches.get(rid, set())
2846
+
2847
+ for col_idx, col in enumerate(self.ordered_columns):
2848
+ col_key = col.key
2849
+ col_name = col_key.value
2850
+ cell_text: Text = self.get_cell(row_key, col_key)
2851
+
2852
+ if is_selected or (col_name in match_cols):
2853
+ cell_text.style = HIGHLIGHT_COLOR
2854
+ else:
2855
+ # Reset to default style based on dtype
2856
+ dtype = self.df.dtypes[col_idx]
2857
+ dc = DtypeConfig(dtype)
2858
+ cell_text.style = dc.style
2859
+
2860
+ self.update_cell(row_key, col_key, cell_text)
2861
+
2862
+ def do_clear_selections_and_matches(self) -> None:
2863
+ """Clear all selected rows and matches without removing them from the dataframe."""
2864
+ # Check if any selected rows or matches
2865
+ if not self.selected_rows and not self.matches:
2866
+ self.notify("No selections to clear", title="Clear", severity="warning")
2867
+ return
2868
+
2869
+ row_count = len(self.selected_rows | set(self.matches.keys()))
2870
+
2871
+ # Add to history
2872
+ self.add_history("Cleared all selected rows")
2873
+
2874
+ # Clear all selections
2875
+ self.selected_rows = set()
2876
+ self.matches = defaultdict(set)
2877
+
2878
+ # Recreate table for display
2879
+ self.setup_table()
2880
+
2881
+ self.notify(f"Cleared selections for [$success]{row_count}[/] rows", title="Clear")
2882
+
2883
+ # Find & Replace
2381
2884
  def find_matches(
2382
2885
  self, term: str, cidx: int | None = None, match_nocase: bool = False, match_whole: bool = False
2383
- ) -> dict[int, set[int]]:
2886
+ ) -> dict[int, set[str]]:
2384
2887
  """Find matches for a term in the dataframe.
2385
2888
 
2386
2889
  Args:
2387
2890
  term: The search term (can be NULL, expression, or plain text)
2388
2891
  cidx: Column index for column-specific search. If None, searches all columns.
2892
+ match_nocase: Whether to perform case-insensitive matching (for string terms)
2893
+ match_whole: Whether to match the whole cell content (for string terms)
2389
2894
 
2390
2895
  Returns:
2391
2896
  Dictionary mapping row indices to sets of column indices containing matches.
@@ -2395,12 +2900,10 @@ class DataFrameTable(DataTable):
2395
2900
  Raises:
2396
2901
  Exception: If expression validation or filtering fails.
2397
2902
  """
2398
- matches: dict[int, set[int]] = defaultdict(set)
2903
+ matches: dict[int, set[str]] = defaultdict(set)
2399
2904
 
2400
2905
  # Lazyframe for filtering
2401
- lf = self.df.lazy().with_row_index(RIDX)
2402
- if self.has_hidden_rows:
2403
- lf = lf.filter(self.visible_rows)
2906
+ lf = self.df.lazy()
2404
2907
 
2405
2908
  # Determine which columns to search: single column or all columns
2406
2909
  if cidx is not None:
@@ -2431,14 +2934,14 @@ class DataFrameTable(DataTable):
2431
2934
 
2432
2935
  # Get matched row indices
2433
2936
  try:
2434
- matched_ridxs = lf.filter(expr).select(RIDX).collect().to_series().to_list()
2937
+ matched_ridxs = lf.filter(expr).collect()[RID]
2435
2938
  except Exception as e:
2436
2939
  self.notify(f"Error applying filter: [$error]{expr}[/]", title="Find", severity="error", timeout=10)
2437
2940
  self.log(f"Error applying filter: {str(e)}")
2438
2941
  return matches
2439
2942
 
2440
2943
  for ridx in matched_ridxs:
2441
- matches[ridx].add(col_idx)
2944
+ matches[ridx].add(col_name)
2442
2945
 
2443
2946
  return matches
2444
2947
 
@@ -2484,27 +2987,27 @@ class DataFrameTable(DataTable):
2484
2987
  try:
2485
2988
  matches = self.find_matches(term, cidx, match_nocase, match_whole)
2486
2989
  except Exception as e:
2487
- self.notify(f"Error finding matches for [$error]{term}[/]", title="Find", severity="error", timeout=10)
2990
+ self.notify(f"Error finding matches for `[$error]{term}[/]`", title="Find", severity="error", timeout=10)
2488
2991
  self.log(f"Error finding matches for `{term}`: {str(e)}")
2489
2992
  return
2490
2993
 
2491
2994
  if not matches:
2492
2995
  self.notify(
2493
- f"No matches found for [$warning]{term}[/] in current column. Try [$accent](?i)abc[/] for case-insensitive search.",
2996
+ f"No matches found for `[$warning]{term}[/]` in current column. Try [$accent](?i)abc[/] for case-insensitive search.",
2494
2997
  title="Find",
2495
2998
  severity="warning",
2496
2999
  )
2497
3000
  return
2498
3001
 
2499
3002
  # Add to history
2500
- self.add_history(f"Found [$success]{term}[/] in column [$accent]{col_name}[/]")
3003
+ self.add_history(f"Found `[$success]{term}[/]` in column [$accent]{col_name}[/]")
2501
3004
 
2502
3005
  # Add to matches and count total
2503
- match_count = sum(len(col_idxs) for col_idxs in matches.values())
2504
- for ridx, col_idxs in matches.items():
2505
- self.matches[ridx].update(col_idxs)
3006
+ match_count = sum(len(cols) for cols in matches.values())
3007
+ for rid, cols in matches.items():
3008
+ self.matches[rid].update(cols)
2506
3009
 
2507
- self.notify(f"Found [$success]{match_count}[/] matches for [$accent]{term}[/]", title="Find")
3010
+ self.notify(f"Found [$success]{match_count}[/] matches for `[$accent]{term}[/]`", title="Find")
2508
3011
 
2509
3012
  # Recreate table for display
2510
3013
  self.setup_table()
@@ -2518,28 +3021,29 @@ class DataFrameTable(DataTable):
2518
3021
  try:
2519
3022
  matches = self.find_matches(term, cidx=None, match_nocase=match_nocase, match_whole=match_whole)
2520
3023
  except Exception as e:
2521
- self.notify(f"Error finding matches for [$error]{term}[/]", title="Find", severity="error", timeout=10)
3024
+ self.notify(f"Error finding matches for `[$error]{term}[/]`", title="Find", severity="error", timeout=10)
2522
3025
  self.log(f"Error finding matches for `{term}`: {str(e)}")
2523
3026
  return
2524
3027
 
2525
3028
  if not matches:
2526
3029
  self.notify(
2527
- f"No matches found for [$warning]{term}[/] in any column. Try [$accent](?i)abc[/] for case-insensitive search.",
3030
+ f"No matches found for `[$warning]{term}[/]` in any column. Try [$accent](?i)abc[/] for case-insensitive search.",
2528
3031
  title="Global Find",
2529
3032
  severity="warning",
2530
3033
  )
2531
3034
  return
2532
3035
 
2533
3036
  # Add to history
2534
- self.add_history(f"Found [$success]{term}[/] across all columns")
3037
+ self.add_history(f"Found `[$success]{term}[/]` across all columns")
2535
3038
 
2536
3039
  # Add to matches and count total
2537
- match_count = sum(len(col_idxs) for col_idxs in matches.values())
2538
- for ridx, col_idxs in matches.items():
2539
- self.matches[ridx].update(col_idxs)
3040
+ match_count = sum(len(cols) for cols in matches.values())
3041
+ for rid, cols in matches.items():
3042
+ self.matches[rid].update(cols)
2540
3043
 
2541
3044
  self.notify(
2542
- f"Found [$success]{match_count}[/] matches for [$accent]{term}[/] across all columns", title="Global Find"
3045
+ f"Found [$success]{match_count}[/] matches for `[$accent]{term}[/]` across all columns",
3046
+ title="Global Find",
2543
3047
  )
2544
3048
 
2545
3049
  # Recreate table for display
@@ -2597,7 +3101,7 @@ class DataFrameTable(DataTable):
2597
3101
 
2598
3102
  def do_next_selected_row(self) -> None:
2599
3103
  """Move cursor to the next selected row."""
2600
- if not any(self.selected_rows):
3104
+ if not self.selected_rows:
2601
3105
  self.notify("No selected rows to navigate", title="Next Selected Row", severity="warning")
2602
3106
  return
2603
3107
 
@@ -2619,7 +3123,7 @@ class DataFrameTable(DataTable):
2619
3123
 
2620
3124
  def do_previous_selected_row(self) -> None:
2621
3125
  """Move cursor to the previous selected row."""
2622
- if not any(self.selected_rows):
3126
+ if not self.selected_rows:
2623
3127
  self.notify("No selected rows to navigate", title="Previous Selected Row", severity="warning")
2624
3128
  return
2625
3129
 
@@ -2639,7 +3143,6 @@ class DataFrameTable(DataTable):
2639
3143
  last_ridx = selected_row_indices[-1]
2640
3144
  self.move_cursor_to(last_ridx, self.cursor_col_idx)
2641
3145
 
2642
- # Replace
2643
3146
  def do_replace(self) -> None:
2644
3147
  """Open replace screen for current column."""
2645
3148
  # Push the replace modal screen
@@ -2693,25 +3196,34 @@ class DataFrameTable(DataTable):
2693
3196
  )
2694
3197
 
2695
3198
  # Update matches
2696
- self.matches = {ridx: col_idxs.copy() for ridx, col_idxs in matches.items()}
3199
+ self.matches = matches
2697
3200
 
2698
3201
  # Recreate table for display
2699
3202
  self.setup_table()
2700
3203
 
2701
3204
  # Store state for interactive replacement using dataclass
2702
- sorted_rows = sorted(self.matches.keys())
3205
+ rid2ridx = {rid: ridx for ridx, rid in enumerate(self.df[RID]) if rid in self.matches}
3206
+
3207
+ # Unique columns to replace
3208
+ cols_to_replace = set()
3209
+ for cols in self.matches.values():
3210
+ cols_to_replace.update(cols)
3211
+
3212
+ # Sorted column indices to replace
3213
+ cidx2col = {cidx: col for cidx, col in enumerate(self.df.columns) if col in cols_to_replace}
3214
+
2703
3215
  self.replace_state = ReplaceState(
2704
3216
  term_find=term_find,
2705
3217
  term_replace=term_replace,
2706
3218
  match_nocase=match_nocase,
2707
3219
  match_whole=match_whole,
2708
3220
  cidx=cidx,
2709
- rows=sorted_rows,
2710
- cols_per_row=[sorted(self.matches[ridx]) for ridx in sorted_rows],
3221
+ rows=list(rid2ridx.values()),
3222
+ cols_per_row=[[cidx for cidx, col in cidx2col.items() if col in self.matches[rid]] for rid in rid2ridx],
2711
3223
  current_rpos=0,
2712
3224
  current_cpos=0,
2713
3225
  current_occurrence=0,
2714
- total_occurrence=sum(len(col_idxs) for col_idxs in self.matches.values()),
3226
+ total_occurrence=sum(len(cols) for cols in self.matches.values()),
2715
3227
  replaced_occurrence=0,
2716
3228
  skipped_occurrence=0,
2717
3229
  done=False,
@@ -2740,7 +3252,7 @@ class DataFrameTable(DataTable):
2740
3252
  self.app.push_screen(
2741
3253
  ConfirmScreen(
2742
3254
  "Replace All",
2743
- label=f"Replace [$success]{term_find}[/] with [$success]{term_replace or repr('')}[/] for all [$accent]{state.total_occurrence}[/] occurrences?",
3255
+ label=f"Replace `[$success]{term_find}[/]` with `[$success]{term_replace}[/]` for all [$accent]{state.total_occurrence}[/] occurrences?",
2744
3256
  ),
2745
3257
  callback=self.handle_replace_all_confirmation,
2746
3258
  )
@@ -2795,6 +3307,18 @@ class DataFrameTable(DataTable):
2795
3307
  pl.when(mask).then(pl.lit(value)).otherwise(pl.col(col_name)).alias(col_name)
2796
3308
  )
2797
3309
 
3310
+ # Also update the view if applicable
3311
+ if self.df_view is not None:
3312
+ col_updated = f"^_{col_name}_^"
3313
+ lf_updated = self.df.lazy().filter(mask).select(pl.col(col_name).alias(col_updated), pl.col(RID))
3314
+ self.df_view = (
3315
+ self.df_view.lazy()
3316
+ .join(lf_updated, on=RID, how="left")
3317
+ .with_columns(pl.coalesce(pl.col(col_updated), pl.col(col_name)).alias(col_name))
3318
+ .drop(col_updated)
3319
+ .collect()
3320
+ )
3321
+
2798
3322
  state.replaced_occurrence += len(ridxs)
2799
3323
 
2800
3324
  # Recreate table for display
@@ -2806,7 +3330,7 @@ class DataFrameTable(DataTable):
2806
3330
 
2807
3331
  col_name = "all columns" if state.cidx is None else self.df.columns[state.cidx]
2808
3332
  self.notify(
2809
- f"Replaced [$success]{state.replaced_occurrence}[/] of [$accent]{state.total_occurrence}[/] in [$s]{col_name}[/]",
3333
+ f"Replaced [$success]{state.replaced_occurrence}[/] of [$success]{state.total_occurrence}[/] in [$accent]{col_name}[/]",
2810
3334
  title="Replace",
2811
3335
  )
2812
3336
 
@@ -2830,7 +3354,7 @@ class DataFrameTable(DataTable):
2830
3354
  if state.done:
2831
3355
  # All done - show final notification
2832
3356
  col_name = "all columns" if state.cidx is None else self.df.columns[state.cidx]
2833
- msg = f"Replaced [$success]{state.replaced_occurrence}[/] of [$accent]{state.total_occurrence}[/] in [$success]{col_name}[/]"
3357
+ msg = f"Replaced [$success]{state.replaced_occurrence}[/] of [$success]{state.total_occurrence}[/] in [$accent]{col_name}[/]"
2834
3358
  if state.skipped_occurrence > 0:
2835
3359
  msg += f", [$warning]{state.skipped_occurrence}[/] skipped"
2836
3360
  self.notify(msg, title="Replace")
@@ -2848,7 +3372,7 @@ class DataFrameTable(DataTable):
2848
3372
  state.current_occurrence += 1
2849
3373
 
2850
3374
  # Show confirmation
2851
- label = f"Replace [$warning]{state.term_find}[/] with [$success]{state.term_replace}[/] ({state.current_occurrence} of {state.total_occurrence})?"
3375
+ label = f"Replace `[$warning]{state.term_find}[/]` with `[$success]{state.term_replace}[/]` ({state.current_occurrence} of {state.total_occurrence})?"
2852
3376
 
2853
3377
  self.app.push_screen(
2854
3378
  ConfirmScreen("Replace", label=label, maybe="Skip"),
@@ -2865,6 +3389,7 @@ class DataFrameTable(DataTable):
2865
3389
  cidx = state.cols_per_row[state.current_rpos][state.current_cpos]
2866
3390
  col_name = self.df.columns[cidx]
2867
3391
  dtype = self.df.dtypes[cidx]
3392
+ rid = self.df[RID][ridx]
2868
3393
 
2869
3394
  # Replace
2870
3395
  if result is True:
@@ -2877,6 +3402,15 @@ class DataFrameTable(DataTable):
2877
3402
  .otherwise(pl.col(col_name))
2878
3403
  .alias(col_name)
2879
3404
  )
3405
+
3406
+ # Also update the view if applicable
3407
+ if self.df_view is not None:
3408
+ self.df_view = self.df_view.with_columns(
3409
+ pl.when(pl.col(RID) == rid)
3410
+ .then(pl.col(col_name).str.replace_all(term_find, state.term_replace))
3411
+ .otherwise(pl.col(col_name))
3412
+ .alias(col_name)
3413
+ )
2880
3414
  else:
2881
3415
  # try to convert replacement value to column dtype
2882
3416
  try:
@@ -2891,6 +3425,12 @@ class DataFrameTable(DataTable):
2891
3425
  .alias(col_name)
2892
3426
  )
2893
3427
 
3428
+ # Also update the view if applicable
3429
+ if self.df_view is not None:
3430
+ self.df_view = self.df_view.with_columns(
3431
+ pl.when(pl.col(RID) == rid).then(pl.lit(value)).otherwise(pl.col(col_name)).alias(col_name)
3432
+ )
3433
+
2894
3434
  state.replaced_occurrence += 1
2895
3435
 
2896
3436
  # Skip
@@ -2923,127 +3463,20 @@ class DataFrameTable(DataTable):
2923
3463
  # Show next confirmation
2924
3464
  self.show_next_replace_confirmation()
2925
3465
 
2926
- # Selection & Match
2927
- def do_toggle_selections(self) -> None:
2928
- """Toggle selected rows highlighting on/off."""
2929
- # Add to history
2930
- self.add_history("Toggled row selection")
2931
-
2932
- if self.has_hidden_rows:
2933
- # Some rows are hidden - invert only selected visible rows and clear selections for hidden rows
2934
- for i in range(len(self.selected_rows)):
2935
- if self.visible_rows[i]:
2936
- self.selected_rows[i] = not self.selected_rows[i]
2937
- else:
2938
- self.selected_rows[i] = False
2939
- else:
2940
- # Invert all selected rows
2941
- self.selected_rows = [not selected for selected in self.selected_rows]
2942
-
2943
- # Check if we're highlighting or un-highlighting
2944
- if new_selected_count := self.selected_rows.count(True):
2945
- self.notify(f"Toggled selection for [$success]{new_selected_count}[/] rows", title="Toggle")
2946
-
2947
- # Recreate table for display
2948
- self.setup_table()
2949
-
2950
- def do_toggle_row_selection(self) -> None:
2951
- """Select/deselect current row."""
2952
- # Add to history
2953
- self.add_history("Toggled row selection")
2954
-
2955
- ridx = self.cursor_row_idx
2956
- self.selected_rows[ridx] = not self.selected_rows[ridx]
2957
-
2958
- row_key = str(ridx)
2959
- match_cols = self.matches.get(ridx, set())
2960
- for col_idx, col in enumerate(self.ordered_columns):
2961
- col_key = col.key
2962
- cell_text: Text = self.get_cell(row_key, col_key)
2963
-
2964
- if self.selected_rows[ridx] or (col_idx in match_cols):
2965
- cell_text.style = HIGHLIGHT_COLOR
2966
- else:
2967
- # Reset to default style based on dtype
2968
- dtype = self.df.dtypes[col_idx]
2969
- dc = DtypeConfig(dtype)
2970
- cell_text.style = dc.style
2971
-
2972
- self.update_cell(row_key, col_key, cell_text)
2973
-
2974
- def do_clear_selections_and_matches(self) -> None:
2975
- """Clear all selected rows and matches without removing them from the dataframe."""
2976
- # Check if any selected rows or matches
2977
- if not any(self.selected_rows) and not self.matches:
2978
- self.notify("No selections to clear", title="Clear", severity="warning")
2979
- return
2980
-
2981
- row_count = sum(
2982
- 1 if (selected or idx in self.matches) else 0 for idx, selected in enumerate(self.selected_rows)
2983
- )
2984
-
2985
- # Add to history
2986
- self.add_history("Cleared all selected rows")
2987
-
2988
- # Clear all selections
2989
- self.selected_rows = [False] * len(self.df)
2990
- self.matches = defaultdict(set)
2991
-
2992
- # Recreate table for display
2993
- self.setup_table()
2994
-
2995
- self.notify(f"Cleared selections for [$success]{row_count}[/] rows", title="Clear")
2996
-
2997
- # Filter & View
2998
- def do_filter_rows(self) -> None:
2999
- """Keep only the rows with selections and cell matches, and remove others."""
3000
- if any(self.selected_rows) or self.matches:
3001
- message = "Filter to rows with selection and cell matches (other rows removed)"
3002
- filter_expr = [
3003
- True if (selected or ridx in self.matches) else False
3004
- for ridx, selected in enumerate(self.selected_rows)
3005
- ]
3006
- else: # Search cursor value in current column
3007
- message = "Filter to rows matching cursor value (other rows removed)"
3008
- ridx = self.cursor_row_idx
3009
- cidx = self.cursor_col_idx
3010
- value = self.df.item(ridx, cidx)
3011
-
3012
- col_name = self.df.columns[cidx]
3013
- if value is None:
3014
- filter_expr = pl.col(col_name).is_null()
3015
- else:
3016
- filter_expr = pl.col(col_name) == value
3017
-
3018
- # Add to history
3019
- self.add_history(message, dirty=True)
3020
-
3021
- # Apply filter to dataframe with row indices
3022
- df_filtered = self.df.with_row_index(RIDX).filter(filter_expr)
3023
-
3024
- # Update dataframe
3025
- self.reset_df(df_filtered.drop(RIDX))
3026
-
3027
- # Recreate table for display
3028
- self.setup_table()
3029
-
3030
- self.notify(f"{message}. Now showing [$success]{len(self.df)}[/] rows", title="Filter")
3031
-
3466
+ # View & Filter
3032
3467
  def do_view_rows(self) -> None:
3033
3468
  """View rows.
3034
3469
 
3035
- If there are selected rows or matches, view those rows.
3036
- Otherwise, view based on the value of the currently selected cell.
3470
+ If there are selected rows, view those.
3471
+ Otherwise, view based on the cursor value.
3037
3472
  """
3038
3473
 
3039
3474
  cidx = self.cursor_col_idx
3040
- col_name = self.df.columns[cidx]
3475
+ col_name = self.cursor_col_name
3041
3476
 
3042
- # If there are rows with selections or matches, use those
3043
- if any(self.selected_rows) or self.matches:
3044
- term = [
3045
- True if (selected or idx in self.matches) else False for idx, selected in enumerate(self.selected_rows)
3046
- ]
3477
+ # If there are selected rows, use those
3478
+ if self.selected_rows:
3479
+ term = pl.col(RID).is_in(self.selected_rows)
3047
3480
  # Otherwise, use the current cell value
3048
3481
  else:
3049
3482
  ridx = self.cursor_row_idx
@@ -3065,7 +3498,7 @@ class DataFrameTable(DataTable):
3065
3498
  )
3066
3499
 
3067
3500
  def view_rows(self, result) -> None:
3068
- """Show only rows with selections or matches, and do hide others. Do not modify the dataframe."""
3501
+ """View selected rows and hide others. Do not modify the dataframe."""
3069
3502
  if result is None:
3070
3503
  return
3071
3504
  term, cidx, match_nocase, match_whole = result
@@ -3075,13 +3508,17 @@ class DataFrameTable(DataTable):
3075
3508
  # Support for polars expression
3076
3509
  if isinstance(term, pl.Expr):
3077
3510
  expr = term
3511
+
3078
3512
  # Support for list of booleans (selected rows)
3079
3513
  elif isinstance(term, (list, pl.Series)):
3080
3514
  expr = term
3515
+
3516
+ # Null case
3081
3517
  elif term == NULL:
3082
3518
  expr = pl.col(col_name).is_null()
3519
+
3520
+ # Support for polars expression in string form
3083
3521
  elif tentative_expr(term):
3084
- # Support for polars expression in string form
3085
3522
  try:
3086
3523
  expr = validate_expr(term, self.df.columns, cidx)
3087
3524
  except Exception as e:
@@ -3090,6 +3527,8 @@ class DataFrameTable(DataTable):
3090
3527
  )
3091
3528
  self.log(f"Error validating expression `{term}`: {str(e)}")
3092
3529
  return
3530
+
3531
+ # Type-aware search based on column dtype
3093
3532
  else:
3094
3533
  dtype = self.df.dtypes[cidx]
3095
3534
  if dtype == pl.String:
@@ -3113,11 +3552,7 @@ class DataFrameTable(DataTable):
3113
3552
  )
3114
3553
 
3115
3554
  # Lazyframe with row indices
3116
- lf = self.df.lazy().with_row_index(RIDX)
3117
-
3118
- # Apply existing visibility filter first
3119
- if self.has_hidden_rows:
3120
- lf = lf.filter(self.visible_rows)
3555
+ lf = self.df.lazy()
3121
3556
 
3122
3557
  expr_str = "boolean list or series" if isinstance(expr, (list, pl.Series)) else str(expr)
3123
3558
 
@@ -3125,7 +3560,7 @@ class DataFrameTable(DataTable):
3125
3560
  try:
3126
3561
  df_filtered = lf.filter(expr).collect()
3127
3562
  except Exception as e:
3128
- self.histories.pop() # Remove last history entry
3563
+ self.histories_undo.pop() # Remove last history entry
3129
3564
  self.notify(f"Error applying filter [$error]{expr_str}[/]", title="Filter", severity="error", timeout=10)
3130
3565
  self.log(f"Error applying filter `{expr_str}`: {str(e)}")
3131
3566
  return
@@ -3138,17 +3573,81 @@ class DataFrameTable(DataTable):
3138
3573
  # Add to history
3139
3574
  self.add_history(f"Filtered by expression [$success]{expr_str}[/]")
3140
3575
 
3141
- # Mark unfiltered rows as invisible
3142
- filtered_row_indices = set(df_filtered[RIDX].to_list())
3143
- if filtered_row_indices:
3144
- for ridx in range(len(self.visible_rows)):
3145
- if ridx not in filtered_row_indices:
3146
- self.visible_rows[ridx] = False
3576
+ ok_rids = set(df_filtered[RID])
3577
+
3578
+ # Create a view of self.df as a copy
3579
+ if self.df_view is None:
3580
+ self.df_view = self.df
3581
+
3582
+ # Update dataframe
3583
+ self.df = df_filtered
3584
+
3585
+ # Update selected rows
3586
+ if self.selected_rows:
3587
+ self.selected_rows.intersection_update(ok_rids)
3588
+
3589
+ # Update matches
3590
+ if self.matches:
3591
+ self.matches = {rid: cols for rid, cols in self.matches.items() if rid in ok_rids}
3592
+
3593
+ # Recreate table for display
3594
+ self.setup_table()
3595
+
3596
+ self.notify(f"Filtered to [$success]{matched_count}[/] matching row(s)", title="Filter")
3597
+
3598
+ def do_filter_rows(self) -> None:
3599
+ """Filter rows.
3600
+
3601
+ If there are selected rows, use those.
3602
+ Otherwise, filter based on the cursor value.
3603
+ """
3604
+ if self.selected_rows:
3605
+ message = "Filtered to selected rows (other rows removed)"
3606
+ filter_expr = pl.col(RID).is_in(self.selected_rows)
3607
+ else: # Search cursor value in current column
3608
+ message = "Filtered to rows matching cursor value (other rows removed)"
3609
+ cidx = self.cursor_col_idx
3610
+ col_name = self.df.columns[cidx]
3611
+ value = self.cursor_value
3612
+
3613
+ if value is None:
3614
+ filter_expr = pl.col(col_name).is_null()
3615
+ else:
3616
+ filter_expr = pl.col(col_name) == value
3617
+
3618
+ # Add to history
3619
+ self.add_history(message, dirty=True)
3620
+
3621
+ # Apply filter to dataframe with row indices
3622
+ df_filtered = self.df.lazy().filter(filter_expr).collect()
3623
+ ok_rids = set(df_filtered[RID])
3624
+
3625
+ # Update selected rows
3626
+ if self.selected_rows:
3627
+ selected_rows = {rid for rid in self.selected_rows if rid in ok_rids}
3628
+ else:
3629
+ selected_rows = set()
3630
+
3631
+ # Update matches
3632
+ if self.matches:
3633
+ matches = {rid: cols for rid, cols in self.matches.items() if rid in ok_rids}
3634
+ else:
3635
+ matches = defaultdict(set)
3636
+
3637
+ # Update dataframe
3638
+ self.reset_df(df_filtered)
3639
+
3640
+ # Clear view for filter mode
3641
+ self.df_view = None
3642
+
3643
+ # Restore selected rows and matches
3644
+ self.selected_rows = selected_rows
3645
+ self.matches = matches
3147
3646
 
3148
3647
  # Recreate table for display
3149
3648
  self.setup_table()
3150
3649
 
3151
- self.notify(f"Filtered to [$success]{matched_count}[/] matching rows", title="Filter")
3650
+ self.notify(f"{message}. Now showing [$success]{len(self.df)}[/] rows.", title="Filter")
3152
3651
 
3153
3652
  # Copy & Save
3154
3653
  def do_copy_to_clipboard(self, content: str, message: str) -> None:
@@ -3174,20 +3673,24 @@ class DataFrameTable(DataTable):
3174
3673
  except FileNotFoundError:
3175
3674
  self.notify("Error copying to clipboard", title="Clipboard", severity="error", timeout=10)
3176
3675
 
3177
- def do_save_to_file(
3178
- self, title: str = "Save to File", all_tabs: bool | None = None, task_after_save: str | None = None
3179
- ) -> None:
3676
+ def do_save_to_file(self, all_tabs: bool | None = None, task_after_save: str | None = None) -> None:
3180
3677
  """Open screen to save file."""
3181
3678
  self._task_after_save = task_after_save
3679
+ tab_count = len(self.app.tabs)
3680
+ save_all = tab_count > 1 and all_tabs is not False
3681
+
3682
+ filepath = Path(self.filename)
3683
+ if save_all:
3684
+ ext = filepath.suffix.lower()
3685
+ if ext in (".xlsx", ".xls"):
3686
+ filename = self.filename
3687
+ else:
3688
+ filename = "all-tabs.xlsx"
3689
+ else:
3690
+ filename = str(filepath.with_stem(self.tabname))
3182
3691
 
3183
- multi_tab = len(self.app.tabs) > 1
3184
- filename = (
3185
- "all-tabs.xlsx"
3186
- if all_tabs or (all_tabs is None and multi_tab)
3187
- else str(Path(self.filename).with_stem(self.tabname))
3188
- )
3189
3692
  self.app.push_screen(
3190
- SaveFileScreen(filename, title=title, all_tabs=all_tabs, multi_tab=multi_tab),
3693
+ SaveFileScreen(filename, save_all=save_all, tab_count=tab_count),
3191
3694
  callback=self.save_to_file,
3192
3695
  )
3193
3696
 
@@ -3195,13 +3698,11 @@ class DataFrameTable(DataTable):
3195
3698
  """Handle result from SaveFileScreen."""
3196
3699
  if result is None:
3197
3700
  return
3198
- filename, all_tabs = result
3199
-
3200
- # Whether to save all tabs (for Excel files)
3201
- self._all_tabs = all_tabs
3701
+ filename, save_all, overwrite_prompt = result
3702
+ self._save_all = save_all
3202
3703
 
3203
3704
  # Check if file exists
3204
- if Path(filename).exists():
3705
+ if overwrite_prompt and Path(filename).exists():
3205
3706
  self._pending_filename = filename
3206
3707
  self.app.push_screen(
3207
3708
  ConfirmScreen("File already exists. Overwrite?"),
@@ -3217,7 +3718,7 @@ class DataFrameTable(DataTable):
3217
3718
  else:
3218
3719
  # Go back to SaveFileScreen to allow user to enter a different name
3219
3720
  self.app.push_screen(
3220
- SaveFileScreen(self._pending_filename),
3721
+ SaveFileScreen(self._pending_filename, save_all=self._save_all),
3221
3722
  callback=self.save_to_file,
3222
3723
  )
3223
3724
 
@@ -3225,7 +3726,7 @@ class DataFrameTable(DataTable):
3225
3726
  """Actually save the dataframe to a file."""
3226
3727
  filepath = Path(filename)
3227
3728
  ext = filepath.suffix.lower()
3228
- if ext.endswith(".gz"):
3729
+ if ext == ".gz":
3229
3730
  ext = Path(filename).with_suffix("").suffix.lower()
3230
3731
 
3231
3732
  fmt = ext.removeprefix(".")
@@ -3237,43 +3738,42 @@ class DataFrameTable(DataTable):
3237
3738
  )
3238
3739
  fmt = "csv"
3239
3740
 
3240
- # Add to history
3241
- self.add_history(f"Saved dataframe to [$success]{filename}[/]")
3242
-
3741
+ df = (self.df if self.df_view is None else self.df_view).select(pl.exclude(RID))
3243
3742
  try:
3244
3743
  if fmt == "csv":
3245
- self.df.write_csv(filename)
3744
+ df.write_csv(filename)
3246
3745
  elif fmt in ("tsv", "tab"):
3247
- self.df.write_csv(filename, separator="\t")
3746
+ df.write_csv(filename, separator="\t")
3248
3747
  elif fmt in ("xlsx", "xls"):
3249
3748
  self.save_excel(filename)
3250
3749
  elif fmt == "json":
3251
- self.df.write_json(filename)
3750
+ df.write_json(filename)
3252
3751
  elif fmt == "ndjson":
3253
- self.df.write_ndjson(filename)
3752
+ df.write_ndjson(filename)
3254
3753
  elif fmt == "parquet":
3255
- self.df.write_parquet(filename)
3754
+ df.write_parquet(filename)
3256
3755
  else: # Fallback to CSV
3257
- self.df.write_csv(filename)
3756
+ df.write_csv(filename)
3258
3757
 
3259
3758
  # Update current filename
3260
3759
  self.filename = filename
3261
3760
 
3262
3761
  # Reset dirty flag after save
3263
- if self._all_tabs:
3762
+ if self._save_all:
3264
3763
  tabs: dict[TabPane, DataFrameTable] = self.app.tabs
3265
3764
  for table in tabs.values():
3266
3765
  table.dirty = False
3267
3766
  else:
3268
3767
  self.dirty = False
3269
3768
 
3270
- if self._task_after_save == "close_tab":
3271
- self.app.do_close_tab()
3272
- elif self._task_after_save == "quit_app":
3273
- self.app.exit()
3769
+ if hasattr(self, "_task_after_save"):
3770
+ if self._task_after_save == "close_tab":
3771
+ self.app.do_close_tab()
3772
+ elif self._task_after_save == "quit_app":
3773
+ self.app.exit()
3274
3774
 
3275
3775
  # From ConfirmScreen callback, so notify accordingly
3276
- if self._all_tabs:
3776
+ if self._save_all:
3277
3777
  self.notify(f"Saved all tabs to [$success]{filename}[/]", title="Save to File")
3278
3778
  else:
3279
3779
  self.notify(f"Saved current tab to [$success]{filename}[/]", title="Save to File")
@@ -3286,16 +3786,18 @@ class DataFrameTable(DataTable):
3286
3786
  """Save to an Excel file."""
3287
3787
  import xlsxwriter
3288
3788
 
3289
- if not self._all_tabs or len(self.app.tabs) == 1:
3789
+ if not self._save_all or len(self.app.tabs) == 1:
3290
3790
  # Single tab - save directly
3291
- self.df.write_excel(filename)
3791
+ df = (self.df if self.df_view is None else self.df_view).select(pl.exclude(RID))
3792
+ df.write_excel(filename, worksheet=self.tabname)
3292
3793
  else:
3293
3794
  # Multiple tabs - use xlsxwriter to create multiple sheets
3294
3795
  with xlsxwriter.Workbook(filename) as wb:
3295
3796
  tabs: dict[TabPane, DataFrameTable] = self.app.tabs
3296
3797
  for table in tabs.values():
3297
3798
  worksheet = wb.add_worksheet(table.tabname)
3298
- table.df.write_excel(workbook=wb, worksheet=worksheet)
3799
+ df = (table.df if table.df_view is None else table.df_view).select(pl.exclude(RID))
3800
+ df.write_excel(workbook=wb, worksheet=worksheet)
3299
3801
 
3300
3802
  # SQL Interface
3301
3803
  def do_simple_sql(self) -> None:
@@ -3339,19 +3841,17 @@ class DataFrameTable(DataTable):
3339
3841
  sql: The SQL query string to execute.
3340
3842
  """
3341
3843
 
3342
- import re
3844
+ sql = sql.replace("$#", f"(`{RID}` + 1)")
3845
+ if RID not in sql and "*" not in sql:
3846
+ # Ensure RID is selected
3847
+ import re
3343
3848
 
3344
- RE_FROM_SELF = re.compile(r"\bfrom\s+self\b", re.IGNORECASE)
3345
-
3346
- sql = RE_FROM_SELF.sub(f", `{RIDX}` FROM self", sql)
3849
+ RE_FROM_SELF = re.compile(r"\bFROM\s+self\b", re.IGNORECASE)
3850
+ sql = RE_FROM_SELF.sub(f", `{RID}` FROM self", sql)
3347
3851
 
3348
3852
  # Execute the SQL query
3349
3853
  try:
3350
- lf = self.df.lazy().with_row_index(RIDX)
3351
- if self.has_hidden_rows:
3352
- lf = lf.filter(self.visible_rows)
3353
-
3354
- df_filtered = lf.sql(sql).collect()
3854
+ df_filtered = self.df.lazy().sql(sql).collect()
3355
3855
 
3356
3856
  if not len(df_filtered):
3357
3857
  self.notify(
@@ -3359,29 +3859,34 @@ class DataFrameTable(DataTable):
3359
3859
  )
3360
3860
  return
3361
3861
 
3362
- # Add to history
3363
- self.add_history(f"SQL Query:\n[$success]{sql}[/]", dirty=not view)
3364
-
3365
- if view:
3366
- # Just view - do not modify the dataframe
3367
- filtered_row_indices = set(df_filtered[RIDX].to_list())
3368
- if filtered_row_indices:
3369
- self.visible_rows = [ridx in filtered_row_indices for ridx in range(len(self.visible_rows))]
3370
-
3371
- filtered_col_names = set(df_filtered.columns)
3372
- if filtered_col_names:
3373
- self.hidden_columns = {
3374
- col_name for col_name in self.df.columns if col_name not in filtered_col_names
3375
- }
3376
- else: # filter - modify the dataframe
3377
- self.df = df_filtered.drop(RIDX)
3378
- self.visible_rows = [True] * len(self.df)
3379
- self.hidden_columns.clear()
3380
3862
  except Exception as e:
3381
3863
  self.notify(f"Error executing SQL query [$error]{sql}[/]", title="SQL Query", severity="error", timeout=10)
3382
3864
  self.log(f"Error executing SQL query `{sql}`: {str(e)}")
3383
3865
  return
3384
3866
 
3867
+ # Add to history
3868
+ self.add_history(f"SQL Query:\n[$success]{sql}[/]", dirty=not view)
3869
+
3870
+ # Create a view of self.df as a copy
3871
+ if view and self.df_view is None:
3872
+ self.df_view = self.df
3873
+
3874
+ # Clear view for filter mode
3875
+ if not view:
3876
+ self.df_view = None
3877
+
3878
+ # Update dataframe
3879
+ self.df = df_filtered
3880
+ ok_rids = set(df_filtered[RID])
3881
+
3882
+ # Update selected rows
3883
+ if self.selected_rows:
3884
+ self.selected_rows.intersection_update(ok_rids)
3885
+
3886
+ # Update matches
3887
+ if self.matches:
3888
+ self.matches = {rid: cols for rid, cols in self.matches.items() if rid in ok_rids}
3889
+
3385
3890
  # Recreate table for display
3386
3891
  self.setup_table()
3387
3892