dataframe-textual 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3940 @@
1
+ """DataFrameTable widget for displaying and interacting with Polars DataFrames."""
2
+
3
+ import sys
4
+ from collections import defaultdict, deque
5
+ from dataclasses import dataclass
6
+ from itertools import zip_longest
7
+ from pathlib import Path
8
+ from textwrap import dedent
9
+ from typing import Any
10
+
11
+ import polars as pl
12
+ from rich.text import Text, TextType
13
+ from textual._two_way_dict import TwoWayDict
14
+ from textual.coordinate import Coordinate
15
+ from textual.events import Click
16
+ from textual.reactive import reactive
17
+ from textual.render import measure
18
+ from textual.widgets import DataTable, TabPane
19
+ from textual.widgets._data_table import (
20
+ CellDoesNotExist,
21
+ CellKey,
22
+ CellType,
23
+ Column,
24
+ ColumnKey,
25
+ CursorType,
26
+ DuplicateKey,
27
+ Row,
28
+ RowKey,
29
+ )
30
+
31
+ from .common import (
32
+ CURSOR_TYPES,
33
+ NULL,
34
+ NULL_DISPLAY,
35
+ RID,
36
+ SUBSCRIPT_DIGITS,
37
+ SUPPORTED_FORMATS,
38
+ DtypeConfig,
39
+ format_row,
40
+ get_next_item,
41
+ parse_placeholders,
42
+ round_to_nearest_hundreds,
43
+ tentative_expr,
44
+ validate_expr,
45
+ )
46
+ from .sql_screen import AdvancedSqlScreen, SimpleSqlScreen
47
+ from .table_screen import FrequencyScreen, MetaColumnScreen, MetaShape, RowDetailScreen, StatisticsScreen
48
+ from .yes_no_screen import (
49
+ AddColumnScreen,
50
+ AddLinkScreen,
51
+ ConfirmScreen,
52
+ EditCellScreen,
53
+ EditColumnScreen,
54
+ FilterScreen,
55
+ FindReplaceScreen,
56
+ FreezeScreen,
57
+ RenameColumnScreen,
58
+ SaveFileScreen,
59
+ SearchScreen,
60
+ )
61
+
62
+ # Color for highlighting selections and matches
63
+ HIGHLIGHT_COLOR = "red"
64
+
65
+ # Buffer size for loading rows
66
+ BUFFER_SIZE = 5
67
+
68
+ # Warning threshold for loading rows
69
+ WARN_ROWS_THRESHOLD = 50_000
70
+
71
+ # Maximum width for string columns before truncation
72
+ STRING_WIDTH_CAP = 35
73
+
74
+
75
@dataclass
class History:
    """Snapshot of the table state used for undo/redo.

    Field order is part of the positional constructor and must not change.
    """

    # Label describing the recorded state
    description: str
    # Dataframe captured in this state
    df: pl.DataFrame
    # Optional view dataframe captured in this state (may be None)
    df_view: pl.DataFrame | None
    filename: str
    hidden_columns: set[str]
    selected_rows: set[int]
    # Maps column name -> descending flag
    sorted_columns: dict[str, bool]
    # Maps RID -> set of column names
    matches: dict[int, set[str]]
    fixed_rows: int
    fixed_columns: int
    cursor_coordinate: Coordinate
    # Whether this history state has unsaved changes
    dirty: bool = False
91
+
92
+
93
@dataclass
class ReplaceState:
    """Bookkeeping for an interactive replace operation.

    Field order is part of the positional constructor and must not change.
    """

    term_find: str
    term_replace: str
    match_nocase: bool
    match_whole: bool
    # Column index to search in; None means all columns (annotation now reflects that)
    cidx: int | None
    rows: list[int]  # List of row indices
    cols_per_row: list[list[int]]  # List of list of column indices per row
    current_rpos: int  # Current row position index in rows
    current_cpos: int  # Current column position index within current row's cols
    current_occurrence: int  # Current occurrence count (for display)
    total_occurrence: int  # Total number of occurrences
    replaced_occurrence: int  # Number of occurrences already replaced
    skipped_occurrence: int  # Number of occurrences skipped
    done: bool = False  # Whether the replace operation is complete
111
+
112
+
113
def add_rid_column(df: pl.DataFrame) -> pl.DataFrame:
    """Ensure the dataframe carries the internal row-index column as its last column.

    Args:
        df: The Polars DataFrame to check.

    Returns:
        ``df`` unchanged when it already has a ``RID`` column; otherwise a new
        DataFrame with the row index added and moved to the end.
    """
    if RID in df.columns:
        return df

    # with_row_index() puts the index first; the select() moves it behind all other columns
    indexed = df.lazy().with_row_index(RID)
    reordered = indexed.select(pl.exclude(RID), RID)
    return reordered.collect()
125
+
126
+
127
+ class DataFrameTable(DataTable):
128
+ """Custom DataTable to highlight row/column labels based on cursor position."""
129
+
130
+ # Help text for the DataTable which will be shown in the HelpPanel
131
+ HELP = dedent("""
132
+ # 📊 DataFrame Viewer - Table Controls
133
+
134
+ ## ⬆️ Navigation
135
+ - **↑↓←→** - 🎯 Move cursor (cell/row/column)
136
+ - **g** - ⬆️ Jump to first row
137
+ - **G** - ⬇️ Jump to last row
138
+ - **HOME/END** - 🎯 Jump to first/last column
139
+ - **Ctrl+HOME/END** - 🎯 Jump to page top/bottom
140
+ - **Ctrl+F** - 📜 Page down
141
+ - **Ctrl+B** - 📜 Page up
142
+ - **PgUp/PgDn** - 📜 Page up/down
143
+
144
+ ## ♻️ Undo/Redo/Reset
145
+ - **u** - ↩️ Undo last action
146
+ - **U** - 🔄 Redo last undone action
147
+ - **Ctrl+U** - 🔁 Reset to initial state
148
+
149
+ ## 👁️ Display
150
+ - **Enter** - 📋 Show row details in modal
151
+ - **F** - 📊 Show frequency distribution
152
+ - **s** - 📈 Show statistics for current column
153
+ - **S** - 📊 Show statistics for entire dataframe
154
+ - **m** - 📐 Show dataframe metadata (row/column counts)
155
+ - **M** - 📋 Show column metadata (ID, name, type)
156
+ - **h** - 👁️ Hide current column
157
+ - **H** - 👀 Show all hidden rows/columns
158
+ - **_** - 📏 Toggle column full width
159
+ - **z** - 📌 Freeze rows and columns
160
+ - **~** - 🏷️ Toggle row labels
161
+ - **,** - 🔢 Toggle thousand separator for numeric display
162
+ - **K** - 🔄 Cycle cursor (cell → row → column → cell)
163
+
164
+ ## ↕️ Sorting
165
+ - **[** - 🔼 Sort column ascending
166
+ - **]** - 🔽 Sort column descending
167
+ - *(Multi-column sort supported)*
168
+
169
+ ## ✅ Row Selection
170
+ - **\\\\** - ✅ Select rows with cell matches or those matching cursor value in current column
171
+ - **|** - ✅ Select rows with expression
172
+ - **'** - ✅ Select/deselect current row
173
+ - **t** - 💡 Toggle row selection (invert all)
174
+ - **T** - 🧹 Clear all selections and matches
175
+ - **{** - ⬆️ Go to previous selected row
176
+ - **}** - ⬇️ Go to next selected row
177
+ - *(Supports case-insensitive & whole-word matching)*
178
+
179
+ ## 🔎 Find & Replace
180
+ - **/** - 🔎 Find in current column with cursor value
181
+ - **?** - 🔎 Find in current column with expression
182
+ - **;** - 🌐 Global find using cursor value
183
+ - **:** - 🌐 Global find with expression
184
+ - **n** - ⬇️ Go to next match
185
+ - **N** - ⬆️ Go to previous match
186
+ - **r** - 🔄 Replace in current column (interactive or all)
187
+ - **R** - 🔄 Replace across all columns (interactive or all)
188
+ - *(Supports case-insensitive & whole-word matching)*
189
+
190
+ ## 👁️ View & Filter
191
+ - **"** - 📍 Filter selected rows (removes others)
192
+ - **v** - 👁️ View selected rows (hides others)
193
+ - **V** - 🔧 View selected rows matching expression (hides others)
194
+
195
+ ## 🔍 SQL Interface
196
+ - **l** - 💬 Open simple SQL interface (select columns & where clause)
197
+ - **L** - 🔎 Open advanced SQL interface (full SQL queries)
198
+
199
+ ## ✏️ Editing
200
+ - **Double-click** - ✍️ Edit cell or rename column header
201
+ - **e** - ✍️ Edit current cell
202
+ - **E** - 📊 Edit entire column with expression
203
+ - **a** - ➕ Add empty column after current
204
+ - **A** - ➕ Add column with name and optional expression
205
+ - **@** - 🔗 Add a new link column from template
206
+ - **x** - ❌ Delete current row
207
+ - **X** - ❌ Delete row and those below
208
+ - **Ctrl+X** - ❌ Delete row and those above
209
+ - **delete** - ❌ Clear current cell (set to NULL)
210
+ - **-** - ❌ Delete current column
211
+ - **d** - 📋 Duplicate current column
212
+ - **D** - 📋 Duplicate current row
213
+
214
+ ## 🎯 Reorder
215
+ - **Shift+↑↓** - ⬆️⬇️ Move row up/down
216
+ - **Shift+←→** - ⬅️➡️ Move column left/right
217
+
218
+ ## 🎨 Type Casting
219
+ - **#** - 🔢 Cast column to integer
220
+ - **%** - 🔢 Cast column to float
221
+ - **!** - ✅ Cast column to boolean
222
+ - **$** - 📝 Cast column to string
223
+
224
+ ## 💾 Copy & Save
225
+ - **c** - 📋 Copy cell to clipboard
226
+ - **Ctrl+c** - 📊 Copy column to clipboard
227
+ - **Ctrl+r** - 📝 Copy row to clipboard (tab-separated)
228
+ - **Ctrl+s** - 💾 Save current tab to file
229
+ """).strip()
230
+
231
+ # fmt: off
232
+ BINDINGS = [
233
+ # Navigation
234
+ ("g", "jump_top", "Jump to top"),
235
+ ("G", "jump_bottom", "Jump to bottom"),
236
+ ("pageup,ctrl+b", "page_up", "Page up"),
237
+ ("pagedown,ctrl+f", "page_down", "Page down"),
238
+ # Undo/Redo/Reset
239
+ ("u", "undo", "Undo"),
240
+ ("U", "redo", "Redo"),
241
+ ("ctrl+u", "reset", "Reset to initial state"),
242
+ # Display
243
+ ("h", "hide_column", "Hide column"),
244
+ ("H", "show_hidden_rows_columns", "Show hidden rows/columns"),
245
+ ("tilde", "toggle_row_labels", "Toggle row labels"), # `~`
246
+ ("K", "cycle_cursor_type", "Cycle cursor mode"), # `K`
247
+ ("z", "freeze_row_column", "Freeze rows/columns"),
248
+ ("comma", "show_thousand_separator", "Toggle thousand separator"), # `,`
249
+ ("underscore", "expand_column", "Expand column to full width"), # `_`
250
+ ("circumflex_accent", "toggle_rid", "Toggle internal row index"), # `^`
251
+ # Copy
252
+ ("c", "copy_cell", "Copy cell to clipboard"),
253
+ ("ctrl+c", "copy_column", "Copy column to clipboard"),
254
+ ("ctrl+r", "copy_row", "Copy row to clipboard"),
255
+ # Save
256
+ ("ctrl+s", "save_to_file", "Save to file"),
257
+ # Metadata, Detail, Frequency, and Statistics
258
+ ("m", "metadata_shape", "Show metadata for row count and column count"),
259
+ ("M", "metadata_column", "Show metadata for column"),
260
+ ("enter", "view_row_detail", "View row details"),
261
+ ("F", "show_frequency", "Show frequency"),
262
+ ("s", "show_statistics", "Show statistics for column"),
263
+ ("S", "show_statistics('dataframe')", "Show statistics for dataframe"),
264
+ # Sort
265
+ ("left_square_bracket", "sort_ascending", "Sort ascending"), # `[`
266
+ ("right_square_bracket", "sort_descending", "Sort descending"), # `]`
267
+ # View & Filter
268
+ ("v", "view_rows", "View selected rows"),
269
+ ("V", "view_rows_expr", "View selected rows matching expression"),
270
+ ("quotation_mark", "filter_rows", "Filter selected rows"), # `"`
271
+ # Row Selection
272
+ ("backslash", "select_row", "Select rows with cell matches or those matching cursor value in current column"), # `\`
273
+ ("vertical_line", "select_row_expr", "Select rows with expression"), # `|`
274
+ ("right_curly_bracket", "next_selected_row", "Go to next selected row"), # `}`
275
+ ("left_curly_bracket", "previous_selected_row", "Go to previous selected row"), # `{`
276
+ ("apostrophe", "toggle_row_selection", "Toggle row selection"), # `'`
277
+ ("t", "toggle_selections", "Toggle all row selections"),
278
+ ("T", "clear_selections_and_matches", "Clear selections"),
279
+ # Find & Replace
280
+ ("slash", "find_cursor_value", "Find in column with cursor value"), # `/`
281
+ ("question_mark", "find_expr", "Find in column with expression"), # `?`
282
+ ("semicolon", "find_cursor_value('global')", "Global find with cursor value"), # `;`
283
+ ("colon", "find_expr('global')", "Global find with expression"), # `:`
284
+ ("n", "next_match", "Go to next match"), # `n`
285
+ ("N", "previous_match", "Go to previous match"), # `Shift+n`
286
+ ("r", "replace", "Replace in column"), # `r`
287
+ ("R", "replace_global", "Replace global"), # `Shift+R`
288
+ # Delete
289
+ ("delete", "clear_cell", "Clear cell"),
290
+ ("minus", "delete_column", "Delete column"), # `-`
291
+ ("x", "delete_row", "Delete row"),
292
+ ("X", "delete_row_and_below", "Delete row and those below"),
293
+ ("ctrl+x", "delete_row_and_up", "Delete row and those up"),
294
+ # Duplicate
295
+ ("d", "duplicate_column", "Duplicate column"),
296
+ ("D", "duplicate_row", "Duplicate row"),
297
+ # Edit
298
+ ("e", "edit_cell", "Edit cell"),
299
+ ("E", "edit_column", "Edit column"),
300
+ # Add
301
+ ("a", "add_column", "Add column"),
302
+ ("A", "add_column_expr", "Add column with expression"),
303
+ ("at", "add_link_column", "Add a link column"), # `@`
304
+ # Reorder
305
+ ("shift+left", "move_column_left", "Move column left"),
306
+ ("shift+right", "move_column_right", "Move column right"),
307
+ ("shift+up", "move_row_up", "Move row up"),
308
+ ("shift+down", "move_row_down", "Move row down"),
309
+ # Type Casting
310
+ ("number_sign", "cast_column_dtype('pl.Int64')", "Cast column dtype to integer"), # `#`
311
+ ("percent_sign", "cast_column_dtype('pl.Float64')", "Cast column dtype to float"), # `%`
312
+ ("exclamation_mark", "cast_column_dtype('pl.Boolean')", "Cast column dtype to bool"), # `!`
313
+ ("dollar_sign", "cast_column_dtype('pl.String')", "Cast column dtype to string"), # `$`
314
+ # Sql
315
+ ("l", "simple_sql", "Simple SQL interface"),
316
+ ("L", "advanced_sql", "Advanced SQL interface"),
317
+ ]
318
+ # fmt: on
319
+
320
+ # Track if dataframe has unsaved changes
321
+ dirty: reactive[bool] = reactive(False)
322
+
323
def __init__(self, df: pl.DataFrame, filename: str = "", tabname: str = "", **kwargs) -> None:
    """Create the table widget and initialise all tracked state.

    Args:
        df: The Polars DataFrame to display and edit. It is passed through
            ``add_rid_column`` so the internal row-index column is present.
        filename: Source filename for the data. Falls back to ``"untitled.csv"`` when empty.
        tabname: Name for the tab displaying this dataframe. Falls back to the stem of the
            resolved filename when empty.
        **kwargs: Additional keyword arguments passed to the parent DataTable widget.
    """
    super().__init__(**kwargs)

    # DataFrame state
    self.dataframe = add_rid_column(df)  # Original dataframe
    self.df = self.dataframe  # Internal/working dataframe
    self.filename = filename or "untitled.csv"  # Current filename
    # Derive the fallback from the resolved filename: Path("").stem is "", which
    # previously left the tab name empty whenever no filename was given.
    self.tabname = tabname or Path(self.filename).stem  # Tab name

    # In view mode, this is the copy of self.df
    self.df_view = None

    # Pagination & Loading
    # Smallest multiple of 100 strictly above the app height, never below 100
    self.BATCH_SIZE = max((self.app.size.height // 100 + 1) * 100, 100)
    self.loaded_rows = 0  # Track how many rows are currently loaded
    self.loaded_ranges: list[tuple[int, int]] = []  # List of (start, end) row indices that are loaded

    # State tracking (all 0-based indexing)
    self.hidden_columns: set[str] = set()  # Set of hidden column names
    self.selected_rows: set[int] = set()  # Track selected rows by RID
    self.sorted_columns: dict[str, bool] = {}  # col_name -> descending
    self.matches: dict[int, set[str]] = defaultdict(set)  # Track search matches: RID -> set of col_names

    # Freezing
    self.fixed_rows = 0  # Number of fixed rows
    self.fixed_columns = 0  # Number of fixed columns

    # History stack for undo
    self.histories_undo: deque[History] = deque()
    # History stack for redo
    self.histories_redo: deque[History] = deque()

    # Whether to use thousand separator for numeric display
    self.thousand_separator = False

    # Set of columns expanded to full width
    self.expanded_columns: set[str] = set()

    # Whether to show internal row index column
    self.show_rid = False
374
+
375
@property
def cursor_key(self) -> CellKey:
    """Cell key of the cell under the cursor.

    Returns:
        CellKey: Result of ``coordinate_to_cell_key`` for the current cursor coordinate.
    """
    coordinate = self.cursor_coordinate
    return self.coordinate_to_cell_key(coordinate)
383
+
384
@property
def cursor_row_key(self) -> RowKey:
    """Row key of the cell under the cursor.

    Returns:
        RowKey: The ``row_key`` part of ``cursor_key``.
    """
    cell_key = self.cursor_key
    return cell_key.row_key
392
+
393
@property
def cursor_col_key(self) -> ColumnKey:
    """Column key of the cell under the cursor.

    Returns:
        ColumnKey: The ``column_key`` part of ``cursor_key``.
    """
    cell_key = self.cursor_key
    return cell_key.column_key
401
+
402
@property
def cursor_row_idx(self) -> int:
    """Dataframe row index (0-based) of the cursor.

    The index is obtained by converting the cursor row key's value to ``int``.

    Returns:
        int: The 0-based row index of the cursor position.

    Raises:
        AssertionError: If the index falls outside ``[0, len(self.df))``.
    """
    key_value = self.cursor_row_key.value
    ridx = int(key_value)
    row_count = len(self.df)
    assert 0 <= ridx < row_count, "Cursor row index is out of bounds"
    return ridx
415
+
416
@property
def cursor_col_idx(self) -> int:
    """Dataframe column index (0-based) of the cursor.

    The index is the position of the cursor column key's value in ``self.df.columns``.

    Returns:
        int: The 0-based column index of the cursor position.

    Raises:
        AssertionError: If the cursor column index is out of bounds.
    """
    column_names = self.df.columns
    cidx = column_names.index(self.cursor_col_key.value)
    assert 0 <= cidx < len(self.df.columns), "Cursor column index is out of bounds"
    return cidx
429
+
430
@property
def cursor_col_name(self) -> str:
    """Name of the column under the cursor.

    Returns:
        str: The ``value`` of the cursor's column key.
    """
    column_key = self.cursor_col_key
    return column_key.value
438
+
439
@property
def cursor_value(self) -> Any:
    """Dataframe value of the cell under the cursor.

    Returns:
        Any: ``self.df.item(row, column)`` for the cursor's dataframe indices.
    """
    ridx = self.cursor_row_idx
    cidx = self.cursor_col_idx
    return self.df.item(ridx, cidx)
447
+
448
@property
def ordered_selected_rows(self) -> list[int]:
    """Selected rows as dataframe positions, in dataframe order.

    Returns:
        list[int]: 0-based positions in ``self.df`` whose ``RID`` value is in ``self.selected_rows``.
    """
    selected = self.selected_rows
    rid_values = self.df[RID]
    return [position for position, rid in enumerate(rid_values) if rid in selected]
456
+
457
@property
def ordered_matches(self) -> list[tuple[int, int]]:
    """Matched cells as dataframe coordinates, ordered by row then by column.

    Returns:
        list[tuple[int, int]]: ``(row_idx, col_idx)`` pairs for every matched cell.
    """
    # Every column name that has a match in at least one row
    matched_names = set().union(*self.matches.values())

    # (column index, column name) pairs in dataframe column order, matched columns only
    ordered_cols = [(cidx, name) for cidx, name in enumerate(self.df.columns) if name in matched_names]

    found: list[tuple[int, int]] = []
    for ridx, rid in enumerate(self.df[RID]):
        # .get() keeps the lookup from inserting empty entries into the mapping
        row_cols = self.matches.get(rid)
        if not row_cols:
            continue
        found.extend((ridx, cidx) for cidx, name in ordered_cols if name in row_cols)

    return found
481
+
482
def _round_to_nearest_hundreds(self, num: int):
    """Call ``round_to_nearest_hundreds`` with this table's batch size.

    Args:
        num: The number to round.

    Returns:
        Whatever ``round_to_nearest_hundreds(num, N=self.BATCH_SIZE)`` returns.
        NOTE(review): ``move_cursor_to`` unpacks it as ``(start, stop)`` — confirm against the helper.
    """
    batch_size = self.BATCH_SIZE
    return round_to_nearest_hundreds(num, N=batch_size)
489
+
490
def get_row_idx(self, row_key: RowKey) -> int:
    """Look up the table row index for a row key.

    Args:
        row_key: Row key to look up.

    Returns:
        The index reported by the parent class's ``get_row_index``.
    """
    return super().get_row_index(row_key)
497
+
498
def get_row_key(self, row_idx: int) -> RowKey:
    """Look up the row key for a table row index.

    Args:
        row_idx: Row index in the table display.

    Returns:
        The key stored for ``row_idx`` in ``self._row_locations``.
    """
    locations = self._row_locations
    return locations.get_key(row_idx)
508
+
509
def get_col_idx(self, col_key: ColumnKey) -> int:
    """Look up the table column index for a column key.

    Args:
        col_key: Column key to look up.

    Returns:
        The index reported by the parent class's ``get_column_index``.
    """
    return super().get_column_index(col_key)
519
+
520
def get_col_key(self, col_idx: int) -> ColumnKey:
    """Look up the column key for a table column index.

    Args:
        col_idx: Column index in the table display.

    Returns:
        The key stored for ``col_idx`` in ``self._column_locations``.
    """
    locations = self._column_locations
    return locations.get_key(col_idx)
530
+
531
+ def _should_highlight(self, cursor: Coordinate, target_cell: Coordinate, type_of_cursor: CursorType) -> bool:
532
+ """Determine if the given cell should be highlighted because of the cursor.
533
+
534
+ In "cell" mode, also highlights the row and column headers. This overrides the default
535
+ behavior of DataTable which only highlights the exact cell under the cursor.
536
+
537
+ Args:
538
+ cursor: The current position of the cursor.
539
+ target_cell: The cell we're checking for the need to highlight.
540
+ type_of_cursor: The type of cursor that is currently active ("cell", "row", or "column").
541
+
542
+ Returns:
543
+ bool: True if the target cell should be highlighted, False otherwise.
544
+ """
545
+ if type_of_cursor == "cell":
546
+ # Return true if the cursor is over the target cell
547
+ # This includes the case where the cursor is in the same row or column
548
+ return (
549
+ cursor == target_cell
550
+ or (target_cell.row == -1 and target_cell.column == cursor.column)
551
+ or (target_cell.column == -1 and target_cell.row == cursor.row)
552
+ )
553
+ elif type_of_cursor == "row":
554
+ cursor_row, _ = cursor
555
+ cell_row, _ = target_cell
556
+ return cursor_row == cell_row
557
+ elif type_of_cursor == "column":
558
+ _, cursor_column = cursor
559
+ _, cell_column = target_cell
560
+ return cursor_column == cell_column
561
+ else:
562
+ return False
563
+
564
def watch_cursor_coordinate(self, old_coordinate: Coordinate, new_coordinate: Coordinate) -> None:
    """React to a cursor move: announce the selection, refresh highlights, scroll.

    Nothing happens when the coordinate did not change. In "cell" mode a
    selected message is posted only when the ``_from_keyboard`` flag is set;
    the flag is cleared once it has been consumed.

    Args:
        old_coordinate: The previous cursor coordinate.
        new_coordinate: The new cursor coordinate.
    """
    if old_coordinate == new_coordinate:
        return

    mode = self.cursor_type

    if mode == "cell" and getattr(self, "_from_keyboard", False):
        self._from_keyboard = False  # Consume the flag
        try:
            self._post_selected_message()
        except CellDoesNotExist:
            # The coordinate may no longer exist, e.g. right after clear()
            pass

    if mode == "cell":
        # Refresh whole columns and rows (not only headers) so the header
        # highlight of both the old and the new position is redrawn.
        previous_row, previous_col = old_coordinate
        current_row, current_col = new_coordinate
        self.refresh_column(previous_col)
        self.refresh_column(current_col)
        self.refresh_row(previous_row)
        self.refresh_row(current_row)
    elif mode == "row":
        self.refresh_row(old_coordinate.row)
        self._highlight_row(new_coordinate.row)
    elif mode == "column":
        self.refresh_column(old_coordinate.column)
        self._highlight_column(new_coordinate.column)

    # Scroll now, or after the next refresh when dimensions are still pending
    if self._require_update_dimensions:
        self.call_after_refresh(self._scroll_cursor_into_view)
    else:
        self._scroll_cursor_into_view()
610
+
611
def watch_dirty(self, old_dirty: bool, new_dirty: bool) -> None:
    """Mirror the dirty state onto this table's tab via the ``dirty`` CSS class.

    The class is added when ``new_dirty`` is True and removed when it is False.
    If no matching tab exists in the app, the call is a no-op.

    Args:
        old_dirty: The old dirty state.
        new_dirty: The new dirty state.
    """
    if old_dirty == new_dirty:
        return  # No change

    # Local import keeps this block self-contained; textual is already a dependency of this module.
    from textual.css.query import NoMatches

    # query_one() raises NoMatches instead of returning a falsy value, so the
    # missing-tab case must be handled as an exception rather than with ``if``.
    try:
        content_tab = self.app.query_one(f"#--content-tab-{self.id}")
    except NoMatches:
        return

    if new_dirty:
        content_tab.add_class("dirty")
    else:
        content_tab.remove_class("dirty")
631
+
632
def move_cursor_to(self, ridx: int | None = None, cidx: int | None = None) -> None:
    """Move the cursor to a cell addressed by dataframe indices.

    Args:
        ridx: Row index (0-based) in the dataframe. ``None`` keeps the cursor's current row.
        cidx: Column index (0-based) in the dataframe. ``None`` keeps the cursor's current column.
    """
    if ridx is None:
        # No target row index was given, so there is no batch to compute or load.
        # (Previously None was passed on to the rounding helper.)
        row_key = self.cursor_row_key
    else:
        # Ensure the range containing the target row is loaded before addressing it
        start, stop = self._round_to_nearest_hundreds(ridx)
        self.load_rows_range(start, stop)
        row_key = str(ridx)

    col_key = self.cursor_col_key if cidx is None else self.df.columns[cidx]
    row_idx, col_idx = self.get_cell_coordinate(row_key, col_key)
    self.move_cursor(row=row_idx, column=col_idx)
647
+
648
def on_mount(self) -> None:
    """Mount hook; intentionally does nothing.

    Table setup is not performed here (the ``setup_table`` call is disabled).
    """
656
+
657
+ def on_key(self, event) -> None:
658
+ """Handle key press events for pagination.
659
+
660
+ Args:
661
+ event: The key event object.
662
+ """
663
+ if event.key == "up":
664
+ # Let the table handle the navigation first
665
+ self.load_rows_up()
666
+ elif event.key == "down":
667
+ # Let the table handle the navigation first
668
+ self.load_rows_down()
669
+
670
def on_click(self, event: Click) -> None:
    """Handle multi-clicks: rename a column header or edit a cell.

    Only acts in "cell" cursor mode and when the click chain is greater than
    one. A click on the header row (row ``-1``) starts a column rename;
    any other click starts a cell edit.

    Args:
        event: The click event; row/column are read from ``event.style.meta``.
    """
    is_multi_click = event.chain > 1
    if self.cursor_type != "cell" or not is_multi_click:
        return

    try:
        meta = event.style.meta
        clicked_row = meta["row"]
        clicked_col = meta["column"]
    except (KeyError, TypeError):
        return  # Click carries no row/column information

    if clicked_row == -1:
        # Header row
        self.do_rename_column(clicked_col)
    else:
        self.do_edit_cell()
690
+
691
+ # Action handlers for BINDINGS
692
def action_jump_top(self) -> None:
    """Handle the ``jump_top`` action by delegating to ``do_jump_top``."""
    self.do_jump_top()
695
+
696
def action_jump_bottom(self) -> None:
    """Handle the ``jump_bottom`` action by delegating to ``do_jump_bottom``."""
    self.do_jump_bottom()
699
+
700
def action_page_up(self) -> None:
    """Handle the ``page_up`` action by delegating to ``do_page_up``."""
    self.do_page_up()
703
+
704
def action_page_down(self) -> None:
    """Handle the ``page_down`` action by delegating to ``do_page_down``."""
    self.do_page_down()
707
+
708
def action_view_row_detail(self) -> None:
    """Handle the ``view_row_detail`` action by delegating to ``do_view_row_detail``."""
    self.do_view_row_detail()
711
+
712
def action_delete_column(self) -> None:
    """Handle the ``delete_column`` action by delegating to ``do_delete_column``."""
    self.do_delete_column()
715
+
716
def action_hide_column(self) -> None:
    """Handle the ``hide_column`` action by delegating to ``do_hide_column``."""
    self.do_hide_column()
719
+
720
def action_expand_column(self) -> None:
    """Handle the ``expand_column`` action by delegating to ``do_expand_column``."""
    self.do_expand_column()
723
+
724
def action_toggle_rid(self) -> None:
    """Handle the ``toggle_rid`` action by delegating to ``do_toggle_rid``."""
    self.do_toggle_rid()
727
+
728
def action_show_hidden_rows_columns(self) -> None:
    """Handle the ``show_hidden_rows_columns`` action by delegating to ``do_show_hidden_rows_columns``."""
    self.do_show_hidden_rows_columns()
731
+
732
def action_sort_ascending(self) -> None:
    """Handle the ``sort_ascending`` action: call ``do_sort_by_column`` with ``descending=False``."""
    self.do_sort_by_column(descending=False)
735
+
736
def action_sort_descending(self) -> None:
    """Handle the ``sort_descending`` action: call ``do_sort_by_column`` with ``descending=True``."""
    self.do_sort_by_column(descending=True)
739
+
740
def action_save_to_file(self) -> None:
    """Handle the ``save_to_file`` action by delegating to ``do_save_to_file``."""
    self.do_save_to_file()
743
+
744
def action_show_frequency(self) -> None:
    """Handle the ``show_frequency`` action by delegating to ``do_show_frequency``."""
    self.do_show_frequency()
747
+
748
def action_show_statistics(self, scope: str = "column") -> None:
    """Handle the ``show_statistics`` action by forwarding ``scope`` to ``do_show_statistics``.

    Args:
        scope: "column" for the current column's statistics, or "dataframe" for all columns.
    """
    self.do_show_statistics(scope)
755
+
756
def action_metadata_shape(self) -> None:
    """Handle the ``metadata_shape`` action by delegating to ``do_metadata_shape``."""
    self.do_metadata_shape()
759
+
760
def action_metadata_column(self) -> None:
    """Handle the ``metadata_column`` action by delegating to ``do_metadata_column``."""
    self.do_metadata_column()
763
+
764
def action_view_rows(self) -> None:
    """Handle the ``view_rows`` action by delegating to ``do_view_rows``."""
    self.do_view_rows()
767
+
768
def action_view_rows_expr(self) -> None:
    """Handle the ``view_rows_expr`` action by delegating to ``do_view_rows_expr``."""
    self.do_view_rows_expr()
771
+
772
def action_edit_cell(self) -> None:
    """Handle the ``edit_cell`` action by delegating to ``do_edit_cell``."""
    self.do_edit_cell()
775
+
776
def action_edit_column(self) -> None:
    """Handle the ``edit_column`` action by delegating to ``do_edit_column``."""
    self.do_edit_column()
779
+
780
def action_add_column(self) -> None:
    """Handle the ``add_column`` action by delegating to ``do_add_column``."""
    self.do_add_column()
783
+
784
def action_add_column_expr(self) -> None:
    """Handle the ``add_column_expr`` action by delegating to ``do_add_column_expr``."""
    self.do_add_column_expr()
787
+
788
def action_add_link_column(self) -> None:
    """Handle the ``add_link_column`` action by delegating to ``do_add_link_column``."""
    self.do_add_link_column()
791
+
792
def action_rename_column(self) -> None:
    """Handle the ``rename_column`` action by calling ``do_rename_column`` without arguments."""
    self.do_rename_column()
795
+
796
def action_clear_cell(self) -> None:
    """Handle the ``clear_cell`` action by delegating to ``do_clear_cell``."""
    self.do_clear_cell()
799
+
800
def action_select_row(self) -> None:
    """Handle the ``select_row`` action by delegating to ``do_select_row``."""
    self.do_select_row()
803
+
804
def action_select_row_expr(self) -> None:
    """Handle the ``select_row_expr`` action by delegating to ``do_select_row_expr``."""
    self.do_select_row_expr()
807
+
808
def action_find_cursor_value(self, scope="column") -> None:
    """Handle the ``find_cursor_value`` action by forwarding ``scope`` to ``do_find_cursor_value``.

    Args:
        scope: "column" to find in the current column, "global" to find across all columns.
    """
    self.do_find_cursor_value(scope=scope)
815
+
816
def action_find_expr(self, scope="column") -> None:
    """Handle the ``find_expr`` action by forwarding ``scope`` to ``do_find_expr``.

    Args:
        scope: "column" to find in the current column, "global" to find across all columns.
    """
    self.do_find_expr(scope=scope)
823
+
824
def action_replace(self) -> None:
    """Handle the ``replace`` action by delegating to ``do_replace``."""
    self.do_replace()
827
+
828
def action_replace_global(self) -> None:
    """Handle the ``replace_global`` action by delegating to ``do_replace_global``."""
    self.do_replace_global()
831
+
832
def action_toggle_row_selection(self) -> None:
    """Handle the ``toggle_row_selection`` action by delegating to ``do_toggle_row_selection``."""
    self.do_toggle_row_selection()
835
+
836
def action_toggle_selections(self) -> None:
    """Handle the ``toggle_selections`` action by delegating to ``do_toggle_selections``."""
    self.do_toggle_selections()
839
+
840
def action_filter_rows(self) -> None:
    """Handle the ``filter_rows`` action by delegating to ``do_filter_rows``."""
    self.do_filter_rows()
843
+
844
def action_delete_row(self) -> None:
    """Handle the ``delete_row`` action by calling ``do_delete_row`` without arguments."""
    self.do_delete_row()
847
+
848
def action_delete_row_and_below(self) -> None:
    """Handle the ``delete_row_and_below`` action: call ``do_delete_row`` with ``more="below"``."""
    self.do_delete_row(more="below")
851
+
852
def action_delete_row_and_up(self) -> None:
    """Handle the ``delete_row_and_up`` action: call ``do_delete_row`` with ``more="above"``."""
    self.do_delete_row(more="above")
855
+
856
def action_duplicate_column(self) -> None:
    """Handle the ``duplicate_column`` action by delegating to ``do_duplicate_column``."""
    self.do_duplicate_column()
859
+
860
def action_duplicate_row(self) -> None:
    """Handle the ``duplicate_row`` action by delegating to ``do_duplicate_row``."""
    self.do_duplicate_row()
863
+
864
def action_undo(self) -> None:
    """Action handler: undo the most recent action.

    Delegates to ``do_undo``.
    """
    self.do_undo()
867
+
868
def action_redo(self) -> None:
    """Action handler: redo the most recently undone action.

    Delegates to ``do_redo``.
    """
    self.do_redo()
871
+
872
def action_reset(self) -> None:
    """Action handler: restore the table to its initial state.

    Delegates to ``do_reset``.
    """
    self.do_reset()
875
+
876
def action_move_column_left(self) -> None:
    """Action handler: shift the current column one position to the left.

    Delegates to ``do_move_column("left")``.
    """
    self.do_move_column("left")
879
+
880
def action_move_column_right(self) -> None:
    """Action handler: shift the current column one position to the right.

    Delegates to ``do_move_column("right")``.
    """
    self.do_move_column("right")
883
+
884
def action_move_row_up(self) -> None:
    """Action handler: shift the current row one position up.

    Delegates to ``do_move_row("up")``.
    """
    self.do_move_row("up")
887
+
888
def action_move_row_down(self) -> None:
    """Action handler: shift the current row one position down.

    Delegates to ``do_move_row("down")``.
    """
    self.do_move_row("down")
891
+
892
def action_clear_selections_and_matches(self) -> None:
    """Action handler: drop every row selection and every match highlight.

    Delegates to ``do_clear_selections_and_matches``.
    """
    self.do_clear_selections_and_matches()
895
+
896
def action_cycle_cursor_type(self) -> None:
    """Action handler: advance to the next cursor type.

    Delegates to ``do_cycle_cursor_type``.
    """
    self.do_cycle_cursor_type()
899
+
900
def action_freeze_row_column(self) -> None:
    """Action handler: open the freeze screen.

    Delegates to ``do_freeze_row_column``.
    """
    self.do_freeze_row_column()
903
+
904
def action_toggle_row_labels(self) -> None:
    """Action handler: show row labels if hidden, hide them if shown.

    Flips ``self.show_row_labels`` in place; no notification is emitted.
    """
    currently_shown = self.show_row_labels
    self.show_row_labels = not currently_shown
909
+
910
def action_cast_column_dtype(self, dtype: str | pl.DataType) -> None:
    """Action handler: cast the current column to another data type.

    Delegates to ``do_cast_column_dtype``, passing ``dtype`` through unchanged.

    Args:
        dtype: Target data type, given either as a string or as a Polars dtype.
    """
    self.do_cast_column_dtype(dtype)
913
+
914
def action_copy_cell(self) -> None:
    """Copy the value under the cursor to the clipboard.

    The value is read from ``self.df`` at the cursor's row/column index and
    converted with ``str()``. The success message shows at most the first 50
    characters of the copied text.

    Both ``IndexError`` and ``FileNotFoundError`` are reported through an
    error notification, so this action handles the same exceptions as
    ``action_copy_column`` and ``action_copy_row``.
    """
    ridx = self.cursor_row_idx
    cidx = self.cursor_col_idx

    try:
        cell_str = str(self.df.item(ridx, cidx))
        self.do_copy_to_clipboard(cell_str, f"Copied: [$success]{cell_str[:50]}[/]")
    except (FileNotFoundError, IndexError):
        self.notify(
            f"Error copying cell ([$error]{ridx}[/], [$accent]{cidx}[/])",
            title="Clipboard",
            severity="error",
            timeout=10,
        )
929
+
930
def action_copy_column(self) -> None:
    """Copy every value of the current column to the clipboard.

    Values are converted with ``str()`` and joined with newlines, one value
    per line. ``FileNotFoundError`` and ``IndexError`` are reported through an
    error notification.
    """
    col_name = self.cursor_col_name

    try:
        lines = list(map(str, self.df[col_name].to_list()))
        payload = "\n".join(lines)
        message = f"Copied [$accent]{len(lines)}[/] values from column [$success]{col_name}[/]"
        self.do_copy_to_clipboard(payload, message)
    except (FileNotFoundError, IndexError):
        self.notify(f"Error copying column [$error]{col_name}[/]", title="Clipboard", severity="error", timeout=10)
945
+
946
def action_copy_row(self) -> None:
    """Copy every value of the current row to the clipboard.

    Values are converted with ``str()`` and joined with tab characters. The
    success message reports the row as a 1-based number. ``FileNotFoundError``
    and ``IndexError`` are reported through an error notification.
    """
    ridx = self.cursor_row_idx

    try:
        fields = list(map(str, self.df.row(ridx)))
        payload = "\t".join(fields)
        message = f"Copied row [$accent]{ridx + 1}[/] with [$success]{len(fields)}[/] values"
        self.do_copy_to_clipboard(payload, message)
    except (FileNotFoundError, IndexError):
        self.notify(f"Error copying row [$error]{ridx}[/]", title="Clipboard", severity="error", timeout=10)
961
+
962
def action_show_thousand_separator(self) -> None:
    """Action handler: toggle the thousand separator in numeric display.

    Flips ``self.thousand_separator`` and then rebuilds the table through
    ``setup_table`` so the change becomes visible.
    """
    enabled = not self.thousand_separator
    self.thousand_separator = enabled
    self.setup_table()
968
+
969
def action_next_match(self) -> None:
    """Action handler: move to the next matched cell.

    Delegates to ``do_next_match``.
    """
    self.do_next_match()
972
+
973
def action_previous_match(self) -> None:
    """Action handler: move to the previous matched cell.

    Delegates to ``do_previous_match``.
    """
    self.do_previous_match()
976
+
977
def action_next_selected_row(self) -> None:
    """Action handler: move to the next selected row.

    Delegates to ``do_next_selected_row``.
    """
    self.do_next_selected_row()
980
+
981
def action_previous_selected_row(self) -> None:
    """Action handler: move to the previous selected row.

    Delegates to ``do_previous_selected_row``.
    """
    self.do_previous_selected_row()
984
+
985
def action_simple_sql(self) -> None:
    """Action handler: open the simple SQL interface screen.

    Delegates to ``do_simple_sql``.
    """
    self.do_simple_sql()
988
+
989
def action_advanced_sql(self) -> None:
    """Action handler: open the advanced SQL interface screen.

    Delegates to ``do_advanced_sql``.
    """
    self.do_advanced_sql()
992
+
993
def on_mouse_scroll_up(self, event) -> None:
    """Handle a mouse scroll-up event by loading more rows above if needed.

    The event object itself is not inspected; the work is done by
    ``load_rows_up``.
    """
    self.load_rows_up()
996
+
997
def on_mouse_scroll_down(self, event) -> None:
    """Handle a mouse scroll-down event by loading more rows below if needed.

    The event object itself is not inspected; the work is done by
    ``load_rows_down``.
    """
    self.load_rows_down()
1000
+
1001
+ # Setup & Loading
1002
def reset_df(self, new_df: pl.DataFrame, dirty: bool = True) -> None:
    """Swap in a new dataframe and clear all per-table view state.

    Only the state is reset here; the visible table is not rebuilt by this
    method (``do_reset`` calls ``setup_table`` afterwards). The undo/redo
    history is left untouched.

    Args:
        new_df: The Polars DataFrame that replaces ``self.df``.
        dirty: Value stored in ``self.dirty`` (unsaved-changes flag). Defaults to True.
    """
    self.df = new_df

    # Nothing from the new dataframe has been loaded into the widget yet.
    self.loaded_rows = 0

    # Drop view state that referred to the previous dataframe.
    self.hidden_columns = set()
    self.selected_rows = set()
    self.sorted_columns = {}
    self.fixed_rows = 0
    self.fixed_columns = 0
    self.matches = defaultdict(set)

    self.dirty = dirty
1021
+
1022
def setup_table(self) -> None:
    """Rebuild the visible table from ``self.df``.

    Resets the loaded-row bookkeeping, turns row labels on, recreates the
    columns and loads the first ``BATCH_SIZE`` rows. The cursor is moved back
    to where it was before the rebuild when that coordinate still exists.

    Row keys are 0-based indices, which map directly to dataframe row indices.
    Column keys are header names from the dataframe.
    """
    self.loaded_rows = 0
    self.loaded_ranges.clear()
    self.show_row_labels = True

    # Remember the cursor before the table content is thrown away.
    saved_row, saved_col = self.cursor_coordinate

    self.setup_columns()
    self.load_rows_range(0, self.BATCH_SIZE)

    # The old coordinate may no longer exist in the rebuilt table.
    if saved_row >= len(self.rows) or saved_col >= len(self.columns):
        return
    self.move_cursor(row=saved_row, column=saved_col)
1041
+
1042
def determine_column_widths(self) -> dict[str, int]:
    """Determine explicit widths for string columns based on sampled content.

    For String columns:
        - Minimum width: width of the column label (plus 2 for sort indicators)
        - Ideal width: widest cell among the first ``BATCH_SIZE`` rows
        - If the total exceeds the available width: columns wider than
          ``STRING_WIDTH_CAP`` are capped at ``STRING_WIDTH_CAP``, unless their
          label alone is at least that wide

    Non-string columns, columns in ``self.expanded_columns`` and columns whose
    sample contains an http(s) link get no entry, which lets the caller fall
    back to auto-sizing (``dict.get`` yields ``None``).

    Columns that are not displayed (hidden columns and the internal RID column
    when ``show_rid`` is off) are ignored entirely, so they do not consume any
    of the width budget.

    Returns:
        dict[str, int]: Mapping of column name to width, containing only the
        columns that received an explicit width.
    """
    col_widths, col_label_widths = {}, {}

    # Width budget of the table's visible content area
    available_width = self.scrollable_content_region.width

    # No string columns: nothing to size explicitly, everything auto-sizes
    if not any(dtype == pl.String for dtype in self.df.dtypes):
        return col_widths

    # Sample a reasonable number of rows to calculate widths (don't scan entire dataframe)
    sample_size = min(self.BATCH_SIZE, len(self.df))
    sample_lf = self.df.lazy().slice(0, sample_size)

    for col, dtype in zip(self.df.columns, self.df.dtypes):
        # Skip every column that setup_columns() will not display
        if col in self.hidden_columns or (col == RID and not self.show_rid):
            continue

        # Label width, with padding for sort indicators if any
        label_width = measure(self.app.console, col, 1) + 2
        col_label_widths[col] = label_width

        # Auto-sized columns still take up room: reserve at least their label width
        if dtype != pl.String or col in self.expanded_columns:
            available_width -= label_width
            continue

        try:
            sample_values = sample_lf.select(col).collect().get_column(col).drop_nulls().to_list()
            if any(val.startswith(("https://", "http://")) for val in sample_values):
                continue  # Skip link columns so they can auto-size and be clickable

            # Widest sampled cell; an all-null sample falls back to the label width
            max_cell_width = max(
                (measure(self.app.console, val, 1) for val in sample_values),
                default=label_width,
            )
            max_width = max(label_width, max_cell_width)
        except Exception as e:
            # Best effort: on any error fall back to the label width
            max_width = label_width
            self.log(f"Error determining width for column '{col}': {e}")

        col_widths[col] = max_width
        available_width -= max_width

    # Over budget: cap wide string columns (but never below a long label)
    if available_width < 0:
        for col in col_widths:
            if col_widths[col] > STRING_WIDTH_CAP and col_label_widths[col] < STRING_WIDTH_CAP:
                col_widths[col] = STRING_WIDTH_CAP

    return col_widths
1116
+
1117
def setup_columns(self) -> None:
    """Clear the table and add one column per displayed dataframe column.

    Column keys are the dataframe header names. Column labels are the header
    names too, with a sort arrow and the column's 1-based position in the sort
    order appended when the column is part of ``self.sorted_columns``.
    Hidden columns and the internal RID column (unless ``show_rid`` is set)
    are not added.
    """
    self.clear(columns=True)

    widths = self.determine_column_widths()

    # 1-based position of every sorted column within the sort order
    sort_rank = {name: rank for rank, name in enumerate(self.sorted_columns, 1)}

    for col, dtype in zip(self.df.columns, self.df.dtypes):
        if col in self.hidden_columns or (col == RID and not self.show_rid):
            continue

        header = col
        rank = sort_rank.get(col)
        if rank is not None:
            arrow = "▼" if self.sorted_columns[col] else "▲"
            header = f"{col} {arrow}{SUBSCRIPT_DIGITS.get(rank, '')}"

        # A missing width entry yields None, which means auto-size
        self.add_column(
            Text(header, justify=DtypeConfig(dtype).justify),
            key=col,
            width=widths.get(col),
        )
1148
+
1149
+ def _calculate_load_range(self, start: int, stop: int) -> list[tuple[int, int]]:
1150
+ """Calculate the actual ranges to load, accounting for already-loaded ranges.
1151
+
1152
+ Handles complex cases where a loaded range is fully contained within the requested
1153
+ range (creating head and tail segments to load). All overlapping/adjacent loaded
1154
+ ranges are merged first to minimize gaps.
1155
+
1156
+ Args:
1157
+ start: Requested start index (0-based).
1158
+ stop: Requested stop index (0-based, exclusive).
1159
+
1160
+ Returns:
1161
+ List of (actual_start, actual_stop) tuples to load. Empty list if the entire
1162
+ requested range is already loaded.
1163
+
1164
+ Example:
1165
+ If loaded ranges are [(150, 250)] and requesting (100, 300):
1166
+ - Returns [(100, 150), (250, 300)] to load head and tail
1167
+ If loaded ranges are [(0, 100), (100, 200)] and requesting (50, 150):
1168
+ - After merging, loaded_ranges becomes [(0, 200)]
1169
+ - Returns [] (already fully loaded)
1170
+ """
1171
+ if not self.loaded_ranges:
1172
+ return [(start, stop)]
1173
+
1174
+ # Sort loaded ranges by start index
1175
+ sorted_ranges = sorted(self.loaded_ranges)
1176
+
1177
+ # Merge overlapping/adjacent ranges
1178
+ merged = []
1179
+ for range_start, range_stop in sorted_ranges:
1180
+ # Fully covered, no need to load anything
1181
+ if range_start <= start and range_stop >= stop:
1182
+ return []
1183
+ # Overlapping or adjacent: merge
1184
+ elif merged and range_start <= merged[-1][1]:
1185
+ merged[-1] = (merged[-1][0], max(merged[-1][1], range_stop))
1186
+ else:
1187
+ merged.append((range_start, range_stop))
1188
+
1189
+ self.loaded_ranges = merged
1190
+
1191
+ # Calculate ranges to load by finding gaps in the merged ranges
1192
+ ranges_to_load = []
1193
+ current_pos = start
1194
+
1195
+ for range_start, range_stop in merged:
1196
+ # If there's a gap before this loaded range, add it to load list
1197
+ if current_pos < range_start and current_pos < stop:
1198
+ gap_end = min(range_start, stop)
1199
+ ranges_to_load.append((current_pos, gap_end))
1200
+ current_pos = range_stop
1201
+ elif current_pos >= range_stop:
1202
+ # Already moved past this loaded range
1203
+ continue
1204
+ else:
1205
+ # Current position is inside this loaded range, skip past it
1206
+ current_pos = max(current_pos, range_stop)
1207
+
1208
+ # If there's remaining range after all loaded ranges, add it
1209
+ if current_pos < stop:
1210
+ ranges_to_load.append((current_pos, stop))
1211
+
1212
+ return ranges_to_load
1213
+
1214
+ def _merge_loaded_ranges(self) -> None:
1215
+ """Merge adjacent and overlapping ranges in self.loaded_ranges.
1216
+
1217
+ Ranges like (0, 100) and (100, 200) are merged into (0, 200).
1218
+ """
1219
+ if len(self.loaded_ranges) <= 1:
1220
+ return
1221
+
1222
+ # Sort by start index
1223
+ sorted_ranges = sorted(self.loaded_ranges)
1224
+
1225
+ # Merge overlapping/adjacent ranges
1226
+ merged = [sorted_ranges[0]]
1227
+ for range_start, range_stop in sorted_ranges[1:]:
1228
+ # Overlapping or adjacent: merge
1229
+ if range_start <= merged[-1][1]:
1230
+ merged[-1] = (merged[-1][0], max(merged[-1][1], range_stop))
1231
+ else:
1232
+ merged.append((range_start, range_stop))
1233
+
1234
+ self.loaded_ranges = merged
1235
+
1236
+ def _find_insert_position_for_row(self, ridx: int) -> int:
1237
+ """Find the correct table position to insert a row with the given dataframe index.
1238
+
1239
+ In the table display, rows are ordered by their dataframe index, regardless of
1240
+ the internal row keys. This method finds where a row should be inserted based on
1241
+ its dataframe index and the indices of already-loaded rows.
1242
+
1243
+ Args:
1244
+ ridx: The 0-based dataframe row index.
1245
+
1246
+ Returns:
1247
+ The 0-based table position where the row should be inserted.
1248
+ """
1249
+ # Count how many already-loaded rows have lower dataframe indices
1250
+ # Iterate through loaded rows instead of iterating 0..ridx for efficiency
1251
+ insert_pos = 0
1252
+ for row_key in self._row_locations:
1253
+ loaded_ridx = int(row_key.value)
1254
+ if loaded_ridx < ridx:
1255
+ insert_pos += 1
1256
+
1257
+ return insert_pos
1258
+
1259
def load_rows_segment(self, segment_start: int, segment_stop: int) -> int:
    """Load one contiguous segment of dataframe rows into the table.

    The segment is recorded in ``self.loaded_ranges`` first, then each row is
    formatted and inserted at the position that keeps the table ordered by
    dataframe index. Hidden columns and the internal RID column (unless
    ``show_rid`` is set) are left out. Selected rows are highlighted as a
    whole; otherwise only the cells recorded in ``self.matches`` are.

    Args:
        segment_start: Start loading rows from this index (0-based).
        segment_stop: Stop loading rows when this index is reached (0-based, exclusive).

    Returns:
        The number of rows in the loaded slice.
    """
    # Record this range before loading
    self.loaded_ranges.append((segment_start, segment_stop))

    chunk = self.df.slice(segment_start, segment_stop - segment_start)
    columns = self.df.columns
    column_dtypes = self.df.dtypes

    for (ridx, row), rid in zip(enumerate(chunk.rows(), segment_start), chunk[RID]):
        row_selected = rid in self.selected_rows
        matched_cols = self.matches.get(rid, set())

        vals, dtypes, styles = [], [], []
        for val, col, dtype in zip(row, columns, column_dtypes, strict=True):
            not_displayed = col in self.hidden_columns or (col == RID and not self.show_rid)
            if not_displayed:
                continue

            vals.append(val)
            dtypes.append(dtype)
            # Whole row when selected, otherwise just the matching cells
            highlight = row_selected or col in matched_cols
            styles.append(HIGHLIGHT_COLOR if highlight else None)

        cells = format_row(vals, dtypes, styles=styles, thousand_separator=self.thousand_separator)

        # Key is the 0-based dataframe index, label is the 1-based row number
        self.insert_row(
            *cells,
            key=str(ridx),
            label=str(ridx + 1),
            position=self._find_insert_position_for_row(ridx),
        )

    loaded = len(chunk)
    self.loaded_rows += loaded
    return loaded
1305
+
1306
def load_rows_range(self, start: int, stop: int) -> int:
    """Load the not-yet-loaded parts of a row range into the table.

    Row keys are 0-based indices as strings, which map directly to dataframe row indices.
    Row labels are 1-based indices as strings.

    Steps:
        1. Clamp the request to ``[0, len(self.df)]``
        2. Ask ``_calculate_load_range`` which segments are still missing
        3. Load each missing segment with ``load_rows_segment``
        4. Merge the bookkeeping ranges with ``_merge_loaded_ranges``

    Any exception raised along the way is logged, reported through an error
    notification, and turned into a return value of 0.

    Args:
        start: Start loading rows from this index (0-based).
        stop: Stop loading rows when this index is reached (0-based, exclusive).

    Returns:
        The number of rows loaded by this call (0 if nothing was needed or on error).
    """
    total_rows = len(self.df)
    start = max(0, start)
    stop = min(stop, total_rows)

    try:
        pending = self._calculate_load_range(start, stop)
        if not pending:
            return 0  # Already loaded

        range_count = sum(self.load_rows_segment(seg_start, seg_stop) for seg_start, seg_stop in pending)

        # Keep the list of loaded ranges compact for future lookups
        self._merge_loaded_ranges()

        self.log(f"Loaded {range_count} rows for range {start}-{stop}/{len(self.df)}")
        return range_count

    except Exception as e:
        self.notify("Error loading rows", title="Load", severity="error", timeout=10)
        self.log(f"Error loading rows: {str(e)}")
        return 0
1350
+
1351
def load_rows_up(self) -> None:
    """Load rows above the current viewport when they are not loaded yet.

    Does nothing once every dataframe row has been loaded. Otherwise the row
    ``BUFFER_SIZE`` below the scroll offset is looked up, the range around
    ``2 * BUFFER_SIZE`` rows above it is requested, and the cursor is moved
    down by the number of rows that were inserted.
    """
    if self.loaded_rows >= len(self.df):
        return  # Everything is loaded already

    top_row_index = int(self.scroll_y) + BUFFER_SIZE
    top_row_key = self.get_row_key(top_row_index)

    # Without a key at that table index, fall back to dataframe row 0
    top_ridx = int(top_row_key.value) if top_row_key else 0

    start, stop = self._round_to_nearest_hundreds(top_ridx - BUFFER_SIZE * 2)
    range_count = self.load_rows_range(start, stop)

    if range_count <= 0:
        return

    # Rows were inserted above: shift the cursor by the same amount
    self.move_cursor(row=top_row_index + range_count)
    self.log(f"Loaded up: {range_count} rows in range {start}-{stop}/{len(self.df)}")
1373
+
1374
def load_rows_down(self) -> None:
    """Load rows below the current viewport when they are not loaded yet.

    Does nothing once every dataframe row has been loaded. Otherwise the row
    ``BUFFER_SIZE`` above the bottom of the viewport is looked up and the range
    around ``2 * BUFFER_SIZE`` rows below it is requested.

    The scroll offset is truncated to an integer before it is used as a table
    row index, exactly as ``load_rows_up`` does, so ``get_row_key`` always
    receives an ``int``.
    """
    # If we've loaded everything, no need to check
    if self.loaded_rows >= len(self.df):
        return

    visible_row_count = self.scrollable_content_region.height - (self.header_height if self.show_header else 0)
    bottom_row_index = int(self.scroll_y) + visible_row_count - BUFFER_SIZE

    bottom_row_key = self.get_row_key(bottom_row_index)
    if bottom_row_key:
        bottom_ridx = int(bottom_row_key.value)
    else:
        bottom_ridx = 0  # No bottom row key at index, default to 0

    # Load downward
    start, stop = self._round_to_nearest_hundreds(bottom_ridx + BUFFER_SIZE * 2)
    range_count = self.load_rows_range(start, stop)

    if range_count > 0:
        self.log(f"Loaded down: {range_count} rows in range {start}-{stop}/{len(self.df)}")
1395
+
1396
def insert_row(
    self,
    *cells: CellType,
    height: int | None = 1,
    key: str | None = None,
    label: TextType | None = None,
    position: int | None = None,
) -> RowKey:
    """Insert a row at a specific position in the DataTable.

    Rows at and after the insertion position move down by one, and their
    entries in ``self._row_locations`` are rewritten accordingly.

    Args:
        *cells: Positional arguments should contain cell data.
        height: The height of a row (in lines). Use `None` to auto-detect the optimal
            height.
        key: A key which uniquely identifies this row. If None, it will be generated
            for you and returned.
        label: The label for the row. Will be displayed to the left if supplied.
        position: The 0-based row index where the new row should be inserted.
            If None or not below the current row count, the row is appended
            (same as add_row). Negative values insert at the top.

    Returns:
        Unique identifier for this row. Can be used to retrieve this row regardless
        of its current location in the DataTable (it could have moved after
        being added due to sorting or insertion/deletion of other rows).

    Raises:
        DuplicateKey: If a row with the given key already exists.
        ValueError: If more cells are provided than there are columns.
    """
    existing_rows = self.row_count

    # Appending is exactly what add_row() does
    if position is None or position >= existing_rows:
        return self.add_row(*cells, height=height, key=key, label=label)

    if position < 0:
        position = 0

    row_key = RowKey(key)
    if row_key in self._row_locations:
        raise DuplicateKey(f"The row key {row_key!r} already exists.")

    if len(cells) > len(self.ordered_columns):
        raise ValueError("More values provided than there are columns.")

    # Rebuild the key -> index mapping with every row at or after the
    # insertion point shifted down by one
    shifted_locations = TwoWayDict({})
    for existing_key in self._row_locations:
        current_idx = self.get_row_idx(existing_key)
        moves_down = position <= current_idx < existing_rows
        shifted_locations[existing_key] = current_idx + 1 if moves_down else current_idx
    self._row_locations = shifted_locations

    # Register the new row at the slot that was just opened
    self._row_locations[row_key] = position
    self._data[row_key] = {column.key: cell for column, cell in zip_longest(self.ordered_columns, cells)}

    if isinstance(label, str):
        label = Text.from_markup(label, end="")

    # Rows with auto-height get a height of 0 because 1) we need an integer height
    # to do some intermediate computations and 2) because 0 doesn't impact the data
    # table while we don't figure out how tall this row is.
    self.rows[row_key] = Row(row_key, height or 0, label, height is None)
    self._new_rows.add(row_key)
    self._require_update_dimensions = True
    # Re-assign the coordinate to itself, as in the original implementation
    self.cursor_coordinate = self.cursor_coordinate

    # If a position has opened for the cursor to appear, where it previously
    # could not (e.g. when there's no data in the table), then a highlighted
    # event is posted, since there's now a highlighted cell when there wasn't
    # before.
    cell_now_available = self.row_count == 1 and len(self.columns) > 0
    visible_cursor = self.show_cursor and self.cursor_type != "none"
    if cell_now_available and visible_cursor:
        self._highlight_cursor()

    self._update_count += 1
    self.check_idle()
    return row_key
1496
+
1497
+ # Navigation
1498
def do_jump_top(self) -> None:
    """Move the cursor to the first row of the table."""
    self.move_cursor(row=0)
1501
+
1502
def do_jump_bottom(self) -> None:
    """Load the tail of the dataframe and move the cursor to the last row.

    The start of the requested range is aligned to a multiple of
    ``BATCH_SIZE`` and chosen so that at least ``BATCH_SIZE`` rows are
    requested; a resulting negative start is passed on to ``load_rows_range``
    as it is.
    """
    batch = self.BATCH_SIZE
    stop = len(self.df)
    start = max(0, stop - batch)

    # Round the start up to the next batch boundary
    remainder = start % batch
    if remainder:
        start += batch - remainder

    # Rounding up may have left less than one batch: step back one batch
    if stop - start < batch:
        start -= batch

    self.load_rows_range(start, stop)
    self.move_cursor(row=self.row_count - 1)
1515
+
1516
def do_page_up(self) -> None:
    """Move the cursor one page up, loading the target rows first if needed.

    With a visible cell or row cursor the target dataframe row is computed
    from the viewport height plus ``BUFFER_SIZE``, the surrounding range is
    loaded, and the cursor is moved to that row while keeping its column.
    In every other case the base class's ``action_page_up`` is used.
    """
    self._set_hover_cursor(False)

    if not (self.show_cursor and self.cursor_type in ("cell", "row")):
        super().action_page_up()
        return

    header_rows = self.header_height if self.show_header else 0
    height = self.scrollable_content_region.height - header_rows

    col_idx = self.cursor_column
    ridx = self.cursor_row_idx
    next_ridx = max(0, ridx - height - BUFFER_SIZE)

    # Make sure the target row exists in the table before moving to it
    start, stop = self._round_to_nearest_hundreds(next_ridx)
    self.load_rows_range(start, stop)

    self.move_cursor(row=self.get_row_idx(str(next_ridx)), column=col_idx)
1531
+
1532
def do_page_down(self) -> None:
    """Move the cursor one page down, then load more rows below if needed.

    Paging itself is done by the base class's ``action_page_down``; the
    follow-up ``load_rows_down`` call handles the loading.
    """
    super().action_page_down()
    self.load_rows_down()
1536
+
1537
+ # History & Undo
1538
def create_history(self, description: str) -> "History":
    """Build a ``History`` snapshot of the table's current state.

    The mutable containers (hidden columns, selected rows, sorted columns and
    the per-row match sets) are copied so that later changes to the table do
    not alter the snapshot. The dataframes themselves are stored by reference.

    Args:
        description: Description of the action this snapshot belongs to.

    Returns:
        The newly created ``History`` entry. It is not pushed onto any stack here.
    """
    return History(
        description=description,
        df=self.df,
        df_view=self.df_view,
        filename=self.filename,
        hidden_columns=self.hidden_columns.copy(),
        selected_rows=self.selected_rows.copy(),
        sorted_columns=self.sorted_columns.copy(),
        matches={k: v.copy() for k, v in self.matches.items()},
        fixed_rows=self.fixed_rows,
        fixed_columns=self.fixed_columns,
        cursor_coordinate=self.cursor_coordinate,
        dirty=self.dirty,
    )
1554
+
1555
def apply_history(self, history: History) -> None:
    """Restore the table to the state stored in a history entry.

    Mutable containers are copied out of the entry so the entry itself stays
    unchanged and can be applied again. ``self.matches`` is always restored as
    a ``defaultdict(set)``, matching how ``reset_df`` initialises it, so
    looking up a row without matches keeps yielding an empty set after an
    undo or redo.

    Args:
        history: The snapshot to apply. ``None`` is ignored.
    """
    if history is None:
        return

    # Restore state
    self.df = history.df
    self.df_view = history.df_view
    self.filename = history.filename
    self.hidden_columns = history.hidden_columns.copy()
    self.selected_rows = history.selected_rows.copy()
    self.sorted_columns = history.sorted_columns.copy()
    # Copy each match set and keep the defaultdict behaviour
    self.matches = defaultdict(set, {k: v.copy() for k, v in (history.matches or {}).items()})
    self.fixed_rows = history.fixed_rows
    self.fixed_columns = history.fixed_columns
    self.cursor_coordinate = history.cursor_coordinate
    self.dirty = history.dirty

    # Recreate table for display
    self.setup_table()
1575
+
1576
def add_history(self, description: str, dirty: bool = False, clear_redo: bool = True) -> None:
    """Push a snapshot of the current state onto the undo stack.

    Args:
        description: Description of the action for this history entry.
        dirty: Whether this operation modifies the data (True) or just display state (False).
            When True, ``self.dirty`` is set; it is never cleared here.
        clear_redo: Whether to empty the redo stack, as is appropriate when a
            new action is performed. Defaults to True.
    """
    snapshot = self.create_history(description)
    self.histories_undo.append(snapshot)

    if clear_redo:
        self.histories_redo.clear()

    if dirty:
        self.dirty = True
1592
+
1593
def do_undo(self) -> None:
    """Revert the most recent action.

    The current state is first saved on the redo stack (under the description
    of the action being undone), then the popped undo entry is applied. With
    an empty undo stack only a warning notification is shown.
    """
    if not self.histories_undo:
        self.notify("No actions to undo", title="Undo", severity="warning")
        return

    previous = self.histories_undo.pop()

    # Snapshot the present state so the undo can itself be redone
    current = self.create_history(previous.description)
    self.histories_redo.append(current)

    self.apply_history(previous)

    self.notify(f"Reverted: {previous.description}", title="Undo")
1607
+
1608
def do_redo(self) -> None:
    """Reapply the most recently undone action.

    The current state is pushed onto the undo stack without clearing the redo
    stack, then the popped redo entry is applied. With an empty redo stack
    only a warning notification is shown.
    """
    if not self.histories_redo:
        self.notify("No actions to redo", title="Redo", severity="warning")
        return

    undone = self.histories_redo.pop()
    description = undone.description

    # Keep the remaining redo entries: this is not a new user action
    self.add_history(description, clear_redo=False)

    self.apply_history(undone)

    self.notify(f"Reapplied: {description}", title="Redo")
1625
+
1626
def do_reset(self) -> None:
    """Restore the table to its initial state.

    Reinstalls ``self.dataframe`` through ``reset_df`` with the dirty flag
    cleared, rebuilds the display and notifies the user.
    """
    original_df = self.dataframe
    self.reset_df(original_df, dirty=False)
    self.setup_table()
    self.notify("Restored initial state", title="Reset")
1631
+
1632
+ # Display
1633
def do_cycle_cursor_type(self) -> None:
    """Switch to the cursor type that follows the current one in ``CURSOR_TYPES``.

    The successor is chosen by ``get_next_item``; no notification is emitted.
    """
    self.cursor_type = get_next_item(CURSOR_TYPES, self.cursor_type)
1639
+
1640
def do_view_row_detail(self) -> None:
    """Open a ``RowDetailScreen`` for the row under the cursor."""
    detail_screen = RowDetailScreen(self.cursor_row_idx, self)
    self.app.push_screen(detail_screen)
1646
+
1647
def do_show_frequency(self) -> None:
    """Open a ``FrequencyScreen`` for the column under the cursor."""
    frequency_screen = FrequencyScreen(self.cursor_col_idx, self)
    self.app.push_screen(frequency_screen)
1653
+
1654
def do_show_statistics(self, scope: str = "column") -> None:
    """Open a ``StatisticsScreen`` for the current column or the whole dataframe.

    Args:
        scope: "dataframe" for statistics over all columns; any other value
            (by default "column") for the column under the cursor.
    """
    # col_idx=None asks the screen for whole-dataframe statistics
    target_col = None if scope == "dataframe" else self.cursor_col_idx
    self.app.push_screen(StatisticsScreen(self, col_idx=target_col))
1667
+
1668
def do_metadata_shape(self) -> None:
    """Open the ``MetaShape`` screen (row and column counts of the dataframe)."""
    shape_screen = MetaShape(self)
    self.app.push_screen(shape_screen)
1671
+
1672
def do_metadata_column(self) -> None:
    """Open the ``MetaColumnScreen`` (metadata for all dataframe columns)."""
    column_screen = MetaColumnScreen(self)
    self.app.push_screen(column_screen)
1675
+
1676
def do_freeze_row_column(self) -> None:
    """Open the ``FreezeScreen``; its result is handled by ``freeze_row_column``."""
    self.app.push_screen(FreezeScreen(), callback=self.freeze_row_column)
1679
+
1680
+ def freeze_row_column(self, result: tuple[int, int] | None) -> None:
1681
+ """Handle result from PinScreen.
1682
+
1683
+ Args:
1684
+ result: Tuple of (fixed_rows, fixed_columns) or None if cancelled.
1685
+ """
1686
+ if result is None:
1687
+ return
1688
+
1689
+ fixed_rows, fixed_columns = result
1690
+
1691
+ # Add to history
1692
+ self.add_history(f"Pinned [$success]{fixed_rows}[/] rows and [$accent]{fixed_columns}[/] columns")
1693
+
1694
+ # Apply the pin settings to the table
1695
+ if fixed_rows >= 0:
1696
+ self.fixed_rows = fixed_rows
1697
+ if fixed_columns >= 0:
1698
+ self.fixed_columns = fixed_columns
1699
+
1700
+ # self.notify(f"Pinned [$success]{fixed_rows}[/] rows and [$accent]{fixed_columns}[/] columns", title="Pin")
1701
+
1702
def do_hide_column(self) -> None:
    """Remove the column under the cursor from the display; the dataframe is not modified."""
    hidden_key = self.cursor_col_key
    col_name = hidden_key.value
    position = self.cursor_column

    # Record the action first
    self.add_history(f"Hid column [$success]{col_name}[/]")

    # Drop the column from the widget and remember its name as hidden
    self.remove_column(hidden_key)
    self.hidden_columns.add(col_name)

    # If the rightmost column was hidden, step the cursor back onto the last remaining one
    if position >= len(self.columns):
        self.move_cursor(column=len(self.columns) - 1)
1722
+
1723
def do_expand_column(self) -> None:
    """Toggle the cursor's string column between its capped width and its widest loaded cell.

    Non-string columns are ignored. Nothing changes when no loaded cell is wider than the
    column label (name plus padding).
    """
    col_idx = self.cursor_col_idx
    col_key = self.cursor_col_key
    col_name = col_key.value

    # Only string columns can be expanded
    if self.df.dtypes[col_idx] != pl.String:
        return

    col: Column = self.columns[col_key]

    # Baseline width: the column name plus padding
    label_width = len(col_name) + 2

    try:
        # Measure every loaded row of this column and keep the widest
        widest = label_width
        for row_idx in range(self.loaded_rows):
            cell_text = str(self.df.item(row_idx, col_idx))
            widest = max(widest, measure(self.app.console, cell_text, 1))

        # No cell exceeds the label, so there is nothing to expand
        if widest <= label_width:
            return

        if col_name in self.expanded_columns:
            # Already expanded: shrink back to the capped width
            col.width = max(label_width, STRING_WIDTH_CAP)
            self.expanded_columns.remove(col_name)
        else:
            # Not expanded yet: grow to the widest measured cell
            self.expanded_columns.add(col_name)
            col.width = widest

    except Exception as e:
        self.notify(
            f"Error expanding column [$error]{col_name}[/]", title="Expand Column", severity="error", timeout=10
        )
        self.log(f"Error expanding column `{col_name}`: {str(e)}")

    # Force the table to recompute its dimensions and redraw
    self._update_count += 1
    self._require_update_dimensions = True
    self.refresh(layout=True)
1777
+
1778
def do_toggle_rid(self) -> None:
    """Flip the ``show_rid`` flag for the internal RID column and rebuild the table."""
    self.show_rid = False if self.show_rid else True

    # The table is rebuilt so the change takes effect
    self.setup_table()
1784
+
1785
def do_show_hidden_rows_columns(self) -> None:
    """Bring back hidden columns and, if a filtered view is active, the full dataframe."""
    nothing_hidden = not (self.hidden_columns or self.df_view is not None)
    if nothing_hidden:
        self.notify("No hidden rows or columns to show", title="Show", severity="warning")
        return

    # Record the action first
    self.add_history("Showed hidden rows/columns")

    # df_view holds the full dataframe while a filtered view is shown; swap it back in
    full_df = self.df_view
    if full_df is not None:
        self.df = full_df
        self.df_view = None

    # Forget which columns were hidden, then rebuild the display
    self.hidden_columns.clear()
    self.setup_table()

    self.notify("Showed hidden row(s) and/or hidden column(s)", title="Show")
1806
+
1807
+ # Sort
1808
def do_sort_by_column(self, descending: bool = False) -> None:
    """Sort the table by the column under the cursor, keeping a multi-column sort order.

    - A column not yet in the sort keys is appended with the requested direction.
    - A column already sorted in the same direction is removed from the sort keys.
    - A column sorted in the other direction is re-added at the end with the new direction.

    When no sort keys remain, rows are ordered by the RID column instead.

    Args:
        descending: If True, sort in descending order. If False, ascending order.
    """
    col_name = self.cursor_col_name
    col_idx = self.cursor_column

    # Direction this column is currently sorted in (None when it is not a sort key)
    previous = self.sorted_columns.get(col_name)

    # Record the action first
    self.add_history(f"Sorted on column [$success]{col_name}[/]", dirty=True)

    # Drop any existing key, then re-append unless the same direction was requested again
    if previous is not None:
        del self.sorted_columns[col_name]
    if previous is None or previous != descending:
        self.sorted_columns[col_name] = descending

    lf = self.df.lazy()

    if self.sorted_columns:
        # Multi-column sort in insertion order
        sort_by = {
            "by": list(self.sorted_columns.keys()),
            "descending": list(self.sorted_columns.values()),
            "nulls_last": True,
        }
    else:
        # No sort keys left: order rows by RID
        sort_by = {"by": RID}

    df_sorted = lf.sort(**sort_by).collect()

    # Keep the full view in the same order as the displayed frame
    if self.df_view is not None:
        self.df_view = self.df_view.lazy().sort(**sort_by).collect()

    self.df = df_sorted

    # Rebuild the display and put the cursor at the top of the sorted column
    self.setup_table()
    self.move_cursor(column=col_idx, row=0)
1866
+
1867
+ # Edit
1868
def do_edit_cell(self, ridx: int = None, cidx: int = None) -> None:
    """Open EditCellScreen for a cell; its result is delivered to ``edit_cell``.

    Args:
        ridx: Row index of the cell; defaults to the cursor's row index when None.
        cidx: Column index of the cell; defaults to the cursor's column index when None.
    """
    if ridx is None:
        ridx = self.cursor_row_idx
    if cidx is None:
        cidx = self.cursor_col_idx

    edit_screen = EditCellScreen(ridx, cidx, self.df)
    self.app.push_screen(edit_screen, callback=self.edit_cell)
1878
+
1879
def edit_cell(self, result) -> None:
    """Apply the outcome of EditCellScreen to the dataframe and the displayed cell.

    Args:
        result: Tuple of (ridx, cidx, new_value), or None if the screen was cancelled.
            When new_value is None the edit screen is opened again for the same cell.
    """
    if result is None:
        return

    ridx, cidx, new_value = result

    # No value came back: reopen the screen for the same cell
    if new_value is None:
        self.app.push_screen(
            EditCellScreen(ridx, cidx, self.df),
            callback=self.edit_cell,
        )
        return

    col_name = self.df.columns[cidx]

    # Record the action first
    self.add_history(f"Edited cell [$success]({ridx + 1}, {col_name})[/]", dirty=True)

    try:
        replacement = pl.lit(new_value)
        unchanged = pl.col(col_name)

        # Replace the value only at row position `ridx`
        at_row = pl.arange(0, len(self.df)) == ridx
        self.df = self.df.with_columns(pl.when(at_row).then(replacement).otherwise(unchanged).alias(col_name))

        # In the full view the row is located through its RID value
        if self.df_view is not None:
            rid = self.df.item(ridx, self.df.columns.index(RID))
            self.df_view = self.df_view.with_columns(
                pl.when(pl.col(RID) == rid).then(replacement).otherwise(unchanged).alias(col_name)
            )

        # Render the stored value (or the null placeholder) with the column's dtype styling
        shown = self.df.item(ridx, cidx)
        if shown is None:
            shown = NULL_DISPLAY
        dc = DtypeConfig(self.df.dtypes[cidx])
        cell_text = Text(str(shown), style=dc.style, justify=dc.justify)

        # The display is addressed with string keys: row index and column name
        self.update_cell(str(ridx), col_name, cell_text, update_width=True)

    except Exception as e:
        self.notify(
            f"Error updating cell ([$error]{ridx}[/], [$accent]{col_name}[/])",
            title="Edit Cell",
            severity="error",
            timeout=10,
        )
        self.log(f"Error updating cell ({ridx}, {col_name}): {str(e)}")
1939
+
1940
def do_edit_column(self) -> None:
    """Open EditColumnScreen for the cursor's column; its result is delivered to ``edit_column``."""
    column_screen = EditColumnScreen(self.cursor_col_idx, self.df)
    self.app.push_screen(column_screen, callback=self.edit_column)
1949
+
1950
def edit_column(self, result) -> None:
    """Overwrite a whole column with a null, an expression, or a literal value.

    Args:
        result: Tuple of (term, cidx), or None if the screen was cancelled. The term is
            treated as null when it is None or equals NULL, as an expression when
            ``tentative_expr`` accepts it, and as a literal value otherwise.
    """
    if result is None:
        return
    term, cidx = result

    col_name = self.df.columns[cidx]

    wants_null = term is None or term == NULL
    if wants_null:
        expr = pl.lit(None)

    elif tentative_expr(term):
        # The term looks like an expression: validate it against the current columns
        try:
            expr = validate_expr(term, self.df.columns, cidx)
        except Exception as e:
            self.notify(
                f"Error validating expression [$error]{term}[/]", title="Edit Column", severity="error", timeout=10
            )
            self.log(f"Error validating expression `{term}`: {str(e)}")
            return

    else:
        # Literal value: convert to the column's dtype, falling back to a string literal
        dtype = self.df.dtypes[cidx]
        try:
            expr = pl.lit(DtypeConfig(dtype).convert(term))
        except Exception:
            self.notify(
                f"Error converting [$error]{term}[/] to [$accent]{dtype}[/]. Cast to string.",
                title="Edit",
                severity="error",
            )
            expr = pl.lit(str(term))

    # Record the action before changing the data
    self.add_history(f"Edited column [$success]{col_name}[/] with expression", dirty=True)

    try:
        self.df = self.df.lazy().with_columns(expr.alias(col_name)).collect()

        # Copy the new values into the full view for the rows present in df, matched on RID
        if self.df_view is not None:
            col_updated = f"^_{col_name}_^"
            col_exists = "^_exists_^"

            # Updated values plus a marker column that is True for every row present in df
            lf_updated = self.df.lazy().select(
                RID, pl.col(col_name).alias(col_updated), pl.lit(True).alias(col_exists)
            )

            # The marker (rather than a null check) decides, so updated nulls are carried over too
            merged_value = (
                pl.when(pl.col(col_exists)).then(pl.col(col_updated)).otherwise(pl.col(col_name)).alias(col_name)
            )
            self.df_view = (
                self.df_view.lazy()
                .join(lf_updated, on=RID, how="left")
                .with_columns(merged_value)
                .drop(col_updated, col_exists)
                .collect()
            )
    except Exception as e:
        self.notify(
            f"Error applying expression: [$error]{term}[/] to column [$accent]{col_name}[/]",
            title="Edit Column",
            severity="error",
            timeout=10,
        )
        self.log(f"Error applying expression `{term}` to column `{col_name}`: {str(e)}")
        return

    # Rebuild the display from the updated dataframe
    self.setup_table()
2030
+
2031
def do_rename_column(self, col_idx: int | None) -> None:
    """Open RenameColumnScreen for a column; its result is delivered to ``rename_column``.

    Args:
        col_idx: Index of the displayed column to rename; the cursor's column when None.
    """
    if col_idx is None:
        col_idx = self.cursor_column
    current_name = self.get_col_key(col_idx).value

    rename_screen = RenameColumnScreen(col_idx, current_name, self.df.columns)
    self.app.push_screen(rename_screen, callback=self.rename_column)
2041
+
2042
def rename_column(self, result) -> None:
    """Handle result from RenameColumnScreen.

    Renames the column in the dataframe (and in the full view, if any) and carries the new
    name over to every name-keyed piece of state: the sort keys, the cell matches, and the
    expanded/hidden column sets.

    Args:
        result: Tuple of (col_idx, col_name, new_name), or None if the screen was cancelled.
            When new_name is None the rename screen is opened again for the same column.
    """
    if result is None:
        return

    col_idx, col_name, new_name = result
    if new_name is None:
        self.app.push_screen(
            RenameColumnScreen(col_idx, col_name, self.df.columns),
            callback=self.rename_column,
        )
        return

    # Add to history
    self.add_history(f"Renamed column [$success]{col_name}[/] to [$accent]{new_name}[/]", dirty=True)

    # Rename the column in the dataframe
    self.df = self.df.rename({col_name: new_name})

    # Also update the view if applicable
    if self.df_view is not None:
        self.df_view = self.df_view.rename({col_name: new_name})

    # Swap the key in place so the sort priority order is preserved
    if col_name in self.sorted_columns:
        self.sorted_columns = {
            (new_name if col == col_name else col): order for col, order in self.sorted_columns.items()
        }

    # Update matches if this column had cell matches
    for cols in self.matches.values():
        if col_name in cols:
            cols.remove(col_name)
            cols.add(new_name)

    # These sets are keyed by column name as well; without this the old name would linger
    for tracked in (self.expanded_columns, self.hidden_columns):
        if col_name in tracked:
            tracked.remove(col_name)
            tracked.add(new_name)

    # Recreate table for display
    self.setup_table()

    # Move cursor to the renamed column
    self.move_cursor(column=col_idx)
2088
+
2089
def do_clear_cell(self) -> None:
    """Set the cell under the cursor to null in the data and show the null placeholder.

    Raises:
        Exception: Any error raised while updating is reported via notify/log and re-raised.
    """
    row_key, col_key = self.cursor_key
    ridx = self.cursor_row_idx
    cidx = self.cursor_col_idx
    col_name = self.cursor_col_name

    # Record the action first
    self.add_history(f"Cleared cell [$success]({ridx + 1}, {col_name})[/]", dirty=True)

    try:
        null_value = pl.lit(None)
        unchanged = pl.col(col_name)

        # Null out the value only at row position `ridx`
        at_row = pl.arange(0, len(self.df)) == ridx
        self.df = self.df.with_columns(pl.when(at_row).then(null_value).otherwise(unchanged).alias(col_name))

        # In the full view the row is located through its RID value
        if self.df_view is not None:
            rid = self.df.item(ridx, self.df.columns.index(RID))
            self.df_view = self.df_view.with_columns(
                pl.when(pl.col(RID) == rid).then(null_value).otherwise(unchanged).alias(col_name)
            )

        # Show the null placeholder with the column's dtype styling
        dc = DtypeConfig(self.df.dtypes[cidx])
        placeholder = Text(NULL_DISPLAY, style=dc.style, justify=dc.justify)
        self.update_cell(row_key, col_key, placeholder)

    except Exception as e:
        self.notify(
            f"Error clearing cell ([$error]{ridx}[/], [$accent]{col_name}[/])",
            title="Clear Cell",
            severity="error",
            timeout=10,
        )
        self.log(f"Error clearing cell ({ridx}, {col_name}): {str(e)}")
        raise e
2132
+
2133
def do_add_column(self, col_name: str = None) -> None:
    """Add an all-null column right after the column under the cursor.

    Args:
        col_name: Name for the new column. When empty or None, a unique name of the form
            "new_col", "new_col_1", "new_col_2", ... is generated.

    Raises:
        Exception: Any error raised while building the new dataframe is reported via
            notify/log and then re-raised.
    """
    cidx = self.cursor_col_idx

    if not col_name:
        # Generate a unique column name
        base_name = "new_col"
        new_col_name = base_name
        counter = 1
        while new_col_name in self.df.columns:
            new_col_name = f"{base_name}_{counter}"
            counter += 1
    else:
        new_col_name = col_name

    # Add to history
    self.add_history(f"Added column [$success]{new_col_name}[/] after column [$accent]{cidx + 1}[/]", dirty=True)

    try:
        # Keep the expression under its own name so `new_col_name` stays the plain string
        # that the error messages below are meant to show
        new_col = pl.lit(None).alias(new_col_name)

        # Get columns up to current, the new column, then remaining columns
        cols = self.df.columns
        select_cols = cols[: cidx + 1] + [new_col_name] + cols[cidx + 1 :]

        # Add the column, then reorder so it sits right after the cursor column
        self.df = self.df.lazy().with_columns(new_col).select(select_cols).collect()

        # Also update the view if applicable
        if self.df_view is not None:
            self.df_view = self.df_view.lazy().with_columns(new_col).select(select_cols).collect()

        # Recreate table for display
        self.setup_table()

        # Move cursor to the new column
        self.move_cursor(column=cidx + 1)

    except Exception as e:
        self.notify(
            f"Error adding column [$error]{new_col_name}[/]", title="Add Column", severity="error", timeout=10
        )
        self.log(f"Error adding column `{new_col_name}`: {str(e)}")
        raise
2181
+
2182
def do_add_column_expr(self) -> None:
    """Open AddColumnScreen for the cursor's column; its result is delivered to ``add_column_expr``."""
    add_screen = AddColumnScreen(self.cursor_col_idx, self.df)
    self.app.push_screen(add_screen, self.add_column_expr)
2189
+
2190
def add_column_expr(self, result: tuple[int, str, pl.Expr] | None) -> None:
    """Add a new column computed from an expression, right after column `cidx`.

    Args:
        result: Tuple of (cidx, new_col_name, expr), or None if the screen was cancelled.
    """
    if result is None:
        return

    cidx, new_col_name, expr = result

    # Add to history
    self.add_history(f"Added column [$success]{new_col_name}[/] with expression [$accent]{expr}[/].", dirty=True)

    try:
        # Create the column
        new_col = expr.alias(new_col_name)

        # Get columns up to current, the new column, then remaining columns
        cols = self.df.columns
        cols_before = cols[: cidx + 1]
        cols_after = cols[cidx + 1 :]

        # Build the new dataframe with columns reordered
        select_cols = cols_before + [new_col_name] + cols_after
        self.df = self.df.lazy().with_columns(new_col).select(select_cols).collect()

        # Also update the view if applicable
        if self.df_view is not None:
            # Take the computed values from df and attach them to the view by RID;
            # view rows without a matching RID in df get null from the left join
            lf_updated = self.df.lazy().select(RID, pl.col(new_col_name))
            self.df_view = self.df_view.lazy().join(lf_updated, on=RID, how="left").select(select_cols).collect()

        # Recreate table for display
        self.setup_table()

        # Move cursor to the new column
        self.move_cursor(column=cidx + 1)

    except Exception as e:
        self.notify(
            f"Error adding column [$error]{new_col_name}[/]", title="Add Column", severity="error", timeout=10
        )
        self.log(f"Error adding column `{new_col_name}`: {str(e)}")
2232
+
2233
def do_add_link_column(self) -> None:
    """Open AddLinkScreen for the cursor's column; its result is delivered to ``add_link_column``."""
    link_screen = AddLinkScreen(self.cursor_col_idx, self.df)
    self.app.push_screen(link_screen, callback=self.add_link_column)
2238
+
2239
def add_link_column(self, result: tuple[int, str, str] | None) -> None:
    """Handle result from AddLinkScreen.

    Creates a new link column in the dataframe based on a user-provided template.
    Supports multiple placeholder types:
    - `$_` - Current column (based on cursor position)
    - `$1`, `$2`, etc. - Column by index (1-based)
    - `$name` - Column by name (e.g., `$id`, `$product_name`)

    The template is evaluated for each row using Polars expressions with vectorized
    string concatenation. The new column is inserted after the current column.

    Args:
        result: Tuple of (cidx, new_col_name, link_template) or None if cancelled.
    """
    if result is None:
        return
    cidx, new_col_name, link_template = result

    self.add_history(
        f"Added link column [$success]{new_col_name}[/] with template [$accent]{link_template}[/].", dirty=True
    )

    try:
        # Hack to support PubChem link.
        # NOTE: this rewrites every occurrence of "PC" in the template, not only a leading one.
        link_template = link_template.replace("PC", "pubchem.ncbi.nlm.nih.gov")

        # Ensure link starts with http:// or https://
        if not link_template.startswith(("https://", "http://")):
            link_template = "https://" + link_template

        # Parse template placeholders into Polars expressions
        parts = parse_placeholders(link_template, self.df.columns, cidx)

        # Build the concatenation expression; plain-text parts become literals
        exprs = [part if isinstance(part, pl.Expr) else pl.lit(part) for part in parts]
        new_col = pl.concat_str(exprs).alias(new_col_name)

        # Get columns up to current, the new column, then remaining columns
        cols = self.df.columns
        cols_before = cols[: cidx + 1]
        cols_after = cols[cidx + 1 :]

        # Build the new dataframe with columns reordered
        select_cols = cols_before + [new_col_name] + cols_after
        self.df = self.df.lazy().with_columns(new_col).select(select_cols).collect()

        # Also update the view if applicable
        if self.df_view is not None:
            # Take the computed links from df and attach them to the view by RID;
            # view rows without a matching RID in df get null from the left join
            lf_updated = self.df.lazy().select(RID, pl.col(new_col_name))
            self.df_view = self.df_view.lazy().join(lf_updated, on=RID, how="left").select(select_cols).collect()

        # Recreate table for display
        self.setup_table()

        # Move cursor to the new column
        self.move_cursor(column=cidx + 1)

        self.notify(f"Added link column [$success]{new_col_name}[/]. Use Ctrl/Cmd click to open.", title="Add Link")

    except Exception as e:
        self.notify(
            f"Error adding link column [$error]{new_col_name}[/]", title="Add Link", severity="error", timeout=10
        )
        self.log(f"Error adding link column: {str(e)}")
2306
+
2307
def do_delete_column(self, more: str = None) -> None:
    """Delete the column under the cursor, optionally with its neighbours on one side.

    Args:
        more: "before" also deletes every displayed column to the left of the cursor,
            "after" every displayed column to its right; any other value deletes only
            the cursor's column.
    """
    col_idx = self.cursor_column
    try:
        col_name = self.cursor_col_name
    except CellDoesNotExist:
        self.notify("No column to delete at the current cursor position", title="Delete Column", severity="warning")
        return

    cursor_key = self.cursor_col_key

    # Work out which displayed columns go, and the message describing it
    if more == "before":
        doomed_keys = [self.get_col_key(i) for i in range(col_idx + 1)]
        doomed_names = [key.value for key in doomed_keys]
        message = f"Removed column [$success]{col_name}[/] and all columns before"
    elif more == "after":
        doomed_keys = [self.get_col_key(i) for i in range(col_idx, len(self.columns))]
        doomed_names = [key.value for key in doomed_keys]
        message = f"Removed column [$success]{col_name}[/] and all columns after"
    else:
        doomed_keys = [cursor_key]
        doomed_names = [col_name]
        message = f"Removed column [$success]{col_name}[/]"

    # Record the action before changing anything
    self.add_history(message, dirty=True)

    # Take the columns out of the displayed table
    for key in doomed_keys:
        self.remove_column(key)

    # If the cursor now points past the last column, pull it back
    last_col_idx = len(self.columns) - 1
    if col_idx > last_col_idx:
        self.move_cursor(column=last_col_idx)

    # Purge the deleted names from the sort keys and the hidden-column set
    for name in doomed_names:
        self.sorted_columns.pop(name, None)
    self.hidden_columns.difference_update(doomed_names)

    # Purge them from the matches too, dropping rows left without any match
    for rid in list(self.matches.keys()):
        remaining = self.matches[rid]
        remaining.difference_update(doomed_names)
        if not remaining:
            del self.matches[rid]

    # Finally drop the columns from the data (and from the full view, if any)
    self.df = self.df.drop(doomed_names)
    if self.df_view is not None:
        self.df_view = self.df_view.drop(doomed_names)

    self.notify(message, title="Delete Column")
2382
+
2383
def do_duplicate_column(self) -> None:
    """Duplicate the currently selected column, inserting it right after the current column.

    The copy is named "<name>_copy"; if that is taken, "<name>_copy1", "<name>_copy2", ...
    are tried until a free name is found.
    """
    cidx = self.cursor_col_idx
    col_name = self.cursor_col_name
    col_idx = self.cursor_column

    # Build candidates from a fixed base so the counter replaces, rather than piles onto,
    # the previous suffix (otherwise: _copy1, _copy12, _copy123, ...)
    base_name = f"{col_name}_copy"
    new_col_name = base_name
    counter = 1
    existing = set(self.df.columns)
    while new_col_name in existing:
        new_col_name = f"{base_name}{counter}"
        counter += 1

    # Add to history
    self.add_history(f"Duplicated column [$success]{col_name}[/]", dirty=True)

    # Column order with the copy placed right after the source column
    cols_before = self.df.columns[: cidx + 1]
    cols_after = self.df.columns[cidx + 1 :]
    cols_new = cols_before + [new_col_name] + cols_after

    # Add the new column and reorder columns for insertion after current column
    duplicate = pl.col(col_name).alias(new_col_name)
    self.df = self.df.lazy().with_columns(duplicate).select(cols_new).collect()

    # Also update the view if applicable
    if self.df_view is not None:
        self.df_view = self.df_view.lazy().with_columns(duplicate).select(cols_new).collect()

    # Recreate table for display
    self.setup_table()

    # Move cursor to the new duplicated column
    self.move_cursor(column=col_idx + 1)
2421
+
2422
def do_delete_row(self, more: str = None) -> None:
    """Delete rows from the table and dataframe.

    Selected rows take priority: if any rows are selected, exactly those are deleted and
    `more` is ignored. Otherwise the row at the cursor is deleted, optionally together
    with the rows above or below it.

    Args:
        more: "above" also deletes all rows above the cursor row, "below" all rows
            below it; any other value deletes only the cursor row.
    """
    rows_before = len(self.df)
    selected_count = len(self.selected_rows)

    # Collect the RIDs to delete and the matching history description
    if selected_count:
        history_desc = f"Deleted {selected_count} selected row(s)"
        rids_to_delete = set(self.selected_rows)
    elif more == "above":
        ridx = self.cursor_row_idx
        history_desc = f"Deleted current row [$success]{ridx + 1}[/] and those above"
        rids_to_delete = set(self.df[RID][: ridx + 1])
    elif more == "below":
        ridx = self.cursor_row_idx
        history_desc = f"Deleted current row [$success]{ridx + 1}[/] and those below"
        rids_to_delete = set(self.df[RID][ridx:])
    else:
        ridx = self.cursor_row_idx
        history_desc = f"Deleted row [$success]{ridx + 1}[/]"
        rids_to_delete = {self.df[RID][ridx]}

    # Record the action before changing the data
    self.add_history(history_desc, dirty=True)

    # Keep every row whose RID is not marked for deletion
    keep_row = ~pl.col(RID).is_in(rids_to_delete)
    try:
        df_filtered = self.df.lazy().filter(keep_row).collect()
    except Exception as e:
        self.notify(f"Error deleting row(s): {e}", title="Delete", severity="error", timeout=10)
        self.histories_undo.pop()  # Remove last history entry
        return

    # RIDs that survived the deletion
    ok_rids = set(df_filtered[RID])

    # Selections can only refer to surviving rows
    if self.selected_rows:
        self.selected_rows.intersection_update(ok_rids)

    self.df = df_filtered

    # Matches are keyed by RID; drop those of deleted rows
    if self.matches:
        self.matches = {rid: cols for rid, cols in self.matches.items() if rid in ok_rids}

    # Remove the same rows from the full view, if any
    if self.df_view is not None:
        self.df_view = self.df_view.lazy().filter(keep_row).collect()

    # Rebuild the display
    self.setup_table()

    deleted_count = rows_before - len(self.df)
    if deleted_count > 0:
        self.notify(f"Deleted [$success]{deleted_count}[/] row(s)", title="Delete")
2490
+
2491
def do_duplicate_row(self) -> None:
    """Duplicate the currently selected row, inserting it right after the current row.

    The copy receives RID ``rid + 1``. To keep RIDs unique, every existing RID greater
    than ``rid`` is shifted up by one, selected by RID *value* rather than by row
    position: once the table has been sorted or rows have been deleted, a row's position
    no longer equals its RID. The same shift is applied to the full view and to the
    RID-keyed `selected_rows` and `matches`.
    """
    ridx = self.cursor_row_idx
    rid = self.df[RID][ridx]

    # Add to history (before any state is changed)
    self.add_history(f"Duplicated row [$success]{ridx + 1}[/]", dirty=True)

    # The duplicated row, carrying the freshly opened RID
    row_to_duplicate = self.df.lazy().slice(ridx, 1).with_columns(pl.col(RID) + 1).collect()

    # Open a gap right above `rid` in the RID sequence
    shift_rid = pl.when(pl.col(RID) > rid).then(pl.col(RID) + 1).otherwise(pl.col(RID)).alias(RID)

    # Rows before + duplicated row + rows after, split at the cursor position
    lf = self.df.lazy().with_columns(shift_rid)
    self.df = pl.concat([lf.slice(0, ridx + 1), row_to_duplicate.lazy(), lf.slice(ridx + 1)]).collect()

    # Also update the view if applicable
    if self.df_view is not None:
        # Locate the source row in the view by RID value; its position there may differ from `ridx`
        positions = (self.df_view[RID] == rid).arg_true()
        # Fall back to appending at the end if the row is unexpectedly absent from the view
        view_idx = positions[0] if len(positions) else len(self.df_view) - 1

        lf_view = self.df_view.lazy().with_columns(shift_rid)
        self.df_view = pl.concat(
            [lf_view.slice(0, view_idx + 1), row_to_duplicate.lazy(), lf_view.slice(view_idx + 1)]
        ).collect()

    # Selections and matches are keyed by RID, so they must follow the shift
    if self.selected_rows:
        shifted_selection = {r + 1 if r > rid else r for r in self.selected_rows}
        self.selected_rows.clear()
        self.selected_rows.update(shifted_selection)
    if self.matches:
        self.matches = {(r + 1 if r > rid else r): cols for r, cols in self.matches.items()}

    # Recreate table for display
    self.setup_table()

    # Move cursor to the new duplicated row
    self.move_cursor(row=ridx + 1)
2525
+
2526
def do_move_column(self, direction: str) -> None:
    """Move the current column left or right.

    The column is swapped with its displayed neighbour, both in the table widget and in
    the dataframe (and the full view, if any). Any other `direction` is ignored.

    Args:
        direction: "left" to move left, "right" to move right.
    """
    row_idx, col_idx = self.cursor_coordinate
    col_key = self.cursor_col_key
    col_name = col_key.value
    cidx = self.cursor_col_idx

    # Validate move is possible
    if direction == "left":
        if col_idx <= 0:
            self.notify("Cannot move column left", title="Move", severity="warning")
            return
        swap_idx = col_idx - 1
    elif direction == "right":
        if col_idx >= len(self.columns) - 1:
            self.notify("Cannot move column right", title="Move", severity="warning")
            return
        swap_idx = col_idx + 1
    else:
        # Invalid direction: without this guard `swap_idx` would be unbound below
        return

    # Get column to swap
    _, swap_key = self.coordinate_to_cell_key(Coordinate(row_idx, swap_idx))
    swap_name = swap_key.value
    swap_cidx = self.df.columns.index(swap_name)

    # Add to history
    self.add_history(
        f"Moved column [$success]{col_name}[/] [$accent]{direction}[/] (swapped with [$success]{swap_name}[/])",
        dirty=True,
    )

    # Swap columns in the table's internal column locations
    self.check_idle()

    (
        self._column_locations[col_key],
        self._column_locations[swap_key],
    ) = (
        self._column_locations.get(swap_key),
        self._column_locations.get(col_key),
    )

    self._update_count += 1
    self.refresh()

    # Restore cursor position on the moved column
    self.move_cursor(row=row_idx, column=swap_idx)

    # Update the dataframe column order (indices here are dataframe positions, not display positions)
    cols = list(self.df.columns)
    cols[cidx], cols[swap_cidx] = cols[swap_cidx], cols[cidx]
    self.df = self.df.select(cols)

    # Also update the view if applicable
    if self.df_view is not None:
        self.df_view = self.df_view.select(cols)
2587
+
2588
def do_move_row(self, direction: str) -> None:
    """Swap the cursor row with the row directly above or below it.

    The swap is applied to the table's internal row locations, to ``self.df``
    and, when set, to ``self.df_view`` (where the two rows are located by
    their RID values).

    Args:
        direction: "up" to move up, "down" to move down. Any other value is
            ignored.
    """
    row_here, column_here = self.cursor_coordinate

    # Guard clauses: unknown direction or no neighbour to swap with
    if direction not in ("up", "down"):
        return
    if direction == "up" and row_here <= 0:
        self.notify("Cannot move row up", title="Move", severity="warning")
        return
    if direction == "down" and row_here >= len(self.rows) - 1:
        self.notify("Cannot move row down", title="Move", severity="warning")
        return
    row_there = row_here - 1 if direction == "up" else row_here + 1

    def exchange(frame, a, b):
        """Return `frame` with the rows at positions `a` and `b` swapped."""
        lo, hi = sorted([a, b])
        pieces = [
            frame.slice(0, lo),
            frame.slice(hi, 1),
            frame.slice(lo + 1, hi - lo - 1),
            frame.slice(lo, 1),
            frame.slice(hi + 1),
        ]
        return pl.concat([piece.lazy() for piece in pieces]).collect()

    self.add_history(
        f"Moved row [$success]{row_here}[/] [$accent]{direction}[/] (swapped with row [$success]{row_there}[/])",
        dirty=True,
    )

    # Row keys of both rows, resolved before the locations are touched
    key_here = self.coordinate_to_cell_key((row_here, 0)).row_key
    key_there = self.coordinate_to_cell_key((row_there, 0)).row_key

    self.check_idle()

    # Exchange the two entries in the table's internal row locations
    location_there = self.get_row_idx(key_there)
    location_here = self.get_row_idx(key_here)
    self._row_locations[key_here] = location_there
    self._row_locations[key_there] = location_here

    self._update_count += 1
    self.refresh()

    # Keep the cursor on the row that was moved
    self.move_cursor(row=row_there, column=column_here)

    # Mirror the swap in the dataframe
    self.df = exchange(self.df, row_here, row_there)

    # Mirror the swap in the view, locating both rows by RID
    if self.df_view is not None:
        rids = self.df[RID]
        rid_a = rids[row_here]
        rid_b = rids[row_there]
        view_rids = self.df_view[RID]
        self.df_view = exchange(self.df_view, view_rids.index_of(rid_a), view_rids.index_of(rid_b))
2676
+
2677
+ # Type casting
2678
def do_cast_column_dtype(self, dtype: str) -> None:
    """Cast the current column to a different data type.

    The cast is computed for ``self.df`` and (when set) ``self.df_view``
    before either frame is replaced, so a failure on one of them cannot leave
    the two frames with different dtypes or record a history entry for a cast
    that did not happen.

    Args:
        dtype: Target data type (string representation, e.g., "pl.String", "pl.Int64")
    """
    cidx = self.cursor_col_idx
    col_name = self.cursor_col_name
    current_dtype = self.df.dtypes[cidx]

    # NOTE(security): `dtype` is evaluated as Python code. This is only safe
    # while callers pass fixed, trusted strings; never route user-typed text here.
    try:
        target_dtype = eval(dtype)
    except Exception:
        self.notify(f"Invalid target data type: [$error]{dtype}[/]", title="Cast", severity="error", timeout=10)
        return

    if current_dtype == target_dtype:
        self.notify(
            f"Column [$warning]{col_name}[/] is already of type [$accent]{target_dtype}[/]",
            title="Cast",
            severity="warning",
        )
        return  # No change needed

    # Compute both results first; nothing is committed until both succeed
    cast_expr = pl.col(col_name).cast(target_dtype)
    try:
        new_df = self.df.with_columns(cast_expr)
        new_view = None if self.df_view is None else self.df_view.with_columns(cast_expr)
    except Exception as e:
        self.notify(
            f"Error casting column [$error]{col_name}[/] to [$accent]{target_dtype}[/]",
            title="Cast",
            severity="error",
            timeout=10,
        )
        self.log(f"Error casting column `{col_name}`: {str(e)}")
        return

    # Add to history (still before the frames are replaced)
    self.add_history(
        f"Cast column [$success]{col_name}[/] from [$accent]{current_dtype}[/] to [$success]{target_dtype}[/]",
        dirty=True,
    )

    self.df = new_df
    if new_view is not None:
        self.df_view = new_view

    # Recreate table for display
    self.setup_table()

    self.notify(f"Cast column [$success]{col_name}[/] to [$accent]{target_dtype}[/]", title="Cast")
2728
+
2729
+ # Row selection
2730
def do_select_row(self) -> None:
    """Select rows.

    If there are existing cell matches, the rows holding those matches are
    selected. Otherwise the rows whose value in the current column equals the
    cursor cell (or is null, when the cursor cell is null) are selected.
    """
    cidx = self.cursor_col_idx

    if self.matches:
        # `self.matches` is keyed by RID; pass the keys as an explicit list
        # rather than relying on how polars coerces a mapping.
        term = pl.col(RID).is_in(list(self.matches))
    else:
        column = pl.col(self.cursor_col_name)
        value = self.cursor_value
        term = column.is_null() if value is None else column == value

    self.select_row((term, cidx, False, True))
2752
+
2753
def do_select_row_expr(self) -> None:
    """Open the search screen to select rows by value or expression.

    The screen is pre-filled with the cursor cell's value (or the NULL
    placeholder when the cell is null); its result is handed to ``select_row``.
    """
    column_index = self.cursor_col_idx

    current = self.cursor_value
    default_term = str(current) if current is not None else NULL

    screen = SearchScreen("Select", default_term, self.df, column_index)
    self.app.push_screen(screen, callback=self.select_row)
2765
+
2766
def select_row(self, result) -> None:
    """Select rows by value or expression.

    Args:
        result: ``None`` (nothing to do) or a tuple
            ``(term, cidx, match_nocase, match_whole)``. ``term`` may be a
            Polars expression, a list/Series, the NULL placeholder, an
            expression in string form, or plain text.
    """
    if result is None:
        return

    term, cidx, match_nocase, match_whole = result
    col_name = self.df.columns[cidx] if cidx is not None else "all columns"

    def as_pattern(text):
        """Apply the whole-cell anchors and case-insensitive flag that were requested."""
        if match_whole:
            text = f"^{text}$"
        if match_nocase:
            text = f"(?i){text}"
        return text

    if isinstance(term, (pl.Expr, list, pl.Series)):
        # Already usable as a filter predicate
        expr = term
    elif term == NULL:
        expr = pl.col(col_name).is_null()
    elif tentative_expr(term):
        # Expression in string form
        try:
            expr = validate_expr(term, self.df.columns, cidx)
        except Exception as e:
            self.notify(
                f"Error validating expression [$error]{term}[/]", title="Search", severity="error", timeout=10
            )
            self.log(f"Error validating expression `{term}`: {str(e)}")
            return
    else:
        # Plain text: search according to the column dtype
        dtype = self.df.dtypes[cidx]
        if dtype == pl.String:
            term = as_pattern(term)
            expr = pl.col(col_name).str.contains(term)
        else:
            try:
                value = DtypeConfig(dtype).convert(term)
                expr = pl.col(col_name) == value
            except Exception:
                # Conversion failed: fall back to a string search on the cast column
                term = as_pattern(term)
                expr = pl.col(col_name).cast(pl.String).str.contains(term)
                self.notify(
                    f"Error converting [$error]{term}[/] to [$accent]{dtype}[/]. Cast to string.",
                    title="Search",
                    severity="warning",
                )

    # Collect the RIDs of the rows passing the filter
    try:
        ok_rids = set(self.df.lazy().filter(expr).collect()[RID])
    except Exception as e:
        self.notify(
            f"Error applying search filter `[$error]{term}[/]`", title="Search", severity="error", timeout=10
        )
        self.log(f"Error applying search filter `{term}`: {str(e)}")
        return

    match_count = len(ok_rids)
    if not match_count:
        self.notify(
            f"No matches found for `[$warning]{term}[/]`. Try [$accent](?i)abc[/] for case-insensitive search.",
            title="Search",
            severity="warning",
        )
        return

    message = f"Found [$success]{match_count}[/] matching row(s)"
    self.add_history(message)

    # New selections are added to the existing ones
    self.selected_rows.update(ok_rids)

    self.notify(message, title="Select Row")

    # Rebuild the table so the selection is displayed
    self.setup_table()
2857
+
2858
def do_toggle_selections(self) -> None:
    """Invert the row selection.

    After the call, the selection holds exactly the RIDs present in
    ``self.df`` that were not selected before.
    """
    self.add_history("Toggled row selection")

    # Complement of the current selection within the dataframe's RIDs
    self.selected_rows = set(self.df[RID]).difference(self.selected_rows)

    selected_count = len(self.selected_rows)
    if selected_count:
        self.notify(f"Toggled selection for [$success]{selected_count}[/] rows", title="Toggle")

    # Rebuild the table so the new selection is displayed
    self.setup_table()
2872
+
2873
def do_toggle_row_selection(self) -> None:
    """Select/deselect the current row and restyle its cells in place.

    Cells are highlighted when the row is selected or the cell's column is a
    recorded match for this row; otherwise they get the default style for
    the column's dtype.
    """
    # Add to history
    self.add_history("Toggled row selection")

    # Get current row RID
    ridx = self.cursor_row_idx
    rid = self.df[RID][ridx]

    if rid in self.selected_rows:
        self.selected_rows.discard(rid)
    else:
        self.selected_rows.add(rid)

    row_key = self.cursor_row_key
    is_selected = rid in self.selected_rows
    match_cols = self.matches.get(rid, set())

    # Look dtypes up by column name: a display position is not guaranteed to
    # equal the column's position in the dataframe.
    schema = self.df.schema

    for col in self.ordered_columns:
        col_key = col.key
        col_name = col_key.value
        cell_text: Text = self.get_cell(row_key, col_key)

        if is_selected or (col_name in match_cols):
            cell_text.style = HIGHLIGHT_COLOR
        else:
            # Reset to default style based on dtype
            cell_text.style = DtypeConfig(schema[col_name]).style

        self.update_cell(row_key, col_key, cell_text)
2905
+
2906
def do_clear_selections_and_matches(self) -> None:
    """Drop every row selection and every recorded match; the data is untouched."""
    if not (self.selected_rows or self.matches):
        self.notify("No selections to clear", title="Clear", severity="warning")
        return

    # Rows that are selected and/or hold a match, counted once each
    affected_rows = self.selected_rows.union(self.matches.keys())
    row_count = len(affected_rows)

    self.add_history("Cleared all selected rows")

    # Reset both containers to fresh, empty ones
    self.selected_rows = set()
    self.matches = defaultdict(set)

    # Rebuild the table so the highlighting disappears
    self.setup_table()

    self.notify(f"Cleared selections for [$success]{row_count}[/] rows", title="Clear")
2926
+
2927
+ # Find & Replace
2928
+ def find_matches(
2929
+ self, term: str, cidx: int | None = None, match_nocase: bool = False, match_whole: bool = False
2930
+ ) -> dict[int, set[str]]:
2931
+ """Find matches for a term in the dataframe.
2932
+
2933
+ Args:
2934
+ term: The search term (can be NULL, expression, or plain text)
2935
+ cidx: Column index for column-specific search. If None, searches all columns.
2936
+ match_nocase: Whether to perform case-insensitive matching (for string terms)
2937
+ match_whole: Whether to match the whole cell content (for string terms)
2938
+
2939
+ Returns:
2940
+ Dictionary mapping row indices to sets of column indices containing matches.
2941
+ For column-specific search, each matched row has a set with single cidx.
2942
+ For global search, each matched row has a set of all matching cidxs in that row.
2943
+
2944
+ Raises:
2945
+ Exception: If expression validation or filtering fails.
2946
+ """
2947
+ matches: dict[int, set[str]] = defaultdict(set)
2948
+
2949
+ # Lazyframe for filtering
2950
+ lf = self.df.lazy()
2951
+
2952
+ # Determine which columns to search: single column or all columns
2953
+ if cidx is not None:
2954
+ columns_to_search = [(cidx, self.df.columns[cidx])]
2955
+ else:
2956
+ columns_to_search = list(enumerate(self.df.columns))
2957
+
2958
+ # Search each column consistently
2959
+ for col_idx, col_name in columns_to_search:
2960
+ # Build expression based on term type
2961
+ if term == NULL:
2962
+ expr = pl.col(col_name).is_null()
2963
+ elif tentative_expr(term):
2964
+ try:
2965
+ expr = validate_expr(term, self.df.columns, col_idx)
2966
+ except Exception as e:
2967
+ self.notify(
2968
+ f"Error validating expression [$error]{term}[/]", title="Find", severity="error", timeout=10
2969
+ )
2970
+ self.log(f"Error validating expression `{term}`: {str(e)}")
2971
+ return matches
2972
+ else:
2973
+ if match_whole:
2974
+ term = f"^{term}$"
2975
+ if match_nocase:
2976
+ term = f"(?i){term}"
2977
+ expr = pl.col(col_name).cast(pl.String).str.contains(term)
2978
+
2979
+ # Get matched row indices
2980
+ try:
2981
+ matched_ridxs = lf.filter(expr).collect()[RID]
2982
+ except Exception as e:
2983
+ self.notify(f"Error applying filter: [$error]{expr}[/]", title="Find", severity="error", timeout=10)
2984
+ self.log(f"Error applying filter: {str(e)}")
2985
+ return matches
2986
+
2987
+ for ridx in matched_ridxs:
2988
+ matches[ridx].add(col_name)
2989
+
2990
+ return matches
2991
+
2992
def do_find_cursor_value(self, scope="column") -> None:
    """Run a whole-cell, case-sensitive find for the value under the cursor.

    Args:
        scope: "column" to find in current column; any other value finds across all columns.
    """
    value = self.cursor_value
    term = str(value) if value is not None else NULL

    if scope != "column":
        self.find_global((term, None, False, True))
        return

    self.find((term, self.cursor_col_idx, False, True))
3006
+
3007
def do_find_expr(self, scope="column") -> None:
    """Open the search screen to find by value or expression.

    Args:
        scope: "column" to find in current column; any other value finds across all columns.
    """
    # Pre-fill the screen with the cursor cell's value
    value = self.cursor_value
    term = str(value) if value is not None else NULL

    in_column = scope == "column"
    cidx = self.cursor_col_idx if in_column else None
    on_result = self.find if in_column else self.find_global

    self.app.push_screen(SearchScreen("Find", term, self.df, cidx), callback=on_result)
3022
+
3023
def find(self, result) -> None:
    """Find a term in one column and record the matches.

    Args:
        result: ``None`` (nothing to do) or a tuple
            ``(term, cidx, match_nocase, match_whole)``.
    """
    if result is None:
        return
    term, cidx, match_nocase, match_whole = result

    col_name = self.df.columns[cidx]

    try:
        found = self.find_matches(term, cidx, match_nocase, match_whole)
    except Exception as e:
        self.notify(f"Error finding matches for `[$error]{term}[/]`", title="Find", severity="error", timeout=10)
        self.log(f"Error finding matches for `{term}`: {str(e)}")
        return

    if not found:
        self.notify(
            f"No matches found for `[$warning]{term}[/]` in current column. Try [$accent](?i)abc[/] for case-insensitive search.",
            title="Find",
            severity="warning",
        )
        return

    self.add_history(f"Found `[$success]{term}[/]` in column [$accent]{col_name}[/]")

    # Merge into the existing matches while counting matched cells
    match_count = 0
    for rid, cols in found.items():
        match_count += len(cols)
        self.matches[rid].update(cols)

    self.notify(f"Found [$success]{match_count}[/] matches for `[$accent]{term}[/]`", title="Find")

    # Rebuild the table so the matches are displayed
    self.setup_table()
3058
+
3059
def find_global(self, result) -> None:
    """Find a term across all columns and record the matches.

    Args:
        result: ``None`` (nothing to do) or a tuple
            ``(term, cidx, match_nocase, match_whole)``; ``cidx`` is ignored.
    """
    if result is None:
        return
    term, cidx, match_nocase, match_whole = result

    try:
        found = self.find_matches(term, cidx=None, match_nocase=match_nocase, match_whole=match_whole)
    except Exception as e:
        self.notify(f"Error finding matches for `[$error]{term}[/]`", title="Find", severity="error", timeout=10)
        self.log(f"Error finding matches for `{term}`: {str(e)}")
        return

    if not found:
        self.notify(
            f"No matches found for `[$warning]{term}[/]` in any column. Try [$accent](?i)abc[/] for case-insensitive search.",
            title="Global Find",
            severity="warning",
        )
        return

    self.add_history(f"Found `[$success]{term}[/]` across all columns")

    # Merge into the existing matches while counting matched cells
    match_count = 0
    for rid, cols in found.items():
        match_count += len(cols)
        self.matches[rid].update(cols)

    self.notify(
        f"Found [$success]{match_count}[/] matches for `[$accent]{term}[/]` across all columns",
        title="Global Find",
    )

    # Rebuild the table so the matches are displayed
    self.setup_table()
3095
+
3096
def do_next_match(self) -> None:
    """Jump to the first match after the cursor, wrapping to the first match overall."""
    if not self.matches:
        self.notify("No matches to navigate", title="Next Match", severity="warning")
        return

    ordered = self.ordered_matches
    here = (self.cursor_row_idx, self.cursor_col_idx)

    # First (row, column) pair strictly after the cursor, if there is one
    target = None
    for ridx, cidx in ordered:
        if (ridx, cidx) > here:
            target = (ridx, cidx)
            break

    if target is None:
        # Nothing after the cursor: wrap around
        first_ridx, first_cidx = ordered[0]
        target = (first_ridx, first_cidx)

    self.move_cursor_to(target[0], target[1])
3117
+
3118
def do_previous_match(self) -> None:
    """Move cursor to the previous match, wrapping around to the last match.

    Cursor movement goes through ``move_cursor_to``, the same helper the
    other match/selection navigation methods use, instead of repeating the
    key lookup here.
    """
    if not self.matches:
        self.notify("No matches to navigate", title="Previous Match", severity="warning")
        return

    # Get sorted list of matched coordinates
    ordered_matches = self.ordered_matches

    # Current cursor position
    current_pos = (self.cursor_row_idx, self.cursor_col_idx)

    # Find the previous match before current position
    for ridx, cidx in reversed(ordered_matches):
        if (ridx, cidx) < current_pos:
            self.move_cursor_to(ridx, cidx)
            return

    # If no previous match, wrap around to the last match
    last_ridx, last_cidx = ordered_matches[-1]
    self.move_cursor_to(last_ridx, last_cidx)
3145
+
3146
def do_next_selected_row(self) -> None:
    """Jump to the first selected row below the cursor, wrapping to the first selected row."""
    if not self.selected_rows:
        self.notify("No selected rows to navigate", title="Next Selected Row", severity="warning")
        return

    candidates = self.ordered_selected_rows
    here = self.cursor_row_idx

    # First selected row strictly below the cursor, if there is one
    target = None
    for ridx in candidates:
        if ridx > here:
            target = ridx
            break

    if target is None:
        # Nothing below the cursor: wrap around
        target = candidates[0]

    self.move_cursor_to(target, self.cursor_col_idx)
3167
+
3168
def do_previous_selected_row(self) -> None:
    """Jump to the last selected row above the cursor, wrapping to the last selected row."""
    if not self.selected_rows:
        self.notify("No selected rows to navigate", title="Previous Selected Row", severity="warning")
        return

    candidates = self.ordered_selected_rows
    here = self.cursor_row_idx

    # Closest selected row strictly above the cursor, if there is one
    target = None
    for ridx in reversed(candidates):
        if ridx < here:
            target = ridx
            break

    if target is None:
        # Nothing above the cursor: wrap around
        target = candidates[-1]

    self.move_cursor_to(target, self.cursor_col_idx)
3189
+
3190
def do_replace(self) -> None:
    """Show the find-and-replace dialog scoped to the current column."""
    dialog = FindReplaceScreen(self, title="Find and Replace in Current Column")
    # The dialog's result is delivered to `replace`
    self.app.push_screen(dialog, callback=self.replace)
3197
+
3198
def replace(self, result) -> None:
    """Forward the replace dialog's result, scoped to the cursor's column."""
    column_index = self.cursor_col_idx
    self.handle_replace(result, column_index)
3201
+
3202
def do_replace_global(self) -> None:
    """Show the find-and-replace dialog scoped to all columns."""
    dialog = FindReplaceScreen(self, title="Global Find and Replace")
    # The dialog's result is delivered to `replace_global`
    self.app.push_screen(dialog, callback=self.replace_global)
3209
+
3210
def replace_global(self, result) -> None:
    """Forward the replace dialog's result with no column restriction."""
    every_column = None  # `handle_replace` treats a None column index as "all columns"
    self.handle_replace(result, every_column)
3213
+
3214
def handle_replace(self, result, cidx) -> None:
    """Process the result of the find-and-replace dialog.

    Finds the matches, stores them as the current matches, prepares the
    ``ReplaceState`` describing every occurrence, then starts either the
    replace-all or the one-by-one confirmation flow.

    Args:
        result: ``None`` (nothing to do) or a tuple
            ``(term_find, term_replace, match_nocase, match_whole, replace_all)``.
        cidx: Column index to perform replacement. If None, replace across all columns.
    """
    if result is None:
        return
    term_find, term_replace, match_nocase, match_whole, replace_all = result

    col_name = "all columns" if cidx is None else self.df.columns[cidx]

    found = self.find_matches(term_find, cidx, match_nocase, match_whole)
    if not found:
        self.notify(f"No matches found for [$warning]{term_find}[/]", title="Replace", severity="warning")
        return

    self.add_history(
        f"Replaced [$success]{term_find}[/] with [$accent]{term_replace}[/] in column [$success]{col_name}[/]"
    )

    # The found matches replace whatever matches were recorded before
    self.matches = found

    # Rebuild the table so the matches are displayed
    self.setup_table()

    # RID -> row position, for matched rows only, in dataframe order
    rid2ridx = {rid: ridx for ridx, rid in enumerate(self.df[RID]) if rid in self.matches}

    # Every column name holding at least one match
    cols_to_replace = set().union(*self.matches.values())

    # (column index, column name) pairs to replace, in dataframe order
    replace_columns = [(idx, name) for idx, name in enumerate(self.df.columns) if name in cols_to_replace]

    # For each matched row, the indices of the columns that matched in it
    cols_per_row = [
        [idx for idx, name in replace_columns if name in self.matches[rid]] for rid in rid2ridx
    ]

    self.replace_state = ReplaceState(
        term_find=term_find,
        term_replace=term_replace,
        match_nocase=match_nocase,
        match_whole=match_whole,
        cidx=cidx,
        rows=list(rid2ridx.values()),
        cols_per_row=cols_per_row,
        current_rpos=0,
        current_cpos=0,
        current_occurrence=0,
        total_occurrence=sum(len(cols) for cols in self.matches.values()),
        replaced_occurrence=0,
        skipped_occurrence=0,
        done=False,
    )

    # Replace everything at once, or confirm each occurrence
    start = self.replace_all if replace_all else self.replace_interactive
    try:
        start(term_find, term_replace)
    except Exception as e:
        self.notify(
            f"Error replacing [$error]{term_find}[/] with [$accent]{term_replace}[/]",
            title="Replace",
            severity="error",
            timeout=10,
        )
        self.log(f"Error replacing `{term_find}` with `{term_replace}`: {str(e)}")
3292
+
3293
def replace_all(self, term_find: str, term_replace: str) -> None:
    """Ask for confirmation before replacing every occurrence at once."""
    total = self.replace_state.total_occurrence
    question = f"Replace `[$success]{term_find}[/]` with `[$success]{term_replace}[/]` for all [$accent]{total}[/] occurrences?"

    # The answer is delivered to `handle_replace_all_confirmation`
    self.app.push_screen(
        ConfirmScreen("Replace All", label=question),
        callback=self.handle_replace_all_confirmation,
    )
3303
+
3304
def handle_replace_all_confirmation(self, result) -> None:
    """Apply every pending replacement once the user has answered the confirmation.

    Args:
        result: The confirmation dialog's result; ``None`` aborts without changes.
    """
    if result is None:
        return

    state = self.replace_state

    # Group the row positions to rewrite by column index, so each column is
    # rewritten with a single dataframe operation
    rows_by_column: dict[int, set[int]] = defaultdict(set)
    if state.cidx is None:
        # Multiple columns replacement
        for ridx, cidxs in zip(state.rows, state.cols_per_row):
            for cidx in cidxs:
                rows_by_column[cidx].add(ridx)
    else:
        # Single column replacement
        rows_by_column[state.cidx].update(state.rows)

    for cidx, ridxs in rows_by_column.items():
        col_name = self.df.columns[cidx]
        dtype = self.df.dtypes[cidx]
        column = pl.col(col_name)

        # True at the row positions that must be rewritten
        mask = pl.arange(0, len(self.df)).is_in(ridxs)

        if dtype == pl.String and not state.match_whole:
            # Substring replacement, only applicable to string columns
            term_find = f"(?i){state.term_find}" if state.match_nocase else state.term_find
            new_value = column.str.replace_all(term_find, state.term_replace)
        else:
            # Whole-cell replacement; use the raw replacement text when it
            # cannot be converted to the column dtype
            try:
                value = DtypeConfig(dtype).convert(state.term_replace)
            except Exception:
                value = state.term_replace
            new_value = pl.lit(value)

        self.df = self.df.with_columns(pl.when(mask).then(new_value).otherwise(column).alias(col_name))

        # Carry the rewritten values over to the view, matching rows by RID
        if self.df_view is not None:
            col_updated = f"^_{col_name}_^"
            lf_updated = self.df.lazy().filter(mask).select(pl.col(col_name).alias(col_updated), pl.col(RID))
            joined = self.df_view.lazy().join(lf_updated, on=RID, how="left")
            merged = joined.with_columns(pl.coalesce(pl.col(col_updated), pl.col(col_name)).alias(col_name))
            self.df_view = merged.drop(col_updated).collect()

        state.replaced_occurrence += len(ridxs)

    # Rebuild the table so the new values are displayed
    self.setup_table()

    # Mark as dirty if any replacements were made
    if state.replaced_occurrence > 0:
        self.dirty = True

    col_name = "all columns" if state.cidx is None else self.df.columns[state.cidx]
    self.notify(
        f"Replaced [$success]{state.replaced_occurrence}[/] of [$success]{state.total_occurrence}[/] in [$accent]{col_name}[/]",
        title="Replace",
    )
3380
+
3381
def replace_interactive(self, term_find: str, term_replace: str) -> None:
    """Start the one-by-one replacement flow by showing the first confirmation.

    Any exception raised while doing so is reported as a notification and logged.
    """
    try:
        self.show_next_replace_confirmation()
    except Exception as e:
        failure = f"Error replacing [$error]{term_find}[/] with [$accent]{term_replace}[/]"
        self.notify(failure, title="Replace", severity="error", timeout=10)
        self.log(f"Error in interactive replace: {str(e)}")
3394
+
3395
def show_next_replace_confirmation(self) -> None:
    """Ask about the next pending occurrence, or report the totals when finished."""
    state = self.replace_state

    if not state.done:
        # Put the cursor on the occurrence being asked about
        ridx = state.rows[state.current_rpos]
        cidx = state.cols_per_row[state.current_rpos][state.current_cpos]
        self.move_cursor_to(ridx, cidx)

        state.current_occurrence += 1

        label = f"Replace `[$warning]{state.term_find}[/]` with `[$success]{state.term_replace}[/]` ({state.current_occurrence} of {state.total_occurrence})?"

        # The answer is delivered to `handle_replace_confirmation`
        self.app.push_screen(
            ConfirmScreen("Replace", label=label, maybe="Skip"),
            callback=self.handle_replace_confirmation,
        )
        return

    # Finished: summarise what happened
    col_name = self.df.columns[state.cidx] if state.cidx is not None else "all columns"
    msg = f"Replaced [$success]{state.replaced_occurrence}[/] of [$success]{state.total_occurrence}[/] in [$accent]{col_name}[/]"
    if state.skipped_occurrence > 0:
        msg += f", [$warning]{state.skipped_occurrence}[/] skipped"
    self.notify(msg, title="Replace")

    # Mark as dirty only if something was actually replaced
    if state.replaced_occurrence > 0:
        self.dirty = True
3425
+
3426
def handle_replace_confirmation(self, result) -> None:
    """React to the user's answer for the occurrence currently being asked about.

    Args:
        result: ``True`` replaces the occurrence, ``False`` skips it, anything
            else cancels the remaining replacements.
    """
    state = self.replace_state
    if state.done:
        return

    ridx = state.rows[state.current_rpos]
    cidx = state.cols_per_row[state.current_rpos][state.current_cpos]
    col_name = self.df.columns[cidx]
    dtype = self.df.dtypes[cidx]
    rid = self.df[RID][ridx]

    if result is True:
        column = pl.col(col_name)

        if dtype == pl.String and not state.match_whole:
            # Substring replacement, only applicable to string columns
            term_find = f"(?i){state.term_find}" if state.match_nocase else state.term_find
            new_value = column.str.replace_all(term_find, state.term_replace)
        else:
            # Whole-cell replacement; use the raw replacement text when it
            # cannot be converted to the column dtype
            try:
                value = DtypeConfig(dtype).convert(state.term_replace)
            except Exception:
                value = state.term_replace
            new_value = pl.lit(value)

        # The dataframe row is addressed by position
        self.df = self.df.with_columns(
            pl.when(pl.arange(0, len(self.df)) == ridx).then(new_value).otherwise(column).alias(col_name)
        )

        # The view row is addressed by RID
        if self.df_view is not None:
            self.df_view = self.df_view.with_columns(
                pl.when(pl.col(RID) == rid).then(new_value).otherwise(column).alias(col_name)
            )

        state.replaced_occurrence += 1
    elif result is False:
        # Skip
        state.skipped_occurrence += 1
    else:
        # Cancel
        state.done = True

    if not state.done:
        # Re-render the current cell with its (possibly new) value
        new_cell_value = self.df.item(ridx, cidx)
        row_key = str(ridx)
        col_key = col_name
        self.update_cell(
            row_key, col_key, Text(str(new_cell_value), style=HIGHLIGHT_COLOR, justify=DtypeConfig(dtype).justify)
        )

        # Advance to the next column of this row, or to the next row
        if state.current_cpos + 1 < len(state.cols_per_row[state.current_rpos]):
            state.current_cpos += 1
        else:
            state.current_cpos = 0
            state.current_rpos += 1

        if state.current_rpos >= len(state.rows):
            state.done = True

    # Ask about the next occurrence, or report the totals when finished
    self.show_next_replace_confirmation()
3509
+
3510
+ # View & Filter
3511
def do_view_rows(self) -> None:
    """View a subset of rows.

    Selected rows take priority. When nothing is selected, the rows whose
    value in the cursor column equals the cursor cell are viewed instead.
    """
    col_idx = self.cursor_col_idx
    column = self.cursor_col_name

    if not self.selected_rows:
        # No selection: match on the value under the cursor (null-aware)
        cell = self.df.item(self.cursor_row_idx, col_idx)
        if cell is None:
            predicate = pl.col(column).is_null()
        else:
            predicate = pl.col(column) == cell
    else:
        # Keep exactly the rows whose RID is in the selection
        predicate = pl.col(RID).is_in(self.selected_rows)

    self.view_rows((predicate, col_idx, False, True))
3531
+
3532
def do_view_rows_expr(self) -> None:
    """Open the filter screen, passing the cursor cell's value as the initial term."""
    row_idx, col_idx = self.cursor_row_idx, self.cursor_col_idx
    current = self.df.item(row_idx, col_idx)

    # A null cell is represented by the NULL marker instead of the text "None"
    initial_term = str(current) if current is not None else NULL

    screen = FilterScreen(self.df, col_idx, initial_term)
    self.app.push_screen(screen, callback=self.view_rows)
3543
+
3544
def view_rows(self, result) -> None:
    """View the matching rows and hide the others.

    The pre-filter dataframe is kept in ``self.df_view`` (unless a view is
    already stored) and ``self.df`` is replaced by the filtered rows.

    Args:
        result: ``None`` to do nothing, otherwise a 4-tuple
            ``(term, cidx, match_nocase, match_whole)``. ``term`` may be a
            polars expression, a list/Series, the NULL marker, an expression
            string, or a plain search term for the column at index ``cidx``.
    """
    if result is None:
        return
    term, cidx, match_nocase, match_whole = result

    col_name = self.df.columns[cidx]

    # Support for polars expression
    if isinstance(term, pl.Expr):
        expr = term

    # Support for list of booleans (selected rows)
    elif isinstance(term, (list, pl.Series)):
        expr = term

    # Null case
    elif term == NULL:
        expr = pl.col(col_name).is_null()

    # Support for polars expression in string form
    elif tentative_expr(term):
        try:
            expr = validate_expr(term, self.df.columns, cidx)
        except Exception as e:
            self.notify(
                f"Error validating expression [$error]{term}[/]", title="Filter", severity="error", timeout=10
            )
            self.log(f"Error validating expression `{term}`: {str(e)}")
            return

    # Type-aware search based on column dtype
    else:
        dtype = self.df.dtypes[cidx]
        if dtype == pl.String:
            if match_whole:
                term = f"^{term}$"
            if match_nocase:
                term = f"(?i){term}"
            expr = pl.col(col_name).str.contains(term)
        else:
            try:
                value = DtypeConfig(dtype).convert(term)
                expr = pl.col(col_name) == value
            except Exception:
                # The term cannot be converted to the column dtype: compare as text
                if match_whole:
                    term = f"^{term}$"
                if match_nocase:
                    term = f"(?i){term}"
                expr = pl.col(col_name).cast(pl.String).str.contains(term)
                self.notify(
                    f"Unknown column type [$warning]{dtype}[/]. Cast to string.", title="Filter", severity="warning"
                )

    # Lazyframe with row indices
    lf = self.df.lazy()

    # Short label for messages; avoids dumping a whole list/Series into a notification
    expr_str = "boolean list or series" if isinstance(expr, (list, pl.Series)) else str(expr)

    # Apply the filter expression
    try:
        df_filtered = lf.filter(expr).collect()
    except Exception as e:
        # No history entry has been added yet at this point, so there is
        # nothing to roll back: the undo history must be left untouched.
        self.notify(f"Error applying filter [$error]{expr_str}[/]", title="Filter", severity="error", timeout=10)
        self.log(f"Error applying filter `{expr_str}`: {str(e)}")
        return

    matched_count = len(df_filtered)
    if not matched_count:
        self.notify(f"No rows match the expression: [$success]{expr_str}[/]", title="Filter", severity="warning")
        return

    # Add to history
    self.add_history(f"Filtered by expression [$success]{expr_str}[/]")

    ok_rids = set(df_filtered[RID])

    # Keep the unfiltered dataframe as the view (only the first time)
    if self.df_view is None:
        self.df_view = self.df

    # Update dataframe
    self.df = df_filtered

    # Update selected rows
    if self.selected_rows:
        self.selected_rows.intersection_update(ok_rids)

    # Update matches, keeping the defaultdict(set) container this class uses
    # for matches so later per-row insertions keep working
    if self.matches:
        self.matches = defaultdict(set, {rid: cols for rid, cols in self.matches.items() if rid in ok_rids})

    # Recreate table for display
    self.setup_table()

    self.notify(f"Filtered to [$success]{matched_count}[/] matching row(s)", title="Filter")
3641
+
3642
def do_filter_rows(self) -> None:
    """Filter rows, removing every row that does not match.

    If there are selected rows, keep those.
    Otherwise, keep the rows whose value in the cursor column equals the
    cursor value. Any stored view (``df_view``) is cleared afterwards.
    """
    if self.selected_rows:
        message = "Filtered to selected rows (other rows removed)"
        filter_expr = pl.col(RID).is_in(self.selected_rows)
    else:  # Search cursor value in current column
        message = "Filtered to rows matching cursor value (other rows removed)"
        cidx = self.cursor_col_idx
        col_name = self.df.columns[cidx]
        value = self.cursor_value

        # Equality never matches nulls, so a null cursor value needs is_null()
        if value is None:
            filter_expr = pl.col(col_name).is_null()
        else:
            filter_expr = pl.col(col_name) == value

    # Add to history
    self.add_history(message, dirty=True)

    # Apply filter to dataframe with row indices
    df_filtered = self.df.lazy().filter(filter_expr).collect()
    ok_rids = set(df_filtered[RID])

    # Selected rows that survive the filter
    selected_rows = {rid for rid in self.selected_rows if rid in ok_rids} if self.selected_rows else set()

    # Matches that survive the filter; always a defaultdict(set) so the
    # container type is the same whether or not matches existed beforehand
    matches = defaultdict(set)
    if self.matches:
        matches.update({rid: cols for rid, cols in self.matches.items() if rid in ok_rids})

    # Update dataframe
    self.reset_df(df_filtered)

    # Clear view for filter mode
    self.df_view = None

    # Restore selected rows and matches (assigned after reset_df, as before)
    self.selected_rows = selected_rows
    self.matches = matches

    # Recreate table for display
    self.setup_table()

    self.notify(f"{message}. Now showing [$success]{len(self.df)}[/] rows.", title="Filter")
3695
+
3696
+ # Copy & Save
3697
def do_copy_to_clipboard(self, content: str, message: str) -> None:
    """Copy content to clipboard using pbcopy (macOS) or xclip (other platforms).

    Args:
        content: The text content to copy to clipboard.
        message: The notification message to display on success.
    """
    import subprocess

    # "-selection clipboard" is an xclip option; pbcopy is invoked without it
    if sys.platform == "darwin":
        command = ["pbcopy"]
    else:
        command = ["xclip", "-selection", "clipboard"]

    try:
        # check=True turns a non-zero exit status into CalledProcessError so a
        # failed copy is not reported as a success
        subprocess.run(command, input=content, text=True, check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError covers a missing executable (FileNotFoundError) and similar launch failures
        self.notify("Error copying to clipboard", title="Clipboard", severity="error", timeout=10)
        self.log(f"Error copying to clipboard: {str(e)}")
        return

    self.notify(message, title="Clipboard")
3719
+
3720
def do_save_to_file(self, all_tabs: bool | None = None, task_after_save: str | None = None) -> None:
    """Open the save screen with a suggested filename.

    Args:
        all_tabs: ``False`` forces saving only this tab; any other value saves
            all tabs whenever more than one tab is open.
        task_after_save: Stored on the table as ``_task_after_save``.
    """
    self._task_after_save = task_after_save
    tab_count = len(self.app.tabs)
    save_all = all_tabs is not False and tab_count > 1

    current_path = Path(self.filename)
    if not save_all:
        # Single tab: same location and extension, named after the tab
        suggested = str(current_path.with_stem(self.tabname))
    elif current_path.suffix.lower() in (".xlsx", ".xls"):
        # Already an Excel file: reuse its name for the multi-tab save
        suggested = self.filename
    else:
        suggested = "all-tabs.xlsx"

    screen = SaveFileScreen(suggested, save_all=save_all, tab_count=tab_count)
    self.app.push_screen(screen, callback=self.save_to_file)
3740
+
3741
def save_to_file(self, result) -> None:
    """Process the SaveFileScreen result, asking before overwriting an existing file.

    Args:
        result: ``None`` to do nothing, otherwise a 3-tuple
            ``(filename, save_all, overwrite_prompt)``.
    """
    if result is None:
        return

    target, save_all, overwrite_prompt = result
    self._save_all = save_all

    # Only touch the filesystem when a prompt was requested
    needs_confirmation = overwrite_prompt and Path(target).exists()
    if not needs_confirmation:
        self.save_file(target)
        return

    # Remember the name so the confirmation callback knows what to write
    self._pending_filename = target
    self.app.push_screen(
        ConfirmScreen("File already exists. Overwrite?"),
        callback=self.confirm_overwrite,
    )
3757
+
3758
def confirm_overwrite(self, should_overwrite: bool) -> None:
    """Handle result from the overwrite ConfirmScreen.

    Args:
        should_overwrite: Truthy to write over the pending file; otherwise the
            save screen is shown again.
    """
    if should_overwrite:
        self.save_file(self._pending_filename)
    else:
        # Go back to SaveFileScreen to allow user to enter a different name.
        # tab_count is passed as in do_save_to_file so the re-opened screen
        # receives the same arguments as the first one.
        self.app.push_screen(
            SaveFileScreen(self._pending_filename, save_all=self._save_all, tab_count=len(self.app.tabs)),
            callback=self.save_to_file,
        )
3768
+
3769
def save_file(self, filename: str) -> None:
    """Write the dataframe to ``filename`` in the format implied by its extension.

    ``df_view`` is written when it is set, otherwise ``df``; the RID column is
    excluded. Unsupported extensions fall back to CSV with a warning.
    """
    suffix = Path(filename).suffix.lower()
    if suffix == ".gz":
        # Look past a trailing .gz to the extension underneath
        suffix = Path(filename).with_suffix("").suffix.lower()

    fmt = suffix.removeprefix(".")
    if fmt not in SUPPORTED_FORMATS:
        self.notify(
            f"Unsupported file format [$success]{fmt}[/]. Use [$accent]CSV[/] as fallback. Supported formats: {', '.join(SUPPORTED_FORMATS)}",
            title="Save to File",
            severity="warning",
        )
        fmt = "csv"

    source = self.df if self.df_view is None else self.df_view
    df = source.select(pl.exclude(RID))
    try:
        if fmt in ("tsv", "tab"):
            df.write_csv(filename, separator="\t")
        elif fmt in ("xlsx", "xls"):
            self.save_excel(filename)
        elif fmt == "json":
            df.write_json(filename)
        elif fmt == "ndjson":
            df.write_ndjson(filename)
        elif fmt == "parquet":
            df.write_parquet(filename)
        else:
            # "csv" itself, plus any other format, is written as CSV
            df.write_csv(filename)

        # Update current filename
        self.filename = filename

        # Reset dirty flag after save
        if not self._save_all:
            self.dirty = False
        else:
            open_tabs: dict[TabPane, DataFrameTable] = self.app.tabs
            for tab_table in open_tabs.values():
                tab_table.dirty = False

        # Run the follow-up action requested when the save was started, if any
        pending_task = getattr(self, "_task_after_save", None)
        if pending_task == "close_tab":
            self.app.do_close_tab()
        elif pending_task == "quit_app":
            self.app.exit()

        # From ConfirmScreen callback, so notify accordingly
        if not self._save_all:
            self.notify(f"Saved current tab to [$success]{filename}[/]", title="Save to File")
        else:
            self.notify(f"Saved all tabs to [$success]{filename}[/]", title="Save to File")

    except Exception as e:
        self.notify(f"Error saving [$error]{filename}[/]", title="Save to File", severity="error", timeout=10)
        self.log(f"Error saving file `{filename}`: {str(e)}")
3828
+
3829
def save_excel(self, filename: str) -> None:
    """Write an Excel workbook: one sheet for this tab, or one sheet per open tab."""
    import xlsxwriter

    def exportable(table):
        """Return the table's df_view when set (otherwise df) without the RID column."""
        frame = table.df if table.df_view is None else table.df_view
        return frame.select(pl.exclude(RID))

    multi_sheet = self._save_all and len(self.app.tabs) != 1
    if multi_sheet:
        # Multiple tabs - one shared xlsxwriter workbook, one worksheet per tab
        with xlsxwriter.Workbook(filename) as workbook:
            open_tabs: dict[TabPane, DataFrameTable] = self.app.tabs
            for tab_table in open_tabs.values():
                sheet = workbook.add_worksheet(tab_table.tabname)
                exportable(tab_table).write_excel(workbook=workbook, worksheet=sheet)
    else:
        # Single tab - save directly
        exportable(self).write_excel(filename, worksheet=self.tabname)
3845
+
3846
+ # SQL Interface
3847
def do_simple_sql(self) -> None:
    """Show the simple SQL screen; its result is handed to ``simple_sql``."""
    screen = SimpleSqlScreen(self)
    self.app.push_screen(screen, callback=self.simple_sql)
3853
+
3854
def simple_sql(self, result) -> None:
    """Build a SELECT statement from the SimpleSqlScreen result and run it.

    Args:
        result: ``None`` to do nothing, otherwise a 3-tuple
            ``(columns, where, view)``.
    """
    if result is None:
        return

    columns, where, view = result

    # The WHERE clause is only appended when a condition was supplied
    where_clause = f" WHERE {where}" if where else ""
    statement = f"SELECT {columns} FROM self" + where_clause

    self.run_sql(statement, view)
3865
+
3866
def do_advanced_sql(self) -> None:
    """Show the advanced SQL screen; its result is handed to ``advanced_sql``."""
    screen = AdvancedSqlScreen(self)
    self.app.push_screen(screen, callback=self.advanced_sql)
3872
+
3873
def advanced_sql(self, result) -> None:
    """Run the query returned by AdvancedSqlScreen.

    Args:
        result: ``None`` to do nothing, otherwise a 2-tuple ``(sql, view)``.
    """
    if result is not None:
        query, as_view = result
        self.run_sql(query, as_view)
3880
+
3881
def run_sql(self, sql: str, view: bool = True) -> None:
    """Execute a SQL query against the current dataframe and show the result.

    Every ``$#`` in the query is expanded to ``(`RID` + 1)``. When the query
    mentions neither RID nor ``*``, the RID column is appended to the select
    list so the result rows can still be identified.

    Args:
        sql: The SQL query string to execute; the table is named ``self``.
        view: If True, the pre-query dataframe is kept in ``df_view`` (unless
            one is already stored). If False, ``df_view`` is cleared and the
            history entry is recorded as dirty.
    """
    import re

    sql = sql.replace("$#", f"(`{RID}` + 1)")
    if RID not in sql and "*" not in sql:
        # Ensure RID is selected
        RE_FROM_SELF = re.compile(r"\bFROM\s+self\b", re.IGNORECASE)
        sql = RE_FROM_SELF.sub(f", `{RID}` FROM self", sql)

    # Execute the SQL query
    try:
        df_filtered = self.df.lazy().sql(sql).collect()
    except Exception as e:
        self.notify(f"Error executing SQL query [$error]{sql}[/]", title="SQL Query", severity="error", timeout=10)
        self.log(f"Error executing SQL query `{sql}`: {str(e)}")
        return

    if not len(df_filtered):
        self.notify(f"SQL query returned no results for [$warning]{sql}[/]", title="SQL Query", severity="warning")
        return

    # A query containing "*" (e.g. COUNT(*) or a multiplication) skips the RID
    # injection above and may return a result without RID. Reject it before
    # touching history or self.df, because the code below indexes the result
    # by RID and would otherwise fail with the table half-updated.
    if RID not in df_filtered.columns:
        self.notify(
            f"SQL query result is missing the row index column for [$error]{sql}[/]",
            title="SQL Query",
            severity="error",
            timeout=10,
        )
        return

    # Add to history
    self.add_history(f"SQL Query:\n[$success]{sql}[/]", dirty=not view)

    # Keep the pre-query dataframe as the view (only the first time)
    if view and self.df_view is None:
        self.df_view = self.df

    # Clear view for filter mode
    if not view:
        self.df_view = None

    # Update dataframe
    self.df = df_filtered
    ok_rids = set(df_filtered[RID])

    # Update selected rows
    if self.selected_rows:
        self.selected_rows.intersection_update(ok_rids)

    # Update matches, keeping the defaultdict(set) container this class uses
    # for matches so later per-row insertions keep working
    if self.matches:
        self.matches = defaultdict(set, {rid: cols for rid, cols in self.matches.items() if rid in ok_rids})

    # Recreate table for display
    self.setup_table()

    self.notify(
        f"SQL query executed successfully. Now showing [$accent]{len(self.df)}[/] rows and [$accent]{len(self.df.columns)}[/] columns.",
        title="SQL Query",
    )