dataframe-textual 0.3.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataframe_textual/__main__.py +29 -12
- dataframe_textual/common.py +207 -91
- dataframe_textual/data_frame_help_panel.py +22 -4
- dataframe_textual/data_frame_table.py +1962 -587
- dataframe_textual/data_frame_viewer.py +273 -136
- dataframe_textual/table_screen.py +265 -86
- dataframe_textual/yes_no_screen.py +428 -163
- dataframe_textual-1.0.0.dist-info/METADATA +733 -0
- dataframe_textual-1.0.0.dist-info/RECORD +13 -0
- dataframe_textual-0.3.2.dist-info/METADATA +0 -548
- dataframe_textual-0.3.2.dist-info/RECORD +0 -13
- {dataframe_textual-0.3.2.dist-info → dataframe_textual-1.0.0.dist-info}/WHEEL +0 -0
- {dataframe_textual-0.3.2.dist-info → dataframe_textual-1.0.0.dist-info}/entry_points.txt +0 -0
- {dataframe_textual-0.3.2.dist-info → dataframe_textual-1.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -13,7 +13,7 @@ from textual.theme import BUILTIN_THEMES
|
|
|
13
13
|
from textual.widgets import TabbedContent, TabPane
|
|
14
14
|
from textual.widgets.tabbed_content import ContentTab, ContentTabs
|
|
15
15
|
|
|
16
|
-
from .common import
|
|
16
|
+
from .common import get_next_item
|
|
17
17
|
from .data_frame_help_panel import DataFrameHelpPanel
|
|
18
18
|
from .data_frame_table import DataFrameTable
|
|
19
19
|
from .yes_no_screen import OpenFileScreen, SaveFileScreen
|
|
@@ -35,7 +35,7 @@ class DataFrameViewer(App):
|
|
|
35
35
|
- **q** - 🚪 Quit application
|
|
36
36
|
|
|
37
37
|
## 🎨 View & Settings
|
|
38
|
-
-
|
|
38
|
+
- **Ctrl+H** - ❓ Toggle this help panel
|
|
39
39
|
- **k** - 🌙 Cycle through themes
|
|
40
40
|
|
|
41
41
|
## ⭐ Features
|
|
@@ -52,7 +52,7 @@ class DataFrameViewer(App):
|
|
|
52
52
|
|
|
53
53
|
BINDINGS = [
|
|
54
54
|
("q", "quit", "Quit"),
|
|
55
|
-
("h
|
|
55
|
+
("ctrl+h", "toggle_help_panel", "Help"),
|
|
56
56
|
("B", "toggle_tab_bar", "Toggle Tab Bar"),
|
|
57
57
|
("ctrl+o", "add_tab", "Add Tab"),
|
|
58
58
|
("ctrl+shift+s", "save_all_tabs", "Save All Tabs"),
|
|
@@ -79,14 +79,33 @@ class DataFrameViewer(App):
|
|
|
79
79
|
}
|
|
80
80
|
"""
|
|
81
81
|
|
|
82
|
-
def __init__(self, *filenames):
|
|
82
|
+
def __init__(self, *filenames: str, file_format: str | None = None, has_header: bool = True) -> None:
|
|
83
|
+
"""Initialize the DataFrame Viewer application.
|
|
84
|
+
|
|
85
|
+
Loads dataframes from provided filenames and prepares the tabbed interface.
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
*filenames: Variable number of file paths to load (CSV, Excel, Parquet, etc).
|
|
89
|
+
file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
|
|
90
|
+
has_header: Whether the input files have a header row. Defaults to True.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
None
|
|
94
|
+
"""
|
|
83
95
|
super().__init__()
|
|
84
|
-
self.sources = _load_dataframe(filenames)
|
|
96
|
+
self.sources = _load_dataframe(filenames, file_format, has_header=has_header)
|
|
85
97
|
self.tabs: dict[TabPane, DataFrameTable] = {}
|
|
86
98
|
self.help_panel = None
|
|
87
99
|
|
|
88
100
|
def compose(self) -> ComposeResult:
|
|
89
|
-
"""
|
|
101
|
+
"""Compose the application widget structure.
|
|
102
|
+
|
|
103
|
+
Creates a tabbed interface with one tab per file/sheet loaded. Each tab
|
|
104
|
+
contains a DataFrameTable widget for displaying and interacting with the data.
|
|
105
|
+
|
|
106
|
+
Yields:
|
|
107
|
+
TabPane: One tab per file or sheet for the tabbed interface.
|
|
108
|
+
"""
|
|
90
109
|
# Tabbed interface
|
|
91
110
|
self.tabbed = TabbedContent(id="main_tabs")
|
|
92
111
|
with self.tabbed:
|
|
@@ -99,9 +118,7 @@ class DataFrameViewer(App):
|
|
|
99
118
|
|
|
100
119
|
tab_id = f"tab_{idx}"
|
|
101
120
|
try:
|
|
102
|
-
table = DataFrameTable(
|
|
103
|
-
df, filename, name=tabname, id=tab_id, zebra_stripes=True
|
|
104
|
-
)
|
|
121
|
+
table = DataFrameTable(df, filename, name=tabname, id=tab_id, zebra_stripes=True)
|
|
105
122
|
tab = TabPane(tabname, table, name=tabname, id=tab_id)
|
|
106
123
|
self.tabs[tab] = table
|
|
107
124
|
yield tab
|
|
@@ -109,51 +126,68 @@ class DataFrameViewer(App):
|
|
|
109
126
|
self.notify(f"Error loading {tabname}: {e}", severity="error")
|
|
110
127
|
|
|
111
128
|
def on_mount(self) -> None:
|
|
112
|
-
"""Set up the
|
|
129
|
+
"""Set up the application when it starts.
|
|
130
|
+
|
|
131
|
+
Initializes the app by hiding the tab bar for single-file mode and focusing
|
|
132
|
+
the active table widget.
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
None
|
|
136
|
+
"""
|
|
113
137
|
if len(self.tabs) == 1:
|
|
114
138
|
self.query_one(ContentTabs).display = False
|
|
115
139
|
self._get_active_table().focus()
|
|
116
140
|
|
|
117
|
-
def on_key(self, event):
|
|
141
|
+
def on_key(self, event) -> None:
|
|
142
|
+
"""Handle key press events at the application level.
|
|
143
|
+
|
|
144
|
+
Currently handles theme cycling with the 'k' key.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
event: The key event object containing key information.
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
None
|
|
151
|
+
"""
|
|
118
152
|
if event.key == "k":
|
|
119
|
-
self.theme =
|
|
120
|
-
self.notify(
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
153
|
+
self.theme = get_next_item(list(BUILTIN_THEMES.keys()), self.theme)
|
|
154
|
+
self.notify(f"Switched to theme: [$success]{self.theme}[/]", title="Theme")
|
|
155
|
+
|
|
156
|
+
def on_tabbed_content_tab_activated(self, event: TabbedContent.TabActivated) -> None:
|
|
157
|
+
"""Handle tab activation events.
|
|
158
|
+
|
|
159
|
+
When a tab is activated, focuses the table widget and loads its data if not already loaded.
|
|
160
|
+
Applies active styling to the clicked tab and removes it from others.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
event: The tab activated event containing the activated tab pane.
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
None
|
|
167
|
+
"""
|
|
168
|
+
# Focus the table in the newly activated tab
|
|
169
|
+
if table := self._get_active_table():
|
|
170
|
+
table.focus()
|
|
171
|
+
else:
|
|
130
172
|
return
|
|
131
173
|
|
|
174
|
+
if table.loaded_rows == 0:
|
|
175
|
+
table._setup_table()
|
|
176
|
+
|
|
132
177
|
# Apply background color to active tab
|
|
133
178
|
event.tab.add_class("active")
|
|
134
179
|
for tab in self.tabbed.query(ContentTab):
|
|
135
180
|
if tab != event.tab:
|
|
136
181
|
tab.remove_class("active")
|
|
137
182
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
if table := self._get_active_table():
|
|
141
|
-
table.focus()
|
|
142
|
-
except NoMatches:
|
|
143
|
-
pass
|
|
183
|
+
def action_toggle_help_panel(self) -> None:
|
|
184
|
+
"""Toggle the help panel on or off.
|
|
144
185
|
|
|
145
|
-
|
|
146
|
-
"""Get the currently active table."""
|
|
147
|
-
try:
|
|
148
|
-
tabbed: TabbedContent = self.query_one(TabbedContent)
|
|
149
|
-
if active_pane := tabbed.active_pane:
|
|
150
|
-
return active_pane.query_one(DataFrameTable)
|
|
151
|
-
except (NoMatches, AttributeError):
|
|
152
|
-
pass
|
|
153
|
-
return None
|
|
186
|
+
Shows or hides the context-sensitive help panel. Creates it on first use.
|
|
154
187
|
|
|
155
|
-
|
|
156
|
-
|
|
188
|
+
Returns:
|
|
189
|
+
None
|
|
190
|
+
"""
|
|
157
191
|
if self.help_panel:
|
|
158
192
|
self.help_panel.display = not self.help_panel.display
|
|
159
193
|
else:
|
|
@@ -161,50 +195,135 @@ class DataFrameViewer(App):
|
|
|
161
195
|
self.mount(self.help_panel)
|
|
162
196
|
|
|
163
197
|
def action_add_tab(self) -> None:
|
|
164
|
-
"""Open file
|
|
165
|
-
self.push_screen(OpenFileScreen(), self._handle_file_open)
|
|
198
|
+
"""Open file browser to load a file in a new tab.
|
|
166
199
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
f"Opened: [on $primary]{Path(filename).name}[/]", title="Open"
|
|
175
|
-
)
|
|
176
|
-
except Exception as e:
|
|
177
|
-
self.notify(f"Error: {e}", severity="error")
|
|
200
|
+
Displays the file open dialog for the user to select a file to load
|
|
201
|
+
as a new tab in the interface.
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
None
|
|
205
|
+
"""
|
|
206
|
+
self.push_screen(OpenFileScreen(), self._do_add_tab)
|
|
178
207
|
|
|
179
208
|
def action_save_all_tabs(self) -> None:
|
|
180
|
-
"""Save all tabs to a Excel file.
|
|
181
|
-
|
|
209
|
+
"""Save all open tabs to a single Excel file.
|
|
210
|
+
|
|
211
|
+
Displays a save dialog to choose filename and location, then saves all
|
|
212
|
+
open tabs as separate sheets in a single Excel workbook.
|
|
213
|
+
|
|
214
|
+
Returns:
|
|
215
|
+
None
|
|
216
|
+
"""
|
|
217
|
+
callback = partial(self._get_active_table()._do_save_file, all_tabs=True)
|
|
182
218
|
self.push_screen(
|
|
183
219
|
SaveFileScreen("all-tabs.xlsx", title="Save All Tabs"),
|
|
184
220
|
callback=callback,
|
|
185
221
|
)
|
|
186
222
|
|
|
187
223
|
def action_close_tab(self) -> None:
|
|
188
|
-
"""Close
|
|
224
|
+
"""Close the currently active tab.
|
|
225
|
+
|
|
226
|
+
Closes the current tab. If this is the only tab, exits the application instead.
|
|
227
|
+
|
|
228
|
+
Returns:
|
|
229
|
+
None
|
|
230
|
+
"""
|
|
189
231
|
if len(self.tabs) <= 1:
|
|
190
232
|
self.app.exit()
|
|
191
233
|
return
|
|
192
234
|
self._close_tab()
|
|
193
235
|
|
|
194
|
-
def action_next_tab(self, offset: int = 1) ->
|
|
195
|
-
"""Switch to next tab
|
|
236
|
+
def action_next_tab(self, offset: int = 1) -> None:
|
|
237
|
+
"""Switch to the next tab or previous tab.
|
|
238
|
+
|
|
239
|
+
Cycles through tabs by the specified offset. With offset=1, moves to next tab.
|
|
240
|
+
With offset=-1, moves to previous tab. Wraps around when reaching edges.
|
|
241
|
+
|
|
242
|
+
Args:
|
|
243
|
+
offset: Number of tabs to advance (+1 for next, -1 for previous). Defaults to 1.
|
|
244
|
+
|
|
245
|
+
Returns:
|
|
246
|
+
None
|
|
247
|
+
"""
|
|
196
248
|
if len(self.tabs) <= 1:
|
|
197
249
|
return
|
|
198
250
|
try:
|
|
199
251
|
tabs: list[TabPane] = list(self.tabs.keys())
|
|
200
|
-
next_tab =
|
|
252
|
+
next_tab = get_next_item(tabs, self.tabbed.active_pane, offset)
|
|
201
253
|
self.tabbed.active = next_tab.id
|
|
202
254
|
except (NoMatches, ValueError):
|
|
203
255
|
pass
|
|
204
256
|
|
|
205
|
-
def
|
|
206
|
-
"""
|
|
207
|
-
|
|
257
|
+
def action_toggle_tab_bar(self) -> None:
|
|
258
|
+
"""Toggle the tab bar visibility.
|
|
259
|
+
|
|
260
|
+
Shows or hides the tab bar at the bottom of the window. Useful for maximizing
|
|
261
|
+
screen space in single-tab mode.
|
|
262
|
+
|
|
263
|
+
Returns:
|
|
264
|
+
None
|
|
265
|
+
"""
|
|
266
|
+
tabs = self.query_one(ContentTabs)
|
|
267
|
+
tabs.display = not tabs.display
|
|
268
|
+
# status = "shown" if tabs.display else "hidden"
|
|
269
|
+
# self.notify(f"Tab bar [$success]{status}[/]", title="Toggle")
|
|
270
|
+
|
|
271
|
+
def _get_active_table(self) -> DataFrameTable | None:
|
|
272
|
+
"""Get the currently active DataFrameTable widget.
|
|
273
|
+
|
|
274
|
+
Retrieves the table from the currently active tab. Returns None if no
|
|
275
|
+
table is found or an error occurs.
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
The active DataFrameTable widget, or None if not found.
|
|
279
|
+
"""
|
|
280
|
+
try:
|
|
281
|
+
tabbed: TabbedContent = self.query_one(TabbedContent)
|
|
282
|
+
if active_pane := tabbed.active_pane:
|
|
283
|
+
return active_pane.query_one(DataFrameTable)
|
|
284
|
+
except (NoMatches, AttributeError):
|
|
285
|
+
self.notify("No active table found", title="Locate", severity="error")
|
|
286
|
+
return None
|
|
287
|
+
|
|
288
|
+
def _do_add_tab(self, filename: str) -> None:
|
|
289
|
+
"""Add a tab for the opened file.
|
|
290
|
+
|
|
291
|
+
Loads the specified file and creates one or more tabs for it. For Excel files,
|
|
292
|
+
creates one tab per sheet. For other formats, creates a single tab.
|
|
293
|
+
|
|
294
|
+
Args:
|
|
295
|
+
filename: Path to the file to load and add as tab(s).
|
|
296
|
+
|
|
297
|
+
Returns:
|
|
298
|
+
None
|
|
299
|
+
"""
|
|
300
|
+
if filename and os.path.exists(filename):
|
|
301
|
+
try:
|
|
302
|
+
n_tab = 0
|
|
303
|
+
for lf, filename, tabname in _load_file(filename, prefix_sheet=True):
|
|
304
|
+
self._add_tab(lf.collect(), filename, tabname)
|
|
305
|
+
n_tab += 1
|
|
306
|
+
self.notify(f"Added [$accent]{n_tab}[/] tab(s) for [$success]{filename}[/]", title="Open")
|
|
307
|
+
except Exception as e:
|
|
308
|
+
self.notify(f"Error: {e}", title="Open", severity="error")
|
|
309
|
+
else:
|
|
310
|
+
self.notify(f"File does not exist: [$warning]{filename}[/]", title="Open", severity="warning")
|
|
311
|
+
|
|
312
|
+
def _add_tab(self, df: pl.DataFrame, filename: str, tabname: str) -> None:
|
|
313
|
+
"""Add new tab for the given DataFrame.
|
|
314
|
+
|
|
315
|
+
Creates and adds a new tab with the provided DataFrame and configuration.
|
|
316
|
+
Ensures unique tab names by appending an index if needed. Shows the tab bar
|
|
317
|
+
if this is no longer the only tab.
|
|
318
|
+
|
|
319
|
+
Args:
|
|
320
|
+
df: The Polars DataFrame to display in the new tab.
|
|
321
|
+
filename: The source filename for this data (used in table metadata).
|
|
322
|
+
tabname: The display name for the tab.
|
|
323
|
+
|
|
324
|
+
Returns:
|
|
325
|
+
None
|
|
326
|
+
"""
|
|
208
327
|
if any(tab.name == tabname for tab in self.tabs):
|
|
209
328
|
tabname = f"{tabname}_{len(self.tabs) + 1}"
|
|
210
329
|
|
|
@@ -218,9 +337,7 @@ class DataFrameViewer(App):
|
|
|
218
337
|
tab_idx = pending_tab_idx
|
|
219
338
|
break
|
|
220
339
|
|
|
221
|
-
table = DataFrameTable(
|
|
222
|
-
df, filename, zebra_stripes=True, id=tab_idx, name=tabname
|
|
223
|
-
)
|
|
340
|
+
table = DataFrameTable(df, filename, zebra_stripes=True, id=tab_idx, name=tabname)
|
|
224
341
|
tab = TabPane(tabname, table, name=tabname, id=tab_idx)
|
|
225
342
|
self.tabbed.add_pane(tab)
|
|
226
343
|
self.tabs[tab] = table
|
|
@@ -233,7 +350,14 @@ class DataFrameViewer(App):
|
|
|
233
350
|
table.focus()
|
|
234
351
|
|
|
235
352
|
def _close_tab(self) -> None:
|
|
236
|
-
"""Close
|
|
353
|
+
"""Close the currently active tab.
|
|
354
|
+
|
|
355
|
+
Removes the active tab from the interface. If only one tab remains and no more
|
|
356
|
+
can be closed, the application exits instead.
|
|
357
|
+
|
|
358
|
+
Returns:
|
|
359
|
+
None
|
|
360
|
+
"""
|
|
237
361
|
try:
|
|
238
362
|
if len(self.tabs) == 1:
|
|
239
363
|
self.app.exit()
|
|
@@ -241,95 +365,108 @@ class DataFrameViewer(App):
|
|
|
241
365
|
if active_pane := self.tabbed.active_pane:
|
|
242
366
|
self.tabbed.remove_pane(active_pane.id)
|
|
243
367
|
self.tabs.pop(active_pane)
|
|
244
|
-
self.notify(
|
|
245
|
-
f"Closed tab [on $primary]{active_pane.name}[/]", title="Close"
|
|
246
|
-
)
|
|
368
|
+
self.notify(f"Closed tab [$success]{active_pane.name}[/]", title="Close")
|
|
247
369
|
except NoMatches:
|
|
248
370
|
pass
|
|
249
371
|
|
|
250
|
-
def action_toggle_tab_bar(self) -> None:
|
|
251
|
-
"""Toggle tab bar visibility."""
|
|
252
|
-
tabs = self.query_one(ContentTabs)
|
|
253
|
-
tabs.display = not tabs.display
|
|
254
|
-
status = "shown" if tabs.display else "hidden"
|
|
255
|
-
self.notify(f"Tab bar [on $primary]{status}[/]", title="Toggle")
|
|
256
372
|
|
|
373
|
+
def _load_dataframe(
|
|
374
|
+
filenames: list[str], file_format: str | None = None, has_header: bool = True
|
|
375
|
+
) -> list[tuple[pl.LazyFrame, str, str]]:
|
|
376
|
+
"""Load DataFrames from file specifications.
|
|
257
377
|
|
|
258
|
-
|
|
259
|
-
|
|
378
|
+
Handles loading from multiple files, single files, or stdin. For Excel files,
|
|
379
|
+
loads all sheets as separate entries. For other formats, loads as single file.
|
|
260
380
|
|
|
261
381
|
Args:
|
|
262
382
|
filenames: List of filenames to load. If single filename is "-", read from stdin.
|
|
383
|
+
file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
|
|
384
|
+
has_header: Whether the input files have a header row. Defaults to True.
|
|
263
385
|
|
|
264
386
|
Returns:
|
|
265
|
-
List of tuples of (
|
|
387
|
+
List of tuples of (LazyFrame, filename, tabname) ready for display.
|
|
266
388
|
"""
|
|
267
389
|
sources = []
|
|
268
390
|
|
|
269
|
-
|
|
270
|
-
if len(filenames) == 1:
|
|
271
|
-
filename = filenames[0]
|
|
272
|
-
filepath = Path(filename)
|
|
273
|
-
ext = filepath.suffix.lower()
|
|
391
|
+
prefix_sheet = len(filenames) > 1
|
|
274
392
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
393
|
+
for filename in filenames:
|
|
394
|
+
sources.extend(_load_file(filename, prefix_sheet=prefix_sheet, file_format=file_format, has_header=has_header))
|
|
395
|
+
return sources
|
|
278
396
|
|
|
279
|
-
# Read CSV from stdin into memory first (stdin is not seekable)
|
|
280
|
-
stdin_data = sys.stdin.read()
|
|
281
|
-
df = pl.read_csv(StringIO(stdin_data))
|
|
282
397
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
398
|
+
def _load_file(
|
|
399
|
+
filename: str,
|
|
400
|
+
first_sheet: bool = False,
|
|
401
|
+
prefix_sheet: bool = False,
|
|
402
|
+
file_format: str | None = None,
|
|
403
|
+
has_header: bool = True,
|
|
404
|
+
) -> list[tuple[pl.LazyFrame, str, str]]:
|
|
405
|
+
"""Load a single file and return list of sources.
|
|
406
|
+
|
|
407
|
+
For Excel files, when `first_sheet` is True, returns only the first sheet. Otherwise, returns one entry per sheet.
|
|
408
|
+
For other files or multiple files, returns one entry per file.
|
|
409
|
+
|
|
410
|
+
Args:
|
|
411
|
+
filename: Path to file to load.
|
|
412
|
+
first_sheet: If True, only load first sheet for Excel files. Defaults to False.
|
|
413
|
+
prefix_sheet: If True, prefix filename to sheet name as the tab name for Excel files. Defaults to False.
|
|
414
|
+
file_format: Optional format specifier for input files (e.g., 'csv', 'excel', 'tsv', 'parquet', 'json', 'ndjson').
|
|
415
|
+
|
|
416
|
+
Returns:
|
|
417
|
+
List of tuples of (LazyFrame, filename, tabname).
|
|
418
|
+
"""
|
|
419
|
+
sources = []
|
|
420
|
+
|
|
421
|
+
if filename == "-":
|
|
422
|
+
from io import StringIO
|
|
423
|
+
|
|
424
|
+
# Read from stdin into memory first (stdin is not seekable)
|
|
425
|
+
stdin_data = sys.stdin.read()
|
|
426
|
+
lf = pl.scan_csv(StringIO(stdin_data), has_header=has_header, separator="," if file_format == "csv" else "\t")
|
|
427
|
+
|
|
428
|
+
# Reopen stdin to /dev/tty for proper terminal interaction
|
|
429
|
+
try:
|
|
430
|
+
tty = open("/dev/tty")
|
|
431
|
+
os.dup2(tty.fileno(), sys.stdin.fileno())
|
|
432
|
+
except (OSError, FileNotFoundError):
|
|
433
|
+
pass
|
|
434
|
+
|
|
435
|
+
sources.append((lf, "stdin.tsv" if file_format == "tsv" else "stdin.csv", "stdin"))
|
|
436
|
+
return sources
|
|
437
|
+
|
|
438
|
+
filepath = Path(filename)
|
|
439
|
+
ext = filepath.suffix.lower()
|
|
440
|
+
|
|
441
|
+
if file_format == "csv" or ext == ".csv":
|
|
442
|
+
lf = pl.scan_csv(filename, has_header=has_header)
|
|
443
|
+
sources.append((lf, filename, filepath.stem))
|
|
444
|
+
elif file_format == "excel" or ext in (".xlsx", ".xls"):
|
|
445
|
+
if first_sheet:
|
|
446
|
+
# Read only the first sheet for multiple files
|
|
447
|
+
lf = pl.read_excel(filename).lazy()
|
|
448
|
+
sources.append((lf, filename, filepath.stem))
|
|
449
|
+
else:
|
|
450
|
+
# For single file, expand all sheets
|
|
293
451
|
sheets = pl.read_excel(filename, sheet_id=0)
|
|
294
452
|
for sheet_name, df in sheets.items():
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
else:
|
|
310
|
-
df = pl.read_csv(filename)
|
|
311
|
-
sources.append((df, filename, filepath.stem))
|
|
312
|
-
# Multiple files
|
|
453
|
+
tabname = f"{filepath.stem}_{sheet_name}" if prefix_sheet else sheet_name
|
|
454
|
+
sources.append((df.lazy(), filename, tabname))
|
|
455
|
+
elif file_format == "tsv" or ext in (".tsv", ".tab"):
|
|
456
|
+
lf = pl.scan_csv(filename, has_header=has_header, separator="\t")
|
|
457
|
+
sources.append((lf, filename, filepath.stem))
|
|
458
|
+
elif file_format == "parquet" or ext == ".parquet":
|
|
459
|
+
lf = pl.scan_parquet(filename)
|
|
460
|
+
sources.append((lf, filename, filepath.stem))
|
|
461
|
+
elif file_format == "json" or ext == ".json":
|
|
462
|
+
df = pl.read_json(filename)
|
|
463
|
+
sources.append((df, filename, filepath.stem))
|
|
464
|
+
elif file_format == "ndjson" or ext == ".ndjson":
|
|
465
|
+
lf = pl.scan_ndjson(filename)
|
|
466
|
+
sources.append((lf, filename, filepath.stem))
|
|
313
467
|
else:
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
if ext in (".xlsx", ".xls"):
|
|
319
|
-
# Read only the first sheet for multiple files
|
|
320
|
-
df = pl.read_excel(filename)
|
|
321
|
-
sources.append((df, filename, filepath.stem))
|
|
322
|
-
elif ext in (".tsv", ".tab"):
|
|
323
|
-
df = pl.read_csv(filename, separator="\t")
|
|
324
|
-
sources.append((df, filename, filepath.stem))
|
|
325
|
-
elif ext == ".json":
|
|
326
|
-
df = pl.read_json(filename)
|
|
327
|
-
sources.append((df, filename, filepath.stem))
|
|
328
|
-
elif ext == ".parquet":
|
|
329
|
-
df = pl.read_parquet(filename)
|
|
330
|
-
sources.append((df, filename, filepath.stem))
|
|
331
|
-
else:
|
|
332
|
-
df = pl.read_csv(filename)
|
|
333
|
-
sources.append((df, filename, filepath.stem))
|
|
468
|
+
# Treat other formats as TSV
|
|
469
|
+
lf = pl.scan_csv(filename, has_header=has_header, separator="\t")
|
|
470
|
+
sources.append((lf, filename, filepath.stem))
|
|
334
471
|
|
|
335
472
|
return sources
|