dataframe-textual 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataframe_textual/__main__.py +29 -12
- dataframe_textual/common.py +207 -91
- dataframe_textual/data_frame_help_panel.py +22 -4
- dataframe_textual/data_frame_table.py +1964 -591
- dataframe_textual/data_frame_viewer.py +285 -133
- dataframe_textual/table_screen.py +320 -145
- dataframe_textual/yes_no_screen.py +429 -166
- dataframe_textual-1.0.0.dist-info/METADATA +733 -0
- dataframe_textual-1.0.0.dist-info/RECORD +13 -0
- dataframe_textual-0.3.0.dist-info/METADATA +0 -548
- dataframe_textual-0.3.0.dist-info/RECORD +0 -13
- {dataframe_textual-0.3.0.dist-info → dataframe_textual-1.0.0.dist-info}/WHEEL +0 -0
- {dataframe_textual-0.3.0.dist-info → dataframe_textual-1.0.0.dist-info}/entry_points.txt +0 -0
- {dataframe_textual-0.3.0.dist-info → dataframe_textual-1.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -13,7 +13,7 @@ from textual.theme import BUILTIN_THEMES
|
|
|
13
13
|
from textual.widgets import TabbedContent, TabPane
|
|
14
14
|
from textual.widgets.tabbed_content import ContentTab, ContentTabs
|
|
15
15
|
|
|
16
|
-
from .common import
|
|
16
|
+
from .common import get_next_item
|
|
17
17
|
from .data_frame_help_panel import DataFrameHelpPanel
|
|
18
18
|
from .data_frame_table import DataFrameTable
|
|
19
19
|
from .yes_no_screen import OpenFileScreen, SaveFileScreen
|
|
@@ -35,7 +35,7 @@ class DataFrameViewer(App):
|
|
|
35
35
|
- **q** - 🚪 Quit application
|
|
36
36
|
|
|
37
37
|
## 🎨 View & Settings
|
|
38
|
-
-
|
|
38
|
+
- **Ctrl+H** - ❓ Toggle this help panel
|
|
39
39
|
- **k** - 🌙 Cycle through themes
|
|
40
40
|
|
|
41
41
|
## ⭐ Features
|
|
@@ -52,7 +52,7 @@ class DataFrameViewer(App):
|
|
|
52
52
|
|
|
53
53
|
BINDINGS = [
|
|
54
54
|
("q", "quit", "Quit"),
|
|
55
|
-
("h
|
|
55
|
+
("ctrl+h", "toggle_help_panel", "Help"),
|
|
56
56
|
("B", "toggle_tab_bar", "Toggle Tab Bar"),
|
|
57
57
|
("ctrl+o", "add_tab", "Add Tab"),
|
|
58
58
|
("ctrl+shift+s", "save_all_tabs", "Save All Tabs"),
|
|
@@ -79,14 +79,33 @@ class DataFrameViewer(App):
|
|
|
79
79
|
}
|
|
80
80
|
"""
|
|
81
81
|
|
|
82
|
-
def __init__(self, *filenames):
|
|
82
|
+
def __init__(self, *filenames: str, file_format: str | None = None, has_header: bool = True) -> None:
|
|
83
|
+
"""Initialize the DataFrame Viewer application.
|
|
84
|
+
|
|
85
|
+
Loads dataframes from provided filenames and prepares the tabbed interface.
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
*filenames: Variable number of file paths to load (CSV, Excel, Parquet, etc).
|
|
89
|
+
file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
|
|
90
|
+
has_header: Whether the input files have a header row. Defaults to True.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
None
|
|
94
|
+
"""
|
|
83
95
|
super().__init__()
|
|
84
|
-
self.sources = _load_dataframe(filenames)
|
|
96
|
+
self.sources = _load_dataframe(filenames, file_format, has_header=has_header)
|
|
85
97
|
self.tabs: dict[TabPane, DataFrameTable] = {}
|
|
86
98
|
self.help_panel = None
|
|
87
99
|
|
|
88
100
|
def compose(self) -> ComposeResult:
|
|
89
|
-
"""
|
|
101
|
+
"""Compose the application widget structure.
|
|
102
|
+
|
|
103
|
+
Creates a tabbed interface with one tab per file/sheet loaded. Each tab
|
|
104
|
+
contains a DataFrameTable widget for displaying and interacting with the data.
|
|
105
|
+
|
|
106
|
+
Yields:
|
|
107
|
+
TabPane: One tab per file or sheet for the tabbed interface.
|
|
108
|
+
"""
|
|
90
109
|
# Tabbed interface
|
|
91
110
|
self.tabbed = TabbedContent(id="main_tabs")
|
|
92
111
|
with self.tabbed:
|
|
@@ -99,9 +118,7 @@ class DataFrameViewer(App):
|
|
|
99
118
|
|
|
100
119
|
tab_id = f"tab_{idx}"
|
|
101
120
|
try:
|
|
102
|
-
table = DataFrameTable(
|
|
103
|
-
df, filename, name=tabname, id=tab_id, zebra_stripes=True
|
|
104
|
-
)
|
|
121
|
+
table = DataFrameTable(df, filename, name=tabname, id=tab_id, zebra_stripes=True)
|
|
105
122
|
tab = TabPane(tabname, table, name=tabname, id=tab_id)
|
|
106
123
|
self.tabs[tab] = table
|
|
107
124
|
yield tab
|
|
@@ -109,49 +126,68 @@ class DataFrameViewer(App):
|
|
|
109
126
|
self.notify(f"Error loading {tabname}: {e}", severity="error")
|
|
110
127
|
|
|
111
128
|
def on_mount(self) -> None:
|
|
112
|
-
"""Set up the
|
|
129
|
+
"""Set up the application when it starts.
|
|
130
|
+
|
|
131
|
+
Initializes the app by hiding the tab bar for single-file mode and focusing
|
|
132
|
+
the active table widget.
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
None
|
|
136
|
+
"""
|
|
113
137
|
if len(self.tabs) == 1:
|
|
114
138
|
self.query_one(ContentTabs).display = False
|
|
115
139
|
self._get_active_table().focus()
|
|
116
140
|
|
|
117
|
-
def on_key(self, event):
|
|
141
|
+
def on_key(self, event) -> None:
|
|
142
|
+
"""Handle key press events at the application level.
|
|
143
|
+
|
|
144
|
+
Currently handles theme cycling with the 'k' key.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
event: The key event object containing key information.
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
None
|
|
151
|
+
"""
|
|
118
152
|
if event.key == "k":
|
|
119
|
-
self.theme =
|
|
120
|
-
self.notify(f"Switched to theme: [$
|
|
121
|
-
|
|
122
|
-
def on_tabbed_content_tab_activated(
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
153
|
+
self.theme = get_next_item(list(BUILTIN_THEMES.keys()), self.theme)
|
|
154
|
+
self.notify(f"Switched to theme: [$success]{self.theme}[/]", title="Theme")
|
|
155
|
+
|
|
156
|
+
def on_tabbed_content_tab_activated(self, event: TabbedContent.TabActivated) -> None:
|
|
157
|
+
"""Handle tab activation events.
|
|
158
|
+
|
|
159
|
+
When a tab is activated, focuses the table widget and loads its data if not already loaded.
|
|
160
|
+
Applies active styling to the clicked tab and removes it from others.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
event: The tab activated event containing the activated tab pane.
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
None
|
|
167
|
+
"""
|
|
168
|
+
# Focus the table in the newly activated tab
|
|
169
|
+
if table := self._get_active_table():
|
|
170
|
+
table.focus()
|
|
171
|
+
else:
|
|
128
172
|
return
|
|
129
173
|
|
|
174
|
+
if table.loaded_rows == 0:
|
|
175
|
+
table._setup_table()
|
|
176
|
+
|
|
130
177
|
# Apply background color to active tab
|
|
131
178
|
event.tab.add_class("active")
|
|
132
179
|
for tab in self.tabbed.query(ContentTab):
|
|
133
180
|
if tab != event.tab:
|
|
134
181
|
tab.remove_class("active")
|
|
135
182
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
if table := self._get_active_table():
|
|
139
|
-
table.focus()
|
|
140
|
-
except NoMatches:
|
|
141
|
-
pass
|
|
183
|
+
def action_toggle_help_panel(self) -> None:
|
|
184
|
+
"""Toggle the help panel on or off.
|
|
142
185
|
|
|
143
|
-
|
|
144
|
-
"""Get the currently active table."""
|
|
145
|
-
try:
|
|
146
|
-
tabbed: TabbedContent = self.query_one(TabbedContent)
|
|
147
|
-
if active_pane := tabbed.active_pane:
|
|
148
|
-
return active_pane.query_one(DataFrameTable)
|
|
149
|
-
except (NoMatches, AttributeError):
|
|
150
|
-
pass
|
|
151
|
-
return None
|
|
186
|
+
Shows or hides the context-sensitive help panel. Creates it on first use.
|
|
152
187
|
|
|
153
|
-
|
|
154
|
-
|
|
188
|
+
Returns:
|
|
189
|
+
None
|
|
190
|
+
"""
|
|
155
191
|
if self.help_panel:
|
|
156
192
|
self.help_panel.display = not self.help_panel.display
|
|
157
193
|
else:
|
|
@@ -159,55 +195,150 @@ class DataFrameViewer(App):
|
|
|
159
195
|
self.mount(self.help_panel)
|
|
160
196
|
|
|
161
197
|
def action_add_tab(self) -> None:
|
|
162
|
-
"""Open file
|
|
163
|
-
self.push_screen(OpenFileScreen(), self._handle_file_open)
|
|
198
|
+
"""Open file browser to load a file in a new tab.
|
|
164
199
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
f"Opened: [on $primary]{Path(filename).name}[/]", title="Open"
|
|
173
|
-
)
|
|
174
|
-
except Exception as e:
|
|
175
|
-
self.notify(f"Error: {e}", severity="error")
|
|
200
|
+
Displays the file open dialog for the user to select a file to load
|
|
201
|
+
as a new tab in the interface.
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
None
|
|
205
|
+
"""
|
|
206
|
+
self.push_screen(OpenFileScreen(), self._do_add_tab)
|
|
176
207
|
|
|
177
208
|
def action_save_all_tabs(self) -> None:
|
|
178
|
-
"""Save all tabs to a Excel file.
|
|
179
|
-
|
|
209
|
+
"""Save all open tabs to a single Excel file.
|
|
210
|
+
|
|
211
|
+
Displays a save dialog to choose filename and location, then saves all
|
|
212
|
+
open tabs as separate sheets in a single Excel workbook.
|
|
213
|
+
|
|
214
|
+
Returns:
|
|
215
|
+
None
|
|
216
|
+
"""
|
|
217
|
+
callback = partial(self._get_active_table()._do_save_file, all_tabs=True)
|
|
180
218
|
self.push_screen(
|
|
181
219
|
SaveFileScreen("all-tabs.xlsx", title="Save All Tabs"),
|
|
182
220
|
callback=callback,
|
|
183
221
|
)
|
|
184
222
|
|
|
185
223
|
def action_close_tab(self) -> None:
|
|
186
|
-
"""Close
|
|
224
|
+
"""Close the currently active tab.
|
|
225
|
+
|
|
226
|
+
Closes the current tab. If this is the only tab, exits the application instead.
|
|
227
|
+
|
|
228
|
+
Returns:
|
|
229
|
+
None
|
|
230
|
+
"""
|
|
187
231
|
if len(self.tabs) <= 1:
|
|
188
232
|
self.app.exit()
|
|
189
233
|
return
|
|
190
234
|
self._close_tab()
|
|
191
235
|
|
|
192
|
-
def action_next_tab(self, offset: int = 1) ->
|
|
193
|
-
"""Switch to next tab
|
|
236
|
+
def action_next_tab(self, offset: int = 1) -> None:
|
|
237
|
+
"""Switch to the next tab or previous tab.
|
|
238
|
+
|
|
239
|
+
Cycles through tabs by the specified offset. With offset=1, moves to next tab.
|
|
240
|
+
With offset=-1, moves to previous tab. Wraps around when reaching edges.
|
|
241
|
+
|
|
242
|
+
Args:
|
|
243
|
+
offset: Number of tabs to advance (+1 for next, -1 for previous). Defaults to 1.
|
|
244
|
+
|
|
245
|
+
Returns:
|
|
246
|
+
None
|
|
247
|
+
"""
|
|
194
248
|
if len(self.tabs) <= 1:
|
|
195
249
|
return
|
|
196
250
|
try:
|
|
197
251
|
tabs: list[TabPane] = list(self.tabs.keys())
|
|
198
|
-
next_tab =
|
|
252
|
+
next_tab = get_next_item(tabs, self.tabbed.active_pane, offset)
|
|
199
253
|
self.tabbed.active = next_tab.id
|
|
200
254
|
except (NoMatches, ValueError):
|
|
201
255
|
pass
|
|
202
256
|
|
|
203
|
-
def
|
|
204
|
-
"""
|
|
205
|
-
|
|
206
|
-
|
|
257
|
+
def action_toggle_tab_bar(self) -> None:
|
|
258
|
+
"""Toggle the tab bar visibility.
|
|
259
|
+
|
|
260
|
+
Shows or hides the tab bar at the bottom of the window. Useful for maximizing
|
|
261
|
+
screen space in single-tab mode.
|
|
262
|
+
|
|
263
|
+
Returns:
|
|
264
|
+
None
|
|
265
|
+
"""
|
|
266
|
+
tabs = self.query_one(ContentTabs)
|
|
267
|
+
tabs.display = not tabs.display
|
|
268
|
+
# status = "shown" if tabs.display else "hidden"
|
|
269
|
+
# self.notify(f"Tab bar [$success]{status}[/]", title="Toggle")
|
|
270
|
+
|
|
271
|
+
def _get_active_table(self) -> DataFrameTable | None:
|
|
272
|
+
"""Get the currently active DataFrameTable widget.
|
|
273
|
+
|
|
274
|
+
Retrieves the table from the currently active tab. Returns None if no
|
|
275
|
+
table is found or an error occurs.
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
The active DataFrameTable widget, or None if not found.
|
|
279
|
+
"""
|
|
280
|
+
try:
|
|
281
|
+
tabbed: TabbedContent = self.query_one(TabbedContent)
|
|
282
|
+
if active_pane := tabbed.active_pane:
|
|
283
|
+
return active_pane.query_one(DataFrameTable)
|
|
284
|
+
except (NoMatches, AttributeError):
|
|
285
|
+
self.notify("No active table found", title="Locate", severity="error")
|
|
286
|
+
return None
|
|
287
|
+
|
|
288
|
+
def _do_add_tab(self, filename: str) -> None:
|
|
289
|
+
"""Add a tab for the opened file.
|
|
290
|
+
|
|
291
|
+
Loads the specified file and creates one or more tabs for it. For Excel files,
|
|
292
|
+
creates one tab per sheet. For other formats, creates a single tab.
|
|
293
|
+
|
|
294
|
+
Args:
|
|
295
|
+
filename: Path to the file to load and add as tab(s).
|
|
296
|
+
|
|
297
|
+
Returns:
|
|
298
|
+
None
|
|
299
|
+
"""
|
|
300
|
+
if filename and os.path.exists(filename):
|
|
301
|
+
try:
|
|
302
|
+
n_tab = 0
|
|
303
|
+
for lf, filename, tabname in _load_file(filename, prefix_sheet=True):
|
|
304
|
+
self._add_tab(lf.collect(), filename, tabname)
|
|
305
|
+
n_tab += 1
|
|
306
|
+
self.notify(f"Added [$accent]{n_tab}[/] tab(s) for [$success]{filename}[/]", title="Open")
|
|
307
|
+
except Exception as e:
|
|
308
|
+
self.notify(f"Error: {e}", title="Open", severity="error")
|
|
309
|
+
else:
|
|
310
|
+
self.notify(f"File does not exist: [$warning]{filename}[/]", title="Open", severity="warning")
|
|
311
|
+
|
|
312
|
+
def _add_tab(self, df: pl.DataFrame, filename: str, tabname: str) -> None:
|
|
313
|
+
"""Add new tab for the given DataFrame.
|
|
314
|
+
|
|
315
|
+
Creates and adds a new tab with the provided DataFrame and configuration.
|
|
316
|
+
Ensures unique tab names by appending an index if needed. Shows the tab bar
|
|
317
|
+
if this is no longer the only tab.
|
|
318
|
+
|
|
319
|
+
Args:
|
|
320
|
+
df: The Polars DataFrame to display in the new tab.
|
|
321
|
+
filename: The source filename for this data (used in table metadata).
|
|
322
|
+
tabname: The display name for the tab.
|
|
323
|
+
|
|
324
|
+
Returns:
|
|
325
|
+
None
|
|
326
|
+
"""
|
|
207
327
|
if any(tab.name == tabname for tab in self.tabs):
|
|
208
328
|
tabname = f"{tabname}_{len(self.tabs) + 1}"
|
|
209
329
|
|
|
210
|
-
|
|
330
|
+
# Find an available tab index
|
|
331
|
+
tab_idx = f"tab_{len(self.tabs) + 1}"
|
|
332
|
+
for idx in range(len(self.tabs)):
|
|
333
|
+
pending_tab_idx = f"tab_{idx + 1}"
|
|
334
|
+
if any(tab.id == pending_tab_idx for tab in self.tabs):
|
|
335
|
+
continue
|
|
336
|
+
|
|
337
|
+
tab_idx = pending_tab_idx
|
|
338
|
+
break
|
|
339
|
+
|
|
340
|
+
table = DataFrameTable(df, filename, zebra_stripes=True, id=tab_idx, name=tabname)
|
|
341
|
+
tab = TabPane(tabname, table, name=tabname, id=tab_idx)
|
|
211
342
|
self.tabbed.add_pane(tab)
|
|
212
343
|
self.tabs[tab] = table
|
|
213
344
|
|
|
@@ -219,102 +350,123 @@ class DataFrameViewer(App):
|
|
|
219
350
|
table.focus()
|
|
220
351
|
|
|
221
352
|
def _close_tab(self) -> None:
|
|
222
|
-
"""Close
|
|
353
|
+
"""Close the currently active tab.
|
|
354
|
+
|
|
355
|
+
Removes the active tab from the interface. If only one tab remains and no more
|
|
356
|
+
can be closed, the application exits instead.
|
|
357
|
+
|
|
358
|
+
Returns:
|
|
359
|
+
None
|
|
360
|
+
"""
|
|
223
361
|
try:
|
|
224
362
|
if len(self.tabs) == 1:
|
|
225
363
|
self.app.exit()
|
|
226
364
|
else:
|
|
227
365
|
if active_pane := self.tabbed.active_pane:
|
|
228
366
|
self.tabbed.remove_pane(active_pane.id)
|
|
229
|
-
self.
|
|
230
|
-
|
|
231
|
-
)
|
|
367
|
+
self.tabs.pop(active_pane)
|
|
368
|
+
self.notify(f"Closed tab [$success]{active_pane.name}[/]", title="Close")
|
|
232
369
|
except NoMatches:
|
|
233
370
|
pass
|
|
234
371
|
|
|
235
|
-
def action_toggle_tab_bar(self) -> None:
|
|
236
|
-
"""Toggle tab bar visibility."""
|
|
237
|
-
tabs = self.query_one(ContentTabs)
|
|
238
|
-
tabs.display = not tabs.display
|
|
239
|
-
status = "shown" if tabs.display else "hidden"
|
|
240
|
-
self.notify(f"Tab bar [on $primary]{status}[/]", title="Toggle")
|
|
241
372
|
|
|
373
|
+
def _load_dataframe(
|
|
374
|
+
filenames: list[str], file_format: str | None = None, has_header: bool = True
|
|
375
|
+
) -> list[tuple[pl.LazyFrame, str, str]]:
|
|
376
|
+
"""Load DataFrames from file specifications.
|
|
242
377
|
|
|
243
|
-
|
|
244
|
-
|
|
378
|
+
Handles loading from multiple files, single files, or stdin. For Excel files,
|
|
379
|
+
loads all sheets as separate entries. For other formats, loads as single file.
|
|
245
380
|
|
|
246
381
|
Args:
|
|
247
382
|
filenames: List of filenames to load. If single filename is "-", read from stdin.
|
|
383
|
+
file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
|
|
384
|
+
has_header: Whether the input files have a header row. Defaults to True.
|
|
248
385
|
|
|
249
386
|
Returns:
|
|
250
|
-
List of tuples of (
|
|
387
|
+
List of tuples of (LazyFrame, filename, tabname) ready for display.
|
|
251
388
|
"""
|
|
252
389
|
sources = []
|
|
253
390
|
|
|
254
|
-
|
|
255
|
-
if len(filenames) == 1:
|
|
256
|
-
filename = filenames[0]
|
|
257
|
-
filepath = Path(filename)
|
|
258
|
-
ext = filepath.suffix.lower()
|
|
391
|
+
prefix_sheet = len(filenames) > 1
|
|
259
392
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
393
|
+
for filename in filenames:
|
|
394
|
+
sources.extend(_load_file(filename, prefix_sheet=prefix_sheet, file_format=file_format, has_header=has_header))
|
|
395
|
+
return sources
|
|
263
396
|
|
|
264
|
-
# Read CSV from stdin into memory first (stdin is not seekable)
|
|
265
|
-
stdin_data = sys.stdin.read()
|
|
266
|
-
df = pl.read_csv(StringIO(stdin_data))
|
|
267
397
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
398
|
+
def _load_file(
|
|
399
|
+
filename: str,
|
|
400
|
+
first_sheet: bool = False,
|
|
401
|
+
prefix_sheet: bool = False,
|
|
402
|
+
file_format: str | None = None,
|
|
403
|
+
has_header: bool = True,
|
|
404
|
+
) -> list[tuple[pl.LazyFrame, str, str]]:
|
|
405
|
+
"""Load a single file and return list of sources.
|
|
406
|
+
|
|
407
|
+
For Excel files, when `first_sheet` is True, returns only the first sheet. Otherwise, returns one entry per sheet.
|
|
408
|
+
For other files or multiple files, returns one entry per file.
|
|
409
|
+
|
|
410
|
+
Args:
|
|
411
|
+
filename: Path to file to load.
|
|
412
|
+
first_sheet: If True, only load first sheet for Excel files. Defaults to False.
|
|
413
|
+
prefix_sheet: If True, prefix filename to sheet name as the tab name for Excel files. Defaults to False.
|
|
414
|
+
file_format: Optional format specifier for input files (e.g., 'csv', 'excel', 'tsv', 'parquet', 'json', 'ndjson').
|
|
415
|
+
|
|
416
|
+
Returns:
|
|
417
|
+
List of tuples of (LazyFrame, filename, tabname).
|
|
418
|
+
"""
|
|
419
|
+
sources = []
|
|
420
|
+
|
|
421
|
+
if filename == "-":
|
|
422
|
+
from io import StringIO
|
|
423
|
+
|
|
424
|
+
# Read from stdin into memory first (stdin is not seekable)
|
|
425
|
+
stdin_data = sys.stdin.read()
|
|
426
|
+
lf = pl.scan_csv(StringIO(stdin_data), has_header=has_header, separator="," if file_format == "csv" else "\t")
|
|
427
|
+
|
|
428
|
+
# Reopen stdin to /dev/tty for proper terminal interaction
|
|
429
|
+
try:
|
|
430
|
+
tty = open("/dev/tty")
|
|
431
|
+
os.dup2(tty.fileno(), sys.stdin.fileno())
|
|
432
|
+
except (OSError, FileNotFoundError):
|
|
433
|
+
pass
|
|
434
|
+
|
|
435
|
+
sources.append((lf, "stdin.tsv" if file_format == "tsv" else "stdin.csv", "stdin"))
|
|
436
|
+
return sources
|
|
437
|
+
|
|
438
|
+
filepath = Path(filename)
|
|
439
|
+
ext = filepath.suffix.lower()
|
|
440
|
+
|
|
441
|
+
if file_format == "csv" or ext == ".csv":
|
|
442
|
+
lf = pl.scan_csv(filename, has_header=has_header)
|
|
443
|
+
sources.append((lf, filename, filepath.stem))
|
|
444
|
+
elif file_format == "excel" or ext in (".xlsx", ".xls"):
|
|
445
|
+
if first_sheet:
|
|
446
|
+
# Read only the first sheet for multiple files
|
|
447
|
+
lf = pl.read_excel(filename).lazy()
|
|
448
|
+
sources.append((lf, filename, filepath.stem))
|
|
449
|
+
else:
|
|
450
|
+
# For single file, expand all sheets
|
|
278
451
|
sheets = pl.read_excel(filename, sheet_id=0)
|
|
279
452
|
for sheet_name, df in sheets.items():
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
else:
|
|
295
|
-
df = pl.read_csv(filename)
|
|
296
|
-
sources.append((df, filename, filepath.stem))
|
|
297
|
-
# Multiple files
|
|
453
|
+
tabname = f"{filepath.stem}_{sheet_name}" if prefix_sheet else sheet_name
|
|
454
|
+
sources.append((df.lazy(), filename, tabname))
|
|
455
|
+
elif file_format == "tsv" or ext in (".tsv", ".tab"):
|
|
456
|
+
lf = pl.scan_csv(filename, has_header=has_header, separator="\t")
|
|
457
|
+
sources.append((lf, filename, filepath.stem))
|
|
458
|
+
elif file_format == "parquet" or ext == ".parquet":
|
|
459
|
+
lf = pl.scan_parquet(filename)
|
|
460
|
+
sources.append((lf, filename, filepath.stem))
|
|
461
|
+
elif file_format == "json" or ext == ".json":
|
|
462
|
+
df = pl.read_json(filename)
|
|
463
|
+
sources.append((df, filename, filepath.stem))
|
|
464
|
+
elif file_format == "ndjson" or ext == ".ndjson":
|
|
465
|
+
lf = pl.scan_ndjson(filename)
|
|
466
|
+
sources.append((lf, filename, filepath.stem))
|
|
298
467
|
else:
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
if ext in (".xlsx", ".xls"):
|
|
304
|
-
# Read only the first sheet for multiple files
|
|
305
|
-
df = pl.read_excel(filename)
|
|
306
|
-
sources.append((df, filename, filepath.stem))
|
|
307
|
-
elif ext in (".tsv", ".tab"):
|
|
308
|
-
df = pl.read_csv(filename, separator="\t")
|
|
309
|
-
sources.append((df, filename, filepath.stem))
|
|
310
|
-
elif ext == ".json":
|
|
311
|
-
df = pl.read_json(filename)
|
|
312
|
-
sources.append((df, filename, filepath.stem))
|
|
313
|
-
elif ext == ".parquet":
|
|
314
|
-
df = pl.read_parquet(filename)
|
|
315
|
-
sources.append((df, filename, filepath.stem))
|
|
316
|
-
else:
|
|
317
|
-
df = pl.read_csv(filename)
|
|
318
|
-
sources.append((df, filename, filepath.stem))
|
|
468
|
+
# Treat other formats as TSV
|
|
469
|
+
lf = pl.scan_csv(filename, has_header=has_header, separator="\t")
|
|
470
|
+
sources.append((lf, filename, filepath.stem))
|
|
319
471
|
|
|
320
472
|
return sources
|