dataframe-textual 0.1.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dataframe-textual might be problematic. Click here for more details.
- dataframe_textual/__main__.py +65 -0
- dataframe_textual/common.py +340 -0
- {dataframe_viewer → dataframe_textual}/data_frame_help_panel.py +22 -4
- dataframe_textual/data_frame_table.py +2768 -0
- dataframe_textual/data_frame_viewer.py +472 -0
- dataframe_textual/table_screen.py +490 -0
- dataframe_textual/yes_no_screen.py +672 -0
- dataframe_textual-1.1.0.dist-info/METADATA +726 -0
- dataframe_textual-1.1.0.dist-info/RECORD +13 -0
- dataframe_textual-1.1.0.dist-info/entry_points.txt +2 -0
- dataframe_textual-0.1.0.dist-info/METADATA +0 -522
- dataframe_textual-0.1.0.dist-info/RECORD +0 -13
- dataframe_textual-0.1.0.dist-info/entry_points.txt +0 -2
- dataframe_viewer/__main__.py +0 -48
- dataframe_viewer/common.py +0 -204
- dataframe_viewer/data_frame_table.py +0 -1395
- dataframe_viewer/data_frame_viewer.py +0 -320
- dataframe_viewer/table_screen.py +0 -311
- dataframe_viewer/yes_no_screen.py +0 -409
- {dataframe_viewer → dataframe_textual}/__init__.py +0 -0
- {dataframe_textual-0.1.0.dist-info → dataframe_textual-1.1.0.dist-info}/WHEEL +0 -0
- {dataframe_textual-0.1.0.dist-info → dataframe_textual-1.1.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,472 @@
|
|
|
1
|
+
"""DataFrame Viewer application and utilities."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
from functools import partial
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from textwrap import dedent
|
|
8
|
+
|
|
9
|
+
import polars as pl
|
|
10
|
+
from textual.app import App, ComposeResult
|
|
11
|
+
from textual.css.query import NoMatches
|
|
12
|
+
from textual.theme import BUILTIN_THEMES
|
|
13
|
+
from textual.widgets import TabbedContent, TabPane
|
|
14
|
+
from textual.widgets.tabbed_content import ContentTab, ContentTabs
|
|
15
|
+
|
|
16
|
+
from .common import get_next_item
|
|
17
|
+
from .data_frame_help_panel import DataFrameHelpPanel
|
|
18
|
+
from .data_frame_table import DataFrameTable
|
|
19
|
+
from .yes_no_screen import OpenFileScreen, SaveFileScreen
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DataFrameViewer(App):
    """A Textual app to interact with multiple Polars DataFrames via tabbed interface.

    Every loaded source is shown in its own ``TabPane`` that wraps a
    ``DataFrameTable``. ``self.tabs`` maps each open pane to its table and is the
    app's own record of which tabs exist.
    """

    HELP = dedent("""
        # 📊 DataFrame Viewer - App Controls

        ## 🎯 File & Tab Management
        - **Ctrl+O** - 📁 Add a new tab
        - **Ctrl+Shift+S** - 💾 Save all tabs
        - **Ctrl+W** - ❌ Close current tab
        - **>** or **b** - ▶️ Next tab
        - **<** - ◀️ Previous tab
        - **B** - 👁️ Toggle tab bar visibility
        - **q** - 🚪 Quit application

        ## 🎨 View & Settings
        - **Ctrl+H** - ❓ Toggle this help panel
        - **k** - 🌙 Cycle through themes

        ## ⭐ Features
        - **Multi-file support** - 📂 Open multiple CSV/Excel files as tabs
        - **Excel sheets** - 📊 Excel files auto-expand sheets into tabs
        - **Lazy loading** - ⚡ Large files load on demand
        - **Sticky tabs** - 📌 Tab bar stays visible when scrolling
        - **Rich formatting** - 🎨 Color-coded data types
        - **Search & filter** - 🔍 Find and filter data quickly
        - **Sort & reorder** - ⬆️ Multi-column sort, drag rows/columns
        - **Undo/Redo** - 🔄 Full history of operations
        - **Freeze rows/cols** - 🔒 Pin header rows and columns
    """).strip()

    BINDINGS = [
        ("q", "quit", "Quit"),
        ("ctrl+h", "toggle_help_panel", "Help"),
        ("B", "toggle_tab_bar", "Toggle Tab Bar"),
        ("ctrl+o", "add_tab", "Add Tab"),
        ("ctrl+shift+s", "save_all_tabs", "Save All Tabs"),
        ("ctrl+w", "close_tab", "Close Tab"),
        ("greater_than_sign,b", "next_tab(1)", "Next Tab"),
        ("less_than_sign", "next_tab(-1)", "Prev Tab"),
    ]

    CSS = """
        TabbedContent {
            height: 100%; /* Or a specific value, e.g., 20; */
        }
        TabbedContent > ContentTabs {
            dock: bottom;
        }
        TabbedContent > ContentSwitcher {
            overflow: auto;
            height: 1fr; /* Takes the remaining space below tabs */
        }

        TabbedContent ContentTab.active {
            background: $primary;
            color: $text;
        }
    """

    def __init__(self, *filenames: str, file_format: str | None = None, has_header: bool = True) -> None:
        """Initialize the DataFrame Viewer application.

        Loads the sources for the given filenames up front; the widgets
        themselves are created later in ``compose``.

        Args:
            *filenames: File paths to load. They are forwarded unchanged to
                ``_load_dataframe``.
            file_format: Optional format specifier forwarded to the loader
                (e.g., 'csv', 'excel').
            has_header: Whether the input files have a header row. Defaults to True.
        """
        super().__init__()
        self.sources = _load_dataframe(filenames, file_format, has_header=has_header)
        # Maps every open TabPane to the DataFrameTable it contains.
        self.tabs: dict[TabPane, DataFrameTable] = {}
        # Created lazily the first time the help panel is toggled.
        self.help_panel = None

    @staticmethod
    def _dedupe_tabname(tabname: str, taken: set[str], suffix: int) -> str:
        """Return ``tabname`` unchanged, or a ``tabname_<n>`` variant not in ``taken``.

        Args:
            tabname: The preferred tab name.
            taken: Names that are already in use.
            suffix: First numeric suffix to try when ``tabname`` is taken.

        Returns:
            A name that is guaranteed not to be in ``taken``.
        """
        if tabname not in taken:
            return tabname
        candidate = f"{tabname}_{suffix}"
        # Keep counting up: a single suffix can itself collide with an existing tab.
        while candidate in taken:
            suffix += 1
            candidate = f"{tabname}_{suffix}"
        return candidate

    def compose(self) -> ComposeResult:
        """Compose the application widget structure.

        Builds a ``TabbedContent`` with one ``TabPane`` per entry in
        ``self.sources``; each pane holds a ``DataFrameTable``. A source whose
        widgets cannot be constructed is reported via a notification and skipped.

        Yields:
            TabPane: One pane per successfully constructed source.
        """
        self.tabbed = TabbedContent(id="main_tabs")
        with self.tabbed:
            seen_names: set[str] = set()
            for idx, (df, filename, tabname) in enumerate(self.sources, start=1):
                # Ensure unique tab names
                tabname = self._dedupe_tabname(tabname, seen_names, idx)
                seen_names.add(tabname)

                tab_id = f"tab_{idx}"
                try:
                    table = DataFrameTable(df, filename, name=tabname, id=tab_id, zebra_stripes=True)
                    tab = TabPane(tabname, table, name=tabname, id=tab_id)
                except Exception as e:
                    # One bad source must not prevent the remaining tabs from showing.
                    self.notify(f"Error loading {tabname}: {e}", severity="error")
                    continue

                self.tabs[tab] = table
                yield tab

    def on_mount(self) -> None:
        """Set up the application when it starts.

        Hides the tab bar when exactly one tab exists and focuses the active
        table, if there is one.
        """
        if len(self.tabs) == 1:
            self.query_one(ContentTabs).display = False

        # There may be no active table (e.g. nothing was loaded); do not crash then.
        table = self._get_active_table()
        if table is not None:
            table.focus()

    def on_key(self, event) -> None:
        """Handle key press events at the application level.

        Only the 'k' key is handled here: it switches ``self.theme`` to the
        value ``get_next_item`` returns for the list of built-in theme names
        and shows a notification.

        Args:
            event: The key event; only its ``key`` attribute is read.
        """
        if event.key == "k":
            self.theme = get_next_item(list(BUILTIN_THEMES.keys()), self.theme)
            self.notify(f"Switched to theme: [$success]{self.theme}[/]", title="Theme")

    def on_tabbed_content_tab_activated(self, event: TabbedContent.TabActivated) -> None:
        """Handle tab activation events.

        Focuses the active table, calls its ``_setup_table`` when it reports
        zero loaded rows, and moves the ``active`` CSS class to the activated tab.

        Args:
            event: The tab activated event; ``event.tab`` is the tab that
                receives the ``active`` class.
        """
        table = self._get_active_table()
        if not table:
            return

        # Focus the table in the newly activated tab
        table.focus()

        if table.loaded_rows == 0:
            table._setup_table()

        # Apply background color to active tab
        event.tab.add_class("active")
        for tab in self.tabbed.query(ContentTab):
            if tab != event.tab:
                tab.remove_class("active")

    def action_toggle_help_panel(self) -> None:
        """Toggle the help panel on or off.

        The panel is created and mounted on first use; afterwards only its
        ``display`` flag is flipped.
        """
        if self.help_panel:
            self.help_panel.display = not self.help_panel.display
        else:
            self.help_panel = DataFrameHelpPanel()
            self.mount(self.help_panel)

    def action_add_tab(self) -> None:
        """Open the file dialog; its result is passed to ``_do_add_tab``."""
        self.push_screen(OpenFileScreen(), self._do_add_tab)

    def action_save_all_tabs(self) -> None:
        """Open the save dialog for saving all tabs.

        The dialog result is handed to the active table's ``_do_save_file``
        with ``all_tabs=True``. Does nothing when there is no active table.
        """
        table = self._get_active_table()
        if table is None:
            # Without an active table there is no save handler to call back into.
            return

        callback = partial(table._do_save_file, all_tabs=True)
        self.push_screen(
            SaveFileScreen("all-tabs.xlsx", title="Save All Tabs"),
            callback=callback,
        )

    def action_close_tab(self) -> None:
        """Close the currently active tab.

        If at most one tab is open, the application exits instead.
        """
        if len(self.tabs) <= 1:
            self.exit()
            return
        self._close_tab()

    def action_next_tab(self, offset: int = 1) -> None:
        """Switch to another tab relative to the active one.

        Args:
            offset: Passed to ``get_next_item`` together with the open panes
                and the active pane (+1 is bound to "next", -1 to "previous").
                Defaults to 1.
        """
        if len(self.tabs) <= 1:
            return
        try:
            next_tab = get_next_item(list(self.tabs), self.tabbed.active_pane, offset)
            self.tabbed.active = next_tab.id
        except (NoMatches, ValueError):
            # Best effort: if the target cannot be resolved, stay on the current tab.
            pass

    def action_toggle_tab_bar(self) -> None:
        """Toggle the visibility of the tab bar."""
        tabs = self.query_one(ContentTabs)
        tabs.display = not tabs.display

    def _get_active_table(self) -> DataFrameTable | None:
        """Get the currently active DataFrameTable widget.

        Returns:
            The table inside the active pane. ``None`` when there is no active
            pane, or when the lookup raises ``NoMatches``/``AttributeError`` (an
            error notification is shown in that case).
        """
        try:
            tabbed: TabbedContent = self.query_one(TabbedContent)
            if active_pane := tabbed.active_pane:
                return active_pane.query_one(DataFrameTable)
        except (NoMatches, AttributeError):
            self.notify("No active table found", title="Locate", severity="error")
        return None

    def _do_add_tab(self, filename: str) -> None:
        """Add tab(s) for the opened file.

        Loads the file with ``_load_file`` (``prefix_sheet=True``) and adds one
        tab per returned source. Failures are reported as notifications rather
        than raised.

        Args:
            filename: Path to the file to load and add as tab(s).
        """
        if not (filename and os.path.exists(filename)):
            self.notify(f"File does not exist: [$warning]{filename}[/]", title="Open", severity="warning")
            return

        try:
            n_tab = 0
            for frame, source, tabname in _load_file(filename, prefix_sheet=True):
                # Some loaders hand back an eager DataFrame, which has no collect().
                df = frame.collect() if isinstance(frame, pl.LazyFrame) else frame
                self._add_tab(df, source, tabname)
                n_tab += 1
            self.notify(f"Added [$accent]{n_tab}[/] tab(s) for [$success]{filename}[/]", title="Open")
        except Exception as e:
            self.notify(f"Error: {e}", title="Open", severity="error")

    def _add_tab(self, df: pl.DataFrame, filename: str, tabname: str) -> None:
        """Add new tab for the given DataFrame.

        Picks a tab name and a ``tab_<n>`` id that no open tab uses, adds the
        pane, shows the tab bar once more than one tab exists, and activates
        and focuses the new tab.

        Args:
            df: The Polars DataFrame to display in the new tab.
            filename: The source filename passed on to the ``DataFrameTable``.
            tabname: The preferred display name for the tab.
        """
        tabname = self._dedupe_tabname(tabname, {tab.name for tab in self.tabs}, len(self.tabs) + 1)

        # Lowest free "tab_<n>" id. With k open tabs at most k of the k + 1
        # candidates can be taken, so the search always finds one.
        used_ids = {tab.id for tab in self.tabs}
        tab_idx = next(
            candidate
            for candidate in (f"tab_{n}" for n in range(1, len(self.tabs) + 2))
            if candidate not in used_ids
        )

        table = DataFrameTable(df, filename, zebra_stripes=True, id=tab_idx, name=tabname)
        tab = TabPane(tabname, table, name=tabname, id=tab_idx)
        self.tabbed.add_pane(tab)
        self.tabs[tab] = table

        if len(self.tabs) > 1:
            self.query_one(ContentTabs).display = True

        # Activate the new tab
        self.tabbed.active = tab.id
        table.focus()

    def _close_tab(self) -> None:
        """Close the currently active tab.

        Exits the application when exactly one tab is open; otherwise removes
        the active pane, drops it from ``self.tabs`` and shows a notification.
        """
        try:
            if len(self.tabs) == 1:
                self.exit()
            elif active_pane := self.tabbed.active_pane:
                self.tabbed.remove_pane(active_pane.id)
                # Tolerate a pane that is not tracked in self.tabs.
                self.tabs.pop(active_pane, None)
                self.notify(f"Closed tab [$success]{active_pane.name}[/]", title="Close")
        except NoMatches:
            pass
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def _load_dataframe(
    filenames: list[str], file_format: str | None = None, has_header: bool = True
) -> list[tuple[pl.LazyFrame, str, str]]:
    """Gather the display sources for every requested input.

    Each filename is delegated to ``_load_file`` and the per-file results are
    flattened, in input order, into one list.

    Args:
        filenames: Filenames to load, each handed to ``_load_file`` as-is.
        file_format: Optional format specifier forwarded to ``_load_file``.
        has_header: Whether the inputs have a header row. Defaults to True.

    Returns:
        A flat list of the (frame, filename, tabname) tuples produced by
        ``_load_file`` for each input.
    """
    # With several inputs, ask _load_file to prefix sheet tab names with the file stem.
    several_inputs = len(filenames) > 1

    return [
        source
        for name in filenames
        for source in _load_file(
            name, prefix_sheet=several_inputs, file_format=file_format, has_header=has_header
        )
    ]
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _load_file(
    filename: str,
    first_sheet: bool = False,
    prefix_sheet: bool = False,
    file_format: str | None = None,
    has_header: bool = True,
) -> list[tuple[pl.LazyFrame, str, str]]:
    """Load a single file and return list of sources.

    The format is chosen as follows: a recognised ``file_format`` ('csv',
    'excel', 'tsv', 'parquet', 'json', 'ndjson') always wins; otherwise the
    file extension decides; anything still unknown is read as TSV.

    For Excel files, when `first_sheet` is True, only the first sheet is
    returned; otherwise one entry per sheet. Every other format yields exactly
    one entry. The filename "-" reads delimited text from stdin.

    Args:
        filename: Path to file to load, or "-" for stdin.
        first_sheet: If True, only load first sheet for Excel files. Defaults to False.
        prefix_sheet: If True, prefix the file stem to the sheet name as the tab
            name for Excel files. Defaults to False.
        file_format: Optional format specifier for input files (e.g., 'csv',
            'excel', 'tsv', 'parquet', 'json', 'ndjson').
        has_header: Whether delimited-text input has a header row. Only passed
            to the CSV/TSV readers. Defaults to True.

    Returns:
        List of tuples of (LazyFrame, filename, tabname).
    """
    if filename == "-":
        from io import StringIO

        # Read from stdin into memory first (stdin is not seekable)
        stdin_data = sys.stdin.read()
        lf = pl.scan_csv(StringIO(stdin_data), has_header=has_header, separator="," if file_format == "csv" else "\t")

        # Reopen stdin to /dev/tty for proper terminal interaction. The temporary
        # handle is closed right after dup2: the stdin descriptor is now an
        # independent duplicate, so keeping the handle open would only leak it.
        try:
            with open("/dev/tty") as tty:
                os.dup2(tty.fileno(), sys.stdin.fileno())
        except OSError:
            # No controlling terminal available (FileNotFoundError is an OSError too).
            pass

        # NOTE(review): with file_format=None the data is parsed as tab-separated
        # but labelled "stdin.csv" — confirm whether that label is intended.
        return [(lf, "stdin.tsv" if file_format == "tsv" else "stdin.csv", "stdin")]

    filepath = Path(filename)
    stem = filepath.stem

    extension_formats = {
        ".csv": "csv",
        ".xlsx": "excel",
        ".xls": "excel",
        ".tsv": "tsv",
        ".tab": "tsv",
        ".parquet": "parquet",
        ".json": "json",
        ".ndjson": "ndjson",
    }
    # An explicit, recognised format must override the extension; previously the
    # branch order let e.g. a ".csv" suffix beat file_format="tsv".
    if file_format in extension_formats.values():
        fmt = file_format
    else:
        # Treat other formats as TSV
        fmt = extension_formats.get(filepath.suffix.lower(), "tsv")

    if fmt == "excel":
        if first_sheet:
            # Read only the first sheet
            return [(pl.read_excel(filename).lazy(), filename, stem)]

        # sheet_id=0 makes read_excel return every sheet, keyed by sheet name
        sheets = pl.read_excel(filename, sheet_id=0)
        return [
            (df.lazy(), filename, f"{stem}_{sheet_name}" if prefix_sheet else sheet_name)
            for sheet_name, df in sheets.items()
        ]

    if fmt == "csv":
        lf = pl.scan_csv(filename, has_header=has_header)
    elif fmt == "parquet":
        lf = pl.scan_parquet(filename)
    elif fmt == "json":
        # read_json is eager; convert so callers can rely on LazyFrame.collect().
        lf = pl.read_json(filename).lazy()
    elif fmt == "ndjson":
        lf = pl.scan_ndjson(filename)
    else:
        lf = pl.scan_csv(filename, has_header=has_header, separator="\t")

    return [(lf, filename, stem)]
|