dataframe-textual 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataframe_textual/__init__.py +1 -2
- dataframe_textual/__main__.py +8 -5
- dataframe_textual/common.py +143 -2
- dataframe_textual/data_frame_table.py +342 -161
- dataframe_textual/data_frame_viewer.py +16 -114
- dataframe_textual/table_screen.py +14 -8
- {dataframe_textual-1.0.0.dist-info → dataframe_textual-1.2.0.dist-info}/METADATA +64 -41
- dataframe_textual-1.2.0.dist-info/RECORD +13 -0
- dataframe_textual-1.2.0.dist-info/entry_points.txt +2 -0
- dataframe_textual-1.0.0.dist-info/RECORD +0 -13
- dataframe_textual-1.0.0.dist-info/entry_points.txt +0 -2
- {dataframe_textual-1.0.0.dist-info → dataframe_textual-1.2.0.dist-info}/WHEEL +0 -0
- {dataframe_textual-1.0.0.dist-info → dataframe_textual-1.2.0.dist-info}/licenses/LICENSE +0 -0
dataframe_textual/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from .data_frame_help_panel import DataFrameHelpPanel
|
|
4
4
|
from .data_frame_table import DataFrameTable, History
|
|
5
|
-
from .data_frame_viewer import DataFrameViewer
|
|
5
|
+
from .data_frame_viewer import DataFrameViewer
|
|
6
6
|
from .table_screen import FrequencyScreen, RowDetailScreen, TableScreen
|
|
7
7
|
from .yes_no_screen import (
|
|
8
8
|
ConfirmScreen,
|
|
@@ -31,5 +31,4 @@ __all__ = [
|
|
|
31
31
|
"FilterScreen",
|
|
32
32
|
"FreezeScreen",
|
|
33
33
|
"OpenFileScreen",
|
|
34
|
-
"_load_dataframe",
|
|
35
34
|
]
|
dataframe_textual/__main__.py
CHANGED
|
@@ -4,6 +4,7 @@ import argparse
|
|
|
4
4
|
import sys
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
|
|
7
|
+
from .common import load_dataframe
|
|
7
8
|
from .data_frame_viewer import DataFrameViewer
|
|
8
9
|
|
|
9
10
|
SUPPORTED_FORMATS = ["csv", "excel", "tsv", "parquet", "json", "ndjson"]
|
|
@@ -22,13 +23,14 @@ def main() -> None:
|
|
|
22
23
|
SystemExit: If invalid arguments are provided or required files are missing.
|
|
23
24
|
"""
|
|
24
25
|
parser = argparse.ArgumentParser(
|
|
26
|
+
prog="dv",
|
|
25
27
|
description="Interactive terminal based viewer/editor for tabular data (e.g., CSV/Excel).",
|
|
26
28
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
27
29
|
epilog="Examples:\n"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
" cat data.csv |
|
|
30
|
+
" %(prog)s data.csv\n"
|
|
31
|
+
" %(prog)s file1.csv file2.csv file3.csv\n"
|
|
32
|
+
" %(prog)s data.xlsx (opens each sheet in separate tab)\n"
|
|
33
|
+
" cat data.csv | %(prog)s --format csv\n",
|
|
32
34
|
)
|
|
33
35
|
parser.add_argument("files", nargs="*", help="Files to view (or read from stdin)")
|
|
34
36
|
parser.add_argument(
|
|
@@ -57,7 +59,8 @@ def main() -> None:
|
|
|
57
59
|
parser.print_help()
|
|
58
60
|
sys.exit(1)
|
|
59
61
|
|
|
60
|
-
|
|
62
|
+
sources = load_dataframe(filenames, file_format=args.format, has_header=not args.no_header)
|
|
63
|
+
app = DataFrameViewer(*sources)
|
|
61
64
|
app.run()
|
|
62
65
|
|
|
63
66
|
|
dataframe_textual/common.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"""Common utilities and constants for dataframe_viewer."""
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
|
+
import sys
|
|
4
5
|
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
5
7
|
from typing import Any
|
|
6
8
|
|
|
7
9
|
import polars as pl
|
|
@@ -111,6 +113,26 @@ def DtypeConfig(dtype: pl.DataType) -> DtypeClass:
|
|
|
111
113
|
return STYLES[pl.Unknown]
|
|
112
114
|
|
|
113
115
|
|
|
116
|
+
def format_float(value: float, thousand_separator: bool = False, precision: int = 2) -> str:
    """Format a float value, keeping whole numbers without a decimal point.

    Args:
        value: The float value to format.
        thousand_separator: Whether to include thousand separators. Defaults to False.
        precision: Number of digits after the decimal point for non-whole values.
            When zero or negative, the value is rendered with ``str(value)`` (or the
            default ``,f`` format when ``thousand_separator`` is set). Defaults to 2.

    Returns:
        The formatted float as a string. Non-finite values (NaN, +/-inf) are
        returned as ``str(value)``, i.e. ``"nan"``, ``"inf"`` or ``"-inf"``.
    """
    try:
        whole = int(value)
    except (ValueError, OverflowError):
        # int() rejects NaN (ValueError) and +/-inf (OverflowError); such values
        # are legitimate float-column contents and must not abort formatting.
        return str(value)

    if whole == value:
        # Whole number: drop the fractional part entirely.
        return f"{whole:,}" if thousand_separator else str(whole)

    if precision > 0:
        return f"{value:,.{precision}f}" if thousand_separator else f"{value:.{precision}f}"

    return f"{value:,f}" if thousand_separator else str(value)
|
|
134
|
+
|
|
135
|
+
|
|
114
136
|
def format_row(vals, dtypes, apply_justify=True, thousand_separator=False) -> list[Text]:
|
|
115
137
|
"""Format a single row with proper styling and justification.
|
|
116
138
|
|
|
@@ -135,8 +157,8 @@ def format_row(vals, dtypes, apply_justify=True, thousand_separator=False) -> li
|
|
|
135
157
|
text_val = NULL_DISPLAY
|
|
136
158
|
elif dc.gtype == "integer" and thousand_separator:
|
|
137
159
|
text_val = f"{val:,}"
|
|
138
|
-
elif dc.gtype == "float"
|
|
139
|
-
text_val =
|
|
160
|
+
elif dc.gtype == "float":
|
|
161
|
+
text_val = format_float(val, thousand_separator)
|
|
140
162
|
else:
|
|
141
163
|
text_val = str(val)
|
|
142
164
|
|
|
@@ -318,3 +340,122 @@ def validate_expr(term: str, df: pl.DataFrame, current_col_idx: int) -> pl.Expr
|
|
|
318
340
|
raise ValueError(f"Failed to evaluate expression `{expr_str}`: {e}") from e
|
|
319
341
|
except Exception as ve:
|
|
320
342
|
raise ValueError(f"Failed to validate expression `{term}`: {ve}") from ve
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def load_dataframe(
    filenames: list[str], file_format: str | None = None, has_header: bool = True
) -> list[tuple[pl.LazyFrame, str, str]]:
    """Load every requested input and flatten the results into one list of sources.

    Each filename is delegated to `load_file`, which may yield several entries for a
    single file (one per Excel sheet). A filename of "-" is handled by `load_file`
    as standard input.

    Args:
        filenames: List of filenames to load. Use "-" to read from stdin.
        file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
        has_header: Whether the input files have a header row. Defaults to True.

    Returns:
        List of tuples of (LazyFrame, filename, tabname) ready for display.
    """
    # With more than one input file, ask load_file to prefix Excel sheet tab
    # names with the file stem.
    several_files = len(filenames) > 1

    return [
        entry
        for name in filenames
        for entry in load_file(name, prefix_sheet=several_files, file_format=file_format, has_header=has_header)
    ]
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def load_file(
    filename: str,
    first_sheet: bool = False,
    prefix_sheet: bool = False,
    file_format: str | None = None,
    has_header: bool = True,
) -> list[tuple[pl.LazyFrame, str, str]]:
    """Load a single file (or stdin) and return a list of sources.

    For Excel files, when `first_sheet` is True, returns only the first sheet. Otherwise, returns
    one entry per sheet. Every other format returns exactly one entry.

    Args:
        filename: Path to file to load, or "-" to read delimited text from stdin.
        first_sheet: If True, only load first sheet for Excel files. Defaults to False.
        prefix_sheet: If True, prefix filename to sheet name as the tab name for Excel files. Defaults to False.
        file_format: Optional format specifier (i.e., 'csv', 'excel', 'tsv', 'parquet', 'json', 'ndjson') for
            input files. When omitted or not one of these values, the format is inferred from the file
            extension, falling back to TSV for unknown extensions.
        has_header: Whether the input has a header row. Only applied to CSV/TSV input (including stdin);
            the other readers are called without it. Defaults to True.

    Returns:
        List of tuples of (LazyFrame, filename, tabname).
    """
    if filename == "-":
        import os
        from io import StringIO

        # Read from stdin into memory first (stdin is not seekable).
        # stdin is parsed as comma-separated only for 'csv'; any other format
        # (or none) is parsed as tab-separated.
        stdin_data = sys.stdin.read()
        separator = "," if file_format == "csv" else "\t"
        lf = pl.scan_csv(StringIO(stdin_data), has_header=has_header, separator=separator)

        # Reopen stdin to /dev/tty for proper terminal interaction. The duplicated
        # descriptor keeps the terminal open, so the temporary handle is closed
        # right away instead of being leaked.
        try:
            with open("/dev/tty") as tty:
                os.dup2(tty.fileno(), sys.stdin.fileno())
        except OSError:
            # Best effort: no controlling terminal (or no /dev/tty on this platform).
            pass

        return [(lf, f"stdin.{file_format}" if file_format else "stdin", "stdin")]

    filepath = Path(filename)

    known_formats = ("csv", "excel", "tsv", "parquet", "json", "ndjson")
    if file_format not in known_formats:
        # No (or unrecognised) explicit format: infer from the extension, default to TSV.
        format_by_suffix = {
            ".csv": "csv",
            ".xlsx": "excel",
            ".xls": "excel",
            ".tsv": "tsv",
            ".tab": "tsv",
            ".parquet": "parquet",
            ".json": "json",
            ".ndjson": "ndjson",
        }
        file_format = format_by_suffix.get(filepath.suffix.lower(), "tsv")

    if file_format == "excel":
        if first_sheet:
            # Default sheet selection of read_excel, i.e. only the first sheet.
            return [(pl.read_excel(filename).lazy(), filename, filepath.stem)]

        # sheet_id=0 loads all sheets as a {sheet_name: DataFrame} mapping.
        sheets = pl.read_excel(filename, sheet_id=0)
        return [
            (df.lazy(), filename, f"{filepath.stem}_{sheet_name}" if prefix_sheet else sheet_name)
            for sheet_name, df in sheets.items()
        ]

    if file_format == "csv":
        lf = pl.scan_csv(filename, has_header=has_header)
    elif file_format == "tsv":
        lf = pl.scan_csv(filename, has_header=has_header, separator="\t")
    elif file_format == "parquet":
        lf = pl.scan_parquet(filename)
    elif file_format == "json":
        # read_json is eager; convert so every entry honours the LazyFrame contract.
        lf = pl.read_json(filename).lazy()
    else:  # "ndjson"
        lf = pl.scan_ndjson(filename)

    return [(lf, filename, filepath.stem)]
|