dataframe-textual 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
 
3
3
  from .data_frame_help_panel import DataFrameHelpPanel
4
4
  from .data_frame_table import DataFrameTable, History
5
- from .data_frame_viewer import DataFrameViewer, _load_dataframe
5
+ from .data_frame_viewer import DataFrameViewer
6
6
  from .table_screen import FrequencyScreen, RowDetailScreen, TableScreen
7
7
  from .yes_no_screen import (
8
8
  ConfirmScreen,
@@ -31,5 +31,4 @@ __all__ = [
31
31
  "FilterScreen",
32
32
  "FreezeScreen",
33
33
  "OpenFileScreen",
34
- "_load_dataframe",
35
34
  ]
@@ -4,6 +4,7 @@ import argparse
4
4
  import sys
5
5
  from pathlib import Path
6
6
 
7
+ from .common import load_dataframe
7
8
  from .data_frame_viewer import DataFrameViewer
8
9
 
9
10
  SUPPORTED_FORMATS = ["csv", "excel", "tsv", "parquet", "json", "ndjson"]
@@ -22,13 +23,14 @@ def main() -> None:
22
23
  SystemExit: If invalid arguments are provided or required files are missing.
23
24
  """
24
25
  parser = argparse.ArgumentParser(
26
+ prog="dv",
25
27
  description="Interactive terminal based viewer/editor for tabular data (e.g., CSV/Excel).",
26
28
  formatter_class=argparse.RawDescriptionHelpFormatter,
27
29
  epilog="Examples:\n"
28
- " dataframe-textual data.csv\n"
29
- " dataframe-textual file1.csv file2.csv file3.csv\n"
30
- " dataframe-textual data.xlsx (opens all sheets in tabs)\n"
31
- " cat data.csv | dataframe-textual --format csv\n",
30
+ " %(prog)s data.csv\n"
31
+ " %(prog)s file1.csv file2.csv file3.csv\n"
32
+ " %(prog)s data.xlsx (opens each sheet in separate tab)\n"
33
+ " cat data.csv | %(prog)s --format csv\n",
32
34
  )
33
35
  parser.add_argument("files", nargs="*", help="Files to view (or read from stdin)")
34
36
  parser.add_argument(
@@ -57,7 +59,8 @@ def main() -> None:
57
59
  parser.print_help()
58
60
  sys.exit(1)
59
61
 
60
- app = DataFrameViewer(*filenames, file_format=args.format, has_header=not args.no_header)
62
+ sources = load_dataframe(filenames, file_format=args.format, has_header=not args.no_header)
63
+ app = DataFrameViewer(*sources)
61
64
  app.run()
62
65
 
63
66
 
@@ -1,7 +1,9 @@
1
1
  """Common utilities and constants for dataframe_viewer."""
2
2
 
3
3
  import re
4
+ import sys
4
5
  from dataclasses import dataclass
6
+ from pathlib import Path
5
7
  from typing import Any
6
8
 
7
9
  import polars as pl
@@ -111,6 +113,26 @@ def DtypeConfig(dtype: pl.DataType) -> DtypeClass:
111
113
  return STYLES[pl.Unknown]
112
114
 
113
115
 
116
+ def format_float(value: float, thousand_separator: bool = False, precision: int = 2) -> str:
117
+ """Format a float value, rendering integer-valued floats without a decimal point.
118
+
119
+ Args:
120
+ value: The float value to format.
121
+ thousand_separator: Whether to include thousand separators. Defaults to False.
122
+
123
+ Returns:
124
+ The formatted float as a string.
125
+ """
126
+
127
+ if (val := int(value)) == value:
128
+ return f"{val:,}" if thousand_separator else str(val)
129
+ else:
130
+ if precision > 0:
131
+ return f"{value:,.{precision}f}" if thousand_separator else f"{value:.{precision}f}"
132
+ else:
133
+ return f"{value:,f}" if thousand_separator else str(value)
134
+
135
+
114
136
  def format_row(vals, dtypes, apply_justify=True, thousand_separator=False) -> list[Text]:
115
137
  """Format a single row with proper styling and justification.
116
138
 
@@ -135,8 +157,8 @@ def format_row(vals, dtypes, apply_justify=True, thousand_separator=False) -> li
135
157
  text_val = NULL_DISPLAY
136
158
  elif dc.gtype == "integer" and thousand_separator:
137
159
  text_val = f"{val:,}"
138
- elif dc.gtype == "float" and thousand_separator:
139
- text_val = f"{val:,}"
160
+ elif dc.gtype == "float":
161
+ text_val = format_float(val, thousand_separator)
140
162
  else:
141
163
  text_val = str(val)
142
164
 
@@ -318,3 +340,122 @@ def validate_expr(term: str, df: pl.DataFrame, current_col_idx: int) -> pl.Expr
318
340
  raise ValueError(f"Failed to evaluate expression `{expr_str}`: {e}") from e
319
341
  except Exception as ve:
320
342
  raise ValueError(f"Failed to validate expression `{term}`: {ve}") from ve
343
+
344
+
345
+ def load_dataframe(
346
+ filenames: list[str], file_format: str | None = None, has_header: bool = True
347
+ ) -> list[tuple[pl.LazyFrame, str, str]]:
348
+ """Load DataFrames from file specifications.
349
+
350
+ Handles loading from multiple files, single files, or stdin. For Excel files,
351
+ loads all sheets as separate entries. For other formats, loads one entry per file.
352
+
353
+ Args:
354
+ filenames: List of filenames to load. A filename of "-" is read from stdin.
355
+ file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
356
+ has_header: Whether the input files have a header row. Defaults to True.
357
+
358
+ Returns:
359
+ List of tuples of (LazyFrame, filename, tabname) ready for display.
360
+ """
361
+ sources = []
362
+
363
+ prefix_sheet = len(filenames) > 1
364
+
365
+ for filename in filenames:
366
+ sources.extend(load_file(filename, prefix_sheet=prefix_sheet, file_format=file_format, has_header=has_header))
367
+ return sources
368
+
369
+
370
+ def load_file(
371
+ filename: str,
372
+ first_sheet: bool = False,
373
+ prefix_sheet: bool = False,
374
+ file_format: str | None = None,
375
+ has_header: bool = True,
376
+ ) -> list[tuple[pl.LazyFrame, str, str]]:
377
+ """Load a single file and return list of sources.
378
+
379
+ For Excel files, when `first_sheet` is True, returns only the first sheet. Otherwise, returns one entry per sheet.
380
+ For all other formats, returns a single entry for the file.
381
+
382
+ Args:
383
+ filename: Path to file to load.
384
+ first_sheet: If True, only load first sheet for Excel files. Defaults to False.
385
+ prefix_sheet: If True, prefix filename to sheet name as the tab name for Excel files. Defaults to False.
386
+ file_format: Optional format specifier (i.e., 'csv', 'excel', 'tsv', 'parquet', 'json', 'ndjson') for input files.
387
+ By default, infers from file extension.
388
+ has_header: Whether the input files have a header row. Defaults to True.
389
+
390
+ Returns:
391
+ List of tuples of (LazyFrame, filename, tabname).
392
+ """
393
+ sources = []
394
+
395
+ if filename == "-":
396
+ import os
397
+ from io import StringIO
398
+
399
+ # Read from stdin into memory first (stdin is not seekable)
400
+ stdin_data = sys.stdin.read()
401
+ lf = pl.scan_csv(StringIO(stdin_data), has_header=has_header, separator="," if file_format == "csv" else "\t")
402
+
403
+ # Reopen stdin to /dev/tty for proper terminal interaction
404
+ try:
405
+ tty = open("/dev/tty")
406
+ os.dup2(tty.fileno(), sys.stdin.fileno())
407
+ except (OSError, FileNotFoundError):
408
+ pass
409
+
410
+ sources.append((lf, f"stdin.{file_format}" if file_format else "stdin", "stdin"))
411
+ return sources
412
+
413
+ filepath = Path(filename)
414
+
415
+ if file_format == "csv":
416
+ lf = pl.scan_csv(filename, has_header=has_header)
417
+ sources.append((lf, filename, filepath.stem))
418
+ elif file_format == "excel":
419
+ if first_sheet:
420
+ # Read only the first sheet of the Excel file
421
+ lf = pl.read_excel(filename).lazy()
422
+ sources.append((lf, filename, filepath.stem))
423
+ else:
424
+ # Otherwise, expand every sheet into its own entry
425
+ sheets = pl.read_excel(filename, sheet_id=0)
426
+ for sheet_name, df in sheets.items():
427
+ tabname = f"{filepath.stem}_{sheet_name}" if prefix_sheet else sheet_name
428
+ sources.append((df.lazy(), filename, tabname))
429
+ elif file_format == "tsv":
430
+ lf = pl.scan_csv(filename, has_header=has_header, separator="\t")
431
+ sources.append((lf, filename, filepath.stem))
432
+ elif file_format == "parquet":
433
+ lf = pl.scan_parquet(filename)
434
+ sources.append((lf, filename, filepath.stem))
435
+ elif file_format == "json":
436
+ df = pl.read_json(filename)
437
+ sources.append((df, filename, filepath.stem))
438
+ elif file_format == "ndjson":
439
+ lf = pl.scan_ndjson(filename)
440
+ sources.append((lf, filename, filepath.stem))
441
+ else:
442
+ ext = filepath.suffix.lower()
443
+ if ext == ".csv":
444
+ file_format = "csv"
445
+ elif ext in (".xlsx", ".xls"):
446
+ file_format = "excel"
447
+ elif ext in (".tsv", ".tab"):
448
+ file_format = "tsv"
449
+ elif ext == ".parquet":
450
+ file_format = "parquet"
451
+ elif ext == ".json":
452
+ file_format = "json"
453
+ elif ext == ".ndjson":
454
+ file_format = "ndjson"
455
+ else:
456
+ # Default to TSV
457
+ file_format = "tsv"
458
+
459
+ sources.extend(load_file(filename, first_sheet, prefix_sheet, file_format, has_header))
460
+
461
+ return sources