dataframe-textual 1.1.0__py3-none-any.whl → 1.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
 
3
3
  from .data_frame_help_panel import DataFrameHelpPanel
4
4
  from .data_frame_table import DataFrameTable, History
5
- from .data_frame_viewer import DataFrameViewer, _load_dataframe
5
+ from .data_frame_viewer import DataFrameViewer
6
6
  from .table_screen import FrequencyScreen, RowDetailScreen, TableScreen
7
7
  from .yes_no_screen import (
8
8
  ConfirmScreen,
@@ -31,5 +31,4 @@ __all__ = [
31
31
  "FilterScreen",
32
32
  "FreezeScreen",
33
33
  "OpenFileScreen",
34
- "_load_dataframe",
35
34
  ]
@@ -4,6 +4,7 @@ import argparse
4
4
  import sys
5
5
  from pathlib import Path
6
6
 
7
+ from .common import load_dataframe
7
8
  from .data_frame_viewer import DataFrameViewer
8
9
 
9
10
  SUPPORTED_FORMATS = ["csv", "excel", "tsv", "parquet", "json", "ndjson"]
@@ -22,13 +23,14 @@ def main() -> None:
22
23
  SystemExit: If invalid arguments are provided or required files are missing.
23
24
  """
24
25
  parser = argparse.ArgumentParser(
26
+ prog="dv",
25
27
  description="Interactive terminal based viewer/editor for tabular data (e.g., CSV/Excel).",
26
28
  formatter_class=argparse.RawDescriptionHelpFormatter,
27
29
  epilog="Examples:\n"
28
- " dataframe-textual data.csv\n"
29
- " dataframe-textual file1.csv file2.csv file3.csv\n"
30
- " dataframe-textual data.xlsx (opens all sheets in tabs)\n"
31
- " cat data.csv | dataframe-textual --format csv\n",
30
+ " %(prog)s data.csv\n"
31
+ " %(prog)s file1.csv file2.csv file3.csv\n"
32
+ " %(prog)s data.xlsx (opens each sheet in separate tab)\n"
33
+ " cat data.csv | %(prog)s --format csv\n",
32
34
  )
33
35
  parser.add_argument("files", nargs="*", help="Files to view (or read from stdin)")
34
36
  parser.add_argument(
@@ -57,7 +59,8 @@ def main() -> None:
57
59
  parser.print_help()
58
60
  sys.exit(1)
59
61
 
60
- app = DataFrameViewer(*filenames, file_format=args.format, has_header=not args.no_header)
62
+ sources = load_dataframe(filenames, file_format=args.format, has_header=not args.no_header)
63
+ app = DataFrameViewer(*sources)
61
64
  app.run()
62
65
 
63
66
 
@@ -1,7 +1,9 @@
1
1
  """Common utilities and constants for dataframe_viewer."""
2
2
 
3
3
  import re
4
+ import sys
4
5
  from dataclasses import dataclass
6
+ from pathlib import Path
5
7
  from typing import Any
6
8
 
7
9
  import polars as pl
@@ -338,3 +340,122 @@ def validate_expr(term: str, df: pl.DataFrame, current_col_idx: int) -> pl.Expr
338
340
  raise ValueError(f"Failed to evaluate expression `{expr_str}`: {e}") from e
339
341
  except Exception as ve:
340
342
  raise ValueError(f"Failed to validate expression `{term}`: {ve}") from ve
343
+
344
+
345
+ def load_dataframe(
346
+ filenames: list[str], file_format: str | None = None, has_header: bool = True
347
+ ) -> list[tuple[pl.LazyFrame, str, str]]:
348
+ """Load DataFrames from file specifications.
349
+
350
+ Handles loading from multiple files, single files, or stdin. For Excel files,
351
+ loads all sheets as separate entries. For other formats, loads as single file.
352
+
353
+ Args:
354
+ filenames: List of filenames to load. If single filename is "-", read from stdin.
355
+ file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
356
+ has_header: Whether the input files have a header row. Defaults to True.
357
+
358
+ Returns:
359
+ List of tuples of (LazyFrame, filename, tabname) ready for display.
360
+ """
361
+ sources = []
362
+
363
+ prefix_sheet = len(filenames) > 1
364
+
365
+ for filename in filenames:
366
+ sources.extend(load_file(filename, prefix_sheet=prefix_sheet, file_format=file_format, has_header=has_header))
367
+ return sources
368
+
369
+
370
+ def load_file(
371
+ filename: str,
372
+ first_sheet: bool = False,
373
+ prefix_sheet: bool = False,
374
+ file_format: str | None = None,
375
+ has_header: bool = True,
376
+ ) -> list[tuple[pl.LazyFrame, str, str]]:
377
+ """Load a single file and return list of sources.
378
+
379
+ For Excel files, when `first_sheet` is True, returns only the first sheet. Otherwise, returns one entry per sheet.
380
+ For other files or multiple files, returns one entry per file.
381
+
382
+ Args:
383
+ filename: Path to file to load.
384
+ first_sheet: If True, only load first sheet for Excel files. Defaults to False.
385
+ prefix_sheet: If True, prefix filename to sheet name as the tab name for Excel files. Defaults to False.
386
+ file_format: Optional format specifier (i.e., 'csv', 'excel', 'tsv', 'parquet', 'json', 'ndjson') for input files.
387
+ By default, infers from file extension.
388
+ has_header: Whether the input files have a header row. Defaults to True.
389
+
390
+ Returns:
391
+ List of tuples of (LazyFrame, filename, tabname).
392
+ """
393
+ sources = []
394
+
395
+ if filename == "-":
396
+ import os
397
+ from io import StringIO
398
+
399
+ # Read from stdin into memory first (stdin is not seekable)
400
+ stdin_data = sys.stdin.read()
401
+ lf = pl.scan_csv(StringIO(stdin_data), has_header=has_header, separator="," if file_format == "csv" else "\t")
402
+
403
+ # Reopen stdin to /dev/tty for proper terminal interaction
404
+ try:
405
+ tty = open("/dev/tty")
406
+ os.dup2(tty.fileno(), sys.stdin.fileno())
407
+ except (OSError, FileNotFoundError):
408
+ pass
409
+
410
+ sources.append((lf, f"stdin.{file_format}" if file_format else "stdin", "stdin"))
411
+ return sources
412
+
413
+ filepath = Path(filename)
414
+
415
+ if file_format == "csv":
416
+ lf = pl.scan_csv(filename, has_header=has_header)
417
+ sources.append((lf, filename, filepath.stem))
418
+ elif file_format == "excel":
419
+ if first_sheet:
420
+ # Read only the first sheet for multiple files
421
+ lf = pl.read_excel(filename).lazy()
422
+ sources.append((lf, filename, filepath.stem))
423
+ else:
424
+ # For single file, expand all sheets
425
+ sheets = pl.read_excel(filename, sheet_id=0)
426
+ for sheet_name, df in sheets.items():
427
+ tabname = f"{filepath.stem}_{sheet_name}" if prefix_sheet else sheet_name
428
+ sources.append((df.lazy(), filename, tabname))
429
+ elif file_format == "tsv":
430
+ lf = pl.scan_csv(filename, has_header=has_header, separator="\t")
431
+ sources.append((lf, filename, filepath.stem))
432
+ elif file_format == "parquet":
433
+ lf = pl.scan_parquet(filename)
434
+ sources.append((lf, filename, filepath.stem))
435
+ elif file_format == "json":
436
+ df = pl.read_json(filename)
437
+ sources.append((df, filename, filepath.stem))
438
+ elif file_format == "ndjson":
439
+ lf = pl.scan_ndjson(filename)
440
+ sources.append((lf, filename, filepath.stem))
441
+ else:
442
+ ext = filepath.suffix.lower()
443
+ if ext == ".csv":
444
+ file_format = "csv"
445
+ elif ext in (".xlsx", ".xls"):
446
+ file_format = "excel"
447
+ elif ext in (".tsv", ".tab"):
448
+ file_format = "tsv"
449
+ elif ext == ".parquet":
450
+ file_format = "parquet"
451
+ elif ext == ".json":
452
+ file_format = "json"
453
+ elif ext == ".ndjson":
454
+ file_format = "ndjson"
455
+ else:
456
+ # Default to TSV
457
+ file_format = "tsv"
458
+
459
+ sources.extend(load_file(filename, first_sheet, prefix_sheet, file_format, has_header))
460
+
461
+ return sources
@@ -1,7 +1,6 @@
1
1
  """DataFrame Viewer application and utilities."""
2
2
 
3
3
  import os
4
- import sys
5
4
  from functools import partial
6
5
  from pathlib import Path
7
6
  from textwrap import dedent
@@ -13,7 +12,7 @@ from textual.theme import BUILTIN_THEMES
13
12
  from textual.widgets import TabbedContent, TabPane
14
13
  from textual.widgets.tabbed_content import ContentTab, ContentTabs
15
14
 
16
- from .common import get_next_item
15
+ from .common import get_next_item, load_file
17
16
  from .data_frame_help_panel import DataFrameHelpPanel
18
17
  from .data_frame_table import DataFrameTable
19
18
  from .yes_no_screen import OpenFileScreen, SaveFileScreen
@@ -79,21 +78,20 @@ class DataFrameViewer(App):
79
78
  }
80
79
  """
81
80
 
82
- def __init__(self, *filenames: str, file_format: str | None = None, has_header: bool = True) -> None:
81
+ def __init__(self, *sources: str) -> None:
83
82
  """Initialize the DataFrame Viewer application.
84
83
 
85
- Loads dataframes from provided filenames and prepares the tabbed interface.
84
+ Loads data from provided sources and prepares the tabbed interface.
86
85
 
87
86
  Args:
88
- *filenames: Variable number of file paths to load (CSV, Excel, Parquet, etc).
89
- file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
90
- has_header: Whether the input files have a header row. Defaults to True.
87
+ sources: sources to load dataframes from, each as a tuple of
88
+ (DataFrame | LazyFrame, filename, tabname).
91
89
 
92
90
  Returns:
93
91
  None
94
92
  """
95
93
  super().__init__()
96
- self.sources = _load_dataframe(filenames, file_format, has_header=has_header)
94
+ self.sources = sources
97
95
  self.tabs: dict[TabPane, DataFrameTable] = {}
98
96
  self.help_panel = None
99
97
 
@@ -111,12 +109,18 @@ class DataFrameViewer(App):
111
109
  with self.tabbed:
112
110
  seen_names = set()
113
111
  for idx, (df, filename, tabname) in enumerate(self.sources, start=1):
112
+ tab_id = f"tab_{idx}"
113
+
114
+ if not tabname:
115
+ tabname = Path(filename).stem or tab_id
116
+
114
117
  # Ensure unique tab names
115
- if tabname in seen_names:
116
- tabname = f"{tabname}_{idx}"
118
+ counter = 1
119
+ while tabname in seen_names:
120
+ tabname = f"{tabname}_{counter}"
121
+ counter += 1
117
122
  seen_names.add(tabname)
118
123
 
119
- tab_id = f"tab_{idx}"
120
124
  try:
121
125
  table = DataFrameTable(df, filename, name=tabname, id=tab_id, zebra_stripes=True)
122
126
  tab = TabPane(tabname, table, name=tabname, id=tab_id)
@@ -300,7 +304,7 @@ class DataFrameViewer(App):
300
304
  if filename and os.path.exists(filename):
301
305
  try:
302
306
  n_tab = 0
303
- for lf, filename, tabname in _load_file(filename, prefix_sheet=True):
307
+ for lf, filename, tabname in load_file(filename, prefix_sheet=True):
304
308
  self._add_tab(lf.collect(), filename, tabname)
305
309
  n_tab += 1
306
310
  self.notify(f"Added [$accent]{n_tab}[/] tab(s) for [$success]{filename}[/]", title="Open")
@@ -368,105 +372,3 @@ class DataFrameViewer(App):
368
372
  self.notify(f"Closed tab [$success]{active_pane.name}[/]", title="Close")
369
373
  except NoMatches:
370
374
  pass
371
-
372
-
373
- def _load_dataframe(
374
- filenames: list[str], file_format: str | None = None, has_header: bool = True
375
- ) -> list[tuple[pl.LazyFrame, str, str]]:
376
- """Load DataFrames from file specifications.
377
-
378
- Handles loading from multiple files, single files, or stdin. For Excel files,
379
- loads all sheets as separate entries. For other formats, loads as single file.
380
-
381
- Args:
382
- filenames: List of filenames to load. If single filename is "-", read from stdin.
383
- file_format: Optional format specifier for input files (e.g., 'csv', 'excel').
384
- has_header: Whether the input files have a header row. Defaults to True.
385
-
386
- Returns:
387
- List of tuples of (LazyFrame, filename, tabname) ready for display.
388
- """
389
- sources = []
390
-
391
- prefix_sheet = len(filenames) > 1
392
-
393
- for filename in filenames:
394
- sources.extend(_load_file(filename, prefix_sheet=prefix_sheet, file_format=file_format, has_header=has_header))
395
- return sources
396
-
397
-
398
- def _load_file(
399
- filename: str,
400
- first_sheet: bool = False,
401
- prefix_sheet: bool = False,
402
- file_format: str | None = None,
403
- has_header: bool = True,
404
- ) -> list[tuple[pl.LazyFrame, str, str]]:
405
- """Load a single file and return list of sources.
406
-
407
- For Excel files, when `first_sheet` is True, returns only the first sheet. Otherwise, returns one entry per sheet.
408
- For other files or multiple files, returns one entry per file.
409
-
410
- Args:
411
- filename: Path to file to load.
412
- first_sheet: If True, only load first sheet for Excel files. Defaults to False.
413
- prefix_sheet: If True, prefix filename to sheet name as the tab name for Excel files. Defaults to False.
414
- file_format: Optional format specifier for input files (e.g., 'csv', 'excel', 'tsv', 'parquet', 'json', 'ndjson').
415
-
416
- Returns:
417
- List of tuples of (LazyFrame, filename, tabname).
418
- """
419
- sources = []
420
-
421
- if filename == "-":
422
- from io import StringIO
423
-
424
- # Read from stdin into memory first (stdin is not seekable)
425
- stdin_data = sys.stdin.read()
426
- lf = pl.scan_csv(StringIO(stdin_data), has_header=has_header, separator="," if file_format == "csv" else "\t")
427
-
428
- # Reopen stdin to /dev/tty for proper terminal interaction
429
- try:
430
- tty = open("/dev/tty")
431
- os.dup2(tty.fileno(), sys.stdin.fileno())
432
- except (OSError, FileNotFoundError):
433
- pass
434
-
435
- sources.append((lf, "stdin.tsv" if file_format == "tsv" else "stdin.csv", "stdin"))
436
- return sources
437
-
438
- filepath = Path(filename)
439
- ext = filepath.suffix.lower()
440
-
441
- if file_format == "csv" or ext == ".csv":
442
- lf = pl.scan_csv(filename, has_header=has_header)
443
- sources.append((lf, filename, filepath.stem))
444
- elif file_format == "excel" or ext in (".xlsx", ".xls"):
445
- if first_sheet:
446
- # Read only the first sheet for multiple files
447
- lf = pl.read_excel(filename).lazy()
448
- sources.append((lf, filename, filepath.stem))
449
- else:
450
- # For single file, expand all sheets
451
- sheets = pl.read_excel(filename, sheet_id=0)
452
- for sheet_name, df in sheets.items():
453
- tabname = f"{filepath.stem}_{sheet_name}" if prefix_sheet else sheet_name
454
- sources.append((df.lazy(), filename, tabname))
455
- elif file_format == "tsv" or ext in (".tsv", ".tab"):
456
- lf = pl.scan_csv(filename, has_header=has_header, separator="\t")
457
- sources.append((lf, filename, filepath.stem))
458
- elif file_format == "parquet" or ext == ".parquet":
459
- lf = pl.scan_parquet(filename)
460
- sources.append((lf, filename, filepath.stem))
461
- elif file_format == "json" or ext == ".json":
462
- df = pl.read_json(filename)
463
- sources.append((df, filename, filepath.stem))
464
- elif file_format == "ndjson" or ext == ".ndjson":
465
- lf = pl.scan_ndjson(filename)
466
- sources.append((lf, filename, filepath.stem))
467
- else:
468
- # Treat other formats as TSV
469
- lf = pl.scan_csv(filename, has_header=has_header, separator="\t")
470
- sources.append((lf, filename, filepath.stem))
471
-
472
- return sources
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataframe-textual
3
- Version: 1.1.0
3
+ Version: 1.1.4
4
4
  Summary: Interactive terminal viewer/editor for tabular data
5
5
  Project-URL: Homepage, https://github.com/need47/dataframe-textual
6
6
  Project-URL: Repository, https://github.com/need47/dataframe-textual.git
@@ -92,6 +92,7 @@ uvx https://github.com/need47/dataframe-textual.git <csvfile>
92
92
 
93
93
  # Clone or download the project
94
94
  cd dataframe-textual
95
+ uv sync --extra excel # with Excel support
95
96
 
96
97
  # Run directly with uv
97
98
  uv run dv <csv_file>
@@ -0,0 +1,13 @@
1
+ dataframe_textual/__init__.py,sha256=IFPb8RMUgghw0eRomehkkC684Iny_gs1VkiZMQ5ZpFk,813
2
+ dataframe_textual/__main__.py,sha256=hgjKLT3ggGxVVeRBunkArTy7PTqOHVJUf3fsH4P5dfU,2146
3
+ dataframe_textual/common.py,sha256=q4bXS7oiJAsdcMEfHkPm5-e8SlfcwCBNhFN9TinQqV0,16171
4
+ dataframe_textual/data_frame_help_panel.py,sha256=XgKGEPJr2hnDWpZ5mavLRcBSPa9cvrXdzVUGFQavXm4,3353
5
+ dataframe_textual/data_frame_table.py,sha256=mOm0pzuN61N01KXi_II6Ld4-NqVYYdiKfzukDMlJpxw,102644
6
+ dataframe_textual/data_frame_viewer.py,sha256=4mV3k7MNTf9TKBmGJ8fDx7itA1vo4qSmaWpvZozwfjs,12987
7
+ dataframe_textual/table_screen.py,sha256=vSevLh944xBTIYVcokABCfSUi84s9gclizx6eDmxyFY,17585
8
+ dataframe_textual/yes_no_screen.py,sha256=vyUKMBbbwgt5At1U430eLg3WbJvqUNoz2GpvdnMd7q0,22921
9
+ dataframe_textual-1.1.4.dist-info/METADATA,sha256=GKyTQtMDbIdgJD6zv9WIex6Oq4SvmYfNH3xDw4mZwb4,25422
10
+ dataframe_textual-1.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
+ dataframe_textual-1.1.4.dist-info/entry_points.txt,sha256=Z0XKp1ACvmbJymekkxt-C81I0OoInksr5Ib0w2OT_a4,55
12
+ dataframe_textual-1.1.4.dist-info/licenses/LICENSE,sha256=AVTg0gk1X-LHI-nnHlAMDQetrwuDZK4eypgSMDO46Yc,1069
13
+ dataframe_textual-1.1.4.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- dataframe_textual/__init__.py,sha256=uzB3bjlbm8JbsjxEgwqvPcYERktm3F9d9Op_6cWJ1sk,853
2
- dataframe_textual/__main__.py,sha256=ANe7s2SKO53ksFg-0VrCT2GRCG48wDSAZsLrWvoQwmQ,2082
3
- dataframe_textual/common.py,sha256=JVGl5ImbC8h9-nodcGZCyNmRcO2VlCF1-yo8Zg8uD90,11583
4
- dataframe_textual/data_frame_help_panel.py,sha256=XgKGEPJr2hnDWpZ5mavLRcBSPa9cvrXdzVUGFQavXm4,3353
5
- dataframe_textual/data_frame_table.py,sha256=mOm0pzuN61N01KXi_II6Ld4-NqVYYdiKfzukDMlJpxw,102644
6
- dataframe_textual/data_frame_viewer.py,sha256=9vUBdIgSoOf4fdDtI4sAPY6cF1tFdsXwpWatyDsuXh0,17196
7
- dataframe_textual/table_screen.py,sha256=vSevLh944xBTIYVcokABCfSUi84s9gclizx6eDmxyFY,17585
8
- dataframe_textual/yes_no_screen.py,sha256=vyUKMBbbwgt5At1U430eLg3WbJvqUNoz2GpvdnMd7q0,22921
9
- dataframe_textual-1.1.0.dist-info/METADATA,sha256=FvSPyXMtELpoIrqfXhzlUkPRb91-6GM16NDwIr-q24g,25378
10
- dataframe_textual-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
- dataframe_textual-1.1.0.dist-info/entry_points.txt,sha256=Z0XKp1ACvmbJymekkxt-C81I0OoInksr5Ib0w2OT_a4,55
12
- dataframe_textual-1.1.0.dist-info/licenses/LICENSE,sha256=AVTg0gk1X-LHI-nnHlAMDQetrwuDZK4eypgSMDO46Yc,1069
13
- dataframe_textual-1.1.0.dist-info/RECORD,,