pq-peek 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pq_peek/__init__.py ADDED
@@ -0,0 +1,2 @@
1
def hello() -> str:
    """Return the package's fixed greeting string."""
    greeting = "Hello from pq-peek!"
    return greeting
pq_peek/__main__.py ADDED
@@ -0,0 +1,4 @@
1
# Entry point for ``python -m pq_peek``: reuse the CLI app object from main.py.
from .main import app

# Guard so that merely importing this module does not start the CLI.
if __name__ == "__main__":
    app()
pq_peek/main.py ADDED
@@ -0,0 +1,98 @@
1
+ from pathlib import Path
2
+
3
+ import polars as pl
4
+ import typer
5
+ from rich import print as rprint
6
+ from rich.console import Console
7
+ from rich.table import Table
8
+
9
# Typer application object; the CLI commands in this module are registered
# on it through the @app.command() decorator.
app = typer.Typer(help="A fast CLI tool to inspect Parquet files")
# Shared Rich console used by the commands for table output and the status spinner.
console = Console()
11
+
12
def get_scan(file_path: Path) -> pl.LazyFrame:
    """
    Validates the path and opens the parquet file lazily

    The path check is delegated to verify_path, which exits the CLI when
    the path does not exist. The returned LazyFrame comes from
    pl.scan_parquet.
    """
    verify_path(file_path)
    lazy_frame = pl.scan_parquet(file_path)
    return lazy_frame
15
+
16
def verify_path(file_path: Path) -> None:
    """
    Verifies that the file exists and checks the correct ending

    Prints an error and raises typer.Exit(code=1) when the path does not
    exist. A suffix other than '.parquet' only produces a warning and does
    not stop processing.
    """
    if not file_path.exists():
        rprint(f"[bold red]Error:[/bold red] File was not found: '{file_path}'")
        raise typer.Exit(code=1)
    # Compare case-insensitively so that e.g. 'DATA.PARQUET' does not
    # trigger a spurious warning.
    if file_path.suffix.lower() != ".parquet":
        rprint("[bold yellow]Warning:[/bold yellow] File does not end with '.parquet'")
25
+
26
@app.command()
def schema(file_path: Path):
    """
    Displays the schema (column names, types) of the parquet file
    Uses Lazy Loading (scan_parquet) to save memory
    """
    # Local import: column names and type names are data, not Rich markup.
    from rich.markup import escape

    try:
        lazy_frame = get_scan(file_path)
        # Named parquet_schema so the local does not shadow this command function.
        parquet_schema = lazy_frame.collect_schema()

        table = Table(title=f"Schema: {file_path.name}")
        table.add_column("Column name", style="cyan", no_wrap=True)
        table.add_column("Type", style="magenta")

        for name, dtype in parquet_schema.items():
            # Escape so names such as "weight [kg]" are shown verbatim
            # instead of being parsed as console markup.
            table.add_row(escape(name), escape(str(dtype)))

        console.print(table)
        rprint(f"\n[green]Amount of columns:[/green] {len(parquet_schema)}")
    except typer.Exit:
        # The path check inside get_scan has already printed its own error.
        # typer.Exit is an Exception subclass, so without this clause the
        # generic handler below would print a second, empty failure message.
        raise
    except Exception as failure:
        rprint(f"[bold red]Failure while reading the parquet file:[/bold red] {failure}")
        raise typer.Exit(code=1)
48
+
49
@app.command()
def head(file_path: Path, n: int = typer.Option(5, help="Amount of rows")):
    """
    Displays the first n rows of the parquet file
    """
    # Local import: cell values and column names are data, not Rich markup.
    from rich.markup import escape

    try:
        lazy_frame = get_scan(file_path)
        # Only the first n rows are materialised; the rest of the file stays lazy.
        df = lazy_frame.limit(n).collect()

        if df.is_empty():
            rprint("[yellow]File is empty.[/yellow]")
            return

        # Report the number of rows actually shown: the file may contain
        # fewer than the n rows that were requested.
        table = Table(title=f"Preview {file_path.name} ({df.height} rows)")

        for column in df.columns:
            table.add_column(escape(column), overflow="fold")

        for row in df.iter_rows():
            # Escape every cell so values containing "[...]" are printed
            # verbatim instead of being parsed (or rejected) as markup.
            table.add_row(*(escape(str(value)) for value in row))

        console.print(table)
    except typer.Exit:
        # The path check inside get_scan has already printed its own error.
        # typer.Exit is an Exception subclass, so without this clause the
        # generic handler below would print a second, empty failure message.
        raise
    except Exception as failure:
        rprint(f"[bold red]Failure while reading the parquet file:[/bold red] {failure}")
        raise typer.Exit(code=1)
75
+
76
+
77
+
78
@app.command()
def stats(file_path: Path):
    """
    Displays stats of the parquet file (min, max, nulls)
    Uses Polars Query Engine for parallel computation
    """
    try:
        # The spinner covers only the computation; the result is printed
        # after the status display has been closed.
        with console.status("[bold green]Calculating stats...[/bold green]"):
            lazy_frame = get_scan(file_path)
            stats_df = lazy_frame.describe()

        console.print(stats_df)
    except typer.Exit:
        # The path check inside get_scan has already printed its own error.
        # typer.Exit is an Exception subclass, so without this clause the
        # generic handler below would print a second, empty failure message.
        raise
    except Exception as failure:
        rprint(f"[bold red]Failure while calculating stats:[/bold red] {failure}")
        raise typer.Exit(code=1)
95
+
96
+
97
# Allows running this file directly as a script; importing the module
# does not start the CLI.
if __name__ == "__main__":
    app()
pq_peek/py.typed ADDED
File without changes
@@ -0,0 +1,47 @@
1
+ Metadata-Version: 2.3
2
+ Name: pq-peek
3
+ Version: 0.1.1
4
+ Summary: A blazing fast CLI tool to inspect Parquet files using Polars.
5
+ Author: Lars Lewerenz
6
+ Author-email: Lars Lewerenz <git.lars.lewerenz@gmail.com>
7
+ Requires-Dist: polars>=1.37.1
8
+ Requires-Dist: rich>=14.3.1
9
+ Requires-Dist: typer>=0.21.1
10
+ Requires-Python: >=3.12
11
+ Description-Content-Type: text/markdown
12
+
13
+ # pq-peek 🦆
14
+
15
+ A blazing fast, memory-efficient CLI tool to inspect large Parquet files directly in the terminal.
16
+ Built with *Polars*, *Typer*, and *Rich*. Managed via *uv*.
17
+
18
+ ## Install (uv)
19
+
20
+ ```bash
21
+ uv pip install pq-peek
22
+ ```
23
+
24
+ ## CLI usage
25
+
26
+ ```bash
27
+ pq-peek schema /path/to/file.parquet
28
+ pq-peek head /path/to/file.parquet --n 5
29
+ pq-peek stats /path/to/file.parquet
30
+ ```
31
+
32
+ ## Module usage
33
+
34
+ ```bash
35
+ python -m pq_peek schema /path/to/file.parquet
36
+ ```
37
+
38
+ ## Build and publish (uv)
39
+
40
+ ```bash
41
+ uv build
42
+ uv publish
43
+ ```
44
+
45
+ ## Publishing notes
46
+
47
+ CI publishing uses PyPI Trusted Publishing, authenticated through GitHub Actions OIDC. See `PUBLISHING.md` for the full release steps.
@@ -0,0 +1,8 @@
1
+ pq_peek/__init__.py,sha256=JYq5WlD6oEtrkztuq_nXw2yyh0f8TE_tT5Icqj6NzPw,53
2
+ pq_peek/__main__.py,sha256=RCZmmoCNOWC7rAfIDm_LaymsybXIzE6McYbUEEkf9P8,60
3
+ pq_peek/main.py,sha256=MVmxFoA49dGKLuoDz7iNrV5bOLtcXUwBOeoWGBq9lFA,2899
4
+ pq_peek/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ pq_peek-0.1.1.dist-info/WHEEL,sha256=fAguSjoiATBe7TNBkJwOjyL1Tt4wwiaQGtNtjRPNMQA,80
6
+ pq_peek-0.1.1.dist-info/entry_points.txt,sha256=1UxcnCZp2LTe0m4lJeiqct4aIEecydBoYd03rh0EfnY,46
7
+ pq_peek-0.1.1.dist-info/METADATA,sha256=_Nxm_rYOKGEg_mcJqppW_5yWvX6C-XU1d4uHS68Jz5M,980
8
+ pq_peek-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.9.28
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ pq-peek = pq_peek.main:app
3
+