pq-peek 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pq_peek/__init__.py +2 -0
- pq_peek/__main__.py +4 -0
- pq_peek/main.py +98 -0
- pq_peek/py.typed +0 -0
- pq_peek-0.1.1.dist-info/METADATA +47 -0
- pq_peek-0.1.1.dist-info/RECORD +8 -0
- pq_peek-0.1.1.dist-info/WHEEL +4 -0
- pq_peek-0.1.1.dist-info/entry_points.txt +3 -0
pq_peek/__init__.py
ADDED
pq_peek/__main__.py
ADDED
pq_peek/main.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
import typer
|
|
5
|
+
from rich import print as rprint
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
|
|
9
|
+
# Typer application object; the command functions in this module register
# themselves on it through @app.command().
app = typer.Typer(help="A fast CLI tool to inspect Parquet files")
# Shared Rich console used by the commands for tables and the status spinner.
console = Console()
|
|
11
|
+
|
|
12
|
+
def get_scan(file_path: Path) -> pl.LazyFrame:
    """
    Validate the path, then open the parquet file as a lazy Polars scan.

    verify_path runs first, so a missing path raises typer.Exit before
    Polars is asked to open anything.
    """
    verify_path(file_path)
    lazy_frame = pl.scan_parquet(file_path)
    return lazy_frame
|
|
15
|
+
|
|
16
|
+
def verify_path(file_path: Path) -> None:
    """
    Check that the path exists and warn when it lacks a '.parquet' suffix.

    A missing path prints an error and raises typer.Exit with code 1.
    An unexpected suffix only prints a warning; nothing is raised.
    """
    if not file_path.exists():
        rprint(f"[bold red]Error:[/bold red] File was not found: '{file_path}'")
        raise typer.Exit(code=1)
    # Compare case-insensitively so names such as 'data.PARQUET' do not
    # trigger a false warning.
    if file_path.suffix.lower() != ".parquet":
        rprint("[bold yellow]Warning:[/bold yellow] File does not end with '.parquet'")
|
|
25
|
+
|
|
26
|
+
@app.command()
def schema(file_path: Path):
    """
    Displays the schema (column names, types) of the parquet file
    Uses Lazy Loading (scan_parquet) to save memory
    """
    # Local import: escape() keeps column names from being parsed as Rich markup.
    from rich.markup import escape

    try:
        lazy_frame = get_scan(file_path)
        # Named file_schema so the local does not shadow this command function.
        file_schema = lazy_frame.collect_schema()

        table = Table(title=f"Schema: {file_path.name}")
        table.add_column("Column name", style="cyan", no_wrap=True)
        table.add_column("Type", style="magenta")

        for name, dtype in file_schema.items():
            # Escape both cells so bracketed text (e.g. "price[usd]") is shown
            # literally instead of being treated as a style tag.
            table.add_row(escape(name), escape(str(dtype)))

        console.print(table)
        rprint(f"\n[green]Amount of columns:[/green] {len(file_schema)}")
    except typer.Exit:
        # typer.Exit derives from Exception. The path check has already
        # reported the problem, so let the exit propagate instead of printing
        # a second, misleading failure message.
        raise
    except Exception as failure:
        rprint(f"[bold red]Failure while reading the parquet file:[/bold red] {failure}")
        raise typer.Exit(code=1) from failure
|
|
48
|
+
|
|
49
|
+
@app.command()
def head(file_path: Path, n: int = typer.Option(5, help="Amount of rows")):
    """
    Displays the first n rows of the parquet file
    """
    # Local import: escape() keeps data values from being parsed as Rich markup.
    from rich.markup import escape

    try:
        lazy_frame = get_scan(file_path)
        df = lazy_frame.limit(n).collect()

        if df.is_empty():
            rprint("[yellow]File is empty.[/yellow]")
            return

        # Report the number of rows actually shown; the file may hold fewer
        # than the requested n.
        table = Table(title=f"Preview {file_path.name} ({df.height} rows)")

        for column in df.columns:
            table.add_column(escape(column), overflow="fold")

        for row in df.iter_rows():
            # Cell text comes from the file, so escape it: a value such as
            # "[/x]" would otherwise be read as a markup tag and fail rendering.
            table.add_row(*(escape(str(value)) for value in row))

        console.print(table)
    except typer.Exit:
        # typer.Exit derives from Exception. The path check has already
        # reported the problem, so let the exit propagate instead of printing
        # a second, misleading failure message.
        raise
    except Exception as failure:
        rprint(f"[bold red]Failure while reading the parquet file:[/bold red] {failure}")
        raise typer.Exit(code=1) from failure
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@app.command()
def stats(file_path: Path):
    """
    Displays stats of the parquet file (min, max, nulls)
    Uses Polars Query Engine for parallel computation
    """
    try:
        # Keep the spinner visible while the statistics are being computed.
        with console.status("[bold green]Calculating stats...[/bold green]"):
            lazy_frame = get_scan(file_path)
            stats_df = lazy_frame.describe()

        console.print(stats_df)
    except typer.Exit:
        # typer.Exit derives from Exception. The path check has already
        # reported the problem, so let the exit propagate instead of printing
        # a second, misleading failure message.
        raise
    except Exception as failure:
        rprint(f"[bold red]Failure while calculating stats:[/bold red] {failure}")
        raise typer.Exit(code=1) from failure
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
|
pq_peek/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: pq-peek
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: A blazing fast CLI tool to inspect Parquet files using Polars.
|
|
5
|
+
Author: Lars Lewerenz
|
|
6
|
+
Author-email: Lars Lewerenz <git.lars.lewerenz@gmail.com>
|
|
7
|
+
Requires-Dist: polars>=1.37.1
|
|
8
|
+
Requires-Dist: rich>=14.3.1
|
|
9
|
+
Requires-Dist: typer>=0.21.1
|
|
10
|
+
Requires-Python: >=3.12
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
pq-peek 🦆
|
|
14
|
+
|
|
15
|
+
A blazing fast, memory-efficient CLI tool to inspect large Parquet files directly in the terminal.
|
|
16
|
+
Built with *Polars*, *Typer*, and *Rich*. Managed via *uv*.
|
|
17
|
+
|
|
18
|
+
## Install (uv)
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
uv pip install pq-peek
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## CLI usage
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pq-peek schema /path/to/file.parquet
|
|
28
|
+
pq-peek head /path/to/file.parquet --n 5
|
|
29
|
+
pq-peek stats /path/to/file.parquet
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Module usage
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
python -m pq_peek schema /path/to/file.parquet
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Build and publish (uv)
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
uv build
|
|
42
|
+
uv publish
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Publishing notes
|
|
46
|
+
|
|
47
|
+
CI publishing uses GitHub's Trusted Publisher OIDC. See `PUBLISHING.md` for the full release steps.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
pq_peek/__init__.py,sha256=JYq5WlD6oEtrkztuq_nXw2yyh0f8TE_tT5Icqj6NzPw,53
|
|
2
|
+
pq_peek/__main__.py,sha256=RCZmmoCNOWC7rAfIDm_LaymsybXIzE6McYbUEEkf9P8,60
|
|
3
|
+
pq_peek/main.py,sha256=MVmxFoA49dGKLuoDz7iNrV5bOLtcXUwBOeoWGBq9lFA,2899
|
|
4
|
+
pq_peek/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
pq_peek-0.1.1.dist-info/WHEEL,sha256=fAguSjoiATBe7TNBkJwOjyL1Tt4wwiaQGtNtjRPNMQA,80
|
|
6
|
+
pq_peek-0.1.1.dist-info/entry_points.txt,sha256=1UxcnCZp2LTe0m4lJeiqct4aIEecydBoYd03rh0EfnY,46
|
|
7
|
+
pq_peek-0.1.1.dist-info/METADATA,sha256=_Nxm_rYOKGEg_mcJqppW_5yWvX6C-XU1d4uHS68Jz5M,980
|
|
8
|
+
pq_peek-0.1.1.dist-info/RECORD,,
|