parq-cli 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
parq/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ """
2
+ parq-cli: A powerful command-line tool for inspecting and analyzing Apache Parquet files.
3
+ """
4
+
5
+ __version__ = "0.0.1"
6
+ __author__ = "Jinfeng Sun"
7
+
8
+ # {{CHENGQI:
9
+ # Action: Created; Timestamp: 2025-10-14 16:12:00 +08:00; Reason: Package initialization file; Principle_Applied: KISS
10
+ # }}
parq/__main__.py ADDED
@@ -0,0 +1,15 @@
1
+ """
2
+ Entry point for running parq as a module.
3
+ Allows execution via: python -m parq
4
+ """
5
+
6
+ from parq.cli import app
7
+
8
+ if __name__ == "__main__":
9
+ app()
10
+
11
+ # {{CHENGQI:
12
+ # Action: Modified; Timestamp: 2025-10-14 18:07:04 +08:00;
13
+ # Reason: Entry point using app() with command-based design;
14
+ # Principle_Applied: KISS, Typer best practices
15
+ # }}
parq/cli.py ADDED
@@ -0,0 +1,130 @@
1
+ """
2
+ CLI application module.
3
+ Command-line interface for parq-cli tool.
4
+ """
5
+
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ import typer
10
+ from typing_extensions import Annotated
11
+
12
+ from parq.output import OutputFormatter
13
+ from parq.reader import ParquetReader
14
+
15
+ app = typer.Typer(
16
+ name="parq",
17
+ help="A powerful command-line tool for inspecting Apache Parquet files ๐Ÿš€",
18
+ add_completion=False,
19
+ no_args_is_help=False,
20
+ )
21
+
22
+ formatter = OutputFormatter()
23
+
24
+
25
+ @app.command()
26
+ def main(
27
+ file: Annotated[
28
+ Optional[Path],
29
+ typer.Argument(
30
+ help="Path to Parquet file",
31
+ ),
32
+ ] = None,
33
+ schema: Annotated[
34
+ bool, typer.Option("--schema", "-s", help="Display schema information")
35
+ ] = False,
36
+ head: Annotated[Optional[int], typer.Option("--head", help="Display first N rows")] = None,
37
+ tail: Annotated[Optional[int], typer.Option("--tail", help="Display last N rows")] = None,
38
+ count: Annotated[bool, typer.Option("--count", "-c", help="Display total row count")] = False,
39
+ version: Annotated[
40
+ bool, typer.Option("--version", "-v", help="Show version information")
41
+ ] = False,
42
+ ) -> None:
43
+ """
44
+ A powerful command-line tool for inspecting Apache Parquet files ๐Ÿš€
45
+
46
+ Examples:
47
+
48
+ # Show file metadata
49
+ parq data.parquet
50
+
51
+ # Show schema
52
+ parq data.parquet --schema
53
+
54
+ # Show first 10 rows
55
+ parq data.parquet --head 10
56
+
57
+ # Show last 5 rows
58
+ parq data.parquet --tail 5
59
+
60
+ # Show row count
61
+ parq data.parquet --count
62
+
63
+ # Show version
64
+ parq --version
65
+ """
66
+
67
+ # Handle version flag
68
+ if version:
69
+ from parq import __version__
70
+
71
+ typer.echo(f"parq-cli version {__version__}")
72
+ return
73
+
74
+ # File is required if not showing version
75
+ if file is None:
76
+ typer.echo("Error: Missing argument 'FILE'.")
77
+ typer.echo("Try 'parq --help' for help.")
78
+ raise typer.Exit(code=1)
79
+
80
+ try:
81
+ reader = ParquetReader(str(file))
82
+
83
+ # If no options specified, show metadata
84
+ if not any([schema, head is not None, tail is not None, count]):
85
+ metadata = reader.get_metadata_dict()
86
+ formatter.print_metadata(metadata)
87
+ return
88
+
89
+ # Show schema
90
+ if schema:
91
+ schema_info = reader.get_schema_info()
92
+ formatter.print_schema(schema_info)
93
+
94
+ # Show head
95
+ if head is not None:
96
+ table = reader.read_head(head)
97
+ formatter.print_table(table, f"First {head} Rows")
98
+
99
+ # Show tail
100
+ if tail is not None:
101
+ table = reader.read_tail(tail)
102
+ formatter.print_table(table, f"Last {tail} Rows")
103
+
104
+ # Show count
105
+ if count:
106
+ formatter.print_count(reader.num_rows)
107
+
108
+ except FileNotFoundError as e:
109
+ formatter.print_error(str(e))
110
+ raise typer.Exit(code=1)
111
+ except Exception as e:
112
+ formatter.print_error(f"Failed to read Parquet file: {e}")
113
+ raise typer.Exit(code=1)
114
+
115
+
116
+ if __name__ == "__main__":
117
+ app()
118
+
119
+
120
+ # {{CHENGQI:
121
+ # Action: Modified; Timestamp: 2025-10-14 18:07:04 +08:00;
122
+ # Reason: Fixed CLI options parsing by using @app.command() instead of @app.callback();
123
+ # Principle_Applied: KISS, Typer best practices - single command app should use @app.command()
124
+ # }}
125
+ # {{START MODIFICATIONS}}
126
+ # - Changed @app.callback(invoke_without_command=True) to @app.command()
127
+ # - Removed ctx parameter and subcommand checking logic (lines 67-69)
128
+ # - This fixes the issue where options like --schema were incorrectly parsed as subcommands
129
+ # - Now 'parq file.parquet --schema' works correctly as expected
130
+ # {{END MODIFICATIONS}}
parq/output.py ADDED
@@ -0,0 +1,176 @@
1
+ """
2
+ Output formatting module.
3
+ Handles pretty-printing of Parquet data and metadata.
4
+ """
5
+
6
+ from typing import Any, Dict, List
7
+
8
+ import pyarrow as pa
9
+ from rich import box
10
+ from rich.console import Console
11
+ from rich.panel import Panel
12
+ from rich.table import Table
13
+
14
+ console = Console()
15
+
16
+
17
+ class OutputFormatter:
18
+ """Formatter for displaying Parquet data and metadata."""
19
+
20
+ @staticmethod
21
+ def _format_file_size(size_bytes: int) -> str:
22
+ """
23
+ Format file size in human-readable format.
24
+
25
+ Args:
26
+ size_bytes: Size in bytes
27
+
28
+ Returns:
29
+ Formatted string like "1.23 MB"
30
+ """
31
+ if size_bytes < 1024:
32
+ return f"{size_bytes} B"
33
+ elif size_bytes < 1024 * 1024:
34
+ return f"{size_bytes / 1024:.2f} KB"
35
+ elif size_bytes < 1024 * 1024 * 1024:
36
+ return f"{size_bytes / (1024 * 1024):.2f} MB"
37
+ else:
38
+ return f"{size_bytes / (1024 * 1024 * 1024):.2f} GB"
39
+
40
+ @staticmethod
41
+ def print_metadata(metadata_dict: Dict[str, Any]) -> None:
42
+ """
43
+ Print file metadata in a formatted panel.
44
+
45
+ Args:
46
+ metadata_dict: Dictionary containing metadata
47
+ """
48
+ # Special handling for specific fields
49
+ content_lines = []
50
+ for key, value in metadata_dict.items():
51
+ if key == "num_columns":
52
+ content_lines.append(
53
+ f"[cyan]{key}:[/cyan] [yellow]{value}[/yellow] [dim](logical)[/dim]"
54
+ )
55
+ elif key == "num_physical_columns":
56
+ content_lines.append(
57
+ f"[cyan]{key}:[/cyan] [yellow]{value}[/yellow] [dim](storage)[/dim]"
58
+ )
59
+ elif key == "file_size":
60
+ # Format file size in human-readable format
61
+ formatted_size = OutputFormatter._format_file_size(value)
62
+ content_lines.append(f"[cyan]{key}:[/cyan] [yellow]{formatted_size}[/yellow]")
63
+ else:
64
+ content_lines.append(f"[cyan]{key}:[/cyan] [yellow]{value}[/yellow]")
65
+
66
+ content = "\n".join(content_lines)
67
+
68
+ panel = Panel(
69
+ content,
70
+ title="[bold green]๐Ÿ“Š Parquet File Metadata[/bold green]",
71
+ border_style="green",
72
+ box=box.ROUNDED,
73
+ )
74
+ console.print(panel)
75
+
76
+ @staticmethod
77
+ def print_schema(schema_info: List[Dict[str, Any]]) -> None:
78
+ """
79
+ Print schema information as a table.
80
+
81
+ Args:
82
+ schema_info: List of column information dictionaries
83
+ """
84
+ table = Table(
85
+ title="[bold blue]๐Ÿ“‹ Schema Information[/bold blue]",
86
+ box=box.ROUNDED,
87
+ show_header=True,
88
+ header_style="bold magenta",
89
+ )
90
+
91
+ table.add_column("Column Name", style="cyan", no_wrap=True)
92
+ table.add_column("Data Type", style="green")
93
+ table.add_column("Nullable", style="yellow")
94
+
95
+ for col in schema_info:
96
+ table.add_row(col["name"], col["type"], "โœ“" if col["nullable"] else "โœ—")
97
+
98
+ console.print(table)
99
+
100
+ @staticmethod
101
+ def print_table(arrow_table: pa.Table, title: str = "Data Preview") -> None:
102
+ """
103
+ Print PyArrow table as a Rich table.
104
+
105
+ Args:
106
+ arrow_table: PyArrow table to display
107
+ title: Title for the table
108
+ """
109
+ # Convert to pandas for easier display
110
+ df = arrow_table.to_pandas()
111
+
112
+ table = Table(
113
+ title=f"[bold blue]๐Ÿ“„ {title}[/bold blue]",
114
+ box=box.ROUNDED,
115
+ show_header=True,
116
+ header_style="bold magenta",
117
+ )
118
+
119
+ # Add columns
120
+ for col in df.columns:
121
+ table.add_column(str(col), style="cyan")
122
+
123
+ # Add rows
124
+ for _, row in df.iterrows():
125
+ table.add_row(*[str(val) for val in row])
126
+
127
+ console.print(table)
128
+
129
+ @staticmethod
130
+ def print_count(count: int) -> None:
131
+ """
132
+ Print row count.
133
+
134
+ Args:
135
+ count: Number of rows
136
+ """
137
+ panel = Panel(
138
+ f"[bold yellow]{count:,}[/bold yellow] rows",
139
+ title="[bold green]๐Ÿ“Š Total Rows[/bold green]",
140
+ border_style="green",
141
+ box=box.ROUNDED,
142
+ )
143
+ console.print(panel)
144
+
145
+ @staticmethod
146
+ def print_error(message: str) -> None:
147
+ """
148
+ Print error message.
149
+
150
+ Args:
151
+ message: Error message to display
152
+ """
153
+ console.print(f"[bold red]โŒ Error:[/bold red] {message}")
154
+
155
+ @staticmethod
156
+ def print_success(message: str) -> None:
157
+ """
158
+ Print success message.
159
+
160
+ Args:
161
+ message: Success message to display
162
+ """
163
+ console.print(f"[bold green]โœ“[/bold green] {message}")
164
+
165
+
166
+ # {{CHENGQI:
167
+ # Action: Modified; Timestamp: 2025-10-14 HH:MM:SS +08:00;
168
+ # Reason: Enhanced metadata display to show both logical and physical column counts;
169
+ # Principle_Applied: User-centric design - clear distinction between logical and physical columns
170
+ # }}
171
+ # {{START MODIFICATIONS}}
172
+ # - Enhanced print_metadata to distinguish logical vs physical columns
173
+ # - Added visual indicators: (logical) for num_columns, (storage) for num_physical_columns
174
+ # - Added informative note when physical columns differ from logical columns
175
+ # - Helps users understand nested structure impact on column count
176
+ # {{END MODIFICATIONS}}
parq/reader.py ADDED
@@ -0,0 +1,164 @@
1
+ """
2
+ Parquet file reader module.
3
+ Provides functionality to read and inspect Parquet files.
4
+ """
5
+
6
+ from pathlib import Path
7
+ from typing import List, Optional
8
+
9
+ import pyarrow as pa
10
+ import pyarrow.parquet as pq
11
+
12
+
13
+ class ParquetReader:
14
+ """Parquet file reader with metadata inspection capabilities."""
15
+
16
+ def __init__(self, file_path: str):
17
+ """
18
+ Initialize ParquetReader with a file path.
19
+
20
+ Args:
21
+ file_path: Path to the Parquet file
22
+ """
23
+ self.file_path = Path(file_path)
24
+ if not self.file_path.exists():
25
+ raise FileNotFoundError(f"File not found: {file_path}")
26
+
27
+ self._parquet_file = pq.ParquetFile(self.file_path)
28
+
29
+ @property
30
+ def metadata(self) -> pq.FileMetaData:
31
+ """Get file metadata."""
32
+ return self._parquet_file.metadata
33
+
34
+ @property
35
+ def schema(self) -> pa.Schema:
36
+ """Get file schema."""
37
+ return self._parquet_file.schema_arrow
38
+
39
+ @property
40
+ def num_rows(self) -> int:
41
+ """Get total number of rows."""
42
+ return self.metadata.num_rows
43
+
44
+ @property
45
+ def num_columns(self) -> int:
46
+ """Get total number of columns (logical schema columns)."""
47
+ return len(self.schema)
48
+
49
+ @property
50
+ def num_physical_columns(self) -> int:
51
+ """Get total number of physical columns (from metadata)."""
52
+ return self.metadata.num_columns
53
+
54
+ @property
55
+ def num_row_groups(self) -> int:
56
+ """Get number of row groups."""
57
+ return self.metadata.num_row_groups
58
+
59
+ def get_metadata_dict(self) -> dict:
60
+ """
61
+ Get metadata as a dictionary.
62
+
63
+ Returns:
64
+ Dictionary containing file metadata
65
+ """
66
+ metadata_dict = {
67
+ "file_path": str(self.file_path),
68
+ "num_rows": self.num_rows,
69
+ "num_columns": self.num_columns,
70
+ }
71
+
72
+ # Add physical columns right after logical columns if different
73
+ if self.num_physical_columns != self.num_columns:
74
+ metadata_dict["num_physical_columns"] = self.num_physical_columns
75
+
76
+ # Add file size
77
+ file_size = self.file_path.stat().st_size
78
+ metadata_dict["file_size"] = file_size
79
+
80
+ # Add compression type (from first row group, first column)
81
+ if self.num_row_groups > 0:
82
+ compression = self.metadata.row_group(0).column(0).compression
83
+ metadata_dict["compression_types"] = compression
84
+
85
+ # Add remaining metadata
86
+ metadata_dict.update(
87
+ {
88
+ "num_row_groups": self.num_row_groups,
89
+ "format_version": self.metadata.format_version,
90
+ "serialized_size": self.metadata.serialized_size,
91
+ "created_by": self.metadata.created_by,
92
+ }
93
+ )
94
+
95
+ return metadata_dict
96
+
97
+ def get_schema_info(self) -> List[dict]:
98
+ """
99
+ Get schema information as a list of column details.
100
+
101
+ Returns:
102
+ List of dictionaries with column information
103
+ """
104
+ schema_info = []
105
+ for field in self.schema:
106
+ schema_info.append(
107
+ {
108
+ "name": field.name,
109
+ "type": str(field.type),
110
+ "nullable": field.nullable,
111
+ }
112
+ )
113
+ return schema_info
114
+
115
+ def read_head(self, n: int = 5) -> pa.Table:
116
+ """
117
+ Read first n rows.
118
+
119
+ Args:
120
+ n: Number of rows to read
121
+
122
+ Returns:
123
+ PyArrow table with first n rows
124
+ """
125
+ table = self._parquet_file.read()
126
+ return table.slice(0, min(n, self.num_rows))
127
+
128
+ def read_tail(self, n: int = 5) -> pa.Table:
129
+ """
130
+ Read last n rows.
131
+
132
+ Args:
133
+ n: Number of rows to read
134
+
135
+ Returns:
136
+ PyArrow table with last n rows
137
+ """
138
+ table = self._parquet_file.read()
139
+ start = max(0, self.num_rows - n)
140
+ return table.slice(start, n)
141
+
142
+ def read_columns(self, columns: Optional[List[str]] = None) -> pa.Table:
143
+ """
144
+ Read specific columns.
145
+
146
+ Args:
147
+ columns: List of column names to read. If None, read all columns.
148
+
149
+ Returns:
150
+ PyArrow table with selected columns
151
+ """
152
+ return self._parquet_file.read(columns=columns)
153
+
154
+
155
+ # {{CHENGQI:
156
+ # Action: Modified; Timestamp: 2025-10-14 HH:MM:SS +08:00;
157
+ # Reason: Fixed num_columns to use schema length instead of metadata for accurate logical column count;
158
+ # Principle_Applied: User-centric design - show logical columns that users actually see
159
+ # }}
160
+ # {{START MODIFICATIONS}}
161
+ # - Changed num_columns from metadata.num_columns to len(self.schema)
162
+ # - Reason: metadata.num_columns may include physical columns from nested structures
163
+ # - Schema length represents logical columns that users expect to see
164
+ # {{END MODIFICATIONS}}
@@ -0,0 +1,226 @@
1
+ Metadata-Version: 2.4
2
+ Name: parq-cli
3
+ Version: 0.0.1
4
+ Summary: A powerful command-line tool for inspecting and analyzing Apache Parquet files
5
+ Project-URL: Homepage, https://github.com/Tendo33/parq-cli
6
+ Project-URL: Repository, https://github.com/Tendo33/parq-cli
7
+ Project-URL: Issues, https://github.com/Tendo33/parq-cli/issues
8
+ Project-URL: Documentation, https://github.com/Tendo33/parq-cli#readme
9
+ Author-email: SimonSun <sjf19981112@gmail.com>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: analytics,apache-parquet,cli,data,data-tools,parquet
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Requires-Python: >=3.9
23
+ Requires-Dist: pandas>=2.0.0
24
+ Requires-Dist: pyarrow>=18.0.0
25
+ Requires-Dist: rich>=13.0.0
26
+ Requires-Dist: typer>=0.15.0
27
+ Provides-Extra: dev
28
+ Requires-Dist: black>=24.0.0; extra == 'dev'
29
+ Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
30
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
31
+ Requires-Dist: ruff>=0.7.0; extra == 'dev'
32
+ Description-Content-Type: text/markdown
33
+
34
+ # parq-cli
35
+
36
+ [![Python Version](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/downloads/)
37
+ [![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
38
+
39
+ ไธ€ไธชๅผบๅคง็š„ Apache Parquet ๆ–‡ไปถๅ‘ฝไปค่กŒๅทฅๅ…ท ๐Ÿš€
40
+
41
+ ## โœจ ็‰นๆ€ง
42
+
43
+ - ๐Ÿ“Š **ๅ…ƒๆ•ฐๆฎๆŸฅ็œ‹**: ๅฟซ้€ŸๆŸฅ็œ‹ Parquet ๆ–‡ไปถ็š„ๅ…ƒๆ•ฐๆฎไฟกๆฏ
44
+ - ๐Ÿ“‹ **Schema ๅฑ•็คบ**: ็พŽ่ง‚ๅœฐๅฑ•็คบๆ–‡ไปถ็š„ๅˆ—็ป“ๆž„ๅ’Œๆ•ฐๆฎ็ฑปๅž‹
45
+ - ๐Ÿ‘€ **ๆ•ฐๆฎ้ข„่งˆ**: ๆ”ฏๆŒๆŸฅ็œ‹ๆ–‡ไปถ็š„ๅ‰ N ่กŒๆˆ–ๅŽ N ่กŒ
46
+ - ๐Ÿ”ข **่กŒๆ•ฐ็ปŸ่ฎก**: ๅฟซ้€Ÿ่Žทๅ–ๆ–‡ไปถ็š„ๆ€ป่กŒๆ•ฐ
47
+ - ๐ŸŽจ **็พŽ่ง‚่พ“ๅ‡บ**: ไฝฟ็”จ Rich ๅบ“ๆไพ›ๅฝฉ่‰ฒใ€ๆ ผๅผๅŒ–็š„็ปˆ็ซฏ่พ“ๅ‡บ
48
+
49
+ ## ๐Ÿ“ฆ ๅฎ‰่ฃ…
50
+
51
+ ### ไปŽๆบ็ ๅฎ‰่ฃ…
52
+
53
+ ```bash
54
+ git clone https://github.com/Tendo33/parq-cli.git
55
+ cd parq-cli
56
+ pip install -e .
57
+ ```
58
+
59
+ ### ไฝฟ็”จ pip ๅฎ‰่ฃ…๏ผˆๅณๅฐ†ๆ”ฏๆŒ๏ผ‰
60
+
61
+ ```bash
62
+ pip install parq-cli
63
+ ```
64
+
65
+ ## ๐Ÿš€ ๅฟซ้€Ÿๅผ€ๅง‹
66
+
67
+ ### ๅŸบๆœฌ็”จๆณ•
68
+
69
+ ```bash
70
+ # ๆŸฅ็œ‹ๆ–‡ไปถๅ…ƒๆ•ฐๆฎ
71
+ parq data.parquet
72
+
73
+ # ๆ˜พ็คบ schema ไฟกๆฏ
74
+ parq data.parquet --schema
75
+
76
+ # ๆ˜พ็คบๅ‰ 10 ่กŒ
77
+ parq data.parquet --head 10
78
+
79
+ # ๆ˜พ็คบๅŽ 5 ่กŒ
80
+ parq data.parquet --tail 5
81
+
82
+ # ๆ˜พ็คบๆ€ป่กŒๆ•ฐ
83
+ parq data.parquet --count
84
+ ```
85
+
86
+ ### ็ป„ๅˆไฝฟ็”จ
87
+
88
+ ```bash
89
+ # ๅŒๆ—ถๆ˜พ็คบ schema ๅ’Œ่กŒๆ•ฐ
90
+ parq data.parquet --schema --count
91
+
92
+ # ๆ˜พ็คบๅ‰ 5 ่กŒๅ’Œ schema
93
+ parq data.parquet --head 5 --schema
94
+ ```
95
+
96
+ ## ๐Ÿ“– ๅ‘ฝไปคๅ‚่€ƒ
97
+
98
+ ### ไธปๅ‘ฝไปค
99
+
100
+ ```
101
+ parq [OPTIONS] FILE
102
+ ```
103
+
104
+ **ๅ‚ๆ•ฐ:**
105
+ - `FILE`: Parquet ๆ–‡ไปถ่ทฏๅพ„๏ผˆๅฟ…้œ€๏ผ‰
106
+
107
+ **้€‰้กน:**
108
+ - `--schema, -s`: ๆ˜พ็คบ schema ไฟกๆฏ
109
+ - `--head N`: ๆ˜พ็คบๅ‰ N ่กŒ
110
+ - `--tail N`: ๆ˜พ็คบๅŽ N ่กŒ
111
+ - `--count, -c`: ๆ˜พ็คบๆ€ป่กŒๆ•ฐ
112
+ - `--help`: ๆ˜พ็คบๅธฎๅŠฉไฟกๆฏ
113
+
114
+ ### ็‰ˆๆœฌไฟกๆฏ
115
+
116
+ ```bash
117
+ parq version
118
+ ```
119
+
120
+ ## ๐ŸŽจ ่พ“ๅ‡บ็คบไพ‹
121
+
122
+ ### ๅ…ƒๆ•ฐๆฎๅฑ•็คบ
123
+
124
+ ```
125
+ โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ๐Ÿ“Š Parquet File Metadata โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ
126
+ โ”‚ file_path: /path/to/data.parquet โ”‚
127
+ โ”‚ num_rows: 1000 โ”‚
128
+ โ”‚ num_columns: 5 โ”‚
129
+ โ”‚ num_row_groups: 1 โ”‚
130
+ โ”‚ format_version: 2.6 โ”‚
131
+ โ”‚ serialized_size: 2048 โ”‚
132
+ โ”‚ created_by: parquet-cpp-arrow version 18.0.0 โ”‚
133
+ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ
134
+ ```
135
+
136
+ ### Schema ๅฑ•็คบ
137
+
138
+ ```
139
+ ๐Ÿ“‹ Schema Information
140
+ โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“
141
+ โ”ƒ Column Name โ”ƒ Data Type โ”ƒ Nullable โ”ƒ
142
+ โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ
143
+ โ”‚ id โ”‚ int64 โ”‚ โœ— โ”‚
144
+ โ”‚ name โ”‚ string โ”‚ โœ“ โ”‚
145
+ โ”‚ age โ”‚ int64 โ”‚ โœ“ โ”‚
146
+ โ”‚ city โ”‚ string โ”‚ โœ“ โ”‚
147
+ โ”‚ salary โ”‚ double โ”‚ โœ“ โ”‚
148
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
149
+ ```
150
+
151
+ ## ๐Ÿ› ๏ธ ๆŠ€ๆœฏๆ ˆ
152
+
153
+ - **[PyArrow](https://arrow.apache.org/docs/python/)**: ้ซ˜ๆ€ง่ƒฝ็š„ Parquet ่ฏปๅ–ๅผ•ๆ“Ž
154
+ - **[Typer](https://typer.tiangolo.com/)**: ็ŽฐไปฃๅŒ–็š„ CLI ๆก†ๆžถ
155
+ - **[Rich](https://rich.readthedocs.io/)**: ็พŽ่ง‚็š„็ปˆ็ซฏ่พ“ๅ‡บ
156
+
157
+ ## ๐Ÿงช ๅผ€ๅ‘
158
+
159
+ ### ๅฎ‰่ฃ…ๅผ€ๅ‘ไพ่ต–
160
+
161
+ ```bash
162
+ pip install -e ".[dev]"
163
+ ```
164
+
165
+ ### ่ฟ่กŒๆต‹่ฏ•
166
+
167
+ ```bash
168
+ pytest
169
+ ```
170
+
171
+ ### ่ฟ่กŒๆต‹่ฏ•๏ผˆๅธฆ่ฆ†็›–็އ๏ผ‰
172
+
173
+ ```bash
174
+ pytest --cov=parq --cov-report=html
175
+ ```
176
+
177
+ ### ไปฃ็ ๆ ผๅผๅŒ–
178
+
179
+ ```bash
180
+ # ไฝฟ็”จ Black
181
+ black parq tests
182
+
183
+ # ไฝฟ็”จ Ruff ๆฃ€ๆŸฅ
184
+ ruff check parq tests
185
+ ```
186
+
187
+ ## ๐Ÿ—บ๏ธ ่ทฏ็บฟๅ›พ
188
+
189
+ - [x] ๅŸบ็ก€ๅ…ƒๆ•ฐๆฎๆŸฅ็œ‹
190
+ - [x] Schema ๅฑ•็คบ
191
+ - [x] ๆ•ฐๆฎ้ข„่งˆ๏ผˆhead/tail๏ผ‰
192
+ - [x] ่กŒๆ•ฐ็ปŸ่ฎก
193
+ - [ ] SQL ๆŸฅ่ฏขๆ”ฏๆŒ
194
+ - [ ] ๆ•ฐๆฎ็ปŸ่ฎกๅˆ†ๆž
195
+ - [ ] ๆ ผๅผ่ฝฌๆข๏ผˆCSV, JSON, Excel๏ผ‰
196
+ - [ ] ๆ–‡ไปถๅฏนๆฏ”
197
+ - [ ] ไบ‘ๅญ˜ๅ‚จๆ”ฏๆŒ๏ผˆS3, GCS, Azure๏ผ‰
198
+
199
+ ## ๐Ÿค ่ดก็Œฎ
200
+
201
+ ๆฌข่ฟŽๆไบค Issue ๅ’Œ Pull Request๏ผ
202
+
203
+ 1. Fork ๆœฌไป“ๅบ“
204
+ 2. ๅˆ›ๅปบ็‰นๆ€งๅˆ†ๆ”ฏ (`git checkout -b feature/AmazingFeature`)
205
+ 3. ๆไบคๆ›ดๆ”น (`git commit -m 'Add some AmazingFeature'`)
206
+ 4. ๆŽจ้€ๅˆฐๅˆ†ๆ”ฏ (`git push origin feature/AmazingFeature`)
207
+ 5. ๅผ€ๅฏ Pull Request
208
+
209
+ ## ๐Ÿ“„ ่ฎธๅฏ่ฏ
210
+
211
+ ๆœฌ้กน็›ฎ้‡‡็”จ MIT ่ฎธๅฏ่ฏ - ่ฏฆ่ง [LICENSE](LICENSE) ๆ–‡ไปถ
212
+
213
+ ## ๐Ÿ™ ่‡ด่ฐข
214
+
215
+ - ็ตๆ„ŸๆฅๆบไบŽ [parquet-cli](https://github.com/chhantyal/parquet-cli)
216
+ - ๆ„Ÿ่ฐข Apache Arrow ๅ›ข้˜Ÿๆไพ›ๅผบๅคง็š„ Parquet ๆ”ฏๆŒ
217
+ - ๆ„Ÿ่ฐข Rich ๅบ“ไธบ็ปˆ็ซฏ่พ“ๅ‡บๅขžๆทป่‰ฒๅฝฉ
218
+
219
+ ## ๐Ÿ“ฎ ่”็ณปๆ–นๅผ
220
+
221
+ - ไฝœ่€…: Jinfeng Sun
222
+ - ้กน็›ฎๅœฐๅ€: https://github.com/Tendo33/parq-cli
223
+
224
+ ---
225
+
226
+ **โญ ๅฆ‚ๆžœ่ฟ™ไธช้กน็›ฎๅฏนไฝ ๆœ‰ๅธฎๅŠฉ๏ผŒ่ฏท็ป™ไธช Star๏ผ**
@@ -0,0 +1,10 @@
1
+ parq/__init__.py,sha256=gwi-2yJ5Vi64or1d2PUmaNh5kqK3_Zk0NqNTR5Ehyos,286
2
+ parq/__main__.py,sha256=WGh0rg8ZQI3jEKQDY07_v9B4ZHhvfyF0bLQi-oQnpxY,338
3
+ parq/cli.py,sha256=cNeWoO1vuDGhIQxFjB6EyWtVZngZKa8uxKSXIUw-j5w,3694
4
+ parq/output.py,sha256=irl0ckCBWy15hJqy4QxGBLrRAdIWT-6AxFxjAoJac24,5415
5
+ parq/reader.py,sha256=7S0D4RVDz0ElB5XugbfI4AIAI--MFsg8JqEUb4zkJVI,4859
6
+ parq_cli-0.0.1.dist-info/METADATA,sha256=wBWuZf6ft07TfOTlp5aFwJcMeWGYCbLpzKQtXzZ9qug,6315
7
+ parq_cli-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
8
+ parq_cli-0.0.1.dist-info/entry_points.txt,sha256=reTENlFOrUkuoCs5VGNvxD1FWvnpyIL4CP2GucIY0Hw,38
9
+ parq_cli-0.0.1.dist-info/licenses/LICENSE,sha256=RCmRAGCROPvNK8H6jyCxN2CH3ka_-lb2_m8LpwjJl3w,1068
10
+ parq_cli-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ parq = parq.cli:app
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Jinfeng Sun
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.