iparq 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
iparq/source.py CHANGED
@@ -2,8 +2,10 @@ import pyarrow.parquet as pq
2
2
  import typer
3
3
  from pydantic import BaseModel
4
4
  from rich import print
5
+ from rich.console import Console
5
6
 
6
7
  app = typer.Typer()
8
+ console = Console()
7
9
 
8
10
 
9
11
  class ParquetMetaModel(BaseModel):
@@ -39,12 +41,21 @@ def read_parquet_metadata(filename: str):
39
41
  - parquet_metadata (pyarrow.parquet.FileMetaData): The metadata of the Parquet file.
40
42
  - compression_codecs (set): A set of compression codecs used in the Parquet file.
41
43
  """
42
- compression_codecs = set([])
43
- parquet_metadata = pq.ParquetFile(filename).metadata
44
-
45
- for i in range(parquet_metadata.num_row_groups):
46
- for j in range(parquet_metadata.num_columns):
47
- compression_codecs.add(parquet_metadata.row_group(i).column(j).compression)
44
+ try:
45
+ compression_codecs = set([])
46
+ parquet_metadata = pq.ParquetFile(filename).metadata
47
+
48
+ for i in range(parquet_metadata.num_row_groups):
49
+ for j in range(parquet_metadata.num_columns):
50
+ compression_codecs.add(
51
+ parquet_metadata.row_group(i).column(j).compression
52
+ )
53
+
54
+ except FileNotFoundError:
55
+ console.print(
56
+ f"Cannot open: {filename}.", style="blink bold red underline on white"
57
+ )
58
+ exit(1)
48
59
 
49
60
  return parquet_metadata, compression_codecs
50
61
 
@@ -75,10 +86,10 @@ def print_parquet_metadata(parquet_metadata):
75
86
  format_version=str(parquet_metadata.format_version),
76
87
  serialized_size=parquet_metadata.serialized_size,
77
88
  )
78
- print(meta)
89
+ console.print(meta)
79
90
 
80
91
  except AttributeError as e:
81
- print(f"Error: {e}")
92
+ console.print(f"Error: {e}", style="blink bold red underline on white")
82
93
  finally:
83
94
  pass
84
95
 
@@ -92,7 +103,7 @@ def main(filename: str):
92
103
  filename (str): The path to the Parquet file.
93
104
 
94
105
  Returns:
95
- None
106
+ Metadata of the Parquet file and the compression codecs used.
96
107
  """
97
108
  (parquet_metadata, compression) = read_parquet_metadata(filename)
98
109
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: iparq
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Display version and compression information about a parquet file
5
5
  Author-email: MiguelElGallo <miguel.zurcher@gmail.com>
6
6
  License-File: LICENSE
@@ -8,11 +8,23 @@ Requires-Python: >=3.9
8
8
  Requires-Dist: pyarrow>=19.0.0
9
9
  Requires-Dist: pydantic>=2.10.6
10
10
  Requires-Dist: typer>=0.15.1
11
+ Provides-Extra: checks
12
+ Requires-Dist: mypy>=1.14.1; extra == 'checks'
13
+ Requires-Dist: ruff>=0.9.3; extra == 'checks'
14
+ Provides-Extra: test
15
+ Requires-Dist: pytest>=7.0; extra == 'test'
11
16
  Description-Content-Type: text/markdown
12
17
 
13
18
  # iparq
14
19
 
15
- After reading [this blog](https://duckdb.org/2025/01/22/parquet-encodings.html), I began to wonder which Parquet version and compression methods the everyday tools we rely on actually use, only to find that there’s no straightforward way to determine this. That curiosity and the difficulty of quickly discovering such details motivated me to create ipq (Information Parquet). My goal with ipq is to help users easily identify the specifics of the Parquet files generated by different engines, making it clear which features—like newer encodings or certain compression algorithms—the creator of the parquet is using.
20
+ [![Python package](https://github.com/MiguelElGallo/iparq/actions/workflows/python-package.yml/badge.svg)](https://github.com/MiguelElGallo/iparq/actions/workflows/python-package.yml)
21
+
22
+ [![Dependabot Updates](https://github.com/MiguelElGallo/iparq/actions/workflows/dependabot/dependabot-updates/badge.svg)](https://github.com/MiguelElGallo/iparq/actions/workflows/dependabot/dependabot-updates)
23
+
24
+ [![Upload Python Package](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml/badge.svg)](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml)
25
+
26
+ ![alt text](media/iparq.png)
27
+ After reading [this blog](https://duckdb.org/2025/01/22/parquet-encodings.html), I began to wonder which Parquet version and compression methods the everyday tools we rely on actually use, only to find that there’s no straightforward way to determine this. That curiosity and the difficulty of quickly discovering such details motivated me to create iparq (Information Parquet). My goal with iparq is to help users easily identify the specifics of the Parquet files generated by different engines, making it clear which features—like newer encodings or certain compression algorithms—the creator of the Parquet file is using.
16
28
 
17
29
  ## Installation
18
30
 
@@ -48,15 +60,27 @@ After reading [this blog](https://duckdb.org/2025/01/22/parquet-encodings.html),
48
60
  iparq --help
49
61
  ```
50
62
 
51
- ## Usage
63
+ ### Using Homebrew on a Mac
64
+
65
+ 1) Run the following:
52
66
 
53
- Run
54
67
  ```sh
55
- iparq <filename>
68
+ brew tap MiguelElGallo/tap https://github.com/MiguelElGallo/homebrew-iparq.git
69
+ brew install MiguelElGallo/tap/iparq
70
+ iparq --help
56
71
  ```
72
+
73
+ ## Usage
74
+
75
+ Run
76
+
77
+ ```sh
78
+ iparq <filename>
79
+ ```
57
80
 
58
81
  Replace `<filename>` with the path to your .parquet file. The utility will read the metadata of the file and print the compression codecs used in the parquet file.
59
82
 
83
+
60
84
  ## Example output
61
85
 
62
86
  ```log
@@ -0,0 +1,7 @@
1
+ iparq/__init__.py,sha256=sXLh7g3KC4QCFxcZGBTpG2scR7hmmBsMjq6LqRptkRg,22
2
+ iparq/source.py,sha256=Jo_q4vo39MyJHF1GAqxW6DAJ47pqP5VNYJ2xvlLqUdk,3784
3
+ iparq-0.1.4.dist-info/METADATA,sha256=aU7lRS8-sSIak88j-31FTh9mz4XhmbbHUGurjinbD9k,2950
4
+ iparq-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
+ iparq-0.1.4.dist-info/entry_points.txt,sha256=vrE2lwvuheySWTOJdr_gh9AT47ck02WCHo0muRq5HS8,43
6
+ iparq-0.1.4.dist-info/licenses/LICENSE,sha256=apqXCIYD_rrtbJVE-Ex1-1X7N0cBwZTOm4KL3TEFmYA,1067
7
+ iparq-0.1.4.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- iparq/__init__.py,sha256=sXLh7g3KC4QCFxcZGBTpG2scR7hmmBsMjq6LqRptkRg,22
2
- iparq/source.py,sha256=I0K9HQ294PGhsBIS-op_ZNzKvG3J0rkrS5ESQAVPibM,3385
3
- iparq-0.1.2.dist-info/METADATA,sha256=yoUrBXJK_HxyBUJO_nDjRhxkfoAkzIYfYhDYv6xBjkA,1940
4
- iparq-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
- iparq-0.1.2.dist-info/entry_points.txt,sha256=vrE2lwvuheySWTOJdr_gh9AT47ck02WCHo0muRq5HS8,43
6
- iparq-0.1.2.dist-info/licenses/LICENSE,sha256=apqXCIYD_rrtbJVE-Ex1-1X7N0cBwZTOm4KL3TEFmYA,1067
7
- iparq-0.1.2.dist-info/RECORD,,
File without changes