iparq 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iparq/source.py +28 -3
- {iparq-0.1.4.dist-info → iparq-0.1.5.dist-info}/METADATA +68 -8
- iparq-0.1.5.dist-info/RECORD +7 -0
- iparq-0.1.4.dist-info/RECORD +0 -7
- {iparq-0.1.4.dist-info → iparq-0.1.5.dist-info}/WHEEL +0 -0
- {iparq-0.1.4.dist-info → iparq-0.1.5.dist-info}/entry_points.txt +0 -0
- {iparq-0.1.4.dist-info → iparq-0.1.5.dist-info}/licenses/LICENSE +0 -0
iparq/source.py
CHANGED
|
@@ -94,6 +94,32 @@ def print_parquet_metadata(parquet_metadata):
|
|
|
94
94
|
pass
|
|
95
95
|
|
|
96
96
|
|
|
97
|
+
def print_compression_types(parquet_metadata) -> None:
|
|
98
|
+
"""
|
|
99
|
+
Prints the compression type for each column in each row group of the Parquet file.
|
|
100
|
+
"""
|
|
101
|
+
try:
|
|
102
|
+
num_row_groups = parquet_metadata.num_row_groups
|
|
103
|
+
num_columns = parquet_metadata.num_columns
|
|
104
|
+
console.print("[bold underline]Column Compression Info:[/bold underline]")
|
|
105
|
+
for i in range(num_row_groups):
|
|
106
|
+
console.print(f"[bold]Row Group {i}:[/bold]")
|
|
107
|
+
for j in range(num_columns):
|
|
108
|
+
column_chunk = parquet_metadata.row_group(i).column(j)
|
|
109
|
+
compression = column_chunk.compression
|
|
110
|
+
column_name = parquet_metadata.schema.column(j).name
|
|
111
|
+
console.print(
|
|
112
|
+
f" Column '{column_name}' (Index {j}): [italic]{compression}[/italic]"
|
|
113
|
+
)
|
|
114
|
+
except Exception as e:
|
|
115
|
+
console.print(
|
|
116
|
+
f"Error while printing compression types: {e}",
|
|
117
|
+
style="blink bold red underline on white",
|
|
118
|
+
)
|
|
119
|
+
finally:
|
|
120
|
+
pass
|
|
121
|
+
|
|
122
|
+
|
|
97
123
|
@app.command()
|
|
98
124
|
def main(filename: str):
|
|
99
125
|
"""
|
|
@@ -107,9 +133,8 @@ def main(filename: str):
|
|
|
107
133
|
"""
|
|
108
134
|
(parquet_metadata, compression) = read_parquet_metadata(filename)
|
|
109
135
|
|
|
110
|
-
print_parquet_metadata(
|
|
111
|
-
|
|
112
|
-
)
|
|
136
|
+
print_parquet_metadata(parquet_metadata)
|
|
137
|
+
print_compression_types(parquet_metadata)
|
|
113
138
|
print(f"Compression codecs: {compression}")
|
|
114
139
|
|
|
115
140
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: iparq
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Display version and compression information about a parquet file
|
|
5
5
|
Author-email: MiguelElGallo <miguel.zurcher@gmail.com>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -69,7 +69,7 @@ After reading [this blog](https://duckdb.org/2025/01/22/parquet-encodings.html),
|
|
|
69
69
|
brew install MiguelElGallo/tap/iparq
|
|
70
70
|
iparq --help
|
|
71
71
|
```
|
|
72
|
-
|
|
72
|
+
|
|
73
73
|
## Usage
|
|
74
74
|
|
|
75
75
|
Run
|
|
@@ -80,17 +80,77 @@ iparq <filename>
|
|
|
80
80
|
|
|
81
81
|
Replace `<filename>` with the path to your .parquet file. The utility will read the metadata of the file and print the compression codecs used in the parquet file.
|
|
82
82
|
|
|
83
|
-
|
|
84
83
|
## Example output
|
|
85
84
|
|
|
86
85
|
```log
|
|
87
86
|
ParquetMetaModel(
|
|
88
87
|
created_by='parquet-cpp-arrow version 14.0.2',
|
|
89
|
-
num_columns=
|
|
90
|
-
num_rows=
|
|
91
|
-
num_row_groups=
|
|
88
|
+
num_columns=19,
|
|
89
|
+
num_rows=2964624,
|
|
90
|
+
num_row_groups=3,
|
|
92
91
|
format_version='2.6',
|
|
93
|
-
serialized_size=
|
|
92
|
+
serialized_size=6357
|
|
94
93
|
)
|
|
95
|
-
Compression
|
|
94
|
+
Column Compression Info:
|
|
95
|
+
Row Group 0:
|
|
96
|
+
Column 'VendorID' (Index 0): ZSTD
|
|
97
|
+
Column 'tpep_pickup_datetime' (Index 1): ZSTD
|
|
98
|
+
Column 'tpep_dropoff_datetime' (Index 2): ZSTD
|
|
99
|
+
Column 'passenger_count' (Index 3): ZSTD
|
|
100
|
+
Column 'trip_distance' (Index 4): ZSTD
|
|
101
|
+
Column 'RatecodeID' (Index 5): ZSTD
|
|
102
|
+
Column 'store_and_fwd_flag' (Index 6): ZSTD
|
|
103
|
+
Column 'PULocationID' (Index 7): ZSTD
|
|
104
|
+
Column 'DOLocationID' (Index 8): ZSTD
|
|
105
|
+
Column 'payment_type' (Index 9): ZSTD
|
|
106
|
+
Column 'fare_amount' (Index 10): ZSTD
|
|
107
|
+
Column 'extra' (Index 11): ZSTD
|
|
108
|
+
Column 'mta_tax' (Index 12): ZSTD
|
|
109
|
+
Column 'tip_amount' (Index 13): ZSTD
|
|
110
|
+
Column 'tolls_amount' (Index 14): ZSTD
|
|
111
|
+
Column 'improvement_surcharge' (Index 15): ZSTD
|
|
112
|
+
Column 'total_amount' (Index 16): ZSTD
|
|
113
|
+
Column 'congestion_surcharge' (Index 17): ZSTD
|
|
114
|
+
Column 'Airport_fee' (Index 18): ZSTD
|
|
115
|
+
Row Group 1:
|
|
116
|
+
Column 'VendorID' (Index 0): ZSTD
|
|
117
|
+
Column 'tpep_pickup_datetime' (Index 1): ZSTD
|
|
118
|
+
Column 'tpep_dropoff_datetime' (Index 2): ZSTD
|
|
119
|
+
Column 'passenger_count' (Index 3): ZSTD
|
|
120
|
+
Column 'trip_distance' (Index 4): ZSTD
|
|
121
|
+
Column 'RatecodeID' (Index 5): ZSTD
|
|
122
|
+
Column 'store_and_fwd_flag' (Index 6): ZSTD
|
|
123
|
+
Column 'PULocationID' (Index 7): ZSTD
|
|
124
|
+
Column 'DOLocationID' (Index 8): ZSTD
|
|
125
|
+
Column 'payment_type' (Index 9): ZSTD
|
|
126
|
+
Column 'fare_amount' (Index 10): ZSTD
|
|
127
|
+
Column 'extra' (Index 11): ZSTD
|
|
128
|
+
Column 'mta_tax' (Index 12): ZSTD
|
|
129
|
+
Column 'tip_amount' (Index 13): ZSTD
|
|
130
|
+
Column 'tolls_amount' (Index 14): ZSTD
|
|
131
|
+
Column 'improvement_surcharge' (Index 15): ZSTD
|
|
132
|
+
Column 'total_amount' (Index 16): ZSTD
|
|
133
|
+
Column 'congestion_surcharge' (Index 17): ZSTD
|
|
134
|
+
Column 'Airport_fee' (Index 18): ZSTD
|
|
135
|
+
Row Group 2:
|
|
136
|
+
Column 'VendorID' (Index 0): ZSTD
|
|
137
|
+
Column 'tpep_pickup_datetime' (Index 1): ZSTD
|
|
138
|
+
Column 'tpep_dropoff_datetime' (Index 2): ZSTD
|
|
139
|
+
Column 'passenger_count' (Index 3): ZSTD
|
|
140
|
+
Column 'trip_distance' (Index 4): ZSTD
|
|
141
|
+
Column 'RatecodeID' (Index 5): ZSTD
|
|
142
|
+
Column 'store_and_fwd_flag' (Index 6): ZSTD
|
|
143
|
+
Column 'PULocationID' (Index 7): ZSTD
|
|
144
|
+
Column 'DOLocationID' (Index 8): ZSTD
|
|
145
|
+
Column 'payment_type' (Index 9): ZSTD
|
|
146
|
+
Column 'fare_amount' (Index 10): ZSTD
|
|
147
|
+
Column 'extra' (Index 11): ZSTD
|
|
148
|
+
Column 'mta_tax' (Index 12): ZSTD
|
|
149
|
+
Column 'tip_amount' (Index 13): ZSTD
|
|
150
|
+
Column 'tolls_amount' (Index 14): ZSTD
|
|
151
|
+
Column 'improvement_surcharge' (Index 15): ZSTD
|
|
152
|
+
Column 'total_amount' (Index 16): ZSTD
|
|
153
|
+
Column 'congestion_surcharge' (Index 17): ZSTD
|
|
154
|
+
Column 'Airport_fee' (Index 18): ZSTD
|
|
155
|
+
Compression codecs: {'ZSTD'}
|
|
96
156
|
```
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
iparq/__init__.py,sha256=sXLh7g3KC4QCFxcZGBTpG2scR7hmmBsMjq6LqRptkRg,22
|
|
2
|
+
iparq/source.py,sha256=7ocTpA7j5C-oSyLkMPhDifpH3cPhqyK3LBu0CjjG83s,4851
|
|
3
|
+
iparq-0.1.5.dist-info/METADATA,sha256=7kLNc40ROyYot5N37GXjSErzpp5WPDWa9_Y2BQpmr6o,5387
|
|
4
|
+
iparq-0.1.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
5
|
+
iparq-0.1.5.dist-info/entry_points.txt,sha256=vrE2lwvuheySWTOJdr_gh9AT47ck02WCHo0muRq5HS8,43
|
|
6
|
+
iparq-0.1.5.dist-info/licenses/LICENSE,sha256=apqXCIYD_rrtbJVE-Ex1-1X7N0cBwZTOm4KL3TEFmYA,1067
|
|
7
|
+
iparq-0.1.5.dist-info/RECORD,,
|
iparq-0.1.4.dist-info/RECORD
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
iparq/__init__.py,sha256=sXLh7g3KC4QCFxcZGBTpG2scR7hmmBsMjq6LqRptkRg,22
|
|
2
|
-
iparq/source.py,sha256=Jo_q4vo39MyJHF1GAqxW6DAJ47pqP5VNYJ2xvlLqUdk,3784
|
|
3
|
-
iparq-0.1.4.dist-info/METADATA,sha256=aU7lRS8-sSIak88j-31FTh9mz4XhmbbHUGurjinbD9k,2950
|
|
4
|
-
iparq-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
5
|
-
iparq-0.1.4.dist-info/entry_points.txt,sha256=vrE2lwvuheySWTOJdr_gh9AT47ck02WCHo0muRq5HS8,43
|
|
6
|
-
iparq-0.1.4.dist-info/licenses/LICENSE,sha256=apqXCIYD_rrtbJVE-Ex1-1X7N0cBwZTOm4KL3TEFmYA,1067
|
|
7
|
-
iparq-0.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|