iparq 0.2.6__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
iparq/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.6"
1
+ __version__ = "0.3.0"
iparq/source.py CHANGED
@@ -1,3 +1,4 @@
1
+ import glob
1
2
  import json
2
3
  from enum import Enum
3
4
  from typing import List, Optional
@@ -84,22 +85,16 @@ def read_parquet_metadata(filename: str):
84
85
  tuple: A tuple containing:
85
86
  - parquet_metadata (pyarrow.parquet.FileMetaData): The metadata of the Parquet file.
86
87
  - compression_codecs (set): A set of compression codecs used in the Parquet file.
88
+
89
+ Raises:
90
+ FileNotFoundError: If the file cannot be found or opened.
87
91
  """
88
- try:
89
- compression_codecs = set([])
90
- parquet_metadata = pq.ParquetFile(filename).metadata
92
+ compression_codecs = set([])
93
+ parquet_metadata = pq.ParquetFile(filename).metadata
91
94
 
92
- for i in range(parquet_metadata.num_row_groups):
93
- for j in range(parquet_metadata.num_columns):
94
- compression_codecs.add(
95
- parquet_metadata.row_group(i).column(j).compression
96
- )
97
-
98
- except FileNotFoundError:
99
- console.print(
100
- f"Cannot open: {filename}.", style="blink bold red underline on white"
101
- )
102
- exit(1)
95
+ for i in range(parquet_metadata.num_row_groups):
96
+ for j in range(parquet_metadata.num_columns):
97
+ compression_codecs.add(parquet_metadata.row_group(i).column(j).compression)
103
98
 
104
99
  return parquet_metadata, compression_codecs
105
100
 
@@ -260,27 +255,24 @@ def output_json(
260
255
  print(json.dumps(result, indent=2))
261
256
 
262
257
 
263
- @app.command(name="")
264
- @app.command(name="inspect")
265
- def inspect(
266
- filename: str = typer.Argument(..., help="Path to the Parquet file to inspect"),
267
- format: OutputFormat = typer.Option(
268
- OutputFormat.RICH, "--format", "-f", help="Output format (rich or json)"
269
- ),
270
- metadata_only: bool = typer.Option(
271
- False,
272
- "--metadata-only",
273
- "-m",
274
- help="Show only file metadata without column details",
275
- ),
276
- column_filter: Optional[str] = typer.Option(
277
- None, "--column", "-c", help="Filter results to show only specific column"
278
- ),
279
- ):
258
+ def inspect_single_file(
259
+ filename: str,
260
+ format: OutputFormat,
261
+ metadata_only: bool,
262
+ column_filter: Optional[str],
263
+ ) -> None:
280
264
  """
281
- Inspect a Parquet file and display its metadata, compression settings, and bloom filter information.
265
+ Inspect a single Parquet file and display its metadata, compression settings, and bloom filter information.
266
+
267
+ Raises:
268
+ Exception: If the file cannot be processed.
282
269
  """
283
- (parquet_metadata, compression) = read_parquet_metadata(filename)
270
+ try:
271
+ (parquet_metadata, compression) = read_parquet_metadata(filename)
272
+ except FileNotFoundError:
273
+ raise Exception(f"Cannot open: {filename}.")
274
+ except Exception as e:
275
+ raise Exception(f"Failed to read metadata: {e}")
284
276
 
285
277
  # Create metadata model
286
278
  meta_model = ParquetMetaModel(
@@ -322,5 +314,61 @@ def inspect(
322
314
  console.print(f"Compression codecs: {compression}")
323
315
 
324
316
 
317
+ @app.command(name="")
318
+ @app.command(name="inspect")
319
+ def inspect(
320
+ filenames: List[str] = typer.Argument(
321
+ ..., help="Path(s) or pattern(s) to Parquet files to inspect"
322
+ ),
323
+ format: OutputFormat = typer.Option(
324
+ OutputFormat.RICH, "--format", "-f", help="Output format (rich or json)"
325
+ ),
326
+ metadata_only: bool = typer.Option(
327
+ False,
328
+ "--metadata-only",
329
+ "-m",
330
+ help="Show only file metadata without column details",
331
+ ),
332
+ column_filter: Optional[str] = typer.Option(
333
+ None, "--column", "-c", help="Filter results to show only specific column"
334
+ ),
335
+ ):
336
+ """
337
+ Inspect Parquet files and display their metadata, compression settings, and bloom filter information.
338
+ """
339
+ # Expand glob patterns and collect all matching files
340
+ all_files = []
341
+ for pattern in filenames:
342
+ matches = glob.glob(pattern)
343
+ if matches:
344
+ all_files.extend(matches)
345
+ else:
346
+ # If no matches found, treat as literal filename (for better error reporting)
347
+ all_files.append(pattern)
348
+
349
+ # Remove duplicates while preserving order
350
+ seen = set()
351
+ unique_files = []
352
+ for file in all_files:
353
+ if file not in seen:
354
+ seen.add(file)
355
+ unique_files.append(file)
356
+
357
+ # Process each file
358
+ for i, filename in enumerate(unique_files):
359
+ # For multiple files, add a header to separate results
360
+ if len(unique_files) > 1:
361
+ if i > 0:
362
+ console.print() # Add blank line between files
363
+ console.print(f"[bold blue]File: {filename}[/bold blue]")
364
+ console.print("─" * (len(filename) + 6))
365
+
366
+ try:
367
+ inspect_single_file(filename, format, metadata_only, column_filter)
368
+ except Exception as e:
369
+ console.print(f"Error processing {filename}: {e}", style="red")
370
+ continue
371
+
372
+
325
373
  if __name__ == "__main__":
326
374
  app()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: iparq
3
- Version: 0.2.6
3
+ Version: 0.3.0
4
4
  Summary: Display version, compression, and bloom filter information about a parquet file
5
5
  Author-email: MiguelElGallo <miguel.zurcher@gmail.com>
6
6
  License-File: LICENSE
@@ -88,10 +88,10 @@ Read more about bloom filters in this [great article](https://duckdb.org/2025/03
88
88
 
89
89
  ## Usage
90
90
 
91
- iparq now supports additional options:
91
+ iparq supports inspecting single files, multiple files, and glob patterns:
92
92
 
93
93
  ```sh
94
- iparq inspect <filename> [OPTIONS]
94
+ iparq inspect <filename(s)> [OPTIONS]
95
95
  ```
96
96
 
97
97
  Options include:
@@ -100,9 +100,12 @@ Options include:
100
100
  - `--metadata-only`, `-m`: Show only file metadata without column details
101
101
  - `--column`, `-c`: Filter results to show only a specific column
102
102
 
103
- Examples:
103
+ ### Single File Examples:
104
104
 
105
105
  ```sh
106
+ # Basic inspection
107
+ iparq inspect yourfile.parquet
108
+
106
109
  # Output in JSON format
107
110
  iparq inspect yourfile.parquet --format json
108
111
 
@@ -113,7 +116,23 @@ iparq inspect yourfile.parquet --metadata-only
113
116
  iparq inspect yourfile.parquet --column column_name
114
117
  ```
115
118
 
116
- Replace `<filename>` with the path to your .parquet file. The utility will read the metadata of the file and print the compression codecs used in the parquet file.
119
+ ### Multiple Files and Glob Patterns:
120
+
121
+ ```sh
122
+ # Inspect multiple specific files
123
+ iparq inspect file1.parquet file2.parquet file3.parquet
124
+
125
+ # Use glob patterns to inspect all parquet files
126
+ iparq inspect *.parquet
127
+
128
+ # Use specific patterns
129
+ iparq inspect yellow*.parquet data_*.parquet
130
+
131
+ # Combine patterns and specific files
132
+ iparq inspect important.parquet temp_*.parquet
133
+ ```
134
+
135
+ When inspecting multiple files, each file's results are displayed with a header showing the filename. The utility will read the metadata of each file and print the compression codecs used in the parquet files.
117
136
 
118
137
  ## Example output - Bloom Filters
119
138
 
@@ -0,0 +1,8 @@
1
+ iparq/__init__.py,sha256=VrXpHDu3erkzwl_WXrqINBm9xWkcyUy53IQOj042dOs,22
2
+ iparq/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ iparq/source.py,sha256=wMg7c2lw1TSoCd5h3zvMNT_bAqWJMObRq5aPyXrVBik,12288
4
+ iparq-0.3.0.dist-info/METADATA,sha256=mWSvqv57D5PmtzqarAm-vAQoHFDPoiMkHS-nJcnoM_g,6018
5
+ iparq-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ iparq-0.3.0.dist-info/entry_points.txt,sha256=vrE2lwvuheySWTOJdr_gh9AT47ck02WCHo0muRq5HS8,43
7
+ iparq-0.3.0.dist-info/licenses/LICENSE,sha256=apqXCIYD_rrtbJVE-Ex1-1X7N0cBwZTOm4KL3TEFmYA,1067
8
+ iparq-0.3.0.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- iparq/__init__.py,sha256=Oz5HbwHMyE87nmwV80AZzpkJPf-wBg7eDuJr_BXZkhU,22
2
- iparq/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- iparq/source.py,sha256=huC6I0hqwyv4BZ5xjI6FMZs9KH60xVHEKbmX6X8hhiA,10721
4
- iparq-0.2.6.dist-info/METADATA,sha256=LtiLJlVCHuOlx0gOOTqJ97S8baPFhnd_lOmKG2a-94g,5496
5
- iparq-0.2.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
- iparq-0.2.6.dist-info/entry_points.txt,sha256=vrE2lwvuheySWTOJdr_gh9AT47ck02WCHo0muRq5HS8,43
7
- iparq-0.2.6.dist-info/licenses/LICENSE,sha256=apqXCIYD_rrtbJVE-Ex1-1X7N0cBwZTOm4KL3TEFmYA,1067
8
- iparq-0.2.6.dist-info/RECORD,,
File without changes