iparq 0.2.6__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
+ name: "Copilot Setup Steps"
+
+ # Allow testing of the setup steps from your repository's "Actions" tab.
+ on: workflow_dispatch
+
+ jobs:
+   # The job MUST be called `copilot-setup-steps` or it will not be picked up by Copilot.
+   copilot-setup-steps:
+     runs-on: ubuntu-latest
+
+     # Set the permissions to the lowest permissions possible needed for your steps.
+     # Copilot will be given its own token for its operations.
+     permissions:
+       # If you want to clone the repository as part of your setup steps, for example to install dependencies, you'll need the `contents: read` permission. If you don't clone the repository in your setup steps, Copilot will do this for you automatically after the steps complete.
+       contents: read
+
+     # You can define any steps you want, and they will run before the agent starts.
+     # If you do not check out your code, Copilot will do this for you.
+     steps:
+       - name: Checkout code
+         uses: actions/checkout@v4
+
+       - name: Install UV (Python package manager)
+         run: |
+           curl -LsSf https://astral.sh/uv/install.sh | sh
+           export PATH="$HOME/.cargo/bin:$PATH"
+           echo "$HOME/.cargo/bin" >> $GITHUB_PATH
+           uv --version
+
+       # Note: GitHub MCP server is not publicly available as npm package
+       # Remove this step until the package is officially released
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: iparq
- Version: 0.2.6
+ Version: 0.3.0
  Summary: Display version compression and bloom filter information about a parquet file
  Author-email: MiguelElGallo <miguel.zurcher@gmail.com>
  License-File: LICENSE
@@ -88,10 +88,10 @@ Read more about bloom filters in this [great article](https://duckdb.org/2025/03

  ## Usage

- iparq now supports additional options:
+ iparq supports inspecting single files, multiple files, and glob patterns:

  ```sh
- iparq inspect <filename> [OPTIONS]
+ iparq inspect <filename(s)> [OPTIONS]
  ```

  Options include:
@@ -100,9 +100,12 @@ Options include:
  - `--metadata-only`, `-m`: Show only file metadata without column details
  - `--column`, `-c`: Filter results to show only a specific column

- Examples:
+ ### Single File Examples:

  ```sh
+ # Basic inspection
+ iparq inspect yourfile.parquet
+
  # Output in JSON format
  iparq inspect yourfile.parquet --format json

@@ -113,7 +116,23 @@ iparq inspect yourfile.parquet --metadata-only
  iparq inspect yourfile.parquet --column column_name
  ```

- Replace `<filename>` with the path to your .parquet file. The utility will read the metadata of the file and print the compression codecs used in the parquet file.
+ ### Multiple Files and Glob Patterns:
+
+ ```sh
+ # Inspect multiple specific files
+ iparq inspect file1.parquet file2.parquet file3.parquet
+
+ # Use glob patterns to inspect all parquet files
+ iparq inspect *.parquet
+
+ # Use specific patterns
+ iparq inspect yellow*.parquet data_*.parquet
+
+ # Combine patterns and specific files
+ iparq inspect important.parquet temp_*.parquet
+ ```
+
+ When inspecting multiple files, each file's results are displayed with a header showing the filename. The utility will read the metadata of each file and print the compression codecs used in the parquet files.

  ## Example ouput - Bloom Filters

@@ -70,10 +70,10 @@ Read more about bloom filters in this [great article](https://duckdb.org/2025/03

  ## Usage

- iparq now supports additional options:
+ iparq supports inspecting single files, multiple files, and glob patterns:

  ```sh
- iparq inspect <filename> [OPTIONS]
+ iparq inspect <filename(s)> [OPTIONS]
  ```

  Options include:
@@ -82,9 +82,12 @@ Options include:
  - `--metadata-only`, `-m`: Show only file metadata without column details
  - `--column`, `-c`: Filter results to show only a specific column

- Examples:
+ ### Single File Examples:

  ```sh
+ # Basic inspection
+ iparq inspect yourfile.parquet
+
  # Output in JSON format
  iparq inspect yourfile.parquet --format json

@@ -95,7 +98,23 @@ iparq inspect yourfile.parquet --metadata-only
  iparq inspect yourfile.parquet --column column_name
  ```

- Replace `<filename>` with the path to your .parquet file. The utility will read the metadata of the file and print the compression codecs used in the parquet file.
+ ### Multiple Files and Glob Patterns:
+
+ ```sh
+ # Inspect multiple specific files
+ iparq inspect file1.parquet file2.parquet file3.parquet
+
+ # Use glob patterns to inspect all parquet files
+ iparq inspect *.parquet
+
+ # Use specific patterns
+ iparq inspect yellow*.parquet data_*.parquet
+
+ # Combine patterns and specific files
+ iparq inspect important.parquet temp_*.parquet
+ ```
+
+ When inspecting multiple files, each file's results are displayed with a header showing the filename. The utility will read the metadata of each file and print the compression codecs used in the parquet files.

  ## Example ouput - Bloom Filters

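For readers who want the same multi-file view programmatically rather than via the CLI, the metadata calls iparq relies on are plain `pyarrow` APIs. A minimal sketch, not part of the package; the `*.parquet` pattern and the loop structure are illustrative:

```python
# Rough programmatic equivalent of `iparq inspect *.parquet` using glob + pyarrow.
import glob

import pyarrow.parquet as pq

for path in sorted(glob.glob("*.parquet")):
    metadata = pq.ParquetFile(path).metadata
    # Collect the compression codec of every column chunk in every row group.
    codecs = {
        metadata.row_group(i).column(j).compression
        for i in range(metadata.num_row_groups)
        for j in range(metadata.num_columns)
    }
    print(f"{path}: {metadata.num_rows} rows, compression codecs: {codecs}")
```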
@@ -1,6 +1,6 @@
  [project]
  name = "iparq"
- version = "0.2.6"
+ version = "0.3.0"
  description = "Display version compression and bloom filter information about a parquet file"
  readme = "README.md"
  authors = [
@@ -0,0 +1 @@
+ __version__ = "0.3.0"
@@ -1,3 +1,4 @@
+ import glob
  import json
  from enum import Enum
  from typing import List, Optional
@@ -84,22 +85,16 @@ def read_parquet_metadata(filename: str):
          tuple: A tuple containing:
              - parquet_metadata (pyarrow.parquet.FileMetaData): The metadata of the Parquet file.
              - compression_codecs (set): A set of compression codecs used in the Parquet file.
+
+     Raises:
+         FileNotFoundError: If the file cannot be found or opened.
      """
-     try:
-         compression_codecs = set([])
-         parquet_metadata = pq.ParquetFile(filename).metadata
+     compression_codecs = set([])
+     parquet_metadata = pq.ParquetFile(filename).metadata

-         for i in range(parquet_metadata.num_row_groups):
-             for j in range(parquet_metadata.num_columns):
-                 compression_codecs.add(
-                     parquet_metadata.row_group(i).column(j).compression
-                 )
-
-     except FileNotFoundError:
-         console.print(
-             f"Cannot open: {filename}.", style="blink bold red underline on white"
-         )
-         exit(1)
+     for i in range(parquet_metadata.num_row_groups):
+         for j in range(parquet_metadata.num_columns):
+             compression_codecs.add(parquet_metadata.row_group(i).column(j).compression)

      return parquet_metadata, compression_codecs

@@ -260,27 +255,24 @@ def output_json(
      print(json.dumps(result, indent=2))


- @app.command(name="")
- @app.command(name="inspect")
- def inspect(
-     filename: str = typer.Argument(..., help="Path to the Parquet file to inspect"),
-     format: OutputFormat = typer.Option(
-         OutputFormat.RICH, "--format", "-f", help="Output format (rich or json)"
-     ),
-     metadata_only: bool = typer.Option(
-         False,
-         "--metadata-only",
-         "-m",
-         help="Show only file metadata without column details",
-     ),
-     column_filter: Optional[str] = typer.Option(
-         None, "--column", "-c", help="Filter results to show only specific column"
-     ),
- ):
+ def inspect_single_file(
+     filename: str,
+     format: OutputFormat,
+     metadata_only: bool,
+     column_filter: Optional[str],
+ ) -> None:
      """
-     Inspect a Parquet file and display its metadata, compression settings, and bloom filter information.
+     Inspect a single Parquet file and display its metadata, compression settings, and bloom filter information.
+
+     Raises:
+         Exception: If the file cannot be processed.
      """
-     (parquet_metadata, compression) = read_parquet_metadata(filename)
+     try:
+         (parquet_metadata, compression) = read_parquet_metadata(filename)
+     except FileNotFoundError:
+         raise Exception(f"Cannot open: {filename}.")
+     except Exception as e:
+         raise Exception(f"Failed to read metadata: {e}")

      # Create metadata model
      meta_model = ParquetMetaModel(
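The refactor above splits the per-file work into `inspect_single_file`, which the `inspect` command now calls in a loop. For reference, a minimal sketch of calling the helper directly; the placeholder filename is an assumption, and normally Typer invokes this for you via the CLI:

```python
# Hypothetical direct call to the refactored helper; the iparq CLI normally
# reaches it through the `inspect` command. "yourfile.parquet" is a placeholder.
from iparq.source import OutputFormat, inspect_single_file

try:
    inspect_single_file(
        "yourfile.parquet",
        OutputFormat.RICH,
        metadata_only=False,
        column_filter=None,
    )
except Exception as err:
    # The helper converts read failures into plain Exceptions (see the diff above).
    print(f"Could not inspect file: {err}")
```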
@@ -322,5 +314,61 @@ def inspect(
      console.print(f"Compression codecs: {compression}")


+ @app.command(name="")
+ @app.command(name="inspect")
+ def inspect(
+     filenames: List[str] = typer.Argument(
+         ..., help="Path(s) or pattern(s) to Parquet files to inspect"
+     ),
+     format: OutputFormat = typer.Option(
+         OutputFormat.RICH, "--format", "-f", help="Output format (rich or json)"
+     ),
+     metadata_only: bool = typer.Option(
+         False,
+         "--metadata-only",
+         "-m",
+         help="Show only file metadata without column details",
+     ),
+     column_filter: Optional[str] = typer.Option(
+         None, "--column", "-c", help="Filter results to show only specific column"
+     ),
+ ):
+     """
+     Inspect Parquet files and display their metadata, compression settings, and bloom filter information.
+     """
+     # Expand glob patterns and collect all matching files
+     all_files = []
+     for pattern in filenames:
+         matches = glob.glob(pattern)
+         if matches:
+             all_files.extend(matches)
+         else:
+             # If no matches found, treat as literal filename (for better error reporting)
+             all_files.append(pattern)
+
+     # Remove duplicates while preserving order
+     seen = set()
+     unique_files = []
+     for file in all_files:
+         if file not in seen:
+             seen.add(file)
+             unique_files.append(file)
+
+     # Process each file
+     for i, filename in enumerate(unique_files):
+         # For multiple files, add a header to separate results
+         if len(unique_files) > 1:
+             if i > 0:
+                 console.print()  # Add blank line between files
+             console.print(f"[bold blue]File: {filename}[/bold blue]")
+             console.print("─" * (len(filename) + 6))
+
+         try:
+             inspect_single_file(filename, format, metadata_only, column_filter)
+         except Exception as e:
+             console.print(f"Error processing {filename}: {e}", style="red")
+             continue
+
+
  if __name__ == "__main__":
      app()
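The glob expansion and order-preserving de-duplication added to `inspect()` are self-contained enough to exercise in isolation. A minimal sketch of the same pattern; the `expand_patterns` name is illustrative and not part of iparq:

```python
# Same expansion/dedup strategy as the new inspect() command: globs are expanded,
# non-matching patterns fall through as literal paths (so the error is reported
# later), and duplicates are dropped while keeping first-seen order.
import glob
from typing import List


def expand_patterns(patterns: List[str]) -> List[str]:
    all_files: List[str] = []
    for pattern in patterns:
        matches = glob.glob(pattern)
        all_files.extend(matches if matches else [pattern])
    # dict.fromkeys removes duplicates while preserving insertion order.
    return list(dict.fromkeys(all_files))


if __name__ == "__main__":
    print(expand_patterns(["data_*.parquet", "important.parquet", "important.parquet"]))
```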
@@ -0,0 +1,164 @@
+ import json
+ from pathlib import Path
+
+ from typer.testing import CliRunner
+
+ from iparq.source import app
+
+ # Define path to test fixtures
+ FIXTURES_DIR = Path(__file__).parent
+ fixture_path = FIXTURES_DIR / "dummy.parquet"
+
+
+ def test_parquet_info():
+     """Test that the CLI correctly displays parquet file information."""
+     runner = CliRunner()
+     result = runner.invoke(app, ["inspect", str(fixture_path)])
+
+     assert result.exit_code == 0
+
+     expected_output = """ParquetMetaModel(
+ created_by='parquet-cpp-arrow version 14.0.2',
+ num_columns=3,
+ num_rows=3,
+ num_row_groups=1,
+ format_version='2.6',
+ serialized_size=2223
+ )
+ Parquet Column Information
+ ┏━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
+ ┃ Row Group ┃ Column Name ┃ Index ┃ Compression ┃ Bloom Filter ┃
+ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
+ │ 0 │ one │ 0 │ SNAPPY │ ✅ │
+ │ 0 │ two │ 1 │ SNAPPY │ ✅ │
+ │ 0 │ three │ 2 │ SNAPPY │ ✅ │
+ └───────────┴─────────────┴───────┴─────────────┴──────────────┘
+ Compression codecs: {'SNAPPY'}"""
+
+     assert expected_output in result.stdout
+
+
+ def test_metadata_only_flag():
+     """Test that the metadata-only flag works correctly."""
+     runner = CliRunner()
+     fixture_path = FIXTURES_DIR / "dummy.parquet"
+     result = runner.invoke(app, ["inspect", "--metadata-only", str(fixture_path)])
+
+     assert result.exit_code == 0
+     assert "ParquetMetaModel" in result.stdout
+     assert "Parquet Column Information" not in result.stdout
+
+
+ def test_column_filter():
+     """Test that filtering by column name works correctly."""
+     runner = CliRunner()
+     fixture_path = FIXTURES_DIR / "dummy.parquet"
+     result = runner.invoke(app, ["inspect", "--column", "one", str(fixture_path)])
+
+     assert result.exit_code == 0
+     assert "one" in result.stdout
+     assert "two" not in result.stdout
+
+
+ def test_json_output():
+     """Test JSON output format."""
+     runner = CliRunner()
+     fixture_path = FIXTURES_DIR / "dummy.parquet"
+     result = runner.invoke(app, ["inspect", "--format", "json", str(fixture_path)])
+
+     assert result.exit_code == 0
+
+     # Test that output is valid JSON
+     data = json.loads(result.stdout)
+
+     # Check JSON structure
+     assert "metadata" in data
+     assert "columns" in data
+     assert "compression_codecs" in data
+     assert data["metadata"]["num_columns"] == 3
+
+
+ def test_multiple_files():
+     """Test that multiple files can be inspected in a single command."""
+     runner = CliRunner()
+     fixture_path = FIXTURES_DIR / "dummy.parquet"
+     # Use the same file twice to test deduplication behavior
+
+     result = runner.invoke(app, ["inspect", str(fixture_path), str(fixture_path)])
+
+     assert result.exit_code == 0
+     # Since both arguments are the same file, deduplication means only one file is processed
+     # and since there's only one unique file, no file header should be shown
+     assert (
+         "File:" not in result.stdout
+     )  # No header for single file (after deduplication)
+     assert result.stdout.count("ParquetMetaModel") == 1
+
+
+ def test_multiple_different_files():
+     """Test multiple different files by creating a temporary copy."""
+     import shutil
+     import tempfile
+
+     runner = CliRunner()
+     fixture_path = FIXTURES_DIR / "dummy.parquet"
+
+     # Create a temporary file copy
+     with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp_file:
+         shutil.copy2(fixture_path, tmp_file.name)
+         tmp_path = tmp_file.name
+
+     try:
+         result = runner.invoke(app, ["inspect", str(fixture_path), tmp_path])
+
+         assert result.exit_code == 0
+         # Should contain file headers for both files
+         assert f"File: {fixture_path}" in result.stdout
+         assert f"File: {tmp_path}" in result.stdout
+         # Should contain metadata for both files
+         assert result.stdout.count("ParquetMetaModel") == 2
+         assert result.stdout.count("Parquet Column Information") == 2
+     finally:
+         # Clean up temporary file
+         import os
+
+         os.unlink(tmp_path)
+
+
+ def test_glob_pattern():
+     """Test that glob patterns work correctly."""
+     runner = CliRunner()
+     # Test with a pattern that should match dummy files
+     result = runner.invoke(app, ["inspect", str(FIXTURES_DIR / "dummy*.parquet")])
+
+     assert result.exit_code == 0
+     # Should process at least one file
+     assert "ParquetMetaModel" in result.stdout
+
+
+ def test_single_file_no_header():
+     """Test that single files don't show file headers."""
+     runner = CliRunner()
+     fixture_path = FIXTURES_DIR / "dummy.parquet"
+     result = runner.invoke(app, ["inspect", str(fixture_path)])
+
+     assert result.exit_code == 0
+     # Should not contain file header for single file
+     assert "File:" not in result.stdout
+     assert "ParquetMetaModel" in result.stdout
+
+
+ def test_error_handling_with_multiple_files():
+     """Test that errors in one file don't stop processing of other files."""
+     runner = CliRunner()
+     fixture_path = FIXTURES_DIR / "dummy.parquet"
+     nonexistent_path = FIXTURES_DIR / "nonexistent.parquet"
+
+     result = runner.invoke(app, ["inspect", str(fixture_path), str(nonexistent_path)])
+
+     assert result.exit_code == 0
+     # Should process the good file
+     assert "ParquetMetaModel" in result.stdout
+     # Should show error for bad file
+     assert "Error processing" in result.stdout
+     assert "nonexistent.parquet" in result.stdout
@@ -77,7 +77,7 @@ wheels = [

  [[package]]
  name = "iparq"
- version = "0.2.6"
+ version = "0.3.0"
  source = { editable = "." }
  dependencies = [
      { name = "pyarrow" },
@@ -406,7 +406,7 @@ wheels = [

  [[package]]
  name = "pytest"
- version = "8.3.5"
+ version = "8.4.0"
  source = { registry = "https://pypi.org/simple" }
  dependencies = [
      { name = "colorama", marker = "sys_platform == 'win32'" },
@@ -414,11 +414,12 @@ dependencies = [
      { name = "iniconfig" },
      { name = "packaging" },
      { name = "pluggy" },
+     { name = "pygments" },
      { name = "tomli", marker = "python_full_version < '3.11'" },
  ]
- sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891, upload-time = "2025-03-02T12:54:54.503Z" }
+ sdist = { url = "https://files.pythonhosted.org/packages/fb/aa/405082ce2749be5398045152251ac69c0f3578c7077efc53431303af97ce/pytest-8.4.0.tar.gz", hash = "sha256:14d920b48472ea0dbf68e45b96cd1ffda4705f33307dcc86c676c1b5104838a6", size = 1515232, upload-time = "2025-06-02T17:36:30.03Z" }
  wheels = [
-     { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634, upload-time = "2025-03-02T12:54:52.069Z" },
+     { url = "https://files.pythonhosted.org/packages/2f/de/afa024cbe022b1b318a3d224125aa24939e99b4ff6f22e0ba639a2eaee47/pytest-8.4.0-py3-none-any.whl", hash = "sha256:f40f825768ad76c0977cbacdf1fd37c6f7a468e460ea6a0636078f8972d4517e", size = 363797, upload-time = "2025-06-02T17:36:27.859Z" },
  ]

  [[package]]
@@ -437,27 +438,27 @@ wheels = [

  [[package]]
  name = "ruff"
- version = "0.11.12"
+ version = "0.11.13"
  source = { registry = "https://pypi.org/simple" }
- sdist = { url = "https://files.pythonhosted.org/packages/15/0a/92416b159ec00cdf11e5882a9d80d29bf84bba3dbebc51c4898bfbca1da6/ruff-0.11.12.tar.gz", hash = "sha256:43cf7f69c7d7c7d7513b9d59c5d8cafd704e05944f978614aa9faff6ac202603", size = 4202289, upload-time = "2025-05-29T13:31:40.037Z" }
+ sdist = { url = "https://files.pythonhosted.org/packages/ed/da/9c6f995903b4d9474b39da91d2d626659af3ff1eeb43e9ae7c119349dba6/ruff-0.11.13.tar.gz", hash = "sha256:26fa247dc68d1d4e72c179e08889a25ac0c7ba4d78aecfc835d49cbfd60bf514", size = 4282054, upload-time = "2025-06-05T21:00:15.721Z" }
  wheels = [
-     { url = "https://files.pythonhosted.org/packages/60/cc/53eb79f012d15e136d40a8e8fc519ba8f55a057f60b29c2df34efd47c6e3/ruff-0.11.12-py3-none-linux_armv6l.whl", hash = "sha256:c7680aa2f0d4c4f43353d1e72123955c7a2159b8646cd43402de6d4a3a25d7cc", size = 10285597, upload-time = "2025-05-29T13:30:57.539Z" },
-     { url = "https://files.pythonhosted.org/packages/e7/d7/73386e9fb0232b015a23f62fea7503f96e29c29e6c45461d4a73bac74df9/ruff-0.11.12-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2cad64843da9f134565c20bcc430642de897b8ea02e2e79e6e02a76b8dcad7c3", size = 11053154, upload-time = "2025-05-29T13:31:00.865Z" },
-     { url = "https://files.pythonhosted.org/packages/4e/eb/3eae144c5114e92deb65a0cb2c72326c8469e14991e9bc3ec0349da1331c/ruff-0.11.12-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9b6886b524a1c659cee1758140138455d3c029783d1b9e643f3624a5ee0cb0aa", size = 10403048, upload-time = "2025-05-29T13:31:03.413Z" },
-     { url = "https://files.pythonhosted.org/packages/29/64/20c54b20e58b1058db6689e94731f2a22e9f7abab74e1a758dfba058b6ca/ruff-0.11.12-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc3a3690aad6e86c1958d3ec3c38c4594b6ecec75c1f531e84160bd827b2012", size = 10597062, upload-time = "2025-05-29T13:31:05.539Z" },
-     { url = "https://files.pythonhosted.org/packages/29/3a/79fa6a9a39422a400564ca7233a689a151f1039110f0bbbabcb38106883a/ruff-0.11.12-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f97fdbc2549f456c65b3b0048560d44ddd540db1f27c778a938371424b49fe4a", size = 10155152, upload-time = "2025-05-29T13:31:07.986Z" },
-     { url = "https://files.pythonhosted.org/packages/e5/a4/22c2c97b2340aa968af3a39bc38045e78d36abd4ed3fa2bde91c31e712e3/ruff-0.11.12-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74adf84960236961090e2d1348c1a67d940fd12e811a33fb3d107df61eef8fc7", size = 11723067, upload-time = "2025-05-29T13:31:10.57Z" },
-     { url = "https://files.pythonhosted.org/packages/bc/cf/3e452fbd9597bcd8058856ecd42b22751749d07935793a1856d988154151/ruff-0.11.12-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:b56697e5b8bcf1d61293ccfe63873aba08fdbcbbba839fc046ec5926bdb25a3a", size = 12460807, upload-time = "2025-05-29T13:31:12.88Z" },
-     { url = "https://files.pythonhosted.org/packages/2f/ec/8f170381a15e1eb7d93cb4feef8d17334d5a1eb33fee273aee5d1f8241a3/ruff-0.11.12-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d47afa45e7b0eaf5e5969c6b39cbd108be83910b5c74626247e366fd7a36a13", size = 12063261, upload-time = "2025-05-29T13:31:15.236Z" },
-     { url = "https://files.pythonhosted.org/packages/0d/bf/57208f8c0a8153a14652a85f4116c0002148e83770d7a41f2e90b52d2b4e/ruff-0.11.12-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bf9603fe1bf949de8b09a2da896f05c01ed7a187f4a386cdba6760e7f61be", size = 11329601, upload-time = "2025-05-29T13:31:18.68Z" },
-     { url = "https://files.pythonhosted.org/packages/c3/56/edf942f7fdac5888094d9ffa303f12096f1a93eb46570bcf5f14c0c70880/ruff-0.11.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08033320e979df3b20dba567c62f69c45e01df708b0f9c83912d7abd3e0801cd", size = 11522186, upload-time = "2025-05-29T13:31:21.216Z" },
-     { url = "https://files.pythonhosted.org/packages/ed/63/79ffef65246911ed7e2290aeece48739d9603b3a35f9529fec0fc6c26400/ruff-0.11.12-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:929b7706584f5bfd61d67d5070f399057d07c70585fa8c4491d78ada452d3bef", size = 10449032, upload-time = "2025-05-29T13:31:23.417Z" },
-     { url = "https://files.pythonhosted.org/packages/88/19/8c9d4d8a1c2a3f5a1ea45a64b42593d50e28b8e038f1aafd65d6b43647f3/ruff-0.11.12-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:7de4a73205dc5756b8e09ee3ed67c38312dce1aa28972b93150f5751199981b5", size = 10129370, upload-time = "2025-05-29T13:31:25.777Z" },
-     { url = "https://files.pythonhosted.org/packages/bc/0f/2d15533eaa18f460530a857e1778900cd867ded67f16c85723569d54e410/ruff-0.11.12-py3-none-musllinux_1_2_i686.whl", hash = "sha256:2635c2a90ac1b8ca9e93b70af59dfd1dd2026a40e2d6eebaa3efb0465dd9cf02", size = 11123529, upload-time = "2025-05-29T13:31:28.396Z" },
-     { url = "https://files.pythonhosted.org/packages/4f/e2/4c2ac669534bdded835356813f48ea33cfb3a947dc47f270038364587088/ruff-0.11.12-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d05d6a78a89166f03f03a198ecc9d18779076ad0eec476819467acb401028c0c", size = 11577642, upload-time = "2025-05-29T13:31:30.647Z" },
-     { url = "https://files.pythonhosted.org/packages/a7/9b/c9ddf7f924d5617a1c94a93ba595f4b24cb5bc50e98b94433ab3f7ad27e5/ruff-0.11.12-py3-none-win32.whl", hash = "sha256:f5a07f49767c4be4772d161bfc049c1f242db0cfe1bd976e0f0886732a4765d6", size = 10475511, upload-time = "2025-05-29T13:31:32.917Z" },
-     { url = "https://files.pythonhosted.org/packages/fd/d6/74fb6d3470c1aada019ffff33c0f9210af746cca0a4de19a1f10ce54968a/ruff-0.11.12-py3-none-win_amd64.whl", hash = "sha256:5a4d9f8030d8c3a45df201d7fb3ed38d0219bccd7955268e863ee4a115fa0832", size = 11523573, upload-time = "2025-05-29T13:31:35.782Z" },
-     { url = "https://files.pythonhosted.org/packages/44/42/d58086ec20f52d2b0140752ae54b355ea2be2ed46f914231136dd1effcc7/ruff-0.11.12-py3-none-win_arm64.whl", hash = "sha256:65194e37853158d368e333ba282217941029a28ea90913c67e558c611d04daa5", size = 10697770, upload-time = "2025-05-29T13:31:38.009Z" },
+     { url = "https://files.pythonhosted.org/packages/7d/ce/a11d381192966e0b4290842cc8d4fac7dc9214ddf627c11c1afff87da29b/ruff-0.11.13-py3-none-linux_armv6l.whl", hash = "sha256:4bdfbf1240533f40042ec00c9e09a3aade6f8c10b6414cf11b519488d2635d46", size = 10292516, upload-time = "2025-06-05T20:59:32.944Z" },
+     { url = "https://files.pythonhosted.org/packages/78/db/87c3b59b0d4e753e40b6a3b4a2642dfd1dcaefbff121ddc64d6c8b47ba00/ruff-0.11.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:aef9c9ed1b5ca28bb15c7eac83b8670cf3b20b478195bd49c8d756ba0a36cf48", size = 11106083, upload-time = "2025-06-05T20:59:37.03Z" },
+     { url = "https://files.pythonhosted.org/packages/77/79/d8cec175856ff810a19825d09ce700265f905c643c69f45d2b737e4a470a/ruff-0.11.13-py3-none-macosx_11_0_arm64.whl", hash = "sha256:53b15a9dfdce029c842e9a5aebc3855e9ab7771395979ff85b7c1dedb53ddc2b", size = 10436024, upload-time = "2025-06-05T20:59:39.741Z" },
+     { url = "https://files.pythonhosted.org/packages/8b/5b/f6d94f2980fa1ee854b41568368a2e1252681b9238ab2895e133d303538f/ruff-0.11.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab153241400789138d13f362c43f7edecc0edfffce2afa6a68434000ecd8f69a", size = 10646324, upload-time = "2025-06-05T20:59:42.185Z" },
+     { url = "https://files.pythonhosted.org/packages/6c/9c/b4c2acf24ea4426016d511dfdc787f4ce1ceb835f3c5fbdbcb32b1c63bda/ruff-0.11.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6c51f93029d54a910d3d24f7dd0bb909e31b6cd989a5e4ac513f4eb41629f0dc", size = 10174416, upload-time = "2025-06-05T20:59:44.319Z" },
+     { url = "https://files.pythonhosted.org/packages/f3/10/e2e62f77c65ede8cd032c2ca39c41f48feabedb6e282bfd6073d81bb671d/ruff-0.11.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1808b3ed53e1a777c2ef733aca9051dc9bf7c99b26ece15cb59a0320fbdbd629", size = 11724197, upload-time = "2025-06-05T20:59:46.935Z" },
+     { url = "https://files.pythonhosted.org/packages/bb/f0/466fe8469b85c561e081d798c45f8a1d21e0b4a5ef795a1d7f1a9a9ec182/ruff-0.11.13-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:d28ce58b5ecf0f43c1b71edffabe6ed7f245d5336b17805803312ec9bc665933", size = 12511615, upload-time = "2025-06-05T20:59:49.534Z" },
+     { url = "https://files.pythonhosted.org/packages/17/0e/cefe778b46dbd0cbcb03a839946c8f80a06f7968eb298aa4d1a4293f3448/ruff-0.11.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55e4bc3a77842da33c16d55b32c6cac1ec5fb0fbec9c8c513bdce76c4f922165", size = 12117080, upload-time = "2025-06-05T20:59:51.654Z" },
+     { url = "https://files.pythonhosted.org/packages/5d/2c/caaeda564cbe103bed145ea557cb86795b18651b0f6b3ff6a10e84e5a33f/ruff-0.11.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:633bf2c6f35678c56ec73189ba6fa19ff1c5e4807a78bf60ef487b9dd272cc71", size = 11326315, upload-time = "2025-06-05T20:59:54.469Z" },
+     { url = "https://files.pythonhosted.org/packages/75/f0/782e7d681d660eda8c536962920c41309e6dd4ebcea9a2714ed5127d44bd/ruff-0.11.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ffbc82d70424b275b089166310448051afdc6e914fdab90e08df66c43bb5ca9", size = 11555640, upload-time = "2025-06-05T20:59:56.986Z" },
+     { url = "https://files.pythonhosted.org/packages/5d/d4/3d580c616316c7f07fb3c99dbecfe01fbaea7b6fd9a82b801e72e5de742a/ruff-0.11.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:4a9ddd3ec62a9a89578c85842b836e4ac832d4a2e0bfaad3b02243f930ceafcc", size = 10507364, upload-time = "2025-06-05T20:59:59.154Z" },
+     { url = "https://files.pythonhosted.org/packages/5a/dc/195e6f17d7b3ea6b12dc4f3e9de575db7983db187c378d44606e5d503319/ruff-0.11.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d237a496e0778d719efb05058c64d28b757c77824e04ffe8796c7436e26712b7", size = 10141462, upload-time = "2025-06-05T21:00:01.481Z" },
+     { url = "https://files.pythonhosted.org/packages/f4/8e/39a094af6967faa57ecdeacb91bedfb232474ff8c3d20f16a5514e6b3534/ruff-0.11.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:26816a218ca6ef02142343fd24c70f7cd8c5aa6c203bca284407adf675984432", size = 11121028, upload-time = "2025-06-05T21:00:04.06Z" },
+     { url = "https://files.pythonhosted.org/packages/5a/c0/b0b508193b0e8a1654ec683ebab18d309861f8bd64e3a2f9648b80d392cb/ruff-0.11.13-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:51c3f95abd9331dc5b87c47ac7f376db5616041173826dfd556cfe3d4977f492", size = 11602992, upload-time = "2025-06-05T21:00:06.249Z" },
+     { url = "https://files.pythonhosted.org/packages/7c/91/263e33ab93ab09ca06ce4f8f8547a858cc198072f873ebc9be7466790bae/ruff-0.11.13-py3-none-win32.whl", hash = "sha256:96c27935418e4e8e77a26bb05962817f28b8ef3843a6c6cc49d8783b5507f250", size = 10474944, upload-time = "2025-06-05T21:00:08.459Z" },
+     { url = "https://files.pythonhosted.org/packages/46/f4/7c27734ac2073aae8efb0119cae6931b6fb48017adf048fdf85c19337afc/ruff-0.11.13-py3-none-win_amd64.whl", hash = "sha256:29c3189895a8a6a657b7af4e97d330c8a3afd2c9c8f46c81e2fc5a31866517e3", size = 11548669, upload-time = "2025-06-05T21:00:11.147Z" },
+     { url = "https://files.pythonhosted.org/packages/ec/bf/b273dd11673fed8a6bd46032c0ea2a04b2ac9bfa9c628756a5856ba113b0/ruff-0.11.13-py3-none-win_arm64.whl", hash = "sha256:b4385285e9179d608ff1d2fb9922062663c658605819a6876d8beef0c30b7f3b", size = 10683928, upload-time = "2025-06-05T21:00:13.758Z" },
  ]

  [[package]]
@@ -1 +0,0 @@
- __version__ = "0.2.6"
@@ -1,78 +0,0 @@
- import json
- from pathlib import Path
-
- from typer.testing import CliRunner
-
- from iparq.source import app
-
- # Define path to test fixtures
- FIXTURES_DIR = Path(__file__).parent
- fixture_path = FIXTURES_DIR / "dummy.parquet"
-
-
- def test_parquet_info():
-     """Test that the CLI correctly displays parquet file information."""
-     runner = CliRunner()
-     result = runner.invoke(app, ["inspect", str(fixture_path)])
-
-     assert result.exit_code == 0
-
-     expected_output = """ParquetMetaModel(
- created_by='parquet-cpp-arrow version 14.0.2',
- num_columns=3,
- num_rows=3,
- num_row_groups=1,
- format_version='2.6',
- serialized_size=2223
- )
- Parquet Column Information
- ┏━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
- ┃ Row Group ┃ Column Name ┃ Index ┃ Compression ┃ Bloom Filter ┃
- ┡━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
- │ 0 │ one │ 0 │ SNAPPY │ ✅ │
- │ 0 │ two │ 1 │ SNAPPY │ ✅ │
- │ 0 │ three │ 2 │ SNAPPY │ ✅ │
- └───────────┴─────────────┴───────┴─────────────┴──────────────┘
- Compression codecs: {'SNAPPY'}"""
-
-     assert expected_output in result.stdout
-
-
- def test_metadata_only_flag():
-     """Test that the metadata-only flag works correctly."""
-     runner = CliRunner()
-     fixture_path = FIXTURES_DIR / "dummy.parquet"
-     result = runner.invoke(app, ["inspect", "--metadata-only", str(fixture_path)])
-
-     assert result.exit_code == 0
-     assert "ParquetMetaModel" in result.stdout
-     assert "Parquet Column Information" not in result.stdout
-
-
- def test_column_filter():
-     """Test that filtering by column name works correctly."""
-     runner = CliRunner()
-     fixture_path = FIXTURES_DIR / "dummy.parquet"
-     result = runner.invoke(app, ["inspect", "--column", "one", str(fixture_path)])
-
-     assert result.exit_code == 0
-     assert "one" in result.stdout
-     assert "two" not in result.stdout
-
-
- def test_json_output():
-     """Test JSON output format."""
-     runner = CliRunner()
-     fixture_path = FIXTURES_DIR / "dummy.parquet"
-     result = runner.invoke(app, ["inspect", "--format", "json", str(fixture_path)])
-
-     assert result.exit_code == 0
-
-     # Test that output is valid JSON
-     data = json.loads(result.stdout)
-
-     # Check JSON structure
-     assert "metadata" in data
-     assert "columns" in data
-     assert "compression_codecs" in data
-     assert data["metadata"]["num_columns"] == 3
13 files without changes