iparq 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. {iparq-0.4.0 → iparq-0.4.1}/.github/workflows/python-package.yml +12 -2
  2. iparq-0.4.1/.github/workflows/test.yml +37 -0
  3. {iparq-0.4.0 → iparq-0.4.1}/PKG-INFO +19 -22
  4. {iparq-0.4.0 → iparq-0.4.1}/README.md +17 -21
  5. {iparq-0.4.0 → iparq-0.4.1}/pyproject.toml +3 -1
  6. iparq-0.4.1/src/iparq/__init__.py +1 -0
  7. iparq-0.4.1/uv.lock +923 -0
  8. iparq-0.4.0/src/iparq/__init__.py +0 -1
  9. iparq-0.4.0/uv.lock +0 -568
  10. {iparq-0.4.0 → iparq-0.4.1}/.github/FUNDING.yml +0 -0
  11. {iparq-0.4.0 → iparq-0.4.1}/.github/copilot-instructions.md +0 -0
  12. {iparq-0.4.0 → iparq-0.4.1}/.github/dependabot.yml +0 -0
  13. {iparq-0.4.0 → iparq-0.4.1}/.github/workflows/copilot-setup-steps.yml +0 -0
  14. {iparq-0.4.0 → iparq-0.4.1}/.github/workflows/merge.yml +0 -0
  15. {iparq-0.4.0 → iparq-0.4.1}/.github/workflows/python-publish.yml +0 -0
  16. {iparq-0.4.0 → iparq-0.4.1}/.gitignore +0 -0
  17. {iparq-0.4.0 → iparq-0.4.1}/.python-version +0 -0
  18. {iparq-0.4.0 → iparq-0.4.1}/.vscode/launch.json +0 -0
  19. {iparq-0.4.0 → iparq-0.4.1}/.vscode/settings.json +0 -0
  20. {iparq-0.4.0 → iparq-0.4.1}/CONTRIBUTING.md +0 -0
  21. {iparq-0.4.0 → iparq-0.4.1}/LICENSE +0 -0
  22. {iparq-0.4.0 → iparq-0.4.1}/dummy.parquet +0 -0
  23. {iparq-0.4.0 → iparq-0.4.1}/media/iparq.png +0 -0
  24. {iparq-0.4.0 → iparq-0.4.1}/src/iparq/py.typed +0 -0
  25. {iparq-0.4.0 → iparq-0.4.1}/src/iparq/source.py +0 -0
  26. {iparq-0.4.0 → iparq-0.4.1}/tests/conftest.py +0 -0
  27. {iparq-0.4.0 → iparq-0.4.1}/tests/dummy.parquet +0 -0
  28. {iparq-0.4.0 → iparq-0.4.1}/tests/test_cli.py +0 -0
@@ -45,6 +45,16 @@ jobs:
45
45
  uv run mypy . --config-file=../../pyproject.toml
46
46
  - name: Check formatting with black
47
47
  run: uvx black . --check --verbose
48
- - name: Run Python tests
48
+ - name: Run Python tests with coverage
49
49
  if: runner.os != 'Windows'
50
- run: uv run pytest -vv
50
+ run: uv run pytest -vv --cov=src/iparq --cov-report=xml --cov-report=term-missing
51
+
52
+ - name: Upload coverage to Codecov
53
+ if: runner.os != 'Windows'
54
+ uses: codecov/codecov-action@v5
55
+ with:
56
+ files: ./coverage.xml
57
+ fail_ci_if_error: false
58
+ verbose: true
59
+ env:
60
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
@@ -0,0 +1,37 @@
1
+ name: Run Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [ "main" ]
6
+ pull_request:
7
+ branches: [ "main" ]
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ test:
12
+ permissions:
13
+ contents: read
14
+ runs-on: ${{ matrix.os }}
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ os: [ubuntu-latest]
19
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
20
+
21
+ steps:
22
+ - name: Checkout code
23
+ uses: actions/checkout@v4
24
+
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v5
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+
30
+ - name: Install uv
31
+ uses: astral-sh/setup-uv@v5
32
+
33
+ - name: Install dependencies
34
+ run: uv sync --all-extras
35
+
36
+ - name: Run tests
37
+ run: uv run pytest -vv
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: iparq
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Display version compression and bloom filter information about a parquet file
5
5
  Author-email: MiguelElGallo <miguel.zurcher@gmail.com>
6
6
  License-File: LICENSE
@@ -13,6 +13,7 @@ Provides-Extra: checks
13
13
  Requires-Dist: mypy>=1.14.1; extra == 'checks'
14
14
  Requires-Dist: ruff>=0.9.3; extra == 'checks'
15
15
  Provides-Extra: test
16
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
16
17
  Requires-Dist: pytest>=7.0; extra == 'test'
17
18
  Description-Content-Type: text/markdown
18
19
 
@@ -24,6 +25,8 @@ Description-Content-Type: text/markdown
24
25
 
25
26
  [![Upload Python Package](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml/badge.svg)](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml)
26
27
 
28
+ [![codecov](https://codecov.io/gh/MiguelElGallo/iparq/branch/main/graph/badge.svg)](https://codecov.io/gh/MiguelElGallo/iparq)
29
+
27
30
  ![alt text](media/iparq.png)
28
31
  After reading [this blog](https://duckdb.org/2025/01/22/parquet-encodings.html), I began to wonder which Parquet version and compression methods the everyday tools we rely on actually use, only to find that there's no straightforward way to determine this. That curiosity and the difficulty of quickly discovering such details motivated me to create iparq (Information Parquet). My goal with iparq is to help users easily identify the specifics of the Parquet files generated by different engines, making it clear which features—like newer encodings or certain compression algorithms—the creator of the parquet is using.
29
32
 
@@ -138,27 +141,21 @@ When inspecting multiple files, each file's results are displayed with a header
138
141
 
139
142
  ```log
140
143
  ParquetMetaModel(
141
- created_by='DuckDB version v1.2.1 (build 8e52ec4395)',
142
- num_columns=1,
143
- num_rows=100000000,
144
- num_row_groups=10,
145
- format_version='1.0',
146
- serialized_size=1196
144
+ created_by='parquet-cpp-arrow version 14.0.2',
145
+ num_columns=3,
146
+ num_rows=3,
147
+ num_row_groups=1,
148
+ format_version='2.6',
149
+ serialized_size=2223
147
150
  )
148
- Parquet Column Information
149
- ┏━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
150
- ┃ Row Group ┃ Column Name ┃ Index ┃ Compression ┃ Bloom Filter ┃
151
- ┡━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
152
- │ 0 │ r │ 0 │ SNAPPY │ ✅ │
153
- │ 1 │ r │ 0 │ SNAPPY │ ✅ │
154
- │ 2 │ r │ 0 │ SNAPPY │ ✅ │
155
- │ 3 │ r │ 0 │ SNAPPY │ ✅ │
156
- │ 4 │ r │ 0 │ SNAPPY │ ✅ │
157
- │ 5 │ r │ 0 │ SNAPPY │ ✅ │
158
- │ 6 │ r │ 0 │ SNAPPY │ ✅ │
159
- │ 7 │ r │ 0 │ SNAPPY │ ✅ │
160
- │ 8 │ r │ 0 │ SNAPPY │ ✅ │
161
- │ 9 │ r │ 0 │ SNAPPY │ ✅ │
162
- └───────────┴─────────────┴───────┴─────────────┴──────────────┘
151
+ Parquet Column Information
152
+ ┏━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
153
+ ┃ ┃ Column ┃ ┃ ┃ Bloom ┃ ┃ ┃
154
+ ┃ Row Group ┃ Name ┃ Index ┃ Compress… ┃ Filter ┃ Min Value ┃ Max Value ┃
155
+ ┡━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
156
+ 0one │ 0 │ SNAPPY │ -1.0 2.5 │
157
+ 0two 1 │ SNAPPY bar │ foo │
158
+ 0three 2 │ SNAPPY │ False │ True
159
+ └───────────┴────────────┴───────┴───────────┴────────────┴───────────┴───────────┘
163
160
  Compression codecs: {'SNAPPY'}
164
161
  ```
@@ -6,6 +6,8 @@
6
6
 
7
7
  [![Upload Python Package](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml/badge.svg)](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml)
8
8
 
9
+ [![codecov](https://codecov.io/gh/MiguelElGallo/iparq/branch/main/graph/badge.svg)](https://codecov.io/gh/MiguelElGallo/iparq)
10
+
9
11
  ![alt text](media/iparq.png)
10
12
  After reading [this blog](https://duckdb.org/2025/01/22/parquet-encodings.html), I began to wonder which Parquet version and compression methods the everyday tools we rely on actually use, only to find that there's no straightforward way to determine this. That curiosity and the difficulty of quickly discovering such details motivated me to create iparq (Information Parquet). My goal with iparq is to help users easily identify the specifics of the Parquet files generated by different engines, making it clear which features—like newer encodings or certain compression algorithms—the creator of the parquet is using.
11
13
 
@@ -120,27 +122,21 @@ When inspecting multiple files, each file's results are displayed with a header
120
122
 
121
123
  ```log
122
124
  ParquetMetaModel(
123
- created_by='DuckDB version v1.2.1 (build 8e52ec4395)',
124
- num_columns=1,
125
- num_rows=100000000,
126
- num_row_groups=10,
127
- format_version='1.0',
128
- serialized_size=1196
125
+ created_by='parquet-cpp-arrow version 14.0.2',
126
+ num_columns=3,
127
+ num_rows=3,
128
+ num_row_groups=1,
129
+ format_version='2.6',
130
+ serialized_size=2223
129
131
  )
130
- Parquet Column Information
131
- ┏━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
132
- ┃ Row Group ┃ Column Name ┃ Index ┃ Compression ┃ Bloom Filter ┃
133
- ┡━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
134
- │ 0 │ r │ 0 │ SNAPPY │ ✅ │
135
- │ 1 │ r │ 0 │ SNAPPY │ ✅ │
136
- │ 2 │ r │ 0 │ SNAPPY │ ✅ │
137
- │ 3 │ r │ 0 │ SNAPPY │ ✅ │
138
- │ 4 │ r │ 0 │ SNAPPY │ ✅ │
139
- │ 5 │ r │ 0 │ SNAPPY │ ✅ │
140
- │ 6 │ r │ 0 │ SNAPPY │ ✅ │
141
- │ 7 │ r │ 0 │ SNAPPY │ ✅ │
142
- │ 8 │ r │ 0 │ SNAPPY │ ✅ │
143
- │ 9 │ r │ 0 │ SNAPPY │ ✅ │
144
- └───────────┴─────────────┴───────┴─────────────┴──────────────┘
132
+ Parquet Column Information
133
+ ┏━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
134
+ ┃ ┃ Column ┃ ┃ ┃ Bloom ┃ ┃ ┃
135
+ ┃ Row Group ┃ Name ┃ Index ┃ Compress… ┃ Filter ┃ Min Value ┃ Max Value ┃
136
+ ┡━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
137
+ 0one │ 0 │ SNAPPY │ -1.0 2.5 │
138
+ 0two 1 │ SNAPPY bar │ foo │
139
+ 0three 2 │ SNAPPY │ False │ True
140
+ └───────────┴────────────┴───────┴───────────┴────────────┴───────────┴───────────┘
145
141
  Compression codecs: {'SNAPPY'}
146
142
  ```
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "iparq"
3
- version = "0.4.0"
3
+ version = "0.4.1"
4
4
  description = "Display version compression and bloom filter information about a parquet file"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -17,6 +17,7 @@ dependencies = [
17
17
  [project.optional-dependencies]
18
18
  test = [
19
19
  "pytest>=7.0",
20
+ "pytest-cov>=4.0.0",
20
21
  ]
21
22
  checks = [
22
23
  "mypy>=1.14.1",
@@ -43,4 +44,5 @@ ignore_missing_imports = true
43
44
  [dependency-groups]
44
45
  dev = [
45
46
  "pytest>=8.4.1",
47
+ "pytest-cov>=4.0.0",
46
48
  ]
@@ -0,0 +1 @@
1
+ __version__ = "0.4.1"