iparq 0.4.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {iparq-0.4.0 → iparq-0.4.1}/.github/workflows/python-package.yml +12 -2
- iparq-0.4.1/.github/workflows/test.yml +37 -0
- {iparq-0.4.0 → iparq-0.4.1}/PKG-INFO +19 -22
- {iparq-0.4.0 → iparq-0.4.1}/README.md +17 -21
- {iparq-0.4.0 → iparq-0.4.1}/pyproject.toml +3 -1
- iparq-0.4.1/src/iparq/__init__.py +1 -0
- iparq-0.4.1/uv.lock +923 -0
- iparq-0.4.0/src/iparq/__init__.py +0 -1
- iparq-0.4.0/uv.lock +0 -568
- {iparq-0.4.0 → iparq-0.4.1}/.github/FUNDING.yml +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/.github/copilot-instructions.md +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/.github/dependabot.yml +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/.github/workflows/copilot-setup-steps.yml +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/.github/workflows/merge.yml +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/.github/workflows/python-publish.yml +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/.gitignore +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/.python-version +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/.vscode/launch.json +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/.vscode/settings.json +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/CONTRIBUTING.md +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/LICENSE +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/dummy.parquet +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/media/iparq.png +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/src/iparq/py.typed +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/src/iparq/source.py +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/tests/conftest.py +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/tests/dummy.parquet +0 -0
- {iparq-0.4.0 → iparq-0.4.1}/tests/test_cli.py +0 -0
|
@@ -45,6 +45,16 @@ jobs:
|
|
|
45
45
|
uv run mypy . --config-file=../../pyproject.toml
|
|
46
46
|
- name: Check formatting with black
|
|
47
47
|
run: uvx black . --check --verbose
|
|
48
|
-
- name: Run Python tests
|
|
48
|
+
- name: Run Python tests with coverage
|
|
49
49
|
if: runner.os != 'Windows'
|
|
50
|
-
run: uv run pytest -vv
|
|
50
|
+
run: uv run pytest -vv --cov=src/iparq --cov-report=xml --cov-report=term-missing
|
|
51
|
+
|
|
52
|
+
- name: Upload coverage to Codecov
|
|
53
|
+
if: runner.os != 'Windows'
|
|
54
|
+
uses: codecov/codecov-action@v5
|
|
55
|
+
with:
|
|
56
|
+
files: ./coverage.xml
|
|
57
|
+
fail_ci_if_error: false
|
|
58
|
+
verbose: true
|
|
59
|
+
env:
|
|
60
|
+
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: Run Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ "main" ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ "main" ]
|
|
8
|
+
workflow_dispatch:
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
test:
|
|
12
|
+
permissions:
|
|
13
|
+
contents: read
|
|
14
|
+
runs-on: ${{ matrix.os }}
|
|
15
|
+
strategy:
|
|
16
|
+
fail-fast: false
|
|
17
|
+
matrix:
|
|
18
|
+
os: [ubuntu-latest]
|
|
19
|
+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
|
|
20
|
+
|
|
21
|
+
steps:
|
|
22
|
+
- name: Checkout code
|
|
23
|
+
uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
26
|
+
uses: actions/setup-python@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version: ${{ matrix.python-version }}
|
|
29
|
+
|
|
30
|
+
- name: Install uv
|
|
31
|
+
uses: astral-sh/setup-uv@v5
|
|
32
|
+
|
|
33
|
+
- name: Install dependencies
|
|
34
|
+
run: uv sync --all-extras
|
|
35
|
+
|
|
36
|
+
- name: Run tests
|
|
37
|
+
run: uv run pytest -vv
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: iparq
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Display version compression and bloom filter information about a parquet file
|
|
5
5
|
Author-email: MiguelElGallo <miguel.zurcher@gmail.com>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -13,6 +13,7 @@ Provides-Extra: checks
|
|
|
13
13
|
Requires-Dist: mypy>=1.14.1; extra == 'checks'
|
|
14
14
|
Requires-Dist: ruff>=0.9.3; extra == 'checks'
|
|
15
15
|
Provides-Extra: test
|
|
16
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
|
|
16
17
|
Requires-Dist: pytest>=7.0; extra == 'test'
|
|
17
18
|
Description-Content-Type: text/markdown
|
|
18
19
|
|
|
@@ -24,6 +25,8 @@ Description-Content-Type: text/markdown
|
|
|
24
25
|
|
|
25
26
|
[](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml)
|
|
26
27
|
|
|
28
|
+
[](https://codecov.io/gh/MiguelElGallo/iparq)
|
|
29
|
+
|
|
27
30
|

|
|
28
31
|
After reading [this blog](https://duckdb.org/2025/01/22/parquet-encodings.html), I began to wonder which Parquet version and compression methods the everyday tools we rely on actually use, only to find that there's no straightforward way to determine this. That curiosity and the difficulty of quickly discovering such details motivated me to create iparq (Information Parquet). My goal with iparq is to help users easily identify the specifics of the Parquet files generated by different engines, making it clear which features—like newer encodings or certain compression algorithms—the creator of the parquet is using.
|
|
29
32
|
|
|
@@ -138,27 +141,21 @@ When inspecting multiple files, each file's results are displayed with a header
|
|
|
138
141
|
|
|
139
142
|
```log
|
|
140
143
|
ParquetMetaModel(
|
|
141
|
-
created_by='
|
|
142
|
-
num_columns=
|
|
143
|
-
num_rows=
|
|
144
|
-
num_row_groups=
|
|
145
|
-
format_version='
|
|
146
|
-
serialized_size=
|
|
144
|
+
created_by='parquet-cpp-arrow version 14.0.2',
|
|
145
|
+
num_columns=3,
|
|
146
|
+
num_rows=3,
|
|
147
|
+
num_row_groups=1,
|
|
148
|
+
format_version='2.6',
|
|
149
|
+
serialized_size=2223
|
|
147
150
|
)
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
┃
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
│
|
|
154
|
-
│
|
|
155
|
-
│
|
|
156
|
-
|
|
157
|
-
│ 5 │ r │ 0 │ SNAPPY │ ✅ │
|
|
158
|
-
│ 6 │ r │ 0 │ SNAPPY │ ✅ │
|
|
159
|
-
│ 7 │ r │ 0 │ SNAPPY │ ✅ │
|
|
160
|
-
│ 8 │ r │ 0 │ SNAPPY │ ✅ │
|
|
161
|
-
│ 9 │ r │ 0 │ SNAPPY │ ✅ │
|
|
162
|
-
└───────────┴─────────────┴───────┴─────────────┴──────────────┘
|
|
151
|
+
Parquet Column Information
|
|
152
|
+
┏━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
|
|
153
|
+
┃ ┃ Column ┃ ┃ ┃ Bloom ┃ ┃ ┃
|
|
154
|
+
┃ Row Group ┃ Name ┃ Index ┃ Compress… ┃ Filter ┃ Min Value ┃ Max Value ┃
|
|
155
|
+
┡━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
|
|
156
|
+
│ 0 │ one │ 0 │ SNAPPY │ ✅ │ -1.0 │ 2.5 │
|
|
157
|
+
│ 0 │ two │ 1 │ SNAPPY │ ✅ │ bar │ foo │
|
|
158
|
+
│ 0 │ three │ 2 │ SNAPPY │ ✅ │ False │ True │
|
|
159
|
+
└───────────┴────────────┴───────┴───────────┴────────────┴───────────┴───────────┘
|
|
163
160
|
Compression codecs: {'SNAPPY'}
|
|
164
161
|
```
|
|
@@ -6,6 +6,8 @@
|
|
|
6
6
|
|
|
7
7
|
[](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml)
|
|
8
8
|
|
|
9
|
+
[](https://codecov.io/gh/MiguelElGallo/iparq)
|
|
10
|
+
|
|
9
11
|

|
|
10
12
|
After reading [this blog](https://duckdb.org/2025/01/22/parquet-encodings.html), I began to wonder which Parquet version and compression methods the everyday tools we rely on actually use, only to find that there's no straightforward way to determine this. That curiosity and the difficulty of quickly discovering such details motivated me to create iparq (Information Parquet). My goal with iparq is to help users easily identify the specifics of the Parquet files generated by different engines, making it clear which features—like newer encodings or certain compression algorithms—the creator of the parquet is using.
|
|
11
13
|
|
|
@@ -120,27 +122,21 @@ When inspecting multiple files, each file's results are displayed with a header
|
|
|
120
122
|
|
|
121
123
|
```log
|
|
122
124
|
ParquetMetaModel(
|
|
123
|
-
created_by='
|
|
124
|
-
num_columns=
|
|
125
|
-
num_rows=
|
|
126
|
-
num_row_groups=
|
|
127
|
-
format_version='
|
|
128
|
-
serialized_size=
|
|
125
|
+
created_by='parquet-cpp-arrow version 14.0.2',
|
|
126
|
+
num_columns=3,
|
|
127
|
+
num_rows=3,
|
|
128
|
+
num_row_groups=1,
|
|
129
|
+
format_version='2.6',
|
|
130
|
+
serialized_size=2223
|
|
129
131
|
)
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
┃
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
│
|
|
136
|
-
│
|
|
137
|
-
│
|
|
138
|
-
|
|
139
|
-
│ 5 │ r │ 0 │ SNAPPY │ ✅ │
|
|
140
|
-
│ 6 │ r │ 0 │ SNAPPY │ ✅ │
|
|
141
|
-
│ 7 │ r │ 0 │ SNAPPY │ ✅ │
|
|
142
|
-
│ 8 │ r │ 0 │ SNAPPY │ ✅ │
|
|
143
|
-
│ 9 │ r │ 0 │ SNAPPY │ ✅ │
|
|
144
|
-
└───────────┴─────────────┴───────┴─────────────┴──────────────┘
|
|
132
|
+
Parquet Column Information
|
|
133
|
+
┏━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
|
|
134
|
+
┃ ┃ Column ┃ ┃ ┃ Bloom ┃ ┃ ┃
|
|
135
|
+
┃ Row Group ┃ Name ┃ Index ┃ Compress… ┃ Filter ┃ Min Value ┃ Max Value ┃
|
|
136
|
+
┡━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
|
|
137
|
+
│ 0 │ one │ 0 │ SNAPPY │ ✅ │ -1.0 │ 2.5 │
|
|
138
|
+
│ 0 │ two │ 1 │ SNAPPY │ ✅ │ bar │ foo │
|
|
139
|
+
│ 0 │ three │ 2 │ SNAPPY │ ✅ │ False │ True │
|
|
140
|
+
└───────────┴────────────┴───────┴───────────┴────────────┴───────────┴───────────┘
|
|
145
141
|
Compression codecs: {'SNAPPY'}
|
|
146
142
|
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "iparq"
|
|
3
|
-
version = "0.4.0"
|
|
3
|
+
version = "0.4.1"
|
|
4
4
|
description = "Display version compression and bloom filter information about a parquet file"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -17,6 +17,7 @@ dependencies = [
|
|
|
17
17
|
[project.optional-dependencies]
|
|
18
18
|
test = [
|
|
19
19
|
"pytest>=7.0",
|
|
20
|
+
"pytest-cov>=4.0.0",
|
|
20
21
|
]
|
|
21
22
|
checks = [
|
|
22
23
|
"mypy>=1.14.1",
|
|
@@ -43,4 +44,5 @@ ignore_missing_imports = true
|
|
|
43
44
|
[dependency-groups]
|
|
44
45
|
dev = [
|
|
45
46
|
"pytest>=8.4.1",
|
|
47
|
+
"pytest-cov>=4.0.0",
|
|
46
48
|
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.4.1"
|