pyrasterix 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyrasterix-0.0.1/.github/workflows/ci.yml +72 -0
- pyrasterix-0.0.1/.github/workflows/wheels.yml +150 -0
- pyrasterix-0.0.1/.gitignore +21 -0
- pyrasterix-0.0.1/CHANGELOG.md +54 -0
- pyrasterix-0.0.1/Cargo.lock +1739 -0
- pyrasterix-0.0.1/Cargo.toml +92 -0
- pyrasterix-0.0.1/LICENSE-APACHE +201 -0
- pyrasterix-0.0.1/LICENSE-MIT +21 -0
- pyrasterix-0.0.1/PKG-INFO +182 -0
- pyrasterix-0.0.1/README.md +192 -0
- pyrasterix-0.0.1/assets/fonts/AUTHORS-LIBERATION +16 -0
- pyrasterix-0.0.1/assets/fonts/LICENSE-LIBERATION +102 -0
- pyrasterix-0.0.1/assets/fonts/LiberationMono-Bold.ttf +0 -0
- pyrasterix-0.0.1/assets/fonts/LiberationMono-BoldItalic.ttf +0 -0
- pyrasterix-0.0.1/assets/fonts/LiberationMono-Italic.ttf +0 -0
- pyrasterix-0.0.1/assets/fonts/LiberationMono-Regular.ttf +0 -0
- pyrasterix-0.0.1/assets/fonts/LiberationSans-Bold.ttf +0 -0
- pyrasterix-0.0.1/assets/fonts/LiberationSans-BoldItalic.ttf +0 -0
- pyrasterix-0.0.1/assets/fonts/LiberationSans-Italic.ttf +0 -0
- pyrasterix-0.0.1/assets/fonts/LiberationSans-Regular.ttf +0 -0
- pyrasterix-0.0.1/assets/fonts/LiberationSerif-Bold.ttf +0 -0
- pyrasterix-0.0.1/assets/fonts/LiberationSerif-BoldItalic.ttf +0 -0
- pyrasterix-0.0.1/assets/fonts/LiberationSerif-Italic.ttf +0 -0
- pyrasterix-0.0.1/assets/fonts/LiberationSerif-Regular.ttf +0 -0
- pyrasterix-0.0.1/benches/parse.rs +10 -0
- pyrasterix-0.0.1/benches/pipeline.rs +10 -0
- pyrasterix-0.0.1/benches/render.rs +10 -0
- pyrasterix-0.0.1/deny.toml +44 -0
- pyrasterix-0.0.1/docs/BENCHMARKS.md +608 -0
- pyrasterix-0.0.1/docs/CLAUDE.md +265 -0
- pyrasterix-0.0.1/docs/DECISIONS.md +384 -0
- pyrasterix-0.0.1/docs/HANDOFF-PROMPT.md +51 -0
- pyrasterix-0.0.1/docs/HANDOFF.md +207 -0
- pyrasterix-0.0.1/docs/PROGRESS.md +1325 -0
- pyrasterix-0.0.1/docs/QUESTIONS.md +137 -0
- pyrasterix-0.0.1/docs/SPEC.md +431 -0
- pyrasterix-0.0.1/docs/TASKS.md +680 -0
- pyrasterix-0.0.1/docs/UNSAFE.md +50 -0
- pyrasterix-0.0.1/docs/images/render_latency_matrix.png +0 -0
- pyrasterix-0.0.1/justfile +30 -0
- pyrasterix-0.0.1/pyproject.toml +25 -0
- pyrasterix-0.0.1/pyrasterix/.cargo/config.toml +11 -0
- pyrasterix-0.0.1/pyrasterix/.gitignore +9 -0
- pyrasterix-0.0.1/pyrasterix/Cargo.lock +927 -0
- pyrasterix-0.0.1/pyrasterix/Cargo.toml +17 -0
- pyrasterix-0.0.1/pyrasterix/README.md +167 -0
- pyrasterix-0.0.1/pyrasterix/bench_compare.py +99 -0
- pyrasterix-0.0.1/pyrasterix/bench_dir.py +176 -0
- pyrasterix-0.0.1/pyrasterix/scripts/make_bench_pdf.py +63 -0
- pyrasterix-0.0.1/pyrasterix/scripts/run_bench.py +62 -0
- pyrasterix-0.0.1/pyrasterix/src/lib.rs +381 -0
- pyrasterix-0.0.1/pyrasterix/tests/test_smoke.py +192 -0
- pyrasterix-0.0.1/rust-toolchain.toml +4 -0
- pyrasterix-0.0.1/src/bin/cli.rs +521 -0
- pyrasterix-0.0.1/src/document.rs +2308 -0
- pyrasterix-0.0.1/src/error.rs +187 -0
- pyrasterix-0.0.1/src/interpreter/mod.rs +361 -0
- pyrasterix-0.0.1/src/interpreter/operators.rs +2258 -0
- pyrasterix-0.0.1/src/interpreter/state.rs +474 -0
- pyrasterix-0.0.1/src/lib.rs +58 -0
- pyrasterix-0.0.1/src/mmap.rs +32 -0
- pyrasterix-0.0.1/src/parser/filters.rs +1093 -0
- pyrasterix-0.0.1/src/parser/lexer.rs +675 -0
- pyrasterix-0.0.1/src/parser/mod.rs +14 -0
- pyrasterix-0.0.1/src/parser/objects.rs +765 -0
- pyrasterix-0.0.1/src/parser/resolver.rs +561 -0
- pyrasterix-0.0.1/src/parser/xref.rs +924 -0
- pyrasterix-0.0.1/src/pipeline.rs +325 -0
- pyrasterix-0.0.1/src/pixmap.rs +574 -0
- pyrasterix-0.0.1/src/render.rs +185 -0
- pyrasterix-0.0.1/src/resources/colorspace.rs +6 -0
- pyrasterix-0.0.1/src/resources/fonts.rs +1750 -0
- pyrasterix-0.0.1/src/resources/images.rs +960 -0
- pyrasterix-0.0.1/src/resources/mod.rs +11 -0
- pyrasterix-0.0.1/tests/cli.rs +58 -0
- pyrasterix-0.0.1/tests/golden.rs +156 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
env:
|
|
10
|
+
CARGO_TERM_COLOR: always
|
|
11
|
+
RUSTFLAGS: -D warnings
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
verify:
|
|
15
|
+
name: just verify
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Install Rust toolchain
|
|
21
|
+
uses: dtolnay/rust-toolchain@stable
|
|
22
|
+
with:
|
|
23
|
+
components: rustfmt, clippy
|
|
24
|
+
|
|
25
|
+
- name: Install just
|
|
26
|
+
uses: taiki-e/install-action@v2
|
|
27
|
+
with:
|
|
28
|
+
tool: just
|
|
29
|
+
|
|
30
|
+
- name: Install cargo-deny
|
|
31
|
+
uses: taiki-e/install-action@v2
|
|
32
|
+
with:
|
|
33
|
+
tool: cargo-deny
|
|
34
|
+
|
|
35
|
+
- name: Cache cargo
|
|
36
|
+
uses: Swatinem/rust-cache@v2
|
|
37
|
+
|
|
38
|
+
- name: just verify
|
|
39
|
+
run: just verify
|
|
40
|
+
|
|
41
|
+
extras:
|
|
42
|
+
name: docs + benches compile
|
|
43
|
+
runs-on: ubuntu-latest
|
|
44
|
+
needs: verify
|
|
45
|
+
steps:
|
|
46
|
+
- uses: actions/checkout@v4
|
|
47
|
+
- uses: dtolnay/rust-toolchain@stable
|
|
48
|
+
- uses: Swatinem/rust-cache@v2
|
|
49
|
+
- name: cargo doc
|
|
50
|
+
run: cargo doc --no-deps --all-features
|
|
51
|
+
- name: cargo bench --no-run
|
|
52
|
+
run: cargo bench --all-features --no-run
|
|
53
|
+
|
|
54
|
+
fuzz-smoke:
|
|
55
|
+
name: fuzz smoke (60s per target)
|
|
56
|
+
runs-on: ubuntu-latest
|
|
57
|
+
needs: verify
|
|
58
|
+
steps:
|
|
59
|
+
- uses: actions/checkout@v4
|
|
60
|
+
- uses: dtolnay/rust-toolchain@nightly
|
|
61
|
+
- uses: Swatinem/rust-cache@v2
|
|
62
|
+
- name: Install cargo-fuzz
|
|
63
|
+
run: cargo install cargo-fuzz --locked
|
|
64
|
+
- name: parse_object
|
|
65
|
+
working-directory: fuzz
|
|
66
|
+
run: cargo +nightly fuzz run parse_object -- -max_total_time=60
|
|
67
|
+
- name: parse_xref
|
|
68
|
+
working-directory: fuzz
|
|
69
|
+
run: cargo +nightly fuzz run parse_xref -- -max_total_time=60
|
|
70
|
+
- name: parse_document
|
|
71
|
+
working-directory: fuzz
|
|
72
|
+
run: cargo +nightly fuzz run parse_document -- -max_total_time=60
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
name: wheels
|
|
2
|
+
|
|
3
|
+
# Builds Python wheels for the pyrasterix crate across Linux,
|
|
4
|
+
# macOS, and Windows. Trigger options:
|
|
5
|
+
# - push a tag like `v1.0.0` to build a full release matrix
|
|
6
|
+
# - hit "Run workflow" in the Actions tab to build on demand
|
|
7
|
+
#
|
|
8
|
+
# Wheels are uploaded as workflow artifacts. Download from the run page,
|
|
9
|
+
# then `pip install pyrasterix-*-<platform>.whl` on the target machine.
|
|
10
|
+
# (Publishing to PyPI is wired up: the `release` job below runs on `v*` tag
|
|
11
|
+
# pushes and uses PyPI trusted publishing (OIDC), so no API token is needed.)
|
|
12
|
+
|
|
13
|
+
on:
|
|
14
|
+
push:
|
|
15
|
+
tags: ['v*']
|
|
16
|
+
workflow_dispatch:
|
|
17
|
+
|
|
18
|
+
permissions:
|
|
19
|
+
contents: read
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
linux:
|
|
23
|
+
name: linux ${{ matrix.target }}
|
|
24
|
+
runs-on: ubuntu-latest
|
|
25
|
+
strategy:
|
|
26
|
+
fail-fast: false
|
|
27
|
+
matrix:
|
|
28
|
+
target: [x86_64, aarch64]
|
|
29
|
+
steps:
|
|
30
|
+
- uses: actions/checkout@v4
|
|
31
|
+
- uses: actions/setup-python@v5
|
|
32
|
+
with:
|
|
33
|
+
python-version: '3.9'
|
|
34
|
+
- name: Build wheel (manylinux2014)
|
|
35
|
+
uses: PyO3/maturin-action@v1
|
|
36
|
+
with:
|
|
37
|
+
target: ${{ matrix.target }}
|
|
38
|
+
manylinux: '2014'
|
|
39
|
+
args: --release --out dist
|
|
40
|
+
working-directory: pyrasterix
|
|
41
|
+
- name: Smoke import + bench (native x86_64 only)
|
|
42
|
+
if: matrix.target == 'x86_64'
|
|
43
|
+
run: |
|
|
44
|
+
pip install --find-links pyrasterix/dist pyrasterix fpdf2
|
|
45
|
+
python -c "import pyrasterix; print(pyrasterix.__version__)"
|
|
46
|
+
python pyrasterix/scripts/make_bench_pdf.py /tmp/bench.pdf --pages 100
|
|
47
|
+
python pyrasterix/scripts/run_bench.py /tmp/bench.pdf
|
|
48
|
+
- uses: actions/upload-artifact@v4
|
|
49
|
+
with:
|
|
50
|
+
name: wheels-linux-${{ matrix.target }}
|
|
51
|
+
path: pyrasterix/dist/*.whl
|
|
52
|
+
|
|
53
|
+
macos:
|
|
54
|
+
name: macos ${{ matrix.target }}
|
|
55
|
+
runs-on: macos-latest
|
|
56
|
+
strategy:
|
|
57
|
+
fail-fast: false
|
|
58
|
+
matrix:
|
|
59
|
+
target: [x86_64, aarch64]
|
|
60
|
+
steps:
|
|
61
|
+
- uses: actions/checkout@v4
|
|
62
|
+
- uses: actions/setup-python@v5
|
|
63
|
+
with:
|
|
64
|
+
python-version: '3.9'
|
|
65
|
+
- name: Build wheel
|
|
66
|
+
uses: PyO3/maturin-action@v1
|
|
67
|
+
with:
|
|
68
|
+
target: ${{ matrix.target }}
|
|
69
|
+
args: --release --out dist
|
|
70
|
+
working-directory: pyrasterix
|
|
71
|
+
- name: Smoke import + bench (arm64 only - macos-latest is Apple Silicon)
|
|
72
|
+
if: matrix.target == 'aarch64'
|
|
73
|
+
run: |
|
|
74
|
+
pip install --find-links pyrasterix/dist pyrasterix fpdf2
|
|
75
|
+
python -c "import pyrasterix; print(pyrasterix.__version__)"
|
|
76
|
+
python pyrasterix/scripts/make_bench_pdf.py /tmp/bench.pdf --pages 100
|
|
77
|
+
python pyrasterix/scripts/run_bench.py /tmp/bench.pdf
|
|
78
|
+
- uses: actions/upload-artifact@v4
|
|
79
|
+
with:
|
|
80
|
+
name: wheels-macos-${{ matrix.target }}
|
|
81
|
+
path: pyrasterix/dist/*.whl
|
|
82
|
+
|
|
83
|
+
windows:
|
|
84
|
+
name: windows x64
|
|
85
|
+
runs-on: windows-latest
|
|
86
|
+
steps:
|
|
87
|
+
- uses: actions/checkout@v4
|
|
88
|
+
- uses: actions/setup-python@v5
|
|
89
|
+
with:
|
|
90
|
+
python-version: '3.9'
|
|
91
|
+
architecture: x64
|
|
92
|
+
- name: Build wheel
|
|
93
|
+
uses: PyO3/maturin-action@v1
|
|
94
|
+
with:
|
|
95
|
+
target: x64
|
|
96
|
+
args: --release --out dist
|
|
97
|
+
working-directory: pyrasterix
|
|
98
|
+
- name: Smoke import + bench
|
|
99
|
+
run: |
|
|
100
|
+
pip install --find-links pyrasterix/dist pyrasterix fpdf2
|
|
101
|
+
python -c "import pyrasterix; print(pyrasterix.__version__)"
|
|
102
|
+
python pyrasterix/scripts/make_bench_pdf.py D:/bench.pdf --pages 100
|
|
103
|
+
python pyrasterix/scripts/run_bench.py D:/bench.pdf
|
|
104
|
+
- uses: actions/upload-artifact@v4
|
|
105
|
+
with:
|
|
106
|
+
name: wheels-windows-x64
|
|
107
|
+
path: pyrasterix/dist/*.whl
|
|
108
|
+
|
|
109
|
+
sdist:
|
|
110
|
+
name: source distribution
|
|
111
|
+
runs-on: ubuntu-latest
|
|
112
|
+
steps:
|
|
113
|
+
- uses: actions/checkout@v4
|
|
114
|
+
- name: Build sdist
|
|
115
|
+
uses: PyO3/maturin-action@v1
|
|
116
|
+
with:
|
|
117
|
+
command: sdist
|
|
118
|
+
args: --out dist
|
|
119
|
+
working-directory: pyrasterix
|
|
120
|
+
- uses: actions/upload-artifact@v4
|
|
121
|
+
with:
|
|
122
|
+
name: sdist
|
|
123
|
+
path: pyrasterix/dist/*.tar.gz
|
|
124
|
+
|
|
125
|
+
# Publishes all built wheels + sdist to PyPI on tag push. Uses PyPI
|
|
126
|
+
# trusted publishing (OIDC), no API tokens stored. You need to:
|
|
127
|
+
#
|
|
128
|
+
# 1. Create a Pending Publisher on https://pypi.org for the
|
|
129
|
+
# `pyrasterix` project, pointing at this repo + workflow file
|
|
130
|
+
# + the `pypi` environment name below.
|
|
131
|
+
# 2. Create a `pypi` GitHub Environment under Settings -> Environments.
|
|
132
|
+
# 3. Push a tag like `v0.0.1` and this job fires.
|
|
133
|
+
release:
|
|
134
|
+
name: publish to pypi
|
|
135
|
+
needs: [linux, macos, windows, sdist]
|
|
136
|
+
runs-on: ubuntu-latest
|
|
137
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
138
|
+
environment:
|
|
139
|
+
name: pypi
|
|
140
|
+
url: https://pypi.org/p/pyrasterix
|
|
141
|
+
permissions:
|
|
142
|
+
id-token: write
|
|
143
|
+
steps:
|
|
144
|
+
- uses: actions/download-artifact@v4
|
|
145
|
+
with:
|
|
146
|
+
path: dist
|
|
147
|
+
merge-multiple: true
|
|
148
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
149
|
+
with:
|
|
150
|
+
packages-dir: dist
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/target
|
|
2
|
+
Cargo.lock.bak
|
|
3
|
+
**/*.rs.bk
|
|
4
|
+
|
|
5
|
+
# Fuzz crash dumps, coverage, and build artifacts are machine-specific.
|
|
6
|
+
# Inside /fuzz/corpus/* only files named `*.pdf` / `*.obj` are committed
|
|
7
|
+
# (curated seeds); libfuzzer's auto-generated inputs are not tracked.
|
|
8
|
+
/fuzz/artifacts
|
|
9
|
+
/fuzz/coverage
|
|
10
|
+
/fuzz/target
|
|
11
|
+
/fuzz/corpus/*/*
|
|
12
|
+
!/fuzz/corpus/*/*.pdf
|
|
13
|
+
!/fuzz/corpus/*/*.obj
|
|
14
|
+
|
|
15
|
+
# Editor/OS junk
|
|
16
|
+
.DS_Store
|
|
17
|
+
.idea/
|
|
18
|
+
.vscode/
|
|
19
|
+
|
|
20
|
+
# Claude Code per-machine runtime state.
|
|
21
|
+
/.claude
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## Unreleased
|
|
4
|
+
|
|
5
|
+
Quality gate closed (mean corpus SSIM 0.944, target was 0.94).
|
|
6
|
+
Perf is at 1.84x a comparable Python library's multi-process pool (cold); the target was 2x. Honest writeup in
|
|
7
|
+
`docs/BENCHMARKS.md`.
|
|
8
|
+
|
|
9
|
+
Fixing two bugs in this batch landed the quality gate. Both took five
|
|
10
|
+
minutes to fix once the actual cause was traced:
|
|
11
|
+
|
|
12
|
+
- Image XObjects with `/ColorSpace [/Separation /Black …]` were
|
|
13
|
+
being painted as raw luminance instead of as tint (so photos
|
|
14
|
+
came out inverted). Fixed in T-216. Worst-page SSIM went from 0.68
|
|
15
|
+
to 0.95 on the Springer figure pages.
|
|
16
|
+
- The page-tree walker only resolved `/Resources` when stored as
|
|
17
|
+
an indirect reference. Some PDFs (e.g. the No-Starch Rust book)
|
|
18
|
+
store `/Resources` inline as a Dict on every page, so no fonts
|
|
19
|
+
ever got registered and everything fell back to Helvetica.
|
|
20
|
+
Fixed in T-218. Rust book SSIM went from 0.65 to 0.94 on the worst
|
|
21
|
+
pages.
|
|
22
|
+
|
|
23
|
+
Perf work this round:
|
|
24
|
+
|
|
25
|
+
- Page-level work stealing across PDFs (T-302b). Previously batch
|
|
26
|
+
ran rayon at PDF granularity so one worker would stall on a
|
|
27
|
+
700-page textbook while others sat idle. Flat page-level
|
|
28
|
+
`par_iter` over the merged page list pushed throughput from
|
|
29
|
+
270 to 303 p/s wall.
|
|
30
|
+
- Separate PNG encode pool fed by a crossbeam channel (T-303).
|
|
31
|
+
No measurable win at 10 workers (encode is 3% of single-thread
|
|
32
|
+
time) but it's the SPEC §3.1 architectural shape.
|
|
33
|
+
- Optional `mtpng` parallel PNG encoder behind the
|
|
34
|
+
`fast-encoders` feature flag (T-304). Slower at 10 workers due
|
|
35
|
+
to oversubscription. Off by default.
|
|
36
|
+
|
|
37
|
+
Spec coverage this round: ICC profiles for image XObjects via
|
|
38
|
+
qcms (T-406), CCITT Fax G3/G4 (T-404), tiling patterns (T-402),
|
|
39
|
+
`/SMask` alpha (T-401 step 2), `/Indexed` color space (T-208).
|
|
40
|
+
|
|
41
|
+
Tried and rejected: multi-process worker pool (T-301b). LPT-
|
|
42
|
+
balanced 10 child processes gave 226 p/s vs rayon's 303 p/s.
|
|
43
|
+
Static partitioning loses to dynamic work-stealing when per-page
|
|
44
|
+
render cost varies.
|
|
45
|
+
|
|
46
|
+
## 0.0.0
|
|
47
|
+
|
|
48
|
+
The first 200-ish commits. PDF parser, page tree, content-stream
|
|
49
|
+
interpreter, Standard 14 fonts via Liberation TTFs, DCT/Flate
|
|
50
|
+
image decoding, JPEG output, Form XObject recursion, embedded
|
|
51
|
+
TrueType / CFF / Type 1 via stet-fonts, ToUnicode CMaps for
|
|
52
|
+
Identity-H.
|
|
53
|
+
|
|
54
|
+
Full history in `docs/PROGRESS.md`.
|