pdf-email-optimizer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. pdf_email_optimizer-0.1.0/CHANGELOG.md +42 -0
  2. pdf_email_optimizer-0.1.0/CONTRIBUTING.md +71 -0
  3. pdf_email_optimizer-0.1.0/LICENSE +21 -0
  4. pdf_email_optimizer-0.1.0/MANIFEST.in +13 -0
  5. pdf_email_optimizer-0.1.0/PKG-INFO +225 -0
  6. pdf_email_optimizer-0.1.0/README.md +178 -0
  7. pdf_email_optimizer-0.1.0/SECURITY.md +7 -0
  8. pdf_email_optimizer-0.1.0/SKILL.md +81 -0
  9. pdf_email_optimizer-0.1.0/agents/openai.yaml +7 -0
  10. pdf_email_optimizer-0.1.0/assets/icon.png +0 -0
  11. pdf_email_optimizer-0.1.0/assets/logo.png +0 -0
  12. pdf_email_optimizer-0.1.0/benchmarks/benchmark_manifest.yaml +64 -0
  13. pdf_email_optimizer-0.1.0/benchmarks/fixtures/README.md +39 -0
  14. pdf_email_optimizer-0.1.0/benchmarks/make_fixtures.py +403 -0
  15. pdf_email_optimizer-0.1.0/benchmarks/make_gallery.py +136 -0
  16. pdf_email_optimizer-0.1.0/benchmarks/results/README.md +9 -0
  17. pdf_email_optimizer-0.1.0/benchmarks/results/latest.json +200 -0
  18. pdf_email_optimizer-0.1.0/benchmarks/results/latest.md +14 -0
  19. pdf_email_optimizer-0.1.0/benchmarks/run_benchmarks.py +179 -0
  20. pdf_email_optimizer-0.1.0/docs/agent-usage.md +57 -0
  21. pdf_email_optimizer-0.1.0/docs/benchmarking.md +41 -0
  22. pdf_email_optimizer-0.1.0/docs/compatibility.md +26 -0
  23. pdf_email_optimizer-0.1.0/docs/examples.md +41 -0
  24. pdf_email_optimizer-0.1.0/docs/installation.md +31 -0
  25. pdf_email_optimizer-0.1.0/docs/json-output.md +40 -0
  26. pdf_email_optimizer-0.1.0/docs/known-limitations.md +12 -0
  27. pdf_email_optimizer-0.1.0/docs/troubleshooting.md +29 -0
  28. pdf_email_optimizer-0.1.0/examples/agent-transcripts/aggressive.md +21 -0
  29. pdf_email_optimizer-0.1.0/examples/agent-transcripts/audit.md +20 -0
  30. pdf_email_optimizer-0.1.0/examples/agent-transcripts/quality-target.md +22 -0
  31. pdf_email_optimizer-0.1.0/examples/agent-transcripts/range.md +21 -0
  32. pdf_email_optimizer-0.1.0/examples/agent-transcripts/visual-compare.md +20 -0
  33. pdf_email_optimizer-0.1.0/pyproject.toml +84 -0
  34. pdf_email_optimizer-0.1.0/requirements.txt +10 -0
  35. pdf_email_optimizer-0.1.0/schema/output-summary.schema.json +65 -0
  36. pdf_email_optimizer-0.1.0/scripts/compare_pdf_render.py +16 -0
  37. pdf_email_optimizer-0.1.0/scripts/optimize_pdf_email.py +16 -0
  38. pdf_email_optimizer-0.1.0/setup.cfg +4 -0
  39. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer/__init__.py +5 -0
  40. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer/__main__.py +8 -0
  41. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer/cli.py +7 -0
  42. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer/errors.py +7 -0
  43. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer/ghostscript.py +7 -0
  44. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer/optimizer.py +1172 -0
  45. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer/pikepdf_backend.py +84 -0
  46. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer/profiles.py +7 -0
  47. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer/render_qa.py +171 -0
  48. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer/reporting.py +7 -0
  49. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer.egg-info/PKG-INFO +225 -0
  50. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer.egg-info/SOURCES.txt +58 -0
  51. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer.egg-info/dependency_links.txt +1 -0
  52. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer.egg-info/entry_points.txt +3 -0
  53. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer.egg-info/requires.txt +25 -0
  54. pdf_email_optimizer-0.1.0/src/pdf_email_optimizer.egg-info/top_level.txt +1 -0
  55. pdf_email_optimizer-0.1.0/tests/conftest.py +49 -0
  56. pdf_email_optimizer-0.1.0/tests/fixtures/README.md +4 -0
  57. pdf_email_optimizer-0.1.0/tests/test_integration.py +127 -0
  58. pdf_email_optimizer-0.1.0/tests/test_optimizer.py +437 -0
  59. pdf_email_optimizer-0.1.0/tests/test_render_qa.py +113 -0
  60. pdf_email_optimizer-0.1.0/tests/test_targets.py +37 -0
@@ -0,0 +1,42 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format is based on
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project
5
+ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [0.1.0] - 2026-06-20
10
+
11
+ ### Added
12
+
13
+ - Installable Python package metadata and console scripts.
14
+ - `python -m pdf_email_optimizer` support.
15
+ - Shorthand CLI flags for targets, ranges, and profiles.
16
+ - Audit-only mode.
17
+ - Markdown report output and JSON output schema.
18
+ - Optional `pikepdf`/`qpdf` lossless structural backend, enabled automatically
19
+ when installed; its output is accepted only when it is smaller and
20
+ pixel-identical. Toggle with `--pikepdf`/`--no-pikepdf`; install via
21
+ `pip install "pdf-email-optimizer[pikepdf]"`.
22
+ - `benchmarks/make_fixtures.py` generates 12 redistributable, synthetic (CC0)
23
+ benchmark/test fixtures, including Illustrator- and InDesign-style exports.
24
+ - `benchmarks/make_gallery.py` renders before/after side-by-side images, plus a
25
+ README gallery and a populated benchmark table with real size-reduction and
26
+ PSNR/RMS numbers.
27
+ - Integration test suite (`tests/test_integration.py`, `integration` marker)
28
+ covering design-tool exports, photo brochures, screenshots, transparency,
29
+ forms/annotations, scans, and encrypted inputs.
30
+ - `fixtures` and `pikepdf` optional dependency groups.
31
+ - Benchmark harness, CI workflow, trusted-publishing workflow, documentation,
32
+ and governance files.
33
+
34
+ ### Notes
35
+
36
+ - Benchmark harness records an honest original-vs-output render comparison
37
+ (PSNR/RMS) for every successful case and a row for failed cases.
38
+ - Image recompression runs only when the smallest lossless candidate still
39
+ misses the target, so a successful pikepdf pass can skip lossy work entirely.
40
+
41
+ [Unreleased]: https://github.com/petehottelet/pdf-email-optimizer/compare/v0.1.0...HEAD
42
+ [0.1.0]: https://github.com/petehottelet/pdf-email-optimizer/releases/tag/v0.1.0
@@ -0,0 +1,71 @@
1
+ # Contributing
2
+
3
+ Thanks for improving PDF Email Optimizer.
4
+
5
+ ## Development
6
+
7
+ ```bash
8
+ python -m pip install -e ".[dev]"
9
+ pytest # full suite (includes slow integration tests)
10
+ pytest -m "not integration" # fast unit tests only
11
+ pytest --cov
12
+ ruff check .
13
+ python -m build
14
+ ```
15
+
16
+ ## Tests
17
+
18
+ - Unit tests live in `tests/` and use small synthetic PDFs generated at runtime.
19
+ - Integration tests (`tests/test_integration.py`, marked `integration`) run the
20
+ optimizer end-to-end against realistic generated fixtures (design-tool
21
+ exports, photos, screenshots, transparency, forms, scans, encrypted files).
22
+ They require `reportlab` (included in the `dev` extra) and are skipped if it
23
+ is unavailable.
24
+ - Include tests for behavior changes. For PDF edge cases, prefer small synthetic
25
+ fixtures that can be regenerated.
26
+
27
+ ## Fixtures
28
+
29
+ Do not commit confidential or copyrighted PDFs. Use generated, public domain,
30
+ CC0, or explicitly redistributable fixtures only, and document the origin and
31
+ license of each fixture.
32
+
33
+ The benchmark/test fixtures are synthesized from scratch:
34
+
35
+ ```bash
36
+ python benchmarks/make_fixtures.py # regenerate all fixtures
37
+ python benchmarks/make_fixtures.py --only photo_brochure
38
+ ```
39
+
40
+ See [`benchmarks/fixtures/README.md`](benchmarks/fixtures/README.md) for the
41
+ catalog and what each fixture exercises.
42
+
43
+ ## Benchmarks and gallery
44
+
45
+ ```bash
46
+ python benchmarks/run_benchmarks.py # writes benchmarks/results/latest.{json,md}
47
+ python benchmarks/make_gallery.py # writes docs/gallery/*.png
48
+ ```
49
+
50
+ When optimizer behavior changes, regenerate both and commit the updated
51
+ `benchmarks/results/latest.md` and gallery images so published numbers stay
52
+ honest. Never hand-edit benchmark numbers.
53
+
54
+ ## Optional backends
55
+
56
+ - `pikepdf`/`qpdf`: lossless structural pass, installed via
57
+ `pip install "pdf-email-optimizer[pikepdf]"`. It bundles qpdf, so no system
58
+ binary is needed.
59
+ - Ghostscript: external binary used only for the aggressive last-resort raster
60
+ rewrite.
61
+
62
+ ## Pull requests
63
+
64
+ Keep changes focused, add tests, run `ruff check .` and `pytest`, and update the
65
+ `[Unreleased]` section of [`CHANGELOG.md`](CHANGELOG.md).
66
+
67
+ ## Releases
68
+
69
+ 1. Move the `[Unreleased]` entries under a new version heading with the date.
70
+ 2. Bump the version in `pyproject.toml` (and the README badge).
71
+ 3. Tag the release and let CI build and publish.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Pete Hottelet
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,13 @@
1
+ include CHANGELOG.md
2
+ include CONTRIBUTING.md
3
+ include SECURITY.md
4
+ include requirements.txt
5
+ include SKILL.md
6
+ recursive-include agents *.yaml
7
+ recursive-include assets *.png
8
+ recursive-include benchmarks *.md *.py *.yaml *.json
9
+ recursive-include docs *.md
10
+ recursive-include examples *.md
11
+ recursive-include schema *.json
12
+ recursive-include scripts *.py
13
+ recursive-include tests *.md *.py
@@ -0,0 +1,225 @@
1
+ Metadata-Version: 2.4
2
+ Name: pdf-email-optimizer
3
+ Version: 0.1.0
4
+ Summary: Shrink PDFs to email-safe sizes while preserving visual quality.
5
+ Author: PDF Email Optimizer contributors
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/petehottelet/pdf-email-optimizer
8
+ Project-URL: Issues, https://github.com/petehottelet/pdf-email-optimizer/issues
9
+ Keywords: pdf,compression,email,optimization
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: End Users/Desktop
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Multimedia :: Graphics
22
+ Classifier: Topic :: Office/Business
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: pypdf>=4.0
27
+ Requires-Dist: Pillow>=10.0
28
+ Requires-Dist: pypdfium2>=4.0
29
+ Provides-Extra: dev
30
+ Requires-Dist: build; extra == "dev"
31
+ Requires-Dist: jsonschema; extra == "dev"
32
+ Requires-Dist: mypy; extra == "dev"
33
+ Requires-Dist: pikepdf>=8; extra == "dev"
34
+ Requires-Dist: pytest; extra == "dev"
35
+ Requires-Dist: pytest-cov; extra == "dev"
36
+ Requires-Dist: reportlab>=4.0; extra == "dev"
37
+ Requires-Dist: ruff; extra == "dev"
38
+ Requires-Dist: twine; extra == "dev"
39
+ Provides-Extra: qa
40
+ Requires-Dist: pypdfium2>=4.0; extra == "qa"
41
+ Provides-Extra: ghostscript
42
+ Provides-Extra: pikepdf
43
+ Requires-Dist: pikepdf>=8; extra == "pikepdf"
44
+ Provides-Extra: fixtures
45
+ Requires-Dist: reportlab>=4.0; extra == "fixtures"
46
+ Dynamic: license-file
47
+
48
+ <p align="center">
49
+ <img src="assets/logo.png" alt="PDF Email Optimizer" width="480">
50
+ </p>
51
+
52
+ # PDF Email Optimizer
53
+
54
+ [![CI](https://github.com/petehottelet/pdf-email-optimizer/actions/workflows/ci.yml/badge.svg)](https://github.com/petehottelet/pdf-email-optimizer/actions/workflows/ci.yml)
55
+ [![PyPI](https://img.shields.io/pypi/v/pdf-email-optimizer.svg)](https://pypi.org/project/pdf-email-optimizer/)
56
+ [![Python](https://img.shields.io/pypi/pyversions/pdf-email-optimizer.svg)](https://pypi.org/project/pdf-email-optimizer/)
57
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
58
+ [![Agent Skill](https://img.shields.io/badge/Agent_Skill-SKILL.md-orange.svg)](SKILL.md)
59
+
60
+ ![Profiles](https://img.shields.io/badge/profiles-quality_%7C_balanced_%7C_aggressive-blue)
61
+ ![Backends](https://img.shields.io/badge/backends-pypdf_%7C_pikepdf_%7C_ghostscript-555)
62
+ ![Optimizes](https://img.shields.io/badge/optimizes-photos_%7C_scans_%7C_screenshots_%7C_vectors-blueviolet)
63
+
64
+ Shrink PDFs to email-safe sizes while preserving visual quality.
65
+
66
+ PDF Email Optimizer is built for posters, brochures, reports, photo-heavy decks, and design-tool exports that need to fit under a target like 5-7 MB. It starts with structural cleanup, recompresses images only when needed, and reports when a requested size conflicts with visual quality.
67
+
68
+ ## Install
69
+
70
+ From a checkout:
71
+
72
+ ```bash
73
+ python -m pip install -e ".[dev]"
74
+ pdf-email-optimizer --help
75
+ ```
76
+
77
+ Once published to a package index:
78
+
79
+ ```bash
80
+ pipx install pdf-email-optimizer
81
+ pdf-email-optimizer input.pdf output.pdf --target-mb 7 --profile quality
82
+ ```
83
+
84
+ Also supported:
85
+
86
+ ```bash
87
+ uvx pdf-email-optimizer input.pdf output.pdf --target 7mb
88
+ python -m pdf_email_optimizer input.pdf output.pdf --target-mb 7
89
+ ```
90
+
91
+ ## Quick Start
92
+
93
+ ```bash
94
+ # Ordinary email optimization
95
+ pdf-email-optimizer input.pdf output_email.pdf --target-mb 7
96
+
97
+ # Preserve photos, screenshots, maps, and other detail
98
+ pdf-email-optimizer input.pdf output_email.pdf --target 7mb --quality
99
+
100
+ # Land inside a 5-7 MB range when possible
101
+ pdf-email-optimizer input.pdf output_email.pdf --range 5-7mb --quality
102
+
103
+ # Produce a Markdown report beside the output
104
+ pdf-email-optimizer input.pdf output_email.pdf --target-mb 7 --report report.md
105
+
106
+ # Inspect without writing an optimized PDF
107
+ pdf-email-optimizer input.pdf --audit
108
+ ```
109
+
110
+ The source PDF is never overwritten. Existing output files are rejected unless `--force` is supplied.
111
+
112
+ ## Profiles
113
+
114
+ | Profile | Use When | Behavior |
115
+ |---|---|---|
116
+ | `quality` | Photos, screenshots, maps, product images, "do not degrade" requests | High JPEG floor, protects small images, runs render QA, does not use Ghostscript by default |
117
+ | `balanced` | General email delivery | Moderate recompression ladder and conservative structural cleanup |
118
+ | `aggressive` | Smallest file matters more than perfect fidelity | Lower quality floor, smaller long-edge caps, optional Ghostscript fallback |
119
+
120
+ If `quality` mode cannot hit the requested size, the tool keeps the smallest quality-preserving output and emits a direct warning with next steps.
121
+
122
+ ## Output
123
+
124
+ Use `--json` for machine-readable summaries:
125
+
126
+ ```bash
127
+ pdf-email-optimizer input.pdf output.pdf --target-mb 7 --json
128
+ ```
129
+
130
+ The JSON contract is documented in [docs/json-output.md](docs/json-output.md) and validated by [schema/output-summary.schema.json](schema/output-summary.schema.json). Important fields include input/output size, target status, strategy, page count, private payload removals, image statistics, render QA, quality status, and warnings.
131
+
132
+ ## Gallery
133
+
134
+ Original page (left) vs. email copy (right). All inputs are synthetic, CC0 fixtures generated by `benchmarks/make_fixtures.py`; regenerate the images with `python benchmarks/make_gallery.py`.
135
+
136
+ **InDesign-style export — 2.35 MB → 0.18 MB (92% smaller, PSNR 57.8 dB)**
137
+
138
+ ![InDesign-style export before and after](docs/gallery/indesign_export.png)
139
+
140
+ **Scanned document — 0.73 MB → 0.25 MB (66% smaller)**
141
+
142
+ ![Scanned document before and after](docs/gallery/scanned_pdf.png)
143
+
144
+ **Repeated images — 0.81 MB → 0.14 MB (83% smaller, lossless dedupe)**
145
+
146
+ ![Repeated images before and after](docs/gallery/repeated_images.png)
147
+
148
+ ## Benchmarks
149
+
150
+ The benchmark harness runs against the bundled redistributable fixtures:
151
+
152
+ ```bash
153
+ python benchmarks/make_fixtures.py # (re)generate CC0 sample PDFs
154
+ python benchmarks/run_benchmarks.py --manifest benchmarks/benchmark_manifest.yaml --output benchmarks/results/latest.json
155
+ ```
156
+
157
+ It writes JSON plus a Markdown table. Missing fixtures are marked as skipped so published results stay honest. The table below is generated output (`benchmarks/results/latest.md`); PSNR/RMS compare the optimized copy against the original render, and `inf`/`0.0` denote a pixel-identical (lossless) result.
158
+
159
+ | Case | Input | Target | Profile | Output | Reduction | Target Hit | Worst PSNR | Worst RMS | Strategy |
160
+ |---|---:|---:|---|---:|---:|---|---:|---:|---|
161
+ | photo_brochure | 1.10 MB | 0.6 MB | quality | 1.10 MB | 0.1% | No | inf | 0.0 | pikepdf-structural |
162
+ | indesign_export | 2.35 MB | 1 MB | balanced | 0.18 MB | 92.3% | Yes | 57.822 | 0.327679 | image-recompress |
163
+ | illustrator_export | 0.01 MB | 7 MB | balanced | 0.01 MB | 18.6% | Yes | inf | 0.0 | structural-cleanup |
164
+ | private_payload_export | 0.16 MB | 7 MB | quality | 0.16 MB | 0.1% | Yes | inf | 0.0 | structural-cleanup |
165
+ | screenshot_report | 0.27 MB | 0.2 MB | quality | 0.09 MB | 66.4% | Yes | inf | 0.0 | structural-cleanup |
166
+ | text_vector_document | 0.00 MB | 7 MB | balanced | 0.00 MB | 12.2% | Yes | inf | 0.0 | structural-cleanup |
167
+ | scanned_pdf | 0.73 MB | 0.4 MB | balanced | 0.25 MB | 66.6% | Yes | inf | 0.0 | structural-cleanup |
168
+ | mixed_transparency | 1.75 MB | 1 MB | quality | 1.75 MB | -0.0% | No | inf | 0.0 | structural-cleanup |
169
+ | embedded_metadata | 0.12 MB | 7 MB | balanced | 0.12 MB | 0.1% | Yes | inf | 0.0 | structural-cleanup |
170
+ | repeated_images | 0.81 MB | 0.5 MB | balanced | 0.14 MB | 83.2% | Yes | inf | 0.0 | structural-cleanup |
171
+ | forms_annotations | 0.01 MB | 7 MB | quality | 0.01 MB | 3.9% | Yes | inf | 0.0 | structural-cleanup |
172
+ | encrypted_pdf | - | 7.0 MB | balanced | failed | - | - | - | - | Encrypted PDFs must be unlocked before optimization. |
173
+
174
+ The `quality` profile deliberately refuses to degrade `photo_brochure` and `mixed_transparency` to reach their size targets; it emits a warning instead of shipping a blurry file.
175
+
176
+ See [docs/benchmarking.md](docs/benchmarking.md) before adding fixtures.
177
+
178
+ ## Visual QA
179
+
180
+ Render and compare two PDFs:
181
+
182
+ ```bash
183
+ pdf-email-render-compare original.pdf optimized.pdf --output-dir qa-renders
184
+ ```
185
+
186
+ This reports page-level pixel differences and can write original, optimized, and amplified diff PNGs for review.
187
+
188
+ ## Agent Usage
189
+
190
+ The repo includes [SKILL.md](SKILL.md) for agent runtimes that load local skills. The short version:
191
+
192
+ - Use `quality` when the user asks to preserve image fidelity.
193
+ - Use `balanced` for ordinary email optimization.
194
+ - Use `aggressive` only when visible quality loss is acceptable.
195
+ - Report size, target status, strategy, and warnings.
196
+ - Never overwrite the source PDF.
197
+
198
+ More examples are in [docs/agent-usage.md](docs/agent-usage.md).
199
+
200
+ ## Development
201
+
202
+ ```bash
203
+ python -m pip install -e ".[dev]"
204
+ pytest
205
+ pytest --cov
206
+ ruff check .
207
+ python -m build
208
+ ```
209
+
210
+ CI runs linting, tests, coverage, package build, and CLI smoke checks on Python 3.9-3.13.
211
+
212
+ ## Documentation
213
+
214
+ - [Installation](docs/installation.md)
215
+ - [Examples](docs/examples.md)
216
+ - [Benchmarking](docs/benchmarking.md)
217
+ - [Compatibility](docs/compatibility.md)
218
+ - [JSON output](docs/json-output.md)
219
+ - [Agent usage](docs/agent-usage.md)
220
+ - [Known limitations](docs/known-limitations.md)
221
+ - [Troubleshooting](docs/troubleshooting.md)
222
+
223
+ ## License
224
+
225
+ [MIT](LICENSE)
@@ -0,0 +1,178 @@
1
+ <p align="center">
2
+ <img src="assets/logo.png" alt="PDF Email Optimizer" width="480">
3
+ </p>
4
+
5
+ # PDF Email Optimizer
6
+
7
+ [![CI](https://github.com/petehottelet/pdf-email-optimizer/actions/workflows/ci.yml/badge.svg)](https://github.com/petehottelet/pdf-email-optimizer/actions/workflows/ci.yml)
8
+ [![PyPI](https://img.shields.io/pypi/v/pdf-email-optimizer.svg)](https://pypi.org/project/pdf-email-optimizer/)
9
+ [![Python](https://img.shields.io/pypi/pyversions/pdf-email-optimizer.svg)](https://pypi.org/project/pdf-email-optimizer/)
10
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
11
+ [![Agent Skill](https://img.shields.io/badge/Agent_Skill-SKILL.md-orange.svg)](SKILL.md)
12
+
13
+ ![Profiles](https://img.shields.io/badge/profiles-quality_%7C_balanced_%7C_aggressive-blue)
14
+ ![Backends](https://img.shields.io/badge/backends-pypdf_%7C_pikepdf_%7C_ghostscript-555)
15
+ ![Optimizes](https://img.shields.io/badge/optimizes-photos_%7C_scans_%7C_screenshots_%7C_vectors-blueviolet)
16
+
17
+ Shrink PDFs to email-safe sizes while preserving visual quality.
18
+
19
+ PDF Email Optimizer is built for posters, brochures, reports, photo-heavy decks, and design-tool exports that need to fit under a target like 5-7 MB. It starts with structural cleanup, recompresses images only when needed, and reports when a requested size conflicts with visual quality.
20
+
21
+ ## Install
22
+
23
+ From a checkout:
24
+
25
+ ```bash
26
+ python -m pip install -e ".[dev]"
27
+ pdf-email-optimizer --help
28
+ ```
29
+
30
+ Once published to a package index:
31
+
32
+ ```bash
33
+ pipx install pdf-email-optimizer
34
+ pdf-email-optimizer input.pdf output.pdf --target-mb 7 --profile quality
35
+ ```
36
+
37
+ Also supported:
38
+
39
+ ```bash
40
+ uvx pdf-email-optimizer input.pdf output.pdf --target 7mb
41
+ python -m pdf_email_optimizer input.pdf output.pdf --target-mb 7
42
+ ```
43
+
44
+ ## Quick Start
45
+
46
+ ```bash
47
+ # Ordinary email optimization
48
+ pdf-email-optimizer input.pdf output_email.pdf --target-mb 7
49
+
50
+ # Preserve photos, screenshots, maps, and other detail
51
+ pdf-email-optimizer input.pdf output_email.pdf --target 7mb --quality
52
+
53
+ # Land inside a 5-7 MB range when possible
54
+ pdf-email-optimizer input.pdf output_email.pdf --range 5-7mb --quality
55
+
56
+ # Produce a Markdown report beside the output
57
+ pdf-email-optimizer input.pdf output_email.pdf --target-mb 7 --report report.md
58
+
59
+ # Inspect without writing an optimized PDF
60
+ pdf-email-optimizer input.pdf --audit
61
+ ```
62
+
63
+ The source PDF is never overwritten. Existing output files are rejected unless `--force` is supplied.
64
+
65
+ ## Profiles
66
+
67
+ | Profile | Use When | Behavior |
68
+ |---|---|---|
69
+ | `quality` | Photos, screenshots, maps, product images, "do not degrade" requests | High JPEG floor, protects small images, runs render QA, does not use Ghostscript by default |
70
+ | `balanced` | General email delivery | Moderate recompression ladder and conservative structural cleanup |
71
+ | `aggressive` | Smallest file matters more than perfect fidelity | Lower quality floor, smaller long-edge caps, optional Ghostscript fallback |
72
+
73
+ If `quality` mode cannot hit the requested size, the tool keeps the smallest quality-preserving output and emits a direct warning with next steps.
74
+
75
+ ## Output
76
+
77
+ Use `--json` for machine-readable summaries:
78
+
79
+ ```bash
80
+ pdf-email-optimizer input.pdf output.pdf --target-mb 7 --json
81
+ ```
82
+
83
+ The JSON contract is documented in [docs/json-output.md](docs/json-output.md) and validated by [schema/output-summary.schema.json](schema/output-summary.schema.json). Important fields include input/output size, target status, strategy, page count, private payload removals, image statistics, render QA, quality status, and warnings.
84
+
85
+ ## Gallery
86
+
87
+ Original page (left) vs. email copy (right). All inputs are synthetic, CC0 fixtures generated by `benchmarks/make_fixtures.py`; regenerate the images with `python benchmarks/make_gallery.py`.
88
+
89
+ **InDesign-style export — 2.35 MB → 0.18 MB (92% smaller, PSNR 57.8 dB)**
90
+
91
+ ![InDesign-style export before and after](docs/gallery/indesign_export.png)
92
+
93
+ **Scanned document — 0.73 MB → 0.25 MB (66% smaller)**
94
+
95
+ ![Scanned document before and after](docs/gallery/scanned_pdf.png)
96
+
97
+ **Repeated images — 0.81 MB → 0.14 MB (83% smaller, lossless dedupe)**
98
+
99
+ ![Repeated images before and after](docs/gallery/repeated_images.png)
100
+
101
+ ## Benchmarks
102
+
103
+ The benchmark harness runs against the bundled redistributable fixtures:
104
+
105
+ ```bash
106
+ python benchmarks/make_fixtures.py # (re)generate CC0 sample PDFs
107
+ python benchmarks/run_benchmarks.py --manifest benchmarks/benchmark_manifest.yaml --output benchmarks/results/latest.json
108
+ ```
109
+
110
+ It writes JSON plus a Markdown table. Missing fixtures are marked as skipped so published results stay honest. The table below is generated output (`benchmarks/results/latest.md`); PSNR/RMS compare the optimized copy against the original render, and `inf`/`0.0` denote a pixel-identical (lossless) result.
111
+
112
+ | Case | Input | Target | Profile | Output | Reduction | Target Hit | Worst PSNR | Worst RMS | Strategy |
113
+ |---|---:|---:|---|---:|---:|---|---:|---:|---|
114
+ | photo_brochure | 1.10 MB | 0.6 MB | quality | 1.10 MB | 0.1% | No | inf | 0.0 | pikepdf-structural |
115
+ | indesign_export | 2.35 MB | 1 MB | balanced | 0.18 MB | 92.3% | Yes | 57.822 | 0.327679 | image-recompress |
116
+ | illustrator_export | 0.01 MB | 7 MB | balanced | 0.01 MB | 18.6% | Yes | inf | 0.0 | structural-cleanup |
117
+ | private_payload_export | 0.16 MB | 7 MB | quality | 0.16 MB | 0.1% | Yes | inf | 0.0 | structural-cleanup |
118
+ | screenshot_report | 0.27 MB | 0.2 MB | quality | 0.09 MB | 66.4% | Yes | inf | 0.0 | structural-cleanup |
119
+ | text_vector_document | 0.00 MB | 7 MB | balanced | 0.00 MB | 12.2% | Yes | inf | 0.0 | structural-cleanup |
120
+ | scanned_pdf | 0.73 MB | 0.4 MB | balanced | 0.25 MB | 66.6% | Yes | inf | 0.0 | structural-cleanup |
121
+ | mixed_transparency | 1.75 MB | 1 MB | quality | 1.75 MB | -0.0% | No | inf | 0.0 | structural-cleanup |
122
+ | embedded_metadata | 0.12 MB | 7 MB | balanced | 0.12 MB | 0.1% | Yes | inf | 0.0 | structural-cleanup |
123
+ | repeated_images | 0.81 MB | 0.5 MB | balanced | 0.14 MB | 83.2% | Yes | inf | 0.0 | structural-cleanup |
124
+ | forms_annotations | 0.01 MB | 7 MB | quality | 0.01 MB | 3.9% | Yes | inf | 0.0 | structural-cleanup |
125
+ | encrypted_pdf | - | 7.0 MB | balanced | failed | - | - | - | - | Encrypted PDFs must be unlocked before optimization. |
126
+
127
+ The `quality` profile deliberately refuses to degrade `photo_brochure` and `mixed_transparency` to reach their size targets; it emits a warning instead of shipping a blurry file.
128
+
129
+ See [docs/benchmarking.md](docs/benchmarking.md) before adding fixtures.
130
+
131
+ ## Visual QA
132
+
133
+ Render and compare two PDFs:
134
+
135
+ ```bash
136
+ pdf-email-render-compare original.pdf optimized.pdf --output-dir qa-renders
137
+ ```
138
+
139
+ This reports page-level pixel differences and can write original, optimized, and amplified diff PNGs for review.
140
+
141
+ ## Agent Usage
142
+
143
+ The repo includes [SKILL.md](SKILL.md) for agent runtimes that load local skills. The short version:
144
+
145
+ - Use `quality` when the user asks to preserve image fidelity.
146
+ - Use `balanced` for ordinary email optimization.
147
+ - Use `aggressive` only when visible quality loss is acceptable.
148
+ - Report size, target status, strategy, and warnings.
149
+ - Never overwrite the source PDF.
150
+
151
+ More examples are in [docs/agent-usage.md](docs/agent-usage.md).
152
+
153
+ ## Development
154
+
155
+ ```bash
156
+ python -m pip install -e ".[dev]"
157
+ pytest
158
+ pytest --cov
159
+ ruff check .
160
+ python -m build
161
+ ```
162
+
163
+ CI runs linting, tests, coverage, package build, and CLI smoke checks on Python 3.9-3.13.
164
+
165
+ ## Documentation
166
+
167
+ - [Installation](docs/installation.md)
168
+ - [Examples](docs/examples.md)
169
+ - [Benchmarking](docs/benchmarking.md)
170
+ - [Compatibility](docs/compatibility.md)
171
+ - [JSON output](docs/json-output.md)
172
+ - [Agent usage](docs/agent-usage.md)
173
+ - [Known limitations](docs/known-limitations.md)
174
+ - [Troubleshooting](docs/troubleshooting.md)
175
+
176
+ ## License
177
+
178
+ [MIT](LICENSE)
@@ -0,0 +1,7 @@
1
+ # Security Policy
2
+
3
+ PDF Email Optimizer runs locally and does not upload PDFs or collect telemetry.
4
+
5
+ Do not attach confidential PDFs to public issues. If you need to report a problem with sensitive material, create a synthetic or redacted fixture that reproduces the behavior.
6
+
7
+ Report security concerns privately through the repository's security advisory flow when available.
@@ -0,0 +1,81 @@
1
+ ---
2
+ name: pdf-email-optimizer
3
+ description: Shrink large PDFs to email-safe sizes while preserving visual quality, with safe defaults, JSON summaries, reports, audit mode, and render QA.
4
+ license: MIT
5
+ ---
6
+
7
+ # PDF Email Optimizer
8
+
9
+ ## Core Rules
10
+
11
+ 1. Never overwrite the source PDF. Write an optimized copy to a new path.
12
+ 2. Use `--quality` when the user mentions photos, images, screenshots, maps, visual fidelity, sharpness, or "do not degrade."
13
+ 3. Use `--balanced` for ordinary email optimization.
14
+ 4. Use `--aggressive` only when the user explicitly accepts visible quality loss or asks for the smallest possible file.
15
+ 5. Run render QA when available for quality-sensitive work.
16
+ 6. Report original size, final size, target status, profile, strategy, and warnings.
17
+ 7. If quality mode misses the target, say clearly that the target conflicts with image fidelity.
18
+
19
+ ## Commands
20
+
21
+ ```bash
22
+ pdf-email-optimizer input.pdf output_email.pdf --target-mb 7
23
+ pdf-email-optimizer input.pdf output_email.pdf --target 7mb --quality
24
+ pdf-email-optimizer input.pdf output_email.pdf --range 5-7mb --quality
25
+ pdf-email-optimizer input.pdf output_email.pdf --target-mb 5 --preferred-mb 5 --balanced
26
+ pdf-email-optimizer input.pdf output_small.pdf --target-mb 5 --aggressive
27
+ pdf-email-optimizer input.pdf output_email.pdf --target-mb 7 --no-image-recompress
28
+ pdf-email-optimizer input.pdf output_email.pdf --target-mb 7 --json
29
+ pdf-email-optimizer input.pdf output_email.pdf --target-mb 7 --report report.md
30
+ pdf-email-optimizer input.pdf --audit --json
31
+ ```
32
+
33
+ Backward-compatible script form:
34
+
35
+ ```bash
36
+ python scripts/optimize_pdf_email.py input.pdf output_email.pdf --target-mb 7
37
+ ```
38
+
39
+ ## Size Targets
40
+
41
+ - "Under 7 MB" or "max 7 MB": use `--target-mb 7` or `--target 7mb`.
42
+ - "Between 5 and 7 MB": use `--range 5-7mb`.
43
+ - "Make it 5 MB": use `--target-mb 5 --preferred-mb 5`.
44
+ - If cleanup alone makes the file smaller than a requested range, keep it smaller. Do not pad files.
45
+
46
+ ## Audit First
47
+
48
+ Use audit mode when the user asks why a PDF is large or when the right strategy is unclear:
49
+
50
+ ```bash
51
+ pdf-email-optimizer input.pdf --audit --json
52
+ ```
53
+
54
+ Audit mode reports file size, page count, image count, private payload indicators, forms, annotations, transparency, masks, and a recommended profile.
55
+
56
+ ## Visual QA
57
+
58
+ When available:
59
+
60
+ ```bash
61
+ pdf-email-render-compare original.pdf optimized.pdf --output-dir qa-renders
62
+ ```
63
+
64
+ Check page count, missing layers, clipped art, changed colors, broken transparency, and softened important images. Automated render QA is a signal, not a substitute for human review.
65
+
66
+ ## Quality Conflict Response
67
+
68
+ Use this pattern when `quality` cannot hit the requested target:
69
+
70
+ ```text
71
+ Target not met. The requested 5 MB target conflicts with the selected quality profile. Output is 8.4 MB.
72
+ To go smaller, rerun with --profile aggressive, split the PDF, remove pages, or accept lower image fidelity.
73
+ ```
74
+
75
+ ## Failure Handling
76
+
77
+ - Encrypted PDFs must be unlocked first.
78
+ - Existing outputs require `--force`.
79
+ - Images with transparency may be skipped; pass `--flatten-alpha` only when flattening is appropriate.
80
+ - Ghostscript is optional; if missing, report the warning and keep the best non-Ghostscript result.
81
+ - For high-stakes documents, ask the user to spot-check the final PDF locally.
@@ -0,0 +1,7 @@
1
+ interface:
2
+ display_name: "PDF Email Optimizer"
3
+ short_description: "Hit target PDF file sizes with maximum visual quality"
4
+ icon_small: "./assets/icon.png"
5
+ icon_large: "./assets/logo.png"
6
+ brand_color: "#B0141B"
7
+ default_prompt: "Use $pdf-email-optimizer to reduce this PDF to an email-safe file size while preserving visual quality."