epub2pdf-cli 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. epub2pdf_cli-0.3.0/LICENSE +21 -0
  2. epub2pdf_cli-0.3.0/PKG-INFO +443 -0
  3. epub2pdf_cli-0.3.0/README.md +404 -0
  4. epub2pdf_cli-0.3.0/pyproject.toml +100 -0
  5. epub2pdf_cli-0.3.0/setup.cfg +4 -0
  6. epub2pdf_cli-0.3.0/src/epub2pdf_cli/__init__.py +5 -0
  7. epub2pdf_cli-0.3.0/src/epub2pdf_cli/__main__.py +4 -0
  8. epub2pdf_cli-0.3.0/src/epub2pdf_cli/api.py +160 -0
  9. epub2pdf_cli-0.3.0/src/epub2pdf_cli/cli.py +223 -0
  10. epub2pdf_cli-0.3.0/src/epub2pdf_cli/config.py +109 -0
  11. epub2pdf_cli-0.3.0/src/epub2pdf_cli/epub/__init__.py +3 -0
  12. epub2pdf_cli-0.3.0/src/epub2pdf_cli/epub/chapters.py +81 -0
  13. epub2pdf_cli-0.3.0/src/epub2pdf_cli/epub/container.py +25 -0
  14. epub2pdf_cli-0.3.0/src/epub2pdf_cli/epub/href.py +24 -0
  15. epub2pdf_cli-0.3.0/src/epub2pdf_cli/epub/opf.py +159 -0
  16. epub2pdf_cli-0.3.0/src/epub2pdf_cli/epub/parser.py +64 -0
  17. epub2pdf_cli-0.3.0/src/epub2pdf_cli/epub/toc.py +101 -0
  18. epub2pdf_cli-0.3.0/src/epub2pdf_cli/errors.py +27 -0
  19. epub2pdf_cli-0.3.0/src/epub2pdf_cli/html/__init__.py +3 -0
  20. epub2pdf_cli-0.3.0/src/epub2pdf_cli/html/builder.py +190 -0
  21. epub2pdf_cli-0.3.0/src/epub2pdf_cli/html/css.py +49 -0
  22. epub2pdf_cli-0.3.0/src/epub2pdf_cli/html/links.py +144 -0
  23. epub2pdf_cli-0.3.0/src/epub2pdf_cli/html/template.py +92 -0
  24. epub2pdf_cli-0.3.0/src/epub2pdf_cli/io_utils.py +24 -0
  25. epub2pdf_cli-0.3.0/src/epub2pdf_cli/markdown.py +97 -0
  26. epub2pdf_cli-0.3.0/src/epub2pdf_cli/mcp_server.py +189 -0
  27. epub2pdf_cli-0.3.0/src/epub2pdf_cli/models.py +116 -0
  28. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pdf/__init__.py +5 -0
  29. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pdf/extract.py +79 -0
  30. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pdf/extractors/__init__.py +0 -0
  31. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pdf/extractors/base.py +23 -0
  32. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pdf/extractors/docling_extractor.py +139 -0
  33. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pdf/extractors/opendataloader_extractor.py +86 -0
  34. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pdf/extractors/pdfplumber_extractor.py +150 -0
  35. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pdf/extractors/pypdfium2_extractor.py +151 -0
  36. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pdf/text.py +45 -0
  37. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pdf/validate.py +37 -0
  38. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pipeline/__init__.py +6 -0
  39. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pipeline/batch.py +84 -0
  40. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pipeline/convert.py +122 -0
  41. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pipeline/extract.py +64 -0
  42. epub2pdf_cli-0.3.0/src/epub2pdf_cli/pipeline/inspect.py +15 -0
  43. epub2pdf_cli-0.3.0/src/epub2pdf_cli/render/__init__.py +17 -0
  44. epub2pdf_cli-0.3.0/src/epub2pdf_cli/render/options.py +19 -0
  45. epub2pdf_cli-0.3.0/src/epub2pdf_cli/render/playwright.py +91 -0
  46. epub2pdf_cli-0.3.0/src/epub2pdf_cli/render/protocol.py +13 -0
  47. epub2pdf_cli-0.3.0/src/epub2pdf_cli/render/weasyprint.py +28 -0
  48. epub2pdf_cli-0.3.0/src/epub2pdf_cli.egg-info/PKG-INFO +443 -0
  49. epub2pdf_cli-0.3.0/src/epub2pdf_cli.egg-info/SOURCES.txt +52 -0
  50. epub2pdf_cli-0.3.0/src/epub2pdf_cli.egg-info/dependency_links.txt +1 -0
  51. epub2pdf_cli-0.3.0/src/epub2pdf_cli.egg-info/entry_points.txt +3 -0
  52. epub2pdf_cli-0.3.0/src/epub2pdf_cli.egg-info/requires.txt +36 -0
  53. epub2pdf_cli-0.3.0/src/epub2pdf_cli.egg-info/top_level.txt +1 -0
  54. epub2pdf_cli-0.3.0/tests/test_install_agent_skills.py +42 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 epub2pdf contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,443 @@
1
+ Metadata-Version: 2.4
2
+ Name: epub2pdf-cli
3
+ Version: 0.3.0
4
+ Summary: Local CLI to convert EPUB files into machine-readable PDFs.
5
+ Author: min9lin9
6
+ License: MIT
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: beautifulsoup4>=4.12
11
+ Requires-Dist: lxml>=4.9
12
+ Requires-Dist: pypdf>=5.4.0
13
+ Requires-Dist: pypdfium2>=4.30.0
14
+ Provides-Extra: weasyprint
15
+ Requires-Dist: weasyprint>=62.0; extra == "weasyprint"
16
+ Provides-Extra: playwright
17
+ Requires-Dist: playwright>=1.58.0; extra == "playwright"
18
+ Provides-Extra: docling
19
+ Requires-Dist: docling>=2.0.0; extra == "docling"
20
+ Provides-Extra: pdfplumber
21
+ Requires-Dist: pdfplumber>=0.11.0; extra == "pdfplumber"
22
+ Provides-Extra: legacy-pdf
23
+ Requires-Dist: opendataloader-pdf>=2.4.0; extra == "legacy-pdf"
24
+ Provides-Extra: mcp
25
+ Requires-Dist: mcp>=1.0.0; extra == "mcp"
26
+ Provides-Extra: dev
27
+ Requires-Dist: ruff>=0.6.0; extra == "dev"
28
+ Requires-Dist: mypy>=1.11.0; extra == "dev"
29
+ Requires-Dist: pytest-cov>=6.0.0; extra == "dev"
30
+ Requires-Dist: reportlab>=4.0.0; extra == "dev"
31
+ Requires-Dist: mcp>=1.0.0; extra == "dev"
32
+ Provides-Extra: all
33
+ Requires-Dist: weasyprint>=62.0; extra == "all"
34
+ Requires-Dist: playwright>=1.58.0; extra == "all"
35
+ Requires-Dist: docling>=2.0.0; extra == "all"
36
+ Requires-Dist: pdfplumber>=0.11.0; extra == "all"
37
+ Requires-Dist: mcp>=1.0.0; extra == "all"
38
+ Dynamic: license-file
39
+
40
+ # epub2pdf
41
+
42
+ [![PyPI](https://img.shields.io/pypi/v/epub2pdf-cli.svg)](https://pypi.org/project/epub2pdf-cli/)
43
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
44
+ [![CI](https://github.com/min9lin9/epub2pdf/actions/workflows/ci.yml/badge.svg)](https://github.com/min9lin9/epub2pdf/actions/workflows/ci.yml)
45
+ [![Docker](https://github.com/min9lin9/epub2pdf/actions/workflows/docker.yml/badge.svg)](https://github.com/min9lin9/epub2pdf/actions/workflows/docker.yml)
46
+
47
+ Local CLI for turning EPUB files into searchable PDFs, plus optional AI-readable extraction from existing PDFs.
48
+
49
+ The project is intentionally CLI-first. It does not run a long-lived server for the default workflows, and Codex/OpenCode integrations are thin wrappers around the same installed `epub2pdf` command.
50
+
51
+ ## What It Does
52
+
53
+ - Converts `.epub` files into selectable/searchable PDFs.
54
+ - Writes optional EPUB sidecars as structured JSON, normalized HTML, and Markdown.
55
+ - Inspects EPUB metadata, manifest, spine, TOC, and chapter order without rendering.
56
+ - Extracts Markdown, JSON, text, or HTML from existing PDFs through native backends (no Java required by default).
57
+ - Installs global Codex and OpenCode skills that call this CLI instead of duplicating conversion logic.
58
+
59
+ ## Requirements
60
+
61
+ - Python 3.10+
62
+ - WeasyPrint (installed via the `weasyprint` extra) and its system libraries for the default renderer (Pango, Cairo, GDK-PixBuf)
63
+ - Playwright Chromium only when using `--engine playwright`
64
+ - Java 11+ only when using `--engine opendataloader` for `pdf-extract`
65
+ - macOS/Linux shell environment
66
+
67
+ Check runtime dependencies:
68
+
69
+ ```bash
70
+ python3 --version
71
+ ```
72
+
73
+ ## Install
74
+
75
+ [![PyPI](https://img.shields.io/pypi/v/epub2pdf-cli.svg)](https://pypi.org/project/epub2pdf-cli/)
76
+
77
+ From PyPI (recommended):
78
+
79
+ ```bash
80
+ python3 -m pip install epub2pdf-cli
81
+ ```
82
+
83
+ From source:
84
+
85
+ ```bash
86
+ python3 -m pip install -e .
87
+ ```
88
+
89
+ Install the optional Playwright backend for Chromium-based rendering:
90
+
91
+ ```bash
92
+ python3 -m pip install 'epub2pdf-cli[playwright]'
93
+ playwright install chromium
94
+ ```
95
+
96
+ Install enhanced PDF extraction backends:
97
+
98
+ ```bash
99
+ # Best structured extraction (tables, reading order, OCR)
100
+ python3 -m pip install 'epub2pdf-cli[docling]'
101
+
102
+ # Table specialist
103
+ python3 -m pip install 'epub2pdf-cli[pdfplumber]'
104
+
105
+ # Legacy Java-based extractor
106
+ python3 -m pip install 'epub2pdf-cli[legacy-pdf]'
107
+ ```
108
+
109
+ Install the MCP server for Claude Desktop:
110
+
111
+ ```bash
112
+ python3 -m pip install 'epub2pdf-cli[mcp]'
113
+ ```
114
+
115
+ Or use the Docker image (no local Python dependencies):
116
+
117
+ ```bash
118
+ docker run --rm -v "$PWD:/workspace" ghcr.io/min9lin9/epub2pdf \
119
+ convert book.epub --no-validate
120
+ ```
121
+
122
+ Install Codex/OpenCode skill wrappers globally (run from a clone of the repository; the installer script is not shipped in the PyPI package):
123
+
124
+ ```bash
125
+ python3 scripts/install_agent_skills.py
126
+ ```
127
+
128
+ The installer copies templates into:
129
+
130
+ ```text
131
+ ~/.codex/skills/epub2pdf/
132
+ ~/.config/opencode/skills/epub2pdf/
133
+ ```
134
+
135
+ ## Quick Start
136
+
137
+ Convert an EPUB to PDF:
138
+
139
+ ```bash
140
+ epub2pdf convert book.epub
141
+ ```
142
+
143
+ Convert with stable output paths and sidecars:
144
+
145
+ ```bash
146
+ epub2pdf convert book.epub \
147
+ --output book.pdf \
148
+ --sidecar-json book.json \
149
+ --sidecar-html book.html \
150
+ --sidecar-markdown book.md
151
+ ```
152
+
153
+ Skip PDF validation to speed up batch pipelines:
154
+
155
+ ```bash
156
+ epub2pdf convert book.epub --no-validate
157
+ ```
158
+
159
+ Convert multiple EPUBs in parallel:
160
+
161
+ ```bash
162
+ epub2pdf batch *.epub \
163
+ --output-dir out/ \
164
+ --workers 4 \
165
+ --sidecar-json \
166
+ --force
167
+ ```
168
+
169
+ Inspect an EPUB before rendering:
170
+
171
+ ```bash
172
+ epub2pdf inspect book.epub --json book.inspect.json
173
+ ```
174
+
175
+ Extract Markdown and JSON from an existing PDF:
176
+
177
+ ```bash
178
+ epub2pdf pdf-extract book.pdf \
179
+ --output-dir book_extracted \
180
+ --format markdown,json
181
+ ```
182
+
183
+ Use a specific extraction backend:
184
+
185
+ ```bash
186
+ epub2pdf pdf-extract book.pdf \
187
+ --engine docling \
188
+ --output-dir book_extracted \
189
+ --format markdown,json
190
+ ```
191
+
192
+ ## CLI Reference
193
+
194
+ ### `convert`
195
+
196
+ ```bash
197
+ epub2pdf convert INPUT.epub [options]
198
+ ```
199
+
200
+ Common options:
201
+
202
+ - `-o, --output PATH`: output PDF path. Defaults to the input basename with `.pdf`.
203
+ - `--engine playwright|weasyprint`: rendering backend. Default: `weasyprint`.
204
+ - `--sidecar-json PATH`: write structured conversion metadata.
205
+ - `--sidecar-html PATH`: write the normalized merged HTML used for rendering.
206
+ - `--sidecar-markdown PATH`: write a Markdown version of the EPUB.
207
+ - `--page-size A4|Letter`: output page size. Default: `A4`.
208
+ - `--margin-mm N`: page margin in millimeters. Default: `12`.
209
+ - `--cover first|none`: include or skip the detected cover image. Default: `first`.
210
+ - `--no-validate`: skip PDF validation after rendering.
211
+ - `--force`: overwrite an existing output PDF.
212
+
213
+ On success, stdout prints only the PDF path.
214
+
215
+ ### `batch`
216
+
217
+ ```bash
218
+ epub2pdf batch INPUT1.epub INPUT2.epub ... [options]
219
+ ```
220
+
221
+ Common options:
222
+
223
+ - `-o, --output-dir DIR`: required output directory for PDFs and sidecars.
224
+ - `-j, --workers N`: number of parallel worker processes. Default: `1`.
225
+ - `--engine playwright|weasyprint`: rendering backend. Default: `weasyprint`.
226
+ - `--sidecar-json`: write a JSON report next to each PDF.
227
+ - `--sidecar-html`: write normalized merged HTML next to each PDF.
228
+ - `--sidecar-markdown`: write Markdown next to each PDF.
229
+ - `--no-validate`: skip PDF validation after rendering.
230
+ - `--force`: overwrite existing outputs.
231
+
232
+ On success, stdout prints one output PDF path per line.
233
+
234
+ ### `inspect`
235
+
236
+ ```bash
237
+ epub2pdf inspect INPUT.epub [--json PATH]
238
+ ```
239
+
240
+ Use this when an agent or script needs EPUB structure before rendering. Without `--json`, the report is written to stdout.
241
+
242
+ ### `pdf-extract`
243
+
244
+ ```bash
245
+ epub2pdf pdf-extract INPUT.pdf [options]
246
+ ```
247
+
248
+ Common options:
249
+
250
+ - `-o, --output-dir DIR`: output directory. Defaults to `<pdf-stem>_extracted`.
251
+ - `--engine pypdfium2|docling|pdfplumber|opendataloader`: extraction backend. Default: `pypdfium2`.
252
+ - `--format LIST`: comma-separated formats. Default: `markdown,json`.
253
+ - `--pages SPEC`: page selection, for example `1,3,5-7`.
254
+ - `--password`: password for encrypted PDF files.
255
+ - `--use-struct-tree`: use tagged PDF structure when available.
256
+ - `--sanitize`: redact common sensitive data patterns.
257
+ - `--keep-line-breaks`: preserve original line breaks.
258
+ - `--include-header-footer`: include page headers and footers.
259
+ - `--detect-strikethrough`: detect strikethrough text in Markdown/HTML.
260
+ - `--table-method default|cluster`: table detection mode.
261
+ - `--reading-order off|xycut`: reading order algorithm. Default: `xycut`.
262
+ - `--image-output off|embedded|external`: extracted image handling. Default: `external`.
263
+ - `--image-dir`: directory for extracted images.
264
+ - `--threads`: worker thread count for native extraction.
265
+ - `--sidecar-json PATH`: write structured extraction report JSON to this path.
266
+ - `--force`: overwrite existing extraction outputs.
267
+
268
+ On success, stdout prints one created output path per line.
269
+
270
+ ## Extraction Backends
271
+
272
+ | Engine | Speed | Quality | Best for | Dependencies |
273
+ |---|---|---|---|---|
274
+ | `pypdfium2` | Fastest | High text fidelity | Digital text PDFs | Bundled with base install |
275
+ | `docling` | Moderate | Best structure/tables | Complex layouts, tables, OCR | `pip install 'epub2pdf-cli[docling]'` |
276
+ | `pdfplumber` | Slower | Excellent tables | Table-heavy financial docs | `pip install 'epub2pdf-cli[pdfplumber]'` |
277
+ | `opendataloader` | Moderate | Highest overall accuracy | Legacy high-quality extraction | Java 11+ + `pip install 'epub2pdf-cli[legacy-pdf]'` |
278
+
279
+ ## AI-Readable Outputs
280
+
281
+ `convert --sidecar-json` writes a stable report with:
282
+
283
+ - `source`: input path, hash, conversion timestamp
284
+ - `metadata`: title, language, creators, identifiers, publisher, dates
285
+ - `manifest`: EPUB manifest items and media types
286
+ - `spine`: ordered reading sequence
287
+ - `toc`: table of contents entries
288
+ - `chapters`: chapter ids, hrefs, titles, text statistics, anchors
289
+ - `assets`: rewritten images, CSS assets, and embedded resources
290
+ - `warnings`: unsupported media, missing assets, image-heavy chapters
291
+ - `output`: PDF path, backend, page size, validation summary
292
+
293
+ `convert --sidecar-html` writes the normalized merged HTML that was rendered into the PDF.
294
+
295
+ `convert --sidecar-markdown` writes a Markdown version of the EPUB suitable for RAG ingestion.
296
+
297
+ `pdf-extract` writes Markdown/JSON/text/HTML files from an existing PDF. This is the path to use when the source is already a PDF rather than an EPUB.
298
+
299
+ ## Programmatic API
300
+
301
+ Use `epub2pdf_cli.api.Epub2Pdf` to convert from Python. When the Playwright
302
+ engine is selected, use the client as a context manager to keep one browser
303
+ process alive across multiple conversions.
304
+
305
+ ```python
306
+ from epub2pdf_cli.api import Epub2Pdf
307
+
308
+ # WeasyPrint (no context manager required)
309
+ client = Epub2Pdf(engine="weasyprint")
310
+ client.convert("book.epub", "book.pdf")
311
+
312
+ # Playwright with browser pooling
313
+ with Epub2Pdf(engine="playwright") as client:
314
+ client.convert("a.epub", "a.pdf")
315
+ client.convert("b.epub", "b.pdf")
316
+
317
+ # Parallel batch conversion
318
+ with Epub2Pdf(engine="playwright") as client:
319
+ reports = client.batch_convert(
320
+ [("a.epub", "a.pdf"), ("b.epub", "b.pdf")],
321
+ max_workers=4,
322
+ )
323
+ ```
324
+
325
+ ## MCP (Claude Desktop / Claude Code)
326
+
327
+ Install the optional MCP dependency:
328
+
329
+ ```bash
330
+ python3 -m pip install 'epub2pdf-cli[mcp]'
331
+ ```
332
+
333
+ Add the server to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS or `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
334
+
335
+ ```json
336
+ {
337
+ "mcpServers": {
338
+ "epub2pdf": {
339
+ "command": "epub2pdf-mcp"
340
+ }
341
+ }
342
+ }
343
+ ```
344
+
345
+ If `epub2pdf-mcp` is not on your PATH, use the absolute path to your Python interpreter:
346
+
347
+ ```json
348
+ {
349
+ "mcpServers": {
350
+ "epub2pdf": {
351
+ "command": "/path/to/python3",
352
+ "args": ["-m", "epub2pdf_cli.mcp_server"]
353
+ }
354
+ }
355
+ }
356
+ ```
357
+
358
+ The MCP server uses low-resource defaults (WeasyPrint, no PDF validation, no long-lived browser) and exposes these tools:
359
+
360
+ - `convert_epub`
361
+ - `batch_convert`
362
+ - `inspect_epub`
363
+ - `extract_pdf`
364
+
365
+ ## Codex and OpenCode Usage
366
+
367
+ After running:
368
+
369
+ ```bash
370
+ python3 scripts/install_agent_skills.py
371
+ ```
372
+
373
+ Codex and OpenCode can use the global `epub2pdf` skill.
374
+
375
+ Example prompts:
376
+
377
+ - `Use $epub2pdf to inspect this EPUB and convert it to a searchable PDF with JSON and HTML sidecars.`
378
+ - `Use $epub2pdf to extract Markdown and JSON from this PDF for RAG ingestion.`
379
+ - `Use $epub2pdf to inspect the EPUB TOC before rendering the PDF.`
380
+
381
+ The skill should call `epub2pdf inspect`, `epub2pdf convert`, or `epub2pdf pdf-extract` directly.
382
+
383
+ ## Guarantees
384
+
385
+ - Normal XHTML-based EPUB text is rendered as a real PDF text layer, not as raster-only pages.
386
+ - Rendered PDFs are validated for page count and extractable text when textual source chapters exist, unless `--no-validate` is passed.
387
+ - EPUB sidecars are produced from the source EPUB before PDF rendering, so AI workflows do not need to reverse-engineer the PDF.
388
+ - PDF extraction is local and deterministic by default.
389
+
390
+ ## Limitations
391
+
392
+ - Fixed-layout comics and image-only EPUBs are rendered, but not OCR-processed.
393
+ - Scanned PDFs that require OCR need a separate workflow: the local `pdf-extract` command does not run OCR unless the `docling` engine is selected.
394
+ - Complex EPUB CSS may be simplified during normalization.
395
+ - `WeasyPrint` is the default backend and may require native system libraries (Pango, Cairo, GDK-PixBuf).
396
+ - `pdf-extract` no longer requires Java by default; Java is only needed for the legacy `opendataloader` engine.
397
+
398
+ ## Development
399
+
400
+ Run tests:
401
+
402
+ ```bash
403
+ PYTHONPATH=src python3 -m pytest -q
404
+ ```
405
+
406
+ The source is organized into layered packages:
407
+
408
+ - `epub2pdf_cli/cli.py` — argument parsing and command dispatch only.
409
+ - `epub2pdf_cli/config.py` — validated configuration objects.
410
+ - `epub2pdf_cli/epub/` — EPUB container, OPF, TOC, and chapter parsing.
411
+ - `epub2pdf_cli/html/` — HTML normalization, asset rewriting, CSS rewriting, and template generation.
412
+ - `epub2pdf_cli/render/` — PDF rendering engines behind a `Renderer` protocol.
413
+ - `epub2pdf_cli/pdf/` — PDF validation and extraction adapters.
414
+ - `epub2pdf_cli/pipeline/` — high-level `inspect`, `convert`, and `extract` workflows.
415
+
416
+ Validate the Codex skill template:
417
+
418
+ ```bash
419
+ python3 ~/.codex/skills/.system/skill-creator/scripts/quick_validate.py integrations/codex/epub2pdf
420
+ ```
421
+
422
+ Refresh global skill wrappers after editing integration templates:
423
+
424
+ ```bash
425
+ python3 scripts/install_agent_skills.py
426
+ ```
427
+
428
+ ## Contributing
429
+
430
+ We welcome bug reports, feature requests, documentation fixes, and pull requests.
431
+
432
+ - Read [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and PR standards.
433
+ - Read [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) before participating.
434
+ - Report security issues privately via [SECURITY.md](SECURITY.md).
435
+ - See [ROADMAP.md](ROADMAP.md) for planned work and [GOVERNANCE.md](GOVERNANCE.md) for project roles.
436
+
437
+ ## Repository
438
+
439
+ Public GitHub repository:
440
+
441
+ ```text
442
+ https://github.com/min9lin9/epub2pdf
443
+ ```