compile-pdf-rewrite 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compile_pdf_rewrite-0.1.0/.coderabbit.yaml +29 -0
- compile_pdf_rewrite-0.1.0/.cursor/rules/baseline.mdc +21 -0
- compile_pdf_rewrite-0.1.0/.cursorrules +16 -0
- compile_pdf_rewrite-0.1.0/.github/dependabot.yml +11 -0
- compile_pdf_rewrite-0.1.0/.github/workflows/publish-pypi.yml +39 -0
- compile_pdf_rewrite-0.1.0/CLAUDE.md +9 -0
- compile_pdf_rewrite-0.1.0/PKG-INFO +52 -0
- compile_pdf_rewrite-0.1.0/README.md +19 -0
- compile_pdf_rewrite-0.1.0/pyproject.toml +70 -0
- compile_pdf_rewrite-0.1.0/src/compile_pdf_rewrite/__init__.py +28 -0
- compile_pdf_rewrite-0.1.0/src/compile_pdf_rewrite/api.py +168 -0
- compile_pdf_rewrite-0.1.0/src/compile_pdf_rewrite/cli.py +91 -0
- compile_pdf_rewrite-0.1.0/src/compile_pdf_rewrite/engine.py +443 -0
- compile_pdf_rewrite-0.1.0/src/compile_pdf_rewrite/plan_schema.py +185 -0
- compile_pdf_rewrite-0.1.0/src/compile_pdf_rewrite/verify.py +267 -0
- compile_pdf_rewrite-0.1.0/tests/__init__.py +0 -0
- compile_pdf_rewrite-0.1.0/tests/conftest.py +191 -0
- compile_pdf_rewrite-0.1.0/tests/test_rewrite_api.py +110 -0
- compile_pdf_rewrite-0.1.0/tests/test_rewrite_cli.py +81 -0
- compile_pdf_rewrite-0.1.0/tests/test_rewrite_determinism.py +61 -0
- compile_pdf_rewrite-0.1.0/tests/test_rewrite_engine.py +306 -0
- compile_pdf_rewrite-0.1.0/tests/test_rewrite_plan_schema.py +115 -0
- compile_pdf_rewrite-0.1.0/tests/test_rewrite_surface.py +22 -0
- compile_pdf_rewrite-0.1.0/tests/test_rewrite_verify.py +106 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
language: en-US
|
|
2
|
+
reviews:
|
|
3
|
+
profile: chill
|
|
4
|
+
request_changes_workflow: false
|
|
5
|
+
high_level_summary: true
|
|
6
|
+
poem: false
|
|
7
|
+
review_status: true
|
|
8
|
+
collapse_walkthrough: true
|
|
9
|
+
auto_review:
|
|
10
|
+
enabled: true
|
|
11
|
+
drafts: false
|
|
12
|
+
path_filters:
|
|
13
|
+
- "!dist/**"
|
|
14
|
+
- "!build/**"
|
|
15
|
+
- "!.next/**"
|
|
16
|
+
- "!node_modules/**"
|
|
17
|
+
- "!**/__pycache__/**"
|
|
18
|
+
- "!**/*.min.js"
|
|
19
|
+
- "!**/*.lock"
|
|
20
|
+
- "!**/package-lock.json"
|
|
21
|
+
- "!**/uv.lock"
|
|
22
|
+
- "!**/poetry.lock"
|
|
23
|
+
path_instructions:
|
|
24
|
+
- path: "**/*.{ts,tsx,js,jsx}"
|
|
25
|
+
instructions: "Flag unhandled promises, missing null checks, hardcoded secrets/env vars, and unsafe `any` usage. Skip stylistic preferences — Prettier handles those."
|
|
26
|
+
- path: "**/*.py"
|
|
27
|
+
instructions: "Flag unhandled exceptions, missing type hints on public functions, hardcoded secrets, and SQL injection risks. Skip style — Ruff handles those."
|
|
28
|
+
chat:
|
|
29
|
+
auto_reply: true
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: PrintWithSynergy baseline rules
|
|
3
|
+
alwaysApply: true
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
You are working in this repository, part of the printwithsynergy organization.
|
|
7
|
+
|
|
8
|
+
# Conventions
|
|
9
|
+
- Conventional commits (feat:, fix:, chore:, refactor:, docs:, test:)
|
|
10
|
+
- Never commit secrets. Use .env (gitignored) for local config.
|
|
11
|
+
- Prefer pure functions; isolate side effects.
|
|
12
|
+
- Write tests alongside code changes when behavior changes.
|
|
13
|
+
|
|
14
|
+
# Before modifying code
|
|
15
|
+
- Use the code-review-graph MCP tools (get_impact_radius_tool, get_blast_radius) to check downstream impact.
|
|
16
|
+
- For unfamiliar symbols, use sverklo_lookup or sverklo_refs first.
|
|
17
|
+
- Never assume context. If the codebase has CLAUDE.md or AGENTS.md, read it first.
|
|
18
|
+
|
|
19
|
+
# Style
|
|
20
|
+
- Match existing code style. Do not reformat unrelated lines.
|
|
21
|
+
- No "helper" refactors mixed with feature changes.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
You are working in the `compile-pdf-rewrite` repository, part of the printwithsynergy organization.
|
|
2
|
+
|
|
3
|
+
# Conventions
|
|
4
|
+
- Conventional commits (feat:, fix:, chore:, refactor:, docs:, test:)
|
|
5
|
+
- Never commit secrets. Use .env (gitignored) for local config.
|
|
6
|
+
- Prefer pure functions; isolate side effects.
|
|
7
|
+
- Write tests alongside code changes when behavior changes.
|
|
8
|
+
|
|
9
|
+
# Before modifying code
|
|
10
|
+
- Use the code-review-graph MCP tools (get_impact_radius_tool, get_blast_radius) to check downstream impact.
|
|
11
|
+
- For unfamiliar symbols, use sverklo_lookup or sverklo_refs first.
|
|
12
|
+
- Never assume context. If the codebase has CLAUDE.md or AGENTS.md, read it first.
|
|
13
|
+
|
|
14
|
+
# Style
|
|
15
|
+
- Match existing code style. Do not reformat unrelated lines.
|
|
16
|
+
- No "helper" refactors mixed with feature changes.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
name: publish-pypi
|
|
2
|
+
|
|
3
|
+
# Publishes compile-pdf-rewrite to PyPI when a v*.*.* tag is pushed.
|
|
4
|
+
#
|
|
5
|
+
# Auth: PyPI API token stored in PYPI_TOKEN repository secret.
|
|
6
|
+
|
|
7
|
+
on:
|
|
8
|
+
push:
|
|
9
|
+
tags:
|
|
10
|
+
- "v*.*.*"
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
build:
|
|
14
|
+
name: build sdist + wheel
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
|
|
18
|
+
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
|
|
19
|
+
with:
|
|
20
|
+
python-version: "3.12"
|
|
21
|
+
- run: pip install --upgrade pip build
|
|
22
|
+
- run: python -m build --sdist --wheel --outdir dist/
|
|
23
|
+
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
|
24
|
+
with:
|
|
25
|
+
name: dist
|
|
26
|
+
path: dist/*
|
|
27
|
+
|
|
28
|
+
publish:
|
|
29
|
+
name: publish to PyPI
|
|
30
|
+
needs: build
|
|
31
|
+
runs-on: ubuntu-latest
|
|
32
|
+
steps:
|
|
33
|
+
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
|
34
|
+
with:
|
|
35
|
+
name: dist
|
|
36
|
+
path: dist/
|
|
37
|
+
- uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # release/v1
|
|
38
|
+
with:
|
|
39
|
+
password: ${{ secrets.PYPI_TOKEN }}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# compile-pdf-rewrite
|
|
2
|
+
|
|
3
|
+
compile-pdf-rewrite
|
|
4
|
+
|
|
5
|
+
## Code Review & Blast-Radius Protocol
|
|
6
|
+
- Before edits: run code-review-graph impact tools on changed symbols
|
|
7
|
+
- After edits: ensure tests pass before commit
|
|
8
|
+
- CodeRabbit reviews PRs automatically; Cursor BugBot provides second opinion
|
|
9
|
+
- Never disable the code-review-graph Launch Agent
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: compile-pdf-rewrite
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CompilePDF rewrite producer.
|
|
5
|
+
Project-URL: Homepage, https://compilepdf.com
|
|
6
|
+
Project-URL: Repository, https://github.com/printwithsynergy/compile-pdf-rewrite
|
|
7
|
+
Project-URL: Issues, https://github.com/printwithsynergy/compile-pdf-rewrite/issues
|
|
8
|
+
Author-email: Print With Synergy <iam@quincy.codes>
|
|
9
|
+
License: AGPL-3.0-or-later
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Multimedia :: Graphics
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Requires-Python: >=3.12
|
|
18
|
+
Requires-Dist: click>=8.1
|
|
19
|
+
Requires-Dist: codex-pdf<2.0,>=1.15.0
|
|
20
|
+
Requires-Dist: compile-pdf-core<1.0,>=0.1.0
|
|
21
|
+
Requires-Dist: fastapi>=0.110
|
|
22
|
+
Requires-Dist: pikepdf>=8.13
|
|
23
|
+
Requires-Dist: pillow>=10.2
|
|
24
|
+
Requires-Dist: pydantic>=2.6
|
|
25
|
+
Requires-Dist: structlog>=24.1
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: mypy>=1.9; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest-cov>=4.1; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# compile-pdf-rewrite
|
|
35
|
+
|
|
36
|
+
Object-tree mutations for CompilePDF: OCG, metadata, colour-space, hygiene, lifecycle.
|
|
37
|
+
|
|
38
|
+
Fifteen mutations across structural, hygiene, and lifecycle categories. OCG flips, page lifecycle ops, page-box patches, metadata set/strip, colour-space swap, JavaScript strip, PDF/X pin. Three-layer verifier: schema, determinism, nothing-else-touched.
|
|
39
|
+
|
|
40
|
+
## Install
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
uv pip install compile-pdf-rewrite
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Position in the stack
|
|
47
|
+
|
|
48
|
+
One of four [CompilePDF](https://compilepdf.com) producers (trap, impose, marks, rewrite). Each lives in its own repo and PyPI package so you install only what you need. Producers depend on `compile-pdf-core`, never on each other.
|
|
49
|
+
|
|
50
|
+
- Repo: https://github.com/printwithsynergy/compile-pdf-rewrite
|
|
51
|
+
- Deployment host: https://github.com/printwithsynergy/compile-pdf
|
|
52
|
+
- License: AGPL-3.0-or-later
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# compile-pdf-rewrite
|
|
2
|
+
|
|
3
|
+
Object-tree mutations for CompilePDF: OCG, metadata, colour-space, hygiene, lifecycle.
|
|
4
|
+
|
|
5
|
+
Fifteen mutations across structural, hygiene, and lifecycle categories. OCG flips, page lifecycle ops, page-box patches, metadata set/strip, colour-space swap, JavaScript strip, PDF/X pin. Three-layer verifier: schema, determinism, nothing-else-touched.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
uv pip install compile-pdf-rewrite
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Position in the stack
|
|
14
|
+
|
|
15
|
+
One of four [CompilePDF](https://compilepdf.com) producers (trap, impose, marks, rewrite). Each lives in its own repo and PyPI package so you install only what you need. Producers depend on `compile-pdf-core`, never on each other.
|
|
16
|
+
|
|
17
|
+
- Repo: https://github.com/printwithsynergy/compile-pdf-rewrite
|
|
18
|
+
- Deployment host: https://github.com/printwithsynergy/compile-pdf
|
|
19
|
+
- License: AGPL-3.0-or-later
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "compile-pdf-rewrite"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "CompilePDF rewrite producer."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = { text = "AGPL-3.0-or-later" }
|
|
7
|
+
authors = [{ name = "Print With Synergy", email = "iam@quincy.codes" }]
|
|
8
|
+
requires-python = ">=3.12"
|
|
9
|
+
classifiers = [
|
|
10
|
+
"Development Status :: 3 - Alpha",
|
|
11
|
+
"Intended Audience :: Developers",
|
|
12
|
+
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
|
|
13
|
+
"Programming Language :: Python :: 3",
|
|
14
|
+
"Programming Language :: Python :: 3.12",
|
|
15
|
+
"Topic :: Multimedia :: Graphics",
|
|
16
|
+
"Topic :: Software Development :: Libraries",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
dependencies = [
|
|
20
|
+
"compile-pdf-core>=0.1.0,<1.0",
|
|
21
|
+
"codex-pdf>=1.15.0,<2.0",
|
|
22
|
+
"fastapi>=0.110",
|
|
23
|
+
"pikepdf>=8.13",
|
|
24
|
+
"pydantic>=2.6",
|
|
25
|
+
"click>=8.1",
|
|
26
|
+
"structlog>=24.1",
|
|
27
|
+
"Pillow>=10.2",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
dev = [
|
|
32
|
+
"pytest>=8.0",
|
|
33
|
+
"pytest-asyncio>=0.23",
|
|
34
|
+
"pytest-cov>=4.1",
|
|
35
|
+
"ruff>=0.4",
|
|
36
|
+
"mypy>=1.9",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.scripts]
|
|
40
|
+
compile-pdf-rewrite = "compile_pdf_rewrite.cli:main"
|
|
41
|
+
|
|
42
|
+
[project.urls]
|
|
43
|
+
Homepage = "https://compilepdf.com"
|
|
44
|
+
Repository = "https://github.com/printwithsynergy/compile-pdf-rewrite"
|
|
45
|
+
Issues = "https://github.com/printwithsynergy/compile-pdf-rewrite/issues"
|
|
46
|
+
|
|
47
|
+
[build-system]
|
|
48
|
+
requires = ["hatchling"]
|
|
49
|
+
build-backend = "hatchling.build"
|
|
50
|
+
|
|
51
|
+
[tool.hatch.build.targets.wheel]
|
|
52
|
+
packages = ["src/compile_pdf_rewrite"]
|
|
53
|
+
|
|
54
|
+
[tool.ruff]
|
|
55
|
+
line-length = 100
|
|
56
|
+
target-version = "py312"
|
|
57
|
+
|
|
58
|
+
[tool.ruff.lint]
|
|
59
|
+
select = ["E", "F", "I", "N", "W", "UP", "B", "SIM", "C4", "RET"]
|
|
60
|
+
ignore = ["E501"]
|
|
61
|
+
|
|
62
|
+
[tool.pytest.ini_options]
|
|
63
|
+
testpaths = ["tests"]
|
|
64
|
+
python_files = ["test_*.py"]
|
|
65
|
+
addopts = ["-ra", "--strict-markers", "--strict-config"]
|
|
66
|
+
asyncio_mode = "auto"
|
|
67
|
+
|
|
68
|
+
[tool.coverage.run]
|
|
69
|
+
source = ["src/compile_pdf_rewrite"]
|
|
70
|
+
branch = true
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Rewrite producer — single-PDF-in/out object-tree mutations.
|
|
2
|
+
|
|
3
|
+
Per spec §2.1 — 15 in-scope mutations across structural / hygiene /
|
|
4
|
+
lifecycle / page-level categories. **No content-stream surgery** (font
|
|
5
|
+
subsetting, image recompression, color reflow are out of scope and
|
|
6
|
+
gated by a STOP-gate).
|
|
7
|
+
|
|
8
|
+
Module structure (lands in Phase 1.x):
|
|
9
|
+
|
|
10
|
+
- ``compile_pdf_rewrite.engine`` — pikepdf-driven mutator
|
|
11
|
+
- ``compile_pdf_rewrite.plan_schema`` — JSON Schema validator
|
|
12
|
+
- ``compile_pdf_rewrite.verify`` — three-layer post-condition checks (§2.3)
|
|
13
|
+
- ``compile_pdf_rewrite.api`` — ``router`` exposing /v1/rewrite/apply
|
|
14
|
+
- ``compile_pdf_rewrite.cli`` — ``register(group)`` for the top-level CLI
|
|
15
|
+
|
|
16
|
+
Codex surface consumed (read-only context for plan validation):
|
|
17
|
+
|
|
18
|
+
- :class:`codex_pdf.CodexDocument` — the document model rewrite plans
|
|
19
|
+
reference for page-index / OCG / metadata addressing.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
from codex_pdf import CodexDocument
|
|
25
|
+
|
|
26
|
+
from compile_pdf_core.version import REWRITE_SCHEMA_VERSION
|
|
27
|
+
|
|
28
|
+
__all__ = ["CodexDocument", "REWRITE_SCHEMA_VERSION"]
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""FastAPI router for the rewrite producer.
|
|
2
|
+
|
|
3
|
+
Mounts under ``/v1/rewrite`` from :mod:`compile_pdf.api.main`. Single
|
|
4
|
+
endpoint today: ``POST /v1/rewrite/apply``.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import base64
|
|
10
|
+
import hashlib
|
|
11
|
+
|
|
12
|
+
import structlog
|
|
13
|
+
from fastapi import APIRouter, HTTPException, Request, status
|
|
14
|
+
from pydantic import BaseModel, Field
|
|
15
|
+
|
|
16
|
+
from compile_pdf_core.cache import compute_cache_key, hash_canonical_plan
|
|
17
|
+
from compile_pdf_core.retention import (
|
|
18
|
+
parse_consent,
|
|
19
|
+
persist_if_opted_in,
|
|
20
|
+
resolve_tenant,
|
|
21
|
+
)
|
|
22
|
+
from compile_pdf_rewrite.engine import RewritePlanError, apply_plan
|
|
23
|
+
from compile_pdf_rewrite.plan_schema import RewritePlan
|
|
24
|
+
from compile_pdf_rewrite.verify import verify_rewrite
|
|
25
|
+
from compile_pdf_core.version import (
|
|
26
|
+
CODEX_DOCUMENT_SCHEMA_VERSION_PIN,
|
|
27
|
+
REWRITE_SCHEMA_VERSION,
|
|
28
|
+
VERSION,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
logger = structlog.get_logger(__name__)
|
|
32
|
+
|
|
33
|
+
router = APIRouter()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class RewriteApplyRequest(BaseModel):
    """Request envelope: an inline base64-encoded PDF + a plan.

    Bytes-in / bytes-out. Lineage records persist to the configured S3
    bucket asynchronously and are addressable by the returned
    ``cache_key`` (Phase 5 lights up the actual store).
    """

    # Unknown top-level keys are a validation error rather than being ignored.
    model_config = {"extra": "forbid"}

    # Base64 text of the source PDF. Only non-emptiness is enforced here;
    # the actual base64 decoding (and its 400 on failure) happens in
    # ``rewrite_apply``.
    input_pdf_b64: str = Field(min_length=1)
    # The rewrite plan to apply, validated as a ``RewritePlan``.
    plan: RewritePlan
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class RewriteApplyResponse(BaseModel):
    """Response envelope. Output bytes are returned base64 so the
    transport stays JSON; bypassed by the streaming variant in Phase 1.x."""

    # Unknown keys are a validation error rather than being ignored.
    model_config = {"extra": "forbid"}

    # Base64 (ASCII) encoding of the engine's output bytes.
    output_pdf_b64: str
    # ``pdf_sha256`` as reported by the engine result — presumably the
    # SHA-256 of the output PDF; confirm against ``engine.apply_plan``.
    pdf_sha256: str
    # Hex SHA-256 of the decoded input PDF bytes.
    input_sha256: str
    # Hash of the plan's JSON-mode dump, from ``hash_canonical_plan``.
    plan_sha256: str
    # Key produced by ``compute_cache_key`` from the hashes above plus the
    # codex-pdf package / schema versions.
    cache_key: str
    # ``rewrite_apply`` always reports False.
    cache_hit: bool = False
    # ``ops_applied`` as reported by the engine result.
    ops_applied: int
    # Version stamps; default to the constants imported from
    # ``compile_pdf_core.version``.
    schema_version: str = REWRITE_SCHEMA_VERSION
    compile_version: str = VERSION
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@router.post("/apply", response_model=RewriteApplyResponse, status_code=status.HTTP_200_OK)
async def rewrite_apply(payload: RewriteApplyRequest, request: Request) -> RewriteApplyResponse:
    """Run a rewrite plan against the base64 PDF carried in ``payload``.

    Steps, in order: decode the input, hash input and plan, derive the
    cache key, run the engine, verify the output, then build the response
    and hand it to the retention layer.

    Verification (spec §2.3) runs server-side before anything is
    returned. This handler calls the verifier with
    ``determinism_replay=False`` and only gates on ``layer1_schema`` and
    ``layer3_unchanged``.

    Raises:
        HTTPException: 400 when ``input_pdf_b64`` is not valid base64 or
            decodes to nothing; 422 when the engine rejects the plan;
            500 when the codex-pdf schema constants cannot be imported or
            when verification fails (the plan was accepted but the output
            violates the post-conditions).
    """
    plan = payload.plan

    try:
        pdf_bytes = base64.b64decode(payload.input_pdf_b64, validate=True)
    except (ValueError, TypeError) as err:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"input_pdf_b64 is not valid base64: {err}",
        ) from err

    if len(pdf_bytes) == 0:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="input is empty")

    input_digest = hashlib.sha256(pdf_bytes).hexdigest()
    plan_digest = hash_canonical_plan(plan.model_dump(mode="json"))

    # Imported lazily so a broken codex-pdf install surfaces as an HTTP 500
    # on this endpoint rather than an import-time failure of the module.
    try:
        from codex_pdf.color import COLOR_SCHEMA_VERSION
        from codex_pdf.geom import GEOM_SCHEMA_VERSION
    except ImportError as err:  # pragma: no cover — codex-pdf is a hard dep
        raise HTTPException(
            status_code=500, detail=f"codex-pdf surface unavailable: {err}"
        ) from err

    key = compute_cache_key(
        producer="rewrite",
        input_sha256=input_digest,
        canonical_plan_sha256=plan_digest,
        codex_pdf_package_version=_resolve_codex_pdf_version(),
        color_schema_version=COLOR_SCHEMA_VERSION,
        geom_schema_version=GEOM_SCHEMA_VERSION,
        codex_document_schema_version=CODEX_DOCUMENT_SCHEMA_VERSION_PIN,
    )

    # Only 16-character digest prefixes go to the log.
    logger.info(
        "rewrite.apply.start",
        ops=len(plan.ops),
        input_sha256=input_digest[:16],
        plan_sha256=plan_digest[:16],
        cache_key=key[:16],
    )

    try:
        outcome = apply_plan(pdf_bytes, plan)
    except RewritePlanError as err:
        raise HTTPException(status_code=422, detail=f"plan rejected: {err}") from err

    report = verify_rewrite(
        input_bytes=pdf_bytes,
        output_bytes=outcome.output_bytes,
        plan=plan,
        determinism_replay=False,
    )
    if not report.layer1_schema or not report.layer3_unchanged:
        logger.error("rewrite.apply.verify_failed", failures=report.failures)
        raise HTTPException(
            status_code=500,
            detail={"error": "verify failed", "failures": report.failures},
        )

    consent = parse_consent(request)
    envelope = RewriteApplyResponse(
        output_pdf_b64=base64.b64encode(outcome.output_bytes).decode("ascii"),
        pdf_sha256=outcome.pdf_sha256,
        input_sha256=input_digest,
        plan_sha256=plan_digest,
        cache_key=key,
        cache_hit=False,
        ops_applied=outcome.ops_applied,
    )
    was_retained = persist_if_opted_in(
        consent=consent,
        producer="rewrite",
        tenant=resolve_tenant(request),
        input_bytes=pdf_bytes,
        output_bytes=outcome.output_bytes,
        result=envelope.model_dump(mode="json"),
        input_sha256=input_digest,
    )
    logger.info(
        "rewrite.apply.ok",
        output_sha256=outcome.pdf_sha256[:16],
        ops_applied=outcome.ops_applied,
        consent=consent,
        retained=was_retained,
    )
    return envelope
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _resolve_codex_pdf_version() -> str:
    """Return the installed ``codex_pdf`` version as a string.

    Falls back to the literal ``"unknown"`` when ``codex_pdf`` (or its
    ``__version__`` attribute) cannot be imported.
    """
    try:
        from codex_pdf import __version__ as installed
    except ImportError:
        resolved = "unknown"
    else:
        # Coerce in case the package exposes a non-string version object.
        resolved = str(installed)
    return resolved
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Click subcommand registration for ``compile-pdf rewrite``.
|
|
2
|
+
|
|
3
|
+
Local mode reads the input + plan from disk and runs the engine in-process.
|
|
4
|
+
HTTP mode (``COMPILE_API_BASE`` set) is wired in Phase 1.x once the
|
|
5
|
+
sidecar deploy lights up.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import click
|
|
15
|
+
|
|
16
|
+
from compile_pdf_rewrite.engine import RewritePlanError, apply_plan
|
|
17
|
+
from compile_pdf_rewrite.plan_schema import RewritePlan, rewrite_plan_json_schema
|
|
18
|
+
from compile_pdf_rewrite.verify import verify_rewrite
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def register(group: click.Group) -> None:
    """Attach the ``rewrite`` and ``rewrite-schema`` subcommands to ``group``.

    ``rewrite`` reads a JSON plan and an input PDF from disk, runs the
    engine in-process, optionally verifies the result, and writes the
    output PDF. ``rewrite-schema`` (hidden) prints the plan JSON Schema.

    Exit codes used by ``rewrite``:
        3 — the plan file could not be read, parsed, or validated.
        4 — the engine rejected the plan, or verification failed.

    Args:
        group: The click group the subcommands are registered on.
    """

    @group.command("rewrite", help="Apply a rewrite plan to a PDF.")
    @click.option(
        "--plan",
        "plan_path",
        type=click.Path(exists=True, dir_okay=False, path_type=Path),
        required=True,
        help="JSON rewrite-plan document.",
    )
    @click.option(
        "--verify/--no-verify",
        default=True,
        help="Run three-layer post-condition checks (spec §2.3) before writing output.",
    )
    @click.argument(
        "input_path",
        type=click.Path(exists=True, dir_okay=False, path_type=Path),
    )
    @click.argument(
        "output_path",
        type=click.Path(dir_okay=False, path_type=Path),
    )
    def rewrite_cmd(
        plan_path: Path,
        input_path: Path,
        output_path: Path,
        verify: bool,
    ) -> None:
        # Reading and JSON-decoding the plan sit inside the same ``try`` as
        # schema validation so that a malformed or undecodable plan file is
        # reported as a plan failure (exit 3) instead of a raw traceback.
        try:
            plan_dict = json.loads(plan_path.read_text(encoding="utf-8"))
            plan = RewritePlan.model_validate(plan_dict)
        except Exception as exc:
            click.echo(f"plan validation failed: {exc}", err=True)
            sys.exit(3)

        input_bytes = input_path.read_bytes()
        try:
            result = apply_plan(input_bytes, plan)
        except RewritePlanError as exc:
            click.echo(f"plan rejected: {exc}", err=True)
            sys.exit(4)

        if verify:
            check = verify_rewrite(
                input_bytes=input_bytes,
                output_bytes=result.output_bytes,
                plan=plan,
            )
            if not check.passed:
                click.echo("verify failed:", err=True)
                for failure in check.failures:
                    click.echo(f"  - {failure}", err=True)
                # Exit before anything is written: a failed verify never
                # leaves an output file behind.
                sys.exit(4)

        output_path.write_bytes(result.output_bytes)
        # Machine-readable summary on stdout.
        click.echo(
            json.dumps(
                {
                    "ops_applied": result.ops_applied,
                    "pdf_sha256": result.pdf_sha256,
                    "output": str(output_path),
                },
                indent=2,
            )
        )

    @group.command("rewrite-schema", hidden=True, help="Dump the rewrite-plan JSON Schema.")
    def rewrite_schema_cmd() -> None:
        click.echo(json.dumps(rewrite_plan_json_schema(), indent=2))


@click.group()
def main() -> None:
    """Standalone CLI for the rewrite producer.

    The package's console script is declared as
    ``compile_pdf_rewrite.cli:main``, so this group must exist for the
    installed ``compile-pdf-rewrite`` command to load.
    """


# Expose the same subcommands on the standalone entry point that a host
# CLI gets by calling ``register`` on its own group.
register(main)
|