compile-pdf-rewrite 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ language: en-US
2
+ reviews:
3
+ profile: chill
4
+ request_changes_workflow: false
5
+ high_level_summary: true
6
+ poem: false
7
+ review_status: true
8
+ collapse_walkthrough: true
9
+ auto_review:
10
+ enabled: true
11
+ drafts: false
12
+ path_filters:
13
+ - "!dist/**"
14
+ - "!build/**"
15
+ - "!.next/**"
16
+ - "!node_modules/**"
17
+ - "!**/__pycache__/**"
18
+ - "!**/*.min.js"
19
+ - "!**/*.lock"
20
+ - "!**/package-lock.json"
21
+ - "!**/uv.lock"
22
+ - "!**/poetry.lock"
23
+ path_instructions:
24
+ - path: "**/*.{ts,tsx,js,jsx}"
25
+ instructions: "Flag unhandled promises, missing null checks, hardcoded secrets/env vars, and unsafe `any` usage. Skip stylistic preferences — Prettier handles those."
26
+ - path: "**/*.py"
27
+ instructions: "Flag unhandled exceptions, missing type hints on public functions, hardcoded secrets, and SQL injection risks. Skip style — Ruff handles those."
28
+ chat:
29
+ auto_reply: true
@@ -0,0 +1,21 @@
1
+ ---
2
+ description: PrintWithSynergy baseline rules
3
+ alwaysApply: true
4
+ ---
5
+
6
+ You are working in this repository, part of the printwithsynergy organization.
7
+
8
+ # Conventions
9
+ - Conventional commits (feat:, fix:, chore:, refactor:, docs:, test:)
10
+ - Never commit secrets. Use .env (gitignored) for local config.
11
+ - Prefer pure functions; isolate side effects.
12
+ - Write tests alongside code changes when behavior changes.
13
+
14
+ # Before modifying code
15
+ - Use the code-review-graph MCP tools (get_impact_radius_tool, get_blast_radius) to check downstream impact.
16
+ - For unfamiliar symbols, use sverklo_lookup or sverklo_refs first.
17
+ - Never assume context. If the codebase has CLAUDE.md or AGENTS.md, read it first.
18
+
19
+ # Style
20
+ - Match existing code style. Do not reformat unrelated lines.
21
+ - No "helper" refactors mixed with feature changes.
@@ -0,0 +1,16 @@
1
+ You are working in the `compile-pdf-rewrite` repository, part of the printwithsynergy organization.
2
+
3
+ # Conventions
4
+ - Conventional commits (feat:, fix:, chore:, refactor:, docs:, test:)
5
+ - Never commit secrets. Use .env (gitignored) for local config.
6
+ - Prefer pure functions; isolate side effects.
7
+ - Write tests alongside code changes when behavior changes.
8
+
9
+ # Before modifying code
10
+ - Use the code-review-graph MCP tools (get_impact_radius_tool, get_blast_radius) to check downstream impact.
11
+ - For unfamiliar symbols, use sverklo_lookup or sverklo_refs first.
12
+ - Never assume context. If the codebase has CLAUDE.md or AGENTS.md, read it first.
13
+
14
+ # Style
15
+ - Match existing code style. Do not reformat unrelated lines.
16
+ - No "helper" refactors mixed with feature changes.
@@ -0,0 +1,11 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "pip"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
7
+ open-pull-requests-limit: 5
8
+ - package-ecosystem: "github-actions"
9
+ directory: "/"
10
+ schedule:
11
+ interval: "monthly"
@@ -0,0 +1,39 @@
1
+ name: publish-pypi
2
+
3
+ # Publishes compile-pdf-rewrite to PyPI when a v*.*.* tag is pushed.
4
+ #
5
+ # Auth: PyPI API token stored in PYPI_TOKEN repository secret.
6
+
7
+ on:
8
+ push:
9
+ tags:
10
+ - "v*.*.*"
11
+
12
+ jobs:
13
+ build:
14
+ name: build sdist + wheel
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
18
+ - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
19
+ with:
20
+ python-version: "3.12"
21
+ - run: pip install --upgrade pip build
22
+ - run: python -m build --sdist --wheel --outdir dist/
23
+ - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
24
+ with:
25
+ name: dist
26
+ path: dist/*
27
+
28
+ publish:
29
+ name: publish to PyPI
30
+ needs: build
31
+ runs-on: ubuntu-latest
32
+ steps:
33
+ - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
34
+ with:
35
+ name: dist
36
+ path: dist/
37
+ - uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # release/v1
38
+ with:
39
+ password: ${{ secrets.PYPI_TOKEN }}
@@ -0,0 +1,9 @@
1
+ # compile-pdf-rewrite
2
+
3
+ Object-tree mutations for CompilePDF: OCG, metadata, colour-space, hygiene, lifecycle.
4
+
5
+ ## Code Review & Blast-Radius Protocol
6
+ - Before edits: run code-review-graph impact tools on changed symbols
7
+ - After edits: ensure tests pass before commit
8
+ - CodeRabbit reviews PRs automatically; Cursor BugBot provides second opinion
9
+ - Never disable the code-review-graph Launch Agent
@@ -0,0 +1,52 @@
1
+ Metadata-Version: 2.4
2
+ Name: compile-pdf-rewrite
3
+ Version: 0.1.0
4
+ Summary: CompilePDF rewrite producer.
5
+ Project-URL: Homepage, https://compilepdf.com
6
+ Project-URL: Repository, https://github.com/printwithsynergy/compile-pdf-rewrite
7
+ Project-URL: Issues, https://github.com/printwithsynergy/compile-pdf-rewrite/issues
8
+ Author-email: Print With Synergy <iam@quincy.codes>
9
+ License: AGPL-3.0-or-later
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Multimedia :: Graphics
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Requires-Python: >=3.12
18
+ Requires-Dist: click>=8.1
19
+ Requires-Dist: codex-pdf<2.0,>=1.15.0
20
+ Requires-Dist: compile-pdf-core<1.0,>=0.1.0
21
+ Requires-Dist: fastapi>=0.110
22
+ Requires-Dist: pikepdf>=8.13
23
+ Requires-Dist: pillow>=10.2
24
+ Requires-Dist: pydantic>=2.6
25
+ Requires-Dist: structlog>=24.1
26
+ Provides-Extra: dev
27
+ Requires-Dist: mypy>=1.9; extra == 'dev'
28
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
29
+ Requires-Dist: pytest-cov>=4.1; extra == 'dev'
30
+ Requires-Dist: pytest>=8.0; extra == 'dev'
31
+ Requires-Dist: ruff>=0.4; extra == 'dev'
32
+ Description-Content-Type: text/markdown
33
+
34
+ # compile-pdf-rewrite
35
+
36
+ Object-tree mutations for CompilePDF: OCG, metadata, colour-space, hygiene, lifecycle.
37
+
38
+ Fifteen mutations across structural, hygiene, and lifecycle categories. OCG flips, page lifecycle ops, page-box patches, metadata set/strip, colour-space swap, JavaScript strip, PDF/X pin. Three-layer verifier: schema, determinism, nothing-else-touched.
39
+
40
+ ## Install
41
+
42
+ ```bash
43
+ uv pip install compile-pdf-rewrite
44
+ ```
45
+
46
+ ## Position in the stack
47
+
48
+ One of four [CompilePDF](https://compilepdf.com) producers (trap, impose, marks, rewrite). Each lives in its own repo and PyPI package so you install only what you need. Producers depend on `compile-pdf-core`, never on each other.
49
+
50
+ - Repo: https://github.com/printwithsynergy/compile-pdf-rewrite
51
+ - Deployment host: https://github.com/printwithsynergy/compile-pdf
52
+ - License: AGPL-3.0-or-later
@@ -0,0 +1,19 @@
1
+ # compile-pdf-rewrite
2
+
3
+ Object-tree mutations for CompilePDF: OCG, metadata, colour-space, hygiene, lifecycle.
4
+
5
+ Fifteen mutations across structural, hygiene, and lifecycle categories. OCG flips, page lifecycle ops, page-box patches, metadata set/strip, colour-space swap, JavaScript strip, PDF/X pin. Three-layer verifier: schema, determinism, nothing-else-touched.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ uv pip install compile-pdf-rewrite
11
+ ```
12
+
13
+ ## Position in the stack
14
+
15
+ One of four [CompilePDF](https://compilepdf.com) producers (trap, impose, marks, rewrite). Each lives in its own repo and PyPI package so you install only what you need. Producers depend on `compile-pdf-core`, never on each other.
16
+
17
+ - Repo: https://github.com/printwithsynergy/compile-pdf-rewrite
18
+ - Deployment host: https://github.com/printwithsynergy/compile-pdf
19
+ - License: AGPL-3.0-or-later
@@ -0,0 +1,70 @@
1
+ [project]
2
+ name = "compile-pdf-rewrite"
3
+ version = "0.1.0"
4
+ description = "CompilePDF rewrite producer."
5
+ readme = "README.md"
6
+ license = { text = "AGPL-3.0-or-later" }
7
+ authors = [{ name = "Print With Synergy", email = "iam@quincy.codes" }]
8
+ requires-python = ">=3.12"
9
+ classifiers = [
10
+ "Development Status :: 3 - Alpha",
11
+ "Intended Audience :: Developers",
12
+ "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
13
+ "Programming Language :: Python :: 3",
14
+ "Programming Language :: Python :: 3.12",
15
+ "Topic :: Multimedia :: Graphics",
16
+ "Topic :: Software Development :: Libraries",
17
+ ]
18
+
19
+ dependencies = [
20
+ "compile-pdf-core>=0.1.0,<1.0",
21
+ "codex-pdf>=1.15.0,<2.0",
22
+ "fastapi>=0.110",
23
+ "pikepdf>=8.13",
24
+ "pydantic>=2.6",
25
+ "click>=8.1",
26
+ "structlog>=24.1",
27
+ "Pillow>=10.2",
28
+ ]
29
+
30
+ [project.optional-dependencies]
31
+ dev = [
32
+ "pytest>=8.0",
33
+ "pytest-asyncio>=0.23",
34
+ "pytest-cov>=4.1",
35
+ "ruff>=0.4",
36
+ "mypy>=1.9",
37
+ ]
38
+
39
+ [project.scripts]
40
+ compile-pdf-rewrite = "compile_pdf_rewrite.cli:main"
41
+
42
+ [project.urls]
43
+ Homepage = "https://compilepdf.com"
44
+ Repository = "https://github.com/printwithsynergy/compile-pdf-rewrite"
45
+ Issues = "https://github.com/printwithsynergy/compile-pdf-rewrite/issues"
46
+
47
+ [build-system]
48
+ requires = ["hatchling"]
49
+ build-backend = "hatchling.build"
50
+
51
+ [tool.hatch.build.targets.wheel]
52
+ packages = ["src/compile_pdf_rewrite"]
53
+
54
+ [tool.ruff]
55
+ line-length = 100
56
+ target-version = "py312"
57
+
58
+ [tool.ruff.lint]
59
+ select = ["E", "F", "I", "N", "W", "UP", "B", "SIM", "C4", "RET"]
60
+ ignore = ["E501"]
61
+
62
+ [tool.pytest.ini_options]
63
+ testpaths = ["tests"]
64
+ python_files = ["test_*.py"]
65
+ addopts = ["-ra", "--strict-markers", "--strict-config"]
66
+ asyncio_mode = "auto"
67
+
68
+ [tool.coverage.run]
69
+ source = ["src/compile_pdf_rewrite"]
70
+ branch = true
@@ -0,0 +1,28 @@
1
+ """Rewrite producer — single-PDF-in/out object-tree mutations.
2
+
3
+ Per spec §2.1 — 15 in-scope mutations across structural / hygiene /
4
+ lifecycle / page-level categories. **No content-stream surgery** (font
5
+ subsetting, image recompression, color reflow are out of scope and
6
+ gated by a STOP-gate).
7
+
8
+ Module structure (lands in Phase 1.x):
9
+
10
+ - ``compile_pdf_rewrite.engine`` — pikepdf-driven mutator
11
+ - ``compile_pdf_rewrite.plan_schema`` — JSON Schema validator
12
+ - ``compile_pdf_rewrite.verify`` — three-layer post-condition checks (§2.3)
13
+ - ``compile_pdf_rewrite.api`` — ``router`` exposing /v1/rewrite/apply
14
+ - ``compile_pdf_rewrite.cli`` — ``register(group)`` for the top-level CLI
15
+
16
+ Codex surface consumed (read-only context for plan validation):
17
+
18
+ - :class:`codex_pdf.CodexDocument` — the document model rewrite plans
19
+ reference for page-index / OCG / metadata addressing.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from codex_pdf import CodexDocument
25
+
26
+ from compile_pdf_core.version import REWRITE_SCHEMA_VERSION
27
+
28
+ __all__ = ["CodexDocument", "REWRITE_SCHEMA_VERSION"]
@@ -0,0 +1,168 @@
1
+ """FastAPI router for the rewrite producer.
2
+
3
+ Mounts under ``/v1/rewrite`` from :mod:`compile_pdf.api.main`. Single
4
+ endpoint today: ``POST /v1/rewrite/apply``.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import base64
10
+ import hashlib
11
+
12
+ import structlog
13
+ from fastapi import APIRouter, HTTPException, Request, status
14
+ from pydantic import BaseModel, Field
15
+
16
+ from compile_pdf_core.cache import compute_cache_key, hash_canonical_plan
17
+ from compile_pdf_core.retention import (
18
+ parse_consent,
19
+ persist_if_opted_in,
20
+ resolve_tenant,
21
+ )
22
+ from compile_pdf_rewrite.engine import RewritePlanError, apply_plan
23
+ from compile_pdf_rewrite.plan_schema import RewritePlan
24
+ from compile_pdf_rewrite.verify import verify_rewrite
25
+ from compile_pdf_core.version import (
26
+ CODEX_DOCUMENT_SCHEMA_VERSION_PIN,
27
+ REWRITE_SCHEMA_VERSION,
28
+ VERSION,
29
+ )
30
+
31
+ logger = structlog.get_logger(__name__)
32
+
33
+ router = APIRouter()
34
+
35
+
36
class RewriteApplyRequest(BaseModel):
    """Body of ``POST /apply``: one inline base64-encoded PDF and its plan.

    The exchange is bytes-in / bytes-out. Lineage records persist to the
    configured S3 bucket asynchronously and can be looked up through the
    ``cache_key`` returned in the response (Phase 5 lights up the actual
    store).
    """

    # Any key not declared below is a validation error.
    model_config = {"extra": "forbid"}

    # Required, non-empty base64 text; the endpoint performs the decode.
    input_pdf_b64: str = Field(..., min_length=1)
    # Rewrite plan, validated against the RewritePlan model.
    plan: RewritePlan
48
+
49
+
50
class RewriteApplyResponse(BaseModel):
    """Result envelope for ``POST /apply``.

    The rewritten PDF travels as base64 so the transport stays JSON; the
    streaming variant planned for Phase 1.x bypasses this envelope.
    """

    # Any key not declared below is a validation error.
    model_config = {"extra": "forbid"}

    # Rewritten PDF, base64-encoded.
    output_pdf_b64: str
    # Digest of the output as reported by the engine result.
    pdf_sha256: str
    # SHA-256 hex digest of the decoded input bytes.
    input_sha256: str
    # Digest of the canonicalised plan.
    plan_sha256: str
    # Key derived from the input, the plan and the codex-pdf versions.
    cache_key: str
    cache_hit: bool = Field(default=False)
    # Operation count reported by the engine result.
    ops_applied: int
    schema_version: str = Field(default=REWRITE_SCHEMA_VERSION)
    compile_version: str = Field(default=VERSION)
65
+
66
+
67
@router.post("/apply", response_model=RewriteApplyResponse, status_code=status.HTTP_200_OK)
async def rewrite_apply(payload: RewriteApplyRequest, request: Request) -> RewriteApplyResponse:
    """Run ``payload.plan`` against the inline PDF and return the rewritten bytes.

    Order of work: decode the base64 input, hash the input and the plan,
    derive the cache key, apply the plan, verify the output (spec §2.3),
    then hand input, output and response to the retention layer.

    A failed verify is a 500: the plan was accepted, but the engine's
    output does not satisfy the post-conditions.

    Raises:
        HTTPException: 400 if ``input_pdf_b64`` is not valid base64 or
            decodes to zero bytes; 422 if the engine rejects the plan;
            500 if the codex-pdf schema constants cannot be imported or
            the verification layers checked here fail.
    """
    rewrite_plan = payload.plan

    # validate=True makes the decoder reject characters outside the alphabet.
    try:
        pdf_in = base64.b64decode(payload.input_pdf_b64, validate=True)
    except (ValueError, TypeError) as exc:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"input_pdf_b64 is not valid base64: {exc}",
        ) from exc
    if len(pdf_in) == 0:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="input is empty")

    input_digest = hashlib.sha256(pdf_in).hexdigest()
    plan_digest = hash_canonical_plan(rewrite_plan.model_dump(mode="json"))

    try:
        from codex_pdf.color import COLOR_SCHEMA_VERSION
        from codex_pdf.geom import GEOM_SCHEMA_VERSION
    except ImportError as exc:  # pragma: no cover — codex-pdf is a hard dep
        raise HTTPException(
            status_code=500, detail=f"codex-pdf surface unavailable: {exc}"
        ) from exc

    # Everything that takes part in the cache key, gathered in one place.
    key_inputs = {
        "producer": "rewrite",
        "input_sha256": input_digest,
        "canonical_plan_sha256": plan_digest,
        "codex_pdf_package_version": _resolve_codex_pdf_version(),
        "color_schema_version": COLOR_SCHEMA_VERSION,
        "geom_schema_version": GEOM_SCHEMA_VERSION,
        "codex_document_schema_version": CODEX_DOCUMENT_SCHEMA_VERSION_PIN,
    }
    key = compute_cache_key(**key_inputs)

    # Digests are truncated to 16 characters in log events.
    logger.info(
        "rewrite.apply.start",
        ops=len(rewrite_plan.ops),
        input_sha256=input_digest[:16],
        plan_sha256=plan_digest[:16],
        cache_key=key[:16],
    )

    try:
        result = apply_plan(pdf_in, rewrite_plan)
    except RewritePlanError as exc:
        raise HTTPException(status_code=422, detail=f"plan rejected: {exc}") from exc

    # Determinism replay is not requested here, so only the schema layer and
    # the nothing-else-touched layer gate the response.
    report = verify_rewrite(
        input_bytes=pdf_in,
        output_bytes=result.output_bytes,
        plan=rewrite_plan,
        determinism_replay=False,
    )
    if not report.layer1_schema or not report.layer3_unchanged:
        logger.error("rewrite.apply.verify_failed", failures=report.failures)
        raise HTTPException(
            status_code=500,
            detail={"error": "verify failed", "failures": report.failures},
        )

    consent = parse_consent(request)
    response = RewriteApplyResponse(
        output_pdf_b64=base64.b64encode(result.output_bytes).decode("ascii"),
        pdf_sha256=result.pdf_sha256,
        input_sha256=input_digest,
        plan_sha256=plan_digest,
        cache_key=key,
        cache_hit=False,
        ops_applied=result.ops_applied,
    )

    # The retention layer receives the consent value and decides what to keep.
    retained = persist_if_opted_in(
        consent=consent,
        producer="rewrite",
        tenant=resolve_tenant(request),
        input_bytes=pdf_in,
        output_bytes=result.output_bytes,
        result=response.model_dump(mode="json"),
        input_sha256=input_digest,
    )
    logger.info(
        "rewrite.apply.ok",
        output_sha256=result.pdf_sha256[:16],
        ops_applied=result.ops_applied,
        consent=consent,
        retained=retained,
    )
    return response
160
+
161
+
162
def _resolve_codex_pdf_version() -> str:
    """Return the version of the codex-pdf package Compile is running against.

    The value is one of the inputs to the rewrite cache key, so it needs to
    differ between codex-pdf releases. ``codex_pdf.__version__`` is used
    when it can be imported; otherwise the installed distribution metadata
    for ``codex-pdf`` is consulted, so a package that does not export
    ``__version__`` still yields its real version.

    Returns:
        The version as a string, or ``"unknown"`` when neither source is
        available.
    """
    try:
        from codex_pdf import __version__ as codex_version
    except ImportError:
        # Raised both when the module is missing and when it lacks
        # ``__version__``; the wheel metadata covers the second case.
        from importlib import metadata

        try:
            return str(metadata.version("codex-pdf"))
        except metadata.PackageNotFoundError:
            return "unknown"
    return str(codex_version)
@@ -0,0 +1,91 @@
1
+ """Click subcommand registration for ``compile-pdf rewrite``.
2
+
3
+ Local mode reads the input + plan from disk and runs the engine in-process.
4
+ HTTP mode (``COMPILE_API_BASE`` set) is wired in Phase 1.x once the
5
+ sidecar deploy lights up.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ import click
15
+
16
+ from compile_pdf_rewrite.engine import RewritePlanError, apply_plan
17
+ from compile_pdf_rewrite.plan_schema import RewritePlan, rewrite_plan_json_schema
18
+ from compile_pdf_rewrite.verify import verify_rewrite
19
+
20
+
21
def register(group: click.Group) -> None:
    """Attach the ``rewrite`` and ``rewrite-schema`` subcommands to ``group``.

    ``rewrite`` reads the plan and the input PDF from disk, runs the engine
    in-process, optionally verifies the result, and writes the output file
    only after every enabled check has passed.

    Exit codes of ``rewrite``:
        3: the plan file is not valid JSON or fails plan validation.
        4: the engine rejected the plan, or verification failed.
    """

    @group.command("rewrite", help="Apply a rewrite plan to a PDF.")
    @click.option(
        "--plan",
        "plan_path",
        type=click.Path(exists=True, dir_okay=False, path_type=Path),
        required=True,
        help="JSON rewrite-plan document.",
    )
    @click.option(
        "--verify/--no-verify",
        default=True,
        help="Run three-layer post-condition checks (spec §2.3) before writing output.",
    )
    @click.argument(
        "input_path",
        type=click.Path(exists=True, dir_okay=False, path_type=Path),
    )
    @click.argument(
        "output_path",
        type=click.Path(dir_okay=False, path_type=Path),
    )
    def rewrite_cmd(
        plan_path: Path,
        input_path: Path,
        output_path: Path,
        verify: bool,
    ) -> None:
        # A malformed plan file is a user error: report it the same way as a
        # schema failure instead of letting the decoder's traceback escape.
        # JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        try:
            plan_dict = json.loads(plan_path.read_text(encoding="utf-8"))
        except ValueError as exc:
            click.echo(f"plan is not valid JSON: {exc}", err=True)
            sys.exit(3)

        try:
            plan = RewritePlan.model_validate(plan_dict)
        except Exception as exc:
            click.echo(f"plan validation failed: {exc}", err=True)
            sys.exit(3)

        input_bytes = input_path.read_bytes()
        try:
            result = apply_plan(input_bytes, plan)
        except RewritePlanError as exc:
            click.echo(f"plan rejected: {exc}", err=True)
            sys.exit(4)

        if verify:
            check = verify_rewrite(
                input_bytes=input_bytes,
                output_bytes=result.output_bytes,
                plan=plan,
            )
            if not check.passed:
                click.echo("verify failed:", err=True)
                for failure in check.failures:
                    click.echo(f"  - {failure}", err=True)
                sys.exit(4)

        # Written only after verification so a failed run leaves no output.
        output_path.write_bytes(result.output_bytes)
        click.echo(
            json.dumps(
                {
                    "ops_applied": result.ops_applied,
                    "pdf_sha256": result.pdf_sha256,
                    "output": str(output_path),
                },
                indent=2,
            )
        )

    @group.command("rewrite-schema", hidden=True, help="Dump the rewrite-plan JSON Schema.")
    def rewrite_schema_cmd() -> None:
        click.echo(json.dumps(rewrite_plan_json_schema(), indent=2))


def main() -> None:
    """Console-script entry point for the ``compile-pdf-rewrite`` command.

    The package metadata points its script at ``compile_pdf_rewrite.cli:main``.
    The group is built on each call rather than at import time, so importing
    this module stays free of side effects.
    """
    cli = click.Group(name="compile-pdf-rewrite", help="CompilePDF rewrite producer.")
    register(cli)
    cli()