codex-pdf 1.7.0__tar.gz → 1.7.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/CLAUDE.md +1 -1
- codex_pdf-1.7.2/CONTRIBUTING.md +84 -0
- codex_pdf-1.7.2/PKG-INFO +134 -0
- codex_pdf-1.7.2/README.md +111 -0
- codex_pdf-1.7.2/SECURITY.md +72 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/clients/ts/package.json +1 -1
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/README.md +1 -1
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/wrangler.toml +1 -1
- codex_pdf-1.7.2/docs/architecture.md +79 -0
- codex_pdf-1.7.2/docs/cli.md +91 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/docs/contract.md +7 -7
- codex_pdf-1.7.2/docs/deploy.md +164 -0
- codex_pdf-1.7.2/docs/operations/codex-change-ripple.md +84 -0
- codex_pdf-1.7.2/docs/operations/marketing-deploy-template.md +74 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/pyproject.toml +1 -1
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/geom/path.py +10 -5
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/version.py +1 -1
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_geom.py +35 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/uv.lock +1 -1
- codex_pdf-1.7.0/APPROVALS.md +0 -36
- codex_pdf-1.7.0/PKG-INFO +0 -93
- codex_pdf-1.7.0/README.md +0 -70
- codex_pdf-1.7.0/docs/architecture.md +0 -35
- codex_pdf-1.7.0/docs/backward-compatibility.md +0 -24
- codex_pdf-1.7.0/docs/cleanup-stop-gates.md +0 -25
- codex_pdf-1.7.0/docs/cli.md +0 -44
- codex_pdf-1.7.0/docs/deploy.md +0 -131
- codex_pdf-1.7.0/docs/discovery-audit.md +0 -61
- codex_pdf-1.7.0/docs/migration-plan.md +0 -32
- codex_pdf-1.7.0/docs/operations/codex-change-ripple.md +0 -110
- codex_pdf-1.7.0/docs/operations/marketing-deploy-template.md +0 -78
- codex_pdf-1.7.0/docs/release-1.0.0.md +0 -21
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/.cursor/rules/service-ownership.mdc +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/.github/workflows/ci.yml +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/.gitignore +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/.windsurf/rules/service-ownership.md +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/Dockerfile +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/LICENSE +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/Procfile +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/clients/ts/README.md +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/clients/ts/package-lock.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/clients/ts/src/color.ts +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/clients/ts/src/index.test.ts +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/clients/ts/src/index.ts +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/clients/ts/tsconfig.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/.github/workflows/deploy.yml +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/package-lock.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/package.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/src/cache_key.ts +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/src/env.ts +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/src/handlers/extract.ts +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/src/handlers/probe.ts +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/src/index.ts +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/src/sse_tee.ts +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/test/cache_key.test.ts +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/codex-edge/tsconfig.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/docs/parity.md +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/docs/preflight-ingest.md +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/docs/service-ownership-contract.md +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/railway.speculator.toml +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/railway.toml +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/audit/mislocated-closure.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/audit/produce_surface.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/dieline_calibration_report.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/parity/codex_deep.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/parity/codex_inventory.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/parity/codex_summary.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/parity/criterion4_parser_surface.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/parity/pdfx4_deep.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/parity/pdfx4_inventory.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/parity/pdfx4_summary.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/parity/render_baseline.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/reports/parity/viewer_essentials.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/CHANGELOG.md +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-annotation.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-box.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-color-space.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-document.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-font.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-form-xobject.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-image.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-issue.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-ocg.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-output-intent.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-page-object.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-page.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-preflight-report.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-source.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-spot-colorant.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-transparency-tree.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-trap-evidence.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/codex-warning.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/schemas/v1/probe.schema.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/scripts/calibrate_dieline_heuristics.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/scripts/parity_viewer_essentials.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/scripts/produce_surface_audit.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/__init__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/api/__init__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/api/auth.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/api/blob_store.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/api/cache.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/api/main.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/api/url_ingest.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/api/warmup.pdf +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/api/warmup.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/cli.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/client/__init__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/client/http_client.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/color/__init__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/color/alt_space.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/color/color_math.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/color/curated.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/color/data/pantone_reference.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/color/normalize.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/color/pantone.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/color/resolver.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/eval/__init__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/eval/ps_type4.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/__init__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/annotations.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/color.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/common.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/content_inventory.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/dieline_detector.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/document.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/fonts.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/forms.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/images.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/ocg.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/probe.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/signals.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/structure.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/summary.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/transparency.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/extract/trapping.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/geom/__init__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/geom/box.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/geom/matrix.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/geom/tile.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/geom/units.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/models/__init__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/models/v1.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/parity.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/preflight_ingest/__init__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/preflight_ingest/adapters.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/render/__init__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/render/_common.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/render/content_stream.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/render/layer.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/render/page.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/render/separations.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/schema.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/speculator/__init__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/speculator/__main__.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/src/codex_pdf/speculator/consumer.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/conftest.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/fixtures/conforming/minimal.pdf +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/fixtures/generate_fixtures.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/fixtures/violating/no_output_intent.pdf +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/fixtures/violating/no_trim_box.pdf +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/fixtures/violating/no_xmp.pdf +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/fixtures/violating/pdf_1_4.pdf +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/golden/1.0.0/reference.json +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_alt_space.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_api.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_cache.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_cli_contract.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_client_routing.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_color.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_extract_analysis_signals.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_extract_structural.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_golden.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_golden_corpus.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_models.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_parity.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_pdf_sha256.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_preflight_ingest.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_produce_surface_audit.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_schema.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_schemas_all.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_speculator.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_summary_dieline.py +0 -0
- {codex_pdf-1.7.0 → codex_pdf-1.7.2}/tests/test_summary_spot_colors.py +0 -0
|
@@ -25,7 +25,7 @@ For new products (Forge, Trap, Impose, Marks, etc.), map capabilities to one own
|
|
|
25
25
|
|
|
26
26
|
When work spans layers, define a contract seam and keep logic in its owner service.
|
|
27
27
|
|
|
28
|
-
## Deployed surface (1.7.0)
|
|
28
|
+
## Deployed surface (1.7.2)
|
|
29
29
|
|
|
30
30
|
Codex now runs as **three services** in production. They share the
|
|
31
31
|
same content-addressed cache key format
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Contributing to codex-pdf
|
|
2
|
+
|
|
3
|
+
`codex-pdf` is the read-only PDF facts + render service for the
|
|
4
|
+
Print with Synergy / Think Neverland tool family. It owns
|
|
5
|
+
extraction, normalization, and reusable summary payloads — keeping
|
|
6
|
+
outputs deterministic, versioned, and backward-compatible.
|
|
7
|
+
|
|
8
|
+
Before opening a PR, please confirm your change fits codex's lane
|
|
9
|
+
(extraction / normalized facts / detection signals) rather than a
|
|
10
|
+
display or rules concern. UI presentation belongs to **Loupe** and
|
|
11
|
+
rule pass/fail logic belongs to **Lint** — see
|
|
12
|
+
[`docs/service-ownership-contract.md`](./docs/service-ownership-contract.md).
|
|
13
|
+
|
|
14
|
+
## Local setup
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
git clone https://github.com/printwithsynergy/codex-pdf
|
|
18
|
+
cd codex-pdf
|
|
19
|
+
uv sync
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Running the test suite
|
|
23
|
+
|
|
24
|
+
The full sweep, including the producer-surface audit that gates the
|
|
25
|
+
read-only invariant:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
uv run pytest -q
|
|
29
|
+
uv run python scripts/produce_surface_audit.py
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
TypeScript client + Cloudflare Worker:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
cd clients/ts && npm test
|
|
36
|
+
cd ../../codex-edge && npx tsc --noEmit && npx vitest run
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Conventions
|
|
40
|
+
|
|
41
|
+
- **Schema additions are additive.** Adding a field is a minor bump
|
|
42
|
+
(`MINOR` of `codex_pdf.version.VERSION` and the relevant
|
|
43
|
+
`*_SCHEMA_VERSION`). Changing a field's shape or removing a field
|
|
44
|
+
is a major bump and must be paired with consumer migrations in
|
|
45
|
+
`lint-pdf` / `loupe-pdf` before the codex release ships.
|
|
46
|
+
- **Read-only.** Codex never produces new PDF bytes. The audit at
|
|
47
|
+
`scripts/produce_surface_audit.py` enforces this; it must stay
|
|
48
|
+
green on every commit.
|
|
49
|
+
- **No customer policy.** Detection signals belong here; pass/fail
|
|
50
|
+
verdicts belong in Lint.
|
|
51
|
+
- **Branches** are named `claude/<short-topic>-<version>` for agent
|
|
52
|
+
work; humans use whatever they like.
|
|
53
|
+
- **Commits** follow Conventional Commits (`feat:`, `fix:`, `chore:`,
|
|
54
|
+
`perf:`, `docs:`).
|
|
55
|
+
|
|
56
|
+
## Releasing a new VERSION
|
|
57
|
+
|
|
58
|
+
When bumping `codex_pdf.version.VERSION`:
|
|
59
|
+
|
|
60
|
+
1. Update `pyproject.toml`, `src/codex_pdf/version.py`,
|
|
61
|
+
`clients/ts/package.json`, `codex-edge/wrangler.toml`
|
|
62
|
+
(`CODEX_VERSION`), and the deployed-surface heading in
|
|
63
|
+
[`CLAUDE.md`](./CLAUDE.md).
|
|
64
|
+
2. `uv build` → publish wheel + sdist to PyPI.
|
|
65
|
+
3. `npm publish --access public` from `clients/ts`.
|
|
66
|
+
4. `wrangler deploy` from `codex-edge` so KV cache keys rotate.
|
|
67
|
+
5. Railway autodeploys the API + speculator from `main`.
|
|
68
|
+
|
|
69
|
+
Cache keys (`codex:{VERSION}:{kind}:{pdf_sha}:{args_sha}`) rotate
|
|
70
|
+
atomically across all three tiers when `VERSION` changes — no KV
|
|
71
|
+
purge needed.
|
|
72
|
+
|
|
73
|
+
## Reporting bugs
|
|
74
|
+
|
|
75
|
+
Please open a GitHub issue with:
|
|
76
|
+
|
|
77
|
+
- A minimal reproducing PDF (or its `pdf_sha256` and how it was
|
|
78
|
+
generated) when the bug is content-dependent.
|
|
79
|
+
- The `codex-pdf --version` (or the `version` field returned by
|
|
80
|
+
`GET /v1/version`).
|
|
81
|
+
- The exact request / CLI invocation.
|
|
82
|
+
|
|
83
|
+
For security issues, see [`SECURITY.md`](./SECURITY.md) — please do
|
|
84
|
+
**not** open a public issue.
|
codex_pdf-1.7.2/PKG-INFO
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codex-pdf
|
|
3
|
+
Version: 1.7.2
|
|
4
|
+
Summary: Authoritative, versioned PDF facts contract for Think Neverland tools.
|
|
5
|
+
Author-email: Think Neverland <dev@thinkneverland.com>
|
|
6
|
+
License-Expression: AGPL-3.0-or-later
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: >=3.12
|
|
9
|
+
Requires-Dist: fastapi>=0.115
|
|
10
|
+
Requires-Dist: gunicorn>=23.0
|
|
11
|
+
Requires-Dist: jsonschema>=4.23
|
|
12
|
+
Requires-Dist: pikepdf>=9.0
|
|
13
|
+
Requires-Dist: pydantic>=2.8
|
|
14
|
+
Requires-Dist: pymupdf>=1.24
|
|
15
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
16
|
+
Requires-Dist: structlog>=24.1
|
|
17
|
+
Requires-Dist: uvicorn>=0.30
|
|
18
|
+
Provides-Extra: geom
|
|
19
|
+
Requires-Dist: pyclipr>=0.1.8; extra == 'geom'
|
|
20
|
+
Provides-Extra: redis
|
|
21
|
+
Requires-Dist: redis>=5.0; extra == 'redis'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
title: "Overview"
|
|
26
|
+
description: "Authoritative read-only PDF facts + render engine for Print with Synergy / Think Neverland tools. Versioned contract, schema-validated output, deployed as three services."
|
|
27
|
+
group: "Getting started"
|
|
28
|
+
order: 1
|
|
29
|
+
slug: "overview"
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
# codexPDF
|
|
33
|
+
|
|
34
|
+
`codexPDF` is the authoritative, read-only PDF facts + render reference
|
|
35
|
+
for the Print with Synergy / Think Neverland tool family.
|
|
36
|
+
|
|
37
|
+
Other engines consult `codexPDF` for canonical document facts instead
|
|
38
|
+
of re-parsing PDFs independently. The contract is versioned and
|
|
39
|
+
schema-validated.
|
|
40
|
+
|
|
41
|
+
## Status
|
|
42
|
+
|
|
43
|
+
`codex-pdf 1.7.2`. Current surface includes:
|
|
44
|
+
|
|
45
|
+
- Python package (`codex_pdf`) with typed `pydantic` models.
|
|
46
|
+
- CLI (`codex-pdf extract|schema|contract|validate|probe|parity|render|serve`).
|
|
47
|
+
- HTTP API (`/v1/extract`, `/v1/probe`, `/v1/extract/stream`,
|
|
48
|
+
`/v1/render/{page,separations,heatmap,layer}`,
|
|
49
|
+
`/v1/sample/{color,density}`, `/v1/walk/{type4,content-stream}`,
|
|
50
|
+
`/v1/color/{resolve,match-pantone,inkbook}`,
|
|
51
|
+
`/v1/geom/{tile,intersect,union,difference}`).
|
|
52
|
+
- TypeScript client (`@printwithsynergy/codex-client`) mirroring the
|
|
53
|
+
Python `codex_pdf.client` surface, with SSE streaming for probe
|
|
54
|
+
and extract.
|
|
55
|
+
- Versioned schemas in `schemas/v1/` (document, color, geom).
|
|
56
|
+
- Cloudflare Worker (`codex-edge`) providing a KV-backed
|
|
57
|
+
write-through cache layer in front of the API.
|
|
58
|
+
- Redis-Streams speculator (`codex-speculator`) that pre-warms
|
|
59
|
+
Phase 1 + Phase 2 caches.
|
|
60
|
+
|
|
61
|
+
See [`CLAUDE.md`](./CLAUDE.md) for the full deployed-service map
|
|
62
|
+
(URLs, account IDs, version-bump checklist).
|
|
63
|
+
|
|
64
|
+
## Quickstart
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
uv sync
|
|
68
|
+
uv run codex-pdf probe input.pdf --json
|
|
69
|
+
uv run codex-pdf extract input.pdf --pretty > out.json
|
|
70
|
+
uv run codex-pdf validate out.json
|
|
71
|
+
uv run codex-pdf parity --fixtures-root tests/fixtures --profile summary --max-files 5
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Run the HTTP API locally:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
uv run codex-pdf serve --host 0.0.0.0 --port 8080
|
|
78
|
+
curl localhost:8080/v1/version
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Contract
|
|
82
|
+
|
|
83
|
+
The public surface is the JSON contract rooted at `CodexDocument`,
|
|
84
|
+
plus the per-section contracts under color and geom.
|
|
85
|
+
|
|
86
|
+
- Document schema: `schemas/v1/codex-document.schema.json`
|
|
87
|
+
- Runtime model: `codex_pdf.models.v1.CodexDocument`
|
|
88
|
+
- Stability policy: SemVer (`major` for breaking contract changes;
|
|
89
|
+
field additions are minor bumps).
|
|
90
|
+
- Live contract endpoint: `GET /v1/contract` returns the endpoint
|
|
91
|
+
inventory plus `section_schema_versions`.
|
|
92
|
+
|
|
93
|
+
## Documentation
|
|
94
|
+
|
|
95
|
+
| Topic | Doc |
|
|
96
|
+
| --- | --- |
|
|
97
|
+
| Architecture and boundaries | [docs/architecture.md](./docs/architecture.md) |
|
|
98
|
+
| CLI commands and usage | [docs/cli.md](./docs/cli.md) |
|
|
99
|
+
| Contract and schema versioning | [docs/contract.md](./docs/contract.md) |
|
|
100
|
+
| Deploying the API + speculator + edge | [docs/deploy.md](./docs/deploy.md) |
|
|
101
|
+
| Parity profiles and baselines | [docs/parity.md](./docs/parity.md) |
|
|
102
|
+
| Preflight ingest adapters | [docs/preflight-ingest.md](./docs/preflight-ingest.md) |
|
|
103
|
+
| Service-ownership contract | [docs/service-ownership-contract.md](./docs/service-ownership-contract.md) |
|
|
104
|
+
| Codex change ripple rule | [docs/operations/codex-change-ripple.md](./docs/operations/codex-change-ripple.md) |
|
|
105
|
+
| Marketing deploy template | [docs/operations/marketing-deploy-template.md](./docs/operations/marketing-deploy-template.md) |
|
|
106
|
+
|
|
107
|
+
## Contributing
|
|
108
|
+
|
|
109
|
+
We welcome PRs that fit codex's lane (extraction, normalization,
|
|
110
|
+
detection signals). Display concerns belong in **Loupe**; rule
|
|
111
|
+
pass/fail logic belongs in **Lint**.
|
|
112
|
+
|
|
113
|
+
Read [`CONTRIBUTING.md`](./CONTRIBUTING.md) for the dev setup, test
|
|
114
|
+
commands, schema-bump rules, and release checklist.
|
|
115
|
+
|
|
116
|
+
## Security
|
|
117
|
+
|
|
118
|
+
Please report vulnerabilities privately to
|
|
119
|
+
**`security@thinkneverland.com`** — do not open a public issue.
|
|
120
|
+
|
|
121
|
+
The full disclosure policy, supported-version matrix, and scope
|
|
122
|
+
(including the read-only PDF invariant) live in
|
|
123
|
+
[`SECURITY.md`](./SECURITY.md).
|
|
124
|
+
|
|
125
|
+
## License
|
|
126
|
+
|
|
127
|
+
`codexPDF` is distributed under the **GNU Affero General Public
|
|
128
|
+
License v3.0 or later** (`SPDX-License-Identifier:
|
|
129
|
+
AGPL-3.0-or-later`). The full license text is in
|
|
130
|
+
[`LICENSE`](./LICENSE).
|
|
131
|
+
|
|
132
|
+
AGPL applies in particular when codex is reachable over a network —
|
|
133
|
+
modifications served to remote users must be made available to
|
|
134
|
+
those users under the same terms.
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Overview"
|
|
3
|
+
description: "Authoritative read-only PDF facts + render engine for Print with Synergy / Think Neverland tools. Versioned contract, schema-validated output, deployed as three services."
|
|
4
|
+
group: "Getting started"
|
|
5
|
+
order: 1
|
|
6
|
+
slug: "overview"
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# codexPDF
|
|
10
|
+
|
|
11
|
+
`codexPDF` is the authoritative, read-only PDF facts + render reference
|
|
12
|
+
for the Print with Synergy / Think Neverland tool family.
|
|
13
|
+
|
|
14
|
+
Other engines consult `codexPDF` for canonical document facts instead
|
|
15
|
+
of re-parsing PDFs independently. The contract is versioned and
|
|
16
|
+
schema-validated.
|
|
17
|
+
|
|
18
|
+
## Status
|
|
19
|
+
|
|
20
|
+
`codex-pdf 1.7.2`. Current surface includes:
|
|
21
|
+
|
|
22
|
+
- Python package (`codex_pdf`) with typed `pydantic` models.
|
|
23
|
+
- CLI (`codex-pdf extract|schema|contract|validate|probe|parity|render|serve`).
|
|
24
|
+
- HTTP API (`/v1/extract`, `/v1/probe`, `/v1/extract/stream`,
|
|
25
|
+
`/v1/render/{page,separations,heatmap,layer}`,
|
|
26
|
+
`/v1/sample/{color,density}`, `/v1/walk/{type4,content-stream}`,
|
|
27
|
+
`/v1/color/{resolve,match-pantone,inkbook}`,
|
|
28
|
+
`/v1/geom/{tile,intersect,union,difference}`).
|
|
29
|
+
- TypeScript client (`@printwithsynergy/codex-client`) mirroring the
|
|
30
|
+
Python `codex_pdf.client` surface, with SSE streaming for probe
|
|
31
|
+
and extract.
|
|
32
|
+
- Versioned schemas in `schemas/v1/` (document, color, geom).
|
|
33
|
+
- Cloudflare Worker (`codex-edge`) providing a KV-backed
|
|
34
|
+
write-through cache layer in front of the API.
|
|
35
|
+
- Redis-Streams speculator (`codex-speculator`) that pre-warms
|
|
36
|
+
Phase 1 + Phase 2 caches.
|
|
37
|
+
|
|
38
|
+
See [`CLAUDE.md`](./CLAUDE.md) for the full deployed-service map
|
|
39
|
+
(URLs, account IDs, version-bump checklist).
|
|
40
|
+
|
|
41
|
+
## Quickstart
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
uv sync
|
|
45
|
+
uv run codex-pdf probe input.pdf --json
|
|
46
|
+
uv run codex-pdf extract input.pdf --pretty > out.json
|
|
47
|
+
uv run codex-pdf validate out.json
|
|
48
|
+
uv run codex-pdf parity --fixtures-root tests/fixtures --profile summary --max-files 5
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Run the HTTP API locally:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
uv run codex-pdf serve --host 0.0.0.0 --port 8080
|
|
55
|
+
curl localhost:8080/v1/version
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Contract
|
|
59
|
+
|
|
60
|
+
The public surface is the JSON contract rooted at `CodexDocument`,
|
|
61
|
+
plus the per-section contracts under color and geom.
|
|
62
|
+
|
|
63
|
+
- Document schema: `schemas/v1/codex-document.schema.json`
|
|
64
|
+
- Runtime model: `codex_pdf.models.v1.CodexDocument`
|
|
65
|
+
- Stability policy: SemVer (`major` for breaking contract changes;
|
|
66
|
+
field additions are minor bumps).
|
|
67
|
+
- Live contract endpoint: `GET /v1/contract` returns the endpoint
|
|
68
|
+
inventory plus `section_schema_versions`.
|
|
69
|
+
|
|
70
|
+
## Documentation
|
|
71
|
+
|
|
72
|
+
| Topic | Doc |
|
|
73
|
+
| --- | --- |
|
|
74
|
+
| Architecture and boundaries | [docs/architecture.md](./docs/architecture.md) |
|
|
75
|
+
| CLI commands and usage | [docs/cli.md](./docs/cli.md) |
|
|
76
|
+
| Contract and schema versioning | [docs/contract.md](./docs/contract.md) |
|
|
77
|
+
| Deploying the API + speculator + edge | [docs/deploy.md](./docs/deploy.md) |
|
|
78
|
+
| Parity profiles and baselines | [docs/parity.md](./docs/parity.md) |
|
|
79
|
+
| Preflight ingest adapters | [docs/preflight-ingest.md](./docs/preflight-ingest.md) |
|
|
80
|
+
| Service-ownership contract | [docs/service-ownership-contract.md](./docs/service-ownership-contract.md) |
|
|
81
|
+
| Codex change ripple rule | [docs/operations/codex-change-ripple.md](./docs/operations/codex-change-ripple.md) |
|
|
82
|
+
| Marketing deploy template | [docs/operations/marketing-deploy-template.md](./docs/operations/marketing-deploy-template.md) |
|
|
83
|
+
|
|
84
|
+
## Contributing
|
|
85
|
+
|
|
86
|
+
We welcome PRs that fit codex's lane (extraction, normalization,
|
|
87
|
+
detection signals). Display concerns belong in **Loupe**; rule
|
|
88
|
+
pass/fail logic belongs in **Lint**.
|
|
89
|
+
|
|
90
|
+
Read [`CONTRIBUTING.md`](./CONTRIBUTING.md) for the dev setup, test
|
|
91
|
+
commands, schema-bump rules, and release checklist.
|
|
92
|
+
|
|
93
|
+
## Security
|
|
94
|
+
|
|
95
|
+
Please report vulnerabilities privately to
|
|
96
|
+
**`security@thinkneverland.com`** — do not open a public issue.
|
|
97
|
+
|
|
98
|
+
The full disclosure policy, supported-version matrix, and scope
|
|
99
|
+
(including the read-only PDF invariant) live in
|
|
100
|
+
[`SECURITY.md`](./SECURITY.md).
|
|
101
|
+
|
|
102
|
+
## License
|
|
103
|
+
|
|
104
|
+
`codexPDF` is distributed under the **GNU Affero General Public
|
|
105
|
+
License v3.0 or later** (`SPDX-License-Identifier:
|
|
106
|
+
AGPL-3.0-or-later`). The full license text is in
|
|
107
|
+
[`LICENSE`](./LICENSE).
|
|
108
|
+
|
|
109
|
+
AGPL applies in particular when codex is reachable over a network —
|
|
110
|
+
modifications served to remote users must be made available to
|
|
111
|
+
those users under the same terms.
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Security policy
|
|
2
|
+
|
|
3
|
+
`codex-pdf` parses untrusted PDF input on behalf of upstream services
|
|
4
|
+
(Loupe, Lint, marketing demos). We take vulnerability reports
|
|
5
|
+
seriously — especially around PDF parsing, SSRF, authentication, and
|
|
6
|
+
cache-poisoning vectors.
|
|
7
|
+
|
|
8
|
+
## Reporting a vulnerability
|
|
9
|
+
|
|
10
|
+
Please **do not open a public GitHub issue**.
|
|
11
|
+
|
|
12
|
+
Email security disclosures to **`security@thinkneverland.com`**.
|
|
13
|
+
If you need an encrypted channel, request our PGP key in the first
|
|
14
|
+
message and we will reply with the fingerprint and public key.
|
|
15
|
+
|
|
16
|
+
A useful report includes:
|
|
17
|
+
|
|
18
|
+
- The codex-pdf version (`GET /v1/version` or the published package
|
|
19
|
+
version on PyPI / npm).
|
|
20
|
+
- Whether the issue is reachable through the deployed Railway API,
|
|
21
|
+
the codex-edge Cloudflare Worker, the local CLI, the Python or
|
|
22
|
+
TypeScript SDK — or some combination.
|
|
23
|
+
- A minimal reproducer: a PDF / JSON request body / curl invocation
|
|
24
|
+
that triggers the issue.
|
|
25
|
+
- The behaviour observed and the behaviour you expected.
|
|
26
|
+
|
|
27
|
+
## Response timeline
|
|
28
|
+
|
|
29
|
+
- We acknowledge receipt within **two business days**.
|
|
30
|
+
- We aim to ship a fix or mitigation within **30 days** for
|
|
31
|
+
high-severity issues, **90 days** for low-severity.
|
|
32
|
+
- You will be credited in the release notes (and the security
|
|
33
|
+
advisory, when one is published) unless you ask to remain
|
|
34
|
+
anonymous.
|
|
35
|
+
|
|
36
|
+
## Supported versions
|
|
37
|
+
|
|
38
|
+
Only the latest minor release is patched for security issues. Older
|
|
39
|
+
minors get advisories but no backports.
|
|
40
|
+
|
|
41
|
+
| Version | Status |
|
|
42
|
+
|---------|--------|
|
|
43
|
+
| `1.7.x` | ✅ patched |
|
|
44
|
+
| `< 1.7` | ❌ unsupported — please upgrade |
|
|
45
|
+
|
|
46
|
+
## In scope
|
|
47
|
+
|
|
48
|
+
- The Python package (`pip install codex-pdf`) and its CLI.
|
|
49
|
+
- The TypeScript client (`@printwithsynergy/codex-client`).
|
|
50
|
+
- The deployed Railway HTTP API
|
|
51
|
+
(`codex-pdf-lint-sidecar-production.up.railway.app`).
|
|
52
|
+
- The Cloudflare Worker (`codex-edge.thinkneverland.workers.dev`).
|
|
53
|
+
- The codex-speculator Redis-Streams consumer.
|
|
54
|
+
|
|
55
|
+
## Out of scope
|
|
56
|
+
|
|
57
|
+
- Issues in upstream dependencies (PyMuPDF, pikepdf, Ghostscript)
|
|
58
|
+
that have already been disclosed upstream — please report those
|
|
59
|
+
to the upstream project.
|
|
60
|
+
- Denial-of-service from PDFs that legitimately take a long time to
|
|
61
|
+
parse but are not malformed (for example, very large multi-page
|
|
62
|
+
artwork). Codex sets a hard timeout; reports about *bypassing*
|
|
63
|
+
that timeout are in scope.
|
|
64
|
+
- Self-inflicted misconfiguration (e.g. running with no auth tokens
|
|
65
|
+
set on a public network).
|
|
66
|
+
|
|
67
|
+
## Scope of the read-only invariant
|
|
68
|
+
|
|
69
|
+
Codex never produces new PDF bytes. If you can demonstrate codex
|
|
70
|
+
emitting a PDF or a `b"%PDF-"` payload through any of the surfaces
|
|
71
|
+
above, that is a security finding even if no other compromise
|
|
72
|
+
follows — please report it.
|
|
@@ -10,7 +10,7 @@ the Railway codex-pdf service.
|
|
|
10
10
|
- **Account**: `99aa3f9229469650a746a7d39ac58448` (`Quincy@thinkneverland.com's Account`)
|
|
11
11
|
- **KV namespace `CACHE`**: `89a21ce1937046018a3d9d38f4e763ff` (preview `a4856d6f3b244087b907c189c2a2277d`)
|
|
12
12
|
- **Origin** (`CODEX_ORIGIN_URL`): `https://codex-pdf-lint-sidecar-production.up.railway.app`
|
|
13
|
-
- **Codex version pinned**: `1.7.
|
|
13
|
+
- **Codex version pinned**: `1.7.2` (`CODEX_VERSION` var — bump on origin release)
|
|
14
14
|
- **TTLs**: probe 24 h, Phase 1 24 h, Phase 2 7 d
|
|
15
15
|
|
|
16
16
|
## What it caches
|
|
@@ -23,7 +23,7 @@ CODEX_ORIGIN_URL = "https://codex-pdf-lint-sidecar-production.up.railway.app"
|
|
|
23
23
|
# Codex package VERSION at deploy time. MUST match the origin's
|
|
24
24
|
# `codex_pdf.version.VERSION` so KV keys line up. Bump on every
|
|
25
25
|
# origin release.
|
|
26
|
-
CODEX_VERSION = "1.7.
|
|
26
|
+
CODEX_VERSION = "1.7.2"
|
|
27
27
|
# TTL (seconds) for cached SSE event payloads.
|
|
28
28
|
PROBE_TTL = "86400" # 24 h — small payload, refresh daily
|
|
29
29
|
PHASE1_TTL = "86400" # 24 h — matches origin Redis TTL
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Architecture"
|
|
3
|
+
description: "codexPDF boundaries, extraction + render pipeline, and the three deployed services that share one cache key namespace."
|
|
4
|
+
group: "Getting started"
|
|
5
|
+
order: 2
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Architecture
|
|
9
|
+
|
|
10
|
+
`codexPDF` is a contract-first facts engine for PDF documents.
|
|
11
|
+
|
|
12
|
+
## Boundary
|
|
13
|
+
|
|
14
|
+
- Read-only extraction + render. Codex never produces new PDF bytes
|
|
15
|
+
— `scripts/produce_surface_audit.py` enforces this on every CI
|
|
16
|
+
run.
|
|
17
|
+
- No customer policy / rule adjudication. Codex emits detection
|
|
18
|
+
signals; pass/fail belongs to **Lint**.
|
|
19
|
+
- No display / viewer presentation. PNG renders are the byte-accurate
|
|
20
|
+
source of truth for **Loupe** to display, not viewers in
|
|
21
|
+
themselves.
|
|
22
|
+
- Consumer-agnostic output: same JSON contract regardless of
|
|
23
|
+
caller.
|
|
24
|
+
|
|
25
|
+
## Pipeline
|
|
26
|
+
|
|
27
|
+
1. Input PDF bytes are loaded by the extractor layer (PyMuPDF
|
|
28
|
+
for the fast path, pikepdf for slower per-object inspection).
|
|
29
|
+
2. Domain extractors populate `CodexDocument` fields: pages,
|
|
30
|
+
boxes, fonts, images, color spaces (with Separation tint
|
|
31
|
+
transforms evaluated at `t=1.0` so spot inks land on the right
|
|
32
|
+
swatch), OCG / layers, annotations, transparency, trapping,
|
|
33
|
+
form XObjects.
|
|
34
|
+
3. Output is serialized as JSON against the published schemas in
|
|
35
|
+
`schemas/v1/`. Each section (document, color, geom) versions
|
|
36
|
+
independently and reports its `schema_version` inline.
|
|
37
|
+
4. Render endpoints rasterize pages, separations, TAC heatmaps,
|
|
38
|
+
and OCG-isolated layers via Ghostscript + PyMuPDF.
|
|
39
|
+
|
|
40
|
+
## Primary contract
|
|
41
|
+
|
|
42
|
+
- Runtime model: `codex_pdf.models.v1.CodexDocument`
|
|
43
|
+
- Document schema: `schemas/v1/codex-document.schema.json`
|
|
44
|
+
- Section versions: `codex_pdf.color.COLOR_SCHEMA_VERSION`,
|
|
45
|
+
`codex_pdf.geom.GEOM_SCHEMA_VERSION`
|
|
46
|
+
- Live manifest: `GET /v1/contract`
|
|
47
|
+
|
|
48
|
+
## Deployed surface
|
|
49
|
+
|
|
50
|
+
In production, codex runs as **three services** sharing one
|
|
51
|
+
content-addressed cache namespace
|
|
52
|
+
(`codex:{VERSION}:{kind}:{pdf_sha}:{args_sha}`), so a `VERSION`
|
|
53
|
+
bump invalidates every tier together (codex-edge pins its own `CODEX_VERSION`, which must be bumped in step). The full deployed map —
|
|
54
|
+
URLs, account / service IDs, and the version-bump checklist —
|
|
55
|
+
lives in [`CLAUDE.md`](../CLAUDE.md).
|
|
56
|
+
|
|
57
|
+
1. **codex-pdf API** (Railway) — FastAPI under gunicorn + uvicorn
|
|
58
|
+
workers. Bearer + internal token auth. Backed by Redis for
|
|
59
|
+
cache and blob storage.
|
|
60
|
+
2. **codex-speculator** (Railway sidecar) — a Redis-Streams
|
|
61
|
+
consumer. `POST /v1/probe` and the blob-store put both XADD a
|
|
62
|
+
sha onto the `codex:speculate` stream; the speculator runs
|
|
63
|
+
Phase 1 + Phase 2 ahead of the next request so `/v1/extract`
|
|
64
|
+
lands warm. Idempotent — cache-hit short-circuit collapses
|
|
65
|
+
replays to a single Redis GET.
|
|
66
|
+
3. **codex-edge** (Cloudflare Worker + KV) — drop-in DNS-level
|
|
67
|
+
replacement that captures every probe / extract SSE frame and
|
|
68
|
+
replays from KV on the next hash-keyed request. Multipart
|
|
69
|
+
uploads bypass to origin. `ctx.waitUntil` keeps the Worker
|
|
70
|
+
alive long enough to persist every frame before the response
|
|
71
|
+
stream closes.
|
|
72
|
+
|
|
73
|
+
## Consumer relationship
|
|
74
|
+
|
|
75
|
+
Downstream engines (`lint-pdf`, `loupe-pdf`, marketing demos)
|
|
76
|
+
treat codex output as the source of truth for document facts and
|
|
77
|
+
keep any product-specific behaviour in adapter layers. New
|
|
78
|
+
products map to one owner per capability — see
|
|
79
|
+
[`docs/service-ownership-contract.md`](./service-ownership-contract.md).
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "CLI"
|
|
3
|
+
description: "Command reference for extract, probe, render, serve, parity, schema, contract, and validate workflows."
|
|
4
|
+
group: "Getting started"
|
|
5
|
+
order: 3
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# CLI
|
|
9
|
+
|
|
10
|
+
`codex-pdf` exposes a contract-oriented CLI built with `argparse`.
|
|
11
|
+
The same code path that the HTTP API uses runs in-process when you
|
|
12
|
+
invoke the CLI, so output is byte-for-byte identical to the
|
|
13
|
+
deployed surface.
|
|
14
|
+
|
|
15
|
+
## Commands
|
|
16
|
+
|
|
17
|
+
| Command | Purpose |
|
|
18
|
+
|---|---|
|
|
19
|
+
| `extract <pdf>` | Emit the full `CodexDocument` JSON. |
|
|
20
|
+
| `probe <pdf>` | Two-event metadata probe (page count, dimensions, info dict, `pdf_sha256`). |
|
|
21
|
+
| `schema [name]` | Print a published JSON schema (default: `codex-document`). |
|
|
22
|
+
| `contract` | Print the machine-readable contract manifest (endpoint inventory + section schema versions). |
|
|
23
|
+
| `validate <codex_json>` | Validate a codex JSON payload against the published schema. |
|
|
24
|
+
| `parity` | Compare codex projections against a baseline command. |
|
|
25
|
+
| `render page` | Render one page to PNG. |
|
|
26
|
+
| `render separations` | Render every separation channel for one page. |
|
|
27
|
+
| `render heatmap` | Render a TAC heatmap PNG plus a per-run header. |
|
|
28
|
+
| `render layer` | Render one OCG-isolated layer to RGBA PNG. |
|
|
29
|
+
| `serve` | Start the codex HTTP API (uvicorn, in-process). |
|
|
30
|
+
|
|
31
|
+
## Common usage
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
uv run codex-pdf extract input.pdf --pretty > out.json
|
|
35
|
+
uv run codex-pdf validate out.json
|
|
36
|
+
uv run codex-pdf probe input.pdf --json
|
|
37
|
+
uv run codex-pdf contract --pretty
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Streaming probe / extract (HTTP only)
|
|
41
|
+
|
|
42
|
+
The CLI's `probe` and `extract` are synchronous. The deployed HTTP
|
|
43
|
+
API also exposes streaming variants that emit Phase 1 results as
|
|
44
|
+
soon as PyMuPDF is finished and Phase 2 once pikepdf adds the
|
|
45
|
+
slower fields:
|
|
46
|
+
|
|
47
|
+
- `POST /v1/probe` — server-sent events with two frames (`probe-min`
|
|
48
|
+
immediately, `probe-std` after the secondary parse).
|
|
49
|
+
- `POST /v1/extract/stream` — same shape for full extraction; pass
|
|
50
|
+
`?granular=1` to get per-section progress events.
|
|
51
|
+
|
|
52
|
+
The TypeScript client's `probeStream()` and `extractStream()` wrap
|
|
53
|
+
these endpoints directly; the Python `codex_pdf.client.HttpClient` also has
|
|
54
|
+
streaming helpers when used against a remote API.
|
|
55
|
+
|
|
56
|
+
## Render usage
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
uv run codex-pdf render page input.pdf --page 0 --dpi 144 -o page.png
|
|
60
|
+
uv run codex-pdf render separations input.pdf --page 0 -o seps/
|
|
61
|
+
uv run codex-pdf render heatmap input.pdf --page 0 -o tac.png
|
|
62
|
+
uv run codex-pdf render layer input.pdf --page 0 --ocg "Dieline" -o dieline.png
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Parity usage
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
uv run codex-pdf parity \
|
|
69
|
+
--fixtures-root tests/fixtures \
|
|
70
|
+
--profile deep \
|
|
71
|
+
--max-files 10
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Baseline command mode:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
uv run codex-pdf parity \
|
|
78
|
+
--fixtures-root /path/to/pdfs \
|
|
79
|
+
--profile summary \
|
|
80
|
+
--baseline-command "<command with {pdf} placeholder>"
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Local server
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
uv run codex-pdf serve --host 0.0.0.0 --port 8080
|
|
87
|
+
curl localhost:8080/v1/version
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
The same image, in production, runs under gunicorn + uvicorn workers
|
|
91
|
+
via the Dockerfile's `CMD` — see [`docs/deploy.md`](./deploy.md).
|
|
@@ -20,7 +20,9 @@ mode for examples.
|
|
|
20
20
|
| `GET /v1/version` | meta | render | bare `{version}` |
|
|
21
21
|
| `GET /v1/contract` | meta | render | endpoint inventory + `section_schema_versions` |
|
|
22
22
|
| `GET /v1/schema/{name}` | document | extract | JSON schemas served from `schemas/v1/<name>.schema.json` |
|
|
23
|
-
| `POST /v1/extract`, `POST /extract` | document | extract | multipart PDF or JSON `{url}` → CodexDocument |
|
|
23
|
+
| `POST /v1/extract`, `POST /extract` | document | extract | multipart PDF or JSON `{url, pdf_sha256}` → CodexDocument |
|
|
24
|
+
| `POST /v1/probe` | document | extract | two-event SSE stream: `probe-min` (instant) + `probe-std` (after secondary parse) |
|
|
25
|
+
| `POST /v1/extract/stream` | document | extract | SSE stream of `phase-1` + `phase-2` extract events; `?granular=1` adds per-section progress |
|
|
24
26
|
| `POST /v1/render/page` | document | render | PNG raster |
|
|
25
27
|
| `POST /v1/render/separations` | document | render | tiffsep channel manifest |
|
|
26
28
|
| `POST /v1/render/heatmap` | document | render | TAC heatmap PNG + per-run header |
|
|
@@ -56,10 +58,10 @@ Sample contract response:
|
|
|
56
58
|
```json
|
|
57
59
|
{
|
|
58
60
|
"contract_name": "codex-document",
|
|
59
|
-
"schema_version": "1.
|
|
60
|
-
"package_version": "1.
|
|
61
|
+
"schema_version": "1.1.0",
|
|
62
|
+
"package_version": "1.7.0",
|
|
61
63
|
"schema_id": "https://schemas.thinkneverland.com/codex-pdf/v1/codex-document.schema.json",
|
|
62
|
-
"endpoints": ["POST /v1/extract", "..."],
|
|
64
|
+
"endpoints": ["POST /v1/extract", "POST /v1/probe", "POST /v1/extract/stream", "..."],
|
|
63
65
|
"section_schema_versions": {
|
|
64
66
|
"color": "1.0.0",
|
|
65
67
|
"geom": "1.0.0"
|
|
@@ -104,6 +106,4 @@ status changes commit-by-commit.
|
|
|
104
106
|
|
|
105
107
|
Any future need to write PDF bytes goes into a separate Forge
|
|
106
108
|
service (rewrite, marks, impose, trap), never into a consumer.
|
|
107
|
-
Codex stays read-only; consumers stay byte-level-clean.
|
|
108
|
-
`/Users/macadmin/Code/printwithsynergy/FORGE-DESIGN-PROMPT.md` for
|
|
109
|
-
the next-session Q&A that defines the Forge contract.
|
|
109
|
+
Codex stays read-only; consumers stay byte-level-clean.
|