codex-pdf 1.7.2__tar.gz → 1.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/CLAUDE.md +1 -1
  2. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/PKG-INFO +3 -1
  3. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/clients/ts/package.json +1 -1
  4. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/README.md +1 -1
  5. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/wrangler.toml +1 -1
  6. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/pyproject.toml +4 -1
  7. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/api/main.py +109 -3
  8. codex_pdf-1.8.0/src/codex_pdf/api/retention.py +312 -0
  9. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/version.py +1 -1
  10. codex_pdf-1.8.0/tests/test_retention_consent.py +94 -0
  11. codex_pdf-1.8.0/tests/test_retention_integration.py +166 -0
  12. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/uv.lock +85 -2
  13. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/.cursor/rules/service-ownership.mdc +0 -0
  14. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/.github/workflows/ci.yml +0 -0
  15. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/.gitignore +0 -0
  16. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/.windsurf/rules/service-ownership.md +0 -0
  17. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/CONTRIBUTING.md +0 -0
  18. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/Dockerfile +0 -0
  19. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/LICENSE +0 -0
  20. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/Procfile +0 -0
  21. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/README.md +0 -0
  22. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/SECURITY.md +0 -0
  23. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/clients/ts/README.md +0 -0
  24. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/clients/ts/package-lock.json +0 -0
  25. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/clients/ts/src/color.ts +0 -0
  26. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/clients/ts/src/index.test.ts +0 -0
  27. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/clients/ts/src/index.ts +0 -0
  28. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/clients/ts/tsconfig.json +0 -0
  29. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/.github/workflows/deploy.yml +0 -0
  30. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/package-lock.json +0 -0
  31. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/package.json +0 -0
  32. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/src/cache_key.ts +0 -0
  33. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/src/env.ts +0 -0
  34. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/src/handlers/extract.ts +0 -0
  35. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/src/handlers/probe.ts +0 -0
  36. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/src/index.ts +0 -0
  37. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/src/sse_tee.ts +0 -0
  38. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/test/cache_key.test.ts +0 -0
  39. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/codex-edge/tsconfig.json +0 -0
  40. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/docs/architecture.md +0 -0
  41. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/docs/cli.md +0 -0
  42. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/docs/contract.md +0 -0
  43. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/docs/deploy.md +0 -0
  44. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/docs/operations/codex-change-ripple.md +0 -0
  45. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/docs/operations/marketing-deploy-template.md +0 -0
  46. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/docs/parity.md +0 -0
  47. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/docs/preflight-ingest.md +0 -0
  48. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/docs/service-ownership-contract.md +0 -0
  49. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/railway.speculator.toml +0 -0
  50. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/railway.toml +0 -0
  51. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/audit/mislocated-closure.json +0 -0
  52. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/audit/produce_surface.json +0 -0
  53. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/dieline_calibration_report.json +0 -0
  54. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/parity/codex_deep.json +0 -0
  55. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/parity/codex_inventory.json +0 -0
  56. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/parity/codex_summary.json +0 -0
  57. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/parity/criterion4_parser_surface.json +0 -0
  58. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/parity/pdfx4_deep.json +0 -0
  59. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/parity/pdfx4_inventory.json +0 -0
  60. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/parity/pdfx4_summary.json +0 -0
  61. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/parity/render_baseline.json +0 -0
  62. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/reports/parity/viewer_essentials.json +0 -0
  63. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/CHANGELOG.md +0 -0
  64. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-annotation.schema.json +0 -0
  65. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-box.schema.json +0 -0
  66. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-color-space.schema.json +0 -0
  67. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-document.schema.json +0 -0
  68. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-font.schema.json +0 -0
  69. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-form-xobject.schema.json +0 -0
  70. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-image.schema.json +0 -0
  71. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-issue.schema.json +0 -0
  72. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-ocg.schema.json +0 -0
  73. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-output-intent.schema.json +0 -0
  74. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-page-object.schema.json +0 -0
  75. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-page.schema.json +0 -0
  76. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-preflight-report.schema.json +0 -0
  77. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-source.schema.json +0 -0
  78. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-spot-colorant.schema.json +0 -0
  79. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-transparency-tree.schema.json +0 -0
  80. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-trap-evidence.schema.json +0 -0
  81. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/codex-warning.schema.json +0 -0
  82. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/schemas/v1/probe.schema.json +0 -0
  83. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/scripts/calibrate_dieline_heuristics.py +0 -0
  84. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/scripts/parity_viewer_essentials.py +0 -0
  85. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/scripts/produce_surface_audit.py +0 -0
  86. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/__init__.py +0 -0
  87. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/api/__init__.py +0 -0
  88. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/api/auth.py +0 -0
  89. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/api/blob_store.py +0 -0
  90. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/api/cache.py +0 -0
  91. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/api/url_ingest.py +0 -0
  92. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/api/warmup.pdf +0 -0
  93. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/api/warmup.py +0 -0
  94. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/cli.py +0 -0
  95. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/client/__init__.py +0 -0
  96. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/client/http_client.py +0 -0
  97. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/color/__init__.py +0 -0
  98. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/color/alt_space.py +0 -0
  99. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/color/color_math.py +0 -0
  100. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/color/curated.py +0 -0
  101. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/color/data/pantone_reference.json +0 -0
  102. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/color/normalize.py +0 -0
  103. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/color/pantone.py +0 -0
  104. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/color/resolver.py +0 -0
  105. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/eval/__init__.py +0 -0
  106. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/eval/ps_type4.py +0 -0
  107. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/__init__.py +0 -0
  108. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/annotations.py +0 -0
  109. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/color.py +0 -0
  110. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/common.py +0 -0
  111. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/content_inventory.py +0 -0
  112. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/dieline_detector.py +0 -0
  113. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/document.py +0 -0
  114. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/fonts.py +0 -0
  115. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/forms.py +0 -0
  116. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/images.py +0 -0
  117. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/ocg.py +0 -0
  118. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/probe.py +0 -0
  119. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/signals.py +0 -0
  120. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/structure.py +0 -0
  121. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/summary.py +0 -0
  122. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/transparency.py +0 -0
  123. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/extract/trapping.py +0 -0
  124. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/geom/__init__.py +0 -0
  125. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/geom/box.py +0 -0
  126. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/geom/matrix.py +0 -0
  127. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/geom/path.py +0 -0
  128. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/geom/tile.py +0 -0
  129. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/geom/units.py +0 -0
  130. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/models/__init__.py +0 -0
  131. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/models/v1.py +0 -0
  132. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/parity.py +0 -0
  133. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/preflight_ingest/__init__.py +0 -0
  134. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/preflight_ingest/adapters.py +0 -0
  135. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/render/__init__.py +0 -0
  136. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/render/_common.py +0 -0
  137. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/render/content_stream.py +0 -0
  138. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/render/layer.py +0 -0
  139. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/render/page.py +0 -0
  140. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/render/separations.py +0 -0
  141. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/schema.py +0 -0
  142. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/speculator/__init__.py +0 -0
  143. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/speculator/__main__.py +0 -0
  144. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/src/codex_pdf/speculator/consumer.py +0 -0
  145. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/conftest.py +0 -0
  146. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/fixtures/conforming/minimal.pdf +0 -0
  147. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/fixtures/generate_fixtures.py +0 -0
  148. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/fixtures/violating/no_output_intent.pdf +0 -0
  149. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/fixtures/violating/no_trim_box.pdf +0 -0
  150. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/fixtures/violating/no_xmp.pdf +0 -0
  151. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/fixtures/violating/pdf_1_4.pdf +0 -0
  152. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/golden/1.0.0/reference.json +0 -0
  153. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_alt_space.py +0 -0
  154. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_api.py +0 -0
  155. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_cache.py +0 -0
  156. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_cli_contract.py +0 -0
  157. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_client_routing.py +0 -0
  158. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_color.py +0 -0
  159. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_extract_analysis_signals.py +0 -0
  160. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_extract_structural.py +0 -0
  161. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_geom.py +0 -0
  162. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_golden.py +0 -0
  163. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_golden_corpus.py +0 -0
  164. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_models.py +0 -0
  165. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_parity.py +0 -0
  166. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_pdf_sha256.py +0 -0
  167. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_preflight_ingest.py +0 -0
  168. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_produce_surface_audit.py +0 -0
  169. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_schema.py +0 -0
  170. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_schemas_all.py +0 -0
  171. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_speculator.py +0 -0
  172. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_summary_dieline.py +0 -0
  173. {codex_pdf-1.7.2 → codex_pdf-1.8.0}/tests/test_summary_spot_colors.py +0 -0
@@ -25,7 +25,7 @@ For new products (Forge, Trap, Impose, Marks, etc.), map capabilities to one own
25
25
 
26
26
  When work spans layers, define a contract seam and keep logic in its owner service.
27
27
 
28
- ## Deployed surface (1.7.2)
28
+ ## Deployed surface (1.8.0)
29
29
 
30
30
  Codex now runs as **three services** in production. They share the
31
31
  same content-addressed cache key format
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codex-pdf
3
- Version: 1.7.2
3
+ Version: 1.8.0
4
4
  Summary: Authoritative, versioned PDF facts contract for Think Neverland tools.
5
5
  Author-email: Think Neverland <dev@thinkneverland.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,6 +19,8 @@ Provides-Extra: geom
19
19
  Requires-Dist: pyclipr>=0.1.8; extra == 'geom'
20
20
  Provides-Extra: redis
21
21
  Requires-Dist: redis>=5.0; extra == 'redis'
22
+ Provides-Extra: retain
23
+ Requires-Dist: boto3>=1.34; extra == 'retain'
22
24
  Description-Content-Type: text/markdown
23
25
 
24
26
  ---
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@printwithsynergy/codex-client",
3
- "version": "1.7.2",
3
+ "version": "1.8.0",
4
4
  "description": "TypeScript client for the codex-pdf HTTP API. Mirrors the Python codex_pdf.client surface.",
5
5
  "type": "module",
6
6
  "license": "AGPL-3.0-or-later",
@@ -10,7 +10,7 @@ the Railway codex-pdf service.
10
10
  - **Account**: `99aa3f9229469650a746a7d39ac58448` (`Quincy@thinkneverland.com's Account`)
11
11
  - **KV namespace `CACHE`**: `89a21ce1937046018a3d9d38f4e763ff` (preview `a4856d6f3b244087b907c189c2a2277d`)
12
12
  - **Origin** (`CODEX_ORIGIN_URL`): `https://codex-pdf-lint-sidecar-production.up.railway.app`
13
- - **Codex version pinned**: `1.7.2` (`CODEX_VERSION` var — bump on origin release)
13
+ - **Codex version pinned**: `1.8.0` (`CODEX_VERSION` var — bump on origin release)
14
14
  - **TTLs**: probe 24 h, Phase 1 24 h, Phase 2 7 d
15
15
 
16
16
  ## What it caches
@@ -23,7 +23,7 @@ CODEX_ORIGIN_URL = "https://codex-pdf-lint-sidecar-production.up.railway.app"
23
23
  # Codex package VERSION at deploy time. MUST match the origin's
24
24
  # `codex_pdf.version.VERSION` so KV keys line up. Bump on every
25
25
  # origin release.
26
- CODEX_VERSION = "1.7.2"
26
+ CODEX_VERSION = "1.8.0"
27
27
  # TTL (seconds) for cached SSE event payloads.
28
28
  PROBE_TTL = "86400" # 24 h — small payload, refresh daily
29
29
  PHASE1_TTL = "86400" # 24 h — matches origin Redis TTL
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "codex-pdf"
7
- version = "1.7.2"
7
+ version = "1.8.0"
8
8
  description = "Authoritative, versioned PDF facts contract for Think Neverland tools."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.12"
@@ -29,6 +29,9 @@ redis = [
29
29
  geom = [
30
30
  "pyclipr>=0.1.8",
31
31
  ]
32
+ retain = [
33
+ "boto3>=1.34",
34
+ ]
32
35
 
33
36
  [project.scripts]
34
37
  codex-pdf = "codex_pdf.cli:main"
@@ -64,6 +64,11 @@ from starlette.middleware.base import BaseHTTPMiddleware
64
64
  from codex_pdf.api.auth import authenticate
65
65
  from codex_pdf.api.blob_store import make_blob_store
66
66
  from codex_pdf.api.cache import cache_key, make_cache
67
+ from codex_pdf.api.retention import (
68
+ make_retention_store,
69
+ normalise_tenant,
70
+ parse_retention_consent,
71
+ )
67
72
  from codex_pdf.api.warmup import warmup_worker
68
73
  from codex_pdf.api.url_ingest import fetch_pdf_from_url
69
74
  from codex_pdf.color import (
@@ -163,6 +168,7 @@ def _record(endpoint: str, status_code: int, duration: float) -> None:
163
168
 
164
169
  _cache = make_cache()
165
170
  _blob_store = make_blob_store()
171
+ _retention_store = make_retention_store()
166
172
 
167
173
 
168
174
  def _repo_root() -> Path:
@@ -790,7 +796,11 @@ def _pre_render_bg(raw: bytes) -> None:
790
796
 
791
797
 
792
798
  async def _extract_impl(
793
- request: Request, pdf: UploadFile | None, *, endpoint_label: str
799
+ request: Request,
800
+ pdf: UploadFile | None,
801
+ *,
802
+ endpoint_label: str,
803
+ retain_form_value: str | None = None,
794
804
  ) -> JSONResponse:
795
805
  started = time.perf_counter()
796
806
  try:
@@ -805,6 +815,7 @@ async def _extract_impl(
805
815
  payload["pdf_sha256"] = sha
806
816
  # Warm the page-1 render cache in the background — don't block the response.
807
817
  asyncio.ensure_future(loop.run_in_executor(None, _pre_render_bg, raw))
818
+ await _maybe_retain(request, raw, sha, payload, retain_form_value)
808
819
  _record(endpoint_label, 200, time.perf_counter() - started)
809
820
  return JSONResponse(payload)
810
821
  except HTTPException as exc:
@@ -819,21 +830,116 @@ async def _extract_impl(
819
830
  ) from exc
820
831
 
821
832
 
833
async def _maybe_retain(
    request: Request,
    raw: bytes,
    sha: str,
    payload: dict[str, Any],
    retain_form_value: str | None,
) -> None:
    """Audit-log the consent decision and persist the upload when opted in.

    The audit line is emitted on every extract, whatever the decision, so
    the "off" path is observable. Persistence runs only when consent is
    affirmative AND a retention store is configured. A persistence failure
    is logged and swallowed: the extract response must not fail because the
    optional retention write did.

    Args:
        request: Incoming request; the consent, tenant and request-id
            headers are read from it.
        raw: The uploaded PDF bytes.
        sha: Hex SHA-256 of ``raw``; only the first 16 characters are logged.
        payload: The extract response body, stored next to the PDF.
        retain_form_value: Raw ``retain_for_training`` form field, if any.
    """
    decision = parse_retention_consent(
        retain_form_value,
        request.headers.get("x-compile-retain-for-training"),
    )
    tenant = normalise_tenant(request.headers.get("x-codex-tenant"))
    request_id = request.headers.get("x-codex-request-id") or ""
    # Bind the module-level store once so the check below and the worker
    # thread are guaranteed to see the same object.
    store = _retention_store
    logger.info(
        "extract_consent decision=%s source=%s mismatch=%s tenant=%s sha=%s "
        "request_id=%s retention_configured=%s",
        decision.consent,
        decision.source,
        decision.mismatch,
        tenant,
        sha[:16],
        request_id,
        store is not None,
    )
    if not decision.consent or store is None:
        return
    try:
        # We are inside a coroutine, so a loop is running by definition;
        # get_running_loop() is the supported way to obtain it.
        loop = asyncio.get_running_loop()
        # The store's put() is a blocking call; keep it off the event loop.
        keys = await loop.run_in_executor(
            None,
            lambda: store.put(
                pdf_bytes=raw,
                extract_payload=payload,
                request_id=request_id,
                tenant=tenant,
                sha256=sha,
                codex_version=VERSION,
                consent_source=decision.source,
            ),
        )
        logger.info(
            "extract_retained sha=%s tenant=%s keys=%s",
            sha[:16],
            tenant,
            list(keys.values()),
        )
    except Exception:
        # Best-effort by design: the user already has their answer.
        logger.exception("retention persist failed sha=%s tenant=%s", sha[:16], tenant)
890
+
891
+
822
892
  @app.post("/extract", include_in_schema=False, dependencies=[Depends(authenticate)])
823
893
  async def extract_root_endpoint(
824
894
  request: Request,
825
895
  pdf: UploadFile | None = File(default=None),
896
+ retain_for_training: str | None = Form(default=None),
826
897
  ) -> JSONResponse:
827
- return await _extract_impl(request, pdf, endpoint_label="extract")
898
+ return await _extract_impl(
899
+ request, pdf, endpoint_label="extract", retain_form_value=retain_for_training
900
+ )
828
901
 
829
902
 
830
903
  @app.post("/v1/extract", dependencies=[Depends(authenticate)])
831
904
  async def extract_endpoint(
832
905
  request: Request,
833
906
  pdf: UploadFile | None = File(default=None),
907
+ retain_for_training: str | None = Form(default=None),
834
908
  ) -> JSONResponse:
835
909
  """Extract a CodexDocument from an uploaded PDF or remote URL."""
836
- return await _extract_impl(request, pdf, endpoint_label="extract")
910
+ return await _extract_impl(
911
+ request, pdf, endpoint_label="extract", retain_form_value=retain_for_training
912
+ )
913
+
914
+
915
class _RetentionDeleteRequest(BaseModel):
    # Request body for the retention erase endpoint. A ``#`` comment is used
    # rather than a docstring so the generated schema description is unchanged.
    sha256: str = Field(
        ...,
        description="Lower-case hex SHA-256 of the PDF to erase.",
    )
917
+
918
+
919
@app.post("/v1/retention/delete", dependencies=[Depends(authenticate)])
async def retention_delete_endpoint(payload: _RetentionDeleteRequest) -> JSONResponse:
    """Erase every retained object for ``sha256`` (DSAR endpoint).

    503 when retention is not configured (operator hasn't set
    ``CODEX_RETAIN_BUCKET``). 400 on a malformed sha256. ``deleted=0``
    when no objects matched — DSAR replay is idempotent, callers
    don't need to distinguish "never stored" from "already erased".
    """
    # Bind once so the None check and the call use the same object.
    store = _retention_store
    if store is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="retention not configured (CODEX_RETAIN_BUCKET unset)",
        )
    # Normalise before validation so upper-case or padded digests still match.
    sha = payload.sha256.strip().lower()
    # Inside a coroutine the running loop is the one to use.
    loop = asyncio.get_running_loop()
    try:
        # delete() is a blocking call; run it on the default executor.
        deleted = await loop.run_in_executor(None, store.delete, sha)
    except ValueError as exc:
        # The store raises ValueError for a digest that is not 64 hex chars.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)
        ) from exc
    logger.info("retention_delete sha=%s deleted=%d", sha[:16], deleted)
    return JSONResponse({"sha256": sha, "deleted": deleted})
837
943
 
838
944
 
839
945
  async def _read_probe_pdf(request: Request, pdf: UploadFile | None) -> bytes:
@@ -0,0 +1,312 @@
1
+ """Opt-in PDF retention for the marketing demo.
2
+
3
+ When a request to ``/v1/extract`` carries an explicit "yes, retain for
4
+ training" signal, the codex sidecar persists the input PDF, the
5
+ extract response, and a tiny metadata object to S3-compatible storage
6
+ under a hive-partitioned key. Everything else (no flag, ``false``
7
+ flag, storage unconfigured) is a no-op — the bytes leave memory the
8
+ moment the response ships, exactly like before.
9
+
10
+ Object key layout::
11
+
12
+ {prefix}/tenant={tenant}/dt={YYYY-MM-DD}/sha256={hex64}/document.pdf
13
+ {prefix}/tenant={tenant}/dt={YYYY-MM-DD}/sha256={hex64}/extract.json
14
+ {prefix}/tenant={tenant}/dt={YYYY-MM-DD}/sha256={hex64}/meta.json
15
+
16
+ Hive partitioning makes the bucket Athena/Glue-queryable later
17
+ without a migration. ``dt=`` makes an S3 Lifecycle rule trivial
18
+ (operator-owned — the app does *not* try to manage lifecycle
19
+ policies). ``sha256=`` dedupes idempotent re-uploads of the same
20
+ file on the same day.
21
+
22
+ ``CODEX_RETAIN_TTL_DAYS`` is informational: the app writes the
23
+ declared retention window into ``meta.json`` and the audit log, but
24
+ expiry is enforced by the bucket's lifecycle rule the operator
25
+ configures out-of-band.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import json
31
+ import logging
32
+ import os
33
+ import re
34
+ from dataclasses import dataclass
35
+ from datetime import datetime, timezone
36
+ from typing import Any, Protocol
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+ _TRUE_TOKENS = frozenset({"true", "1", "yes", "on"})
41
+ _TENANT_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,62}$")
42
+ _SHA256_RE = re.compile(r"^[0-9a-f]{64}$")
43
+
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # Consent parsing
47
+ # ---------------------------------------------------------------------------
48
+
49
+
50
@dataclass(frozen=True)
class ConsentDecision:
    """Immutable result of reconciling the form and header consent signals."""

    # Whether the request opted in to retention.
    consent: bool
    # Which signal decided the outcome: "form" | "header" | "both" | "none".
    source: str
    # True when form and header were both present and disagreed.
    mismatch: bool
55
+
56
+
57
def _truthy(value: str | None) -> bool | None:
    """Classify a raw consent token as opt-in, opt-out, or absent.

    Returns ``None`` when ``value`` is ``None`` or holds nothing but
    whitespace — both mean "no signal was sent". Otherwise the value is
    stripped and lower-cased, and the result is ``True`` only when it is one
    of the tokens in ``_TRUE_TOKENS``; every other non-empty token
    (``"false"``, ``"0"``, ``"no"``, ``"off"``, garbage) is ``False``.

    The three-valued return lets the caller tell "user said no" from "user
    said nothing" when reconciling the form field against the header.
    """
    token = "" if value is None else value.strip().lower()
    if token == "":
        return None
    return token in _TRUE_TOKENS
73
+
74
+
75
def parse_retention_consent(
    form_value: str | None, header_value: str | None
) -> ConsentDecision:
    """Combine the form-field and header consent signals into one decision.

    The form field is canonical. The header is consulted only when the form
    field carries no signal. When both carry a signal and disagree, the form
    value wins and ``mismatch`` is set so the audit log can surface the
    integration bug without overriding what the user chose. ``source`` is
    ``"both"`` only when both signals are affirmative.
    """
    form = _truthy(form_value)
    header = _truthy(header_value)

    if form is None:
        if header is None:
            # Neither channel said anything: retention stays off.
            return ConsentDecision(consent=False, source="none", mismatch=False)
        return ConsentDecision(consent=header, source="header", mismatch=False)

    if header is None:
        return ConsentDecision(consent=form, source="form", mismatch=False)

    # Both channels carried a signal from here on.
    if form and header:
        return ConsentDecision(consent=True, source="both", mismatch=False)
    return ConsentDecision(consent=form, source="form", mismatch=form != header)
99
+
100
+
101
def normalise_tenant(raw: str | None) -> str:
    """Return a safe tenant slug from ``X-Codex-Tenant``, or ``"default"``.

    The header is stripped and lower-cased. A missing or blank header maps to
    ``"default"`` silently. A value that does not match ``_TENANT_RE`` also
    maps to ``"default"``, but a warning is logged first: an upstream typo
    should not turn a user-facing extract into a 400.
    """
    candidate = (raw or "").strip().lower()
    if not candidate:
        return "default"
    if _TENANT_RE.match(candidate):
        return candidate
    logger.warning("retention tenant header rejected raw=%r → default", raw)
    return "default"
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # Storage
121
+ # ---------------------------------------------------------------------------
122
+
123
+
124
@dataclass(frozen=True)
class RetentionConfig:
    """Immutable retention settings read from ``CODEX_RETAIN_*`` variables."""

    bucket: str
    prefix: str
    ttl_days: int
    endpoint_url: str | None
    region: str
    access_key_id: str | None
    secret_access_key: str | None

    @classmethod
    def from_env(cls) -> RetentionConfig | None:
        """Build the config from the environment, or ``None`` if disabled.

        Retention is considered disabled when ``CODEX_RETAIN_BUCKET`` is unset
        or blank. A non-integer ``CODEX_RETAIN_TTL_DAYS`` is logged and
        replaced by 90. Blank optional values become ``None``; the prefix has
        surrounding slashes removed.
        """
        env = os.environ

        def text(name: str) -> str:
            # Unset and empty variables both collapse to "".
            return (env.get(name) or "").strip()

        bucket = text("CODEX_RETAIN_BUCKET")
        if not bucket:
            return None

        raw_ttl = env.get("CODEX_RETAIN_TTL_DAYS", "90")
        try:
            ttl_days = int(raw_ttl)
        except ValueError:
            logger.warning("CODEX_RETAIN_TTL_DAYS is not an int → defaulting to 90")
            ttl_days = 90

        return cls(
            bucket=bucket,
            prefix=text("CODEX_RETAIN_PREFIX").strip("/"),
            ttl_days=ttl_days,
            endpoint_url=text("CODEX_RETAIN_ENDPOINT_URL") or None,
            region=(env.get("CODEX_RETAIN_REGION") or "us-east-1").strip(),
            access_key_id=text("CODEX_RETAIN_ACCESS_KEY_ID") or None,
            secret_access_key=text("CODEX_RETAIN_SECRET_ACCESS_KEY") or None,
        )
155
+
156
+
157
class _S3Client(Protocol):
    """Structural type for the three S3 client calls this module makes."""

    def put_object(self, **kwargs: Any) -> Any:
        ...

    def list_objects_v2(self, **kwargs: Any) -> Any:
        ...

    def delete_objects(self, **kwargs: Any) -> Any:
        ...
161
+
162
+
163
def _utc_date() -> str:
    """Return today's date in UTC as ``YYYY-MM-DD``."""
    today = datetime.now(timezone.utc).date()
    return today.isoformat()
165
+
166
+
167
def _utc_ts() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    now = datetime.now(timezone.utc)
    return now.strftime("%Y-%m-%dT%H:%M:%SZ")
169
+
170
+
171
def _object_key(prefix: str, tenant: str, dt: str, sha: str, suffix: str) -> str:
    """Join the hive-style partitions and file name into one object key.

    Empty segments are dropped, so an empty ``prefix`` yields a key that
    starts directly at ``tenant=``.
    """
    segments = [prefix, f"tenant={tenant}", f"dt={dt}", f"sha256={sha}", suffix]
    return "/".join(filter(None, segments))
176
+
177
+
178
class RetentionStore:
    """Three-object-per-event S3 writer with a sha256-keyed erase path.

    The S3 client is constructor-injected so tests can substitute a fake
    without depending on boto3 (or moto). Production instantiation goes
    through ``make_retention_store()``.
    """

    def __init__(self, config: RetentionConfig, client: _S3Client) -> None:
        self._config = config
        self._client = client

    @property
    def config(self) -> RetentionConfig:
        """The configuration this store was built with."""
        return self._config

    def put(
        self,
        *,
        pdf_bytes: bytes,
        extract_payload: dict[str, Any],
        request_id: str,
        tenant: str,
        sha256: str,
        codex_version: str,
        consent_source: str,
    ) -> dict[str, str]:
        """Write ``document.pdf``, ``extract.json`` and ``meta.json``.

        All three objects share one ``tenant=/dt=/sha256=`` partition, where
        ``dt`` is today's UTC date. JSON bodies are serialised compactly with
        sorted keys.

        Returns:
            The object keys under ``"pdf"``, ``"extract"`` and ``"meta"``, in
            that order, for the audit log.

        Raises:
            Whatever the injected client raises from ``put_object``; there is
            no retry or rollback here.
        """
        cfg = self._config
        dt = _utc_date()
        meta: dict[str, Any] = {
            "request_id": request_id,
            "ts": _utc_ts(),
            "sha256": sha256,
            "content_length": len(pdf_bytes),
            "tenant": tenant,
            "codex_version": codex_version,
            "consent_source": consent_source,
            "retention_window_days": cfg.ttl_days,
        }
        pdf_key = _object_key(cfg.prefix, tenant, dt, sha256, "document.pdf")
        extract_key = _object_key(cfg.prefix, tenant, dt, sha256, "extract.json")
        meta_key = _object_key(cfg.prefix, tenant, dt, sha256, "meta.json")

        self._client.put_object(
            Bucket=cfg.bucket,
            Key=pdf_key,
            Body=pdf_bytes,
            ContentType="application/pdf",
        )
        self._client.put_object(
            Bucket=cfg.bucket,
            Key=extract_key,
            Body=json.dumps(extract_payload, sort_keys=True, separators=(",", ":")).encode(
                "utf-8"
            ),
            ContentType="application/json",
        )
        self._client.put_object(
            Bucket=cfg.bucket,
            Key=meta_key,
            Body=json.dumps(meta, sort_keys=True, separators=(",", ":")).encode("utf-8"),
            ContentType="application/json",
        )
        return {"pdf": pdf_key, "extract": extract_key, "meta": meta_key}

    def delete(self, sha256: str) -> int:
        """Erase every object whose key contains ``/sha256={sha256}/``.

        Every key under the configured prefix is listed (following
        continuation tokens) and filtered client-side, because the digest
        sits after the ``tenant=`` and ``dt=`` partitions and cannot be used
        as a listing prefix. This linear scan is deliberate: it avoids a
        parallel index that would have to be kept consistent.

        Args:
            sha256: Exactly 64 lower-case hex characters.

        Returns:
            The number of objects the service did not report as failed.

        Raises:
            ValueError: ``sha256`` is not a well-formed lower-case digest.
        """
        # fullmatch, not match: with ``$`` a trailing newline would slip through.
        if not _SHA256_RE.fullmatch(sha256):
            raise ValueError(f"invalid sha256: {sha256!r}")

        cfg = self._config
        needle = f"/sha256={sha256}/"
        list_prefix = f"{cfg.prefix}/" if cfg.prefix else ""
        matches: list[dict[str, str]] = []
        token: str | None = None
        while True:
            kwargs: dict[str, Any] = {"Bucket": cfg.bucket, "Prefix": list_prefix}
            if token:
                kwargs["ContinuationToken"] = token
            resp = self._client.list_objects_v2(**kwargs)
            matches.extend(
                {"Key": entry.get("Key", "")}
                for entry in resp.get("Contents", []) or []
                if needle in entry.get("Key", "")
            )
            if not resp.get("IsTruncated"):
                break
            token = resp.get("NextContinuationToken")
            if not token:
                # Truncated but no token: stop rather than loop forever.
                break

        deleted = 0
        # S3 DeleteObjects accepts at most 1000 keys per request.
        for start in range(0, len(matches), 1000):
            batch = matches[start : start + 1000]
            resp = self._client.delete_objects(
                Bucket=cfg.bucket, Delete={"Objects": batch}
            )
            # A successful call can still carry per-key failures. Count only
            # what was actually erased so an erasure request is not
            # over-reported. Non-dict responses (test doubles) count in full.
            errors = resp.get("Errors") if isinstance(resp, dict) else None
            failed = len(errors) if errors else 0
            if failed:
                logger.error(
                    "retention delete failed for %d of %d objects sha=%s",
                    failed,
                    len(batch),
                    sha256[:16],
                )
            deleted += len(batch) - failed
        return deleted
285
+
286
+
287
def make_retention_store() -> RetentionStore | None:
    """Build a production ``RetentionStore`` from env, or return ``None``.

    ``None`` is the explicit "feature off" sentinel. It is returned when
    ``CODEX_RETAIN_BUCKET`` is unset, when boto3 is not installed, or when
    the S3 client cannot be constructed. Retention is optional, so none of
    these may stop the caller from starting.

    The boto3 import is deferred so the base wheel doesn't pull boto3 in
    unless the operator opts in via ``CODEX_RETAIN_BUCKET``.
    """
    config = RetentionConfig.from_env()
    if config is None:
        return None
    try:
        import boto3  # type: ignore[import-not-found]
    except ImportError:
        logger.warning(
            "CODEX_RETAIN_BUCKET set but boto3 is not installed — "
            "install codex-pdf[retain] to enable retention. Falling back to disabled."
        )
        return None

    client_kwargs: dict[str, Any] = {"region_name": config.region}
    if config.endpoint_url:
        client_kwargs["endpoint_url"] = config.endpoint_url
    if config.access_key_id and config.secret_access_key:
        client_kwargs["aws_access_key_id"] = config.access_key_id
        client_kwargs["aws_secret_access_key"] = config.secret_access_key
    elif config.access_key_id or config.secret_access_key:
        # Half a credential pair is almost certainly a deployment mistake;
        # make it visible instead of dropping it silently.
        logger.warning(
            "only one of CODEX_RETAIN_ACCESS_KEY_ID / CODEX_RETAIN_SECRET_ACCESS_KEY "
            "is set — ignoring the partial credential pair"
        )

    try:
        client = boto3.client("s3", **client_kwargs)
    except Exception:
        # Start-up boundary: a bad endpoint or region must disable the
        # optional feature, not crash the process importing this module.
        logger.exception("retention S3 client construction failed — falling back to disabled")
        return None
    return RetentionStore(config, client)
@@ -38,5 +38,5 @@ or unreachable Redis service can never crash the codex API.
38
38
  1.3.0 (prior): SSRF hardening + /v1/walk/type4 endpoint.
39
39
  """
40
40
 
41
- VERSION = "1.7.2"
41
+ VERSION = "1.8.0"
42
42
  __version__ = VERSION
@@ -0,0 +1,94 @@
1
+ """Unit tests for the retention consent parser.
2
+
3
+ The marketing demo sends both a form field (``retain_for_training``)
4
+ and a header (``X-Compile-Retain-For-Training``). These tests pin the
5
+ full truth matrix: missing/present × every token variant × form-vs-
6
+ header reconciliation. Anything outside the documented true tokens
7
+ must be false — a typo like ``"yes please"`` is not an opt-in.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import pytest
13
+
14
+ from codex_pdf.api.retention import (
15
+ ConsentDecision,
16
+ normalise_tenant,
17
+ parse_retention_consent,
18
+ )
19
+
20
+
21
# Rows: (form value, header value, expected consent, expected source,
# expected mismatch flag).
_CONSENT_CASES = [
    # Nothing supplied (or only blank values) → consent is off.
    (None, None, False, "none", False),
    ("", None, False, "none", False),
    (None, "", False, "none", False),
    (" ", " ", False, "none", False),
    # Only the header is supplied.
    (None, "true", True, "header", False),
    (None, "TRUE", True, "header", False),
    (None, "1", True, "header", False),
    (None, "yes", True, "header", False),
    (None, "on", True, "header", False),
    (None, "false", False, "header", False),
    (None, "0", False, "header", False),
    (None, "no", False, "header", False),
    (None, "off", False, "header", False),
    (None, "maybe", False, "header", False),
    # Only the form field is supplied.
    ("true", None, True, "form", False),
    ("TRUE", None, True, "form", False),
    ("1", None, True, "form", False),
    ("yes", None, True, "form", False),
    ("on", None, True, "form", False),
    ("false", None, False, "form", False),
    ("0", None, False, "form", False),
    ("no", None, False, "form", False),
    ("off", None, False, "form", False),
    # Both supplied and in agreement.
    ("true", "true", True, "both", False),
    ("1", "yes", True, "both", False),
    ("false", "false", False, "form", False),
    # Both supplied but in disagreement → form wins, mismatch is flagged.
    ("true", "false", True, "form", True),
    ("false", "true", False, "form", True),
    ("yes", "no", True, "form", True),
    # Surrounding whitespace and letter case are tolerated.
    (" Yes ", None, True, "form", False),
    (None, " ON\n", True, "header", False),
]


@pytest.mark.parametrize(
    "form,header,want_consent,want_source,want_mismatch", _CONSENT_CASES
)
def test_parse_retention_consent_matrix(
    form: str | None,
    header: str | None,
    want_consent: bool,
    want_source: str,
    want_mismatch: bool,
) -> None:
    """Check ``parse_retention_consent`` against one row of the truth matrix."""
    decision = parse_retention_consent(form, header)
    expected = ConsentDecision(
        consent=want_consent,
        source=want_source,
        mismatch=want_mismatch,
    )
    assert decision == expected
74
+
75
+
76
# Rows: (raw tenant value, expected normalised tenant).
_TENANT_CASES = [
    (None, "default"),
    ("", "default"),
    (" ", "default"),
    ("compile-marketing", "compile-marketing"),
    ("COMPILE-Marketing", "compile-marketing"),
    ("acme42", "acme42"),
    # Invalid identifiers are expected to fall back to "default".
    ("under_score", "default"),
    ("has space", "default"),
    ("-leadingdash", "default"),
    ("a" * 64, "default"),
    ("a/b", "default"),
]


@pytest.mark.parametrize("raw,want", _TENANT_CASES)
def test_normalise_tenant(raw: str | None, want: str) -> None:
    """Check that ``normalise_tenant`` maps ``raw`` to the expected tenant."""
    normalised = normalise_tenant(raw)
    assert normalised == want