codex-pdf 1.4.1__tar.gz → 1.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/PKG-INFO +1 -1
  2. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/pyproject.toml +1 -1
  3. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-document.schema.json +34 -1
  4. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/signals.py +5 -9
  5. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/summary.py +164 -1
  6. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/models/v1.py +16 -1
  7. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/version.py +1 -1
  8. codex_pdf-1.4.2/tests/test_summary_dieline.py +80 -0
  9. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/uv.lock +1 -1
  10. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/.cursor/rules/service-ownership.mdc +0 -0
  11. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/.github/workflows/ci.yml +0 -0
  12. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/.gitignore +0 -0
  13. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/.windsurf/rules/service-ownership.md +0 -0
  14. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/APPROVALS.md +0 -0
  15. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/CLAUDE.md +0 -0
  16. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/Dockerfile +0 -0
  17. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/LICENSE +0 -0
  18. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/Procfile +0 -0
  19. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/README.md +0 -0
  20. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/clients/ts/README.md +0 -0
  21. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/clients/ts/package-lock.json +0 -0
  22. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/clients/ts/package.json +0 -0
  23. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/clients/ts/src/color.ts +0 -0
  24. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/clients/ts/src/index.test.ts +0 -0
  25. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/clients/ts/src/index.ts +0 -0
  26. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/clients/ts/tsconfig.json +0 -0
  27. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/architecture.md +0 -0
  28. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/backward-compatibility.md +0 -0
  29. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/cleanup-stop-gates.md +0 -0
  30. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/cli.md +0 -0
  31. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/contract.md +0 -0
  32. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/deploy.md +0 -0
  33. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/discovery-audit.md +0 -0
  34. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/migration-plan.md +0 -0
  35. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/operations/codex-change-ripple.md +0 -0
  36. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/operations/marketing-deploy-template.md +0 -0
  37. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/parity.md +0 -0
  38. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/preflight-ingest.md +0 -0
  39. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/release-1.0.0.md +0 -0
  40. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/docs/service-ownership-contract.md +0 -0
  41. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/railway.toml +0 -0
  42. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/reports/audit/mislocated-closure.json +0 -0
  43. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/reports/audit/produce_surface.json +0 -0
  44. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/reports/parity/codex_deep.json +0 -0
  45. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/reports/parity/codex_inventory.json +0 -0
  46. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/reports/parity/codex_summary.json +0 -0
  47. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/reports/parity/criterion4_parser_surface.json +0 -0
  48. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/reports/parity/pdfx4_deep.json +0 -0
  49. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/reports/parity/pdfx4_inventory.json +0 -0
  50. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/reports/parity/pdfx4_summary.json +0 -0
  51. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/reports/parity/render_baseline.json +0 -0
  52. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/reports/parity/viewer_essentials.json +0 -0
  53. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/CHANGELOG.md +0 -0
  54. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-annotation.schema.json +0 -0
  55. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-box.schema.json +0 -0
  56. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-color-space.schema.json +0 -0
  57. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-font.schema.json +0 -0
  58. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-form-xobject.schema.json +0 -0
  59. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-image.schema.json +0 -0
  60. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-issue.schema.json +0 -0
  61. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-ocg.schema.json +0 -0
  62. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-output-intent.schema.json +0 -0
  63. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-page-object.schema.json +0 -0
  64. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-page.schema.json +0 -0
  65. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-preflight-report.schema.json +0 -0
  66. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-source.schema.json +0 -0
  67. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-spot-colorant.schema.json +0 -0
  68. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-transparency-tree.schema.json +0 -0
  69. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-trap-evidence.schema.json +0 -0
  70. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/schemas/v1/codex-warning.schema.json +0 -0
  71. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/scripts/parity_viewer_essentials.py +0 -0
  72. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/scripts/produce_surface_audit.py +0 -0
  73. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/__init__.py +0 -0
  74. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/api/__init__.py +0 -0
  75. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/api/auth.py +0 -0
  76. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/api/cache.py +0 -0
  77. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/api/main.py +0 -0
  78. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/api/url_ingest.py +0 -0
  79. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/cli.py +0 -0
  80. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/client/__init__.py +0 -0
  81. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/client/http_client.py +0 -0
  82. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/color/__init__.py +0 -0
  83. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/color/color_math.py +0 -0
  84. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/color/curated.py +0 -0
  85. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/color/data/pantone_reference.json +0 -0
  86. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/color/normalize.py +0 -0
  87. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/color/pantone.py +0 -0
  88. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/color/resolver.py +0 -0
  89. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/eval/__init__.py +0 -0
  90. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/eval/ps_type4.py +0 -0
  91. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/__init__.py +0 -0
  92. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/annotations.py +0 -0
  93. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/color.py +0 -0
  94. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/common.py +0 -0
  95. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/content_inventory.py +0 -0
  96. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/document.py +0 -0
  97. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/fonts.py +0 -0
  98. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/forms.py +0 -0
  99. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/images.py +0 -0
  100. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/ocg.py +0 -0
  101. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/structure.py +0 -0
  102. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/transparency.py +0 -0
  103. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/extract/trapping.py +0 -0
  104. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/geom/__init__.py +0 -0
  105. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/geom/box.py +0 -0
  106. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/geom/matrix.py +0 -0
  107. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/geom/path.py +0 -0
  108. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/geom/tile.py +0 -0
  109. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/geom/units.py +0 -0
  110. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/models/__init__.py +0 -0
  111. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/parity.py +0 -0
  112. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/preflight_ingest/__init__.py +0 -0
  113. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/preflight_ingest/adapters.py +0 -0
  114. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/render/__init__.py +0 -0
  115. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/render/_common.py +0 -0
  116. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/render/content_stream.py +0 -0
  117. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/render/layer.py +0 -0
  118. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/render/page.py +0 -0
  119. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/render/separations.py +0 -0
  120. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/src/codex_pdf/schema.py +0 -0
  121. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/conftest.py +0 -0
  122. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/fixtures/conforming/minimal.pdf +0 -0
  123. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/fixtures/generate_fixtures.py +0 -0
  124. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/fixtures/violating/no_output_intent.pdf +0 -0
  125. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/fixtures/violating/no_trim_box.pdf +0 -0
  126. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/fixtures/violating/no_xmp.pdf +0 -0
  127. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/fixtures/violating/pdf_1_4.pdf +0 -0
  128. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/golden/1.0.0/reference.json +0 -0
  129. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_api.py +0 -0
  130. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_cache.py +0 -0
  131. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_cli_contract.py +0 -0
  132. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_color.py +0 -0
  133. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_extract_analysis_signals.py +0 -0
  134. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_extract_structural.py +0 -0
  135. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_geom.py +0 -0
  136. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_golden.py +0 -0
  137. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_golden_corpus.py +0 -0
  138. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_models.py +0 -0
  139. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_parity.py +0 -0
  140. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_preflight_ingest.py +0 -0
  141. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_produce_surface_audit.py +0 -0
  142. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_schema.py +0 -0
  143. {codex_pdf-1.4.1 → codex_pdf-1.4.2}/tests/test_schemas_all.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codex-pdf
3
- Version: 1.4.1
3
+ Version: 1.4.2
4
4
  Summary: Authoritative, versioned PDF facts contract for Think Neverland tools.
5
5
  Author-email: Think Neverland <dev@thinkneverland.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "codex-pdf"
7
- version = "1.4.1"
7
+ version = "1.4.2"
8
8
  description = "Authoritative, versioned PDF facts contract for Think Neverland tools."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.12"
@@ -1468,7 +1468,8 @@
1468
1468
  "enum": [
1469
1469
  "ocg_name",
1470
1470
  "ocg_processing_step",
1471
- "trap_layer"
1471
+ "trap_layer",
1472
+ "analysis_signal"
1472
1473
  ],
1473
1474
  "title": "Source",
1474
1475
  "type": "string"
@@ -1496,6 +1497,31 @@
1496
1497
  ],
1497
1498
  "default": null,
1498
1499
  "title": "Processing Step"
1500
+ },
1501
+ "confidence": {
1502
+ "default": 0.5,
1503
+ "maximum": 1.0,
1504
+ "minimum": 0.0,
1505
+ "title": "Confidence",
1506
+ "type": "number"
1507
+ },
1508
+ "reason_codes": {
1509
+ "items": {
1510
+ "enum": [
1511
+ "name_keyword",
1512
+ "iso19593_processing_step",
1513
+ "trap_layer_keyword",
1514
+ "analysis_ocg_marked_keyword",
1515
+ "analysis_dash_pattern",
1516
+ "analysis_thin_stroke",
1517
+ "analysis_stroke_dominant",
1518
+ "analysis_dense_path_network",
1519
+ "analysis_low_fill_ratio"
1520
+ ],
1521
+ "type": "string"
1522
+ },
1523
+ "title": "Reason Codes",
1524
+ "type": "array"
1499
1525
  }
1500
1526
  },
1501
1527
  "required": [
@@ -1519,6 +1545,13 @@
1519
1545
  "title": "Candidates",
1520
1546
  "type": "array"
1521
1547
  },
1548
+ "overall_confidence": {
1549
+ "default": 0.0,
1550
+ "maximum": 1.0,
1551
+ "minimum": 0.0,
1552
+ "title": "Overall Confidence",
1553
+ "type": "number"
1554
+ },
1522
1555
  "trapped_flag": {
1523
1556
  "anyOf": [
1524
1557
  {
@@ -18,20 +18,16 @@ def extract_analysis_signals_pikepdf(pdf_bytes: bytes) -> dict[str, Any]:
18
18
  with pikepdf.open(BytesIO(pdf_bytes)) as pdf:
19
19
  out["spot_names"] = _collect_spot_names(pdf)
20
20
  out["layer_names"] = _collect_layer_names(pdf)
21
- page_signals = _extract_page_one_signals(pdf)
22
- if page_signals:
23
- out["page_1"] = page_signals
21
+ for idx, page in enumerate(pdf.pages, start=1):
22
+ page_signals = _extract_page_signals(page)
23
+ if page_signals:
24
+ out[f"page_{idx}"] = page_signals
24
25
  except Exception:
25
26
  return {}
26
27
  return out
27
28
 
28
29
 
29
- def _extract_page_one_signals(pdf: Any) -> dict[str, Any]:
30
- try:
31
- page = pdf.pages[0]
32
- except Exception:
33
- return {}
34
-
30
+ def _extract_page_signals(page: Any) -> dict[str, Any]:
35
31
  resources = page.get("/Resources") if hasattr(page, "get") else None
36
32
  cs_dict = resources.get("/ColorSpace") if resources and hasattr(resources, "get") else None
37
33
  props_dict = resources.get("/Properties") if resources and hasattr(resources, "get") else None
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import re
6
+ from typing import Any
6
7
 
7
8
  from codex_pdf.models.v1 import (
8
9
  CodexDocument,
@@ -104,6 +105,8 @@ def _dieline_candidates(doc: CodexDocument) -> CodexSummaryDielineMetrics:
104
105
  source: str,
105
106
  ocg_id: str | None = None,
106
107
  processing_step: str | None = None,
108
+ confidence: float = 0.5,
109
+ reason_codes: list[str] | None = None,
107
110
  ) -> None:
108
111
  trimmed = name.strip()
109
112
  if not trimmed:
@@ -118,12 +121,20 @@ def _dieline_candidates(doc: CodexDocument) -> CodexSummaryDielineMetrics:
118
121
  source=source, # type: ignore[arg-type]
119
122
  ocg_id=ocg_id,
120
123
  processing_step=processing_step,
124
+ confidence=max(0.0, min(1.0, confidence)),
125
+ reason_codes=sorted(set(reason_codes or [])), # type: ignore[arg-type]
121
126
  )
122
127
  )
123
128
 
124
129
  for ocg in doc.ocgs:
125
130
  if _DIELINE_PATTERN.search(ocg.name):
126
- _add(name=ocg.name, source="ocg_name", ocg_id=ocg.ocg_id)
131
+ _add(
132
+ name=ocg.name,
133
+ source="ocg_name",
134
+ ocg_id=ocg.ocg_id,
135
+ confidence=0.95,
136
+ reason_codes=["name_keyword"],
137
+ )
127
138
  if ocg.iso19593_processing_step and _DIELINE_PATTERN.search(
128
139
  ocg.iso19593_processing_step
129
140
  ):
@@ -132,6 +143,8 @@ def _dieline_candidates(doc: CodexDocument) -> CodexSummaryDielineMetrics:
132
143
  source="ocg_processing_step",
133
144
  ocg_id=ocg.ocg_id,
134
145
  processing_step=ocg.iso19593_processing_step,
146
+ confidence=0.98,
147
+ reason_codes=["iso19593_processing_step"],
135
148
  )
136
149
 
137
150
  for layer in doc.trap_evidence.trap_layers:
@@ -142,15 +155,165 @@ def _dieline_candidates(doc: CodexDocument) -> CodexSummaryDielineMetrics:
142
155
  source="trap_layer",
143
156
  ocg_id=layer.ocg_id,
144
157
  processing_step=layer.processing_step,
158
+ confidence=0.9,
159
+ reason_codes=["trap_layer_keyword"],
160
+ )
161
+
162
+ for page_num, page_signal in _iter_page_signals(doc.analysis):
163
+ signal_hits = _dieline_signal_candidates(page_num, page_signal)
164
+ for hit_name, hit_confidence, hit_reasons in signal_hits:
165
+ _add(
166
+ name=hit_name,
167
+ source="analysis_signal",
168
+ confidence=hit_confidence,
169
+ reason_codes=hit_reasons,
145
170
  )
146
171
 
147
172
  return CodexSummaryDielineMetrics(
148
173
  count=len(candidates),
149
174
  candidates=candidates,
175
+ overall_confidence=max((c.confidence for c in candidates), default=0.0),
150
176
  trapped_flag=doc.trapped_flag,
151
177
  )
152
178
 
153
179
 
180
+ def _iter_page_signals(analysis: dict[str, Any]) -> list[tuple[int, dict[str, Any]]]:
181
+ out: list[tuple[int, dict[str, Any]]] = []
182
+ for key, value in analysis.items():
183
+ if not isinstance(value, dict):
184
+ continue
185
+ if key.startswith("page_"):
186
+ suffix = key.removeprefix("page_")
187
+ if suffix.isdigit():
188
+ out.append((int(suffix), value))
189
+ return sorted(out, key=lambda item: item[0])
190
+
191
+
192
+ def _as_float(value: Any) -> float | None:
193
+ if isinstance(value, (int, float)):
194
+ return float(value)
195
+ if isinstance(value, str):
196
+ try:
197
+ return float(value)
198
+ except ValueError:
199
+ return None
200
+ return None
201
+
202
+
203
+ def _dieline_signal_candidates(
204
+ page_num: int,
205
+ page_signal: dict[str, Any],
206
+ ) -> list[tuple[str, float, list[str]]]:
207
+ content_ops = page_signal.get("content_ops")
208
+ if not isinstance(content_ops, list):
209
+ return []
210
+
211
+ path_ops = 0
212
+ stroke_ops = 0
213
+ fill_ops = 0
214
+ dash_ops = 0
215
+ thin_stroke_ops = 0
216
+ ocg_marked_content: set[str] = set()
217
+
218
+ prop_to_ocg_name = page_signal.get("prop_to_ocg_name")
219
+ ocg_map = prop_to_ocg_name if isinstance(prop_to_ocg_name, dict) else {}
220
+
221
+ for entry in content_ops:
222
+ if not isinstance(entry, dict):
223
+ continue
224
+ op = str(entry.get("op") or "").strip()
225
+ operands = entry.get("operands")
226
+ operands_list = operands if isinstance(operands, list) else []
227
+
228
+ if op in {"m", "l", "c", "v", "y", "re", "h"}:
229
+ path_ops += 1
230
+ elif op in {"S", "s"}:
231
+ stroke_ops += 1
232
+ elif op in {"f", "f*", "F", "B", "B*", "b", "b*"}:
233
+ fill_ops += 1
234
+ elif op == "d":
235
+ # Dashed strokes are a strong fold/crease indicator in packaging art.
236
+ if operands_list and isinstance(operands_list[0], list):
237
+ if any((_as_float(x) or 0.0) > 0.0 for x in operands_list[0]):
238
+ dash_ops += 1
239
+ elif op == "w" and operands_list:
240
+ width = _as_float(operands_list[0])
241
+ if width is not None and width <= 1.0:
242
+ thin_stroke_ops += 1
243
+ elif op == "BDC" and len(operands_list) >= 2:
244
+ maybe_type = str(operands_list[0]).lstrip("/")
245
+ prop_name = str(operands_list[1]).lstrip("/")
246
+ if maybe_type == "OC":
247
+ mapped = ocg_map.get(prop_name)
248
+ if mapped and _DIELINE_PATTERN.search(str(mapped)):
249
+ ocg_marked_content.add(str(mapped))
250
+
251
+ hits: list[tuple[str, float, list[str]]] = []
252
+ for name in sorted(ocg_marked_content):
253
+ hits.append(
254
+ (
255
+ f"{name} (page {page_num}, oc-marked)",
256
+ 0.92,
257
+ ["analysis_ocg_marked_keyword"],
258
+ )
259
+ )
260
+
261
+ # Structural heuristics for files with missing spot/color semantics.
262
+ fold_like = dash_ops >= 2 and stroke_ops >= 4
263
+ dieline_like = (
264
+ stroke_ops >= 8
265
+ and path_ops >= 24
266
+ and fill_ops <= max(1, stroke_ops // 4)
267
+ and (thin_stroke_ops >= 2 or dash_ops >= 1)
268
+ )
269
+
270
+ if fold_like:
271
+ fold_reasons: list[str] = []
272
+ fold_confidence = 0.45
273
+ if dash_ops >= 2:
274
+ fold_reasons.append("analysis_dash_pattern")
275
+ fold_confidence += 0.2
276
+ if stroke_ops >= 4:
277
+ fold_reasons.append("analysis_stroke_dominant")
278
+ fold_confidence += 0.15
279
+ if thin_stroke_ops >= 1:
280
+ fold_reasons.append("analysis_thin_stroke")
281
+ fold_confidence += 0.1
282
+ hits.append(
283
+ (
284
+ f"foldline-like vector strokes (page {page_num})",
285
+ min(0.9, fold_confidence),
286
+ fold_reasons,
287
+ )
288
+ )
289
+ if dieline_like:
290
+ dieline_reasons: list[str] = []
291
+ dieline_confidence = 0.5
292
+ if path_ops >= 24:
293
+ dieline_reasons.append("analysis_dense_path_network")
294
+ dieline_confidence += 0.2
295
+ if stroke_ops >= 8:
296
+ dieline_reasons.append("analysis_stroke_dominant")
297
+ dieline_confidence += 0.15
298
+ if fill_ops <= max(1, stroke_ops // 4):
299
+ dieline_reasons.append("analysis_low_fill_ratio")
300
+ dieline_confidence += 0.1
301
+ if thin_stroke_ops >= 2:
302
+ dieline_reasons.append("analysis_thin_stroke")
303
+ dieline_confidence += 0.05
304
+ if dash_ops >= 1:
305
+ dieline_reasons.append("analysis_dash_pattern")
306
+ dieline_confidence += 0.05
307
+ hits.append(
308
+ (
309
+ f"dieline-like vector path network (page {page_num})",
310
+ min(0.95, dieline_confidence),
311
+ dieline_reasons,
312
+ )
313
+ )
314
+ return hits
315
+
316
+
154
317
  def _image_metrics(doc: CodexDocument) -> CodexSummaryImageMetrics:
155
318
  dpi_values: list[float] = []
156
319
  below_300 = 0
@@ -328,14 +328,29 @@ class CodexSummarySpotColorMetrics(BaseModel):
328
328
 
329
329
  class CodexSummaryDielineCandidate(BaseModel):
330
330
  name: str
331
- source: Literal["ocg_name", "ocg_processing_step", "trap_layer"]
331
+ source: Literal["ocg_name", "ocg_processing_step", "trap_layer", "analysis_signal"]
332
332
  ocg_id: str | None = None
333
333
  processing_step: str | None = None
334
+ confidence: float = Field(default=0.5, ge=0.0, le=1.0)
335
+ reason_codes: list[
336
+ Literal[
337
+ "name_keyword",
338
+ "iso19593_processing_step",
339
+ "trap_layer_keyword",
340
+ "analysis_ocg_marked_keyword",
341
+ "analysis_dash_pattern",
342
+ "analysis_thin_stroke",
343
+ "analysis_stroke_dominant",
344
+ "analysis_dense_path_network",
345
+ "analysis_low_fill_ratio",
346
+ ]
347
+ ] = Field(default_factory=list)
334
348
 
335
349
 
336
350
  class CodexSummaryDielineMetrics(BaseModel):
337
351
  count: int = 0
338
352
  candidates: list[CodexSummaryDielineCandidate] = Field(default_factory=list)
353
+ overall_confidence: float = Field(default=0.0, ge=0.0, le=1.0)
339
354
  trapped_flag: Literal["True", "False", "Unknown"] | None = None
340
355
 
341
356
 
@@ -38,5 +38,5 @@ or unreachable Redis service can never crash the codex API.
38
38
  1.3.0 (prior): SSRF hardening + /v1/walk/type4 endpoint.
39
39
  """
40
40
 
41
- VERSION = "1.4.0"
41
+ VERSION = "1.4.2"
42
42
  __version__ = VERSION
@@ -0,0 +1,80 @@
1
+ from codex_pdf.extract.summary import build_document_summary
2
+ from codex_pdf.models.v1 import CodexDocument, CodexOCG, CodexSourceRef
3
+
4
+
5
+ def _doc_with_analysis(analysis: dict) -> CodexDocument:
6
+ return CodexDocument(
7
+ codex_version="1.4.0",
8
+ document_id="deadbeef",
9
+ source=CodexSourceRef(uri="fixture.pdf", sha256="deadbeef", size_bytes=1234),
10
+ analysis=analysis,
11
+ )
12
+
13
+
14
+ def test_dieline_summary_detects_name_based_candidates() -> None:
15
+ doc = _doc_with_analysis({})
16
+ doc.ocgs = [CodexOCG(ocg_id="oc1", name="Dieline")]
17
+ summary = build_document_summary(doc)
18
+ assert summary.dieline.count >= 1
19
+ ocg_hits = [c for c in summary.dieline.candidates if c.source == "ocg_name"]
20
+ assert ocg_hits
21
+ assert all(c.confidence >= 0.9 for c in ocg_hits)
22
+ assert all("name_keyword" in c.reason_codes for c in ocg_hits)
23
+ assert summary.dieline.overall_confidence >= 0.9
24
+
25
+
26
+ def test_dieline_summary_detects_structural_candidates_without_spot_names() -> None:
27
+ # No spot names / no OCG labels. Detection should still find likely linework.
28
+ ops = []
29
+ for _ in range(32):
30
+ ops.append({"op": "m", "operands": [0, 0]})
31
+ ops.append({"op": "l", "operands": [100, 0]})
32
+ for _ in range(12):
33
+ ops.append({"op": "S", "operands": []})
34
+ ops.extend(
35
+ [
36
+ {"op": "w", "operands": [0.5]},
37
+ {"op": "d", "operands": [[2, 2], 0]},
38
+ {"op": "d", "operands": [[3, 3], 0]},
39
+ ]
40
+ )
41
+ doc = _doc_with_analysis({"page_1": {"content_ops": ops}})
42
+ summary = build_document_summary(doc)
43
+ signal_hits = [c for c in summary.dieline.candidates if c.source == "analysis_signal"]
44
+ assert signal_hits
45
+ names = [c.name for c in summary.dieline.candidates]
46
+ assert any("dieline-like" in name for name in names)
47
+ assert any("foldline-like" in name for name in names)
48
+ fold = next(c for c in signal_hits if "foldline-like" in c.name)
49
+ assert fold.confidence >= 0.7
50
+ assert "analysis_dash_pattern" in fold.reason_codes
51
+ dieline = next(c for c in signal_hits if "dieline-like" in c.name)
52
+ assert dieline.confidence >= 0.8
53
+ assert "analysis_dense_path_network" in dieline.reason_codes
54
+ assert summary.dieline.overall_confidence >= dieline.confidence
55
+
56
+
57
+ def test_dieline_summary_uses_non_first_page_analysis() -> None:
58
+ ops = []
59
+ for _ in range(24):
60
+ ops.append({"op": "re", "operands": [0, 0, 10, 10]})
61
+ for _ in range(10):
62
+ ops.append({"op": "S", "operands": []})
63
+ ops.extend(
64
+ [
65
+ {"op": "w", "operands": [0.75]},
66
+ {"op": "d", "operands": [[1, 1], 0]},
67
+ {"op": "d", "operands": [[1, 2], 0]},
68
+ ]
69
+ )
70
+ doc = _doc_with_analysis({"page_2": {"content_ops": ops}})
71
+ summary = build_document_summary(doc)
72
+ page2_hit = next(c for c in summary.dieline.candidates if "page 2" in c.name)
73
+ assert page2_hit.source == "analysis_signal"
74
+ assert page2_hit.reason_codes
75
+
76
+
77
+ def test_dieline_summary_overall_confidence_defaults_to_zero() -> None:
78
+ summary = build_document_summary(_doc_with_analysis({}))
79
+ assert summary.dieline.count == 0
80
+ assert summary.dieline.overall_confidence == 0.0
@@ -65,7 +65,7 @@ wheels = [
65
65
 
66
66
  [[package]]
67
67
  name = "codex-pdf"
68
- version = "1.4.0"
68
+ version = "1.4.2"
69
69
  source = { editable = "." }
70
70
  dependencies = [
71
71
  { name = "fastapi" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes