docpluck 2.4.44__tar.gz → 2.4.45__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (335) hide show
  1. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/_project/lessons.md +8 -0
  2. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-iterate/LEARNINGS.md +26 -0
  3. {docpluck-2.4.44 → docpluck-2.4.45}/CHANGELOG.md +10 -0
  4. {docpluck-2.4.44 → docpluck-2.4.45}/PKG-INFO +1 -1
  5. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/__init__.py +1 -1
  6. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/render.py +7 -14
  7. {docpluck-2.4.44 → docpluck-2.4.45}/docs/TRIAGE_2026-05-14_phase_5d_gold_audit.md +5 -1
  8. {docpluck-2.4.44 → docpluck-2.4.45}/pyproject.toml +1 -1
  9. {docpluck-2.4.44 → docpluck-2.4.45}/tests/golden/sections/apa_multi_study_pdf.json +15 -15
  10. {docpluck-2.4.44 → docpluck-2.4.45}/tests/golden/sections/apa_single_study_pdf.json +11 -11
  11. docpluck-2.4.45/tests/snapshots/amj_lattice.txt +3022 -0
  12. docpluck-2.4.45/tests/snapshots/apa_chan_feldman_lineless.txt +2390 -0
  13. docpluck-2.4.45/tests/snapshots/apa_chen_jesp_lineless.txt +4054 -0
  14. docpluck-2.4.45/tests/snapshots/apa_efendic_affect.txt +1164 -0
  15. docpluck-2.4.45/tests/snapshots/apa_ip_feldman_pspb.txt +2683 -0
  16. docpluck-2.4.45/tests/snapshots/bmc_lattice.txt +1140 -0
  17. docpluck-2.4.45/tests/snapshots/ieee_figure_heavy.txt +1687 -0
  18. docpluck-2.4.45/tests/snapshots/ieee_lattice.txt +1757 -0
  19. docpluck-2.4.45/tests/snapshots/jama_lattice.txt +1881 -0
  20. docpluck-2.4.45/tests/snapshots/nat_comms_figure_only.txt +2092 -0
  21. docpluck-2.4.45/tests/snapshots/nature_minimal_rule.txt +1293 -0
  22. docpluck-2.4.45/tests/snapshots/scirep_minimal_rule.txt +1481 -0
  23. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_numbered_heading_promotion_real_pdf.py +35 -2
  24. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_render.py +7 -3
  25. docpluck-2.4.44/tests/snapshots/amj_lattice.txt +0 -1165
  26. docpluck-2.4.44/tests/snapshots/apa_chan_feldman_lineless.txt +0 -1200
  27. docpluck-2.4.44/tests/snapshots/apa_chen_jesp_lineless.txt +0 -2122
  28. docpluck-2.4.44/tests/snapshots/apa_efendic_affect.txt +0 -584
  29. docpluck-2.4.44/tests/snapshots/apa_ip_feldman_pspb.txt +0 -1405
  30. docpluck-2.4.44/tests/snapshots/bmc_lattice.txt +0 -318
  31. docpluck-2.4.44/tests/snapshots/ieee_figure_heavy.txt +0 -543
  32. docpluck-2.4.44/tests/snapshots/ieee_lattice.txt +0 -1395
  33. docpluck-2.4.44/tests/snapshots/jama_lattice.txt +0 -1345
  34. docpluck-2.4.44/tests/snapshots/nat_comms_figure_only.txt +0 -913
  35. docpluck-2.4.44/tests/snapshots/nature_minimal_rule.txt +0 -366
  36. docpluck-2.4.44/tests/snapshots/scirep_minimal_rule.txt +0 -307
  37. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-cleanup/SKILL.md +0 -0
  38. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-deploy/SKILL.md +0 -0
  39. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-iterate/SKILL.md +0 -0
  40. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-iterate/references/ai-full-doc-verify.md +0 -0
  41. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-iterate/references/cycle-report-template.md +0 -0
  42. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-iterate/references/local-verification.md +0 -0
  43. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-iterate/references/rationalizations.md +0 -0
  44. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-iterate/references/real-library-real-pdf.md +0 -0
  45. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-iterate/references/release-flow.md +0 -0
  46. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-iterate/references/self-improvement.md +0 -0
  47. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-iterate/references/three-tier-parity.md +0 -0
  48. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-qa/SKILL.md +0 -0
  49. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-qa/references/benchmark-mode.md +0 -0
  50. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-qa/references/check-11-hard-rules.md +0 -0
  51. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-qa/references/check-13-escicheck-production.md +0 -0
  52. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-qa/references/check-5-escicheck-library.md +0 -0
  53. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-qa/references/check-6-escicheck-local-webapp.md +0 -0
  54. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-qa/references/check-7-batch-smoke.md +0 -0
  55. {docpluck-2.4.44 → docpluck-2.4.45}/.claude/skills/docpluck-review/SKILL.md +0 -0
  56. {docpluck-2.4.44 → docpluck-2.4.45}/.github/workflows/bump-app-pin.yml +0 -0
  57. {docpluck-2.4.44 → docpluck-2.4.45}/.github/workflows/publish.yml +0 -0
  58. {docpluck-2.4.44 → docpluck-2.4.45}/.github/workflows/test.yml +0 -0
  59. {docpluck-2.4.44 → docpluck-2.4.45}/.gitignore +0 -0
  60. {docpluck-2.4.44 → docpluck-2.4.45}/CLAUDE.md +0 -0
  61. {docpluck-2.4.44 → docpluck-2.4.45}/HANDOFF_SECTIONS_APP_INTEGRATION.md +0 -0
  62. {docpluck-2.4.44 → docpluck-2.4.45}/LESSONS.md +0 -0
  63. {docpluck-2.4.44 → docpluck-2.4.45}/LICENSE +0 -0
  64. {docpluck-2.4.44 → docpluck-2.4.45}/REPLY_FROM_DOCPLUCK_v1.4.5.md +0 -0
  65. {docpluck-2.4.44 → docpluck-2.4.45}/REPLY_FROM_DOCPLUCK_v1.5.0.md +0 -0
  66. {docpluck-2.4.44 → docpluck-2.4.45}/REQUEST_08_CHUNKING_ENDPOINT.md +0 -0
  67. {docpluck-2.4.44 → docpluck-2.4.45}/REQUEST_09_REFERENCE_LIST_NORMALIZATION.md +0 -0
  68. {docpluck-2.4.44 → docpluck-2.4.45}/TODO.md +0 -0
  69. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/__main__.py +0 -0
  70. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/batch.py +0 -0
  71. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/cli.py +0 -0
  72. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/extract.py +0 -0
  73. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/extract_docx.py +0 -0
  74. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/extract_html.py +0 -0
  75. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/extract_layout.py +0 -0
  76. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/extract_structured.py +0 -0
  77. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/figures/__init__.py +0 -0
  78. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/figures/detect.py +0 -0
  79. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/normalize.py +0 -0
  80. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/quality.py +0 -0
  81. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/sections/__init__.py +0 -0
  82. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/sections/annotators/__init__.py +0 -0
  83. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/sections/annotators/docx.py +0 -0
  84. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/sections/annotators/html.py +0 -0
  85. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/sections/annotators/pdf.py +0 -0
  86. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/sections/annotators/text.py +0 -0
  87. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/sections/blocks.py +0 -0
  88. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/sections/boundaries.py +0 -0
  89. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/sections/core.py +0 -0
  90. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/sections/taxonomy.py +0 -0
  91. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/sections/types.py +0 -0
  92. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/tables/__init__.py +0 -0
  93. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/tables/bbox_utils.py +0 -0
  94. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/tables/camelot_extract.py +0 -0
  95. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/tables/captions.py +0 -0
  96. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/tables/cell_cleaning.py +0 -0
  97. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/tables/cluster.py +0 -0
  98. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/tables/confidence.py +0 -0
  99. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/tables/detect.py +0 -0
  100. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/tables/render.py +0 -0
  101. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/tables/whitespace.py +0 -0
  102. {docpluck-2.4.44 → docpluck-2.4.45}/docpluck/version.py +0 -0
  103. {docpluck-2.4.44 → docpluck-2.4.45}/docs/BENCHMARKS.md +0 -0
  104. {docpluck-2.4.44 → docpluck-2.4.45}/docs/DESIGN.md +0 -0
  105. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-07_sections_strict_iteration.md +0 -0
  106. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-09_session_state_and_followups.md +0 -0
  107. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-09_unified_extraction_brainstorm.md +0 -0
  108. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-10_table_rendering_iteration.md +0 -0
  109. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-10_table_rendering_iteration_2.md +0 -0
  110. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-10_table_rendering_iteration_3.md +0 -0
  111. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-10_table_rendering_iteration_4.md +0 -0
  112. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-10_table_rendering_iteration_5.md +0 -0
  113. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-10_table_rendering_iteration_6.md +0 -0
  114. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-10_table_rendering_iteration_7.md +0 -0
  115. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-11_PROMOTE_SPIKE_TO_LIBRARY.md +0 -0
  116. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-11_table_rendering_iteration_8.md +0 -0
  117. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-11_visual_review_findings.md +0 -0
  118. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-12_phase2_101pdf_corpus.md +0 -0
  119. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-12_remaining_ui_and_chrome_verification.md +0 -0
  120. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-12_visual_verify_results.md +0 -0
  121. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-13_apa_50_expansion.md +0 -0
  122. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-13_apa_50_expansion_iter_1.md +0 -0
  123. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-13_apa_50_expansion_iter_2.md +0 -0
  124. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-13_iterate_skill_first_use.md +0 -0
  125. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-13_iterative_1.md +0 -0
  126. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-13_iterative_library_improvement.md +0 -0
  127. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-13_table_extraction_next_iteration.md +0 -0
  128. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-14_continue_iterations_v2_4_30_to_15n.md +0 -0
  129. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-14_full_corpus_iteration_v2_4_30.md +0 -0
  130. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-14_iterate_6_cycles_complete.md +0 -0
  131. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-14_iterate_9_cycle_run.md +0 -0
  132. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-14_iterate_resume_4_cycles.md +0 -0
  133. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-14_iterate_v2_4_31_cycle_15n.md +0 -0
  134. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-14_phase_5d_gold_audit_v2_4_29.md +0 -0
  135. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-15_autonomous_apa_first_10h.md +0 -0
  136. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-15_iterate_apa_run_1.md +0 -0
  137. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-16_ai-gold-instructions.md +0 -0
  138. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-16_iterate_apa_run_2.md +0 -0
  139. {docpluck-2.4.44 → docpluck-2.4.45}/docs/HANDOFF_2026-05-16_iterate_apa_run_3.md +0 -0
  140. {docpluck-2.4.44 → docpluck-2.4.45}/docs/LIBRARY_APP_SYNC.md +0 -0
  141. {docpluck-2.4.44 → docpluck-2.4.45}/docs/NORMALIZATION.md +0 -0
  142. {docpluck-2.4.44 → docpluck-2.4.45}/docs/README.md +0 -0
  143. {docpluck-2.4.44 → docpluck-2.4.45}/docs/TRIAGE_2026-05-10_corpus_assessment.md +0 -0
  144. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/2026-05-06-section-identification.md +0 -0
  145. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/2026-05-06-table-extraction.md +0 -0
  146. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/2026-05-07-sections-strict-iteration-progress.md +0 -0
  147. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/2026-05-08-unified-extraction-phase-0-splice-spike.md +0 -0
  148. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/sections-deferred-items.md +0 -0
  149. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/sections-issues-backlog.md +0 -0
  150. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/2026-05-07_spot-01_apa.md +0 -0
  151. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/2026-05-07_spot-02_pattern-A-shipped.md +0 -0
  152. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/2026-05-08_spot-final_all-styles.md +0 -0
  153. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/COMPARISON.md +0 -0
  154. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-a/korbmacher_table1.md +0 -0
  155. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-a/option-a.py +0 -0
  156. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-a/ziano_table1.md +0 -0
  157. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/korbmacher_notes_raw.txt +0 -0
  158. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/korbmacher_table1.md +0 -0
  159. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/notes.md +0 -0
  160. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/option-b.py +0 -0
  161. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/ziano_notes_raw.txt +0 -0
  162. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/ziano_table1.md +0 -0
  163. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-c/korbmacher_table1.md +0 -0
  164. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-c/notes.md +0 -0
  165. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-c/option-c.py +0 -0
  166. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-c/sample-pdftotext-bbox.html +0 -0
  167. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-c/ziano_table1.md +0 -0
  168. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-d/korbmacher_table1.md +0 -0
  169. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-d/notes.md +0 -0
  170. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-d/option-d.py +0 -0
  171. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-d/ziano_table1.md +0 -0
  172. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/korbmacher_2022_kruger_bbox.html +0 -0
  173. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/korbmacher_bbox.html +0 -0
  174. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/korbmacher_table1.md +0 -0
  175. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/option-e.py +0 -0
  176. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/sample-bbox.html +0 -0
  177. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/ziano_2021_joep_bbox.html +0 -0
  178. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/ziano_bbox.html +0 -0
  179. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/ziano_table1.md +0 -0
  180. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/html-fallback-demo.md +0 -0
  181. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/chandrashekar_2023_mp.err +0 -0
  182. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/chandrashekar_2023_mp.md +0 -0
  183. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/efendic_2022_affect.err +0 -0
  184. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/efendic_2022_affect.md +0 -0
  185. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ieee_access_2.err +0 -0
  186. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ieee_access_2.md +0 -0
  187. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ip_feldman_2025_pspb.err +0 -0
  188. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ip_feldman_2025_pspb.md +0 -0
  189. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/korbmacher_2022_kruger.err +0 -0
  190. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/korbmacher_2022_kruger.md +0 -0
  191. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/nat_comms_1.err +0 -0
  192. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/nat_comms_1.md +0 -0
  193. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ziano_2021_joep.err +0 -0
  194. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ziano_2021_joep.md +0 -0
  195. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/am_sociol_rev_3.err +0 -0
  196. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/am_sociol_rev_3.md +0 -0
  197. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amc_1.err +0 -0
  198. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amc_1.md +0 -0
  199. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amj_1.err +0 -0
  200. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amj_1.md +0 -0
  201. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amle_1.err +0 -0
  202. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amle_1.md +0 -0
  203. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_apa_j_jesp_2009_12_010.err +0 -0
  204. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_apa_j_jesp_2009_12_010.md +0 -0
  205. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_royal_society_rsos_140066.err +0 -0
  206. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_royal_society_rsos_140066.md +0 -0
  207. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_royal_society_rsos_140072.err +0 -0
  208. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_royal_society_rsos_140072.md +0 -0
  209. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/bjps_1.err +0 -0
  210. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/bjps_1.md +0 -0
  211. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/chan_feldman_2025_cogemo.err +0 -0
  212. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/chan_feldman_2025_cogemo.md +0 -0
  213. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/chen_2021_jesp.err +0 -0
  214. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/chen_2021_jesp.md +0 -0
  215. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/demography_1.err +0 -0
  216. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/demography_1.md +0 -0
  217. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ieee_access_3.err +0 -0
  218. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ieee_access_3.md +0 -0
  219. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ieee_access_4.err +0 -0
  220. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ieee_access_4.md +0 -0
  221. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jama_open_1.err +0 -0
  222. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jama_open_1.md +0 -0
  223. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jama_open_2.err +0 -0
  224. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jama_open_2.md +0 -0
  225. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jmf_1.err +0 -0
  226. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jmf_1.md +0 -0
  227. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/nat_comms_2.err +0 -0
  228. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/nat_comms_2.md +0 -0
  229. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/sci_rep_1.err +0 -0
  230. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/sci_rep_1.md +0 -0
  231. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/social_forces_1.err +0 -0
  232. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/social_forces_1.md +0 -0
  233. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/papers.md +0 -0
  234. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/report.md +0 -0
  235. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/splice_spike.py +0 -0
  236. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/plans/spot-checks/splice-spike/test_splice_spike.py +0 -0
  237. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/specs/2026-04-27-request-09-reference-normalization-design.md +0 -0
  238. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/specs/2026-05-06-section-identification-design.md +0 -0
  239. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/specs/2026-05-06-table-extraction-design.md +0 -0
  240. {docpluck-2.4.44 → docpluck-2.4.45}/docs/superpowers/specs/2026-05-08-unified-extraction-design.md +0 -0
  241. {docpluck-2.4.44 → docpluck-2.4.45}/scripts/lint_rendered_corpus.py +0 -0
  242. {docpluck-2.4.44 → docpluck-2.4.45}/scripts/verify_corpus.py +0 -0
  243. {docpluck-2.4.44 → docpluck-2.4.45}/scripts/verify_corpus_full.py +0 -0
  244. {docpluck-2.4.44 → docpluck-2.4.45}/tests/__init__.py +0 -0
  245. {docpluck-2.4.44 → docpluck-2.4.45}/tests/conftest.py +0 -0
  246. {docpluck-2.4.44 → docpluck-2.4.45}/tests/fixtures/__init__.py +0 -0
  247. {docpluck-2.4.44 → docpluck-2.4.45}/tests/fixtures/sections/__init__.py +0 -0
  248. {docpluck-2.4.44 → docpluck-2.4.45}/tests/fixtures/sections/builders.py +0 -0
  249. {docpluck-2.4.44 → docpluck-2.4.45}/tests/fixtures/structured/.gitkeep +0 -0
  250. {docpluck-2.4.44 → docpluck-2.4.45}/tests/fixtures/structured/MANIFEST.json +0 -0
  251. {docpluck-2.4.44 → docpluck-2.4.45}/tests/fixtures/structured/README.md +0 -0
  252. {docpluck-2.4.44 → docpluck-2.4.45}/tests/golden/sections/html_real_headings.json +0 -0
  253. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_a3c_leading_zero_decimal_real_pdf.py +0 -0
  254. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_all_caps_section_promote_real_pdf.py +0 -0
  255. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_bbox_utils.py +0 -0
  256. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_benchmark_docx_html.py +0 -0
  257. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_cambridge_footer_strip_real_pdf.py +0 -0
  258. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_caption_regex.py +0 -0
  259. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_chart_data_trim_real_pdf.py +0 -0
  260. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_cid_minus_recovery_real_pdf.py +0 -0
  261. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_cli_sections.py +0 -0
  262. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_cli_structured.py +0 -0
  263. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_confidence.py +0 -0
  264. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_corpus_smoke.py +0 -0
  265. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_d5_normalization_audit.py +0 -0
  266. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_edge_cases.py +0 -0
  267. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_elsevier_footer_strip_real_pdf.py +0 -0
  268. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_equation_page_header_strip_real_pdf.py +0 -0
  269. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_extract_docx.py +0 -0
  270. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_extract_filter_sugar.py +0 -0
  271. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_extract_html.py +0 -0
  272. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_extract_layout.py +0 -0
  273. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_extract_pdf_structured.py +0 -0
  274. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_extraction.py +0 -0
  275. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_f0_table_region_aware.py +0 -0
  276. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_figure_caption_trim_real_pdf.py +0 -0
  277. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_figure_detect.py +0 -0
  278. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_fixtures_manifest.py +0 -0
  279. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_lattice_cluster.py +0 -0
  280. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_letterspaced_label_real_pdf.py +0 -0
  281. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_ligature_decomposition_real_pdf.py +0 -0
  282. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_lt_operator_recovery_real_pdf.py +0 -0
  283. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_mathitalic_greek_real_pdf.py +0 -0
  284. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_metaesci_followups.py +0 -0
  285. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_minus_sign_recovery_real_pdf.py +0 -0
  286. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_normalization.py +0 -0
  287. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_normalize_a3_r2_body_integer_real_pdf.py +0 -0
  288. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_normalize_f0_footnote_strip.py +0 -0
  289. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_normalize_layout_param.py +0 -0
  290. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_normalize_metadata_leak_real_pdf.py +0 -0
  291. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_normalize_report_layout_fields.py +0 -0
  292. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_normalize_v18_strips.py +0 -0
  293. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_numbered_section_promotion_real_pdf.py +0 -0
  294. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_orphan_section_number_real_pdf.py +0 -0
  295. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_preserve_math_glyphs_real_pdf.py +0 -0
  296. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_quality.py +0 -0
  297. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_render_html.py +0 -0
  298. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_request_09_reference_normalization.py +0 -0
  299. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_roman_numeral_section_promote_real_pdf.py +0 -0
  300. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_section_row_label_no_merge_real_pdf.py +0 -0
  301. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_boundaries.py +0 -0
  302. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_boundary_truncation.py +0 -0
  303. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_core_partition.py +0 -0
  304. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_docx_annotator.py +0 -0
  305. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_extract_text.py +0 -0
  306. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_footnote_section.py +0 -0
  307. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_golden.py +0 -0
  308. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_html_annotator.py +0 -0
  309. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_pdf_annotator.py +0 -0
  310. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_public_api.py +0 -0
  311. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_real_corpus.py +0 -0
  312. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_taxonomy.py +0 -0
  313. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_text_annotator.py +0 -0
  314. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_types.py +0 -0
  315. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_unit_corpus.py +0 -0
  316. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_v161_coalesce.py +0 -0
  317. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_v161_subheadings.py +0 -0
  318. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_v161_taxonomy.py +0 -0
  319. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_v161_text_annotator.py +0 -0
  320. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_sections_version.py +0 -0
  321. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_smoke_fixtures.py +0 -0
  322. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_structured_result_type.py +0 -0
  323. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_structured_types.py +0 -0
  324. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_structured_version.py +0 -0
  325. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_table_caption_cell_region_real_pdf.py +0 -0
  326. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_table_detect.py +0 -0
  327. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_tables_cell_cleaning.py +0 -0
  328. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_text_mode.py +0 -0
  329. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_v23_1_fixes.py +0 -0
  330. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_v23_bug_fixes.py +0 -0
  331. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_v23_post_corpus.py +0 -0
  332. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_v23_post_corpus_v2.py +0 -0
  333. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_v2_backwards_compat.py +0 -0
  334. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_v2_top_level_exports.py +0 -0
  335. {docpluck-2.4.44 → docpluck-2.4.45}/tests/test_whitespace_cluster.py +0 -0
@@ -190,3 +190,11 @@ Plus three golden snapshot files (`tests/golden/sections/*.json`) had the versio
190
190
  **Why:** Two failure modes compounded. (1) A new normalize helper added without grepping the existing `normalize_text` S-steps duplicated S3 and, placed before it, starved it. (2) The cycle was scoped from a symptom ("35 papers show ligatures") without localizing WHICH channel was at fault — the body channel was already correct.
191
191
 
192
192
  **How to detect (next time):** Before adding any glyph/encoding helper to `normalize.py`, grep the existing `S0`-`S9` / `W0*` steps for one already handling that character class — extend/unify it rather than adding a parallel path, and never insert a new step *before* an existing one that consumes the same input. Before scoping a glyph cycle, localize the defect: grep the offending glyph's lines in a recent render and confirm whether they sit in `<td>`/`<th>`/`*Table N*`/```unstructured-table``` (table/caption/fence channels — bypass `normalize_text`) or in body prose (the S-step channel).
193
+
194
+ ## 2026-05-16 · Cycle 13 — a heuristic guard's value depends on the false-positive surface, which differs per call site (v2.4.45)
195
+
196
+ **What:** `render.py`'s two numbered-heading promoters shared a `max_lc_run >= 5` "long lowercase-word run" prose guard. It demoted legitimate descriptive headings — jdm_.2023.16 had 19 multi-level numbered subsection headings rendered as body text, with lowercase-runs up to 12 (`3.3.2.1. The quality of planning on the previous trial moderates the effect of reflection`). The fix removed the guard ENTIRELY from `_promote_numbered_subsection_headings` but KEPT it (raised 5→8) in `_promote_numbered_section_headings`.
197
+
198
+ **Why:** A lowercase-word-run count genuinely cannot distinguish a descriptive section heading from prose — both have many lowercase words. What makes a line a heading is the *number shape* + capital-start + no-terminal-punctuation + single short line. For **multi-level** dotted numbering (`N.N[.N…]`) that signature is decisive — a prose line almost never begins with a multi-level dotted number — so the lc-run guard was pure harm. For **single-level** `N.` numbering the signature is weak (a `2.` line collides with an enumerated-list item), so a prose guard there still adds value as defense-in-depth. Same guard, opposite verdicts, because the false-positive surface differs between the two call sites.
199
+
200
+ **How to detect (next time):** When a heuristic guard rejects legitimate inputs, do not just retune its threshold — ask whether the guard discriminates at all at that call site. Reproduce at HEAD and measure the metric's spread on real positives (here: heading lowercase-runs ran 0-12, overlapping prose entirely → no threshold works). If a guard can't separate the classes, remove it where the *other* gates already suffice and keep it only where they don't. When a guard is removed, grep its tests — a contract test (`test_render.py::test_promote_rejects_prose_with_long_lowercase_run`) was asserting the removed behavior and had to be updated in the same cycle.
@@ -521,3 +521,29 @@ Mid-run, ArticleFinder flagged (and the user confirmed as a directive) that docp
521
521
 
522
522
  ### SPINE-SKIPs
523
523
  - R3 (`/docpluck-cleanup` + `/docpluck-review`) — SKIPPED. Cycle 12 is one normalize helper (explicit table over a 7-codepoint block) + S3 unified to call it + 2 bypass-channel call sites; 26/26 baseline + AI verifier confirm no regression. Same shape as cycles 2/4/6/7.
524
+
525
+ ---
526
+
527
+ ## Run: 2026-05-16 (run 4, fix-and-continue) · Cycles: cycle-12 rework, tests-regen, cycle 13
528
+
529
+ This run executed `docs/HANDOFF_2026-05-16_iterate_run_4_fix_and_continue.md`'s three jobs. Cycle-12 rework + tests-regen + cycle 13 below; the article-finder AI-gold integration (JOB 2) is tracked in the run-meta.
530
+
531
+ ### tests-regen (commit `c831e28`, no version bump)
532
+ - 15 pre-existing pytest failures triaged. 12 `test_extract_pdf_byte_identical` snapshots + 2 `test_sections_golden` goldens = environmental drift (local pdftotext re-wraps lines differently than the build that captured the snapshots; `extract_pdf` is a pure pdftotext passthrough). Regenerated; the 26-paper baseline is the real extraction-quality gate and stays green.
533
+ - **The 15th, `test_request_09`, is NOT snapshot drift** — it is a real COL-class column-interleave defect: the numbered RSOS bibliography renders as `References\n1. 2. 3. ... 16.\n\nThaler RH...` (the number column split from the entry text). Left red and documented as the escalated COL defect class. Lesson: when a handoff lumps failures as "all snapshot drift," still inspect each — a real-defect-detecting test must never be "regenerated" away.
534
+
535
+ ### Cycle 13 (v2.4.45) — G5b long-descriptive numbered headings demoted
536
+
537
+ ### Outcome
538
+ - **Cycle 13 shipped v2.4.45** — `render.py`'s numbered-heading promoters carried a `max_lc_run >= 5` prose guard that demoted legitimate long descriptive headings. Removed the guard entirely from `_promote_numbered_subsection_headings`; raised it `5→8` in `_promote_numbered_section_headings`. jdm_.2023.16: 19 multi-level subsection headings recovered.
539
+
540
+ ### Blind spots / process notes
541
+ - **The TRIAGE estimate ("raise 5→8") was a partial fix.** Reproducing at HEAD showed jdm16 headings with `max_lc` up to 12 — a `5→8` raise would have left 7 of 19 still demoted. The lesson card `reproduce-triage-defect-at-head-before-trusting-cost-estimate` paid off again: always reproduce and measure before trusting a queue item's prescribed fix. The lc-run count genuinely cannot distinguish a 12-lowercase-word descriptive heading from prose — for multi-level dotted numbering the *number shape* is the discriminator, so the guard had to go, not just move.
542
+ - **A guard worth keeping for one promoter, not the other.** Single-level `N.` numbers collide with enumerated lists (real false-positive risk) → keep a prose guard (raised to 8) as defense-in-depth. Multi-level `N.N[.N…]` numbers do not → the guard was pure harm. Same-named guard, opposite verdicts, because the false-positive surface differs.
543
+ - **A contract test encoded the removed guard.** `test_render.py::test_promote_rejects_prose_with_long_lowercase_run` asserted the old behavior; updated it to assert the new contract (long descriptive titles ARE promoted) in the same cycle — per the cycle-2 `a test can encode the bug` lesson.
544
+
545
+ ### SPINE-SKIPs
546
+ - R3 (`/docpluck-cleanup` + `/docpluck-review`) — SKIPPED. Cycle 13 is a guard removal + one threshold bump in two render post-processors; 26/26 baseline + heading-promotion-only diff confirm no regression. Same shape as cycles 9/11.
547
+
548
+ ### Process note — Codex cross-model verification has a Windows UTF-8 bug
549
+ The `gold-generation.md` Step-4 Codex audit misreads UTF-8 gold files as mojibake on this Windows machine (`Västfjäll`→`VA<SI>stfjA<SI>ll`, `–`→`ƒ?"`), producing ~10-24 false "discrepancies" per paper. The gold files are confirmed clean UTF-8. Worked around by re-running Codex with an explicit "files are UTF-8; mojibake is your decode error, not a discrepancy" preamble. **This is article-finder's protocol to fix** — `gold-generation.md` Step 4 needs a UTF-8 read instruction for Windows. Flagged for coordination with the article-finder owner.
@@ -1,5 +1,15 @@
1
1
  # Changelog
2
2
 
3
+ ## [2.4.45] — 2026-05-16
4
+
5
+ **Cycle 13 (autonomous APA-first run) — long descriptive numbered headings demoted to body text (G5b, S1).** `render.py`'s numbered-heading promoters carried a "long lowercase-word run" prose guard (`max_lc_run >= 5`) that rejected legitimate descriptive headings — e.g. `2.4.2.2. Inference of planning strategies and strategy types`, `3.3.2.1. The quality of planning on the previous trial moderates the effect of reflection`. jdm_.2023.16 alone had 19 multi-level numbered subsection headings demoted to body text.
6
+
7
+ Fix (v2.4.45) — the lowercase-run guard is **removed from `_promote_numbered_subsection_headings`**: multi-level dotted numbering at line-start is itself a strong section-heading signal (combined with capital-started title + no terminal sentence punctuation + single ≤80-char line), and descriptive subsection titles legitimately run to many lowercase words, so the guard could not distinguish a real heading from prose and only mis-rejected headings. For `_promote_numbered_section_headings` (single-level `N.`, which genuinely collides with enumerated lists) the guard is **kept but raised `5 → 8`** — single-level promotion still has its document-numbering-range / uniqueness / list-adjacency gates as defense in depth.
8
+
9
+ jdm_.2023.16: 19 previously-demoted multi-level headings now render as `###`; the v2.4.44→v2.4.45 diff is heading-promotion only (0 text loss, 0 hallucination). 26/26 baseline PASS. New real-PDF + contract tests in `tests/test_numbered_heading_promotion_real_pdf.py` and `tests/test_render.py`.
10
+
11
+ ~11 APA papers still FAIL Phase-5d verification; the autonomous run continues.
12
+
3
13
  ## [2.4.44] — 2026-05-16
4
14
 
5
15
  **Cycle 12 (autonomous APA-first run) — Latin typographic ligatures not decomposed in the table/caption channels (GLYPH, S2).** pdftotext preserves presentation-form ligature glyphs (`ff fi fl ffi ffl ſt st`, U+FB00-FB06) verbatim, so words rendered as `confident` / `influence` / `efficient` — broken for search, word matching, and any downstream NLP. A corpus scan found the glyphs in 35 rendered papers (korbmacher 82×, jdm_.2023.16 34×, jdm_m.2022.2 8×). The body channel's `normalize.py` S3 step already expanded ligatures correctly; the leak was confined to **table cells, figure/table captions, and `unstructured-table` fenced blocks**, which bypass `normalize_text` entirely.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docpluck
3
- Version: 2.4.44
3
+ Version: 2.4.45
4
4
  Summary: PDF, DOCX, and HTML text extraction and normalization for academic papers
5
5
  Project-URL: Homepage, https://github.com/giladfeldman/docpluck
6
6
  Project-URL: Documentation, https://github.com/giladfeldman/docpluck/tree/main/docs
@@ -71,7 +71,7 @@ from .figures import Figure
71
71
  from .extract_structured import TABLE_EXTRACTION_VERSION, StructuredResult, extract_pdf_structured
72
72
  from .render import render_pdf_to_markdown
73
73
 
74
- __version__ = "2.4.44"
74
+ __version__ = "2.4.45"
75
75
  __author__ = "Gilad Feldman"
76
76
  __license__ = "MIT"
77
77
 
@@ -233,8 +233,12 @@ def _promote_numbered_subsection_headings(text: str) -> str:
233
233
  """Promote ``1.2 Foo``-style lines to ``### 1.2 Foo`` h3 headings.
234
234
 
235
235
  Conservative: only multi-level numbering (``N.N`` or deeper), title must
236
- start with a capital letter, must not end in sentence-terminator
237
- punctuation, and must not look like prose (no long lowercase-word runs).
236
+ start with a capital letter and must not end in sentence-terminator
237
+ punctuation. Multi-level dotted numbering at line-start is itself a strong
238
+ section-heading signal — descriptive subsection titles legitimately run to
239
+ many lowercase words ("3.3.2.1 The quality of planning on the previous
240
+ trial moderates the effect of reflection"), so a lowercase-run prose guard
241
+ mis-rejects real headings and is not applied here (cycle 13, G5b).
238
242
  Idempotent: re-running the pass is a no-op.
239
243
  """
240
244
  if not text:
@@ -250,17 +254,6 @@ def _promote_numbered_subsection_headings(text: str) -> str:
250
254
  if title.endswith((".", "?", "!", ":", ",", ";")):
251
255
  out.append(line)
252
256
  continue
253
- tokens = title.split()
254
- lc_run = max_lc_run = 0
255
- for tok in tokens:
256
- if tok and tok[0].islower():
257
- lc_run += 1
258
- max_lc_run = max(max_lc_run, lc_run)
259
- else:
260
- lc_run = 0
261
- if max_lc_run >= 5:
262
- out.append(line)
263
- continue
264
257
  if out and out[-1].startswith(f"### {m.group('num')} "):
265
258
  out.append(line)
266
259
  continue
@@ -357,7 +350,7 @@ def _promote_numbered_section_headings(text: str) -> str:
357
350
  max_lc = max(max_lc, lc_run)
358
351
  else:
359
352
  lc_run = 0
360
- if max_lc >= 5: # prose-like run — not a heading
353
+ if max_lc >= 8: # long prose-like run — not a heading (cycle 13, G5b)
361
354
  continue
362
355
  candidates.setdefault(int(m.group("num")), []).append((i, title))
363
356
  if not candidates:
@@ -271,6 +271,10 @@ New `render.py::_promote_numbered_section_headings` promotes `N. Title` → `##
271
271
 
272
272
  > **Cycle-12 rework note (run 4, 2026-05-16):** the first cycle-12 attempt added a SECOND, parallel `decompose_ligatures` call *before* the pre-existing S3 step inside `normalize_text` — it consumed every ligature before S3 ran, so S3 tracked `ligatures_expanded = 0` and broke `test_normalization.py::test_report_tracks_changes`. The rework removed the duplicate call and unified S3 to use the shared helper. Lesson: before adding a glyph-normalization helper, grep the existing `normalize_text` S-steps for one already handling that glyph class — extend/unify it, do not add a parallel path.
273
273
 
274
+ ### Cycle 13 (v2.4.45) — G5b long-descriptive-title prose guard — SHIPPED
275
+
276
+ `render.py`'s numbered-heading promoters carried a `max_lc_run >= 5` "long lowercase-word run" prose guard that mis-rejected legitimate descriptive headings. Reproduced at HEAD: jdm_.2023.16 alone had **19** multi-level numbered subsection headings demoted to body text, with `max_lc` up to **12** (`3.3.2.1. The quality of planning on the previous trial moderates the effect of reflection`) — far deeper than the TRIAGE's "raise 5→8" estimate. Re-scoped: the lc-run guard is **removed entirely from `_promote_numbered_subsection_headings`** (multi-level dotted numbering + capital-start + no-terminal-punctuation + single ≤80-char line is itself a sufficient heading signature; the lc-run guard cannot distinguish a descriptive heading from prose). For `_promote_numbered_section_headings` (single-level `N.`, real list-collision risk) the guard is kept but raised `5→8`, alongside its existing numbering-range/uniqueness/list-adjacency gates. jdm16: 19 headings recovered; v2.4.44→v2.4.45 diff is heading-promotion only (0 text loss/hallucination); 26/26 baseline.
277
+
274
278
  ### SESSION-3 STANDING VERDICT (rule 0e-bis)
275
279
 
276
280
  The APA corpus is **NOT clean**. Cycles 8-11 shipped 4 verified incremental fixes (v2.4.40-43), each AI-gold-verified OVERALL PASS with 0 regressions. But ~12 APA papers still FAIL Phase-5d on PRE-EXISTING defects the cycles did not reach. Verifier-confirmed open punch-list:
@@ -280,7 +284,7 @@ The APA corpus is **NOT clean**. Cycles 8-11 shipped 4 verified incremental fixe
280
284
  | **TABLE structure destruction** | S0/S1 | efendic, ar_apa_011, xiao, jdm15/16, chen, maier, ip_feldman (~11) | grid lost → caption-bleed; flat number-dump; empty `<table>` shells; two tables merged; rows dropped. C3 — needs a render/structured coordination design. The single largest blocker. |
281
285
  | **G5c split-line numbered headings** | S1 | jdm_m.2022.2 (`5.3.`/`6.3.`/`7.3.` etc.) | number alone on a line, title on the next; renders as orphan bare-number + a MISLABELED generic `## Results`. cycle-3 orphan-folder multi-level analogue. |
282
286
  | **G5d named (unnumbered) heading demotion** | S1 | ar_apa_011 (`Participants`, `Overview`), efendic, chandrashekar, ip_feldman (~7) | section-partitioner work; largest false-positive surface. |
283
- | **G5b long-descriptive-title prose guard** | S1 | jdm16, jdm_m2, chen | `≥5-lowercase-word` guard over-rejects legit long numbered headings. |
287
+ | ~~**G5b long-descriptive-title prose guard**~~ ✓ FIXED v2.4.45 (cycle 13) | S1 | jdm16, jdm_m2, chen | ~~`≥5-lowercase-word` guard over-rejects legit long numbered headings.~~ Subsection promoter's lc-run guard removed; single-level raised 5→8. |
284
288
  | **FIG caption double-emission + truncation** | S2 | jdm_m2, efendic, chan_feldman, ziano, jdm15/16 (~8) | caption inline + in `## Figures` block; truncated mid-word; figure data-labels as orphan body lines. |
285
289
  | **GLYPH ligature** `fi`/`fl` not decomposed | S2 | jdm_m2 (and likely many) | `confident`, `influence` — NFKC would fix; check why current NFC pass misses U+FB01/FB02. |
286
290
  | **D4 metadata residuals** | S2 | ar_apa_011 (`doi:` line), chen, efendic masthead | see D4 RESIDUALS above. |
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "docpluck"
7
- version = "2.4.44"
7
+ version = "2.4.45"
8
8
  description = "PDF, DOCX, and HTML text extraction and normalization for academic papers"
9
9
  readme = "docs/README.md"
10
10
  requires-python = ">=3.10"
@@ -6,7 +6,7 @@
6
6
  "label": "abstract",
7
7
  "canonical_label": "abstract",
8
8
  "char_start": 0,
9
- "char_end": 28,
9
+ "char_end": 29,
10
10
  "pages": [],
11
11
  "confidence": "high",
12
12
  "detected_via": "heading_match",
@@ -15,8 +15,8 @@
15
15
  {
16
16
  "label": "introduction",
17
17
  "canonical_label": "introduction",
18
- "char_start": 28,
19
- "char_end": 53,
18
+ "char_start": 29,
19
+ "char_end": 55,
20
20
  "pages": [],
21
21
  "confidence": "high",
22
22
  "detected_via": "heading_match",
@@ -25,8 +25,8 @@
25
25
  {
26
26
  "label": "methods",
27
27
  "canonical_label": "methods",
28
- "char_start": 53,
29
- "char_end": 78,
28
+ "char_start": 55,
29
+ "char_end": 81,
30
30
  "pages": [],
31
31
  "confidence": "high",
32
32
  "detected_via": "heading_match",
@@ -35,8 +35,8 @@
35
35
  {
36
36
  "label": "results",
37
37
  "canonical_label": "results",
38
- "char_start": 78,
39
- "char_end": 103,
38
+ "char_start": 81,
39
+ "char_end": 107,
40
40
  "pages": [],
41
41
  "confidence": "high",
42
42
  "detected_via": "heading_match",
@@ -45,8 +45,8 @@
45
45
  {
46
46
  "label": "methods_2",
47
47
  "canonical_label": "methods",
48
- "char_start": 103,
49
- "char_end": 128,
48
+ "char_start": 107,
49
+ "char_end": 133,
50
50
  "pages": [],
51
51
  "confidence": "high",
52
52
  "detected_via": "heading_match",
@@ -55,8 +55,8 @@
55
55
  {
56
56
  "label": "results_2",
57
57
  "canonical_label": "results",
58
- "char_start": 128,
59
- "char_end": 153,
58
+ "char_start": 133,
59
+ "char_end": 159,
60
60
  "pages": [],
61
61
  "confidence": "high",
62
62
  "detected_via": "heading_match",
@@ -65,8 +65,8 @@
65
65
  {
66
66
  "label": "general_discussion",
67
67
  "canonical_label": "general_discussion",
68
- "char_start": 153,
69
- "char_end": 183,
68
+ "char_start": 159,
69
+ "char_end": 190,
70
70
  "pages": [],
71
71
  "confidence": "high",
72
72
  "detected_via": "heading_match",
@@ -75,8 +75,8 @@
75
75
  {
76
76
  "label": "references",
77
77
  "canonical_label": "references",
78
- "char_start": 183,
79
- "char_end": 213,
78
+ "char_start": 190,
79
+ "char_end": 220,
80
80
  "pages": [],
81
81
  "confidence": "high",
82
82
  "detected_via": "heading_match",
@@ -6,7 +6,7 @@
6
6
  "label": "abstract",
7
7
  "canonical_label": "abstract",
8
8
  "char_start": 0,
9
- "char_end": 36,
9
+ "char_end": 37,
10
10
  "pages": [],
11
11
  "confidence": "high",
12
12
  "detected_via": "heading_match",
@@ -15,8 +15,8 @@
15
15
  {
16
16
  "label": "introduction",
17
17
  "canonical_label": "introduction",
18
- "char_start": 36,
19
- "char_end": 61,
18
+ "char_start": 37,
19
+ "char_end": 63,
20
20
  "pages": [],
21
21
  "confidence": "high",
22
22
  "detected_via": "heading_match",
@@ -25,8 +25,8 @@
25
25
  {
26
26
  "label": "methods",
27
27
  "canonical_label": "methods",
28
- "char_start": 61,
29
- "char_end": 84,
28
+ "char_start": 63,
29
+ "char_end": 87,
30
30
  "pages": [],
31
31
  "confidence": "high",
32
32
  "detected_via": "heading_match",
@@ -35,8 +35,8 @@
35
35
  {
36
36
  "label": "results",
37
37
  "canonical_label": "results",
38
- "char_start": 84,
39
- "char_end": 108,
38
+ "char_start": 87,
39
+ "char_end": 112,
40
40
  "pages": [],
41
41
  "confidence": "high",
42
42
  "detected_via": "heading_match",
@@ -45,8 +45,8 @@
45
45
  {
46
46
  "label": "discussion",
47
47
  "canonical_label": "discussion",
48
- "char_start": 108,
49
- "char_end": 133,
48
+ "char_start": 112,
49
+ "char_end": 138,
50
50
  "pages": [],
51
51
  "confidence": "high",
52
52
  "detected_via": "heading_match",
@@ -55,8 +55,8 @@
55
55
  {
56
56
  "label": "references",
57
57
  "canonical_label": "references",
58
- "char_start": 133,
59
- "char_end": 163,
58
+ "char_start": 138,
59
+ "char_end": 168,
60
60
  "pages": [],
61
61
  "confidence": "high",
62
62
  "detected_via": "heading_match",