docpluck 2.4.63__tar.gz → 2.4.64__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docpluck-2.4.63 → docpluck-2.4.64}/CHANGELOG.md +32 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/PKG-INFO +1 -1
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/__init__.py +1 -1
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/normalize.py +91 -35
- {docpluck-2.4.63 → docpluck-2.4.64}/pyproject.toml +1 -1
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_normalize_idempotent_real_pdf.py +39 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/_project/lessons.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-cleanup/SKILL.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-deploy/SKILL.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/LEARNINGS.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/SKILL.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/ai-full-doc-verify.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/cycle-report-template.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/local-verification.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/rationalizations.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/real-library-real-pdf.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/release-flow.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/self-improvement.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/three-tier-parity.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-qa/SKILL.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-qa/references/benchmark-mode.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-qa/references/check-11-hard-rules.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-qa/references/check-13-escicheck-production.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-qa/references/check-5-escicheck-library.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-qa/references/check-6-escicheck-local-webapp.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-qa/references/check-7-batch-smoke.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-review/SKILL.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.github/workflows/bump-app-pin.yml +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.github/workflows/publish.yml +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.github/workflows/test.yml +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/.gitignore +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/CLAUDE.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/HANDOFF_SECTIONS_APP_INTEGRATION.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/LESSONS.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/LICENSE +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/REPLY_FROM_DOCPLUCK_v1.4.5.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/REPLY_FROM_DOCPLUCK_v1.5.0.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/REQUEST_08_CHUNKING_ENDPOINT.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/REQUEST_09_REFERENCE_LIST_NORMALIZATION.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/TODO.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/__main__.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/batch.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/cli.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/extract.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/extract_docx.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/extract_html.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/extract_layout.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/extract_structured.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/figures/__init__.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/figures/detect.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/quality.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/render.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/sections/__init__.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/sections/annotators/__init__.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/sections/annotators/docx.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/sections/annotators/html.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/sections/annotators/pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/sections/annotators/text.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/sections/blocks.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/sections/boundaries.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/sections/core.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/sections/taxonomy.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/sections/types.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/tables/__init__.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/tables/bbox_utils.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/tables/camelot_extract.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/tables/captions.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/tables/cell_cleaning.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/tables/cluster.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/tables/confidence.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/tables/detect.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/tables/render.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/tables/whitespace.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docpluck/version.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/BENCHMARKS.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/DESIGN.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-07_sections_strict_iteration.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-09_session_state_and_followups.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-09_unified_extraction_brainstorm.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-10_table_rendering_iteration.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-10_table_rendering_iteration_2.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-10_table_rendering_iteration_3.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-10_table_rendering_iteration_4.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-10_table_rendering_iteration_5.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-10_table_rendering_iteration_6.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-10_table_rendering_iteration_7.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-11_PROMOTE_SPIKE_TO_LIBRARY.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-11_table_rendering_iteration_8.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-11_visual_review_findings.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-12_phase2_101pdf_corpus.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-12_remaining_ui_and_chrome_verification.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-12_visual_verify_results.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-13_apa_50_expansion.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-13_apa_50_expansion_iter_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-13_apa_50_expansion_iter_2.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-13_iterate_skill_first_use.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-13_iterative_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-13_iterative_library_improvement.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-13_table_extraction_next_iteration.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-14_continue_iterations_v2_4_30_to_15n.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-14_full_corpus_iteration_v2_4_30.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-14_iterate_6_cycles_complete.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-14_iterate_9_cycle_run.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-14_iterate_resume_4_cycles.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-14_iterate_v2_4_31_cycle_15n.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-14_phase_5d_gold_audit_v2_4_29.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-15_autonomous_apa_first_10h.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-15_iterate_apa_run_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-16_ai-gold-instructions.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-16_iterate_apa_run_2.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-16_iterate_apa_run_3.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-16_iterate_run_4_final.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-16_iterate_run_4_fix_and_continue.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-16_iterate_run_5.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-16_iterate_run_6.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-17_iterate_run_7.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-17_iterate_run_8.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-17_iterate_run_9.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-18_iterate_run_9_cont.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-18_iterate_run_9_cont2.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-20_iterate_run_9_cont3.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/ITERATION_VERIFICATION_LESSONS.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/LIBRARY_APP_SYNC.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/NORMALIZATION.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/README.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/TRIAGE_2026-05-10_corpus_assessment.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/TRIAGE_2026-05-14_phase_5d_gold_audit.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/2026-05-06-section-identification.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/2026-05-06-table-extraction.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/2026-05-07-sections-strict-iteration-progress.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/2026-05-08-unified-extraction-phase-0-splice-spike.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/sections-deferred-items.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/sections-issues-backlog.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/2026-05-07_spot-01_apa.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/2026-05-07_spot-02_pattern-A-shipped.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/2026-05-08_spot-final_all-styles.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/COMPARISON.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-a/korbmacher_table1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-a/option-a.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-a/ziano_table1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/korbmacher_notes_raw.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/korbmacher_table1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/notes.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/option-b.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/ziano_notes_raw.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-b/ziano_table1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-c/korbmacher_table1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-c/notes.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-c/option-c.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-c/sample-pdftotext-bbox.html +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-c/ziano_table1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-d/korbmacher_table1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-d/notes.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-d/option-d.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-d/ziano_table1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/korbmacher_2022_kruger_bbox.html +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/korbmacher_bbox.html +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/korbmacher_table1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/option-e.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/sample-bbox.html +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/ziano_2021_joep_bbox.html +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/ziano_bbox.html +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/experiments/option-e/ziano_table1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/html-fallback-demo.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/chandrashekar_2023_mp.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/chandrashekar_2023_mp.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/efendic_2022_affect.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/efendic_2022_affect.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ieee_access_2.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ieee_access_2.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ip_feldman_2025_pspb.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ip_feldman_2025_pspb.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/korbmacher_2022_kruger.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/korbmacher_2022_kruger.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/nat_comms_1.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/nat_comms_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ziano_2021_joep.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs/ziano_2021_joep.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/am_sociol_rev_3.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/am_sociol_rev_3.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amc_1.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amc_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amj_1.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amj_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amle_1.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/amle_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_apa_j_jesp_2009_12_010.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_apa_j_jesp_2009_12_010.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_royal_society_rsos_140066.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_royal_society_rsos_140066.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_royal_society_rsos_140072.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ar_royal_society_rsos_140072.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/bjps_1.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/bjps_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/chan_feldman_2025_cogemo.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/chan_feldman_2025_cogemo.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/chen_2021_jesp.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/chen_2021_jesp.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/demography_1.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/demography_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ieee_access_3.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ieee_access_3.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ieee_access_4.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/ieee_access_4.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jama_open_1.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jama_open_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jama_open_2.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jama_open_2.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jmf_1.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/jmf_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/nat_comms_2.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/nat_comms_2.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/sci_rep_1.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/sci_rep_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/social_forces_1.err +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/outputs-new/social_forces_1.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/papers.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/report.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/splice_spike.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/plans/spot-checks/splice-spike/test_splice_spike.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/specs/2026-04-27-request-09-reference-normalization-design.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/specs/2026-05-06-section-identification-design.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/specs/2026-05-06-table-extraction-design.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/docs/superpowers/specs/2026-05-08-unified-extraction-design.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/harness/README.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/harness/VERIFIER_PROMPT.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/harness/__init__.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/harness/baseline_matrix.json +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/harness/checks.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/harness/corpus.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/harness/corpus_manifest.json +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/harness/extract.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/harness/gold_keys.json +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/harness/inspect.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/lint_rendered_corpus.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/verify_corpus.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/scripts/verify_corpus_full.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/__init__.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/conftest.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/fixtures/__init__.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/fixtures/sections/__init__.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/fixtures/sections/builders.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/fixtures/structured/.gitkeep +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/fixtures/structured/MANIFEST.json +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/fixtures/structured/README.md +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/golden/sections/apa_multi_study_pdf.json +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/golden/sections/apa_single_study_pdf.json +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/golden/sections/html_real_headings.json +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/amj_lattice.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/apa_chan_feldman_lineless.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/apa_chen_jesp_lineless.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/apa_efendic_affect.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/apa_ip_feldman_pspb.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/bmc_lattice.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/ieee_figure_heavy.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/ieee_lattice.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/jama_lattice.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/nat_comms_figure_only.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/nature_minimal_rule.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/snapshots/scirep_minimal_rule.txt +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_a3c_leading_zero_decimal_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_all_caps_section_promote_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_bbox_utils.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_benchmark_docx_html.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_cambridge_footer_strip_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_caption_only_table_heading_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_caption_regex.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_chart_data_trim_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_cid_minus_recovery_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_cli_sections.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_cli_structured.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_confidence.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_corpus_smoke.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_d5_normalization_audit.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_edge_cases.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_elsevier_footer_strip_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_equation_page_header_strip_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_extract_docx.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_extract_filter_sugar.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_extract_html.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_extract_layout.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_extract_pdf_structured.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_extraction.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_f0_table_region_aware.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_fffd_comparison_recovery_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_figure_caption_trim_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_figure_detect.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_fixtures_manifest.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_harness_text_loss_reflow.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_lattice_cluster.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_letterspaced_label_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_ligature_decomposition_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_lt_operator_recovery_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_mathitalic_greek_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_metaesci_followups.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_minus_sign_recovery_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_normalization.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_normalize_a3_r2_body_integer_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_normalize_f0_footnote_strip.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_normalize_layout_param.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_normalize_metadata_leak_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_normalize_report_layout_fields.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_normalize_v18_strips.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_numbered_heading_promotion_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_numbered_section_promotion_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_orphan_multilevel_number_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_orphan_section_number_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_preserve_math_glyphs_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_pua_glyph_recovery_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_quality.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_render.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_render_html.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_request_09_reference_normalization.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_roman_numeral_section_promote_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_section_row_label_no_merge_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_boundaries.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_boundary_truncation.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_core_partition.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_docx_annotator.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_extract_text.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_footnote_section.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_golden.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_html_annotator.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_pdf_annotator.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_public_api.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_real_corpus.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_taxonomy.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_text_annotator.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_types.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_unit_corpus.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_v161_coalesce.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_v161_subheadings.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_v161_taxonomy.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_v161_text_annotator.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_sections_version.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_smoke_fixtures.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_structured_result_type.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_structured_types.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_structured_version.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_table_caption_cell_region_real_pdf.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_table_detect.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_tables_cell_cleaning.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_text_mode.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_v23_1_fixes.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_v23_bug_fixes.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_v23_post_corpus.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_v23_post_corpus_v2.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_v2_backwards_compat.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_v2_top_level_exports.py +0 -0
- {docpluck-2.4.63 → docpluck-2.4.64}/tests/test_whitespace_cluster.py +0 -0
|
@@ -1,5 +1,37 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [2.4.64] — 2026-05-22
|
|
4
|
+
|
|
5
|
+
**Cycle 12 (run 9) — three independent normalize_text idempotence fixes.** A 180-doc scan post-cycle-11 found 17 papers still non-idempotent. This cycle packages three independent fixes that together clear 6 of them:
|
|
6
|
+
|
|
7
|
+
### 1. Final blank-line collapse (5 papers — chan-etal, horsham, lee-feldman, li-feldman-mental-acct, kassambara)
|
|
8
|
+
|
|
9
|
+
Raw pdftotext output contains form-feed `\x0c` characters at page boundaries. S9's `re.sub(r"\n{3,}", "\n\n", t)` collapses consecutive blank lines, but the form-feed survives upstream stripping into the references region, where R3 (continuation join) processes line-by-line — `"\x0c".strip() == ""` so the form-feed line becomes an empty entry, surrounded by other empty entries. R3 outputs `"\n".join(["...", "", "", "...", ""])` = `\n\n\n\n` (4 newlines). S9's collapse already ran upstream; nothing else collapses. Pass 2 sees the `\n{4}` run and S9 collapses it — non-idempotence.
|
|
10
|
+
|
|
11
|
+
Fix: add a final `re.sub(r"\n{3,}", "\n\n", t)` between the H0r and P0r blocks (after the H0r header-banner re-strip, before the P0r page-footer re-strip). Any strip step running before that point that empties a line is now safely followed by the collapse, regardless of which step produced the gap.
|
|
12
|
+
|
|
13
|
+
### 2. Cross-paragraph stat-continuation join (2 papers — korbmacher×2)
|
|
14
|
+
|
|
15
|
+
A1 (the early stat-line-repair step using `\s*`) crosses paragraph breaks but runs BEFORE S9 strips header/footer noise. A row like
|
|
16
|
+
|
|
17
|
+
`r(1798) = -0.27,\n\n472\n\nJournal of Decision Making, Vol. 17...\n\n95% CI [-0.31, ...]`
|
|
18
|
+
|
|
19
|
+
has so much intervening junk that A1's lookahead fails on pass 1. S9 then strips `472` (page num) and the journal-masthead/page-header (repeated ≥5 times), leaving `-0.27,\n\n95% CI`. A1 is over; LateJoin's A1r uses strict `[ \t]*\n[ \t]*` (single-newline only) and so doesn't fire. Pass 2's A1 sees the now-clean `,\n\n95% CI` and joins — non-idempotence.
|
|
20
|
+
|
|
21
|
+
Fix: add two paragraph-crossing variants to the LateJoin A1r block, restricted to high-confidence prefixes — `\d+% CI` and `p [<=>]`. No real paragraph STARTS with `95% CI` or `p < .001`, so joining across `\n\n` is safe. The `test_column_bleed_too_many_fragments_ignored` contract is unaffected — its input has no leading `,`/`;`.
|
|
22
|
+
|
|
23
|
+
### 3. LABELED vs BARE CI bracket discriminator (refines cycle 11)
|
|
24
|
+
|
|
25
|
+
Cycle 11's proximity gate broke 2 pre-existing tests:
|
|
26
|
+
- `test_ci_pairing_recovers_body_line`: `Mposterior = 20.54, SD=0.04, CI = [-0.61, -0.47]` — `, SD=` falsely tripped the "new stat label" sentence-break check, blocking the legitimate recovery of `20.54` → `-0.54`.
|
|
27
|
+
- `test_efendic_table_point_estimates_recovered_via_ci`: efendic's body-line CI recoveries no longer fired.
|
|
28
|
+
|
|
29
|
+
Fix: discriminate LABELED brackets (`CI = [...]` / `95% CI [...]` / `CI: [...]`) from BARE brackets (`[lo, hi]` alone). LABELED brackets can pair with any candidate token in the row (the chain `M = X, SD = Y, CI = [...]` is all describing the same estimate). BARE brackets retain the strict 30-char + period/semicolon-break proximity gate (catches the majumder false-positive — bare bracket ~50 chars after `2.01`, attached to a different stat). The `_CI_LABEL_PREFIX_RE` looks back ≤8 chars from the `[` for `CI` / `\d+% CI` (with optional `=`/`:`).
|
|
30
|
+
|
|
31
|
+
**Impact:** corpus-wide non-idempotency 17 → 11 (cycle 12 cleared 6: 5 bibliography-shift + 2 korbmacher; 3 new bibliography cases of the same shape are now caught by the final collapse). Broad pytest 1356 pass + 1 known pre-existing B6 fail. Harness Tier-D academic: 0 regressions, 0 new fails (1 still failing — plos-med-1 / B1).
|
|
32
|
+
|
|
33
|
+
NORMALIZATION_VERSION 1.9.18. New tests: `test_normalize_collapses_late_blank_line_runs` + `test_late_join_crosses_paragraph_for_stat_continuation`. Cycle 11's tests (`*_proximity_gate_*`) still pass under the LABELED/BARE refinement.
|
|
34
|
+
|
|
3
35
|
## [2.4.63] — 2026-05-21
|
|
4
36
|
|
|
5
37
|
**Cycle 11 (run 9) — `recover_minus_via_ci_pairing` proximity gate.** A 180-doc scan post-cycle-10 found 19 papers still non-idempotent. Among them, 8 (majumder, korbmacher×2, van-boven, chan-feldman-baron, ziano, xiao-poc, amp-1, annals-2) shared a structural defect that ALSO ships in single-pass production: the `_recover_minus_in_record` helper paired every candidate `2X.XX` token with EVERY CI bracket in the same record. A record like `M = 5.37, SD = 2.01), t(1827) = 1.83, p tukey = .067, d = 0.09 [-1.86, 0.04]` contains:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docpluck
|
|
3
|
-
Version: 2.4.63
|
|
3
|
+
Version: 2.4.64
|
|
4
4
|
Summary: PDF, DOCX, and HTML text extraction and normalization for academic papers
|
|
5
5
|
Project-URL: Homepage, https://github.com/giladfeldman/docpluck
|
|
6
6
|
Project-URL: Documentation, https://github.com/giladfeldman/docpluck/tree/main/docs
|
|
@@ -71,7 +71,7 @@ from .figures import Figure
|
|
|
71
71
|
from .extract_structured import TABLE_EXTRACTION_VERSION, StructuredResult, extract_pdf_structured
|
|
72
72
|
from .render import render_pdf_to_markdown
|
|
73
73
|
|
|
74
|
-
__version__ = "2.4.63"
|
|
74
|
+
__version__ = "2.4.64"
|
|
75
75
|
__author__ = "Gilad Feldman"
|
|
76
76
|
__license__ = "MIT"
|
|
77
77
|
|
|
@@ -23,7 +23,7 @@ class NormalizationLevel(str, Enum):
|
|
|
23
23
|
academic = "academic"
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
NORMALIZATION_VERSION = "1.9.
|
|
26
|
+
NORMALIZATION_VERSION = "1.9.18"
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
# ── Mathematical Alphanumeric Symbols de-styling (shared, v2.4.34) ──────────
|
|
@@ -1474,35 +1474,45 @@ _CORRUPT_NEG_TOKEN_RE = re.compile(r"(?<![\d.\-])2(\d?\.\d+)\b")
|
|
|
1474
1474
|
_TABLE_ROW_RE = re.compile(r"<tr\b.*?</tr>", re.DOTALL | re.IGNORECASE)
|
|
1475
1475
|
|
|
1476
1476
|
|
|
1477
|
-
# Cycle 11 (v2.4.63) — proximity gate for the CI-pairing recovery.
|
|
1477
|
+
# Cycle 11 (v2.4.63) / 12 (v2.4.64) — proximity gate for the CI-pairing recovery.
|
|
1478
1478
|
#
|
|
1479
|
-
# In stat reporting
|
|
1480
|
-
#
|
|
1481
|
-
#
|
|
1482
|
-
#
|
|
1483
|
-
#
|
|
1484
|
-
#
|
|
1485
|
-
#
|
|
1486
|
-
#
|
|
1487
|
-
# recovered `2.01` → `-.01`, corrupting the SD. 8 papers in the corpus
|
|
1488
|
-
# (majumder, korbmacher, van-boven, ...) had this defect.
|
|
1479
|
+
# In stat reporting a BARE bracket `[lo, hi]` attaches to the IMMEDIATELY-
|
|
1480
|
+
# preceding point estimate; a LABELED bracket `CI = [lo, hi]` or
|
|
1481
|
+
# `95% CI [lo, hi]` can attach to ANY earlier point estimate on the same
|
|
1482
|
+
# row (the SD/SE/df-pair tokens in between are descriptive of the same
|
|
1483
|
+
# estimate). The cycle 11 proximity gate treated both as needing strict
|
|
1484
|
+
# adjacency, which broke efendic's body-line recovery
|
|
1485
|
+
# `Mposterior = 20.54, SD=0.04, CI = [-0.61, -0.47]`
|
|
1486
|
+
# where `, SD=` falsely tripped the "new stat label" sentence-break check.
|
|
1489
1487
|
#
|
|
1490
|
-
#
|
|
1491
|
-
# bracket
|
|
1492
|
-
#
|
|
1493
|
-
#
|
|
1494
|
-
#
|
|
1488
|
+
# Cycle 12 fix: discriminate LABELED vs BARE brackets.
|
|
1489
|
+
# - LABELED bracket (`CI =`/`95% CI`/`CI:` immediately precedes `[`):
|
|
1490
|
+
# pairs with any candidate token in its record (the old wide rule).
|
|
1491
|
+
# - BARE bracket: pairs ONLY with candidates within 30 chars + no
|
|
1492
|
+
# sentence break (period/semicolon + space — NOT comma + new label,
|
|
1493
|
+
# because stat-row labels are comma-separated by convention).
|
|
1494
|
+
#
|
|
1495
|
+
# This keeps the majumder fix (bare bracket far from `2.01`) AND
|
|
1496
|
+
# preserves efendic-style labeled CIs that pair across SD/SE annotations.
|
|
1495
1497
|
_CI_PAIR_MAX_GAP = 30
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
)
|
|
1498
|
+
# Bare-bracket sentence break: only period/semicolon + space. A comma is
|
|
1499
|
+
# NOT a break because stat rows are comma-separated. The majumder false-
|
|
1500
|
+
# positive is now caught by the per-bracket proximity check (the bare
|
|
1501
|
+
# bracket sits ~50 chars after `2.01` — beyond _CI_PAIR_MAX_GAP).
|
|
1502
|
+
_SENTENCE_BREAK_RE = re.compile(r"[.;]\s")
|
|
1503
|
+
# A bracket is "labeled" when prefixed by `CI`, `95 % CI`, or similar
|
|
1504
|
+
# directly before the opening `[`. Allow optional whitespace and an `=` /
|
|
1505
|
+
# `:` between the label and the bracket.
|
|
1506
|
+
_CI_LABEL_PREFIX_RE = re.compile(r"(?:\bCI|\b\d+\s*%\s*CI)\s*[=:]?\s*$", re.IGNORECASE)
|
|
1500
1507
|
|
|
1501
1508
|
|
|
1502
1509
|
def _recover_minus_in_record(record: str) -> str:
|
|
1503
1510
|
"""Recover '2X.XX' tokens in a single record (a table row or a text line)
|
|
1504
1511
|
by pairing each with a CI bracket present in the same record."""
|
|
1505
|
-
|
|
1512
|
+
# Each entry: (lo, hi, (bs, be), is_labeled). `is_labeled` is True when
|
|
1513
|
+
# the bracket is prefixed by `CI`/`95% CI`/etc. — see cycle 12 notes
|
|
1514
|
+
# at _CI_LABEL_PREFIX_RE.
|
|
1515
|
+
brackets: list[tuple[float, float, tuple[int, int], bool]] = []
|
|
1506
1516
|
for m in _CI_PAIR_BRACKET_RE.finditer(record):
|
|
1507
1517
|
try:
|
|
1508
1518
|
lo, hi = float(m.group(1)), float(m.group(2))
|
|
@@ -1510,13 +1520,17 @@ def _recover_minus_in_record(record: str) -> str:
|
|
|
1510
1520
|
continue
|
|
1511
1521
|
if lo > hi:
|
|
1512
1522
|
continue # not a well-formed interval
|
|
1513
|
-
|
|
1523
|
+
# Look back ≤8 chars for a `CI` / `95 % CI` label.
|
|
1524
|
+
bs, be = m.span()
|
|
1525
|
+
prefix = record[max(0, bs - 8): bs]
|
|
1526
|
+
is_labeled = bool(_CI_LABEL_PREFIX_RE.search(prefix))
|
|
1527
|
+
brackets.append((lo, hi, (bs, be), is_labeled))
|
|
1514
1528
|
if not brackets:
|
|
1515
1529
|
return record
|
|
1516
1530
|
|
|
1517
1531
|
def _sub(m: "re.Match[str]") -> str:
|
|
1518
1532
|
# Never touch a token that lies inside a bracket span (a CI bound).
|
|
1519
|
-
for _lo, _hi, (bs, be) in brackets:
|
|
1533
|
+
for _lo, _hi, (bs, be), _lab in brackets:
|
|
1520
1534
|
if bs <= m.start() < be:
|
|
1521
1535
|
return m.group(0)
|
|
1522
1536
|
frac = m.group(1)
|
|
@@ -1525,22 +1539,29 @@ def _recover_minus_in_record(record: str) -> str:
|
|
|
1525
1539
|
recovered = float("-" + frac)
|
|
1526
1540
|
except ValueError:
|
|
1527
1541
|
return m.group(0)
|
|
1528
|
-
# Cycle
|
|
1529
|
-
#
|
|
1530
|
-
#
|
|
1531
|
-
#
|
|
1542
|
+
# Cycle 12: pick the NEAREST bracket whose pairing rules accept this
|
|
1543
|
+
# token. LABELED brackets accept any candidate in the record (legacy
|
|
1544
|
+
# wide rule — efendic body line `Mposterior = 20.54, SD=0.04,
|
|
1545
|
+
# CI = [-0.61, -0.47]` is the canonical case). BARE brackets only
|
|
1546
|
+
# accept the immediately-preceding stat (within 30 chars, no
|
|
1547
|
+
# sentence break) — this is what blocks the majumder false-positive
|
|
1548
|
+
# `M = 5.37, SD = 2.01, t = ..., d = 0.09 [-1.86, 0.04]`.
|
|
1532
1549
|
token_end = m.end()
|
|
1533
1550
|
nearest = None
|
|
1534
1551
|
nearest_dist = None
|
|
1535
|
-
for lo, hi, (bs, be) in brackets:
|
|
1552
|
+
for lo, hi, (bs, be), is_labeled in brackets:
|
|
1536
1553
|
if bs < token_end:
|
|
1537
|
-
continue # bracket precedes the token — not its CI
|
|
1538
|
-
gap = bs - token_end
|
|
1539
|
-
if gap > _CI_PAIR_MAX_GAP:
|
|
1540
|
-
continue
|
|
1541
|
-
intervening = record[token_end:bs]
|
|
1542
|
-
if _SENTENCE_BREAK_RE.search(intervening):
|
|
1543
1554
|
continue
|
|
1555
|
+
gap = bs - token_end
|
|
1556
|
+
if is_labeled:
|
|
1557
|
+
# Labeled bracket: only constraint is "comes after the token".
|
|
1558
|
+
pass
|
|
1559
|
+
else:
|
|
1560
|
+
if gap > _CI_PAIR_MAX_GAP:
|
|
1561
|
+
continue
|
|
1562
|
+
intervening = record[token_end:bs]
|
|
1563
|
+
if _SENTENCE_BREAK_RE.search(intervening):
|
|
1564
|
+
continue
|
|
1544
1565
|
if nearest_dist is None or gap < nearest_dist:
|
|
1545
1566
|
nearest = (lo, hi)
|
|
1546
1567
|
nearest_dist = gap
|
|
@@ -2649,6 +2670,21 @@ def normalize_text(
|
|
|
2649
2670
|
t = re.sub(r"([=<>])[ \t]*\n[ \t]*(?=[-\d.])", r"\1 ", t)
|
|
2650
2671
|
t = re.sub(r"([,;])[ \t]*\n[ \t]*(?=p\s*[<=>])", r"\1 ", t)
|
|
2651
2672
|
t = re.sub(r"([,;])[ \t]*\n[ \t]*(?=\d+%\s*CI)", r"\1 ", t)
|
|
2673
|
+
# Cycle 12 (v2.4.64) — cross-paragraph stat-continuation join.
|
|
2674
|
+
# A1 (which uses `\s*` and so crosses paragraph breaks) runs BEFORE
|
|
2675
|
+
# S9 strips header/footer lines. So a stat row like
|
|
2676
|
+
# `r(1798) = -0.27,\n\n472\n\nJournal of Decision Making, ...\n\n95% CI [-0.31, ...]`
|
|
2677
|
+
# has so much intervening junk that A1's lookahead fails on pass 1;
|
|
2678
|
+
# only after S9 strips the junk (producing `,\n\n95% CI`) can the
|
|
2679
|
+
# join happen, and that's pass 2. The two patterns below are the
|
|
2680
|
+
# paragraph-crossing variants of the comma-to-stat-continuation
|
|
2681
|
+
# patterns above — restricted to the high-confidence prefixes
|
|
2682
|
+
# `\d+% CI` and `p [<=>]` because no real paragraph STARTS with
|
|
2683
|
+
# those tokens (test_column_bleed_too_many_fragments_ignored is
|
|
2684
|
+
# unaffected — its input has no leading `,`/`;`).
|
|
2685
|
+
# Clears korbmacher (2 papers) from the non-idempotent set.
|
|
2686
|
+
t = re.sub(r"([,;])\s*\n\s*\n\s*(?=\d+%\s*CI)", r"\1 ", t)
|
|
2687
|
+
t = re.sub(r"([,;])\s*\n\s*\n\s*(?=p\s*[<=>])", r"\1 ", t)
|
|
2652
2688
|
report._track("LateJoin_line_break_rejoin", before, t, "late_line_joins")
|
|
2653
2689
|
|
|
2654
2690
|
# ── H0r: header-banner re-strip on stabilized line positions ─────────
|
|
@@ -2669,6 +2705,26 @@ def normalize_text(
|
|
|
2669
2705
|
t = _restripped
|
|
2670
2706
|
report._track("H0r_header_banner_restrip", before, t, "header_banners_restripped")
|
|
2671
2707
|
|
|
2708
|
+
# ── Final blank-line collapse ────────────────────────────────────────
|
|
2709
|
+
# S9 enforces `re.sub(r"\n{3,}", "\n\n", t)` once near the top of the
|
|
2710
|
+
# pipeline. Later steps that REMOVE non-blank content can leave blank
|
|
2711
|
+
# gaps that S9's earlier collapse no longer reaches:
|
|
2712
|
+
#
|
|
2713
|
+
# - R3 (refs-section continuation join) walks the refs span line by
|
|
2714
|
+
# line. A bare form-feed `\x0c` (pdftotext page-break) between two
|
|
2715
|
+
#     blank lines satisfies `"\x0c".strip() == ""` and is preserved as a blank
|
|
2716
|
+
# entry; R3 outputs three consecutive blank entries surrounded by
|
|
2717
|
+
# `"\n".join(...)` — `\n\n\n\n`. Pass 1 leaves this; pass 2's S9
|
|
2718
|
+
# collapses it, producing the bibliography-shift non-idempotence
|
|
2719
|
+
# (cycle 12 — 5 papers: chan-etal, horsham, lee-feldman,
|
|
2720
|
+
# li-feldman-mental, + 1 incidental).
|
|
2721
|
+
# - Same pattern for any late strip step that empties a line without
|
|
2722
|
+
# re-collapsing.
|
|
2723
|
+
#
|
|
2724
|
+
# Add the collapse here so the function is idempotent regardless of
|
|
2725
|
+
# which late step produced the blank-line run.
|
|
2726
|
+
t = re.sub(r"\n{3,}", "\n\n", t)
|
|
2727
|
+
|
|
2672
2728
|
# ── P0r: page-footer-line re-strip on stabilized line positions ──────
|
|
2673
2729
|
# Same shape as H0r, applied to P0's anchored ^...$ patterns. P0 runs
|
|
2674
2730
|
# near the top of the pipeline, where some P0-targeted lines are still
|
|
@@ -234,6 +234,45 @@ def test_s9_4digit_pattern_a_still_strips_isolated_page_numbers():
|
|
|
234
234
|
)
|
|
235
235
|
|
|
236
236
|
|
|
237
|
+
def test_normalize_collapses_late_blank_line_runs():
|
|
238
|
+
"""Cycle 12: a late strip step that empties a line (e.g. R3 stripping a
|
|
239
|
+
form-feed `\\x0c` between two blank lines) leaves a `\\n{3+}` run that
|
|
240
|
+
S9's earlier collapse no longer reaches. The final collapse at the end
|
|
241
|
+
of normalize_text catches it."""
|
|
242
|
+
# Simulate: paragraph + form-feed + paragraph (pdftotext page-break case)
|
|
243
|
+
text = "First paragraph ending here.\n\n\x0c\n\nSecond paragraph begins."
|
|
244
|
+
out, _ = normalize_text(text, NormalizationLevel.academic)
|
|
245
|
+
# Should produce one paragraph break, not two
|
|
246
|
+
assert "\n\n\n" not in out, f"normalize_text left a \\n{{3+}} run: {out!r}"
|
|
247
|
+
assert "First paragraph" in out
|
|
248
|
+
assert "Second paragraph" in out
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def test_late_join_crosses_paragraph_for_stat_continuation():
|
|
252
|
+
"""Cycle 12: a comma/semicolon followed by a paragraph break and a
|
|
253
|
+
high-confidence stat-continuation token (95% CI / p [<=>]) is a
|
|
254
|
+
serializer artifact — joined on pass 1. Pre-cycle-12, only pass 2
|
|
255
|
+
joined it (after S9 stripped the intervening header/footer noise).
|
|
256
|
+
|
|
257
|
+
Defends against the corpus-wide korbmacher pattern where a regression-
|
|
258
|
+
coefficient row was broken by a per-page header insertion."""
|
|
259
|
+
# The full korbmacher pre-S9 pattern has a couple of header lines in between;
|
|
260
|
+
# post-S9 the input to LateJoin is just `,\n\n95% CI`.
|
|
261
|
+
text = "r(1798) = -0.27,\n\n95% CI [-0.31, -0.22]"
|
|
262
|
+
out, _ = normalize_text(text, NormalizationLevel.academic)
|
|
263
|
+
assert "-0.27, 95% CI" in out, f"cross-paragraph stat join failed: {out!r}"
|
|
264
|
+
|
|
265
|
+
# Same for p-value continuation
|
|
266
|
+
text2 = "t(23) = 2.34,\n\np < .001, d = 0.45"
|
|
267
|
+
out2, _ = normalize_text(text2, NormalizationLevel.academic)
|
|
268
|
+
assert "2.34, p < .001" in out2 or "p < .001" in out2.replace("\n\n", " ")
|
|
269
|
+
|
|
270
|
+
# The column-bleed contract is NOT broken — its input has no leading `,;`.
|
|
271
|
+
cb = "p\n01\n02\n03\n04\n05\n= .05"
|
|
272
|
+
out_cb, _ = normalize_text(cb, NormalizationLevel.academic)
|
|
273
|
+
assert "p = .05" not in out_cb, "column-bleed test contract broken by cycle 12"
|
|
274
|
+
|
|
275
|
+
|
|
237
276
|
def test_recover_minus_proximity_gate_rejects_distant_unrelated_brackets():
|
|
238
277
|
"""Cycle 11: a stat-table row that mixes an unrelated SD value with a
|
|
239
278
|
separately-reported CI bracket must NOT have the SD recovered as a
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/ai-full-doc-verify.md
RENAMED
|
File without changes
|
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/local-verification.md
RENAMED
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/rationalizations.md
RENAMED
|
File without changes
|
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/release-flow.md
RENAMED
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/self-improvement.md
RENAMED
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-iterate/references/three-tier-parity.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-qa/references/check-11-hard-rules.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/.claude/skills/docpluck-qa/references/check-7-batch-smoke.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-09_unified_extraction_brainstorm.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-12_remaining_ui_and_chrome_verification.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-13_iterative_library_improvement.md
RENAMED
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-13_table_extraction_next_iteration.md
RENAMED
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-14_continue_iterations_v2_4_30_to_15n.md
RENAMED
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-14_full_corpus_iteration_v2_4_30.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docpluck-2.4.63 → docpluck-2.4.64}/docs/HANDOFF_2026-05-16_iterate_run_4_fix_and_continue.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|