deepresearch-flow 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepresearch_flow/__init__.py +5 -0
- deepresearch_flow/cli.py +23 -0
- deepresearch_flow/paper/__init__.py +1 -0
- deepresearch_flow/paper/cli.py +286 -0
- deepresearch_flow/paper/config.py +249 -0
- deepresearch_flow/paper/db.py +768 -0
- deepresearch_flow/paper/extract.py +870 -0
- deepresearch_flow/paper/llm.py +115 -0
- deepresearch_flow/paper/prompt_templates/__init__.py +1 -0
- deepresearch_flow/paper/prompt_templates/deep_read_system.j2 +6 -0
- deepresearch_flow/paper/prompt_templates/deep_read_user.j2 +82 -0
- deepresearch_flow/paper/prompt_templates/eight_questions_system.j2 +6 -0
- deepresearch_flow/paper/prompt_templates/eight_questions_user.j2 +28 -0
- deepresearch_flow/paper/prompt_templates/simple_system.j2 +6 -0
- deepresearch_flow/paper/prompt_templates/simple_user.j2 +24 -0
- deepresearch_flow/paper/prompt_templates/three_pass_system.j2 +6 -0
- deepresearch_flow/paper/prompt_templates/three_pass_user.j2 +44 -0
- deepresearch_flow/paper/prompts.py +11 -0
- deepresearch_flow/paper/providers/__init__.py +1 -0
- deepresearch_flow/paper/providers/azure_openai.py +66 -0
- deepresearch_flow/paper/providers/base.py +19 -0
- deepresearch_flow/paper/providers/claude.py +71 -0
- deepresearch_flow/paper/providers/dashscope.py +58 -0
- deepresearch_flow/paper/providers/gemini.py +116 -0
- deepresearch_flow/paper/providers/ollama.py +46 -0
- deepresearch_flow/paper/providers/openai_compatible.py +60 -0
- deepresearch_flow/paper/render.py +64 -0
- deepresearch_flow/paper/schema.py +58 -0
- deepresearch_flow/paper/schemas/__init__.py +1 -0
- deepresearch_flow/paper/schemas/deep_read_schema.json +46 -0
- deepresearch_flow/paper/schemas/default_paper_schema.json +47 -0
- deepresearch_flow/paper/schemas/eight_questions_schema.json +34 -0
- deepresearch_flow/paper/schemas/three_pass_schema.json +24 -0
- deepresearch_flow/paper/template_registry.py +189 -0
- deepresearch_flow/paper/templates/__init__.py +1 -0
- deepresearch_flow/paper/templates/deep_read.md.j2 +79 -0
- deepresearch_flow/paper/templates/default_paper.md.j2 +32 -0
- deepresearch_flow/paper/templates/eight_questions.md.j2 +49 -0
- deepresearch_flow/paper/templates/three_pass.md.j2 +28 -0
- deepresearch_flow/paper/utils.py +136 -0
- deepresearch_flow/paper/web/__init__.py +2 -0
- deepresearch_flow/paper/web/app.py +2307 -0
- deepresearch_flow/paper/web/pdfjs/LICENSE +177 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/78-EUC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/78-EUC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/78-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/78-RKSJ-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/78-RKSJ-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/78-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/78ms-RKSJ-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/78ms-RKSJ-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/83pv-RKSJ-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/90ms-RKSJ-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/90ms-RKSJ-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/90msp-RKSJ-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/90msp-RKSJ-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/90pv-RKSJ-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/90pv-RKSJ-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-RKSJ-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-RKSJ-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-0.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-1.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-2.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-3.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-4.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-5.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-6.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-0.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-1.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-2.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-3.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-4.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-5.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-0.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-1.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-2.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-3.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-4.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-5.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-6.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-0.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-1.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-2.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/B5-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/B5-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/B5pc-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/B5pc-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS-EUC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS-EUC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS1-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS1-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS2-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS2-V.bcmap +3 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/ETHK-B5-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/ETHK-B5-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/ETen-B5-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/ETen-B5-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/ETenms-B5-H.bcmap +3 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/ETenms-B5-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/EUC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/EUC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-RKSJ-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-RKSJ-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-EUC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-EUC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-H.bcmap +4 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK-EUC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK-EUC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK2K-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK2K-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBKp-EUC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBKp-EUC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-EUC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-EUC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBTpc-EUC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBTpc-EUC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBpc-EUC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/GBpc-EUC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdla-B5-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdla-B5-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdlb-B5-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdlb-B5-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKgccs-B5-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKgccs-B5-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm314-B5-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm314-B5-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm471-B5-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm471-B5-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKscs-B5-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/HKscs-B5-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Hankaku.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Hiragana.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-EUC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-EUC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-Johab-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-Johab-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCpc-EUC-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCpc-EUC-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Katakana.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/LICENSE +36 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/NWP-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/NWP-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/RKSJ-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/RKSJ-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/Roman.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UCS2-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UCS2-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF16-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF16-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF32-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF32-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF8-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF8-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UCS2-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UCS2-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF16-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF16-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF32-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF32-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF8-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF8-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF16-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF16-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF32-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF32-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF8-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF8-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UCS2-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UCS2-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF16-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF16-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF32-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF32-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF8-H.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF8-V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/V.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/cmaps/WP-Symbol.bcmap +0 -0
- deepresearch_flow/paper/web/pdfjs/web/compressed.tracemonkey-pldi-09.pdf +0 -0
- deepresearch_flow/paper/web/pdfjs/web/debugger.css +111 -0
- deepresearch_flow/paper/web/pdfjs/web/debugger.js +611 -0
- deepresearch_flow/paper/web/pdfjs/web/images/altText_add.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/altText_done.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/annotation-check.svg +11 -0
- deepresearch_flow/paper/web/pdfjs/web/images/annotation-comment.svg +16 -0
- deepresearch_flow/paper/web/pdfjs/web/images/annotation-help.svg +26 -0
- deepresearch_flow/paper/web/pdfjs/web/images/annotation-insert.svg +10 -0
- deepresearch_flow/paper/web/pdfjs/web/images/annotation-key.svg +11 -0
- deepresearch_flow/paper/web/pdfjs/web/images/annotation-newparagraph.svg +11 -0
- deepresearch_flow/paper/web/pdfjs/web/images/annotation-noicon.svg +7 -0
- deepresearch_flow/paper/web/pdfjs/web/images/annotation-note.svg +42 -0
- deepresearch_flow/paper/web/pdfjs/web/images/annotation-paperclip.svg +6 -0
- deepresearch_flow/paper/web/pdfjs/web/images/annotation-paragraph.svg +16 -0
- deepresearch_flow/paper/web/pdfjs/web/images/annotation-pushpin.svg +7 -0
- deepresearch_flow/paper/web/pdfjs/web/images/cursor-editorFreeText.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/cursor-editorInk.svg +4 -0
- deepresearch_flow/paper/web/pdfjs/web/images/findbarButton-next.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/findbarButton-previous.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/gv-toolbarButton-download.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/gv-toolbarButton-openinapp.svg +11 -0
- deepresearch_flow/paper/web/pdfjs/web/images/loading-dark.svg +24 -0
- deepresearch_flow/paper/web/pdfjs/web/images/loading-icon.gif +0 -0
- deepresearch_flow/paper/web/pdfjs/web/images/loading.svg +1 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-documentProperties.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-firstPage.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-handTool.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-lastPage.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-rotateCcw.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-rotateCw.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollHorizontal.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollPage.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollVertical.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollWrapped.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-selectTool.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadEven.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadNone.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadOdd.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-bookmark.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-currentOutlineItem.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-download.svg +4 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorFreeText.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorInk.svg +4 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorStamp.svg +8 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-menuArrow.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-openFile.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-pageDown.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-pageUp.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-presentationMode.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-print.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-search.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-secondaryToolbarToggle.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-sidebarToggle.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewAttachments.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewLayers.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewOutline.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewThumbnail.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-zoomIn.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-zoomOut.svg +3 -0
- deepresearch_flow/paper/web/pdfjs/web/images/treeitem-collapsed.svg +1 -0
- deepresearch_flow/paper/web/pdfjs/web/images/treeitem-expanded.svg +1 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ach/viewer.properties +203 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/af/viewer.properties +156 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/an/viewer.properties +222 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ar/viewer.properties +224 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ast/viewer.properties +185 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/az/viewer.properties +222 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/be/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/bg/viewer.properties +214 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/bn/viewer.properties +218 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/bo/viewer.properties +217 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/br/viewer.properties +224 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/brx/viewer.properties +184 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/bs/viewer.properties +173 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ca/viewer.properties +256 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/cak/viewer.properties +253 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ckb/viewer.properties +213 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/cs/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/cy/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/da/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/de/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/dsb/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/el/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/en-CA/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/en-GB/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/en-US/viewer.properties +282 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/eo/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/es-AR/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/es-CL/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/es-ES/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/es-MX/viewer.properties +257 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/et/viewer.properties +229 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/eu/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/fa/viewer.properties +221 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ff/viewer.properties +214 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/fi/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/fr/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/fur/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/fy-NL/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ga-IE/viewer.properties +181 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/gd/viewer.properties +257 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/gl/viewer.properties +267 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/gn/viewer.properties +278 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/gu-IN/viewer.properties +214 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/he/viewer.properties +283 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/hi-IN/viewer.properties +227 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/hr/viewer.properties +243 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/hsb/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/hu/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/hy-AM/viewer.properties +232 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/hye/viewer.properties +229 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ia/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/id/viewer.properties +253 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/is/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/it/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ja/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ka/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/kab/viewer.properties +264 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/kk/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/km/viewer.properties +189 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/kn/viewer.properties +166 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ko/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/lij/viewer.properties +214 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/lo/viewer.properties +257 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/locale.properties +333 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/lt/viewer.properties +229 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ltg/viewer.properties +192 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/lv/viewer.properties +214 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/meh/viewer.properties +106 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/mk/viewer.properties +211 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/mr/viewer.properties +210 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ms/viewer.properties +214 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/my/viewer.properties +170 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/nb-NO/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ne-NP/viewer.properties +197 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/nl/viewer.properties +274 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/nn-NO/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/oc/viewer.properties +278 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/pa-IN/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/pl/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/pt-BR/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/pt-PT/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/rm/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ro/viewer.properties +220 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ru/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/sat/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/sc/viewer.properties +258 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/scn/viewer.properties +101 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/sco/viewer.properties +226 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/si/viewer.properties +228 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/sk/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/skr/viewer.properties +264 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/sl/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/son/viewer.properties +152 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/sq/viewer.properties +247 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/sr/viewer.properties +259 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/sv-SE/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/szl/viewer.properties +224 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ta/viewer.properties +173 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/te/viewer.properties +216 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/tg/viewer.properties +281 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/th/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/tl/viewer.properties +222 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/tr/viewer.properties +283 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/trs/viewer.properties +184 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/uk/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/ur/viewer.properties +218 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/uz/viewer.properties +142 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/vi/viewer.properties +270 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/wo/viewer.properties +104 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/xh/viewer.properties +156 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/zh-CN/viewer.properties +284 -0
- deepresearch_flow/paper/web/pdfjs/web/locale/zh-TW/viewer.properties +281 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitDingbats.pfb +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixed.pfb +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedBold.pfb +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedItalic.pfb +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerif.pfb +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifBold.pfb +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifItalic.pfb +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSymbol.pfb +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LICENSE_FOXIT +27 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LICENSE_LIBERATION +102 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Bold.ttf +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Italic.ttf +0 -0
- deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Regular.ttf +0 -0
- deepresearch_flow/paper/web/pdfjs/web/viewer.css +3528 -0
- deepresearch_flow/paper/web/pdfjs/web/viewer.html +486 -0
- deepresearch_flow/paper/web/pdfjs/web/viewer.js +14099 -0
- deepresearch_flow/paper/web/pdfjs/web/viewer.js.map +1 -0
- deepresearch_flow/paper/web/query.py +90 -0
- deepresearch_flow/recognize/__init__.py +1 -0
- deepresearch_flow/recognize/cli.py +469 -0
- deepresearch_flow/recognize/markdown.py +277 -0
- deepresearch_flow/recognize/organize.py +95 -0
- deepresearch_flow-0.1.1.dist-info/METADATA +416 -0
- deepresearch_flow-0.1.1.dist-info/RECORD +417 -0
- deepresearch_flow-0.1.1.dist-info/WHEEL +5 -0
- deepresearch_flow-0.1.1.dist-info/entry_points.txt +2 -0
- deepresearch_flow-0.1.1.dist-info/licenses/LICENSE +21 -0
- deepresearch_flow-0.1.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,2307 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import html
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from html.parser import HTMLParser
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
import re
|
|
11
|
+
from urllib.parse import urlencode, quote
|
|
12
|
+
|
|
13
|
+
from markdown_it import MarkdownIt
|
|
14
|
+
from starlette.applications import Starlette
|
|
15
|
+
from starlette.requests import Request
|
|
16
|
+
from starlette.responses import FileResponse, HTMLResponse, JSONResponse, RedirectResponse, Response
|
|
17
|
+
from starlette.routing import Mount, Route
|
|
18
|
+
from starlette.staticfiles import StaticFiles
|
|
19
|
+
|
|
20
|
+
from deepresearch_flow.paper.render import load_default_template
|
|
21
|
+
from deepresearch_flow.paper.template_registry import (
|
|
22
|
+
list_template_names_in_registry_order,
|
|
23
|
+
load_render_template,
|
|
24
|
+
load_schema_for_template,
|
|
25
|
+
)
|
|
26
|
+
from deepresearch_flow.paper.utils import stable_hash
|
|
27
|
+
from deepresearch_flow.paper.web.query import Query, QueryTerm, parse_query
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
from pybtex.database import parse_file
|
|
31
|
+
PYBTEX_AVAILABLE = True
|
|
32
|
+
except Exception:
|
|
33
|
+
PYBTEX_AVAILABLE = False
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
_CDN_ECHARTS = "https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"
|
|
37
|
+
_CDN_MERMAID = "https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"
|
|
38
|
+
_CDN_KATEX = "https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css"
|
|
39
|
+
_CDN_KATEX_JS = "https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.js"
|
|
40
|
+
_CDN_KATEX_AUTO = "https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/contrib/auto-render.min.js"
|
|
41
|
+
# Use legacy builds to ensure `pdfjsLib` is available as a global.
|
|
42
|
+
_CDN_PDFJS = "https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/legacy/build/pdf.min.js"
|
|
43
|
+
_CDN_PDFJS_WORKER = "https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/legacy/build/pdf.worker.min.js"
|
|
44
|
+
_PDFJS_VIEWER_PATH = "/pdfjs/web/viewer.html"
|
|
45
|
+
_PDFJS_STATIC_DIR = Path(__file__).resolve().parent / "pdfjs"
|
|
46
|
+
|
|
47
|
+
logger = logging.getLogger(__name__)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True)
class PaperIndex:
    """Immutable bundle of the paper list and the lookup tables built over it.

    Integer ids throughout are positions in ``papers``.
    """

    # The paper records themselves.
    papers: list[dict[str, Any]]
    # Source hash -> position in ``papers``.
    id_by_hash: dict[str, int]
    # Positions in display order.
    ordered_ids: list[int]
    # Facet tables: normalized facet value -> positions of matching papers.
    by_tag: dict[str, set[int]]
    by_author: dict[str, set[int]]
    by_year: dict[str, set[int]]
    by_month: dict[str, set[int]]
    by_venue: dict[str, set[int]]
    # Aggregate counts for the collection.
    stats: dict[str, Any]
    # Hash key -> resolved markdown / PDF file for that paper.
    md_path_by_hash: dict[str, Path]
    pdf_path_by_hash: dict[str, Path]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _split_csv(values: list[str]) -> list[str]:
|
|
66
|
+
out: list[str] = []
|
|
67
|
+
for value in values:
|
|
68
|
+
for part in value.split(","):
|
|
69
|
+
part = part.strip()
|
|
70
|
+
if part:
|
|
71
|
+
out.append(part)
|
|
72
|
+
return out
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _normalize_key(value: str) -> str:
|
|
76
|
+
return value.strip().lower()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _parse_year_month(date_str: str | None) -> tuple[str | None, str | None]:
|
|
80
|
+
if not date_str:
|
|
81
|
+
return None, None
|
|
82
|
+
text = str(date_str).strip()
|
|
83
|
+
year = None
|
|
84
|
+
month = None
|
|
85
|
+
|
|
86
|
+
year_match = re.search(r"(19|20)\d{2}", text)
|
|
87
|
+
if year_match:
|
|
88
|
+
year = year_match.group(0)
|
|
89
|
+
|
|
90
|
+
numeric_match = re.search(r"(19|20)\d{2}[-/](\d{1,2})", text)
|
|
91
|
+
if numeric_match:
|
|
92
|
+
m = int(numeric_match.group(2))
|
|
93
|
+
if 1 <= m <= 12:
|
|
94
|
+
month = f"{m:02d}"
|
|
95
|
+
return year, month
|
|
96
|
+
|
|
97
|
+
month_word = re.search(
|
|
98
|
+
r"(jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec|"
|
|
99
|
+
r"january|february|march|april|june|july|august|september|october|november|december)",
|
|
100
|
+
text.lower(),
|
|
101
|
+
)
|
|
102
|
+
if month_word:
|
|
103
|
+
lookup = {
|
|
104
|
+
"january": "01",
|
|
105
|
+
"february": "02",
|
|
106
|
+
"march": "03",
|
|
107
|
+
"april": "04",
|
|
108
|
+
"may": "05",
|
|
109
|
+
"june": "06",
|
|
110
|
+
"july": "07",
|
|
111
|
+
"august": "08",
|
|
112
|
+
"september": "09",
|
|
113
|
+
"october": "10",
|
|
114
|
+
"november": "11",
|
|
115
|
+
"december": "12",
|
|
116
|
+
"jan": "01",
|
|
117
|
+
"feb": "02",
|
|
118
|
+
"mar": "03",
|
|
119
|
+
"apr": "04",
|
|
120
|
+
"jun": "06",
|
|
121
|
+
"jul": "07",
|
|
122
|
+
"aug": "08",
|
|
123
|
+
"sep": "09",
|
|
124
|
+
"sept": "09",
|
|
125
|
+
"oct": "10",
|
|
126
|
+
"nov": "11",
|
|
127
|
+
"dec": "12",
|
|
128
|
+
}
|
|
129
|
+
month = lookup.get(month_word.group(0))
|
|
130
|
+
return year, month
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _normalize_month_token(value: str | int | None) -> str | None:
|
|
134
|
+
if value is None:
|
|
135
|
+
return None
|
|
136
|
+
if isinstance(value, int):
|
|
137
|
+
if 1 <= value <= 12:
|
|
138
|
+
return f"{value:02d}"
|
|
139
|
+
return None
|
|
140
|
+
raw = str(value).strip().lower()
|
|
141
|
+
if not raw:
|
|
142
|
+
return None
|
|
143
|
+
if raw.isdigit():
|
|
144
|
+
return _normalize_month_token(int(raw))
|
|
145
|
+
lookup = {
|
|
146
|
+
"january": "01",
|
|
147
|
+
"february": "02",
|
|
148
|
+
"march": "03",
|
|
149
|
+
"april": "04",
|
|
150
|
+
"may": "05",
|
|
151
|
+
"june": "06",
|
|
152
|
+
"july": "07",
|
|
153
|
+
"august": "08",
|
|
154
|
+
"september": "09",
|
|
155
|
+
"october": "10",
|
|
156
|
+
"november": "11",
|
|
157
|
+
"december": "12",
|
|
158
|
+
"jan": "01",
|
|
159
|
+
"feb": "02",
|
|
160
|
+
"mar": "03",
|
|
161
|
+
"apr": "04",
|
|
162
|
+
"jun": "06",
|
|
163
|
+
"jul": "07",
|
|
164
|
+
"aug": "08",
|
|
165
|
+
"sep": "09",
|
|
166
|
+
"sept": "09",
|
|
167
|
+
"oct": "10",
|
|
168
|
+
"nov": "11",
|
|
169
|
+
"dec": "12",
|
|
170
|
+
}
|
|
171
|
+
return lookup.get(raw)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _extract_authors(paper: dict[str, Any]) -> list[str]:
|
|
175
|
+
value = paper.get("paper_authors")
|
|
176
|
+
if value is None:
|
|
177
|
+
return []
|
|
178
|
+
if isinstance(value, list):
|
|
179
|
+
return [str(item).strip() for item in value if str(item).strip()]
|
|
180
|
+
if isinstance(value, str):
|
|
181
|
+
return [part.strip() for part in value.split(",") if part.strip()]
|
|
182
|
+
return [str(value)]
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _extract_tags(paper: dict[str, Any]) -> list[str]:
|
|
186
|
+
tags = paper.get("ai_generated_tags") or []
|
|
187
|
+
if isinstance(tags, list):
|
|
188
|
+
return [str(tag).strip() for tag in tags if str(tag).strip()]
|
|
189
|
+
return []
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _extract_venue(paper: dict[str, Any]) -> str:
|
|
193
|
+
if isinstance(paper.get("bibtex"), dict):
|
|
194
|
+
bib = paper.get("bibtex") or {}
|
|
195
|
+
fields = bib.get("fields") or {}
|
|
196
|
+
bib_type = (bib.get("type") or "").lower()
|
|
197
|
+
if bib_type == "article" and fields.get("journal"):
|
|
198
|
+
return str(fields.get("journal"))
|
|
199
|
+
if bib_type in {"inproceedings", "conference", "proceedings"} and fields.get("booktitle"):
|
|
200
|
+
return str(fields.get("booktitle"))
|
|
201
|
+
return str(paper.get("publication_venue") or "")
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def build_index(
    papers: list[dict[str, Any]],
    *,
    md_roots: list[Path] | None = None,
    pdf_roots: list[Path] | None = None,
) -> PaperIndex:
    """Annotate ``papers`` in place and build the lookup tables over them.

    Each paper dict gains the derived keys ``_title_lc``, ``_year``, ``_month``,
    ``_venue``, ``_authors``, ``_tags`` and ``_search_lc``.

    Args:
        papers: Paper records; they are mutated.
        md_roots: Directories passed to ``_build_file_index`` for ``.md`` files.
        pdf_roots: Directories passed to ``_build_file_index`` for ``.pdf`` files.

    Returns:
        A ``PaperIndex`` whose integer ids are positions in ``papers``.
    """
    md_file_index = _build_file_index(md_roots or [], suffixes={".md"})
    pdf_file_index = _build_file_index(pdf_roots or [], suffixes={".pdf"})

    id_by_hash: dict[str, int] = {}
    md_path_by_hash: dict[str, Path] = {}
    pdf_path_by_hash: dict[str, Path] = {}

    facet_names = ("tag", "author", "year", "month", "venue")
    facets: dict[str, dict[str, set[int]]] = {name: {} for name in facet_names}
    counts: dict[str, dict[str, int]] = {name: {} for name in facet_names}

    def record(facet: str, label: str, position: int) -> None:
        # Lookup tables are keyed by the normalized label; counts keep the
        # label exactly as it will be displayed.
        facets[facet].setdefault(_normalize_key(label), set()).add(position)
        tally = counts[facet]
        tally[label] = tally.get(label, 0) + 1

    for position, paper in enumerate(papers):
        # Fall back to hashing the source path when no explicit hash is stored.
        source_hash = paper.get("source_hash")
        if not source_hash and paper.get("source_path"):
            source_hash = stable_hash(str(paper.get("source_path")))
        if source_hash:
            id_by_hash[str(source_hash)] = position

        title = str(paper.get("paper_title") or "")
        paper["_title_lc"] = title.lower()

        # BibTeX year/month take precedence; gaps are filled from publication_date.
        bibtex = paper.get("bibtex")
        bib_fields: dict[str, Any] = {}
        if isinstance(bibtex, dict):
            bib_fields = bibtex.get("fields", {}) or {}
        raw_year = bib_fields.get("year")
        year = str(raw_year) if raw_year and str(raw_year).isdigit() else None
        month = _normalize_month_token(bib_fields.get("month"))
        if not (year and month):
            parsed_year, parsed_month = _parse_year_month(str(paper.get("publication_date") or ""))
            year = year or parsed_year
            month = month or parsed_month

        year_label = year or "Unknown"
        month_label = month or "Unknown"
        paper["_year"] = year_label
        paper["_month"] = month_label
        record("year", year_label, position)
        record("month", month_label, position)

        venue = _extract_venue(paper).strip()
        paper["_venue"] = venue
        # Papers without a venue are grouped under "Unknown" / "unknown".
        record("venue", venue or "Unknown", position)

        authors = _extract_authors(paper)
        paper["_authors"] = authors
        for author in authors:
            record("author", author, position)

        tags = _extract_tags(paper)
        paper["_tags"] = tags
        for tag in tags:
            record("tag", tag, position)

        searchable = (title, venue, " ".join(authors), " ".join(tags))
        paper["_search_lc"] = " ".join(part for part in searchable if part).lower()

        # Papers without any hash are keyed by their position instead.
        path_key = str(source_hash) if source_hash else str(position)
        md_path = _resolve_source_md(paper, md_file_index)
        if md_path is not None:
            md_path_by_hash[path_key] = md_path
        pdf_path = _resolve_pdf(paper, pdf_file_index)
        if pdf_path is not None:
            pdf_path_by_hash[path_key] = pdf_path

    def display_rank(position: int) -> tuple[int, int, str]:
        # Numeric years first (newest first), then everything else; ties are
        # broken by lower-cased title.
        entry = papers[position]
        year_text = str(entry.get("_year") or "Unknown")
        title_text = str(entry.get("paper_title") or "").lower()
        if year_text.isdigit():
            return (0, -int(year_text), title_text)
        return (1, 0, title_text)

    ordered_ids = sorted(range(len(papers)), key=display_rank)

    stats = {
        "total": len(papers),
        "years": _sorted_counts(counts["year"], numeric_desc=True),
        "months": _sorted_month_counts(counts["month"]),
        "tags": _sorted_counts(counts["tag"]),
        "authors": _sorted_counts(counts["author"]),
        "venues": _sorted_counts(counts["venue"]),
    }

    return PaperIndex(
        papers=papers,
        id_by_hash=id_by_hash,
        ordered_ids=ordered_ids,
        by_tag=facets["tag"],
        by_author=facets["author"],
        by_year=facets["year"],
        by_month=facets["month"],
        by_venue=facets["venue"],
        stats=stats,
        md_path_by_hash=md_path_by_hash,
        pdf_path_by_hash=pdf_path_by_hash,
    )
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _sorted_counts(counts: dict[str, int], *, numeric_desc: bool = False) -> list[dict[str, Any]]:
|
|
333
|
+
items = list(counts.items())
|
|
334
|
+
if numeric_desc:
|
|
335
|
+
def key(item: tuple[str, int]) -> tuple[int, int]:
|
|
336
|
+
label, count = item
|
|
337
|
+
if label.isdigit():
|
|
338
|
+
return (0, -int(label))
|
|
339
|
+
return (1, 0)
|
|
340
|
+
items.sort(key=key)
|
|
341
|
+
else:
|
|
342
|
+
items.sort(key=lambda item: item[1], reverse=True)
|
|
343
|
+
return [{"label": k, "count": v} for k, v in items]
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _sorted_month_counts(counts: dict[str, int]) -> list[dict[str, Any]]:
|
|
347
|
+
def month_sort(label: str) -> int:
|
|
348
|
+
if label == "Unknown":
|
|
349
|
+
return 99
|
|
350
|
+
if label.isdigit():
|
|
351
|
+
return int(label)
|
|
352
|
+
return 98
|
|
353
|
+
|
|
354
|
+
items = sorted(counts.items(), key=lambda item: month_sort(item[0]))
|
|
355
|
+
return [{"label": k, "count": v} for k, v in items]
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
_TEMPLATE_INFER_IGNORE_KEYS = {
|
|
359
|
+
"source_path",
|
|
360
|
+
"source_hash",
|
|
361
|
+
"provider",
|
|
362
|
+
"model",
|
|
363
|
+
"extracted_at",
|
|
364
|
+
"truncation",
|
|
365
|
+
"output_language",
|
|
366
|
+
"prompt_template",
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def _load_paper_inputs(paths: list[Path]) -> list[dict[str, Any]]:
|
|
371
|
+
inputs: list[dict[str, Any]] = []
|
|
372
|
+
for path in paths:
|
|
373
|
+
payload = json.loads(path.read_text(encoding="utf-8"))
|
|
374
|
+
if isinstance(payload, list):
|
|
375
|
+
raise ValueError(
|
|
376
|
+
f"Input JSON must be an object with template_tag and papers (got array): {path}"
|
|
377
|
+
)
|
|
378
|
+
if not isinstance(payload, dict):
|
|
379
|
+
raise ValueError(f"Input JSON must be an object: {path}")
|
|
380
|
+
papers = payload.get("papers")
|
|
381
|
+
if not isinstance(papers, list):
|
|
382
|
+
raise ValueError(f"Input JSON missing papers list: {path}")
|
|
383
|
+
template_tag = payload.get("template_tag")
|
|
384
|
+
if not template_tag:
|
|
385
|
+
template_tag = _infer_template_tag(papers, path)
|
|
386
|
+
inputs.append({"template_tag": str(template_tag), "papers": papers})
|
|
387
|
+
return inputs
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def _infer_template_tag(papers: list[dict[str, Any]], path: Path) -> str:
|
|
391
|
+
prompt_tags = {
|
|
392
|
+
str(paper.get("prompt_template"))
|
|
393
|
+
for paper in papers
|
|
394
|
+
if isinstance(paper, dict) and paper.get("prompt_template")
|
|
395
|
+
}
|
|
396
|
+
if len(prompt_tags) == 1:
|
|
397
|
+
return prompt_tags.pop()
|
|
398
|
+
|
|
399
|
+
sample = next((paper for paper in papers if isinstance(paper, dict)), None)
|
|
400
|
+
if sample is None:
|
|
401
|
+
raise ValueError(f"Input JSON has no paper objects to infer template_tag: {path}")
|
|
402
|
+
|
|
403
|
+
paper_keys = {key for key in sample.keys() if key not in _TEMPLATE_INFER_IGNORE_KEYS}
|
|
404
|
+
if not paper_keys:
|
|
405
|
+
raise ValueError(f"Input JSON papers have no keys to infer template_tag: {path}")
|
|
406
|
+
|
|
407
|
+
best_tag = None
|
|
408
|
+
best_score = -1
|
|
409
|
+
for name in list_template_names_in_registry_order():
|
|
410
|
+
schema = load_schema_for_template(name)
|
|
411
|
+
schema_keys = set((schema.get("properties") or {}).keys())
|
|
412
|
+
score = len(paper_keys & schema_keys)
|
|
413
|
+
if score > best_score:
|
|
414
|
+
best_score = score
|
|
415
|
+
best_tag = name
|
|
416
|
+
elif score == best_score:
|
|
417
|
+
if best_tag != "simple" and name == "simple":
|
|
418
|
+
best_tag = name
|
|
419
|
+
|
|
420
|
+
if not best_tag:
|
|
421
|
+
raise ValueError(f"Unable to infer template_tag from input JSON: {path}")
|
|
422
|
+
return best_tag
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def _build_cache_meta(db_paths: list[Path], bibtex_path: Path | None) -> dict[str, Any]:
|
|
426
|
+
def file_meta(path: Path) -> dict[str, Any]:
|
|
427
|
+
try:
|
|
428
|
+
stats = path.stat()
|
|
429
|
+
except OSError as exc:
|
|
430
|
+
raise ValueError(f"Failed to read input metadata for cache: {path}") from exc
|
|
431
|
+
return {"path": str(path), "mtime": stats.st_mtime, "size": stats.st_size}
|
|
432
|
+
|
|
433
|
+
meta = {
|
|
434
|
+
"version": 1,
|
|
435
|
+
"inputs": [file_meta(path) for path in db_paths],
|
|
436
|
+
"bibtex": file_meta(bibtex_path) if bibtex_path else None,
|
|
437
|
+
}
|
|
438
|
+
return meta
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _load_cached_papers(cache_dir: Path, meta: dict[str, Any]) -> list[dict[str, Any]] | None:
|
|
442
|
+
meta_path = cache_dir / "db_serve_cache.meta.json"
|
|
443
|
+
data_path = cache_dir / "db_serve_cache.papers.json"
|
|
444
|
+
if not meta_path.exists() or not data_path.exists():
|
|
445
|
+
return None
|
|
446
|
+
try:
|
|
447
|
+
cached_meta = json.loads(meta_path.read_text(encoding="utf-8"))
|
|
448
|
+
if cached_meta != meta:
|
|
449
|
+
return None
|
|
450
|
+
cached_papers = json.loads(data_path.read_text(encoding="utf-8"))
|
|
451
|
+
if not isinstance(cached_papers, list):
|
|
452
|
+
return None
|
|
453
|
+
return cached_papers
|
|
454
|
+
except Exception:
|
|
455
|
+
return None
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def _write_cached_papers(cache_dir: Path, meta: dict[str, Any], papers: list[dict[str, Any]]) -> None:
|
|
459
|
+
meta_path = cache_dir / "db_serve_cache.meta.json"
|
|
460
|
+
data_path = cache_dir / "db_serve_cache.papers.json"
|
|
461
|
+
meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
|
|
462
|
+
data_path.write_text(json.dumps(papers, ensure_ascii=False, indent=2), encoding="utf-8")
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def _load_or_merge_papers(
    db_paths: list[Path],
    bibtex_path: Path | None,
    cache_dir: Path | None,
    use_cache: bool,
) -> list[dict[str, Any]]:
    """Load the merged paper list, going through the on-disk cache when enabled.

    Args:
        db_paths: Input database files.
        bibtex_path: Optional BibTeX file used to enrich each input bundle.
        cache_dir: Cache directory; created if needed.  ``None`` disables caching.
        use_cache: Whether the cache may be read and written.

    Returns:
        The cached papers when the cache matches the inputs, otherwise the
        freshly loaded and merged papers (which are then written to the cache).
    """
    cache_meta = None
    if cache_dir and use_cache:
        cache_dir.mkdir(parents=True, exist_ok=True)
        cache_meta = _build_cache_meta(db_paths, bibtex_path)
        cached = _load_cached_papers(cache_dir, cache_meta)
        if cached is not None:
            return cached

    bundles = _load_paper_inputs(db_paths)
    if bibtex_path is not None:
        for bundle in bundles:
            enrich_with_bibtex(bundle["papers"], bibtex_path)
    merged = _merge_paper_inputs(bundles)

    # cache_meta is only set when caching is active, so it doubles as the flag.
    if cache_meta is not None and cache_dir and use_cache:
        _write_cached_papers(cache_dir, cache_meta, merged)
    return merged
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _md_renderer() -> MarkdownIt:
    """Create a ``commonmark``-preset renderer with raw HTML off and linkify on."""
    options = {"html": False, "linkify": True}
    return MarkdownIt("commonmark", options)
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def _normalize_merge_title(value: str | None) -> str | None:
|
|
495
|
+
if not value:
|
|
496
|
+
return None
|
|
497
|
+
return str(value).replace("{", "").replace("}", "").strip().lower()
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def _extract_bibtex_title(paper: dict[str, Any]) -> str | None:
|
|
501
|
+
if not isinstance(paper.get("bibtex"), dict):
|
|
502
|
+
return None
|
|
503
|
+
fields = paper.get("bibtex", {}).get("fields", {}) or {}
|
|
504
|
+
return _normalize_merge_title(fields.get("title"))
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def _extract_paper_title(paper: dict[str, Any]) -> str | None:
    """Return the paper's ``paper_title`` normalized for merge comparison."""
    raw_title = paper.get("paper_title")
    return _normalize_merge_title(raw_title)
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def _available_templates(paper: dict[str, Any]) -> list[str]:
|
|
512
|
+
templates = paper.get("templates")
|
|
513
|
+
if not isinstance(templates, dict):
|
|
514
|
+
return []
|
|
515
|
+
order = paper.get("template_order") or list(templates.keys())
|
|
516
|
+
seen: set[str] = set()
|
|
517
|
+
available: list[str] = []
|
|
518
|
+
for tag in order:
|
|
519
|
+
if tag in templates and tag not in seen:
|
|
520
|
+
available.append(tag)
|
|
521
|
+
seen.add(tag)
|
|
522
|
+
for tag in templates:
|
|
523
|
+
if tag not in seen:
|
|
524
|
+
available.append(tag)
|
|
525
|
+
seen.add(tag)
|
|
526
|
+
return available
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def _select_template_tag(
    paper: dict[str, Any], requested: str | None
) -> tuple[str | None, list[str]]:
    """Choose which template of a paper to show.

    Args:
        paper: A merged paper record.
        requested: The tag asked for, if any.

    Returns:
        ``(selected, available)``.  ``selected`` is ``requested`` when it is one
        of the available tags; otherwise the paper's ``default_template``, or
        failing that ``"simple"`` if available, else the first available tag.
        ``(None, [])`` when the paper has no templates.
    """
    available = _available_templates(paper)
    if not available:
        return None, []
    fallback = paper.get("default_template") or (
        "simple" if "simple" in available else available[0]
    )
    if requested in available:
        return requested, available
    return fallback, available
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def _titles_match(group: dict[str, Any], paper: dict[str, Any], *, threshold: float) -> bool:
    """Decide whether ``paper`` belongs to an existing merge ``group``.

    When both sides have BibTeX titles, only those are compared.  Otherwise the
    paper titles are compared.  A match means at least one title recorded on
    the group has similarity ``>= threshold`` with the paper's title.
    """

    def resembles_any(title: str, known_titles: set[str]) -> bool:
        return any(_title_similarity(title, known) >= threshold for known in known_titles)

    bib_title = _extract_bibtex_title(paper)
    group_bib_titles = group.get("_merge_bibtex_titles") or set()
    if bib_title and group_bib_titles:
        return resembles_any(bib_title, group_bib_titles)

    paper_title = _extract_paper_title(paper)
    group_paper_titles = group.get("_merge_paper_titles") or set()
    if paper_title and group_paper_titles:
        return resembles_any(paper_title, group_paper_titles)
    return False
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def _add_merge_titles(group: dict[str, Any], paper: dict[str, Any]) -> None:
    """Record the paper's normalized BibTeX and paper titles on ``group``.

    Titles are collected in the sets ``_merge_bibtex_titles`` and
    ``_merge_paper_titles``, created on first use; empty titles are skipped.
    """
    titles_by_bucket = (
        ("_merge_bibtex_titles", _extract_bibtex_title(paper)),
        ("_merge_paper_titles", _extract_paper_title(paper)),
    )
    for bucket, title in titles_by_bucket:
        if title:
            group.setdefault(bucket, set()).add(title)
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def _merge_paper_inputs(inputs: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Merge per-template bundles into one record per paper.

    Papers from different bundles are grouped when their titles match (see
    ``_titles_match``) at a similarity threshold of 0.95.  Candidate groups are
    found through exact-title and five-character-prefix lookups, so only groups
    sharing one of those with the paper are compared.

    Args:
        inputs: Bundles of the form ``{"template_tag": ..., "papers": [...]}``.

    Returns:
        One dict per group with ``templates`` (tag -> paper), ``template_order``
        and ``default_template`` (``"simple"`` when present, else the first
        tag), plus the default template's own fields copied to the top level.

    Raises:
        ValueError: If a paper entry is not a dict.
    """
    threshold = 0.95
    prefix_len = 5
    merged: list[dict[str, Any]] = []
    bibtex_exact: dict[str, set[int]] = {}
    bibtex_prefix: dict[str, set[int]] = {}
    paper_exact: dict[str, set[int]] = {}
    paper_prefix: dict[str, set[int]] = {}

    def prefix_key(value: str) -> str:
        # Slicing already returns the whole string when it is shorter.
        return value[:prefix_len]

    def register(group_idx: int, bib_title: str | None, paper_title: str | None) -> None:
        if bib_title:
            bibtex_exact.setdefault(bib_title, set()).add(group_idx)
            bibtex_prefix.setdefault(prefix_key(bib_title), set()).add(group_idx)
        if paper_title:
            paper_exact.setdefault(paper_title, set()).add(group_idx)
            paper_prefix.setdefault(prefix_key(paper_title), set()).add(group_idx)

    def find_group(
        paper: dict[str, Any], bib_title: str | None, paper_title: str | None
    ) -> int | None:
        candidates: set[int] = set()
        if bib_title:
            candidates |= bibtex_exact.get(bib_title, set())
            candidates |= bibtex_prefix.get(prefix_key(bib_title), set())
        if paper_title:
            candidates |= paper_exact.get(paper_title, set())
            candidates |= paper_prefix.get(prefix_key(paper_title), set())
        # The lowest-numbered (earliest created) matching group wins.
        for group_idx in sorted(candidates):
            if _titles_match(merged[group_idx], paper, threshold=threshold):
                return group_idx
        return None

    for bundle in inputs:
        template_tag = bundle.get("template_tag")
        for paper in bundle.get("papers") or []:
            if not isinstance(paper, dict):
                raise ValueError("Input papers must be objects")
            bib_title = _extract_bibtex_title(paper)
            paper_title = _extract_paper_title(paper)

            group_idx = find_group(paper, bib_title, paper_title)
            if group_idx is None:
                group: dict[str, Any] = {
                    "templates": {template_tag: paper},
                    "template_order": [template_tag],
                }
                _add_merge_titles(group, paper)
                merged.append(group)
                group_idx = len(merged) - 1
            else:
                group = merged[group_idx]
                group.setdefault("templates", {})[template_tag] = paper
                tag_order = group.setdefault("template_order", [])
                if template_tag not in tag_order:
                    tag_order.append(template_tag)
                _add_merge_titles(group, paper)
            # Index this paper's titles too, so later variants can find the group.
            register(group_idx, bib_title, paper_title)

    for group in merged:
        templates = group.get("templates") or {}
        tag_order = group.get("template_order") or list(templates.keys())
        if "simple" in tag_order:
            default_tag = "simple"
        else:
            default_tag = tag_order[0] if tag_order else None
        group["default_template"] = default_tag
        if default_tag and default_tag in templates:
            # Surface the default template's fields on the group itself.
            for key, value in templates[default_tag].items():
                group[key] = value
        # The title sets were only needed while merging.
        group.pop("_merge_bibtex_titles", None)
        group.pop("_merge_paper_titles", None)
    return merged
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def _render_markdown_with_math_placeholders(md: MarkdownIt, text: str) -> str:
    """Render markdown to HTML while protecting tables, images and math.

    HTML tables, HTML images and math spans are swapped for placeholder tokens
    before rendering and put back afterwards: math as HTML-escaped source,
    images as stored, tables after passing through ``_sanitize_table_html``.

    Args:
        md: The markdown renderer to use.
        text: Markdown source.

    Returns:
        The rendered HTML.
    """
    rendered, table_placeholders = _extract_html_table_placeholders(text)
    rendered, img_placeholders = _extract_html_img_placeholders(rendered)
    rendered, placeholders = _extract_math_placeholders(rendered)
    html_out = md.render(rendered)

    for key, value in placeholders.items():
        html_out = html_out.replace(key, html.escape(value))

    for key, value in img_placeholders.items():
        # Unwrap a paragraph holding nothing but the placeholder; the callable
        # replacement keeps backslashes in the value from being read as escapes.
        html_out = re.sub(rf"<p>\s*{re.escape(key)}\s*</p>", lambda _, v=value: v, html_out)
        html_out = html_out.replace(key, value)

    for key, value in table_placeholders.items():
        safe_html = _sanitize_table_html(value)
        html_out = re.sub(rf"<p>\s*{re.escape(key)}\s*</p>", lambda _, v=safe_html: v, html_out)
        # Same fallback as for images: a placeholder that did not end up alone
        # in a paragraph would otherwise stay visible as literal text.
        html_out = html_out.replace(key, safe_html)
    return html_out
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
def _extract_math_placeholders(text: str) -> tuple[str, dict[str, str]]:
|
|
667
|
+
placeholders: dict[str, str] = {}
|
|
668
|
+
out: list[str] = []
|
|
669
|
+
idx = 0
|
|
670
|
+
in_fence = False
|
|
671
|
+
fence_char = ""
|
|
672
|
+
fence_len = 0
|
|
673
|
+
inline_delim_len = 0
|
|
674
|
+
|
|
675
|
+
def next_placeholder(value: str) -> str:
|
|
676
|
+
key = f"@@MATH_{len(placeholders)}@@"
|
|
677
|
+
placeholders[key] = value
|
|
678
|
+
return key
|
|
679
|
+
|
|
680
|
+
while idx < len(text):
|
|
681
|
+
at_line_start = idx == 0 or text[idx - 1] == "\n"
|
|
682
|
+
|
|
683
|
+
if inline_delim_len == 0 and at_line_start:
|
|
684
|
+
line_end = text.find("\n", idx)
|
|
685
|
+
if line_end == -1:
|
|
686
|
+
line_end = len(text)
|
|
687
|
+
line = text[idx:line_end]
|
|
688
|
+
stripped = line.lstrip(" ")
|
|
689
|
+
leading_spaces = len(line) - len(stripped)
|
|
690
|
+
if leading_spaces <= 3 and stripped:
|
|
691
|
+
first = stripped[0]
|
|
692
|
+
if first in {"`", "~"}:
|
|
693
|
+
run_len = 0
|
|
694
|
+
while run_len < len(stripped) and stripped[run_len] == first:
|
|
695
|
+
run_len += 1
|
|
696
|
+
if run_len >= 3:
|
|
697
|
+
if not in_fence:
|
|
698
|
+
in_fence = True
|
|
699
|
+
fence_char = first
|
|
700
|
+
fence_len = run_len
|
|
701
|
+
elif first == fence_char and run_len >= fence_len:
|
|
702
|
+
in_fence = False
|
|
703
|
+
fence_char = ""
|
|
704
|
+
fence_len = 0
|
|
705
|
+
out.append(line)
|
|
706
|
+
idx = line_end
|
|
707
|
+
continue
|
|
708
|
+
|
|
709
|
+
if in_fence:
|
|
710
|
+
out.append(text[idx])
|
|
711
|
+
idx += 1
|
|
712
|
+
continue
|
|
713
|
+
|
|
714
|
+
if inline_delim_len > 0:
|
|
715
|
+
delim = "`" * inline_delim_len
|
|
716
|
+
if text.startswith(delim, idx):
|
|
717
|
+
out.append(delim)
|
|
718
|
+
idx += inline_delim_len
|
|
719
|
+
inline_delim_len = 0
|
|
720
|
+
continue
|
|
721
|
+
out.append(text[idx])
|
|
722
|
+
idx += 1
|
|
723
|
+
continue
|
|
724
|
+
|
|
725
|
+
ch = text[idx]
|
|
726
|
+
if ch == "`":
|
|
727
|
+
run_len = 0
|
|
728
|
+
while idx + run_len < len(text) and text[idx + run_len] == "`":
|
|
729
|
+
run_len += 1
|
|
730
|
+
inline_delim_len = run_len
|
|
731
|
+
out.append("`" * run_len)
|
|
732
|
+
idx += run_len
|
|
733
|
+
continue
|
|
734
|
+
|
|
735
|
+
# Block math: $$...$$ (can span lines)
|
|
736
|
+
if text.startswith("$$", idx) and (idx == 0 or text[idx - 1] != "\\"):
|
|
737
|
+
search_from = idx + 2
|
|
738
|
+
end = text.find("$$", search_from)
|
|
739
|
+
while end != -1 and text[end - 1] == "\\":
|
|
740
|
+
search_from = end + 2
|
|
741
|
+
end = text.find("$$", search_from)
|
|
742
|
+
if end != -1:
|
|
743
|
+
out.append(next_placeholder(text[idx : end + 2]))
|
|
744
|
+
idx = end + 2
|
|
745
|
+
continue
|
|
746
|
+
|
|
747
|
+
# Inline math: $...$ (single-line)
|
|
748
|
+
if ch == "$" and not text.startswith("$$", idx) and (idx == 0 or text[idx - 1] != "\\"):
|
|
749
|
+
search_from = idx + 1
|
|
750
|
+
end = text.find("$", search_from)
|
|
751
|
+
while end != -1 and text[end - 1] == "\\":
|
|
752
|
+
search_from = end + 1
|
|
753
|
+
end = text.find("$", search_from)
|
|
754
|
+
if end != -1:
|
|
755
|
+
out.append(next_placeholder(text[idx : end + 1]))
|
|
756
|
+
idx = end + 1
|
|
757
|
+
continue
|
|
758
|
+
|
|
759
|
+
out.append(ch)
|
|
760
|
+
idx += 1
|
|
761
|
+
|
|
762
|
+
return "".join(out), placeholders
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
class _TableSanitizer(HTMLParser):
|
|
766
|
+
def __init__(self) -> None:
|
|
767
|
+
super().__init__(convert_charrefs=True)
|
|
768
|
+
self._out: list[str] = []
|
|
769
|
+
self._stack: list[str] = []
|
|
770
|
+
|
|
771
|
+
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
|
|
772
|
+
t = tag.lower()
|
|
773
|
+
if t not in {
|
|
774
|
+
"table",
|
|
775
|
+
"thead",
|
|
776
|
+
"tbody",
|
|
777
|
+
"tfoot",
|
|
778
|
+
"tr",
|
|
779
|
+
"th",
|
|
780
|
+
"td",
|
|
781
|
+
"caption",
|
|
782
|
+
"colgroup",
|
|
783
|
+
"col",
|
|
784
|
+
"br",
|
|
785
|
+
}:
|
|
786
|
+
return
|
|
787
|
+
|
|
788
|
+
allowed: dict[str, str] = {}
|
|
789
|
+
for name, value in attrs:
|
|
790
|
+
if value is None:
|
|
791
|
+
continue
|
|
792
|
+
n = name.lower()
|
|
793
|
+
v = value.strip()
|
|
794
|
+
if t in {"td", "th"} and n in {"colspan", "rowspan"} and v.isdigit():
|
|
795
|
+
allowed[n] = v
|
|
796
|
+
elif t in {"td", "th"} and n == "align" and v.lower() in {"left", "right", "center"}:
|
|
797
|
+
allowed[n] = v.lower()
|
|
798
|
+
|
|
799
|
+
attr_text = "".join(f' {k}="{html.escape(v, quote=True)}"' for k, v in allowed.items())
|
|
800
|
+
self._out.append(f"<{t}{attr_text}>")
|
|
801
|
+
if t not in {"br", "col"}:
|
|
802
|
+
self._stack.append(t)
|
|
803
|
+
|
|
804
|
+
def handle_endtag(self, tag: str) -> None:
|
|
805
|
+
t = tag.lower()
|
|
806
|
+
if t not in self._stack:
|
|
807
|
+
return
|
|
808
|
+
while self._stack:
|
|
809
|
+
popped = self._stack.pop()
|
|
810
|
+
self._out.append(f"</{popped}>")
|
|
811
|
+
if popped == t:
|
|
812
|
+
break
|
|
813
|
+
|
|
814
|
+
def handle_data(self, data: str) -> None:
|
|
815
|
+
self._out.append(html.escape(data))
|
|
816
|
+
|
|
817
|
+
def handle_entityref(self, name: str) -> None:
|
|
818
|
+
self._out.append(f"&{name};")
|
|
819
|
+
|
|
820
|
+
def handle_charref(self, name: str) -> None:
|
|
821
|
+
self._out.append(f"&#{name};")
|
|
822
|
+
|
|
823
|
+
def close(self) -> None:
|
|
824
|
+
super().close()
|
|
825
|
+
while self._stack:
|
|
826
|
+
self._out.append(f"</{self._stack.pop()}>")
|
|
827
|
+
|
|
828
|
+
def get_html(self) -> str:
|
|
829
|
+
return "".join(self._out)
|
|
830
|
+
|
|
831
|
+
|
|
832
|
+
def _sanitize_table_html(raw: str) -> str:
    """Run ``raw`` through ``_TableSanitizer`` and return the resulting HTML.

    If feeding or closing the parser raises, the input is returned fully
    escaped inside ``<pre><code>`` instead.
    """
    sanitizer = _TableSanitizer()
    try:
        sanitizer.feed(raw)
        sanitizer.close()
    except Exception:
        # Best-effort fallback: show the raw markup as text rather than fail.
        escaped = html.escape(raw)
        return f"<pre><code>{escaped}</code></pre>"
    else:
        return sanitizer.get_html()
|
|
840
|
+
|
|
841
|
+
|
|
842
|
+
def _sanitize_img_html(raw: str) -> str | None:
|
|
843
|
+
attrs = {}
|
|
844
|
+
for match in re.finditer(r"(\w+)\s*=\s*(\"[^\"]*\"|'[^']*'|[^\s>]+)", raw):
|
|
845
|
+
name = match.group(1).lower()
|
|
846
|
+
value = match.group(2).strip()
|
|
847
|
+
if value and value[0] in {"\"", "'"} and value[-1] == value[0]:
|
|
848
|
+
value = value[1:-1]
|
|
849
|
+
attrs[name] = value
|
|
850
|
+
|
|
851
|
+
src = attrs.get("src", "")
|
|
852
|
+
src_lower = src.lower()
|
|
853
|
+
if not src_lower.startswith("data:image/") or ";base64," not in src_lower:
|
|
854
|
+
return None
|
|
855
|
+
|
|
856
|
+
alt = attrs.get("alt", "")
|
|
857
|
+
alt_attr = f' alt="{html.escape(alt, quote=True)}"' if alt else ""
|
|
858
|
+
return f'<img src="{html.escape(src, quote=True)}"{alt_attr} />'
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
def _extract_html_img_placeholders(text: str) -> tuple[str, dict[str, str]]:
|
|
862
|
+
placeholders: dict[str, str] = {}
|
|
863
|
+
out: list[str] = []
|
|
864
|
+
idx = 0
|
|
865
|
+
in_fence = False
|
|
866
|
+
fence_char = ""
|
|
867
|
+
fence_len = 0
|
|
868
|
+
inline_delim_len = 0
|
|
869
|
+
|
|
870
|
+
def next_placeholder(value: str) -> str:
|
|
871
|
+
key = f"@@HTML_IMG_{len(placeholders)}@@"
|
|
872
|
+
placeholders[key] = value
|
|
873
|
+
return key
|
|
874
|
+
|
|
875
|
+
lower = text.lower()
|
|
876
|
+
while idx < len(text):
|
|
877
|
+
at_line_start = idx == 0 or text[idx - 1] == "\n"
|
|
878
|
+
|
|
879
|
+
if inline_delim_len == 0 and at_line_start:
|
|
880
|
+
line_end = text.find("\n", idx)
|
|
881
|
+
if line_end == -1:
|
|
882
|
+
line_end = len(text)
|
|
883
|
+
line = text[idx:line_end]
|
|
884
|
+
stripped = line.lstrip(" ")
|
|
885
|
+
leading_spaces = len(line) - len(stripped)
|
|
886
|
+
if leading_spaces <= 3 and stripped:
|
|
887
|
+
first = stripped[0]
|
|
888
|
+
if first in {"`", "~"}:
|
|
889
|
+
run_len = 0
|
|
890
|
+
while run_len < len(stripped) and stripped[run_len] == first:
|
|
891
|
+
run_len += 1
|
|
892
|
+
if run_len >= 3:
|
|
893
|
+
if not in_fence:
|
|
894
|
+
in_fence = True
|
|
895
|
+
fence_char = first
|
|
896
|
+
fence_len = run_len
|
|
897
|
+
elif first == fence_char and run_len >= fence_len:
|
|
898
|
+
in_fence = False
|
|
899
|
+
fence_char = ""
|
|
900
|
+
fence_len = 0
|
|
901
|
+
out.append(line)
|
|
902
|
+
idx = line_end
|
|
903
|
+
continue
|
|
904
|
+
|
|
905
|
+
if in_fence:
|
|
906
|
+
out.append(text[idx])
|
|
907
|
+
idx += 1
|
|
908
|
+
continue
|
|
909
|
+
|
|
910
|
+
if inline_delim_len > 0:
|
|
911
|
+
delim = "`" * inline_delim_len
|
|
912
|
+
if text.startswith(delim, idx):
|
|
913
|
+
out.append(delim)
|
|
914
|
+
idx += inline_delim_len
|
|
915
|
+
inline_delim_len = 0
|
|
916
|
+
continue
|
|
917
|
+
out.append(text[idx])
|
|
918
|
+
idx += 1
|
|
919
|
+
continue
|
|
920
|
+
|
|
921
|
+
if text[idx] == "`":
|
|
922
|
+
run_len = 0
|
|
923
|
+
while idx + run_len < len(text) and text[idx + run_len] == "`":
|
|
924
|
+
run_len += 1
|
|
925
|
+
inline_delim_len = run_len
|
|
926
|
+
out.append("`" * run_len)
|
|
927
|
+
idx += run_len
|
|
928
|
+
continue
|
|
929
|
+
|
|
930
|
+
if lower.startswith("<img", idx):
|
|
931
|
+
end = text.find(">", idx)
|
|
932
|
+
if end != -1:
|
|
933
|
+
raw = text[idx : end + 1]
|
|
934
|
+
safe_html = _sanitize_img_html(raw)
|
|
935
|
+
if safe_html:
|
|
936
|
+
out.append(next_placeholder(safe_html))
|
|
937
|
+
idx = end + 1
|
|
938
|
+
continue
|
|
939
|
+
|
|
940
|
+
out.append(text[idx])
|
|
941
|
+
idx += 1
|
|
942
|
+
|
|
943
|
+
return "".join(out), placeholders
|
|
944
|
+
|
|
945
|
+
|
|
946
|
+
def _extract_html_table_placeholders(text: str) -> tuple[str, dict[str, str]]:
|
|
947
|
+
placeholders: dict[str, str] = {}
|
|
948
|
+
out: list[str] = []
|
|
949
|
+
idx = 0
|
|
950
|
+
in_fence = False
|
|
951
|
+
fence_char = ""
|
|
952
|
+
fence_len = 0
|
|
953
|
+
inline_delim_len = 0
|
|
954
|
+
|
|
955
|
+
def next_placeholder(value: str) -> str:
|
|
956
|
+
key = f"@@HTML_TABLE_{len(placeholders)}@@"
|
|
957
|
+
placeholders[key] = value
|
|
958
|
+
return key
|
|
959
|
+
|
|
960
|
+
lower = text.lower()
|
|
961
|
+
while idx < len(text):
|
|
962
|
+
at_line_start = idx == 0 or text[idx - 1] == "\n"
|
|
963
|
+
|
|
964
|
+
if inline_delim_len == 0 and at_line_start:
|
|
965
|
+
line_end = text.find("\n", idx)
|
|
966
|
+
if line_end == -1:
|
|
967
|
+
line_end = len(text)
|
|
968
|
+
line = text[idx:line_end]
|
|
969
|
+
stripped = line.lstrip(" ")
|
|
970
|
+
leading_spaces = len(line) - len(stripped)
|
|
971
|
+
if leading_spaces <= 3 and stripped:
|
|
972
|
+
first = stripped[0]
|
|
973
|
+
if first in {"`", "~"}:
|
|
974
|
+
run_len = 0
|
|
975
|
+
while run_len < len(stripped) and stripped[run_len] == first:
|
|
976
|
+
run_len += 1
|
|
977
|
+
if run_len >= 3:
|
|
978
|
+
if not in_fence:
|
|
979
|
+
in_fence = True
|
|
980
|
+
fence_char = first
|
|
981
|
+
fence_len = run_len
|
|
982
|
+
elif first == fence_char and run_len >= fence_len:
|
|
983
|
+
in_fence = False
|
|
984
|
+
fence_char = ""
|
|
985
|
+
fence_len = 0
|
|
986
|
+
out.append(line)
|
|
987
|
+
idx = line_end
|
|
988
|
+
continue
|
|
989
|
+
|
|
990
|
+
if in_fence:
|
|
991
|
+
out.append(text[idx])
|
|
992
|
+
idx += 1
|
|
993
|
+
continue
|
|
994
|
+
|
|
995
|
+
if inline_delim_len > 0:
|
|
996
|
+
delim = "`" * inline_delim_len
|
|
997
|
+
if text.startswith(delim, idx):
|
|
998
|
+
out.append(delim)
|
|
999
|
+
idx += inline_delim_len
|
|
1000
|
+
inline_delim_len = 0
|
|
1001
|
+
continue
|
|
1002
|
+
out.append(text[idx])
|
|
1003
|
+
idx += 1
|
|
1004
|
+
continue
|
|
1005
|
+
|
|
1006
|
+
if text[idx] == "`":
|
|
1007
|
+
run_len = 0
|
|
1008
|
+
while idx + run_len < len(text) and text[idx + run_len] == "`":
|
|
1009
|
+
run_len += 1
|
|
1010
|
+
inline_delim_len = run_len
|
|
1011
|
+
out.append("`" * run_len)
|
|
1012
|
+
idx += run_len
|
|
1013
|
+
continue
|
|
1014
|
+
|
|
1015
|
+
if lower.startswith("<table", idx):
|
|
1016
|
+
end = lower.find("</table>", idx)
|
|
1017
|
+
if end != -1:
|
|
1018
|
+
end += len("</table>")
|
|
1019
|
+
raw = text[idx:end]
|
|
1020
|
+
key = next_placeholder(raw)
|
|
1021
|
+
if out and not out[-1].endswith("\n"):
|
|
1022
|
+
out.append("\n\n")
|
|
1023
|
+
out.append(key)
|
|
1024
|
+
out.append("\n\n")
|
|
1025
|
+
idx = end
|
|
1026
|
+
continue
|
|
1027
|
+
|
|
1028
|
+
out.append(text[idx])
|
|
1029
|
+
idx += 1
|
|
1030
|
+
|
|
1031
|
+
return "".join(out), placeholders
|
|
1032
|
+
|
|
1033
|
+
|
|
1034
|
+
def _render_paper_markdown(
    paper: dict[str, Any],
    fallback_language: str,
    *,
    template_tag: str | None = None,
) -> tuple[str, str, str | None]:
    """Render ``paper`` with its render template, falling back to the default.

    When ``_select_template_tag`` yields a tag, the entry under
    ``paper["templates"][tag]`` (or ``paper`` itself if absent) supplies the
    render context. The template name is that tag, else the context's
    ``prompt_template``. If no name is available, or loading the named
    template raises, the default template is used and a warning is returned.

    Returns:
        ``(rendered_text, template_name, warning)``; ``warning`` is ``None``
        when the requested template was loaded.
    """
    selected_tag, _ = _select_template_tag(paper, template_tag)
    if selected_tag:
        selected_paper = (paper.get("templates") or {}).get(selected_tag, paper)
    else:
        selected_paper = paper

    template_name = selected_tag or selected_paper.get("prompt_template")
    warning = None
    if not template_name:
        template = load_default_template()
        warning = "Rendered using default template (no template specified)."
        template_name = "default_paper"
    else:
        try:
            template = load_render_template(str(template_name))
        except Exception:
            template = load_default_template()
            warning = "Rendered using default template (missing template)."
            template_name = "default_paper"

    context = dict(selected_paper)
    # Guarantee the template always sees a non-empty output language.
    if not context.get("output_language"):
        context["output_language"] = fallback_language
    rendered = template.render(**context)
    return rendered, str(template_name), warning
|
|
1063
|
+
|
|
1064
|
+
|
|
1065
|
+
def _build_file_index(roots: list[Path], *, suffixes: set[str]) -> dict[str, list[Path]]:
|
|
1066
|
+
index: dict[str, list[Path]] = {}
|
|
1067
|
+
for root in roots:
|
|
1068
|
+
try:
|
|
1069
|
+
if not root.exists() or not root.is_dir():
|
|
1070
|
+
continue
|
|
1071
|
+
except OSError:
|
|
1072
|
+
continue
|
|
1073
|
+
for path in root.rglob("*"):
|
|
1074
|
+
try:
|
|
1075
|
+
if not path.is_file():
|
|
1076
|
+
continue
|
|
1077
|
+
except OSError:
|
|
1078
|
+
continue
|
|
1079
|
+
if path.suffix.lower() not in suffixes:
|
|
1080
|
+
continue
|
|
1081
|
+
index.setdefault(path.name.lower(), []).append(path.resolve())
|
|
1082
|
+
return index
|
|
1083
|
+
|
|
1084
|
+
|
|
1085
|
+
def _resolve_source_md(paper: dict[str, Any], md_index: dict[str, list[Path]]) -> Path | None:
|
|
1086
|
+
source_path = paper.get("source_path")
|
|
1087
|
+
if not source_path:
|
|
1088
|
+
return None
|
|
1089
|
+
name = Path(str(source_path)).name.lower()
|
|
1090
|
+
candidates = md_index.get(name, [])
|
|
1091
|
+
return candidates[0] if candidates else None
|
|
1092
|
+
|
|
1093
|
+
|
|
1094
|
+
def _guess_pdf_names(paper: dict[str, Any]) -> list[str]:
|
|
1095
|
+
source_path = paper.get("source_path")
|
|
1096
|
+
if not source_path:
|
|
1097
|
+
return []
|
|
1098
|
+
name = Path(str(source_path)).name
|
|
1099
|
+
match = re.match(r"(?i)(.+\\.pdf)(?:-[0-9a-f\\-]{8,})?\\.md$", name)
|
|
1100
|
+
if match:
|
|
1101
|
+
return [Path(match.group(1)).name]
|
|
1102
|
+
if ".pdf-" in name.lower():
|
|
1103
|
+
base = name[: name.lower().rfind(".pdf-") + 4]
|
|
1104
|
+
return [Path(base).name]
|
|
1105
|
+
if name.lower().endswith(".pdf.md"):
|
|
1106
|
+
return [name[:-3]]
|
|
1107
|
+
return []
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
def _resolve_pdf(paper: dict[str, Any], pdf_index: dict[str, list[Path]]) -> Path | None:
    """Return the first indexed PDF matching a guessed file name.

    Names from ``_guess_pdf_names`` are tried in order, lower-cased, against
    ``pdf_index``; ``None`` is returned when none has an indexed path.
    """
    hits = (pdf_index.get(filename.lower(), []) for filename in _guess_pdf_names(paper))
    return next((found[0] for found in hits if found), None)
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
def _ensure_under_roots(path: Path, roots: list[Path]) -> bool:
|
|
1119
|
+
resolved = path.resolve()
|
|
1120
|
+
for root in roots:
|
|
1121
|
+
try:
|
|
1122
|
+
resolved.relative_to(root.resolve())
|
|
1123
|
+
return True
|
|
1124
|
+
except Exception:
|
|
1125
|
+
continue
|
|
1126
|
+
return False
|
|
1127
|
+
|
|
1128
|
+
|
|
1129
|
+
def _apply_query(index: PaperIndex, query: Query) -> set[int]:
    """Return the ids of all papers matching ``query``.

    ``query.groups`` is evaluated as a union of groups. Within a group every
    non-negated term narrows the candidate set and every negated term removes
    its matches. Terms match as follows:

    * no field: substring of the paper's ``_search_lc`` text
    * ``title`` / ``venue``: substring of ``_title_lc`` / lower-cased ``_venue``
    * ``tag`` / ``author``: exact hit in ``index.by_tag`` / ``index.by_author``,
      otherwise substring of any entry in ``_tags`` / ``_authors``
    * ``month``: exact hit in ``index.by_month``, otherwise equality with
      ``_month``
    * ``year``: ``a..b`` range over ``index.by_year``, exact hit in
      ``index.by_year``, otherwise substring of ``_year``
    * any other field: matches nothing
    """
    all_ids = set(index.ordered_ids)

    def ids_for_year_range(low: int, high: int) -> set[int]:
        # Walk the indexed years instead of range(low, high + 1): the bounds
        # come from the user's query, so a term like year:0..99999999999 must
        # not turn into a loop over billions of values.
        # NOTE(review): assumes index.by_year is a dict-like mapping of year
        # strings to id sets — confirm against PaperIndex.
        ids: set[int] = set()
        for key, members in index.by_year.items():
            try:
                year = int(key)
            except (TypeError, ValueError):
                continue
            # Only canonical keys ("2023", not "02023") count, matching what
            # a by_year.get(str(y)) lookup would find.
            if str(year) == key and low <= year <= high:
                ids |= members
        return ids

    def ids_for_term(term: QueryTerm, base: set[int]) -> set[int]:
        value_lc = term.value.lower()
        if term.field is None:
            return {idx for idx in base if value_lc in str(index.papers[idx].get("_search_lc") or "")}
        if term.field == "title":
            return {idx for idx in base if value_lc in str(index.papers[idx].get("_title_lc") or "")}
        if term.field == "venue":
            return {idx for idx in base if value_lc in str(index.papers[idx].get("_venue") or "").lower()}
        if term.field == "tag":
            exact = index.by_tag.get(value_lc)
            if exact is not None:
                return exact & base
            return {idx for idx in base if any(value_lc in t.lower() for t in (index.papers[idx].get("_tags") or []))}
        if term.field == "author":
            exact = index.by_author.get(value_lc)
            if exact is not None:
                return exact & base
            return {idx for idx in base if any(value_lc in a.lower() for a in (index.papers[idx].get("_authors") or []))}
        if term.field == "month":
            exact = index.by_month.get(value_lc)
            if exact is not None:
                return exact & base
            return {idx for idx in base if value_lc == str(index.papers[idx].get("_month") or "").lower()}
        if term.field == "year":
            if ".." in term.value:
                start_str, end_str = term.value.split("..", 1)
                if start_str.strip().isdigit() and end_str.strip().isdigit():
                    try:
                        start = int(start_str.strip())
                        end = int(end_str.strip())
                    except ValueError:
                        # isdigit() accepts characters int() rejects (e.g.
                        # "²"); treat those as a plain value below.
                        pass
                    else:
                        return ids_for_year_range(min(start, end), max(start, end)) & base
            exact = index.by_year.get(value_lc)
            if exact is not None:
                return exact & base
            return {idx for idx in base if value_lc in str(index.papers[idx].get("_year") or "").lower()}
        return set()

    result: set[int] = set()
    for group in query.groups:
        group_ids = set(all_ids)
        for term in group:
            # Negated terms are matched against every paper and subtracted.
            matched = ids_for_term(term, group_ids if not term.negated else all_ids)
            if term.negated:
                group_ids -= matched
            else:
                group_ids &= matched
        result |= group_ids

    return result
|
|
1183
|
+
|
|
1184
|
+
|
|
1185
|
+
def _page_shell(title: str, body_html: str, extra_head: str = "", extra_scripts: str = "") -> str:
|
|
1186
|
+
return f"""<!doctype html>
|
|
1187
|
+
<html lang="en">
|
|
1188
|
+
<head>
|
|
1189
|
+
<meta charset="utf-8" />
|
|
1190
|
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|
1191
|
+
<title>{html.escape(title)}</title>
|
|
1192
|
+
<style>
|
|
1193
|
+
body {{ font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial; margin: 0; }}
|
|
1194
|
+
header {{ position: sticky; top: 0; background: #0b1220; color: #fff; padding: 12px 16px; z-index: 10; }}
|
|
1195
|
+
header a {{ color: #cfe3ff; text-decoration: none; margin-right: 12px; }}
|
|
1196
|
+
.container {{ max-width: 1100px; margin: 0 auto; padding: 16px; }}
|
|
1197
|
+
.filters {{ display: grid; grid-template-columns: repeat(6, 1fr); gap: 8px; margin: 12px 0 16px; }}
|
|
1198
|
+
.filters input {{ width: 100%; padding: 8px; border: 1px solid #d0d7de; border-radius: 6px; }}
|
|
1199
|
+
.card {{ border: 1px solid #d0d7de; border-radius: 10px; padding: 12px; margin: 10px 0; }}
|
|
1200
|
+
.muted {{ color: #57606a; font-size: 13px; }}
|
|
1201
|
+
.pill {{ display: inline-block; padding: 2px 8px; border-radius: 999px; border: 1px solid #d0d7de; margin-right: 6px; font-size: 12px; }}
|
|
1202
|
+
.warning {{ background: #fff4ce; border: 1px solid #ffd089; padding: 10px; border-radius: 10px; margin: 12px 0; }}
|
|
1203
|
+
.tabs {{ display: flex; gap: 8px; flex-wrap: wrap; }}
|
|
1204
|
+
.tab {{ display: inline-block; padding: 6px 12px; border-radius: 999px; border: 1px solid #d0d7de; background: #f6f8fa; color: #0969da; text-decoration: none; font-size: 13px; }}
|
|
1205
|
+
.tab:hover {{ background: #eef1f4; }}
|
|
1206
|
+
.tab.active {{ background: #0969da; border-color: #0969da; color: #fff; }}
|
|
1207
|
+
pre {{ overflow: auto; padding: 10px; background: #0b1220; color: #e6edf3; border-radius: 10px; }}
|
|
1208
|
+
code {{ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; }}
|
|
1209
|
+
a {{ color: #0969da; }}
|
|
1210
|
+
</style>
|
|
1211
|
+
{extra_head}
|
|
1212
|
+
</head>
|
|
1213
|
+
<body>
|
|
1214
|
+
<header>
|
|
1215
|
+
<a href="/">Papers</a>
|
|
1216
|
+
<a href="/stats">Stats</a>
|
|
1217
|
+
</header>
|
|
1218
|
+
<div class="container">
|
|
1219
|
+
{body_html}
|
|
1220
|
+
</div>
|
|
1221
|
+
{extra_scripts}
|
|
1222
|
+
</body>
|
|
1223
|
+
</html>"""
|
|
1224
|
+
|
|
1225
|
+
|
|
1226
|
+
def _embed_shell(title: str, body_html: str, extra_head: str = "", extra_scripts: str = "") -> str:
|
|
1227
|
+
return f"""<!doctype html>
|
|
1228
|
+
<html lang="en">
|
|
1229
|
+
<head>
|
|
1230
|
+
<meta charset="utf-8" />
|
|
1231
|
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|
1232
|
+
<title>{html.escape(title)}</title>
|
|
1233
|
+
<style>
|
|
1234
|
+
body {{ font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial; margin: 0; padding: 16px; }}
|
|
1235
|
+
h1, h2, h3, h4 {{ margin-top: 1.2em; }}
|
|
1236
|
+
.muted {{ color: #57606a; font-size: 13px; }}
|
|
1237
|
+
.warning {{ background: #fff4ce; border: 1px solid #ffd089; padding: 10px; border-radius: 10px; margin: 12px 0; }}
|
|
1238
|
+
pre {{ overflow: auto; padding: 10px; background: #0b1220; color: #e6edf3; border-radius: 10px; }}
|
|
1239
|
+
code {{ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; }}
|
|
1240
|
+
a {{ color: #0969da; }}
|
|
1241
|
+
</style>
|
|
1242
|
+
{extra_head}
|
|
1243
|
+
</head>
|
|
1244
|
+
<body>
|
|
1245
|
+
{body_html}
|
|
1246
|
+
{extra_scripts}
|
|
1247
|
+
</body>
|
|
1248
|
+
</html>"""
|
|
1249
|
+
|
|
1250
|
+
|
|
1251
|
+
def _build_pdfjs_viewer_url(pdf_url: str) -> str:
    """Build the viewer URL whose ``file`` parameter is ``pdf_url``.

    ``pdf_url`` is percent-encoded with no characters treated as safe.
    """
    return "{}?file={}".format(_PDFJS_VIEWER_PATH, quote(pdf_url, safe=""))
|
|
1254
|
+
|
|
1255
|
+
|
|
1256
|
+
async def _index_page(request: Request) -> HTMLResponse:
    """Serve the paper list page.

    The page is static markup plus a script that queries ``/api/papers`` and
    appends result cards as the user scrolls; ``request`` is not inspected.
    """
    body_html = """
<h2>Paper Database</h2>
<div class="card">
<div class="muted">Search (Scholar-style): <code>tag:fpga year:2023..2025 -survey</code> · Use quotes for phrases and <code>OR</code> for alternatives.</div>
<div style="display:flex; gap:8px; margin-top:8px;">
<input id="query" placeholder='Search... e.g. title:"nearest neighbor" tag:fpga year:2023..2025' style="flex:1; padding:10px; border:1px solid #d0d7de; border-radius:8px;" />
<select id="openView" style="padding:10px; border:1px solid #d0d7de; border-radius:8px;">
<option value="summary" selected>Open: Summary</option>
<option value="source">Open: Source</option>
<option value="pdf">Open: PDF</option>
<option value="pdfjs">Open: PDF Viewer</option>
<option value="split">Open: Split</option>
</select>
</div>
<details style="margin-top:10px;">
<summary>Advanced search</summary>
<div style="margin-top:10px;" class="muted">Build a query:</div>
<div class="filters" style="grid-template-columns: repeat(3, 1fr);">
<input id="advTitle" placeholder="title contains..." />
<input id="advAuthor" placeholder="author contains..." />
<input id="advTag" placeholder="tag (comma separated)" />
<input id="advYear" placeholder="year (e.g. 2020..2024)" />
<input id="advMonth" placeholder="month (01-12)" />
<input id="advVenue" placeholder="venue contains..." />
</div>
<div style="display:flex; gap:8px; align-items:center; margin-top:8px;">
<button id="buildQuery" style="padding:8px 12px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Build</button>
<div class="muted">Generated: <code id="generated"></code></div>
</div>
</details>
</div>
<div id="results"></div>
<div id="loading" class="muted">Loading...</div>
<script>
let page = 1;
let loading = false;
let done = false;

function currentParams(nextPage) {
const params = new URLSearchParams();
params.set("page", String(nextPage));
params.set("page_size", "30");
const q = document.getElementById("query").value.trim();
if (q) params.set("q", q);
return params;
}

function escapeHtml(text) {
const div = document.createElement("div");
div.textContent = text;
return div.innerHTML;
}

function viewSuffixForItem(item) {
const view = document.getElementById("openView").value;
if (!view || view === "summary") return "";
const params = new URLSearchParams();
params.set("view", view);
if (view === "split") {
params.set("left", "summary");
if (item.has_pdf) {
params.set("right", "pdfjs");
} else if (item.has_source) {
params.set("right", "source");
} else {
params.set("right", "summary");
}
}
return `?${params.toString()}`;
}

function renderItem(item) {
const tags = (item.tags || []).map(t => `<span class="pill">${escapeHtml(t)}</span>`).join("");
const authors = (item.authors || []).slice(0, 6).map(a => escapeHtml(a)).join(", ");
const meta = `${escapeHtml(item.year || "")}-${escapeHtml(item.month || "")} · ${escapeHtml(item.venue || "")}`;
const viewSuffix = viewSuffixForItem(item);
const badges = [
item.has_source ? `<span class="pill">source</span>` : "",
item.has_pdf ? `<span class="pill">pdf</span>` : "",
].join("");
return `
<div class="card">
<div><a href="/paper/${encodeURIComponent(item.source_hash)}${viewSuffix}">${escapeHtml(item.title || "")}</a></div>
<div class="muted">${authors}</div>
<div class="muted">${meta}</div>
<div style="margin-top:6px">${badges} ${tags}</div>
</div>
`;
}

async function loadMore() {
if (loading || done) return;
loading = true;
document.getElementById("loading").textContent = "Loading...";
const res = await fetch(`/api/papers?${currentParams(page).toString()}`);
const data = await res.json();
const results = document.getElementById("results");
for (const item of data.items) {
results.insertAdjacentHTML("beforeend", renderItem(item));
}
if (!data.has_more) {
done = true;
document.getElementById("loading").textContent = "End.";
} else {
page += 1;
document.getElementById("loading").textContent = "Scroll to load more...";
}
loading = false;
}

function resetAndLoad() {
page = 1;
done = false;
document.getElementById("results").innerHTML = "";
loadMore();
}

document.getElementById("query").addEventListener("change", resetAndLoad);
document.getElementById("openView").addEventListener("change", resetAndLoad);

document.getElementById("buildQuery").addEventListener("click", () => {
function add(field, value) {
value = value.trim();
if (!value) return "";
if (value.includes(" ")) return `${field}:"${value}"`;
return `${field}:${value}`;
}
const parts = [];
const t = document.getElementById("advTitle").value.trim();
const a = document.getElementById("advAuthor").value.trim();
const tag = document.getElementById("advTag").value.trim();
const y = document.getElementById("advYear").value.trim();
const m = document.getElementById("advMonth").value.trim();
const v = document.getElementById("advVenue").value.trim();
if (t) parts.push(add("title", t));
if (a) parts.push(add("author", a));
if (tag) {
for (const item of tag.split(",")) {
const val = item.trim();
if (val) parts.push(add("tag", val));
}
}
if (y) parts.push(add("year", y));
if (m) parts.push(add("month", m));
if (v) parts.push(add("venue", v));
const q = parts.join(" ");
document.getElementById("generated").textContent = q;
document.getElementById("query").value = q;
resetAndLoad();
});

window.addEventListener("scroll", () => {
if ((window.innerHeight + window.scrollY) >= (document.body.offsetHeight - 600)) {
loadMore();
}
});

loadMore();
</script>
"""
    page_html = _page_shell("Paper DB", body_html)
    return HTMLResponse(page_html)
|
|
1422
|
+
|
|
1423
|
+
|
|
1424
|
+
def _parse_filters(request: Request) -> dict[str, list[str] | str | int]:
    """Read paging and search parameters from the request's query string.

    ``page`` defaults to 1 and is clamped to at least 1; ``page_size``
    defaults to 30 and is clamped to the range 1..200. A value that is not a
    valid integer falls back to its default instead of raising. ``q`` is the
    whitespace-stripped search string (empty when absent).

    Returns:
        A dict with the keys ``"page"``, ``"page_size"`` and ``"q"``.
    """
    qp = request.query_params

    def int_param(name: str, default: int) -> int:
        # Query strings are client-controlled; a malformed number should not
        # surface as an unhandled ValueError.
        try:
            return int(qp.get(name, str(default)))
        except (TypeError, ValueError):
            return default

    page = max(1, int_param("page", 1))
    page_size = min(max(1, int_param("page_size", 30)), 200)

    q = qp.get("q", "").strip()

    return {
        "page": page,
        "page_size": page_size,
        "q": q,
    }
|
|
1438
|
+
|
|
1439
|
+
|
|
1440
|
+
async def _api_papers(request: Request) -> JSONResponse:
    """Return one page of papers matching the request's search query as JSON.

    The response carries ``page``, ``page_size``, ``total`` (number of
    matches), ``has_more`` and ``items``. Matches keep the order of
    ``index.ordered_ids``.
    """
    index: PaperIndex = request.app.state.index
    filters = _parse_filters(request)
    page = int(filters["page"])
    page_size = int(filters["page_size"])
    query = parse_query(str(filters["q"]))
    matching = _apply_query(index, query)
    ordered = [pid for pid in index.ordered_ids if pid in matching]
    total = len(ordered)
    first = (page - 1) * page_size
    last = min(first + page_size, total)

    def summarize(pid: int) -> dict[str, Any]:
        paper = index.papers[pid]
        # Papers without a stored hash get one derived from their source path
        # (or, failing that, from their id).
        source_hash = str(paper.get("source_hash") or stable_hash(str(paper.get("source_path") or pid)))
        return {
            "source_hash": source_hash,
            "title": paper.get("paper_title") or "",
            "authors": paper.get("_authors") or [],
            "year": paper.get("_year") or "",
            "month": paper.get("_month") or "",
            "venue": paper.get("_venue") or "",
            "tags": paper.get("_tags") or [],
            "has_source": source_hash in index.md_path_by_hash,
            "has_pdf": source_hash in index.pdf_path_by_hash,
        }

    items: list[dict[str, Any]] = [summarize(pid) for pid in ordered[first:last]]

    payload = {
        "page": page,
        "page_size": page_size,
        "total": total,
        "has_more": last < total,
        "items": items,
    }
    return JSONResponse(payload)
|
|
1481
|
+
|
|
1482
|
+
|
|
1483
|
+
async def _paper_detail(request: Request) -> HTMLResponse:
|
|
1484
|
+
index: PaperIndex = request.app.state.index
|
|
1485
|
+
md = request.app.state.md
|
|
1486
|
+
source_hash = request.path_params["source_hash"]
|
|
1487
|
+
idx = index.id_by_hash.get(source_hash)
|
|
1488
|
+
if idx is None:
|
|
1489
|
+
return RedirectResponse("/")
|
|
1490
|
+
paper = index.papers[idx]
|
|
1491
|
+
view = request.query_params.get("view", "summary")
|
|
1492
|
+
template_param = request.query_params.get("template")
|
|
1493
|
+
embed = request.query_params.get("embed") == "1"
|
|
1494
|
+
if view == "split":
|
|
1495
|
+
embed = False
|
|
1496
|
+
|
|
1497
|
+
pdf_path = index.pdf_path_by_hash.get(source_hash)
|
|
1498
|
+
pdf_url = f"/api/pdf/{source_hash}"
|
|
1499
|
+
shell = _embed_shell if embed else _page_shell
|
|
1500
|
+
source_available = source_hash in index.md_path_by_hash
|
|
1501
|
+
allowed_views = {"summary", "source", "pdf", "pdfjs"}
|
|
1502
|
+
|
|
1503
|
+
def normalize_view(value: str | None, default: str) -> str:
|
|
1504
|
+
if value in allowed_views:
|
|
1505
|
+
return value
|
|
1506
|
+
return default
|
|
1507
|
+
|
|
1508
|
+
default_right = "pdfjs" if pdf_path else ("source" if source_available else "summary")
|
|
1509
|
+
left_param = request.query_params.get("left")
|
|
1510
|
+
right_param = request.query_params.get("right")
|
|
1511
|
+
left = normalize_view(left_param, "summary") if left_param else "summary"
|
|
1512
|
+
right = normalize_view(right_param, default_right) if right_param else default_right
|
|
1513
|
+
|
|
1514
|
+
def nav_link(label: str, v: str) -> str:
|
|
1515
|
+
active = " active" if view == v else ""
|
|
1516
|
+
params: dict[str, str] = {"view": v}
|
|
1517
|
+
if v == "summary" and template_param:
|
|
1518
|
+
params["template"] = str(template_param)
|
|
1519
|
+
if v == "split":
|
|
1520
|
+
params["left"] = left
|
|
1521
|
+
params["right"] = right
|
|
1522
|
+
href = f"/paper/{source_hash}?{urlencode(params)}"
|
|
1523
|
+
return f'<a class="tab{active}" href="{html.escape(href)}">{html.escape(label)}</a>'
|
|
1524
|
+
|
|
1525
|
+
nav = f"""
|
|
1526
|
+
<div class="tabs" style="margin: 8px 0 14px;">
|
|
1527
|
+
{nav_link("Summary", "summary")}
|
|
1528
|
+
{nav_link("Source", "source")}
|
|
1529
|
+
{nav_link("PDF", "pdf")}
|
|
1530
|
+
{nav_link("PDF Viewer", "pdfjs")}
|
|
1531
|
+
{nav_link("Split", "split")}
|
|
1532
|
+
</div>
|
|
1533
|
+
"""
|
|
1534
|
+
nav_html = "" if embed else nav
|
|
1535
|
+
|
|
1536
|
+
if view == "split":
|
|
1537
|
+
def pane_src(pane_view: str) -> str:
|
|
1538
|
+
if pane_view == "pdfjs" and pdf_path:
|
|
1539
|
+
return _build_pdfjs_viewer_url(pdf_url)
|
|
1540
|
+
params: dict[str, str] = {"view": pane_view, "embed": "1"}
|
|
1541
|
+
if pane_view == "summary" and template_param:
|
|
1542
|
+
params["template"] = str(template_param)
|
|
1543
|
+
return f"/paper/{source_hash}?{urlencode(params)}"
|
|
1544
|
+
|
|
1545
|
+
left_src = pane_src(left)
|
|
1546
|
+
right_src = pane_src(right)
|
|
1547
|
+
options = [
|
|
1548
|
+
("summary", "Summary"),
|
|
1549
|
+
("source", "Source"),
|
|
1550
|
+
("pdf", "PDF"),
|
|
1551
|
+
("pdfjs", "PDF Viewer"),
|
|
1552
|
+
]
|
|
1553
|
+
left_options = "\n".join(
|
|
1554
|
+
f'<option value="{value}"{" selected" if value == left else ""}>{label}</option>'
|
|
1555
|
+
for value, label in options
|
|
1556
|
+
)
|
|
1557
|
+
right_options = "\n".join(
|
|
1558
|
+
f'<option value="{value}"{" selected" if value == right else ""}>{label}</option>'
|
|
1559
|
+
for value, label in options
|
|
1560
|
+
)
|
|
1561
|
+
body = f"""
|
|
1562
|
+
<h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>
|
|
1563
|
+
{nav}
|
|
1564
|
+
<div class="split-controls">
|
|
1565
|
+
<div>
|
|
1566
|
+
<div class="muted">Left pane</div>
|
|
1567
|
+
<select id="splitLeft">
|
|
1568
|
+
{left_options}
|
|
1569
|
+
</select>
|
|
1570
|
+
</div>
|
|
1571
|
+
<div class="split-actions">
|
|
1572
|
+
<button id="splitTighten" type="button" title="Tighten width">-</button>
|
|
1573
|
+
<button id="splitSwap" type="button" title="Swap panes">⇄</button>
|
|
1574
|
+
<button id="splitWiden" type="button" title="Widen width">+</button>
|
|
1575
|
+
</div>
|
|
1576
|
+
<div>
|
|
1577
|
+
<div class="muted">Right pane</div>
|
|
1578
|
+
<select id="splitRight">
|
|
1579
|
+
{right_options}
|
|
1580
|
+
</select>
|
|
1581
|
+
</div>
|
|
1582
|
+
</div>
|
|
1583
|
+
<div class="split-layout">
|
|
1584
|
+
<div class="split-pane">
|
|
1585
|
+
<iframe id="leftPane" src="{html.escape(left_src)}" title="Left pane"></iframe>
|
|
1586
|
+
</div>
|
|
1587
|
+
<div class="split-pane">
|
|
1588
|
+
<iframe id="rightPane" src="{html.escape(right_src)}" title="Right pane"></iframe>
|
|
1589
|
+
</div>
|
|
1590
|
+
</div>
|
|
1591
|
+
"""
|
|
1592
|
+
extra_head = """
|
|
1593
|
+
<style>
|
|
1594
|
+
.container {
|
|
1595
|
+
max-width: 100%;
|
|
1596
|
+
width: 100%;
|
|
1597
|
+
margin: 0 auto;
|
|
1598
|
+
}
|
|
1599
|
+
.split-controls {
|
|
1600
|
+
display: grid;
|
|
1601
|
+
grid-template-columns: 1fr auto 1fr;
|
|
1602
|
+
gap: 12px;
|
|
1603
|
+
align-items: end;
|
|
1604
|
+
margin: 10px 0 14px;
|
|
1605
|
+
}
|
|
1606
|
+
.split-controls select {
|
|
1607
|
+
padding: 6px 8px;
|
|
1608
|
+
border-radius: 8px;
|
|
1609
|
+
border: 1px solid #d0d7de;
|
|
1610
|
+
background: #fff;
|
|
1611
|
+
min-width: 160px;
|
|
1612
|
+
}
|
|
1613
|
+
.split-actions {
|
|
1614
|
+
display: flex;
|
|
1615
|
+
align-items: center;
|
|
1616
|
+
justify-content: center;
|
|
1617
|
+
gap: 8px;
|
|
1618
|
+
height: 100%;
|
|
1619
|
+
}
|
|
1620
|
+
.split-actions button {
|
|
1621
|
+
padding: 6px 10px;
|
|
1622
|
+
border-radius: 999px;
|
|
1623
|
+
border: 1px solid #d0d7de;
|
|
1624
|
+
background: #f6f8fa;
|
|
1625
|
+
cursor: pointer;
|
|
1626
|
+
min-width: 36px;
|
|
1627
|
+
}
|
|
1628
|
+
.split-layout {
|
|
1629
|
+
display: flex;
|
|
1630
|
+
gap: 12px;
|
|
1631
|
+
width: 100%;
|
|
1632
|
+
max-width: min(100%, var(--split-max-width, 100%));
|
|
1633
|
+
margin: 0 auto;
|
|
1634
|
+
height: calc(100vh - 260px);
|
|
1635
|
+
min-height: 420px;
|
|
1636
|
+
}
|
|
1637
|
+
.split-pane {
|
|
1638
|
+
flex: 1;
|
|
1639
|
+
border: 1px solid #d0d7de;
|
|
1640
|
+
border-radius: 10px;
|
|
1641
|
+
overflow: hidden;
|
|
1642
|
+
background: #fff;
|
|
1643
|
+
}
|
|
1644
|
+
.split-pane iframe {
|
|
1645
|
+
width: 100%;
|
|
1646
|
+
height: 100%;
|
|
1647
|
+
border: 0;
|
|
1648
|
+
}
|
|
1649
|
+
@media (max-width: 900px) {
|
|
1650
|
+
.split-layout {
|
|
1651
|
+
flex-direction: column;
|
|
1652
|
+
height: auto;
|
|
1653
|
+
}
|
|
1654
|
+
.split-pane {
|
|
1655
|
+
height: 70vh;
|
|
1656
|
+
}
|
|
1657
|
+
.split-controls {
|
|
1658
|
+
grid-template-columns: 1fr;
|
|
1659
|
+
}
|
|
1660
|
+
}
|
|
1661
|
+
</style>
|
|
1662
|
+
"""
|
|
1663
|
+
extra_scripts = """
|
|
1664
|
+
<script>
|
|
1665
|
+
const leftSelect = document.getElementById('splitLeft');
|
|
1666
|
+
const rightSelect = document.getElementById('splitRight');
|
|
1667
|
+
const swapButton = document.getElementById('splitSwap');
|
|
1668
|
+
const tightenButton = document.getElementById('splitTighten');
|
|
1669
|
+
const widenButton = document.getElementById('splitWiden');
|
|
1670
|
+
function updateSplit() {
|
|
1671
|
+
const params = new URLSearchParams(window.location.search);
|
|
1672
|
+
params.set('view', 'split');
|
|
1673
|
+
params.set('left', leftSelect.value);
|
|
1674
|
+
params.set('right', rightSelect.value);
|
|
1675
|
+
window.location.search = params.toString();
|
|
1676
|
+
}
|
|
1677
|
+
leftSelect.addEventListener('change', updateSplit);
|
|
1678
|
+
rightSelect.addEventListener('change', updateSplit);
|
|
1679
|
+
swapButton.addEventListener('click', () => {
|
|
1680
|
+
const leftValue = leftSelect.value;
|
|
1681
|
+
leftSelect.value = rightSelect.value;
|
|
1682
|
+
rightSelect.value = leftValue;
|
|
1683
|
+
updateSplit();
|
|
1684
|
+
});
|
|
1685
|
+
const widthSteps = ["1200px", "1400px", "1600px", "1800px", "2000px", "100%"];
|
|
1686
|
+
let widthIndex = widthSteps.length - 1;
|
|
1687
|
+
try {
|
|
1688
|
+
const stored = localStorage.getItem('splitWidthIndex');
|
|
1689
|
+
if (stored !== null) {
|
|
1690
|
+
const parsed = Number.parseInt(stored, 10);
|
|
1691
|
+
if (!Number.isNaN(parsed)) {
|
|
1692
|
+
widthIndex = Math.max(0, Math.min(widthSteps.length - 1, parsed));
|
|
1693
|
+
}
|
|
1694
|
+
}
|
|
1695
|
+
} catch (err) {
|
|
1696
|
+
// Ignore storage errors (e.g. private mode)
|
|
1697
|
+
}
|
|
1698
|
+
|
|
1699
|
+
function applySplitWidth() {
|
|
1700
|
+
const value = widthSteps[widthIndex];
|
|
1701
|
+
document.documentElement.style.setProperty('--split-max-width', value);
|
|
1702
|
+
try {
|
|
1703
|
+
localStorage.setItem('splitWidthIndex', String(widthIndex));
|
|
1704
|
+
} catch (err) {
|
|
1705
|
+
// Ignore storage errors
|
|
1706
|
+
}
|
|
1707
|
+
}
|
|
1708
|
+
|
|
1709
|
+
tightenButton.addEventListener('click', () => {
|
|
1710
|
+
widthIndex = Math.max(0, widthIndex - 1);
|
|
1711
|
+
applySplitWidth();
|
|
1712
|
+
});
|
|
1713
|
+
widenButton.addEventListener('click', () => {
|
|
1714
|
+
widthIndex = Math.min(widthSteps.length - 1, widthIndex + 1);
|
|
1715
|
+
applySplitWidth();
|
|
1716
|
+
});
|
|
1717
|
+
applySplitWidth();
|
|
1718
|
+
</script>
|
|
1719
|
+
"""
|
|
1720
|
+
return HTMLResponse(_page_shell("Split View", body, extra_head=extra_head, extra_scripts=extra_scripts))
|
|
1721
|
+
|
|
1722
|
+
if view == "source":
|
|
1723
|
+
source_path = index.md_path_by_hash.get(source_hash)
|
|
1724
|
+
if not source_path:
|
|
1725
|
+
body = nav_html + '<div class="warning">Source markdown not found. Provide --md-root to enable source viewing.</div>'
|
|
1726
|
+
return HTMLResponse(shell("Source", body))
|
|
1727
|
+
try:
|
|
1728
|
+
raw = source_path.read_text(encoding="utf-8")
|
|
1729
|
+
except UnicodeDecodeError:
|
|
1730
|
+
raw = source_path.read_text(encoding="latin-1")
|
|
1731
|
+
rendered = _render_markdown_with_math_placeholders(md, raw)
|
|
1732
|
+
body = (
|
|
1733
|
+
nav_html
|
|
1734
|
+
+ f"<h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>"
|
|
1735
|
+
+ f'<div class="muted">{html.escape(str(source_path))}</div>'
|
|
1736
|
+
+ '<div class="muted" style="margin-top:10px;">Rendered from source markdown:</div>'
|
|
1737
|
+
+ f'<div id="content">{rendered}</div>'
|
|
1738
|
+
+ "<details style='margin-top:12px;'><summary>Raw markdown</summary>"
|
|
1739
|
+
+ f"<pre><code>{html.escape(raw)}</code></pre></details>"
|
|
1740
|
+
)
|
|
1741
|
+
extra_head = f'<link rel="stylesheet" href="{_CDN_KATEX}" />'
|
|
1742
|
+
extra_scripts = f"""
|
|
1743
|
+
<script src="{_CDN_MERMAID}"></script>
|
|
1744
|
+
<script src="{_CDN_KATEX_JS}"></script>
|
|
1745
|
+
<script src="{_CDN_KATEX_AUTO}"></script>
|
|
1746
|
+
<script>
|
|
1747
|
+
document.querySelectorAll('code.language-mermaid').forEach((code) => {{
|
|
1748
|
+
const pre = code.parentElement;
|
|
1749
|
+
const div = document.createElement('div');
|
|
1750
|
+
div.className = 'mermaid';
|
|
1751
|
+
div.textContent = code.textContent;
|
|
1752
|
+
pre.replaceWith(div);
|
|
1753
|
+
}});
|
|
1754
|
+
if (window.mermaid) {{
|
|
1755
|
+
mermaid.initialize({{ startOnLoad: false }});
|
|
1756
|
+
mermaid.run();
|
|
1757
|
+
}}
|
|
1758
|
+
if (window.renderMathInElement) {{
|
|
1759
|
+
renderMathInElement(document.getElementById('content'), {{
|
|
1760
|
+
delimiters: [
|
|
1761
|
+
{{left: '$$', right: '$$', display: true}},
|
|
1762
|
+
{{left: '$', right: '$', display: false}},
|
|
1763
|
+
{{left: '\\\\(', right: '\\\\)', display: false}},
|
|
1764
|
+
{{left: '\\\\[', right: '\\\\]', display: true}}
|
|
1765
|
+
],
|
|
1766
|
+
throwOnError: false
|
|
1767
|
+
}});
|
|
1768
|
+
}}
|
|
1769
|
+
</script>
|
|
1770
|
+
"""
|
|
1771
|
+
return HTMLResponse(shell("Source", body, extra_head=extra_head, extra_scripts=extra_scripts))
|
|
1772
|
+
|
|
1773
|
+
if view == "pdf":
|
|
1774
|
+
if not pdf_path:
|
|
1775
|
+
body = nav_html + '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
|
|
1776
|
+
return HTMLResponse(shell("PDF", body))
|
|
1777
|
+
body = nav_html + f"""
|
|
1778
|
+
<h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>
|
|
1779
|
+
<div class="muted">{html.escape(str(pdf_path.name))}</div>
|
|
1780
|
+
<div style="display:flex; gap:8px; align-items:center; margin: 10px 0;">
|
|
1781
|
+
<button id="prev" style="padding:6px 10px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Prev</button>
|
|
1782
|
+
<button id="next" style="padding:6px 10px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">Next</button>
|
|
1783
|
+
<span class="muted">Page <span id="page_num">1</span> / <span id="page_count">?</span></span>
|
|
1784
|
+
<span style="flex:1"></span>
|
|
1785
|
+
<button id="zoomOut" style="padding:6px 10px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">-</button>
|
|
1786
|
+
<button id="zoomIn" style="padding:6px 10px; border-radius:8px; border:1px solid #d0d7de; background:#f6f8fa; cursor:pointer;">+</button>
|
|
1787
|
+
</div>
|
|
1788
|
+
<canvas id="the-canvas" style="width: 100%; border: 1px solid #d0d7de; border-radius: 10px;"></canvas>
|
|
1789
|
+
"""
|
|
1790
|
+
extra_scripts = f"""
|
|
1791
|
+
<script src="{_CDN_PDFJS}"></script>
|
|
1792
|
+
<script>
|
|
1793
|
+
const url = {json.dumps(pdf_url)};
|
|
1794
|
+
pdfjsLib.GlobalWorkerOptions.workerSrc = {json.dumps(_CDN_PDFJS_WORKER)};
|
|
1795
|
+
let pdfDoc = null;
|
|
1796
|
+
let pageNum = 1;
|
|
1797
|
+
let pageRendering = false;
|
|
1798
|
+
let pageNumPending = null;
|
|
1799
|
+
let zoomLevel = 1.0;
|
|
1800
|
+
const canvas = document.getElementById('the-canvas');
|
|
1801
|
+
const ctx = canvas.getContext('2d');
|
|
1802
|
+
|
|
1803
|
+
function renderPage(num) {{
|
|
1804
|
+
pageRendering = true;
|
|
1805
|
+
pdfDoc.getPage(num).then((page) => {{
|
|
1806
|
+
const baseViewport = page.getViewport({{scale: 1}});
|
|
1807
|
+
const containerWidth = canvas.clientWidth || baseViewport.width;
|
|
1808
|
+
const fitScale = containerWidth / baseViewport.width;
|
|
1809
|
+
const scale = fitScale * zoomLevel;
|
|
1810
|
+
|
|
1811
|
+
const viewport = page.getViewport({{scale}});
|
|
1812
|
+
const outputScale = window.devicePixelRatio || 1;
|
|
1813
|
+
|
|
1814
|
+
canvas.width = Math.floor(viewport.width * outputScale);
|
|
1815
|
+
canvas.height = Math.floor(viewport.height * outputScale);
|
|
1816
|
+
canvas.style.width = Math.floor(viewport.width) + 'px';
|
|
1817
|
+
canvas.style.height = Math.floor(viewport.height) + 'px';
|
|
1818
|
+
|
|
1819
|
+
const transform = outputScale !== 1 ? [outputScale, 0, 0, outputScale, 0, 0] : null;
|
|
1820
|
+
const renderContext = {{ canvasContext: ctx, viewport, transform }};
|
|
1821
|
+
const renderTask = page.render(renderContext);
|
|
1822
|
+
renderTask.promise.then(() => {{
|
|
1823
|
+
pageRendering = false;
|
|
1824
|
+
document.getElementById('page_num').textContent = String(pageNum);
|
|
1825
|
+
if (pageNumPending !== null) {{
|
|
1826
|
+
const next = pageNumPending;
|
|
1827
|
+
pageNumPending = null;
|
|
1828
|
+
renderPage(next);
|
|
1829
|
+
}}
|
|
1830
|
+
}});
|
|
1831
|
+
}});
|
|
1832
|
+
}}
|
|
1833
|
+
|
|
1834
|
+
function queueRenderPage(num) {{
|
|
1835
|
+
if (pageRendering) {{
|
|
1836
|
+
pageNumPending = num;
|
|
1837
|
+
}} else {{
|
|
1838
|
+
renderPage(num);
|
|
1839
|
+
}}
|
|
1840
|
+
}}
|
|
1841
|
+
|
|
1842
|
+
function onPrevPage() {{
|
|
1843
|
+
if (pageNum <= 1) return;
|
|
1844
|
+
pageNum--;
|
|
1845
|
+
queueRenderPage(pageNum);
|
|
1846
|
+
}}
|
|
1847
|
+
|
|
1848
|
+
function onNextPage() {{
|
|
1849
|
+
if (pageNum >= pdfDoc.numPages) return;
|
|
1850
|
+
pageNum++;
|
|
1851
|
+
queueRenderPage(pageNum);
|
|
1852
|
+
}}
|
|
1853
|
+
|
|
1854
|
+
function adjustZoom(delta) {{
|
|
1855
|
+
zoomLevel = Math.max(0.5, Math.min(3.0, zoomLevel + delta));
|
|
1856
|
+
queueRenderPage(pageNum);
|
|
1857
|
+
}}
|
|
1858
|
+
|
|
1859
|
+
document.getElementById('prev').addEventListener('click', onPrevPage);
|
|
1860
|
+
document.getElementById('next').addEventListener('click', onNextPage);
|
|
1861
|
+
document.getElementById('zoomOut').addEventListener('click', () => adjustZoom(-0.1));
|
|
1862
|
+
document.getElementById('zoomIn').addEventListener('click', () => adjustZoom(0.1));
|
|
1863
|
+
|
|
1864
|
+
pdfjsLib.getDocument(url).promise.then((pdfDoc_) => {{
|
|
1865
|
+
pdfDoc = pdfDoc_;
|
|
1866
|
+
document.getElementById('page_count').textContent = String(pdfDoc.numPages);
|
|
1867
|
+
renderPage(pageNum);
|
|
1868
|
+
}});
|
|
1869
|
+
|
|
1870
|
+
let resizeTimer = null;
|
|
1871
|
+
window.addEventListener('resize', () => {{
|
|
1872
|
+
if (!pdfDoc) return;
|
|
1873
|
+
if (resizeTimer) clearTimeout(resizeTimer);
|
|
1874
|
+
resizeTimer = setTimeout(() => queueRenderPage(pageNum), 150);
|
|
1875
|
+
}});
|
|
1876
|
+
</script>
|
|
1877
|
+
"""
|
|
1878
|
+
return HTMLResponse(shell("PDF", body, extra_scripts=extra_scripts))
|
|
1879
|
+
|
|
1880
|
+
if view == "pdfjs":
|
|
1881
|
+
if not pdf_path:
|
|
1882
|
+
body = nav_html + '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
|
|
1883
|
+
return HTMLResponse(shell("PDF Viewer", body))
|
|
1884
|
+
viewer_url = _build_pdfjs_viewer_url(pdf_url)
|
|
1885
|
+
header_html = ""
|
|
1886
|
+
if not embed:
|
|
1887
|
+
header_html = (
|
|
1888
|
+
f"<h2>{html.escape(str(paper.get('paper_title') or 'Paper'))}</h2>"
|
|
1889
|
+
+ f'<div class="muted">{html.escape(str(pdf_path.name))}</div>'
|
|
1890
|
+
)
|
|
1891
|
+
frame_height = "calc(100vh - 220px)" if not embed else "calc(100vh - 32px)"
|
|
1892
|
+
body = f"""
|
|
1893
|
+
{nav_html}
|
|
1894
|
+
{header_html}
|
|
1895
|
+
<iframe class="pdfjs-frame" src="{html.escape(viewer_url)}" title="PDF.js Viewer"></iframe>
|
|
1896
|
+
"""
|
|
1897
|
+
extra_head = f"""
|
|
1898
|
+
<style>
|
|
1899
|
+
.pdfjs-frame {{
|
|
1900
|
+
width: 100%;
|
|
1901
|
+
height: {frame_height};
|
|
1902
|
+
border: 1px solid #d0d7de;
|
|
1903
|
+
border-radius: 10px;
|
|
1904
|
+
}}
|
|
1905
|
+
</style>
|
|
1906
|
+
"""
|
|
1907
|
+
return HTMLResponse(shell("PDF Viewer", body, extra_head=extra_head))
|
|
1908
|
+
|
|
1909
|
+
selected_tag, available_templates = _select_template_tag(paper, template_param)
|
|
1910
|
+
markdown, template_name, warning = _render_paper_markdown(
|
|
1911
|
+
paper,
|
|
1912
|
+
request.app.state.fallback_language,
|
|
1913
|
+
template_tag=selected_tag,
|
|
1914
|
+
)
|
|
1915
|
+
rendered_html = _render_markdown_with_math_placeholders(md, markdown)
|
|
1916
|
+
|
|
1917
|
+
warning_html = f'<div class="warning">{html.escape(warning)}</div>' if warning else ""
|
|
1918
|
+
title = str(paper.get("paper_title") or "Paper")
|
|
1919
|
+
outline_top = "72px" if not embed else "16px"
|
|
1920
|
+
template_controls = f'<div class="muted">Template: {html.escape(template_name)}</div>'
|
|
1921
|
+
if available_templates:
|
|
1922
|
+
options = "\n".join(
|
|
1923
|
+
f'<option value="{html.escape(tag)}"{" selected" if tag == selected_tag else ""}>{html.escape(tag)}</option>'
|
|
1924
|
+
for tag in available_templates
|
|
1925
|
+
)
|
|
1926
|
+
template_controls = f"""
|
|
1927
|
+
<div class="muted" style="margin: 6px 0;">
|
|
1928
|
+
Template:
|
|
1929
|
+
<select id="templateSelect" style="padding:6px 8px; border:1px solid #d0d7de; border-radius:6px;">
|
|
1930
|
+
{options}
|
|
1931
|
+
</select>
|
|
1932
|
+
</div>
|
|
1933
|
+
<script>
|
|
1934
|
+
const templateSelect = document.getElementById('templateSelect');
|
|
1935
|
+
if (templateSelect) {{
|
|
1936
|
+
templateSelect.addEventListener('change', () => {{
|
|
1937
|
+
const params = new URLSearchParams(window.location.search);
|
|
1938
|
+
params.set('view', 'summary');
|
|
1939
|
+
params.set('template', templateSelect.value);
|
|
1940
|
+
window.location.search = params.toString();
|
|
1941
|
+
}});
|
|
1942
|
+
}}
|
|
1943
|
+
</script>
|
|
1944
|
+
"""
|
|
1945
|
+
outline_html = """
|
|
1946
|
+
<button id="outlineToggle" class="outline-toggle" title="Toggle outline">☰</button>
|
|
1947
|
+
<div id="outlinePanel" class="outline-panel collapsed">
|
|
1948
|
+
<div class="outline-title">Outline</div>
|
|
1949
|
+
<div id="outlineList" class="outline-list"></div>
|
|
1950
|
+
</div>
|
|
1951
|
+
<button id="backToTop" class="back-to-top" title="Back to top">↑</button>
|
|
1952
|
+
"""
|
|
1953
|
+
body = f"""
|
|
1954
|
+
<h2>{html.escape(title)}</h2>
|
|
1955
|
+
{template_controls}
|
|
1956
|
+
{warning_html}
|
|
1957
|
+
{nav_html}
|
|
1958
|
+
{outline_html}
|
|
1959
|
+
<div id="content">{rendered_html}</div>
|
|
1960
|
+
"""
|
|
1961
|
+
|
|
1962
|
+
extra_head = f"""
|
|
1963
|
+
<link rel="stylesheet" href="{_CDN_KATEX}" />
|
|
1964
|
+
<style>
|
|
1965
|
+
:root {{
|
|
1966
|
+
--outline-top: {outline_top};
|
|
1967
|
+
}}
|
|
1968
|
+
.outline-toggle {{
|
|
1969
|
+
position: fixed;
|
|
1970
|
+
top: var(--outline-top);
|
|
1971
|
+
left: 16px;
|
|
1972
|
+
z-index: 20;
|
|
1973
|
+
padding: 6px 10px;
|
|
1974
|
+
border-radius: 8px;
|
|
1975
|
+
border: 1px solid #d0d7de;
|
|
1976
|
+
background: #f6f8fa;
|
|
1977
|
+
cursor: pointer;
|
|
1978
|
+
}}
|
|
1979
|
+
.outline-panel {{
|
|
1980
|
+
position: fixed;
|
|
1981
|
+
top: calc(var(--outline-top) + 42px);
|
|
1982
|
+
left: 16px;
|
|
1983
|
+
width: 240px;
|
|
1984
|
+
max-height: 60vh;
|
|
1985
|
+
overflow: auto;
|
|
1986
|
+
border: 1px solid #d0d7de;
|
|
1987
|
+
border-radius: 10px;
|
|
1988
|
+
background: #ffffff;
|
|
1989
|
+
padding: 10px;
|
|
1990
|
+
z-index: 20;
|
|
1991
|
+
box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08);
|
|
1992
|
+
}}
|
|
1993
|
+
.outline-panel.collapsed {{
|
|
1994
|
+
display: none;
|
|
1995
|
+
}}
|
|
1996
|
+
.outline-title {{
|
|
1997
|
+
font-size: 12px;
|
|
1998
|
+
text-transform: uppercase;
|
|
1999
|
+
letter-spacing: 0.08em;
|
|
2000
|
+
color: #57606a;
|
|
2001
|
+
margin-bottom: 8px;
|
|
2002
|
+
}}
|
|
2003
|
+
.outline-list a {{
|
|
2004
|
+
display: block;
|
|
2005
|
+
color: #0969da;
|
|
2006
|
+
text-decoration: none;
|
|
2007
|
+
padding: 4px 0;
|
|
2008
|
+
}}
|
|
2009
|
+
.outline-list a:hover {{
|
|
2010
|
+
text-decoration: underline;
|
|
2011
|
+
}}
|
|
2012
|
+
.back-to-top {{
|
|
2013
|
+
position: fixed;
|
|
2014
|
+
left: 16px;
|
|
2015
|
+
bottom: 16px;
|
|
2016
|
+
padding: 6px 10px;
|
|
2017
|
+
border-radius: 999px;
|
|
2018
|
+
border: 1px solid #d0d7de;
|
|
2019
|
+
background: #ffffff;
|
|
2020
|
+
cursor: pointer;
|
|
2021
|
+
opacity: 0;
|
|
2022
|
+
pointer-events: none;
|
|
2023
|
+
transition: opacity 0.2s ease;
|
|
2024
|
+
z-index: 20;
|
|
2025
|
+
}}
|
|
2026
|
+
.back-to-top.visible {{
|
|
2027
|
+
opacity: 1;
|
|
2028
|
+
pointer-events: auto;
|
|
2029
|
+
}}
|
|
2030
|
+
@media (max-width: 900px) {{
|
|
2031
|
+
.outline-panel {{
|
|
2032
|
+
width: 200px;
|
|
2033
|
+
}}
|
|
2034
|
+
}}
|
|
2035
|
+
</style>
|
|
2036
|
+
"""
|
|
2037
|
+
extra_scripts = f"""
|
|
2038
|
+
<script src="{_CDN_MERMAID}"></script>
|
|
2039
|
+
<script src="{_CDN_KATEX_JS}"></script>
|
|
2040
|
+
<script src="{_CDN_KATEX_AUTO}"></script>
|
|
2041
|
+
<script>
|
|
2042
|
+
// Mermaid: convert fenced code blocks to mermaid divs
|
|
2043
|
+
document.querySelectorAll('code.language-mermaid').forEach((code) => {{
|
|
2044
|
+
const pre = code.parentElement;
|
|
2045
|
+
const div = document.createElement('div');
|
|
2046
|
+
div.className = 'mermaid';
|
|
2047
|
+
div.textContent = code.textContent;
|
|
2048
|
+
pre.replaceWith(div);
|
|
2049
|
+
}});
|
|
2050
|
+
if (window.mermaid) {{
|
|
2051
|
+
mermaid.initialize({{ startOnLoad: false }});
|
|
2052
|
+
mermaid.run();
|
|
2053
|
+
}}
|
|
2054
|
+
if (window.renderMathInElement) {{
|
|
2055
|
+
renderMathInElement(document.getElementById('content'), {{
|
|
2056
|
+
delimiters: [
|
|
2057
|
+
{{left: '$$', right: '$$', display: true}},
|
|
2058
|
+
{{left: '$', right: '$', display: false}},
|
|
2059
|
+
{{left: '\\\\(', right: '\\\\)', display: false}},
|
|
2060
|
+
{{left: '\\\\[', right: '\\\\]', display: true}}
|
|
2061
|
+
],
|
|
2062
|
+
throwOnError: false
|
|
2063
|
+
}});
|
|
2064
|
+
}}
|
|
2065
|
+
const outlineToggle = document.getElementById('outlineToggle');
|
|
2066
|
+
const outlinePanel = document.getElementById('outlinePanel');
|
|
2067
|
+
const outlineList = document.getElementById('outlineList');
|
|
2068
|
+
const backToTop = document.getElementById('backToTop');
|
|
2069
|
+
|
|
2070
|
+
function slugify(text) {{
|
|
2071
|
+
return text.toLowerCase().trim()
|
|
2072
|
+
.replace(/[^a-z0-9\\s-]/g, '')
|
|
2073
|
+
.replace(/\\s+/g, '-')
|
|
2074
|
+
.replace(/-+/g, '-');
|
|
2075
|
+
}}
|
|
2076
|
+
|
|
2077
|
+
function buildOutline() {{
|
|
2078
|
+
if (!outlineList) return;
|
|
2079
|
+
const content = document.getElementById('content');
|
|
2080
|
+
if (!content) return;
|
|
2081
|
+
const headings = content.querySelectorAll('h1, h2, h3, h4');
|
|
2082
|
+
if (!headings.length) {{
|
|
2083
|
+
outlineList.innerHTML = '<div class="muted">No headings</div>';
|
|
2084
|
+
return;
|
|
2085
|
+
}}
|
|
2086
|
+
const used = new Set();
|
|
2087
|
+
outlineList.innerHTML = '';
|
|
2088
|
+
headings.forEach((heading) => {{
|
|
2089
|
+
let id = heading.id;
|
|
2090
|
+
if (!id) {{
|
|
2091
|
+
const base = slugify(heading.textContent || 'section') || 'section';
|
|
2092
|
+
id = base;
|
|
2093
|
+
let i = 1;
|
|
2094
|
+
while (used.has(id) || document.getElementById(id)) {{
|
|
2095
|
+
id = `${{base}}-${{i++}}`;
|
|
2096
|
+
}}
|
|
2097
|
+
heading.id = id;
|
|
2098
|
+
}}
|
|
2099
|
+
used.add(id);
|
|
2100
|
+
const level = parseInt(heading.tagName.slice(1), 10) || 1;
|
|
2101
|
+
const link = document.createElement('a');
|
|
2102
|
+
link.href = `#${{id}}`;
|
|
2103
|
+
link.textContent = heading.textContent || '';
|
|
2104
|
+
link.style.paddingLeft = `${{(level - 1) * 12}}px`;
|
|
2105
|
+
outlineList.appendChild(link);
|
|
2106
|
+
}});
|
|
2107
|
+
}}
|
|
2108
|
+
|
|
2109
|
+
function toggleBackToTop() {{
|
|
2110
|
+
if (!backToTop) return;
|
|
2111
|
+
if (window.scrollY > 300) {{
|
|
2112
|
+
backToTop.classList.add('visible');
|
|
2113
|
+
}} else {{
|
|
2114
|
+
backToTop.classList.remove('visible');
|
|
2115
|
+
}}
|
|
2116
|
+
}}
|
|
2117
|
+
|
|
2118
|
+
if (outlineToggle && outlinePanel) {{
|
|
2119
|
+
outlineToggle.addEventListener('click', () => {{
|
|
2120
|
+
outlinePanel.classList.toggle('collapsed');
|
|
2121
|
+
}});
|
|
2122
|
+
}}
|
|
2123
|
+
|
|
2124
|
+
if (backToTop) {{
|
|
2125
|
+
backToTop.addEventListener('click', () => {{
|
|
2126
|
+
window.scrollTo({{ top: 0, behavior: 'smooth' }});
|
|
2127
|
+
}});
|
|
2128
|
+
}}
|
|
2129
|
+
|
|
2130
|
+
buildOutline();
|
|
2131
|
+
window.addEventListener('scroll', toggleBackToTop);
|
|
2132
|
+
toggleBackToTop();
|
|
2133
|
+
</script>
|
|
2134
|
+
"""
|
|
2135
|
+
return HTMLResponse(shell(title, body, extra_head=extra_head, extra_scripts=extra_scripts))
|
|
2136
|
+
|
|
2137
|
+
|
|
2138
|
+
async def _api_stats(request: Request) -> JSONResponse:
    """Return the ``stats`` attribute of the application's paper index as JSON."""
    paper_index: PaperIndex = request.app.state.index
    payload = paper_index.stats
    return JSONResponse(payload)
|
|
2141
|
+
|
|
2142
|
+
|
|
2143
|
+
async def _api_pdf(request: Request) -> Response:
    """Serve the PDF registered in the index for the ``source_hash`` path parameter.

    Returns a 404 response when the index has no PDF path for the hash, and a
    403 response when PDF roots are configured on the app and
    ``_ensure_under_roots`` rejects the path. Otherwise the file is returned
    as a ``FileResponse``.
    """
    state = request.app.state
    paper_index: PaperIndex = state.index
    pdf_file = paper_index.pdf_path_by_hash.get(request.path_params["source_hash"])
    if not pdf_file:
        return Response("PDF not found", status_code=404)

    roots: list[Path] = state.pdf_roots
    # With no configured roots the containment check is skipped entirely.
    if roots:
        if not _ensure_under_roots(pdf_file, roots):
            return Response("Forbidden", status_code=403)
    return FileResponse(pdf_file)
|
|
2153
|
+
|
|
2154
|
+
|
|
2155
|
+
async def _stats_page(request: Request) -> HTMLResponse:
    """Render the stats page.

    The page contains one empty container per chart plus a script that fetches
    ``/api/stats`` and draws a bar chart into each container with ECharts.
    ``request`` is accepted to satisfy the route handler signature and is not
    read here.
    """
    # One <div> per chart; the ids must match the ones used by the script below.
    chart_containers = """
<h2>Stats</h2>
<div class="muted">Charts are rendered with ECharts (CDN).</div>
<div id="year" style="width:100%;height:360px"></div>
<div id="month" style="width:100%;height:360px"></div>
<div id="tags" style="width:100%;height:420px"></div>
<div id="authors" style="width:100%;height:420px"></div>
<div id="venues" style="width:100%;height:420px"></div>
"""
    # f-string: doubled braces are literal JavaScript braces.
    chart_scripts = f"""
<script src="{_CDN_ECHARTS}"></script>
<script>
async function main() {{
const res = await fetch('/api/stats');
const data = await res.json();

function bar(el, title, items) {{
const chart = echarts.init(document.getElementById(el));
const labels = items.map(x => x.label);
const counts = items.map(x => x.count);
chart.setOption({{
title: {{ text: title }},
tooltip: {{ trigger: 'axis' }},
xAxis: {{ type: 'category', data: labels }},
yAxis: {{ type: 'value' }},
series: [{{ type: 'bar', data: counts }}]
}});
}}

bar('year', 'Publication Year', data.years || []);
bar('month', 'Publication Month', data.months || []);
bar('tags', 'Top Tags', (data.tags || []).slice(0, 20));
bar('authors', 'Top Authors', (data.authors || []).slice(0, 20));
bar('venues', 'Top Venues', (data.venues || []).slice(0, 20));
}}
main();
</script>
"""
    page = _page_shell("Stats", chart_containers, extra_scripts=chart_scripts)
    return HTMLResponse(page)
|
|
2195
|
+
|
|
2196
|
+
|
|
2197
|
+
def _normalize_bibtex_title(title: str) -> str:
|
|
2198
|
+
value = title.replace("{", "").replace("}", "")
|
|
2199
|
+
value = re.sub(r"[^a-z0-9]+", " ", value.lower())
|
|
2200
|
+
return re.sub(r"\\s+", " ", value).strip()
|
|
2201
|
+
|
|
2202
|
+
|
|
2203
|
+
def _title_similarity(a: str, b: str) -> float:
|
|
2204
|
+
import difflib
|
|
2205
|
+
|
|
2206
|
+
if not a or not b:
|
|
2207
|
+
return 0.0
|
|
2208
|
+
return difflib.SequenceMatcher(None, a.lower(), b.lower()).ratio()
|
|
2209
|
+
|
|
2210
|
+
|
|
2211
|
+
def enrich_with_bibtex(papers: list[dict[str, Any]], bibtex_path: Path) -> None:
    """Attach the best-matching BibTeX entry to each paper, in place.

    Entries are read from ``bibtex_path`` with ``parse_file`` and matched to
    papers by normalized title. A paper receives a ``"bibtex"`` dict (keys
    ``key``, ``type``, ``fields``, ``persons``) only when the best similarity
    score is at least 0.9. Papers that already carry a dict under ``"bibtex"``
    or have no usable ``"paper_title"`` are left untouched.

    Raises:
        RuntimeError: If ``PYBTEX_AVAILABLE`` is false.
    """
    if not PYBTEX_AVAILABLE:
        raise RuntimeError("pybtex is required for --bibtex support")

    records: list[dict[str, Any]] = []
    # Records grouped by the first 16 characters of their normalized title, so
    # most papers are compared against a handful of entries, not all of them.
    buckets: dict[str, list[dict[str, Any]]] = {}
    for cite_key, bib_entry in parse_file(str(bibtex_path)).entries.items():
        entry_fields = dict(bib_entry.fields)
        normalized = _normalize_bibtex_title(str(entry_fields.get("title") or "").strip())
        if not normalized:
            continue
        record = {
            "key": cite_key,
            "type": bib_entry.type,
            "fields": entry_fields,
            "persons": {
                role: [str(person) for person in people]
                for role, people in bib_entry.persons.items()
            },
            "_title_norm": normalized,
        }
        records.append(record)
        buckets.setdefault(normalized[:16], []).append(record)

    for paper in papers:
        if isinstance(paper.get("bibtex"), dict):
            continue
        raw_title = str(paper.get("paper_title") or "").strip()
        wanted = _normalize_bibtex_title(raw_title) if raw_title else ""
        if not wanted:
            continue

        # No entry shares the prefix: fall back to scanning every record.
        pool = buckets.get(wanted[:16]) or records
        scored = [(_title_similarity(wanted, rec["_title_norm"]), rec) for rec in pool]
        # max() keeps the first of equally scored records.
        top_score, top_record = max(scored, key=lambda pair: pair[0], default=(0.0, None))

        if top_record is not None and top_score >= 0.9:
            # Keys starting with "_" are internal bookkeeping and are not exported.
            paper["bibtex"] = {
                name: value for name, value in top_record.items() if not name.startswith("_")
            }
|
|
2263
|
+
|
|
2264
|
+
|
|
2265
|
+
def create_app(
    *,
    db_paths: list[Path],
    fallback_language: str = "en",
    bibtex_path: Path | None = None,
    md_roots: list[Path] | None = None,
    pdf_roots: list[Path] | None = None,
    cache_dir: Path | None = None,
    use_cache: bool = True,
) -> Starlette:
    """Build the Starlette application that serves the paper browser.

    Papers are loaded via ``_load_or_merge_papers`` and indexed with
    ``build_index``. The page and API routes are then registered. The PDF.js
    static assets are mounted under ``/pdfjs`` when ``_PDFJS_STATIC_DIR``
    exists; if it does not and PDF roots were supplied, a warning is logged.

    The index, markdown renderer, fallback language and PDF roots are stored
    on ``app.state`` for the request handlers.
    """
    papers = _load_or_merge_papers(db_paths, bibtex_path, cache_dir, use_cache)

    # Treat a missing (or empty) root list as "no roots".
    md_roots = md_roots if md_roots else []
    pdf_roots = pdf_roots if pdf_roots else []
    index = build_index(papers, md_roots=md_roots, pdf_roots=pdf_roots)
    md = _md_renderer()

    routes = [
        Route("/", _index_page, methods=["GET"]),
        Route("/stats", _stats_page, methods=["GET"]),
        Route("/paper/{source_hash:str}", _paper_detail, methods=["GET"]),
        Route("/api/papers", _api_papers, methods=["GET"]),
        Route("/api/stats", _api_stats, methods=["GET"]),
        Route("/api/pdf/{source_hash:str}", _api_pdf, methods=["GET"]),
    ]

    if _PDFJS_STATIC_DIR.exists():
        pdfjs_files = StaticFiles(directory=str(_PDFJS_STATIC_DIR), html=True)
        routes.append(Mount("/pdfjs", app=pdfjs_files, name="pdfjs"))
    elif pdf_roots:
        # Only worth warning about when the caller actually configured PDFs.
        logger.warning(
            "PDF.js viewer assets not found at %s; PDF Viewer mode will be unavailable.",
            _PDFJS_STATIC_DIR,
        )

    app = Starlette(routes=routes)
    app.state.index = index
    app.state.md = md
    app.state.fallback_language = fallback_language
    app.state.pdf_roots = pdf_roots
    return app
|