deepresearch-flow 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (417) hide show
  1. deepresearch_flow/__init__.py +5 -0
  2. deepresearch_flow/cli.py +23 -0
  3. deepresearch_flow/paper/__init__.py +1 -0
  4. deepresearch_flow/paper/cli.py +286 -0
  5. deepresearch_flow/paper/config.py +249 -0
  6. deepresearch_flow/paper/db.py +768 -0
  7. deepresearch_flow/paper/extract.py +870 -0
  8. deepresearch_flow/paper/llm.py +115 -0
  9. deepresearch_flow/paper/prompt_templates/__init__.py +1 -0
  10. deepresearch_flow/paper/prompt_templates/deep_read_system.j2 +6 -0
  11. deepresearch_flow/paper/prompt_templates/deep_read_user.j2 +82 -0
  12. deepresearch_flow/paper/prompt_templates/eight_questions_system.j2 +6 -0
  13. deepresearch_flow/paper/prompt_templates/eight_questions_user.j2 +28 -0
  14. deepresearch_flow/paper/prompt_templates/simple_system.j2 +6 -0
  15. deepresearch_flow/paper/prompt_templates/simple_user.j2 +24 -0
  16. deepresearch_flow/paper/prompt_templates/three_pass_system.j2 +6 -0
  17. deepresearch_flow/paper/prompt_templates/three_pass_user.j2 +44 -0
  18. deepresearch_flow/paper/prompts.py +11 -0
  19. deepresearch_flow/paper/providers/__init__.py +1 -0
  20. deepresearch_flow/paper/providers/azure_openai.py +66 -0
  21. deepresearch_flow/paper/providers/base.py +19 -0
  22. deepresearch_flow/paper/providers/claude.py +71 -0
  23. deepresearch_flow/paper/providers/dashscope.py +58 -0
  24. deepresearch_flow/paper/providers/gemini.py +116 -0
  25. deepresearch_flow/paper/providers/ollama.py +46 -0
  26. deepresearch_flow/paper/providers/openai_compatible.py +60 -0
  27. deepresearch_flow/paper/render.py +64 -0
  28. deepresearch_flow/paper/schema.py +58 -0
  29. deepresearch_flow/paper/schemas/__init__.py +1 -0
  30. deepresearch_flow/paper/schemas/deep_read_schema.json +46 -0
  31. deepresearch_flow/paper/schemas/default_paper_schema.json +47 -0
  32. deepresearch_flow/paper/schemas/eight_questions_schema.json +34 -0
  33. deepresearch_flow/paper/schemas/three_pass_schema.json +24 -0
  34. deepresearch_flow/paper/template_registry.py +189 -0
  35. deepresearch_flow/paper/templates/__init__.py +1 -0
  36. deepresearch_flow/paper/templates/deep_read.md.j2 +79 -0
  37. deepresearch_flow/paper/templates/default_paper.md.j2 +32 -0
  38. deepresearch_flow/paper/templates/eight_questions.md.j2 +49 -0
  39. deepresearch_flow/paper/templates/three_pass.md.j2 +28 -0
  40. deepresearch_flow/paper/utils.py +136 -0
  41. deepresearch_flow/paper/web/__init__.py +2 -0
  42. deepresearch_flow/paper/web/app.py +2307 -0
  43. deepresearch_flow/paper/web/pdfjs/LICENSE +177 -0
  44. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-EUC-H.bcmap +0 -0
  45. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-EUC-V.bcmap +0 -0
  46. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-H.bcmap +0 -0
  47. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-RKSJ-H.bcmap +0 -0
  48. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-RKSJ-V.bcmap +0 -0
  49. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-V.bcmap +0 -0
  50. deepresearch_flow/paper/web/pdfjs/web/cmaps/78ms-RKSJ-H.bcmap +0 -0
  51. deepresearch_flow/paper/web/pdfjs/web/cmaps/78ms-RKSJ-V.bcmap +0 -0
  52. deepresearch_flow/paper/web/pdfjs/web/cmaps/83pv-RKSJ-H.bcmap +0 -0
  53. deepresearch_flow/paper/web/pdfjs/web/cmaps/90ms-RKSJ-H.bcmap +0 -0
  54. deepresearch_flow/paper/web/pdfjs/web/cmaps/90ms-RKSJ-V.bcmap +0 -0
  55. deepresearch_flow/paper/web/pdfjs/web/cmaps/90msp-RKSJ-H.bcmap +0 -0
  56. deepresearch_flow/paper/web/pdfjs/web/cmaps/90msp-RKSJ-V.bcmap +0 -0
  57. deepresearch_flow/paper/web/pdfjs/web/cmaps/90pv-RKSJ-H.bcmap +0 -0
  58. deepresearch_flow/paper/web/pdfjs/web/cmaps/90pv-RKSJ-V.bcmap +0 -0
  59. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-H.bcmap +0 -0
  60. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-RKSJ-H.bcmap +0 -0
  61. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-RKSJ-V.bcmap +0 -0
  62. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-V.bcmap +0 -0
  63. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-0.bcmap +0 -0
  64. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-1.bcmap +0 -0
  65. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-2.bcmap +0 -0
  66. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-3.bcmap +0 -0
  67. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-4.bcmap +0 -0
  68. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-5.bcmap +0 -0
  69. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-6.bcmap +0 -0
  70. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  71. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-0.bcmap +0 -0
  72. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-1.bcmap +0 -0
  73. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-2.bcmap +0 -0
  74. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-3.bcmap +0 -0
  75. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-4.bcmap +0 -0
  76. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-5.bcmap +0 -0
  77. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  78. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-0.bcmap +0 -0
  79. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-1.bcmap +0 -0
  80. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-2.bcmap +0 -0
  81. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-3.bcmap +0 -0
  82. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-4.bcmap +0 -0
  83. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-5.bcmap +0 -0
  84. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-6.bcmap +0 -0
  85. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  86. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-0.bcmap +0 -0
  87. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-1.bcmap +0 -0
  88. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-2.bcmap +0 -0
  89. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  90. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5-H.bcmap +0 -0
  91. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5-V.bcmap +0 -0
  92. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5pc-H.bcmap +0 -0
  93. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5pc-V.bcmap +0 -0
  94. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS-EUC-H.bcmap +0 -0
  95. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS-EUC-V.bcmap +0 -0
  96. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS1-H.bcmap +0 -0
  97. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS1-V.bcmap +0 -0
  98. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS2-H.bcmap +0 -0
  99. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS2-V.bcmap +3 -0
  100. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETHK-B5-H.bcmap +0 -0
  101. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETHK-B5-V.bcmap +0 -0
  102. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETen-B5-H.bcmap +0 -0
  103. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETen-B5-V.bcmap +0 -0
  104. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETenms-B5-H.bcmap +3 -0
  105. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETenms-B5-V.bcmap +0 -0
  106. deepresearch_flow/paper/web/pdfjs/web/cmaps/EUC-H.bcmap +0 -0
  107. deepresearch_flow/paper/web/pdfjs/web/cmaps/EUC-V.bcmap +0 -0
  108. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-H.bcmap +0 -0
  109. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-RKSJ-H.bcmap +0 -0
  110. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-RKSJ-V.bcmap +0 -0
  111. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-V.bcmap +0 -0
  112. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-EUC-H.bcmap +0 -0
  113. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-EUC-V.bcmap +0 -0
  114. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-H.bcmap +4 -0
  115. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-V.bcmap +0 -0
  116. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK-EUC-H.bcmap +0 -0
  117. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK-EUC-V.bcmap +0 -0
  118. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK2K-H.bcmap +0 -0
  119. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK2K-V.bcmap +0 -0
  120. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBKp-EUC-H.bcmap +0 -0
  121. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBKp-EUC-V.bcmap +0 -0
  122. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-EUC-H.bcmap +0 -0
  123. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-EUC-V.bcmap +0 -0
  124. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-H.bcmap +0 -0
  125. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-V.bcmap +0 -0
  126. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBTpc-EUC-H.bcmap +0 -0
  127. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBTpc-EUC-V.bcmap +0 -0
  128. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBpc-EUC-H.bcmap +0 -0
  129. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBpc-EUC-V.bcmap +0 -0
  130. deepresearch_flow/paper/web/pdfjs/web/cmaps/H.bcmap +0 -0
  131. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdla-B5-H.bcmap +0 -0
  132. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdla-B5-V.bcmap +0 -0
  133. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdlb-B5-H.bcmap +0 -0
  134. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdlb-B5-V.bcmap +0 -0
  135. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKgccs-B5-H.bcmap +0 -0
  136. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKgccs-B5-V.bcmap +0 -0
  137. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm314-B5-H.bcmap +0 -0
  138. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm314-B5-V.bcmap +0 -0
  139. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm471-B5-H.bcmap +0 -0
  140. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm471-B5-V.bcmap +0 -0
  141. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKscs-B5-H.bcmap +0 -0
  142. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKscs-B5-V.bcmap +0 -0
  143. deepresearch_flow/paper/web/pdfjs/web/cmaps/Hankaku.bcmap +0 -0
  144. deepresearch_flow/paper/web/pdfjs/web/cmaps/Hiragana.bcmap +0 -0
  145. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-EUC-H.bcmap +0 -0
  146. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-EUC-V.bcmap +0 -0
  147. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-H.bcmap +0 -0
  148. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-Johab-H.bcmap +0 -0
  149. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-Johab-V.bcmap +0 -0
  150. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-V.bcmap +0 -0
  151. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-H.bcmap +0 -0
  152. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  153. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  154. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-V.bcmap +0 -0
  155. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCpc-EUC-H.bcmap +0 -0
  156. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCpc-EUC-V.bcmap +0 -0
  157. deepresearch_flow/paper/web/pdfjs/web/cmaps/Katakana.bcmap +0 -0
  158. deepresearch_flow/paper/web/pdfjs/web/cmaps/LICENSE +36 -0
  159. deepresearch_flow/paper/web/pdfjs/web/cmaps/NWP-H.bcmap +0 -0
  160. deepresearch_flow/paper/web/pdfjs/web/cmaps/NWP-V.bcmap +0 -0
  161. deepresearch_flow/paper/web/pdfjs/web/cmaps/RKSJ-H.bcmap +0 -0
  162. deepresearch_flow/paper/web/pdfjs/web/cmaps/RKSJ-V.bcmap +0 -0
  163. deepresearch_flow/paper/web/pdfjs/web/cmaps/Roman.bcmap +0 -0
  164. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  165. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  166. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  167. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  168. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  169. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  170. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  171. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  172. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UCS2-H.bcmap +0 -0
  173. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UCS2-V.bcmap +0 -0
  174. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF16-H.bcmap +0 -0
  175. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF16-V.bcmap +0 -0
  176. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF32-H.bcmap +0 -0
  177. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF32-V.bcmap +0 -0
  178. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF8-H.bcmap +0 -0
  179. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF8-V.bcmap +0 -0
  180. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  181. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  182. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  183. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  184. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  185. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  186. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  187. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  188. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  189. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  190. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  191. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  192. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  193. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  194. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  195. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  196. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  197. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  198. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  199. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  200. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  201. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  202. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  203. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UCS2-H.bcmap +0 -0
  204. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UCS2-V.bcmap +0 -0
  205. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF16-H.bcmap +0 -0
  206. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF16-V.bcmap +0 -0
  207. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF32-H.bcmap +0 -0
  208. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF32-V.bcmap +0 -0
  209. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF8-H.bcmap +0 -0
  210. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF8-V.bcmap +0 -0
  211. deepresearch_flow/paper/web/pdfjs/web/cmaps/V.bcmap +0 -0
  212. deepresearch_flow/paper/web/pdfjs/web/cmaps/WP-Symbol.bcmap +0 -0
  213. deepresearch_flow/paper/web/pdfjs/web/compressed.tracemonkey-pldi-09.pdf +0 -0
  214. deepresearch_flow/paper/web/pdfjs/web/debugger.css +111 -0
  215. deepresearch_flow/paper/web/pdfjs/web/debugger.js +611 -0
  216. deepresearch_flow/paper/web/pdfjs/web/images/altText_add.svg +3 -0
  217. deepresearch_flow/paper/web/pdfjs/web/images/altText_done.svg +3 -0
  218. deepresearch_flow/paper/web/pdfjs/web/images/annotation-check.svg +11 -0
  219. deepresearch_flow/paper/web/pdfjs/web/images/annotation-comment.svg +16 -0
  220. deepresearch_flow/paper/web/pdfjs/web/images/annotation-help.svg +26 -0
  221. deepresearch_flow/paper/web/pdfjs/web/images/annotation-insert.svg +10 -0
  222. deepresearch_flow/paper/web/pdfjs/web/images/annotation-key.svg +11 -0
  223. deepresearch_flow/paper/web/pdfjs/web/images/annotation-newparagraph.svg +11 -0
  224. deepresearch_flow/paper/web/pdfjs/web/images/annotation-noicon.svg +7 -0
  225. deepresearch_flow/paper/web/pdfjs/web/images/annotation-note.svg +42 -0
  226. deepresearch_flow/paper/web/pdfjs/web/images/annotation-paperclip.svg +6 -0
  227. deepresearch_flow/paper/web/pdfjs/web/images/annotation-paragraph.svg +16 -0
  228. deepresearch_flow/paper/web/pdfjs/web/images/annotation-pushpin.svg +7 -0
  229. deepresearch_flow/paper/web/pdfjs/web/images/cursor-editorFreeText.svg +3 -0
  230. deepresearch_flow/paper/web/pdfjs/web/images/cursor-editorInk.svg +4 -0
  231. deepresearch_flow/paper/web/pdfjs/web/images/findbarButton-next.svg +3 -0
  232. deepresearch_flow/paper/web/pdfjs/web/images/findbarButton-previous.svg +3 -0
  233. deepresearch_flow/paper/web/pdfjs/web/images/gv-toolbarButton-download.svg +3 -0
  234. deepresearch_flow/paper/web/pdfjs/web/images/gv-toolbarButton-openinapp.svg +11 -0
  235. deepresearch_flow/paper/web/pdfjs/web/images/loading-dark.svg +24 -0
  236. deepresearch_flow/paper/web/pdfjs/web/images/loading-icon.gif +0 -0
  237. deepresearch_flow/paper/web/pdfjs/web/images/loading.svg +1 -0
  238. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-documentProperties.svg +3 -0
  239. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-firstPage.svg +3 -0
  240. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-handTool.svg +3 -0
  241. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-lastPage.svg +3 -0
  242. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-rotateCcw.svg +3 -0
  243. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-rotateCw.svg +3 -0
  244. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollHorizontal.svg +3 -0
  245. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollPage.svg +3 -0
  246. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollVertical.svg +3 -0
  247. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollWrapped.svg +3 -0
  248. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-selectTool.svg +3 -0
  249. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadEven.svg +3 -0
  250. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadNone.svg +3 -0
  251. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadOdd.svg +3 -0
  252. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-bookmark.svg +3 -0
  253. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-currentOutlineItem.svg +3 -0
  254. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-download.svg +4 -0
  255. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorFreeText.svg +3 -0
  256. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorInk.svg +4 -0
  257. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorStamp.svg +8 -0
  258. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-menuArrow.svg +3 -0
  259. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-openFile.svg +3 -0
  260. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-pageDown.svg +3 -0
  261. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-pageUp.svg +3 -0
  262. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-presentationMode.svg +3 -0
  263. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-print.svg +3 -0
  264. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-search.svg +3 -0
  265. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-secondaryToolbarToggle.svg +3 -0
  266. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-sidebarToggle.svg +3 -0
  267. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewAttachments.svg +3 -0
  268. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewLayers.svg +3 -0
  269. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewOutline.svg +3 -0
  270. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewThumbnail.svg +3 -0
  271. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-zoomIn.svg +3 -0
  272. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-zoomOut.svg +3 -0
  273. deepresearch_flow/paper/web/pdfjs/web/images/treeitem-collapsed.svg +1 -0
  274. deepresearch_flow/paper/web/pdfjs/web/images/treeitem-expanded.svg +1 -0
  275. deepresearch_flow/paper/web/pdfjs/web/locale/ach/viewer.properties +203 -0
  276. deepresearch_flow/paper/web/pdfjs/web/locale/af/viewer.properties +156 -0
  277. deepresearch_flow/paper/web/pdfjs/web/locale/an/viewer.properties +222 -0
  278. deepresearch_flow/paper/web/pdfjs/web/locale/ar/viewer.properties +224 -0
  279. deepresearch_flow/paper/web/pdfjs/web/locale/ast/viewer.properties +185 -0
  280. deepresearch_flow/paper/web/pdfjs/web/locale/az/viewer.properties +222 -0
  281. deepresearch_flow/paper/web/pdfjs/web/locale/be/viewer.properties +270 -0
  282. deepresearch_flow/paper/web/pdfjs/web/locale/bg/viewer.properties +214 -0
  283. deepresearch_flow/paper/web/pdfjs/web/locale/bn/viewer.properties +218 -0
  284. deepresearch_flow/paper/web/pdfjs/web/locale/bo/viewer.properties +217 -0
  285. deepresearch_flow/paper/web/pdfjs/web/locale/br/viewer.properties +224 -0
  286. deepresearch_flow/paper/web/pdfjs/web/locale/brx/viewer.properties +184 -0
  287. deepresearch_flow/paper/web/pdfjs/web/locale/bs/viewer.properties +173 -0
  288. deepresearch_flow/paper/web/pdfjs/web/locale/ca/viewer.properties +256 -0
  289. deepresearch_flow/paper/web/pdfjs/web/locale/cak/viewer.properties +253 -0
  290. deepresearch_flow/paper/web/pdfjs/web/locale/ckb/viewer.properties +213 -0
  291. deepresearch_flow/paper/web/pdfjs/web/locale/cs/viewer.properties +284 -0
  292. deepresearch_flow/paper/web/pdfjs/web/locale/cy/viewer.properties +270 -0
  293. deepresearch_flow/paper/web/pdfjs/web/locale/da/viewer.properties +270 -0
  294. deepresearch_flow/paper/web/pdfjs/web/locale/de/viewer.properties +270 -0
  295. deepresearch_flow/paper/web/pdfjs/web/locale/dsb/viewer.properties +284 -0
  296. deepresearch_flow/paper/web/pdfjs/web/locale/el/viewer.properties +270 -0
  297. deepresearch_flow/paper/web/pdfjs/web/locale/en-CA/viewer.properties +270 -0
  298. deepresearch_flow/paper/web/pdfjs/web/locale/en-GB/viewer.properties +284 -0
  299. deepresearch_flow/paper/web/pdfjs/web/locale/en-US/viewer.properties +282 -0
  300. deepresearch_flow/paper/web/pdfjs/web/locale/eo/viewer.properties +270 -0
  301. deepresearch_flow/paper/web/pdfjs/web/locale/es-AR/viewer.properties +284 -0
  302. deepresearch_flow/paper/web/pdfjs/web/locale/es-CL/viewer.properties +284 -0
  303. deepresearch_flow/paper/web/pdfjs/web/locale/es-ES/viewer.properties +270 -0
  304. deepresearch_flow/paper/web/pdfjs/web/locale/es-MX/viewer.properties +257 -0
  305. deepresearch_flow/paper/web/pdfjs/web/locale/et/viewer.properties +229 -0
  306. deepresearch_flow/paper/web/pdfjs/web/locale/eu/viewer.properties +284 -0
  307. deepresearch_flow/paper/web/pdfjs/web/locale/fa/viewer.properties +221 -0
  308. deepresearch_flow/paper/web/pdfjs/web/locale/ff/viewer.properties +214 -0
  309. deepresearch_flow/paper/web/pdfjs/web/locale/fi/viewer.properties +270 -0
  310. deepresearch_flow/paper/web/pdfjs/web/locale/fr/viewer.properties +270 -0
  311. deepresearch_flow/paper/web/pdfjs/web/locale/fur/viewer.properties +270 -0
  312. deepresearch_flow/paper/web/pdfjs/web/locale/fy-NL/viewer.properties +270 -0
  313. deepresearch_flow/paper/web/pdfjs/web/locale/ga-IE/viewer.properties +181 -0
  314. deepresearch_flow/paper/web/pdfjs/web/locale/gd/viewer.properties +257 -0
  315. deepresearch_flow/paper/web/pdfjs/web/locale/gl/viewer.properties +267 -0
  316. deepresearch_flow/paper/web/pdfjs/web/locale/gn/viewer.properties +278 -0
  317. deepresearch_flow/paper/web/pdfjs/web/locale/gu-IN/viewer.properties +214 -0
  318. deepresearch_flow/paper/web/pdfjs/web/locale/he/viewer.properties +283 -0
  319. deepresearch_flow/paper/web/pdfjs/web/locale/hi-IN/viewer.properties +227 -0
  320. deepresearch_flow/paper/web/pdfjs/web/locale/hr/viewer.properties +243 -0
  321. deepresearch_flow/paper/web/pdfjs/web/locale/hsb/viewer.properties +284 -0
  322. deepresearch_flow/paper/web/pdfjs/web/locale/hu/viewer.properties +284 -0
  323. deepresearch_flow/paper/web/pdfjs/web/locale/hy-AM/viewer.properties +232 -0
  324. deepresearch_flow/paper/web/pdfjs/web/locale/hye/viewer.properties +229 -0
  325. deepresearch_flow/paper/web/pdfjs/web/locale/ia/viewer.properties +284 -0
  326. deepresearch_flow/paper/web/pdfjs/web/locale/id/viewer.properties +253 -0
  327. deepresearch_flow/paper/web/pdfjs/web/locale/is/viewer.properties +284 -0
  328. deepresearch_flow/paper/web/pdfjs/web/locale/it/viewer.properties +284 -0
  329. deepresearch_flow/paper/web/pdfjs/web/locale/ja/viewer.properties +270 -0
  330. deepresearch_flow/paper/web/pdfjs/web/locale/ka/viewer.properties +284 -0
  331. deepresearch_flow/paper/web/pdfjs/web/locale/kab/viewer.properties +264 -0
  332. deepresearch_flow/paper/web/pdfjs/web/locale/kk/viewer.properties +284 -0
  333. deepresearch_flow/paper/web/pdfjs/web/locale/km/viewer.properties +189 -0
  334. deepresearch_flow/paper/web/pdfjs/web/locale/kn/viewer.properties +166 -0
  335. deepresearch_flow/paper/web/pdfjs/web/locale/ko/viewer.properties +284 -0
  336. deepresearch_flow/paper/web/pdfjs/web/locale/lij/viewer.properties +214 -0
  337. deepresearch_flow/paper/web/pdfjs/web/locale/lo/viewer.properties +257 -0
  338. deepresearch_flow/paper/web/pdfjs/web/locale/locale.properties +333 -0
  339. deepresearch_flow/paper/web/pdfjs/web/locale/lt/viewer.properties +229 -0
  340. deepresearch_flow/paper/web/pdfjs/web/locale/ltg/viewer.properties +192 -0
  341. deepresearch_flow/paper/web/pdfjs/web/locale/lv/viewer.properties +214 -0
  342. deepresearch_flow/paper/web/pdfjs/web/locale/meh/viewer.properties +106 -0
  343. deepresearch_flow/paper/web/pdfjs/web/locale/mk/viewer.properties +211 -0
  344. deepresearch_flow/paper/web/pdfjs/web/locale/mr/viewer.properties +210 -0
  345. deepresearch_flow/paper/web/pdfjs/web/locale/ms/viewer.properties +214 -0
  346. deepresearch_flow/paper/web/pdfjs/web/locale/my/viewer.properties +170 -0
  347. deepresearch_flow/paper/web/pdfjs/web/locale/nb-NO/viewer.properties +284 -0
  348. deepresearch_flow/paper/web/pdfjs/web/locale/ne-NP/viewer.properties +197 -0
  349. deepresearch_flow/paper/web/pdfjs/web/locale/nl/viewer.properties +274 -0
  350. deepresearch_flow/paper/web/pdfjs/web/locale/nn-NO/viewer.properties +270 -0
  351. deepresearch_flow/paper/web/pdfjs/web/locale/oc/viewer.properties +278 -0
  352. deepresearch_flow/paper/web/pdfjs/web/locale/pa-IN/viewer.properties +270 -0
  353. deepresearch_flow/paper/web/pdfjs/web/locale/pl/viewer.properties +270 -0
  354. deepresearch_flow/paper/web/pdfjs/web/locale/pt-BR/viewer.properties +270 -0
  355. deepresearch_flow/paper/web/pdfjs/web/locale/pt-PT/viewer.properties +270 -0
  356. deepresearch_flow/paper/web/pdfjs/web/locale/rm/viewer.properties +270 -0
  357. deepresearch_flow/paper/web/pdfjs/web/locale/ro/viewer.properties +220 -0
  358. deepresearch_flow/paper/web/pdfjs/web/locale/ru/viewer.properties +270 -0
  359. deepresearch_flow/paper/web/pdfjs/web/locale/sat/viewer.properties +270 -0
  360. deepresearch_flow/paper/web/pdfjs/web/locale/sc/viewer.properties +258 -0
  361. deepresearch_flow/paper/web/pdfjs/web/locale/scn/viewer.properties +101 -0
  362. deepresearch_flow/paper/web/pdfjs/web/locale/sco/viewer.properties +226 -0
  363. deepresearch_flow/paper/web/pdfjs/web/locale/si/viewer.properties +228 -0
  364. deepresearch_flow/paper/web/pdfjs/web/locale/sk/viewer.properties +270 -0
  365. deepresearch_flow/paper/web/pdfjs/web/locale/skr/viewer.properties +264 -0
  366. deepresearch_flow/paper/web/pdfjs/web/locale/sl/viewer.properties +284 -0
  367. deepresearch_flow/paper/web/pdfjs/web/locale/son/viewer.properties +152 -0
  368. deepresearch_flow/paper/web/pdfjs/web/locale/sq/viewer.properties +247 -0
  369. deepresearch_flow/paper/web/pdfjs/web/locale/sr/viewer.properties +259 -0
  370. deepresearch_flow/paper/web/pdfjs/web/locale/sv-SE/viewer.properties +284 -0
  371. deepresearch_flow/paper/web/pdfjs/web/locale/szl/viewer.properties +224 -0
  372. deepresearch_flow/paper/web/pdfjs/web/locale/ta/viewer.properties +173 -0
  373. deepresearch_flow/paper/web/pdfjs/web/locale/te/viewer.properties +216 -0
  374. deepresearch_flow/paper/web/pdfjs/web/locale/tg/viewer.properties +281 -0
  375. deepresearch_flow/paper/web/pdfjs/web/locale/th/viewer.properties +270 -0
  376. deepresearch_flow/paper/web/pdfjs/web/locale/tl/viewer.properties +222 -0
  377. deepresearch_flow/paper/web/pdfjs/web/locale/tr/viewer.properties +283 -0
  378. deepresearch_flow/paper/web/pdfjs/web/locale/trs/viewer.properties +184 -0
  379. deepresearch_flow/paper/web/pdfjs/web/locale/uk/viewer.properties +284 -0
  380. deepresearch_flow/paper/web/pdfjs/web/locale/ur/viewer.properties +218 -0
  381. deepresearch_flow/paper/web/pdfjs/web/locale/uz/viewer.properties +142 -0
  382. deepresearch_flow/paper/web/pdfjs/web/locale/vi/viewer.properties +270 -0
  383. deepresearch_flow/paper/web/pdfjs/web/locale/wo/viewer.properties +104 -0
  384. deepresearch_flow/paper/web/pdfjs/web/locale/xh/viewer.properties +156 -0
  385. deepresearch_flow/paper/web/pdfjs/web/locale/zh-CN/viewer.properties +284 -0
  386. deepresearch_flow/paper/web/pdfjs/web/locale/zh-TW/viewer.properties +281 -0
  387. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitDingbats.pfb +0 -0
  388. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixed.pfb +0 -0
  389. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedBold.pfb +0 -0
  390. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  391. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedItalic.pfb +0 -0
  392. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerif.pfb +0 -0
  393. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifBold.pfb +0 -0
  394. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  395. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifItalic.pfb +0 -0
  396. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSymbol.pfb +0 -0
  397. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LICENSE_FOXIT +27 -0
  398. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LICENSE_LIBERATION +102 -0
  399. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Bold.ttf +0 -0
  400. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  401. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Italic.ttf +0 -0
  402. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Regular.ttf +0 -0
  403. deepresearch_flow/paper/web/pdfjs/web/viewer.css +3528 -0
  404. deepresearch_flow/paper/web/pdfjs/web/viewer.html +486 -0
  405. deepresearch_flow/paper/web/pdfjs/web/viewer.js +14099 -0
  406. deepresearch_flow/paper/web/pdfjs/web/viewer.js.map +1 -0
  407. deepresearch_flow/paper/web/query.py +90 -0
  408. deepresearch_flow/recognize/__init__.py +1 -0
  409. deepresearch_flow/recognize/cli.py +469 -0
  410. deepresearch_flow/recognize/markdown.py +277 -0
  411. deepresearch_flow/recognize/organize.py +95 -0
  412. deepresearch_flow-0.1.1.dist-info/METADATA +416 -0
  413. deepresearch_flow-0.1.1.dist-info/RECORD +417 -0
  414. deepresearch_flow-0.1.1.dist-info/WHEEL +5 -0
  415. deepresearch_flow-0.1.1.dist-info/entry_points.txt +2 -0
  416. deepresearch_flow-0.1.1.dist-info/licenses/LICENSE +21 -0
  417. deepresearch_flow-0.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,768 @@
1
+ """Database management commands for paper extraction outputs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ import re
8
+ from pathlib import Path
9
+ from typing import Any, Iterable
10
+ import difflib
11
+
12
+ import click
13
+ import httpx
14
+ from rich.console import Console
15
+ from rich.panel import Panel
16
+ from rich.table import Table
17
+
18
+ from deepresearch_flow.paper.config import load_config, resolve_api_keys
19
+ from deepresearch_flow.paper.extract import parse_model_ref
20
+ from deepresearch_flow.paper.llm import backoff_delay, call_provider
21
+ from deepresearch_flow.paper.providers.base import ProviderError
22
+ from deepresearch_flow.paper.template_registry import list_template_names
23
+ from deepresearch_flow.paper.render import resolve_render_template, render_papers
24
+
25
+ try:
26
+ from pybtex.database import parse_file
27
+ PYBTEX_AVAILABLE = True
28
+ except ImportError:
29
+ PYBTEX_AVAILABLE = False
30
+
31
+
32
def load_json(path: Path) -> list[dict[str, Any]]:
    """Read the UTF-8 text at *path* and return the decoded JSON value."""
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
34
+
35
+
36
def write_json(path: Path, data: Any) -> None:
    """Write *data* to *path* as UTF-8 JSON, 2-space indented, non-ASCII kept as-is."""
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    path.write_text(serialized, encoding="utf-8")
38
+
39
+
40
def normalize_authors(value: Any) -> list[str]:
    """Coerce an author field into a list of non-empty, stripped names.

    ``None`` gives ``[]``; a list is stringified element by element; a string
    is split on commas; any other value becomes a one-element list holding
    its ``str()`` form (not stripped).
    """
    if value is None:
        return []
    if isinstance(value, str):
        candidates = value.split(",")
    elif isinstance(value, list):
        candidates = [str(entry) for entry in value]
    else:
        return [str(value)]
    stripped = (candidate.strip() for candidate in candidates)
    return [name for name in stripped if name]
48
+
49
+
50
def parse_publication_year(paper: dict[str, Any]) -> int | None:
    """Return the publication year of *paper*, or ``None`` if none is found.

    The BibTeX ``year`` field (``paper["bibtex"]["fields"]["year"]``) is used
    when it is a plain decimal number. Otherwise the first ``19xx``/``20xx``
    occurrence in ``publication_date`` (falling back to
    ``paper_publication_date``) is returned.
    """
    bibtex = paper.get("bibtex")
    if isinstance(bibtex, dict):
        # "fields" may be present but null (or not a mapping) in hand-edited
        # records; treat that the same as a missing mapping.
        fields = bibtex.get("fields")
        if isinstance(fields, dict):
            year_text = str(fields.get("year") or "").strip()
            # isdecimal() (unlike isdigit()) only admits characters that
            # int() can actually parse.
            if year_text.isdecimal():
                return int(year_text)

    date_str = paper.get("publication_date") or paper.get("paper_publication_date")
    if not date_str:
        return None
    match = re.search(r"(19|20)\d{2}", str(date_str))
    return int(match.group(0)) if match else None
60
+
61
+
62
# Zero-padded month numbers "01".."12", in calendar order.
MONTH_NAMES = [str(number).zfill(2) for number in range(1, 13)]

# Lower-case English month names and abbreviations -> zero-padded month number.
MONTH_LOOKUP = {
    # Full names, numbered by their position in the calendar.
    **{
        name: f"{position:02d}"
        for position, name in enumerate(
            (
                "january",
                "february",
                "march",
                "april",
                "may",
                "june",
                "july",
                "august",
                "september",
                "october",
                "november",
                "december",
            ),
            start=1,
        )
    },
    # Abbreviations ("may" is already covered by its full name).
    "jan": "01",
    "feb": "02",
    "mar": "03",
    "apr": "04",
    "jun": "06",
    "jul": "07",
    "aug": "08",
    "sep": "09",
    "sept": "09",
    "oct": "10",
    "nov": "11",
    "dec": "12",
}
89
+
90
+
91
def normalize_month(value: str | int | None) -> str | None:
    """Return *value* as a zero-padded month number ("01".."12"), or ``None``.

    Accepts integers 1-12, numeric strings, and the English month names and
    abbreviations listed in ``MONTH_LOOKUP`` (case-insensitive, surrounding
    whitespace and a trailing "." ignored, so "Sep." works). Anything else
    yields ``None``.
    """
    if value is None:
        return None
    if isinstance(value, int):
        return f"{value:02d}" if 1 <= value <= 12 else None

    raw = str(value).strip().lower().rstrip(".").strip()
    if not raw:
        return None
    if raw.isdigit():
        # isdigit() is true for characters such as "²" that int() rejects;
        # only decimal strings are safe to convert.
        return normalize_month(int(raw)) if raw.isdecimal() else None
    return MONTH_LOOKUP.get(raw)
106
+
107
+
108
def parse_year_month(date_str: str | None) -> tuple[str | None, str | None]:
    """Extract ``(year, month)`` strings from a free-form date.

    *year* is the first ``19xx``/``20xx`` occurrence. *month* comes from a
    numeric ``YYYY-MM`` / ``YYYY/MM`` form when present, otherwise from an
    English month name or abbreviation, and is passed through
    ``normalize_month``. Either element is ``None`` when not found.
    """
    if not date_str:
        return None, None
    text = str(date_str).strip()
    year_match = re.search(r"(19|20)\d{2}", text)
    year = year_match.group(0) if year_match else None

    numeric_match = re.search(r"(19|20)\d{2}[-/](\d{1,2})", text)
    if numeric_match:
        return year, normalize_month(int(numeric_match.group(2)))

    # The lookarounds require the month token to stand alone as a word, so
    # that "declared" is not read as December or "summary" as March. Digits
    # and punctuation may still touch the token ("Sept.", "Jan2020").
    month_word = re.search(
        r"(?<![a-z])("
        r"january|february|march|april|june|july|august|september|october|november|december|"
        r"jan|feb|mar|apr|may|jun|jul|aug|sept|sep|oct|nov|dec"
        r")(?![a-z])",
        text.lower(),
    )
    if month_word:
        return year, normalize_month(month_word.group(1))

    return year, None
129
+
130
+
131
def clean_journal_name(name: str | None) -> str:
    """Reduce a journal citation string to the bare journal name.

    Removes BibTeX braces, parenthesised remarks, volume / number / page
    markers and four-digit years, then trims trailing punctuation and
    collapses whitespace. Returns "Unknown" for empty input or when nothing
    is left.
    """
    if not name:
        return "Unknown"

    # Strip braces first: done last, "{Nature 2020}" would keep a trailing
    # space and "{Journal,}" a trailing comma.
    value = name.replace("{", "").replace("}", "")
    value = re.sub(r"\([^)]*\)", "", value)
    for pattern in (
        r"vol\.\s*\d+",
        r"volume\s*\d+",
        r"no\.\s*\d+",
        r"number\s*\d+",
        r"pp\.\s*\d+[-–]\d+",
        r"pages\s*\d+[-–]\d+",
    ):
        value = re.sub(pattern, "", value, flags=re.IGNORECASE)
    value = re.sub(r"\b(19|20)\d{2}\b", "", value)
    # Removing "vol. 5, no. 3, ..." leaves runs such as ", , , "; strip the
    # whole trailing mix of separators and whitespace, not just the last one.
    value = re.sub(r"[\s,:.;]+$", "", value)
    value = re.sub(r"\s+", " ", value).strip()
    return value or "Unknown"
146
+
147
+
148
def clean_conference_name(name: str | None) -> str:
    """Reduce a proceedings title to the bare conference name.

    Removes BibTeX braces, four-digit years, ordinals ("35th") and a leading
    "Proceedings of (the)", then trims trailing punctuation and collapses
    whitespace. Returns "Unknown" for empty input or when nothing is left.
    """
    if not name:
        return "Unknown"

    # Strip braces first so the trimming steps below see the final text;
    # done last, "{NeurIPS 2019}" would come out as "NeurIPS " with a
    # trailing space.
    value = name.replace("{", "").replace("}", "")
    value = re.sub(r"\b(19|20)\d{2}\b", "", value)
    value = re.sub(r"\b\d+(st|nd|rd|th)\b", "", value, flags=re.IGNORECASE)
    value = re.sub(r"proceedings\s+of\s+the\s+", "", value, flags=re.IGNORECASE)
    value = re.sub(r"proceedings\s+of\s+", "", value, flags=re.IGNORECASE)
    # Strip the whole trailing mix of separators and whitespace (", ." etc.),
    # not only the final punctuation run.
    value = re.sub(r"[\s,:.;]+$", "", value)
    value = re.sub(r"\s+", " ", value).strip()
    return value or "Unknown"
159
+
160
+
161
def classify_venue(name: str | None) -> str:
    """Classify a venue name as "journal", "conference", "other" or "unknown".

    The decision is a case-insensitive keyword search; journal keywords are
    checked before conference keywords. Empty input gives "unknown".
    """
    if not name:
        return "unknown"

    lowered = name.lower()
    keyword_groups = (
        ("journal", ("journal", "transactions", "letters", "review")),
        ("conference", ("conference", "proceedings", "symposium", "workshop", "meeting")),
    )
    for kind, keywords in keyword_groups:
        for keyword in keywords:
            if keyword in lowered:
                return kind
    return "other"
173
+
174
+
175
def format_distribution(count: int, max_count: int, width: int = 20) -> str:
    """Return a bar of "#" characters whose length is *count* scaled to *width*.

    A bar for a non-zero *count* is at least one character long; a zero
    *count* or a non-positive *max_count* gives an empty string.
    """
    if max_count <= 0:
        return ""
    if not count:
        return ""
    scaled = int(round(width * (count / max_count)))
    return "#" * max(1, scaled)
180
+
181
+
182
def similar_title(a: str, b: str, threshold: float = 0.9) -> bool:
    """Return True when the two titles are near-identical, ignoring case.

    Similarity is the ``difflib.SequenceMatcher`` ratio of the lower-cased
    strings and must reach *threshold*. An empty title never matches.
    """
    if not (a and b):
        return False
    matcher = difflib.SequenceMatcher(None, a.lower(), b.lower())
    return matcher.ratio() >= threshold
187
+
188
+
189
async def generate_tags_for_paper(
    client: httpx.AsyncClient,
    provider,
    model: str,
    api_key: str | None,
    paper: dict[str, Any],
    max_retries: int,
    backoff_base: float,
    backoff_max: float,
) -> list[str]:
    """Ask the model for up to five tags describing *paper*.

    The paper's title, authors, abstract (or summary) and keywords are sent
    as a JSON user message. Up to *max_retries* attempts are made; between
    failed attempts the coroutine sleeps for
    ``backoff_delay(backoff_base, attempt, backoff_max)``.

    Raises:
        ProviderError: when the last attempt fails (a non-``ProviderError``
            failure is wrapped with ``error_type="parse_error"``), or when
            *max_retries* allows no attempt at all.
    """
    system_prompt = (
        "You are a scientific paper tagging assistant. "
        "Return ONLY a JSON array of up to 5 tags. "
        "Each tag should be 1-3 words, lowercase, and use underscores."
    )
    user_payload = {
        "title": paper.get("paper_title"),
        "authors": normalize_authors(paper.get("paper_authors")),
        "abstract": paper.get("abstract") or paper.get("summary") or "",
        "keywords": paper.get("keywords") or [],
    }
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": json.dumps(user_payload, ensure_ascii=False)},
    ]

    for attempt in range(1, max_retries + 1):
        try:
            response_text = await call_provider(
                provider,
                model,
                messages,
                schema={},
                api_key=api_key,
                timeout=60.0,
                structured_mode="none",
                client=client,
            )
            tags = parse_tag_list(response_text)
            if not isinstance(tags, list):
                raise ProviderError("Tag response is not a list", error_type="validation_error")
            return [str(tag) for tag in tags][:5]
        except Exception as exc:
            if attempt < max_retries:
                # Not the last attempt: back off and try again.
                await asyncio.sleep(backoff_delay(backoff_base, attempt, backoff_max))
                continue
            # Last attempt: provider errors propagate unchanged, anything
            # else is wrapped so callers only have one type to handle.
            if isinstance(exc, ProviderError):
                raise
            raise ProviderError(str(exc), error_type="parse_error") from exc

    # Only reachable when the loop body never ran (max_retries < 1).
    raise ProviderError("Max retries exceeded")
245
+
246
+
247
def parse_tag_list(text: str) -> list[str]:
    """Parse a model reply into a list of tag strings.

    The reply is first tried as JSON (after removing surrounding backticks
    from a fenced reply). If that fails, the outermost ``[...]`` span in the
    text is extracted and parsed instead, which covers fenced replies with a
    language label and replies wrapped in prose.

    Raises:
        ProviderError: with ``error_type="parse_error"`` when no JSON array
            can be decoded, or ``error_type="validation_error"`` when the
            decoded JSON is not a list.
    """
    text = text.strip()
    if text.startswith("```"):
        text = text.strip("`")
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Single backslashes: in a raw string "\\[" would match a literal
        # backslash, so the fallback could never find an array.
        match = re.search(r"\[[\s\S]*\]", text)
        if not match:
            raise ProviderError("No JSON array found", error_type="parse_error")
        try:
            parsed = json.loads(match.group(0))
        except json.JSONDecodeError as exc:
            raise ProviderError("Invalid JSON array in tag response", error_type="parse_error") from exc
    if not isinstance(parsed, list):
        raise ProviderError("Tag response is not a list", error_type="validation_error")
    return [str(item) for item in parsed]
261
+
262
+
263
+ def register_db_commands(db_group: click.Group) -> None:
264
@db_group.command("append-bibtex")
@click.option("-i", "--input", "input_path", required=True, help="Input JSON file path")
@click.option("-b", "--bibtex", "bibtex_path", required=True, help="Input BibTeX file path")
@click.option("-o", "--output", "output_path", required=True, help="Output JSON file path")
def append_bibtex(input_path: str, bibtex_path: str, output_path: str) -> None:
    # Attach to each paper the first BibTeX entry whose title is similar to
    # the paper title, and write only the papers that found a match.
    if not PYBTEX_AVAILABLE:
        raise click.ClickException("pybtex is required for append-bibtex")

    papers = load_json(Path(input_path))
    bib_data = parse_file(bibtex_path)
    # Flatten pybtex entries into plain, JSON-serializable dicts.
    bib_entries = [
        {
            "key": key,
            "type": entry.type,
            "fields": dict(entry.fields),
            "persons": {role: [str(p) for p in persons] for role, persons in entry.persons.items()},
        }
        for key, entry in bib_data.entries.items()
    ]

    appended = []
    for paper in papers:
        title = paper.get("paper_title") or ""
        first_hit = next(
            (
                candidate
                for candidate in bib_entries
                if similar_title(title, candidate.get("fields", {}).get("title", ""))
            ),
            None,
        )
        if first_hit is None:
            continue
        paper["bibtex"] = first_hit
        appended.append(paper)

    write_json(Path(output_path), appended)
    click.echo(f"Appended bibtex for {len(appended)} papers")
299
+
300
@db_group.command("sort-papers")
@click.option("-i", "--input", "input_path", required=True, help="Input JSON file path")
@click.option("-o", "--output", "output_path", required=True, help="Output JSON file path")
@click.option("--order", type=click.Choice(["asc", "desc"]), default="desc")
def sort_papers(input_path: str, output_path: str, order: str) -> None:
    # Sort papers by publication year; papers without a year sort as year 0.
    def year_or_zero(paper: dict[str, Any]) -> int:
        return parse_publication_year(paper) or 0

    ordered = sorted(
        load_json(Path(input_path)),
        key=year_or_zero,
        reverse=(order == "desc"),
    )
    write_json(Path(output_path), ordered)
    click.echo(f"Sorted {len(ordered)} papers")
310
+
311
@db_group.command("split-by-tag")
@click.option("-i", "--input", "input_path", required=True, help="Input JSON file path")
@click.option("-d", "--output-dir", "output_dir", required=True, help="Output directory")
def split_by_tag(input_path: str, output_dir: str) -> None:
    # Group papers by each entry of "ai_generated_tags" and write one
    # "<tag>.json" per tag plus an "index.json" listing the tag names.
    papers = load_json(Path(input_path))
    tag_map: dict[str, list[dict[str, Any]]] = {}
    for paper in papers:
        for tag in paper.get("ai_generated_tags") or []:
            tag_map.setdefault(tag, []).append(paper)

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    for tag, items in tag_map.items():
        # Tags are free-form text. A path separator inside one would make
        # `out_dir / name` point into a (missing) sub-directory or, with a
        # leading "/", outside `out_dir` altogether, so separators are
        # flattened to "_" in the file name only.
        file_stem = re.sub(r"[\\/]+", "_", str(tag))
        write_json(out_dir / f"{file_stem}.json", items)
    write_json(out_dir / "index.json", {"tags": sorted(tag_map.keys())})
    click.echo(f"Split into {len(tag_map)} tag files")
327
+
328
@db_group.command("split-database")
@click.option("-i", "--input", "input_path", required=True, help="Input JSON file path")
@click.option("-d", "--output-dir", "output_dir", required=True, help="Output directory")
@click.option(
    "-c",
    "--criteria",
    type=click.Choice(["year", "alphabetical", "count"]),
    default="count",
)
@click.option("-n", "--count", "chunk_count", default=100, help="Chunk size for count criteria")
def split_database(input_path: str, output_dir: str, criteria: str, chunk_count: int) -> None:
    # Split the database into several JSON files: one per publication year,
    # one per first letter of the title, or fixed-size chunks.
    if criteria == "count" and chunk_count < 1:
        # range() rejects a zero step and a negative one yields no chunks;
        # report the bad option instead of crashing or writing nothing.
        raise click.ClickException("--count must be a positive integer")

    papers = load_json(Path(input_path))
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    if criteria == "year":
        by_year: dict[str, list[dict[str, Any]]] = {}
        for paper in papers:
            year = parse_publication_year(paper)
            by_year.setdefault(str(year) if year else "unknown", []).append(paper)
        for year_key, items in by_year.items():
            write_json(out_dir / f"year_{year_key}.json", items)
        click.echo(f"Split into {len(by_year)} year files")
        return

    if criteria == "alphabetical":
        by_letter: dict[str, list[dict[str, Any]]] = {}
        for paper in papers:
            title = (paper.get("paper_title") or "").strip()
            first = title[:1]
            # Titles that are empty or start with punctuation share the "#"
            # bucket; this also keeps characters such as "/" out of the
            # output file name.
            letter = first.upper() if first.isalnum() else "#"
            by_letter.setdefault(letter, []).append(paper)
        for letter, items in by_letter.items():
            write_json(out_dir / f"{letter}.json", items)
        click.echo(f"Split into {len(by_letter)} letter files")
        return

    chunks = [papers[i : i + chunk_count] for i in range(0, len(papers), chunk_count)]
    for idx, chunk in enumerate(chunks, start=1):
        write_json(out_dir / f"chunk_{idx}.json", chunk)
    click.echo(f"Split into {len(chunks)} chunks")
368
+
369
@db_group.command("statistics")
@click.option("-i", "--input", "input_path", required=True, help="Input JSON file path")
@click.option("--top-n", "top_n", default=20, type=int, show_default=True, help="Top N rows to show")
def statistics(input_path: str, top_n: int) -> None:
    # Print year / month distributions and top-N venue, author and tag
    # tables for the paper database at `input_path`.
    papers = load_json(Path(input_path))
    console = Console()
    console.print(Panel(f"Statistics for {input_path}", title="Paper Statistics"))

    def bump(counter: dict[str, int], key: str) -> None:
        counter[key] = counter.get(key, 0) + 1

    year_counts: dict[str, int] = {}
    month_counts: dict[str, int] = {}
    author_counts: dict[str, int] = {}
    tag_counts: dict[str, int] = {}
    journal_counts: dict[str, int] = {}
    conference_counts: dict[str, int] = {}
    other_venue_counts: dict[str, int] = {}

    for paper in papers:
        bibtex = paper.get("bibtex")
        bibtex_fields: dict[str, Any] = {}
        bibtex_type = None
        if isinstance(bibtex, dict):
            bibtex_fields = bibtex.get("fields", {}) or {}
            bibtex_type = (bibtex.get("type") or "").lower()

        # Same date fallback as parse_publication_year, so records that only
        # carry "paper_publication_date" are not reported as "Unknown".
        date_text = str(paper.get("publication_date") or paper.get("paper_publication_date") or "")
        date_year, date_month = parse_year_month(date_text)

        # BibTeX values win; the extracted date only fills the gaps.
        bibtex_year = str(bibtex_fields.get("year")) if bibtex_fields.get("year") else None
        bump(year_counts, bibtex_year or date_year or "Unknown")
        bump(month_counts, normalize_month(bibtex_fields.get("month")) or date_month or "Unknown")

        for author in normalize_authors(paper.get("paper_authors")):
            bump(author_counts, author)
        for tag in paper.get("ai_generated_tags") or []:
            bump(tag_counts, tag)

        if bibtex_type in {"article"}:
            bump(journal_counts, clean_journal_name(bibtex_fields.get("journal")))
        elif bibtex_type in {"inproceedings", "conference", "proceedings"}:
            bump(conference_counts, clean_conference_name(bibtex_fields.get("booktitle")))
        else:
            # No usable BibTeX type: classify the extracted venue by keyword.
            extracted_venue = paper.get("publication_venue")
            venue_kind = classify_venue(extracted_venue)
            if venue_kind == "journal":
                bump(journal_counts, clean_journal_name(extracted_venue))
            elif venue_kind == "conference":
                bump(conference_counts, clean_conference_name(extracted_venue))
            elif extracted_venue:
                bump(other_venue_counts, clean_conference_name(extracted_venue))

    total = len(papers)
    console.print(f"Total papers: {total}")

    def percent(count: int) -> str:
        percentage = (count / total * 100) if total else 0
        return f"{percentage:.1f}%"

    def print_distribution(title: str, label: str, rows: list[tuple[str, int]], peak: int) -> None:
        # Table with a "#" bar scaled against the largest bucket.
        table = Table(title=title)
        table.add_column(label, style="cyan")
        table.add_column("Count", style="green", justify="right")
        table.add_column("Percentage", style="yellow", justify="right")
        table.add_column("Distribution", style="magenta")
        for key, count in rows:
            table.add_row(key, str(count), percent(count), format_distribution(count, peak))
        console.print(table)

    def print_top(title: str, label: str, counts: dict[str, int], count_label: str = "Count") -> None:
        # Table of the `top_n` most frequent keys; skipped when empty.
        if not counts:
            return
        table = Table(title=title)
        table.add_column(label, style="cyan")
        table.add_column(count_label, style="green", justify="right")
        table.add_column("Percentage", style="yellow", justify="right")
        ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)[:top_n]
        for key, count in ranked:
            table.add_row(key, str(count), percent(count))
        console.print(table)

    def year_sort_key(item: tuple[str, int]) -> tuple[int, int]:
        # Newest year first, non-numeric labels next, "Unknown" last.
        label = item[0]
        if label == "Unknown":
            return (1, 0)
        if label.isdigit():
            return (0, -int(label))
        return (0, 0)

    def month_sort_key(item: tuple[str, int]) -> int:
        # Calendar order, unexpected labels next, "Unknown" last.
        if item[0] == "Unknown":
            return 99
        if item[0] in MONTH_NAMES:
            return MONTH_NAMES.index(item[0])
        return 98

    print_distribution(
        "Publication Year Statistics",
        "Year",
        sorted(year_counts.items(), key=year_sort_key),
        max(year_counts.values()) if year_counts else 0,
    )
    print_distribution(
        "Publication Month Statistics",
        "Month",
        sorted(month_counts.items(), key=month_sort_key),
        max(month_counts.values()) if month_counts else 0,
    )

    print_top(f"Top {top_n} Journals", "Journal", journal_counts)
    print_top(f"Top {top_n} Conferences", "Conference", conference_counts)
    print_top(f"Top {top_n} Other Venues", "Venue", other_venue_counts)
    print_top(f"Top {top_n} Authors", "Author", author_counts, count_label="Papers")
    print_top(f"Top {top_n} Tags", "Tag", tag_counts)
543
+
544
@db_group.command("serve")
@click.option("-i", "--input", "input_paths", multiple=True, required=True, help="Input JSON file path")
@click.option("-b", "--bibtex", "bibtex_path", default=None, help="Optional BibTeX file path")
@click.option(
    "--md-root",
    "md_roots",
    multiple=True,
    default=(),
    help="Optional markdown root directory (repeatable) for source viewing",
)
@click.option(
    "--pdf-root",
    "pdf_roots",
    multiple=True,
    default=(),
    help="Optional PDF root directory (repeatable) for in-page PDF viewing",
)
@click.option("--cache-dir", "cache_dir", default=None, help="Cache directory for merged inputs")
@click.option("--no-cache", "no_cache", is_flag=True, help="Disable cache for db serve")
@click.option("--host", default="127.0.0.1", show_default=True, help="Bind host")
@click.option("--port", default=8000, type=int, show_default=True, help="Bind port")
@click.option(
    "--language",
    "fallback_language",
    default="en",
    show_default=True,
    help="Fallback output language for rendering",
)
def serve(
    input_paths: tuple[str, ...],
    bibtex_path: str | None,
    md_roots: tuple[str, ...],
    pdf_roots: tuple[str, ...],
    cache_dir: str | None,
    no_cache: bool,
    host: str,
    port: int,
    fallback_language: str,
) -> None:
    """Serve a local, read-only web UI for a paper database JSON file."""
    # Imported here so the web stack is only loaded when this command runs.
    from deepresearch_flow.paper.web.app import create_app
    import uvicorn

    try:
        app_options = {
            "db_paths": [Path(path) for path in input_paths],
            "fallback_language": fallback_language,
            "bibtex_path": Path(bibtex_path) if bibtex_path else None,
            "md_roots": [Path(root) for root in md_roots],
            "pdf_roots": [Path(root) for root in pdf_roots],
            "cache_dir": Path(cache_dir) if cache_dir else None,
            "use_cache": not no_cache,
        }
        app = create_app(**app_options)
    except Exception as exc:
        # Surface any failure while building the app as a CLI error message.
        raise click.ClickException(str(exc)) from exc

    click.echo(f"Serving on http://{host}:{port} (Ctrl+C to stop)")
    uvicorn.run(app, host=host, port=port, log_level="info")
601
+
602
@db_group.command("generate-tags")
@click.option("-i", "--input", "input_path", required=True, help="Input JSON file path")
@click.option("-o", "--output", "output_path", required=True, help="Output JSON file path")
@click.option("-c", "--config", "config_path", default="config.toml", help="Path to config.toml")
@click.option("-m", "--model", "model_ref", required=True, help="provider/model")
@click.option("-w", "--workers", "workers", default=4, type=int, help="Concurrent workers")
def generate_tags(input_path: str, output_path: str, config_path: str, model_ref: str, workers: int) -> None:
    # Generate "ai_generated_tags" for every paper with the configured model,
    # running at most `workers` requests at a time, then write the result.
    if workers < 1:
        # asyncio.Semaphore(0) would block every task forever and a negative
        # value raises ValueError; reject both as a bad option.
        raise click.ClickException("--workers must be a positive integer")

    async def _run() -> None:
        config = load_config(config_path)
        provider, model_name = parse_model_ref(model_ref, config.providers)
        keys = resolve_api_keys(provider.api_keys)
        key_required = provider.type in {
            "openai_compatible",
            "dashscope",
            "gemini_ai_studio",
            "azure_openai",
            "claude",
        }
        if key_required and not keys:
            raise click.ClickException(f"{provider.type} providers require api_keys")

        papers = load_json(Path(input_path))
        semaphore = asyncio.Semaphore(workers)
        key_idx = 0

        async with httpx.AsyncClient() as client:

            async def process_one(paper: dict[str, Any]) -> None:
                nonlocal key_idx
                async with semaphore:
                    # Rotate through the configured API keys, one per request.
                    api_key = None
                    if keys:
                        api_key = keys[key_idx % len(keys)]
                        key_idx += 1
                    paper["ai_generated_tags"] = await generate_tags_for_paper(
                        client,
                        provider,
                        model_name,
                        api_key,
                        paper,
                        max_retries=config.extract.max_retries,
                        backoff_base=config.extract.backoff_base_seconds,
                        backoff_max=config.extract.backoff_max_seconds,
                    )

            await asyncio.gather(*(process_one(paper) for paper in papers))

        write_json(Path(output_path), papers)
        click.echo(f"Generated tags for {len(papers)} papers")

    asyncio.run(_run())
652
+
653
@db_group.command("filter")
@click.option("-i", "--input", "input_path", required=True, help="Input JSON file path")
@click.option("-o", "--output", "output_path", required=True, help="Output JSON file path")
@click.option("-t", "--tags", default=None, help="Comma-separated tags")
@click.option("-y", "--years", default=None, help="Year range (e.g. 2018-2024, -2019, 2020-)")
@click.option("-a", "--authors", default=None, help="Comma-separated author names")
@click.option("-l", "--limit", default=None, type=int, help="Limit results")
@click.option("-r", "--order", type=click.Choice(["asc", "desc"]), default="desc")
def filter_papers(
    input_path: str,
    output_path: str,
    tags: str | None,
    years: str | None,
    authors: str | None,
    limit: int | None,
    order: str,
) -> None:
    # Keep papers matching any requested tag, any requested author and the
    # year range, sort them by year, optionally truncate, and write them.

    def split_csv(text: str | None) -> set[str]:
        # Empty entries (from "a,,b" or a trailing comma) are dropped so they
        # cannot turn into a filter that matches nothing.
        if not text:
            return set()
        return {part.strip() for part in text.split(",") if part.strip()}

    def parse_year_bounds(spec: str) -> tuple[int | None, int | None]:
        # "2018-2024" -> (2018, 2024); "-2019" -> (None, 2019);
        # "2020-" -> (2020, None); "2021" -> (2021, 2021).
        text = spec.strip()
        try:
            if "-" not in text:
                single = int(text)
                return single, single
            start, _, end = text.partition("-")
            low = int(start) if start.strip() else None
            high = int(end) if end.strip() else None
        except ValueError as exc:
            raise click.ClickException(f"Invalid --years value: {spec!r}") from exc
        if low is None and high is None:
            raise click.ClickException(f"Invalid --years value: {spec!r}")
        return low, high

    # Validate the range once, before any file is read, so a malformed value
    # is a clean CLI error instead of a ValueError on the first dated paper.
    year_bounds = parse_year_bounds(years) if years else None

    papers = load_json(Path(input_path))
    tag_set = split_csv(tags)
    author_set = split_csv(authors)

    def year_match(paper: dict[str, Any]) -> bool:
        if year_bounds is None:
            return True
        year = parse_publication_year(paper)
        if year is None:
            return False
        low, high = year_bounds
        if low is not None and year < low:
            return False
        if high is not None and year > high:
            return False
        return True

    filtered = []
    for paper in papers:
        if tag_set and not tag_set.intersection(paper.get("ai_generated_tags") or []):
            continue
        if author_set and not author_set.intersection(normalize_authors(paper.get("paper_authors"))):
            continue
        if not year_match(paper):
            continue
        filtered.append(paper)

    filtered.sort(key=lambda p: parse_publication_year(p) or 0, reverse=(order == "desc"))
    # A limit of 0 (like an omitted limit) leaves the result untruncated.
    if limit:
        filtered = filtered[:limit]
    write_json(Path(output_path), filtered)
    click.echo(f"Filtered down to {len(filtered)} papers")
708
+
709
@db_group.command("merge")
@click.option("-i", "--inputs", "input_paths", multiple=True, required=True, help="Input JSON files")
@click.option("-o", "--output", "output_path", required=True, help="Output JSON file path")
def merge_papers(input_paths: Iterable[str], output_path: str) -> None:
    # Concatenate the paper lists of all input files, in the order given.
    merged: list[dict[str, Any]] = []
    for source in input_paths:
        merged += load_json(Path(source))
    write_json(Path(output_path), merged)
    click.echo(f"Merged {len(input_paths)} files into {output_path}")
718
+
719
@db_group.command("render-md")
@click.option("-i", "--input", "input_path", required=True, help="Input JSON file path")
@click.option("-d", "--output-dir", "output_dir", default="rendered_md", help="Output directory")
@click.option(
    "-t",
    "--markdown-template",
    "--template",
    "template_path",
    default=None,
    help="Jinja2 template path",
)
@click.option(
    "-n",
    "--template-name",
    "template_name",
    default=None,
    type=click.Choice(list_template_names()),
    help="Built-in template name",
)
@click.option(
    "-T",
    "--template-dir",
    "template_dir",
    default=None,
    help="Directory containing render.j2",
)
@click.option(
    "-l",
    "--language",
    "output_language",
    default="en",
    show_default=True,
    help="Fallback output language for rendering",
)
def render_md(
    input_path: str,
    output_dir: str,
    template_path: str | None,
    template_name: str | None,
    template_dir: str | None,
    output_language: str,
) -> None:
    # Render every paper in the database to markdown with the selected
    # template and report how many files were produced.
    papers = load_json(Path(input_path))
    destination = Path(output_dir)
    try:
        template = resolve_render_template(template_path, template_name, template_dir)
    except ValueError as exc:
        # An unusable template selection is reported as a CLI error.
        raise click.ClickException(str(exc)) from exc
    rendered_count = render_papers(papers, destination, template, output_language)
    click.echo(f"Rendered {rendered_count} markdown files")