deepresearch-flow 0.1.1 (deepresearch_flow-0.1.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (417)
  1. deepresearch_flow/__init__.py +5 -0
  2. deepresearch_flow/cli.py +23 -0
  3. deepresearch_flow/paper/__init__.py +1 -0
  4. deepresearch_flow/paper/cli.py +286 -0
  5. deepresearch_flow/paper/config.py +249 -0
  6. deepresearch_flow/paper/db.py +768 -0
  7. deepresearch_flow/paper/extract.py +870 -0
  8. deepresearch_flow/paper/llm.py +115 -0
  9. deepresearch_flow/paper/prompt_templates/__init__.py +1 -0
  10. deepresearch_flow/paper/prompt_templates/deep_read_system.j2 +6 -0
  11. deepresearch_flow/paper/prompt_templates/deep_read_user.j2 +82 -0
  12. deepresearch_flow/paper/prompt_templates/eight_questions_system.j2 +6 -0
  13. deepresearch_flow/paper/prompt_templates/eight_questions_user.j2 +28 -0
  14. deepresearch_flow/paper/prompt_templates/simple_system.j2 +6 -0
  15. deepresearch_flow/paper/prompt_templates/simple_user.j2 +24 -0
  16. deepresearch_flow/paper/prompt_templates/three_pass_system.j2 +6 -0
  17. deepresearch_flow/paper/prompt_templates/three_pass_user.j2 +44 -0
  18. deepresearch_flow/paper/prompts.py +11 -0
  19. deepresearch_flow/paper/providers/__init__.py +1 -0
  20. deepresearch_flow/paper/providers/azure_openai.py +66 -0
  21. deepresearch_flow/paper/providers/base.py +19 -0
  22. deepresearch_flow/paper/providers/claude.py +71 -0
  23. deepresearch_flow/paper/providers/dashscope.py +58 -0
  24. deepresearch_flow/paper/providers/gemini.py +116 -0
  25. deepresearch_flow/paper/providers/ollama.py +46 -0
  26. deepresearch_flow/paper/providers/openai_compatible.py +60 -0
  27. deepresearch_flow/paper/render.py +64 -0
  28. deepresearch_flow/paper/schema.py +58 -0
  29. deepresearch_flow/paper/schemas/__init__.py +1 -0
  30. deepresearch_flow/paper/schemas/deep_read_schema.json +46 -0
  31. deepresearch_flow/paper/schemas/default_paper_schema.json +47 -0
  32. deepresearch_flow/paper/schemas/eight_questions_schema.json +34 -0
  33. deepresearch_flow/paper/schemas/three_pass_schema.json +24 -0
  34. deepresearch_flow/paper/template_registry.py +189 -0
  35. deepresearch_flow/paper/templates/__init__.py +1 -0
  36. deepresearch_flow/paper/templates/deep_read.md.j2 +79 -0
  37. deepresearch_flow/paper/templates/default_paper.md.j2 +32 -0
  38. deepresearch_flow/paper/templates/eight_questions.md.j2 +49 -0
  39. deepresearch_flow/paper/templates/three_pass.md.j2 +28 -0
  40. deepresearch_flow/paper/utils.py +136 -0
  41. deepresearch_flow/paper/web/__init__.py +2 -0
  42. deepresearch_flow/paper/web/app.py +2307 -0
  43. deepresearch_flow/paper/web/pdfjs/LICENSE +177 -0
  44. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-EUC-H.bcmap +0 -0
  45. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-EUC-V.bcmap +0 -0
  46. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-H.bcmap +0 -0
  47. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-RKSJ-H.bcmap +0 -0
  48. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-RKSJ-V.bcmap +0 -0
  49. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-V.bcmap +0 -0
  50. deepresearch_flow/paper/web/pdfjs/web/cmaps/78ms-RKSJ-H.bcmap +0 -0
  51. deepresearch_flow/paper/web/pdfjs/web/cmaps/78ms-RKSJ-V.bcmap +0 -0
  52. deepresearch_flow/paper/web/pdfjs/web/cmaps/83pv-RKSJ-H.bcmap +0 -0
  53. deepresearch_flow/paper/web/pdfjs/web/cmaps/90ms-RKSJ-H.bcmap +0 -0
  54. deepresearch_flow/paper/web/pdfjs/web/cmaps/90ms-RKSJ-V.bcmap +0 -0
  55. deepresearch_flow/paper/web/pdfjs/web/cmaps/90msp-RKSJ-H.bcmap +0 -0
  56. deepresearch_flow/paper/web/pdfjs/web/cmaps/90msp-RKSJ-V.bcmap +0 -0
  57. deepresearch_flow/paper/web/pdfjs/web/cmaps/90pv-RKSJ-H.bcmap +0 -0
  58. deepresearch_flow/paper/web/pdfjs/web/cmaps/90pv-RKSJ-V.bcmap +0 -0
  59. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-H.bcmap +0 -0
  60. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-RKSJ-H.bcmap +0 -0
  61. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-RKSJ-V.bcmap +0 -0
  62. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-V.bcmap +0 -0
  63. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-0.bcmap +0 -0
  64. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-1.bcmap +0 -0
  65. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-2.bcmap +0 -0
  66. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-3.bcmap +0 -0
  67. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-4.bcmap +0 -0
  68. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-5.bcmap +0 -0
  69. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-6.bcmap +0 -0
  70. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  71. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-0.bcmap +0 -0
  72. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-1.bcmap +0 -0
  73. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-2.bcmap +0 -0
  74. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-3.bcmap +0 -0
  75. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-4.bcmap +0 -0
  76. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-5.bcmap +0 -0
  77. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  78. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-0.bcmap +0 -0
  79. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-1.bcmap +0 -0
  80. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-2.bcmap +0 -0
  81. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-3.bcmap +0 -0
  82. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-4.bcmap +0 -0
  83. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-5.bcmap +0 -0
  84. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-6.bcmap +0 -0
  85. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  86. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-0.bcmap +0 -0
  87. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-1.bcmap +0 -0
  88. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-2.bcmap +0 -0
  89. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  90. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5-H.bcmap +0 -0
  91. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5-V.bcmap +0 -0
  92. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5pc-H.bcmap +0 -0
  93. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5pc-V.bcmap +0 -0
  94. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS-EUC-H.bcmap +0 -0
  95. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS-EUC-V.bcmap +0 -0
  96. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS1-H.bcmap +0 -0
  97. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS1-V.bcmap +0 -0
  98. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS2-H.bcmap +0 -0
  99. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS2-V.bcmap +3 -0
  100. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETHK-B5-H.bcmap +0 -0
  101. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETHK-B5-V.bcmap +0 -0
  102. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETen-B5-H.bcmap +0 -0
  103. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETen-B5-V.bcmap +0 -0
  104. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETenms-B5-H.bcmap +3 -0
  105. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETenms-B5-V.bcmap +0 -0
  106. deepresearch_flow/paper/web/pdfjs/web/cmaps/EUC-H.bcmap +0 -0
  107. deepresearch_flow/paper/web/pdfjs/web/cmaps/EUC-V.bcmap +0 -0
  108. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-H.bcmap +0 -0
  109. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-RKSJ-H.bcmap +0 -0
  110. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-RKSJ-V.bcmap +0 -0
  111. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-V.bcmap +0 -0
  112. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-EUC-H.bcmap +0 -0
  113. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-EUC-V.bcmap +0 -0
  114. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-H.bcmap +4 -0
  115. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-V.bcmap +0 -0
  116. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK-EUC-H.bcmap +0 -0
  117. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK-EUC-V.bcmap +0 -0
  118. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK2K-H.bcmap +0 -0
  119. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK2K-V.bcmap +0 -0
  120. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBKp-EUC-H.bcmap +0 -0
  121. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBKp-EUC-V.bcmap +0 -0
  122. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-EUC-H.bcmap +0 -0
  123. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-EUC-V.bcmap +0 -0
  124. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-H.bcmap +0 -0
  125. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-V.bcmap +0 -0
  126. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBTpc-EUC-H.bcmap +0 -0
  127. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBTpc-EUC-V.bcmap +0 -0
  128. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBpc-EUC-H.bcmap +0 -0
  129. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBpc-EUC-V.bcmap +0 -0
  130. deepresearch_flow/paper/web/pdfjs/web/cmaps/H.bcmap +0 -0
  131. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdla-B5-H.bcmap +0 -0
  132. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdla-B5-V.bcmap +0 -0
  133. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdlb-B5-H.bcmap +0 -0
  134. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdlb-B5-V.bcmap +0 -0
  135. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKgccs-B5-H.bcmap +0 -0
  136. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKgccs-B5-V.bcmap +0 -0
  137. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm314-B5-H.bcmap +0 -0
  138. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm314-B5-V.bcmap +0 -0
  139. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm471-B5-H.bcmap +0 -0
  140. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm471-B5-V.bcmap +0 -0
  141. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKscs-B5-H.bcmap +0 -0
  142. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKscs-B5-V.bcmap +0 -0
  143. deepresearch_flow/paper/web/pdfjs/web/cmaps/Hankaku.bcmap +0 -0
  144. deepresearch_flow/paper/web/pdfjs/web/cmaps/Hiragana.bcmap +0 -0
  145. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-EUC-H.bcmap +0 -0
  146. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-EUC-V.bcmap +0 -0
  147. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-H.bcmap +0 -0
  148. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-Johab-H.bcmap +0 -0
  149. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-Johab-V.bcmap +0 -0
  150. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-V.bcmap +0 -0
  151. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-H.bcmap +0 -0
  152. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  153. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  154. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-V.bcmap +0 -0
  155. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCpc-EUC-H.bcmap +0 -0
  156. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCpc-EUC-V.bcmap +0 -0
  157. deepresearch_flow/paper/web/pdfjs/web/cmaps/Katakana.bcmap +0 -0
  158. deepresearch_flow/paper/web/pdfjs/web/cmaps/LICENSE +36 -0
  159. deepresearch_flow/paper/web/pdfjs/web/cmaps/NWP-H.bcmap +0 -0
  160. deepresearch_flow/paper/web/pdfjs/web/cmaps/NWP-V.bcmap +0 -0
  161. deepresearch_flow/paper/web/pdfjs/web/cmaps/RKSJ-H.bcmap +0 -0
  162. deepresearch_flow/paper/web/pdfjs/web/cmaps/RKSJ-V.bcmap +0 -0
  163. deepresearch_flow/paper/web/pdfjs/web/cmaps/Roman.bcmap +0 -0
  164. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  165. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  166. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  167. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  168. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  169. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  170. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  171. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  172. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UCS2-H.bcmap +0 -0
  173. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UCS2-V.bcmap +0 -0
  174. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF16-H.bcmap +0 -0
  175. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF16-V.bcmap +0 -0
  176. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF32-H.bcmap +0 -0
  177. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF32-V.bcmap +0 -0
  178. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF8-H.bcmap +0 -0
  179. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF8-V.bcmap +0 -0
  180. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  181. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  182. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  183. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  184. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  185. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  186. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  187. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  188. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  189. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  190. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  191. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  192. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  193. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  194. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  195. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  196. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  197. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  198. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  199. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  200. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  201. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  202. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  203. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UCS2-H.bcmap +0 -0
  204. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UCS2-V.bcmap +0 -0
  205. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF16-H.bcmap +0 -0
  206. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF16-V.bcmap +0 -0
  207. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF32-H.bcmap +0 -0
  208. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF32-V.bcmap +0 -0
  209. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF8-H.bcmap +0 -0
  210. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF8-V.bcmap +0 -0
  211. deepresearch_flow/paper/web/pdfjs/web/cmaps/V.bcmap +0 -0
  212. deepresearch_flow/paper/web/pdfjs/web/cmaps/WP-Symbol.bcmap +0 -0
  213. deepresearch_flow/paper/web/pdfjs/web/compressed.tracemonkey-pldi-09.pdf +0 -0
  214. deepresearch_flow/paper/web/pdfjs/web/debugger.css +111 -0
  215. deepresearch_flow/paper/web/pdfjs/web/debugger.js +611 -0
  216. deepresearch_flow/paper/web/pdfjs/web/images/altText_add.svg +3 -0
  217. deepresearch_flow/paper/web/pdfjs/web/images/altText_done.svg +3 -0
  218. deepresearch_flow/paper/web/pdfjs/web/images/annotation-check.svg +11 -0
  219. deepresearch_flow/paper/web/pdfjs/web/images/annotation-comment.svg +16 -0
  220. deepresearch_flow/paper/web/pdfjs/web/images/annotation-help.svg +26 -0
  221. deepresearch_flow/paper/web/pdfjs/web/images/annotation-insert.svg +10 -0
  222. deepresearch_flow/paper/web/pdfjs/web/images/annotation-key.svg +11 -0
  223. deepresearch_flow/paper/web/pdfjs/web/images/annotation-newparagraph.svg +11 -0
  224. deepresearch_flow/paper/web/pdfjs/web/images/annotation-noicon.svg +7 -0
  225. deepresearch_flow/paper/web/pdfjs/web/images/annotation-note.svg +42 -0
  226. deepresearch_flow/paper/web/pdfjs/web/images/annotation-paperclip.svg +6 -0
  227. deepresearch_flow/paper/web/pdfjs/web/images/annotation-paragraph.svg +16 -0
  228. deepresearch_flow/paper/web/pdfjs/web/images/annotation-pushpin.svg +7 -0
  229. deepresearch_flow/paper/web/pdfjs/web/images/cursor-editorFreeText.svg +3 -0
  230. deepresearch_flow/paper/web/pdfjs/web/images/cursor-editorInk.svg +4 -0
  231. deepresearch_flow/paper/web/pdfjs/web/images/findbarButton-next.svg +3 -0
  232. deepresearch_flow/paper/web/pdfjs/web/images/findbarButton-previous.svg +3 -0
  233. deepresearch_flow/paper/web/pdfjs/web/images/gv-toolbarButton-download.svg +3 -0
  234. deepresearch_flow/paper/web/pdfjs/web/images/gv-toolbarButton-openinapp.svg +11 -0
  235. deepresearch_flow/paper/web/pdfjs/web/images/loading-dark.svg +24 -0
  236. deepresearch_flow/paper/web/pdfjs/web/images/loading-icon.gif +0 -0
  237. deepresearch_flow/paper/web/pdfjs/web/images/loading.svg +1 -0
  238. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-documentProperties.svg +3 -0
  239. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-firstPage.svg +3 -0
  240. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-handTool.svg +3 -0
  241. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-lastPage.svg +3 -0
  242. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-rotateCcw.svg +3 -0
  243. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-rotateCw.svg +3 -0
  244. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollHorizontal.svg +3 -0
  245. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollPage.svg +3 -0
  246. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollVertical.svg +3 -0
  247. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollWrapped.svg +3 -0
  248. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-selectTool.svg +3 -0
  249. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadEven.svg +3 -0
  250. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadNone.svg +3 -0
  251. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadOdd.svg +3 -0
  252. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-bookmark.svg +3 -0
  253. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-currentOutlineItem.svg +3 -0
  254. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-download.svg +4 -0
  255. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorFreeText.svg +3 -0
  256. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorInk.svg +4 -0
  257. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorStamp.svg +8 -0
  258. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-menuArrow.svg +3 -0
  259. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-openFile.svg +3 -0
  260. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-pageDown.svg +3 -0
  261. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-pageUp.svg +3 -0
  262. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-presentationMode.svg +3 -0
  263. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-print.svg +3 -0
  264. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-search.svg +3 -0
  265. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-secondaryToolbarToggle.svg +3 -0
  266. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-sidebarToggle.svg +3 -0
  267. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewAttachments.svg +3 -0
  268. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewLayers.svg +3 -0
  269. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewOutline.svg +3 -0
  270. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewThumbnail.svg +3 -0
  271. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-zoomIn.svg +3 -0
  272. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-zoomOut.svg +3 -0
  273. deepresearch_flow/paper/web/pdfjs/web/images/treeitem-collapsed.svg +1 -0
  274. deepresearch_flow/paper/web/pdfjs/web/images/treeitem-expanded.svg +1 -0
  275. deepresearch_flow/paper/web/pdfjs/web/locale/ach/viewer.properties +203 -0
  276. deepresearch_flow/paper/web/pdfjs/web/locale/af/viewer.properties +156 -0
  277. deepresearch_flow/paper/web/pdfjs/web/locale/an/viewer.properties +222 -0
  278. deepresearch_flow/paper/web/pdfjs/web/locale/ar/viewer.properties +224 -0
  279. deepresearch_flow/paper/web/pdfjs/web/locale/ast/viewer.properties +185 -0
  280. deepresearch_flow/paper/web/pdfjs/web/locale/az/viewer.properties +222 -0
  281. deepresearch_flow/paper/web/pdfjs/web/locale/be/viewer.properties +270 -0
  282. deepresearch_flow/paper/web/pdfjs/web/locale/bg/viewer.properties +214 -0
  283. deepresearch_flow/paper/web/pdfjs/web/locale/bn/viewer.properties +218 -0
  284. deepresearch_flow/paper/web/pdfjs/web/locale/bo/viewer.properties +217 -0
  285. deepresearch_flow/paper/web/pdfjs/web/locale/br/viewer.properties +224 -0
  286. deepresearch_flow/paper/web/pdfjs/web/locale/brx/viewer.properties +184 -0
  287. deepresearch_flow/paper/web/pdfjs/web/locale/bs/viewer.properties +173 -0
  288. deepresearch_flow/paper/web/pdfjs/web/locale/ca/viewer.properties +256 -0
  289. deepresearch_flow/paper/web/pdfjs/web/locale/cak/viewer.properties +253 -0
  290. deepresearch_flow/paper/web/pdfjs/web/locale/ckb/viewer.properties +213 -0
  291. deepresearch_flow/paper/web/pdfjs/web/locale/cs/viewer.properties +284 -0
  292. deepresearch_flow/paper/web/pdfjs/web/locale/cy/viewer.properties +270 -0
  293. deepresearch_flow/paper/web/pdfjs/web/locale/da/viewer.properties +270 -0
  294. deepresearch_flow/paper/web/pdfjs/web/locale/de/viewer.properties +270 -0
  295. deepresearch_flow/paper/web/pdfjs/web/locale/dsb/viewer.properties +284 -0
  296. deepresearch_flow/paper/web/pdfjs/web/locale/el/viewer.properties +270 -0
  297. deepresearch_flow/paper/web/pdfjs/web/locale/en-CA/viewer.properties +270 -0
  298. deepresearch_flow/paper/web/pdfjs/web/locale/en-GB/viewer.properties +284 -0
  299. deepresearch_flow/paper/web/pdfjs/web/locale/en-US/viewer.properties +282 -0
  300. deepresearch_flow/paper/web/pdfjs/web/locale/eo/viewer.properties +270 -0
  301. deepresearch_flow/paper/web/pdfjs/web/locale/es-AR/viewer.properties +284 -0
  302. deepresearch_flow/paper/web/pdfjs/web/locale/es-CL/viewer.properties +284 -0
  303. deepresearch_flow/paper/web/pdfjs/web/locale/es-ES/viewer.properties +270 -0
  304. deepresearch_flow/paper/web/pdfjs/web/locale/es-MX/viewer.properties +257 -0
  305. deepresearch_flow/paper/web/pdfjs/web/locale/et/viewer.properties +229 -0
  306. deepresearch_flow/paper/web/pdfjs/web/locale/eu/viewer.properties +284 -0
  307. deepresearch_flow/paper/web/pdfjs/web/locale/fa/viewer.properties +221 -0
  308. deepresearch_flow/paper/web/pdfjs/web/locale/ff/viewer.properties +214 -0
  309. deepresearch_flow/paper/web/pdfjs/web/locale/fi/viewer.properties +270 -0
  310. deepresearch_flow/paper/web/pdfjs/web/locale/fr/viewer.properties +270 -0
  311. deepresearch_flow/paper/web/pdfjs/web/locale/fur/viewer.properties +270 -0
  312. deepresearch_flow/paper/web/pdfjs/web/locale/fy-NL/viewer.properties +270 -0
  313. deepresearch_flow/paper/web/pdfjs/web/locale/ga-IE/viewer.properties +181 -0
  314. deepresearch_flow/paper/web/pdfjs/web/locale/gd/viewer.properties +257 -0
  315. deepresearch_flow/paper/web/pdfjs/web/locale/gl/viewer.properties +267 -0
  316. deepresearch_flow/paper/web/pdfjs/web/locale/gn/viewer.properties +278 -0
  317. deepresearch_flow/paper/web/pdfjs/web/locale/gu-IN/viewer.properties +214 -0
  318. deepresearch_flow/paper/web/pdfjs/web/locale/he/viewer.properties +283 -0
  319. deepresearch_flow/paper/web/pdfjs/web/locale/hi-IN/viewer.properties +227 -0
  320. deepresearch_flow/paper/web/pdfjs/web/locale/hr/viewer.properties +243 -0
  321. deepresearch_flow/paper/web/pdfjs/web/locale/hsb/viewer.properties +284 -0
  322. deepresearch_flow/paper/web/pdfjs/web/locale/hu/viewer.properties +284 -0
  323. deepresearch_flow/paper/web/pdfjs/web/locale/hy-AM/viewer.properties +232 -0
  324. deepresearch_flow/paper/web/pdfjs/web/locale/hye/viewer.properties +229 -0
  325. deepresearch_flow/paper/web/pdfjs/web/locale/ia/viewer.properties +284 -0
  326. deepresearch_flow/paper/web/pdfjs/web/locale/id/viewer.properties +253 -0
  327. deepresearch_flow/paper/web/pdfjs/web/locale/is/viewer.properties +284 -0
  328. deepresearch_flow/paper/web/pdfjs/web/locale/it/viewer.properties +284 -0
  329. deepresearch_flow/paper/web/pdfjs/web/locale/ja/viewer.properties +270 -0
  330. deepresearch_flow/paper/web/pdfjs/web/locale/ka/viewer.properties +284 -0
  331. deepresearch_flow/paper/web/pdfjs/web/locale/kab/viewer.properties +264 -0
  332. deepresearch_flow/paper/web/pdfjs/web/locale/kk/viewer.properties +284 -0
  333. deepresearch_flow/paper/web/pdfjs/web/locale/km/viewer.properties +189 -0
  334. deepresearch_flow/paper/web/pdfjs/web/locale/kn/viewer.properties +166 -0
  335. deepresearch_flow/paper/web/pdfjs/web/locale/ko/viewer.properties +284 -0
  336. deepresearch_flow/paper/web/pdfjs/web/locale/lij/viewer.properties +214 -0
  337. deepresearch_flow/paper/web/pdfjs/web/locale/lo/viewer.properties +257 -0
  338. deepresearch_flow/paper/web/pdfjs/web/locale/locale.properties +333 -0
  339. deepresearch_flow/paper/web/pdfjs/web/locale/lt/viewer.properties +229 -0
  340. deepresearch_flow/paper/web/pdfjs/web/locale/ltg/viewer.properties +192 -0
  341. deepresearch_flow/paper/web/pdfjs/web/locale/lv/viewer.properties +214 -0
  342. deepresearch_flow/paper/web/pdfjs/web/locale/meh/viewer.properties +106 -0
  343. deepresearch_flow/paper/web/pdfjs/web/locale/mk/viewer.properties +211 -0
  344. deepresearch_flow/paper/web/pdfjs/web/locale/mr/viewer.properties +210 -0
  345. deepresearch_flow/paper/web/pdfjs/web/locale/ms/viewer.properties +214 -0
  346. deepresearch_flow/paper/web/pdfjs/web/locale/my/viewer.properties +170 -0
  347. deepresearch_flow/paper/web/pdfjs/web/locale/nb-NO/viewer.properties +284 -0
  348. deepresearch_flow/paper/web/pdfjs/web/locale/ne-NP/viewer.properties +197 -0
  349. deepresearch_flow/paper/web/pdfjs/web/locale/nl/viewer.properties +274 -0
  350. deepresearch_flow/paper/web/pdfjs/web/locale/nn-NO/viewer.properties +270 -0
  351. deepresearch_flow/paper/web/pdfjs/web/locale/oc/viewer.properties +278 -0
  352. deepresearch_flow/paper/web/pdfjs/web/locale/pa-IN/viewer.properties +270 -0
  353. deepresearch_flow/paper/web/pdfjs/web/locale/pl/viewer.properties +270 -0
  354. deepresearch_flow/paper/web/pdfjs/web/locale/pt-BR/viewer.properties +270 -0
  355. deepresearch_flow/paper/web/pdfjs/web/locale/pt-PT/viewer.properties +270 -0
  356. deepresearch_flow/paper/web/pdfjs/web/locale/rm/viewer.properties +270 -0
  357. deepresearch_flow/paper/web/pdfjs/web/locale/ro/viewer.properties +220 -0
  358. deepresearch_flow/paper/web/pdfjs/web/locale/ru/viewer.properties +270 -0
  359. deepresearch_flow/paper/web/pdfjs/web/locale/sat/viewer.properties +270 -0
  360. deepresearch_flow/paper/web/pdfjs/web/locale/sc/viewer.properties +258 -0
  361. deepresearch_flow/paper/web/pdfjs/web/locale/scn/viewer.properties +101 -0
  362. deepresearch_flow/paper/web/pdfjs/web/locale/sco/viewer.properties +226 -0
  363. deepresearch_flow/paper/web/pdfjs/web/locale/si/viewer.properties +228 -0
  364. deepresearch_flow/paper/web/pdfjs/web/locale/sk/viewer.properties +270 -0
  365. deepresearch_flow/paper/web/pdfjs/web/locale/skr/viewer.properties +264 -0
  366. deepresearch_flow/paper/web/pdfjs/web/locale/sl/viewer.properties +284 -0
  367. deepresearch_flow/paper/web/pdfjs/web/locale/son/viewer.properties +152 -0
  368. deepresearch_flow/paper/web/pdfjs/web/locale/sq/viewer.properties +247 -0
  369. deepresearch_flow/paper/web/pdfjs/web/locale/sr/viewer.properties +259 -0
  370. deepresearch_flow/paper/web/pdfjs/web/locale/sv-SE/viewer.properties +284 -0
  371. deepresearch_flow/paper/web/pdfjs/web/locale/szl/viewer.properties +224 -0
  372. deepresearch_flow/paper/web/pdfjs/web/locale/ta/viewer.properties +173 -0
  373. deepresearch_flow/paper/web/pdfjs/web/locale/te/viewer.properties +216 -0
  374. deepresearch_flow/paper/web/pdfjs/web/locale/tg/viewer.properties +281 -0
  375. deepresearch_flow/paper/web/pdfjs/web/locale/th/viewer.properties +270 -0
  376. deepresearch_flow/paper/web/pdfjs/web/locale/tl/viewer.properties +222 -0
  377. deepresearch_flow/paper/web/pdfjs/web/locale/tr/viewer.properties +283 -0
  378. deepresearch_flow/paper/web/pdfjs/web/locale/trs/viewer.properties +184 -0
  379. deepresearch_flow/paper/web/pdfjs/web/locale/uk/viewer.properties +284 -0
  380. deepresearch_flow/paper/web/pdfjs/web/locale/ur/viewer.properties +218 -0
  381. deepresearch_flow/paper/web/pdfjs/web/locale/uz/viewer.properties +142 -0
  382. deepresearch_flow/paper/web/pdfjs/web/locale/vi/viewer.properties +270 -0
  383. deepresearch_flow/paper/web/pdfjs/web/locale/wo/viewer.properties +104 -0
  384. deepresearch_flow/paper/web/pdfjs/web/locale/xh/viewer.properties +156 -0
  385. deepresearch_flow/paper/web/pdfjs/web/locale/zh-CN/viewer.properties +284 -0
  386. deepresearch_flow/paper/web/pdfjs/web/locale/zh-TW/viewer.properties +281 -0
  387. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitDingbats.pfb +0 -0
  388. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixed.pfb +0 -0
  389. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedBold.pfb +0 -0
  390. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  391. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedItalic.pfb +0 -0
  392. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerif.pfb +0 -0
  393. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifBold.pfb +0 -0
  394. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  395. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifItalic.pfb +0 -0
  396. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSymbol.pfb +0 -0
  397. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LICENSE_FOXIT +27 -0
  398. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LICENSE_LIBERATION +102 -0
  399. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Bold.ttf +0 -0
  400. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  401. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Italic.ttf +0 -0
  402. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Regular.ttf +0 -0
  403. deepresearch_flow/paper/web/pdfjs/web/viewer.css +3528 -0
  404. deepresearch_flow/paper/web/pdfjs/web/viewer.html +486 -0
  405. deepresearch_flow/paper/web/pdfjs/web/viewer.js +14099 -0
  406. deepresearch_flow/paper/web/pdfjs/web/viewer.js.map +1 -0
  407. deepresearch_flow/paper/web/query.py +90 -0
  408. deepresearch_flow/recognize/__init__.py +1 -0
  409. deepresearch_flow/recognize/cli.py +469 -0
  410. deepresearch_flow/recognize/markdown.py +277 -0
  411. deepresearch_flow/recognize/organize.py +95 -0
  412. deepresearch_flow-0.1.1.dist-info/METADATA +416 -0
  413. deepresearch_flow-0.1.1.dist-info/RECORD +417 -0
  414. deepresearch_flow-0.1.1.dist-info/WHEEL +5 -0
  415. deepresearch_flow-0.1.1.dist-info/entry_points.txt +2 -0
  416. deepresearch_flow-0.1.1.dist-info/licenses/LICENSE +21 -0
  417. deepresearch_flow-0.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,5 @@
1
+ """DeepResearch Flow package."""
2
+
3
+ __all__ = ["__version__"]
4
+
5
+ __version__ = "0.1.0"
@@ -0,0 +1,23 @@
1
+ """CLI entrypoint for deepresearch-flow."""
2
+
3
+ import click
4
+
5
+ from deepresearch_flow.paper.cli import paper
6
+ from deepresearch_flow.recognize.cli import recognize
7
+
8
+
9
@click.group()
def cli() -> None:
    """DeepResearch Flow command line interface."""


# Attach every sub-command group to the root group.
for _group in (paper, recognize):
    cli.add_command(_group)
del _group


def main() -> None:
    # Console entry point: hand control to the click root group.
    cli()


if __name__ == "__main__":
    main()
@@ -0,0 +1 @@
1
+ """Paper extraction and database tools."""
@@ -0,0 +1,286 @@
1
+ """CLI commands for paper workflows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from pathlib import Path
7
+
8
+ import click
9
+
10
+ from deepresearch_flow.paper.config import load_config, resolve_api_keys
11
+ from deepresearch_flow.paper.extract import extract_documents, parse_model_ref, configure_logging
12
+ from deepresearch_flow.paper.db import register_db_commands
13
+ from deepresearch_flow.paper.schema import load_schema, validate_schema, SchemaError
14
+ from deepresearch_flow.paper.template_registry import list_template_names, load_schema_for_template
15
+
16
+
17
# Click group for the paper workflow; the commands in this module attach to it
# through @paper.command() and @paper.group().
@click.group()
def paper() -> None:
    """Paper extraction and database commands."""
20
+
21
+
22
@paper.command()
@click.option("-c", "--config", "config_path", default="config.toml", help="Path to config.toml")
@click.option(
    "-i",
    "--input",
    "inputs",
    multiple=True,
    required=True,
    help="Input markdown file or directory (repeatable)",
)
@click.option("-g", "--glob", "glob_pattern", default=None, help="Glob filter when input is a directory")
@click.option(
    "-s",
    "--schema-json",
    "--schema",
    "schema_path",
    default=None,
    help="Path to JSON schema",
)
@click.option("--prompt-system", "prompt_system", default=None, help="Custom system prompt template path")
@click.option("--prompt-user", "prompt_user", default=None, help="Custom user prompt template path")
@click.option(
    "--template-dir",
    "template_dir",
    default=None,
    help="Directory containing system.j2, user.j2, schema.json, render.j2",
)
@click.option(
    "--prompt-template",
    "prompt_template",
    default="simple",
    type=click.Choice(list_template_names()),
    show_default=True,
    help="Built-in prompt template",
)
@click.option(
    "--language",
    "output_language",
    default="en",
    show_default=True,
    help="Output language hint for prompts",
)
@click.option("-m", "--model", "model_ref", required=True, help="provider/model")
@click.option("-o", "--output", "output_path", default=None, help="Aggregated JSON output path")
@click.option("-e", "--errors", "errors_path", default=None, help="Error JSON output path")
@click.option("--split", is_flag=True, help="Write per-document JSON outputs")
@click.option("--split-dir", "split_dir", default=None, help="Directory for split outputs")
@click.option("--force", is_flag=True, help="Force re-extraction")
@click.option("--retry-failed", is_flag=True, help="Retry only failed documents")
@click.option("--dry-run", is_flag=True, help="Discover inputs without calling providers")
@click.option("--max-concurrency", "max_concurrency", type=int, default=None, help="Override max concurrency")
@click.option("--sleep-every", "sleep_every", type=int, default=None, help="Sleep after every N requests")
@click.option("--sleep-time", "sleep_time", type=float, default=None, help="Sleep duration in seconds")
@click.option("--render-md", "render_md", is_flag=True, help="Render markdown outputs after extraction")
@click.option(
    "--render-output-dir",
    "render_output_dir",
    default=None,
    help="Output directory for rendered markdown (defaults to --output parent when provided)",
)
@click.option(
    "--render-markdown-template",
    "--render-template",
    "render_template_path",
    default=None,
    help="Jinja2 template path for extract-time rendering",
)
@click.option(
    "--render-template-name",
    "render_template_name",
    default=None,
    type=click.Choice(list_template_names()),
    help="Built-in render template name",
)
@click.option(
    "--render-template-dir",
    "render_template_dir",
    default=None,
    help="Directory containing render.j2 for extract-time rendering",
)
@click.option("-v", "--verbose", is_flag=True, help="Enable verbose logging")
def extract(
    config_path: str,
    inputs: tuple[str, ...],
    glob_pattern: str | None,
    schema_path: str | None,
    prompt_template: str,
    output_language: str,
    prompt_system: str | None,
    prompt_user: str | None,
    template_dir: str | None,
    model_ref: str,
    output_path: str | None,
    errors_path: str | None,
    split: bool,
    split_dir: str | None,
    force: bool,
    retry_failed: bool,
    dry_run: bool,
    max_concurrency: int | None,
    sleep_every: int | None,
    sleep_time: float | None,
    render_md: bool,
    render_output_dir: str | None,
    render_template_path: str | None,
    render_template_name: str | None,
    render_template_dir: str | None,
    verbose: bool,
) -> None:
    """Extract structured information from markdown documents."""
    # The docstring above is the command's help text, so implementation notes
    # live in comments instead.
    #
    # Flow: load config -> resolve provider/model -> validate option
    # combinations -> resolve prompt/render template locations -> load and
    # validate the schema -> run the async extraction.
    # Every validation failure is reported as a click.ClickException.

    # Config problems (missing file, malformed TOML, invalid provider entries)
    # are user errors: report them as CLI errors instead of a traceback.
    try:
        config = load_config(config_path)
    except (FileNotFoundError, ValueError) as exc:
        raise click.ClickException(str(exc)) from exc
    provider, model_name = parse_model_ref(model_ref, config.providers)

    if provider.structured_mode not in {"json_schema", "json_object", "none"}:
        raise click.ClickException("structured_mode must be json_schema, json_object, or none")

    if config.extract.truncate_strategy not in {"head", "head_tail"}:
        raise click.ClickException("truncate_strategy must be head or head_tail")

    if config.extract.max_concurrency <= 0:
        raise click.ClickException("max_concurrency must be positive")
    if config.extract.max_retries <= 0:
        raise click.ClickException("max_retries must be positive")
    if max_concurrency is not None and max_concurrency <= 0:
        raise click.ClickException("--max-concurrency must be positive")
    if sleep_every is not None and sleep_every <= 0:
        raise click.ClickException("--sleep-every must be positive")
    if sleep_time is not None and sleep_time <= 0:
        raise click.ClickException("--sleep-time must be positive")
    # The two throttling options only make sense as a pair.
    if (sleep_every is None) != (sleep_time is None):
        raise click.ClickException("Both --sleep-every and --sleep-time are required")

    # These provider types need at least one key that actually resolves
    # (an "env:NAME" entry whose variable is unset resolves to nothing).
    if provider.type in {
        "openai_compatible",
        "dashscope",
        "gemini_ai_studio",
        "azure_openai",
        "claude",
    }:
        resolved = resolve_api_keys(provider.api_keys)
        if not resolved:
            raise click.ClickException(f"{provider.type} providers require api_keys")

    if template_dir and (prompt_system or prompt_user or schema_path):
        raise click.ClickException("template-dir cannot be combined with custom prompt or schema flags")

    # Exactly one of the two custom prompt paths being set is an error.
    if bool(prompt_system) != bool(prompt_user):
        raise click.ClickException("Both --prompt-system and --prompt-user are required")

    custom_prompt = bool(prompt_system or prompt_user or template_dir)
    if custom_prompt and prompt_template != "simple":
        raise click.ClickException("Custom prompts cannot be combined with built-in prompt templates")

    schema_override = schema_path or None
    prompt_system_path = Path(prompt_system) if prompt_system else None
    prompt_user_path = Path(prompt_user) if prompt_user else None
    template_dir_path = Path(template_dir) if template_dir else None
    if template_dir_path:
        # A template directory supplies prompts and schema under fixed file names.
        prompt_system_path = template_dir_path / "system.j2"
        prompt_user_path = template_dir_path / "user.j2"
        schema_override = str(template_dir_path / "schema.json")

    for prompt_path in (prompt_system_path, prompt_user_path):
        if prompt_path and not prompt_path.exists():
            raise click.ClickException(f"Prompt template not found: {prompt_path}")

    render_template_options = (render_template_path, render_template_name, render_template_dir)
    if not render_md and any(item is not None for item in render_template_options):
        raise click.ClickException("Render template options require --render-md")
    if not render_md and render_output_dir is not None:
        raise click.ClickException("--render-output-dir requires --render-md")
    if render_md and sum(bool(item) for item in render_template_options) > 1:
        raise click.ClickException(
            "Use only one of --render-markdown-template/--render-template, --render-template-name, or --render-template-dir"
        )
    render_template_path_effective = render_template_path
    render_template_name_effective = render_template_name
    render_template_dir_effective = render_template_dir
    render_output_dir_effective: Path | None = None

    # With --render-md but no explicit render template, fall back to the
    # template directory or to the built-in template matching the prompt.
    if render_md and not any(item is not None for item in render_template_options):
        if template_dir:
            render_template_dir_effective = template_dir
        elif not custom_prompt:
            render_template_name_effective = prompt_template
    if render_md:
        if render_output_dir is not None:
            render_output_dir_effective = Path(render_output_dir)
        elif output_path is not None:
            render_output_dir_effective = Path(output_path).parent
        else:
            render_output_dir_effective = Path("rendered_md")

    if render_template_path_effective and not Path(render_template_path_effective).exists():
        raise click.ClickException(f"Render template not found: {render_template_path_effective}")
    if render_template_dir_effective:
        render_template_dir_path = Path(render_template_dir_effective)
        render_template_file = render_template_dir_path / "render.j2"
        if not render_template_file.exists():
            raise click.ClickException(f"Render template not found: {render_template_file}")

    try:
        if schema_override:
            schema = load_schema(schema_override)
        elif prompt_template:
            schema = load_schema_for_template(prompt_template)
        else:
            # NOTE(review): --prompt-template always has a value (default
            # "simple"), so this config.extract.schema_path fallback is never
            # reached. Confirm the intended precedence before changing it.
            schema = load_schema(config.extract.schema_path)
        validator = validate_schema(schema)
    except SchemaError as exc:
        raise click.ClickException(str(exc)) from exc

    # Command-line paths win over the paths configured in config.toml.
    output = Path(output_path or config.extract.output)
    errors = Path(errors_path or config.extract.errors)
    split_out = Path(split_dir) if split_dir else None

    configure_logging(verbose)

    asyncio.run(
        extract_documents(
            inputs=inputs,
            glob_pattern=glob_pattern,
            provider=provider,
            model=model_name,
            schema=schema,
            validator=validator,
            config=config,
            output_path=output,
            errors_path=errors,
            split=split,
            split_dir=split_out,
            force=force,
            retry_failed=retry_failed,
            dry_run=dry_run,
            max_concurrency_override=max_concurrency,
            prompt_template=prompt_template,
            output_language=output_language,
            custom_prompt=custom_prompt,
            prompt_system_path=prompt_system_path,
            prompt_user_path=prompt_user_path,
            render_md=render_md,
            render_output_dir=render_output_dir_effective,
            render_template_path=render_template_path_effective,
            render_template_name=render_template_name_effective,
            render_template_dir=render_template_dir_effective,
            sleep_every=sleep_every,
            sleep_time=sleep_time,
            verbose=verbose,
        )
    )
279
+
280
+
281
# Click sub-group nested under the `paper` group.
@paper.group()
def db() -> None:
    """Database management commands."""


# Hand the group to deepresearch_flow.paper.db, which presumably attaches its
# commands to it (the implementation is outside this module).
register_db_commands(db)
@@ -0,0 +1,249 @@
1
+ """Configuration loading and validation for paper tools."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Any
8
+ import os
9
+ import tomllib
10
+
11
+
12
@dataclass(frozen=True)
class ExtractConfig:
    """Immutable settings read from the ``[extract]`` table of the config file."""

    # Default aggregated-output path, used when the CLI gets no --output.
    output: str
    # Default error-report path, used when the CLI gets no --errors.
    errors: str
    # The CLI rejects values <= 0 for the next two fields.
    max_concurrency: int
    max_retries: int
    # Retry backoff bounds in seconds (consumed outside this module).
    backoff_base_seconds: float
    backoff_max_seconds: float
    # The CLI accepts only "head" or "head_tail".
    truncate_strategy: str
    truncate_max_chars: int
    cost_estimate: bool
    # Optional path to a JSON schema file; None when not configured.
    schema_path: str | None
24
+
25
+
26
@dataclass(frozen=True)
class RenderConfig:
    """Immutable settings read from the ``[render]`` table of the config file."""

    # Optional render template path; None when not configured.
    template_path: str | None
29
+
30
+
31
@dataclass(frozen=True)
class ProviderConfig:
    """Immutable description of one ``providers`` entry from the config file."""

    name: str
    # Provider kind. load_config special-cases "ollama", "openai_compatible",
    # "azure_openai", "dashscope", "gemini_ai_studio", "gemini_vertex" and "claude".
    type: str
    # May be an empty string for provider types that load_config accepts without one.
    base_url: str
    # Literal keys or "env:NAME" references (see resolve_api_keys).
    api_keys: list[str]
    # Required by load_config for "azure_openai".
    api_version: str | None
    deployment: str | None
    # Required by load_config for "gemini_vertex".
    project_id: str | None
    location: str | None
    credentials_path: str | None
    # Required by load_config for "claude".
    anthropic_version: str | None
    # When omitted in the config, load_config picks a default based on the provider type.
    structured_mode: str
    extra_headers: dict[str, str]
    system_prompt: str | None
    user_prompt: str | None
    # load_config rejects providers with an empty model list.
    model_list: list[str]
48
+
49
+
50
@dataclass(frozen=True)
class PaperConfig:
    """Top-level configuration object returned by ``load_config``."""

    extract: ExtractConfig
    render: RenderConfig
    # load_config guarantees at least one entry.
    providers: list[ProviderConfig]
55
+
56
+
57
# Fallback values applied by load_config for any [extract] key missing from the config file.
DEFAULT_EXTRACT = ExtractConfig(
    output="paper_infos.json",
    errors="paper_errors.json",
    max_concurrency=6,
    max_retries=3,
    backoff_base_seconds=1.0,
    backoff_max_seconds=20.0,
    truncate_strategy="head_tail",
    truncate_max_chars=20000,
    cost_estimate=True,
    schema_path=None,
)

# Fallback values applied by load_config for any [render] key missing from the config file.
DEFAULT_RENDER = RenderConfig(template_path=None)
71
+
72
+
73
+ def _as_list(value: Any) -> list[str]:
74
+ if value is None:
75
+ return []
76
+ if isinstance(value, list):
77
+ return [str(item) for item in value]
78
+ return [str(value)]
79
+
80
+
81
+ def _as_bool(value: Any, default: bool) -> bool:
82
+ if value is None:
83
+ return default
84
+ return bool(value)
85
+
86
+
87
+ def _as_int(value: Any, default: int) -> int:
88
+ if value is None:
89
+ return default
90
+ return int(value)
91
+
92
+
93
+ def _as_float(value: Any, default: float) -> float:
94
+ if value is None:
95
+ return default
96
+ return float(value)
97
+
98
+
99
+ def _as_str(value: Any, default: str | None = None) -> str | None:
100
+ if value is None:
101
+ return default
102
+ return str(value)
103
+
104
+
105
# Base URLs used when a provider of the given type does not configure one.
_DEFAULT_BASE_URLS = {
    "ollama": "http://localhost:11434",
    "openai_compatible": "https://api.openai.com/v1",
}

# Provider types accepted without a base_url (stored as an empty string).
# The same set defaults to structured_mode "none".
_BASE_URL_OPTIONAL_TYPES = frozenset({"dashscope", "gemini_ai_studio", "gemini_vertex", "claude"})


def _build_extract_config(extract_data: dict[str, Any]) -> ExtractConfig:
    # Build the [extract] settings, falling back to DEFAULT_EXTRACT per key.
    return ExtractConfig(
        output=_as_str(extract_data.get("output"), DEFAULT_EXTRACT.output) or DEFAULT_EXTRACT.output,
        errors=_as_str(extract_data.get("errors"), DEFAULT_EXTRACT.errors) or DEFAULT_EXTRACT.errors,
        max_concurrency=_as_int(extract_data.get("max_concurrency"), DEFAULT_EXTRACT.max_concurrency),
        max_retries=_as_int(extract_data.get("max_retries"), DEFAULT_EXTRACT.max_retries),
        backoff_base_seconds=_as_float(
            extract_data.get("backoff_base_seconds"), DEFAULT_EXTRACT.backoff_base_seconds
        ),
        backoff_max_seconds=_as_float(
            extract_data.get("backoff_max_seconds"), DEFAULT_EXTRACT.backoff_max_seconds
        ),
        truncate_strategy=_as_str(
            extract_data.get("truncate_strategy"), DEFAULT_EXTRACT.truncate_strategy
        )
        or DEFAULT_EXTRACT.truncate_strategy,
        truncate_max_chars=_as_int(
            extract_data.get("truncate_max_chars"), DEFAULT_EXTRACT.truncate_max_chars
        ),
        cost_estimate=_as_bool(extract_data.get("cost_estimate"), DEFAULT_EXTRACT.cost_estimate),
        schema_path=_as_str(extract_data.get("schema_path"), DEFAULT_EXTRACT.schema_path),
    )


def _resolve_base_url(name: str, provider_type: str, base_url: str | None, endpoint: str | None) -> str:
    # Pick the effective base URL for a provider, applying per-type defaults.
    if provider_type == "azure_openai":
        # For Azure a configured endpoint always wins over base_url. When
        # neither is set, return "" so the Azure-specific validation reports
        # "requires endpoint" rather than the generic base_url error.
        return endpoint or base_url or ""
    if base_url:
        return base_url
    if provider_type in _DEFAULT_BASE_URLS:
        return _DEFAULT_BASE_URLS[provider_type]
    if provider_type in _BASE_URL_OPTIONAL_TYPES:
        return ""
    raise ValueError(f"Provider '{name}' requires base_url")


def _build_provider_config(provider: dict[str, Any]) -> ProviderConfig:
    # Validate one providers entry and convert it into a ProviderConfig.
    name = _as_str(provider.get("name"))
    provider_type = _as_str(provider.get("type"))
    if not name or not provider_type:
        raise ValueError("Each provider must include name and type")

    base_url = _resolve_base_url(
        name,
        provider_type,
        _as_str(provider.get("base_url")),
        _as_str(provider.get("endpoint")),
    )

    # "api_keys" takes precedence; the singular "api_key" is a fallback.
    api_keys = _as_list(provider.get("api_keys"))
    if not api_keys:
        api_keys = _as_list(provider.get("api_key"))

    structured_mode = _as_str(provider.get("structured_mode"), None)
    if structured_mode is None:
        if provider_type == "ollama":
            structured_mode = "json_object"
        elif provider_type in _BASE_URL_OPTIONAL_TYPES:
            structured_mode = "none"
        else:
            structured_mode = "json_schema"

    headers = provider.get("extra_headers")
    extra_headers: dict[str, str] = (
        {str(k): str(v) for k, v in headers.items()} if isinstance(headers, dict) else {}
    )

    model_list = _as_list(provider.get("model_list"))
    if not model_list:
        raise ValueError(f"Provider '{name}' must include model_list")

    api_version = _as_str(provider.get("api_version"), None)
    deployment = _as_str(provider.get("deployment"), None)
    project_id = _as_str(provider.get("project_id"), None)
    location = _as_str(provider.get("location"), None)
    credentials_path = _as_str(provider.get("credentials_path"), None)
    anthropic_version = _as_str(provider.get("anthropic_version"), None)

    # Per-type required fields.
    if provider_type == "azure_openai":
        if not base_url:
            raise ValueError(f"Provider '{name}' requires endpoint")
        if not api_version:
            raise ValueError(f"Provider '{name}' requires api_version")
        if not deployment:
            raise ValueError(f"Provider '{name}' requires deployment")
    if provider_type == "gemini_ai_studio" and not api_keys:
        raise ValueError(f"Provider '{name}' requires api_keys")
    if provider_type == "gemini_vertex":
        if not project_id:
            raise ValueError(f"Provider '{name}' requires project_id")
        if not location:
            raise ValueError(f"Provider '{name}' requires location")
    if provider_type == "claude":
        if not api_keys:
            raise ValueError(f"Provider '{name}' requires api_keys")
        if not anthropic_version:
            raise ValueError(f"Provider '{name}' requires anthropic_version")

    return ProviderConfig(
        name=name,
        type=provider_type,
        base_url=base_url,
        api_keys=api_keys,
        api_version=api_version,
        deployment=deployment,
        project_id=project_id,
        location=location,
        credentials_path=credentials_path,
        anthropic_version=anthropic_version,
        structured_mode=structured_mode,
        extra_headers=extra_headers,
        system_prompt=_as_str(provider.get("system_prompt"), None),
        user_prompt=_as_str(provider.get("user_prompt"), None),
        model_list=model_list,
    )


def load_config(path: str) -> PaperConfig:
    """Load and validate the paper-tools TOML configuration.

    Args:
        path: Location of the TOML config file.

    Returns:
        A ``PaperConfig`` holding the ``extract`` and ``render`` settings
        (missing keys filled from the module defaults) and one
        ``ProviderConfig`` per ``providers`` entry.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the TOML is malformed (``tomllib.TOMLDecodeError`` is a
            ``ValueError`` subclass), ``providers`` is not an array of tables,
            a provider entry lacks a required field, or no providers are
            configured.
    """
    config_path = Path(path)
    if not config_path.exists():
        raise FileNotFoundError(f"Config file not found: {config_path}")

    data = tomllib.loads(config_path.read_text(encoding="utf-8"))

    extract = _build_extract_config(data.get("extract", {}))

    render_data = data.get("render", {})
    render = RenderConfig(template_path=_as_str(render_data.get("template_path"), DEFAULT_RENDER.template_path))

    providers_data = data.get("providers", [])
    # Writing [providers] instead of [[providers]] yields a table rather than
    # an array of tables; fail with a clear message instead of AttributeError.
    if not isinstance(providers_data, list):
        raise ValueError("providers must be an array of tables ([[providers]])")
    providers: list[ProviderConfig] = []
    for provider in providers_data:
        if not isinstance(provider, dict):
            raise ValueError("Each [[providers]] entry must be a table")
        providers.append(_build_provider_config(provider))

    if not providers:
        raise ValueError("Config must include at least one [[providers]] entry")

    return PaperConfig(extract=extract, render=render, providers=providers)
236
+
237
+
238
def resolve_api_keys(entries: list[str]) -> list[str]:
    """Expand configured API key entries into usable key values.

    Entries starting with ``env:`` are looked up in the process environment
    and dropped when the variable is unset or empty. All other entries are
    kept as literal keys.
    """
    keys: list[str] = []
    for raw in entries:
        text = str(raw)
        if not text.startswith("env:"):
            keys.append(text)
            continue
        # Everything after the first colon is the environment variable name.
        _, _, variable = text.partition(":")
        secret = os.environ.get(variable)
        if secret:
            keys.append(secret)
    return keys