deepresearch-flow 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (417) hide show
  1. deepresearch_flow/__init__.py +5 -0
  2. deepresearch_flow/cli.py +23 -0
  3. deepresearch_flow/paper/__init__.py +1 -0
  4. deepresearch_flow/paper/cli.py +286 -0
  5. deepresearch_flow/paper/config.py +249 -0
  6. deepresearch_flow/paper/db.py +768 -0
  7. deepresearch_flow/paper/extract.py +870 -0
  8. deepresearch_flow/paper/llm.py +115 -0
  9. deepresearch_flow/paper/prompt_templates/__init__.py +1 -0
  10. deepresearch_flow/paper/prompt_templates/deep_read_system.j2 +6 -0
  11. deepresearch_flow/paper/prompt_templates/deep_read_user.j2 +82 -0
  12. deepresearch_flow/paper/prompt_templates/eight_questions_system.j2 +6 -0
  13. deepresearch_flow/paper/prompt_templates/eight_questions_user.j2 +28 -0
  14. deepresearch_flow/paper/prompt_templates/simple_system.j2 +6 -0
  15. deepresearch_flow/paper/prompt_templates/simple_user.j2 +24 -0
  16. deepresearch_flow/paper/prompt_templates/three_pass_system.j2 +6 -0
  17. deepresearch_flow/paper/prompt_templates/three_pass_user.j2 +44 -0
  18. deepresearch_flow/paper/prompts.py +11 -0
  19. deepresearch_flow/paper/providers/__init__.py +1 -0
  20. deepresearch_flow/paper/providers/azure_openai.py +66 -0
  21. deepresearch_flow/paper/providers/base.py +19 -0
  22. deepresearch_flow/paper/providers/claude.py +71 -0
  23. deepresearch_flow/paper/providers/dashscope.py +58 -0
  24. deepresearch_flow/paper/providers/gemini.py +116 -0
  25. deepresearch_flow/paper/providers/ollama.py +46 -0
  26. deepresearch_flow/paper/providers/openai_compatible.py +60 -0
  27. deepresearch_flow/paper/render.py +64 -0
  28. deepresearch_flow/paper/schema.py +58 -0
  29. deepresearch_flow/paper/schemas/__init__.py +1 -0
  30. deepresearch_flow/paper/schemas/deep_read_schema.json +46 -0
  31. deepresearch_flow/paper/schemas/default_paper_schema.json +47 -0
  32. deepresearch_flow/paper/schemas/eight_questions_schema.json +34 -0
  33. deepresearch_flow/paper/schemas/three_pass_schema.json +24 -0
  34. deepresearch_flow/paper/template_registry.py +189 -0
  35. deepresearch_flow/paper/templates/__init__.py +1 -0
  36. deepresearch_flow/paper/templates/deep_read.md.j2 +79 -0
  37. deepresearch_flow/paper/templates/default_paper.md.j2 +32 -0
  38. deepresearch_flow/paper/templates/eight_questions.md.j2 +49 -0
  39. deepresearch_flow/paper/templates/three_pass.md.j2 +28 -0
  40. deepresearch_flow/paper/utils.py +136 -0
  41. deepresearch_flow/paper/web/__init__.py +2 -0
  42. deepresearch_flow/paper/web/app.py +2307 -0
  43. deepresearch_flow/paper/web/pdfjs/LICENSE +177 -0
  44. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-EUC-H.bcmap +0 -0
  45. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-EUC-V.bcmap +0 -0
  46. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-H.bcmap +0 -0
  47. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-RKSJ-H.bcmap +0 -0
  48. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-RKSJ-V.bcmap +0 -0
  49. deepresearch_flow/paper/web/pdfjs/web/cmaps/78-V.bcmap +0 -0
  50. deepresearch_flow/paper/web/pdfjs/web/cmaps/78ms-RKSJ-H.bcmap +0 -0
  51. deepresearch_flow/paper/web/pdfjs/web/cmaps/78ms-RKSJ-V.bcmap +0 -0
  52. deepresearch_flow/paper/web/pdfjs/web/cmaps/83pv-RKSJ-H.bcmap +0 -0
  53. deepresearch_flow/paper/web/pdfjs/web/cmaps/90ms-RKSJ-H.bcmap +0 -0
  54. deepresearch_flow/paper/web/pdfjs/web/cmaps/90ms-RKSJ-V.bcmap +0 -0
  55. deepresearch_flow/paper/web/pdfjs/web/cmaps/90msp-RKSJ-H.bcmap +0 -0
  56. deepresearch_flow/paper/web/pdfjs/web/cmaps/90msp-RKSJ-V.bcmap +0 -0
  57. deepresearch_flow/paper/web/pdfjs/web/cmaps/90pv-RKSJ-H.bcmap +0 -0
  58. deepresearch_flow/paper/web/pdfjs/web/cmaps/90pv-RKSJ-V.bcmap +0 -0
  59. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-H.bcmap +0 -0
  60. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-RKSJ-H.bcmap +0 -0
  61. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-RKSJ-V.bcmap +0 -0
  62. deepresearch_flow/paper/web/pdfjs/web/cmaps/Add-V.bcmap +0 -0
  63. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-0.bcmap +0 -0
  64. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-1.bcmap +0 -0
  65. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-2.bcmap +0 -0
  66. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-3.bcmap +0 -0
  67. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-4.bcmap +0 -0
  68. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-5.bcmap +0 -0
  69. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-6.bcmap +0 -0
  70. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-CNS1-UCS2.bcmap +0 -0
  71. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-0.bcmap +0 -0
  72. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-1.bcmap +0 -0
  73. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-2.bcmap +0 -0
  74. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-3.bcmap +0 -0
  75. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-4.bcmap +0 -0
  76. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-5.bcmap +0 -0
  77. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-GB1-UCS2.bcmap +0 -0
  78. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-0.bcmap +0 -0
  79. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-1.bcmap +0 -0
  80. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-2.bcmap +0 -0
  81. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-3.bcmap +0 -0
  82. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-4.bcmap +0 -0
  83. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-5.bcmap +0 -0
  84. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-6.bcmap +0 -0
  85. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Japan1-UCS2.bcmap +0 -0
  86. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-0.bcmap +0 -0
  87. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-1.bcmap +0 -0
  88. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-2.bcmap +0 -0
  89. deepresearch_flow/paper/web/pdfjs/web/cmaps/Adobe-Korea1-UCS2.bcmap +0 -0
  90. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5-H.bcmap +0 -0
  91. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5-V.bcmap +0 -0
  92. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5pc-H.bcmap +0 -0
  93. deepresearch_flow/paper/web/pdfjs/web/cmaps/B5pc-V.bcmap +0 -0
  94. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS-EUC-H.bcmap +0 -0
  95. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS-EUC-V.bcmap +0 -0
  96. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS1-H.bcmap +0 -0
  97. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS1-V.bcmap +0 -0
  98. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS2-H.bcmap +0 -0
  99. deepresearch_flow/paper/web/pdfjs/web/cmaps/CNS2-V.bcmap +3 -0
  100. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETHK-B5-H.bcmap +0 -0
  101. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETHK-B5-V.bcmap +0 -0
  102. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETen-B5-H.bcmap +0 -0
  103. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETen-B5-V.bcmap +0 -0
  104. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETenms-B5-H.bcmap +3 -0
  105. deepresearch_flow/paper/web/pdfjs/web/cmaps/ETenms-B5-V.bcmap +0 -0
  106. deepresearch_flow/paper/web/pdfjs/web/cmaps/EUC-H.bcmap +0 -0
  107. deepresearch_flow/paper/web/pdfjs/web/cmaps/EUC-V.bcmap +0 -0
  108. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-H.bcmap +0 -0
  109. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-RKSJ-H.bcmap +0 -0
  110. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-RKSJ-V.bcmap +0 -0
  111. deepresearch_flow/paper/web/pdfjs/web/cmaps/Ext-V.bcmap +0 -0
  112. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-EUC-H.bcmap +0 -0
  113. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-EUC-V.bcmap +0 -0
  114. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-H.bcmap +4 -0
  115. deepresearch_flow/paper/web/pdfjs/web/cmaps/GB-V.bcmap +0 -0
  116. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK-EUC-H.bcmap +0 -0
  117. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK-EUC-V.bcmap +0 -0
  118. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK2K-H.bcmap +0 -0
  119. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBK2K-V.bcmap +0 -0
  120. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBKp-EUC-H.bcmap +0 -0
  121. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBKp-EUC-V.bcmap +0 -0
  122. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-EUC-H.bcmap +0 -0
  123. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-EUC-V.bcmap +0 -0
  124. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-H.bcmap +0 -0
  125. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBT-V.bcmap +0 -0
  126. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBTpc-EUC-H.bcmap +0 -0
  127. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBTpc-EUC-V.bcmap +0 -0
  128. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBpc-EUC-H.bcmap +0 -0
  129. deepresearch_flow/paper/web/pdfjs/web/cmaps/GBpc-EUC-V.bcmap +0 -0
  130. deepresearch_flow/paper/web/pdfjs/web/cmaps/H.bcmap +0 -0
  131. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdla-B5-H.bcmap +0 -0
  132. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdla-B5-V.bcmap +0 -0
  133. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdlb-B5-H.bcmap +0 -0
  134. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKdlb-B5-V.bcmap +0 -0
  135. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKgccs-B5-H.bcmap +0 -0
  136. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKgccs-B5-V.bcmap +0 -0
  137. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm314-B5-H.bcmap +0 -0
  138. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm314-B5-V.bcmap +0 -0
  139. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm471-B5-H.bcmap +0 -0
  140. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKm471-B5-V.bcmap +0 -0
  141. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKscs-B5-H.bcmap +0 -0
  142. deepresearch_flow/paper/web/pdfjs/web/cmaps/HKscs-B5-V.bcmap +0 -0
  143. deepresearch_flow/paper/web/pdfjs/web/cmaps/Hankaku.bcmap +0 -0
  144. deepresearch_flow/paper/web/pdfjs/web/cmaps/Hiragana.bcmap +0 -0
  145. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-EUC-H.bcmap +0 -0
  146. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-EUC-V.bcmap +0 -0
  147. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-H.bcmap +0 -0
  148. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-Johab-H.bcmap +0 -0
  149. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-Johab-V.bcmap +0 -0
  150. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSC-V.bcmap +0 -0
  151. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-H.bcmap +0 -0
  152. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-HW-H.bcmap +0 -0
  153. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-HW-V.bcmap +0 -0
  154. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCms-UHC-V.bcmap +0 -0
  155. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCpc-EUC-H.bcmap +0 -0
  156. deepresearch_flow/paper/web/pdfjs/web/cmaps/KSCpc-EUC-V.bcmap +0 -0
  157. deepresearch_flow/paper/web/pdfjs/web/cmaps/Katakana.bcmap +0 -0
  158. deepresearch_flow/paper/web/pdfjs/web/cmaps/LICENSE +36 -0
  159. deepresearch_flow/paper/web/pdfjs/web/cmaps/NWP-H.bcmap +0 -0
  160. deepresearch_flow/paper/web/pdfjs/web/cmaps/NWP-V.bcmap +0 -0
  161. deepresearch_flow/paper/web/pdfjs/web/cmaps/RKSJ-H.bcmap +0 -0
  162. deepresearch_flow/paper/web/pdfjs/web/cmaps/RKSJ-V.bcmap +0 -0
  163. deepresearch_flow/paper/web/pdfjs/web/cmaps/Roman.bcmap +0 -0
  164. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UCS2-H.bcmap +0 -0
  165. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UCS2-V.bcmap +0 -0
  166. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF16-H.bcmap +0 -0
  167. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF16-V.bcmap +0 -0
  168. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF32-H.bcmap +0 -0
  169. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF32-V.bcmap +0 -0
  170. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF8-H.bcmap +0 -0
  171. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniCNS-UTF8-V.bcmap +0 -0
  172. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UCS2-H.bcmap +0 -0
  173. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UCS2-V.bcmap +0 -0
  174. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF16-H.bcmap +0 -0
  175. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF16-V.bcmap +0 -0
  176. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF32-H.bcmap +0 -0
  177. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF32-V.bcmap +0 -0
  178. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF8-H.bcmap +0 -0
  179. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniGB-UTF8-V.bcmap +0 -0
  180. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-H.bcmap +0 -0
  181. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-HW-H.bcmap +0 -0
  182. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-HW-V.bcmap +0 -0
  183. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UCS2-V.bcmap +0 -0
  184. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF16-H.bcmap +0 -0
  185. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF16-V.bcmap +0 -0
  186. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF32-H.bcmap +0 -0
  187. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF32-V.bcmap +0 -0
  188. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF8-H.bcmap +0 -0
  189. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS-UTF8-V.bcmap +0 -0
  190. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF16-H.bcmap +0 -0
  191. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF16-V.bcmap +0 -0
  192. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF32-H.bcmap +0 -0
  193. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF32-V.bcmap +0 -0
  194. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF8-H.bcmap +0 -0
  195. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJIS2004-UTF8-V.bcmap +0 -0
  196. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UCS2-HW-V.bcmap +0 -0
  197. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UCS2-V.bcmap +0 -0
  198. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISPro-UTF8-V.bcmap +0 -0
  199. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX0213-UTF32-H.bcmap +0 -0
  200. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX0213-UTF32-V.bcmap +0 -0
  201. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX02132004-UTF32-H.bcmap +0 -0
  202. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniJISX02132004-UTF32-V.bcmap +0 -0
  203. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UCS2-H.bcmap +0 -0
  204. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UCS2-V.bcmap +0 -0
  205. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF16-H.bcmap +0 -0
  206. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF16-V.bcmap +0 -0
  207. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF32-H.bcmap +0 -0
  208. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF32-V.bcmap +0 -0
  209. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF8-H.bcmap +0 -0
  210. deepresearch_flow/paper/web/pdfjs/web/cmaps/UniKS-UTF8-V.bcmap +0 -0
  211. deepresearch_flow/paper/web/pdfjs/web/cmaps/V.bcmap +0 -0
  212. deepresearch_flow/paper/web/pdfjs/web/cmaps/WP-Symbol.bcmap +0 -0
  213. deepresearch_flow/paper/web/pdfjs/web/compressed.tracemonkey-pldi-09.pdf +0 -0
  214. deepresearch_flow/paper/web/pdfjs/web/debugger.css +111 -0
  215. deepresearch_flow/paper/web/pdfjs/web/debugger.js +611 -0
  216. deepresearch_flow/paper/web/pdfjs/web/images/altText_add.svg +3 -0
  217. deepresearch_flow/paper/web/pdfjs/web/images/altText_done.svg +3 -0
  218. deepresearch_flow/paper/web/pdfjs/web/images/annotation-check.svg +11 -0
  219. deepresearch_flow/paper/web/pdfjs/web/images/annotation-comment.svg +16 -0
  220. deepresearch_flow/paper/web/pdfjs/web/images/annotation-help.svg +26 -0
  221. deepresearch_flow/paper/web/pdfjs/web/images/annotation-insert.svg +10 -0
  222. deepresearch_flow/paper/web/pdfjs/web/images/annotation-key.svg +11 -0
  223. deepresearch_flow/paper/web/pdfjs/web/images/annotation-newparagraph.svg +11 -0
  224. deepresearch_flow/paper/web/pdfjs/web/images/annotation-noicon.svg +7 -0
  225. deepresearch_flow/paper/web/pdfjs/web/images/annotation-note.svg +42 -0
  226. deepresearch_flow/paper/web/pdfjs/web/images/annotation-paperclip.svg +6 -0
  227. deepresearch_flow/paper/web/pdfjs/web/images/annotation-paragraph.svg +16 -0
  228. deepresearch_flow/paper/web/pdfjs/web/images/annotation-pushpin.svg +7 -0
  229. deepresearch_flow/paper/web/pdfjs/web/images/cursor-editorFreeText.svg +3 -0
  230. deepresearch_flow/paper/web/pdfjs/web/images/cursor-editorInk.svg +4 -0
  231. deepresearch_flow/paper/web/pdfjs/web/images/findbarButton-next.svg +3 -0
  232. deepresearch_flow/paper/web/pdfjs/web/images/findbarButton-previous.svg +3 -0
  233. deepresearch_flow/paper/web/pdfjs/web/images/gv-toolbarButton-download.svg +3 -0
  234. deepresearch_flow/paper/web/pdfjs/web/images/gv-toolbarButton-openinapp.svg +11 -0
  235. deepresearch_flow/paper/web/pdfjs/web/images/loading-dark.svg +24 -0
  236. deepresearch_flow/paper/web/pdfjs/web/images/loading-icon.gif +0 -0
  237. deepresearch_flow/paper/web/pdfjs/web/images/loading.svg +1 -0
  238. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-documentProperties.svg +3 -0
  239. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-firstPage.svg +3 -0
  240. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-handTool.svg +3 -0
  241. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-lastPage.svg +3 -0
  242. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-rotateCcw.svg +3 -0
  243. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-rotateCw.svg +3 -0
  244. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollHorizontal.svg +3 -0
  245. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollPage.svg +3 -0
  246. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollVertical.svg +3 -0
  247. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-scrollWrapped.svg +3 -0
  248. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-selectTool.svg +3 -0
  249. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadEven.svg +3 -0
  250. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadNone.svg +3 -0
  251. deepresearch_flow/paper/web/pdfjs/web/images/secondaryToolbarButton-spreadOdd.svg +3 -0
  252. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-bookmark.svg +3 -0
  253. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-currentOutlineItem.svg +3 -0
  254. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-download.svg +4 -0
  255. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorFreeText.svg +3 -0
  256. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorInk.svg +4 -0
  257. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-editorStamp.svg +8 -0
  258. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-menuArrow.svg +3 -0
  259. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-openFile.svg +3 -0
  260. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-pageDown.svg +3 -0
  261. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-pageUp.svg +3 -0
  262. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-presentationMode.svg +3 -0
  263. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-print.svg +3 -0
  264. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-search.svg +3 -0
  265. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-secondaryToolbarToggle.svg +3 -0
  266. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-sidebarToggle.svg +3 -0
  267. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewAttachments.svg +3 -0
  268. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewLayers.svg +3 -0
  269. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewOutline.svg +3 -0
  270. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-viewThumbnail.svg +3 -0
  271. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-zoomIn.svg +3 -0
  272. deepresearch_flow/paper/web/pdfjs/web/images/toolbarButton-zoomOut.svg +3 -0
  273. deepresearch_flow/paper/web/pdfjs/web/images/treeitem-collapsed.svg +1 -0
  274. deepresearch_flow/paper/web/pdfjs/web/images/treeitem-expanded.svg +1 -0
  275. deepresearch_flow/paper/web/pdfjs/web/locale/ach/viewer.properties +203 -0
  276. deepresearch_flow/paper/web/pdfjs/web/locale/af/viewer.properties +156 -0
  277. deepresearch_flow/paper/web/pdfjs/web/locale/an/viewer.properties +222 -0
  278. deepresearch_flow/paper/web/pdfjs/web/locale/ar/viewer.properties +224 -0
  279. deepresearch_flow/paper/web/pdfjs/web/locale/ast/viewer.properties +185 -0
  280. deepresearch_flow/paper/web/pdfjs/web/locale/az/viewer.properties +222 -0
  281. deepresearch_flow/paper/web/pdfjs/web/locale/be/viewer.properties +270 -0
  282. deepresearch_flow/paper/web/pdfjs/web/locale/bg/viewer.properties +214 -0
  283. deepresearch_flow/paper/web/pdfjs/web/locale/bn/viewer.properties +218 -0
  284. deepresearch_flow/paper/web/pdfjs/web/locale/bo/viewer.properties +217 -0
  285. deepresearch_flow/paper/web/pdfjs/web/locale/br/viewer.properties +224 -0
  286. deepresearch_flow/paper/web/pdfjs/web/locale/brx/viewer.properties +184 -0
  287. deepresearch_flow/paper/web/pdfjs/web/locale/bs/viewer.properties +173 -0
  288. deepresearch_flow/paper/web/pdfjs/web/locale/ca/viewer.properties +256 -0
  289. deepresearch_flow/paper/web/pdfjs/web/locale/cak/viewer.properties +253 -0
  290. deepresearch_flow/paper/web/pdfjs/web/locale/ckb/viewer.properties +213 -0
  291. deepresearch_flow/paper/web/pdfjs/web/locale/cs/viewer.properties +284 -0
  292. deepresearch_flow/paper/web/pdfjs/web/locale/cy/viewer.properties +270 -0
  293. deepresearch_flow/paper/web/pdfjs/web/locale/da/viewer.properties +270 -0
  294. deepresearch_flow/paper/web/pdfjs/web/locale/de/viewer.properties +270 -0
  295. deepresearch_flow/paper/web/pdfjs/web/locale/dsb/viewer.properties +284 -0
  296. deepresearch_flow/paper/web/pdfjs/web/locale/el/viewer.properties +270 -0
  297. deepresearch_flow/paper/web/pdfjs/web/locale/en-CA/viewer.properties +270 -0
  298. deepresearch_flow/paper/web/pdfjs/web/locale/en-GB/viewer.properties +284 -0
  299. deepresearch_flow/paper/web/pdfjs/web/locale/en-US/viewer.properties +282 -0
  300. deepresearch_flow/paper/web/pdfjs/web/locale/eo/viewer.properties +270 -0
  301. deepresearch_flow/paper/web/pdfjs/web/locale/es-AR/viewer.properties +284 -0
  302. deepresearch_flow/paper/web/pdfjs/web/locale/es-CL/viewer.properties +284 -0
  303. deepresearch_flow/paper/web/pdfjs/web/locale/es-ES/viewer.properties +270 -0
  304. deepresearch_flow/paper/web/pdfjs/web/locale/es-MX/viewer.properties +257 -0
  305. deepresearch_flow/paper/web/pdfjs/web/locale/et/viewer.properties +229 -0
  306. deepresearch_flow/paper/web/pdfjs/web/locale/eu/viewer.properties +284 -0
  307. deepresearch_flow/paper/web/pdfjs/web/locale/fa/viewer.properties +221 -0
  308. deepresearch_flow/paper/web/pdfjs/web/locale/ff/viewer.properties +214 -0
  309. deepresearch_flow/paper/web/pdfjs/web/locale/fi/viewer.properties +270 -0
  310. deepresearch_flow/paper/web/pdfjs/web/locale/fr/viewer.properties +270 -0
  311. deepresearch_flow/paper/web/pdfjs/web/locale/fur/viewer.properties +270 -0
  312. deepresearch_flow/paper/web/pdfjs/web/locale/fy-NL/viewer.properties +270 -0
  313. deepresearch_flow/paper/web/pdfjs/web/locale/ga-IE/viewer.properties +181 -0
  314. deepresearch_flow/paper/web/pdfjs/web/locale/gd/viewer.properties +257 -0
  315. deepresearch_flow/paper/web/pdfjs/web/locale/gl/viewer.properties +267 -0
  316. deepresearch_flow/paper/web/pdfjs/web/locale/gn/viewer.properties +278 -0
  317. deepresearch_flow/paper/web/pdfjs/web/locale/gu-IN/viewer.properties +214 -0
  318. deepresearch_flow/paper/web/pdfjs/web/locale/he/viewer.properties +283 -0
  319. deepresearch_flow/paper/web/pdfjs/web/locale/hi-IN/viewer.properties +227 -0
  320. deepresearch_flow/paper/web/pdfjs/web/locale/hr/viewer.properties +243 -0
  321. deepresearch_flow/paper/web/pdfjs/web/locale/hsb/viewer.properties +284 -0
  322. deepresearch_flow/paper/web/pdfjs/web/locale/hu/viewer.properties +284 -0
  323. deepresearch_flow/paper/web/pdfjs/web/locale/hy-AM/viewer.properties +232 -0
  324. deepresearch_flow/paper/web/pdfjs/web/locale/hye/viewer.properties +229 -0
  325. deepresearch_flow/paper/web/pdfjs/web/locale/ia/viewer.properties +284 -0
  326. deepresearch_flow/paper/web/pdfjs/web/locale/id/viewer.properties +253 -0
  327. deepresearch_flow/paper/web/pdfjs/web/locale/is/viewer.properties +284 -0
  328. deepresearch_flow/paper/web/pdfjs/web/locale/it/viewer.properties +284 -0
  329. deepresearch_flow/paper/web/pdfjs/web/locale/ja/viewer.properties +270 -0
  330. deepresearch_flow/paper/web/pdfjs/web/locale/ka/viewer.properties +284 -0
  331. deepresearch_flow/paper/web/pdfjs/web/locale/kab/viewer.properties +264 -0
  332. deepresearch_flow/paper/web/pdfjs/web/locale/kk/viewer.properties +284 -0
  333. deepresearch_flow/paper/web/pdfjs/web/locale/km/viewer.properties +189 -0
  334. deepresearch_flow/paper/web/pdfjs/web/locale/kn/viewer.properties +166 -0
  335. deepresearch_flow/paper/web/pdfjs/web/locale/ko/viewer.properties +284 -0
  336. deepresearch_flow/paper/web/pdfjs/web/locale/lij/viewer.properties +214 -0
  337. deepresearch_flow/paper/web/pdfjs/web/locale/lo/viewer.properties +257 -0
  338. deepresearch_flow/paper/web/pdfjs/web/locale/locale.properties +333 -0
  339. deepresearch_flow/paper/web/pdfjs/web/locale/lt/viewer.properties +229 -0
  340. deepresearch_flow/paper/web/pdfjs/web/locale/ltg/viewer.properties +192 -0
  341. deepresearch_flow/paper/web/pdfjs/web/locale/lv/viewer.properties +214 -0
  342. deepresearch_flow/paper/web/pdfjs/web/locale/meh/viewer.properties +106 -0
  343. deepresearch_flow/paper/web/pdfjs/web/locale/mk/viewer.properties +211 -0
  344. deepresearch_flow/paper/web/pdfjs/web/locale/mr/viewer.properties +210 -0
  345. deepresearch_flow/paper/web/pdfjs/web/locale/ms/viewer.properties +214 -0
  346. deepresearch_flow/paper/web/pdfjs/web/locale/my/viewer.properties +170 -0
  347. deepresearch_flow/paper/web/pdfjs/web/locale/nb-NO/viewer.properties +284 -0
  348. deepresearch_flow/paper/web/pdfjs/web/locale/ne-NP/viewer.properties +197 -0
  349. deepresearch_flow/paper/web/pdfjs/web/locale/nl/viewer.properties +274 -0
  350. deepresearch_flow/paper/web/pdfjs/web/locale/nn-NO/viewer.properties +270 -0
  351. deepresearch_flow/paper/web/pdfjs/web/locale/oc/viewer.properties +278 -0
  352. deepresearch_flow/paper/web/pdfjs/web/locale/pa-IN/viewer.properties +270 -0
  353. deepresearch_flow/paper/web/pdfjs/web/locale/pl/viewer.properties +270 -0
  354. deepresearch_flow/paper/web/pdfjs/web/locale/pt-BR/viewer.properties +270 -0
  355. deepresearch_flow/paper/web/pdfjs/web/locale/pt-PT/viewer.properties +270 -0
  356. deepresearch_flow/paper/web/pdfjs/web/locale/rm/viewer.properties +270 -0
  357. deepresearch_flow/paper/web/pdfjs/web/locale/ro/viewer.properties +220 -0
  358. deepresearch_flow/paper/web/pdfjs/web/locale/ru/viewer.properties +270 -0
  359. deepresearch_flow/paper/web/pdfjs/web/locale/sat/viewer.properties +270 -0
  360. deepresearch_flow/paper/web/pdfjs/web/locale/sc/viewer.properties +258 -0
  361. deepresearch_flow/paper/web/pdfjs/web/locale/scn/viewer.properties +101 -0
  362. deepresearch_flow/paper/web/pdfjs/web/locale/sco/viewer.properties +226 -0
  363. deepresearch_flow/paper/web/pdfjs/web/locale/si/viewer.properties +228 -0
  364. deepresearch_flow/paper/web/pdfjs/web/locale/sk/viewer.properties +270 -0
  365. deepresearch_flow/paper/web/pdfjs/web/locale/skr/viewer.properties +264 -0
  366. deepresearch_flow/paper/web/pdfjs/web/locale/sl/viewer.properties +284 -0
  367. deepresearch_flow/paper/web/pdfjs/web/locale/son/viewer.properties +152 -0
  368. deepresearch_flow/paper/web/pdfjs/web/locale/sq/viewer.properties +247 -0
  369. deepresearch_flow/paper/web/pdfjs/web/locale/sr/viewer.properties +259 -0
  370. deepresearch_flow/paper/web/pdfjs/web/locale/sv-SE/viewer.properties +284 -0
  371. deepresearch_flow/paper/web/pdfjs/web/locale/szl/viewer.properties +224 -0
  372. deepresearch_flow/paper/web/pdfjs/web/locale/ta/viewer.properties +173 -0
  373. deepresearch_flow/paper/web/pdfjs/web/locale/te/viewer.properties +216 -0
  374. deepresearch_flow/paper/web/pdfjs/web/locale/tg/viewer.properties +281 -0
  375. deepresearch_flow/paper/web/pdfjs/web/locale/th/viewer.properties +270 -0
  376. deepresearch_flow/paper/web/pdfjs/web/locale/tl/viewer.properties +222 -0
  377. deepresearch_flow/paper/web/pdfjs/web/locale/tr/viewer.properties +283 -0
  378. deepresearch_flow/paper/web/pdfjs/web/locale/trs/viewer.properties +184 -0
  379. deepresearch_flow/paper/web/pdfjs/web/locale/uk/viewer.properties +284 -0
  380. deepresearch_flow/paper/web/pdfjs/web/locale/ur/viewer.properties +218 -0
  381. deepresearch_flow/paper/web/pdfjs/web/locale/uz/viewer.properties +142 -0
  382. deepresearch_flow/paper/web/pdfjs/web/locale/vi/viewer.properties +270 -0
  383. deepresearch_flow/paper/web/pdfjs/web/locale/wo/viewer.properties +104 -0
  384. deepresearch_flow/paper/web/pdfjs/web/locale/xh/viewer.properties +156 -0
  385. deepresearch_flow/paper/web/pdfjs/web/locale/zh-CN/viewer.properties +284 -0
  386. deepresearch_flow/paper/web/pdfjs/web/locale/zh-TW/viewer.properties +281 -0
  387. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitDingbats.pfb +0 -0
  388. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixed.pfb +0 -0
  389. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedBold.pfb +0 -0
  390. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedBoldItalic.pfb +0 -0
  391. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitFixedItalic.pfb +0 -0
  392. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerif.pfb +0 -0
  393. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifBold.pfb +0 -0
  394. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifBoldItalic.pfb +0 -0
  395. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSerifItalic.pfb +0 -0
  396. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/FoxitSymbol.pfb +0 -0
  397. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LICENSE_FOXIT +27 -0
  398. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LICENSE_LIBERATION +102 -0
  399. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Bold.ttf +0 -0
  400. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-BoldItalic.ttf +0 -0
  401. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Italic.ttf +0 -0
  402. deepresearch_flow/paper/web/pdfjs/web/standard_fonts/LiberationSans-Regular.ttf +0 -0
  403. deepresearch_flow/paper/web/pdfjs/web/viewer.css +3528 -0
  404. deepresearch_flow/paper/web/pdfjs/web/viewer.html +486 -0
  405. deepresearch_flow/paper/web/pdfjs/web/viewer.js +14099 -0
  406. deepresearch_flow/paper/web/pdfjs/web/viewer.js.map +1 -0
  407. deepresearch_flow/paper/web/query.py +90 -0
  408. deepresearch_flow/recognize/__init__.py +1 -0
  409. deepresearch_flow/recognize/cli.py +469 -0
  410. deepresearch_flow/recognize/markdown.py +277 -0
  411. deepresearch_flow/recognize/organize.py +95 -0
  412. deepresearch_flow-0.1.1.dist-info/METADATA +416 -0
  413. deepresearch_flow-0.1.1.dist-info/RECORD +417 -0
  414. deepresearch_flow-0.1.1.dist-info/WHEEL +5 -0
  415. deepresearch_flow-0.1.1.dist-info/entry_points.txt +2 -0
  416. deepresearch_flow-0.1.1.dist-info/licenses/LICENSE +21 -0
  417. deepresearch_flow-0.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,115 @@
1
+ """Shared LLM call helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+ import httpx
7
+
8
+ from deepresearch_flow.paper.config import ProviderConfig
9
+ from deepresearch_flow.paper.providers.base import ProviderError
10
+ from deepresearch_flow.paper.providers.azure_openai import chat as azure_openai_chat
11
+ from deepresearch_flow.paper.providers.claude import chat as claude_chat
12
+ from deepresearch_flow.paper.providers.dashscope import chat as dashscope_chat
13
+ from deepresearch_flow.paper.providers.gemini import (
14
+ chat_ai_studio as gemini_ai_studio_chat,
15
+ chat_vertex as gemini_vertex_chat,
16
+ )
17
+ from deepresearch_flow.paper.providers.ollama import chat as ollama_chat
18
+ from deepresearch_flow.paper.providers.openai_compatible import chat as openai_chat
19
+
20
+
21
def backoff_delay(base: float, attempt: int, max_delay: float) -> float:
    """Return the exponential backoff delay for a retry attempt.

    The delay is ``base * 2 ** (attempt - 1)``, capped at ``max_delay``.
    Attempt numbers below 1 are treated like the first attempt and yield
    ``base`` (still subject to the cap).

    Args:
        base: Delay used for the first attempt.
        attempt: Retry attempt number; 1 means the first attempt.
        max_delay: Upper bound for the returned delay.

    Returns:
        The smaller of the computed delay and ``max_delay``.
    """
    # 2 ** 1023 is the largest power of two that converts to a float.
    # Past that, ``float * int`` raises OverflowError instead of saturating,
    # so clamp the exponent and let ``min`` apply the cap.
    exponent = min(max(attempt - 1, 0), 1023)
    delay = base * (2 ** exponent)
    return min(delay, max_delay)
24
+
25
+
26
async def call_provider(
    provider: ProviderConfig,
    model: str,
    messages: list[dict[str, str]],
    schema: dict[str, Any],
    api_key: str | None,
    timeout: float,
    structured_mode: str,
    client: httpx.AsyncClient,
) -> str:
    """Dispatch a chat request to the backend selected by ``provider.type``.

    Args:
        provider: Provider configuration; ``type`` selects the backend and
            the remaining attributes supply backend-specific settings.
        model: Model name forwarded to the backend (Azure uses the configured
            deployment instead).
        messages: Chat messages as role/content dictionaries.
        schema: JSON schema forwarded to the backends that accept one.
        api_key: API key, when the backend needs one.
        timeout: Request timeout forwarded to the HTTP-based backends.
        structured_mode: Structured-output mode forwarded to the HTTP-based
            backends.
        client: Shared HTTP client used by the HTTP-based backends.

    Returns:
        The text returned by the selected backend.

    Raises:
        ProviderError: If required configuration is missing or the provider
            type is not supported.
    """
    kind = provider.type

    # Start from the configured extra headers; an explicitly configured auth
    # header wins over the one derived from ``api_key`` (setdefault).
    request_headers = dict(provider.extra_headers)
    if api_key:
        if kind == "openai_compatible":
            request_headers.setdefault("Authorization", f"Bearer {api_key}")
        elif kind == "azure_openai":
            request_headers.setdefault("api-key", api_key)

    if kind == "ollama":
        return await ollama_chat(
            client,
            provider.base_url,
            model,
            messages,
            structured_mode,
            request_headers,
            timeout,
        )

    if kind == "dashscope":
        if api_key:
            return await dashscope_chat(api_key=api_key, model=model, messages=messages)
        raise ProviderError("dashscope provider requires api_key")

    if kind == "gemini_ai_studio":
        if api_key:
            return await gemini_ai_studio_chat(api_key=api_key, model=model, messages=messages)
        raise ProviderError("gemini_ai_studio provider requires api_key")

    if kind == "gemini_vertex":
        if not (provider.project_id and provider.location):
            raise ProviderError("gemini_vertex provider requires project_id and location")
        return await gemini_vertex_chat(
            project_id=provider.project_id,
            location=provider.location,
            credentials_path=provider.credentials_path,
            model=model,
            messages=messages,
        )

    if kind == "azure_openai":
        if not api_key:
            raise ProviderError("azure_openai provider requires api_key")
        if not (provider.deployment and provider.api_version):
            raise ProviderError("azure_openai provider requires deployment and api_version")
        return await azure_openai_chat(
            client,
            provider.base_url,
            provider.deployment,
            provider.api_version,
            messages,
            structured_mode,
            request_headers,
            timeout,
            schema,
        )

    if kind == "claude":
        if not api_key:
            raise ProviderError("claude provider requires api_key")
        if not provider.anthropic_version:
            raise ProviderError("claude provider requires anthropic_version")
        return await claude_chat(
            api_key=api_key,
            model=model,
            messages=messages,
            anthropic_version=provider.anthropic_version,
        )

    if kind == "openai_compatible":
        return await openai_chat(
            client,
            provider.base_url,
            model,
            messages,
            structured_mode,
            request_headers,
            timeout,
            schema,
        )

    raise ProviderError(f"Unsupported provider type: {kind}")
@@ -0,0 +1 @@
1
+ """Built-in prompt templates."""
@@ -0,0 +1,6 @@
1
+ You are an information extraction assistant.
2
+ Output language: {{ output_language }}.
3
+ Extract structured data from the provided markdown document.
4
+ Return ONLY valid JSON that conforms to the given JSON Schema.
5
+ The field "paper_authors" MUST be an array of strings (each author name as one item).
6
+ If a field is unknown, use an empty string or empty list per schema.
@@ -0,0 +1,82 @@
1
+ Template: deep_read
2
+
3
+ {% if stage_name %}
4
+ 当前阶段:{{ stage_name }}。
5
+ 请仅输出 JSON,包含字段:{{ stage_fields | join(", ") }}。
6
+ 输出语言:{{ output_language }}。请使用该语言输出(zh 时使用中文,保留必要英文术语)。
7
+ 如需引用此前结果,可参考以下已完成模块(JSON):{{ previous_outputs }}
8
+ {% else %}
9
+ 注意:本工具是单次输出。请一次性完成所有模块内容,并填入 JSON 字段 module_a, module_b, module_c1..module_c8, module_d, module_e;可选 module_f, module_g, module_h 也请填入(若无法完成则填空字符串)。
10
+ 每个模块字段内部使用 Markdown 编排(标题、列表等)。
11
+ 禁止第一人称(不出现“我/我们/本人”)。
12
+ 关键结论需要注明来自论文的定位(章节/图/表/公式编号)。
13
+ {% endif %}
14
+
15
+ ---
16
+
17
+ # 论文精读 Prompt(多轮输出|由浅入深|术语表|联网核验引用)
18
+
19
+ 你是一个高级语言模型,任务是帮助用户高效、可复现地阅读和理解一篇算法/工程研究论文。方法以 “How to Read a Paper: A Three-Pass Approach(Keshav)” 为骨架,并加入审稿式 checklist、术语解释与联网核验引用。
20
+
21
+ ## 0) 总体硬约束(必须遵守)
22
+ 1. 输出语言:全程使用 {{ output_language }}(保留必要英文术语/公式符号)。
23
+ 2. 写作人称:禁止第一人称。
24
+ 3. 输出形式:使用 Markdown;不省略推理过程;对关键结论给出“来自论文哪里”的定位(章节/图/表/公式编号)。
25
+ 4. 由浅入深:每个模块内按浅层/中层/深层组织。
26
+
27
+ ## 1) 模块表{% if not stage_name %}(本次单次输出需要全部完成){% endif %}
28
+ - 模块 A:阅读对齐与输入校验
29
+ - 模块 B:第一遍鸟瞰扫描
30
+ - 模块 C1~C8:八个问题逐个回答
31
+ - 模块 D:第二遍内容理解
32
+ - 模块 E:第三遍深度审视
33
+ - 可选模块 F:术语表 + 概念依赖图
34
+ - 可选模块 G:联网核验与引用校对
35
+ - 模块 H:图表与表格的客观抽取
36
+
37
+ ## 2) 模块内容规范
38
+ ### 模块 A:阅读对齐与输入校验
39
+ - 明确用户目的与背景
40
+ - 检查输入材料:PDF/附录/代码链接/补充材料/数据集
41
+ - 输出阅读路线图
42
+
43
+ ### 模块 B:第一遍(鸟瞰扫描)
44
+ 必须给出 5 项产出:类别、上下文、正确性直觉检查、贡献、清晰度,并给出继续/放弃建议。
45
+
46
+ ### 模块 C1~C8:八个问题
47
+ 问题1:论文试图解决什么问题?
48
+ 问题2:有哪些相关研究?(最强基线/最相近思路/正交方向)
49
+ 问题3:论文如何解决这个问题?(流程图文字版)
50
+ 问题4:实验数据集与实现设置:用了哪些数据集/场景?为什么选它们?验证哪些关键特性?
51
+ 问题5:指标与结果:有哪些指标?实验结果是什么?这些指标分别体现了哪些创新点?尽力抽取全部结果指标。
52
+ 问题6:有什么可以进一步探索的点?(理论/工程/数据/评测)
53
+ 问题7:现实场景如何体现/实现?(部署约束、算力、延迟、鲁棒性)
54
+ 问题8:总结主要内容(一段摘要 + 5 条要点 + 1 个核心公式/机制)
55
+
56
+ ### 模块 D:第二遍(内容理解)
57
+ - 给出结构树
58
+ - 拆解核心方法:输入/输出/模块/损失/训练或推理流程
59
+ - 关键图表说明
60
+ - 实验部分:数据集、指标、对比方法、公平性、消融充分性
61
+
62
+ ### 模块 E:第三遍(深度审视)
63
+ 必须包含:可复现重建(伪代码)、假设清单、失败模式、强弱点评估、下一步研究。
64
+
65
+ ### 模块 F:术语表(可选)
66
+ - 15-40 个关键词,包含中英文名、定义、直觉解释、作用、前置知识
67
+ - 术语依赖图(文字版)
68
+
69
+ ### 模块 G:联网核验(可选)
70
+ - 关键事实提供至少 2 个独立来源
71
+ - 冲突要显式说明,无法核验要标记
72
+ - 输出引用列表
73
+
74
+ ### 模块 H:图表与表格抽取(可选)
75
+ - 全量索引、打分、TOP 深读
76
+ - 需要明确图表位置、图注、类型、支撑的 claim
77
+
78
+ Document content:
79
+ {{ content }}
80
+
81
+ JSON Schema:
82
+ {{ schema }}
@@ -0,0 +1,6 @@
1
+ You are an information extraction assistant.
2
+ Output language: {{ output_language }}.
3
+ Extract structured data from the provided markdown document.
4
+ Return ONLY valid JSON that conforms to the given JSON Schema.
5
+ The field "paper_authors" MUST be an array of strings (each author name as one item).
6
+ If a field is unknown, use an empty string or empty list per schema.
@@ -0,0 +1,28 @@
1
+ Template: eight_questions
2
+
3
+ {% if stage_name %}
4
+ Current stage: {{ stage_name }}.
5
+ Return JSON with keys: {{ stage_fields | join(", ") }}.
6
+ Output language: {{ output_language }}. Use that language in all answers (if zh, use Chinese).
7
+ Previously completed outputs (JSON): {{ previous_outputs }}
8
+ {% else %}
9
+ Answer each question in the corresponding JSON field question1 through question8.
10
+ Use Markdown in each answer if helpful.
11
+ Output language: {{ output_language }}. Use that language in all answers (if zh, use Chinese).
12
+ {% endif %}
13
+
14
+ Questions:
15
+ 1) What problem does the paper try to solve?
16
+ 2) What related research exists? (separate: strongest baseline, closest approach, orthogonal directions)
17
+ 3) How does the paper solve the problem? (include a text flowchart)
18
+ 4) What datasets and implementation settings are used? Why these datasets/scenarios, and which key properties do they validate?
19
+ 5) What are the metrics and results? List metrics, summarize results, and explain which innovation each metric reflects.
20
+ 6) What can be further explored? (split by theory/engineering/data/evaluation)
21
+ 7) How would this work in real-world scenarios? (deployment constraints, compute, latency, robustness)
22
+ 8) Summarize the main content (one paragraph + 5 bullets + one core formula/mechanism)
23
+
24
+ Document content:
25
+ {{ content }}
26
+
27
+ JSON Schema:
28
+ {{ schema }}
@@ -0,0 +1,6 @@
1
+ You are an information extraction assistant.
2
+ Output language: {{ output_language }}.
3
+ Extract structured data from the provided markdown document.
4
+ Return ONLY valid JSON that conforms to the given JSON Schema.
5
+ The field "paper_authors" MUST be an array of strings (each author name as one item).
6
+ If a field is unknown, use an empty string or empty list per schema.
@@ -0,0 +1,24 @@
1
+ Template: simple
2
+
3
+ {% if stage_name %}
4
+ Current stage: {{ stage_name }}.
5
+ Return JSON with keys: {{ stage_fields | join(", ") }}.
6
+ Previously completed outputs (JSON): {{ previous_outputs }}
7
+ {% else %}
8
+ Write a single-paragraph summary that covers:
9
+ - the problem being solved
10
+ - related research context
11
+ - the proposed solution
12
+ - datasets and implementation settings (why chosen, what they validate)
13
+ - metrics and results (what metrics, key results, which innovations they support)
14
+ - further exploration directions
15
+ - real-world scenarios and constraints
16
+ - the main content summary with one core formula/mechanism
17
+ Output language: {{ output_language }}. Use that language in all answers (if zh, use Chinese).
18
+ {% endif %}
19
+
20
+ Document content:
21
+ {{ content }}
22
+
23
+ JSON Schema:
24
+ {{ schema }}
@@ -0,0 +1,6 @@
1
+ You are an information extraction assistant.
2
+ Output language: {{ output_language }}.
3
+ Extract structured data from the provided markdown document.
4
+ Return ONLY valid JSON that conforms to the given JSON Schema.
5
+ The field "paper_authors" MUST be an array of strings (each author name as one item).
6
+ If a field is unknown, use an empty string or empty list per schema.
@@ -0,0 +1,44 @@
1
+ Template: three_pass
2
+
3
+ {% if stage_name %}
4
+ Current stage: {{ stage_name }}.
5
+ Return JSON with keys: {{ stage_fields | join(", ") }}.
6
+ Output language: {{ output_language }}. Use that language in all answers (if zh, use Chinese).
7
+ Previously completed outputs (JSON): {{ previous_outputs }}
8
+ {% else %}
9
+ Follow the three-pass reading method. Fill the JSON fields:
10
+ - step1_summary: "Step 1" output (overview + innovation highlights + structure + figures + conclusion + references + open source).
11
+ - step2_analysis: "Step 2" output (content comprehension + experiments + evidence + related work weaknesses).
12
+ - step3_analysis: "Step 3" output (reconstruction, assumptions, failure modes, strengths/weaknesses, future work).
13
+ Use Markdown inside each field to format lists and sections.
14
+ Output language: {{ output_language }}. Use that language in all answers (if zh, use Chinese).
15
+
16
+ Step 1 guidance:
17
+ - Summarize title/abstract/introduction.
18
+ - List 3-5 innovations.
19
+ - List authors and affiliations if available.
20
+ - List section and subsection titles (translated if needed).
21
+ - Summarize key figures/tables.
22
+ - Summarize conclusions.
23
+ - List key references.
24
+ - State whether code is open-sourced and where.
25
+
26
+ Step 2 guidance:
27
+ - Explain each innovation: source, prior limitations, and solution details.
28
+ - Analyze experiments: datasets, objectives, baselines, results, ablations.
29
+ - Analyze key figures/tables.
30
+ - Summarize important related work and weaknesses.
31
+
32
+ Step 3 guidance:
33
+ - Provide a reconstruction outline and pseudocode if possible.
34
+ - Challenge assumptions and statements.
35
+ - Identify failure modes and reasons.
36
+ - Assess strengths/weaknesses and possible fixes.
37
+ - Suggest future research directions.
38
+ {% endif %}
39
+
40
+ Document content:
41
+ {{ content }}
42
+
43
+ JSON Schema:
44
+ {{ schema }}
@@ -0,0 +1,11 @@
1
+ """Prompt templates for paper extraction."""
2
+
3
# System prompt: one instruction per sentence, joined with single spaces.
DEFAULT_SYSTEM_PROMPT = " ".join(
    [
        "You are an information extraction assistant.",
        "Extract structured data from the provided markdown document.",
        "Return ONLY valid JSON that conforms to the given JSON Schema.",
        "The field 'paper_authors' MUST be an array of strings (each author name as one item).",
        "If a field is unknown, use an empty string or empty list per schema.",
    ]
)

# User prompt with ``{content}`` and ``{schema}`` placeholders for
# ``str.format``; the trailing empty item yields the final newline.
DEFAULT_USER_PROMPT = "\n".join(
    [
        "Document content:",
        "{content}",
        "",
        "JSON Schema:",
        "{schema}",
        "",
    ]
)
@@ -0,0 +1 @@
1
+ """Provider implementations."""
@@ -0,0 +1,66 @@
1
+ """Azure OpenAI provider implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+ import httpx
7
+
8
+ from deepresearch_flow.paper.providers.base import ProviderError
9
+
10
+
11
async def chat(
    client: httpx.AsyncClient,
    endpoint: str,
    deployment: str,
    api_version: str,
    messages: list[dict[str, str]],
    structured_mode: str,
    headers: dict[str, str] | None,
    timeout: float,
    schema: dict[str, Any] | None = None,
) -> str:
    """Call an Azure OpenAI chat-completions deployment and return the reply text.

    Args:
        client: HTTP client used to send the request.
        endpoint: Base URL of the Azure OpenAI resource; a trailing slash is
            stripped before the deployment path is appended.
        deployment: Deployment name placed in the request path.
        api_version: Value sent as the ``api-version`` query parameter.
        messages: Chat messages as role/content dictionaries.
        structured_mode: ``"json_schema"`` (used only when ``schema`` is given)
            or ``"json_object"`` adds a ``response_format``; any other value
            sends none.
        headers: Request headers, or ``None``.
        timeout: Request timeout passed to the HTTP client.
        schema: JSON schema used for the ``json_schema`` mode.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        ProviderError: On transport errors (retryable), HTTP 429 or 5xx
            (retryable), other HTTP 4xx (flagged ``structured_error`` when a
            structured mode was requested), or when the body is not valid
            JSON or lacks a non-empty first-choice content.
    """
    url = endpoint.rstrip("/") + f"/openai/deployments/{deployment}/chat/completions"
    payload: dict[str, Any] = {
        "messages": messages,
        "temperature": 0,
    }

    if structured_mode == "json_schema" and schema is not None:
        payload["response_format"] = {
            "type": "json_schema",
            "json_schema": {
                "name": "paper_extract",
                "schema": schema,
            },
        }
    elif structured_mode == "json_object":
        payload["response_format"] = {"type": "json_object"}

    try:
        response = await client.post(
            url,
            json=payload,
            params={"api-version": api_version},
            headers=headers,
            timeout=timeout,
        )
    except httpx.RequestError as exc:
        raise ProviderError(str(exc), retryable=True) from exc

    status = response.status_code
    if status == 429:
        raise ProviderError(response.text, status_code=429, retryable=True)
    if status >= 500:
        raise ProviderError(response.text, status_code=status, retryable=True)
    if status >= 400:
        # A 4xx while asking for structured output may mean the deployment
        # rejected the response_format, so flag it for the caller.
        structured_error = structured_mode in ("json_schema", "json_object")
        raise ProviderError(response.text, status_code=status, structured_error=structured_error)

    # A success status does not guarantee a JSON body (e.g. a proxy error
    # page); report it as a ProviderError instead of a raw decode error.
    try:
        data = response.json()
    except ValueError as exc:
        raise ProviderError("Azure OpenAI response is not valid JSON", status_code=status) from exc

    choices = data.get("choices") if isinstance(data, dict) else None
    if not choices:
        raise ProviderError("Azure OpenAI response missing choices", status_code=status)

    # ``message`` may be absent or null; treat both as missing content.
    first_choice = choices[0]
    message = first_choice.get("message") if isinstance(first_choice, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    if not content:
        raise ProviderError("Azure OpenAI response missing content", status_code=status)
    return content
@@ -0,0 +1,19 @@
1
+ """Provider base helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
+ class ProviderError(RuntimeError):
7
+ def __init__(
8
+ self,
9
+ message: str,
10
+ status_code: int | None = None,
11
+ retryable: bool = False,
12
+ structured_error: bool = False,
13
+ error_type: str | None = None,
14
+ ) -> None:
15
+ super().__init__(message)
16
+ self.status_code = status_code
17
+ self.retryable = retryable
18
+ self.structured_error = structured_error
19
+ self.error_type = error_type or "provider_error"
@@ -0,0 +1,71 @@
1
+ """Claude provider implementation (Anthropic SDK)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from deepresearch_flow.paper.providers.base import ProviderError
8
+
9
+
10
+ def _extract_text_blocks(content: Any) -> str | None:
11
+ if isinstance(content, str):
12
+ return content
13
+ if not content:
14
+ return None
15
+ parts: list[str] = []
16
+ for block in content:
17
+ if isinstance(block, str):
18
+ parts.append(block)
19
+ continue
20
+ text = getattr(block, "text", None)
21
+ if text:
22
+ parts.append(text)
23
+ continue
24
+ if isinstance(block, dict) and block.get("text"):
25
+ parts.append(str(block["text"]))
26
+ return "".join(parts) if parts else None
27
+
28
+
29
async def chat(
    api_key: str,
    model: str,
    messages: list[dict[str, str]],
    anthropic_version: str,
    max_tokens: int = 2048,
) -> str:
    """Send a chat request through the Anthropic SDK and return the reply text.

    Messages with role ``"system"`` are merged (blank-line separated) into a
    single system prompt; all other messages are forwarded in order, with a
    missing role defaulting to ``"user"``.

    Args:
        api_key: API key passed to the SDK client.
        model: Model name.
        messages: Chat messages as role/content dictionaries.
        anthropic_version: Value sent as the ``anthropic-version`` header.
        max_tokens: Maximum number of tokens requested for the reply.

    Returns:
        The concatenated text blocks of the response.

    Raises:
        ProviderError: If the ``anthropic`` package is missing, the SDK call
            fails, or the response carries no text.
    """
    try:
        from anthropic import AsyncAnthropic
    except ImportError as exc:
        raise ProviderError("anthropic package is not installed") from exc

    system_parts: list[str] = []
    claude_messages: list[dict[str, str]] = []
    for message in messages:
        role = message.get("role")
        content = message.get("content", "")
        if role == "system":
            system_parts.append(content)
        else:
            claude_messages.append({"role": role or "user", "content": content})

    request: dict[str, Any] = {
        "model": model,
        "max_tokens": max_tokens,
        "messages": claude_messages,
    }
    # Send ``system`` only when there is one: the SDK documents a string (or
    # block list) for it, so an explicit None is left out of the request.
    system_prompt = "\n\n".join(system_parts).strip()
    if system_prompt:
        request["system"] = system_prompt

    client = AsyncAnthropic(
        api_key=api_key,
        default_headers={"anthropic-version": anthropic_version},
    )

    try:
        response = await client.messages.create(**request)
    except Exception as exc:  # pragma: no cover - SDK error
        # SDK status errors expose ``status_code``; errors without one
        # (connection problems, timeouts) stay retryable. Client errors such
        # as bad credentials or invalid requests will not succeed on retry.
        status_code = getattr(exc, "status_code", None)
        if not isinstance(status_code, int):
            status_code = None
        retryable = status_code is None or status_code in (408, 409, 429) or status_code >= 500
        raise ProviderError(str(exc), status_code=status_code, retryable=retryable) from exc
    finally:
        # A new client is created per call; close it so its HTTP connections
        # are released.
        await client.close()

    content = _extract_text_blocks(response.content)
    if not content:
        raise ProviderError("Claude response missing content")
    return content
@@ -0,0 +1,58 @@
1
+ """DashScope provider implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Any
7
+
8
+ from deepresearch_flow.paper.providers.base import ProviderError
9
+
10
+
11
async def chat(
    api_key: str,
    model: str,
    messages: list[dict[str, str]],
) -> str:
    """Send a chat request through the DashScope SDK and return the reply text.

    Args:
        api_key: API key passed to the SDK call.
        model: Model name.
        messages: Chat messages; each item must provide ``"role"`` and
            ``"content"``.

    Returns:
        The message content of the first choice in the response output.

    Raises:
        ProviderError: If the ``dashscope`` package is missing, the SDK call
            fails (retryable), the response reports a non-200 status
            (retryable for 429/500/502/503/504), or no content is present.
    """
    try:
        from dashscope.aigc.generation import AioGeneration, Message
    except ImportError as exc:
        raise ProviderError("dashscope package is not installed") from exc

    ds_messages = [Message(role=item["role"], content=item["content"]) for item in messages]

    try:
        response = await AioGeneration.call(
            api_key=api_key,
            model=model,
            messages=ds_messages,
            result_format="message",
        )
    except Exception as exc:  # pragma: no cover - SDK error
        raise ProviderError(str(exc), retryable=True) from exc

    data = _normalize_response(response)
    if not isinstance(data, dict):
        # Nothing usable to inspect; falls through to "missing content".
        data = {}

    status_code = data.get("status_code")
    if status_code and status_code != 200:
        retryable = status_code in (429, 500, 502, 503, 504)
        raise ProviderError(data.get("message") or "DashScope error", status_code=status_code, retryable=retryable)

    # Walk output -> choices[0] -> message -> content defensively: any level
    # may be missing, null, or (for choices) an empty list, and each case
    # should surface as a ProviderError rather than IndexError/AttributeError.
    output = data.get("output")
    choices = output.get("choices") if isinstance(output, dict) else None
    first_choice = choices[0] if isinstance(choices, (list, tuple)) and choices else None
    message = first_choice.get("message") if isinstance(first_choice, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    if not content:
        raise ProviderError("DashScope response missing content")
    return content
48
+
49
+
50
def _normalize_response(response: Any) -> dict[str, Any]:
    """Convert an SDK response into a plain dictionary.

    Args:
        response: A dict, an object exposing ``to_dict()``, or any object
            whose string form may be a JSON document.

    Returns:
        The dict itself, the result of ``to_dict()``, or the parsed JSON
        object. When the string form is not a JSON object, returns
        ``{"message": <string form>}``.
    """
    if isinstance(response, dict):
        return response
    if hasattr(response, "to_dict"):
        return response.to_dict()
    raw = str(response)
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return {"message": raw}
    # Valid JSON that is not an object (list, number, string, null) would
    # break callers that use ``.get``; wrap it like any other opaque text.
    return parsed if isinstance(parsed, dict) else {"message": raw}
@@ -0,0 +1,116 @@
1
+ """Gemini provider implementation (AI Studio and Vertex)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ from typing import Any
8
+
9
+ from deepresearch_flow.paper.providers.base import ProviderError
10
+
11
+
12
def _build_prompt(messages: list[dict[str, str]]) -> str:
    """Flatten chat messages into one prompt string.

    Each message becomes ``[role]`` on one line followed by its content;
    messages are separated by a blank line. A missing role defaults to
    ``"user"`` and missing content to an empty string.
    """
    return "\n\n".join(
        f"[{entry.get('role', 'user')}]\n{entry.get('content', '')}"
        for entry in messages
    )
19
+
20
+
21
def _normalize_response(response: Any) -> dict[str, Any]:
    """Convert an SDK response into a plain dictionary.

    Args:
        response: A dict, an object exposing ``to_dict()``, or any object
            whose string form may be a JSON document.

    Returns:
        The dict itself, the result of ``to_dict()``, or the parsed JSON
        object. When the string form is not a JSON object, returns
        ``{"message": <string form>}``.
    """
    if isinstance(response, dict):
        return response
    if hasattr(response, "to_dict"):
        return response.to_dict()
    raw = str(response)
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return {"message": raw}
    # Valid JSON that is not an object (list, number, string, null) would
    # break callers that use ``.get``; wrap it like any other opaque text.
    return parsed if isinstance(parsed, dict) else {"message": raw}
30
+
31
+
32
+ def _extract_text(response: Any) -> str | None:
33
+ text = getattr(response, "text", None)
34
+ if text:
35
+ return text
36
+
37
+ data = _normalize_response(response)
38
+ for candidate in data.get("candidates", []):
39
+ content = candidate.get("content", {})
40
+ parts = content.get("parts") or []
41
+ for part in parts:
42
+ part_text = part.get("text")
43
+ if part_text:
44
+ return part_text
45
+ return None
46
+
47
+
48
async def chat_ai_studio(
    api_key: str,
    model: str,
    messages: list[dict[str, str]],
) -> str:
    """Send a chat request to Gemini with an API-key client and return the text.

    The messages are flattened into a single prompt with ``_build_prompt``
    and the blocking SDK call runs in a worker thread.

    Args:
        api_key: API key passed to the ``genai`` client.
        model: Model name.
        messages: Chat messages as role/content dictionaries.

    Returns:
        The text extracted from the response.

    Raises:
        ProviderError: If ``google-genai`` is missing, the SDK call fails
            (retryable), or the response carries no text.
    """
    try:
        from google import genai
    except ImportError as exc:
        raise ProviderError("google-genai package is not installed") from exc

    flattened = _build_prompt(messages)
    studio_client = genai.Client(api_key=api_key)
    call_kwargs = {"model": model, "contents": flattened}

    try:
        reply = await asyncio.to_thread(
            studio_client.models.generate_content,
            **call_kwargs,
        )
    except Exception as exc:  # pragma: no cover - SDK error
        raise ProviderError(str(exc), retryable=True) from exc

    text = _extract_text(reply)
    if text:
        return text
    raise ProviderError("Gemini response missing content")
74
+
75
+
76
async def chat_vertex(
    project_id: str,
    location: str,
    credentials_path: str | None,
    model: str,
    messages: list[dict[str, str]],
) -> str:
    """Send a chat request to Gemini through a Vertex client and return the text.

    The messages are flattened into a single prompt with ``_build_prompt``
    and the blocking SDK call runs in a worker thread.

    Args:
        project_id: Project passed to the ``genai`` client.
        location: Location passed to the ``genai`` client.
        credentials_path: Optional path to a credentials file. When empty,
            no explicit credentials are passed to the client.
        model: Model name.
        messages: Chat messages as role/content dictionaries.

    Returns:
        The text extracted from the response.

    Raises:
        ProviderError: If a required package is missing, the credentials
            file cannot be loaded, the SDK call fails (retryable), or the
            response carries no text.
    """
    try:
        from google import genai
    except ImportError as exc:
        raise ProviderError("google-genai package is not installed") from exc

    credentials = None
    if credentials_path:
        try:
            from google.auth import load_credentials_from_file
            from google.auth.exceptions import DefaultCredentialsError
        except ImportError as exc:
            raise ProviderError("google-auth package is not installed") from exc
        try:
            # Explicitly loaded credentials carry no scopes by default;
            # request the cloud-platform scope so they can obtain a token
            # for Vertex.
            credentials, _ = load_credentials_from_file(
                credentials_path,
                scopes=["https://www.googleapis.com/auth/cloud-platform"],
            )
        except (DefaultCredentialsError, OSError) as exc:
            # Report an unreadable or invalid credentials file the same way
            # as every other provider failure.
            raise ProviderError(
                f"Failed to load credentials from {credentials_path}: {exc}"
            ) from exc

    prompt = _build_prompt(messages)
    client = genai.Client(
        vertexai=True,
        project=project_id,
        location=location,
        credentials=credentials,
    )

    try:
        response = await asyncio.to_thread(
            client.models.generate_content,
            model=model,
            contents=prompt,
        )
    except Exception as exc:  # pragma: no cover - SDK error
        raise ProviderError(str(exc), retryable=True) from exc

    content = _extract_text(response)
    if not content:
        raise ProviderError("Gemini response missing content")
    return content