aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336) hide show
  1. aiagents4pharma/__init__.py +11 -0
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
  11. aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
  12. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
  13. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
  14. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  16. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  17. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  18. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  19. aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
  20. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
  21. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
  22. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
  23. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
  24. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  25. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  26. aiagents4pharma/talk2biomodels/README.md +1 -0
  27. aiagents4pharma/talk2biomodels/__init__.py +5 -0
  28. aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
  29. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
  30. aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
  31. aiagents4pharma/talk2biomodels/api/ols.py +75 -0
  32. aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
  33. aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
  34. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
  35. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
  36. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
  37. aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
  38. aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
  39. aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
  40. aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
  41. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
  42. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
  43. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
  44. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
  45. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
  46. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
  47. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
  48. aiagents4pharma/talk2biomodels/install.md +63 -0
  49. aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
  50. aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
  51. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
  52. aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
  53. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
  54. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  55. aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
  56. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  57. aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
  58. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
  59. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
  60. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
  61. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
  62. aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
  63. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
  64. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
  65. aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
  66. aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
  67. aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
  68. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
  69. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
  70. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
  71. aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
  72. aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
  73. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
  74. aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
  75. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
  76. aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
  77. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
  78. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
  79. aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
  80. aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
  81. aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
  82. aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
  83. aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
  84. aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
  85. aiagents4pharma/talk2cells/README.md +1 -0
  86. aiagents4pharma/talk2cells/__init__.py +5 -0
  87. aiagents4pharma/talk2cells/agents/__init__.py +6 -0
  88. aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
  89. aiagents4pharma/talk2cells/states/__init__.py +6 -0
  90. aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
  91. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
  92. aiagents4pharma/talk2cells/tools/__init__.py +6 -0
  93. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
  94. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
  95. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
  96. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  97. aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
  98. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  99. aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
  100. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
  101. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
  102. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
  103. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  104. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  105. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
  106. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  107. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
  108. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
  110. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  111. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  112. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  113. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
  114. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  115. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  116. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  117. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  118. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
  119. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
  120. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  121. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  122. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  123. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  124. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
  125. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
  126. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
  127. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
  128. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
  129. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  130. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  131. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  132. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  133. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
  134. aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
  135. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
  136. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
  137. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
  151. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
  152. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
  153. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
  154. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
  155. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
  156. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
  157. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
  158. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
  159. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
  160. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
  161. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
  162. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
  163. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
  164. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
  165. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
  166. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  167. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
  168. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  169. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
  170. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
  171. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
  172. aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
  173. aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
  174. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
  175. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
  176. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
  177. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
  178. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
  179. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
  180. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
  181. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
  182. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
  183. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
  184. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
  185. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
  186. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
  187. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
  188. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
  189. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
  190. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
  191. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
  192. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
  193. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  194. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  195. aiagents4pharma/talk2scholars/README.md +1 -0
  196. aiagents4pharma/talk2scholars/__init__.py +7 -0
  197. aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
  198. aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
  199. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
  200. aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
  201. aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
  202. aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
  203. aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
  204. aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
  205. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
  206. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
  207. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
  208. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
  209. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
  210. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
  211. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
  212. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
  213. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
  214. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
  215. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
  216. aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
  217. aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
  218. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
  219. aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
  220. aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
  221. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
  222. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
  223. aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
  224. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  225. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
  226. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
  227. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
  228. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
  229. aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
  230. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
  231. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
  232. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
  233. aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
  234. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
  235. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  236. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  237. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  238. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  239. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  240. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  241. aiagents4pharma/talk2scholars/install.md +122 -0
  242. aiagents4pharma/talk2scholars/state/__init__.py +7 -0
  243. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
  244. aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
  245. aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
  246. aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
  247. aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
  248. aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
  249. aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
  250. aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
  251. aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
  252. aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
  253. aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
  254. aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
  255. aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
  256. aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
  257. aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
  258. aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
  259. aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
  260. aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
  261. aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
  262. aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
  263. aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
  264. aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
  265. aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
  266. aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
  267. aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
  268. aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
  269. aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
  270. aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
  271. aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
  272. aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
  273. aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
  274. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
  275. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
  276. aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
  277. aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
  278. aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
  279. aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
  280. aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
  281. aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
  282. aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
  283. aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
  284. aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
  285. aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
  286. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
  287. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
  288. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
  289. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
  290. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
  291. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
  292. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
  293. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
  294. aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
  295. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
  296. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
  297. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  298. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
  299. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  300. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  301. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  302. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
  303. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
  304. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
  305. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  306. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
  307. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
  308. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  309. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
  310. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
  311. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
  312. aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
  313. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
  314. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
  315. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
  316. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
  317. aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
  318. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
  319. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
  320. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
  321. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
  322. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
  323. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
  324. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
  325. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
  326. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
  327. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
  328. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
  329. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
  330. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
  331. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
  332. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
  333. aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
  334. aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
  335. aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
  336. aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,207 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ ArXiv paper downloader implementation.
4
+ """
5
+
6
+ import logging
7
+ import xml.etree.ElementTree as ET
8
+ from typing import Any
9
+
10
+ import requests
11
+
12
+ from .base_paper_downloader import BasePaperDownloader
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class ArxivDownloader(BasePaperDownloader):
    """ArXiv-specific implementation of paper downloader.

    Metadata is fetched from the arXiv Atom API at ``config.api_url``. The PDF
    link is taken from the Atom entry when one is present and is otherwise
    built from ``config.pdf_base_url`` and the identifier.
    """

    def __init__(self, config: Any):
        """Initialize ArXiv downloader with configuration.

        Args:
            config: Configuration object. Must expose ``api_url`` and
                ``pdf_base_url``. May expose ``xml_namespace`` (a mapping of
                prefix to namespace URI); when absent, the Atom namespace is
                registered under the ``atom`` prefix.
        """
        super().__init__(config)
        self.api_url = config.api_url
        self.pdf_base_url = config.pdf_base_url
        # XML namespace configuration
        self.xml_namespaces = getattr(
            config, "xml_namespace", {"atom": "http://www.w3.org/2005/Atom"}
        )

    def fetch_metadata(self, identifier: str) -> ET.Element:
        """
        Fetch paper metadata from arXiv API.

        Args:
            identifier: arXiv ID (e.g., '1234.5678' or '2301.12345')

        Returns:
            XML root element from arXiv API response

        Raises:
            requests.RequestException: If API call fails
            xml.etree.ElementTree.ParseError: If the response is not well-formed XML
            RuntimeError: If no entry found in response
        """
        # Function-scope import so this fix does not depend on the module's
        # import block being edited as well.
        from urllib.parse import quote

        # The identifier is caller-supplied. Percent-encode it so characters
        # such as '&', '#' or spaces cannot cut the search term short or add
        # extra query parameters. Well-formed IDs ('2301.12345v2',
        # 'hep-th/9901001') only contain characters that quote() leaves alone,
        # so the resulting URL is unchanged for them.
        safe_identifier = quote(identifier, safe="/")
        query_url = f"{self.api_url}?search_query=id:{safe_identifier}&start=0&max_results=1"
        logger.info("Fetching metadata for arXiv ID %s from: %s", identifier, query_url)

        response = requests.get(query_url, timeout=self.request_timeout)
        response.raise_for_status()

        root = ET.fromstring(response.text)
        entry = root.find("atom:entry", self.xml_namespaces)

        # A feed without any <entry> means the ID matched nothing.
        if entry is None:
            raise RuntimeError("No entry found in arXiv API response")

        # The whole feed is returned (not just the entry); the other methods
        # look the entry up again from this root.
        return root

    def construct_pdf_url(self, metadata: ET.Element, identifier: str) -> str:
        """
        Extract or construct PDF URL from arXiv metadata.

        Args:
            metadata: XML root from arXiv API
            identifier: arXiv ID

        Returns:
            PDF URL string; empty string when the metadata holds no entry
        """
        entry = metadata.find("atom:entry", self.xml_namespaces)

        if entry is None:
            return ""

        # Try to find PDF link in metadata first
        pdf_url = next(
            (
                link.attrib.get("href")
                for link in entry.findall("atom:link", self.xml_namespaces)
                if link.attrib.get("title") == "pdf"
            ),
            None,
        )

        # Fallback to constructed PDF URL if not found in metadata
        # (also taken when the matching link has no or an empty href).
        if not pdf_url:
            pdf_url = f"{self.pdf_base_url}/{identifier}.pdf"
            logger.info("Using constructed PDF URL for %s: %s", identifier, pdf_url)

        return pdf_url

    def extract_paper_metadata(
        self,
        metadata: ET.Element,
        identifier: str,
        pdf_result: tuple[str, str] | None,
    ) -> dict[str, Any]:
        """
        Extract structured metadata from arXiv API response.

        Args:
            metadata: XML root from arXiv API
            identifier: arXiv ID
            pdf_result: Tuple of (temp_file_path, filename) if PDF downloaded

        Returns:
            Standardized paper metadata dictionary

        Raises:
            RuntimeError: If the metadata holds no entry
        """
        entry = metadata.find("atom:entry", self.xml_namespaces)

        if entry is None:
            raise RuntimeError("No entry found in metadata")

        # Extract basic metadata
        basic_metadata = self._extract_basic_metadata(entry, self.xml_namespaces)

        # Handle PDF download results
        pdf_metadata = self._extract_pdf_metadata(pdf_result, identifier)

        # Combine all metadata
        return {
            **basic_metadata,
            **pdf_metadata,
            "source": "arxiv",
            "arxiv_id": identifier,
        }

    @staticmethod
    def _element_text(entry: ET.Element, path: str, ns: dict) -> str:
        """Return the stripped text of the first ``path`` child, or "N/A" if there is none.

        A child that exists but has no text yields an empty string.
        """
        elem = entry.find(path, ns)
        if elem is None:
            return "N/A"
        return (elem.text or "").strip()

    def _extract_basic_metadata(self, entry: ET.Element, ns: dict) -> dict[str, Any]:
        """Extract basic metadata (title, authors, abstract, date) from entry."""
        return {
            "Title": self._extract_title(entry, ns),
            "Authors": self._extract_authors(entry, ns),
            "Abstract": self._extract_abstract(entry, ns),
            "Publication Date": self._extract_publication_date(entry, ns),
        }

    def _extract_title(self, entry: ET.Element, ns: dict) -> str:
        """Extract title from entry."""
        return self._element_text(entry, "atom:title", ns)

    def _extract_authors(self, entry: ET.Element, ns: dict) -> list:
        """Extract authors from entry, skipping authors without a non-empty name."""
        authors = []
        for author_elem in entry.findall("atom:author", ns):
            name_elem = author_elem.find("atom:name", ns)
            if name_elem is not None and name_elem.text:
                authors.append(name_elem.text.strip())
        return authors

    def _extract_abstract(self, entry: ET.Element, ns: dict) -> str:
        """Extract abstract from entry."""
        return self._element_text(entry, "atom:summary", ns)

    def _extract_publication_date(self, entry: ET.Element, ns: dict) -> str:
        """Extract publication date from entry."""
        return self._element_text(entry, "atom:published", ns)

    def _extract_pdf_metadata(
        self, pdf_result: tuple[str, str] | None, identifier: str
    ) -> dict[str, Any]:
        """Extract PDF-related metadata.

        Args:
            pdf_result: Tuple of (temp_file_path, filename), or None/empty
                when no PDF was downloaded
            identifier: arXiv ID, used for the default filename

        Returns:
            Dictionary with URL, pdf_url, filename, access_type and
            temp_file_path keys
        """
        if pdf_result:
            temp_file_path, filename = pdf_result
            # On success both URL fields carry the local temp file path.
            return {
                "URL": temp_file_path,
                "pdf_url": temp_file_path,
                "filename": filename,
                "access_type": "open_access_downloaded",
                "temp_file_path": temp_file_path,
            }

        return {
            "URL": "",
            "pdf_url": "",
            "filename": self.get_default_filename(identifier),
            "access_type": "download_failed",
            "temp_file_path": "",
        }

    def get_service_name(self) -> str:
        """Return service name."""
        return "arXiv"

    def get_identifier_name(self) -> str:
        """Return identifier display name."""
        return "arXiv ID"

    def get_default_filename(self, identifier: str) -> str:
        """Generate default filename for arXiv paper."""
        return f"{identifier}.pdf"

    def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
        """Get arXiv-specific identifier info for paper summary."""
        arxiv_id = paper.get("arxiv_id", "N/A")
        pub_date = paper.get("Publication Date", "N/A")
        return f" (arXiv:{arxiv_id}, {pub_date})"

    def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
        """Add arXiv ID field to entry."""
        entry["arxiv_id"] = identifier
@@ -0,0 +1,336 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Abstract base class for paper download tools.
4
+ Provides common functionality for arXiv, medRxiv, PubMed, and future paper sources.
5
+ """
6
+
7
+ import logging
8
+ import re
9
+ import tempfile
10
+ from abc import ABC, abstractmethod
11
+ from typing import Any
12
+
13
+ import requests
14
+
15
+ # Configure logging
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ class BasePaperDownloader(ABC):
20
+ """Abstract base class for paper download tools."""
21
+
22
+ def __init__(self, config: Any):
23
+ """Initialize with service-specific configuration."""
24
+ self.config = config
25
+ self.request_timeout = getattr(config, "request_timeout", 15)
26
+ self.chunk_size = getattr(config, "chunk_size", 8192)
27
+ self.user_agent = getattr(
28
+ config, "user_agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
29
+ )
30
+
31
+ # Abstract methods that each service must implement
32
+ @abstractmethod
33
+ def fetch_metadata(self, identifier: str) -> Any:
34
+ """
35
+ Fetch paper metadata from the service API.
36
+
37
+ Args:
38
+ identifier: Paper identifier (arXiv ID, DOI, PMID, etc.)
39
+
40
+ Returns:
41
+ Service-specific metadata object (XML, JSON, etc.)
42
+ """
43
+ raise NotImplementedError
44
+
45
+ @abstractmethod
46
+ def construct_pdf_url(self, metadata: Any, identifier: str) -> str:
47
+ """
48
+ Construct or extract PDF URL from metadata.
49
+
50
+ Args:
51
+ metadata: Metadata returned from fetch_metadata()
52
+ identifier: Original paper identifier
53
+
54
+ Returns:
55
+ PDF URL string (empty if not available)
56
+ """
57
+ raise NotImplementedError
58
+
59
+ @abstractmethod
60
+ def extract_paper_metadata(
61
+ self, metadata: Any, identifier: str, pdf_result: tuple[str, str] | None
62
+ ) -> dict[str, Any]:
63
+ """
64
+ Extract and structure metadata into standardized format.
65
+
66
+ Args:
67
+ metadata: Raw metadata from API
68
+ identifier: Original paper identifier
69
+ pdf_result: Tuple of (temp_file_path, filename) if PDF downloaded
70
+
71
+ Returns:
72
+ Standardized paper metadata dictionary
73
+ """
74
+ raise NotImplementedError
75
+
76
+ @abstractmethod
77
+ def get_service_name(self) -> str:
78
+ """Return service name (e.g., 'arxiv', 'medrxiv', 'pubmed')."""
79
+ raise NotImplementedError
80
+
81
+ @abstractmethod
82
+ def get_identifier_name(self) -> str:
83
+ """Return identifier display name (e.g., 'arXiv ID', 'DOI', 'PMID')."""
84
+ raise NotImplementedError
85
+
86
+ @abstractmethod
87
+ def get_default_filename(self, identifier: str) -> str:
88
+ """Generate default filename for the paper PDF."""
89
+ raise NotImplementedError
90
+
91
+ # Common methods shared by all services
92
+ def download_pdf_to_temp(self, pdf_url: str, identifier: str) -> tuple[str, str] | None:
93
+ """
94
+ Download PDF from URL to a temporary file.
95
+
96
+ Args:
97
+ pdf_url: URL to download PDF from
98
+ identifier: Paper identifier for logging
99
+
100
+ Returns:
101
+ Tuple of (temp_file_path, filename) or None if failed
102
+ """
103
+ if not pdf_url:
104
+ logger.info("No PDF URL available for %s %s", self.get_identifier_name(), identifier)
105
+ return None
106
+
107
+ try:
108
+ logger.info(
109
+ "Downloading PDF for %s %s from %s",
110
+ self.get_identifier_name(),
111
+ identifier,
112
+ pdf_url,
113
+ )
114
+
115
+ headers = {"User-Agent": self.user_agent}
116
+ response = requests.get(
117
+ pdf_url, headers=headers, timeout=self.request_timeout, stream=True
118
+ )
119
+ response.raise_for_status()
120
+
121
+ # Download to temporary file
122
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
123
+ for chunk in response.iter_content(chunk_size=self.chunk_size):
124
+ if chunk: # Filter out keep-alive chunks
125
+ temp_file.write(chunk)
126
+ temp_file_path = temp_file.name
127
+
128
+ logger.info(
129
+ "%s PDF downloaded to temporary file: %s",
130
+ self.get_service_name(),
131
+ temp_file_path,
132
+ )
133
+
134
+ # Try to extract filename from Content-Disposition header
135
+ filename = self.get_default_filename(identifier)
136
+ content_disposition = response.headers.get("Content-Disposition", "")
137
+
138
+ if "filename=" in content_disposition:
139
+ try:
140
+ filename_match = re.search(
141
+ r'filename[*]?=(?:"([^"]+)"|([^;]+))', content_disposition
142
+ )
143
+ if filename_match:
144
+ extracted_filename = filename_match.group(1) or filename_match.group(2)
145
+ extracted_filename = extracted_filename.strip().strip('"')
146
+ if extracted_filename and extracted_filename.endswith(".pdf"):
147
+ filename = extracted_filename
148
+ logger.info("Extracted filename from header: %s", filename)
149
+ except requests.RequestException as e:
150
+ logger.warning("Failed to extract filename from header: %s", e)
151
+
152
+ return temp_file_path, filename
153
+
154
+ except (requests.exceptions.RequestException, OSError) as e:
155
+ logger.error(
156
+ "Failed to download PDF for %s %s: %s",
157
+ self.get_identifier_name(),
158
+ identifier,
159
+ e,
160
+ )
161
+ return None
162
+
163
+ def get_snippet(self, abstract: str) -> str:
164
+ """
165
+ Extract the first one or two sentences from an abstract.
166
+
167
+ Args:
168
+ abstract: Full abstract text
169
+
170
+ Returns:
171
+ Snippet of first 1-2 sentences
172
+ """
173
+ if not abstract or abstract == "N/A":
174
+ return ""
175
+
176
+ sentences = abstract.split(". ")
177
+ snippet_sentences = sentences[:2]
178
+ snippet = ". ".join(snippet_sentences)
179
+
180
+ if not snippet.endswith("."):
181
+ snippet += "."
182
+
183
+ return snippet
184
+
185
+ def create_error_entry(self, identifier: str, error_msg: str) -> dict[str, Any]:
186
+ """
187
+ Create standardized error entry for failed paper processing.
188
+
189
+ Args:
190
+ identifier: Paper identifier
191
+ error_msg: Error message
192
+
193
+ Returns:
194
+ Error entry dictionary
195
+ """
196
+ return {
197
+ "Title": "Error fetching paper",
198
+ "Authors": [],
199
+ "Abstract": f"Error: {error_msg}",
200
+ "Publication Date": "N/A",
201
+ "URL": "",
202
+ "pdf_url": "",
203
+ "filename": self.get_default_filename(identifier),
204
+ "source": self.get_service_name(),
205
+ "access_type": "error",
206
+ "temp_file_path": "",
207
+ "error": error_msg,
208
+ # Service-specific identifier field will be added by subclasses
209
+ }
210
+
211
+ def build_summary(self, article_data: dict[str, Any]) -> str:
212
+ """
213
+ Build a summary string for up to three papers with snippets.
214
+
215
+ Args:
216
+ article_data: Dictionary of paper data keyed by identifier
217
+
218
+ Returns:
219
+ Formatted summary string
220
+ """
221
+ top = list(article_data.values())[:3]
222
+ lines: list[str] = []
223
+ downloaded_count = sum(
224
+ 1
225
+ for paper in article_data.values()
226
+ if paper.get("access_type") == "open_access_downloaded"
227
+ )
228
+
229
+ for idx, paper in enumerate(top):
230
+ title = paper.get("Title", "N/A")
231
+ access_type = paper.get("access_type", "N/A")
232
+ temp_file_path = paper.get("temp_file_path", "")
233
+ snippet = self.get_snippet(paper.get("Abstract", ""))
234
+
235
+ # Build paper line with service-specific identifier info
236
+ line = f"{idx + 1}. {title}"
237
+ line += self._get_paper_identifier_info(paper)
238
+ line += f"\n Access: {access_type}"
239
+
240
+ if temp_file_path:
241
+ line += f"\n Downloaded to: {temp_file_path}"
242
+ if snippet:
243
+ line += f"\n Abstract snippet: {snippet}"
244
+
245
+ lines.append(line)
246
+
247
+ summary = "\n".join(lines)
248
+ service_name = self.get_service_name()
249
+
250
+ return (
251
+ f"Download was successful from {service_name}. "
252
+ "Papers metadata are attached as an artifact. "
253
+ "Here is a summary of the results:\n"
254
+ f"Number of papers found: {len(article_data)}\n"
255
+ f"PDFs successfully downloaded: {downloaded_count}\n"
256
+ "Top 3 papers:\n" + summary
257
+ )
258
+
259
+ @abstractmethod
260
+ def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
261
+ """
262
+ Get service-specific identifier info for paper summary.
263
+
264
+ Args:
265
+ paper: Paper metadata dictionary
266
+
267
+ Returns:
268
+ Formatted identifier string (e.g., " (arXiv:1234.5678, 2023-01-01)")
269
+ """
270
+ raise NotImplementedError
271
+
272
+ def process_identifiers(self, identifiers: list[str]) -> dict[str, Any]:
273
+ """
274
+ Main processing loop for downloading papers.
275
+
276
+ Args:
277
+ identifiers: List of paper identifiers
278
+
279
+ Returns:
280
+ Dictionary of paper data keyed by identifier
281
+ """
282
+ logger.info(
283
+ "Processing %d identifiers from %s: %s",
284
+ len(identifiers),
285
+ self.get_service_name(),
286
+ identifiers,
287
+ )
288
+
289
+ article_data: dict[str, Any] = {}
290
+
291
+ for identifier in identifiers:
292
+ logger.info("Processing %s: %s", self.get_identifier_name(), identifier)
293
+
294
+ try:
295
+ # Step 1: Fetch metadata
296
+ metadata = self.fetch_metadata(identifier)
297
+
298
+ # Step 2: Extract PDF URL
299
+ pdf_url = self.construct_pdf_url(metadata, identifier)
300
+
301
+ # Step 3: Download PDF if available
302
+ pdf_result = None
303
+ if pdf_url:
304
+ pdf_result = self.download_pdf_to_temp(pdf_url, identifier)
305
+
306
+ # Step 4: Extract and structure metadata
307
+ article_data[identifier] = self.extract_paper_metadata(
308
+ metadata, identifier, pdf_result
309
+ )
310
+
311
+ except requests.RequestException as e:
312
+ logger.warning(
313
+ "Error processing %s %s: %s",
314
+ self.get_identifier_name(),
315
+ identifier,
316
+ str(e),
317
+ )
318
+
319
+ # Create error entry
320
+ error_entry = self.create_error_entry(identifier, str(e))
321
+ # Add service-specific identifier field
322
+ self._add_service_identifier(error_entry, identifier)
323
+ article_data[identifier] = error_entry
324
+
325
+ return article_data
326
+
327
+ @abstractmethod
328
+ def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
329
+ """
330
+ Add service-specific identifier field to entry.
331
+
332
+ Args:
333
+ entry: Paper entry dictionary to modify
334
+ identifier: Original identifier
335
+ """
336
+ raise NotImplementedError