aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336) hide show
  1. aiagents4pharma/__init__.py +11 -0
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
  11. aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
  12. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
  13. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
  14. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  16. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  17. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  18. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  19. aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
  20. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
  21. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
  22. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
  23. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
  24. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  25. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  26. aiagents4pharma/talk2biomodels/README.md +1 -0
  27. aiagents4pharma/talk2biomodels/__init__.py +5 -0
  28. aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
  29. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
  30. aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
  31. aiagents4pharma/talk2biomodels/api/ols.py +75 -0
  32. aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
  33. aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
  34. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
  35. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
  36. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
  37. aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
  38. aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
  39. aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
  40. aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
  41. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
  42. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
  43. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
  44. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
  45. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
  46. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
  47. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
  48. aiagents4pharma/talk2biomodels/install.md +63 -0
  49. aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
  50. aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
  51. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
  52. aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
  53. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
  54. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  55. aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
  56. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  57. aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
  58. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
  59. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
  60. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
  61. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
  62. aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
  63. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
  64. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
  65. aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
  66. aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
  67. aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
  68. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
  69. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
  70. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
  71. aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
  72. aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
  73. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
  74. aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
  75. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
  76. aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
  77. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
  78. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
  79. aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
  80. aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
  81. aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
  82. aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
  83. aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
  84. aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
  85. aiagents4pharma/talk2cells/README.md +1 -0
  86. aiagents4pharma/talk2cells/__init__.py +5 -0
  87. aiagents4pharma/talk2cells/agents/__init__.py +6 -0
  88. aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
  89. aiagents4pharma/talk2cells/states/__init__.py +6 -0
  90. aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
  91. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
  92. aiagents4pharma/talk2cells/tools/__init__.py +6 -0
  93. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
  94. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
  95. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
  96. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  97. aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
  98. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  99. aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
  100. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
  101. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
  102. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
  103. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  104. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  105. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
  106. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  107. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
  108. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
  110. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  111. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  112. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  113. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
  114. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  115. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  116. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  117. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  118. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
  119. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
  120. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  121. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  122. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  123. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  124. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
  125. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
  126. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
  127. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
  128. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
  129. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  130. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  131. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  132. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  133. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
  134. aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
  135. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
  136. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
  137. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
  151. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
  152. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
  153. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
  154. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
  155. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
  156. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
  157. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
  158. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
  159. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
  160. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
  161. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
  162. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
  163. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
  164. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
  165. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
  166. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  167. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
  168. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  169. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
  170. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
  171. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
  172. aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
  173. aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
  174. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
  175. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
  176. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
  177. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
  178. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
  179. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
  180. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
  181. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
  182. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
  183. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
  184. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
  185. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
  186. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
  187. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
  188. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
  189. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
  190. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
  191. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
  192. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
  193. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  194. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  195. aiagents4pharma/talk2scholars/README.md +1 -0
  196. aiagents4pharma/talk2scholars/__init__.py +7 -0
  197. aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
  198. aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
  199. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
  200. aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
  201. aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
  202. aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
  203. aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
  204. aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
  205. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
  206. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
  207. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
  208. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
  209. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
  210. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
  211. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
  212. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
  213. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
  214. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
  215. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
  216. aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
  217. aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
  218. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
  219. aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
  220. aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
  221. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
  222. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
  223. aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
  224. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  225. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
  226. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
  227. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
  228. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
  229. aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
  230. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
  231. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
  232. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
  233. aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
  234. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
  235. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  236. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  237. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  238. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  239. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  240. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  241. aiagents4pharma/talk2scholars/install.md +122 -0
  242. aiagents4pharma/talk2scholars/state/__init__.py +7 -0
  243. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
  244. aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
  245. aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
  246. aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
  247. aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
  248. aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
  249. aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
  250. aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
  251. aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
  252. aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
  253. aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
  254. aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
  255. aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
  256. aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
  257. aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
  258. aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
  259. aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
  260. aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
  261. aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
  262. aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
  263. aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
  264. aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
  265. aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
  266. aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
  267. aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
  268. aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
  269. aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
  270. aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
  271. aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
  272. aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
  273. aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
  274. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
  275. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
  276. aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
  277. aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
  278. aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
  279. aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
  280. aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
  281. aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
  282. aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
  283. aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
  284. aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
  285. aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
  286. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
  287. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
  288. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
  289. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
  290. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
  291. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
  292. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
  293. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
  294. aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
  295. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
  296. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
  297. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  298. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
  299. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  300. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  301. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  302. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
  303. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
  304. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
  305. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  306. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
  307. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
  308. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  309. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
  310. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
  311. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
  312. aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
  313. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
  314. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
  315. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
  316. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
  317. aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
  318. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
  319. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
  320. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
  321. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
  322. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
  323. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
  324. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
  325. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
  326. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
  327. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
  328. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
  329. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
  330. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
  331. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
  332. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
  333. aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
  334. aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
  335. aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
  336. aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,469 @@
1
+ """
2
+ Unit tests for ArxivDownloader.
3
+ Tests XML parsing, PDF URL construction, and metadata extraction.
4
+ """
5
+
6
+ import unittest
7
+ import xml.etree.ElementTree as ET
8
+ from unittest.mock import Mock, patch
9
+
10
+ import requests
11
+
12
+ from aiagents4pharma.talk2scholars.tools.paper_download.utils.arxiv_downloader import (
13
+ ArxivDownloader,
14
+ )
15
+
16
+
17
class ArxivDownloaderTestShim(ArxivDownloader):
    """Test-only subclass that exposes ArxivDownloader's protected helpers.

    Every ``*_public`` method forwards its arguments unchanged to the
    underscore-prefixed helper of the same name, so the tests can exercise
    those helpers without tripping pylint's protected-access check (W0212).
    """

    def extract_basic_metadata_public(self, entry, ns):
        """Delegate to ``_extract_basic_metadata`` and return its result."""
        return self._extract_basic_metadata(entry, ns)

    def extract_title_public(self, entry, ns):
        """Delegate to ``_extract_title`` and return its result."""
        return self._extract_title(entry, ns)

    def extract_authors_public(self, entry, ns):
        """Delegate to ``_extract_authors`` and return its result."""
        return self._extract_authors(entry, ns)

    def extract_abstract_public(self, entry, ns):
        """Delegate to ``_extract_abstract`` and return its result."""
        return self._extract_abstract(entry, ns)

    def extract_publication_date_public(self, entry, ns):
        """Delegate to ``_extract_publication_date`` and return its result."""
        return self._extract_publication_date(entry, ns)

    def extract_pdf_metadata_public(self, pdf_result, identifier):
        """Delegate to ``_extract_pdf_metadata`` and return its result."""
        return self._extract_pdf_metadata(pdf_result, identifier)

    def get_paper_identifier_info_public(self, paper):
        """Delegate to ``_get_paper_identifier_info`` and return its result."""
        return self._get_paper_identifier_info(paper)

    def add_service_identifier_public(self, entry, identifier):
        """Delegate to ``_add_service_identifier``; nothing is returned."""
        self._add_service_identifier(entry, identifier)
51
+
52
+
53
+ class TestArxivDownloader(unittest.TestCase):
54
+ """Tests for the ArxivDownloader class."""
55
+
56
def setUp(self):
    """Create the mocked config, the downloader under test and a sample feed."""
    self.mock_config = Mock(
        api_url="http://export.arxiv.org/api/query",
        pdf_base_url="https://arxiv.org/pdf",
        request_timeout=30,
        chunk_size=8192,
        xml_namespace={"atom": "http://www.w3.org/2005/Atom"},
    )

    # The shim subclass gives public access to the protected helpers,
    # which keeps the tests free of W0212 warnings.
    self.downloader = ArxivDownloaderTestShim(self.mock_config)

    # Atom feed with a single entry, shaped like an arXiv API response.
    self.sample_xml = """<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<id>http://arxiv.org/abs/1234.5678v1</id>
<updated>2023-01-01T12:00:00Z</updated>
<published>2023-01-01T12:00:00Z</published>
<title>Test Paper Title</title>
<summary>This is a test abstract for the paper.</summary>
<author>
<name>John Doe</name>
</author>
<author>
<name>Jane Smith</name>
</author>
<link href="http://arxiv.org/abs/1234.5678v1" rel="alternate" type="text/html"/>
<link href="http://arxiv.org/pdf/1234.5678v1.pdf"
rel="related"
type="application/pdf"
title="pdf"/>
</entry>
</feed>"""
90
+
91
def test_initialization(self):
    """The downloader exposes the expected API URL, PDF base URL, timeout and chunk size."""
    expected_attributes = (
        ("api_url", "http://export.arxiv.org/api/query"),
        ("pdf_base_url", "https://arxiv.org/pdf"),
        ("request_timeout", 30),
        ("chunk_size", 8192),
    )
    for attribute, expected_value in expected_attributes:
        self.assertEqual(getattr(self.downloader, attribute), expected_value)
97
+
98
+ @patch("requests.get")
99
def test_fetch_metadata_success(self, mock_get):
    """fetch_metadata issues one GET to the arXiv API and returns the parsed feed."""
    response = Mock()
    response.text = self.sample_xml
    response.raise_for_status = Mock()
    mock_get.return_value = response

    feed = self.downloader.fetch_metadata("1234.5678")

    # The query is embedded in the URL itself instead of a params mapping.
    mock_get.assert_called_once_with(
        "http://export.arxiv.org/api/query?search_query=id:1234.5678&start=0&max_results=1",
        timeout=30,
    )
    response.raise_for_status.assert_called_once()

    # The return value is the parsed Atom <feed> root element.
    self.assertIsInstance(feed, ET.Element)
    self.assertEqual(feed.tag, "{http://www.w3.org/2005/Atom}feed")
118
+
119
+ @patch("requests.get")
120
def test_fetch_metadata_request_error(self, mock_get):
    """A RequestException raised by requests.get propagates out of fetch_metadata."""
    mock_get.side_effect = requests.RequestException("Network error")

    self.assertRaises(
        requests.RequestException,
        self.downloader.fetch_metadata,
        "1234.5678",
    )
126
+
127
+ @patch("requests.get")
128
def test_fetch_metadata_invalid_xml(self, mock_get):
    """A response body that is not XML makes fetch_metadata raise ET.ParseError."""
    bad_response = Mock()
    bad_response.text = "Invalid XML content"
    bad_response.raise_for_status = Mock()
    mock_get.return_value = bad_response

    self.assertRaises(
        ET.ParseError,
        self.downloader.fetch_metadata,
        "1234.5678",
    )
137
+
138
+ @patch("requests.get")
139
def test_fetch_metadata_no_entry_found(self, mock_get):
    """A feed without any <entry> makes fetch_metadata raise RuntimeError."""
    # Well-formed feed (both namespaces declared) that contains no entry.
    feed_without_entry = """<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">
<title>ArXiv Query: search_query=all:1234.5678</title>
<id>http://arxiv.org/api/query?search_query=all:1234.5678</id>
<opensearch:totalResults>0</opensearch:totalResults>
<opensearch:startIndex>0</opensearch:startIndex>
</feed>"""

    response = Mock()
    response.text = feed_without_entry
    response.raise_for_status = Mock()
    mock_get.return_value = response

    with self.assertRaises(RuntimeError) as raised:
        self.downloader.fetch_metadata("1234.5678")

    error_text = str(raised.exception)
    self.assertIn("No entry found in arXiv API response", error_text)
159
+
160
def test_construct_pdf_url_from_metadata(self):
    """``construct_pdf_url`` returns the PDF URL taken from the sample feed."""
    feed_root = ET.fromstring(self.sample_xml)

    # The expected value is the versioned link URL, not a URL rebuilt from the bare ID.
    self.assertEqual(
        self.downloader.construct_pdf_url(feed_root, "1234.5678"),
        "http://arxiv.org/pdf/1234.5678v1.pdf",
    )
168
+
169
def test_construct_pdf_url_fallback(self):
    """An entry that lacks a PDF link yields the constructed fallback URL."""
    # The only <link> in this entry points at the abstract page.
    feed_without_pdf_link = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<feed xmlns="http://www.w3.org/2005/Atom">\n'
        "<entry>\n"
        "<id>http://arxiv.org/abs/1234.5678v1</id>\n"
        "<title>Test Paper Title</title>\n"
        '<link href="http://arxiv.org/abs/1234.5678v1" rel="alternate" type="text/html"/>\n'
        "</entry>\n"
        "</feed>"
    )
    feed_root = ET.fromstring(feed_without_pdf_link)

    self.assertEqual(
        self.downloader.construct_pdf_url(feed_root, "1234.5678"),
        "https://arxiv.org/pdf/1234.5678.pdf",
    )
187
+
188
def test_construct_pdf_url_no_entry(self):
    """A feed with no ``entry`` element makes ``construct_pdf_url`` return an empty string."""
    entryless_feed = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<feed xmlns="http://www.w3.org/2005/Atom">\n'
        "</feed>"
    )
    feed_root = ET.fromstring(entryless_feed)

    self.assertEqual(self.downloader.construct_pdf_url(feed_root, "1234.5678"), "")
200
+
201
def test_extract_paper_metadata_success(self):
    """Feed metadata and a download result are merged into the complete paper record."""
    temp_path, filename = "/tmp/test.pdf", "test_paper.pdf"
    feed_root = ET.fromstring(self.sample_xml)

    paper = self.downloader.extract_paper_metadata(
        feed_root, "1234.5678", (temp_path, filename)
    )

    # The temp path is expected under three keys: URL, pdf_url and temp_file_path.
    wanted = {
        "Title": "Test Paper Title",
        "Authors": ["John Doe", "Jane Smith"],
        "Abstract": "This is a test abstract for the paper.",
        "Publication Date": "2023-01-01T12:00:00Z",
        "URL": temp_path,
        "pdf_url": temp_path,
        "filename": filename,
        "source": "arxiv",
        "arxiv_id": "1234.5678",
        "access_type": "open_access_downloaded",
        "temp_file_path": temp_path,
    }
    self.assertEqual(paper, wanted)
224
+
225
def test_extract_paper_metadata_no_pdf(self):
    """With no download result the PDF fields are blank and access is ``download_failed``."""
    feed_root = ET.fromstring(self.sample_xml)

    # Pin the default filename so the check does not depend on the real helper.
    with patch.object(self.downloader, "get_default_filename", return_value="1234.5678.pdf"):
        paper = self.downloader.extract_paper_metadata(feed_root, "1234.5678", None)

    expected_fields = {
        "Title": "Test Paper Title",
        "URL": "",
        "pdf_url": "",
        "filename": "1234.5678.pdf",
        "access_type": "download_failed",
        "temp_file_path": "",
    }
    for field, wanted in expected_fields.items():
        self.assertEqual(paper[field], wanted)
239
+
240
def test_extract_paper_metadata_no_entry(self):
    """A feed without an ``entry`` makes ``extract_paper_metadata`` raise ``RuntimeError``."""
    entryless_feed = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<feed xmlns="http://www.w3.org/2005/Atom">\n'
        "</feed>"
    )
    feed_root = ET.fromstring(entryless_feed)

    with self.assertRaises(RuntimeError) as raised:
        self.downloader.extract_paper_metadata(feed_root, "1234.5678", None)

    self.assertIn("No entry found in metadata", str(raised.exception))
252
+
253
def test_extract_basic_metadata(self):
    """``extract_basic_metadata_public`` returns title, authors, abstract and date."""
    atom_ns = {"atom": "http://www.w3.org/2005/Atom"}
    first_entry = ET.fromstring(self.sample_xml).find("atom:entry", atom_ns)

    basics = self.downloader.extract_basic_metadata_public(first_entry, atom_ns)

    self.assertEqual(
        basics,
        {
            "Title": "Test Paper Title",
            "Authors": ["John Doe", "Jane Smith"],
            "Abstract": "This is a test abstract for the paper.",
            "Publication Date": "2023-01-01T12:00:00Z",
        },
    )
268
+
269
def test_extract_title_variants(self):
    """``extract_title_public`` returns the title text, or "N/A" when no title exists."""
    atom_ns = {"atom": "http://www.w3.org/2005/Atom"}

    # Variant 1: the shared sample feed, whose entry has a title.
    titled_entry = ET.fromstring(self.sample_xml).find("atom:entry", atom_ns)
    self.assertEqual(
        self.downloader.extract_title_public(titled_entry, atom_ns),
        "Test Paper Title",
    )

    # Variant 2: an entry that carries nothing but an <id>.
    untitled_feed = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<feed xmlns="http://www.w3.org/2005/Atom">\n'
        "<entry>\n"
        "<id>http://arxiv.org/abs/1234.5678v1</id>\n"
        "</entry>\n"
        "</feed>"
    )
    untitled_entry = ET.fromstring(untitled_feed).find("atom:entry", atom_ns)
    self.assertEqual(self.downloader.extract_title_public(untitled_entry, atom_ns), "N/A")
288
+
289
def test_extract_authors_variants(self):
    """``extract_authors_public`` returns the author names, or ``[]`` when there are none."""
    atom_ns = {"atom": "http://www.w3.org/2005/Atom"}

    # Variant 1: the shared sample feed, which lists two authors.
    authored_entry = ET.fromstring(self.sample_xml).find("atom:entry", atom_ns)
    author_names = self.downloader.extract_authors_public(authored_entry, atom_ns)
    self.assertEqual(author_names, ["John Doe", "Jane Smith"])

    # Variant 2: an entry with an <id> and a <title> but no <author>.
    authorless_feed = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<feed xmlns="http://www.w3.org/2005/Atom">\n'
        "<entry>\n"
        "<id>http://arxiv.org/abs/1234.5678v1</id>\n"
        "<title>Test Paper Title</title>\n"
        "</entry>\n"
        "</feed>"
    )
    authorless_entry = ET.fromstring(authorless_feed).find("atom:entry", atom_ns)
    self.assertEqual(self.downloader.extract_authors_public(authorless_entry, atom_ns), [])
312
+
313
def test_extract_abstract_and_publication_date(self):
    """The abstract and publication-date helpers return the sample entry's values."""
    atom_ns = {"atom": "http://www.w3.org/2005/Atom"}
    first_entry = ET.fromstring(self.sample_xml).find("atom:entry", atom_ns)

    abstract = self.downloader.extract_abstract_public(first_entry, atom_ns)
    self.assertEqual(abstract, "This is a test abstract for the paper.")

    published = self.downloader.extract_publication_date_public(first_entry, atom_ns)
    self.assertEqual(published, "2023-01-01T12:00:00Z")
327
+
328
def test_extract_pdf_metadata_variants(self):
    """``extract_pdf_metadata_public`` output for a download tuple and for ``None``."""
    # Variant 1: a (temp path, filename) tuple is supplied.
    downloaded = ("/tmp/test.pdf", "paper.pdf")
    self.assertEqual(
        self.downloader.extract_pdf_metadata_public(downloaded, "1234.5678"),
        {
            "URL": "/tmp/test.pdf",
            "pdf_url": "/tmp/test.pdf",
            "filename": "paper.pdf",
            "access_type": "open_access_downloaded",
            "temp_file_path": "/tmp/test.pdf",
        },
    )

    # Variant 2: no download result; the default-filename helper is patched.
    failed_download_fields = {
        "URL": "",
        "pdf_url": "",
        "filename": "default.pdf",
        "access_type": "download_failed",
        "temp_file_path": "",
    }
    with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
        self.assertEqual(
            self.downloader.extract_pdf_metadata_public(None, "1234.5678"),
            failed_download_fields,
        )
357
+
358
def test_service_and_identifier_helpers(self):
    """Check the service name, the identifier label and the default PDF filename."""
    downloader = self.downloader

    self.assertEqual(downloader.get_service_name(), "arXiv")
    self.assertEqual(downloader.get_identifier_name(), "arXiv ID")
    self.assertEqual(downloader.get_default_filename("1234.5678"), "1234.5678.pdf")
363
+
364
def test_get_paper_identifier_info(self):
    """The identifier info text contains both the arXiv ID and the publication day."""
    paper_record = {
        "arxiv_id": "1234.5678",
        "Publication Date": "2023-01-01T12:00:00Z",
    }

    summary = self.downloader.get_paper_identifier_info_public(paper_record)

    for fragment in ("1234.5678", "2023-01-01"):
        self.assertIn(fragment, summary)
372
+
373
def test_add_service_identifier(self):
    """``add_service_identifier_public`` stores the ID under ``arxiv_id`` in the given dict."""
    record = {}

    self.downloader.add_service_identifier_public(record, "1234.5678")

    self.assertEqual(record["arxiv_id"], "1234.5678")
380
+
381
+
382
+ class TestArxivDownloaderIntegration(unittest.TestCase):
383
+ """Integration tests for ArxivDownloader with mocked external dependencies."""
384
+
385
def setUp(self):
    """Create the mocked config, the downloader under test and a one-entry sample feed."""
    self.mock_config = Mock(
        api_url="http://export.arxiv.org/api/query",
        pdf_base_url="https://arxiv.org/pdf",
        request_timeout=30,
        chunk_size=8192,
        xml_namespace={"atom": "http://www.w3.org/2005/Atom"},
    )

    self.downloader = ArxivDownloaderTestShim(self.mock_config)

    # Single-entry Atom feed whose <link> is marked title="pdf".
    self.sample_xml = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<feed xmlns="http://www.w3.org/2005/Atom">\n'
        "<entry>\n"
        "<id>http://arxiv.org/abs/1234.5678v1</id>\n"
        "<published>2023-01-01T12:00:00Z</published>\n"
        "<title>Integration Test Paper</title>\n"
        "<summary>This is a test abstract.</summary>\n"
        "<author>\n"
        "<name>Test Author</name>\n"
        "</author>\n"
        '<link href="http://arxiv.org/pdf/1234.5678v1.pdf"\n'
        'rel="related"\n'
        'type="application/pdf"\n'
        'title="pdf"/>\n'
        "</entry>\n"
        "</feed>"
    )
412
+
413
@patch(
    "aiagents4pharma.talk2scholars.tools.paper_download.utils.arxiv_downloader"
    ".ArxivDownloader.download_pdf_to_temp"
)
@patch("requests.get")
def test_full_paper_processing_workflow(self, mock_get, mock_download):
    """Run fetch, URL construction, download and extraction end to end for one arXiv ID."""
    # Both external calls are mocked: the API request and the PDF download.
    mock_get.return_value = Mock(text=self.sample_xml, raise_for_status=Mock())
    mock_download.return_value = ("/tmp/paper.pdf", "1234.5678.pdf")

    processed = {}
    for arxiv_id in ["1234.5678"]:
        # Same order as the workflow: metadata, PDF URL, download, extraction.
        feed_root = self.downloader.fetch_metadata(arxiv_id)
        download = self.downloader.download_pdf_to_temp(
            self.downloader.construct_pdf_url(feed_root, arxiv_id),
            arxiv_id,
        )
        processed[arxiv_id] = self.downloader.extract_paper_metadata(
            feed_root, arxiv_id, download
        )

    # The resulting record combines feed metadata with the mocked download result.
    self.assertIn("1234.5678", processed)
    record = processed["1234.5678"]

    self.assertEqual(record["Title"], "Integration Test Paper")
    self.assertEqual(record["Authors"], ["Test Author"])
    self.assertEqual(record["access_type"], "open_access_downloaded")
    self.assertEqual(record["filename"], "1234.5678.pdf")
    self.assertEqual(record["temp_file_path"], "/tmp/paper.pdf")

    # One API request, and the download was asked for the feed's PDF link.
    mock_get.assert_called_once()
    mock_download.assert_called_once_with("http://arxiv.org/pdf/1234.5678v1.pdf", "1234.5678")
461
+
462
@patch("requests.get")
def test_error_handling_workflow(self, mock_get):
    """A network failure in ``requests.get`` surfaces from ``fetch_metadata`` unchanged."""
    mock_get.side_effect = requests.RequestException("Network error")

    self.assertRaises(
        requests.RequestException,
        self.downloader.fetch_metadata,
        "1234.5678",
    )