aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336) hide show
  1. aiagents4pharma/__init__.py +11 -0
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
  11. aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
  12. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
  13. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
  14. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  16. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  17. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  18. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  19. aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
  20. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
  21. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
  22. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
  23. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
  24. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  25. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  26. aiagents4pharma/talk2biomodels/README.md +1 -0
  27. aiagents4pharma/talk2biomodels/__init__.py +5 -0
  28. aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
  29. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
  30. aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
  31. aiagents4pharma/talk2biomodels/api/ols.py +75 -0
  32. aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
  33. aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
  34. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
  35. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
  36. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
  37. aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
  38. aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
  39. aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
  40. aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
  41. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
  42. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
  43. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
  44. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
  45. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
  46. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
  47. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
  48. aiagents4pharma/talk2biomodels/install.md +63 -0
  49. aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
  50. aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
  51. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
  52. aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
  53. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
  54. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  55. aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
  56. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  57. aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
  58. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
  59. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
  60. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
  61. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
  62. aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
  63. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
  64. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
  65. aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
  66. aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
  67. aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
  68. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
  69. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
  70. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
  71. aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
  72. aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
  73. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
  74. aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
  75. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
  76. aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
  77. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
  78. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
  79. aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
  80. aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
  81. aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
  82. aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
  83. aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
  84. aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
  85. aiagents4pharma/talk2cells/README.md +1 -0
  86. aiagents4pharma/talk2cells/__init__.py +5 -0
  87. aiagents4pharma/talk2cells/agents/__init__.py +6 -0
  88. aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
  89. aiagents4pharma/talk2cells/states/__init__.py +6 -0
  90. aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
  91. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
  92. aiagents4pharma/talk2cells/tools/__init__.py +6 -0
  93. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
  94. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
  95. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
  96. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  97. aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
  98. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  99. aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
  100. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
  101. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
  102. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
  103. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  104. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  105. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
  106. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  107. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
  108. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
  110. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  111. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  112. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  113. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
  114. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  115. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  116. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  117. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  118. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
  119. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
  120. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  121. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  122. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  123. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  124. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
  125. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
  126. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
  127. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
  128. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
  129. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  130. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  131. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  132. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  133. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
  134. aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
  135. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
  136. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
  137. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
  151. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
  152. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
  153. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
  154. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
  155. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
  156. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
  157. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
  158. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
  159. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
  160. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
  161. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
  162. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
  163. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
  164. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
  165. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
  166. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  167. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
  168. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  169. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
  170. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
  171. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
  172. aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
  173. aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
  174. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
  175. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
  176. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
  177. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
  178. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
  179. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
  180. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
  181. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
  182. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
  183. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
  184. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
  185. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
  186. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
  187. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
  188. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
  189. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
  190. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
  191. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
  192. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
  193. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  194. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  195. aiagents4pharma/talk2scholars/README.md +1 -0
  196. aiagents4pharma/talk2scholars/__init__.py +7 -0
  197. aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
  198. aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
  199. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
  200. aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
  201. aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
  202. aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
  203. aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
  204. aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
  205. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
  206. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
  207. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
  208. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
  209. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
  210. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
  211. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
  212. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
  213. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
  214. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
  215. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
  216. aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
  217. aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
  218. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
  219. aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
  220. aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
  221. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
  222. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
  223. aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
  224. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  225. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
  226. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
  227. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
  228. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
  229. aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
  230. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
  231. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
  232. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
  233. aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
  234. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
  235. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  236. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  237. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  238. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  239. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  240. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  241. aiagents4pharma/talk2scholars/install.md +122 -0
  242. aiagents4pharma/talk2scholars/state/__init__.py +7 -0
  243. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
  244. aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
  245. aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
  246. aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
  247. aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
  248. aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
  249. aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
  250. aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
  251. aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
  252. aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
  253. aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
  254. aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
  255. aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
  256. aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
  257. aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
  258. aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
  259. aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
  260. aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
  261. aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
  262. aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
  263. aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
  264. aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
  265. aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
  266. aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
  267. aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
  268. aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
  269. aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
  270. aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
  271. aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
  272. aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
  273. aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
  274. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
  275. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
  276. aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
  277. aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
  278. aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
  279. aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
  280. aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
  281. aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
  282. aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
  283. aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
  284. aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
  285. aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
  286. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
  287. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
  288. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
  289. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
  290. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
  291. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
  292. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
  293. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
  294. aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
  295. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
  296. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
  297. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  298. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
  299. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  300. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  301. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  302. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
  303. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
  304. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
  305. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  306. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
  307. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
  308. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  309. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
  310. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
  311. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
  312. aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
  313. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
  314. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
  315. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
  316. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
  317. aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
  318. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
  319. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
  320. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
  321. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
  322. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
  323. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
  324. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
  325. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
  326. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
  327. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
  328. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
  329. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
  330. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
  331. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
  332. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
  333. aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
  334. aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
  335. aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
  336. aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,500 @@
1
+ """
2
+ Unit tests for MedrxivDownloader.
3
+ Tests JSON API interaction, PDF URL construction, and metadata extraction.
4
+ """
5
+
6
+ import json
7
+ import unittest
8
+ from unittest.mock import Mock, patch
9
+
10
+ import requests
11
+
12
+ from aiagents4pharma.talk2scholars.tools.paper_download.utils.medrxiv_downloader import (
13
+ MedrxivDownloader,
14
+ )
15
+
16
+
17
+ # ---- Test-only shim to access protected helpers without pylint W0212 ----
18
class MedrxivDownloaderTestShim(MedrxivDownloader):
    """Test-only subclass that re-exports protected helpers under public names.

    Each ``*_public`` method forwards its arguments unchanged to the
    underscore-prefixed method of the same name, so tests can call the
    helpers without tripping pylint's protected-access warning (W0212).
    """

    # Tell pytest not to collect this helper class.
    __test__ = False

    def extract_basic_metadata_public(self, paper, identifier):
        """Forward to ``_extract_basic_metadata`` and return its result."""
        basic_metadata = self._extract_basic_metadata(paper, identifier)
        return basic_metadata

    def extract_authors_public(self, authors_str):
        """Forward to ``_extract_authors`` and return its result."""
        authors = self._extract_authors(authors_str)
        return authors

    def extract_pdf_metadata_public(self, pdf_result, identifier):
        """Forward to ``_extract_pdf_metadata`` and return its result."""
        pdf_metadata = self._extract_pdf_metadata(pdf_result, identifier)
        return pdf_metadata

    def get_paper_identifier_info_public(self, paper):
        """Forward to ``_get_paper_identifier_info`` and return its result."""
        identifier_info = self._get_paper_identifier_info(paper)
        return identifier_info

    def add_service_identifier_public(self, entry, identifier):
        """Forward to ``_add_service_identifier``; nothing is returned."""
        self._add_service_identifier(entry, identifier)
42
+
43
+
44
+ class TestMedrxivDownloader(unittest.TestCase):
45
+ """Tests for the MedrxivDownloader class."""
46
+
47
def setUp(self):
    """Build the mocked config, the downloader under test and a sample payload."""
    # Keyword arguments to Mock() become plain attributes on the mock.
    config_values = {
        "api_url": "https://api.medrxiv.org/details",
        "request_timeout": 30,
        "chunk_size": 8192,
        "pdf_url_template": (
            "https://www.medrxiv.org/content/{identifier}v{version}.full.pdf"
        ),
        "default_version": "1",
    }
    self.mock_config = Mock(**config_values)

    self.downloader = MedrxivDownloaderTestShim(self.mock_config)

    # One-entry payload shaped like the JSON the tests feed to the downloader.
    sample_paper = {
        "title": "Test MedRxiv Paper",
        "authors": "John Doe; Jane Smith",
        "abstract": "This is a test abstract for medRxiv paper.",
        "date": "2023-01-01",
        "category": "Infectious Diseases",
        "version": "1",
        "doi": "10.1101/2023.01.01.123456",
    }
    self.sample_json_response = {"collection": [sample_paper]}
74
+
75
def test_initialization(self):
    """The downloader exposes the values supplied through the mocked config."""
    expected_attributes = (
        ("api_url", "https://api.medrxiv.org/details"),
        ("request_timeout", 30),
        ("chunk_size", 8192),
    )
    for attribute_name, expected_value in expected_attributes:
        self.assertEqual(getattr(self.downloader, attribute_name), expected_value)
80
+
81
@patch("requests.get")
def test_fetch_metadata_success(self, mock_get):
    """fetch_metadata requests the expected URL and returns the decoded JSON."""
    api_response = Mock()
    api_response.raise_for_status = Mock()
    api_response.json.return_value = self.sample_json_response
    mock_get.return_value = api_response

    fetched = self.downloader.fetch_metadata("10.1101/2023.01.01.123456")

    # The request URL must contain the /medrxiv/ server segment and end in /na/json.
    mock_get.assert_called_once_with(
        "https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.123456/na/json",
        timeout=30,
    )
    api_response.raise_for_status.assert_called_once()

    # The parsed payload is handed back untouched.
    self.assertEqual(fetched, self.sample_json_response)
98
+
99
@patch("requests.get")
def test_fetch_metadata_network_error(self, mock_get):
    """A RequestException raised by requests.get propagates out of fetch_metadata."""
    mock_get.side_effect = requests.RequestException("Network error")

    # Callable form of assertRaises: invokes fetch_metadata with the DOI.
    self.assertRaises(
        requests.RequestException,
        self.downloader.fetch_metadata,
        "10.1101/2023.01.01.123456",
    )
106
+
107
@patch("requests.get")
def test_fetch_metadata_json_decode_error(self, mock_get):
    """A JSONDecodeError raised while decoding the body propagates to the caller."""
    broken_response = Mock()
    broken_response.raise_for_status = Mock()
    broken_response.json.side_effect = json.JSONDecodeError("Invalid JSON", "", 0)
    mock_get.return_value = broken_response

    # Callable form of assertRaises: invokes fetch_metadata with the DOI.
    self.assertRaises(
        json.JSONDecodeError,
        self.downloader.fetch_metadata,
        "10.1101/2023.01.01.123456",
    )
117
+
118
def test_construct_pdf_url_variants(self):
    """construct_pdf_url for a normal payload, missing/empty collection, custom version."""
    doi = "10.1101/2023.01.01.123456"
    # (metadata passed in, URL expected back), checked in this order.
    cases = (
        # Regular payload using version "1".
        (
            self.sample_json_response,
            "https://www.medrxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf",
        ),
        # No "collection" key at all.
        ({}, ""),
        # "collection" present but empty.
        ({"collection": []}, ""),
        # Entry carrying an explicit, non-default version.
        (
            {"collection": [{"title": "Test Paper", "version": "3"}]},
            "https://www.medrxiv.org/content/10.1101/2023.01.01.123456v3.full.pdf",
        ),
    )
    for metadata, expected_url in cases:
        self.assertEqual(self.downloader.construct_pdf_url(metadata, doi), expected_url)
145
+
146
def test_extract_paper_metadata_success(self):
    """extract_paper_metadata returns paper fields plus PDF fields on success."""
    doi = "10.1101/2023.01.01.123456"
    temp_path, filename = "/tmp/paper.pdf", "medrxiv_paper.pdf"

    actual = self.downloader.extract_paper_metadata(
        self.sample_json_response, doi, (temp_path, filename)
    )

    # Fields expected to come from the API record.
    paper_fields = {
        "Title": "Test MedRxiv Paper",
        "Authors": ["John Doe", "Jane Smith"],
        "Abstract": "This is a test abstract for medRxiv paper.",
        "Publication Date": "2023-01-01",
        "DOI": "10.1101/2023.01.01.123456",
        "Category": "Infectious Diseases",
        "Version": "1",
        "source": "medrxiv",
        "server": "medrxiv",
    }
    # Fields expected to come from the (path, filename) download result.
    pdf_fields = {
        "URL": "/tmp/paper.pdf",
        "pdf_url": "/tmp/paper.pdf",
        "filename": "medrxiv_paper.pdf",
        "access_type": "open_access_downloaded",
        "temp_file_path": "/tmp/paper.pdf",
    }

    self.assertEqual(actual, {**paper_fields, **pdf_fields})
173
+
174
def test_extract_paper_metadata_no_pdf(self):
    """With no download result, the PDF fields must carry the failure values."""
    doi = "10.1101/2023.01.01.123456"
    stub_filename = patch.object(
        self.downloader, "get_default_filename", return_value="default.pdf"
    )

    # Pin the default filename so the assertion does not depend on its format.
    with stub_filename:
        result = self.downloader.extract_paper_metadata(self.sample_json_response, doi, None)

    expectations = {
        "Title": "Test MedRxiv Paper",
        "URL": "",
        "access_type": "download_failed",
        "filename": "default.pdf",
    }
    for field, expected in expectations.items():
        self.assertEqual(result[field], expected)
187
+
188
def test_extract_paper_metadata_no_collection(self):
    """An API payload without collection data must raise RuntimeError."""
    empty_payload = {}

    with self.assertRaises(RuntimeError) as raised:
        self.downloader.extract_paper_metadata(empty_payload, "10.1101/2023.01.01.123456", None)

    error_text = str(raised.exception)
    self.assertIn("No collection data found", error_text)
196
+
197
def test_extract_basic_metadata_variants(self):
    """Exercise extract_basic_metadata_public on a full record and a sparse one."""
    # Full record: first entry of the sample fixture.
    full_record = self.sample_json_response["collection"][0]
    self.assertEqual(
        self.downloader.extract_basic_metadata_public(full_record, "10.1101/2023.01.01.123456"),
        {
            "Title": "Test MedRxiv Paper",
            "Authors": ["John Doe", "Jane Smith"],
            "Abstract": "This is a test abstract for medRxiv paper.",
            "Publication Date": "2023-01-01",
            "DOI": "10.1101/2023.01.01.123456",
            "Category": "Infectious Diseases",
            "Version": "1",
            "source": "medrxiv",
            "server": "medrxiv",
        },
    )

    # Sparse record: only the title is supplied, the rest must use fallbacks.
    sparse = self.downloader.extract_basic_metadata_public(
        {"title": "Test Paper"}, "10.1101/test"
    )
    fallbacks = (
        ("Title", "Test Paper"),
        ("Authors", []),
        ("Abstract", "N/A"),
        ("Category", "N/A"),
    )
    for field, expected in fallbacks:
        self.assertEqual(sparse[field], expected)
224
+
225
def test_extract_authors_variants(self):
    """Check author splitting for a normal list, an empty string, and stray whitespace."""
    scenarios = (
        ("John Doe; Jane Smith; Bob Johnson", ["John Doe", "Jane Smith", "Bob Johnson"]),
        ("", []),
        # Padding and a trailing separator must not produce empty entries.
        (" John Doe ; Jane Smith ; ", ["John Doe", "Jane Smith"]),
    )

    for raw_authors, expected in scenarios:
        self.assertEqual(self.downloader.extract_authors_public(raw_authors), expected)
236
+
237
def test_extract_pdf_metadata_variants(self):
    """Check extract_pdf_metadata_public with a download result and with None."""
    doi = "10.1101/test"

    # A (temp path, filename) tuple is supplied.
    downloaded = self.downloader.extract_pdf_metadata_public(("/tmp/test.pdf", "paper.pdf"), doi)
    self.assertEqual(
        downloaded,
        {
            "URL": "/tmp/test.pdf",
            "pdf_url": "/tmp/test.pdf",
            "filename": "paper.pdf",
            "access_type": "open_access_downloaded",
            "temp_file_path": "/tmp/test.pdf",
        },
    )

    # No download result; the default filename is stubbed to a fixed value.
    with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
        fallback = self.downloader.extract_pdf_metadata_public(None, doi)
        self.assertEqual(
            fallback,
            {
                "URL": "",
                "pdf_url": "",
                "filename": "default.pdf",
                "access_type": "download_failed",
                "temp_file_path": "",
            },
        )
266
+
267
def test_service_and_identifier_helpers(self):
    """Check the service name, identifier name, and default filename helpers."""
    service_name = self.downloader.get_service_name()
    self.assertEqual(service_name, "medRxiv")

    identifier_name = self.downloader.get_identifier_name()
    self.assertEqual(identifier_name, "DOI")

    default_filename = self.downloader.get_default_filename("10.1101/2023.01.01.123456")
    self.assertEqual(default_filename, "10_1101_2023_01_01_123456.pdf")
275
+
276
def test_get_paper_identifier_info(self):
    """The identifier info text must mention the DOI, date, and category."""
    paper = {
        "DOI": "10.1101/2023.01.01.123456",
        "Publication Date": "2023-01-01",
        "Category": "Medicine",
    }

    info_text = self.downloader.get_paper_identifier_info_public(paper)

    # Every value of the paper dict has to appear in the rendered text.
    for value in paper.values():
        self.assertIn(value, info_text)
289
+
290
def test_add_service_identifier(self):
    """add_service_identifier_public must fill DOI and server on the entry in place."""
    doi = "10.1101/2023.01.01.123456"
    entry = {}

    self.downloader.add_service_identifier_public(entry, doi)

    self.assertEqual(entry["DOI"], "10.1101/2023.01.01.123456")
    self.assertEqual(entry["server"], "medrxiv")
296
+
297
+
298
class TestMedrxivDownloaderIntegration(unittest.TestCase):
    """Workflow tests that chain the MedrxivDownloader steps with mocked I/O."""

    def setUp(self):
        """Build a shim downloader with a mocked config and a one-paper payload."""
        self.mock_config = Mock(
            api_url="https://api.medrxiv.org/details",
            request_timeout=30,
            chunk_size=8192,
            pdf_url_template="https://www.medrxiv.org/content/{identifier}v{version}.full.pdf",
            default_version="1",
        )
        self.downloader = MedrxivDownloaderTestShim(self.mock_config)

        paper_record = {
            "title": "Integration Test Paper",
            "authors": "Test Author",
            "abstract": "Integration test abstract.",
            "date": "2023-01-01",
            "category": "Medicine",
            "version": "2",
            "doi": "10.1101/2023.01.01.123456",
        }
        self.sample_response = {"collection": [paper_record]}

    @patch(
        "aiagents4pharma.talk2scholars.tools.paper_download.utils."
        "medrxiv_downloader.MedrxivDownloader.download_pdf_to_temp"
    )
    @patch("requests.get")
    def test_full_paper_processing_workflow(self, mock_get, mock_download):
        """Run fetch -> URL construction -> download -> extraction for one DOI."""
        doi = "10.1101/2023.01.01.123456"

        # API call returns the sample payload; the PDF download is stubbed out.
        api_response = Mock()
        api_response.raise_for_status = Mock()
        api_response.json.return_value = self.sample_response
        mock_get.return_value = api_response
        mock_download.return_value = ("/tmp/paper.pdf", "medrxiv_paper.pdf")

        # Chain the four workflow steps in order.
        metadata = self.downloader.fetch_metadata(doi)
        pdf_url = self.downloader.construct_pdf_url(metadata, doi)
        pdf_result = self.downloader.download_pdf_to_temp(pdf_url, doi)
        paper_data = self.downloader.extract_paper_metadata(metadata, doi, pdf_result)

        # The final record must combine API fields and download fields.
        expected_fields = {
            "Title": "Integration Test Paper",
            "Authors": ["Test Author"],
            "access_type": "open_access_downloaded",
            "filename": "medrxiv_paper.pdf",
            "temp_file_path": "/tmp/paper.pdf",
        }
        for field, expected in expected_fields.items():
            self.assertEqual(paper_data[field], expected)

        # Both collaborators must have been called exactly once with these arguments.
        mock_get.assert_called_once_with(
            "https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.123456/na/json",
            timeout=30,
        )
        mock_download.assert_called_once_with(
            "https://www.medrxiv.org/content/10.1101/2023.01.01.123456v2.full.pdf", doi
        )

    @patch("requests.get")
    def test_error_handling_workflow(self, mock_get):
        """An API failure must raise requests.RequestException from fetch_metadata."""
        mock_get.side_effect = requests.RequestException("API error")

        self.assertRaises(
            requests.RequestException,
            self.downloader.fetch_metadata,
            "10.1101/2023.01.01.123456",
        )

    @patch("requests.get")
    def test_workflow_with_empty_collection(self, mock_get):
        """An empty collection in the API payload must make fetch_metadata raise."""
        empty_response = Mock()
        empty_response.raise_for_status = Mock()
        empty_response.json.return_value = {"collection": []}
        mock_get.return_value = empty_response

        with self.assertRaises(RuntimeError) as raised:
            self.downloader.fetch_metadata("10.1101/2023.01.01.123456")

        error_text = str(raised.exception)
        self.assertIn("No collection data found in medRxiv API response", error_text)

    @patch("requests.get")
    def test_multiple_identifiers_workflow(self, mock_get):
        """Process two DOIs in sequence, each served by its own mocked response."""

        def fake_response(payload):
            """Return a mock HTTP response whose json() yields *payload*."""
            response = Mock()
            response.json.return_value = payload
            response.raise_for_status = Mock()
            return response

        payloads = [
            {"collection": [{"title": "Paper 1", "version": "1", "authors": "Author 1"}]},
            {"collection": [{"title": "Paper 2", "version": "2", "authors": "Author 2"}]},
        ]
        # A list side_effect hands out one response per successive call.
        mock_get.side_effect = [fake_response(payload) for payload in payloads]

        first_doi = "10.1101/2023.01.01.111111"
        second_doi = "10.1101/2023.01.01.222222"
        processed = {}
        for doi in (first_doi, second_doi):
            metadata = self.downloader.fetch_metadata(doi)
            # The URL itself is not checked here; the call only exercises that path.
            self.downloader.construct_pdf_url(metadata, doi)
            processed[doi] = self.downloader.extract_paper_metadata(metadata, doi, None)

        # Both papers were processed and matched to the right DOI.
        self.assertEqual(len(processed), 2)
        self.assertEqual(processed["10.1101/2023.01.01.111111"]["Title"], "Paper 1")
        self.assertEqual(processed["10.1101/2023.01.01.222222"]["Title"], "Paper 2")

        # One API request per DOI, each against the expected URL.
        self.assertEqual(mock_get.call_count, 2)
        requested_urls = [recorded[0][0] for recorded in mock_get.call_args_list]
        for expected_url in (
            "https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.111111/na/json",
            "https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.222222/na/json",
        ):
            self.assertIn(expected_url, requested_urls)
441
+
442
+
443
class TestMedrxivSpecialCases(unittest.TestCase):
    """Tests for special cases and edge conditions of the medRxiv downloader."""

    def setUp(self):
        """Build a shim downloader backed by a mocked configuration object."""
        self.mock_config = Mock()
        self.mock_config.api_url = "https://api.medrxiv.org/details"
        self.mock_config.request_timeout = 30
        self.mock_config.chunk_size = 8192
        self.mock_config.pdf_url_template = (
            "https://www.medrxiv.org/content/{identifier}v{version}.full.pdf"
        )
        self.mock_config.default_version = "1"

        self.downloader = MedrxivDownloaderTestShim(self.mock_config)

    def test_filename_generation_special_characters(self):
        """Default filename for a DOI containing '/', '.', '-' and '_'."""
        doi_with_special_chars = "10.1101/2023.01.01.123456/special-chars_test"

        result = self.downloader.get_default_filename(doi_with_special_chars)

        # Slashes and dots become underscores; '-' and '_' are expected unchanged.
        self.assertEqual(result, "10_1101_2023_01_01_123456_special-chars_test.pdf")

    def test_version_handling_edge_cases(self):
        """PDF URL construction for empty, None, and missing version values."""
        test_cases = [
            ({"collection": [{"version": ""}]}, "v.full.pdf"),  # Empty version
            ({"collection": [{"version": None}]}, "vNone.full.pdf"),  # None version
            ({"collection": [{}]}, "v1.full.pdf"),  # Missing key -> configured default "1"
        ]

        for metadata, expected_suffix in test_cases:
            # subTest reports every failing case instead of stopping at the first,
            # and labels each failure with the metadata that produced it.
            with self.subTest(metadata=metadata):
                result = self.downloader.construct_pdf_url(metadata, "10.1101/test")
                self.assertTrue(
                    result.endswith(expected_suffix),
                    msg=f"{result!r} does not end with {expected_suffix!r}",
                )

    def test_metadata_extraction_unicode_handling(self):
        """Non-ASCII titles, authors, and abstracts must come through unchanged."""
        metadata = {
            "collection": [
                {
                    "title": "Título com acentos é símbolos especiais",
                    "authors": "José María; François Müller",
                    "abstract": "Resumo com çaracteres especiais ñ símbolos",
                    "date": "2023-01-01",
                    "category": "Médecine",
                    "version": "1",
                }
            ]
        }

        result = self.downloader.extract_paper_metadata(metadata, "10.1101/test", None)

        # Text fields are compared verbatim; authors are split on ';'.
        self.assertEqual(result["Title"], "Título com acentos é símbolos especiais")
        self.assertEqual(result["Authors"], ["José María", "François Müller"])
        self.assertEqual(result["Abstract"], "Resumo com çaracteres especiais ñ símbolos")
@@ -0,0 +1,117 @@
1
+ """
2
+ Unit tests for NVIDIA NIM reranker error handling in nvidia_nim_reranker.py
3
+ """
4
+
5
+ from unittest.mock import MagicMock, patch
6
+
7
+ import pytest
8
+ from langchain_core.documents import Document
9
+
10
+ from aiagents4pharma.talk2scholars.tools.pdf.utils import nvidia_nim_reranker
11
+ from aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker import (
12
+ rerank_chunks,
13
+ )
14
+
15
+
16
@pytest.fixture(name="chunks_fixture")
def fixture_chunks():
    """Provide ten Document chunks alternating between paper ids P0 and P1."""
    documents = []
    for i in range(10):
        # Relevance score decreases by 0.01 for each later chunk.
        chunk_metadata = {"paper_id": f"P{i % 2}", "relevance_score": 0.9 - 0.01 * i}
        documents.append(Document(page_content=f"chunk {i}", metadata=chunk_metadata))
    return documents
26
+
27
+
28
def test_rerank_chunks_short_input(chunks_fixture):
    """With fewer chunks than top_k, rerank_chunks must return them unchanged."""
    few_chunks = chunks_fixture[:3]

    reranked = rerank_chunks(few_chunks, "What is cancer?", config=MagicMock(), top_k=5)

    assert reranked == chunks_fixture[:3]
32
+
33
+
34
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.logger")
def test_rerank_chunks_missing_api_key_logs_and_raises(mock_logger, chunks_fixture):
    """
    When config.reranker.api_key is None, rerank_chunks must:
    - log exactly one error through the module logger
    - raise ValueError with the configuration message
    """
    keyless_config = MagicMock()
    keyless_config.reranker.api_key = None
    expected_message = "Configuration 'reranker.api_key' must be set for reranking"

    with pytest.raises(ValueError, match=expected_message):
        rerank_chunks(chunks_fixture, "What is cancer?", config=keyless_config, top_k=5)

    mock_logger.error.assert_called_once_with(
        "No NVIDIA API key found in configuration for reranking"
    )
53
+
54
+
55
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.NVIDIARerank")
def test_rerank_chunks_success(mock_reranker_cls, chunks_fixture):
    """A successful rerank must return the first top_k documents from the reranker."""
    query = "Explain mitochondria."

    # The fake reranker returns the chunks in reverse order.
    fake_reranker = MagicMock()
    fake_reranker.compress_documents.return_value = chunks_fixture[::-1]
    mock_reranker_cls.return_value = fake_reranker

    mock_config = MagicMock()
    mock_config.reranker.model = "test_model"
    mock_config.reranker.api_key = "test_key"

    result = rerank_chunks(chunks_fixture, query, config=mock_config, top_k=5)

    assert isinstance(result, list)
    assert result == chunks_fixture[::-1][:5]
    fake_reranker.compress_documents.assert_called_once_with(
        query="Explain mitochondria.", documents=chunks_fixture
    )
73
+
74
+
75
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.NVIDIARerank")
def test_rerank_chunks_reranker_fails_raises_and_calls_compress(mock_reranker_cls, chunks_fixture):
    """
    When NVIDIARerank.compress_documents raises RuntimeError:
    - rerank_chunks must raise that RuntimeError
    - compress_documents must have been called once with the query and chunks
    """
    query = "How does light affect plants?"

    failing_reranker = MagicMock()
    failing_reranker.compress_documents.side_effect = RuntimeError("API failure")
    mock_reranker_cls.return_value = failing_reranker

    mock_config = MagicMock()
    mock_config.reranker.model = "reranker"
    mock_config.reranker.api_key = "valid_key"

    with pytest.raises(RuntimeError, match="API failure"):
        rerank_chunks(chunks_fixture, query, config=mock_config, top_k=3)

    failing_reranker.compress_documents.assert_called_once_with(
        query="How does light affect plants?", documents=chunks_fixture
    )
96
+
97
+
98
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.logger")
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.NVIDIARerank")
def test_rerank_chunks_debug_block_triggered(mock_reranker_cls, mock_logger, chunks_fixture):
    """With debug logging reported as enabled, rerank_chunks must call logger.debug."""
    # Make the patched module logger report every level as enabled.
    mock_logger.isEnabledFor.return_value = True

    fake_reranker = MagicMock()
    fake_reranker.compress_documents.return_value = chunks_fixture
    mock_reranker_cls.return_value = fake_reranker

    mock_config = MagicMock()
    mock_config.reranker.model = "mymodel"
    mock_config.reranker.api_key = "abc"

    # The input is doubled to 20 chunks, well above top_k.
    doubled_chunks = chunks_fixture * 2
    result = nvidia_nim_reranker.rerank_chunks(doubled_chunks, "Test query", mock_config, top_k=3)

    assert result == chunks_fixture[:3]
    assert mock_logger.debug.called
@@ -0,0 +1,67 @@
1
+ """answer_formatter tests."""
2
+
3
+ from unittest.mock import patch
4
+
5
+ import pytest
6
+
7
+ from aiagents4pharma.talk2scholars.tools.pdf.utils.answer_formatter import format_answer
8
+
9
+
10
@pytest.fixture(name="base_args")
def _base_args():
    """Provide the keyword arguments shared by every format_answer test."""
    chunks = [{"content": "chunk1"}, {"content": "chunk2"}]
    articles = {
        "paper1": {"Title": "Paper One"},
        "paper2": {"Title": "Paper Two"},
    }
    return {
        "question": "What is the conclusion?",
        "chunks": chunks,
        "llm": "mock_llm",
        "articles": articles,
        "config": {"key": "value"},
        "call_id": "test_call_123",
        "has_gpu": True,
    }
25
+
26
+
27
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.answer_formatter.generate_answer")
def test_format_answer_with_sources(mock_generate_answer, base_args):
    """The answer text must be followed by a Sources list naming each paper used."""
    generated = {
        "output_text": "This is the generated answer.",
        "papers_used": ["paper1", "paper2"],
    }
    mock_generate_answer.return_value = generated

    result = format_answer(**base_args)

    # The answer, the section header, and one bullet per paper title must be present.
    expected_fragments = (
        "This is the generated answer.",
        "Sources:",
        "- Paper One",
        "- Paper Two",
    )
    for fragment in expected_fragments:
        assert fragment in result
    mock_generate_answer.assert_called_once()
42
+
43
+
44
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.answer_formatter.generate_answer")
def test_format_answer_no_sources(mock_generate_answer, base_args):
    """When no papers were used, the result must be the bare answer text."""
    answer_text = "No sources were used."
    mock_generate_answer.return_value = {"output_text": answer_text, "papers_used": []}

    result = format_answer(**base_args)

    # Exact equality: no Sources section may be appended.
    assert result == "No sources were used."
    mock_generate_answer.assert_called_once()
56
+
57
+
58
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.answer_formatter.generate_answer")
def test_format_answer_missing_output_text(mock_generate_answer, base_args):
    """A missing output_text must yield the placeholder answer, with sources kept."""
    # The generator result carries papers but no "output_text" key.
    generated_without_text = {"papers_used": ["paper1"]}
    mock_generate_answer.return_value = generated_without_text

    result = format_answer(**base_args)

    assert result.startswith("No answer generated.")
    assert "Sources:" in result
    mock_generate_answer.assert_called_once()