aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336)
  1. aiagents4pharma/__init__.py +11 -0
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
  11. aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
  12. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
  13. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
  14. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  16. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  17. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  18. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  19. aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
  20. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
  21. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
  22. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
  23. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
  24. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  25. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  26. aiagents4pharma/talk2biomodels/README.md +1 -0
  27. aiagents4pharma/talk2biomodels/__init__.py +5 -0
  28. aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
  29. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
  30. aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
  31. aiagents4pharma/talk2biomodels/api/ols.py +75 -0
  32. aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
  33. aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
  34. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
  35. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
  36. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
  37. aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
  38. aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
  39. aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
  40. aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
  41. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
  42. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
  43. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
  44. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
  45. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
  46. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
  47. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
  48. aiagents4pharma/talk2biomodels/install.md +63 -0
  49. aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
  50. aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
  51. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
  52. aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
  53. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
  54. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  55. aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
  56. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  57. aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
  58. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
  59. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
  60. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
  61. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
  62. aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
  63. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
  64. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
  65. aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
  66. aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
  67. aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
  68. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
  69. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
  70. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
  71. aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
  72. aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
  73. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
  74. aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
  75. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
  76. aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
  77. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
  78. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
  79. aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
  80. aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
  81. aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
  82. aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
  83. aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
  84. aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
  85. aiagents4pharma/talk2cells/README.md +1 -0
  86. aiagents4pharma/talk2cells/__init__.py +5 -0
  87. aiagents4pharma/talk2cells/agents/__init__.py +6 -0
  88. aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
  89. aiagents4pharma/talk2cells/states/__init__.py +6 -0
  90. aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
  91. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
  92. aiagents4pharma/talk2cells/tools/__init__.py +6 -0
  93. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
  94. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
  95. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
  96. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  97. aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
  98. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  99. aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
  100. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
  101. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
  102. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
  103. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  104. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  105. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
  106. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  107. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
  108. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
  110. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  111. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  112. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  113. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
  114. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  115. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  116. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  117. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  118. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
  119. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
  120. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  121. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  122. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  123. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  124. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
  125. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
  126. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
  127. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
  128. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
  129. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  130. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  131. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  132. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  133. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
  134. aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
  135. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
  136. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
  137. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
  151. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
  152. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
  153. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
  154. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
  155. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
  156. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
  157. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
  158. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
  159. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
  160. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
  161. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
  162. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
  163. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
  164. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
  165. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
  166. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  167. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
  168. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  169. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
  170. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
  171. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
  172. aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
  173. aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
  174. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
  175. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
  176. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
  177. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
  178. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
  179. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
  180. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
  181. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
  182. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
  183. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
  184. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
  185. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
  186. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
  187. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
  188. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
  189. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
  190. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
  191. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
  192. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
  193. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  194. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  195. aiagents4pharma/talk2scholars/README.md +1 -0
  196. aiagents4pharma/talk2scholars/__init__.py +7 -0
  197. aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
  198. aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
  199. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
  200. aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
  201. aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
  202. aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
  203. aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
  204. aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
  205. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
  206. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
  207. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
  208. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
  209. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
  210. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
  211. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
  212. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
  213. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
  214. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
  215. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
  216. aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
  217. aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
  218. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
  219. aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
  220. aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
  221. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
  222. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
  223. aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
  224. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  225. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
  226. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
  227. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
  228. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
  229. aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
  230. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
  231. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
  232. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
  233. aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
  234. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
  235. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  236. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  237. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  238. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  239. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  240. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  241. aiagents4pharma/talk2scholars/install.md +122 -0
  242. aiagents4pharma/talk2scholars/state/__init__.py +7 -0
  243. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
  244. aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
  245. aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
  246. aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
  247. aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
  248. aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
  249. aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
  250. aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
  251. aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
  252. aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
  253. aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
  254. aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
  255. aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
  256. aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
  257. aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
  258. aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
  259. aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
  260. aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
  261. aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
  262. aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
  263. aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
  264. aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
  265. aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
  266. aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
  267. aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
  268. aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
  269. aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
  270. aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
  271. aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
  272. aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
  273. aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
  274. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
  275. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
  276. aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
  277. aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
  278. aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
  279. aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
  280. aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
  281. aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
  282. aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
  283. aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
  284. aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
  285. aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
  286. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
  287. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
  288. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
  289. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
  290. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
  291. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
  292. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
  293. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
  294. aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
  295. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
  296. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
  297. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  298. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
  299. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  300. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  301. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  302. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
  303. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
  304. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
  305. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  306. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
  307. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
  308. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  309. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
  310. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
  311. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
  312. aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
  313. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
  314. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
  315. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
  316. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
  317. aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
  318. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
  319. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
  320. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
  321. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
  322. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
  323. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
  324. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
  325. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
  326. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
  327. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
  328. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
  329. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
  330. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
  331. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
  332. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
  333. aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
  334. aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
  335. aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
  336. aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,598 @@
1
+ """
2
+ Unit tests for BasePaperDownloader.
3
+ Tests the abstract base class functionality and common methods.
4
+ """
5
+
6
+ import inspect
7
+ import unittest
8
+ from typing import Any
9
+ from unittest.mock import Mock, patch
10
+
11
+ import requests
12
+
13
+ from aiagents4pharma.talk2scholars.tools.paper_download.utils.base_paper_downloader import (
14
+ BasePaperDownloader,
15
+ )
16
+
17
+
18
+ class ConcretePaperDownloader(BasePaperDownloader):
19
+ """Concrete implementation of BasePaperDownloader for testing."""
20
+
21
+ def __init__(self, config: Any):
22
+ super().__init__(config)
23
+ self.test_metadata = {"test": "data"}
24
+
25
+ def fetch_metadata(self, identifier: str) -> Any:
26
+ """Concrete implementation for testing."""
27
+ return self.test_metadata
28
+
29
+ def construct_pdf_url(self, metadata: Any, identifier: str) -> str:
30
+ """Concrete implementation for testing."""
31
+ return f"https://test.com/{identifier}.pdf"
32
+
33
+ def extract_paper_metadata(
34
+ self, metadata: Any, identifier: str, pdf_result: tuple[str, str] | None
35
+ ) -> dict[str, Any]:
36
+ """Concrete implementation for testing."""
37
+ return {
38
+ "Title": f"Test Paper {identifier}",
39
+ "Authors": ["Test Author"],
40
+ "identifier": identifier,
41
+ "metadata_source": metadata,
42
+ }
43
+
44
+ def get_service_name(self) -> str:
45
+ """Concrete implementation for testing."""
46
+ return "TestService"
47
+
48
+ def get_identifier_name(self) -> str:
49
+ """Concrete implementation for testing."""
50
+ return "Test ID"
51
+
52
+ def get_default_filename(self, identifier: str) -> str:
53
+ """Concrete implementation for testing."""
54
+ return f"test_{identifier}.pdf"
55
+
56
+ def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
57
+ """Concrete implementation for testing."""
58
+ return f" ({paper.get('identifier', 'unknown')})"
59
+
60
+ def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
61
+ """Concrete implementation for testing."""
62
+ entry["test_id"] = identifier
63
+
64
+ def get_paper_identifier_info_public(self, paper: dict[str, Any]) -> str:
65
+ """Public wrapper to access protected identifier info for tests."""
66
+ return self._get_paper_identifier_info(paper)
67
+
68
+ def add_service_identifier_public(self, entry: dict[str, Any], identifier: str) -> None:
69
+ """Public wrapper to access protected service identifier for tests."""
70
+ self._add_service_identifier(entry, identifier)
71
+
72
+
73
+ class TestBasePaperDownloader(unittest.TestCase):
74
+ """Tests for the BasePaperDownloader class."""
75
+
76
+ def setUp(self):
77
+ """Set up test fixtures."""
78
+ self.mock_config = Mock()
79
+ self.mock_config.request_timeout = 30
80
+ self.mock_config.chunk_size = 8192
81
+
82
+ self.downloader = ConcretePaperDownloader(self.mock_config)
83
+
84
+ def test_initialization(self):
85
+ """Test BasePaperDownloader initialization."""
86
+ self.assertEqual(self.downloader.request_timeout, 30)
87
+ self.assertEqual(self.downloader.chunk_size, 8192)
88
+
89
+ def test_abstract_methods_raise_not_implemented(self):
90
+ """Test that abstract methods are unimplemented in an incomplete subclass."""
91
+
92
+ # Create an intentionally incomplete subclass **without** instantiating it
93
+ # (avoid E0110) and without a pointless 'pass' (avoid W0107).
94
+ class IncompleteDownloader(BasePaperDownloader):
95
+ """Intentionally incomplete concrete subclass for introspection only."""
96
+
97
+ __test__ = False # not a test class
98
+
99
+ # Assert it's abstract instead of trying to instantiate
100
+ self.assertTrue(inspect.isabstract(IncompleteDownloader))
101
+
102
+ @patch("tempfile.NamedTemporaryFile")
103
+ @patch("requests.get")
104
+ def test_download_pdf_to_temp_success(self, mock_get, mock_tempfile):
105
+ """Test successful PDF download to temporary file."""
106
+ # Mock response
107
+ mock_response = Mock()
108
+ mock_response.raise_for_status = Mock()
109
+ mock_response.iter_content.return_value = [b"PDF chunk 1", b"PDF chunk 2"]
110
+ mock_response.headers = {"Content-Disposition": 'attachment; filename="paper.pdf"'}
111
+ mock_get.return_value = mock_response
112
+
113
+ # Mock temporary file
114
+ mock_temp_file = Mock()
115
+ mock_temp_file.name = "/tmp/test.pdf"
116
+ mock_temp_file.__enter__ = Mock(return_value=mock_temp_file)
117
+ mock_temp_file.__exit__ = Mock(return_value=None)
118
+ mock_tempfile.return_value = mock_temp_file
119
+
120
+ result = self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
121
+
122
+ # Verify result
123
+ self.assertEqual(result, ("/tmp/test.pdf", "paper.pdf"))
124
+
125
+ # Verify HTTP request - includes headers with User-Agent
126
+ expected_headers = {"User-Agent": self.downloader.user_agent}
127
+ mock_get.assert_called_once_with(
128
+ "https://test.com/paper.pdf",
129
+ headers=expected_headers,
130
+ timeout=30,
131
+ stream=True,
132
+ )
133
+ mock_response.raise_for_status.assert_called_once()
134
+
135
+ # Verify file writing
136
+ mock_temp_file.write.assert_any_call(b"PDF chunk 1")
137
+ mock_temp_file.write.assert_any_call(b"PDF chunk 2")
138
+
139
def test_download_pdf_to_temp_empty_url(self):
    """An empty URL makes ``download_pdf_to_temp`` return ``None``."""
    self.assertIsNone(self.downloader.download_pdf_to_temp("", "12345"))
144
+
145
@patch("requests.get")
def test_download_pdf_to_temp_network_error(self, mock_get):
    """A ``RequestException`` raised by ``requests.get`` yields ``None``."""
    # Every GET attempt fails with a requests-level error.
    mock_get.side_effect = requests.RequestException("Network error")

    outcome = self.downloader.download_pdf_to_temp(
        "https://test.com/paper.pdf", "12345"
    )

    self.assertIsNone(outcome)
153
+
154
@patch("tempfile.NamedTemporaryFile")
@patch("requests.get")
def test_download_pdf_to_temp_filename_extraction(self, mock_get, mock_tempfile):
    """Check the filename returned for several Content-Disposition values.

    ``get_default_filename`` is patched to return "12345.pdf", so any case
    expecting that name is exercising the fallback path.
    """
    fallback = "12345.pdf"
    # (Content-Disposition header value, filename expected in the result)
    cases = (
        ('attachment; filename="test-paper.pdf"', "test-paper.pdf"),
        ("attachment; filename=simple.pdf", "simple.pdf"),
        # Extended ``filename*=`` form is expected to fall back to the default.
        ("attachment; filename*=UTF-8''encoded%20file.pdf", fallback),
        ('inline; filename="quoted file.pdf"', "quoted file.pdf"),
        # Empty value means the header is omitted from the response entirely.
        ("", fallback),
    )

    for disposition, wanted in cases:
        with self.subTest(header=disposition):
            response = Mock()
            response.raise_for_status = Mock()
            response.iter_content.return_value = [b"PDF data"]
            response.headers = {}
            if disposition:
                response.headers["Content-Disposition"] = disposition
            mock_get.return_value = response

            # Fake temp file; as a context manager it hands back itself.
            temp_file = Mock()
            temp_file.name = "/tmp/test.pdf"
            temp_file.__enter__ = Mock(return_value=temp_file)
            temp_file.__exit__ = Mock(return_value=None)
            mock_tempfile.return_value = temp_file

            with patch.object(
                self.downloader, "get_default_filename", return_value=fallback
            ):
                outcome = self.downloader.download_pdf_to_temp(
                    "https://test.com/paper.pdf", "12345"
                )

            self.assertEqual(outcome[1], wanted)
196
+
197
def test_process_identifiers_success(self):
    """Both identifiers get an entry even when one PDF download fails."""
    identifiers = ["12345", "67890"]
    # One outcome per download call: first succeeds, second returns nothing.
    download_outcomes = [("/tmp/paper1.pdf", "paper1.pdf"), None]

    with patch.object(
        self.downloader, "download_pdf_to_temp", side_effect=download_outcomes
    ):
        result = self.downloader.process_identifiers(identifiers)

    # Every identifier is present in the result mapping.
    for identifier in identifiers:
        self.assertIn(identifier, result)

    # Paper whose download succeeded.
    first = result["12345"]
    self.assertEqual(first["Title"], "Test Paper 12345")
    self.assertEqual(first["Authors"], ["Test Author"])

    # Paper whose download failed is still processed.
    self.assertEqual(result["67890"]["Title"], "Test Paper 67890")
220
+
221
def test_process_identifiers_with_errors(self):
    """A metadata fetch failure produces an error entry instead of being dropped."""

    def fake_fetch(identifier):
        # Only the "fetch_error" identifier fails; anything else gets metadata.
        if identifier != "fetch_error":
            return {"test": identifier}
        raise requests.RequestException("Fetch failed")

    fetch_patch = patch.object(
        self.downloader, "fetch_metadata", side_effect=fake_fetch
    )
    download_patch = patch.object(
        self.downloader, "download_pdf_to_temp", return_value=None
    )
    with fetch_patch, download_patch:
        result = self.downloader.process_identifiers(["valid", "fetch_error"])

    # The healthy identifier is processed normally.
    self.assertIn("valid", result)
    self.assertEqual(result["valid"]["Title"], "Test Paper valid")

    # The failing identifier is kept, marked as an error entry.
    self.assertIn("fetch_error", result)
    failed = result["fetch_error"]
    self.assertEqual(failed["Title"], "Error fetching paper")
    self.assertIn("Fetch failed", failed["Abstract"])
    self.assertEqual(failed["access_type"], "error")
243
+
244
def test_build_summary_success(self):
    """Summary of two downloaded and one failed paper shows counts and service."""
    access_types = (
        "open_access_downloaded",
        "download_failed",
        "open_access_downloaded",
    )
    # Keys paper1..paper3, in that order, each with a title and access type.
    article_data = {
        f"paper{n}": {"Title": f"Paper {n}", "access_type": access}
        for n, access in enumerate(access_types, start=1)
    }

    summary = self.downloader.build_summary(article_data)

    # Total papers, successful downloads, then the service name.
    for fragment in ("3", "2", "TestService"):
        self.assertIn(fragment, summary)
258
+
259
def test_build_summary_no_papers(self):
    """An empty article mapping still yields a summary with "0" and the service."""
    summary = self.downloader.build_summary({})

    for fragment in ("0", "TestService"):
        self.assertIn(fragment, summary)
265
+
266
def test_build_summary_all_failed(self):
    """Summary for two failed downloads contains both "2" and "0"."""
    article_data = {
        f"paper{n}": {"Title": f"Paper {n}", "access_type": "download_failed"}
        for n in (1, 2)
    }

    summary = self.downloader.build_summary(article_data)

    self.assertIn("2", summary)  # total papers
    self.assertIn("0", summary)  # successful downloads
277
+
278
def test_build_summary_with_papers(self):
    """Summary lists both paper titles, the service name and the counts."""
    downloaded = {
        "Title": "Paper 1",
        "identifier": "123",
        "access_type": "open_access_downloaded",
        "Abstract": "Test abstract.",
    }
    failed = {
        "Title": "Paper 2",
        "identifier": "456",
        "access_type": "download_failed",
        "Abstract": "Another abstract.",
    }
    article_data = {"123": downloaded, "456": failed}

    summary = self.downloader.build_summary(article_data)

    for fragment in ("Paper 1", "Paper 2", "TestService"):
        self.assertIn(fragment, summary)

    # NOTE(review): "2" and "1" also occur inside the titles asserted above,
    # so these two count checks cannot fail on their own.
    self.assertIn("2", summary)  # total papers
    self.assertIn("1", summary)  # successfully downloaded
302
+
303
def test_build_summary_truncated_list(self):
    """With five papers, only the first three titles appear in the summary."""
    # Five downloaded papers keyed "1".."5" (more than the three listed).
    article_data = {
        f"{n}": {
            "Title": f"Paper {n}",
            "identifier": f"{n}",
            "access_type": "open_access_downloaded",
            "Abstract": f"Abstract {n}",
        }
        for n in range(1, 6)
    }

    summary = self.downloader.build_summary(article_data)

    # Listed papers.
    for title in ("Paper 1", "Paper 2", "Paper 3"):
        self.assertIn(title, summary)

    # Papers beyond the cut-off are left out.
    for title in ("Paper 4", "Paper 5"):
        self.assertNotIn(title, summary)

    # The total count is still reported.
    self.assertIn("5", summary)
327
+
328
def test_concrete_implementation_methods(self):
    """Call each overridden method of the concrete downloader and check its value."""
    downloader = self.downloader
    identifier = "test123"

    # fetch_metadata
    metadata = downloader.fetch_metadata(identifier)
    self.assertEqual(metadata, {"test": "data"})

    # construct_pdf_url
    self.assertEqual(
        downloader.construct_pdf_url(metadata, identifier),
        "https://test.com/test123.pdf",
    )

    # extract_paper_metadata (no PDF result supplied)
    paper_data = downloader.extract_paper_metadata(metadata, identifier, None)
    self.assertEqual(paper_data["Title"], "Test Paper test123")
    self.assertEqual(paper_data["Authors"], ["Test Author"])

    # get_service_name / get_identifier_name / get_default_filename
    self.assertEqual(downloader.get_service_name(), "TestService")
    self.assertEqual(downloader.get_identifier_name(), "Test ID")
    self.assertEqual(downloader.get_default_filename(identifier), "test_test123.pdf")
354
+
355
def test_helper_methods(self):
    """Exercise the two protected helpers through their public wrappers."""
    # Identifier info is rendered as " (<identifier>)".
    self.assertEqual(
        self.downloader.get_paper_identifier_info_public({"identifier": "test123"}),
        " (test123)",
    )

    # The service identifier is written into the entry under "test_id".
    record = {}
    self.downloader.add_service_identifier_public(record, "test123")
    self.assertEqual(record["test_id"], "test123")
366
+
367
def test_abstract_methods_raise_not_implemented_direct_call(self):
    """Each base-class abstract method raises NotImplementedError when invoked.

    The methods are looked up on ``BasePaperDownloader`` itself and called
    with the concrete downloader as ``self``, so the base bodies run rather
    than the overrides.
    """
    # (method name, positional arguments after ``self``), in call order.
    # Looking names up via getattr also covers the protected methods without
    # a direct ``_name`` attribute access (pylint W0212).
    base_calls = (
        ("fetch_metadata", ("test",)),
        ("construct_pdf_url", ({}, "test")),
        ("extract_paper_metadata", ({}, "test", None)),
        ("get_service_name", ()),
        ("get_identifier_name", ()),
        ("get_default_filename", ("test",)),
        ("_get_paper_identifier_info", ({},)),
        ("_add_service_identifier", ({}, "test")),
    )

    for method_name, extra_args in base_calls:
        with self.assertRaises(NotImplementedError):
            getattr(BasePaperDownloader, method_name)(self.downloader, *extra_args)
398
+
399
@patch("tempfile.NamedTemporaryFile")
@patch("requests.get")
def test_filename_extraction_exception_handling(self, mock_get, mock_tempfile):
    """A failure while parsing the header falls back to the default filename."""
    # Response with a perfectly valid header; the failure is injected below.
    response = Mock()
    response.raise_for_status = Mock()
    response.iter_content.return_value = [b"PDF data"]
    response.headers = {"Content-Disposition": 'attachment; filename="paper.pdf"'}
    mock_get.return_value = response

    # Fake temp file; as a context manager it hands back itself.
    temp_file = Mock()
    temp_file.name = "/tmp/test.pdf"
    temp_file.__enter__ = Mock(return_value=temp_file)
    temp_file.__exit__ = Mock(return_value=None)
    mock_tempfile.return_value = temp_file

    # Make ``re.search`` blow up so filename extraction cannot complete.
    regex_failure = requests.RequestException("Regex error")
    with patch("re.search", side_effect=regex_failure):
        outcome = self.downloader.download_pdf_to_temp(
            "https://test.com/paper.pdf", "12345"
        )

    # Download still succeeds, named by the downloader's default filename.
    self.assertEqual(outcome, ("/tmp/test.pdf", "test_12345.pdf"))
423
+
424
def test_build_summary_with_temp_file_path(self):
    """Check that build_summary reports the temp file path and an abstract snippet.

    One paper carries a non-empty ``temp_file_path`` and a two-sentence
    abstract; the other has an empty path and a failed download.
    """
    article_data = {
        "paper1": {
            "Title": "Paper 1",
            "access_type": "open_access_downloaded",
            # Adjacent string literals are concatenated verbatim, so the
            # trailing space is required to keep the two sentences apart.
            "Abstract": "This is a test abstract with multiple sentences. "
            "It should be truncated.",
            "temp_file_path": "/tmp/paper1.pdf",
        },
        "paper2": {
            "Title": "Paper 2",
            "access_type": "download_failed",
            "Abstract": "Short abstract.",
            "temp_file_path": "",  # Empty temp_file_path
        },
    }

    result = self.downloader.build_summary(article_data)

    # Should include temp file path for paper1
    self.assertIn("/tmp/paper1.pdf", result)
    self.assertIn("Downloaded to:", result)
    self.assertIn("Abstract snippet:", result)

    # Should include count information
    self.assertIn("2", result)  # Total papers
    self.assertIn("1", result)  # Successfully downloaded
452
+
453
+
454
class TestBasePaperDownloaderEdgeCases(unittest.TestCase):
    """Edge-case and error-condition tests for the concrete test downloader."""

    def setUp(self):
        """Create a mock config and a downloader built from it."""
        self.mock_config = Mock(request_timeout=30, chunk_size=8192)
        self.downloader = ConcretePaperDownloader(self.mock_config)

    @staticmethod
    def _fake_temp_file():
        """Return a Mock that stands in for a NamedTemporaryFile context manager."""
        handle = Mock()
        handle.name = "/tmp/test.pdf"
        handle.__enter__ = Mock(return_value=handle)
        handle.__exit__ = Mock(return_value=None)
        return handle

    @staticmethod
    def _fake_response(chunks, headers):
        """Return a Mock HTTP response streaming ``chunks`` with ``headers``."""
        response = Mock()
        response.raise_for_status = Mock()
        response.iter_content.return_value = chunks
        response.headers = headers
        return response

    @patch("tempfile.NamedTemporaryFile")
    @patch("requests.get")
    def test_download_pdf_chunk_filtering(self, mock_get, mock_tempfile):
        """Only non-empty chunks from the stream are written to the temp file."""
        streamed = [
            b"chunk1",
            None,  # expected to be skipped
            b"",  # empty chunk, expected to be skipped
            b"chunk2",
            None,
            b"chunk3",
        ]
        mock_get.return_value = self._fake_response(streamed, {})

        temp_file = self._fake_temp_file()
        mock_tempfile.return_value = temp_file

        with patch.object(
            self.downloader, "get_default_filename", return_value="default.pdf"
        ):
            # Return value is irrelevant here; only the writes are inspected.
            self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")

        # Three real chunks, three writes.
        self.assertEqual(temp_file.write.call_count, 3)
        for payload in (b"chunk1", b"chunk2", b"chunk3"):
            temp_file.write.assert_any_call(payload)

    def test_filename_extraction_regex_edge_cases(self):
        """Check filename extraction against assorted Content-Disposition values.

        ``get_default_filename`` is patched to return "default.pdf"; an
        expected value of ``None`` is shorthand for that default.
        """
        default_name = "default.pdf"
        # (Content-Disposition header value, expected filename or None)
        test_headers = [
            # Quoting variants
            ('filename="file with spaces.pdf"', "file with spaces.pdf"),
            ("filename='single_quotes.pdf'", "default.pdf"),  # single quotes: default
            ("filename=no_quotes.pdf", "no_quotes.pdf"),
            # Unicode and special characters
            ('filename="файл.pdf"', "файл.pdf"),
            (
                'filename="file-with-dashes_and_underscores.pdf"',
                "file-with-dashes_and_underscores.pdf",
            ),
            # Empty or missing values fall back to the default
            ('filename=""', "default.pdf"),
            ("filename=", "default.pdf"),
            # Filename preceded by another parameter
            ('other_param=value; filename="actual.pdf"', "actual.pdf"),
            # Invalid cases fall back to the default
            ("invalid_header_format", None),
            ("filename=not_a_pdf.txt", "default.pdf"),  # non-PDF name: default
        ]

        for header_value, expected in test_headers:
            with self.subTest(header=header_value):
                get_patch = patch("requests.get")
                tempfile_patch = patch("tempfile.NamedTemporaryFile")
                default_patch = patch.object(
                    self.downloader,
                    "get_default_filename",
                    return_value=default_name,
                )
                with get_patch as mock_get, tempfile_patch as mock_tempfile, default_patch:
                    mock_get.return_value = self._fake_response(
                        [b"data"], {"Content-Disposition": header_value}
                    )
                    mock_tempfile.return_value = self._fake_temp_file()

                    result = self.downloader.download_pdf_to_temp(
                        "https://test.com/paper.pdf", "12345"
                    )

                    wanted = default_name if expected is None else expected
                    self.assertEqual(result[1], wanted)

    def test_process_identifiers_empty_list(self):
        """An empty identifier list produces an empty result mapping."""
        self.assertEqual(self.downloader.process_identifiers([]), {})

    def test_process_identifiers_duplicate_handling(self):
        """A repeated identifier yields a single entry in the result."""
        identifiers = ["12345", "67890", "12345"]  # "12345" appears twice

        with patch.object(self.downloader, "download_pdf_to_temp", return_value=None):
            result = self.downloader.process_identifiers(identifiers)

        # One entry per distinct identifier.
        self.assertEqual(len(result), 2)
        for identifier in ("12345", "67890"):
            self.assertIn(identifier, result)
576
+
577
+
578
+ class TestBasePaperDownloaderAbstractMethods(unittest.TestCase):
579
+ """Test abstract method behavior."""
580
+
581
def test_abstract_class_cannot_be_instantiated(self):
    """``BasePaperDownloader`` is reported as abstract by ``inspect``."""
    # Checked via introspection rather than by attempting to instantiate it.
    is_abstract = inspect.isabstract(BasePaperDownloader)
    self.assertTrue(is_abstract)
585
+
586
+ def test_complete_implementation_succeeds(self):
587
+ """Test that complete implementations work."""
588
+ # ConcretePaperDownloader from setUp should work
589
+ config = Mock()
590
+ config.request_timeout = 30
591
+ config.chunk_size = 8192
592
+
593
+ downloader = ConcretePaperDownloader(config)
594
+
595
+ # Should be able to call all methods
596
+ self.assertEqual(downloader.get_service_name(), "TestService")
597
+ self.assertEqual(downloader.get_identifier_name(), "Test ID")
598
+ self.assertEqual(downloader.get_default_filename("test"), "test_test.pdf")