aiagents4pharma 1.43.0__py3-none-any.whl → 1.45.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. aiagents4pharma/__init__.py +2 -2
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
  11. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
  12. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  13. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  14. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  16. aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
  17. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
  18. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
  19. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
  20. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
  21. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  22. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  23. aiagents4pharma/talk2biomodels/README.md +1 -0
  24. aiagents4pharma/talk2biomodels/__init__.py +4 -8
  25. aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
  26. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
  27. aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
  28. aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
  29. aiagents4pharma/talk2biomodels/api/ols.py +13 -10
  30. aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
  31. aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
  32. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
  33. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
  34. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
  35. aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
  36. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
  37. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
  38. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
  39. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
  40. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
  41. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
  42. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
  43. aiagents4pharma/talk2biomodels/install.md +63 -0
  44. aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
  45. aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
  46. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
  47. aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
  48. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
  49. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  50. aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
  51. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  52. aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
  53. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
  54. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
  55. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
  56. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
  57. aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
  58. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
  59. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
  60. aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
  61. aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
  62. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
  63. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
  64. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
  65. aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
  66. aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
  67. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
  68. aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
  69. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
  70. aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
  71. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
  72. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
  73. aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
  74. aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
  75. aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
  76. aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
  77. aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
  78. aiagents4pharma/talk2cells/README.md +1 -0
  79. aiagents4pharma/talk2cells/__init__.py +4 -5
  80. aiagents4pharma/talk2cells/agents/__init__.py +3 -2
  81. aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
  82. aiagents4pharma/talk2cells/states/__init__.py +3 -2
  83. aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
  84. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
  85. aiagents4pharma/talk2cells/tools/__init__.py +3 -2
  86. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
  87. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
  88. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
  89. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  90. aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
  91. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  92. aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
  93. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
  94. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
  95. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
  96. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
  97. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
  98. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
  99. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
  100. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
  101. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
  102. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
  103. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
  104. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
  105. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +17 -2
  106. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
  107. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
  108. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
  110. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
  111. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
  112. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
  113. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
  114. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
  115. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
  116. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
  117. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
  118. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
  119. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  120. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  121. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  122. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  123. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
  124. aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
  125. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
  126. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
  127. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
  128. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
  129. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
  130. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
  131. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
  132. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
  133. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
  134. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +736 -413
  135. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
  136. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
  137. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
  138. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +442 -42
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +10 -6
  151. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
  152. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
  153. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +245 -205
  154. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
  155. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
  156. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
  157. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
  158. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
  159. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
  160. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
  161. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
  162. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
  163. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
  164. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
  165. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
  166. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
  167. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
  168. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
  169. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
  170. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
  171. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
  172. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +218 -81
  173. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
  174. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
  175. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
  176. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
  177. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  178. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  179. aiagents4pharma/talk2scholars/README.md +1 -0
  180. aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
  181. aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
  182. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
  183. aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
  184. aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
  185. aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
  186. aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
  187. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
  188. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
  189. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
  190. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
  191. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
  192. aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
  193. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  194. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  195. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  196. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  197. aiagents4pharma/talk2scholars/install.md +122 -0
  198. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
  199. aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
  200. aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
  201. aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
  202. aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
  203. aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
  204. aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
  205. aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
  206. aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
  207. aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
  208. aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
  209. aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
  210. aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
  211. aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
  212. aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
  213. aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
  214. aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
  215. aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
  216. aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
  217. aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
  218. aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
  219. aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
  220. aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
  221. aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
  222. aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
  223. aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
  224. aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
  225. aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
  226. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
  227. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
  228. aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
  229. aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
  230. aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
  231. aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
  232. aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
  233. aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
  234. aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
  235. aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
  236. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
  237. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
  238. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
  239. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
  240. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
  241. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
  242. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
  243. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
  244. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
  245. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
  246. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
  247. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
  248. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
  249. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
  250. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
  251. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
  252. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
  253. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
  254. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
  255. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
  256. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
  257. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
  258. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
  259. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
  260. aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
  261. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
  262. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
  263. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
  264. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
  265. aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
  266. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
  267. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
  268. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
  269. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
  270. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
  271. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
  272. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
  273. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
  274. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
  275. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
  276. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
  277. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
  278. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
  279. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
  280. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
  281. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/METADATA +115 -50
  282. aiagents4pharma-1.45.0.dist-info/RECORD +324 -0
  283. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/WHEEL +1 -2
  284. aiagents4pharma-1.43.0.dist-info/RECORD +0 -293
  285. aiagents4pharma-1.43.0.dist-info/top_level.txt +0 -1
  286. /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
  287. /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
  288. /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
  289. /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
  290. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/licenses/LICENSE +0 -0
@@ -3,11 +3,11 @@ Unit tests for BasePaperDownloader.
3
3
  Tests the abstract base class functionality and common methods.
4
4
  """
5
5
 
6
+ import inspect
6
7
  import unittest
7
- from typing import Any, Dict, Optional, Tuple
8
+ from typing import Any
8
9
  from unittest.mock import Mock, patch
9
10
 
10
- import inspect
11
11
  import requests
12
12
 
13
13
  from aiagents4pharma.talk2scholars.tools.paper_download.utils.base_paper_downloader import (
@@ -31,8 +31,8 @@ class ConcretePaperDownloader(BasePaperDownloader):
31
31
  return f"https://test.com/{identifier}.pdf"
32
32
 
33
33
  def extract_paper_metadata(
34
- self, metadata: Any, identifier: str, pdf_result: Optional[Tuple[str, str]]
35
- ) -> Dict[str, Any]:
34
+ self, metadata: Any, identifier: str, pdf_result: tuple[str, str] | None
35
+ ) -> dict[str, Any]:
36
36
  """Concrete implementation for testing."""
37
37
  return {
38
38
  "Title": f"Test Paper {identifier}",
@@ -53,21 +53,19 @@ class ConcretePaperDownloader(BasePaperDownloader):
53
53
  """Concrete implementation for testing."""
54
54
  return f"test_{identifier}.pdf"
55
55
 
56
- def _get_paper_identifier_info(self, paper: Dict[str, Any]) -> str:
56
+ def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
57
57
  """Concrete implementation for testing."""
58
58
  return f" ({paper.get('identifier', 'unknown')})"
59
59
 
60
- def _add_service_identifier(self, entry: Dict[str, Any], identifier: str) -> None:
60
+ def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
61
61
  """Concrete implementation for testing."""
62
62
  entry["test_id"] = identifier
63
63
 
64
- def get_paper_identifier_info_public(self, paper: Dict[str, Any]) -> str:
64
+ def get_paper_identifier_info_public(self, paper: dict[str, Any]) -> str:
65
65
  """Public wrapper to access protected identifier info for tests."""
66
66
  return self._get_paper_identifier_info(paper)
67
67
 
68
- def add_service_identifier_public(
69
- self, entry: Dict[str, Any], identifier: str
70
- ) -> None:
68
+ def add_service_identifier_public(self, entry: dict[str, Any], identifier: str) -> None:
71
69
  """Public wrapper to access protected service identifier for tests."""
72
70
  self._add_service_identifier(entry, identifier)
73
71
 
@@ -109,9 +107,7 @@ class TestBasePaperDownloader(unittest.TestCase):
109
107
  mock_response = Mock()
110
108
  mock_response.raise_for_status = Mock()
111
109
  mock_response.iter_content.return_value = [b"PDF chunk 1", b"PDF chunk 2"]
112
- mock_response.headers = {
113
- "Content-Disposition": 'attachment; filename="paper.pdf"'
114
- }
110
+ mock_response.headers = {"Content-Disposition": 'attachment; filename="paper.pdf"'}
115
111
  mock_get.return_value = mock_response
116
112
 
117
113
  # Mock temporary file
@@ -121,9 +117,7 @@ class TestBasePaperDownloader(unittest.TestCase):
121
117
  mock_temp_file.__exit__ = Mock(return_value=None)
122
118
  mock_tempfile.return_value = mock_temp_file
123
119
 
124
- result = self.downloader.download_pdf_to_temp(
125
- "https://test.com/paper.pdf", "12345"
126
- )
120
+ result = self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
127
121
 
128
122
  # Verify result
129
123
  self.assertEqual(result, ("/tmp/test.pdf", "paper.pdf"))
@@ -153,9 +147,7 @@ class TestBasePaperDownloader(unittest.TestCase):
153
147
  """Test PDF download with network error."""
154
148
  mock_get.side_effect = requests.RequestException("Network error")
155
149
 
156
- result = self.downloader.download_pdf_to_temp(
157
- "https://test.com/paper.pdf", "12345"
158
- )
150
+ result = self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
159
151
 
160
152
  self.assertIsNone(result)
161
153
 
@@ -235,12 +227,8 @@ class TestBasePaperDownloader(unittest.TestCase):
235
227
  raise requests.RequestException("Fetch failed")
236
228
  return {"test": identifier}
237
229
 
238
- with patch.object(
239
- self.downloader, "fetch_metadata", side_effect=mock_fetch_metadata
240
- ):
241
- with patch.object(
242
- self.downloader, "download_pdf_to_temp", return_value=None
243
- ):
230
+ with patch.object(self.downloader, "fetch_metadata", side_effect=mock_fetch_metadata):
231
+ with patch.object(self.downloader, "download_pdf_to_temp", return_value=None):
244
232
  result = self.downloader.process_identifiers(identifiers)
245
233
 
246
234
  # Valid identifier should succeed
@@ -316,11 +304,11 @@ class TestBasePaperDownloader(unittest.TestCase):
316
304
  """Test building summary with long list (should show only top 3)."""
317
305
  article_data = {}
318
306
  for i in range(5): # More than 3
319
- article_data[f"{i+1}"] = {
320
- "Title": f"Paper {i+1}",
321
- "identifier": f"{i+1}",
307
+ article_data[f"{i + 1}"] = {
308
+ "Title": f"Paper {i + 1}",
309
+ "identifier": f"{i + 1}",
322
310
  "access_type": "open_access_downloaded",
323
- "Abstract": f"Abstract {i+1}",
311
+ "Abstract": f"Abstract {i + 1}",
324
312
  }
325
313
 
326
314
  result = self.downloader.build_summary(article_data)
@@ -388,9 +376,7 @@ class TestBasePaperDownloader(unittest.TestCase):
388
376
  BasePaperDownloader.construct_pdf_url(self.downloader, {}, "test")
389
377
 
390
378
  with self.assertRaises(NotImplementedError):
391
- BasePaperDownloader.extract_paper_metadata(
392
- self.downloader, {}, "test", None
393
- )
379
+ BasePaperDownloader.extract_paper_metadata(self.downloader, {}, "test", None)
394
380
 
395
381
  with self.assertRaises(NotImplementedError):
396
382
  BasePaperDownloader.get_service_name(self.downloader)
@@ -402,15 +388,13 @@ class TestBasePaperDownloader(unittest.TestCase):
402
388
  BasePaperDownloader.get_default_filename(self.downloader, "test")
403
389
 
404
390
  # Protected abstract methods: call via getattr to avoid W0212 while still executing code.
391
+ method_name_1 = "_get_paper_identifier_info"
405
392
  with self.assertRaises(NotImplementedError):
406
- getattr(BasePaperDownloader, "_get_paper_identifier_info")(
407
- self.downloader, {}
408
- )
393
+ getattr(BasePaperDownloader, method_name_1)(self.downloader, {})
409
394
 
395
+ method_name_2 = "_add_service_identifier"
410
396
  with self.assertRaises(NotImplementedError):
411
- getattr(BasePaperDownloader, "_add_service_identifier")(
412
- self.downloader, {}, "test"
413
- )
397
+ getattr(BasePaperDownloader, method_name_2)(self.downloader, {}, "test")
414
398
 
415
399
  @patch("tempfile.NamedTemporaryFile")
416
400
  @patch("requests.get")
@@ -420,9 +404,7 @@ class TestBasePaperDownloader(unittest.TestCase):
420
404
  mock_response = Mock()
421
405
  mock_response.raise_for_status = Mock()
422
406
  mock_response.iter_content.return_value = [b"PDF data"]
423
- mock_response.headers = {
424
- "Content-Disposition": 'attachment; filename="paper.pdf"'
425
- }
407
+ mock_response.headers = {"Content-Disposition": 'attachment; filename="paper.pdf"'}
426
408
  mock_get.return_value = mock_response
427
409
 
428
410
  # Mock temporary file
@@ -434,9 +416,7 @@ class TestBasePaperDownloader(unittest.TestCase):
434
416
 
435
417
  # Patch re.search to raise an exception during filename extraction
436
418
  with patch("re.search", side_effect=requests.RequestException("Regex error")):
437
- result = self.downloader.download_pdf_to_temp(
438
- "https://test.com/paper.pdf", "12345"
439
- )
419
+ result = self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
440
420
 
441
421
  # Should still succeed but use default filename due to exception
442
422
  self.assertEqual(result, ("/tmp/test.pdf", "test_12345.pdf"))
@@ -507,9 +487,7 @@ class TestBasePaperDownloaderEdgeCases(unittest.TestCase):
507
487
  mock_temp_file.__exit__ = Mock(return_value=None)
508
488
  mock_tempfile.return_value = mock_temp_file
509
489
 
510
- with patch.object(
511
- self.downloader, "get_default_filename", return_value="default.pdf"
512
- ):
490
+ with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
513
491
  # Call without assigning to avoid 'unused-variable'
514
492
  self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
515
493
 
@@ -122,9 +122,7 @@ class TestBiorxivDownloader(unittest.TestCase):
122
122
  result = self.downloader.fetch_metadata("10.1101/2023.01.01.123456")
123
123
 
124
124
  # Verify API call
125
- expected_url = (
126
- "https://api.biorxiv.org/details/biorxiv/10.1101/2023.01.01.123456/na/json"
127
- )
125
+ expected_url = "https://api.biorxiv.org/details/biorxiv/10.1101/2023.01.01.123456/na/json"
128
126
  mock_scraper.get.assert_called_once_with(expected_url, timeout=30)
129
127
  mock_response.raise_for_status.assert_called_once()
130
128
 
@@ -171,9 +169,7 @@ class TestBiorxivDownloader(unittest.TestCase):
171
169
  # Default version
172
170
  meta_default = {"collection": [{"title": "Test Paper"}]}
173
171
  self.assertEqual(
174
- self.downloader.construct_pdf_url(
175
- meta_default, "10.1101/2023.01.01.123456"
176
- ),
172
+ self.downloader.construct_pdf_url(meta_default, "10.1101/2023.01.01.123456"),
177
173
  "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf",
178
174
  )
179
175
 
@@ -195,9 +191,7 @@ class TestBiorxivDownloader(unittest.TestCase):
195
191
  b"PDF content chunk 1",
196
192
  b"PDF content chunk 2",
197
193
  ]
198
- mock_pdf_response.headers = {
199
- "Content-Disposition": 'attachment; filename="paper.pdf"'
200
- }
194
+ mock_pdf_response.headers = {"Content-Disposition": 'attachment; filename="paper.pdf"'}
201
195
 
202
196
  mock_scraper.get.side_effect = [mock_landing_response, mock_pdf_response]
203
197
 
@@ -209,9 +203,7 @@ class TestBiorxivDownloader(unittest.TestCase):
209
203
  mock_tempfile.return_value = mock_temp_file
210
204
 
211
205
  pdf_url = "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
212
- result = self.downloader.download_pdf_to_temp(
213
- pdf_url, "10.1101/2023.01.01.123456"
214
- )
206
+ result = self.downloader.download_pdf_to_temp(pdf_url, "10.1101/2023.01.01.123456")
215
207
 
216
208
  # Verify result
217
209
  self.assertEqual(result, ("/tmp/test.pdf", "paper.pdf"))
@@ -263,9 +255,7 @@ class TestBiorxivDownloader(unittest.TestCase):
263
255
  mock_scraper.get.return_value = ok
264
256
 
265
257
  # Case 1: with .full.pdf -> should visit landing
266
- pdf_url_full = (
267
- "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
268
- )
258
+ pdf_url_full = "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
269
259
  self.downloader.visit_landing_page_public(
270
260
  mock_scraper, pdf_url_full, "10.1101/2023.01.01.123456"
271
261
  )
@@ -324,9 +314,7 @@ class TestBiorxivDownloader(unittest.TestCase):
324
314
  ), # trigger exception path
325
315
  ]
326
316
  for headers, expected, raise_regex in cases:
327
- with self.subTest(
328
- headers=headers, expected=expected, raise_regex=raise_regex
329
- ):
317
+ with self.subTest(headers=headers, expected=expected, raise_regex=raise_regex):
330
318
  resp = Mock()
331
319
  resp.headers = headers
332
320
  if raise_regex:
@@ -339,18 +327,14 @@ class TestBiorxivDownloader(unittest.TestCase):
339
327
  "get_default_filename",
340
328
  return_value="default.pdf",
341
329
  ):
342
- got = self.downloader.extract_filename_public(
343
- resp, "10.1101/test"
344
- )
330
+ got = self.downloader.extract_filename_public(resp, "10.1101/test")
345
331
  else:
346
332
  with patch.object(
347
333
  self.downloader,
348
334
  "get_default_filename",
349
335
  return_value="default.pdf",
350
336
  ):
351
- got = self.downloader.extract_filename_public(
352
- resp, "10.1101/test"
353
- )
337
+ got = self.downloader.extract_filename_public(resp, "10.1101/test")
354
338
  self.assertEqual(got, expected)
355
339
 
356
340
  def test_extract_paper_metadata_success(self):
@@ -396,18 +380,14 @@ class TestBiorxivDownloader(unittest.TestCase):
396
380
  self.assertEqual(result["URL"], "")
397
381
  self.assertEqual(result["pdf_url"], "")
398
382
  self.assertEqual(result["temp_file_path"], "")
399
- self.assertEqual(
400
- result["filename"], "10_1101_2023_01_01_123456.pdf"
401
- ) # Default filename
383
+ self.assertEqual(result["filename"], "10_1101_2023_01_01_123456.pdf") # Default filename
402
384
 
403
385
  def test_extract_paper_metadata_no_collection(self):
404
386
  """Test metadata extraction with missing collection."""
405
387
  metadata = {}
406
388
 
407
389
  with self.assertRaises(RuntimeError) as context:
408
- self.downloader.extract_paper_metadata(
409
- metadata, "10.1101/2023.01.01.123456", None
410
- )
390
+ self.downloader.extract_paper_metadata(metadata, "10.1101/2023.01.01.123456", None)
411
391
 
412
392
  self.assertIn("No collection data found", str(context.exception))
413
393
 
@@ -415,9 +395,7 @@ class TestBiorxivDownloader(unittest.TestCase):
415
395
  """Test basic metadata extraction helper method."""
416
396
  paper = self.sample_json_response["collection"][0]
417
397
 
418
- result = self.downloader.extract_basic_metadata_public(
419
- paper, "10.1101/2023.01.01.123456"
420
- )
398
+ result = self.downloader.extract_basic_metadata_public(paper, "10.1101/2023.01.01.123456")
421
399
 
422
400
  expected = {
423
401
  "Title": "Test BioRxiv Paper",
@@ -468,9 +446,7 @@ class TestBiorxivDownloader(unittest.TestCase):
468
446
  """Test _add_service_identifier method."""
469
447
  entry = {}
470
448
 
471
- self.downloader.add_service_identifier_public(
472
- entry, "10.1101/2023.01.01.123456"
473
- )
449
+ self.downloader.add_service_identifier_public(entry, "10.1101/2023.01.01.123456")
474
450
 
475
451
  self.assertEqual(entry["DOI"], "10.1101/2023.01.01.123456")
476
452
  self.assertEqual(entry["server"], "biorxiv")
@@ -561,9 +537,7 @@ class TestBiorxivDownloaderIntegration(unittest.TestCase):
561
537
  pdf_result = self.downloader.download_pdf_to_temp(pdf_url, identifier)
562
538
 
563
539
  # Step 4: Extract metadata
564
- paper_data = self.downloader.extract_paper_metadata(
565
- metadata, identifier, pdf_result
566
- )
540
+ paper_data = self.downloader.extract_paper_metadata(metadata, identifier, pdf_result)
567
541
 
568
542
  # Verify the complete workflow
569
543
  self.assertEqual(paper_data["Title"], "Integration Test Paper")
@@ -571,9 +545,7 @@ class TestBiorxivDownloaderIntegration(unittest.TestCase):
571
545
  self.assertEqual(paper_data["access_type"], "open_access_downloaded")
572
546
  self.assertEqual(paper_data["temp_file_path"], "/tmp/integration.pdf")
573
547
 
574
- expected_pdf_url = (
575
- "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
576
- )
548
+ expected_pdf_url = "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
577
549
  self.assertEqual(pdf_url, expected_pdf_url)
578
550
 
579
551
  # Verify 3 calls: metadata, landing page, PDF
@@ -89,9 +89,7 @@ class TestMedrxivDownloader(unittest.TestCase):
89
89
  result = self.downloader.fetch_metadata("10.1101/2023.01.01.123456")
90
90
 
91
91
  # Verify API call - should include /medrxiv/ and /na/json
92
- expected_url = (
93
- "https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.123456/na/json"
94
- )
92
+ expected_url = "https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.123456/na/json"
95
93
  mock_get.assert_called_once_with(expected_url, timeout=30)
96
94
  mock_response.raise_for_status.assert_called_once()
97
95
 
@@ -133,9 +131,7 @@ class TestMedrxivDownloader(unittest.TestCase):
133
131
  )
134
132
  # Empty collection
135
133
  self.assertEqual(
136
- self.downloader.construct_pdf_url(
137
- {"collection": []}, "10.1101/2023.01.01.123456"
138
- ),
134
+ self.downloader.construct_pdf_url({"collection": []}, "10.1101/2023.01.01.123456"),
139
135
  "",
140
136
  )
141
137
  # Custom version
@@ -179,9 +175,7 @@ class TestMedrxivDownloader(unittest.TestCase):
179
175
  """Test metadata extraction without PDF download."""
180
176
  metadata = self.sample_json_response
181
177
 
182
- with patch.object(
183
- self.downloader, "get_default_filename", return_value="default.pdf"
184
- ):
178
+ with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
185
179
  result = self.downloader.extract_paper_metadata(
186
180
  metadata, "10.1101/2023.01.01.123456", None
187
181
  )
@@ -196,9 +190,7 @@ class TestMedrxivDownloader(unittest.TestCase):
196
190
  metadata = {}
197
191
 
198
192
  with self.assertRaises(RuntimeError) as context:
199
- self.downloader.extract_paper_metadata(
200
- metadata, "10.1101/2023.01.01.123456", None
201
- )
193
+ self.downloader.extract_paper_metadata(metadata, "10.1101/2023.01.01.123456", None)
202
194
 
203
195
  self.assertIn("No collection data found", str(context.exception))
204
196
 
@@ -224,9 +216,7 @@ class TestMedrxivDownloader(unittest.TestCase):
224
216
 
225
217
  # Missing fields
226
218
  paper_missing = {"title": "Test Paper"} # Missing others
227
- got_missing = self.downloader.extract_basic_metadata_public(
228
- paper_missing, "10.1101/test"
229
- )
219
+ got_missing = self.downloader.extract_basic_metadata_public(paper_missing, "10.1101/test")
230
220
  self.assertEqual(got_missing["Title"], "Test Paper")
231
221
  self.assertEqual(got_missing["Authors"], [])
232
222
  self.assertEqual(got_missing["Abstract"], "N/A")
@@ -261,9 +251,7 @@ class TestMedrxivDownloader(unittest.TestCase):
261
251
  )
262
252
 
263
253
  # Without result
264
- with patch.object(
265
- self.downloader, "get_default_filename", return_value="default.pdf"
266
- ):
254
+ with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
267
255
  expected_without = {
268
256
  "URL": "",
269
257
  "pdf_url": "",
@@ -302,9 +290,7 @@ class TestMedrxivDownloader(unittest.TestCase):
302
290
  def test_add_service_identifier(self):
303
291
  """Test _add_service_identifier method."""
304
292
  entry = {}
305
- self.downloader.add_service_identifier_public(
306
- entry, "10.1101/2023.01.01.123456"
307
- )
293
+ self.downloader.add_service_identifier_public(entry, "10.1101/2023.01.01.123456")
308
294
  self.assertEqual(entry["DOI"], "10.1101/2023.01.01.123456")
309
295
  self.assertEqual(entry["server"], "medrxiv")
310
296
 
@@ -368,9 +354,7 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
368
354
  pdf_result = self.downloader.download_pdf_to_temp(pdf_url, identifier)
369
355
 
370
356
  # Step 4: Extract metadata
371
- paper_data = self.downloader.extract_paper_metadata(
372
- metadata, identifier, pdf_result
373
- )
357
+ paper_data = self.downloader.extract_paper_metadata(metadata, identifier, pdf_result)
374
358
 
375
359
  # Verify the complete workflow
376
360
  self.assertEqual(paper_data["Title"], "Integration Test Paper")
@@ -384,9 +368,7 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
384
368
  "https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.123456/na/json",
385
369
  timeout=30,
386
370
  )
387
- expected_pdf_url = (
388
- "https://www.medrxiv.org/content/10.1101/2023.01.01.123456v2.full.pdf"
389
- )
371
+ expected_pdf_url = "https://www.medrxiv.org/content/10.1101/2023.01.01.123456v2.full.pdf"
390
372
  mock_download.assert_called_once_with(expected_pdf_url, identifier)
391
373
 
392
374
  @patch("requests.get")
@@ -413,25 +395,15 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
413
395
  with self.assertRaises(RuntimeError) as context:
414
396
  self.downloader.fetch_metadata(identifier)
415
397
 
416
- self.assertIn(
417
- "No collection data found in medRxiv API response", str(context.exception)
418
- )
398
+ self.assertIn("No collection data found in medRxiv API response", str(context.exception))
419
399
 
420
400
  @patch("requests.get")
421
401
  def test_multiple_identifiers_workflow(self, mock_get):
422
402
  """Test processing multiple identifiers."""
423
403
  # Mock different responses for different DOIs
424
404
  responses = [
425
- {
426
- "collection": [
427
- {"title": "Paper 1", "version": "1", "authors": "Author 1"}
428
- ]
429
- },
430
- {
431
- "collection": [
432
- {"title": "Paper 2", "version": "2", "authors": "Author 2"}
433
- ]
434
- },
405
+ {"collection": [{"title": "Paper 1", "version": "1", "authors": "Author 1"}]},
406
+ {"collection": [{"title": "Paper 2", "version": "2", "authors": "Author 2"}]},
435
407
  ]
436
408
 
437
409
  mock_responses = []
@@ -448,12 +420,8 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
448
420
 
449
421
  for identifier in identifiers:
450
422
  metadata = self.downloader.fetch_metadata(identifier)
451
- _ = self.downloader.construct_pdf_url(
452
- metadata, identifier
453
- ) # ensure path covered
454
- paper_data = self.downloader.extract_paper_metadata(
455
- metadata, identifier, None
456
- )
423
+ _ = self.downloader.construct_pdf_url(metadata, identifier) # ensure path covered
424
+ paper_data = self.downloader.extract_paper_metadata(metadata, identifier, None)
457
425
  results[identifier] = paper_data
458
426
 
459
427
  # Verify both papers were processed
@@ -529,6 +497,4 @@ class TestMedrxivSpecialCases(unittest.TestCase):
529
497
  # Should handle Unicode properly
530
498
  self.assertEqual(result["Title"], "Título com acentos é símbolos especiais")
531
499
  self.assertEqual(result["Authors"], ["José María", "François Müller"])
532
- self.assertEqual(
533
- result["Abstract"], "Resumo com çaracteres especiais ñ símbolos"
534
- )
500
+ self.assertEqual(result["Abstract"], "Resumo com çaracteres especiais ñ símbolos")
@@ -19,7 +19,7 @@ def fixture_chunks():
19
19
  return [
20
20
  Document(
21
21
  page_content=f"chunk {i}",
22
- metadata={"paper_id": f"P{i%2}", "relevance_score": 0.9 - 0.01 * i},
22
+ metadata={"paper_id": f"P{i % 2}", "relevance_score": 0.9 - 0.01 * i},
23
23
  )
24
24
  for i in range(10)
25
25
  ]
@@ -27,9 +27,7 @@ def fixture_chunks():
27
27
 
28
28
  def test_rerank_chunks_short_input(chunks_fixture):
29
29
  """rerank_chunks with fewer chunks than top_k should return original."""
30
- result = rerank_chunks(
31
- chunks_fixture[:3], "What is cancer?", config=MagicMock(), top_k=5
32
- )
30
+ result = rerank_chunks(chunks_fixture[:3], "What is cancer?", config=MagicMock(), top_k=5)
33
31
  assert result == chunks_fixture[:3]
34
32
 
35
33
 
@@ -65,9 +63,7 @@ def test_rerank_chunks_success(mock_reranker_cls, chunks_fixture):
65
63
  mock_config.reranker.api_key = "test_key"
66
64
  mock_config.reranker.model = "test_model"
67
65
 
68
- result = rerank_chunks(
69
- chunks_fixture, "Explain mitochondria.", config=mock_config, top_k=5
70
- )
66
+ result = rerank_chunks(chunks_fixture, "Explain mitochondria.", config=mock_config, top_k=5)
71
67
 
72
68
  assert isinstance(result, list)
73
69
  assert result == list(reversed(chunks_fixture))[:5]
@@ -77,9 +73,7 @@ def test_rerank_chunks_success(mock_reranker_cls, chunks_fixture):
77
73
 
78
74
 
79
75
  @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.NVIDIARerank")
80
- def test_rerank_chunks_reranker_fails_raises_and_calls_compress(
81
- mock_reranker_cls, chunks_fixture
82
- ):
76
+ def test_rerank_chunks_reranker_fails_raises_and_calls_compress(mock_reranker_cls, chunks_fixture):
83
77
  """
84
78
  If NVIDIARerank.compress_documents raises RuntimeError:
85
79
  - rerank_chunks should propagate the RuntimeError
@@ -94,9 +88,7 @@ def test_rerank_chunks_reranker_fails_raises_and_calls_compress(
94
88
  mock_config.reranker.model = "reranker"
95
89
 
96
90
  with pytest.raises(RuntimeError, match="API failure"):
97
- rerank_chunks(
98
- chunks_fixture, "How does light affect plants?", config=mock_config, top_k=3
99
- )
91
+ rerank_chunks(chunks_fixture, "How does light affect plants?", config=mock_config, top_k=3)
100
92
 
101
93
  reranker_instance.compress_documents.assert_called_once_with(
102
94
  query="How does light affect plants?", documents=chunks_fixture
@@ -105,9 +97,7 @@ def test_rerank_chunks_reranker_fails_raises_and_calls_compress(
105
97
 
106
98
  @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.logger")
107
99
  @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.NVIDIARerank")
108
- def test_rerank_chunks_debug_block_triggered(
109
- mock_reranker_cls, mock_logger, chunks_fixture
110
- ):
100
+ def test_rerank_chunks_debug_block_triggered(mock_reranker_cls, mock_logger, chunks_fixture):
111
101
  """rerank_chunks should log debug info if debug logging is enabled."""
112
102
  mock_logger.isEnabledFor.return_value = True
113
103
 
@@ -1,6 +1,7 @@
1
1
  """answer_formatter tests."""
2
2
 
3
3
  from unittest.mock import patch
4
+
4
5
  import pytest
5
6
 
6
7
  from aiagents4pharma.talk2scholars.tools.pdf.utils.answer_formatter import format_answer
@@ -1,6 +1,7 @@
1
1
  """Tests for the PDF batch processor module."""
2
2
 
3
3
  from unittest.mock import MagicMock, patch
4
+
4
5
  import pytest
5
6
 
6
7
  from aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor import (
@@ -22,18 +23,14 @@ def _args_fixture():
22
23
  }
23
24
 
24
25
 
25
- @patch(
26
- "aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
27
- )
26
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
28
27
  def test_no_papers_to_add(mock_loader, args_fixture):
29
28
  """Test case where no papers are provided to add."""
30
29
  add_papers_batch(papers_to_add=[], **args_fixture)
31
30
  mock_loader.assert_not_called()
32
31
 
33
32
 
34
- @patch(
35
- "aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
36
- )
33
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
37
34
  def test_all_papers_already_loaded(mock_loader, args_fixture):
38
35
  """Test case where all papers are already loaded."""
39
36
  args_fixture["loaded_papers"].update(["p1", "p2"])
@@ -44,9 +41,7 @@ def test_all_papers_already_loaded(mock_loader, args_fixture):
44
41
  mock_loader.assert_not_called()
45
42
 
46
43
 
47
- @patch(
48
- "aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
49
- )
44
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
50
45
  def test_successful_batch_embedding(mock_loader, args_fixture):
51
46
  """Test case where papers are successfully loaded and embedded."""
52
47
  mock_loader.return_value = [
@@ -70,9 +65,7 @@ def test_successful_batch_embedding(mock_loader, args_fixture):
70
65
  mock_collection.flush.assert_called()
71
66
 
72
67
 
73
- @patch(
74
- "aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
75
- )
68
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
76
69
  def test_empty_chunks_after_loading(mock_loader, args_fixture):
77
70
  """Test case where no chunks are returned after loading PDF."""
78
71
  mock_loader.return_value = []
@@ -82,9 +75,7 @@ def test_empty_chunks_after_loading(mock_loader, args_fixture):
82
75
  args_fixture["vector_store"].add_documents.assert_not_called()
83
76
 
84
77
 
85
- @patch(
86
- "aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
87
- )
78
+ @patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
88
79
  def test_vector_store_insert_failure(mock_loader, args_fixture):
89
80
  """Test case where vector store insertion fails."""
90
81
  mock_loader.return_value = [MagicMock(page_content="page")]