aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289)
  1. aiagents4pharma/__init__.py +2 -2
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
  11. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
  12. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  13. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  14. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  16. aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
  17. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
  18. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
  19. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
  20. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
  21. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  22. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  23. aiagents4pharma/talk2biomodels/README.md +1 -0
  24. aiagents4pharma/talk2biomodels/__init__.py +4 -8
  25. aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
  26. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
  27. aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
  28. aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
  29. aiagents4pharma/talk2biomodels/api/ols.py +13 -10
  30. aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
  31. aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
  32. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
  33. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
  34. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
  35. aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
  36. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
  37. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
  38. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
  39. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
  40. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
  41. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
  42. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
  43. aiagents4pharma/talk2biomodels/install.md +63 -0
  44. aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
  45. aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
  46. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
  47. aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
  48. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
  49. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  50. aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
  51. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  52. aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
  53. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
  54. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
  55. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
  56. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
  57. aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
  58. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
  59. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
  60. aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
  61. aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
  62. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
  63. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
  64. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
  65. aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
  66. aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
  67. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
  68. aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
  69. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
  70. aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
  71. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
  72. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
  73. aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
  74. aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
  75. aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
  76. aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
  77. aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
  78. aiagents4pharma/talk2cells/README.md +1 -0
  79. aiagents4pharma/talk2cells/__init__.py +4 -5
  80. aiagents4pharma/talk2cells/agents/__init__.py +3 -2
  81. aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
  82. aiagents4pharma/talk2cells/states/__init__.py +3 -2
  83. aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
  84. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
  85. aiagents4pharma/talk2cells/tools/__init__.py +3 -2
  86. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
  87. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
  88. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
  89. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  90. aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
  91. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  92. aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
  93. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
  94. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
  95. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
  96. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
  97. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
  98. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
  99. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
  100. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
  101. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
  102. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
  103. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
  104. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
  105. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
  106. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
  107. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
  108. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
  109. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
  110. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
  111. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
  112. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
  113. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
  114. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
  115. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
  116. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
  117. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
  118. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  119. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  120. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  121. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  122. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
  123. aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
  124. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
  125. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
  126. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
  127. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
  128. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
  129. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
  130. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
  131. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
  132. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
  133. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +334 -216
  134. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
  135. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
  136. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
  137. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +160 -97
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +87 -13
  150. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
  151. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
  152. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +145 -142
  153. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
  154. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
  155. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
  156. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
  157. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
  158. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
  159. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
  160. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
  161. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
  162. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
  163. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
  164. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
  165. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
  166. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
  167. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
  168. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
  169. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
  170. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
  171. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +14 -34
  172. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
  173. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
  174. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
  175. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
  176. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  177. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  178. aiagents4pharma/talk2scholars/README.md +1 -0
  179. aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
  180. aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
  181. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
  182. aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
  183. aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
  184. aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
  185. aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
  186. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
  187. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
  188. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
  189. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
  190. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
  191. aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
  192. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  193. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  194. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  195. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  196. aiagents4pharma/talk2scholars/install.md +122 -0
  197. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
  198. aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
  199. aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
  200. aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
  201. aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
  202. aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
  203. aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
  204. aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
  205. aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
  206. aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
  207. aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
  208. aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
  209. aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
  210. aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
  211. aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
  212. aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
  213. aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
  214. aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
  215. aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
  216. aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
  217. aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
  218. aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
  219. aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
  220. aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
  221. aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
  222. aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
  223. aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
  224. aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
  225. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
  226. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
  227. aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
  228. aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
  229. aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
  230. aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
  231. aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
  232. aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
  233. aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
  234. aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
  235. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
  236. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
  237. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
  238. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
  239. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
  240. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
  241. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
  242. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
  243. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
  244. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
  245. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
  246. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
  247. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
  248. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
  249. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
  250. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
  251. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
  252. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
  253. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
  254. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
  255. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
  256. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
  257. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
  258. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
  259. aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
  260. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
  261. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
  262. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
  263. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
  264. aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
  265. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
  266. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
  267. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
  268. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
  269. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
  270. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
  271. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
  272. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
  273. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
  274. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
  275. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
  276. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
  277. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
  278. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
  279. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
  280. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/METADATA +115 -51
  281. aiagents4pharma-1.45.1.dist-info/RECORD +324 -0
  282. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/WHEEL +1 -2
  283. aiagents4pharma-1.44.0.dist-info/RECORD +0 -293
  284. aiagents4pharma-1.44.0.dist-info/top_level.txt +0 -1
  285. /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
  286. /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
  287. /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
  288. /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
  289. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/licenses/LICENSE +0 -0
@@ -5,11 +5,13 @@ biorxiv and medrxiv.
5
5
  """
6
6
 
7
7
  # Import modules
8
- from . import arxiv_downloader
9
- from . import base_paper_downloader
10
- from . import biorxiv_downloader
11
- from . import medrxiv_downloader
12
- from . import pubmed_downloader
8
+ from . import (
9
+ arxiv_downloader,
10
+ base_paper_downloader,
11
+ biorxiv_downloader,
12
+ medrxiv_downloader,
13
+ pubmed_downloader,
14
+ )
13
15
 
14
16
  __all__ = [
15
17
  "arxiv_downloader",
@@ -5,7 +5,7 @@ ArXiv paper downloader implementation.
5
5
 
6
6
  import logging
7
7
  import xml.etree.ElementTree as ET
8
- from typing import Any, Dict, Optional, Tuple
8
+ from typing import Any
9
9
 
10
10
  import requests
11
11
 
@@ -92,8 +92,8 @@ class ArxivDownloader(BasePaperDownloader):
92
92
  self,
93
93
  metadata: ET.Element,
94
94
  identifier: str,
95
- pdf_result: Optional[Tuple[str, str]],
96
- ) -> Dict[str, Any]:
95
+ pdf_result: tuple[str, str] | None,
96
+ ) -> dict[str, Any]:
97
97
  """
98
98
  Extract structured metadata from arXiv API response.
99
99
 
@@ -124,7 +124,7 @@ class ArxivDownloader(BasePaperDownloader):
124
124
  "arxiv_id": identifier,
125
125
  }
126
126
 
127
- def _extract_basic_metadata(self, entry: ET.Element, ns: dict) -> Dict[str, Any]:
127
+ def _extract_basic_metadata(self, entry: ET.Element, ns: dict) -> dict[str, Any]:
128
128
  """Extract basic metadata (title, authors, abstract, date) from entry."""
129
129
  title = self._extract_title(entry, ns)
130
130
  authors = self._extract_authors(entry, ns)
@@ -160,13 +160,11 @@ class ArxivDownloader(BasePaperDownloader):
160
160
  def _extract_publication_date(self, entry: ET.Element, ns: dict) -> str:
161
161
  """Extract publication date from entry."""
162
162
  published_elem = entry.find("atom:published", ns)
163
- return (
164
- (published_elem.text or "").strip() if published_elem is not None else "N/A"
165
- )
163
+ return (published_elem.text or "").strip() if published_elem is not None else "N/A"
166
164
 
167
165
  def _extract_pdf_metadata(
168
- self, pdf_result: Optional[Tuple[str, str]], identifier: str
169
- ) -> Dict[str, Any]:
166
+ self, pdf_result: tuple[str, str] | None, identifier: str
167
+ ) -> dict[str, Any]:
170
168
  """Extract PDF-related metadata."""
171
169
  if pdf_result:
172
170
  temp_file_path, filename = pdf_result
@@ -198,12 +196,12 @@ class ArxivDownloader(BasePaperDownloader):
198
196
  """Generate default filename for arXiv paper."""
199
197
  return f"{identifier}.pdf"
200
198
 
201
- def _get_paper_identifier_info(self, paper: Dict[str, Any]) -> str:
199
+ def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
202
200
  """Get arXiv-specific identifier info for paper summary."""
203
201
  arxiv_id = paper.get("arxiv_id", "N/A")
204
202
  pub_date = paper.get("Publication Date", "N/A")
205
203
  return f" (arXiv:{arxiv_id}, {pub_date})"
206
204
 
207
- def _add_service_identifier(self, entry: Dict[str, Any], identifier: str) -> None:
205
+ def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
208
206
  """Add arXiv ID field to entry."""
209
207
  entry["arxiv_id"] = identifier
@@ -8,7 +8,7 @@ import logging
8
8
  import re
9
9
  import tempfile
10
10
  from abc import ABC, abstractmethod
11
- from typing import Any, Dict, List, Optional, Tuple
11
+ from typing import Any
12
12
 
13
13
  import requests
14
14
 
@@ -58,8 +58,8 @@ class BasePaperDownloader(ABC):
58
58
 
59
59
  @abstractmethod
60
60
  def extract_paper_metadata(
61
- self, metadata: Any, identifier: str, pdf_result: Optional[Tuple[str, str]]
62
- ) -> Dict[str, Any]:
61
+ self, metadata: Any, identifier: str, pdf_result: tuple[str, str] | None
62
+ ) -> dict[str, Any]:
63
63
  """
64
64
  Extract and structure metadata into standardized format.
65
65
 
@@ -89,9 +89,7 @@ class BasePaperDownloader(ABC):
89
89
  raise NotImplementedError
90
90
 
91
91
  # Common methods shared by all services
92
- def download_pdf_to_temp(
93
- self, pdf_url: str, identifier: str
94
- ) -> Optional[Tuple[str, str]]:
92
+ def download_pdf_to_temp(self, pdf_url: str, identifier: str) -> tuple[str, str] | None:
95
93
  """
96
94
  Download PDF from URL to a temporary file.
97
95
 
@@ -103,9 +101,7 @@ class BasePaperDownloader(ABC):
103
101
  Tuple of (temp_file_path, filename) or None if failed
104
102
  """
105
103
  if not pdf_url:
106
- logger.info(
107
- "No PDF URL available for %s %s", self.get_identifier_name(), identifier
108
- )
104
+ logger.info("No PDF URL available for %s %s", self.get_identifier_name(), identifier)
109
105
  return None
110
106
 
111
107
  try:
@@ -141,14 +137,11 @@ class BasePaperDownloader(ABC):
141
137
 
142
138
  if "filename=" in content_disposition:
143
139
  try:
144
-
145
140
  filename_match = re.search(
146
141
  r'filename[*]?=(?:"([^"]+)"|([^;]+))', content_disposition
147
142
  )
148
143
  if filename_match:
149
- extracted_filename = filename_match.group(
150
- 1
151
- ) or filename_match.group(2)
144
+ extracted_filename = filename_match.group(1) or filename_match.group(2)
152
145
  extracted_filename = extracted_filename.strip().strip('"')
153
146
  if extracted_filename and extracted_filename.endswith(".pdf"):
154
147
  filename = extracted_filename
@@ -189,7 +182,7 @@ class BasePaperDownloader(ABC):
189
182
 
190
183
  return snippet
191
184
 
192
- def create_error_entry(self, identifier: str, error_msg: str) -> Dict[str, Any]:
185
+ def create_error_entry(self, identifier: str, error_msg: str) -> dict[str, Any]:
193
186
  """
194
187
  Create standardized error entry for failed paper processing.
195
188
 
@@ -215,7 +208,7 @@ class BasePaperDownloader(ABC):
215
208
  # Service-specific identifier field will be added by subclasses
216
209
  }
217
210
 
218
- def build_summary(self, article_data: Dict[str, Any]) -> str:
211
+ def build_summary(self, article_data: dict[str, Any]) -> str:
219
212
  """
220
213
  Build a summary string for up to three papers with snippets.
221
214
 
@@ -226,7 +219,7 @@ class BasePaperDownloader(ABC):
226
219
  Formatted summary string
227
220
  """
228
221
  top = list(article_data.values())[:3]
229
- lines: List[str] = []
222
+ lines: list[str] = []
230
223
  downloaded_count = sum(
231
224
  1
232
225
  for paper in article_data.values()
@@ -240,7 +233,7 @@ class BasePaperDownloader(ABC):
240
233
  snippet = self.get_snippet(paper.get("Abstract", ""))
241
234
 
242
235
  # Build paper line with service-specific identifier info
243
- line = f"{idx+1}. {title}"
236
+ line = f"{idx + 1}. {title}"
244
237
  line += self._get_paper_identifier_info(paper)
245
238
  line += f"\n Access: {access_type}"
246
239
 
@@ -264,7 +257,7 @@ class BasePaperDownloader(ABC):
264
257
  )
265
258
 
266
259
  @abstractmethod
267
- def _get_paper_identifier_info(self, paper: Dict[str, Any]) -> str:
260
+ def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
268
261
  """
269
262
  Get service-specific identifier info for paper summary.
270
263
 
@@ -276,7 +269,7 @@ class BasePaperDownloader(ABC):
276
269
  """
277
270
  raise NotImplementedError
278
271
 
279
- def process_identifiers(self, identifiers: List[str]) -> Dict[str, Any]:
272
+ def process_identifiers(self, identifiers: list[str]) -> dict[str, Any]:
280
273
  """
281
274
  Main processing loop for downloading papers.
282
275
 
@@ -293,7 +286,7 @@ class BasePaperDownloader(ABC):
293
286
  identifiers,
294
287
  )
295
288
 
296
- article_data: Dict[str, Any] = {}
289
+ article_data: dict[str, Any] = {}
297
290
 
298
291
  for identifier in identifiers:
299
292
  logger.info("Processing %s: %s", self.get_identifier_name(), identifier)
@@ -332,7 +325,7 @@ class BasePaperDownloader(ABC):
332
325
  return article_data
333
326
 
334
327
  @abstractmethod
335
- def _add_service_identifier(self, entry: Dict[str, Any], identifier: str) -> None:
328
+ def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
336
329
  """
337
330
  Add service-specific identifier field to entry.
338
331
 
@@ -6,7 +6,7 @@ BioRxiv paper downloader implementation.
6
6
  import logging
7
7
  import re
8
8
  import tempfile
9
- from typing import Any, Dict, Optional, Tuple
9
+ from typing import Any
10
10
 
11
11
  import cloudscraper
12
12
  import requests
@@ -43,9 +43,7 @@ class BiorxivDownloader(BasePaperDownloader):
43
43
  # CloudScraper specific settings
44
44
  self.cf_clearance_timeout = getattr(config, "cf_clearance_timeout", 30)
45
45
  self.session_reuse = getattr(config, "session_reuse", True)
46
- self.browser_config_type = getattr(config, "browser_config", {}).get(
47
- "type", "custom"
48
- )
46
+ self.browser_config_type = getattr(config, "browser_config", {}).get("type", "custom")
49
47
 
50
48
  # Initialize shared CloudScraper session if enabled
51
49
  self._scraper = None
@@ -55,7 +53,7 @@ class BiorxivDownloader(BasePaperDownloader):
55
53
  delay=self.cf_clearance_timeout,
56
54
  )
57
55
 
58
- def fetch_metadata(self, identifier: str) -> Dict[str, Any]:
56
+ def fetch_metadata(self, identifier: str) -> dict[str, Any]:
59
57
  """
60
58
  Fetch paper metadata from bioRxiv API.
61
59
 
@@ -88,7 +86,7 @@ class BiorxivDownloader(BasePaperDownloader):
88
86
 
89
87
  return paper_data
90
88
 
91
- def construct_pdf_url(self, metadata: Dict[str, Any], identifier: str) -> str:
89
+ def construct_pdf_url(self, metadata: dict[str, Any], identifier: str) -> str:
92
90
  """
93
91
  Construct PDF URL from bioRxiv metadata and DOI.
94
92
 
@@ -111,9 +109,7 @@ class BiorxivDownloader(BasePaperDownloader):
111
109
 
112
110
  return pdf_url
113
111
 
114
- def download_pdf_to_temp(
115
- self, pdf_url: str, identifier: str
116
- ) -> Optional[Tuple[str, str]]:
112
+ def download_pdf_to_temp(self, pdf_url: str, identifier: str) -> tuple[str, str] | None:
117
113
  """
118
114
  Override base method to use CloudScraper for bioRxiv PDF downloads.
119
115
  Includes landing page visit to handle CloudFlare protection.
@@ -188,9 +184,7 @@ class BiorxivDownloader(BasePaperDownloader):
188
184
  r'filename[*]?=(?:"([^"]+)"|([^;]+))', content_disposition
189
185
  )
190
186
  if filename_match:
191
- extracted_filename = filename_match.group(
192
- 1
193
- ) or filename_match.group(2)
187
+ extracted_filename = filename_match.group(1) or filename_match.group(2)
194
188
  extracted_filename = extracted_filename.strip().strip('"')
195
189
  if extracted_filename and extracted_filename.endswith(".pdf"):
196
190
  filename = extracted_filename
@@ -202,10 +196,10 @@ class BiorxivDownloader(BasePaperDownloader):
202
196
 
203
197
  def extract_paper_metadata(
204
198
  self,
205
- metadata: Dict[str, Any],
199
+ metadata: dict[str, Any],
206
200
  identifier: str,
207
- pdf_result: Optional[Tuple[str, str]],
208
- ) -> Dict[str, Any]:
201
+ pdf_result: tuple[str, str] | None,
202
+ ) -> dict[str, Any]:
209
203
  """
210
204
  Extract structured metadata from bioRxiv API response.
211
205
 
@@ -234,9 +228,7 @@ class BiorxivDownloader(BasePaperDownloader):
234
228
  **pdf_metadata,
235
229
  }
236
230
 
237
- def _extract_basic_metadata(
238
- self, paper: Dict[str, Any], identifier: str
239
- ) -> Dict[str, Any]:
231
+ def _extract_basic_metadata(self, paper: dict[str, Any], identifier: str) -> dict[str, Any]:
240
232
  """Extract basic metadata from paper data."""
241
233
  # Extract basic fields
242
234
  title = paper.get("title", "N/A").strip()
@@ -267,8 +259,8 @@ class BiorxivDownloader(BasePaperDownloader):
267
259
  return [author.strip() for author in authors_str.split(";") if author.strip()]
268
260
 
269
261
  def _extract_pdf_metadata(
270
- self, pdf_result: Optional[Tuple[str, str]], identifier: str
271
- ) -> Dict[str, Any]:
262
+ self, pdf_result: tuple[str, str] | None, identifier: str
263
+ ) -> dict[str, Any]:
272
264
  """Extract PDF-related metadata."""
273
265
  if pdf_result:
274
266
  temp_file_path, filename = pdf_result
@@ -301,7 +293,7 @@ class BiorxivDownloader(BasePaperDownloader):
301
293
  # Sanitize DOI for filename use
302
294
  return f"{identifier.replace('/', '_').replace('.', '_')}.pdf"
303
295
 
304
- def _get_paper_identifier_info(self, paper: Dict[str, Any]) -> str:
296
+ def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
305
297
  """Get bioRxiv-specific identifier info for paper summary."""
306
298
  doi = paper.get("DOI", "N/A")
307
299
  pub_date = paper.get("Publication Date", "N/A")
@@ -313,7 +305,7 @@ class BiorxivDownloader(BasePaperDownloader):
313
305
 
314
306
  return info
315
307
 
316
- def _add_service_identifier(self, entry: Dict[str, Any], identifier: str) -> None:
308
+ def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
317
309
  """Add DOI and bioRxiv-specific fields to entry."""
318
310
  entry["DOI"] = identifier
319
311
  entry["Category"] = "N/A"
@@ -4,7 +4,7 @@ MedRxiv paper downloader implementation.
4
4
  """
5
5
 
6
6
  import logging
7
- from typing import Any, Dict, Optional, Tuple
7
+ from typing import Any
8
8
 
9
9
  import requests
10
10
 
@@ -27,7 +27,7 @@ class MedrxivDownloader(BasePaperDownloader):
27
27
  )
28
28
  self.default_version = getattr(config, "default_version", "1")
29
29
 
30
- def fetch_metadata(self, identifier: str) -> Dict[str, Any]:
30
+ def fetch_metadata(self, identifier: str) -> dict[str, Any]:
31
31
  """
32
32
  Fetch paper metadata from medRxiv API.
33
33
 
@@ -54,7 +54,7 @@ class MedrxivDownloader(BasePaperDownloader):
54
54
 
55
55
  return paper_data
56
56
 
57
- def construct_pdf_url(self, metadata: Dict[str, Any], identifier: str) -> str:
57
+ def construct_pdf_url(self, metadata: dict[str, Any], identifier: str) -> str:
58
58
  """
59
59
  Construct PDF URL from medRxiv metadata and DOI.
60
60
 
@@ -79,10 +79,10 @@ class MedrxivDownloader(BasePaperDownloader):
79
79
 
80
80
  def extract_paper_metadata(
81
81
  self,
82
- metadata: Dict[str, Any],
82
+ metadata: dict[str, Any],
83
83
  identifier: str,
84
- pdf_result: Optional[Tuple[str, str]],
85
- ) -> Dict[str, Any]:
84
+ pdf_result: tuple[str, str] | None,
85
+ ) -> dict[str, Any]:
86
86
  """
87
87
  Extract structured metadata from medRxiv API response.
88
88
 
@@ -111,9 +111,7 @@ class MedrxivDownloader(BasePaperDownloader):
111
111
  **pdf_metadata,
112
112
  }
113
113
 
114
- def _extract_basic_metadata(
115
- self, paper: Dict[str, Any], identifier: str
116
- ) -> Dict[str, Any]:
114
+ def _extract_basic_metadata(self, paper: dict[str, Any], identifier: str) -> dict[str, Any]:
117
115
  """Extract basic metadata from paper data."""
118
116
  # Extract basic fields
119
117
  title = paper.get("title", "N/A").strip()
@@ -144,8 +142,8 @@ class MedrxivDownloader(BasePaperDownloader):
144
142
  return [author.strip() for author in authors_str.split(";") if author.strip()]
145
143
 
146
144
  def _extract_pdf_metadata(
147
- self, pdf_result: Optional[Tuple[str, str]], identifier: str
148
- ) -> Dict[str, Any]:
145
+ self, pdf_result: tuple[str, str] | None, identifier: str
146
+ ) -> dict[str, Any]:
149
147
  """Extract PDF-related metadata."""
150
148
  if pdf_result:
151
149
  temp_file_path, filename = pdf_result
@@ -178,7 +176,7 @@ class MedrxivDownloader(BasePaperDownloader):
178
176
  # Sanitize DOI for filename use
179
177
  return f"{identifier.replace('/', '_').replace('.', '_')}.pdf"
180
178
 
181
- def _get_paper_identifier_info(self, paper: Dict[str, Any]) -> str:
179
+ def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
182
180
  """Get medRxiv-specific identifier info for paper summary."""
183
181
  doi = paper.get("DOI", "N/A")
184
182
  pub_date = paper.get("Publication Date", "N/A")
@@ -190,7 +188,7 @@ class MedrxivDownloader(BasePaperDownloader):
190
188
 
191
189
  return info
192
190
 
193
- def _add_service_identifier(self, entry: Dict[str, Any], identifier: str) -> None:
191
+ def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
194
192
  """Add DOI and medRxiv-specific fields to entry."""
195
193
  entry["DOI"] = identifier
196
194
  entry["Category"] = "N/A"
@@ -5,7 +5,7 @@ PubMed paper downloader implementation.
5
5
 
6
6
  import logging
7
7
  import xml.etree.ElementTree as ET
8
- from typing import Any, Dict, Optional, Tuple, cast
8
+ from typing import Any, cast
9
9
 
10
10
  import requests
11
11
  from bs4 import BeautifulSoup, Tag
@@ -37,7 +37,7 @@ class PubmedDownloader(BasePaperDownloader):
37
37
  self.pdf_meta_name = getattr(config, "pdf_meta_name", "citation_pdf_url")
38
38
  self.default_error_code = getattr(config, "default_error_code", "unknown")
39
39
 
40
- def fetch_metadata(self, identifier: str) -> Dict[str, Any]:
40
+ def fetch_metadata(self, identifier: str) -> dict[str, Any]:
41
41
  """
42
42
  Fetch paper metadata from PubMed ID Converter API.
43
43
 
@@ -52,9 +52,7 @@ class PubmedDownloader(BasePaperDownloader):
52
52
  RuntimeError: If no records found in response
53
53
  """
54
54
  query_url = f"{self.id_converter_url}?ids={identifier}&format={self.id_converter_format}"
55
- logger.info(
56
- "Fetching metadata from ID converter for PMID %s: %s", identifier, query_url
57
- )
55
+ logger.info("Fetching metadata from ID converter for PMID %s: %s", identifier, query_url)
58
56
 
59
57
  response = requests.get(query_url, timeout=self.request_timeout)
60
58
  response.raise_for_status()
@@ -67,7 +65,7 @@ class PubmedDownloader(BasePaperDownloader):
67
65
 
68
66
  return result
69
67
 
70
- def construct_pdf_url(self, metadata: Dict[str, Any], identifier: str) -> str:
68
+ def construct_pdf_url(self, metadata: dict[str, Any], identifier: str) -> str:
71
69
  """
72
70
  Construct PDF URL using multiple fallback strategies.
73
71
 
@@ -145,18 +143,14 @@ class PubmedDownloader(BasePaperDownloader):
145
143
  if error_elem is not None:
146
144
  error_code = error_elem.get("code", self.default_error_code)
147
145
  error_text = error_elem.text or "unknown error"
148
- logger.info(
149
- "OA API error for PMCID %s: %s - %s", pmcid, error_code, error_text
150
- )
146
+ logger.info("OA API error for PMCID %s: %s - %s", pmcid, error_code, error_text)
151
147
  return ""
152
148
 
153
149
  # Look for PDF link
154
150
  pdf_link = root.find(".//link[@format='pdf']")
155
151
  if pdf_link is not None:
156
152
  pdf_url = pdf_link.get("href", "")
157
- logger.info(
158
- "Found PDF URL from OA API for PMCID %s: %s", pmcid, pdf_url
159
- )
153
+ logger.info("Found PDF URL from OA API for PMCID %s: %s", pmcid, pdf_url)
160
154
 
161
155
  # Convert FTP links to HTTPS for download compatibility
162
156
  if pdf_url.startswith(self.ftp_base_url):
@@ -188,15 +182,11 @@ class PubmedDownloader(BasePaperDownloader):
188
182
  def _try_pmc_page_scraping(self, pmcid: str) -> str:
189
183
  """Try scraping PMC page for PDF meta tag."""
190
184
  pmc_page_url = f"{self.pmc_page_base_url}/{pmcid}/"
191
- logger.info(
192
- "Scraping PMC page for PDF meta tag for %s: %s", pmcid, pmc_page_url
193
- )
185
+ logger.info("Scraping PMC page for PDF meta tag for %s: %s", pmcid, pmc_page_url)
194
186
 
195
187
  try:
196
188
  headers = {"User-Agent": self.user_agent}
197
- response = requests.get(
198
- pmc_page_url, headers=headers, timeout=self.request_timeout
199
- )
189
+ response = requests.get(pmc_page_url, headers=headers, timeout=self.request_timeout)
200
190
  response.raise_for_status()
201
191
 
202
192
  soup = BeautifulSoup(response.content, "html.parser")
@@ -238,10 +228,10 @@ class PubmedDownloader(BasePaperDownloader):
238
228
 
239
229
  def extract_paper_metadata(
240
230
  self,
241
- metadata: Dict[str, Any],
231
+ metadata: dict[str, Any],
242
232
  identifier: str,
243
- pdf_result: Optional[Tuple[str, str]],
244
- ) -> Dict[str, Any]:
233
+ pdf_result: tuple[str, str] | None,
234
+ ) -> dict[str, Any]:
245
235
  """
246
236
  Extract structured metadata from PubMed ID converter response.
247
237
 
@@ -310,15 +300,11 @@ class PubmedDownloader(BasePaperDownloader):
310
300
 
311
301
  def get_snippet(self, abstract: str) -> str:
312
302
  """Override to handle PubMed-specific abstract placeholder."""
313
- if (
314
- not abstract
315
- or abstract == "N/A"
316
- or abstract == "Abstract available in PubMed"
317
- ):
303
+ if not abstract or abstract == "N/A" or abstract == "Abstract available in PubMed":
318
304
  return ""
319
305
  return super().get_snippet(abstract)
320
306
 
321
- def _get_paper_identifier_info(self, paper: Dict[str, Any]) -> str:
307
+ def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
322
308
  """Get PubMed-specific identifier info for paper summary."""
323
309
  pmid = paper.get("PMID", "N/A")
324
310
  pmcid = paper.get("PMCID", "N/A")
@@ -329,7 +315,7 @@ class PubmedDownloader(BasePaperDownloader):
329
315
 
330
316
  return info
331
317
 
332
- def _add_service_identifier(self, entry: Dict[str, Any], identifier: str) -> None:
318
+ def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
333
319
  """Add PMID and PubMed-specific fields to entry."""
334
320
  entry["PMID"] = identifier
335
321
  entry["PMCID"] = "N/A"
@@ -25,11 +25,11 @@ from langgraph.prebuilt import InjectedState
25
25
  from langgraph.types import Command
26
26
  from pydantic import BaseModel, Field
27
27
 
28
+ from .utils.answer_formatter import format_answer
28
29
  from .utils.generate_answer import load_hydra_config
29
- from .utils.tool_helper import QAToolHelper
30
30
  from .utils.paper_loader import load_all_papers
31
31
  from .utils.rag_pipeline import retrieve_and_rerank_chunks
32
- from .utils.answer_formatter import format_answer
32
+ from .utils.tool_helper import QAToolHelper
33
33
 
34
34
  # Helper for managing state, vectorstore, reranking, and formatting
35
35
  helper = QAToolHelper()
@@ -56,9 +56,7 @@ class QuestionAndAnswerInput(BaseModel):
56
56
  - llm_model: chat/LLM instance for answer generation.
57
57
  """
58
58
 
59
- question: str = Field(
60
- description="User question for generating a PDF-based answer."
61
- )
59
+ question: str = Field(description="User question for generating a PDF-based answer.")
62
60
  tool_call_id: Annotated[str, InjectedToolCallId]
63
61
  state: Annotated[dict, InjectedState]
64
62
 
@@ -133,9 +131,7 @@ def question_and_answer(
133
131
  )
134
132
 
135
133
  # Retrieve and rerank chunks in one step
136
- reranked_chunks = retrieve_and_rerank_chunks(
137
- vs, question, config, call_id, helper.has_gpu
138
- )
134
+ reranked_chunks = retrieve_and_rerank_chunks(vs, question, config, call_id, helper.has_gpu)
139
135
 
140
136
  if not reranked_chunks:
141
137
  msg = f"No relevant chunks found for question: '{question}'"
@@ -2,20 +2,22 @@
2
2
  Utility modules for the PDF question_and_answer tool.
3
3
  """
4
4
 
5
- from . import answer_formatter
6
- from . import batch_processor
7
- from . import collection_manager
8
- from . import generate_answer
9
- from . import get_vectorstore
10
- from . import gpu_detection
11
- from . import nvidia_nim_reranker
12
- from . import paper_loader
13
- from . import rag_pipeline
14
- from . import retrieve_chunks
15
- from . import singleton_manager
16
- from . import tool_helper
17
- from . import vector_normalization
18
- from . import vector_store
5
+ from . import (
6
+ answer_formatter,
7
+ batch_processor,
8
+ collection_manager,
9
+ generate_answer,
10
+ get_vectorstore,
11
+ gpu_detection,
12
+ nvidia_nim_reranker,
13
+ paper_loader,
14
+ rag_pipeline,
15
+ retrieve_chunks,
16
+ singleton_manager,
17
+ tool_helper,
18
+ vector_normalization,
19
+ vector_store,
20
+ )
19
21
 
20
22
  __all__ = [
21
23
  "answer_formatter",
@@ -3,7 +3,7 @@ Format the final answer text with source attributions and hardware info.
3
3
  """
4
4
 
5
5
  import logging
6
- from typing import Any, Dict, List
6
+ from typing import Any
7
7
 
8
8
  from .generate_answer import generate_answer
9
9
 
@@ -12,9 +12,9 @@ logger = logging.getLogger(__name__)
12
12
 
13
13
  def format_answer(
14
14
  question: str,
15
- chunks: List[Any],
15
+ chunks: list[Any],
16
16
  llm: Any,
17
- articles: Dict[str, Any],
17
+ articles: dict[str, Any],
18
18
  config: Any,
19
19
  **kwargs: Any,
20
20
  ) -> str:
@@ -27,7 +27,7 @@ def format_answer(
27
27
  answer = result.get("output_text", "No answer generated.")
28
28
 
29
29
  # Get unique paper titles for source attribution
30
- titles: Dict[str, str] = {}
30
+ titles: dict[str, str] = {}
31
31
  for pid in result.get("papers_used", []):
32
32
  if pid in articles:
33
33
  titles[pid] = articles[pid].get("Title", "Unknown paper")