aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336) hide show
  1. aiagents4pharma/__init__.py +11 -0
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
  11. aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
  12. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
  13. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
  14. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  16. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  17. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  18. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  19. aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
  20. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
  21. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
  22. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
  23. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
  24. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  25. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  26. aiagents4pharma/talk2biomodels/README.md +1 -0
  27. aiagents4pharma/talk2biomodels/__init__.py +5 -0
  28. aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
  29. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
  30. aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
  31. aiagents4pharma/talk2biomodels/api/ols.py +75 -0
  32. aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
  33. aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
  34. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
  35. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
  36. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
  37. aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
  38. aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
  39. aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
  40. aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
  41. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
  42. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
  43. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
  44. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
  45. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
  46. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
  47. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
  48. aiagents4pharma/talk2biomodels/install.md +63 -0
  49. aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
  50. aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
  51. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
  52. aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
  53. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
  54. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  55. aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
  56. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  57. aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
  58. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
  59. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
  60. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
  61. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
  62. aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
  63. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
  64. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
  65. aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
  66. aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
  67. aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
  68. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
  69. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
  70. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
  71. aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
  72. aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
  73. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
  74. aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
  75. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
  76. aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
  77. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
  78. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
  79. aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
  80. aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
  81. aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
  82. aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
  83. aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
  84. aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
  85. aiagents4pharma/talk2cells/README.md +1 -0
  86. aiagents4pharma/talk2cells/__init__.py +5 -0
  87. aiagents4pharma/talk2cells/agents/__init__.py +6 -0
  88. aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
  89. aiagents4pharma/talk2cells/states/__init__.py +6 -0
  90. aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
  91. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
  92. aiagents4pharma/talk2cells/tools/__init__.py +6 -0
  93. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
  94. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
  95. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
  96. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  97. aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
  98. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  99. aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
  100. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
  101. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
  102. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
  103. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  104. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  105. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
  106. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  107. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
  108. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
  110. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  111. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  112. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  113. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
  114. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  115. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  116. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  117. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  118. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
  119. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
  120. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  121. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  122. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  123. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  124. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
  125. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
  126. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
  127. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
  128. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
  129. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  130. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  131. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  132. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  133. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
  134. aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
  135. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
  136. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
  137. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
  151. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
  152. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
  153. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
  154. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
  155. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
  156. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
  157. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
  158. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
  159. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
  160. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
  161. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
  162. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
  163. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
  164. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
  165. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
  166. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  167. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
  168. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  169. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
  170. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
  171. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
  172. aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
  173. aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
  174. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
  175. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
  176. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
  177. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
  178. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
  179. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
  180. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
  181. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
  182. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
  183. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
  184. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
  185. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
  186. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
  187. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
  188. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
  189. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
  190. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
  191. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
  192. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
  193. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  194. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  195. aiagents4pharma/talk2scholars/README.md +1 -0
  196. aiagents4pharma/talk2scholars/__init__.py +7 -0
  197. aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
  198. aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
  199. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
  200. aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
  201. aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
  202. aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
  203. aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
  204. aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
  205. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
  206. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
  207. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
  208. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
  209. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
  210. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
  211. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
  212. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
  213. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
  214. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
  215. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
  216. aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
  217. aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
  218. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
  219. aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
  220. aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
  221. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
  222. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
  223. aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
  224. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  225. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
  226. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
  227. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
  228. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
  229. aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
  230. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
  231. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
  232. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
  233. aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
  234. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
  235. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  236. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  237. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  238. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  239. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  240. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  241. aiagents4pharma/talk2scholars/install.md +122 -0
  242. aiagents4pharma/talk2scholars/state/__init__.py +7 -0
  243. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
  244. aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
  245. aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
  246. aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
  247. aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
  248. aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
  249. aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
  250. aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
  251. aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
  252. aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
  253. aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
  254. aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
  255. aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
  256. aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
  257. aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
  258. aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
  259. aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
  260. aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
  261. aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
  262. aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
  263. aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
  264. aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
  265. aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
  266. aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
  267. aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
  268. aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
  269. aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
  270. aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
  271. aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
  272. aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
  273. aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
  274. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
  275. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
  276. aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
  277. aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
  278. aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
  279. aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
  280. aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
  281. aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
  282. aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
  283. aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
  284. aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
  285. aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
  286. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
  287. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
  288. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
  289. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
  290. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
  291. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
  292. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
  293. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
  294. aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
  295. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
  296. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
  297. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  298. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
  299. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  300. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  301. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  302. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
  303. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
  304. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
  305. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  306. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
  307. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
  308. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  309. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
  310. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
  311. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
  312. aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
  313. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
  314. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
  315. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
  316. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
  317. aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
  318. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
  319. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
  320. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
  321. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
  322. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
  323. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
  324. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
  325. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
  326. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
  327. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
  328. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
  329. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
  330. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
  331. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
  332. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
  333. aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
  334. aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
  335. aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
  336. aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,323 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ PubMed paper downloader implementation.
4
+ """
5
+
6
+ import logging
7
+ import xml.etree.ElementTree as ET
8
+ from typing import Any, cast
9
+
10
+ import requests
11
+ from bs4 import BeautifulSoup, Tag
12
+
13
+ from .base_paper_downloader import BasePaperDownloader
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class PubmedDownloader(BasePaperDownloader):
19
+ """PubMed-specific implementation of paper downloader."""
20
+
21
def __init__(self, config: Any):
    """
    Set up the PubMed downloader from its configuration object.

    Args:
        config: Configuration object. The endpoint and URL-prefix
            attributes are read directly with no fallback; the three
            optional settings use built-in defaults when the attribute
            is absent.
    """
    super().__init__(config)

    # ID conversion and open-access lookup endpoints.
    self.id_converter_url, self.oa_api_url = (
        config.id_converter_url,
        config.oa_api_url,
    )

    # Alternative PDF sources used when the OA API yields nothing.
    (
        self.europe_pmc_base_url,
        self.pmc_page_base_url,
        self.direct_pmc_pdf_base_url,
    ) = (
        config.europe_pmc_base_url,
        config.pmc_page_base_url,
        config.direct_pmc_pdf_base_url,
    )

    # Prefix pair used to rewrite NCBI FTP links as HTTPS links.
    self.ftp_base_url, self.https_base_url = (
        config.ftp_base_url,
        config.https_base_url,
    )

    def _optional(name: str, fallback: str) -> Any:
        """Read an optional setting, using ``fallback`` when it is absent."""
        return getattr(config, name, fallback)

    # Optional settings with defaults.
    self.id_converter_format = _optional("id_converter_format", "json")
    self.pdf_meta_name = _optional("pdf_meta_name", "citation_pdf_url")
    self.default_error_code = _optional("default_error_code", "unknown")
39
+
40
def fetch_metadata(self, identifier: str) -> dict[str, Any]:
    """
    Look up a PMID through the PMC ID Converter API.

    Args:
        identifier: PMID to resolve (e.g., '12345678')

    Returns:
        The decoded JSON payload returned by the ID converter

    Raises:
        requests.RequestException: If the HTTP call fails or returns an error status
        RuntimeError: If the payload has no non-empty "records" entry
    """
    endpoint = self.id_converter_url
    fmt = self.id_converter_format
    query_url = f"{endpoint}?ids={identifier}&format={fmt}"
    logger.info("Fetching metadata from ID converter for PMID %s: %s", identifier, query_url)

    reply = requests.get(query_url, timeout=self.request_timeout)
    reply.raise_for_status()

    payload = reply.json()
    logger.info("ID converter response for PMID %s: %s", identifier, payload)

    # Success path first: the payload is only usable with at least one record.
    if "records" in payload and payload["records"]:
        return payload

    raise RuntimeError("No records found in PMC ID Converter API response")
67
+
68
def construct_pdf_url(self, metadata: dict[str, Any], identifier: str) -> str:
    """
    Turn ID-converter metadata into a PDF URL.

    Only the first record is consulted; its PMCID is handed to the
    fallback chain that tries each PDF source in turn.

    Args:
        metadata: JSON response from the ID converter
        identifier: PMID (used for logging only)

    Returns:
        PDF URL string; empty when there are no records, when the first
        record has no usable PMCID, or when every lookup strategy fails
    """
    records = metadata["records"] if "records" in metadata else None
    if not records:
        return ""

    pmcid = records[0].get("pmcid", "")
    if pmcid and pmcid != "N/A":
        return self._fetch_pdf_url_with_fallbacks(pmcid)

    # A missing PMCID or the "N/A" placeholder means there is nothing to fetch.
    logger.info("No PMCID available for PDF fetch: PMID %s", identifier)
    return ""
90
+
91
def _fetch_pdf_url_with_fallbacks(self, pmcid: str) -> str:
    """
    Resolve a PDF URL for a PMCID by trying each source in order.

    Args:
        pmcid: PMC ID (e.g., 'PMC1234567')

    Returns:
        The first non-empty URL produced by a strategy, or an empty
        string when every strategy comes back empty
    """
    logger.info("Fetching PDF URL for PMCID: %s", pmcid)

    # Order matters: preferred sources first, least reliable last.
    strategies = (
        self._try_oa_api,  # 1: official OA API (fastest when it works)
        self._try_europe_pmc,  # 2: Europe PMC service (most reliable fallback)
        self._try_pmc_page_scraping,  # 3: citation_pdf_url meta tag on the PMC page
        self._try_direct_pmc_url,  # 4: direct PMC PDF URL pattern (least reliable)
    )
    for attempt in strategies:
        candidate = attempt(pmcid)
        if candidate:
            return candidate

    logger.warning("All PDF URL strategies failed for PMCID: %s", pmcid)
    return ""
125
+
126
def _try_oa_api(self, pmcid: str) -> str:
    """
    Try to get a PDF URL from the official OA API.

    Args:
        pmcid: PMC ID (e.g., 'PMC1234567')

    Returns:
        PDF URL string; empty when the request fails, the response body
        is not well-formed XML, the API reports an error element, or no
        PDF link is present
    """
    query_url = f"{self.oa_api_url}?id={pmcid}"
    logger.info("Trying OA API for PMCID %s: %s", pmcid, query_url)

    # Keep the try body to the calls that can actually fail: the HTTP
    # round trip and the XML parse.
    try:
        response = requests.get(query_url, timeout=self.request_timeout)
        response.raise_for_status()

        logger.info("OA API response for PMCID %s: %s", pmcid, response.text[:500])

        root = ET.fromstring(response.text)
    except requests.RequestException as e:
        logger.info("OA API failed for %s: %s", pmcid, str(e))
        return ""
    except ET.ParseError as e:
        # A malformed or non-XML body (e.g. an HTML error page) must not
        # abort the fallback chain; treat it as "no URL from this source".
        logger.info("OA API returned unparseable XML for %s: %s", pmcid, str(e))
        return ""

    # Check for an API-level error first.
    error_elem = root.find(".//error")
    if error_elem is not None:
        error_code = error_elem.get("code", self.default_error_code)
        error_text = error_elem.text or "unknown error"
        logger.info("OA API error for PMCID %s: %s - %s", pmcid, error_code, error_text)
        return ""

    # Look for the PDF link.
    pdf_link = root.find(".//link[@format='pdf']")
    if pdf_link is None:
        return ""

    pdf_url = pdf_link.get("href", "")
    logger.info("Found PDF URL from OA API for PMCID %s: %s", pmcid, pdf_url)

    # Convert FTP links to HTTPS for download compatibility. Only the
    # leading prefix is rewritten (count=1), never a later occurrence.
    if pdf_url.startswith(self.ftp_base_url):
        pdf_url = pdf_url.replace(self.ftp_base_url, self.https_base_url, 1)
        logger.info("Converted FTP to HTTPS for %s: %s", pmcid, pdf_url)

    return pdf_url
166
+
167
def _try_europe_pmc(self, pmcid: str) -> str:
    """
    Probe the Europe PMC service for a PDF of the given PMCID.

    A HEAD request is sent to the service URL; the URL is returned only when
    the status code is exactly 200. Any other status, or a request error,
    yields an empty string.
    """
    candidate_url = f"{self.europe_pmc_base_url}?accid={pmcid}&blobtype=pdf"
    logger.info("Trying Europe PMC service for %s: %s", pmcid, candidate_url)

    try:
        head_response = requests.head(candidate_url, timeout=self.request_timeout)
    except requests.RequestException as exc:
        logger.info("Europe PMC service failed for %s: %s", pmcid, str(exc))
        return ""

    if head_response.status_code != 200:
        return ""

    logger.info("Europe PMC service works for %s", pmcid)
    return candidate_url
181
+
182
def _try_pmc_page_scraping(self, pmcid: str) -> str:
    """
    Fetch the PMC article page and read the PDF URL from its meta tag.

    The page is requested with ``self.user_agent`` as the User-Agent header
    and parsed with BeautifulSoup; the ``content`` attribute of the meta tag
    named ``self.pdf_meta_name`` is returned as a string. An empty string is
    returned when the request fails or the tag/attribute is missing or empty.
    """
    pmc_page_url = f"{self.pmc_page_base_url}/{pmcid}/"
    logger.info("Scraping PMC page for PDF meta tag for %s: %s", pmcid, pmc_page_url)

    try:
        page = requests.get(
            pmc_page_url,
            headers={"User-Agent": self.user_agent},
            timeout=self.request_timeout,
        )
        page.raise_for_status()

        parsed_page = BeautifulSoup(page.content, "html.parser")
        pdf_meta = parsed_page.find("meta", attrs={"name": self.pdf_meta_name})
        if pdf_meta is None:
            return ""

        # cast() only informs the type checker that this is a Tag
        content = cast(Tag, pdf_meta).get("content")
        if not content:
            return ""

        logger.info(
            "Found %s meta tag for %s: %s",
            self.pdf_meta_name,
            pmcid,
            content,
        )
        return str(content)

    except requests.RequestException as exc:
        logger.info("PMC page scraping failed for %s: %s", pmcid, str(exc))
        return ""
213
+
214
def _try_direct_pmc_url(self, pmcid: str) -> str:
    """
    Probe the conventional ``<base>/<pmcid>/pdf/`` URL with a HEAD request.

    Returns the URL when the response status is exactly 200; otherwise, or
    when the request raises, returns an empty string.
    """
    candidate_url = f"{self.direct_pmc_pdf_base_url}/{pmcid}/pdf/"
    logger.info("Trying direct PMC PDF URL for %s: %s", pmcid, candidate_url)

    try:
        head_response = requests.head(candidate_url, timeout=self.request_timeout)
    except requests.RequestException as exc:
        logger.info("Direct PMC PDF URL failed for %s: %s", pmcid, str(exc))
        return ""

    if head_response.status_code != 200:
        return ""

    logger.info("Direct PMC PDF URL works for %s", pmcid)
    return candidate_url
228
+
229
+ def extract_paper_metadata(
230
+ self,
231
+ metadata: dict[str, Any],
232
+ identifier: str,
233
+ pdf_result: tuple[str, str] | None,
234
+ ) -> dict[str, Any]:
235
+ """
236
+ Extract structured metadata from PubMed ID converter response.
237
+
238
+ Args:
239
+ metadata: JSON response from ID converter
240
+ identifier: PMID
241
+ pdf_result: Tuple of (temp_file_path, filename) if PDF downloaded
242
+
243
+ Returns:
244
+ Standardized paper metadata dictionary
245
+ """
246
+ if "records" not in metadata or not metadata["records"]:
247
+ raise RuntimeError("No records found in metadata")
248
+
249
+ record = metadata["records"][0] # Get first (and should be only) record
250
+
251
+ # Extract basic fields from ID converter
252
+ pmcid = record.get("pmcid", "N/A")
253
+ doi = record.get("doi", "N/A")
254
+
255
+ # Handle PDF download results
256
+ if pdf_result:
257
+ temp_file_path, filename = pdf_result
258
+ access_type = "open_access_downloaded"
259
+ pdf_url = temp_file_path # Use local temp file path
260
+ else:
261
+ temp_file_path = ""
262
+ filename = self.get_default_filename(identifier)
263
+ access_type = "abstract_only" if pmcid != "N/A" else "no_pmcid"
264
+ pdf_url = ""
265
+
266
+ # Note: For PubMed, we don't get title/authors from ID converter
267
+ # In a real implementation, you might want to call E-utilities for full metadata
268
+ # For now, we'll use placeholders and focus on the ID conversion functionality
269
+
270
+ return {
271
+ "Title": (
272
+ f"PubMed Article {identifier}"
273
+ ), # Placeholder - would need E-utilities for real title
274
+ "Authors": [], # Placeholder - would need E-utilities for real authors
275
+ "Abstract": "Abstract available in PubMed", # Placeholder
276
+ "Publication Date": "N/A", # Would need E-utilities for this
277
+ "PMID": identifier,
278
+ "PMCID": pmcid,
279
+ "DOI": doi,
280
+ "Journal": "N/A", # Would need E-utilities for this
281
+ "URL": pdf_url,
282
+ "pdf_url": pdf_url,
283
+ "access_type": access_type,
284
+ "filename": filename,
285
+ "source": "pubmed",
286
+ "temp_file_path": temp_file_path,
287
+ }
288
+
289
def get_service_name(self) -> str:
    """Return the display name of this service, "PubMed"."""
    service_name = "PubMed"
    return service_name
292
+
293
def get_identifier_name(self) -> str:
    """Return the display name of the identifier type, "PMID"."""
    identifier_name = "PMID"
    return identifier_name
296
+
297
def get_default_filename(self, identifier: str) -> str:
    """Return the default PDF filename for a PMID, i.e. ``pmid_<identifier>.pdf``."""
    return "pmid_{}.pdf".format(identifier)
300
+
301
def get_snippet(self, abstract: str) -> str:
    """
    Return a snippet for the abstract, treating PubMed placeholders as empty.

    An empty/falsy abstract, "N/A", and the "Abstract available in PubMed"
    placeholder all produce an empty string; anything else is delegated to
    the parent class implementation.
    """
    placeholder_values = ("N/A", "Abstract available in PubMed")
    if not abstract or abstract in placeholder_values:
        return ""
    return super().get_snippet(abstract)
306
+
307
+ def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
308
+ """Get PubMed-specific identifier info for paper summary."""
309
+ pmid = paper.get("PMID", "N/A")
310
+ pmcid = paper.get("PMCID", "N/A")
311
+
312
+ info = f" (PMID: {pmid})"
313
+ if pmcid != "N/A":
314
+ info += f"\n PMCID: {pmcid}"
315
+
316
+ return info
317
+
318
+ def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
319
+ """Add PMID and PubMed-specific fields to entry."""
320
+ entry["PMID"] = identifier
321
+ entry["PMCID"] = "N/A"
322
+ entry["DOI"] = "N/A"
323
+ entry["Journal"] = "N/A"
@@ -0,0 +1,7 @@
1
+ """
2
+ This file is used to import all the modules in the package.
3
+ """
4
+
5
+ from . import question_and_answer
6
+
7
+ __all__ = ["question_and_answer"]
@@ -0,0 +1,170 @@
1
+ """
2
+ LangGraph PDF Retrieval-Augmented Generation (RAG) Tool
3
+
4
+ This tool answers user questions using the traditional RAG pipeline:
5
+ 1. Retrieve relevant chunks from ALL papers in the vector store
6
+ 2. Rerank chunks using NVIDIA NIM reranker to find the most relevant ones
7
+ 3. Generate answer using the top reranked chunks
8
+
9
+ Traditional RAG Pipeline Flow:
10
+ Query → Retrieve chunks from ALL papers → Rerank chunks → Generate answer
11
+
12
+ This ensures the best possible chunks are selected across all available papers,
13
+ not just from pre-selected papers.
14
+ """
15
+
16
+ import logging
17
+ import os
18
+ import time
19
+ from typing import Annotated, Any
20
+
21
+ from langchain_core.messages import ToolMessage
22
+ from langchain_core.tools import tool
23
+ from langchain_core.tools.base import InjectedToolCallId
24
+ from langgraph.prebuilt import InjectedState
25
+ from langgraph.types import Command
26
+ from pydantic import BaseModel, Field
27
+
28
+ from .utils.answer_formatter import format_answer
29
+ from .utils.generate_answer import load_hydra_config
30
+ from .utils.paper_loader import load_all_papers
31
+ from .utils.rag_pipeline import retrieve_and_rerank_chunks
32
+ from .utils.tool_helper import QAToolHelper
33
+
34
# Helper for managing state, vectorstore, reranking, and formatting
helper = QAToolHelper()
# Load configuration and start logging
config = load_hydra_config()

# Set up logging with configurable level.
# LOG_LEVEL is matched case-insensitively against the logging module's level
# constants. A value that does not name an integer level (a typo, or a
# lowercase name that would otherwise resolve to a logging function such as
# ``logging.info``) falls back to INFO instead of failing at import time.
log_level = os.environ.get("LOG_LEVEL", "INFO")
_numeric_log_level = getattr(logging, log_level.upper(), None)
if not isinstance(_numeric_log_level, int):
    _numeric_log_level = logging.INFO
logging.basicConfig(level=_numeric_log_level)
logger = logging.getLogger(__name__)
logger.setLevel(_numeric_log_level)
44
+
45
+
46
# Argument schema passed as ``args_schema`` to the @tool decorator on
# question_and_answer.
# NOTE(review): the class docstring may be surfaced by LangChain as schema/tool
# description text — confirm before rewording it.
class QuestionAndAnswerInput(BaseModel):
    """
    Pydantic schema for the PDF Q&A tool inputs.

    Fields:
        question: User's free-text query to answer based on PDF content.
        tool_call_id: LangGraph-injected call identifier for tracking.
        state: Shared agent state dict containing:
            - article_data: metadata mapping of paper IDs to info (e.g., 'pdf_url', title).
            - text_embedding_model: embedding model instance for chunk indexing.
            - llm_model: chat/LLM instance for answer generation.
    """

    # The only field declared with an explicit Field description.
    question: str = Field(description="User question for generating a PDF-based answer.")
    # Annotated with InjectedToolCallId — presumably filled in by the framework
    # rather than by the model; verify against LangChain docs.
    tool_call_id: Annotated[str, InjectedToolCallId]
    # Annotated with InjectedState — presumably the agent state injected at call
    # time; verify against LangGraph docs.
    state: Annotated[dict, InjectedState]
62
+
63
+
64
@tool(args_schema=QuestionAndAnswerInput, parse_docstring=True)
def question_and_answer(
    question: str,
    state: Annotated[dict, InjectedState],
    tool_call_id: Annotated[str, InjectedToolCallId],
) -> Command[Any]:
    """
    LangGraph tool for Retrieval-Augmented Generation over PDFs using traditional RAG pipeline.

    Traditional RAG Pipeline Implementation:
    1. Load ALL available PDFs into Milvus vector store (if not already loaded)
    2. Retrieve relevant chunks from ALL papers using vector similarity search
    3. Rerank retrieved chunks using NVIDIA NIM semantic reranker
    4. Generate answer using top reranked chunks with source attribution

    This approach ensures the best chunks are selected across all available papers,
    rather than pre-selecting papers and potentially missing relevant information.

    Args:
        question (str): The free-text question to answer.
        state (dict): Injected agent state; must include:
            - article_data: mapping paper IDs → metadata (pdf_url, title, etc.)
            - text_embedding_model: embedding model instance.
            - llm_model: chat/LLM instance.
        tool_call_id (str): Internal identifier for this tool invocation.

    Returns:
        Command[Any]: updates conversation state with a ToolMessage(answer).

    Raises:
        ValueError: when required models or metadata are missing in state.

    Note:
        When no chunks are retrieved for the query, a warning is logged and
        answer generation still proceeds with the empty chunk list; this
        function does not raise in that case.
    """
    # NOTE: the docstring text up to and including "Args:" is left untouched
    # because parse_docstring=True is set on the decorator.
    call_id = f"qa_call_{time.time()}"
    logger.info(
        "Starting PDF Question and Answer tool (Traditional RAG Pipeline) - Call %s",
        call_id,
    )
    logger.info("%s: Question: '%s'", call_id, question)

    helper.start_call(config, call_id)

    # Extract models and article metadata
    text_emb, llm_model, article_data = helper.get_state_models_and_data(state)

    # Initialize or reuse Milvus vector store
    logger.info("%s: Initializing vector store", call_id)
    vs = helper.init_vector_store(text_emb)

    # Load ALL papers (traditional RAG approach)
    logger.info(
        "%s: Loading all %d papers into vector store (traditional RAG approach)",
        call_id,
        len(article_data),
    )
    load_all_papers(
        vector_store=vs,
        articles=article_data,
        call_id=call_id,
        config=config,
        has_gpu=helper.has_gpu,
    )

    # Traditional RAG Pipeline: Retrieve from ALL papers, then rerank
    logger.info(
        "%s: Starting traditional RAG pipeline: retrieve → rerank → generate",
        call_id,
    )

    # Retrieve and rerank chunks in one step
    reranked_chunks = retrieve_and_rerank_chunks(vs, question, config, call_id, helper.has_gpu)

    # Best-effort: an empty result is only reported, not treated as fatal
    if not reranked_chunks:
        msg = f"No relevant chunks found for question: '{question}'"
        logger.warning("%s: %s", call_id, msg)

    # Generate answer using reranked chunks
    logger.info(
        "%s: Generating answer using %d reranked chunks",
        call_id,
        len(reranked_chunks),
    )
    response_text = format_answer(
        question,
        reranked_chunks,
        llm_model,
        article_data,
        config,
        call_id=call_id,
        has_gpu=helper.has_gpu,
    )

    logger.info(
        "%s: Successfully completed traditional RAG pipeline",
        call_id,
    )

    return Command(
        update={
            "messages": [
                ToolMessage(
                    content=response_text,
                    tool_call_id=tool_call_id,
                )
            ],
        }
    )
@@ -0,0 +1,37 @@
1
+ """
2
+ Utility modules for the PDF question_and_answer tool.
3
+ """
4
+
5
+ from . import (
6
+ answer_formatter,
7
+ batch_processor,
8
+ collection_manager,
9
+ generate_answer,
10
+ get_vectorstore,
11
+ gpu_detection,
12
+ nvidia_nim_reranker,
13
+ paper_loader,
14
+ rag_pipeline,
15
+ retrieve_chunks,
16
+ singleton_manager,
17
+ tool_helper,
18
+ vector_normalization,
19
+ vector_store,
20
+ )
21
+
22
+ __all__ = [
23
+ "answer_formatter",
24
+ "batch_processor",
25
+ "collection_manager",
26
+ "generate_answer",
27
+ "get_vectorstore",
28
+ "gpu_detection",
29
+ "nvidia_nim_reranker",
30
+ "paper_loader",
31
+ "rag_pipeline",
32
+ "retrieve_chunks",
33
+ "singleton_manager",
34
+ "tool_helper",
35
+ "vector_normalization",
36
+ "vector_store",
37
+ ]
@@ -0,0 +1,62 @@
1
+ """
2
+ Format the final answer text with source attributions and hardware info.
3
+ """
4
+
5
+ import logging
6
+ from typing import Any
7
+
8
+ from .generate_answer import generate_answer
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def format_answer(
    question: str,
    chunks: list[Any],
    llm: Any,
    articles: dict[str, Any],
    config: Any,
    **kwargs: Any,
) -> str:
    """
    Produce the answer text followed by a "Sources" list of paper titles.

    The answer comes from ``generate_answer``; the sources list holds the
    title of every paper ID in its "papers_used" entry that is also a key of
    ``articles``. ``call_id`` and ``has_gpu`` are read from kwargs and are
    used for logging only — they never appear in the returned text.
    """
    generation = generate_answer(question, chunks, llm, config)
    answer_text = generation.get("output_text", "No answer generated.")

    # One title per cited paper ID that is present in ``articles``
    title_by_paper: dict[str, str] = {
        paper_id: articles[paper_id].get("Title", "Unknown paper")
        for paper_id in generation.get("papers_used", [])
        if paper_id in articles
    }

    # Sources section is omitted entirely when no cited paper is known
    sources_section = ""
    if title_by_paper:
        bullet_lines = [f"- {title}" for title in title_by_paper.values()]
        sources_section = "\n\nSources:\n" + "\n".join(bullet_lines)

    # Logging metadata
    call_id = kwargs.get("call_id", "<no-call-id>")
    if kwargs.get("has_gpu", False):
        hardware_info = "GPU-accelerated"
    else:
        hardware_info = "CPU-processed"

    logger.info(
        "%s: Generated answer using %d chunks from %d papers (%s)",
        call_id,
        len(chunks),
        len(title_by_paper),
        hardware_info,
    )

    # Hardware details go to the debug log, not to the user-facing text
    logger.debug(
        "%s: Answer generation completed with %s processing",
        call_id,
        hardware_info,
    )

    return f"{answer_text}{sources_section}"