aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336) hide show
  1. aiagents4pharma/__init__.py +11 -0
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
  11. aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
  12. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
  13. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
  14. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  16. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  17. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  18. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  19. aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
  20. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
  21. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
  22. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
  23. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
  24. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  25. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  26. aiagents4pharma/talk2biomodels/README.md +1 -0
  27. aiagents4pharma/talk2biomodels/__init__.py +5 -0
  28. aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
  29. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
  30. aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
  31. aiagents4pharma/talk2biomodels/api/ols.py +75 -0
  32. aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
  33. aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
  34. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
  35. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
  36. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
  37. aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
  38. aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
  39. aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
  40. aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
  41. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
  42. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
  43. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
  44. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
  45. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
  46. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
  47. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
  48. aiagents4pharma/talk2biomodels/install.md +63 -0
  49. aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
  50. aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
  51. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
  52. aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
  53. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
  54. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  55. aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
  56. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  57. aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
  58. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
  59. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
  60. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
  61. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
  62. aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
  63. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
  64. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
  65. aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
  66. aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
  67. aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
  68. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
  69. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
  70. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
  71. aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
  72. aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
  73. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
  74. aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
  75. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
  76. aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
  77. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
  78. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
  79. aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
  80. aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
  81. aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
  82. aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
  83. aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
  84. aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
  85. aiagents4pharma/talk2cells/README.md +1 -0
  86. aiagents4pharma/talk2cells/__init__.py +5 -0
  87. aiagents4pharma/talk2cells/agents/__init__.py +6 -0
  88. aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
  89. aiagents4pharma/talk2cells/states/__init__.py +6 -0
  90. aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
  91. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
  92. aiagents4pharma/talk2cells/tools/__init__.py +6 -0
  93. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
  94. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
  95. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
  96. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  97. aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
  98. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  99. aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
  100. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
  101. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
  102. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
  103. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  104. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  105. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
  106. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  107. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
  108. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
  110. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  111. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  112. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  113. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
  114. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  115. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  116. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  117. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  118. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
  119. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
  120. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  121. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  122. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  123. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  124. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
  125. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
  126. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
  127. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
  128. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
  129. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  130. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  131. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  132. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  133. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
  134. aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
  135. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
  136. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
  137. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
  151. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
  152. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
  153. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
  154. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
  155. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
  156. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
  157. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
  158. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
  159. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
  160. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
  161. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
  162. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
  163. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
  164. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
  165. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
  166. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  167. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
  168. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  169. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
  170. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
  171. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
  172. aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
  173. aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
  174. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
  175. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
  176. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
  177. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
  178. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
  179. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
  180. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
  181. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
  182. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
  183. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
  184. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
  185. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
  186. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
  187. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
  188. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
  189. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
  190. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
  191. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
  192. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
  193. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  194. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  195. aiagents4pharma/talk2scholars/README.md +1 -0
  196. aiagents4pharma/talk2scholars/__init__.py +7 -0
  197. aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
  198. aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
  199. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
  200. aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
  201. aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
  202. aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
  203. aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
  204. aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
  205. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
  206. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
  207. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
  208. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
  209. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
  210. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
  211. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
  212. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
  213. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
  214. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
  215. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
  216. aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
  217. aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
  218. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
  219. aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
  220. aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
  221. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
  222. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
  223. aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
  224. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  225. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
  226. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
  227. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
  228. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
  229. aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
  230. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
  231. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
  232. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
  233. aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
  234. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
  235. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  236. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  237. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  238. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  239. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  240. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  241. aiagents4pharma/talk2scholars/install.md +122 -0
  242. aiagents4pharma/talk2scholars/state/__init__.py +7 -0
  243. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
  244. aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
  245. aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
  246. aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
  247. aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
  248. aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
  249. aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
  250. aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
  251. aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
  252. aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
  253. aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
  254. aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
  255. aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
  256. aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
  257. aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
  258. aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
  259. aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
  260. aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
  261. aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
  262. aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
  263. aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
  264. aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
  265. aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
  266. aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
  267. aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
  268. aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
  269. aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
  270. aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
  271. aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
  272. aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
  273. aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
  274. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
  275. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
  276. aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
  277. aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
  278. aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
  279. aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
  280. aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
  281. aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
  282. aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
  283. aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
  284. aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
  285. aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
  286. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
  287. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
  288. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
  289. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
  290. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
  291. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
  292. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
  293. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
  294. aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
  295. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
  296. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
  297. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  298. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
  299. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  300. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  301. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  302. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
  303. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
  304. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
  305. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  306. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
  307. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
  308. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  309. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
  310. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
  311. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
  312. aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
  313. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
  314. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
  315. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
  316. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
  317. aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
  318. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
  319. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
  320. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
  321. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
  322. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
  323. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
  324. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
  325. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
  326. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
  327. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
  328. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
  329. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
  330. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
  331. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
  332. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
  333. aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
  334. aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
  335. aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
  336. aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,374 @@
1
+ """
2
+ Tool for performing multimodal subgraph extraction.
3
+ """
4
+
5
+ import logging
6
+ from typing import Annotated
7
+
8
+ import hydra
9
+ import joblib
10
+ import networkx as nx
11
+ import numpy as np
12
+ import pandas as pd
13
+ import torch
14
+ from langchain_core.messages import ToolMessage
15
+ from langchain_core.tools import BaseTool
16
+ from langchain_core.tools.base import InjectedToolCallId
17
+ from langgraph.prebuilt import InjectedState
18
+ from langgraph.types import Command
19
+ from pydantic import BaseModel, Field
20
+ from torch_geometric.data import Data
21
+
22
+ from ..utils.embeddings.ollama import EmbeddingWithOllama
23
+ from ..utils.extractions.multimodal_pcst import MultimodalPCSTPruning
24
+ from .load_arguments import ArgumentData
25
+
26
+ # Initialize logger
27
+ logging.basicConfig(level=logging.INFO)
28
+ logger = logging.getLogger(__name__)
29
+
30
+
class MultimodalSubgraphExtractionInput(BaseModel):
    """
    MultimodalSubgraphExtractionInput is a Pydantic model representing an input
    for extracting a subgraph.

    Args:
        prompt: Prompt to interact with the backend.
        tool_call_id: Tool call ID.
        state: Injected state.
        arg_data: Argument for analytical process over graph data.
    """

    # Annotated with InjectedToolCallId rather than being a plain string field.
    tool_call_id: Annotated[str, InjectedToolCallId] = Field(
        description="Tool call ID.",
    )
    # Annotated with InjectedState rather than being a plain dict field.
    state: Annotated[dict, InjectedState] = Field(
        description="Injected state.",
    )
    # The only required field without an injection annotation.
    prompt: str = Field(
        description="Prompt to interact with the backend.",
    )
    # NOTE(review): the default is None although the annotation is not optional;
    # confirm whether callers rely on omitting this argument.
    arg_data: ArgumentData = Field(
        default=None,
        description="Experiment over graph data.",
    )
47
+
48
+
49
+ class MultimodalSubgraphExtractionTool(BaseTool):
50
+ """
51
+ This tool performs subgraph extraction based on user's prompt by taking into account
52
+ the top-k nodes and edges.
53
+ """
54
+
55
+ name: str = "subgraph_extraction"
56
+ description: str = "A tool for subgraph extraction based on user's prompt."
57
+ args_schema: type[BaseModel] = MultimodalSubgraphExtractionInput
58
+
def _prepare_query_modalities(
    self, prompt_emb: list, state: Annotated[dict, InjectedState], pyg_graph: Data
) -> pd.DataFrame:
    """
    Prepare the modality-specific query for subgraph extraction.

    The query always ends with one row per entry of ``prompt_emb`` describing the
    user prompt. When an uploaded file of type ``"multimodal"`` is present, the
    graph nodes whose type equals a sheet name and whose name contains a listed
    name (case-insensitive) are placed before the prompt row(s).

    Args:
        prompt_emb: The embedding of the user prompt in a list.
        state: The injected state for the tool. ``state["uploaded_files"]`` is read;
            ``state["selections"]`` is written when a multimodal file is processed.
        pyg_graph: The PyTorch Geometric graph Data. The attributes ``node_id``,
            ``node_name``, ``node_type``, ``x`` and ``desc_x`` are read, and only
            when a multimodal file is present.

    Returns:
        A DataFrame containing the query embeddings and modalities, with the columns
        ``node_id``, ``node_type``, ``x``, ``desc_x`` and ``use_description``.
    """
    # Find the uploaded multimodal workbook. If several are uploaded, the last
    # one wins, exactly as a plain overwrite inside the loop would behave.
    sheets = None
    for uploaded_file in state["uploaded_files"]:
        if uploaded_file["file_type"] == "multimodal":
            # sheet_name=None returns a dict that maps sheet name -> DataFrame
            sheets = pd.read_excel(uploaded_file["file_path"], sheet_name=None)

    matched_df = None
    if sheets:
        # Merge all sheets into one frame; the sheet name becomes `level_0`
        multimodal_df = pd.concat(sheets).reset_index()
        multimodal_df = multimodal_df.drop(columns=["level_1"]).rename(
            columns={"level_0": "q_node_type", "name": "q_node_name"}
        )
        # Since an excel sheet name could not contain a `/`,
        # but the node type can be 'gene/protein' as exists in the PrimeKG
        multimodal_df["q_node_type"] = multimodal_df["q_node_type"].apply(
            lambda sheet_name: sheet_name.replace("-", "/")
        )

        # Convert PyG graph to a DataFrame for easier filtering
        graph_df = pd.DataFrame(
            {
                "node_id": pyg_graph.node_id,
                "node_name": pyg_graph.node_name,
                "node_type": pyg_graph.node_type,
                "x": pyg_graph.x,
                "desc_x": pyg_graph.desc_x.tolist(),
            }
        )

        # Pair every graph node with every requested name, then keep the pairs
        # that agree on the node type and whose node name contains the request.
        candidates = graph_df.merge(multimodal_df, how="cross")
        is_match = candidates.apply(
            # The cheap type comparison goes first so `and` can short-circuit
            lambda row: row["node_type"] == row["q_node_type"]
            and row["q_node_name"].lower() in row["node_name"].lower(),
            axis=1,
        )
        matched_df = candidates[is_match]
        matched_df = matched_df[["node_id", "node_type", "x", "desc_x"]].reset_index(drop=True)
        matched_df["use_description"] = False  # set to False for modal-specific embeddings

        # Update the state by adding the selected node IDs grouped by node type
        state["selections"] = matched_df.groupby("node_type")["node_id"].apply(list).to_dict()

    # The user prompt is always part of the query
    prompt_df = pd.DataFrame(
        {
            "node_id": "user_prompt",
            "node_type": "prompt",
            "x": prompt_emb,
            "desc_x": prompt_emb,
            "use_description": True,  # set to True for user prompt embedding
        }
    )

    # Concatenating an empty frame is deprecated in pandas and can change the
    # resulting dtypes, so the prompt rows are returned on their own when there
    # is nothing to put in front of them.
    if matched_df is None or matched_df.empty:
        return prompt_df

    return pd.concat([matched_df, prompt_df]).reset_index(drop=True)
151
+
def _perform_subgraph_extraction(
    self,
    state: Annotated[dict, InjectedState],
    cfg: dict,
    pyg_graph: Data,
    query_df: pd.DataFrame,
) -> dict:
    """
    Perform multimodal subgraph extraction based on modal-specific embeddings.

    One pruning run is executed per row of ``query_df``. The node and edge
    indices returned by all runs are merged and de-duplicated.

    Args:
        state: The injected state for the tool. ``state["topk_nodes"]`` and
            ``state["topk_edges"]`` are read for every query row.
        cfg: The configuration object. It is read through attribute access
            (``cost_e``, ``c_const``, ``root``, ``num_clusters``, ``pruning``,
            ``verbosity_level``), so a plain ``dict`` is not sufficient here.
        pyg_graph: The PyTorch Geometric graph Data.
        query_df: The DataFrame containing the query embeddings and modalities.
            The columns ``x``, ``desc_x``, ``node_type`` and ``use_description``
            are read from every row.

    Returns:
        A dictionary with the keys ``"nodes"`` and ``"edges"``, each holding a
        sorted array of unique int64 indices. Both arrays are empty when
        ``query_df`` has no rows.
    """

    def _merge_unique(parts: list) -> np.ndarray:
        # np.concatenate raises on an empty sequence, so handle that case here
        if not parts:
            return np.empty(0, dtype=np.int64)
        return np.unique(np.concatenate(parts))

    node_parts = []
    edge_parts = []

    # Loop over query embeddings and modalities
    for _, query in query_df.iterrows():
        # Prepare the PCSTPruning object and extract the subgraph
        # Parameters were set in the configuration file obtained from Hydra
        extracted = MultimodalPCSTPruning(
            topk=state["topk_nodes"],
            topk_e=state["topk_edges"],
            cost_e=cfg.cost_e,
            c_const=cfg.c_const,
            root=cfg.root,
            num_clusters=cfg.num_clusters,
            pruning=cfg.pruning,
            verbosity_level=cfg.verbosity_level,
            use_description=query["use_description"],
        ).extract_subgraph(
            pyg_graph,
            torch.tensor(query["desc_x"]),  # description embedding
            torch.tensor(query["x"]),  # modal-specific embedding
            query["node_type"],
        )

        # The dtype is pinned because an empty result would otherwise become a
        # float64 array and turn every merged index into a float.
        # NOTE(review): assumes nodes/edges are integer indices -- confirm
        # against MultimodalPCSTPruning.extract_subgraph.
        node_parts.append(np.asarray(extracted["nodes"].tolist(), dtype=np.int64))
        edge_parts.append(np.asarray(extracted["edges"].tolist(), dtype=np.int64))

    # Concatenate and get unique node and edge indices
    return {
        "nodes": _merge_unique(node_parts),
        "edges": _merge_unique(edge_parts),
    }
210
+
211
def _prepare_final_subgraph(
    self, state: Annotated[dict, InjectedState], subgraph: dict, graph: dict, cfg
) -> dict:
    """
    Build the PyG, NetworkX and textual views of the extracted subgraph.

    Args:
        state: The injected state for the tool; ``selections`` (node type -> node IDs)
            is read to colour the selected nodes.
        subgraph: The extracted subgraph with the index arrays ``nodes`` and ``edges``.
        graph: The initial graph; ``graph["pyg"]`` is the PyG graph and
            ``graph["text"]`` holds the ``nodes`` and ``edges`` tables.
        cfg: The configuration object; ``cfg.node_colors_dict`` is read.

    Returns:
        A dictionary containing the PyG graph (``graph_pyg``), the NetworkX graph
        (``graph_nx``) and the textualized graph (``graph_text``).
    """
    source = graph["pyg"]
    node_idx = subgraph["nodes"]
    edge_idx = subgraph["edges"]

    # Position of every kept node inside the subgraph, keyed by its original index
    local_index = {original: position for position, original in enumerate(node_idx.tolist())}

    # Endpoints of the kept edges, still expressed in original node indices
    kept_endpoints = source.edge_index[:, edge_idx]
    heads = [local_index[i] for i in kept_endpoints[0].tolist()]
    tails = [local_index[i] for i in kept_endpoints[1].tolist()]

    kept_node_ids = np.array(source.node_id)[node_idx]
    kept_edge_types = np.array(source.edge_type)[edge_idx]

    # Prepare the PyTorch Geometric graph
    pyg_graph = Data(
        # Node features
        x=[source.x[i] for i in node_idx],
        node_id=kept_node_ids.tolist(),
        # NOTE(review): node_name is filled from node_id, not node_name; the colour
        # lookup below keys on node IDs, so this looks intentional - confirm.
        node_name=kept_node_ids.tolist(),
        enriched_node=np.array(source.enriched_node)[node_idx].tolist(),
        num_nodes=len(node_idx),
        # Edge features
        edge_index=torch.LongTensor([heads, tails]),
        edge_attr=source.edge_attr[edge_idx],
        edge_type=kept_edge_types.tolist(),
        relation=kept_edge_types.tolist(),
        label=kept_edge_types.tolist(),
        enriched_edge=np.array(source.enriched_edge)[edge_idx].tolist(),
    )

    # Colour of every selected node, derived from the type it was selected under
    node_colors = {}
    for node_type, selected_ids in state["selections"].items():
        for selected_id in selected_ids:
            node_colors[selected_id] = cfg.node_colors_dict[node_type]

    # Networkx DiGraph construction to be visualized in the frontend
    nx_graph = nx.DiGraph()
    names = pyg_graph.node_name
    for name in names:
        # Nodes that were not part of the selection get no colour (None)
        nx_graph.add_node(name, color=node_colors.get(name))

    for src, dst, edge_type in zip(
        pyg_graph.edge_index[0].tolist(),
        pyg_graph.edge_index[1].tolist(),
        pyg_graph.edge_type,
        strict=False,
    ):
        nx_graph.add_edge(names[src], names[dst], relation=edge_type, label=edge_type)

    # Prepare the textualized subgraph: node rows, a blank separator, edge rows
    nodes_csv = graph["text"]["nodes"].iloc[node_idx].to_csv(index=False)
    edges_csv = graph["text"]["edges"].iloc[edge_idx].to_csv(index=False)

    return {
        "graph_pyg": pyg_graph,
        "graph_nx": nx_graph,
        "graph_text": "\n".join([nodes_csv, edges_csv]),
    }
287
+
288
def _run(
    self,
    tool_call_id: Annotated[str, InjectedToolCallId],
    state: Annotated[dict, InjectedState],
    prompt: str,
    arg_data: ArgumentData = None,
) -> Command:
    """
    Extract a multimodal subgraph for the prompt and publish it to the state.

    Args:
        tool_call_id: The tool call ID for the tool.
        state: Injected state for the tool; ``dic_source_graph``, ``topk_nodes``
            and ``topk_edges`` are read here.
        prompt: The prompt to interact with the backend.
        arg_data (ArgumentData): The argument data; ``extraction_name`` is read
            from it, so the ``None`` default does not produce a usable call.

    Returns:
        Command: The command updating ``dic_extracted_graph`` and ``messages``.
    """
    logger.info("Invoking subgraph_extraction tool")

    # Load hydra configuration
    with hydra.initialize(version_base=None, config_path="../configs"):
        cfg = hydra.compose(
            config_name="config",
            overrides=["tools/multimodal_subgraph_extraction=default"],
        ).tools.multimodal_subgraph_extraction

    # The most recent source graph in the state is the one to extract from
    source = state["dic_source_graph"][-1]

    # Load the knowledge graph. joblib.load unpickles the files, so both paths
    # must point to trusted artifacts.
    initial_graph = {
        "source": source,
        "pyg": joblib.load(source["kg_pyg_path"]),
        "text": joblib.load(source["kg_text_path"]),
    }

    # Prepare the query embeddings and modalities
    prompt_embedding = EmbeddingWithOllama(model_name=cfg.ollama_embeddings[0]).embed_query(
        prompt
    )
    query_df = self._prepare_query_modalities([prompt_embedding], state, initial_graph["pyg"])

    # Perform subgraph extraction
    subgraphs = self._perform_subgraph_extraction(state, cfg, initial_graph["pyg"], query_df)

    # Prepare subgraph as a NetworkX graph and textualized graph
    final_subgraph = self._prepare_final_subgraph(state, subgraphs, initial_graph, cfg)
    nx_graph = final_subgraph["graph_nx"]

    # Prepare the dictionary of extracted graph
    dic_extracted_graph = {
        "name": arg_data.extraction_name,
        "tool_call_id": tool_call_id,
        "graph_source": source["name"],
        "topk_nodes": state["topk_nodes"],
        "topk_edges": state["topk_edges"],
        "graph_dict": {
            "nodes": list(nx_graph.nodes(data=True)),
            "edges": list(nx_graph.edges(data=True)),
        },
        "graph_text": final_subgraph["graph_text"],
        "graph_summary": None,
    }

    # Return the updated state of the tool together with the message history
    return Command(
        update={
            "dic_extracted_graph": [dic_extracted_graph],
            "messages": [
                ToolMessage(
                    content=f"Subgraph Extraction Result of {arg_data.extraction_name}",
                    tool_call_id=tool_call_id,
                )
            ],
        }
    )
@@ -0,0 +1,291 @@
1
+ """
2
+ Tool for performing subgraph extraction.
3
+ """
4
+
5
+ import logging
6
+ from typing import Annotated
7
+
8
+ import hydra
9
+ import joblib
10
+ import networkx as nx
11
+ import numpy as np
12
+ import pandas as pd
13
+ import torch
14
+ from langchain.chains.combine_documents import create_stuff_documents_chain
15
+ from langchain.chains.retrieval import create_retrieval_chain
16
+ from langchain_community.document_loaders import PyPDFLoader
17
+ from langchain_core.messages import ToolMessage
18
+ from langchain_core.prompts import ChatPromptTemplate
19
+ from langchain_core.tools import BaseTool
20
+ from langchain_core.tools.base import InjectedToolCallId
21
+ from langchain_core.vectorstores import InMemoryVectorStore
22
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
23
+ from langgraph.prebuilt import InjectedState
24
+ from langgraph.types import Command
25
+ from pydantic import BaseModel, Field
26
+ from torch_geometric.data import Data
27
+
28
+ from ..utils.embeddings.ollama import EmbeddingWithOllama
29
+ from ..utils.extractions.pcst import PCSTPruning
30
+ from .load_arguments import ArgumentData
31
+
32
# Configure root logging at INFO on import and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
35
+
36
+
37
class SubgraphExtractionInput(BaseModel):
    """
    SubgraphExtractionInput is a Pydantic model representing an input for extracting a subgraph.

    Args:
        prompt: Prompt to interact with the backend.
        tool_call_id: Tool call ID.
        state: Injected state.
        arg_data: Argument for analytical process over graph data.
    """

    # Annotated with the injection markers InjectedToolCallId / InjectedState
    tool_call_id: Annotated[str, InjectedToolCallId] = Field(description="Tool call ID.")
    state: Annotated[dict, InjectedState] = Field(description="Injected state.")
    # Free-text request used as the extraction query
    prompt: str = Field(description="Prompt to interact with the backend.")
    # Optional by default, although the tool reads `extraction_name` from it
    arg_data: ArgumentData = Field(default=None, description="Experiment over graph data.")
52
+
53
+
54
class SubgraphExtractionTool(BaseTool):
    """
    This tool performs subgraph extraction based on user's prompt by taking into account
    the top-k nodes and edges.
    """

    name: str = "subgraph_extraction"
    description: str = "A tool for subgraph extraction based on user's prompt."
    args_schema: type[BaseModel] = SubgraphExtractionInput

    def perform_endotype_filtering(
        self,
        prompt: str,
        state: Annotated[dict, InjectedState],
        cfg: hydra.core.config_store.ConfigStore,
    ) -> str:
        """
        Enrich the prompt with answers retrieved from the uploaded endotype PDFs.

        Every uploaded file whose ``file_type`` is ``"endotype"`` is loaded, split
        into chunks, indexed in an in-memory vector store and queried with the
        prompt through a retrieval chain.

        Args:
            prompt: The prompt to interact with the backend.
            state: Injected state for the tool; ``uploaded_files``, ``llm_model``
                and ``embedding_model`` are read.
            cfg: Hydra configuration object.

        Returns:
            The prompt, extended with ``cfg.prompt_endotype_addition`` and the
            comma-separated answers when at least one endotype file was processed;
            otherwise the prompt unchanged.
        """
        answers = []
        for uploaded_file in state["uploaded_files"]:
            if uploaded_file["file_type"] != "endotype":
                continue

            # Load the PDF file
            pages = PyPDFLoader(file_path=uploaded_file["file_path"]).load()

            # Split the text into chunks
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=cfg.splitter_chunk_size,
                chunk_overlap=cfg.splitter_chunk_overlap,
            )
            chunks = splitter.split_documents(pages)

            # Create a chat prompt template and the question-answering chain
            chat_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", cfg.prompt_endotype_filtering),
                    ("human", "{input}"),
                ]
            )
            qa_chain = create_stuff_documents_chain(state["llm_model"], chat_prompt)

            # Index the chunks and expose them through a retriever
            vector_store = InMemoryVectorStore.from_documents(
                documents=chunks, embedding=state["embedding_model"]
            )
            retriever = vector_store.as_retriever(
                search_type=cfg.retriever_search_type,
                search_kwargs={
                    "k": cfg.retriever_k,
                    "fetch_k": cfg.retriever_fetch_k,
                    "lambda_mult": cfg.retriever_lambda_mult,
                },
            )

            rag_chain = create_retrieval_chain(retriever, qa_chain)
            answers.append(rag_chain.invoke({"input": prompt})["answer"])

        # Prepare the prompt
        if answers:
            prompt = " ".join([prompt, cfg.prompt_endotype_addition, ", ".join(answers)])

        return prompt

    def prepare_final_subgraph(
        self, subgraph: dict, pyg_graph: Data, textualized_graph: pd.DataFrame
    ) -> dict:
        """
        Build the PyG, NetworkX and textual views of the extracted subgraph.

        Args:
            subgraph: The extracted subgraph with the index arrays ``nodes`` and ``edges``.
            pyg_graph: The PyTorch Geometric graph.
            textualized_graph: The textualized graph; its ``nodes`` and ``edges``
                entries are sliced with ``.iloc``.

        Returns:
            A dictionary containing the PyG graph (``graph_pyg``), the NetworkX graph
            (``graph_nx``) and the textualized graph (``graph_text``).
        """
        node_idx = subgraph["nodes"]
        edge_idx = subgraph["edges"]

        # Position of every kept node inside the subgraph, keyed by its original index
        local_index = {original: position for position, original in enumerate(node_idx.tolist())}

        # Endpoints of the kept edges, still expressed in original node indices
        kept_endpoints = pyg_graph.edge_index[:, edge_idx]
        heads = [local_index[i] for i in kept_endpoints[0].tolist()]
        tails = [local_index[i] for i in kept_endpoints[1].tolist()]

        kept_node_ids = np.array(pyg_graph.node_id)[node_idx]
        kept_edge_types = np.array(pyg_graph.edge_type)[edge_idx]

        # Prepare the PyTorch Geometric graph
        sub_pyg = Data(
            # Node features
            x=pyg_graph.x[node_idx],
            node_id=kept_node_ids.tolist(),
            # NOTE(review): node_name is filled from node_id, not node_name - confirm.
            node_name=kept_node_ids.tolist(),
            enriched_node=np.array(pyg_graph.enriched_node)[node_idx].tolist(),
            num_nodes=len(node_idx),
            # Edge features
            edge_index=torch.LongTensor([heads, tails]),
            edge_attr=pyg_graph.edge_attr[edge_idx],
            edge_type=kept_edge_types.tolist(),
            relation=kept_edge_types.tolist(),
            label=kept_edge_types.tolist(),
            enriched_edge=np.array(pyg_graph.enriched_edge)[edge_idx].tolist(),
        )

        # Networkx DiGraph construction to be visualized in the frontend
        nx_graph = nx.DiGraph()
        for name in sub_pyg.node_name:
            nx_graph.add_node(name)

        # Resolve the endpoints of every edge to node names before adding the edges
        named_edges = [
            [sub_pyg.node_name[head], sub_pyg.node_name[tail]]
            for (head, tail) in sub_pyg.edge_index.transpose(1, 0)
        ]
        for position, (src_name, dst_name) in enumerate(named_edges):
            edge_type = sub_pyg.edge_type[position]
            nx_graph.add_edge(src_name, dst_name, relation=edge_type, label=edge_type)

        # Prepare the textualized subgraph: node rows, a blank separator, edge rows
        nodes_csv = textualized_graph["nodes"].iloc[node_idx].to_csv(index=False)
        edges_csv = textualized_graph["edges"].iloc[edge_idx].to_csv(index=False)

        return {
            "graph_pyg": sub_pyg,
            "graph_nx": nx_graph,
            "graph_text": "\n".join([nodes_csv, edges_csv]),
        }

    def _run(
        self,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[dict, InjectedState],
        prompt: str,
        arg_data: ArgumentData = None,
    ) -> Command:
        """
        Extract a subgraph for the prompt and publish it to the state.

        Args:
            tool_call_id: The tool call ID for the tool.
            state: Injected state for the tool; ``dic_source_graph``,
                ``uploaded_files``, ``topk_nodes`` and ``topk_edges`` are read here.
            prompt: The prompt to interact with the backend.
            arg_data (ArgumentData): The argument data; ``extraction_name`` is read
                from it, so the ``None`` default does not produce a usable call.

        Returns:
            Command: The command updating ``dic_extracted_graph`` and ``messages``.
        """
        logger.info("Invoking subgraph_extraction tool")

        # Load hydra configuration
        with hydra.initialize(version_base=None, config_path="../configs"):
            cfg = hydra.compose(
                config_name="config", overrides=["tools/subgraph_extraction=default"]
            ).tools.subgraph_extraction

        # The most recent source graph in the state is the one to extract from
        source = state["dic_source_graph"][-1]

        # Load the knowledge graph. joblib.load unpickles the files, so both paths
        # must point to trusted artifacts.
        kg_pyg = joblib.load(source["kg_pyg_path"])
        kg_text = joblib.load(source["kg_text_path"])

        # Prepare prompt construction along with a list of endotypes
        uploaded_types = [f["file_type"] for f in state["uploaded_files"]]
        if "endotype" in uploaded_types:
            prompt = self.perform_endotype_filtering(prompt, state, cfg)

        # Prepare embedding model and embed the user prompt as query
        embedder = EmbeddingWithOllama(model_name=cfg.ollama_embeddings[0])
        query_emb = torch.tensor(embedder.embed_query(prompt)).float()

        # Prepare the PCSTPruning object and extract the subgraph
        # Parameters were set in the configuration file obtained from Hydra
        pruner = PCSTPruning(
            state["topk_nodes"],
            state["topk_edges"],
            cfg.cost_e,
            cfg.c_const,
            cfg.root,
            cfg.num_clusters,
            cfg.pruning,
            cfg.verbosity_level,
        )
        subgraph = pruner.extract_subgraph(kg_pyg, query_emb)

        # Prepare subgraph as a NetworkX graph and textualized graph
        final_subgraph = self.prepare_final_subgraph(subgraph, kg_pyg, kg_text)
        nx_graph = final_subgraph["graph_nx"]

        # Prepare the dictionary of extracted graph
        dic_extracted_graph = {
            "name": arg_data.extraction_name,
            "tool_call_id": tool_call_id,
            "graph_source": source["name"],
            "topk_nodes": state["topk_nodes"],
            "topk_edges": state["topk_edges"],
            "graph_dict": {
                "nodes": list(nx_graph.nodes(data=True)),
                "edges": list(nx_graph.edges(data=True)),
            },
            "graph_text": final_subgraph["graph_text"],
            "graph_summary": None,
        }

        # Return the updated state of the tool together with the message history
        return Command(
            update={
                "dic_extracted_graph": [dic_extracted_graph],
                "messages": [
                    ToolMessage(
                        content=f"Subgraph Extraction Result of {arg_data.extraction_name}",
                        tool_call_id=tool_call_id,
                    )
                ],
            }
        )