aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336) hide show
  1. aiagents4pharma/__init__.py +11 -0
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
  11. aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
  12. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
  13. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
  14. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  16. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  17. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  18. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  19. aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
  20. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
  21. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
  22. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
  23. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
  24. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  25. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  26. aiagents4pharma/talk2biomodels/README.md +1 -0
  27. aiagents4pharma/talk2biomodels/__init__.py +5 -0
  28. aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
  29. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
  30. aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
  31. aiagents4pharma/talk2biomodels/api/ols.py +75 -0
  32. aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
  33. aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
  34. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
  35. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
  36. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
  37. aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
  38. aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
  39. aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
  40. aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
  41. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
  42. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
  43. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
  44. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
  45. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
  46. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
  47. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
  48. aiagents4pharma/talk2biomodels/install.md +63 -0
  49. aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
  50. aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
  51. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
  52. aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
  53. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
  54. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  55. aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
  56. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  57. aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
  58. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
  59. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
  60. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
  61. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
  62. aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
  63. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
  64. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
  65. aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
  66. aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
  67. aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
  68. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
  69. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
  70. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
  71. aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
  72. aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
  73. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
  74. aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
  75. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
  76. aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
  77. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
  78. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
  79. aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
  80. aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
  81. aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
  82. aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
  83. aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
  84. aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
  85. aiagents4pharma/talk2cells/README.md +1 -0
  86. aiagents4pharma/talk2cells/__init__.py +5 -0
  87. aiagents4pharma/talk2cells/agents/__init__.py +6 -0
  88. aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
  89. aiagents4pharma/talk2cells/states/__init__.py +6 -0
  90. aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
  91. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
  92. aiagents4pharma/talk2cells/tools/__init__.py +6 -0
  93. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
  94. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
  95. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
  96. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  97. aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
  98. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  99. aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
  100. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
  101. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
  102. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
  103. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  104. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  105. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
  106. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  107. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
  108. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
  110. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  111. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  112. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  113. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
  114. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  115. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  116. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  117. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  118. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
  119. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
  120. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  121. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  122. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  123. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  124. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
  125. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
  126. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
  127. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
  128. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
  129. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  130. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  131. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  132. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  133. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
  134. aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
  135. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
  136. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
  137. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
  151. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
  152. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
  153. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
  154. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
  155. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
  156. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
  157. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
  158. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
  159. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
  160. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
  161. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
  162. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
  163. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
  164. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
  165. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
  166. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  167. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
  168. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  169. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
  170. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
  171. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
  172. aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
  173. aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
  174. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
  175. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
  176. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
  177. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
  178. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
  179. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
  180. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
  181. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
  182. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
  183. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
  184. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
  185. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
  186. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
  187. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
  188. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
  189. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
  190. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
  191. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
  192. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
  193. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  194. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  195. aiagents4pharma/talk2scholars/README.md +1 -0
  196. aiagents4pharma/talk2scholars/__init__.py +7 -0
  197. aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
  198. aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
  199. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
  200. aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
  201. aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
  202. aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
  203. aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
  204. aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
  205. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
  206. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
  207. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
  208. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
  209. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
  210. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
  211. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
  212. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
  213. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
  214. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
  215. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
  216. aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
  217. aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
  218. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
  219. aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
  220. aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
  221. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
  222. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
  223. aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
  224. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  225. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
  226. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
  227. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
  228. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
  229. aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
  230. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
  231. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
  232. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
  233. aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
  234. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
  235. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  236. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  237. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  238. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  239. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  240. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  241. aiagents4pharma/talk2scholars/install.md +122 -0
  242. aiagents4pharma/talk2scholars/state/__init__.py +7 -0
  243. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
  244. aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
  245. aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
  246. aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
  247. aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
  248. aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
  249. aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
  250. aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
  251. aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
  252. aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
  253. aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
  254. aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
  255. aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
  256. aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
  257. aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
  258. aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
  259. aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
  260. aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
  261. aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
  262. aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
  263. aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
  264. aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
  265. aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
  266. aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
  267. aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
  268. aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
  269. aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
  270. aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
  271. aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
  272. aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
  273. aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
  274. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
  275. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
  276. aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
  277. aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
  278. aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
  279. aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
  280. aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
  281. aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
  282. aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
  283. aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
  284. aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
  285. aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
  286. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
  287. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
  288. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
  289. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
  290. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
  291. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
  292. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
  293. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
  294. aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
  295. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
  296. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
  297. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  298. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
  299. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  300. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  301. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  302. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
  303. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
  304. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
  305. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  306. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
  307. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
  308. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  309. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
  310. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
  311. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
  312. aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
  313. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
  314. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
  315. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
  316. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
  317. aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
  318. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
  319. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
  320. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
  321. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
  322. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
  323. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
  324. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
  325. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
  326. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
  327. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
  328. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
  329. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
  330. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
  331. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
  332. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
  333. aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
  334. aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
  335. aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
  336. aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,5 @@
1
+ """
2
+ This file is used to import all the states in the package.
3
+ """
4
+
5
+ from . import state_talk2knowledgegraphs
@@ -0,0 +1,40 @@
1
+ """
2
+ This is the state file for the Talk2KnowledgeGraphs agent.
3
+ """
4
+
5
+ from typing import Annotated
6
+
7
+ # import operator
8
+ from langchain_core.embeddings.embeddings import Embeddings
9
+ from langchain_core.language_models.chat_models import BaseChatModel
10
+ from langgraph.prebuilt.chat_agent_executor import AgentState
11
+
12
+
13
def add_data(data1: list[dict], data2: list[dict]) -> list[dict]:
    """
    A reducer function to merge two lists of dictionaries by their "name" key.

    Entries of ``data2`` replace the entry of ``data1`` that has the same
    ``"name"``; entries with a name not seen so far are appended. Neither
    input list is mutated.

    Args:
        data1: The existing list of dictionaries; each must have a "name" key.
        data2: The incoming list of dictionaries; each must have a "name" key.

    Returns:
        A new list holding the merged entries, in the order of ``data1``
        followed by the newly appended entries from ``data2``.

    Raises:
        KeyError: If any entry lacks the "name" key.
    """
    merged = data1.copy()
    idx_by_name = {data["name"]: idx for idx, data in enumerate(merged)}
    for data in data2:
        idx = idx_by_name.get(data["name"])
        if idx is not None:
            merged[idx] = data
        else:
            # Record the position of the appended entry so that a later item
            # in data2 with the same name replaces it instead of duplicating it.
            idx_by_name[data["name"]] = len(merged)
            merged.append(data)
    return merged
26
+
27
+
28
class Talk2KnowledgeGraphs(AgentState):
    """
    The state for the Talk2KnowledgeGraphs agent.

    Extends ``AgentState`` with the keys declared below. The two ``dic_*``
    keys carry ``add_data`` as ``Annotated`` metadata, i.e. the function in
    this module that merges lists of dictionaries by their "name" key.
    """

    # Chat model instance carried in the state.
    llm_model: BaseChatModel
    # Embedding model instance carried in the state.
    embedding_model: Embeddings
    # NOTE(review): presumably user selections keyed by node type - confirm against callers.
    selections: dict
    # NOTE(review): presumably metadata records of uploaded files - confirm against callers.
    uploaded_files: list
    # NOTE(review): presumably the number of top nodes to keep during extraction - confirm.
    topk_nodes: int
    # NOTE(review): presumably the number of top edges to keep during extraction - confirm.
    topk_edges: int
    # List of source-graph dictionaries, annotated with the add_data reducer.
    dic_source_graph: Annotated[list[dict], add_data]
    # List of extracted-graph dictionaries, annotated with the add_data reducer.
    dic_extracted_graph: Annotated[list[dict], add_data]
File without changes
@@ -0,0 +1,318 @@
1
+ """
2
+ Test cases for agents/t2kg_agent.py
3
+ """
4
+
5
+ from contextlib import ExitStack
6
+ from unittest.mock import MagicMock, patch
7
+
8
+ import pandas as pd
9
+ import pytest
10
+ from langchain_core.messages import HumanMessage, ToolMessage
11
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
12
+ from langgraph.types import Command
13
+
14
+ from ..agents.t2kg_agent import get_app
15
+ from ..tools.milvus_multimodal_subgraph_extraction import (
16
+ MultimodalSubgraphExtractionTool,
17
+ )
18
+
19
+ DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
20
+
21
+
22
@pytest.fixture(name="input_dict")
def input_dict_fixture():
    """
    Build the initial state dictionary handed to the agent in these tests.
    """
    # Every selection category starts out with its own empty list.
    selection_keys = (
        "gene/protein",
        "molecular_function",
        "cellular_component",
        "biological_process",
        "drug",
        "disease",
    )
    uploaded_file = {
        "file_name": "adalimumab.pdf",
        "file_path": f"{DATA_PATH}/adalimumab.pdf",
        "file_type": "drug_data",
        "uploaded_by": "VPEUser",
        "uploaded_timestamp": "2024-11-05 00:00:00",
    }
    source_graph = {
        "name": "BioBridge",
        "kg_pyg_path": f"{DATA_PATH}/biobridge_multimodal_pyg_graph.pkl",
        "kg_text_path": f"{DATA_PATH}/biobridge_multimodal_text_graph.pkl",
    }
    return {
        "llm_model": None,
        "embedding_model": None,
        "selections": {key: [] for key in selection_keys},
        "uploaded_files": [uploaded_file],
        "topk_nodes": 3,
        "topk_edges": 3,
        "dic_source_graph": [source_graph],
        "dic_extracted_graph": [],
    }
59
+
60
+
61
def mock_milvus_collection(name):
    """
    Stand-in for the Milvus ``Collection`` constructor used in these tests.

    A nodes mock and an edges mock are configured with canned ``query``
    results, but neither is returned: the function always returns ``None``.
    """
    # The collection name plays no role in this simplified mock.
    del name

    node_rows = [
        {
            "node_index": 0,
            "node_id": "id1",
            "node_name": "Adalimumab",
            "node_type": "drug",
            "feat": "featA",
            "feat_emb": [0.1, 0.2, 0.3],
            "desc": "descA",
            "desc_emb": [0.1, 0.2, 0.3],
        },
        {
            "node_index": 1,
            "node_id": "id2",
            "node_name": "TNF",
            "node_type": "gene/protein",
            "feat": "featB",
            "feat_emb": [0.4, 0.5, 0.6],
            "desc": "descB",
            "desc_emb": [0.4, 0.5, 0.6],
        },
    ]
    edge_rows = [
        {
            "triplet_index": 0,
            "head_id": "id1",
            "head_index": 0,
            "tail_id": "id2",
            "tail_index": 1,
            "edge_type": "drug,acts_on,gene/protein",
            "display_relation": "acts_on",
            "feat": "featC",
            "feat_emb": [0.7, 0.8, 0.9],
        }
    ]

    nodes = MagicMock()
    nodes.configure_mock(
        **{"query.return_value": node_rows, "load.return_value": None}
    )

    edges = MagicMock()
    edges.configure_mock(
        **{"query.return_value": edge_rows, "load.return_value": None}
    )

    # Default path in tests expects None for unknown collections (implicit)
109
+
110
+
111
def _invoke_app_with_mocks(unique_id, input_dict):
    """
    Invoke the agent app once with Milvus, Hydra and the extraction tool mocked.

    Args:
        unique_id: Identifier used as the thread id and embedded in the
            extraction name (``subkg_<unique_id>``).
        input_dict: State written into the app before the run; its
            ``"llm_model"`` entry is also handed to ``get_app``.

    Returns:
        Tuple ``(app, config, response)``.
    """
    tools_mod = (
        "aiagents4pharma.talk2knowledgegraphs.tools."
        "milvus_multimodal_subgraph_extraction"
    )
    conn_mod = (
        "aiagents4pharma.talk2knowledgegraphs.utils.database."
        "milvus_connection_manager"
    )
    extraction_name = f"subkg_{unique_id}"

    # The app is built and seeded before any patch is applied.
    app = get_app(unique_id, llm_model=input_dict["llm_model"])
    config = {"configurable": {"thread_id": unique_id}}
    app.update_state(config, input_dict)

    prompt = (
        "Adalimumab is a fully human monoclonal antibody (IgG1) that "
        "specifically binds to tumor necrosis factor-alpha (TNF-α), a "
        "pro-inflammatory cytokine.\n\n"
        "I would like to get evidence from the knowledge graph about the "
        "mechanism of actions related to Adalimumab in treating inflammatory "
        "bowel disease (IBD). Please follow these steps:\n"
        "- Extract a subgraph from the PrimeKG that contains information about "
        "Adalimumab.\n- Summarize the extracted subgraph.\n"
        "- Reason about the mechanism of action of Adalimumab in treating IBD.\n\n"
        "Please set the extraction name for the extraction process as "
        f"`{extraction_name}`."
    )

    # One MagicMock per patched collaborator, addressed by a short key.
    mocks = {
        key: MagicMock()
        for key in (
            "pcst",
            "connections",
            "compose",
            "connections_manager",
            "db",
            "conn_mgr",
        )
    }

    # --- Milvus connection layer -------------------------------------------
    mocks["connections"].has_connection.return_value = True
    mocks["connections_manager"].has_connection.return_value = True
    mocks["db"].using_database.return_value = None

    # --- PCST pruning: the class mock hands out a canned instance ----------
    pruner = MagicMock()
    pruner.extract_subgraph.return_value = {
        "nodes": pd.Series([0, 1]),
        "edges": pd.Series([0]),
    }
    mocks["pcst"].return_value = pruner

    # --- Hydra configuration returned by the patched ``compose`` -----------
    extraction_cfg = MagicMock()
    extraction_cfg.cost_e = 1.0
    extraction_cfg.c_const = 1.0
    extraction_cfg.root = 0
    extraction_cfg.num_clusters = 1
    extraction_cfg.pruning = True
    extraction_cfg.verbosity_level = 0
    extraction_cfg.search_metric_type = "L2"
    extraction_cfg.node_colors_dict = {"drug": "blue", "gene/protein": "red"}

    db_cfg = MagicMock()
    milvus_settings = db_cfg.milvus_db
    milvus_settings.alias = "test_alias"
    milvus_settings.host = "localhost"
    milvus_settings.port = "19530"
    milvus_settings.user = "root"
    milvus_settings.password = "password"
    milvus_settings.database_name = "test_db"

    composed = MagicMock()
    mocks["compose"].return_value = composed
    composed.tools.multimodal_subgraph_extraction = extraction_cfg
    composed.tools.subgraph_summarization.prompt_subgraph_summarization = (
        "Summarize the following subgraph: {textualized_subgraph}"
    )
    composed.utils.database.milvus = milvus_settings

    # --- Connection manager instance ---------------------------------------
    manager = MagicMock()
    manager.ensure_connection.return_value = True
    manager.get_connection_info.return_value = {"database": "test_db", "connected": True}
    manager.test_connection.return_value = True
    mocks["conn_mgr"].return_value = manager

    def fake_run(tool_call_id, state, prompt, arg_data=None):
        """Stand-in for the extraction tool's ``_run``: append one canned subgraph."""
        del prompt, arg_data
        canned_graph = {
            "name": extraction_name,
            "tool_call_id": tool_call_id,
            "graph_source": "BioBridge",
            "topk_nodes": 3,
            "topk_edges": 3,
            "graph_dict": {
                "name": "extracted_subgraph",
                "nodes": ["Adalimumab", "TNF"],
                "edges": [("Adalimumab", "acts_on", "TNF")],
            },
            "graph_text": (
                "Adalimumab acts on TNF protein for treating inflammatory diseases."
            ),
            "graph_summary": None,
        }
        reply = ToolMessage(
            content=(
                "Subgraph extraction completed successfully. "
                "Extracted subgraph containing Adalimumab and TNF interactions."
            ),
            tool_call_id=tool_call_id,
            name="subgraph_extraction",
        )
        previous_graphs = state.get("dic_extracted_graph", [])
        return Command(
            update={
                "messages": [reply],
                "dic_extracted_graph": previous_graphs + [canned_graph],
            }
        )

    # (target, keyword arguments for ``patch``), entered in this order.
    patch_plan = [
        (f"{tools_mod}.Collection", {"side_effect": mock_milvus_collection}),
        (f"{tools_mod}.MultimodalPCSTPruning", {"new": mocks["pcst"]}),
        ("pymilvus.connections", {"new": mocks["connections"]}),
        (f"{tools_mod}.hydra.initialize", {}),
        (f"{tools_mod}.hydra.compose", {"new": mocks["compose"]}),
        (f"{conn_mod}.connections", {"new": mocks["connections_manager"]}),
        (f"{conn_mod}.Collection", {"side_effect": mock_milvus_collection}),
        (f"{conn_mod}.db", {"new": mocks["db"]}),
        (f"{tools_mod}.MilvusConnectionManager", {"new": mocks["conn_mgr"]}),
    ]

    with ExitStack() as stack:
        for target, patch_kwargs in patch_plan:
            stack.enter_context(patch(target, **patch_kwargs))
        stack.enter_context(
            patch.object(MultimodalSubgraphExtractionTool, "_run", side_effect=fake_run)
        )
        # The run must happen while every patch is still active.
        response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)

    return app, config, response
288
+
289
+
290
def test_t2kg_agent_openai_milvus_mock(input_dict):
    """
    Run the T2KG agent with an OpenAI chat model against the mocked Milvus stack.

    Args:
        input_dict: Input dictionary
    """
    unique_id = 12345
    input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")
    app, config, response = _invoke_app_with_mocks(unique_id, input_dict)

    # The last message is the agent's final answer
    final_answer = response["messages"][-1].content
    assert isinstance(final_answer, str)

    # Inspect the first extracted graph stored in the app state
    extracted = app.get_state(config).values["dic_extracted_graph"][0]
    assert isinstance(extracted, dict)
    assert extracted["name"] == "subkg_12345"
    assert extracted["graph_source"] == "BioBridge"
    for topk_key in ("topk_nodes", "topk_edges"):
        assert extracted[topk_key] == 3
    assert isinstance(extracted["graph_dict"], dict)
    for part in ("nodes", "edges"):
        assert len(extracted["graph_dict"][part]) > 0
    assert isinstance(extracted["graph_text"], str)
    assert isinstance(extracted["graph_summary"], str)

    # The answer should mention both entities of the mocked subgraph
    for entity in ("Adalimumab", "TNF"):
        assert entity in final_answer

    # The collection factory returns None for a name it does not know
    assert mock_milvus_collection("unknown") is None
@@ -0,0 +1,248 @@
1
+ """
2
+ Test cases for datasets/biobridge_primekg.py
3
+ """
4
+
5
+ import os
6
+ import shutil
7
+
8
+ import pytest
9
+
10
+ from ..datasets.biobridge_primekg import BioBridgePrimeKG
11
+
12
# Directories used by the tests in this module. The paths are relative, so
# they resolve against the current working directory of the test run.
PRIMEKG_LOCAL_DIR = "../data/primekg_test/"
LOCAL_DIR = "../data/biobridge_primekg_test/"
# Remove the BioBridge data folder at import time if it exists; errors (such
# as a missing folder) are ignored. PRIMEKG_LOCAL_DIR is not removed here.
shutil.rmtree(LOCAL_DIR, ignore_errors=True)
16
+
17
+
18
@pytest.fixture(name="biobridge_primekg")
def biobridge_primekg_fixture():
    """
    Provide a BioBridgePrimeKG instance pointed at the test data directories.
    """
    dataset = BioBridgePrimeKG(primekg_dir=PRIMEKG_LOCAL_DIR, local_dir=LOCAL_DIR)
    return dataset
24
+
25
+
26
def test_download_primekg(biobridge_primekg):
    """
    Test the loading method of the BioBridge-PrimeKG class by downloading data
    from repository, then check the files on disk and the sizes of the loaded data.
    """
    kg = biobridge_primekg

    # Load everything up front, in the same order the getters are exercised
    kg.load_data()
    nodes_df = kg.get_primekg().get_nodes()
    edges_df = kg.get_primekg().get_edges()
    data_config = kg.get_data_config()
    emb_dict = kg.get_node_embeddings()
    triplets = kg.get_primekg_triplets()
    splits = kg.get_train_test_split()
    node_info = kg.get_node_info_dict()

    # Both local directories must exist
    for directory in (kg.primekg_dir, kg.local_dir):
        assert os.path.exists(directory)

    # PrimeKG files
    for name in ("nodes.tab", "primekg_nodes.tsv.gz", "edges.csv", "primekg_edges.tsv.gz"):
        assert os.path.exists(f"{kg.primekg_dir}/{name}")

    # BioBridge data config
    assert os.path.exists(f"{kg.local_dir}/data_config.json")

    # BioBridge embeddings
    embedding_files = (
        "protein.pkl",
        "mf.pkl",
        "cc.pkl",
        "bp.pkl",
        "drug.pkl",
        "disease.pkl",
        "embedding_dict.pkl",
    )
    for name in embedding_files:
        assert os.path.exists(f"{kg.local_dir}/embeddings/{name}")

    # BioBridge processed files
    processed_files = (
        "protein.csv",
        "mf.csv",
        "cc.csv",
        "bp.csv",
        "drug.csv",
        "disease.csv",
        "triplet_full.tsv.gz",
        "triplet_full_altered.tsv.gz",
        "node_train.tsv.gz",
        "triplet_train.tsv.gz",
        "node_test.tsv.gz",
        "triplet_test.tsv.gz",
    )
    for name in processed_files:
        assert os.path.exists(f"{kg.local_dir}/processed/{name}")

    # Processed PrimeKG dataframes: nodes first, then edges
    for frame, expected_rows in ((nodes_df, 129375), (edges_df, 8100498)):
        assert frame is not None
        assert len(frame) > 0
        assert frame.shape[0] == expected_rows

    # Processed BioBridge data config
    assert data_config is not None
    assert len(data_config) > 0
    for key, expected_len in (("node_type", 10), ("relation_type", 18), ("emb_dim", 6)):
        assert len(data_config[key]) == expected_len

    # Processed BioBridge embeddings
    assert emb_dict is not None
    assert len(emb_dict) > 0
    assert len(emb_dict) == 85466

    # Processed BioBridge triplets and train/test splits
    assert triplets is not None
    assert len(triplets) > 0
    assert triplets.shape[0] == 3904610
    expected_split_sizes = {
        "train": 3510930,
        "node_train": 76486,
        "test": 393680,
        "node_test": 8495,
    }
    assert list(splits.keys()) == list(expected_split_sizes)
    for split_name, expected_len in expected_split_sizes.items():
        assert len(splits[split_name]) == expected_len

    # Node info dictionary: key order and per-type sizes
    expected_node_counts = {
        "gene/protein": 19162,
        "molecular_function": 10966,
        "cellular_component": 4013,
        "biological_process": 27478,
        "drug": 6948,
        "disease": 44133,
    }
    assert list(node_info.keys()) == list(expected_node_counts)
    for node_type, expected_len in expected_node_counts.items():
        assert len(node_info[node_type]) == expected_len
125
+
126
+
127
def test_load_existing_primekg(biobridge_primekg):
    """
    Test the loading method of the BioBridge-PrimeKG class when the data is
    expected to exist locally already; the same files and sizes are verified.
    """
    dataset = biobridge_primekg

    # Load BioBridge-PrimeKG data and fetch every view of it
    dataset.load_data()
    primekg_nodes = dataset.get_primekg().get_nodes()
    primekg_edges = dataset.get_primekg().get_edges()
    config = dataset.get_data_config()
    embeddings = dataset.get_node_embeddings()
    triplets = dataset.get_primekg_triplets()
    splits = dataset.get_train_test_split()
    node_info = dataset.get_node_info_dict()

    # Check that the local directories exist
    assert os.path.exists(dataset.primekg_dir)
    assert os.path.exists(dataset.local_dir)

    # PrimeKG files
    primekg_files = ("nodes.tab", "primekg_nodes.tsv.gz", "edges.csv", "primekg_edges.tsv.gz")
    for filename in primekg_files:
        assert os.path.exists(f"{dataset.primekg_dir}/{filename}")

    # BioBridge data config
    assert os.path.exists(f"{dataset.local_dir}/data_config.json")

    # BioBridge embeddings, then processed files, grouped by sub-directory
    files_by_subdir = {
        "embeddings": (
            "protein.pkl",
            "mf.pkl",
            "cc.pkl",
            "bp.pkl",
            "drug.pkl",
            "disease.pkl",
            "embedding_dict.pkl",
        ),
        "processed": (
            "protein.csv",
            "mf.csv",
            "cc.csv",
            "bp.csv",
            "drug.csv",
            "disease.csv",
            "triplet_full.tsv.gz",
            "triplet_full_altered.tsv.gz",
            "node_train.tsv.gz",
            "triplet_train.tsv.gz",
            "node_test.tsv.gz",
            "triplet_test.tsv.gz",
        ),
    }
    for subdir, filenames in files_by_subdir.items():
        for filename in filenames:
            assert os.path.exists(f"{dataset.local_dir}/{subdir}/{filename}")

    # Processed PrimeKG dataframes
    for table, row_count in ((primekg_nodes, 129375), (primekg_edges, 8100498)):
        assert table is not None
        assert len(table) > 0
        assert table.shape[0] == row_count

    # Processed BioBridge data config
    assert config is not None
    assert len(config) > 0
    config_sizes = {"node_type": 10, "relation_type": 18, "emb_dim": 6}
    for field, size in config_sizes.items():
        assert len(config[field]) == size

    # Processed BioBridge embeddings
    assert embeddings is not None
    assert len(embeddings) > 0
    assert len(embeddings) == 85466

    # Processed BioBridge triplets
    assert triplets is not None
    assert len(triplets) > 0
    assert triplets.shape[0] == 3904610

    # Train/test splits: key order first, then the size of each split
    split_sizes = {
        "train": 3510930,
        "node_train": 76486,
        "test": 393680,
        "node_test": 8495,
    }
    assert list(splits.keys()) == list(split_sizes)
    for split_name, size in split_sizes.items():
        assert len(splits[split_name]) == size

    # Node info dictionary: key order first, then the size per node type
    node_type_sizes = {
        "gene/protein": 19162,
        "molecular_function": 10966,
        "cellular_component": 4013,
        "biological_process": 27478,
        "drug": 6948,
        "disease": 44133,
    }
    assert list(node_info.keys()) == list(node_type_sizes)
    for node_type, size in node_type_sizes.items():
        assert len(node_info[node_type]) == size
226
+
227
+
228
+ # def test_load_existing_primekg_with_negative_triplets(biobridge_primekg):
229
+ # """
230
+ # Test the loading method of the BioBridge-PrimeKG class by loading existing data in local.
231
+ # In addition, it builds negative triplets for training data.
232
+ # """
233
+ # # Load BioBridge-PrimeKG data
234
+ # # Using 1 negative sample per positive triplet
235
+ # biobridge_primekg.load_data(build_neg_triplest=True, n_neg_samples=1)
236
+ # biobridge_neg_triplets = biobridge_primekg.get_primekg_triplets_negative()
237
+
238
+ # # Check if the local directories exists
239
+ # assert os.path.exists(biobridge_primekg.primekg_dir)
240
+ # assert os.path.exists(biobridge_primekg.local_dir)
241
+ # # Check if downloaded and processed files exist
242
+ # path = f"{biobridge_primekg.local_dir}/processed/triplet_train_negative.tsv.gz"
243
+ # assert os.path.exists(path)
244
+ # # Check processed BioBridge triplets
245
+ # assert biobridge_neg_triplets is not None
246
+ # assert len(biobridge_neg_triplets) > 0
247
+ # assert biobridge_neg_triplets.shape[0] == 3510930
248
+ # assert len(biobridge_neg_triplets.negative_tail_index[0]) == 1
@@ -0,0 +1,33 @@
1
+ """
2
+ Test cases for datasets/dataset.py
3
+ """
4
+
5
+ from ..datasets.dataset import Dataset
6
+
7
+
8
class MockDataset(Dataset):
    """
    Dataset subclass for tests whose setup and load_data methods do nothing.
    """

    def setup(self):
        """No-op setup; returns None."""
        return None

    def load_data(self):
        """No-op data loading; returns None."""
        return None
18
+
19
+
20
def test_dataset_setup():
    """
    Check that calling setup() on a MockDataset returns None.
    """
    outcome = MockDataset().setup()
    assert outcome is None
26
+
27
+
28
def test_dataset_load_data():
    """
    Check that calling load_data() on a MockDataset returns None.
    """
    outcome = MockDataset().load_data()
    assert outcome is None