aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336) hide show
  1. aiagents4pharma/__init__.py +11 -0
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
  11. aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
  12. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
  13. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
  14. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  16. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  17. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  18. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  19. aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
  20. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
  21. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
  22. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
  23. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
  24. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  25. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  26. aiagents4pharma/talk2biomodels/README.md +1 -0
  27. aiagents4pharma/talk2biomodels/__init__.py +5 -0
  28. aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
  29. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
  30. aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
  31. aiagents4pharma/talk2biomodels/api/ols.py +75 -0
  32. aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
  33. aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
  34. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
  35. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
  36. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
  37. aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
  38. aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
  39. aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
  40. aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
  41. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
  42. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
  43. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
  44. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
  45. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
  46. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
  47. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
  48. aiagents4pharma/talk2biomodels/install.md +63 -0
  49. aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
  50. aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
  51. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
  52. aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
  53. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
  54. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  55. aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
  56. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  57. aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
  58. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
  59. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
  60. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
  61. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
  62. aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
  63. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
  64. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
  65. aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
  66. aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
  67. aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
  68. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
  69. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
  70. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
  71. aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
  72. aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
  73. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
  74. aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
  75. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
  76. aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
  77. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
  78. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
  79. aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
  80. aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
  81. aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
  82. aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
  83. aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
  84. aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
  85. aiagents4pharma/talk2cells/README.md +1 -0
  86. aiagents4pharma/talk2cells/__init__.py +5 -0
  87. aiagents4pharma/talk2cells/agents/__init__.py +6 -0
  88. aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
  89. aiagents4pharma/talk2cells/states/__init__.py +6 -0
  90. aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
  91. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
  92. aiagents4pharma/talk2cells/tools/__init__.py +6 -0
  93. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
  94. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
  95. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
  96. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  97. aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
  98. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  99. aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
  100. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
  101. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
  102. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
  103. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  104. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  105. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
  106. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  107. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
  108. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
  110. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  111. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  112. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  113. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
  114. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  115. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  116. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  117. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  118. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
  119. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
  120. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  121. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  122. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  123. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  124. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
  125. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
  126. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
  127. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
  128. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
  129. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  130. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  131. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  132. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  133. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
  134. aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
  135. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
  136. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
  137. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
  151. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
  152. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
  153. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
  154. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
  155. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
  156. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
  157. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
  158. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
  159. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
  160. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
  161. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
  162. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
  163. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
  164. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
  165. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
  166. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  167. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
  168. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  169. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
  170. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
  171. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
  172. aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
  173. aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
  174. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
  175. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
  176. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
  177. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
  178. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
  179. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
  180. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
  181. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
  182. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
  183. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
  184. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
  185. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
  186. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
  187. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
  188. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
  189. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
  190. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
  191. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
  192. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
  193. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  194. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  195. aiagents4pharma/talk2scholars/README.md +1 -0
  196. aiagents4pharma/talk2scholars/__init__.py +7 -0
  197. aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
  198. aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
  199. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
  200. aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
  201. aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
  202. aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
  203. aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
  204. aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
  205. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
  206. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
  207. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
  208. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
  209. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
  210. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
  211. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
  212. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
  213. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
  214. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
  215. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
  216. aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
  217. aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
  218. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
  219. aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
  220. aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
  221. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
  222. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
  223. aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
  224. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  225. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
  226. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
  227. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
  228. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
  229. aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
  230. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
  231. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
  232. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
  233. aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
  234. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
  235. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  236. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  237. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  238. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  239. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  240. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  241. aiagents4pharma/talk2scholars/install.md +122 -0
  242. aiagents4pharma/talk2scholars/state/__init__.py +7 -0
  243. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
  244. aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
  245. aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
  246. aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
  247. aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
  248. aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
  249. aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
  250. aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
  251. aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
  252. aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
  253. aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
  254. aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
  255. aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
  256. aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
  257. aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
  258. aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
  259. aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
  260. aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
  261. aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
  262. aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
  263. aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
  264. aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
  265. aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
  266. aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
  267. aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
  268. aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
  269. aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
  270. aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
  271. aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
  272. aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
  273. aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
  274. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
  275. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
  276. aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
  277. aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
  278. aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
  279. aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
  280. aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
  281. aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
  282. aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
  283. aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
  284. aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
  285. aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
  286. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
  287. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
  288. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
  289. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
  290. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
  291. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
  292. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
  293. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
  294. aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
  295. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
  296. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
  297. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  298. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
  299. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  300. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  301. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  302. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
  303. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
  304. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
  305. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  306. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
  307. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
  308. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  309. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
  310. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
  311. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
  312. aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
  313. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
  314. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
  315. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
  316. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
  317. aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
  318. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
  319. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
  320. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
  321. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
  322. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
  323. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
  324. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
  325. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
  326. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
  327. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
  328. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
  329. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
  330. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
  331. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
  332. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
  333. aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
  334. aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
  335. aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
  336. aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,212 @@
1
+ """
2
+ Class for loading PrimeKG dataset.
3
+ """
4
+
5
+ import os
6
+
7
+ import pandas as pd
8
+ import requests
9
+ from tqdm import tqdm
10
+
11
+ from .dataset import Dataset
12
+
13
+
14
class PrimeKG(Dataset):
    """
    Class for loading PrimeKG dataset.

    It downloads the node and edge files from the Harvard Dataverse, stores them in
    the local directory, and loads them into pandas DataFrames of nodes and edges.
    The processed tables are written back to the local directory as gzip-compressed
    TSV files; when those files exist, later loads read them instead of downloading.
    """

    def __init__(self, local_dir: str = "../../../data/primekg/"):
        """
        Constructor for PrimeKG class.

        Args:
            local_dir (str): The local directory where the data will be stored.
        """
        self.name: str = "primekg"
        # Base URL of the file-access endpoint; a file id is appended to it.
        self.server_path: str = "https://dataverse.harvard.edu/api/access/datafile/"
        # File ids of the raw node and edge tables on the server.
        self.file_ids: dict = {"nodes": 6180617, "edges": 6180616}
        self.local_dir: str = local_dir

        # Attributes to store the data; both stay None until load_data() is called
        self.nodes: pd.DataFrame | None = None
        self.edges: pd.DataFrame | None = None

        # Set up the dataset
        self.setup()

    def setup(self):
        """
        A method to set up the dataset.

        Creates ``self.local_dir`` (including missing parent directories) if it
        does not exist yet.
        """
        # Create local_dir itself. os.path.dirname(local_dir) only names that
        # directory when the path ends with a separator; without one it names the
        # parent, and for a bare directory name it is "" (rejected by os.makedirs).
        # An empty local_dir is treated as the current directory, matching how
        # os.path.join(local_dir, ...) resolves file names in the loaders.
        os.makedirs(self.local_dir or os.curdir, exist_ok=True)

    def _download_file(self, remote_url: str, local_path: str):
        """
        A helper function to download a file from remote URL to the local directory.

        The body is streamed to disk in 1 KiB chunks while a progress bar is updated.

        Args:
            remote_url (str): The remote URL of the file to be downloaded.
            local_path (str): The local path where the file will be saved.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(remote_url, stream=True, timeout=300)
        # A streamed response keeps its connection open until it is closed, so it
        # is released here even when the status check or the file write fails.
        with response:
            response.raise_for_status()
            progress_bar = tqdm(
                # Falls back to 0 when the server does not report a length
                total=int(response.headers.get("content-length", 0)),
                unit="iB",
                unit_scale=True,
            )
            # The progress bar is closed together with the file, also on errors.
            with progress_bar, open(local_path, "wb") as file:
                for data in response.iter_content(1024):
                    progress_bar.update(len(data))
                    file.write(data)

    def _load_nodes(self) -> pd.DataFrame:
        """
        Private method to load the nodes dataframe of PrimeKG dataset.
        This method downloads the nodes file from the Harvard Dataverse if the
        processed nodes file does not exist in the local directory. Otherwise, it
        loads the data from the local directory.
        A freshly downloaded table is reduced to the node columns used by this
        class and stored as a compressed file in the local directory.

        Returns:
            The nodes dataframe of PrimeKG dataset.
        """
        local_file = os.path.join(self.local_dir, f"{self.name}_nodes.tsv.gz")
        if os.path.exists(local_file):
            print(f"{local_file} already exists. Loading the data from the local directory.")

            # The processed file is returned as stored; no further processing
            return pd.read_csv(local_file, sep="\t", compression="gzip", low_memory=False)

        remote_url = f"{self.server_path}{self.file_ids['nodes']}"
        raw_file = os.path.join(self.local_dir, "nodes.tab")
        print(f"Downloading node file from {remote_url}")

        # Download the file from the Harvard Dataverse with designated file_id for node
        self._download_file(remote_url, raw_file)

        # Load the downloaded (tab-separated) file into a pandas DataFrame
        nodes = pd.read_csv(raw_file, sep="\t", low_memory=False)

        # Keep only the columns that are used downstream, in a fixed order
        nodes = nodes[["node_index", "node_name", "node_source", "node_id", "node_type"]]

        # Store compressed dataframe in the local directory
        nodes.to_csv(local_file, index=False, sep="\t", compression="gzip")

        return nodes

    def _load_edges(self, nodes: pd.DataFrame) -> pd.DataFrame:
        """
        Private method to load the edges dataframe of PrimeKG dataset.
        This method downloads the edges file from the Harvard Dataverse if the
        processed edges file does not exist in the local directory. Otherwise, it
        loads the data from the local directory.
        A freshly downloaded table is joined with the nodes dataframe so that each
        edge carries the attributes of its head and tail node, and the result is
        stored as a compressed file in the local directory.

        Args:
            nodes (pd.DataFrame): The nodes dataframe of PrimeKG dataset.

        Returns:
            The edges dataframe of PrimeKG dataset.
        """
        local_file = os.path.join(self.local_dir, f"{self.name}_edges.tsv.gz")
        if os.path.exists(local_file):
            print(f"{local_file} already exists. Loading the data from the local directory.")

            # The processed file is returned as stored; `nodes` is not used here
            return pd.read_csv(local_file, sep="\t", compression="gzip", low_memory=False)

        remote_url = f"{self.server_path}{self.file_ids['edges']}"
        raw_file = os.path.join(self.local_dir, "edges.csv")
        print(f"Downloading edge file from {remote_url}")

        # Download the file from the Harvard Dataverse with designated file_id for edge
        self._download_file(remote_url, raw_file)

        # Load the downloaded (comma-separated) file into a pandas DataFrame
        edges = pd.read_csv(raw_file, sep=",", low_memory=False)

        # Attach the node attributes twice: once for the head of each edge (joined
        # through x_index) and once for the tail (joined through y_index). After
        # each join the node_* columns are renamed with the matching prefix so the
        # second join does not collide with the first.
        node_fields = ("index", "name", "source", "id", "type")
        for prefix, index_column in (("head", "x_index"), ("tail", "y_index")):
            edges = edges.merge(nodes, left_on=index_column, right_on="node_index")
            edges = edges.drop(columns=[index_column])
            edges = edges.rename(
                columns={f"node_{field}": f"{prefix}_{field}" for field in node_fields}
            )

        # Final column layout: head attributes, tail attributes, relation labels
        edges = edges[
            [f"head_{field}" for field in node_fields]
            + [f"tail_{field}" for field in node_fields]
            + ["display_relation", "relation"]
        ]

        # Store compressed dataframe in the local directory
        edges.to_csv(local_file, index=False, sep="\t", compression="gzip")

        return edges

    def load_data(self):
        """
        Load the PrimeKG dataset into pandas DataFrame of nodes and edges.

        The nodes are loaded first because the edges are built from them.
        """
        print("Loading nodes of PrimeKG dataset ...")
        self.nodes = self._load_nodes()

        print("Loading edges of PrimeKG dataset ...")
        self.edges = self._load_edges(self.nodes)

    def get_nodes(self) -> pd.DataFrame:
        """
        Get the nodes dataframe of PrimeKG dataset.

        Returns:
            The nodes dataframe of PrimeKG dataset (None before load_data() is called).
        """
        return self.nodes

    def get_edges(self) -> pd.DataFrame:
        """
        Get the edges dataframe of PrimeKG dataset.

        Returns:
            The edges dataframe of PrimeKG dataset (None before load_data() is called).
        """
        return self.edges
@@ -0,0 +1,210 @@
1
+ """
2
+ Class for loading StarkQAPrimeKG dataset.
3
+ """
4
+
5
+ import os
6
+ import shutil
7
+
8
+ import gdown
9
+ import joblib
10
+ import numpy as np
11
+ import pandas as pd
12
+ import torch
13
+ from huggingface_hub import hf_hub_download, list_repo_files
14
+ from tqdm import tqdm
15
+
16
+ from .dataset import Dataset
17
+
18
+
19
class StarkQAPrimeKG(Dataset):
    """
    Class for loading StarkQAPrimeKG dataset.
    It downloads the data from the HuggingFace repo and stores it in the local directory.
    The data is then loaded into pandas DataFrame of QA pairs, dictionary of split indices,
    and node information.
    """

    def __init__(self, local_dir: str = "../../../data/starkqa_primekg/"):
        """
        Constructor for StarkQAPrimeKG class.

        Args:
            local_dir (str): The local directory to store the dataset files.
        """
        self.name: str = "starkqa_primekg"
        self.hf_repo_id: str = "snap-stanford/stark"
        self.local_dir: str = local_dir
        # Attributes to store the data (populated by load_data)
        self.starkqa: pd.DataFrame = None
        self.starkqa_split_idx: dict = None
        self.starkqa_node_info: dict = None
        self.query_emb_dict: dict = None
        self.node_emb_dict: dict = None

        # Set up the dataset
        self.setup()

    def setup(self):
        """
        A method to set up the dataset.

        Creates ``self.local_dir`` (and any missing parents) if it does not exist.
        """
        # Create the directory itself rather than os.path.dirname(local_dir):
        # dirname() yields only the parent when local_dir has no trailing slash,
        # and yields "" (which makedirs rejects) for a bare directory name.
        os.makedirs(self.local_dir or ".", exist_ok=True)

    def _load_stark_repo(self) -> tuple[pd.DataFrame, dict, dict]:
        """
        Private method to load related files of StarkQAPrimeKG dataset.

        Returns:
            The QA pairs dataframe of StarkQAPrimeKG dataset.
            The split indices of StarkQAPrimeKG dataset.
            The node information of StarkQAPrimeKG dataset.

        Raises:
            ValueError: If a split file contains a query id that is not in the QA dataframe.
        """
        # Download the files if the QA csv does not exist in the local directory
        # Otherwise, load the data from the local directory
        local_file = os.path.join(self.local_dir, "qa/prime/stark_qa/stark_qa.csv")
        if os.path.exists(local_file):
            print(f"{local_file} already exists. Loading the data from the local directory.")
        else:
            print(f"Downloading files from {self.hf_repo_id}")

            # List all related (non-raw) files in the HuggingFace Hub repository
            files = [
                f
                for f in list_repo_files(self.hf_repo_id, repo_type="dataset")
                if f.startswith(("qa/prime/", "skb/prime/")) and "raw" not in f
            ]

            # Download and save each file in the specified folder
            for file in tqdm(files):
                hf_hub_download(
                    self.hf_repo_id, file, repo_type="dataset", local_dir=self.local_dir
                )

            # Unzip the processed files
            shutil.unpack_archive(
                os.path.join(self.local_dir, "skb/prime/processed.zip"),
                os.path.join(self.local_dir, "skb/prime/"),
            )

        # Load StarkQA dataframe
        starkqa = pd.read_csv(local_file, low_memory=False)

        # Map every query id to its position in the sorted id list.
        # setdefault keeps the first position for a repeated id, which is what
        # list.index() would return, but each lookup is O(1) instead of O(n).
        position_of: dict = {}
        for position, query_id in enumerate(sorted(starkqa["id"].tolist())):
            position_of.setdefault(query_id, position)

        # Read split indices
        starkqa_split_idx = {}
        for split in ["train", "val", "test", "test-0.1"]:
            indices_file = os.path.join(self.local_dir, "qa/prime/split", f"{split}.index")
            with open(indices_file, encoding="utf-8") as f:
                # Whitespace tokenisation tolerates blank lines and an empty file
                query_ids = [int(token) for token in f.read().split()]
            try:
                positions = [position_of[query_id] for query_id in query_ids]
            except KeyError as err:
                raise ValueError(
                    f"Query id {err.args[0]} in split '{split}' is not present in stark_qa.csv"
                ) from err
            # dtype=int keeps an integer array even when the split is empty
            starkqa_split_idx[split] = np.array(positions, dtype=int)

        # Load the node info of PrimeKG preprocessed for StarkQA
        # NOTE(review): joblib.load unpickles the file; only use it on data from a trusted source.
        starkqa_node_info = joblib.load(
            os.path.join(self.local_dir, "skb/prime/processed/node_info.pkl")
        )

        return starkqa, starkqa_split_idx, starkqa_node_info

    def _load_stark_embeddings(self) -> tuple[dict, dict]:
        """
        Private method to load the embeddings of StarkQAPrimeKG dataset.

        Returns:
            The query embeddings of StarkQAPrimeKG dataset.
            The node embeddings of StarkQAPrimeKG dataset.
        """
        # Load the provided embeddings of query and nodes
        # Note that they utilized 'text-embedding-ada-002' for embeddings
        emb_model = "text-embedding-ada-002"
        query_emb_url = "https://drive.google.com/uc?id=1MshwJttPZsHEM2cKA5T13SIrsLeBEdyU"
        node_emb_url = "https://drive.google.com/uc?id=16EJvCMbgkVrQ0BuIBvLBp-BYPaye-Edy"

        # Prepare respective directories to store the embeddings
        emb_dir = os.path.join(self.local_dir, emb_model)
        query_emb_dir = os.path.join(emb_dir, "query")
        node_emb_dir = os.path.join(emb_dir, "doc")
        os.makedirs(query_emb_dir, exist_ok=True)
        os.makedirs(node_emb_dir, exist_ok=True)
        query_emb_path = os.path.join(query_emb_dir, "query_emb_dict.pt")
        node_emb_path = os.path.join(node_emb_dir, "candidate_emb_dict.pt")

        # Download only the embedding files that are missing locally, so an
        # already-present file is not fetched again
        if not os.path.exists(query_emb_path):
            gdown.download(query_emb_url, query_emb_path, quiet=False)
        if not os.path.exists(node_emb_path):
            gdown.download(node_emb_url, node_emb_path, quiet=False)

        # Load the embeddings
        # NOTE(review): torch.load is pickle-based; only use it on files from a trusted source.
        query_emb_dict = torch.load(query_emb_path)
        node_emb_dict = torch.load(node_emb_path)

        return query_emb_dict, node_emb_dict

    def load_data(self):
        """
        Load the StarkQAPrimeKG dataset into pandas DataFrame of QA pairs,
        dictionary of split indices, and node information, followed by the
        query and node embeddings.
        """
        print("Loading StarkQAPrimeKG dataset...")
        self.starkqa, self.starkqa_split_idx, self.starkqa_node_info = self._load_stark_repo()

        print("Loading StarkQAPrimeKG embeddings...")
        self.query_emb_dict, self.node_emb_dict = self._load_stark_embeddings()

    def get_starkqa(self) -> pd.DataFrame:
        """
        Get the dataframe of StarkQAPrimeKG dataset, containing the QA pairs.

        Returns:
            The QA pairs dataframe of StarkQAPrimeKG dataset.
        """
        return self.starkqa

    def get_starkqa_split_indices(self) -> dict:
        """
        Get the split indices of StarkQAPrimeKG dataset.

        Returns:
            The split indices of StarkQAPrimeKG dataset.
        """
        return self.starkqa_split_idx

    def get_starkqa_split_indicies(self) -> dict:
        """
        Get the split indices of StarkQAPrimeKG dataset.

        Kept under its original (misspelled) name for backward compatibility;
        it returns the same value as get_starkqa_split_indices.

        Returns:
            The split indices of StarkQAPrimeKG dataset.
        """
        return self.get_starkqa_split_indices()

    def get_starkqa_node_info(self) -> dict:
        """
        Get the node information of StarkQAPrimeKG dataset.

        Returns:
            The node information of StarkQAPrimeKG dataset.
        """
        return self.starkqa_node_info

    def get_query_embeddings(self) -> dict:
        """
        Get the query embeddings of StarkQAPrimeKG dataset.

        Returns:
            The query embeddings of StarkQAPrimeKG dataset.
        """
        return self.query_emb_dict

    def get_node_embeddings(self) -> dict:
        """
        Get the node embeddings of StarkQAPrimeKG dataset.

        Returns:
            The node embeddings of StarkQAPrimeKG dataset.
        """
        return self.node_emb_dict
@@ -0,0 +1,23 @@
1
+ # .env.example (DO NOT put actual API keys here, read the README.md)
2
+
3
+ # OPENAI API KEY
4
+ OPENAI_API_KEY=your_openai_api_key_here
5
+
6
+ # LangSmith API KEY
7
+ LANGCHAIN_TRACING_V2=true
8
+ LANGCHAIN_API_KEY=your_langchain_api_key_here
9
+
10
+ # NVIDIA API KEY
11
+ NVIDIA_API_KEY=your_nvidia_api_key_here
12
+
13
+ # Set environment variables for data loader
14
+ MILVUS_HOST=milvus-standalone
15
+ MILVUS_PORT=19530
16
+ MILVUS_USER=root
17
+ MILVUS_PASSWORD=Milvus
18
+ MILVUS_DATABASE=t2kg_primekg
19
+
20
+ # Set DATA_DIR to the absolute path of your own multimodal data directory
21
+ # DATA_DIR=/your_absolute_path_to_your_data_dir/
22
+
23
+ BATCH_SIZE=500
@@ -0,0 +1,93 @@
1
+ version: "1.0.0"
2
+
3
+ services:
4
+ # talk2knowledgegraphs with automatic data loading via entrypoint
5
+ talk2knowledgegraphs:
6
+ container_name: talk2knowledgegraphs
7
+ image: vpatientengine/talk2knowledgegraphs:latest-cpu
8
+ platform: linux/amd64
9
+ ports:
10
+ - "8501:8501"
11
+ environment:
12
+ - MILVUS_HOST=milvus-standalone
13
+ - MILVUS_PORT=19530
14
+ env_file:
15
+ - .env
16
+ volumes:
17
+ # Mount the external data directory read-only (DATA_DIR from .env, otherwise ./default_data)
18
+ - ${DATA_DIR:-./default_data}:/mnt/external_data:ro
19
+ healthcheck:
20
+ test: ["CMD", "curl", "-f", "http://localhost:8501/health"]
21
+ interval: 30s
22
+ timeout: 10s
23
+ retries: 3
24
+ start_period: 120s # Allow time for data loading
25
+ depends_on:
26
+ milvus-standalone:
27
+ condition: service_healthy
28
+
29
+ # Milvus Dependencies
30
+ etcd:
31
+ container_name: milvus-etcd
32
+ image: quay.io/coreos/etcd:v3.5.18
33
+ environment:
34
+ - ETCD_AUTO_COMPACTION_MODE=revision
35
+ - ETCD_AUTO_COMPACTION_RETENTION=1000
36
+ - ETCD_QUOTA_BACKEND_BYTES=4294967296
37
+ - ETCD_SNAPSHOT_COUNT=50000
38
+ volumes:
39
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
40
+ command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
41
+ healthcheck:
42
+ test: ["CMD", "etcdctl", "endpoint", "health"]
43
+ interval: 30s
44
+ timeout: 20s
45
+ retries: 3
46
+
47
+ minio:
48
+ container_name: milvus-minio
49
+ image: minio/minio:RELEASE.2024-05-28T17-19-04Z
50
+ environment:
51
+ MINIO_ACCESS_KEY: minioadmin
52
+ MINIO_SECRET_KEY: minioadmin
53
+ ports:
54
+ - "9001:9001"
55
+ - "9000:9000"
56
+ volumes:
57
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
58
+ command: minio server /minio_data --console-address ":9001"
59
+ healthcheck:
60
+ test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
61
+ interval: 30s
62
+ timeout: 20s
63
+ retries: 3
64
+
65
+ # Milvus Vector Database (CPU-only)
66
+ milvus-standalone:
67
+ container_name: milvus-standalone
68
+ image: milvusdb/milvus:v2.5.14
69
+ command: ["milvus", "run", "standalone"]
70
+ security_opt:
71
+ - seccomp:unconfined
72
+ environment:
73
+ MINIO_REGION: us-east-1
74
+ ETCD_ENDPOINTS: etcd:2379
75
+ MINIO_ADDRESS: minio:9000
76
+ volumes:
77
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
78
+ ports:
79
+ - "19530:19530"
80
+ - "9091:9091"
81
+ healthcheck:
82
+ test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
83
+ interval: 30s
84
+ start_period: 90s
85
+ timeout: 20s
86
+ retries: 3
87
+ depends_on:
88
+ - "etcd"
89
+ - "minio"
90
+
91
+ networks:
92
+ milvus:
93
+ name: milvus
@@ -0,0 +1,23 @@
1
+ # .env.example (DO NOT put actual API keys here, read the README.md)
2
+
3
+ # OPENAI API KEY
4
+ OPENAI_API_KEY=your_openai_api_key_here
5
+
6
+ # LangSmith API KEY
7
+ LANGCHAIN_TRACING_V2=true
8
+ LANGCHAIN_API_KEY=your_langchain_api_key_here
9
+
10
+ # NVIDIA API KEY
11
+ NVIDIA_API_KEY=your_nvidia_api_key_here
12
+
13
+ # Set environment variables for data loader
14
+ MILVUS_HOST=milvus-standalone
15
+ MILVUS_PORT=19530
16
+ MILVUS_USER=root
17
+ MILVUS_PASSWORD=Milvus
18
+ MILVUS_DATABASE=t2kg_primekg
19
+
20
+ # Set DATA_DIR to the absolute path of your own multimodal data directory
21
+ # DATA_DIR=/your_absolute_path_to_your_data_dir/
22
+
23
+ BATCH_SIZE=500
@@ -0,0 +1,108 @@
1
+ version: "1.0.0"
2
+
3
+ services:
4
+ # talk2knowledgegraphs with automatic data loading via entrypoint
5
+ talk2knowledgegraphs:
6
+ container_name: talk2knowledgegraphs
7
+ image: vpatientengine/talk2knowledgegraphs:latest-gpu
8
+ platform: linux/amd64
9
+ ports:
10
+ - "8501:8501"
11
+ deploy:
12
+ resources:
13
+ reservations:
14
+ devices:
15
+ - driver: nvidia
16
+ capabilities: ["gpu"]
17
+ device_ids: ["0"]
18
+ environment:
19
+ - MILVUS_HOST=milvus-standalone
20
+ - MILVUS_PORT=19530
21
+ env_file:
22
+ - .env
23
+ volumes:
24
+ # Mount the external data directory read-only (DATA_DIR from .env, otherwise ./default_data)
25
+ - ${DATA_DIR:-./default_data}:/mnt/external_data:ro
26
+ healthcheck:
27
+ test: ["CMD", "curl", "-f", "http://localhost:8501/health"]
28
+ interval: 30s
29
+ timeout: 10s
30
+ retries: 3
31
+ start_period: 120s # Allow time for data loading
32
+ depends_on:
33
+ milvus-standalone:
34
+ condition: service_healthy
35
+
36
+ # Milvus Dependencies
37
+ etcd:
38
+ container_name: milvus-etcd
39
+ image: quay.io/coreos/etcd:v3.5.18
40
+ environment:
41
+ - ETCD_AUTO_COMPACTION_MODE=revision
42
+ - ETCD_AUTO_COMPACTION_RETENTION=1000
43
+ - ETCD_QUOTA_BACKEND_BYTES=4294967296
44
+ - ETCD_SNAPSHOT_COUNT=50000
45
+ volumes:
46
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
47
+ command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
48
+ healthcheck:
49
+ test: ["CMD", "etcdctl", "endpoint", "health"]
50
+ interval: 30s
51
+ timeout: 20s
52
+ retries: 3
53
+
54
+ minio:
55
+ container_name: milvus-minio
56
+ image: minio/minio:RELEASE.2023-03-20T20-16-18Z
57
+ environment:
58
+ MINIO_ACCESS_KEY: minioadmin
59
+ MINIO_SECRET_KEY: minioadmin
60
+ ports:
61
+ - "9001:9001"
62
+ - "9000:9000"
63
+ volumes:
64
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
65
+ command: minio server /minio_data --console-address ":9001"
66
+ healthcheck:
67
+ test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
68
+ interval: 30s
69
+ timeout: 20s
70
+ retries: 3
71
+
72
+ # Milvus Vector Database (GPU-enabled)
73
+ milvus-standalone:
74
+ container_name: milvus-standalone
75
+ image: milvusdb/milvus:v2.6.0-rc1-gpu
76
+ command: ["milvus", "run", "standalone"]
77
+ security_opt:
78
+ - seccomp:unconfined
79
+ environment:
80
+ MINIO_REGION: us-east-1
81
+ ETCD_ENDPOINTS: etcd:2379
82
+ MINIO_ADDRESS: minio:9000
83
+ MQ_TYPE: woodpecker
84
+ volumes:
85
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
86
+ ports:
87
+ - "19530:19530"
88
+ - "9091:9091"
89
+ deploy:
90
+ resources:
91
+ reservations:
92
+ devices:
93
+ - driver: nvidia
94
+ capabilities: ["gpu"]
95
+ device_ids: ["0"]
96
+ healthcheck:
97
+ test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
98
+ interval: 30s
99
+ start_period: 90s
100
+ timeout: 20s
101
+ retries: 3
102
+ depends_on:
103
+ - "etcd"
104
+ - "minio"
105
+
106
+ networks:
107
+ milvus:
108
+ name: milvus