aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336) hide show
  1. aiagents4pharma/__init__.py +11 -0
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
  11. aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
  12. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
  13. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
  14. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  16. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  17. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  18. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  19. aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
  20. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
  21. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
  22. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
  23. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
  24. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  25. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  26. aiagents4pharma/talk2biomodels/README.md +1 -0
  27. aiagents4pharma/talk2biomodels/__init__.py +5 -0
  28. aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
  29. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
  30. aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
  31. aiagents4pharma/talk2biomodels/api/ols.py +75 -0
  32. aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
  33. aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
  34. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
  35. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
  36. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
  37. aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
  38. aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
  39. aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
  40. aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
  41. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
  42. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
  43. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
  44. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
  45. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
  46. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
  47. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
  48. aiagents4pharma/talk2biomodels/install.md +63 -0
  49. aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
  50. aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
  51. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
  52. aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
  53. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
  54. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  55. aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
  56. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  57. aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
  58. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
  59. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
  60. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
  61. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
  62. aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
  63. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
  64. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
  65. aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
  66. aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
  67. aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
  68. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
  69. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
  70. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
  71. aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
  72. aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
  73. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
  74. aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
  75. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
  76. aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
  77. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
  78. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
  79. aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
  80. aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
  81. aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
  82. aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
  83. aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
  84. aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
  85. aiagents4pharma/talk2cells/README.md +1 -0
  86. aiagents4pharma/talk2cells/__init__.py +5 -0
  87. aiagents4pharma/talk2cells/agents/__init__.py +6 -0
  88. aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
  89. aiagents4pharma/talk2cells/states/__init__.py +6 -0
  90. aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
  91. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
  92. aiagents4pharma/talk2cells/tools/__init__.py +6 -0
  93. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
  94. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
  95. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
  96. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  97. aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
  98. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  99. aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
  100. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
  101. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
  102. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
  103. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  104. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  105. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
  106. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  107. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
  108. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
  110. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  111. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  112. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  113. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
  114. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  115. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  116. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  117. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  118. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
  119. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
  120. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  121. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  122. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  123. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  124. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
  125. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
  126. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
  127. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
  128. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
  129. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  130. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  131. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  132. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  133. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
  134. aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
  135. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
  136. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
  137. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
  151. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
  152. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
  153. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
  154. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
  155. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
  156. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
  157. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
  158. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
  159. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
  160. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
  161. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
  162. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
  163. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
  164. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
  165. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
  166. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  167. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
  168. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  169. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
  170. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
  171. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
  172. aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
  173. aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
  174. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
  175. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
  176. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
  177. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
  178. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
  179. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
  180. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
  181. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
  182. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
  183. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
  184. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
  185. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
  186. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
  187. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
  188. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
  189. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
  190. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
  191. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
  192. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
  193. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  194. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  195. aiagents4pharma/talk2scholars/README.md +1 -0
  196. aiagents4pharma/talk2scholars/__init__.py +7 -0
  197. aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
  198. aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
  199. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
  200. aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
  201. aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
  202. aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
  203. aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
  204. aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
  205. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
  206. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
  207. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
  208. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
  209. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
  210. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
  211. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
  212. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
  213. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
  214. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
  215. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
  216. aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
  217. aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
  218. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
  219. aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
  220. aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
  221. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
  222. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
  223. aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
  224. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  225. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
  226. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
  227. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
  228. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
  229. aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
  230. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
  231. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
  232. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
  233. aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
  234. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
  235. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  236. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  237. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  238. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  239. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  240. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  241. aiagents4pharma/talk2scholars/install.md +122 -0
  242. aiagents4pharma/talk2scholars/state/__init__.py +7 -0
  243. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
  244. aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
  245. aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
  246. aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
  247. aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
  248. aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
  249. aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
  250. aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
  251. aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
  252. aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
  253. aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
  254. aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
  255. aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
  256. aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
  257. aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
  258. aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
  259. aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
  260. aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
  261. aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
  262. aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
  263. aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
  264. aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
  265. aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
  266. aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
  267. aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
  268. aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
  269. aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
  270. aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
  271. aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
  272. aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
  273. aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
  274. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
  275. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
  276. aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
  277. aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
  278. aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
  279. aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
  280. aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
  281. aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
  282. aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
  283. aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
  284. aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
  285. aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
  286. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
  287. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
  288. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
  289. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
  290. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
  291. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
  292. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
  293. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
  294. aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
  295. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
  296. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
  297. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  298. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
  299. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  300. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  301. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  302. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
  303. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
  304. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
  305. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  306. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
  307. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
  308. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  309. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
  310. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
  311. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
  312. aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
  313. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
  314. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
  315. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
  316. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
  317. aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
  318. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
  319. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
  320. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
  321. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
  322. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
  323. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
  324. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
  325. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
  326. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
  327. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
  328. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
  329. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
  330. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
  331. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
  332. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
  333. aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
  334. aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
  335. aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
  336. aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,111 @@
1
+ """
2
+ Embedding class using HuggingFace model based on LangChain Embeddings class.
3
+ """
4
+
5
+ import torch
6
+ from transformers import AutoConfig, AutoModel, AutoTokenizer
7
+
8
+ from .embeddings import Embeddings
9
+
10
+
11
class EmbeddingWithHuggingFace(Embeddings):
    """
    Embedding class using HuggingFace model based on LangChain Embeddings class.
    """

    def __init__(
        self,
        model_name: str,
        model_cache_dir: str = None,
        truncation: bool = True,
        device: str = "cpu",
    ):
        """
        Initialize the EmbeddingWithHuggingFace class.

        Args:
            model_name: The name of the HuggingFace model to be used.
            model_cache_dir: The directory to cache the HuggingFace model.
            truncation: The truncation flag for the HuggingFace tokenizer.
            device: The device to run the model on.

        Raises:
            ValueError: If the model is not available on HuggingFace Hub.
        """
        # Set parameters
        self.model_name = model_name
        self.model_cache_dir = model_cache_dir
        self.truncation = truncation
        self.device = device

        # Fail fast with a clear message if the model does not exist on the Hub
        try:
            AutoConfig.from_pretrained(self.model_name)
        except OSError as e:
            raise ValueError(
                f"Model {self.model_name} is not available on HuggingFace Hub."
            ) from e

        # Load HuggingFace tokenizer and model
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name, cache_dir=self.model_cache_dir
        )
        # Move the model to the target device once here, instead of calling
        # .to(self.device) on every embed_* invocation as before (the result
        # is identical; the per-call move was redundant work).
        self.model = AutoModel.from_pretrained(
            self.model_name, cache_dir=self.model_cache_dir
        ).to(self.device)

    def meanpooling(self, output, mask) -> torch.Tensor:
        """
        Mean Pooling - Take attention mask into account for correct averaging.
        According to the following documentation:
        https://huggingface.co/NeuML/pubmedbert-base-embeddings

        Args:
            output: The output of the model (first element holds the token
                embeddings of shape (batch, seq_len, hidden)).
            mask: The attention mask of shape (batch, seq_len).
        """
        embeddings = output[0]  # First element of model_output contains all token embeddings
        mask = mask.unsqueeze(-1).expand(embeddings.size()).float()
        # Sum only the non-padding token embeddings, then divide by the
        # number of real tokens (clamped to avoid division by zero).
        return torch.sum(embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)

    def embed_documents(self, texts: list[str]) -> torch.Tensor:
        """
        Generate embedding for a list of input texts using HuggingFace model.

        Args:
            texts: The list of texts to be embedded.

        Returns:
            A 2-D tensor of shape (len(texts), hidden_size) on CPU.
            (Annotation corrected: the previous `list[float]` hint did not
            match the tensor actually returned.)
        """

        # Generate the embedding without tracking gradients (inference only)
        with torch.no_grad():
            inputs = self.tokenizer(
                texts,
                padding=True,
                truncation=self.truncation,
                return_tensors="pt",
            ).to(self.device)
            outputs = self.model(**inputs)
            embeddings = self.meanpooling(outputs, inputs["attention_mask"]).cpu()

        return embeddings

    def embed_query(self, text: str) -> torch.Tensor:
        """
        Generate embeddings for an input text using HuggingFace model.

        Args:
            text: A query to be embedded.

        Returns:
            A 1-D tensor of shape (hidden_size,) on CPU for the given query.
            (Annotation corrected: the previous `list[float]` hint did not
            match the tensor actually returned.)
        """

        # Generate the embedding; [0] unwraps the single-item batch
        with torch.no_grad():
            inputs = self.tokenizer(
                text,
                padding=True,
                truncation=self.truncation,
                return_tensors="pt",
            ).to(self.device)
            outputs = self.model(**inputs)
            embeddings = self.meanpooling(outputs, inputs["attention_mask"]).cpu()[0]

        return embeddings
@@ -0,0 +1,54 @@
1
+ """
2
+ Embedding class using MOLMIM model from NVIDIA NIM.
3
+ """
4
+
5
+ import json
6
+
7
+ import requests
8
+
9
+ from .embeddings import Embeddings
10
+
11
+
12
class EmbeddingWithMOLMIM(Embeddings):
    """
    Embedding class using MOLMIM model from NVIDIA NIM
    """

    def __init__(self, base_url: str):
        """
        Initialize the EmbeddingWithMOLMIM class.

        Args:
            base_url: The base URL for the NIM/MOLMIM model.
        """
        # Set base URL
        self.base_url = base_url

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embedding for a list of SMILES strings using MOLMIM model.

        Args:
            texts: The list of SMILES strings to be embedded.

        Returns:
            The list of embeddings (one vector per input SMILES string).

        Raises:
            requests.HTTPError: If the NIM endpoint returns an error status.
        """
        headers = {"accept": "application/json", "Content-Type": "application/json"}
        data = json.dumps({"sequences": texts})
        response = requests.post(self.base_url, headers=headers, data=data, timeout=60)
        # Surface HTTP errors explicitly instead of failing later with an
        # opaque KeyError when the service returns an error body.
        response.raise_for_status()
        embeddings = response.json()["embeddings"]
        return embeddings

    def embed_query(self, text: str) -> list[float]:
        """
        Generate embeddings for an input query using MOLMIM model.

        Args:
            text: A query to be embedded.
        Returns:
            The embedding vector for the given query.
        """
        # Unwrap the single-item batch: embed_documents returns a list of
        # vectors, and this method's contract (list[float], matching the
        # sibling HuggingFace implementation) is a single vector. The
        # previous code returned the nested one-element list.
        embeddings = self.embed_documents([text])[0]
        return embeddings
@@ -0,0 +1,87 @@
1
+ """
2
+ Embedding class using Ollama model based on LangChain Embeddings class.
3
+ """
4
+
5
+ import subprocess
6
+ import time
7
+
8
+ import ollama
9
+ from langchain_ollama import OllamaEmbeddings
10
+
11
+ from .embeddings import Embeddings
12
+
13
+
14
class EmbeddingWithOllama(Embeddings):
    """
    Embedding class using Ollama model based on LangChain Embeddings class.

    On construction, checks that the requested model is available locally,
    pulling it (and attempting to (re)start the Ollama server) if needed.
    """

    def __init__(self, model_name: str):
        """
        Initialize the EmbeddingWithOllama class.

        Args:
            model_name: The name of the Ollama model to be used.

        Raises:
            ValueError: Propagated from __setup when the model had to be
                pulled or the Ollama server had to be (re)started.
        """
        # Setup the Ollama server (may pull the model / restart the server).
        self.__setup(model_name)

        # Set parameters
        self.model_name = model_name

        # Prepare model
        self.model = OllamaEmbeddings(model=self.model_name)

    def __setup(self, model_name: str) -> None:
        """
        Check if the Ollama model is available and run the Ollama server if needed.

        NOTE(review): this method raises ValueError even after a successful
        pull — that exception is caught by the broad handler below, the
        server is started in the background, and a new ValueError is
        re-raised to the caller. Confirm whether callers are expected to
        retry construction after this error.

        Args:
            model_name: The name of the Ollama model to be used.

        Raises:
            ValueError: If the model had to be pulled or any error occurred
                while querying/starting the Ollama server.
        """
        try:
            models_list = ollama.list()["models"]
            # Installed model names may carry a ":latest" tag; strip it
            # before comparing against the requested name.
            if model_name not in [m["model"].replace(":latest", "") for m in models_list]:
                ollama.pull(model_name)
                # Give the pull time to complete before signalling.
                time.sleep(30)
                raise ValueError(f"Pulled {model_name} model")
        except Exception as e:
            # Any failure above (including an unreachable server) lands
            # here: launch "ollama serve" in the background, wait briefly,
            # then surface the original error to the caller.
            with subprocess.Popen(
                "ollama serve",
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            ):
                # Give the server time to come up before re-raising.
                time.sleep(10)
                raise ValueError(f"Error: {e} and restarted Ollama server.") from e

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embedding for a list of input texts using Ollama model.

        Args:
            texts: The list of texts to be embedded.

        Returns:
            The list of embedding vectors, one per input text.
        """

        # Generate the embedding via the LangChain Ollama wrapper.
        embeddings = self.model.embed_documents(texts)

        return embeddings

    def embed_query(self, text: str) -> list[float]:
        """
        Generate embeddings for an input text using Ollama model.

        Args:
            text: A query to be embedded.
        Returns:
            The embedding vector for the given query.
        """

        # Generate the embedding for the single query.
        embeddings = self.model.embed_query(text)

        return embeddings
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Embedding class using SentenceTransformer model based on LangChain Embeddings class.
5
+ """
6
+
7
+ from sentence_transformers import SentenceTransformer
8
+
9
+ from .embeddings import Embeddings
10
+
11
+
12
class EmbeddingWithSentenceTransformer(Embeddings):
    """
    Embedding class backed by a SentenceTransformer model, following the
    LangChain Embeddings interface.
    """

    def __init__(
        self,
        model_name: str,
        model_cache_dir: str = None,
        trust_remote_code: bool = True,
    ):
        """
        Build the embedder and eagerly load the underlying model.

        Args:
            model_name: The name of the SentenceTransformer model to be used.
            model_cache_dir: The directory to cache the SentenceTransformer model.
            trust_remote_code: Whether to trust the remote code of the model.
        """
        # Record the configuration on the instance.
        self.model_name = model_name
        self.model_cache_dir = model_cache_dir
        self.trust_remote_code = trust_remote_code

        # Instantiate the model up front so later encode calls are cheap.
        self.model = SentenceTransformer(
            model_name,
            cache_folder=model_cache_dir,
            trust_remote_code=trust_remote_code,
        )

    def embed_documents(self, texts: list[str]) -> list[float]:
        """
        Generate embedding for a list of input texts using SentenceTransformer model.

        Args:
            texts: The list of texts to be embedded.

        Returns:
            The list of embeddings for the given texts.
        """
        # Encode the whole batch in one call; progress bar suppressed.
        return self.model.encode(texts, show_progress_bar=False)

    def embed_query(self, text: str) -> list[float]:
        """
        Generate embeddings for an input text using SentenceTransformer model.

        Args:
            text: A query to be embedded.
        Returns:
            The embeddings for the given query.
        """
        # Single-text encode; progress bar suppressed.
        return self.model.encode(text, show_progress_bar=False)
@@ -0,0 +1,12 @@
1
+ """
2
+ This package contains modules to use the enrichment model
3
+ """
4
+
5
+ from . import (
6
+ enrichments,
7
+ ollama,
8
+ ols_terms,
9
+ pubchem_strings,
10
+ reactome_pathways,
11
+ uniprot_proteins,
12
+ )
@@ -0,0 +1,37 @@
1
+ """
2
+ Enrichments interface
3
+ """
4
+
5
+ from abc import ABC, abstractmethod
6
+
7
+
8
class Enrichments(ABC):
    """Interface for enrichment models.

    This is an interface meant for implementing text enrichment models.

    Enrichment models are used to enrich node or relation features in a given knowledge graph.
    """

    @abstractmethod
    def enrich_documents(self, texts: list[str]) -> list[list[str]]:
        """Enrich documents.

        Concrete implementations take a list of input texts and return one
        enrichment result per input, in the same order.

        Args:
            texts: List of documents to enrich.

        Returns:
            List of enriched documents.
        """

    @abstractmethod
    def enrich_documents_with_rag(self, texts: list[str], docs: list[str]) -> list[str]:
        """Enrich documents with RAG.

        Like enrich_documents, but implementations may additionally ground
        the enrichment on the supplied reference documents.

        Args:
            texts: List of documents to enrich.
            docs: List of reference documents to enrich the input texts.

        Returns:
            List of enriched documents with RAG.
        """
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Enrichment class using Ollama model based on LangChain Enrichment class.
5
+ """
6
+
7
+ import ast
8
+ import subprocess
9
+ import time
10
+
11
+ import ollama
12
+ from langchain_core.output_parsers import StrOutputParser
13
+ from langchain_core.prompts import ChatPromptTemplate
14
+ from langchain_ollama import ChatOllama
15
+
16
+ from .enrichments import Enrichments
17
+
18
+
19
class EnrichmentWithOllama(Enrichments):
    """
    Enrichment class using Ollama model based on the Enrichment abstract class.

    Builds a prompt-template -> chat-model -> string-parser chain and asks
    the model to enrich a batch of texts in a single call.
    """

    def __init__(
        self,
        model_name: str,
        prompt_enrichment: str,
        temperature: float,
        streaming: bool,
    ):
        """
        Initialize the EnrichmentWithOllama class.

        Args:
            model_name: The name of the Ollama model to be used.
            prompt_enrichment: The prompt enrichment template.
            temperature: The temperature for the Ollama model.
            streaming: The streaming flag for the Ollama model.

        Raises:
            ValueError: Propagated from __setup when the model had to be
                pulled or the Ollama server had to be (re)started.
        """
        # Setup the Ollama server (may pull the model / restart the server).
        self.__setup(model_name)

        # Set parameters
        self.model_name = model_name
        self.prompt_enrichment = prompt_enrichment
        self.temperature = temperature
        self.streaming = streaming

        # Prepare prompt template: the system message carries the
        # enrichment instructions, the human turn carries the input batch.
        self.prompt_template = ChatPromptTemplate.from_messages(
            [
                ("system", self.prompt_enrichment),
                ("human", "{input}"),
            ]
        )

        # Prepare model
        self.model = ChatOllama(
            model=self.model_name,
            temperature=self.temperature,
            streaming=self.streaming,
        )

    def __setup(self, model_name: str) -> None:
        """
        Check if the Ollama model is available and run the Ollama server if needed.

        NOTE(review): this method raises ValueError even after a successful
        pull — the exception is caught by the broad handler below, the
        server is started in the background, and a new ValueError is
        re-raised to the caller. Confirm whether callers are expected to
        retry construction after this error.

        Args:
            model_name: The name of the Ollama model to be used.

        Raises:
            ValueError: If the model had to be pulled or any error occurred
                while querying/starting the Ollama server.
        """
        try:
            models_list = ollama.list()["models"]
            # Installed model names may carry a ":latest" tag; strip it
            # before comparing against the requested name.
            if model_name not in [m["model"].replace(":latest", "") for m in models_list]:
                ollama.pull(model_name)
                # Give the pull time to complete before signalling.
                time.sleep(30)
                raise ValueError(f"Pulled {model_name} model")
        except Exception as e:
            # Any failure above lands here: launch "ollama serve" in the
            # background, wait briefly, then surface the error.
            with subprocess.Popen(
                "ollama serve",
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            ):
                # Give the server time to come up before re-raising.
                time.sleep(10)
                raise ValueError(f"Error: {e} and restarted Ollama server.") from e

    def enrich_documents(self, texts: list[str]) -> list[str]:
        """
        Enrich a list of input texts with additional textual features using OLLAMA model.
        Important: Make sure the input is a list of texts based on the defined prompt template
        with 'input' as the variable name.

        Args:
            texts: The list of texts to be enriched.

        Returns:
            The list of enriched texts.

        Raises:
            SyntaxError, ValueError: If the model's output cannot be parsed
                as a Python literal by ast.literal_eval.
            AssertionError: If the parsed output length does not match the
                number of inputs.
        """

        # Perform enrichment
        chain = self.prompt_template | self.model | StrOutputParser()

        # Generate the enriched node
        # Important: Make sure the input is a list of texts based on the defined prompt template
        # with 'input' as the variable name
        enriched_texts = chain.invoke({"input": "[" + ", ".join(texts) + "]"})

        # Convert the enriched nodes to a list of dictionary.
        # The model is expected to answer with a Python-literal list
        # (possibly wrapped in ``` fences, which are stripped first).
        enriched_texts = ast.literal_eval(enriched_texts.replace("```", ""))

        # Final check for the enriched texts
        # NOTE(review): assert is stripped under `python -O`; consider
        # raising ValueError if this invariant must hold in production.
        assert len(enriched_texts) == len(texts)

        return enriched_texts

    def enrich_documents_with_rag(self, texts: list[str], docs: list[str]) -> list[str]:
        """
        Enrich a list of input texts with additional textual features using OLLAMA model with RAG.
        As of now, we don't have a RAG model to test this method yet.
        Thus, we will just call the enrich_documents method instead.

        Args:
            texts: The list of texts to be enriched.
            docs: The list of reference documents to enrich the input texts.

        Returns:
            The list of enriched texts
        """
        return self.enrich_documents(texts)
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Enrichment class for enriching OLS terms with textual descriptions
5
+ """
6
+
7
+ import json
8
+ import logging
9
+
10
+ import hydra
11
+ import requests
12
+
13
+ from .enrichments import Enrichments
14
+
15
+ # Initialize logger
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class EnrichmentWithOLS(Enrichments):
    """
    Enrichment class that resolves OLS (Ontology Lookup Service) term IDs
    into textual descriptions built from descriptions, synonyms and labels.
    """

    def enrich_documents(self, texts: list[str]) -> list[str]:
        """
        Enrich a list of input OLS terms.

        Args:
            texts: The list of OLS term IDs (short forms) to be enriched.

        Returns:
            The list of enriched descriptions, one string per input term
            (empty string when the term is not found).
        """

        ols_ids = texts

        logger.log(logging.INFO, "Load Hydra configuration for OLS enrichments.")
        # Base URL and request timeout come from the Hydra config.
        with hydra.initialize(version_base=None, config_path="../../configs"):
            cfg = hydra.compose(
                config_name="config", overrides=["utils/enrichments/ols_terms=default"]
            )
            cfg = cfg.utils.enrichments.ols_terms

        descriptions = []
        for ols_id in ols_ids:
            params = {"short_form": ols_id}
            r = requests.get(
                cfg.base_url,
                headers={"Accept": "application/json"},
                params=params,
                timeout=cfg.timeout,
            )
            response_body = json.loads(r.text)
            # If the response body is empty, no terms matched this ID.
            if "_embedded" not in response_body:
                descriptions.append("")
                continue
            # Collect description fragments for all matching terms.
            description = []
            for term in response_body["_embedded"]["terms"]:
                # Description and synonyms are provided as lists (may be absent).
                description += term.get("description", [])
                description += term.get("synonyms", [])
                # Label is a plain string, so wrap it before concatenation.
                label = term.get("label", "")
                if label:
                    description.append(label)
            # De-duplicate while PRESERVING first-seen order. The previous
            # list(set(...)) gave a nondeterministic ordering across runs
            # (string hashing is randomized per process), so the same term
            # could produce differently ordered descriptions.
            description = list(dict.fromkeys(description))
            # Joining an empty list yields "", so the result is always a string.
            descriptions.append("\n".join(description))
        return descriptions

    def enrich_documents_with_rag(self, texts, docs):
        """
        Enrich a list of input OLS terms (RAG variant). The reference
        documents are currently unused; the call delegates to
        enrich_documents.

        Args:
            texts: The list of OLS term IDs to be enriched.
            docs: Unused list of reference documents.

        Returns:
            The list of enriched descriptions
        """
        return self.enrich_documents(texts)
@@ -0,0 +1,78 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Enrichment class for enriching PubChem IDs with their STRINGS representation and descriptions.
5
+ """
6
+
7
+ import logging
8
+
9
+ import hydra
10
+ import requests
11
+
12
+ from ..pubchem_utils import pubchem_cid_description
13
+ from .enrichments import Enrichments
14
+
15
+ # Initialize logger
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class EnrichmentWithPubChem(Enrichments):
    """
    Enrichment class using PubChem

    Resolves PubChem CIDs to their SMILES representation (via the PubChem
    REST API) and their textual description (via pubchem_cid_description).
    """

    def enrich_documents(self, texts: list[str]) -> tuple[list[str], list[str]]:
        """
        Enrich a list of input PubChem IDs with their STRINGS representation.

        Note: despite the plural-free name, this returns TWO parallel lists
        — (descriptions, smiles) — aligned with the input order. Entries
        are None when the PubChem ID could not be resolved.

        Args:
            texts: The list of pubchem IDs to be enriched.

        Returns:
            A tuple (descriptions, smiles) of per-ID results.
        """

        enriched_pubchem_ids_smiles = []
        enriched_pubchem_ids_descriptions = []

        # Load Hydra configuration to get the base URL for PubChem
        with hydra.initialize(version_base=None, config_path="../../configs"):
            cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
            cfg = cfg.utils.pubchem_utils
        # Iterate over each PubChem ID in the input list
        pubchem_cids = texts
        for pubchem_cid in pubchem_cids:
            # Prepare the URL
            pubchem_url = f"{cfg.pubchem_cid2smiles_url}/{pubchem_cid}/property/smiles/JSON"
            # Get the data
            response = requests.get(pubchem_url, timeout=60)
            data = response.json()
            # Extract the PubChem CID SMILES
            smiles = ""
            description = ""
            if "PropertyTable" in data:
                for prop in data["PropertyTable"]["Properties"]:
                    smiles = prop.get("SMILES", "")
                    # NOTE(review): fetched once per matching property; the
                    # last property's values win if several are returned.
                    description = pubchem_cid_description(pubchem_cid)
            else:
                # If the PubChem ID is not found, set smiles and description to None
                smiles = None
                description = None
            enriched_pubchem_ids_smiles.append(smiles)
            enriched_pubchem_ids_descriptions.append(description)

        return enriched_pubchem_ids_descriptions, enriched_pubchem_ids_smiles

    def enrich_documents_with_rag(self, texts, docs):
        """
        Enrich a list of input PubChem IDs with their STRINGS representation.

        The reference documents are unused; the call delegates to
        enrich_documents and therefore also returns the
        (descriptions, smiles) tuple.

        Args:
            texts: The list of pubchem IDs to be enriched.
            docs: None

        Returns:
            The tuple of enriched (descriptions, smiles) lists.
        """
        return self.enrich_documents(texts)