aiagents4pharma 1.43.0__py3-none-any.whl → 1.45.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290)
  1. aiagents4pharma/__init__.py +2 -2
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
  11. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
  12. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  13. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  14. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  16. aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
  17. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
  18. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
  19. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
  20. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
  21. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  22. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  23. aiagents4pharma/talk2biomodels/README.md +1 -0
  24. aiagents4pharma/talk2biomodels/__init__.py +4 -8
  25. aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
  26. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
  27. aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
  28. aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
  29. aiagents4pharma/talk2biomodels/api/ols.py +13 -10
  30. aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
  31. aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
  32. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
  33. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
  34. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
  35. aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
  36. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
  37. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
  38. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
  39. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
  40. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
  41. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
  42. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
  43. aiagents4pharma/talk2biomodels/install.md +63 -0
  44. aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
  45. aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
  46. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
  47. aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
  48. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
  49. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  50. aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
  51. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  52. aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
  53. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
  54. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
  55. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
  56. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
  57. aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
  58. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
  59. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
  60. aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
  61. aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
  62. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
  63. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
  64. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
  65. aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
  66. aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
  67. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
  68. aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
  69. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
  70. aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
  71. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
  72. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
  73. aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
  74. aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
  75. aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
  76. aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
  77. aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
  78. aiagents4pharma/talk2cells/README.md +1 -0
  79. aiagents4pharma/talk2cells/__init__.py +4 -5
  80. aiagents4pharma/talk2cells/agents/__init__.py +3 -2
  81. aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
  82. aiagents4pharma/talk2cells/states/__init__.py +3 -2
  83. aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
  84. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
  85. aiagents4pharma/talk2cells/tools/__init__.py +3 -2
  86. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
  87. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
  88. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
  89. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  90. aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
  91. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  92. aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
  93. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
  94. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
  95. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
  96. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
  97. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
  98. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
  99. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
  100. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
  101. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
  102. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
  103. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
  104. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
  105. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +17 -2
  106. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
  107. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
  108. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
  110. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
  111. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
  112. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
  113. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
  114. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
  115. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
  116. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
  117. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
  118. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
  119. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  120. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  121. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  122. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  123. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
  124. aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
  125. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
  126. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
  127. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
  128. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
  129. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
  130. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
  131. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
  132. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
  133. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
  134. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +736 -413
  135. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
  136. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
  137. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
  138. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +442 -42
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +10 -6
  151. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
  152. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
  153. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +245 -205
  154. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
  155. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
  156. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
  157. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
  158. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
  159. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
  160. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
  161. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
  162. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
  163. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
  164. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
  165. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
  166. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
  167. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
  168. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
  169. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
  170. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
  171. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
  172. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +218 -81
  173. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
  174. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
  175. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
  176. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
  177. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  178. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  179. aiagents4pharma/talk2scholars/README.md +1 -0
  180. aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
  181. aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
  182. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
  183. aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
  184. aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
  185. aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
  186. aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
  187. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
  188. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
  189. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
  190. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
  191. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
  192. aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
  193. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  194. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  195. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  196. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  197. aiagents4pharma/talk2scholars/install.md +122 -0
  198. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
  199. aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
  200. aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
  201. aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
  202. aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
  203. aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
  204. aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
  205. aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
  206. aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
  207. aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
  208. aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
  209. aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
  210. aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
  211. aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
  212. aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
  213. aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
  214. aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
  215. aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
  216. aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
  217. aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
  218. aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
  219. aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
  220. aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
  221. aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
  222. aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
  223. aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
  224. aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
  225. aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
  226. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
  227. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
  228. aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
  229. aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
  230. aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
  231. aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
  232. aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
  233. aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
  234. aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
  235. aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
  236. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
  237. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
  238. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
  239. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
  240. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
  241. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
  242. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
  243. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
  244. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
  245. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
  246. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
  247. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
  248. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
  249. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
  250. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
  251. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
  252. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
  253. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
  254. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
  255. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
  256. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
  257. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
  258. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
  259. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
  260. aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
  261. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
  262. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
  263. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
  264. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
  265. aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
  266. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
  267. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
  268. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
  269. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
  270. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
  271. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
  272. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
  273. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
  274. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
  275. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
  276. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
  277. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
  278. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
  279. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
  280. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
  281. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/METADATA +115 -50
  282. aiagents4pharma-1.45.0.dist-info/RECORD +324 -0
  283. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/WHEEL +1 -2
  284. aiagents4pharma-1.43.0.dist-info/RECORD +0 -293
  285. aiagents4pharma-1.43.0.dist-info/top_level.txt +0 -1
  286. /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
  287. /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
  288. /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
  289. /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
  290. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/licenses/LICENSE +0 -0

aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py

@@ -2,23 +2,25 @@
 Tool for performing multimodal subgraph extraction.
 """
 
-from typing import Type, Annotated
 import logging
 import pickle
-import numpy as np
-import pandas as pd
+from typing import Annotated
+
 import hydra
 import networkx as nx
-from pydantic import BaseModel, Field
-from langchain_core.tools import BaseTool
+import numpy as np
+import pandas as pd
+import torch
 from langchain_core.messages import ToolMessage
+from langchain_core.tools import BaseTool
 from langchain_core.tools.base import InjectedToolCallId
-from langgraph.types import Command
 from langgraph.prebuilt import InjectedState
-import torch
+from langgraph.types import Command
+from pydantic import BaseModel, Field
 from torch_geometric.data import Data
-from ..utils.extractions.multimodal_pcst import MultimodalPCSTPruning
+
 from ..utils.embeddings.ollama import EmbeddingWithOllama
+from ..utils.extractions.multimodal_pcst import MultimodalPCSTPruning
 from .load_arguments import ArgumentData
 
 # Initialize logger
@@ -38,14 +40,10 @@ class MultimodalSubgraphExtractionInput(BaseModel):
         arg_data: Argument for analytical process over graph data.
     """
 
-    tool_call_id: Annotated[str, InjectedToolCallId] = Field(
-        description="Tool call ID."
-    )
+    tool_call_id: Annotated[str, InjectedToolCallId] = Field(description="Tool call ID.")
     state: Annotated[dict, InjectedState] = Field(description="Injected state.")
     prompt: str = Field(description="Prompt to interact with the backend.")
-    arg_data: ArgumentData = Field(
-        description="Experiment over graph data.", default=None
-    )
+    arg_data: ArgumentData = Field(description="Experiment over graph data.", default=None)
 
 
 class MultimodalSubgraphExtractionTool(BaseTool):
@@ -56,12 +54,11 @@ class MultimodalSubgraphExtractionTool(BaseTool):
 
     name: str = "subgraph_extraction"
     description: str = "A tool for subgraph extraction based on user's prompt."
-    args_schema: Type[BaseModel] = MultimodalSubgraphExtractionInput
+    args_schema: type[BaseModel] = MultimodalSubgraphExtractionInput
 
-    def _prepare_query_modalities(self,
-                                  prompt_emb: list,
-                                  state: Annotated[dict, InjectedState],
-                                  pyg_graph: Data) -> pd.DataFrame:
+    def _prepare_query_modalities(
+        self, prompt_emb: list, state: Annotated[dict, InjectedState], pyg_graph: Data
+    ) -> pd.DataFrame:
         """
         Prepare the modality-specific query for subgraph extraction.
 
@@ -75,77 +72,90 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         """
         # Initialize dataframes
         multimodal_df = pd.DataFrame({"name": []})
-        query_df = pd.DataFrame({"node_id": [],
-                                 "node_type": [],
-                                 "x": [],
-                                 "desc_x": [],
-                                 "use_description": []})
+        query_df = pd.DataFrame(
+            {
+                "node_id": [],
+                "node_type": [],
+                "x": [],
+                "desc_x": [],
+                "use_description": [],
+            }
+        )
 
         # Loop over the uploaded files and find multimodal files
         for i in range(len(state["uploaded_files"])):
             # Check if multimodal file is uploaded
             if state["uploaded_files"][i]["file_type"] == "multimodal":
                 # Read the Excel file
-                multimodal_df = pd.read_excel(state["uploaded_files"][i]["file_path"],
-                                              sheet_name=None)
+                multimodal_df = pd.read_excel(
+                    state["uploaded_files"][i]["file_path"], sheet_name=None
+                )
 
         # Check if the multimodal_df is empty
         if len(multimodal_df) > 0:
             # Merge all obtained dataframes into a single dataframe
             multimodal_df = pd.concat(multimodal_df).reset_index()
             multimodal_df.drop(columns=["level_1"], inplace=True)
-            multimodal_df.rename(columns={"level_0": "q_node_type",
-                                          "name": "q_node_name"}, inplace=True)
+            multimodal_df.rename(
+                columns={"level_0": "q_node_type", "name": "q_node_name"}, inplace=True
+            )
             # Since an excel sheet name could not contain a `/`,
             # but the node type can be 'gene/protein' as exists in the PrimeKG
             multimodal_df["q_node_type"] = multimodal_df.q_node_type.apply(
-                lambda x: x.replace('-', '/')
+                lambda x: x.replace("-", "/")
             )
 
             # Convert PyG graph to a DataFrame for easier filtering
-            graph_df = pd.DataFrame({
-                "node_id": pyg_graph.node_id,
-                "node_name": pyg_graph.node_name,
-                "node_type": pyg_graph.node_type,
-                "x": pyg_graph.x,
-                "desc_x": pyg_graph.desc_x.tolist(),
-            })
+            graph_df = pd.DataFrame(
+                {
+                    "node_id": pyg_graph.node_id,
+                    "node_name": pyg_graph.node_name,
+                    "node_type": pyg_graph.node_type,
+                    "x": pyg_graph.x,
+                    "desc_x": pyg_graph.desc_x.tolist(),
+                }
+            )
 
             # Make a query dataframe by merging the graph_df and multimodal_df
-            query_df = graph_df.merge(multimodal_df, how='cross')
+            query_df = graph_df.merge(multimodal_df, how="cross")
             query_df = query_df[
                 query_df.apply(
-                    lambda x:
-                    (x['q_node_name'].lower() in x['node_name'].lower()) & # node name
-                    (x['node_type'] == x['q_node_type']), # node type
-                    axis=1
+                    lambda x: (x["q_node_name"].lower() in x["node_name"].lower())  # node name
+                    & (x["node_type"] == x["q_node_type"]),  # node type
+                    axis=1,
                 )
             ]
-            query_df = query_df[['node_id', 'node_type', 'x', 'desc_x']].reset_index(drop=True)
-            query_df['use_description'] = False # set to False for modal-specific embeddings
+            query_df = query_df[["node_id", "node_type", "x", "desc_x"]].reset_index(drop=True)
+            query_df["use_description"] = False  # set to False for modal-specific embeddings
 
             # Update the state by adding the the selected node IDs
             state["selections"] = query_df.groupby("node_type")["node_id"].apply(list).to_dict()
 
         # Append a user prompt to the query dataframe
-        query_df = pd.concat([
-            query_df,
-            pd.DataFrame({
-                'node_id': 'user_prompt',
-                'node_type': 'prompt',
-                'x': prompt_emb,
-                'desc_x': prompt_emb,
-                'use_description': True # set to True for user prompt embedding
-            })
-        ]).reset_index(drop=True)
+        query_df = pd.concat(
+            [
+                query_df,
+                pd.DataFrame(
+                    {
+                        "node_id": "user_prompt",
+                        "node_type": "prompt",
+                        "x": prompt_emb,
+                        "desc_x": prompt_emb,
+                        "use_description": True,  # set to True for user prompt embedding
                    }
                ),
            ]
        ).reset_index(drop=True)
 
         return query_df
 
-    def _perform_subgraph_extraction(self,
-                                     state: Annotated[dict, InjectedState],
-                                     cfg: dict,
-                                     pyg_graph: Data,
-                                     query_df: pd.DataFrame) -> dict:
+    def _perform_subgraph_extraction(
+        self,
+        state: Annotated[dict, InjectedState],
+        cfg: dict,
+        pyg_graph: Data,
+        query_df: pd.DataFrame,
+    ) -> dict:
         """
         Perform multimodal subgraph extraction based on modal-specific embeddings.
 
@@ -176,11 +186,13 @@ class MultimodalSubgraphExtractionTool(BaseTool):
                 num_clusters=cfg.num_clusters,
                 pruning=cfg.pruning,
                 verbosity_level=cfg.verbosity_level,
-                use_description=q[1]['use_description'],
-            ).extract_subgraph(pyg_graph,
-                               torch.tensor(q[1]['desc_x']), # description embedding
-                               torch.tensor(q[1]['x']), # modal-specific embedding
-                               q[1]['node_type'])
+                use_description=q[1]["use_description"],
+            ).extract_subgraph(
+                pyg_graph,
+                torch.tensor(q[1]["desc_x"]),  # description embedding
+                torch.tensor(q[1]["x"]),  # modal-specific embedding
+                q[1]["node_type"],
+            )
 
             # Append the extracted subgraph to the dictionary
             subgraphs["nodes"].append(subgraph["nodes"].tolist())
@@ -196,11 +208,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
 
         return subgraphs
 
-    def _prepare_final_subgraph(self,
-                                state:Annotated[dict, InjectedState],
-                                subgraph: dict,
-                                graph: dict,
-                                cfg) -> dict:
+    def _prepare_final_subgraph(
+        self, state: Annotated[dict, InjectedState], subgraph: dict, graph: dict, cfg
+    ) -> dict:
         """
         Prepare the subgraph based on the extracted subgraph.
 
@@ -227,14 +237,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
             # Edge features
             edge_index=torch.LongTensor(
                 [
-                    [
-                        mapping[i]
-                        for i in graph["pyg"].edge_index[:, subgraph["edges"]][0].tolist()
-                    ],
-                    [
-                        mapping[i]
-                        for i in graph["pyg"].edge_index[:, subgraph["edges"]][1].tolist()
-                    ],
+                    [mapping[i] for i in graph["pyg"].edge_index[:, subgraph["edges"]][0].tolist()],
+                    [mapping[i] for i in graph["pyg"].edge_index[:, subgraph["edges"]][1].tolist()],
                 ]
             ),
             edge_attr=graph["pyg"].edge_attr[subgraph["edges"]],
@@ -247,8 +251,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         # Networkx DiGraph construction to be visualized in the frontend
         nx_graph = nx.DiGraph()
         # Add nodes with attributes
-        node_colors = {n: cfg.node_colors_dict[k]
-                       for k, v in state["selections"].items() for n in v}
+        node_colors = {
+            n: cfg.node_colors_dict[k] for k, v in state["selections"].items() for n in v
+        }
         for n in pyg_graph.node_name:
             nx_graph.add_node(n, color=node_colors.get(n, None))
 
@@ -256,7 +261,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         edges = zip(
             pyg_graph.edge_index[0].tolist(),
             pyg_graph.edge_index[1].tolist(),
-            pyg_graph.edge_type
+            pyg_graph.edge_type,
+            strict=False,
         )
         for src, dst, edge_type in edges:
             nx_graph.add_edge(
@@ -303,7 +309,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         # Load hydra configuration
         with hydra.initialize(version_base=None, config_path="../configs"):
             cfg = hydra.compose(
-                config_name="config", overrides=["tools/multimodal_subgraph_extraction=default"]
+                config_name="config",
+                overrides=["tools/multimodal_subgraph_extraction=default"],
            )
             cfg = cfg.tools.multimodal_subgraph_extraction
 
@@ -322,20 +329,14 @@ class MultimodalSubgraphExtractionTool(BaseTool):
         query_df = self._prepare_query_modalities(
             [EmbeddingWithOllama(model_name=cfg.ollama_embeddings[0]).embed_query(prompt)],
             state,
-            initial_graph["pyg"]
+            initial_graph["pyg"],
         )
 
         # Perform subgraph extraction
-        subgraphs = self._perform_subgraph_extraction(state,
-                                                      cfg,
-                                                      initial_graph["pyg"],
-                                                      query_df)
+        subgraphs = self._perform_subgraph_extraction(state, cfg, initial_graph["pyg"], query_df)
 
         # Prepare subgraph as a NetworkX graph and textualized graph
-        final_subgraph = self._prepare_final_subgraph(state,
-                                                      subgraphs,
-                                                      initial_graph,
-                                                      cfg)
+        final_subgraph = self._prepare_final_subgraph(state, subgraphs, initial_graph, cfg)
 
         # Prepare the dictionary of extracted graph
         dic_extracted_graph = {
@@ -362,7 +363,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
 
         # Return the updated state of the tool
         return Command(
-            update=dic_updated_state_for_model | {
+            update=dic_updated_state_for_model
+            | {
                 # update the message history
                 "messages": [
                     ToolMessage(

aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py

@@ -2,29 +2,31 @@
 Tool for performing subgraph extraction.
 """
 
-from typing import Type, Annotated
 import logging
 import pickle
-import numpy as np
-import pandas as pd
+from typing import Annotated
+
 import hydra
 import networkx as nx
-from pydantic import BaseModel, Field
-from langchain.chains.retrieval import create_retrieval_chain
+import numpy as np
+import pandas as pd
+import torch
 from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain.chains.retrieval import create_retrieval_chain
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_core.messages import ToolMessage
 from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.vectorstores import InMemoryVectorStore
 from langchain_core.tools import BaseTool
-from langchain_core.messages import ToolMessage
 from langchain_core.tools.base import InjectedToolCallId
-from langchain_community.document_loaders import PyPDFLoader
+from langchain_core.vectorstores import InMemoryVectorStore
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langgraph.types import Command
 from langgraph.prebuilt import InjectedState
-import torch
+from langgraph.types import Command
+from pydantic import BaseModel, Field
 from torch_geometric.data import Data
-from ..utils.extractions.pcst import PCSTPruning
+
 from ..utils.embeddings.ollama import EmbeddingWithOllama
+from ..utils.extractions.pcst import PCSTPruning
 from .load_arguments import ArgumentData
 
 # Initialize logger
@@ -43,14 +45,10 @@ class SubgraphExtractionInput(BaseModel):
         arg_data: Argument for analytical process over graph data.
     """
 
-    tool_call_id: Annotated[str, InjectedToolCallId] = Field(
-        description="Tool call ID."
-    )
+    tool_call_id: Annotated[str, InjectedToolCallId] = Field(description="Tool call ID.")
     state: Annotated[dict, InjectedState] = Field(description="Injected state.")
     prompt: str = Field(description="Prompt to interact with the backend.")
-    arg_data: ArgumentData = Field(
-        description="Experiment over graph data.", default=None
-    )
+    arg_data: ArgumentData = Field(description="Experiment over graph data.", default=None)
 
 
 class SubgraphExtractionTool(BaseTool):
@@ -61,7 +59,7 @@ class SubgraphExtractionTool(BaseTool):
 
     name: str = "subgraph_extraction"
     description: str = "A tool for subgraph extraction based on user's prompt."
-    args_schema: Type[BaseModel] = SubgraphExtractionInput
+    args_schema: type[BaseModel] = SubgraphExtractionInput
 
     def perform_endotype_filtering(
         self,
@@ -98,9 +96,7 @@ class SubgraphExtractionTool(BaseTool):
             ]
         )
 
-        qa_chain = create_stuff_documents_chain(
-            state["llm_model"], prompt_template
-        )
+        qa_chain = create_stuff_documents_chain(state["llm_model"], prompt_template)
         rag_chain = create_retrieval_chain(
             InMemoryVectorStore.from_documents(
                 documents=splits, embedding=state["embedding_model"]
@@ -119,16 +115,13 @@ class SubgraphExtractionTool(BaseTool):
 
         # Prepare the prompt
         if len(all_genes) > 0:
-            prompt = " ".join(
-                [prompt, cfg.prompt_endotype_addition, ", ".join(all_genes)]
-            )
+            prompt = " ".join([prompt, cfg.prompt_endotype_addition, ", ".join(all_genes)])
 
         return prompt
 
-    def prepare_final_subgraph(self,
-                               subgraph: dict,
-                               pyg_graph: Data,
-                               textualized_graph: pd.DataFrame) -> dict:
+    def prepare_final_subgraph(
+        self, subgraph: dict, pyg_graph: Data, textualized_graph: pd.DataFrame
+    ) -> dict:
         """
         Prepare the subgraph based on the extracted subgraph.
 
@@ -153,14 +146,8 @@ class SubgraphExtractionTool(BaseTool):
             # Edge features
             edge_index=torch.LongTensor(
                 [
-                    [
-                        mapping[i]
-                        for i in pyg_graph.edge_index[:, subgraph["edges"]][0].tolist()
-                    ],
-                    [
-                        mapping[i]
-                        for i in pyg_graph.edge_index[:, subgraph["edges"]][1].tolist()
-                    ],
+                    [mapping[i] for i in pyg_graph.edge_index[:, subgraph["edges"]][0].tolist()],
+                    [mapping[i] for i in pyg_graph.edge_index[:, subgraph["edges"]][1].tolist()],
                 ]
             ),
             edge_attr=pyg_graph.edge_attr[subgraph["edges"]],
@@ -293,7 +280,8 @@ class SubgraphExtractionTool(BaseTool):
 
         # Return the updated state of the tool
         return Command(
-            update=dic_updated_state_for_model | {
+            update=dic_updated_state_for_model
+            | {
                 # update the message history
                 "messages": [
                     ToolMessage(

aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py

@@ -3,16 +3,17 @@ Tool for performing subgraph summarization.
 """
 
 import logging
-from typing import Type, Annotated
-from pydantic import BaseModel, Field
+from typing import Annotated
+
+import hydra
+from langchain_core.messages import ToolMessage
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.messages import ToolMessage
-from langchain_core.tools.base import InjectedToolCallId
 from langchain_core.tools import BaseTool
-from langgraph.types import Command
+from langchain_core.tools.base import InjectedToolCallId
 from langgraph.prebuilt import InjectedState
-import hydra
+from langgraph.types import Command
+from pydantic import BaseModel, Field
 
 # Initialize logger
 logging.basicConfig(level=logging.INFO)
@@ -31,9 +32,7 @@ class SubgraphSummarizationInput(BaseModel):
         extraction_name: Name assigned to the subgraph extraction process
     """
 
-    tool_call_id: Annotated[str, InjectedToolCallId] = Field(
-        description="Tool call ID."
-    )
+    tool_call_id: Annotated[str, InjectedToolCallId] = Field(description="Tool call ID.")
     state: Annotated[dict, InjectedState] = Field(description="Injected state.")
     prompt: str = Field(description="Prompt to interact with the backend.")
     extraction_name: str = Field(
@@ -51,7 +50,7 @@ class SubgraphSummarizationTool(BaseTool):
     name: str = "subgraph_summarization"
     description: str = """A tool to perform subgraph summarization over textualized graph
     for responding to user's follow-up prompt(s)."""
-    args_schema: Type[BaseModel] = SubgraphSummarizationInput
+    args_schema: type[BaseModel] = SubgraphSummarizationInput
 
     def _run(
         self,
@@ -69,9 +68,7 @@ class SubgraphSummarizationTool(BaseTool):
             prompt: The prompt to interact with the backend.
             extraction_name: The name assigned to the subgraph extraction process.
         """
-        logger.log(
-            logging.INFO, "Invoking subgraph_summarization tool for %s", extraction_name
-        )
+        logger.log(logging.INFO, "Invoking subgraph_summarization tool for %s", extraction_name)
 
         # Load hydra configuration
         with hydra.initialize(version_base=None, config_path="../configs"):

aiagents4pharma/talk2knowledgegraphs/utils/__init__.py

@@ -1,8 +1,5 @@
-'''
+"""
 This file is used to import all the models in the package.
-'''
-from . import embeddings
-from . import enrichments
-from . import extractions
-from . import kg_utils
-from . import pubchem_utils
+"""
+
+from . import embeddings, enrichments, extractions, kg_utils, pubchem_utils

aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py

@@ -1,8 +1,5 @@
-'''
+"""
 This file is used to import all the models in the package.
-'''
-from . import embeddings
-from . import sentence_transformer
-from . import huggingface
-from . import ollama
-from . import nim_molmim
+"""
+
+from . import embeddings, huggingface, nim_molmim, ollama, sentence_transformer

aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py

@@ -2,9 +2,12 @@
 Embeddings interface from LangChain Core.
 https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/embeddings/embeddings.py
 """
+
 from abc import ABC, abstractmethod
+
 from langchain_core.runnables.config import run_in_executor
 
+
 class Embeddings(ABC):
     """Interface for embedding models.
 
@@ -32,6 +35,7 @@ class Embeddings(ABC):
     however, implementations may choose to override the asynchronous methods with
     an async native implementation for performance reasons.
     """
+
     @abstractmethod
     def embed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed search docs.

aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py

@@ -2,11 +2,12 @@
 Embedding class using HuggingFace model based on LangChain Embeddings class.
 """
 
-from typing import List
 import torch
-from transformers import AutoModel, AutoTokenizer, AutoConfig
+from transformers import AutoConfig, AutoModel, AutoTokenizer
+
 from .embeddings import Embeddings
 
+
 class EmbeddingWithHuggingFace(Embeddings):
     """
     Embedding class using HuggingFace model based on LangChain Embeddings class.
@@ -39,18 +40,14 @@ class EmbeddingWithHuggingFace(Embeddings):
         # Try to load the model from HuggingFace Hub
         try:
             AutoConfig.from_pretrained(self.model_name)
-        except EnvironmentError as e:
-            raise ValueError(
-                f"Model {self.model_name} is not available on HuggingFace Hub."
-            ) from e
+        except OSError as e:
+            raise ValueError(f"Model {self.model_name} is not available on HuggingFace Hub.") from e
 
         # Load HuggingFace tokenizer and model
         self.tokenizer = AutoTokenizer.from_pretrained(
             self.model_name, cache_dir=self.model_cache_dir
         )
-        self.model = AutoModel.from_pretrained(
-            self.model_name, cache_dir=self.model_cache_dir
-        )
+        self.model = AutoModel.from_pretrained(self.model_name, cache_dir=self.model_cache_dir)
 
     def meanpooling(self, output, mask) -> torch.Tensor:
         """
@@ -62,11 +59,11 @@ class EmbeddingWithHuggingFace(Embeddings):
             output: The output of the model.
             mask: The mask of the model.
         """
-        embeddings = output[0] # First element of model_output contains all token embeddings
+        embeddings = output[0]  # First element of model_output contains all token embeddings
         mask = mask.unsqueeze(-1).expand(embeddings.size()).float()
         return torch.sum(embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)
 
-    def embed_documents(self, texts: List[str]) -> List[float]:
+    def embed_documents(self, texts: list[str]) -> list[float]:
         """
         Generate embedding for a list of input texts using HuggingFace model.
 
@@ -86,11 +83,11 @@ class EmbeddingWithHuggingFace(Embeddings):
             return_tensors="pt",
         ).to(self.device)
         outputs = self.model.to(self.device)(**inputs)
-        embeddings = self.meanpooling(outputs, inputs['attention_mask']).cpu()
+        embeddings = self.meanpooling(outputs, inputs["attention_mask"]).cpu()
 
         return embeddings
 
-    def embed_query(self, text: str) -> List[float]:
+    def embed_query(self, text: str) -> list[float]:
         """
         Generate embeddings for an input text using HuggingFace model.
 
@@ -109,6 +106,6 @@ class EmbeddingWithHuggingFace(Embeddings):
             return_tensors="pt",
         ).to(self.device)
         outputs = self.model.to(self.device)(**inputs)
-        embeddings = self.meanpooling(outputs, inputs['attention_mask']).cpu()[0]
+        embeddings = self.meanpooling(outputs, inputs["attention_mask"]).cpu()[0]
 
         return embeddings

aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py

@@ -3,14 +3,17 @@ Embedding class using MOLMIM model from NVIDIA NIM.
 """
 
 import json
-from typing import List
+
 import requests
+
 from .embeddings import Embeddings
 
+
 class EmbeddingWithMOLMIM(Embeddings):
     """
     Embedding class using MOLMIM model from NVIDIA NIM
     """
+
     def __init__(self, base_url: str):
         """
         Initialize the EmbeddingWithMOLMIM class.
@@ -21,7 +24,7 @@ class EmbeddingWithMOLMIM(Embeddings):
         # Set base URL
         self.base_url = base_url
 
-    def embed_documents(self, texts: List[str]) -> List[float]:
+    def embed_documents(self, texts: list[str]) -> list[float]:
         """
         Generate embedding for a list of SMILES strings using MOLMIM model.
 
@@ -31,16 +34,13 @@ class EmbeddingWithMOLMIM(Embeddings):
         Returns:
             The list of embeddings for the given SMILES strings.
         """
-        headers = {
-            'accept': 'application/json',
-            'Content-Type': 'application/json'
-        }
+        headers = {"accept": "application/json", "Content-Type": "application/json"}
         data = json.dumps({"sequences": texts})
         response = requests.post(self.base_url, headers=headers, data=data, timeout=60)
         embeddings = response.json()["embeddings"]
         return embeddings
 
-    def embed_query(self, text: str) -> List[float]:
+    def embed_query(self, text: str) -> list[float]:
         """
         Generate embeddings for an input query using MOLMIM model.
 