aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289) hide show
  1. aiagents4pharma/__init__.py +2 -2
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
  11. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
  12. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  13. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  14. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  16. aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
  17. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
  18. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
  19. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
  20. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
  21. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  22. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  23. aiagents4pharma/talk2biomodels/README.md +1 -0
  24. aiagents4pharma/talk2biomodels/__init__.py +4 -8
  25. aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
  26. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
  27. aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
  28. aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
  29. aiagents4pharma/talk2biomodels/api/ols.py +13 -10
  30. aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
  31. aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
  32. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
  33. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
  34. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
  35. aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
  36. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
  37. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
  38. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
  39. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
  40. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
  41. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
  42. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
  43. aiagents4pharma/talk2biomodels/install.md +63 -0
  44. aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
  45. aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
  46. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
  47. aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
  48. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
  49. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  50. aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
  51. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  52. aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
  53. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
  54. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
  55. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
  56. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
  57. aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
  58. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
  59. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
  60. aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
  61. aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
  62. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
  63. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
  64. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
  65. aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
  66. aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
  67. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
  68. aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
  69. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
  70. aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
  71. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
  72. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
  73. aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
  74. aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
  75. aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
  76. aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
  77. aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
  78. aiagents4pharma/talk2cells/README.md +1 -0
  79. aiagents4pharma/talk2cells/__init__.py +4 -5
  80. aiagents4pharma/talk2cells/agents/__init__.py +3 -2
  81. aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
  82. aiagents4pharma/talk2cells/states/__init__.py +3 -2
  83. aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
  84. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
  85. aiagents4pharma/talk2cells/tools/__init__.py +3 -2
  86. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
  87. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
  88. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
  89. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  90. aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
  91. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  92. aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
  93. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
  94. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
  95. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
  96. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
  97. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
  98. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
  99. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
  100. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
  101. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
  102. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
  103. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
  104. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
  105. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
  106. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
  107. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
  108. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
  109. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
  110. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
  111. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
  112. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
  113. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
  114. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
  115. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
  116. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
  117. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
  118. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  119. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  120. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  121. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  122. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
  123. aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
  124. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
  125. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
  126. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
  127. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
  128. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
  129. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
  130. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
  131. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
  132. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
  133. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +334 -216
  134. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
  135. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
  136. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
  137. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +160 -97
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +87 -13
  150. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
  151. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
  152. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +145 -142
  153. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
  154. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
  155. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
  156. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
  157. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
  158. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
  159. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
  160. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
  161. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
  162. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
  163. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
  164. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
  165. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
  166. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
  167. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
  168. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
  169. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
  170. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
  171. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +14 -34
  172. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
  173. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
  174. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
  175. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
  176. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  177. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  178. aiagents4pharma/talk2scholars/README.md +1 -0
  179. aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
  180. aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
  181. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
  182. aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
  183. aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
  184. aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
  185. aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
  186. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
  187. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
  188. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
  189. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
  190. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
  191. aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
  192. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  193. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  194. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  195. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  196. aiagents4pharma/talk2scholars/install.md +122 -0
  197. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
  198. aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
  199. aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
  200. aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
  201. aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
  202. aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
  203. aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
  204. aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
  205. aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
  206. aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
  207. aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
  208. aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
  209. aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
  210. aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
  211. aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
  212. aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
  213. aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
  214. aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
  215. aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
  216. aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
  217. aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
  218. aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
  219. aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
  220. aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
  221. aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
  222. aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
  223. aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
  224. aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
  225. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
  226. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
  227. aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
  228. aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
  229. aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
  230. aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
  231. aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
  232. aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
  233. aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
  234. aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
  235. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
  236. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
  237. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
  238. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
  239. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
  240. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
  241. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
  242. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
  243. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
  244. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
  245. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
  246. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
  247. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
  248. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
  249. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
  250. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
  251. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
  252. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
  253. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
  254. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
  255. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
  256. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
  257. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
  258. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
  259. aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
  260. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
  261. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
  262. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
  263. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
  264. aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
  265. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
  266. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
  267. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
  268. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
  269. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
  270. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
  271. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
  272. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
  273. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
  274. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
  275. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
  276. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
  277. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
  278. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
  279. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
  280. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/METADATA +115 -51
  281. aiagents4pharma-1.45.1.dist-info/RECORD +324 -0
  282. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/WHEEL +1 -2
  283. aiagents4pharma-1.44.0.dist-info/RECORD +0 -293
  284. aiagents4pharma-1.44.0.dist-info/top_level.txt +0 -1
  285. /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
  286. /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
  287. /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
  288. /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
  289. {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/licenses/LICENSE +0 -0
@@ -3,7 +3,7 @@ Tool for performing multimodal subgraph extraction.
3
3
  """
4
4
 
5
5
  import logging
6
- from typing import Annotated, Type
6
+ from typing import Annotated
7
7
 
8
8
  import hydra
9
9
  import pandas as pd
@@ -39,14 +39,10 @@ class MultimodalSubgraphExtractionInput(BaseModel):
39
39
  arg_data: Argument for analytical process over graph data.
40
40
  """
41
41
 
42
- tool_call_id: Annotated[str, InjectedToolCallId] = Field(
43
- description="Tool call ID."
44
- )
42
+ tool_call_id: Annotated[str, InjectedToolCallId] = Field(description="Tool call ID.")
45
43
  state: Annotated[dict, InjectedState] = Field(description="Injected state.")
46
44
  prompt: str = Field(description="Prompt to interact with the backend.")
47
- arg_data: ArgumentData = Field(
48
- description="Experiment over graph data.", default=None
49
- )
45
+ arg_data: ArgumentData = Field(description="Experiment over graph data.", default=None)
50
46
 
51
47
 
52
48
  class MultimodalSubgraphExtractionTool(BaseTool):
@@ -57,18 +53,19 @@ class MultimodalSubgraphExtractionTool(BaseTool):
57
53
 
58
54
  name: str = "subgraph_extraction"
59
55
  description: str = "A tool for subgraph extraction based on user's prompt."
60
- args_schema: Type[BaseModel] = MultimodalSubgraphExtractionInput
56
+ args_schema: type[BaseModel] = MultimodalSubgraphExtractionInput
61
57
 
62
58
  def __init__(self, **kwargs):
63
59
  super().__init__(**kwargs)
64
60
  # Initialize hardware detection and dynamic library loading
65
- object.__setattr__(self, 'detector', SystemDetector())
66
- object.__setattr__(self, 'loader', DynamicLibraryLoader(self.detector))
67
- logger.info("MultimodalSubgraphExtractionTool initialized with %s mode",
68
- "GPU" if self.loader.use_gpu else "CPU")
61
+ object.__setattr__(self, "detector", SystemDetector())
62
+ object.__setattr__(self, "loader", DynamicLibraryLoader(self.detector))
63
+ logger.info(
64
+ "MultimodalSubgraphExtractionTool initialized with %s mode",
65
+ "GPU" if self.loader.use_gpu else "CPU",
66
+ )
69
67
 
70
- def _read_multimodal_files(self,
71
- state: Annotated[dict, InjectedState]):
68
+ def _read_multimodal_files(self, state: Annotated[dict, InjectedState]):
72
69
  """
73
70
  Read the uploaded multimodal files and return a DataFrame.
74
71
 
@@ -86,8 +83,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
86
83
  # Check if multimodal file is uploaded
87
84
  if state["uploaded_files"][i]["file_type"] == "multimodal":
88
85
  # Read the Excel file
89
- multimodal_df = pd.read_excel(state["uploaded_files"][i]["file_path"],
90
- sheet_name=None)
86
+ multimodal_df = pd.read_excel(
87
+ state["uploaded_files"][i]["file_path"], sheet_name=None
88
+ )
91
89
 
92
90
  # Check if the multimodal_df is empty
93
91
  logger.log(logging.INFO, "Checking if multimodal_df is empty")
@@ -98,11 +96,12 @@ class MultimodalSubgraphExtractionTool(BaseTool):
98
96
  multimodal_df = pd.concat(multimodal_df).reset_index()
99
97
  multimodal_df = self.loader.df.DataFrame(multimodal_df)
100
98
  multimodal_df.drop(columns=["level_1"], inplace=True)
101
- multimodal_df.rename(columns={"level_0": "q_node_type",
102
- "name": "q_node_name"}, inplace=True)
99
+ multimodal_df.rename(
100
+ columns={"level_0": "q_node_type", "name": "q_node_name"}, inplace=True
101
+ )
103
102
  # Since an excel sheet name could not contain a `/`,
104
103
  # but the node type can be 'gene/protein' as exists in the PrimeKG
105
- multimodal_df["q_node_type"] = multimodal_df["q_node_type"].str.replace('-', '_')
104
+ multimodal_df["q_node_type"] = multimodal_df["q_node_type"].str.replace("-", "_")
106
105
 
107
106
  return multimodal_df
108
107
 
@@ -115,30 +114,28 @@ class MultimodalSubgraphExtractionTool(BaseTool):
115
114
  collection.load()
116
115
 
117
116
  # Query the collection with node names from multimodal_df
118
- node_names_series = node_type_df['q_node_name']
119
- q_node_names = getattr(node_names_series,
120
- "to_pandas",
121
- lambda series=node_names_series: series)().tolist()
122
- q_columns = ["node_id", "node_name", "node_type",
123
- "feat", "feat_emb", "desc", "desc_emb"]
117
+ node_names_series = node_type_df["q_node_name"]
118
+ q_node_names = getattr(
119
+ node_names_series, "to_pandas", lambda series=node_names_series: series
120
+ )().tolist()
121
+ q_columns = ["node_id", "node_name", "node_type", "feat", "feat_emb", "desc", "desc_emb"]
124
122
  res = collection.query(
125
- expr=f'node_name IN [{','.join(f'"{name}"' for name in q_node_names)}]',
123
+ expr=f"node_name IN [{','.join(f'"{name}"' for name in q_node_names)}]",
126
124
  output_fields=q_columns,
127
125
  )
128
126
  # Convert the embeedings into floats
129
127
  for r_ in res:
130
- r_['feat_emb'] = [float(x) for x in r_['feat_emb']]
131
- r_['desc_emb'] = [float(x) for x in r_['desc_emb']]
128
+ r_["feat_emb"] = [float(x) for x in r_["feat_emb"]]
129
+ r_["desc_emb"] = [float(x) for x in r_["desc_emb"]]
132
130
 
133
131
  # Convert the result to a DataFrame
134
132
  res_df = self.loader.df.DataFrame(res)[q_columns]
135
133
  res_df["use_description"] = False
136
134
  return res_df
137
135
 
138
- def _prepare_query_modalities(self,
139
- prompt: dict,
140
- state: Annotated[dict, InjectedState],
141
- cfg_db: dict):
136
+ def _prepare_query_modalities(
137
+ self, prompt: dict, state: Annotated[dict, InjectedState], cfg_db: dict
138
+ ):
142
139
  """
143
140
  Prepare the modality-specific query for subgraph extraction.
144
141
 
@@ -153,16 +150,18 @@ class MultimodalSubgraphExtractionTool(BaseTool):
153
150
  # Initialize dataframes
154
151
  logger.log(logging.INFO, "Initializing dataframes")
155
152
  query_df = []
156
- prompt_df = self.loader.df.DataFrame({
157
- 'node_id': 'user_prompt',
158
- 'node_name': 'User Prompt',
159
- 'node_type': 'prompt',
160
- 'feat': prompt["text"],
161
- 'feat_emb': prompt["emb"],
162
- 'desc': prompt["text"],
163
- 'desc_emb': prompt["emb"],
164
- 'use_description': True # set to True for user prompt embedding
165
- })
153
+ prompt_df = self.loader.df.DataFrame(
154
+ {
155
+ "node_id": "user_prompt",
156
+ "node_name": "User Prompt",
157
+ "node_type": "prompt",
158
+ "feat": prompt["text"],
159
+ "feat_emb": prompt["emb"],
160
+ "desc": prompt["text"],
161
+ "desc_emb": prompt["emb"],
162
+ "use_description": True, # set to True for user prompt embedding
163
+ }
164
+ )
166
165
 
167
166
  # Read multimodal files uploaded by the user
168
167
  multimodal_df = self._read_multimodal_files(state)
@@ -171,7 +170,10 @@ class MultimodalSubgraphExtractionTool(BaseTool):
171
170
  logger.log(logging.INFO, "Prepare query modalities")
172
171
  if len(multimodal_df) > 0:
173
172
  # Query the Milvus database for each node type in multimodal_df
174
- logger.log(logging.INFO, "Querying Milvus database for each node type in multimodal_df")
173
+ logger.log(
174
+ logging.INFO,
175
+ "Querying Milvus database for each node type in multimodal_df",
176
+ )
175
177
  for node_type, node_type_df in multimodal_df.groupby("q_node_type"):
176
178
  print(f"Processing node type: {node_type}")
177
179
  res_df = self._query_milvus_collection(node_type, node_type_df, cfg_db)
@@ -183,11 +185,12 @@ class MultimodalSubgraphExtractionTool(BaseTool):
183
185
 
184
186
  # Update the state by adding the the selected node IDs
185
187
  logger.log(logging.INFO, "Updating state with selected node IDs")
186
- state["selections"] = getattr(query_df,
187
- "to_pandas",
188
- lambda: query_df)().groupby(
189
- "node_type"
190
- )["node_id"].apply(list).to_dict()
188
+ state["selections"] = (
189
+ getattr(query_df, "to_pandas", lambda: query_df)()
190
+ .groupby("node_type")["node_id"]
191
+ .apply(list)
192
+ .to_dict()
193
+ )
191
194
 
192
195
  # Append a user prompt to the query dataframe
193
196
  logger.log(logging.INFO, "Adding user prompt to query dataframe")
@@ -198,11 +201,13 @@ class MultimodalSubgraphExtractionTool(BaseTool):
198
201
 
199
202
  return query_df
200
203
 
201
- def _perform_subgraph_extraction(self,
202
- state: Annotated[dict, InjectedState],
203
- cfg: dict,
204
- cfg_db: dict,
205
- query_df: pd.DataFrame) -> dict:
204
+ def _perform_subgraph_extraction(
205
+ self,
206
+ state: Annotated[dict, InjectedState],
207
+ cfg: dict,
208
+ cfg_db: dict,
209
+ query_df: pd.DataFrame,
210
+ ) -> dict:
206
211
  """
207
212
  Perform multimodal subgraph extraction based on modal-specific embeddings.
208
213
 
@@ -217,10 +222,7 @@ class MultimodalSubgraphExtractionTool(BaseTool):
217
222
  """
218
223
  # Initialize the subgraph dictionary
219
224
  subgraphs = []
220
- unified_subgraph = {
221
- "nodes": [],
222
- "edges": []
223
- }
225
+ unified_subgraph = {"nodes": [], "edges": []}
224
226
  # subgraphs = {}
225
227
  # subgraphs["nodes"] = []
226
228
  # subgraphs["edges"] = []
@@ -228,22 +230,21 @@ class MultimodalSubgraphExtractionTool(BaseTool):
228
230
  # Loop over query embeddings and modalities
229
231
  for q in getattr(query_df, "to_pandas", lambda: query_df)().iterrows():
230
232
  logger.log(logging.INFO, "===========================================")
231
- logger.log(logging.INFO, "Processing query: %s", q[1]['node_name'])
233
+ logger.log(logging.INFO, "Processing query: %s", q[1]["node_name"])
232
234
  # Prepare the PCSTPruning object and extract the subgraph
233
235
  # Parameters were set in the configuration file obtained from Hydra
234
236
  # start = datetime.datetime.now()
235
237
  # Get dynamic metric type (overrides any config setting)
236
238
  # Get dynamic metric type (overrides any config setting)
237
- has_vector_processing = hasattr(cfg, 'vector_processing')
239
+ has_vector_processing = hasattr(cfg, "vector_processing")
238
240
  if has_vector_processing:
239
- dynamic_metrics_enabled = getattr(cfg.vector_processing, 'dynamic_metrics', True)
241
+ dynamic_metrics_enabled = getattr(cfg.vector_processing, "dynamic_metrics", True)
240
242
  else:
241
243
  dynamic_metrics_enabled = False
242
244
  if has_vector_processing and dynamic_metrics_enabled:
243
245
  dynamic_metric_type = self.loader.metric_type
244
246
  else:
245
- dynamic_metric_type = getattr(cfg, 'search_metric_type',
246
- self.loader.metric_type)
247
+ dynamic_metric_type = getattr(cfg, "search_metric_type", self.loader.metric_type)
247
248
 
248
249
  subgraph = MultimodalPCSTPruning(
249
250
  topk=state["topk_nodes"],
@@ -254,20 +255,21 @@ class MultimodalSubgraphExtractionTool(BaseTool):
254
255
  num_clusters=cfg.num_clusters,
255
256
  pruning=cfg.pruning,
256
257
  verbosity_level=cfg.verbosity_level,
257
- use_description=q[1]['use_description'],
258
+ use_description=q[1]["use_description"],
258
259
  metric_type=dynamic_metric_type, # Use dynamic or config metric type
259
- loader=self.loader # Pass the loader instance
260
- ).extract_subgraph(q[1]['desc_emb'],
261
- q[1]['feat_emb'],
262
- q[1]['node_type'],
263
- cfg_db)
260
+ loader=self.loader, # Pass the loader instance
261
+ ).extract_subgraph(q[1]["desc_emb"], q[1]["feat_emb"], q[1]["node_type"], cfg_db)
264
262
 
265
263
  # Append the extracted subgraph to the dictionary
266
264
  unified_subgraph["nodes"].append(subgraph["nodes"].tolist())
267
265
  unified_subgraph["edges"].append(subgraph["edges"].tolist())
268
- subgraphs.append((q[1]['node_name'],
269
- subgraph["nodes"].tolist(),
270
- subgraph["edges"].tolist()))
266
+ subgraphs.append(
267
+ (
268
+ q[1]["node_name"],
269
+ subgraph["nodes"].tolist(),
270
+ subgraph["edges"].tolist(),
271
+ )
272
+ )
271
273
 
272
274
  # end = datetime.datetime.now()
273
275
  # logger.log(logging.INFO, "Subgraph extraction time: %s seconds",
@@ -284,10 +286,10 @@ class MultimodalSubgraphExtractionTool(BaseTool):
284
286
  ).tolist()
285
287
 
286
288
  # Convert the unified subgraph and subgraphs to DataFrames
287
- unified_subgraph = self.loader.df.DataFrame([("Unified Subgraph",
288
- unified_subgraph["nodes"],
289
- unified_subgraph["edges"])],
290
- columns=["name", "nodes", "edges"])
289
+ unified_subgraph = self.loader.df.DataFrame(
290
+ [("Unified Subgraph", unified_subgraph["nodes"], unified_subgraph["edges"])],
291
+ columns=["name", "nodes", "edges"],
292
+ )
291
293
  subgraphs = self.loader.df.DataFrame(subgraphs, columns=["name", "nodes", "edges"])
292
294
 
293
295
  # Concatenate both DataFrames
@@ -295,11 +297,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
295
297
 
296
298
  return subgraphs
297
299
 
298
- def _prepare_final_subgraph(self,
299
- state:Annotated[dict, InjectedState],
300
- subgraph: dict,
301
- cfg: dict,
302
- cfg_db) -> dict:
300
+ def _prepare_final_subgraph(
301
+ self, state: Annotated[dict, InjectedState], subgraph: dict, cfg: dict, cfg_db
302
+ ) -> dict:
303
303
  """
304
304
  Prepare the subgraph based on the extracted subgraph.
305
305
 
@@ -314,53 +314,62 @@ class MultimodalSubgraphExtractionTool(BaseTool):
314
314
  A dictionary containing the PyG graph, NetworkX graph, and textualized graph.
315
315
  """
316
316
  # Convert the dict to a DataFrame
317
- node_colors = {n: cfg.node_colors_dict[k]
318
- for k, v in state["selections"].items() for n in v}
317
+ node_colors = {
318
+ n: cfg.node_colors_dict[k] for k, v in state["selections"].items() for n in v
319
+ }
319
320
  color_df = self.loader.df.DataFrame(list(node_colors.items()), columns=["node_id", "color"])
320
321
  # print(color_df)
321
322
 
322
323
  # Prepare the subgraph dictionary
323
- graph_dict = {
324
- "name": [],
325
- "nodes": [],
326
- "edges": [],
327
- "text": ""
328
- }
324
+ graph_dict = {"name": [], "nodes": [], "edges": [], "text": ""}
329
325
  for sub in getattr(subgraph, "to_pandas", lambda: subgraph)().itertuples(index=False):
330
326
  graph_nodes, graph_edges = self._process_subgraph_data(sub, cfg_db, color_df)
331
327
 
332
328
  # Prepare lists for visualization
333
329
  graph_dict["name"].append(sub.name)
334
- graph_dict["nodes"].append([(
335
- row.node_id,
336
- {'hover': "Node Name : " + row.node_name + "\n" +\
337
- "Node Type : " + row.node_type + "\n" +
338
- "Desc : " + row.desc,
339
- 'click': '$hover',
340
- 'color': row.color})
341
- for row in getattr(graph_nodes,
342
- "to_pandas",
343
- lambda graph_nodes=graph_nodes: graph_nodes)()
344
- .itertuples(index=False)])
345
- graph_dict["edges"].append([(
346
- row.head_id,
347
- row.tail_id,
348
- {'label': tuple(row.edge_type)})
349
- for row in getattr(graph_edges,
350
- "to_pandas",
351
- lambda graph_edges=graph_edges: graph_edges)()
352
- .itertuples(index=False)])
330
+ graph_dict["nodes"].append(
331
+ [
332
+ (
333
+ row.node_id,
334
+ {
335
+ "hover": "Node Name : "
336
+ + row.node_name
337
+ + "\n"
338
+ + "Node Type : "
339
+ + row.node_type
340
+ + "\n"
341
+ + "Desc : "
342
+ + row.desc,
343
+ "click": "$hover",
344
+ "color": row.color,
345
+ },
346
+ )
347
+ for row in getattr(
348
+ graph_nodes, "to_pandas", lambda graph_nodes=graph_nodes: graph_nodes
349
+ )().itertuples(index=False)
350
+ ]
351
+ )
352
+ graph_dict["edges"].append(
353
+ [
354
+ (row.head_id, row.tail_id, {"label": tuple(row.edge_type)})
355
+ for row in getattr(
356
+ graph_edges, "to_pandas", lambda graph_edges=graph_edges: graph_edges
357
+ )().itertuples(index=False)
358
+ ]
359
+ )
353
360
 
354
361
  # Prepare the textualized subgraph
355
362
  if sub.name == "Unified Subgraph":
356
- graph_nodes = graph_nodes[['node_id', 'desc']]
357
- graph_nodes.rename(columns={'desc': 'node_attr'}, inplace=True)
358
- graph_edges = graph_edges[['head_id', 'edge_type', 'tail_id']]
359
- nodes_pandas = getattr(graph_nodes, "to_pandas",
360
- lambda graph_nodes=graph_nodes: graph_nodes)()
363
+ graph_nodes = graph_nodes[["node_id", "desc"]]
364
+ graph_nodes.rename(columns={"desc": "node_attr"}, inplace=True)
365
+ graph_edges = graph_edges[["head_id", "edge_type", "tail_id"]]
366
+ nodes_pandas = getattr(
367
+ graph_nodes, "to_pandas", lambda graph_nodes=graph_nodes: graph_nodes
368
+ )()
361
369
  nodes_csv = nodes_pandas.to_csv(index=False)
362
- edges_pandas = getattr(graph_edges, "to_pandas",
363
- lambda graph_edges=graph_edges: graph_edges)()
370
+ edges_pandas = getattr(
371
+ graph_edges, "to_pandas", lambda graph_edges=graph_edges: graph_edges
372
+ )()
364
373
  edges_csv = edges_pandas.to_csv(index=False)
365
374
  graph_dict["text"] = nodes_csv + "\n" + edges_csv
366
375
 
@@ -369,44 +378,43 @@ class MultimodalSubgraphExtractionTool(BaseTool):
369
378
  def _process_subgraph_data(self, sub, cfg_db, color_df):
370
379
  """Helper method to process individual subgraph data."""
371
380
  print(f"Processing subgraph: {sub.name}")
372
- print('---')
381
+ print("---")
373
382
  print(sub.nodes)
374
- print('---')
383
+ print("---")
375
384
  print(sub.edges)
376
- print('---')
385
+ print("---")
377
386
 
378
387
  # Prepare graph dataframes - Nodes
379
388
  coll_name = f"{cfg_db.milvus_db.database_name}_nodes"
380
389
  node_coll = Collection(name=coll_name)
381
390
  node_coll.load()
382
391
  graph_nodes = node_coll.query(
383
- expr=f'node_index IN [{",".join(f"{n}" for n in sub.nodes)}]',
384
- output_fields=['node_id', 'node_name', 'node_type', 'desc']
392
+ expr=f"node_index IN [{','.join(f'{n}' for n in sub.nodes)}]",
393
+ output_fields=["node_id", "node_name", "node_type", "desc"],
385
394
  )
386
395
  graph_nodes = self.loader.df.DataFrame(graph_nodes)
387
- graph_nodes.drop(columns=['node_index'], inplace=True)
396
+ graph_nodes.drop(columns=["node_index"], inplace=True)
388
397
  if not color_df.empty:
389
398
  graph_nodes = graph_nodes.merge(color_df, on="node_id", how="left")
390
399
  else:
391
- graph_nodes["color"] = 'black'
392
- graph_nodes['color'] = graph_nodes['color'].fillna('black')
400
+ graph_nodes["color"] = "black"
401
+ graph_nodes["color"] = graph_nodes["color"].fillna("black")
393
402
 
394
403
  # Edges
395
404
  coll_name = f"{cfg_db.milvus_db.database_name}_edges"
396
405
  edge_coll = Collection(name=coll_name)
397
406
  edge_coll.load()
398
407
  graph_edges = edge_coll.query(
399
- expr=f'triplet_index IN [{",".join(f"{e}" for e in sub.edges)}]',
400
- output_fields=['head_id', 'tail_id', 'edge_type']
408
+ expr=f"triplet_index IN [{','.join(f'{e}' for e in sub.edges)}]",
409
+ output_fields=["head_id", "tail_id", "edge_type"],
401
410
  )
402
411
  graph_edges = self.loader.df.DataFrame(graph_edges)
403
- graph_edges.drop(columns=['triplet_index'], inplace=True)
404
- graph_edges['edge_type'] = graph_edges['edge_type'].str.split('|')
412
+ graph_edges.drop(columns=["triplet_index"], inplace=True)
413
+ graph_edges["edge_type"] = graph_edges["edge_type"].str.split("|")
405
414
 
406
415
  return graph_nodes, graph_edges
407
416
 
408
- def normalize_vector(self,
409
- v : list) -> list:
417
+ def normalize_vector(self, v: list) -> list:
410
418
  """
411
419
  Normalize a vector using appropriate library (CuPy for GPU, NumPy for CPU).
412
420
 
@@ -448,7 +456,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
448
456
  # Load hydra configuration
449
457
  with hydra.initialize(version_base=None, config_path="../configs"):
450
458
  cfg = hydra.compose(
451
- config_name="config", overrides=["tools/multimodal_subgraph_extraction=default"]
459
+ config_name="config",
460
+ overrides=["tools/multimodal_subgraph_extraction=default"],
452
461
  )
453
462
  cfg_db = cfg.app.frontend
454
463
  cfg = cfg.tools.multimodal_subgraph_extraction
@@ -468,10 +477,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
468
477
  logger.log(logging.INFO, "_prepare_query_modalities")
469
478
  # start = datetime.datetime.now()
470
479
  query_df = self._prepare_query_modalities(
471
- {"text": prompt,
472
- "emb": [self.normalize_vector(
473
- state["embedding_model"].embed_query(prompt)
474
- )]
480
+ {
481
+ "text": prompt,
482
+ "emb": [self.normalize_vector(state["embedding_model"].embed_query(prompt))],
475
483
  },
476
484
  state,
477
485
  cfg_db,
@@ -483,10 +491,7 @@ class MultimodalSubgraphExtractionTool(BaseTool):
483
491
  # Perform subgraph extraction
484
492
  logger.log(logging.INFO, "_perform_subgraph_extraction")
485
493
  # start = datetime.datetime.now()
486
- subgraphs = self._perform_subgraph_extraction(state,
487
- cfg,
488
- cfg_db,
489
- query_df)
494
+ subgraphs = self._perform_subgraph_extraction(state, cfg, cfg_db, query_df)
490
495
  # end = datetime.datetime.now()
491
496
  # logger.log(logging.INFO, "_perform_subgraph_extraction time: %s seconds",
492
497
  # (end - start).total_seconds())
@@ -495,10 +500,7 @@ class MultimodalSubgraphExtractionTool(BaseTool):
495
500
  logger.log(logging.INFO, "_prepare_final_subgraph")
496
501
  logger.log(logging.INFO, "Subgraphs extracted: %s", len(subgraphs))
497
502
  # start = datetime.datetime.now()
498
- final_subgraph = self._prepare_final_subgraph(state,
499
- subgraphs,
500
- cfg,
501
- cfg_db)
503
+ final_subgraph = self._prepare_final_subgraph(state, subgraphs, cfg, cfg_db)
502
504
  # end = datetime.datetime.now()
503
505
  # logger.log(logging.INFO, "_prepare_final_subgraph time: %s seconds",
504
506
  # (end - start).total_seconds())
@@ -534,7 +536,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
534
536
 
535
537
  # Return the updated state of the tool
536
538
  return Command(
537
- update=dic_updated_state_for_model | {
539
+ update=dic_updated_state_for_model
540
+ | {
538
541
  # update the message history
539
542
  "messages": [
540
543
  ToolMessage(