aiagents4pharma 1.43.0__py3-none-any.whl → 1.45.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (290)
  1. aiagents4pharma/__init__.py +2 -2
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
  11. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
  12. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  13. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  14. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  16. aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
  17. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
  18. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
  19. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
  20. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
  21. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  22. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  23. aiagents4pharma/talk2biomodels/README.md +1 -0
  24. aiagents4pharma/talk2biomodels/__init__.py +4 -8
  25. aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
  26. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
  27. aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
  28. aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
  29. aiagents4pharma/talk2biomodels/api/ols.py +13 -10
  30. aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
  31. aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
  32. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
  33. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
  34. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
  35. aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
  36. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
  37. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
  38. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
  39. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
  40. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
  41. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
  42. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
  43. aiagents4pharma/talk2biomodels/install.md +63 -0
  44. aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
  45. aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
  46. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
  47. aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
  48. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
  49. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  50. aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
  51. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  52. aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
  53. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
  54. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
  55. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
  56. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
  57. aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
  58. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
  59. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
  60. aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
  61. aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
  62. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
  63. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
  64. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
  65. aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
  66. aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
  67. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
  68. aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
  69. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
  70. aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
  71. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
  72. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
  73. aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
  74. aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
  75. aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
  76. aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
  77. aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
  78. aiagents4pharma/talk2cells/README.md +1 -0
  79. aiagents4pharma/talk2cells/__init__.py +4 -5
  80. aiagents4pharma/talk2cells/agents/__init__.py +3 -2
  81. aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
  82. aiagents4pharma/talk2cells/states/__init__.py +3 -2
  83. aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
  84. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
  85. aiagents4pharma/talk2cells/tools/__init__.py +3 -2
  86. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
  87. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
  88. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
  89. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  90. aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
  91. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  92. aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
  93. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
  94. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
  95. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
  96. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
  97. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
  98. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
  99. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
  100. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
  101. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
  102. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
  103. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
  104. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
  105. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +17 -2
  106. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
  107. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
  108. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
  110. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
  111. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
  112. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
  113. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
  114. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
  115. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
  116. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
  117. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
  118. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
  119. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  120. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  121. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  122. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  123. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
  124. aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
  125. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
  126. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
  127. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
  128. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
  129. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
  130. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
  131. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
  132. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
  133. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
  134. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +736 -413
  135. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
  136. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
  137. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
  138. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +442 -42
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +10 -6
  151. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
  152. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
  153. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +245 -205
  154. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
  155. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
  156. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
  157. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
  158. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
  159. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
  160. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
  161. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
  162. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
  163. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
  164. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
  165. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
  166. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
  167. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
  168. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
  169. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
  170. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
  171. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
  172. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +218 -81
  173. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
  174. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
  175. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
  176. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
  177. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  178. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  179. aiagents4pharma/talk2scholars/README.md +1 -0
  180. aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
  181. aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
  182. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
  183. aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
  184. aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
  185. aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
  186. aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
  187. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
  188. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
  189. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
  190. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
  191. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
  192. aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
  193. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  194. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  195. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  196. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  197. aiagents4pharma/talk2scholars/install.md +122 -0
  198. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
  199. aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
  200. aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
  201. aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
  202. aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
  203. aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
  204. aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
  205. aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
  206. aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
  207. aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
  208. aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
  209. aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
  210. aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
  211. aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
  212. aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
  213. aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
  214. aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
  215. aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
  216. aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
  217. aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
  218. aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
  219. aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
  220. aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
  221. aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
  222. aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
  223. aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
  224. aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
  225. aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
  226. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
  227. aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
  228. aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
  229. aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
  230. aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
  231. aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
  232. aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
  233. aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
  234. aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
  235. aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
  236. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
  237. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
  238. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
  239. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
  240. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
  241. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
  242. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
  243. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
  244. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
  245. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
  246. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
  247. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
  248. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
  249. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
  250. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
  251. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
  252. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
  253. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
  254. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
  255. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
  256. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
  257. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
  258. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
  259. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
  260. aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
  261. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
  262. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
  263. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
  264. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
  265. aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
  266. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
  267. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
  268. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
  269. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
  270. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
  271. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
  272. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
  273. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
  274. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
  275. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
  276. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
  277. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
  278. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
  279. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
  280. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
  281. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/METADATA +115 -50
  282. aiagents4pharma-1.45.0.dist-info/RECORD +324 -0
  283. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/WHEEL +1 -2
  284. aiagents4pharma-1.43.0.dist-info/RECORD +0 -293
  285. aiagents4pharma-1.43.0.dist-info/top_level.txt +0 -1
  286. /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
  287. /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
  288. /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
  289. /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
  290. {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/licenses/LICENSE +0 -0
@@ -18,7 +18,7 @@ import os
18
18
  import platform
19
19
  import subprocess
20
20
  import sys
21
- from typing import Any, Dict, List, Optional, Union
21
+ from typing import Any, Union
22
22
 
23
23
  # Configure logging
24
24
  logging.basicConfig(level=logging.INFO, format="[DATA LOADER] %(message)s")
@@ -32,9 +32,7 @@ class SystemDetector:
32
32
  self.os_type = platform.system().lower() # 'windows', 'linux', 'darwin'
33
33
  self.architecture = platform.machine().lower() # 'x86_64', 'arm64', etc.
34
34
  self.has_nvidia_gpu = self._detect_nvidia_gpu()
35
- self.use_gpu = (
36
- self.has_nvidia_gpu and self.os_type != "darwin"
37
- ) # No CUDA on macOS
35
+ self.use_gpu = self.has_nvidia_gpu and self.os_type != "darwin" # No CUDA on macOS
38
36
 
39
37
  logger.info("System Detection Results:")
40
38
  logger.info(" OS: %s", self.os_type)
@@ -46,9 +44,7 @@ class SystemDetector:
46
44
  """Detect if NVIDIA GPU is available."""
47
45
  try:
48
46
  # Try nvidia-smi command
49
- result = subprocess.run(
50
- ["nvidia-smi"], capture_output=True, text=True, timeout=10
51
- )
47
+ result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, timeout=10)
52
48
  return result.returncode == 0
53
49
  except (
54
50
  subprocess.TimeoutExpired,
@@ -57,7 +53,7 @@ class SystemDetector:
57
53
  ):
58
54
  return False
59
55
 
60
- def get_required_packages(self) -> List[str]:
56
+ def get_required_packages(self) -> list[str]:
61
57
  """Get list of packages to install based on system capabilities - matches original logic."""
62
58
  if self.use_gpu and self.os_type == "linux":
63
59
  # Exact package list from original script for GPU mode
@@ -103,9 +99,7 @@ class SystemDetector:
103
99
  if result.returncode != 0:
104
100
  logger.error("Error installing package: %s", result.stderr)
105
101
  if "cudf" in package_cmd or "dask-cudf" in package_cmd:
106
- logger.warning(
107
- "GPU package installation failed, falling back to CPU mode"
108
- )
102
+ logger.warning("GPU package installation failed, falling back to CPU mode")
109
103
  self.use_gpu = False
110
104
  return self.install_packages() # Retry with CPU packages
111
105
  else:
@@ -115,9 +109,7 @@ class SystemDetector:
115
109
  except subprocess.CalledProcessError as e:
116
110
  logger.error("Failed to install %s: %s", package_cmd, e.stderr)
117
111
  if "cudf" in package_cmd:
118
- logger.warning(
119
- "GPU package installation failed, falling back to CPU mode"
120
- )
112
+ logger.warning("GPU package installation failed, falling back to CPU mode")
121
113
  self.use_gpu = False
122
114
  return self.install_packages() # Retry with CPU packages
123
115
  else:
@@ -130,7 +122,7 @@ class SystemDetector:
130
122
  class DynamicDataLoader:
131
123
  """Dynamic data loader that adapts to system capabilities."""
132
124
 
133
- def __init__(self, config: Dict[str, Any]):
125
+ def __init__(self, config: dict[str, Any]):
134
126
  """Initialize with system detection and dynamic library loading."""
135
127
  self.config = config
136
128
  self.detector = SystemDetector()
@@ -207,13 +199,15 @@ class DynamicDataLoader:
207
199
  logger.info("Successfully imported GPU libraries (cudf, cupy)")
208
200
  except ImportError as e:
209
201
  logger.error(
210
- "[DATA LOADER] cudf or cupy not found. Please ensure they are installed correctly."
202
+ "[DATA LOADER] cudf or cupy not found. "
203
+ "Please ensure they are installed correctly."
211
204
  )
212
205
  logger.error("Import error: %s", str(e))
213
206
  # Match original script's exit behavior for critical GPU import failure
214
207
  if not os.getenv("FORCE_CPU", "false").lower() == "true":
215
208
  logger.error(
216
- "GPU libraries required but not available. Set FORCE_CPU=true to use CPU mode."
209
+ "GPU libraries required but not available. "
210
+ "Set FORCE_CPU=true to use CPU mode."
217
211
  )
218
212
  sys.exit(1)
219
213
  else:
@@ -222,7 +216,7 @@ class DynamicDataLoader:
222
216
  self.use_gpu = False
223
217
 
224
218
  def _read_dataframe(
225
- self, file_path: str, columns: Optional[List[str]] = None
219
+ self, file_path: str, columns: list[str] | None = None
226
220
  ) -> Union["pd.DataFrame", "cudf.DataFrame"]: # type: ignore[reportUndefinedVariable] # noqa: F821
227
221
  """Read dataframe using appropriate library."""
228
222
  if self.use_gpu:
@@ -231,7 +225,7 @@ class DynamicDataLoader:
231
225
  return self.pd.read_parquet(file_path, columns=columns)
232
226
 
233
227
  def _concat_dataframes(
234
- self, df_list: List, ignore_index: bool = True
228
+ self, df_list: list, ignore_index: bool = True
235
229
  ) -> Union["pd.DataFrame", "cudf.DataFrame"]: # type: ignore[reportUndefinedVariable] # noqa: F821
236
230
  """Concatenate dataframes using appropriate library."""
237
231
  if self.use_gpu:
@@ -257,9 +251,7 @@ class DynamicDataLoader:
257
251
  """Extract embeddings and convert to appropriate format."""
258
252
  if self.use_gpu:
259
253
  # cuDF list extraction
260
- emb_data = self.cp.asarray(df[column_name].list.leaves).astype(
261
- self.cp.float32
262
- )
254
+ emb_data = self.cp.asarray(df[column_name].list.leaves).astype(self.cp.float32)
263
255
  return emb_data.reshape(df.shape[0], -1)
264
256
  else:
265
257
  # pandas extraction
@@ -325,9 +317,7 @@ class DynamicDataLoader:
325
317
  for stage in ["enrichment", "embedding"]:
326
318
  logger.info("Processing %s %s", element, stage)
327
319
 
328
- file_list = glob.glob(
329
- os.path.join(self.data_dir, element, stage, "*.parquet.gzip")
330
- )
320
+ file_list = glob.glob(os.path.join(self.data_dir, element, stage, "*.parquet.gzip"))
331
321
  logger.info("Found %d files for %s %s", len(file_list), element, stage)
332
322
 
333
323
  if not file_list:
@@ -342,13 +332,9 @@ class DynamicDataLoader:
342
332
  chunk_files = file_list[i : i + chunk_size]
343
333
  chunk_df_list = []
344
334
  for f in chunk_files:
345
- df = self._read_dataframe(
346
- f, columns=["triplet_index", "edge_emb"]
347
- )
335
+ df = self._read_dataframe(f, columns=["triplet_index", "edge_emb"])
348
336
  chunk_df_list.append(df)
349
- chunk_df = self._concat_dataframes(
350
- chunk_df_list, ignore_index=True
351
- )
337
+ chunk_df = self._concat_dataframes(chunk_df_list, ignore_index=True)
352
338
  graph[element][stage].append(chunk_df)
353
339
  else:
354
340
  # For other combinations, read all files
@@ -356,9 +342,7 @@ class DynamicDataLoader:
356
342
  for f in file_list:
357
343
  df = self._read_dataframe(f)
358
344
  df_list.append(df)
359
- graph[element][stage] = self._concat_dataframes(
360
- df_list, ignore_index=True
361
- )
345
+ graph[element][stage] = self._concat_dataframes(df_list, ignore_index=True)
362
346
 
363
347
  logger.info("Graph data loaded successfully")
364
348
  return graph
@@ -367,16 +351,15 @@ class DynamicDataLoader:
367
351
  """Get embedding dimension using original script's exact logic."""
368
352
  first_emb = df.iloc[0][column_name]
369
353
  if self.use_gpu:
370
- # cuDF format - matches original: len(nodes_df.iloc[0]['desc_emb'].to_arrow().to_pylist()[0])
354
+ # cuDF format - matches original:
355
+ # len(nodes_df.iloc[0]['desc_emb'].to_arrow().to_pylist()[0])
371
356
  return len(first_emb.to_arrow().to_pylist()[0])
372
357
  else:
373
358
  # pandas format
374
359
  if isinstance(first_emb, list):
375
360
  return len(first_emb)
376
361
  else:
377
- return len(
378
- first_emb.tolist() if hasattr(first_emb, "tolist") else first_emb
379
- )
362
+ return len(first_emb.tolist() if hasattr(first_emb, "tolist") else first_emb)
380
363
 
381
364
  def create_nodes_collection(self, nodes_df):
382
365
  """Create and populate the main nodes collection."""
@@ -431,9 +414,7 @@ class DynamicDataLoader:
431
414
 
432
415
  # Create collection if it doesn't exist
433
416
  if not self.pymilvus_modules["utility"].has_collection(node_coll_name):
434
- collection = self.pymilvus_modules["Collection"](
435
- name=node_coll_name, schema=schema
436
- )
417
+ collection = self.pymilvus_modules["Collection"](name=node_coll_name, schema=schema)
437
418
  else:
438
419
  collection = self.pymilvus_modules["Collection"](name=node_coll_name)
439
420
 
@@ -487,9 +468,7 @@ class DynamicDataLoader:
487
468
  collection.insert(batch)
488
469
 
489
470
  collection.flush()
490
- logger.info(
491
- "Nodes collection created with %d entities", collection.num_entities
492
- )
471
+ logger.info("Nodes collection created with %d entities", collection.num_entities)
493
472
 
494
473
  def create_node_type_collections(self, nodes_df):
495
474
  """Create separate collections for each node type."""
@@ -498,9 +477,7 @@ class DynamicDataLoader:
498
477
  for node_type, nodes_df_ in self.tqdm(
499
478
  nodes_df.groupby("node_type"), desc="Processing node types"
500
479
  ):
501
- node_coll_name = (
502
- f"{self.milvus_database}_nodes_{node_type.replace('/', '_')}"
503
- )
480
+ node_coll_name = f"{self.milvus_database}_nodes_{node_type.replace('/', '_')}"
504
481
 
505
482
  # Get embedding dimensions
506
483
  desc_dim = self._get_embedding_dimension(nodes_df_, "desc_emb")
@@ -564,9 +541,7 @@ class DynamicDataLoader:
564
541
  )
565
542
 
566
543
  if not self.pymilvus_modules["utility"].has_collection(node_coll_name):
567
- collection = self.pymilvus_modules["Collection"](
568
- name=node_coll_name, schema=schema
569
- )
544
+ collection = self.pymilvus_modules["Collection"](name=node_coll_name, schema=schema)
570
545
  else:
571
546
  collection = self.pymilvus_modules["Collection"](name=node_coll_name)
572
547
 
@@ -639,7 +614,7 @@ class DynamicDataLoader:
639
614
  collection.num_entities,
640
615
  )
641
616
 
642
- def create_edges_collection(self, edges_enrichment_df, edges_embedding_df: List):
617
+ def create_edges_collection(self, edges_enrichment_df, edges_embedding_df: list):
643
618
  """Create and populate the edges collection - exact original logic."""
644
619
  logger.info("Creating edges collection...")
645
620
 
@@ -647,9 +622,7 @@ class DynamicDataLoader:
647
622
 
648
623
  # Get embedding dimension from first chunk - exact original logic
649
624
  if self.use_gpu:
650
- emb_dim = len(
651
- edges_embedding_df[0].loc[0, "edge_emb"]
652
- ) # Original cudf access
625
+ emb_dim = len(edges_embedding_df[0].loc[0, "edge_emb"]) # Original cudf access
653
626
  else:
654
627
  first_edge_emb = edges_embedding_df[0].iloc[0]["edge_emb"]
655
628
  emb_dim = (
@@ -772,24 +745,18 @@ class DynamicDataLoader:
772
745
 
773
746
  # Insert data in batches
774
747
  total = len(data[0])
775
- for i in self.tqdm(
776
- range(0, total, self.batch_size), desc="Inserting edges"
777
- ):
748
+ for i in self.tqdm(range(0, total, self.batch_size), desc="Inserting edges"):
778
749
  batch_data = [d[i : i + self.batch_size] for d in data]
779
750
  collection.insert(batch_data)
780
751
 
781
752
  collection.flush()
782
- logger.info(
783
- "Edges collection created with %d entities", collection.num_entities
784
- )
753
+ logger.info("Edges collection created with %d entities", collection.num_entities)
785
754
 
786
755
  def run(self):
787
756
  """Main execution method."""
788
757
  try:
789
758
  logger.info("Starting Dynamic Milvus data loading process...")
790
- logger.info(
791
- "System: %s %s", self.detector.os_type, self.detector.architecture
792
- )
759
+ logger.info("System: %s %s", self.detector.os_type, self.detector.architecture)
793
760
  logger.info("GPU acceleration: %s", self.use_gpu)
794
761
 
795
762
  # Connect to Milvus
@@ -851,8 +818,7 @@ def main():
851
818
  "data_dir": os.getenv("DATA_DIR", default_data_dir),
852
819
  "batch_size": int(os.getenv("BATCH_SIZE", "500")),
853
820
  "chunk_size": int(os.getenv("CHUNK_SIZE", "5")),
854
- "auto_install_packages": os.getenv("AUTO_INSTALL_PACKAGES", "true").lower()
855
- == "true",
821
+ "auto_install_packages": os.getenv("AUTO_INSTALL_PACKAGES", "true").lower() == "true",
856
822
  }
857
823
 
858
824
  # Override detection for testing/forcing specific modes
@@ -1,4 +1,5 @@
1
- '''
1
+ """
2
2
  This file is used to import all the models in the package.
3
- '''
3
+ """
4
+
4
5
  from . import state_talk2knowledgegraphs
@@ -3,6 +3,7 @@ This is the state file for the Talk2KnowledgeGraphs agent.
3
3
  """
4
4
 
5
5
  from typing import Annotated
6
+
6
7
  # import operator
7
8
  from langchain_core.embeddings.embeddings import Embeddings
8
9
  from langchain_core.language_models.chat_models import BaseChatModel
@@ -1,15 +1,19 @@
1
1
  """
2
2
  Test cases for agents/t2kg_agent.py
3
3
  """
4
- from unittest.mock import patch, MagicMock
4
+
5
+ from unittest.mock import MagicMock, patch
6
+
7
+ import pandas as pd
5
8
  import pytest
6
9
  from langchain_core.messages import HumanMessage
7
10
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
8
- import pandas as pd
11
+
9
12
  from ..agents.t2kg_agent import get_app
10
13
 
11
14
  DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
12
15
 
16
+
13
17
  @pytest.fixture(name="input_dict")
14
18
  def input_dict_fixture():
15
19
  """
@@ -24,7 +28,7 @@ def input_dict_fixture():
24
28
  "cellular_component": [],
25
29
  "biological_process": [],
26
30
  "drug": [],
27
- "disease": []
31
+ "disease": [],
28
32
  },
29
33
  "uploaded_files": [
30
34
  {
@@ -44,42 +48,53 @@ def input_dict_fixture():
44
48
  "kg_text_path": f"{DATA_PATH}/biobridge_multimodal_text_graph.pkl",
45
49
  }
46
50
  ],
47
- "dic_extracted_graph": []
51
+ "dic_extracted_graph": [],
48
52
  }
49
53
  return input_dict
50
54
 
55
+
51
56
  def mock_milvus_collection(name):
52
57
  """
53
58
  Mock Milvus collection for testing.
54
59
  """
55
60
  nodes = MagicMock()
56
61
  nodes.query.return_value = [
57
- {"node_index": 0,
58
- "node_id": "id1",
59
- "node_name": "Adalimumab",
60
- "node_type": "drug",
61
- "feat": "featA", "feat_emb": [0.1, 0.2, 0.3],
62
- "desc": "descA", "desc_emb": [0.1, 0.2, 0.3]},
63
- {"node_index": 1,
64
- "node_id": "id2",
65
- "node_name": "TNF",
66
- "node_type": "gene/protein",
67
- "feat": "featB", "feat_emb": [0.4, 0.5, 0.6],
68
- "desc": "descB", "desc_emb": [0.4, 0.5, 0.6]}
62
+ {
63
+ "node_index": 0,
64
+ "node_id": "id1",
65
+ "node_name": "Adalimumab",
66
+ "node_type": "drug",
67
+ "feat": "featA",
68
+ "feat_emb": [0.1, 0.2, 0.3],
69
+ "desc": "descA",
70
+ "desc_emb": [0.1, 0.2, 0.3],
71
+ },
72
+ {
73
+ "node_index": 1,
74
+ "node_id": "id2",
75
+ "node_name": "TNF",
76
+ "node_type": "gene/protein",
77
+ "feat": "featB",
78
+ "feat_emb": [0.4, 0.5, 0.6],
79
+ "desc": "descB",
80
+ "desc_emb": [0.4, 0.5, 0.6],
81
+ },
69
82
  ]
70
83
  nodes.load.return_value = None
71
84
 
72
85
  edges = MagicMock()
73
86
  edges.query.return_value = [
74
- {"triplet_index": 0,
75
- "head_id": "id1",
76
- "head_index": 0,
77
- "tail_id": "id2",
78
- "tail_index": 1,
79
- "edge_type": "drug,acts_on,gene/protein",
80
- "display_relation": "acts_on",
81
- "feat": "featC",
82
- "feat_emb": [0.7, 0.8, 0.9]}
87
+ {
88
+ "triplet_index": 0,
89
+ "head_id": "id1",
90
+ "head_index": 0,
91
+ "tail_id": "id2",
92
+ "tail_index": 1,
93
+ "edge_type": "drug,acts_on,gene/protein",
94
+ "display_relation": "acts_on",
95
+ "feat": "featC",
96
+ "feat_emb": [0.7, 0.8, 0.9],
97
+ }
83
98
  ]
84
99
  edges.load.return_value = None
85
100
 
@@ -89,6 +104,7 @@ def mock_milvus_collection(name):
89
104
  return edges
90
105
  return None
91
106
 
107
+
92
108
  def test_t2kg_agent_openai_milvus_mock(input_dict):
93
109
  """
94
110
  Test the T2KG agent using OpenAI model and Milvus mock.
@@ -103,11 +119,11 @@ def test_t2kg_agent_openai_milvus_mock(input_dict):
103
119
  config = {"configurable": {"thread_id": unique_id}}
104
120
  app.update_state(config, input_dict)
105
121
  prompt = """
106
- Adalimumab is a fully human monoclonal antibody (IgG1)
122
+ Adalimumab is a fully human monoclonal antibody (IgG1)
107
123
  that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
108
124
 
109
125
  I would like to get evidence from the knowledge graph about the mechanism of actions related to
110
- Adalimumab in treating inflammatory bowel disease
126
+ Adalimumab in treating inflammatory bowel disease
111
127
  (IBD). Please follow these steps:
112
128
  - Extract a subgraph from the PrimeKG that contains information about Adalimumab.
113
129
  - Summarize the extracted subgraph.
@@ -116,21 +132,31 @@ def test_t2kg_agent_openai_milvus_mock(input_dict):
116
132
  Please set the extraction name for the extraction process as `subkg_12345`.
117
133
  """
118
134
 
119
- with patch("aiagents4pharma.talk2knowledgegraphs.tools."
120
- "milvus_multimodal_subgraph_extraction.Collection",
121
- side_effect=mock_milvus_collection), \
122
- patch("aiagents4pharma.talk2knowledgegraphs.tools."
123
- "milvus_multimodal_subgraph_extraction.MultimodalPCSTPruning") as mock_pcst, \
124
- patch("pymilvus.connections") as mock_connections, \
125
- patch("aiagents4pharma.talk2knowledgegraphs.tools."
126
- "milvus_multimodal_subgraph_extraction.hydra.initialize"), \
127
- patch("aiagents4pharma.talk2knowledgegraphs.tools."
128
- "milvus_multimodal_subgraph_extraction.hydra.compose") as mock_compose:
135
+ with (
136
+ patch(
137
+ "aiagents4pharma.talk2knowledgegraphs.tools."
138
+ "milvus_multimodal_subgraph_extraction.Collection",
139
+ side_effect=mock_milvus_collection,
140
+ ),
141
+ patch(
142
+ "aiagents4pharma.talk2knowledgegraphs.tools."
143
+ "milvus_multimodal_subgraph_extraction.MultimodalPCSTPruning"
144
+ ) as mock_pcst,
145
+ patch("pymilvus.connections") as mock_connections,
146
+ patch(
147
+ "aiagents4pharma.talk2knowledgegraphs.tools."
148
+ "milvus_multimodal_subgraph_extraction.hydra.initialize"
149
+ ),
150
+ patch(
151
+ "aiagents4pharma.talk2knowledgegraphs.tools."
152
+ "milvus_multimodal_subgraph_extraction.hydra.compose"
153
+ ) as mock_compose,
154
+ ):
129
155
  mock_connections.has_connection.return_value = True
130
156
  mock_pcst_instance = MagicMock()
131
157
  mock_pcst_instance.extract_subgraph.return_value = {
132
158
  "nodes": pd.Series([0, 1]),
133
- "edges": pd.Series([0])
159
+ "edges": pd.Series([0]),
134
160
  }
135
161
  mock_pcst.return_value = mock_pcst_instance
136
162
  mock_cfg = MagicMock()
@@ -144,8 +170,7 @@ def test_t2kg_agent_openai_milvus_mock(input_dict):
144
170
  mock_cfg.node_colors_dict = {"drug": "blue", "gene/protein": "red"}
145
171
  mock_compose.return_value = MagicMock()
146
172
  mock_compose.return_value.tools.multimodal_subgraph_extraction = mock_cfg
147
- mock_compose.return_value.tools.subgraph_summarization.\
148
- prompt_subgraph_summarization = (
173
+ mock_compose.return_value.tools.subgraph_summarization.prompt_subgraph_summarization = (
149
174
  "Summarize the following subgraph: {textualized_subgraph}"
150
175
  )
151
176
 
@@ -4,7 +4,9 @@ Test cases for datasets/primekg_loader.py
4
4
 
5
5
  import os
6
6
  import shutil
7
+
7
8
  import pytest
9
+
8
10
  from ..datasets.biobridge_primekg import BioBridgePrimeKG
9
11
 
10
12
  # Remove the data folder for testing if it exists
@@ -12,13 +14,14 @@ PRIMEKG_LOCAL_DIR = "../data/primekg_test/"
12
14
  LOCAL_DIR = "../data/biobridge_primekg_test/"
13
15
  shutil.rmtree(LOCAL_DIR, ignore_errors=True)
14
16
 
17
+
15
18
  @pytest.fixture(name="biobridge_primekg")
16
19
  def biobridge_primekg_fixture():
17
20
  """
18
21
  Fixture for creating an instance of PrimeKG.
19
22
  """
20
- return BioBridgePrimeKG(primekg_dir=PRIMEKG_LOCAL_DIR,
21
- local_dir=LOCAL_DIR)
23
+ return BioBridgePrimeKG(primekg_dir=PRIMEKG_LOCAL_DIR, local_dir=LOCAL_DIR)
24
+
22
25
 
23
26
  def test_download_primekg(biobridge_primekg):
24
27
  """
@@ -39,8 +42,7 @@ def test_download_primekg(biobridge_primekg):
39
42
  assert os.path.exists(biobridge_primekg.local_dir)
40
43
  # Check if downloaded and processed files exist
41
44
  # PrimeKG files
42
- files = ["nodes.tab", "primekg_nodes.tsv.gz",
43
- "edges.csv", "primekg_edges.tsv.gz"]
45
+ files = ["nodes.tab", "primekg_nodes.tsv.gz", "edges.csv", "primekg_edges.tsv.gz"]
44
46
  for file in files:
45
47
  path = f"{biobridge_primekg.primekg_dir}/{file}"
46
48
  assert os.path.exists(path)
@@ -54,7 +56,7 @@ def test_download_primekg(biobridge_primekg):
54
56
  "bp.pkl",
55
57
  "drug.pkl",
56
58
  "disease.pkl",
57
- "embedding_dict.pkl"
59
+ "embedding_dict.pkl",
58
60
  ]
59
61
  for file in files:
60
62
  path = f"{biobridge_primekg.local_dir}/embeddings/{file}"
@@ -89,9 +91,9 @@ def test_download_primekg(biobridge_primekg):
89
91
  # Check processed BioBridge data config
90
92
  assert biobridge_data_config is not None
91
93
  assert len(biobridge_data_config) > 0
92
- assert len(biobridge_data_config['node_type']) == 10
93
- assert len(biobridge_data_config['relation_type']) == 18
94
- assert len(biobridge_data_config['emb_dim']) == 6
94
+ assert len(biobridge_data_config["node_type"]) == 10
95
+ assert len(biobridge_data_config["relation_type"]) == 18
96
+ assert len(biobridge_data_config["emb_dim"]) == 6
95
97
  # Check processed BioBridge embeddings
96
98
  assert biobridge_emb_dict is not None
97
99
  assert len(biobridge_emb_dict) > 0
@@ -100,24 +102,26 @@ def test_download_primekg(biobridge_primekg):
100
102
  assert biobridge_triplets is not None
101
103
  assert len(biobridge_triplets) > 0
102
104
  assert biobridge_triplets.shape[0] == 3904610
103
- assert list(biobridge_splits.keys()) == ['train', 'node_train', 'test', 'node_test']
104
- assert len(biobridge_splits['train']) == 3510930
105
- assert len(biobridge_splits['node_train']) == 76486
106
- assert len(biobridge_splits['test']) == 393680
107
- assert len(biobridge_splits['node_test']) == 8495
105
+ assert list(biobridge_splits.keys()) == ["train", "node_train", "test", "node_test"]
106
+ assert len(biobridge_splits["train"]) == 3510930
107
+ assert len(biobridge_splits["node_train"]) == 76486
108
+ assert len(biobridge_splits["test"]) == 393680
109
+ assert len(biobridge_splits["node_test"]) == 8495
108
110
  # Check node info dictionary
109
- assert list(biobridge_node_info.keys()) == ['gene/protein',
110
- 'molecular_function',
111
- 'cellular_component',
112
- 'biological_process',
113
- 'drug',
114
- 'disease']
115
- assert len(biobridge_node_info['gene/protein']) == 19162
116
- assert len(biobridge_node_info['molecular_function']) == 10966
117
- assert len(biobridge_node_info['cellular_component']) == 4013
118
- assert len(biobridge_node_info['biological_process']) == 27478
119
- assert len(biobridge_node_info['drug']) == 6948
120
- assert len(biobridge_node_info['disease']) == 44133
111
+ assert list(biobridge_node_info.keys()) == [
112
+ "gene/protein",
113
+ "molecular_function",
114
+ "cellular_component",
115
+ "biological_process",
116
+ "drug",
117
+ "disease",
118
+ ]
119
+ assert len(biobridge_node_info["gene/protein"]) == 19162
120
+ assert len(biobridge_node_info["molecular_function"]) == 10966
121
+ assert len(biobridge_node_info["cellular_component"]) == 4013
122
+ assert len(biobridge_node_info["biological_process"]) == 27478
123
+ assert len(biobridge_node_info["drug"]) == 6948
124
+ assert len(biobridge_node_info["disease"]) == 44133
121
125
 
122
126
 
123
127
  def test_load_existing_primekg(biobridge_primekg):
@@ -139,8 +143,7 @@ def test_load_existing_primekg(biobridge_primekg):
139
143
  assert os.path.exists(biobridge_primekg.local_dir)
140
144
  # Check if downloaded and processed files exist
141
145
  # PrimeKG files
142
- files = ["nodes.tab", "primekg_nodes.tsv.gz",
143
- "edges.csv", "primekg_edges.tsv.gz"]
146
+ files = ["nodes.tab", "primekg_nodes.tsv.gz", "edges.csv", "primekg_edges.tsv.gz"]
144
147
  for file in files:
145
148
  path = f"{biobridge_primekg.primekg_dir}/{file}"
146
149
  assert os.path.exists(path)
@@ -154,7 +157,7 @@ def test_load_existing_primekg(biobridge_primekg):
154
157
  "bp.pkl",
155
158
  "drug.pkl",
156
159
  "disease.pkl",
157
- "embedding_dict.pkl"
160
+ "embedding_dict.pkl",
158
161
  ]
159
162
  for file in files:
160
163
  path = f"{biobridge_primekg.local_dir}/embeddings/{file}"
@@ -189,9 +192,9 @@ def test_load_existing_primekg(biobridge_primekg):
189
192
  # Check processed BioBridge data config
190
193
  assert biobridge_data_config is not None
191
194
  assert len(biobridge_data_config) > 0
192
- assert len(biobridge_data_config['node_type']) == 10
193
- assert len(biobridge_data_config['relation_type']) == 18
194
- assert len(biobridge_data_config['emb_dim']) == 6
195
+ assert len(biobridge_data_config["node_type"]) == 10
196
+ assert len(biobridge_data_config["relation_type"]) == 18
197
+ assert len(biobridge_data_config["emb_dim"]) == 6
195
198
  # Check processed BioBridge embeddings
196
199
  assert biobridge_emb_dict is not None
197
200
  assert len(biobridge_emb_dict) > 0
@@ -200,24 +203,27 @@ def test_load_existing_primekg(biobridge_primekg):
200
203
  assert biobridge_triplets is not None
201
204
  assert len(biobridge_triplets) > 0
202
205
  assert biobridge_triplets.shape[0] == 3904610
203
- assert list(biobridge_splits.keys()) == ['train', 'node_train', 'test', 'node_test']
204
- assert len(biobridge_splits['train']) == 3510930
205
- assert len(biobridge_splits['node_train']) == 76486
206
- assert len(biobridge_splits['test']) == 393680
207
- assert len(biobridge_splits['node_test']) == 8495
206
+ assert list(biobridge_splits.keys()) == ["train", "node_train", "test", "node_test"]
207
+ assert len(biobridge_splits["train"]) == 3510930
208
+ assert len(biobridge_splits["node_train"]) == 76486
209
+ assert len(biobridge_splits["test"]) == 393680
210
+ assert len(biobridge_splits["node_test"]) == 8495
208
211
  # Check node info dictionary
209
- assert list(biobridge_node_info.keys()) == ['gene/protein',
210
- 'molecular_function',
211
- 'cellular_component',
212
- 'biological_process',
213
- 'drug',
214
- 'disease']
215
- assert len(biobridge_node_info['gene/protein']) == 19162
216
- assert len(biobridge_node_info['molecular_function']) == 10966
217
- assert len(biobridge_node_info['cellular_component']) == 4013
218
- assert len(biobridge_node_info['biological_process']) == 27478
219
- assert len(biobridge_node_info['drug']) == 6948
220
- assert len(biobridge_node_info['disease']) == 44133
212
+ assert list(biobridge_node_info.keys()) == [
213
+ "gene/protein",
214
+ "molecular_function",
215
+ "cellular_component",
216
+ "biological_process",
217
+ "drug",
218
+ "disease",
219
+ ]
220
+ assert len(biobridge_node_info["gene/protein"]) == 19162
221
+ assert len(biobridge_node_info["molecular_function"]) == 10966
222
+ assert len(biobridge_node_info["cellular_component"]) == 4013
223
+ assert len(biobridge_node_info["biological_process"]) == 27478
224
+ assert len(biobridge_node_info["drug"]) == 6948
225
+ assert len(biobridge_node_info["disease"]) == 44133
226
+
221
227
 
222
228
  # def test_load_existing_primekg_with_negative_triplets(biobridge_primekg):
223
229
  # """