aiagents4pharma-0.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +11 -0
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
- aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
- aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
- aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
- aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/api/ols.py +75 -0
- aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
- aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
- aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
- aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
- aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
- aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
- aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
- aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
- aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
- aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
- aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
- aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
- aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
- aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
- aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
- aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +5 -0
- aiagents4pharma/talk2cells/agents/__init__.py +6 -0
- aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
- aiagents4pharma/talk2cells/states/__init__.py +6 -0
- aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
- aiagents4pharma/talk2cells/tools/__init__.py +6 -0
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
- aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
- aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/__init__.py +7 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
- aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
- aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
- aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
- aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
- aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
- aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
- aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
- aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
- aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
- aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/__init__.py +7 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
- aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
- aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
- aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
- aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
- aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
- aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
- aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
- aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
- aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
- aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
- aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
- aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
- aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
- aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
- aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
- aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
- aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
- aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
- aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
- aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
- aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
- aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
- aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
- aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
- aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
- aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0

aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py
@@ -0,0 +1,374 @@
"""
Tool for performing multimodal subgraph extraction.
"""

import logging
from typing import Annotated

import hydra
import joblib
import networkx as nx
import numpy as np
import pandas as pd
import torch
from langchain_core.messages import ToolMessage
from langchain_core.tools import BaseTool
from langchain_core.tools.base import InjectedToolCallId
from langgraph.prebuilt import InjectedState
from langgraph.types import Command
from pydantic import BaseModel, Field
from torch_geometric.data import Data

from ..utils.embeddings.ollama import EmbeddingWithOllama
from ..utils.extractions.multimodal_pcst import MultimodalPCSTPruning
from .load_arguments import ArgumentData

# Initialize logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class MultimodalSubgraphExtractionInput(BaseModel):
    """
    MultimodalSubgraphExtractionInput is a Pydantic model representing an input
    for extracting a subgraph.

    Args:
        prompt: Prompt to interact with the backend.
        tool_call_id: Tool call ID.
        state: Injected state.
        arg_data: Argument for analytical process over graph data.
    """

    tool_call_id: Annotated[str, InjectedToolCallId] = Field(description="Tool call ID.")
    state: Annotated[dict, InjectedState] = Field(description="Injected state.")
    prompt: str = Field(description="Prompt to interact with the backend.")
    arg_data: ArgumentData = Field(description="Experiment over graph data.", default=None)


class MultimodalSubgraphExtractionTool(BaseTool):
    """
    This tool performs subgraph extraction based on user's prompt by taking into account
    the top-k nodes and edges.
    """

    name: str = "subgraph_extraction"
    description: str = "A tool for subgraph extraction based on user's prompt."
    args_schema: type[BaseModel] = MultimodalSubgraphExtractionInput

    def _prepare_query_modalities(
        self, prompt_emb: list, state: Annotated[dict, InjectedState], pyg_graph: Data
    ) -> pd.DataFrame:
        """
        Prepare the modality-specific query for subgraph extraction.

        Args:
            prompt_emb: The embedding of the user prompt in a list.
            state: The injected state for the tool.
            pyg_graph: The PyTorch Geometric graph Data.

        Returns:
            A DataFrame containing the query embeddings and modalities.
        """
        # Initialize dataframes
        multimodal_df = pd.DataFrame({"name": []})
        query_df = pd.DataFrame(
            {
                "node_id": [],
                "node_type": [],
                "x": [],
                "desc_x": [],
                "use_description": [],
            }
        )

        # Loop over the uploaded files and find multimodal files
        for i in range(len(state["uploaded_files"])):
            # Check if multimodal file is uploaded
            if state["uploaded_files"][i]["file_type"] == "multimodal":
                # Read the Excel file
                multimodal_df = pd.read_excel(
                    state["uploaded_files"][i]["file_path"], sheet_name=None
                )

        # Check if the multimodal_df is empty
        if len(multimodal_df) > 0:
            # Merge all obtained dataframes into a single dataframe
            multimodal_df = pd.concat(multimodal_df).reset_index()
            multimodal_df.drop(columns=["level_1"], inplace=True)
            multimodal_df.rename(
                columns={"level_0": "q_node_type", "name": "q_node_name"}, inplace=True
            )
            # Since an excel sheet name could not contain a `/`,
            # but the node type can be 'gene/protein' as exists in the PrimeKG
            multimodal_df["q_node_type"] = multimodal_df.q_node_type.apply(
                lambda x: x.replace("-", "/")
            )

            # Convert PyG graph to a DataFrame for easier filtering
            graph_df = pd.DataFrame(
                {
                    "node_id": pyg_graph.node_id,
                    "node_name": pyg_graph.node_name,
                    "node_type": pyg_graph.node_type,
                    "x": pyg_graph.x,
                    "desc_x": pyg_graph.desc_x.tolist(),
                }
            )

            # Make a query dataframe by merging the graph_df and multimodal_df
            query_df = graph_df.merge(multimodal_df, how="cross")
            query_df = query_df[
                query_df.apply(
                    lambda x: (x["q_node_name"].lower() in x["node_name"].lower())  # node name
                    & (x["node_type"] == x["q_node_type"]),  # node type
                    axis=1,
                )
            ]
            query_df = query_df[["node_id", "node_type", "x", "desc_x"]].reset_index(drop=True)
            query_df["use_description"] = False  # set to False for modal-specific embeddings

            # Update the state by adding the selected node IDs
            state["selections"] = query_df.groupby("node_type")["node_id"].apply(list).to_dict()

        # Append a user prompt to the query dataframe
        query_df = pd.concat(
            [
                query_df,
                pd.DataFrame(
                    {
                        "node_id": "user_prompt",
                        "node_type": "prompt",
                        "x": prompt_emb,
                        "desc_x": prompt_emb,
                        "use_description": True,  # set to True for user prompt embedding
                    }
                ),
            ]
        ).reset_index(drop=True)

        return query_df

    def _perform_subgraph_extraction(
        self,
        state: Annotated[dict, InjectedState],
        cfg: dict,
        pyg_graph: Data,
        query_df: pd.DataFrame,
    ) -> dict:
        """
        Perform multimodal subgraph extraction based on modal-specific embeddings.

        Args:
            state: The injected state for the tool.
            cfg: The configuration dictionary.
            pyg_graph: The PyTorch Geometric graph Data.
            query_df: The DataFrame containing the query embeddings and modalities.

        Returns:
            A dictionary containing the extracted subgraph with nodes and edges.
        """
        # Initialize the subgraph dictionary
        subgraphs = {}
        subgraphs["nodes"] = []
        subgraphs["edges"] = []

        # Loop over query embeddings and modalities
        for q in query_df.iterrows():
            # Prepare the PCSTPruning object and extract the subgraph
            # Parameters were set in the configuration file obtained from Hydra
            subgraph = MultimodalPCSTPruning(
                topk=state["topk_nodes"],
                topk_e=state["topk_edges"],
                cost_e=cfg.cost_e,
                c_const=cfg.c_const,
                root=cfg.root,
                num_clusters=cfg.num_clusters,
                pruning=cfg.pruning,
                verbosity_level=cfg.verbosity_level,
                use_description=q[1]["use_description"],
            ).extract_subgraph(
                pyg_graph,
                torch.tensor(q[1]["desc_x"]),  # description embedding
                torch.tensor(q[1]["x"]),  # modal-specific embedding
                q[1]["node_type"],
            )

            # Append the extracted subgraph to the dictionary
            subgraphs["nodes"].append(subgraph["nodes"].tolist())
            subgraphs["edges"].append(subgraph["edges"].tolist())

        # Concatenate and get unique node and edge indices
        subgraphs["nodes"] = np.unique(
            np.concatenate([np.array(list_) for list_ in subgraphs["nodes"]])
        )
        subgraphs["edges"] = np.unique(
            np.concatenate([np.array(list_) for list_ in subgraphs["edges"]])
        )

        return subgraphs

    def _prepare_final_subgraph(
        self, state: Annotated[dict, InjectedState], subgraph: dict, graph: dict, cfg
    ) -> dict:
        """
        Prepare the subgraph based on the extracted subgraph.

        Args:
            state: The injected state for the tool.
            subgraph: The extracted subgraph.
            graph: The initial graph containing PyG and textualized graph.
            cfg: The configuration dictionary.

        Returns:
            A dictionary containing the PyG graph, NetworkX graph, and textualized graph.
        """
        # print(subgraph)
        # Prepare the PyTorch Geometric graph
        mapping = {n: i for i, n in enumerate(subgraph["nodes"].tolist())}
        pyg_graph = Data(
            # Node features
            # x=pyg_graph.x[subgraph["nodes"]],
            x=[graph["pyg"].x[i] for i in subgraph["nodes"]],
            node_id=np.array(graph["pyg"].node_id)[subgraph["nodes"]].tolist(),
            node_name=np.array(graph["pyg"].node_id)[subgraph["nodes"]].tolist(),
            enriched_node=np.array(graph["pyg"].enriched_node)[subgraph["nodes"]].tolist(),
            num_nodes=len(subgraph["nodes"]),
            # Edge features
            edge_index=torch.LongTensor(
                [
                    [mapping[i] for i in graph["pyg"].edge_index[:, subgraph["edges"]][0].tolist()],
                    [mapping[i] for i in graph["pyg"].edge_index[:, subgraph["edges"]][1].tolist()],
                ]
            ),
            edge_attr=graph["pyg"].edge_attr[subgraph["edges"]],
            edge_type=np.array(graph["pyg"].edge_type)[subgraph["edges"]].tolist(),
            relation=np.array(graph["pyg"].edge_type)[subgraph["edges"]].tolist(),
            label=np.array(graph["pyg"].edge_type)[subgraph["edges"]].tolist(),
            enriched_edge=np.array(graph["pyg"].enriched_edge)[subgraph["edges"]].tolist(),
        )

        # Networkx DiGraph construction to be visualized in the frontend
        nx_graph = nx.DiGraph()
        # Add nodes with attributes
        node_colors = {
            n: cfg.node_colors_dict[k] for k, v in state["selections"].items() for n in v
        }
        for n in pyg_graph.node_name:
            nx_graph.add_node(n, color=node_colors.get(n, None))

        # Add edges with attributes
        edges = zip(
            pyg_graph.edge_index[0].tolist(),
            pyg_graph.edge_index[1].tolist(),
            pyg_graph.edge_type,
            strict=False,
        )
        for src, dst, edge_type in edges:
            nx_graph.add_edge(
                pyg_graph.node_name[src],
                pyg_graph.node_name[dst],
                relation=edge_type,
                label=edge_type,
            )

        # Prepare the textualized subgraph
        textualized_graph = (
            graph["text"]["nodes"].iloc[subgraph["nodes"]].to_csv(index=False)
            + "\n"
            + graph["text"]["edges"].iloc[subgraph["edges"]].to_csv(index=False)
        )

        return {
            "graph_pyg": pyg_graph,
            "graph_nx": nx_graph,
            "graph_text": textualized_graph,
        }

    def _run(
        self,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[dict, InjectedState],
        prompt: str,
        arg_data: ArgumentData = None,
    ) -> Command:
        """
        Run the subgraph extraction tool.

        Args:
            tool_call_id: The tool call ID for the tool.
            state: Injected state for the tool.
            prompt: The prompt to interact with the backend.
            arg_data (ArgumentData): The argument data.

        Returns:
            Command: The command to be executed.
        """
        logger.log(logging.INFO, "Invoking subgraph_extraction tool")

        # Load hydra configuration
        with hydra.initialize(version_base=None, config_path="../configs"):
            cfg = hydra.compose(
                config_name="config",
                overrides=["tools/multimodal_subgraph_extraction=default"],
            )
            cfg = cfg.tools.multimodal_subgraph_extraction

        # Retrieve source graph from the state
        initial_graph = {}
        initial_graph["source"] = state["dic_source_graph"][-1]  # The last source graph as of now
        # logger.log(logging.INFO, "Source graph: %s", source_graph)

        # Load the knowledge graph using secure joblib
        initial_graph["pyg"] = joblib.load(initial_graph["source"]["kg_pyg_path"])
        initial_graph["text"] = joblib.load(initial_graph["source"]["kg_text_path"])

        # Prepare the query embeddings and modalities
        query_df = self._prepare_query_modalities(
            [EmbeddingWithOllama(model_name=cfg.ollama_embeddings[0]).embed_query(prompt)],
            state,
            initial_graph["pyg"],
        )

        # Perform subgraph extraction
        subgraphs = self._perform_subgraph_extraction(state, cfg, initial_graph["pyg"], query_df)

        # Prepare subgraph as a NetworkX graph and textualized graph
        final_subgraph = self._prepare_final_subgraph(state, subgraphs, initial_graph, cfg)

        # Prepare the dictionary of extracted graph
        dic_extracted_graph = {
            "name": arg_data.extraction_name,
            "tool_call_id": tool_call_id,
            "graph_source": initial_graph["source"]["name"],
            "topk_nodes": state["topk_nodes"],
            "topk_edges": state["topk_edges"],
            "graph_dict": {
                "nodes": list(final_subgraph["graph_nx"].nodes(data=True)),
                "edges": list(final_subgraph["graph_nx"].edges(data=True)),
            },
            "graph_text": final_subgraph["graph_text"],
            "graph_summary": None,
        }

        # Prepare the dictionary of updated state
        dic_updated_state_for_model = {}
        for key, value in {
            "dic_extracted_graph": [dic_extracted_graph],
        }.items():
            if value:
                dic_updated_state_for_model[key] = value

        # Return the updated state of the tool
        return Command(
            update=dic_updated_state_for_model
            | {
                # update the message history
                "messages": [
                    ToolMessage(
                        content=f"Subgraph Extraction Result of {arg_data.extraction_name}",
                        tool_call_id=tool_call_id,
                    )
                ],
            }
        )
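
For reference, `_prepare_query_modalities` above expects the uploaded "multimodal" file to be an Excel workbook with one sheet per node type (the sheet name uses `-` in place of `/`, e.g. `gene-protein` for the PrimeKG type `gene/protein`) and a `name` column listing the query entities. The sketch below shows one way such a workbook might be built with pandas; the file name, sheet names, and entity values are illustrative only, and writing `.xlsx` files requires an Excel engine such as openpyxl.

# Hypothetical multimodal query workbook for _prepare_query_modalities (illustrative values).
import pandas as pd

with pd.ExcelWriter("multimodal_query.xlsx") as writer:  # requires openpyxl
    # One sheet per node type; "/" in the PrimeKG type name is written as "-"
    pd.DataFrame({"name": ["TP53", "IL6"]}).to_excel(
        writer, sheet_name="gene-protein", index=False
    )
    pd.DataFrame({"name": ["inflammatory bowel disease"]}).to_excel(
        writer, sheet_name="disease", index=False
    )

# The workbook is then referenced from the agent state, e.g.:
# state["uploaded_files"] = [{"file_type": "multimodal", "file_path": "multimodal_query.xlsx"}]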

aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py
@@ -0,0 +1,291 @@
"""
Tool for performing subgraph extraction.
"""

import logging
from typing import Annotated

import hydra
import joblib
import networkx as nx
import numpy as np
import pandas as pd
import torch
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.messages import ToolMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import BaseTool
from langchain_core.tools.base import InjectedToolCallId
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.prebuilt import InjectedState
from langgraph.types import Command
from pydantic import BaseModel, Field
from torch_geometric.data import Data

from ..utils.embeddings.ollama import EmbeddingWithOllama
from ..utils.extractions.pcst import PCSTPruning
from .load_arguments import ArgumentData

# Initialize logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class SubgraphExtractionInput(BaseModel):
    """
    SubgraphExtractionInput is a Pydantic model representing an input for extracting a subgraph.

    Args:
        prompt: Prompt to interact with the backend.
        tool_call_id: Tool call ID.
        state: Injected state.
        arg_data: Argument for analytical process over graph data.
    """

    tool_call_id: Annotated[str, InjectedToolCallId] = Field(description="Tool call ID.")
    state: Annotated[dict, InjectedState] = Field(description="Injected state.")
    prompt: str = Field(description="Prompt to interact with the backend.")
    arg_data: ArgumentData = Field(description="Experiment over graph data.", default=None)


class SubgraphExtractionTool(BaseTool):
    """
    This tool performs subgraph extraction based on user's prompt by taking into account
    the top-k nodes and edges.
    """

    name: str = "subgraph_extraction"
    description: str = "A tool for subgraph extraction based on user's prompt."
    args_schema: type[BaseModel] = SubgraphExtractionInput

    def perform_endotype_filtering(
        self,
        prompt: str,
        state: Annotated[dict, InjectedState],
        cfg: hydra.core.config_store.ConfigStore,
    ) -> str:
        """
        Perform endotype filtering based on the uploaded files and prepare the prompt.

        Args:
            prompt: The prompt to interact with the backend.
            state: Injected state for the tool.
            cfg: Hydra configuration object.
        """
        # Loop through the uploaded files
        all_genes = []
        for uploaded_file in state["uploaded_files"]:
            if uploaded_file["file_type"] == "endotype":
                # Load the PDF file
                docs = PyPDFLoader(file_path=uploaded_file["file_path"]).load()

                # Split the text into chunks
                splits = RecursiveCharacterTextSplitter(
                    chunk_size=cfg.splitter_chunk_size,
                    chunk_overlap=cfg.splitter_chunk_overlap,
                ).split_documents(docs)

                # Create a chat prompt template
                prompt_template = ChatPromptTemplate.from_messages(
                    [
                        ("system", cfg.prompt_endotype_filtering),
                        ("human", "{input}"),
                    ]
                )

                qa_chain = create_stuff_documents_chain(state["llm_model"], prompt_template)
                rag_chain = create_retrieval_chain(
                    InMemoryVectorStore.from_documents(
                        documents=splits, embedding=state["embedding_model"]
                    ).as_retriever(
                        search_type=cfg.retriever_search_type,
                        search_kwargs={
                            "k": cfg.retriever_k,
                            "fetch_k": cfg.retriever_fetch_k,
                            "lambda_mult": cfg.retriever_lambda_mult,
                        },
                    ),
                    qa_chain,
                )
                results = rag_chain.invoke({"input": prompt})
                all_genes.append(results["answer"])

        # Prepare the prompt
        if len(all_genes) > 0:
            prompt = " ".join([prompt, cfg.prompt_endotype_addition, ", ".join(all_genes)])

        return prompt

    def prepare_final_subgraph(
        self, subgraph: dict, pyg_graph: Data, textualized_graph: pd.DataFrame
    ) -> dict:
        """
        Prepare the subgraph based on the extracted subgraph.

        Args:
            subgraph: The extracted subgraph.
            pyg_graph: The PyTorch Geometric graph.
            textualized_graph: The textualized graph.

        Returns:
            A dictionary containing the PyG graph, NetworkX graph, and textualized graph.
        """
        # print(subgraph)
        # Prepare the PyTorch Geometric graph
        mapping = {n: i for i, n in enumerate(subgraph["nodes"].tolist())}
        pyg_graph = Data(
            # Node features
            x=pyg_graph.x[subgraph["nodes"]],
            node_id=np.array(pyg_graph.node_id)[subgraph["nodes"]].tolist(),
            node_name=np.array(pyg_graph.node_id)[subgraph["nodes"]].tolist(),
            enriched_node=np.array(pyg_graph.enriched_node)[subgraph["nodes"]].tolist(),
            num_nodes=len(subgraph["nodes"]),
            # Edge features
            edge_index=torch.LongTensor(
                [
                    [mapping[i] for i in pyg_graph.edge_index[:, subgraph["edges"]][0].tolist()],
                    [mapping[i] for i in pyg_graph.edge_index[:, subgraph["edges"]][1].tolist()],
                ]
            ),
            edge_attr=pyg_graph.edge_attr[subgraph["edges"]],
            edge_type=np.array(pyg_graph.edge_type)[subgraph["edges"]].tolist(),
            relation=np.array(pyg_graph.edge_type)[subgraph["edges"]].tolist(),
            label=np.array(pyg_graph.edge_type)[subgraph["edges"]].tolist(),
            enriched_edge=np.array(pyg_graph.enriched_edge)[subgraph["edges"]].tolist(),
        )

        # Networkx DiGraph construction to be visualized in the frontend
        nx_graph = nx.DiGraph()
        for n in pyg_graph.node_name:
            nx_graph.add_node(n)
        for i, e in enumerate(
            [
                [pyg_graph.node_name[i], pyg_graph.node_name[j]]
                for (i, j) in pyg_graph.edge_index.transpose(1, 0)
            ]
        ):
            nx_graph.add_edge(
                e[0],
                e[1],
                relation=pyg_graph.edge_type[i],
                label=pyg_graph.edge_type[i],
            )

        # Prepare the textualized subgraph
        textualized_graph = (
            textualized_graph["nodes"].iloc[subgraph["nodes"]].to_csv(index=False)
            + "\n"
            + textualized_graph["edges"].iloc[subgraph["edges"]].to_csv(index=False)
        )

        return {
            "graph_pyg": pyg_graph,
            "graph_nx": nx_graph,
            "graph_text": textualized_graph,
        }

    def _run(
        self,
        tool_call_id: Annotated[str, InjectedToolCallId],
        state: Annotated[dict, InjectedState],
        prompt: str,
        arg_data: ArgumentData = None,
    ) -> Command:
        """
        Run the subgraph extraction tool.

        Args:
            tool_call_id: The tool call ID for the tool.
            state: Injected state for the tool.
            prompt: The prompt to interact with the backend.
            arg_data (ArgumentData): The argument data.

        Returns:
            Command: The command to be executed.
        """
        logger.log(logging.INFO, "Invoking subgraph_extraction tool")

        # Load hydra configuration
        with hydra.initialize(version_base=None, config_path="../configs"):
            cfg = hydra.compose(
                config_name="config", overrides=["tools/subgraph_extraction=default"]
            )
            cfg = cfg.tools.subgraph_extraction

        # Retrieve source graph from the state
        initial_graph = {}
        initial_graph["source"] = state["dic_source_graph"][-1]  # The last source graph as of now
        # logger.log(logging.INFO, "Source graph: %s", source_graph)

        # Load the knowledge graph using secure joblib
        initial_graph["pyg"] = joblib.load(initial_graph["source"]["kg_pyg_path"])
        initial_graph["text"] = joblib.load(initial_graph["source"]["kg_text_path"])

        # Prepare prompt construction along with a list of endotypes
        if len(state["uploaded_files"]) != 0 and "endotype" in [
            f["file_type"] for f in state["uploaded_files"]
        ]:
            prompt = self.perform_endotype_filtering(prompt, state, cfg)

        # Prepare embedding model and embed the user prompt as query
        query_emb = torch.tensor(
            EmbeddingWithOllama(model_name=cfg.ollama_embeddings[0]).embed_query(prompt)
        ).float()

        # Prepare the PCSTPruning object and extract the subgraph
        # Parameters were set in the configuration file obtained from Hydra
        subgraph = PCSTPruning(
            state["topk_nodes"],
            state["topk_edges"],
            cfg.cost_e,
            cfg.c_const,
            cfg.root,
            cfg.num_clusters,
            cfg.pruning,
            cfg.verbosity_level,
        ).extract_subgraph(initial_graph["pyg"], query_emb)

        # Prepare subgraph as a NetworkX graph and textualized graph
        final_subgraph = self.prepare_final_subgraph(
            subgraph, initial_graph["pyg"], initial_graph["text"]
        )

        # Prepare the dictionary of extracted graph
        dic_extracted_graph = {
            "name": arg_data.extraction_name,
            "tool_call_id": tool_call_id,
            "graph_source": initial_graph["source"]["name"],
            "topk_nodes": state["topk_nodes"],
            "topk_edges": state["topk_edges"],
            "graph_dict": {
                "nodes": list(final_subgraph["graph_nx"].nodes(data=True)),
                "edges": list(final_subgraph["graph_nx"].edges(data=True)),
            },
            "graph_text": final_subgraph["graph_text"],
            "graph_summary": None,
        }

        # Prepare the dictionary of updated state
        dic_updated_state_for_model = {}
        for key, value in {
            "dic_extracted_graph": [dic_extracted_graph],
        }.items():
            if value:
                dic_updated_state_for_model[key] = value

        # Return the updated state of the tool
        return Command(
            update=dic_updated_state_for_model
            | {
                # update the message history
                "messages": [
                    ToolMessage(
                        content=f"Subgraph Extraction Result of {arg_data.extraction_name}",
                        tool_call_id=tool_call_id,
                    )
                ],
            }
        )
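
As with the multimodal variant, `SubgraphExtractionTool._run` reads `uploaded_files`, `dic_source_graph`, `topk_nodes`, and `topk_edges` from the injected LangGraph state (plus `llm_model` and `embedding_model` when an endotype PDF triggers `perform_endotype_filtering`). The sketch below shows one way the tool might be invoked directly, outside an agent, with the injected arguments supplied by hand; the paths and values are placeholders, and `ArgumentData` is assumed to accept an `extraction_name` field as referenced in `_run`.

# Hypothetical direct invocation of SubgraphExtractionTool (values are placeholders).
from aiagents4pharma.talk2knowledgegraphs.tools.load_arguments import ArgumentData
from aiagents4pharma.talk2knowledgegraphs.tools.subgraph_extraction import SubgraphExtractionTool

tool = SubgraphExtractionTool()

# State keys mirror those accessed in _run; a local Ollama embedding model and a
# previously dumped knowledge graph (joblib files) are assumed to be available.
state = {
    "uploaded_files": [],  # add {"file_type": "endotype", "file_path": "..."} and set
                           # state["llm_model"] / state["embedding_model"] to enable endotype filtering
    "dic_source_graph": [
        {
            "name": "PrimeKG",
            "kg_pyg_path": "path/to/kg_pyg.pkl",  # placeholder path
            "kg_text_path": "path/to/kg_text.pkl",  # placeholder path
        }
    ],
    "topk_nodes": 5,
    "topk_edges": 5,
}

# Injected arguments (state, tool_call_id) are passed explicitly here instead of by LangGraph.
command = tool.invoke(
    {
        "prompt": "Extract a subgraph around genes involved in inflammatory bowel disease.",
        "tool_call_id": "call_1",
        "state": state,
        "arg_data": ArgumentData(extraction_name="subkg_ibd"),
    }
)
# `command` is a langgraph Command whose `update` carries `dic_extracted_graph` and a ToolMessage.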