aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +4 -8
- aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
- aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
- aiagents4pharma/talk2biomodels/api/ols.py +13 -10
- aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
- aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
- aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
- aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
- aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
- aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
- aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
- aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
- aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
- aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
- aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +4 -5
- aiagents4pharma/talk2cells/agents/__init__.py +3 -2
- aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
- aiagents4pharma/talk2cells/states/__init__.py +3 -2
- aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
- aiagents4pharma/talk2cells/tools/__init__.py +3 -2
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +334 -216
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +160 -97
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +87 -13
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +145 -142
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +14 -34
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
- aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
- aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
- aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
- aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
- aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
- aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
- aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
- aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
- aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
- aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
- aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
- aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
- aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
- aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
- aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
- aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
- aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
- aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
- aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
- aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
- aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
- aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
- aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
- aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
- aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/METADATA +115 -51
- aiagents4pharma-1.45.1.dist-info/RECORD +324 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/WHEEL +1 -2
- aiagents4pharma-1.44.0.dist-info/RECORD +0 -293
- aiagents4pharma-1.44.0.dist-info/top_level.txt +0 -1
- /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/licenses/LICENSE +0 -0
@@ -3,7 +3,7 @@ Tool for performing multimodal subgraph extraction.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Annotated
|
6
|
+
from typing import Annotated
|
7
7
|
|
8
8
|
import hydra
|
9
9
|
import pandas as pd
|
@@ -39,14 +39,10 @@ class MultimodalSubgraphExtractionInput(BaseModel):
|
|
39
39
|
arg_data: Argument for analytical process over graph data.
|
40
40
|
"""
|
41
41
|
|
42
|
-
tool_call_id: Annotated[str, InjectedToolCallId] = Field(
|
43
|
-
description="Tool call ID."
|
44
|
-
)
|
42
|
+
tool_call_id: Annotated[str, InjectedToolCallId] = Field(description="Tool call ID.")
|
45
43
|
state: Annotated[dict, InjectedState] = Field(description="Injected state.")
|
46
44
|
prompt: str = Field(description="Prompt to interact with the backend.")
|
47
|
-
arg_data: ArgumentData = Field(
|
48
|
-
description="Experiment over graph data.", default=None
|
49
|
-
)
|
45
|
+
arg_data: ArgumentData = Field(description="Experiment over graph data.", default=None)
|
50
46
|
|
51
47
|
|
52
48
|
class MultimodalSubgraphExtractionTool(BaseTool):
|
@@ -57,18 +53,19 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
57
53
|
|
58
54
|
name: str = "subgraph_extraction"
|
59
55
|
description: str = "A tool for subgraph extraction based on user's prompt."
|
60
|
-
args_schema:
|
56
|
+
args_schema: type[BaseModel] = MultimodalSubgraphExtractionInput
|
61
57
|
|
62
58
|
def __init__(self, **kwargs):
|
63
59
|
super().__init__(**kwargs)
|
64
60
|
# Initialize hardware detection and dynamic library loading
|
65
|
-
object.__setattr__(self,
|
66
|
-
object.__setattr__(self,
|
67
|
-
logger.info(
|
68
|
-
|
61
|
+
object.__setattr__(self, "detector", SystemDetector())
|
62
|
+
object.__setattr__(self, "loader", DynamicLibraryLoader(self.detector))
|
63
|
+
logger.info(
|
64
|
+
"MultimodalSubgraphExtractionTool initialized with %s mode",
|
65
|
+
"GPU" if self.loader.use_gpu else "CPU",
|
66
|
+
)
|
69
67
|
|
70
|
-
def _read_multimodal_files(self,
|
71
|
-
state: Annotated[dict, InjectedState]):
|
68
|
+
def _read_multimodal_files(self, state: Annotated[dict, InjectedState]):
|
72
69
|
"""
|
73
70
|
Read the uploaded multimodal files and return a DataFrame.
|
74
71
|
|
@@ -86,8 +83,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
86
83
|
# Check if multimodal file is uploaded
|
87
84
|
if state["uploaded_files"][i]["file_type"] == "multimodal":
|
88
85
|
# Read the Excel file
|
89
|
-
multimodal_df = pd.read_excel(
|
90
|
-
|
86
|
+
multimodal_df = pd.read_excel(
|
87
|
+
state["uploaded_files"][i]["file_path"], sheet_name=None
|
88
|
+
)
|
91
89
|
|
92
90
|
# Check if the multimodal_df is empty
|
93
91
|
logger.log(logging.INFO, "Checking if multimodal_df is empty")
|
@@ -98,11 +96,12 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
98
96
|
multimodal_df = pd.concat(multimodal_df).reset_index()
|
99
97
|
multimodal_df = self.loader.df.DataFrame(multimodal_df)
|
100
98
|
multimodal_df.drop(columns=["level_1"], inplace=True)
|
101
|
-
multimodal_df.rename(
|
102
|
-
|
99
|
+
multimodal_df.rename(
|
100
|
+
columns={"level_0": "q_node_type", "name": "q_node_name"}, inplace=True
|
101
|
+
)
|
103
102
|
# Since an excel sheet name could not contain a `/`,
|
104
103
|
# but the node type can be 'gene/protein' as exists in the PrimeKG
|
105
|
-
multimodal_df["q_node_type"] = multimodal_df["q_node_type"].str.replace(
|
104
|
+
multimodal_df["q_node_type"] = multimodal_df["q_node_type"].str.replace("-", "_")
|
106
105
|
|
107
106
|
return multimodal_df
|
108
107
|
|
@@ -115,30 +114,28 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
115
114
|
collection.load()
|
116
115
|
|
117
116
|
# Query the collection with node names from multimodal_df
|
118
|
-
node_names_series = node_type_df[
|
119
|
-
q_node_names = getattr(
|
120
|
-
|
121
|
-
|
122
|
-
q_columns = ["node_id", "node_name", "node_type",
|
123
|
-
"feat", "feat_emb", "desc", "desc_emb"]
|
117
|
+
node_names_series = node_type_df["q_node_name"]
|
118
|
+
q_node_names = getattr(
|
119
|
+
node_names_series, "to_pandas", lambda series=node_names_series: series
|
120
|
+
)().tolist()
|
121
|
+
q_columns = ["node_id", "node_name", "node_type", "feat", "feat_emb", "desc", "desc_emb"]
|
124
122
|
res = collection.query(
|
125
|
-
expr=f
|
123
|
+
expr=f"node_name IN [{','.join(f'"{name}"' for name in q_node_names)}]",
|
126
124
|
output_fields=q_columns,
|
127
125
|
)
|
128
126
|
# Convert the embeedings into floats
|
129
127
|
for r_ in res:
|
130
|
-
r_[
|
131
|
-
r_[
|
128
|
+
r_["feat_emb"] = [float(x) for x in r_["feat_emb"]]
|
129
|
+
r_["desc_emb"] = [float(x) for x in r_["desc_emb"]]
|
132
130
|
|
133
131
|
# Convert the result to a DataFrame
|
134
132
|
res_df = self.loader.df.DataFrame(res)[q_columns]
|
135
133
|
res_df["use_description"] = False
|
136
134
|
return res_df
|
137
135
|
|
138
|
-
def _prepare_query_modalities(
|
139
|
-
|
140
|
-
|
141
|
-
cfg_db: dict):
|
136
|
+
def _prepare_query_modalities(
|
137
|
+
self, prompt: dict, state: Annotated[dict, InjectedState], cfg_db: dict
|
138
|
+
):
|
142
139
|
"""
|
143
140
|
Prepare the modality-specific query for subgraph extraction.
|
144
141
|
|
@@ -153,16 +150,18 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
153
150
|
# Initialize dataframes
|
154
151
|
logger.log(logging.INFO, "Initializing dataframes")
|
155
152
|
query_df = []
|
156
|
-
prompt_df = self.loader.df.DataFrame(
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
153
|
+
prompt_df = self.loader.df.DataFrame(
|
154
|
+
{
|
155
|
+
"node_id": "user_prompt",
|
156
|
+
"node_name": "User Prompt",
|
157
|
+
"node_type": "prompt",
|
158
|
+
"feat": prompt["text"],
|
159
|
+
"feat_emb": prompt["emb"],
|
160
|
+
"desc": prompt["text"],
|
161
|
+
"desc_emb": prompt["emb"],
|
162
|
+
"use_description": True, # set to True for user prompt embedding
|
163
|
+
}
|
164
|
+
)
|
166
165
|
|
167
166
|
# Read multimodal files uploaded by the user
|
168
167
|
multimodal_df = self._read_multimodal_files(state)
|
@@ -171,7 +170,10 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
171
170
|
logger.log(logging.INFO, "Prepare query modalities")
|
172
171
|
if len(multimodal_df) > 0:
|
173
172
|
# Query the Milvus database for each node type in multimodal_df
|
174
|
-
logger.log(
|
173
|
+
logger.log(
|
174
|
+
logging.INFO,
|
175
|
+
"Querying Milvus database for each node type in multimodal_df",
|
176
|
+
)
|
175
177
|
for node_type, node_type_df in multimodal_df.groupby("q_node_type"):
|
176
178
|
print(f"Processing node type: {node_type}")
|
177
179
|
res_df = self._query_milvus_collection(node_type, node_type_df, cfg_db)
|
@@ -183,11 +185,12 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
183
185
|
|
184
186
|
# Update the state by adding the the selected node IDs
|
185
187
|
logger.log(logging.INFO, "Updating state with selected node IDs")
|
186
|
-
state["selections"] =
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
188
|
+
state["selections"] = (
|
189
|
+
getattr(query_df, "to_pandas", lambda: query_df)()
|
190
|
+
.groupby("node_type")["node_id"]
|
191
|
+
.apply(list)
|
192
|
+
.to_dict()
|
193
|
+
)
|
191
194
|
|
192
195
|
# Append a user prompt to the query dataframe
|
193
196
|
logger.log(logging.INFO, "Adding user prompt to query dataframe")
|
@@ -198,11 +201,13 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
198
201
|
|
199
202
|
return query_df
|
200
203
|
|
201
|
-
def _perform_subgraph_extraction(
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
204
|
+
def _perform_subgraph_extraction(
|
205
|
+
self,
|
206
|
+
state: Annotated[dict, InjectedState],
|
207
|
+
cfg: dict,
|
208
|
+
cfg_db: dict,
|
209
|
+
query_df: pd.DataFrame,
|
210
|
+
) -> dict:
|
206
211
|
"""
|
207
212
|
Perform multimodal subgraph extraction based on modal-specific embeddings.
|
208
213
|
|
@@ -217,10 +222,7 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
217
222
|
"""
|
218
223
|
# Initialize the subgraph dictionary
|
219
224
|
subgraphs = []
|
220
|
-
unified_subgraph = {
|
221
|
-
"nodes": [],
|
222
|
-
"edges": []
|
223
|
-
}
|
225
|
+
unified_subgraph = {"nodes": [], "edges": []}
|
224
226
|
# subgraphs = {}
|
225
227
|
# subgraphs["nodes"] = []
|
226
228
|
# subgraphs["edges"] = []
|
@@ -228,22 +230,21 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
228
230
|
# Loop over query embeddings and modalities
|
229
231
|
for q in getattr(query_df, "to_pandas", lambda: query_df)().iterrows():
|
230
232
|
logger.log(logging.INFO, "===========================================")
|
231
|
-
logger.log(logging.INFO, "Processing query: %s", q[1][
|
233
|
+
logger.log(logging.INFO, "Processing query: %s", q[1]["node_name"])
|
232
234
|
# Prepare the PCSTPruning object and extract the subgraph
|
233
235
|
# Parameters were set in the configuration file obtained from Hydra
|
234
236
|
# start = datetime.datetime.now()
|
235
237
|
# Get dynamic metric type (overrides any config setting)
|
236
238
|
# Get dynamic metric type (overrides any config setting)
|
237
|
-
has_vector_processing = hasattr(cfg,
|
239
|
+
has_vector_processing = hasattr(cfg, "vector_processing")
|
238
240
|
if has_vector_processing:
|
239
|
-
dynamic_metrics_enabled = getattr(cfg.vector_processing,
|
241
|
+
dynamic_metrics_enabled = getattr(cfg.vector_processing, "dynamic_metrics", True)
|
240
242
|
else:
|
241
243
|
dynamic_metrics_enabled = False
|
242
244
|
if has_vector_processing and dynamic_metrics_enabled:
|
243
245
|
dynamic_metric_type = self.loader.metric_type
|
244
246
|
else:
|
245
|
-
dynamic_metric_type = getattr(cfg,
|
246
|
-
self.loader.metric_type)
|
247
|
+
dynamic_metric_type = getattr(cfg, "search_metric_type", self.loader.metric_type)
|
247
248
|
|
248
249
|
subgraph = MultimodalPCSTPruning(
|
249
250
|
topk=state["topk_nodes"],
|
@@ -254,20 +255,21 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
254
255
|
num_clusters=cfg.num_clusters,
|
255
256
|
pruning=cfg.pruning,
|
256
257
|
verbosity_level=cfg.verbosity_level,
|
257
|
-
use_description=q[1][
|
258
|
+
use_description=q[1]["use_description"],
|
258
259
|
metric_type=dynamic_metric_type, # Use dynamic or config metric type
|
259
|
-
loader=self.loader # Pass the loader instance
|
260
|
-
).extract_subgraph(q[1][
|
261
|
-
q[1]['feat_emb'],
|
262
|
-
q[1]['node_type'],
|
263
|
-
cfg_db)
|
260
|
+
loader=self.loader, # Pass the loader instance
|
261
|
+
).extract_subgraph(q[1]["desc_emb"], q[1]["feat_emb"], q[1]["node_type"], cfg_db)
|
264
262
|
|
265
263
|
# Append the extracted subgraph to the dictionary
|
266
264
|
unified_subgraph["nodes"].append(subgraph["nodes"].tolist())
|
267
265
|
unified_subgraph["edges"].append(subgraph["edges"].tolist())
|
268
|
-
subgraphs.append(
|
269
|
-
|
270
|
-
|
266
|
+
subgraphs.append(
|
267
|
+
(
|
268
|
+
q[1]["node_name"],
|
269
|
+
subgraph["nodes"].tolist(),
|
270
|
+
subgraph["edges"].tolist(),
|
271
|
+
)
|
272
|
+
)
|
271
273
|
|
272
274
|
# end = datetime.datetime.now()
|
273
275
|
# logger.log(logging.INFO, "Subgraph extraction time: %s seconds",
|
@@ -284,10 +286,10 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
284
286
|
).tolist()
|
285
287
|
|
286
288
|
# Convert the unified subgraph and subgraphs to DataFrames
|
287
|
-
unified_subgraph = self.loader.df.DataFrame(
|
288
|
-
|
289
|
-
|
290
|
-
|
289
|
+
unified_subgraph = self.loader.df.DataFrame(
|
290
|
+
[("Unified Subgraph", unified_subgraph["nodes"], unified_subgraph["edges"])],
|
291
|
+
columns=["name", "nodes", "edges"],
|
292
|
+
)
|
291
293
|
subgraphs = self.loader.df.DataFrame(subgraphs, columns=["name", "nodes", "edges"])
|
292
294
|
|
293
295
|
# Concatenate both DataFrames
|
@@ -295,11 +297,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
295
297
|
|
296
298
|
return subgraphs
|
297
299
|
|
298
|
-
def _prepare_final_subgraph(
|
299
|
-
|
300
|
-
|
301
|
-
cfg: dict,
|
302
|
-
cfg_db) -> dict:
|
300
|
+
def _prepare_final_subgraph(
|
301
|
+
self, state: Annotated[dict, InjectedState], subgraph: dict, cfg: dict, cfg_db
|
302
|
+
) -> dict:
|
303
303
|
"""
|
304
304
|
Prepare the subgraph based on the extracted subgraph.
|
305
305
|
|
@@ -314,53 +314,62 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
314
314
|
A dictionary containing the PyG graph, NetworkX graph, and textualized graph.
|
315
315
|
"""
|
316
316
|
# Convert the dict to a DataFrame
|
317
|
-
node_colors = {
|
318
|
-
|
317
|
+
node_colors = {
|
318
|
+
n: cfg.node_colors_dict[k] for k, v in state["selections"].items() for n in v
|
319
|
+
}
|
319
320
|
color_df = self.loader.df.DataFrame(list(node_colors.items()), columns=["node_id", "color"])
|
320
321
|
# print(color_df)
|
321
322
|
|
322
323
|
# Prepare the subgraph dictionary
|
323
|
-
graph_dict = {
|
324
|
-
"name": [],
|
325
|
-
"nodes": [],
|
326
|
-
"edges": [],
|
327
|
-
"text": ""
|
328
|
-
}
|
324
|
+
graph_dict = {"name": [], "nodes": [], "edges": [], "text": ""}
|
329
325
|
for sub in getattr(subgraph, "to_pandas", lambda: subgraph)().itertuples(index=False):
|
330
326
|
graph_nodes, graph_edges = self._process_subgraph_data(sub, cfg_db, color_df)
|
331
327
|
|
332
328
|
# Prepare lists for visualization
|
333
329
|
graph_dict["name"].append(sub.name)
|
334
|
-
graph_dict["nodes"].append(
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
330
|
+
graph_dict["nodes"].append(
|
331
|
+
[
|
332
|
+
(
|
333
|
+
row.node_id,
|
334
|
+
{
|
335
|
+
"hover": "Node Name : "
|
336
|
+
+ row.node_name
|
337
|
+
+ "\n"
|
338
|
+
+ "Node Type : "
|
339
|
+
+ row.node_type
|
340
|
+
+ "\n"
|
341
|
+
+ "Desc : "
|
342
|
+
+ row.desc,
|
343
|
+
"click": "$hover",
|
344
|
+
"color": row.color,
|
345
|
+
},
|
346
|
+
)
|
347
|
+
for row in getattr(
|
348
|
+
graph_nodes, "to_pandas", lambda graph_nodes=graph_nodes: graph_nodes
|
349
|
+
)().itertuples(index=False)
|
350
|
+
]
|
351
|
+
)
|
352
|
+
graph_dict["edges"].append(
|
353
|
+
[
|
354
|
+
(row.head_id, row.tail_id, {"label": tuple(row.edge_type)})
|
355
|
+
for row in getattr(
|
356
|
+
graph_edges, "to_pandas", lambda graph_edges=graph_edges: graph_edges
|
357
|
+
)().itertuples(index=False)
|
358
|
+
]
|
359
|
+
)
|
353
360
|
|
354
361
|
# Prepare the textualized subgraph
|
355
362
|
if sub.name == "Unified Subgraph":
|
356
|
-
graph_nodes = graph_nodes[[
|
357
|
-
graph_nodes.rename(columns={
|
358
|
-
graph_edges = graph_edges[[
|
359
|
-
nodes_pandas = getattr(
|
360
|
-
|
363
|
+
graph_nodes = graph_nodes[["node_id", "desc"]]
|
364
|
+
graph_nodes.rename(columns={"desc": "node_attr"}, inplace=True)
|
365
|
+
graph_edges = graph_edges[["head_id", "edge_type", "tail_id"]]
|
366
|
+
nodes_pandas = getattr(
|
367
|
+
graph_nodes, "to_pandas", lambda graph_nodes=graph_nodes: graph_nodes
|
368
|
+
)()
|
361
369
|
nodes_csv = nodes_pandas.to_csv(index=False)
|
362
|
-
edges_pandas = getattr(
|
363
|
-
|
370
|
+
edges_pandas = getattr(
|
371
|
+
graph_edges, "to_pandas", lambda graph_edges=graph_edges: graph_edges
|
372
|
+
)()
|
364
373
|
edges_csv = edges_pandas.to_csv(index=False)
|
365
374
|
graph_dict["text"] = nodes_csv + "\n" + edges_csv
|
366
375
|
|
@@ -369,44 +378,43 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
369
378
|
def _process_subgraph_data(self, sub, cfg_db, color_df):
|
370
379
|
"""Helper method to process individual subgraph data."""
|
371
380
|
print(f"Processing subgraph: {sub.name}")
|
372
|
-
print(
|
381
|
+
print("---")
|
373
382
|
print(sub.nodes)
|
374
|
-
print(
|
383
|
+
print("---")
|
375
384
|
print(sub.edges)
|
376
|
-
print(
|
385
|
+
print("---")
|
377
386
|
|
378
387
|
# Prepare graph dataframes - Nodes
|
379
388
|
coll_name = f"{cfg_db.milvus_db.database_name}_nodes"
|
380
389
|
node_coll = Collection(name=coll_name)
|
381
390
|
node_coll.load()
|
382
391
|
graph_nodes = node_coll.query(
|
383
|
-
expr=f
|
384
|
-
output_fields=[
|
392
|
+
expr=f"node_index IN [{','.join(f'{n}' for n in sub.nodes)}]",
|
393
|
+
output_fields=["node_id", "node_name", "node_type", "desc"],
|
385
394
|
)
|
386
395
|
graph_nodes = self.loader.df.DataFrame(graph_nodes)
|
387
|
-
graph_nodes.drop(columns=[
|
396
|
+
graph_nodes.drop(columns=["node_index"], inplace=True)
|
388
397
|
if not color_df.empty:
|
389
398
|
graph_nodes = graph_nodes.merge(color_df, on="node_id", how="left")
|
390
399
|
else:
|
391
|
-
graph_nodes["color"] =
|
392
|
-
graph_nodes[
|
400
|
+
graph_nodes["color"] = "black"
|
401
|
+
graph_nodes["color"] = graph_nodes["color"].fillna("black")
|
393
402
|
|
394
403
|
# Edges
|
395
404
|
coll_name = f"{cfg_db.milvus_db.database_name}_edges"
|
396
405
|
edge_coll = Collection(name=coll_name)
|
397
406
|
edge_coll.load()
|
398
407
|
graph_edges = edge_coll.query(
|
399
|
-
expr=f
|
400
|
-
output_fields=[
|
408
|
+
expr=f"triplet_index IN [{','.join(f'{e}' for e in sub.edges)}]",
|
409
|
+
output_fields=["head_id", "tail_id", "edge_type"],
|
401
410
|
)
|
402
411
|
graph_edges = self.loader.df.DataFrame(graph_edges)
|
403
|
-
graph_edges.drop(columns=[
|
404
|
-
graph_edges[
|
412
|
+
graph_edges.drop(columns=["triplet_index"], inplace=True)
|
413
|
+
graph_edges["edge_type"] = graph_edges["edge_type"].str.split("|")
|
405
414
|
|
406
415
|
return graph_nodes, graph_edges
|
407
416
|
|
408
|
-
def normalize_vector(self,
|
409
|
-
v : list) -> list:
|
417
|
+
def normalize_vector(self, v: list) -> list:
|
410
418
|
"""
|
411
419
|
Normalize a vector using appropriate library (CuPy for GPU, NumPy for CPU).
|
412
420
|
|
@@ -448,7 +456,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
448
456
|
# Load hydra configuration
|
449
457
|
with hydra.initialize(version_base=None, config_path="../configs"):
|
450
458
|
cfg = hydra.compose(
|
451
|
-
config_name="config",
|
459
|
+
config_name="config",
|
460
|
+
overrides=["tools/multimodal_subgraph_extraction=default"],
|
452
461
|
)
|
453
462
|
cfg_db = cfg.app.frontend
|
454
463
|
cfg = cfg.tools.multimodal_subgraph_extraction
|
@@ -468,10 +477,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
468
477
|
logger.log(logging.INFO, "_prepare_query_modalities")
|
469
478
|
# start = datetime.datetime.now()
|
470
479
|
query_df = self._prepare_query_modalities(
|
471
|
-
{
|
472
|
-
|
473
|
-
|
474
|
-
)]
|
480
|
+
{
|
481
|
+
"text": prompt,
|
482
|
+
"emb": [self.normalize_vector(state["embedding_model"].embed_query(prompt))],
|
475
483
|
},
|
476
484
|
state,
|
477
485
|
cfg_db,
|
@@ -483,10 +491,7 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
483
491
|
# Perform subgraph extraction
|
484
492
|
logger.log(logging.INFO, "_perform_subgraph_extraction")
|
485
493
|
# start = datetime.datetime.now()
|
486
|
-
subgraphs = self._perform_subgraph_extraction(state,
|
487
|
-
cfg,
|
488
|
-
cfg_db,
|
489
|
-
query_df)
|
494
|
+
subgraphs = self._perform_subgraph_extraction(state, cfg, cfg_db, query_df)
|
490
495
|
# end = datetime.datetime.now()
|
491
496
|
# logger.log(logging.INFO, "_perform_subgraph_extraction time: %s seconds",
|
492
497
|
# (end - start).total_seconds())
|
@@ -495,10 +500,7 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
495
500
|
logger.log(logging.INFO, "_prepare_final_subgraph")
|
496
501
|
logger.log(logging.INFO, "Subgraphs extracted: %s", len(subgraphs))
|
497
502
|
# start = datetime.datetime.now()
|
498
|
-
final_subgraph = self._prepare_final_subgraph(state,
|
499
|
-
subgraphs,
|
500
|
-
cfg,
|
501
|
-
cfg_db)
|
503
|
+
final_subgraph = self._prepare_final_subgraph(state, subgraphs, cfg, cfg_db)
|
502
504
|
# end = datetime.datetime.now()
|
503
505
|
# logger.log(logging.INFO, "_prepare_final_subgraph time: %s seconds",
|
504
506
|
# (end - start).total_seconds())
|
@@ -534,7 +536,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
534
536
|
|
535
537
|
# Return the updated state of the tool
|
536
538
|
return Command(
|
537
|
-
update=dic_updated_state_for_model
|
539
|
+
update=dic_updated_state_for_model
|
540
|
+
| {
|
538
541
|
# update the message history
|
539
542
|
"messages": [
|
540
543
|
ToolMessage(
|