aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +4 -8
- aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
- aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
- aiagents4pharma/talk2biomodels/api/ols.py +13 -10
- aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
- aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
- aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
- aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
- aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
- aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
- aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
- aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
- aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
- aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
- aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +4 -5
- aiagents4pharma/talk2cells/agents/__init__.py +3 -2
- aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
- aiagents4pharma/talk2cells/states/__init__.py +3 -2
- aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
- aiagents4pharma/talk2cells/tools/__init__.py +3 -2
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +334 -216
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +160 -97
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +87 -13
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +145 -142
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +14 -34
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
- aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
- aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
- aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
- aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
- aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
- aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
- aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
- aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
- aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
- aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
- aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
- aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
- aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
- aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
- aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
- aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
- aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
- aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
- aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
- aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
- aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
- aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
- aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
- aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
- aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/METADATA +115 -51
- aiagents4pharma-1.45.1.dist-info/RECORD +324 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/WHEEL +1 -2
- aiagents4pharma-1.44.0.dist-info/RECORD +0 -293
- aiagents4pharma-1.44.0.dist-info/top_level.txt +0 -1
- /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/licenses/LICENSE +0 -0
@@ -18,7 +18,7 @@ import os
|
|
18
18
|
import platform
|
19
19
|
import subprocess
|
20
20
|
import sys
|
21
|
-
from typing import Any,
|
21
|
+
from typing import Any, Union
|
22
22
|
|
23
23
|
# Configure logging
|
24
24
|
logging.basicConfig(level=logging.INFO, format="[DATA LOADER] %(message)s")
|
@@ -32,9 +32,7 @@ class SystemDetector:
|
|
32
32
|
self.os_type = platform.system().lower() # 'windows', 'linux', 'darwin'
|
33
33
|
self.architecture = platform.machine().lower() # 'x86_64', 'arm64', etc.
|
34
34
|
self.has_nvidia_gpu = self._detect_nvidia_gpu()
|
35
|
-
self.use_gpu =
|
36
|
-
self.has_nvidia_gpu and self.os_type != "darwin"
|
37
|
-
) # No CUDA on macOS
|
35
|
+
self.use_gpu = self.has_nvidia_gpu and self.os_type != "darwin" # No CUDA on macOS
|
38
36
|
|
39
37
|
logger.info("System Detection Results:")
|
40
38
|
logger.info(" OS: %s", self.os_type)
|
@@ -46,9 +44,7 @@ class SystemDetector:
|
|
46
44
|
"""Detect if NVIDIA GPU is available."""
|
47
45
|
try:
|
48
46
|
# Try nvidia-smi command
|
49
|
-
result = subprocess.run(
|
50
|
-
["nvidia-smi"], capture_output=True, text=True, timeout=10
|
51
|
-
)
|
47
|
+
result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, timeout=10)
|
52
48
|
return result.returncode == 0
|
53
49
|
except (
|
54
50
|
subprocess.TimeoutExpired,
|
@@ -57,7 +53,7 @@ class SystemDetector:
|
|
57
53
|
):
|
58
54
|
return False
|
59
55
|
|
60
|
-
def get_required_packages(self) ->
|
56
|
+
def get_required_packages(self) -> list[str]:
|
61
57
|
"""Get list of packages to install based on system capabilities - matches original logic."""
|
62
58
|
if self.use_gpu and self.os_type == "linux":
|
63
59
|
# Exact package list from original script for GPU mode
|
@@ -103,9 +99,7 @@ class SystemDetector:
|
|
103
99
|
if result.returncode != 0:
|
104
100
|
logger.error("Error installing package: %s", result.stderr)
|
105
101
|
if "cudf" in package_cmd or "dask-cudf" in package_cmd:
|
106
|
-
logger.warning(
|
107
|
-
"GPU package installation failed, falling back to CPU mode"
|
108
|
-
)
|
102
|
+
logger.warning("GPU package installation failed, falling back to CPU mode")
|
109
103
|
self.use_gpu = False
|
110
104
|
return self.install_packages() # Retry with CPU packages
|
111
105
|
else:
|
@@ -115,9 +109,7 @@ class SystemDetector:
|
|
115
109
|
except subprocess.CalledProcessError as e:
|
116
110
|
logger.error("Failed to install %s: %s", package_cmd, e.stderr)
|
117
111
|
if "cudf" in package_cmd:
|
118
|
-
logger.warning(
|
119
|
-
"GPU package installation failed, falling back to CPU mode"
|
120
|
-
)
|
112
|
+
logger.warning("GPU package installation failed, falling back to CPU mode")
|
121
113
|
self.use_gpu = False
|
122
114
|
return self.install_packages() # Retry with CPU packages
|
123
115
|
else:
|
@@ -130,7 +122,7 @@ class SystemDetector:
|
|
130
122
|
class DynamicDataLoader:
|
131
123
|
"""Dynamic data loader that adapts to system capabilities."""
|
132
124
|
|
133
|
-
def __init__(self, config:
|
125
|
+
def __init__(self, config: dict[str, Any]):
|
134
126
|
"""Initialize with system detection and dynamic library loading."""
|
135
127
|
self.config = config
|
136
128
|
self.detector = SystemDetector()
|
@@ -207,13 +199,15 @@ class DynamicDataLoader:
|
|
207
199
|
logger.info("Successfully imported GPU libraries (cudf, cupy)")
|
208
200
|
except ImportError as e:
|
209
201
|
logger.error(
|
210
|
-
"[DATA LOADER] cudf or cupy not found.
|
202
|
+
"[DATA LOADER] cudf or cupy not found. "
|
203
|
+
"Please ensure they are installed correctly."
|
211
204
|
)
|
212
205
|
logger.error("Import error: %s", str(e))
|
213
206
|
# Match original script's exit behavior for critical GPU import failure
|
214
207
|
if not os.getenv("FORCE_CPU", "false").lower() == "true":
|
215
208
|
logger.error(
|
216
|
-
"GPU libraries required but not available.
|
209
|
+
"GPU libraries required but not available. "
|
210
|
+
"Set FORCE_CPU=true to use CPU mode."
|
217
211
|
)
|
218
212
|
sys.exit(1)
|
219
213
|
else:
|
@@ -222,7 +216,7 @@ class DynamicDataLoader:
|
|
222
216
|
self.use_gpu = False
|
223
217
|
|
224
218
|
def _read_dataframe(
|
225
|
-
self, file_path: str, columns:
|
219
|
+
self, file_path: str, columns: list[str] | None = None
|
226
220
|
) -> Union["pd.DataFrame", "cudf.DataFrame"]: # type: ignore[reportUndefinedVariable] # noqa: F821
|
227
221
|
"""Read dataframe using appropriate library."""
|
228
222
|
if self.use_gpu:
|
@@ -231,7 +225,7 @@ class DynamicDataLoader:
|
|
231
225
|
return self.pd.read_parquet(file_path, columns=columns)
|
232
226
|
|
233
227
|
def _concat_dataframes(
|
234
|
-
self, df_list:
|
228
|
+
self, df_list: list, ignore_index: bool = True
|
235
229
|
) -> Union["pd.DataFrame", "cudf.DataFrame"]: # type: ignore[reportUndefinedVariable] # noqa: F821
|
236
230
|
"""Concatenate dataframes using appropriate library."""
|
237
231
|
if self.use_gpu:
|
@@ -257,9 +251,7 @@ class DynamicDataLoader:
|
|
257
251
|
"""Extract embeddings and convert to appropriate format."""
|
258
252
|
if self.use_gpu:
|
259
253
|
# cuDF list extraction
|
260
|
-
emb_data = self.cp.asarray(df[column_name].list.leaves).astype(
|
261
|
-
self.cp.float32
|
262
|
-
)
|
254
|
+
emb_data = self.cp.asarray(df[column_name].list.leaves).astype(self.cp.float32)
|
263
255
|
return emb_data.reshape(df.shape[0], -1)
|
264
256
|
else:
|
265
257
|
# pandas extraction
|
@@ -325,9 +317,7 @@ class DynamicDataLoader:
|
|
325
317
|
for stage in ["enrichment", "embedding"]:
|
326
318
|
logger.info("Processing %s %s", element, stage)
|
327
319
|
|
328
|
-
file_list = glob.glob(
|
329
|
-
os.path.join(self.data_dir, element, stage, "*.parquet.gzip")
|
330
|
-
)
|
320
|
+
file_list = glob.glob(os.path.join(self.data_dir, element, stage, "*.parquet.gzip"))
|
331
321
|
logger.info("Found %d files for %s %s", len(file_list), element, stage)
|
332
322
|
|
333
323
|
if not file_list:
|
@@ -342,13 +332,9 @@ class DynamicDataLoader:
|
|
342
332
|
chunk_files = file_list[i : i + chunk_size]
|
343
333
|
chunk_df_list = []
|
344
334
|
for f in chunk_files:
|
345
|
-
df = self._read_dataframe(
|
346
|
-
f, columns=["triplet_index", "edge_emb"]
|
347
|
-
)
|
335
|
+
df = self._read_dataframe(f, columns=["triplet_index", "edge_emb"])
|
348
336
|
chunk_df_list.append(df)
|
349
|
-
chunk_df = self._concat_dataframes(
|
350
|
-
chunk_df_list, ignore_index=True
|
351
|
-
)
|
337
|
+
chunk_df = self._concat_dataframes(chunk_df_list, ignore_index=True)
|
352
338
|
graph[element][stage].append(chunk_df)
|
353
339
|
else:
|
354
340
|
# For other combinations, read all files
|
@@ -356,9 +342,7 @@ class DynamicDataLoader:
|
|
356
342
|
for f in file_list:
|
357
343
|
df = self._read_dataframe(f)
|
358
344
|
df_list.append(df)
|
359
|
-
graph[element][stage] = self._concat_dataframes(
|
360
|
-
df_list, ignore_index=True
|
361
|
-
)
|
345
|
+
graph[element][stage] = self._concat_dataframes(df_list, ignore_index=True)
|
362
346
|
|
363
347
|
logger.info("Graph data loaded successfully")
|
364
348
|
return graph
|
@@ -367,16 +351,15 @@ class DynamicDataLoader:
|
|
367
351
|
"""Get embedding dimension using original script's exact logic."""
|
368
352
|
first_emb = df.iloc[0][column_name]
|
369
353
|
if self.use_gpu:
|
370
|
-
# cuDF format - matches original:
|
354
|
+
# cuDF format - matches original:
|
355
|
+
# len(nodes_df.iloc[0]['desc_emb'].to_arrow().to_pylist()[0])
|
371
356
|
return len(first_emb.to_arrow().to_pylist()[0])
|
372
357
|
else:
|
373
358
|
# pandas format
|
374
359
|
if isinstance(first_emb, list):
|
375
360
|
return len(first_emb)
|
376
361
|
else:
|
377
|
-
return len(
|
378
|
-
first_emb.tolist() if hasattr(first_emb, "tolist") else first_emb
|
379
|
-
)
|
362
|
+
return len(first_emb.tolist() if hasattr(first_emb, "tolist") else first_emb)
|
380
363
|
|
381
364
|
def create_nodes_collection(self, nodes_df):
|
382
365
|
"""Create and populate the main nodes collection."""
|
@@ -431,9 +414,7 @@ class DynamicDataLoader:
|
|
431
414
|
|
432
415
|
# Create collection if it doesn't exist
|
433
416
|
if not self.pymilvus_modules["utility"].has_collection(node_coll_name):
|
434
|
-
collection = self.pymilvus_modules["Collection"](
|
435
|
-
name=node_coll_name, schema=schema
|
436
|
-
)
|
417
|
+
collection = self.pymilvus_modules["Collection"](name=node_coll_name, schema=schema)
|
437
418
|
else:
|
438
419
|
collection = self.pymilvus_modules["Collection"](name=node_coll_name)
|
439
420
|
|
@@ -487,9 +468,7 @@ class DynamicDataLoader:
|
|
487
468
|
collection.insert(batch)
|
488
469
|
|
489
470
|
collection.flush()
|
490
|
-
logger.info(
|
491
|
-
"Nodes collection created with %d entities", collection.num_entities
|
492
|
-
)
|
471
|
+
logger.info("Nodes collection created with %d entities", collection.num_entities)
|
493
472
|
|
494
473
|
def create_node_type_collections(self, nodes_df):
|
495
474
|
"""Create separate collections for each node type."""
|
@@ -498,9 +477,7 @@ class DynamicDataLoader:
|
|
498
477
|
for node_type, nodes_df_ in self.tqdm(
|
499
478
|
nodes_df.groupby("node_type"), desc="Processing node types"
|
500
479
|
):
|
501
|
-
node_coll_name = (
|
502
|
-
f"{self.milvus_database}_nodes_{node_type.replace('/', '_')}"
|
503
|
-
)
|
480
|
+
node_coll_name = f"{self.milvus_database}_nodes_{node_type.replace('/', '_')}"
|
504
481
|
|
505
482
|
# Get embedding dimensions
|
506
483
|
desc_dim = self._get_embedding_dimension(nodes_df_, "desc_emb")
|
@@ -564,9 +541,7 @@ class DynamicDataLoader:
|
|
564
541
|
)
|
565
542
|
|
566
543
|
if not self.pymilvus_modules["utility"].has_collection(node_coll_name):
|
567
|
-
collection = self.pymilvus_modules["Collection"](
|
568
|
-
name=node_coll_name, schema=schema
|
569
|
-
)
|
544
|
+
collection = self.pymilvus_modules["Collection"](name=node_coll_name, schema=schema)
|
570
545
|
else:
|
571
546
|
collection = self.pymilvus_modules["Collection"](name=node_coll_name)
|
572
547
|
|
@@ -639,7 +614,7 @@ class DynamicDataLoader:
|
|
639
614
|
collection.num_entities,
|
640
615
|
)
|
641
616
|
|
642
|
-
def create_edges_collection(self, edges_enrichment_df, edges_embedding_df:
|
617
|
+
def create_edges_collection(self, edges_enrichment_df, edges_embedding_df: list):
|
643
618
|
"""Create and populate the edges collection - exact original logic."""
|
644
619
|
logger.info("Creating edges collection...")
|
645
620
|
|
@@ -647,9 +622,7 @@ class DynamicDataLoader:
|
|
647
622
|
|
648
623
|
# Get embedding dimension from first chunk - exact original logic
|
649
624
|
if self.use_gpu:
|
650
|
-
emb_dim = len(
|
651
|
-
edges_embedding_df[0].loc[0, "edge_emb"]
|
652
|
-
) # Original cudf access
|
625
|
+
emb_dim = len(edges_embedding_df[0].loc[0, "edge_emb"]) # Original cudf access
|
653
626
|
else:
|
654
627
|
first_edge_emb = edges_embedding_df[0].iloc[0]["edge_emb"]
|
655
628
|
emb_dim = (
|
@@ -772,24 +745,18 @@ class DynamicDataLoader:
|
|
772
745
|
|
773
746
|
# Insert data in batches
|
774
747
|
total = len(data[0])
|
775
|
-
for i in self.tqdm(
|
776
|
-
range(0, total, self.batch_size), desc="Inserting edges"
|
777
|
-
):
|
748
|
+
for i in self.tqdm(range(0, total, self.batch_size), desc="Inserting edges"):
|
778
749
|
batch_data = [d[i : i + self.batch_size] for d in data]
|
779
750
|
collection.insert(batch_data)
|
780
751
|
|
781
752
|
collection.flush()
|
782
|
-
logger.info(
|
783
|
-
"Edges collection created with %d entities", collection.num_entities
|
784
|
-
)
|
753
|
+
logger.info("Edges collection created with %d entities", collection.num_entities)
|
785
754
|
|
786
755
|
def run(self):
|
787
756
|
"""Main execution method."""
|
788
757
|
try:
|
789
758
|
logger.info("Starting Dynamic Milvus data loading process...")
|
790
|
-
logger.info(
|
791
|
-
"System: %s %s", self.detector.os_type, self.detector.architecture
|
792
|
-
)
|
759
|
+
logger.info("System: %s %s", self.detector.os_type, self.detector.architecture)
|
793
760
|
logger.info("GPU acceleration: %s", self.use_gpu)
|
794
761
|
|
795
762
|
# Connect to Milvus
|
@@ -851,8 +818,7 @@ def main():
|
|
851
818
|
"data_dir": os.getenv("DATA_DIR", default_data_dir),
|
852
819
|
"batch_size": int(os.getenv("BATCH_SIZE", "500")),
|
853
820
|
"chunk_size": int(os.getenv("CHUNK_SIZE", "5")),
|
854
|
-
"auto_install_packages": os.getenv("AUTO_INSTALL_PACKAGES", "true").lower()
|
855
|
-
== "true",
|
821
|
+
"auto_install_packages": os.getenv("AUTO_INSTALL_PACKAGES", "true").lower() == "true",
|
856
822
|
}
|
857
823
|
|
858
824
|
# Override detection for testing/forcing specific modes
|
@@ -1,15 +1,19 @@
|
|
1
1
|
"""
|
2
2
|
Test cases for agents/t2kg_agent.py
|
3
3
|
"""
|
4
|
-
|
4
|
+
|
5
|
+
from unittest.mock import MagicMock, patch
|
6
|
+
|
7
|
+
import pandas as pd
|
5
8
|
import pytest
|
6
9
|
from langchain_core.messages import HumanMessage
|
7
10
|
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
8
|
-
|
11
|
+
|
9
12
|
from ..agents.t2kg_agent import get_app
|
10
13
|
|
11
14
|
DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
|
12
15
|
|
16
|
+
|
13
17
|
@pytest.fixture(name="input_dict")
|
14
18
|
def input_dict_fixture():
|
15
19
|
"""
|
@@ -24,7 +28,7 @@ def input_dict_fixture():
|
|
24
28
|
"cellular_component": [],
|
25
29
|
"biological_process": [],
|
26
30
|
"drug": [],
|
27
|
-
"disease": []
|
31
|
+
"disease": [],
|
28
32
|
},
|
29
33
|
"uploaded_files": [
|
30
34
|
{
|
@@ -44,42 +48,53 @@ def input_dict_fixture():
|
|
44
48
|
"kg_text_path": f"{DATA_PATH}/biobridge_multimodal_text_graph.pkl",
|
45
49
|
}
|
46
50
|
],
|
47
|
-
"dic_extracted_graph": []
|
51
|
+
"dic_extracted_graph": [],
|
48
52
|
}
|
49
53
|
return input_dict
|
50
54
|
|
55
|
+
|
51
56
|
def mock_milvus_collection(name):
|
52
57
|
"""
|
53
58
|
Mock Milvus collection for testing.
|
54
59
|
"""
|
55
60
|
nodes = MagicMock()
|
56
61
|
nodes.query.return_value = [
|
57
|
-
{
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
62
|
+
{
|
63
|
+
"node_index": 0,
|
64
|
+
"node_id": "id1",
|
65
|
+
"node_name": "Adalimumab",
|
66
|
+
"node_type": "drug",
|
67
|
+
"feat": "featA",
|
68
|
+
"feat_emb": [0.1, 0.2, 0.3],
|
69
|
+
"desc": "descA",
|
70
|
+
"desc_emb": [0.1, 0.2, 0.3],
|
71
|
+
},
|
72
|
+
{
|
73
|
+
"node_index": 1,
|
74
|
+
"node_id": "id2",
|
75
|
+
"node_name": "TNF",
|
76
|
+
"node_type": "gene/protein",
|
77
|
+
"feat": "featB",
|
78
|
+
"feat_emb": [0.4, 0.5, 0.6],
|
79
|
+
"desc": "descB",
|
80
|
+
"desc_emb": [0.4, 0.5, 0.6],
|
81
|
+
},
|
69
82
|
]
|
70
83
|
nodes.load.return_value = None
|
71
84
|
|
72
85
|
edges = MagicMock()
|
73
86
|
edges.query.return_value = [
|
74
|
-
{
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
87
|
+
{
|
88
|
+
"triplet_index": 0,
|
89
|
+
"head_id": "id1",
|
90
|
+
"head_index": 0,
|
91
|
+
"tail_id": "id2",
|
92
|
+
"tail_index": 1,
|
93
|
+
"edge_type": "drug,acts_on,gene/protein",
|
94
|
+
"display_relation": "acts_on",
|
95
|
+
"feat": "featC",
|
96
|
+
"feat_emb": [0.7, 0.8, 0.9],
|
97
|
+
}
|
83
98
|
]
|
84
99
|
edges.load.return_value = None
|
85
100
|
|
@@ -89,6 +104,7 @@ def mock_milvus_collection(name):
|
|
89
104
|
return edges
|
90
105
|
return None
|
91
106
|
|
107
|
+
|
92
108
|
def test_t2kg_agent_openai_milvus_mock(input_dict):
|
93
109
|
"""
|
94
110
|
Test the T2KG agent using OpenAI model and Milvus mock.
|
@@ -103,11 +119,11 @@ def test_t2kg_agent_openai_milvus_mock(input_dict):
|
|
103
119
|
config = {"configurable": {"thread_id": unique_id}}
|
104
120
|
app.update_state(config, input_dict)
|
105
121
|
prompt = """
|
106
|
-
Adalimumab is a fully human monoclonal antibody (IgG1)
|
122
|
+
Adalimumab is a fully human monoclonal antibody (IgG1)
|
107
123
|
that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
|
108
124
|
|
109
125
|
I would like to get evidence from the knowledge graph about the mechanism of actions related to
|
110
|
-
Adalimumab in treating inflammatory bowel disease
|
126
|
+
Adalimumab in treating inflammatory bowel disease
|
111
127
|
(IBD). Please follow these steps:
|
112
128
|
- Extract a subgraph from the PrimeKG that contains information about Adalimumab.
|
113
129
|
- Summarize the extracted subgraph.
|
@@ -116,21 +132,31 @@ def test_t2kg_agent_openai_milvus_mock(input_dict):
|
|
116
132
|
Please set the extraction name for the extraction process as `subkg_12345`.
|
117
133
|
"""
|
118
134
|
|
119
|
-
with
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
135
|
+
with (
|
136
|
+
patch(
|
137
|
+
"aiagents4pharma.talk2knowledgegraphs.tools."
|
138
|
+
"milvus_multimodal_subgraph_extraction.Collection",
|
139
|
+
side_effect=mock_milvus_collection,
|
140
|
+
),
|
141
|
+
patch(
|
142
|
+
"aiagents4pharma.talk2knowledgegraphs.tools."
|
143
|
+
"milvus_multimodal_subgraph_extraction.MultimodalPCSTPruning"
|
144
|
+
) as mock_pcst,
|
145
|
+
patch("pymilvus.connections") as mock_connections,
|
146
|
+
patch(
|
147
|
+
"aiagents4pharma.talk2knowledgegraphs.tools."
|
148
|
+
"milvus_multimodal_subgraph_extraction.hydra.initialize"
|
149
|
+
),
|
150
|
+
patch(
|
151
|
+
"aiagents4pharma.talk2knowledgegraphs.tools."
|
152
|
+
"milvus_multimodal_subgraph_extraction.hydra.compose"
|
153
|
+
) as mock_compose,
|
154
|
+
):
|
129
155
|
mock_connections.has_connection.return_value = True
|
130
156
|
mock_pcst_instance = MagicMock()
|
131
157
|
mock_pcst_instance.extract_subgraph.return_value = {
|
132
158
|
"nodes": pd.Series([0, 1]),
|
133
|
-
"edges": pd.Series([0])
|
159
|
+
"edges": pd.Series([0]),
|
134
160
|
}
|
135
161
|
mock_pcst.return_value = mock_pcst_instance
|
136
162
|
mock_cfg = MagicMock()
|
@@ -144,8 +170,7 @@ def test_t2kg_agent_openai_milvus_mock(input_dict):
|
|
144
170
|
mock_cfg.node_colors_dict = {"drug": "blue", "gene/protein": "red"}
|
145
171
|
mock_compose.return_value = MagicMock()
|
146
172
|
mock_compose.return_value.tools.multimodal_subgraph_extraction = mock_cfg
|
147
|
-
mock_compose.return_value.tools.subgraph_summarization
|
148
|
-
prompt_subgraph_summarization = (
|
173
|
+
mock_compose.return_value.tools.subgraph_summarization.prompt_subgraph_summarization = (
|
149
174
|
"Summarize the following subgraph: {textualized_subgraph}"
|
150
175
|
)
|
151
176
|
|
@@ -4,7 +4,9 @@ Test cases for datasets/primekg_loader.py
|
|
4
4
|
|
5
5
|
import os
|
6
6
|
import shutil
|
7
|
+
|
7
8
|
import pytest
|
9
|
+
|
8
10
|
from ..datasets.biobridge_primekg import BioBridgePrimeKG
|
9
11
|
|
10
12
|
# Remove the data folder for testing if it exists
|
@@ -12,13 +14,14 @@ PRIMEKG_LOCAL_DIR = "../data/primekg_test/"
|
|
12
14
|
LOCAL_DIR = "../data/biobridge_primekg_test/"
|
13
15
|
shutil.rmtree(LOCAL_DIR, ignore_errors=True)
|
14
16
|
|
17
|
+
|
15
18
|
@pytest.fixture(name="biobridge_primekg")
|
16
19
|
def biobridge_primekg_fixture():
|
17
20
|
"""
|
18
21
|
Fixture for creating an instance of PrimeKG.
|
19
22
|
"""
|
20
|
-
return BioBridgePrimeKG(primekg_dir=PRIMEKG_LOCAL_DIR,
|
21
|
-
|
23
|
+
return BioBridgePrimeKG(primekg_dir=PRIMEKG_LOCAL_DIR, local_dir=LOCAL_DIR)
|
24
|
+
|
22
25
|
|
23
26
|
def test_download_primekg(biobridge_primekg):
|
24
27
|
"""
|
@@ -39,8 +42,7 @@ def test_download_primekg(biobridge_primekg):
|
|
39
42
|
assert os.path.exists(biobridge_primekg.local_dir)
|
40
43
|
# Check if downloaded and processed files exist
|
41
44
|
# PrimeKG files
|
42
|
-
files = ["nodes.tab", "primekg_nodes.tsv.gz",
|
43
|
-
"edges.csv", "primekg_edges.tsv.gz"]
|
45
|
+
files = ["nodes.tab", "primekg_nodes.tsv.gz", "edges.csv", "primekg_edges.tsv.gz"]
|
44
46
|
for file in files:
|
45
47
|
path = f"{biobridge_primekg.primekg_dir}/{file}"
|
46
48
|
assert os.path.exists(path)
|
@@ -54,7 +56,7 @@ def test_download_primekg(biobridge_primekg):
|
|
54
56
|
"bp.pkl",
|
55
57
|
"drug.pkl",
|
56
58
|
"disease.pkl",
|
57
|
-
"embedding_dict.pkl"
|
59
|
+
"embedding_dict.pkl",
|
58
60
|
]
|
59
61
|
for file in files:
|
60
62
|
path = f"{biobridge_primekg.local_dir}/embeddings/{file}"
|
@@ -89,9 +91,9 @@ def test_download_primekg(biobridge_primekg):
|
|
89
91
|
# Check processed BioBridge data config
|
90
92
|
assert biobridge_data_config is not None
|
91
93
|
assert len(biobridge_data_config) > 0
|
92
|
-
assert len(biobridge_data_config[
|
93
|
-
assert len(biobridge_data_config[
|
94
|
-
assert len(biobridge_data_config[
|
94
|
+
assert len(biobridge_data_config["node_type"]) == 10
|
95
|
+
assert len(biobridge_data_config["relation_type"]) == 18
|
96
|
+
assert len(biobridge_data_config["emb_dim"]) == 6
|
95
97
|
# Check processed BioBridge embeddings
|
96
98
|
assert biobridge_emb_dict is not None
|
97
99
|
assert len(biobridge_emb_dict) > 0
|
@@ -100,24 +102,26 @@ def test_download_primekg(biobridge_primekg):
|
|
100
102
|
assert biobridge_triplets is not None
|
101
103
|
assert len(biobridge_triplets) > 0
|
102
104
|
assert biobridge_triplets.shape[0] == 3904610
|
103
|
-
assert list(biobridge_splits.keys()) == [
|
104
|
-
assert len(biobridge_splits[
|
105
|
-
assert len(biobridge_splits[
|
106
|
-
assert len(biobridge_splits[
|
107
|
-
assert len(biobridge_splits[
|
105
|
+
assert list(biobridge_splits.keys()) == ["train", "node_train", "test", "node_test"]
|
106
|
+
assert len(biobridge_splits["train"]) == 3510930
|
107
|
+
assert len(biobridge_splits["node_train"]) == 76486
|
108
|
+
assert len(biobridge_splits["test"]) == 393680
|
109
|
+
assert len(biobridge_splits["node_test"]) == 8495
|
108
110
|
# Check node info dictionary
|
109
|
-
assert list(biobridge_node_info.keys()) == [
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
assert len(biobridge_node_info[
|
118
|
-
assert len(biobridge_node_info[
|
119
|
-
assert len(biobridge_node_info[
|
120
|
-
assert len(biobridge_node_info[
|
111
|
+
assert list(biobridge_node_info.keys()) == [
|
112
|
+
"gene/protein",
|
113
|
+
"molecular_function",
|
114
|
+
"cellular_component",
|
115
|
+
"biological_process",
|
116
|
+
"drug",
|
117
|
+
"disease",
|
118
|
+
]
|
119
|
+
assert len(biobridge_node_info["gene/protein"]) == 19162
|
120
|
+
assert len(biobridge_node_info["molecular_function"]) == 10966
|
121
|
+
assert len(biobridge_node_info["cellular_component"]) == 4013
|
122
|
+
assert len(biobridge_node_info["biological_process"]) == 27478
|
123
|
+
assert len(biobridge_node_info["drug"]) == 6948
|
124
|
+
assert len(biobridge_node_info["disease"]) == 44133
|
121
125
|
|
122
126
|
|
123
127
|
def test_load_existing_primekg(biobridge_primekg):
|
@@ -139,8 +143,7 @@ def test_load_existing_primekg(biobridge_primekg):
|
|
139
143
|
assert os.path.exists(biobridge_primekg.local_dir)
|
140
144
|
# Check if downloaded and processed files exist
|
141
145
|
# PrimeKG files
|
142
|
-
files = ["nodes.tab", "primekg_nodes.tsv.gz",
|
143
|
-
"edges.csv", "primekg_edges.tsv.gz"]
|
146
|
+
files = ["nodes.tab", "primekg_nodes.tsv.gz", "edges.csv", "primekg_edges.tsv.gz"]
|
144
147
|
for file in files:
|
145
148
|
path = f"{biobridge_primekg.primekg_dir}/{file}"
|
146
149
|
assert os.path.exists(path)
|
@@ -154,7 +157,7 @@ def test_load_existing_primekg(biobridge_primekg):
|
|
154
157
|
"bp.pkl",
|
155
158
|
"drug.pkl",
|
156
159
|
"disease.pkl",
|
157
|
-
"embedding_dict.pkl"
|
160
|
+
"embedding_dict.pkl",
|
158
161
|
]
|
159
162
|
for file in files:
|
160
163
|
path = f"{biobridge_primekg.local_dir}/embeddings/{file}"
|
@@ -189,9 +192,9 @@ def test_load_existing_primekg(biobridge_primekg):
|
|
189
192
|
# Check processed BioBridge data config
|
190
193
|
assert biobridge_data_config is not None
|
191
194
|
assert len(biobridge_data_config) > 0
|
192
|
-
assert len(biobridge_data_config[
|
193
|
-
assert len(biobridge_data_config[
|
194
|
-
assert len(biobridge_data_config[
|
195
|
+
assert len(biobridge_data_config["node_type"]) == 10
|
196
|
+
assert len(biobridge_data_config["relation_type"]) == 18
|
197
|
+
assert len(biobridge_data_config["emb_dim"]) == 6
|
195
198
|
# Check processed BioBridge embeddings
|
196
199
|
assert biobridge_emb_dict is not None
|
197
200
|
assert len(biobridge_emb_dict) > 0
|
@@ -200,24 +203,27 @@ def test_load_existing_primekg(biobridge_primekg):
|
|
200
203
|
assert biobridge_triplets is not None
|
201
204
|
assert len(biobridge_triplets) > 0
|
202
205
|
assert biobridge_triplets.shape[0] == 3904610
|
203
|
-
assert list(biobridge_splits.keys()) == [
|
204
|
-
assert len(biobridge_splits[
|
205
|
-
assert len(biobridge_splits[
|
206
|
-
assert len(biobridge_splits[
|
207
|
-
assert len(biobridge_splits[
|
206
|
+
assert list(biobridge_splits.keys()) == ["train", "node_train", "test", "node_test"]
|
207
|
+
assert len(biobridge_splits["train"]) == 3510930
|
208
|
+
assert len(biobridge_splits["node_train"]) == 76486
|
209
|
+
assert len(biobridge_splits["test"]) == 393680
|
210
|
+
assert len(biobridge_splits["node_test"]) == 8495
|
208
211
|
# Check node info dictionary
|
209
|
-
assert list(biobridge_node_info.keys()) == [
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
assert len(biobridge_node_info[
|
218
|
-
assert len(biobridge_node_info[
|
219
|
-
assert len(biobridge_node_info[
|
220
|
-
assert len(biobridge_node_info[
|
212
|
+
assert list(biobridge_node_info.keys()) == [
|
213
|
+
"gene/protein",
|
214
|
+
"molecular_function",
|
215
|
+
"cellular_component",
|
216
|
+
"biological_process",
|
217
|
+
"drug",
|
218
|
+
"disease",
|
219
|
+
]
|
220
|
+
assert len(biobridge_node_info["gene/protein"]) == 19162
|
221
|
+
assert len(biobridge_node_info["molecular_function"]) == 10966
|
222
|
+
assert len(biobridge_node_info["cellular_component"]) == 4013
|
223
|
+
assert len(biobridge_node_info["biological_process"]) == 27478
|
224
|
+
assert len(biobridge_node_info["drug"]) == 6948
|
225
|
+
assert len(biobridge_node_info["disease"]) == 44133
|
226
|
+
|
221
227
|
|
222
228
|
# def test_load_existing_primekg_with_negative_triplets(biobridge_primekg):
|
223
229
|
# """
|