aiagents4pharma 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +11 -0
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
- aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
- aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
- aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
- aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/api/ols.py +75 -0
- aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
- aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
- aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
- aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
- aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
- aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
- aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
- aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
- aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
- aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
- aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
- aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
- aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
- aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
- aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
- aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +5 -0
- aiagents4pharma/talk2cells/agents/__init__.py +6 -0
- aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
- aiagents4pharma/talk2cells/states/__init__.py +6 -0
- aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
- aiagents4pharma/talk2cells/tools/__init__.py +6 -0
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
- aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
- aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/__init__.py +7 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
- aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
- aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
- aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
- aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
- aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
- aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
- aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
- aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
- aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
- aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/__init__.py +7 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
- aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
- aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
- aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
- aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
- aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
- aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
- aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
- aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
- aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
- aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
- aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
- aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
- aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
- aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
- aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
- aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
- aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
- aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
- aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
- aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
- aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
- aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
- aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
- aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
- aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
- aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Tests for the PDF batch processor module."""
|
|
2
|
+
|
|
3
|
+
from unittest.mock import MagicMock, patch
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor import (
|
|
8
|
+
add_papers_batch,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pytest.fixture(name="args_fixture")
def _args_fixture():
    """Build the keyword arguments shared by the ``add_papers_batch`` tests.

    Each invocation produces a new dictionary holding a mocked vector store,
    empty bookkeeping containers, a small config mapping, the metadata field
    names and a CPU-only flag.
    """
    return dict(
        vector_store=MagicMock(),
        loaded_papers=set(),
        paper_metadata={},
        documents={},
        config={"param": "value"},
        metadata_fields=["Title", "Author"],
        has_gpu=False,
    )
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
def test_no_papers_to_add(mock_loader, args_fixture):
    """An empty batch must never reach the PDF loader."""
    nothing_to_add = []

    add_papers_batch(papers_to_add=nothing_to_add, **args_fixture)

    # With no papers supplied there is nothing to load or split.
    mock_loader.assert_not_called()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
def test_all_papers_already_loaded(mock_loader, args_fixture):
    """Papers whose ids are already in ``loaded_papers`` must not be reloaded."""
    batch = [("p1", "url1", {}), ("p2", "url2", {})]

    # Mark every paper of the batch as loaded before calling the function.
    args_fixture["loaded_papers"].update(paper_id for paper_id, _, _ in batch)

    add_papers_batch(papers_to_add=batch, **args_fixture)

    mock_loader.assert_not_called()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
def test_successful_batch_embedding(mock_loader, args_fixture):
    """A new paper is loaded, recorded in the bookkeeping and embedded once."""
    # The loader yields two chunks for the single paper in the batch.
    mock_loader.return_value = [
        MagicMock(page_content=text) for text in ("Page 1", "Page 2")
    ]

    # Mocked Milvus collection attached to the mocked vector store.
    collection = MagicMock(num_entities=2)
    collection.query.return_value = [{"paper_id": "p1"}]
    store = args_fixture["vector_store"]
    store.col = collection

    add_papers_batch(
        papers_to_add=[("p1", "url1", {"Title": "Paper One"})],
        **args_fixture,
    )

    assert "p1" in args_fixture["paper_metadata"]
    assert "p1" in args_fixture["loaded_papers"]
    store.add_documents.assert_called_once()
    collection.flush.assert_called()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
def test_empty_chunks_after_loading(mock_loader, args_fixture):
    """When the loader yields no chunks, nothing is added to the vector store."""
    mock_loader.return_value = []
    single_paper = [("p1", "url1", {})]

    add_papers_batch(papers_to_add=single_paper, **args_fixture)

    store = args_fixture["vector_store"]
    store.add_documents.assert_not_called()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
def test_vector_store_insert_failure(mock_loader, args_fixture):
    """A failure raised by ``add_documents`` must propagate to the caller."""
    mock_loader.return_value = [MagicMock(page_content="page")]

    store = args_fixture["vector_store"]
    # An exception instance used as side_effect is raised whenever the mock is called.
    store.add_documents.side_effect = RuntimeError("Vector store failed")
    store.col = MagicMock()

    with pytest.raises(RuntimeError, match="Vector store failed"):
        add_papers_batch(papers_to_add=[("p1", "url1", {})], **args_fixture)
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""collection_manager for managing Milvus collections for PDF chunks."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from unittest.mock import MagicMock, patch
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from aiagents4pharma.talk2scholars.tools.pdf.utils import collection_manager
|
|
9
|
+
|
|
10
|
+
# -- Fixtures --
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@pytest.fixture
def config_mock():
    """Return a minimal dataclass-based stand-in for the Milvus configuration.

    The returned object exposes ``milvus.embedding_dim`` (768 by default).
    """

    @dataclass
    class MilvusConfig:
        """Inner section carrying the Milvus-specific settings."""

        embedding_dim: int = 768

    @dataclass
    class Config:
        """Outer configuration wrapping the Milvus section."""

        # default_factory gives every Config its own MilvusConfig instance.
        milvus: MilvusConfig = field(default_factory=MilvusConfig)

    simulated = Config()
    return simulated
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@pytest.fixture
def index_params():
    """Supply the index parameter mapping passed to ``ensure_collection_exists``."""
    params = {
        "index_type": "IVF_FLAT",
        "params": {"nlist": 128},
        "metric_type": "L2",
    }
    return params
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# -- Safe collection_cache access --
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def set_collection_cache(key, value):
    """Store ``value`` under ``key`` in the module's ``_collection_cache``.

    An existing cache dictionary is mutated in place; only when the module has
    no cache (or it is ``None``) is a new dictionary attached.
    """
    existing = getattr(collection_manager, "_collection_cache", None)
    if existing is not None:
        existing[key] = value
        return

    # No usable cache yet: attach a new one that already holds the entry.
    object.__setattr__(collection_manager, "_collection_cache", {key: value})
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def clear_collection_cache(key):
    """Drop ``key`` from the module's ``_collection_cache`` if it is present."""
    existing = getattr(collection_manager, "_collection_cache", None)
    if existing is None:
        # No cache attached to the module, so there is nothing to remove.
        return
    existing.pop(key, None)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# -- Tests --
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_cached_collection_returned(request):
    """Check that a collection already present in the cache is returned.

    The mocked collection is placed in the module-level cache before calling
    ``ensure_collection_exists``. Cleanup runs in a ``finally`` block so that
    a failing assertion or an unexpected exception cannot leave the mocked
    entry behind in the shared cache and affect later tests.
    """
    config = request.getfixturevalue("config_mock")
    index = request.getfixturevalue("index_params")
    mock_collection = MagicMock()
    collection_name = "test_cached"

    set_collection_cache(collection_name, mock_collection)
    try:
        result = collection_manager.ensure_collection_exists(
            collection_name, config, index, has_gpu=False
        )

        assert result == mock_collection
    finally:
        # Always remove the entry, even when the check above fails.
        clear_collection_cache(collection_name)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.collection_manager.Collection")
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.collection_manager.utility")
def test_create_new_collection(mock_utility, mock_collection_cls, request):
    """A collection missing from the server listing is created, indexed and loaded."""
    cfg = request.getfixturevalue("config_mock")
    idx_params = request.getfixturevalue("index_params")

    # The patched utility reports that no collection exists yet.
    mock_utility.list_collections.return_value = []

    # Object handed back by the patched Collection class.
    created = MagicMock(
        indexes=[MagicMock(field_name="embedding")],
        num_entities=5,
    )
    mock_collection_cls.return_value = created

    result = collection_manager.ensure_collection_exists(
        "new_collection", cfg, idx_params, has_gpu=True
    )

    assert created.create_index.called
    assert created.load.called
    assert result == created
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.collection_manager.Collection")
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.collection_manager.utility")
def test_load_existing_collection(mock_utility, mock_collection_cls, request):
    """A collection present in the server listing should be loaded once and returned."""
    cfg = request.getfixturevalue("config_mock")
    idx_params = request.getfixturevalue("index_params")

    target = "existing_collection"
    mock_utility.list_collections.return_value = [target]

    fake_collection = MagicMock()
    fake_collection.indexes = []
    fake_collection.num_entities = 0
    mock_collection_cls.return_value = fake_collection

    returned = collection_manager.ensure_collection_exists(
        target, cfg, idx_params, has_gpu=False
    )

    fake_collection.load.assert_called_once()
    assert returned == fake_collection
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.collection_manager.Collection")
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.collection_manager.utility")
def test_debug_collection_state_failure(mock_utility, mock_collection_cls, request):
    """ensure_collection_exists should still return the collection for 'bad_collection'."""
    cfg = request.getfixturevalue("config_mock")
    idx_params = request.getfixturevalue("index_params")

    target = "bad_collection"
    mock_utility.list_collections.return_value = [target]

    fake_collection = MagicMock()
    mock_collection_cls.return_value = fake_collection
    fake_collection.indexes = []
    fake_collection.num_entities = 10

    # NOTE(review): a property object assigned on an *instance* is stored as a
    # plain attribute, not installed as a descriptor, so reading
    # `fake_collection.schema` returns the property object and never raises.
    # If a raising attribute is intended, confirm against the implementation
    # and consider `type(mock).schema = PropertyMock(side_effect=...)`.
    failing_schema = property(lambda _: (_ for _ in ()).throw(Exception("bad schema")))
    fake_collection.schema = failing_schema

    returned = collection_manager.ensure_collection_exists(
        target, cfg, idx_params, has_gpu=True
    )

    assert returned == fake_collection
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.collection_manager.Collection")
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.collection_manager.utility")
def test_ensure_collection_exception(mock_utility, mock_collection_cls, request):
    """A RuntimeError from utility.list_collections should propagate to the caller."""
    cfg = request.getfixturevalue("config_mock")
    idx_params = request.getfixturevalue("index_params")

    mock_collection_cls.return_value = MagicMock()
    # Listing collections fails with the error the test expects to surface.
    mock_utility.list_collections.side_effect = RuntimeError("milvus failure")

    with pytest.raises(RuntimeError, match="milvus failure"):
        collection_manager.ensure_collection_exists(
            "fail_collection", cfg, idx_params, has_gpu=False
        )
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def test_set_collection_cache_initializes_when_missing(monkeypatch):
    """set_collection_cache should create and fill the cache when the attribute is absent."""
    # Drop the attribute through monkeypatch (no error if it is already absent).
    monkeypatch.delattr(collection_manager, "_collection_cache", raising=False)

    entry_key = "init_case"
    entry_value = MagicMock()

    # With the attribute gone, the helper has to attach a new dict itself.
    set_collection_cache(entry_key, entry_value)

    # The module should now expose a dict holding exactly the object we stored.
    created = getattr(collection_manager, "_collection_cache", None)
    assert isinstance(created, dict)
    assert created.get(entry_key) is entry_value

    # Cleanup
    clear_collection_cache(entry_key)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Unit tests for PDF document processing utilities."""
|
|
2
|
+
|
|
3
|
+
from unittest.mock import MagicMock, patch
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from aiagents4pharma.talk2scholars.tools.pdf.utils.document_processor import (
|
|
8
|
+
load_and_split_pdf,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pytest.fixture(name="base_args_params")
def _base_args_params():
    """Provide the keyword arguments shared by the load_and_split_pdf tests."""
    # Minimal ad-hoc config object exposing the two chunking attributes.
    splitter_config = type("Config", (), {"chunk_size": 1000, "chunk_overlap": 200})()
    metadata = {"Title": "Test Paper", "Author": "A. Researcher"}
    return dict(
        paper_id="P123",
        pdf_url="mock/path/to/paper.pdf",
        paper_metadata=metadata,
        config=splitter_config,
        metadata_fields=["Author"],
        documents_dict={},
    )
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.document_processor.PyPDFLoader")
@patch(
    "aiagents4pharma.talk2scholars.tools.pdf.utils.document_processor."
    "RecursiveCharacterTextSplitter"
)
def test_load_and_split_pdf_success(mock_splitter_cls, mock_loader_cls, base_args_params):
    """load_and_split_pdf should return one chunk and store it with enriched metadata."""
    # Loader yields a single page document.
    page_doc = MagicMock()
    page_doc.metadata = {"page": 1}
    loader = MagicMock()
    loader.load.return_value = [page_doc]
    mock_loader_cls.return_value = loader

    # Splitter yields a single chunk for that page.
    only_chunk = MagicMock()
    only_chunk.metadata = {"page": 1}
    splitter = MagicMock()
    splitter.split_documents.return_value = [only_chunk]
    mock_splitter_cls.return_value = splitter

    chunks = load_and_split_pdf(**base_args_params)

    assert len(chunks) == 1

    stored_docs = base_args_params["documents_dict"]
    assert "P123_0" in stored_docs

    stored_meta = stored_docs["P123_0"].metadata
    assert stored_meta["paper_id"] == "P123"
    assert stored_meta["title"] == "Test Paper"
    assert stored_meta["chunk_id"] == 0
    assert stored_meta["page"] == 1
    assert stored_meta["source"] == base_args_params["pdf_url"]
    assert stored_meta["Author"] == "A. Researcher"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.document_processor.PyPDFLoader")
def test_load_and_split_pdf_raises_if_config_missing(mock_loader_cls, base_args_params):
    """load_and_split_pdf should raise ValueError if config is None."""
    mock_loader = MagicMock()
    mock_loader.load.return_value = [MagicMock()]
    mock_loader_cls.return_value = mock_loader

    base_args_params["config"] = None
    # `match` is a regular expression, so the trailing period is escaped to
    # require the literal character instead of matching any character.
    with pytest.raises(
        ValueError,
        match=r"Configuration is required for text splitting in Vectorstore\.",
    ):
        load_and_split_pdf(**base_args_params)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""generate_answer tests for the PDF tool"""
|
|
2
|
+
|
|
3
|
+
from unittest.mock import MagicMock
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from aiagents4pharma.talk2scholars.tools.pdf.utils.generate_answer import (
|
|
8
|
+
_build_context_and_sources,
|
|
9
|
+
generate_answer,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@pytest.fixture(name="chunks_fixture")
def _chunks_fixture():
    """Provide three mock chunks: two from paper P1 and one from paper P2."""
    specs = [
        ("This is chunk one.", "P1", "Title 1", 1),
        ("This is chunk two.", "P1", "Title 1", 2),
        ("This is chunk three.", "P2", "Title 2", 1),
    ]

    docs = []
    for text, paper_id, title, page in specs:
        doc = MagicMock()
        doc.page_content = text
        doc.metadata = {"paper_id": paper_id, "title": title, "page": page}
        docs.append(doc)
    return docs
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_build_context_and_sources_formatting(chunks_fixture):
    """_build_context_and_sources should emit per-paper headers, page lines and paper ids."""
    context, sources = _build_context_and_sources(chunks_fixture)

    expected_fragments = (
        "[Document 1] From: 'Title 1' (ID: P1)",
        "Page 1: This is chunk one.",
        "Page 2: This is chunk two.",
        "[Document 2] From: 'Title 2' (ID: P2)",
        "Page 1: This is chunk three.",
    )
    for fragment in expected_fragments:
        assert fragment in context

    assert sources == {"P1", "P2"}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_generate_answer_success(chunks_fixture):
    """generate_answer should return the LLM text plus source and paper bookkeeping."""
    prompt_template = (
        "Answer the question based on the context."
        "\n\n{context}\n\nQ: {question}\nA:"
    )
    config = {"prompt_template": prompt_template}

    llm = MagicMock()
    llm.invoke.return_value.content = "The answer is XYZ."

    answer = generate_answer("What is the result?", chunks_fixture, llm, config)

    assert answer["output_text"] == "The answer is XYZ."
    assert len(answer["sources"]) == 3
    assert answer["num_sources"] == 3
    assert set(answer["papers_used"]) == {"P1", "P2"}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_generate_answer_raises_for_none_config(chunks_fixture):
    """generate_answer should raise ValueError for None config."""
    mock_llm = MagicMock()
    # `match` is a regular expression, so the trailing period is escaped to
    # require the literal character instead of matching any character.
    with pytest.raises(ValueError, match=r"Configuration for generate_answer is required\."):
        generate_answer("Why?", chunks_fixture, mock_llm, config=None)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_generate_answer_raises_for_missing_template(chunks_fixture):
    """generate_answer should raise ValueError for missing prompt_template in config."""
    mock_llm = MagicMock()
    # `match` is a regular expression, so the trailing period is escaped to
    # require the literal character instead of matching any character.
    with pytest.raises(
        ValueError, match=r"The prompt_template is missing from the configuration\."
    ):
        generate_answer("Why?", chunks_fixture, mock_llm, config={})
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""gpu detection and index configuration tests."""
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
from types import SimpleNamespace
|
|
5
|
+
from unittest.mock import MagicMock, patch
|
|
6
|
+
|
|
7
|
+
from aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection import (
|
|
8
|
+
detect_nvidia_gpu,
|
|
9
|
+
get_optimal_index_config,
|
|
10
|
+
log_index_configuration,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
# === detect_nvidia_gpu ===
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_detect_nvidia_gpu_force_cpu_from_config():
    """detect_nvidia_gpu should report False when the config sets force_cpu_mode."""
    gpu_section = SimpleNamespace(force_cpu_mode=True)
    cfg = SimpleNamespace(gpu_detection=gpu_section)

    detected = detect_nvidia_gpu(cfg)

    assert detected is False
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.subprocess.run")
def test_detect_nvidia_gpu_success(mock_run):
    """detect_nvidia_gpu should report True when the subprocess lists GPU names."""
    completed = MagicMock(returncode=0, stdout="NVIDIA A100\nNVIDIA RTX 3090")
    mock_run.return_value = completed

    detected = detect_nvidia_gpu()

    assert detected is True
    mock_run.assert_called_once()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.subprocess.run")
def test_detect_nvidia_gpu_no_output(mock_run):
    """detect_nvidia_gpu should report False when the subprocess prints nothing."""
    completed = MagicMock(returncode=0, stdout="")
    mock_run.return_value = completed

    detected = detect_nvidia_gpu()

    assert detected is False
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# === get_optimal_index_config ===
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_get_optimal_index_config_gpu():
    """With has_gpu=True the index type should be GPU_CAGRA with GPU search settings."""
    index_cfg, search_cfg = get_optimal_index_config(has_gpu=True, embedding_dim=768)

    assert index_cfg["index_type"] == "GPU_CAGRA"
    build_params = index_cfg["params"]
    assert "cache_dataset_on_device" in build_params
    assert search_cfg["params"]["search_width"] == 16
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_get_optimal_index_config_cpu():
    """With has_gpu=False the index type should be IVF_FLAT with CPU search settings."""
    index_cfg, search_cfg = get_optimal_index_config(has_gpu=False, embedding_dim=768)

    assert index_cfg["index_type"] == "IVF_FLAT"
    build_params = index_cfg["params"]
    assert build_params["nlist"] == 96  # 768 / 8 = 96
    assert search_cfg["params"]["nprobe"] == 16
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# === log_index_configuration ===
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.logger")
def test_log_index_configuration_logs_all(mock_logger):
    """log_index_configuration should emit at least five info-level log calls."""
    search_cfg = {"metric_type": "COSINE", "params": {"nprobe": 16}}
    index_cfg = {
        "index_type": "IVF_FLAT",
        "metric_type": "COSINE",
        "params": {"nlist": 128},
    }

    log_index_configuration(index_cfg, search_cfg)

    info_calls = mock_logger.info.call_count
    assert info_calls >= 5
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_get_optimal_index_config_gpu_without_cosine():
    """With has_gpu=True and use_cosine=False both metric types should be IP."""
    index_cfg, search_cfg = get_optimal_index_config(
        has_gpu=True, embedding_dim=768, use_cosine=False
    )

    assert index_cfg["index_type"] == "GPU_CAGRA"
    for cfg in (index_cfg, search_cfg):
        assert cfg["metric_type"] == "IP"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.logger")
def test_log_index_configuration_logs_cosine_simulation_note(mock_logger):
    """A GPU_CAGRA/IP config logged with use_cosine=True should mention COSINE simulation."""
    search_cfg = {
        "metric_type": "IP",
        "params": {"search_width": 16},
    }
    index_cfg = {
        "index_type": "GPU_CAGRA",
        "metric_type": "IP",
        "params": {"itopk_size": 128},
    }

    log_index_configuration(index_cfg, search_cfg, use_cosine=True)

    # Collect the first positional argument (the format string) of every info call.
    logged = []
    for recorded in mock_logger.info.call_args_list:
        logged.append(str(recorded.args[0]))

    assert any("simulate COSINE for GPU" in text for text in logged)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.logger")
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.subprocess.run")
def test_detect_nvidia_gpu_timeout_raises_false(mock_run, mock_logger):
    """A subprocess.TimeoutExpired should yield False and be logged at info level."""
    # The patched subprocess call times out.
    timeout_error = subprocess.TimeoutExpired(cmd="nvidia-smi", timeout=10)
    mock_run.side_effect = timeout_error

    detected = detect_nvidia_gpu()

    assert detected is False
    mock_logger.info.assert_called_with("NVIDIA GPU detection failed: %s", timeout_error)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.logger")
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.subprocess.run")
def test_detect_nvidia_gpu_file_not_found_raises_false(mock_run, mock_logger):
    """A FileNotFoundError should yield False and be logged at info level."""
    # The patched subprocess call behaves as if nvidia-smi is not installed.
    missing_binary = FileNotFoundError("nvidia-smi not found")
    mock_run.side_effect = missing_binary

    detected = detect_nvidia_gpu()

    assert detected is False
    mock_logger.info.assert_called_with("NVIDIA GPU detection failed: %s", missing_binary)
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""paper_loader tests for the load_all_papers function."""
|
|
2
|
+
|
|
3
|
+
from unittest.mock import MagicMock, patch
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from aiagents4pharma.talk2scholars.tools.pdf.utils.paper_loader import (
|
|
8
|
+
load_all_papers,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pytest.fixture
def articles():
    """Provide three sample articles: two with a pdf_url and one without."""
    sample = {}
    for paper_id, title in (("p1", "Paper 1"), ("p2", "Paper 2")):
        sample[paper_id] = {
            "pdf_url": f"http://example.com/{paper_id}.pdf",
            "title": title,
        }
    # Entry that deliberately lacks a pdf_url.
    sample["p3"] = {"title": "No PDF paper"}
    return sample
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.fixture
def mock_vector_store():
    """Provide a MagicMock vector store preloaded with paper 'p1' and CPU settings."""
    attributes = {
        "loaded_papers": {"p1"},
        "paper_metadata": {},
        "documents": {},
        "metadata_fields": ["title"],
        "config": {"embedding_batch_size": 1234},
        "has_gpu": False,
        "vector_store": MagicMock(),
    }
    return MagicMock(**attributes)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.paper_loader.add_papers_batch")
def test_all_papers_loaded_returns_early(mock_batch, request):
    """No batch should be submitted when every article id is already loaded."""
    article_data = request.getfixturevalue("articles")
    store = request.getfixturevalue("mock_vector_store")
    # Mark every article id as already present in the store.
    store.loaded_papers = set(article_data)

    load_all_papers(
        vector_store=store,
        articles=article_data,
        call_id="test_call",
        config={"embedding_batch_size": 1000},
        has_gpu=False,
    )

    mock_batch.assert_not_called()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.paper_loader.add_papers_batch")
def test_skips_papers_without_pdf(mock_batch, request):
    """Only the unloaded paper that has a pdf_url should be submitted for loading."""
    article_data = request.getfixturevalue("articles")
    store = request.getfixturevalue("mock_vector_store")
    store.loaded_papers = {"p2"}  # p1 not loaded, p3 has no pdf

    load_all_papers(
        vector_store=store,
        articles=article_data,
        call_id="test_call",
        config={"embedding_batch_size": 1000},
        has_gpu=False,
    )

    assert mock_batch.call_count == 1
    submitted = mock_batch.call_args.kwargs["papers_to_add"]
    assert len(submitted) == 1
    first_entry = submitted[0]
    assert first_entry[0] == "p1"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.paper_loader.add_papers_batch")
def test_gpu_parameters_used(mock_batch, request):
    """With has_gpu=True the batch call should carry the GPU flag, batch size and workers."""
    article_data = request.getfixturevalue("articles")
    store = request.getfixturevalue("mock_vector_store")
    store.has_gpu = True
    store.loaded_papers = set()

    load_all_papers(
        vector_store=store,
        articles=article_data,
        call_id="gpu_call",
        config={"embedding_batch_size": 2048},
        has_gpu=True,
    )

    batch_kwargs = mock_batch.call_args.kwargs
    assert batch_kwargs["has_gpu"] is True
    assert batch_kwargs["batch_size"] == 2048
    assert batch_kwargs["max_workers"] >= 4
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.paper_loader.add_papers_batch")
def test_cpu_parameters_used(mock_batch, request):
    """With has_gpu=False the batch call should carry the CPU flag, batch size and workers."""
    article_data = request.getfixturevalue("articles")
    store = request.getfixturevalue("mock_vector_store")
    store.has_gpu = False
    store.loaded_papers = set()

    load_all_papers(
        vector_store=store,
        articles=article_data,
        call_id="cpu_call",
        config={"embedding_batch_size": 512},
        has_gpu=False,
    )

    batch_kwargs = mock_batch.call_args.kwargs
    assert batch_kwargs["has_gpu"] is False
    assert batch_kwargs["batch_size"] == 512
    assert batch_kwargs["max_workers"] >= 3
|