aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +4 -8
- aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
- aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
- aiagents4pharma/talk2biomodels/api/ols.py +13 -10
- aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
- aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
- aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
- aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
- aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
- aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
- aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
- aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
- aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
- aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
- aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +4 -5
- aiagents4pharma/talk2cells/agents/__init__.py +3 -2
- aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
- aiagents4pharma/talk2cells/states/__init__.py +3 -2
- aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
- aiagents4pharma/talk2cells/tools/__init__.py +3 -2
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +334 -216
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +160 -97
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +87 -13
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +145 -142
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +14 -34
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
- aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
- aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
- aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
- aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
- aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
- aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
- aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
- aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
- aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
- aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
- aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
- aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
- aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
- aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
- aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
- aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
- aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
- aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
- aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
- aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
- aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
- aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
- aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
- aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
- aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/METADATA +115 -51
- aiagents4pharma-1.45.1.dist-info/RECORD +324 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/WHEEL +1 -2
- aiagents4pharma-1.44.0.dist-info/RECORD +0 -293
- aiagents4pharma-1.44.0.dist-info/top_level.txt +0 -1
- /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/licenses/LICENSE +0 -0
aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py}
RENAMED
@@ -89,9 +89,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
89
89
|
result = self.downloader.fetch_metadata("10.1101/2023.01.01.123456")
|
90
90
|
|
91
91
|
# Verify API call - should include /medrxiv/ and /na/json
|
92
|
-
expected_url = (
|
93
|
-
"https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.123456/na/json"
|
94
|
-
)
|
92
|
+
expected_url = "https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.123456/na/json"
|
95
93
|
mock_get.assert_called_once_with(expected_url, timeout=30)
|
96
94
|
mock_response.raise_for_status.assert_called_once()
|
97
95
|
|
@@ -133,9 +131,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
133
131
|
)
|
134
132
|
# Empty collection
|
135
133
|
self.assertEqual(
|
136
|
-
self.downloader.construct_pdf_url(
|
137
|
-
{"collection": []}, "10.1101/2023.01.01.123456"
|
138
|
-
),
|
134
|
+
self.downloader.construct_pdf_url({"collection": []}, "10.1101/2023.01.01.123456"),
|
139
135
|
"",
|
140
136
|
)
|
141
137
|
# Custom version
|
@@ -179,9 +175,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
179
175
|
"""Test metadata extraction without PDF download."""
|
180
176
|
metadata = self.sample_json_response
|
181
177
|
|
182
|
-
with patch.object(
|
183
|
-
self.downloader, "get_default_filename", return_value="default.pdf"
|
184
|
-
):
|
178
|
+
with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
|
185
179
|
result = self.downloader.extract_paper_metadata(
|
186
180
|
metadata, "10.1101/2023.01.01.123456", None
|
187
181
|
)
|
@@ -196,9 +190,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
196
190
|
metadata = {}
|
197
191
|
|
198
192
|
with self.assertRaises(RuntimeError) as context:
|
199
|
-
self.downloader.extract_paper_metadata(
|
200
|
-
metadata, "10.1101/2023.01.01.123456", None
|
201
|
-
)
|
193
|
+
self.downloader.extract_paper_metadata(metadata, "10.1101/2023.01.01.123456", None)
|
202
194
|
|
203
195
|
self.assertIn("No collection data found", str(context.exception))
|
204
196
|
|
@@ -224,9 +216,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
224
216
|
|
225
217
|
# Missing fields
|
226
218
|
paper_missing = {"title": "Test Paper"} # Missing others
|
227
|
-
got_missing = self.downloader.extract_basic_metadata_public(
|
228
|
-
paper_missing, "10.1101/test"
|
229
|
-
)
|
219
|
+
got_missing = self.downloader.extract_basic_metadata_public(paper_missing, "10.1101/test")
|
230
220
|
self.assertEqual(got_missing["Title"], "Test Paper")
|
231
221
|
self.assertEqual(got_missing["Authors"], [])
|
232
222
|
self.assertEqual(got_missing["Abstract"], "N/A")
|
@@ -261,9 +251,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
261
251
|
)
|
262
252
|
|
263
253
|
# Without result
|
264
|
-
with patch.object(
|
265
|
-
self.downloader, "get_default_filename", return_value="default.pdf"
|
266
|
-
):
|
254
|
+
with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
|
267
255
|
expected_without = {
|
268
256
|
"URL": "",
|
269
257
|
"pdf_url": "",
|
@@ -302,9 +290,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
302
290
|
def test_add_service_identifier(self):
|
303
291
|
"""Test _add_service_identifier method."""
|
304
292
|
entry = {}
|
305
|
-
self.downloader.add_service_identifier_public(
|
306
|
-
entry, "10.1101/2023.01.01.123456"
|
307
|
-
)
|
293
|
+
self.downloader.add_service_identifier_public(entry, "10.1101/2023.01.01.123456")
|
308
294
|
self.assertEqual(entry["DOI"], "10.1101/2023.01.01.123456")
|
309
295
|
self.assertEqual(entry["server"], "medrxiv")
|
310
296
|
|
@@ -368,9 +354,7 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
|
|
368
354
|
pdf_result = self.downloader.download_pdf_to_temp(pdf_url, identifier)
|
369
355
|
|
370
356
|
# Step 4: Extract metadata
|
371
|
-
paper_data = self.downloader.extract_paper_metadata(
|
372
|
-
metadata, identifier, pdf_result
|
373
|
-
)
|
357
|
+
paper_data = self.downloader.extract_paper_metadata(metadata, identifier, pdf_result)
|
374
358
|
|
375
359
|
# Verify the complete workflow
|
376
360
|
self.assertEqual(paper_data["Title"], "Integration Test Paper")
|
@@ -384,9 +368,7 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
|
|
384
368
|
"https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.123456/na/json",
|
385
369
|
timeout=30,
|
386
370
|
)
|
387
|
-
expected_pdf_url = (
|
388
|
-
"https://www.medrxiv.org/content/10.1101/2023.01.01.123456v2.full.pdf"
|
389
|
-
)
|
371
|
+
expected_pdf_url = "https://www.medrxiv.org/content/10.1101/2023.01.01.123456v2.full.pdf"
|
390
372
|
mock_download.assert_called_once_with(expected_pdf_url, identifier)
|
391
373
|
|
392
374
|
@patch("requests.get")
|
@@ -413,25 +395,15 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
|
|
413
395
|
with self.assertRaises(RuntimeError) as context:
|
414
396
|
self.downloader.fetch_metadata(identifier)
|
415
397
|
|
416
|
-
self.assertIn(
|
417
|
-
"No collection data found in medRxiv API response", str(context.exception)
|
418
|
-
)
|
398
|
+
self.assertIn("No collection data found in medRxiv API response", str(context.exception))
|
419
399
|
|
420
400
|
@patch("requests.get")
|
421
401
|
def test_multiple_identifiers_workflow(self, mock_get):
|
422
402
|
"""Test processing multiple identifiers."""
|
423
403
|
# Mock different responses for different DOIs
|
424
404
|
responses = [
|
425
|
-
{
|
426
|
-
"collection": [
|
427
|
-
{"title": "Paper 1", "version": "1", "authors": "Author 1"}
|
428
|
-
]
|
429
|
-
},
|
430
|
-
{
|
431
|
-
"collection": [
|
432
|
-
{"title": "Paper 2", "version": "2", "authors": "Author 2"}
|
433
|
-
]
|
434
|
-
},
|
405
|
+
{"collection": [{"title": "Paper 1", "version": "1", "authors": "Author 1"}]},
|
406
|
+
{"collection": [{"title": "Paper 2", "version": "2", "authors": "Author 2"}]},
|
435
407
|
]
|
436
408
|
|
437
409
|
mock_responses = []
|
@@ -448,12 +420,8 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
|
|
448
420
|
|
449
421
|
for identifier in identifiers:
|
450
422
|
metadata = self.downloader.fetch_metadata(identifier)
|
451
|
-
_ = self.downloader.construct_pdf_url(
|
452
|
-
metadata, identifier
|
453
|
-
) # ensure path covered
|
454
|
-
paper_data = self.downloader.extract_paper_metadata(
|
455
|
-
metadata, identifier, None
|
456
|
-
)
|
423
|
+
_ = self.downloader.construct_pdf_url(metadata, identifier) # ensure path covered
|
424
|
+
paper_data = self.downloader.extract_paper_metadata(metadata, identifier, None)
|
457
425
|
results[identifier] = paper_data
|
458
426
|
|
459
427
|
# Verify both papers were processed
|
@@ -529,6 +497,4 @@ class TestMedrxivSpecialCases(unittest.TestCase):
|
|
529
497
|
# Should handle Unicode properly
|
530
498
|
self.assertEqual(result["Title"], "Título com acentos é símbolos especiais")
|
531
499
|
self.assertEqual(result["Authors"], ["José María", "François Müller"])
|
532
|
-
self.assertEqual(
|
533
|
-
result["Abstract"], "Resumo com çaracteres especiais ñ símbolos"
|
534
|
-
)
|
500
|
+
self.assertEqual(result["Abstract"], "Resumo com çaracteres especiais ñ símbolos")
|
@@ -19,7 +19,7 @@ def fixture_chunks():
|
|
19
19
|
return [
|
20
20
|
Document(
|
21
21
|
page_content=f"chunk {i}",
|
22
|
-
metadata={"paper_id": f"P{i%2}", "relevance_score": 0.9 - 0.01 * i},
|
22
|
+
metadata={"paper_id": f"P{i % 2}", "relevance_score": 0.9 - 0.01 * i},
|
23
23
|
)
|
24
24
|
for i in range(10)
|
25
25
|
]
|
@@ -27,9 +27,7 @@ def fixture_chunks():
|
|
27
27
|
|
28
28
|
def test_rerank_chunks_short_input(chunks_fixture):
|
29
29
|
"""rerank_chunks with fewer chunks than top_k should return original."""
|
30
|
-
result = rerank_chunks(
|
31
|
-
chunks_fixture[:3], "What is cancer?", config=MagicMock(), top_k=5
|
32
|
-
)
|
30
|
+
result = rerank_chunks(chunks_fixture[:3], "What is cancer?", config=MagicMock(), top_k=5)
|
33
31
|
assert result == chunks_fixture[:3]
|
34
32
|
|
35
33
|
|
@@ -65,9 +63,7 @@ def test_rerank_chunks_success(mock_reranker_cls, chunks_fixture):
|
|
65
63
|
mock_config.reranker.api_key = "test_key"
|
66
64
|
mock_config.reranker.model = "test_model"
|
67
65
|
|
68
|
-
result = rerank_chunks(
|
69
|
-
chunks_fixture, "Explain mitochondria.", config=mock_config, top_k=5
|
70
|
-
)
|
66
|
+
result = rerank_chunks(chunks_fixture, "Explain mitochondria.", config=mock_config, top_k=5)
|
71
67
|
|
72
68
|
assert isinstance(result, list)
|
73
69
|
assert result == list(reversed(chunks_fixture))[:5]
|
@@ -77,9 +73,7 @@ def test_rerank_chunks_success(mock_reranker_cls, chunks_fixture):
|
|
77
73
|
|
78
74
|
|
79
75
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.NVIDIARerank")
|
80
|
-
def test_rerank_chunks_reranker_fails_raises_and_calls_compress(
|
81
|
-
mock_reranker_cls, chunks_fixture
|
82
|
-
):
|
76
|
+
def test_rerank_chunks_reranker_fails_raises_and_calls_compress(mock_reranker_cls, chunks_fixture):
|
83
77
|
"""
|
84
78
|
If NVIDIARerank.compress_documents raises RuntimeError:
|
85
79
|
- rerank_chunks should propagate the RuntimeError
|
@@ -94,9 +88,7 @@ def test_rerank_chunks_reranker_fails_raises_and_calls_compress(
|
|
94
88
|
mock_config.reranker.model = "reranker"
|
95
89
|
|
96
90
|
with pytest.raises(RuntimeError, match="API failure"):
|
97
|
-
rerank_chunks(
|
98
|
-
chunks_fixture, "How does light affect plants?", config=mock_config, top_k=3
|
99
|
-
)
|
91
|
+
rerank_chunks(chunks_fixture, "How does light affect plants?", config=mock_config, top_k=3)
|
100
92
|
|
101
93
|
reranker_instance.compress_documents.assert_called_once_with(
|
102
94
|
query="How does light affect plants?", documents=chunks_fixture
|
@@ -105,9 +97,7 @@ def test_rerank_chunks_reranker_fails_raises_and_calls_compress(
|
|
105
97
|
|
106
98
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.logger")
|
107
99
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.NVIDIARerank")
|
108
|
-
def test_rerank_chunks_debug_block_triggered(
|
109
|
-
mock_reranker_cls, mock_logger, chunks_fixture
|
110
|
-
):
|
100
|
+
def test_rerank_chunks_debug_block_triggered(mock_reranker_cls, mock_logger, chunks_fixture):
|
111
101
|
"""rerank_chunks should log debug info if debug logging is enabled."""
|
112
102
|
mock_logger.isEnabledFor.return_value = True
|
113
103
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
"""Tests for the PDF batch processor module."""
|
2
2
|
|
3
3
|
from unittest.mock import MagicMock, patch
|
4
|
+
|
4
5
|
import pytest
|
5
6
|
|
6
7
|
from aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor import (
|
@@ -22,18 +23,14 @@ def _args_fixture():
|
|
22
23
|
}
|
23
24
|
|
24
25
|
|
25
|
-
@patch(
|
26
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
|
27
|
-
)
|
26
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
|
28
27
|
def test_no_papers_to_add(mock_loader, args_fixture):
|
29
28
|
"""Test case where no papers are provided to add."""
|
30
29
|
add_papers_batch(papers_to_add=[], **args_fixture)
|
31
30
|
mock_loader.assert_not_called()
|
32
31
|
|
33
32
|
|
34
|
-
@patch(
|
35
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
|
36
|
-
)
|
33
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
|
37
34
|
def test_all_papers_already_loaded(mock_loader, args_fixture):
|
38
35
|
"""Test case where all papers are already loaded."""
|
39
36
|
args_fixture["loaded_papers"].update(["p1", "p2"])
|
@@ -44,9 +41,7 @@ def test_all_papers_already_loaded(mock_loader, args_fixture):
|
|
44
41
|
mock_loader.assert_not_called()
|
45
42
|
|
46
43
|
|
47
|
-
@patch(
|
48
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
|
49
|
-
)
|
44
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
|
50
45
|
def test_successful_batch_embedding(mock_loader, args_fixture):
|
51
46
|
"""Test case where papers are successfully loaded and embedded."""
|
52
47
|
mock_loader.return_value = [
|
@@ -70,9 +65,7 @@ def test_successful_batch_embedding(mock_loader, args_fixture):
|
|
70
65
|
mock_collection.flush.assert_called()
|
71
66
|
|
72
67
|
|
73
|
-
@patch(
|
74
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
|
75
|
-
)
|
68
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
|
76
69
|
def test_empty_chunks_after_loading(mock_loader, args_fixture):
|
77
70
|
"""Test case where no chunks are returned after loading PDF."""
|
78
71
|
mock_loader.return_value = []
|
@@ -82,9 +75,7 @@ def test_empty_chunks_after_loading(mock_loader, args_fixture):
|
|
82
75
|
args_fixture["vector_store"].add_documents.assert_not_called()
|
83
76
|
|
84
77
|
|
85
|
-
@patch(
|
86
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
|
87
|
-
)
|
78
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
|
88
79
|
def test_vector_store_insert_failure(mock_loader, args_fixture):
|
89
80
|
"""Test case where vector store insertion fails."""
|
90
81
|
mock_loader.return_value = [MagicMock(page_content="page")]
|
@@ -1,12 +1,12 @@
|
|
1
1
|
"""collection_manager for managing Milvus collections for PDF chunks."""
|
2
2
|
|
3
|
-
from unittest.mock import MagicMock, patch
|
4
3
|
from dataclasses import dataclass, field
|
4
|
+
from unittest.mock import MagicMock, patch
|
5
|
+
|
5
6
|
import pytest
|
6
7
|
|
7
8
|
from aiagents4pharma.talk2scholars.tools.pdf.utils import collection_manager
|
8
9
|
|
9
|
-
|
10
10
|
# -- Fixtures --
|
11
11
|
|
12
12
|
|
@@ -39,13 +39,20 @@ def index_params():
|
|
39
39
|
|
40
40
|
|
41
41
|
def set_collection_cache(key, value):
|
42
|
-
"""Set a mocked collection into the cache."""
|
43
|
-
getattr(collection_manager, "_collection_cache")
|
42
|
+
"""Set a mocked collection into the cache without replacing the whole attribute."""
|
43
|
+
cache = getattr(collection_manager, "_collection_cache", None)
|
44
|
+
if cache is None:
|
45
|
+
cache = {}
|
46
|
+
# Still need to attach it once if it doesn't exist
|
47
|
+
object.__setattr__(collection_manager, "_collection_cache", cache)
|
48
|
+
cache[key] = value
|
44
49
|
|
45
50
|
|
46
51
|
def clear_collection_cache(key):
|
47
52
|
"""Remove a mocked collection from the cache."""
|
48
|
-
getattr(collection_manager, "_collection_cache"
|
53
|
+
cache = getattr(collection_manager, "_collection_cache", None)
|
54
|
+
if cache is not None:
|
55
|
+
cache.pop(key, None)
|
49
56
|
|
50
57
|
|
51
58
|
# -- Tests --
|
@@ -124,9 +131,7 @@ def test_debug_collection_state_failure(mock_utility, mock_collection_cls, reque
|
|
124
131
|
mock_collection.indexes = []
|
125
132
|
mock_collection.num_entities = 10
|
126
133
|
|
127
|
-
mock_collection.schema = property(
|
128
|
-
lambda _: (_ for _ in ()).throw(Exception("bad schema"))
|
129
|
-
)
|
134
|
+
mock_collection.schema = property(lambda _: (_ for _ in ()).throw(Exception("bad schema")))
|
130
135
|
|
131
136
|
result = collection_manager.ensure_collection_exists(
|
132
137
|
"bad_collection", config, index, has_gpu=True
|
@@ -145,6 +150,24 @@ def test_ensure_collection_exception(mock_utility, mock_collection_cls, request)
|
|
145
150
|
mock_collection_cls.return_value = MagicMock()
|
146
151
|
|
147
152
|
with pytest.raises(RuntimeError, match="milvus failure"):
|
148
|
-
collection_manager.ensure_collection_exists(
|
149
|
-
|
150
|
-
|
153
|
+
collection_manager.ensure_collection_exists("fail_collection", config, index, has_gpu=False)
|
154
|
+
|
155
|
+
|
156
|
+
def test_set_collection_cache_initializes_when_missing(monkeypatch):
|
157
|
+
"""Ensure set_collection_cache initializes the cache when attribute is absent."""
|
158
|
+
# Remove the attribute if present (avoids W0212 and stays lint-clean)
|
159
|
+
monkeypatch.delattr(collection_manager, "_collection_cache", raising=False)
|
160
|
+
|
161
|
+
key = "init_case"
|
162
|
+
val = MagicMock()
|
163
|
+
|
164
|
+
# This should go through the None-branch and attach the cache via object.__setattr__
|
165
|
+
set_collection_cache(key, val)
|
166
|
+
|
167
|
+
# Verify cache got created and populated
|
168
|
+
cache = getattr(collection_manager, "_collection_cache", None)
|
169
|
+
assert isinstance(cache, dict)
|
170
|
+
assert cache.get(key) is val
|
171
|
+
|
172
|
+
# Cleanup
|
173
|
+
clear_collection_cache(key)
|
@@ -1,6 +1,7 @@
|
|
1
1
|
"""Unit tests for PDF document processing utilities."""
|
2
2
|
|
3
3
|
from unittest.mock import MagicMock, patch
|
4
|
+
|
4
5
|
import pytest
|
5
6
|
|
6
7
|
from aiagents4pharma.talk2scholars.tools.pdf.utils.document_processor import (
|
@@ -26,9 +27,7 @@ def _base_args_params():
|
|
26
27
|
"aiagents4pharma.talk2scholars.tools.pdf.utils.document_processor."
|
27
28
|
"RecursiveCharacterTextSplitter"
|
28
29
|
)
|
29
|
-
def test_load_and_split_pdf_success(
|
30
|
-
mock_splitter_cls, mock_loader_cls, base_args_params
|
31
|
-
):
|
30
|
+
def test_load_and_split_pdf_success(mock_splitter_cls, mock_loader_cls, base_args_params):
|
32
31
|
"""load_and_split_pdf should load and split PDF correctly."""
|
33
32
|
mock_doc = MagicMock()
|
34
33
|
mock_doc.metadata = {"page": 1}
|
@@ -1,6 +1,7 @@
|
|
1
1
|
"""generate_answer tests for the PDF tool"""
|
2
2
|
|
3
3
|
from unittest.mock import MagicMock
|
4
|
+
|
4
5
|
import pytest
|
5
6
|
|
6
7
|
from aiagents4pharma.talk2scholars.tools.pdf.utils.generate_answer import (
|
@@ -60,16 +61,12 @@ def test_generate_answer_success(chunks_fixture):
|
|
60
61
|
def test_generate_answer_raises_for_none_config(chunks_fixture):
|
61
62
|
"""generate_answer should raise ValueError for None config."""
|
62
63
|
mock_llm = MagicMock()
|
63
|
-
with pytest.raises(
|
64
|
-
ValueError, match="Configuration for generate_answer is required."
|
65
|
-
):
|
64
|
+
with pytest.raises(ValueError, match="Configuration for generate_answer is required."):
|
66
65
|
generate_answer("Why?", chunks_fixture, mock_llm, config=None)
|
67
66
|
|
68
67
|
|
69
68
|
def test_generate_answer_raises_for_missing_template(chunks_fixture):
|
70
69
|
"""generate_answer should raise ValueError for missing prompt_template in config."""
|
71
70
|
mock_llm = MagicMock()
|
72
|
-
with pytest.raises(
|
73
|
-
ValueError, match="The prompt_template is missing from the configuration."
|
74
|
-
):
|
71
|
+
with pytest.raises(ValueError, match="The prompt_template is missing from the configuration."):
|
75
72
|
generate_answer("Why?", chunks_fixture, mock_llm, config={})
|
aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py}
RENAMED
@@ -4,7 +4,6 @@ import subprocess
|
|
4
4
|
from types import SimpleNamespace
|
5
5
|
from unittest.mock import MagicMock, patch
|
6
6
|
|
7
|
-
|
8
7
|
from aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection import (
|
9
8
|
detect_nvidia_gpu,
|
10
9
|
get_optimal_index_config,
|
@@ -23,9 +22,7 @@ def test_detect_nvidia_gpu_force_cpu_from_config():
|
|
23
22
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.subprocess.run")
|
24
23
|
def test_detect_nvidia_gpu_success(mock_run):
|
25
24
|
"""detect_nvidia_gpu should return True if NVIDIA GPUs are detected."""
|
26
|
-
mock_run.return_value = MagicMock(
|
27
|
-
returncode=0, stdout="NVIDIA A100\nNVIDIA RTX 3090"
|
28
|
-
)
|
25
|
+
mock_run.return_value = MagicMock(returncode=0, stdout="NVIDIA A100\nNVIDIA RTX 3090")
|
29
26
|
|
30
27
|
assert detect_nvidia_gpu() is True
|
31
28
|
mock_run.assert_called_once()
|
@@ -44,9 +41,7 @@ def test_detect_nvidia_gpu_no_output(mock_run):
|
|
44
41
|
|
45
42
|
def test_get_optimal_index_config_gpu():
|
46
43
|
"""get_optimal_index_config should return GPU_CAGRA for GPU setup."""
|
47
|
-
index_params, search_params = get_optimal_index_config(
|
48
|
-
has_gpu=True, embedding_dim=768
|
49
|
-
)
|
44
|
+
index_params, search_params = get_optimal_index_config(has_gpu=True, embedding_dim=768)
|
50
45
|
|
51
46
|
assert index_params["index_type"] == "GPU_CAGRA"
|
52
47
|
assert "cache_dataset_on_device" in index_params["params"]
|
@@ -55,9 +50,7 @@ def test_get_optimal_index_config_gpu():
|
|
55
50
|
|
56
51
|
def test_get_optimal_index_config_cpu():
|
57
52
|
"""get_optimal_index_config should return IVF_FLAT for CPU setup."""
|
58
|
-
index_params, search_params = get_optimal_index_config(
|
59
|
-
has_gpu=False, embedding_dim=768
|
60
|
-
)
|
53
|
+
index_params, search_params = get_optimal_index_config(has_gpu=False, embedding_dim=768)
|
61
54
|
|
62
55
|
assert index_params["index_type"] == "IVF_FLAT"
|
63
56
|
assert index_params["params"]["nlist"] == 96 # 768 / 8 = 96
|
@@ -121,9 +114,7 @@ def test_detect_nvidia_gpu_timeout_raises_false(mock_run, mock_logger):
|
|
121
114
|
|
122
115
|
result = detect_nvidia_gpu()
|
123
116
|
assert result is False
|
124
|
-
mock_logger.info.assert_called_with(
|
125
|
-
"NVIDIA GPU detection failed: %s", mock_run.side_effect
|
126
|
-
)
|
117
|
+
mock_logger.info.assert_called_with("NVIDIA GPU detection failed: %s", mock_run.side_effect)
|
127
118
|
|
128
119
|
|
129
120
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.gpu_detection.logger")
|
@@ -135,6 +126,4 @@ def test_detect_nvidia_gpu_file_not_found_raises_false(mock_run, mock_logger):
|
|
135
126
|
|
136
127
|
result = detect_nvidia_gpu()
|
137
128
|
assert result is False
|
138
|
-
mock_logger.info.assert_called_with(
|
139
|
-
"NVIDIA GPU detection failed: %s", mock_run.side_effect
|
140
|
-
)
|
129
|
+
mock_logger.info.assert_called_with("NVIDIA GPU detection failed: %s", mock_run.side_effect)
|
aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py}
RENAMED
@@ -1,6 +1,7 @@
|
|
1
1
|
"""pdf rag pipeline tests."""
|
2
2
|
|
3
3
|
from unittest.mock import MagicMock, patch
|
4
|
+
|
4
5
|
import pytest
|
5
6
|
from langchain_core.documents import Document
|
6
7
|
|
@@ -25,18 +26,13 @@ def _base_config_fixture():
|
|
25
26
|
def _mock_docs_fixture():
|
26
27
|
"""Simulates PDF document chunks."""
|
27
28
|
return [
|
28
|
-
Document(page_content=f"chunk {i}", metadata={"paper_id": f"P{i % 2}"})
|
29
|
-
for i in range(10)
|
29
|
+
Document(page_content=f"chunk {i}", metadata={"paper_id": f"P{i % 2}"}) for i in range(10)
|
30
30
|
]
|
31
31
|
|
32
32
|
|
33
33
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.rerank_chunks")
|
34
|
-
@patch(
|
35
|
-
|
36
|
-
)
|
37
|
-
def test_rag_pipeline_gpu_path(
|
38
|
-
mock_retrieve, mock_rerank, base_config_fixture, mock_docs_fixture
|
39
|
-
):
|
34
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.retrieve_relevant_chunks")
|
35
|
+
def test_rag_pipeline_gpu_path(mock_retrieve, mock_rerank, base_config_fixture, mock_docs_fixture):
|
40
36
|
"""test RAG pipeline with GPU path."""
|
41
37
|
mock_retrieve.return_value = mock_docs_fixture
|
42
38
|
mock_rerank.return_value = mock_docs_fixture[:5]
|
@@ -55,12 +51,8 @@ def test_rag_pipeline_gpu_path(
|
|
55
51
|
|
56
52
|
|
57
53
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.rerank_chunks")
|
58
|
-
@patch(
|
59
|
-
|
60
|
-
)
|
61
|
-
def test_rag_pipeline_cpu_path(
|
62
|
-
mock_retrieve, mock_rerank, base_config_fixture, mock_docs_fixture
|
63
|
-
):
|
54
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.retrieve_relevant_chunks")
|
55
|
+
def test_rag_pipeline_cpu_path(mock_retrieve, mock_rerank, base_config_fixture, mock_docs_fixture):
|
64
56
|
"""rag pipeline with CPU path."""
|
65
57
|
mock_retrieve.return_value = mock_docs_fixture
|
66
58
|
mock_rerank.return_value = mock_docs_fixture[:5]
|
@@ -79,9 +71,7 @@ def test_rag_pipeline_cpu_path(
|
|
79
71
|
|
80
72
|
|
81
73
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.rerank_chunks")
|
82
|
-
@patch(
|
83
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.retrieve_relevant_chunks"
|
84
|
-
)
|
74
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.retrieve_relevant_chunks")
|
85
75
|
def test_rag_pipeline_empty_results(mock_retrieve, mock_rerank, base_config_fixture):
|
86
76
|
"""rag pipeline with no results."""
|
87
77
|
mock_retrieve.return_value = []
|
@@ -21,8 +21,7 @@ def mock_vector_store():
|
|
21
21
|
def mock_chunks():
|
22
22
|
"""Fixture to simulate PDF chunks."""
|
23
23
|
return [
|
24
|
-
Document(page_content=f"chunk {i}", metadata={"paper_id": f"P{i%2}"})
|
25
|
-
for i in range(5)
|
24
|
+
Document(page_content=f"chunk {i}", metadata={"paper_id": f"P{i % 2}"}) for i in range(5)
|
26
25
|
]
|
27
26
|
|
28
27
|
|
@@ -74,9 +73,7 @@ def test_retrieve_chunks_with_filter(mock_logger, request):
|
|
74
73
|
mock_logger.debug = MagicMock()
|
75
74
|
vector_store.max_marginal_relevance_search.return_value = chunks
|
76
75
|
|
77
|
-
results = retrieve_relevant_chunks(
|
78
|
-
vector_store, query="filter test", paper_ids=["P1"], top_k=3
|
79
|
-
)
|
76
|
+
results = retrieve_relevant_chunks(vector_store, query="filter test", paper_ids=["P1"], top_k=3)
|
80
77
|
assert results == chunks
|
81
78
|
args, kwargs = vector_store.max_marginal_relevance_search.call_args
|
82
79
|
assert len(args) == 0
|
@@ -115,9 +112,7 @@ def test_retrieve_chunks_default_search_params(mock_logger, request):
|
|
115
112
|
)
|
116
113
|
|
117
114
|
assert results == chunks
|
118
|
-
mock_logger.debug.assert_any_call(
|
119
|
-
"Using default search parameters (no hardware optimization)"
|
120
|
-
)
|
115
|
+
mock_logger.debug.assert_any_call("Using default search parameters (no hardware optimization)")
|
121
116
|
|
122
117
|
|
123
118
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
|
@@ -191,7 +186,5 @@ def test_retrieve_chunks_with_scores_not_implemented(mock_logger, request):
|
|
191
186
|
retrieve_relevant_chunks_with_scores(
|
192
187
|
vector_store=vector_store, query="fail test", top_k=1, score_threshold=0.0
|
193
188
|
)
|
194
|
-
assert "Vector store does not support similarity_search_with_score" in str(
|
195
|
-
excinfo.value
|
196
|
-
)
|
189
|
+
assert "Vector store does not support similarity_search_with_score" in str(excinfo.value)
|
197
190
|
mock_logger.debug.assert_called_with("GPU-accelerated similarity search enabled")
|
@@ -1,6 +1,9 @@
|
|
1
|
-
"""
|
1
|
+
"""
|
2
|
+
Tests for singleton_manager: manages vector store connections and event loops.
|
3
|
+
"""
|
2
4
|
|
3
5
|
from unittest.mock import MagicMock, patch
|
6
|
+
|
4
7
|
import pytest
|
5
8
|
from pymilvus.exceptions import MilvusException
|
6
9
|
|
@@ -19,21 +22,21 @@ def test_singleton_instance_identity():
|
|
19
22
|
assert a is b
|
20
23
|
|
21
24
|
|
22
|
-
@patch(
|
23
|
-
|
24
|
-
)
|
25
|
-
def test_detect_gpu_once(mock_detect):
|
25
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.detect_nvidia_gpu")
|
26
|
+
def test_detect_gpu_once(mock_detect, monkeypatch):
|
26
27
|
"""Ensure GPU detection is cached."""
|
27
28
|
mock_detect.return_value = True
|
28
29
|
singleton = VectorstoreSingleton()
|
29
|
-
|
30
|
+
|
31
|
+
# Reset GPU detection cache safely
|
32
|
+
monkeypatch.setattr(VectorstoreSingleton, "_gpu_detected", None, raising=False)
|
30
33
|
|
31
34
|
result = singleton.detect_gpu_once()
|
32
35
|
assert result is True
|
33
36
|
|
37
|
+
# Second call should use cached value; detect_nvidia_gpu called only once
|
34
38
|
result2 = singleton.detect_gpu_once()
|
35
39
|
assert result2 is True
|
36
|
-
|
37
40
|
mock_detect.assert_called_once()
|
38
41
|
|
39
42
|
|
@@ -63,20 +66,20 @@ def test_get_connection_creates_connection(_, mock_db, mock_conns):
|
|
63
66
|
|
64
67
|
|
65
68
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.Milvus")
|
66
|
-
def test_get_vector_store_creates_if_missing(mock_milvus):
|
69
|
+
def test_get_vector_store_creates_if_missing(mock_milvus, monkeypatch):
|
67
70
|
"""get_vector_store should create a new vector store if missing."""
|
68
71
|
singleton = VectorstoreSingleton()
|
69
|
-
|
70
|
-
|
72
|
+
|
73
|
+
# Clear caches safely
|
74
|
+
monkeypatch.setattr(VectorstoreSingleton, "_vector_stores", {}, raising=False)
|
75
|
+
monkeypatch.setattr(VectorstoreSingleton, "_event_loops", {}, raising=False)
|
71
76
|
|
72
77
|
mock_embed = MagicMock()
|
73
78
|
connection_args = {"host": "localhost", "port": 19530}
|
74
79
|
|
75
80
|
vs = singleton.get_vector_store("collection1", mock_embed, connection_args)
|
76
81
|
|
77
|
-
|
78
|
-
assert vs is vector_stores["collection1"]
|
79
|
-
assert "collection1" in vector_stores
|
82
|
+
assert vs is not None
|
80
83
|
mock_milvus.assert_called_once()
|
81
84
|
|
82
85
|
|
@@ -114,17 +117,15 @@ def test_get_vectorstore_force_new(mock_vectorstore_cls):
|
|
114
117
|
assert vs1 != vs2
|
115
118
|
|
116
119
|
|
117
|
-
@patch(
|
118
|
-
|
119
|
-
)
|
120
|
-
@patch(
|
121
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections.has_connection"
|
122
|
-
)
|
120
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections.connect")
|
121
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections.has_connection")
|
123
122
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.db")
|
124
|
-
def test_get_connection_milvus_error(_, mock_has_connection, mock_connect):
|
123
|
+
def test_get_connection_milvus_error(_, mock_has_connection, mock_connect, monkeypatch):
|
125
124
|
"""get_connection should raise MilvusException on connection failure."""
|
126
125
|
manager = VectorstoreSingleton()
|
127
|
-
|
126
|
+
|
127
|
+
# Reset connections cache safely
|
128
|
+
monkeypatch.setattr(VectorstoreSingleton, "_connections", {}, raising=False)
|
128
129
|
|
129
130
|
mock_has_connection.return_value = False
|
130
131
|
mock_connect.side_effect = MilvusException("Connection failed")
|
@@ -133,10 +134,12 @@ def test_get_connection_milvus_error(_, mock_has_connection, mock_connect):
|
|
133
134
|
manager.get_connection("localhost", 19530, "test_db")
|
134
135
|
|
135
136
|
|
136
|
-
def test_get_event_loop_creates_new_loop_on_closed():
|
137
|
+
def test_get_event_loop_creates_new_loop_on_closed(monkeypatch):
|
137
138
|
"""Ensure get_event_loop creates a new loop if current one is closed."""
|
138
139
|
manager = VectorstoreSingleton()
|
139
|
-
|
140
|
+
|
141
|
+
# Clear event loops safely
|
142
|
+
monkeypatch.setattr(VectorstoreSingleton, "_event_loops", {}, raising=False)
|
140
143
|
|
141
144
|
mock_loop = MagicMock()
|
142
145
|
mock_loop.is_closed.return_value = True
|