aiagents4pharma 1.43.0__py3-none-any.whl → 1.45.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +4 -8
- aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
- aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
- aiagents4pharma/talk2biomodels/api/ols.py +13 -10
- aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
- aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
- aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
- aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
- aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
- aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
- aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
- aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
- aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
- aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
- aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +4 -5
- aiagents4pharma/talk2cells/agents/__init__.py +3 -2
- aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
- aiagents4pharma/talk2cells/states/__init__.py +3 -2
- aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
- aiagents4pharma/talk2cells/tools/__init__.py +3 -2
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +17 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +736 -413
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +442 -42
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +10 -6
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +245 -205
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +218 -81
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
- aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
- aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
- aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
- aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
- aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
- aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
- aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
- aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
- aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
- aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
- aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
- aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
- aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
- aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
- aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
- aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
- aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
- aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
- aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
- aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
- aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
- aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
- aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
- aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
- aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
- {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/METADATA +115 -50
- aiagents4pharma-1.45.0.dist-info/RECORD +324 -0
- {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/WHEEL +1 -2
- aiagents4pharma-1.43.0.dist-info/RECORD +0 -293
- aiagents4pharma-1.43.0.dist-info/top_level.txt +0 -1
- /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
- {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/licenses/LICENSE +0 -0
@@ -3,11 +3,11 @@ Unit tests for BasePaperDownloader.
|
|
3
3
|
Tests the abstract base class functionality and common methods.
|
4
4
|
"""
|
5
5
|
|
6
|
+
import inspect
|
6
7
|
import unittest
|
7
|
-
from typing import Any
|
8
|
+
from typing import Any
|
8
9
|
from unittest.mock import Mock, patch
|
9
10
|
|
10
|
-
import inspect
|
11
11
|
import requests
|
12
12
|
|
13
13
|
from aiagents4pharma.talk2scholars.tools.paper_download.utils.base_paper_downloader import (
|
@@ -31,8 +31,8 @@ class ConcretePaperDownloader(BasePaperDownloader):
|
|
31
31
|
return f"https://test.com/{identifier}.pdf"
|
32
32
|
|
33
33
|
def extract_paper_metadata(
|
34
|
-
self, metadata: Any, identifier: str, pdf_result:
|
35
|
-
) ->
|
34
|
+
self, metadata: Any, identifier: str, pdf_result: tuple[str, str] | None
|
35
|
+
) -> dict[str, Any]:
|
36
36
|
"""Concrete implementation for testing."""
|
37
37
|
return {
|
38
38
|
"Title": f"Test Paper {identifier}",
|
@@ -53,21 +53,19 @@ class ConcretePaperDownloader(BasePaperDownloader):
|
|
53
53
|
"""Concrete implementation for testing."""
|
54
54
|
return f"test_{identifier}.pdf"
|
55
55
|
|
56
|
-
def _get_paper_identifier_info(self, paper:
|
56
|
+
def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
|
57
57
|
"""Concrete implementation for testing."""
|
58
58
|
return f" ({paper.get('identifier', 'unknown')})"
|
59
59
|
|
60
|
-
def _add_service_identifier(self, entry:
|
60
|
+
def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
|
61
61
|
"""Concrete implementation for testing."""
|
62
62
|
entry["test_id"] = identifier
|
63
63
|
|
64
|
-
def get_paper_identifier_info_public(self, paper:
|
64
|
+
def get_paper_identifier_info_public(self, paper: dict[str, Any]) -> str:
|
65
65
|
"""Public wrapper to access protected identifier info for tests."""
|
66
66
|
return self._get_paper_identifier_info(paper)
|
67
67
|
|
68
|
-
def add_service_identifier_public(
|
69
|
-
self, entry: Dict[str, Any], identifier: str
|
70
|
-
) -> None:
|
68
|
+
def add_service_identifier_public(self, entry: dict[str, Any], identifier: str) -> None:
|
71
69
|
"""Public wrapper to access protected service identifier for tests."""
|
72
70
|
self._add_service_identifier(entry, identifier)
|
73
71
|
|
@@ -109,9 +107,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
109
107
|
mock_response = Mock()
|
110
108
|
mock_response.raise_for_status = Mock()
|
111
109
|
mock_response.iter_content.return_value = [b"PDF chunk 1", b"PDF chunk 2"]
|
112
|
-
mock_response.headers = {
|
113
|
-
"Content-Disposition": 'attachment; filename="paper.pdf"'
|
114
|
-
}
|
110
|
+
mock_response.headers = {"Content-Disposition": 'attachment; filename="paper.pdf"'}
|
115
111
|
mock_get.return_value = mock_response
|
116
112
|
|
117
113
|
# Mock temporary file
|
@@ -121,9 +117,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
121
117
|
mock_temp_file.__exit__ = Mock(return_value=None)
|
122
118
|
mock_tempfile.return_value = mock_temp_file
|
123
119
|
|
124
|
-
result = self.downloader.download_pdf_to_temp(
|
125
|
-
"https://test.com/paper.pdf", "12345"
|
126
|
-
)
|
120
|
+
result = self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
|
127
121
|
|
128
122
|
# Verify result
|
129
123
|
self.assertEqual(result, ("/tmp/test.pdf", "paper.pdf"))
|
@@ -153,9 +147,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
153
147
|
"""Test PDF download with network error."""
|
154
148
|
mock_get.side_effect = requests.RequestException("Network error")
|
155
149
|
|
156
|
-
result = self.downloader.download_pdf_to_temp(
|
157
|
-
"https://test.com/paper.pdf", "12345"
|
158
|
-
)
|
150
|
+
result = self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
|
159
151
|
|
160
152
|
self.assertIsNone(result)
|
161
153
|
|
@@ -235,12 +227,8 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
235
227
|
raise requests.RequestException("Fetch failed")
|
236
228
|
return {"test": identifier}
|
237
229
|
|
238
|
-
with patch.object(
|
239
|
-
self.downloader, "
|
240
|
-
):
|
241
|
-
with patch.object(
|
242
|
-
self.downloader, "download_pdf_to_temp", return_value=None
|
243
|
-
):
|
230
|
+
with patch.object(self.downloader, "fetch_metadata", side_effect=mock_fetch_metadata):
|
231
|
+
with patch.object(self.downloader, "download_pdf_to_temp", return_value=None):
|
244
232
|
result = self.downloader.process_identifiers(identifiers)
|
245
233
|
|
246
234
|
# Valid identifier should succeed
|
@@ -316,11 +304,11 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
316
304
|
"""Test building summary with long list (should show only top 3)."""
|
317
305
|
article_data = {}
|
318
306
|
for i in range(5): # More than 3
|
319
|
-
article_data[f"{i+1}"] = {
|
320
|
-
"Title": f"Paper {i+1}",
|
321
|
-
"identifier": f"{i+1}",
|
307
|
+
article_data[f"{i + 1}"] = {
|
308
|
+
"Title": f"Paper {i + 1}",
|
309
|
+
"identifier": f"{i + 1}",
|
322
310
|
"access_type": "open_access_downloaded",
|
323
|
-
"Abstract": f"Abstract {i+1}",
|
311
|
+
"Abstract": f"Abstract {i + 1}",
|
324
312
|
}
|
325
313
|
|
326
314
|
result = self.downloader.build_summary(article_data)
|
@@ -388,9 +376,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
388
376
|
BasePaperDownloader.construct_pdf_url(self.downloader, {}, "test")
|
389
377
|
|
390
378
|
with self.assertRaises(NotImplementedError):
|
391
|
-
BasePaperDownloader.extract_paper_metadata(
|
392
|
-
self.downloader, {}, "test", None
|
393
|
-
)
|
379
|
+
BasePaperDownloader.extract_paper_metadata(self.downloader, {}, "test", None)
|
394
380
|
|
395
381
|
with self.assertRaises(NotImplementedError):
|
396
382
|
BasePaperDownloader.get_service_name(self.downloader)
|
@@ -402,15 +388,13 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
402
388
|
BasePaperDownloader.get_default_filename(self.downloader, "test")
|
403
389
|
|
404
390
|
# Protected abstract methods: call via getattr to avoid W0212 while still executing code.
|
391
|
+
method_name_1 = "_get_paper_identifier_info"
|
405
392
|
with self.assertRaises(NotImplementedError):
|
406
|
-
getattr(BasePaperDownloader,
|
407
|
-
self.downloader, {}
|
408
|
-
)
|
393
|
+
getattr(BasePaperDownloader, method_name_1)(self.downloader, {})
|
409
394
|
|
395
|
+
method_name_2 = "_add_service_identifier"
|
410
396
|
with self.assertRaises(NotImplementedError):
|
411
|
-
getattr(BasePaperDownloader, "
|
412
|
-
self.downloader, {}, "test"
|
413
|
-
)
|
397
|
+
getattr(BasePaperDownloader, method_name_2)(self.downloader, {}, "test")
|
414
398
|
|
415
399
|
@patch("tempfile.NamedTemporaryFile")
|
416
400
|
@patch("requests.get")
|
@@ -420,9 +404,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
420
404
|
mock_response = Mock()
|
421
405
|
mock_response.raise_for_status = Mock()
|
422
406
|
mock_response.iter_content.return_value = [b"PDF data"]
|
423
|
-
mock_response.headers = {
|
424
|
-
"Content-Disposition": 'attachment; filename="paper.pdf"'
|
425
|
-
}
|
407
|
+
mock_response.headers = {"Content-Disposition": 'attachment; filename="paper.pdf"'}
|
426
408
|
mock_get.return_value = mock_response
|
427
409
|
|
428
410
|
# Mock temporary file
|
@@ -434,9 +416,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
434
416
|
|
435
417
|
# Patch re.search to raise an exception during filename extraction
|
436
418
|
with patch("re.search", side_effect=requests.RequestException("Regex error")):
|
437
|
-
result = self.downloader.download_pdf_to_temp(
|
438
|
-
"https://test.com/paper.pdf", "12345"
|
439
|
-
)
|
419
|
+
result = self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
|
440
420
|
|
441
421
|
# Should still succeed but use default filename due to exception
|
442
422
|
self.assertEqual(result, ("/tmp/test.pdf", "test_12345.pdf"))
|
@@ -507,9 +487,7 @@ class TestBasePaperDownloaderEdgeCases(unittest.TestCase):
|
|
507
487
|
mock_temp_file.__exit__ = Mock(return_value=None)
|
508
488
|
mock_tempfile.return_value = mock_temp_file
|
509
489
|
|
510
|
-
with patch.object(
|
511
|
-
self.downloader, "get_default_filename", return_value="default.pdf"
|
512
|
-
):
|
490
|
+
with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
|
513
491
|
# Call without assigning to avoid 'unused-variable'
|
514
492
|
self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
|
515
493
|
|
aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py}
RENAMED
@@ -122,9 +122,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
122
122
|
result = self.downloader.fetch_metadata("10.1101/2023.01.01.123456")
|
123
123
|
|
124
124
|
# Verify API call
|
125
|
-
expected_url =
|
126
|
-
"https://api.biorxiv.org/details/biorxiv/10.1101/2023.01.01.123456/na/json"
|
127
|
-
)
|
125
|
+
expected_url = "https://api.biorxiv.org/details/biorxiv/10.1101/2023.01.01.123456/na/json"
|
128
126
|
mock_scraper.get.assert_called_once_with(expected_url, timeout=30)
|
129
127
|
mock_response.raise_for_status.assert_called_once()
|
130
128
|
|
@@ -171,9 +169,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
171
169
|
# Default version
|
172
170
|
meta_default = {"collection": [{"title": "Test Paper"}]}
|
173
171
|
self.assertEqual(
|
174
|
-
self.downloader.construct_pdf_url(
|
175
|
-
meta_default, "10.1101/2023.01.01.123456"
|
176
|
-
),
|
172
|
+
self.downloader.construct_pdf_url(meta_default, "10.1101/2023.01.01.123456"),
|
177
173
|
"https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf",
|
178
174
|
)
|
179
175
|
|
@@ -195,9 +191,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
195
191
|
b"PDF content chunk 1",
|
196
192
|
b"PDF content chunk 2",
|
197
193
|
]
|
198
|
-
mock_pdf_response.headers = {
|
199
|
-
"Content-Disposition": 'attachment; filename="paper.pdf"'
|
200
|
-
}
|
194
|
+
mock_pdf_response.headers = {"Content-Disposition": 'attachment; filename="paper.pdf"'}
|
201
195
|
|
202
196
|
mock_scraper.get.side_effect = [mock_landing_response, mock_pdf_response]
|
203
197
|
|
@@ -209,9 +203,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
209
203
|
mock_tempfile.return_value = mock_temp_file
|
210
204
|
|
211
205
|
pdf_url = "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
|
212
|
-
result = self.downloader.download_pdf_to_temp(
|
213
|
-
pdf_url, "10.1101/2023.01.01.123456"
|
214
|
-
)
|
206
|
+
result = self.downloader.download_pdf_to_temp(pdf_url, "10.1101/2023.01.01.123456")
|
215
207
|
|
216
208
|
# Verify result
|
217
209
|
self.assertEqual(result, ("/tmp/test.pdf", "paper.pdf"))
|
@@ -263,9 +255,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
263
255
|
mock_scraper.get.return_value = ok
|
264
256
|
|
265
257
|
# Case 1: with .full.pdf -> should visit landing
|
266
|
-
pdf_url_full =
|
267
|
-
"https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
|
268
|
-
)
|
258
|
+
pdf_url_full = "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
|
269
259
|
self.downloader.visit_landing_page_public(
|
270
260
|
mock_scraper, pdf_url_full, "10.1101/2023.01.01.123456"
|
271
261
|
)
|
@@ -324,9 +314,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
324
314
|
), # trigger exception path
|
325
315
|
]
|
326
316
|
for headers, expected, raise_regex in cases:
|
327
|
-
with self.subTest(
|
328
|
-
headers=headers, expected=expected, raise_regex=raise_regex
|
329
|
-
):
|
317
|
+
with self.subTest(headers=headers, expected=expected, raise_regex=raise_regex):
|
330
318
|
resp = Mock()
|
331
319
|
resp.headers = headers
|
332
320
|
if raise_regex:
|
@@ -339,18 +327,14 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
339
327
|
"get_default_filename",
|
340
328
|
return_value="default.pdf",
|
341
329
|
):
|
342
|
-
got = self.downloader.extract_filename_public(
|
343
|
-
resp, "10.1101/test"
|
344
|
-
)
|
330
|
+
got = self.downloader.extract_filename_public(resp, "10.1101/test")
|
345
331
|
else:
|
346
332
|
with patch.object(
|
347
333
|
self.downloader,
|
348
334
|
"get_default_filename",
|
349
335
|
return_value="default.pdf",
|
350
336
|
):
|
351
|
-
got = self.downloader.extract_filename_public(
|
352
|
-
resp, "10.1101/test"
|
353
|
-
)
|
337
|
+
got = self.downloader.extract_filename_public(resp, "10.1101/test")
|
354
338
|
self.assertEqual(got, expected)
|
355
339
|
|
356
340
|
def test_extract_paper_metadata_success(self):
|
@@ -396,18 +380,14 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
396
380
|
self.assertEqual(result["URL"], "")
|
397
381
|
self.assertEqual(result["pdf_url"], "")
|
398
382
|
self.assertEqual(result["temp_file_path"], "")
|
399
|
-
self.assertEqual(
|
400
|
-
result["filename"], "10_1101_2023_01_01_123456.pdf"
|
401
|
-
) # Default filename
|
383
|
+
self.assertEqual(result["filename"], "10_1101_2023_01_01_123456.pdf") # Default filename
|
402
384
|
|
403
385
|
def test_extract_paper_metadata_no_collection(self):
|
404
386
|
"""Test metadata extraction with missing collection."""
|
405
387
|
metadata = {}
|
406
388
|
|
407
389
|
with self.assertRaises(RuntimeError) as context:
|
408
|
-
self.downloader.extract_paper_metadata(
|
409
|
-
metadata, "10.1101/2023.01.01.123456", None
|
410
|
-
)
|
390
|
+
self.downloader.extract_paper_metadata(metadata, "10.1101/2023.01.01.123456", None)
|
411
391
|
|
412
392
|
self.assertIn("No collection data found", str(context.exception))
|
413
393
|
|
@@ -415,9 +395,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
415
395
|
"""Test basic metadata extraction helper method."""
|
416
396
|
paper = self.sample_json_response["collection"][0]
|
417
397
|
|
418
|
-
result = self.downloader.extract_basic_metadata_public(
|
419
|
-
paper, "10.1101/2023.01.01.123456"
|
420
|
-
)
|
398
|
+
result = self.downloader.extract_basic_metadata_public(paper, "10.1101/2023.01.01.123456")
|
421
399
|
|
422
400
|
expected = {
|
423
401
|
"Title": "Test BioRxiv Paper",
|
@@ -468,9 +446,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
468
446
|
"""Test _add_service_identifier method."""
|
469
447
|
entry = {}
|
470
448
|
|
471
|
-
self.downloader.add_service_identifier_public(
|
472
|
-
entry, "10.1101/2023.01.01.123456"
|
473
|
-
)
|
449
|
+
self.downloader.add_service_identifier_public(entry, "10.1101/2023.01.01.123456")
|
474
450
|
|
475
451
|
self.assertEqual(entry["DOI"], "10.1101/2023.01.01.123456")
|
476
452
|
self.assertEqual(entry["server"], "biorxiv")
|
@@ -561,9 +537,7 @@ class TestBiorxivDownloaderIntegration(unittest.TestCase):
|
|
561
537
|
pdf_result = self.downloader.download_pdf_to_temp(pdf_url, identifier)
|
562
538
|
|
563
539
|
# Step 4: Extract metadata
|
564
|
-
paper_data = self.downloader.extract_paper_metadata(
|
565
|
-
metadata, identifier, pdf_result
|
566
|
-
)
|
540
|
+
paper_data = self.downloader.extract_paper_metadata(metadata, identifier, pdf_result)
|
567
541
|
|
568
542
|
# Verify the complete workflow
|
569
543
|
self.assertEqual(paper_data["Title"], "Integration Test Paper")
|
@@ -571,9 +545,7 @@ class TestBiorxivDownloaderIntegration(unittest.TestCase):
|
|
571
545
|
self.assertEqual(paper_data["access_type"], "open_access_downloaded")
|
572
546
|
self.assertEqual(paper_data["temp_file_path"], "/tmp/integration.pdf")
|
573
547
|
|
574
|
-
expected_pdf_url =
|
575
|
-
"https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
|
576
|
-
)
|
548
|
+
expected_pdf_url = "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
|
577
549
|
self.assertEqual(pdf_url, expected_pdf_url)
|
578
550
|
|
579
551
|
# Verify 3 calls: metadata, landing page, PDF
|
aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py}
RENAMED
@@ -89,9 +89,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
89
89
|
result = self.downloader.fetch_metadata("10.1101/2023.01.01.123456")
|
90
90
|
|
91
91
|
# Verify API call - should include /medrxiv/ and /na/json
|
92
|
-
expected_url =
|
93
|
-
"https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.123456/na/json"
|
94
|
-
)
|
92
|
+
expected_url = "https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.123456/na/json"
|
95
93
|
mock_get.assert_called_once_with(expected_url, timeout=30)
|
96
94
|
mock_response.raise_for_status.assert_called_once()
|
97
95
|
|
@@ -133,9 +131,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
133
131
|
)
|
134
132
|
# Empty collection
|
135
133
|
self.assertEqual(
|
136
|
-
self.downloader.construct_pdf_url(
|
137
|
-
{"collection": []}, "10.1101/2023.01.01.123456"
|
138
|
-
),
|
134
|
+
self.downloader.construct_pdf_url({"collection": []}, "10.1101/2023.01.01.123456"),
|
139
135
|
"",
|
140
136
|
)
|
141
137
|
# Custom version
|
@@ -179,9 +175,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
179
175
|
"""Test metadata extraction without PDF download."""
|
180
176
|
metadata = self.sample_json_response
|
181
177
|
|
182
|
-
with patch.object(
|
183
|
-
self.downloader, "get_default_filename", return_value="default.pdf"
|
184
|
-
):
|
178
|
+
with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
|
185
179
|
result = self.downloader.extract_paper_metadata(
|
186
180
|
metadata, "10.1101/2023.01.01.123456", None
|
187
181
|
)
|
@@ -196,9 +190,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
196
190
|
metadata = {}
|
197
191
|
|
198
192
|
with self.assertRaises(RuntimeError) as context:
|
199
|
-
self.downloader.extract_paper_metadata(
|
200
|
-
metadata, "10.1101/2023.01.01.123456", None
|
201
|
-
)
|
193
|
+
self.downloader.extract_paper_metadata(metadata, "10.1101/2023.01.01.123456", None)
|
202
194
|
|
203
195
|
self.assertIn("No collection data found", str(context.exception))
|
204
196
|
|
@@ -224,9 +216,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
224
216
|
|
225
217
|
# Missing fields
|
226
218
|
paper_missing = {"title": "Test Paper"} # Missing others
|
227
|
-
got_missing = self.downloader.extract_basic_metadata_public(
|
228
|
-
paper_missing, "10.1101/test"
|
229
|
-
)
|
219
|
+
got_missing = self.downloader.extract_basic_metadata_public(paper_missing, "10.1101/test")
|
230
220
|
self.assertEqual(got_missing["Title"], "Test Paper")
|
231
221
|
self.assertEqual(got_missing["Authors"], [])
|
232
222
|
self.assertEqual(got_missing["Abstract"], "N/A")
|
@@ -261,9 +251,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
261
251
|
)
|
262
252
|
|
263
253
|
# Without result
|
264
|
-
with patch.object(
|
265
|
-
self.downloader, "get_default_filename", return_value="default.pdf"
|
266
|
-
):
|
254
|
+
with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
|
267
255
|
expected_without = {
|
268
256
|
"URL": "",
|
269
257
|
"pdf_url": "",
|
@@ -302,9 +290,7 @@ class TestMedrxivDownloader(unittest.TestCase):
|
|
302
290
|
def test_add_service_identifier(self):
|
303
291
|
"""Test _add_service_identifier method."""
|
304
292
|
entry = {}
|
305
|
-
self.downloader.add_service_identifier_public(
|
306
|
-
entry, "10.1101/2023.01.01.123456"
|
307
|
-
)
|
293
|
+
self.downloader.add_service_identifier_public(entry, "10.1101/2023.01.01.123456")
|
308
294
|
self.assertEqual(entry["DOI"], "10.1101/2023.01.01.123456")
|
309
295
|
self.assertEqual(entry["server"], "medrxiv")
|
310
296
|
|
@@ -368,9 +354,7 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
|
|
368
354
|
pdf_result = self.downloader.download_pdf_to_temp(pdf_url, identifier)
|
369
355
|
|
370
356
|
# Step 4: Extract metadata
|
371
|
-
paper_data = self.downloader.extract_paper_metadata(
|
372
|
-
metadata, identifier, pdf_result
|
373
|
-
)
|
357
|
+
paper_data = self.downloader.extract_paper_metadata(metadata, identifier, pdf_result)
|
374
358
|
|
375
359
|
# Verify the complete workflow
|
376
360
|
self.assertEqual(paper_data["Title"], "Integration Test Paper")
|
@@ -384,9 +368,7 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
|
|
384
368
|
"https://api.medrxiv.org/details/medrxiv/10.1101/2023.01.01.123456/na/json",
|
385
369
|
timeout=30,
|
386
370
|
)
|
387
|
-
expected_pdf_url =
|
388
|
-
"https://www.medrxiv.org/content/10.1101/2023.01.01.123456v2.full.pdf"
|
389
|
-
)
|
371
|
+
expected_pdf_url = "https://www.medrxiv.org/content/10.1101/2023.01.01.123456v2.full.pdf"
|
390
372
|
mock_download.assert_called_once_with(expected_pdf_url, identifier)
|
391
373
|
|
392
374
|
@patch("requests.get")
|
@@ -413,25 +395,15 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
|
|
413
395
|
with self.assertRaises(RuntimeError) as context:
|
414
396
|
self.downloader.fetch_metadata(identifier)
|
415
397
|
|
416
|
-
self.assertIn(
|
417
|
-
"No collection data found in medRxiv API response", str(context.exception)
|
418
|
-
)
|
398
|
+
self.assertIn("No collection data found in medRxiv API response", str(context.exception))
|
419
399
|
|
420
400
|
@patch("requests.get")
|
421
401
|
def test_multiple_identifiers_workflow(self, mock_get):
|
422
402
|
"""Test processing multiple identifiers."""
|
423
403
|
# Mock different responses for different DOIs
|
424
404
|
responses = [
|
425
|
-
{
|
426
|
-
|
427
|
-
{"title": "Paper 1", "version": "1", "authors": "Author 1"}
|
428
|
-
]
|
429
|
-
},
|
430
|
-
{
|
431
|
-
"collection": [
|
432
|
-
{"title": "Paper 2", "version": "2", "authors": "Author 2"}
|
433
|
-
]
|
434
|
-
},
|
405
|
+
{"collection": [{"title": "Paper 1", "version": "1", "authors": "Author 1"}]},
|
406
|
+
{"collection": [{"title": "Paper 2", "version": "2", "authors": "Author 2"}]},
|
435
407
|
]
|
436
408
|
|
437
409
|
mock_responses = []
|
@@ -448,12 +420,8 @@ class TestMedrxivDownloaderIntegration(unittest.TestCase):
|
|
448
420
|
|
449
421
|
for identifier in identifiers:
|
450
422
|
metadata = self.downloader.fetch_metadata(identifier)
|
451
|
-
_ = self.downloader.construct_pdf_url(
|
452
|
-
|
453
|
-
) # ensure path covered
|
454
|
-
paper_data = self.downloader.extract_paper_metadata(
|
455
|
-
metadata, identifier, None
|
456
|
-
)
|
423
|
+
_ = self.downloader.construct_pdf_url(metadata, identifier) # ensure path covered
|
424
|
+
paper_data = self.downloader.extract_paper_metadata(metadata, identifier, None)
|
457
425
|
results[identifier] = paper_data
|
458
426
|
|
459
427
|
# Verify both papers were processed
|
@@ -529,6 +497,4 @@ class TestMedrxivSpecialCases(unittest.TestCase):
|
|
529
497
|
# Should handle Unicode properly
|
530
498
|
self.assertEqual(result["Title"], "Título com acentos é símbolos especiais")
|
531
499
|
self.assertEqual(result["Authors"], ["José María", "François Müller"])
|
532
|
-
self.assertEqual(
|
533
|
-
result["Abstract"], "Resumo com çaracteres especiais ñ símbolos"
|
534
|
-
)
|
500
|
+
self.assertEqual(result["Abstract"], "Resumo com çaracteres especiais ñ símbolos")
|
@@ -19,7 +19,7 @@ def fixture_chunks():
|
|
19
19
|
return [
|
20
20
|
Document(
|
21
21
|
page_content=f"chunk {i}",
|
22
|
-
metadata={"paper_id": f"P{i%2}", "relevance_score": 0.9 - 0.01 * i},
|
22
|
+
metadata={"paper_id": f"P{i % 2}", "relevance_score": 0.9 - 0.01 * i},
|
23
23
|
)
|
24
24
|
for i in range(10)
|
25
25
|
]
|
@@ -27,9 +27,7 @@ def fixture_chunks():
|
|
27
27
|
|
28
28
|
def test_rerank_chunks_short_input(chunks_fixture):
|
29
29
|
"""rerank_chunks with fewer chunks than top_k should return original."""
|
30
|
-
result = rerank_chunks(
|
31
|
-
chunks_fixture[:3], "What is cancer?", config=MagicMock(), top_k=5
|
32
|
-
)
|
30
|
+
result = rerank_chunks(chunks_fixture[:3], "What is cancer?", config=MagicMock(), top_k=5)
|
33
31
|
assert result == chunks_fixture[:3]
|
34
32
|
|
35
33
|
|
@@ -65,9 +63,7 @@ def test_rerank_chunks_success(mock_reranker_cls, chunks_fixture):
|
|
65
63
|
mock_config.reranker.api_key = "test_key"
|
66
64
|
mock_config.reranker.model = "test_model"
|
67
65
|
|
68
|
-
result = rerank_chunks(
|
69
|
-
chunks_fixture, "Explain mitochondria.", config=mock_config, top_k=5
|
70
|
-
)
|
66
|
+
result = rerank_chunks(chunks_fixture, "Explain mitochondria.", config=mock_config, top_k=5)
|
71
67
|
|
72
68
|
assert isinstance(result, list)
|
73
69
|
assert result == list(reversed(chunks_fixture))[:5]
|
@@ -77,9 +73,7 @@ def test_rerank_chunks_success(mock_reranker_cls, chunks_fixture):
|
|
77
73
|
|
78
74
|
|
79
75
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.NVIDIARerank")
|
80
|
-
def test_rerank_chunks_reranker_fails_raises_and_calls_compress(
|
81
|
-
mock_reranker_cls, chunks_fixture
|
82
|
-
):
|
76
|
+
def test_rerank_chunks_reranker_fails_raises_and_calls_compress(mock_reranker_cls, chunks_fixture):
|
83
77
|
"""
|
84
78
|
If NVIDIARerank.compress_documents raises RuntimeError:
|
85
79
|
- rerank_chunks should propagate the RuntimeError
|
@@ -94,9 +88,7 @@ def test_rerank_chunks_reranker_fails_raises_and_calls_compress(
|
|
94
88
|
mock_config.reranker.model = "reranker"
|
95
89
|
|
96
90
|
with pytest.raises(RuntimeError, match="API failure"):
|
97
|
-
rerank_chunks(
|
98
|
-
chunks_fixture, "How does light affect plants?", config=mock_config, top_k=3
|
99
|
-
)
|
91
|
+
rerank_chunks(chunks_fixture, "How does light affect plants?", config=mock_config, top_k=3)
|
100
92
|
|
101
93
|
reranker_instance.compress_documents.assert_called_once_with(
|
102
94
|
query="How does light affect plants?", documents=chunks_fixture
|
@@ -105,9 +97,7 @@ def test_rerank_chunks_reranker_fails_raises_and_calls_compress(
|
|
105
97
|
|
106
98
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.logger")
|
107
99
|
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker.NVIDIARerank")
|
108
|
-
def test_rerank_chunks_debug_block_triggered(
|
109
|
-
mock_reranker_cls, mock_logger, chunks_fixture
|
110
|
-
):
|
100
|
+
def test_rerank_chunks_debug_block_triggered(mock_reranker_cls, mock_logger, chunks_fixture):
|
111
101
|
"""rerank_chunks should log debug info if debug logging is enabled."""
|
112
102
|
mock_logger.isEnabledFor.return_value = True
|
113
103
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
"""Tests for the PDF batch processor module."""
|
2
2
|
|
3
3
|
from unittest.mock import MagicMock, patch
|
4
|
+
|
4
5
|
import pytest
|
5
6
|
|
6
7
|
from aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor import (
|
@@ -22,18 +23,14 @@ def _args_fixture():
|
|
22
23
|
}
|
23
24
|
|
24
25
|
|
25
|
-
@patch(
|
26
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
|
27
|
-
)
|
26
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
|
28
27
|
def test_no_papers_to_add(mock_loader, args_fixture):
|
29
28
|
"""Test case where no papers are provided to add."""
|
30
29
|
add_papers_batch(papers_to_add=[], **args_fixture)
|
31
30
|
mock_loader.assert_not_called()
|
32
31
|
|
33
32
|
|
34
|
-
@patch(
|
35
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
|
36
|
-
)
|
33
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
|
37
34
|
def test_all_papers_already_loaded(mock_loader, args_fixture):
|
38
35
|
"""Test case where all papers are already loaded."""
|
39
36
|
args_fixture["loaded_papers"].update(["p1", "p2"])
|
@@ -44,9 +41,7 @@ def test_all_papers_already_loaded(mock_loader, args_fixture):
|
|
44
41
|
mock_loader.assert_not_called()
|
45
42
|
|
46
43
|
|
47
|
-
@patch(
|
48
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
|
49
|
-
)
|
44
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
|
50
45
|
def test_successful_batch_embedding(mock_loader, args_fixture):
|
51
46
|
"""Test case where papers are successfully loaded and embedded."""
|
52
47
|
mock_loader.return_value = [
|
@@ -70,9 +65,7 @@ def test_successful_batch_embedding(mock_loader, args_fixture):
|
|
70
65
|
mock_collection.flush.assert_called()
|
71
66
|
|
72
67
|
|
73
|
-
@patch(
|
74
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
|
75
|
-
)
|
68
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
|
76
69
|
def test_empty_chunks_after_loading(mock_loader, args_fixture):
|
77
70
|
"""Test case where no chunks are returned after loading PDF."""
|
78
71
|
mock_loader.return_value = []
|
@@ -82,9 +75,7 @@ def test_empty_chunks_after_loading(mock_loader, args_fixture):
|
|
82
75
|
args_fixture["vector_store"].add_documents.assert_not_called()
|
83
76
|
|
84
77
|
|
85
|
-
@patch(
|
86
|
-
"aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf"
|
87
|
-
)
|
78
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.batch_processor.load_and_split_pdf")
|
88
79
|
def test_vector_store_insert_failure(mock_loader, args_fixture):
|
89
80
|
"""Test case where vector store insertion fails."""
|
90
81
|
mock_loader.return_value = [MagicMock(page_content="page")]
|