aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +4 -8
- aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
- aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
- aiagents4pharma/talk2biomodels/api/ols.py +13 -10
- aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
- aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
- aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
- aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
- aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
- aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
- aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
- aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
- aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
- aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
- aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +4 -5
- aiagents4pharma/talk2cells/agents/__init__.py +3 -2
- aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
- aiagents4pharma/talk2cells/states/__init__.py +3 -2
- aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
- aiagents4pharma/talk2cells/tools/__init__.py +3 -2
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +334 -216
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +160 -97
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +87 -13
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +145 -142
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +14 -34
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
- aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
- aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
- aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
- aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
- aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
- aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
- aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
- aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
- aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
- aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
- aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
- aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
- aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
- aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
- aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
- aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
- aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
- aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
- aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
- aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
- aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
- aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
- aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
- aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
- aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/METADATA +115 -51
- aiagents4pharma-1.45.1.dist-info/RECORD +324 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/WHEEL +1 -2
- aiagents4pharma-1.44.0.dist-info/RECORD +0 -293
- aiagents4pharma-1.44.0.dist-info/top_level.txt +0 -1
- /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/licenses/LICENSE +0 -0
@@ -4,11 +4,13 @@ Unit tests for S2 tools functionality.
|
|
4
4
|
|
5
5
|
import json
|
6
6
|
from types import SimpleNamespace
|
7
|
+
|
8
|
+
import hydra
|
7
9
|
import pytest
|
8
10
|
import requests
|
9
|
-
from langgraph.types import Command
|
10
11
|
from langchain_core.messages import ToolMessage
|
11
|
-
import hydra
|
12
|
+
from langgraph.types import Command
|
13
|
+
|
12
14
|
from aiagents4pharma.talk2scholars.tools.s2.multi_paper_rec import (
|
13
15
|
get_multi_paper_recommendations,
|
14
16
|
)
|
@@ -162,9 +164,7 @@ def dummy_requests_post_exception(url, headers, params, data, timeout):
|
|
162
164
|
def patch_hydra(monkeypatch):
|
163
165
|
"""Patch Hydra's initialize and compose functions to return dummy objects."""
|
164
166
|
# Patch hydra.initialize to return our dummy context manager.
|
165
|
-
monkeypatch.setattr(
|
166
|
-
hydra, "initialize", lambda version_base, config_path: DummyHydraContext()
|
167
|
-
)
|
167
|
+
monkeypatch.setattr(hydra, "initialize", lambda version_base, config_path: DummyHydraContext())
|
168
168
|
# Patch hydra.compose to return our dummy config.
|
169
169
|
monkeypatch.setattr(hydra, "compose", lambda config_name, overrides: dummy_config)
|
170
170
|
|
@@ -3,13 +3,14 @@ Unit tests for S2 tools functionality.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
from unittest.mock import patch
|
6
|
+
|
6
7
|
import pytest
|
7
8
|
from langgraph.types import Command
|
9
|
+
|
8
10
|
from ..tools.s2.retrieve_semantic_scholar_paper_id import (
|
9
11
|
retrieve_semantic_scholar_paper_id,
|
10
12
|
)
|
11
13
|
|
12
|
-
|
13
14
|
# Fixed test data for deterministic results
|
14
15
|
MOCK_SEARCH_RESPONSE = {
|
15
16
|
"data": [
|
@@ -3,11 +3,13 @@ Unit tests for S2 tools functionality.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
from types import SimpleNamespace
|
6
|
-
|
6
|
+
|
7
7
|
import hydra
|
8
|
+
import pytest
|
8
9
|
import requests
|
9
|
-
from langgraph.types import Command
|
10
10
|
from langchain_core.messages import ToolMessage
|
11
|
+
from langgraph.types import Command
|
12
|
+
|
11
13
|
from aiagents4pharma.talk2scholars.tools.s2.search import search_tool
|
12
14
|
from aiagents4pharma.talk2scholars.tools.s2.utils import search_helper
|
13
15
|
|
@@ -147,9 +149,7 @@ def dummy_requests_get_exception(url, params, timeout):
|
|
147
149
|
def patch_hydra(monkeypatch):
|
148
150
|
"""hydra patch to mock initialize and compose functions."""
|
149
151
|
# Patch hydra.initialize to return our dummy context manager.
|
150
|
-
monkeypatch.setattr(
|
151
|
-
hydra, "initialize", lambda version_base, config_path: DummyHydraContext()
|
152
|
-
)
|
152
|
+
monkeypatch.setattr(hydra, "initialize", lambda version_base, config_path: DummyHydraContext())
|
153
153
|
# Patch hydra.compose to return our dummy config.
|
154
154
|
monkeypatch.setattr(hydra, "compose", lambda config_name, overrides: dummy_config)
|
155
155
|
|
@@ -3,11 +3,13 @@ Unit tests for S2 tools functionality.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
from types import SimpleNamespace
|
6
|
+
|
7
|
+
import hydra
|
6
8
|
import pytest
|
7
9
|
import requests
|
8
|
-
import hydra
|
9
|
-
from langgraph.types import Command
|
10
10
|
from langchain_core.messages import ToolMessage
|
11
|
+
from langgraph.types import Command
|
12
|
+
|
11
13
|
from aiagents4pharma.talk2scholars.tools.s2.single_paper_rec import (
|
12
14
|
get_single_paper_recommendations,
|
13
15
|
)
|
@@ -156,9 +158,7 @@ def dummy_requests_get_exception(url, params, timeout):
|
|
156
158
|
@pytest.fixture(autouse=True)
|
157
159
|
def patch_hydra(monkeypatch):
|
158
160
|
"""Patch Hydra's initialize and compose functions with dummy implementations."""
|
159
|
-
monkeypatch.setattr(
|
160
|
-
hydra, "initialize", lambda version_base, config_path: DummyHydraContext()
|
161
|
-
)
|
161
|
+
monkeypatch.setattr(hydra, "initialize", lambda version_base, config_path: DummyHydraContext())
|
162
162
|
# Patch hydra.compose to return our dummy config.
|
163
163
|
monkeypatch.setattr(hydra, "compose", lambda config_name, overrides: dummy_config)
|
164
164
|
|
aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py}
RENAMED
@@ -81,7 +81,10 @@ class TestArxivDownloader(unittest.TestCase):
|
|
81
81
|
<name>Jane Smith</name>
|
82
82
|
</author>
|
83
83
|
<link href="http://arxiv.org/abs/1234.5678v1" rel="alternate" type="text/html"/>
|
84
|
-
<link href="http://arxiv.org/pdf/1234.5678v1.pdf" rel="related" type="application/pdf" title="pdf"/>
|
84
|
+
<link href="http://arxiv.org/pdf/1234.5678v1.pdf"
|
85
|
+
rel="related"
|
86
|
+
type="application/pdf"
|
87
|
+
title="pdf"/>
|
85
88
|
</entry>
|
86
89
|
</feed>"""
|
87
90
|
|
@@ -104,8 +107,7 @@ class TestArxivDownloader(unittest.TestCase):
|
|
104
107
|
|
105
108
|
# Verify API call - it uses query string format, not params
|
106
109
|
expected_url = (
|
107
|
-
"http://export.arxiv.org/api/query?search_query="
|
108
|
-
"id:1234.5678&start=0&max_results=1"
|
110
|
+
"http://export.arxiv.org/api/query?search_query=id:1234.5678&start=0&max_results=1"
|
109
111
|
)
|
110
112
|
mock_get.assert_called_once_with(expected_url, timeout=30)
|
111
113
|
mock_response.raise_for_status.assert_called_once()
|
@@ -201,9 +203,7 @@ class TestArxivDownloader(unittest.TestCase):
|
|
201
203
|
metadata = ET.fromstring(self.sample_xml)
|
202
204
|
pdf_result = ("/tmp/test.pdf", "test_paper.pdf")
|
203
205
|
|
204
|
-
result = self.downloader.extract_paper_metadata(
|
205
|
-
metadata, "1234.5678", pdf_result
|
206
|
-
)
|
206
|
+
result = self.downloader.extract_paper_metadata(metadata, "1234.5678", pdf_result)
|
207
207
|
|
208
208
|
# Verify extracted metadata
|
209
209
|
expected_metadata = {
|
@@ -226,9 +226,7 @@ class TestArxivDownloader(unittest.TestCase):
|
|
226
226
|
"""Test metadata extraction without PDF download."""
|
227
227
|
metadata = ET.fromstring(self.sample_xml)
|
228
228
|
|
229
|
-
with patch.object(
|
230
|
-
self.downloader, "get_default_filename", return_value="1234.5678.pdf"
|
231
|
-
):
|
229
|
+
with patch.object(self.downloader, "get_default_filename", return_value="1234.5678.pdf"):
|
232
230
|
result = self.downloader.extract_paper_metadata(metadata, "1234.5678", None)
|
233
231
|
|
234
232
|
# Verify metadata without PDF
|
@@ -275,9 +273,7 @@ class TestArxivDownloader(unittest.TestCase):
|
|
275
273
|
# Case 1: Title present
|
276
274
|
metadata1 = ET.fromstring(self.sample_xml)
|
277
275
|
entry1 = metadata1.find("atom:entry", ns)
|
278
|
-
self.assertEqual(
|
279
|
-
self.downloader.extract_title_public(entry1, ns), "Test Paper Title"
|
280
|
-
)
|
276
|
+
self.assertEqual(self.downloader.extract_title_public(entry1, ns), "Test Paper Title")
|
281
277
|
|
282
278
|
# Case 2: Title missing
|
283
279
|
xml_no_title = """<?xml version="1.0" encoding="UTF-8"?>
|
@@ -346,9 +342,7 @@ class TestArxivDownloader(unittest.TestCase):
|
|
346
342
|
)
|
347
343
|
|
348
344
|
# Without result
|
349
|
-
with patch.object(
|
350
|
-
self.downloader, "get_default_filename", return_value="default.pdf"
|
351
|
-
):
|
345
|
+
with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
|
352
346
|
expected_without = {
|
353
347
|
"URL": "",
|
354
348
|
"pdf_url": "",
|
@@ -365,9 +359,7 @@ class TestArxivDownloader(unittest.TestCase):
|
|
365
359
|
"""Service name, identifier name, and default filename helpers."""
|
366
360
|
self.assertEqual(self.downloader.get_service_name(), "arXiv")
|
367
361
|
self.assertEqual(self.downloader.get_identifier_name(), "arXiv ID")
|
368
|
-
self.assertEqual(
|
369
|
-
self.downloader.get_default_filename("1234.5678"), "1234.5678.pdf"
|
370
|
-
)
|
362
|
+
self.assertEqual(self.downloader.get_default_filename("1234.5678"), "1234.5678.pdf")
|
371
363
|
|
372
364
|
def test_get_paper_identifier_info(self):
|
373
365
|
"""Test _get_paper_identifier_info method."""
|
@@ -411,7 +403,10 @@ class TestArxivDownloaderIntegration(unittest.TestCase):
|
|
411
403
|
<author>
|
412
404
|
<name>Test Author</name>
|
413
405
|
</author>
|
414
|
-
<link href="http://arxiv.org/pdf/1234.5678v1.pdf" rel="related" type="application/pdf" title="pdf"/>
|
406
|
+
<link href="http://arxiv.org/pdf/1234.5678v1.pdf"
|
407
|
+
rel="related"
|
408
|
+
type="application/pdf"
|
409
|
+
title="pdf"/>
|
415
410
|
</entry>
|
416
411
|
</feed>"""
|
417
412
|
|
@@ -446,9 +441,7 @@ class TestArxivDownloaderIntegration(unittest.TestCase):
|
|
446
441
|
pdf_result = self.downloader.download_pdf_to_temp(pdf_url, identifier)
|
447
442
|
|
448
443
|
# Step 4: Extract metadata
|
449
|
-
paper_data = self.downloader.extract_paper_metadata(
|
450
|
-
metadata, identifier, pdf_result
|
451
|
-
)
|
444
|
+
paper_data = self.downloader.extract_paper_metadata(metadata, identifier, pdf_result)
|
452
445
|
|
453
446
|
results[identifier] = paper_data
|
454
447
|
|
@@ -464,9 +457,7 @@ class TestArxivDownloaderIntegration(unittest.TestCase):
|
|
464
457
|
|
465
458
|
# Verify method calls
|
466
459
|
mock_get.assert_called_once()
|
467
|
-
mock_download.assert_called_once_with(
|
468
|
-
"http://arxiv.org/pdf/1234.5678v1.pdf", "1234.5678"
|
469
|
-
)
|
460
|
+
mock_download.assert_called_once_with("http://arxiv.org/pdf/1234.5678v1.pdf", "1234.5678")
|
470
461
|
|
471
462
|
@patch("requests.get")
|
472
463
|
def test_error_handling_workflow(self, mock_get):
|
@@ -3,11 +3,11 @@ Unit tests for BasePaperDownloader.
|
|
3
3
|
Tests the abstract base class functionality and common methods.
|
4
4
|
"""
|
5
5
|
|
6
|
+
import inspect
|
6
7
|
import unittest
|
7
|
-
from typing import Any, Dict, Optional, Tuple
|
8
|
+
from typing import Any
|
8
9
|
from unittest.mock import Mock, patch
|
9
10
|
|
10
|
-
import inspect
|
11
11
|
import requests
|
12
12
|
|
13
13
|
from aiagents4pharma.talk2scholars.tools.paper_download.utils.base_paper_downloader import (
|
@@ -31,8 +31,8 @@ class ConcretePaperDownloader(BasePaperDownloader):
|
|
31
31
|
return f"https://test.com/{identifier}.pdf"
|
32
32
|
|
33
33
|
def extract_paper_metadata(
|
34
|
-
self, metadata: Any, identifier: str, pdf_result:
|
35
|
-
) ->
|
34
|
+
self, metadata: Any, identifier: str, pdf_result: tuple[str, str] | None
|
35
|
+
) -> dict[str, Any]:
|
36
36
|
"""Concrete implementation for testing."""
|
37
37
|
return {
|
38
38
|
"Title": f"Test Paper {identifier}",
|
@@ -53,21 +53,19 @@ class ConcretePaperDownloader(BasePaperDownloader):
|
|
53
53
|
"""Concrete implementation for testing."""
|
54
54
|
return f"test_{identifier}.pdf"
|
55
55
|
|
56
|
-
def _get_paper_identifier_info(self, paper:
|
56
|
+
def _get_paper_identifier_info(self, paper: dict[str, Any]) -> str:
|
57
57
|
"""Concrete implementation for testing."""
|
58
58
|
return f" ({paper.get('identifier', 'unknown')})"
|
59
59
|
|
60
|
-
def _add_service_identifier(self, entry:
|
60
|
+
def _add_service_identifier(self, entry: dict[str, Any], identifier: str) -> None:
|
61
61
|
"""Concrete implementation for testing."""
|
62
62
|
entry["test_id"] = identifier
|
63
63
|
|
64
|
-
def get_paper_identifier_info_public(self, paper:
|
64
|
+
def get_paper_identifier_info_public(self, paper: dict[str, Any]) -> str:
|
65
65
|
"""Public wrapper to access protected identifier info for tests."""
|
66
66
|
return self._get_paper_identifier_info(paper)
|
67
67
|
|
68
|
-
def add_service_identifier_public(
|
69
|
-
self, entry: Dict[str, Any], identifier: str
|
70
|
-
) -> None:
|
68
|
+
def add_service_identifier_public(self, entry: dict[str, Any], identifier: str) -> None:
|
71
69
|
"""Public wrapper to access protected service identifier for tests."""
|
72
70
|
self._add_service_identifier(entry, identifier)
|
73
71
|
|
@@ -109,9 +107,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
109
107
|
mock_response = Mock()
|
110
108
|
mock_response.raise_for_status = Mock()
|
111
109
|
mock_response.iter_content.return_value = [b"PDF chunk 1", b"PDF chunk 2"]
|
112
|
-
mock_response.headers = {
|
113
|
-
"Content-Disposition": 'attachment; filename="paper.pdf"'
|
114
|
-
}
|
110
|
+
mock_response.headers = {"Content-Disposition": 'attachment; filename="paper.pdf"'}
|
115
111
|
mock_get.return_value = mock_response
|
116
112
|
|
117
113
|
# Mock temporary file
|
@@ -121,9 +117,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
121
117
|
mock_temp_file.__exit__ = Mock(return_value=None)
|
122
118
|
mock_tempfile.return_value = mock_temp_file
|
123
119
|
|
124
|
-
result = self.downloader.download_pdf_to_temp(
|
125
|
-
"https://test.com/paper.pdf", "12345"
|
126
|
-
)
|
120
|
+
result = self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
|
127
121
|
|
128
122
|
# Verify result
|
129
123
|
self.assertEqual(result, ("/tmp/test.pdf", "paper.pdf"))
|
@@ -153,9 +147,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
153
147
|
"""Test PDF download with network error."""
|
154
148
|
mock_get.side_effect = requests.RequestException("Network error")
|
155
149
|
|
156
|
-
result = self.downloader.download_pdf_to_temp(
|
157
|
-
"https://test.com/paper.pdf", "12345"
|
158
|
-
)
|
150
|
+
result = self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
|
159
151
|
|
160
152
|
self.assertIsNone(result)
|
161
153
|
|
@@ -235,12 +227,8 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
235
227
|
raise requests.RequestException("Fetch failed")
|
236
228
|
return {"test": identifier}
|
237
229
|
|
238
|
-
with patch.object(
|
239
|
-
self.downloader, "
|
240
|
-
):
|
241
|
-
with patch.object(
|
242
|
-
self.downloader, "download_pdf_to_temp", return_value=None
|
243
|
-
):
|
230
|
+
with patch.object(self.downloader, "fetch_metadata", side_effect=mock_fetch_metadata):
|
231
|
+
with patch.object(self.downloader, "download_pdf_to_temp", return_value=None):
|
244
232
|
result = self.downloader.process_identifiers(identifiers)
|
245
233
|
|
246
234
|
# Valid identifier should succeed
|
@@ -316,11 +304,11 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
316
304
|
"""Test building summary with long list (should show only top 3)."""
|
317
305
|
article_data = {}
|
318
306
|
for i in range(5): # More than 3
|
319
|
-
article_data[f"{i+1}"] = {
|
320
|
-
"Title": f"Paper {i+1}",
|
321
|
-
"identifier": f"{i+1}",
|
307
|
+
article_data[f"{i + 1}"] = {
|
308
|
+
"Title": f"Paper {i + 1}",
|
309
|
+
"identifier": f"{i + 1}",
|
322
310
|
"access_type": "open_access_downloaded",
|
323
|
-
"Abstract": f"Abstract {i+1}",
|
311
|
+
"Abstract": f"Abstract {i + 1}",
|
324
312
|
}
|
325
313
|
|
326
314
|
result = self.downloader.build_summary(article_data)
|
@@ -388,9 +376,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
388
376
|
BasePaperDownloader.construct_pdf_url(self.downloader, {}, "test")
|
389
377
|
|
390
378
|
with self.assertRaises(NotImplementedError):
|
391
|
-
BasePaperDownloader.extract_paper_metadata(
|
392
|
-
self.downloader, {}, "test", None
|
393
|
-
)
|
379
|
+
BasePaperDownloader.extract_paper_metadata(self.downloader, {}, "test", None)
|
394
380
|
|
395
381
|
with self.assertRaises(NotImplementedError):
|
396
382
|
BasePaperDownloader.get_service_name(self.downloader)
|
@@ -402,15 +388,13 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
402
388
|
BasePaperDownloader.get_default_filename(self.downloader, "test")
|
403
389
|
|
404
390
|
# Protected abstract methods: call via getattr to avoid W0212 while still executing code.
|
391
|
+
method_name_1 = "_get_paper_identifier_info"
|
405
392
|
with self.assertRaises(NotImplementedError):
|
406
|
-
getattr(BasePaperDownloader,
|
407
|
-
self.downloader, {}
|
408
|
-
)
|
393
|
+
getattr(BasePaperDownloader, method_name_1)(self.downloader, {})
|
409
394
|
|
395
|
+
method_name_2 = "_add_service_identifier"
|
410
396
|
with self.assertRaises(NotImplementedError):
|
411
|
-
getattr(BasePaperDownloader, "
|
412
|
-
self.downloader, {}, "test"
|
413
|
-
)
|
397
|
+
getattr(BasePaperDownloader, method_name_2)(self.downloader, {}, "test")
|
414
398
|
|
415
399
|
@patch("tempfile.NamedTemporaryFile")
|
416
400
|
@patch("requests.get")
|
@@ -420,9 +404,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
420
404
|
mock_response = Mock()
|
421
405
|
mock_response.raise_for_status = Mock()
|
422
406
|
mock_response.iter_content.return_value = [b"PDF data"]
|
423
|
-
mock_response.headers = {
|
424
|
-
"Content-Disposition": 'attachment; filename="paper.pdf"'
|
425
|
-
}
|
407
|
+
mock_response.headers = {"Content-Disposition": 'attachment; filename="paper.pdf"'}
|
426
408
|
mock_get.return_value = mock_response
|
427
409
|
|
428
410
|
# Mock temporary file
|
@@ -434,9 +416,7 @@ class TestBasePaperDownloader(unittest.TestCase):
|
|
434
416
|
|
435
417
|
# Patch re.search to raise an exception during filename extraction
|
436
418
|
with patch("re.search", side_effect=requests.RequestException("Regex error")):
|
437
|
-
result = self.downloader.download_pdf_to_temp(
|
438
|
-
"https://test.com/paper.pdf", "12345"
|
439
|
-
)
|
419
|
+
result = self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
|
440
420
|
|
441
421
|
# Should still succeed but use default filename due to exception
|
442
422
|
self.assertEqual(result, ("/tmp/test.pdf", "test_12345.pdf"))
|
@@ -507,9 +487,7 @@ class TestBasePaperDownloaderEdgeCases(unittest.TestCase):
|
|
507
487
|
mock_temp_file.__exit__ = Mock(return_value=None)
|
508
488
|
mock_tempfile.return_value = mock_temp_file
|
509
489
|
|
510
|
-
with patch.object(
|
511
|
-
self.downloader, "get_default_filename", return_value="default.pdf"
|
512
|
-
):
|
490
|
+
with patch.object(self.downloader, "get_default_filename", return_value="default.pdf"):
|
513
491
|
# Call without assigning to avoid 'unused-variable'
|
514
492
|
self.downloader.download_pdf_to_temp("https://test.com/paper.pdf", "12345")
|
515
493
|
|
aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py}
RENAMED
@@ -122,9 +122,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
122
122
|
result = self.downloader.fetch_metadata("10.1101/2023.01.01.123456")
|
123
123
|
|
124
124
|
# Verify API call
|
125
|
-
expected_url =
|
126
|
-
"https://api.biorxiv.org/details/biorxiv/10.1101/2023.01.01.123456/na/json"
|
127
|
-
)
|
125
|
+
expected_url = "https://api.biorxiv.org/details/biorxiv/10.1101/2023.01.01.123456/na/json"
|
128
126
|
mock_scraper.get.assert_called_once_with(expected_url, timeout=30)
|
129
127
|
mock_response.raise_for_status.assert_called_once()
|
130
128
|
|
@@ -171,9 +169,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
171
169
|
# Default version
|
172
170
|
meta_default = {"collection": [{"title": "Test Paper"}]}
|
173
171
|
self.assertEqual(
|
174
|
-
self.downloader.construct_pdf_url(
|
175
|
-
meta_default, "10.1101/2023.01.01.123456"
|
176
|
-
),
|
172
|
+
self.downloader.construct_pdf_url(meta_default, "10.1101/2023.01.01.123456"),
|
177
173
|
"https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf",
|
178
174
|
)
|
179
175
|
|
@@ -195,9 +191,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
195
191
|
b"PDF content chunk 1",
|
196
192
|
b"PDF content chunk 2",
|
197
193
|
]
|
198
|
-
mock_pdf_response.headers = {
|
199
|
-
"Content-Disposition": 'attachment; filename="paper.pdf"'
|
200
|
-
}
|
194
|
+
mock_pdf_response.headers = {"Content-Disposition": 'attachment; filename="paper.pdf"'}
|
201
195
|
|
202
196
|
mock_scraper.get.side_effect = [mock_landing_response, mock_pdf_response]
|
203
197
|
|
@@ -209,9 +203,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
209
203
|
mock_tempfile.return_value = mock_temp_file
|
210
204
|
|
211
205
|
pdf_url = "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
|
212
|
-
result = self.downloader.download_pdf_to_temp(
|
213
|
-
pdf_url, "10.1101/2023.01.01.123456"
|
214
|
-
)
|
206
|
+
result = self.downloader.download_pdf_to_temp(pdf_url, "10.1101/2023.01.01.123456")
|
215
207
|
|
216
208
|
# Verify result
|
217
209
|
self.assertEqual(result, ("/tmp/test.pdf", "paper.pdf"))
|
@@ -263,9 +255,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
263
255
|
mock_scraper.get.return_value = ok
|
264
256
|
|
265
257
|
# Case 1: with .full.pdf -> should visit landing
|
266
|
-
pdf_url_full =
|
267
|
-
"https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
|
268
|
-
)
|
258
|
+
pdf_url_full = "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
|
269
259
|
self.downloader.visit_landing_page_public(
|
270
260
|
mock_scraper, pdf_url_full, "10.1101/2023.01.01.123456"
|
271
261
|
)
|
@@ -324,9 +314,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
324
314
|
), # trigger exception path
|
325
315
|
]
|
326
316
|
for headers, expected, raise_regex in cases:
|
327
|
-
with self.subTest(
|
328
|
-
headers=headers, expected=expected, raise_regex=raise_regex
|
329
|
-
):
|
317
|
+
with self.subTest(headers=headers, expected=expected, raise_regex=raise_regex):
|
330
318
|
resp = Mock()
|
331
319
|
resp.headers = headers
|
332
320
|
if raise_regex:
|
@@ -339,18 +327,14 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
339
327
|
"get_default_filename",
|
340
328
|
return_value="default.pdf",
|
341
329
|
):
|
342
|
-
got = self.downloader.extract_filename_public(
|
343
|
-
resp, "10.1101/test"
|
344
|
-
)
|
330
|
+
got = self.downloader.extract_filename_public(resp, "10.1101/test")
|
345
331
|
else:
|
346
332
|
with patch.object(
|
347
333
|
self.downloader,
|
348
334
|
"get_default_filename",
|
349
335
|
return_value="default.pdf",
|
350
336
|
):
|
351
|
-
got = self.downloader.extract_filename_public(
|
352
|
-
resp, "10.1101/test"
|
353
|
-
)
|
337
|
+
got = self.downloader.extract_filename_public(resp, "10.1101/test")
|
354
338
|
self.assertEqual(got, expected)
|
355
339
|
|
356
340
|
def test_extract_paper_metadata_success(self):
|
@@ -396,18 +380,14 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
396
380
|
self.assertEqual(result["URL"], "")
|
397
381
|
self.assertEqual(result["pdf_url"], "")
|
398
382
|
self.assertEqual(result["temp_file_path"], "")
|
399
|
-
self.assertEqual(
|
400
|
-
result["filename"], "10_1101_2023_01_01_123456.pdf"
|
401
|
-
) # Default filename
|
383
|
+
self.assertEqual(result["filename"], "10_1101_2023_01_01_123456.pdf") # Default filename
|
402
384
|
|
403
385
|
def test_extract_paper_metadata_no_collection(self):
|
404
386
|
"""Test metadata extraction with missing collection."""
|
405
387
|
metadata = {}
|
406
388
|
|
407
389
|
with self.assertRaises(RuntimeError) as context:
|
408
|
-
self.downloader.extract_paper_metadata(
|
409
|
-
metadata, "10.1101/2023.01.01.123456", None
|
410
|
-
)
|
390
|
+
self.downloader.extract_paper_metadata(metadata, "10.1101/2023.01.01.123456", None)
|
411
391
|
|
412
392
|
self.assertIn("No collection data found", str(context.exception))
|
413
393
|
|
@@ -415,9 +395,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
415
395
|
"""Test basic metadata extraction helper method."""
|
416
396
|
paper = self.sample_json_response["collection"][0]
|
417
397
|
|
418
|
-
result = self.downloader.extract_basic_metadata_public(
|
419
|
-
paper, "10.1101/2023.01.01.123456"
|
420
|
-
)
|
398
|
+
result = self.downloader.extract_basic_metadata_public(paper, "10.1101/2023.01.01.123456")
|
421
399
|
|
422
400
|
expected = {
|
423
401
|
"Title": "Test BioRxiv Paper",
|
@@ -468,9 +446,7 @@ class TestBiorxivDownloader(unittest.TestCase):
|
|
468
446
|
"""Test _add_service_identifier method."""
|
469
447
|
entry = {}
|
470
448
|
|
471
|
-
self.downloader.add_service_identifier_public(
|
472
|
-
entry, "10.1101/2023.01.01.123456"
|
473
|
-
)
|
449
|
+
self.downloader.add_service_identifier_public(entry, "10.1101/2023.01.01.123456")
|
474
450
|
|
475
451
|
self.assertEqual(entry["DOI"], "10.1101/2023.01.01.123456")
|
476
452
|
self.assertEqual(entry["server"], "biorxiv")
|
@@ -561,9 +537,7 @@ class TestBiorxivDownloaderIntegration(unittest.TestCase):
|
|
561
537
|
pdf_result = self.downloader.download_pdf_to_temp(pdf_url, identifier)
|
562
538
|
|
563
539
|
# Step 4: Extract metadata
|
564
|
-
paper_data = self.downloader.extract_paper_metadata(
|
565
|
-
metadata, identifier, pdf_result
|
566
|
-
)
|
540
|
+
paper_data = self.downloader.extract_paper_metadata(metadata, identifier, pdf_result)
|
567
541
|
|
568
542
|
# Verify the complete workflow
|
569
543
|
self.assertEqual(paper_data["Title"], "Integration Test Paper")
|
@@ -571,9 +545,7 @@ class TestBiorxivDownloaderIntegration(unittest.TestCase):
|
|
571
545
|
self.assertEqual(paper_data["access_type"], "open_access_downloaded")
|
572
546
|
self.assertEqual(paper_data["temp_file_path"], "/tmp/integration.pdf")
|
573
547
|
|
574
|
-
expected_pdf_url =
|
575
|
-
"https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
|
576
|
-
)
|
548
|
+
expected_pdf_url = "https://www.biorxiv.org/content/10.1101/2023.01.01.123456v1.full.pdf"
|
577
549
|
self.assertEqual(pdf_url, expected_pdf_url)
|
578
550
|
|
579
551
|
# Verify 3 calls: metadata, landing page, PDF
|