aiagents4pharma 1.43.0__py3-none-any.whl → 1.45.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +4 -8
- aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
- aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
- aiagents4pharma/talk2biomodels/api/ols.py +13 -10
- aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
- aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
- aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
- aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
- aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
- aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
- aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
- aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
- aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
- aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
- aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +4 -5
- aiagents4pharma/talk2cells/agents/__init__.py +3 -2
- aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
- aiagents4pharma/talk2cells/states/__init__.py +3 -2
- aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
- aiagents4pharma/talk2cells/tools/__init__.py +3 -2
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +17 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +736 -413
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +442 -42
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +10 -6
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +245 -205
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +218 -81
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
- aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
- aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
- aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
- aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
- aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
- aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
- aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
- aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
- aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
- aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
- aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
- aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
- aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
- aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
- aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
- aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
- aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
- aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
- aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
- aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
- aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
- aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
- aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
- aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
- aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
- {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/METADATA +115 -50
- aiagents4pharma-1.45.0.dist-info/RECORD +324 -0
- {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/WHEEL +1 -2
- aiagents4pharma-1.43.0.dist-info/RECORD +0 -293
- aiagents4pharma-1.43.0.dist-info/top_level.txt +0 -1
- /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
- {aiagents4pharma-1.43.0.dist-info → aiagents4pharma-1.45.0.dist-info}/licenses/LICENSE +0 -0
@@ -4,7 +4,7 @@ Generate an answer for a question using retrieved chunks of documents.
|
|
4
4
|
|
5
5
|
import logging
|
6
6
|
import os
|
7
|
-
from typing import Any
|
7
|
+
from typing import Any
|
8
8
|
|
9
9
|
import hydra
|
10
10
|
from langchain_core.documents import Document
|
@@ -32,7 +32,7 @@ def load_hydra_config() -> Any:
|
|
32
32
|
|
33
33
|
|
34
34
|
def _build_context_and_sources(
|
35
|
-
retrieved_chunks:
|
35
|
+
retrieved_chunks: list[Document],
|
36
36
|
) -> tuple[str, set[str]]:
|
37
37
|
"""
|
38
38
|
Build the combined context string and set of paper_ids from retrieved chunks.
|
@@ -61,10 +61,10 @@ def _build_context_and_sources(
|
|
61
61
|
|
62
62
|
def generate_answer(
|
63
63
|
question: str,
|
64
|
-
retrieved_chunks:
|
64
|
+
retrieved_chunks: list[Document],
|
65
65
|
llm_model: BaseChatModel,
|
66
66
|
config: Any,
|
67
|
-
) ->
|
67
|
+
) -> dict[str, Any]:
|
68
68
|
"""
|
69
69
|
Generate an answer for a question using retrieved chunks.
|
70
70
|
|
@@ -37,14 +37,10 @@ def get_vectorstore(
|
|
37
37
|
with _cache_lock:
|
38
38
|
if force_new and collection_name in _vectorstore_cache:
|
39
39
|
del _vectorstore_cache[collection_name]
|
40
|
-
logger.info(
|
41
|
-
"Forced new Vectorstore instance for collection: %s", collection_name
|
42
|
-
)
|
40
|
+
logger.info("Forced new Vectorstore instance for collection: %s", collection_name)
|
43
41
|
|
44
42
|
if collection_name not in _vectorstore_cache:
|
45
|
-
logger.info(
|
46
|
-
"Creating new Vectorstore instance for collection: %s", collection_name
|
47
|
-
)
|
43
|
+
logger.info("Creating new Vectorstore instance for collection: %s", collection_name)
|
48
44
|
_vectorstore_cache[collection_name] = Vectorstore(
|
49
45
|
embedding_model=embedding_model, config=config
|
50
46
|
)
|
@@ -5,7 +5,7 @@ Handle COSINE -> IP conversion for GPU indexes
|
|
5
5
|
|
6
6
|
import logging
|
7
7
|
import subprocess
|
8
|
-
from typing import
|
8
|
+
from typing import Any
|
9
9
|
|
10
10
|
logger = logging.getLogger(__name__)
|
11
11
|
|
@@ -56,7 +56,7 @@ def detect_nvidia_gpu(config=None) -> bool:
|
|
56
56
|
|
57
57
|
def get_optimal_index_config(
|
58
58
|
has_gpu: bool, embedding_dim: int = 768, use_cosine: bool = True
|
59
|
-
) ->
|
59
|
+
) -> tuple[dict[str, Any], dict[str, Any]]:
|
60
60
|
"""
|
61
61
|
Get optimal index and search parameters based on GPU availability.
|
62
62
|
|
@@ -80,9 +80,7 @@ def get_optimal_index_config(
|
|
80
80
|
"GPU indexes don't support COSINE distance. "
|
81
81
|
"Vectors will be normalized and IP distance will be used instead."
|
82
82
|
)
|
83
|
-
metric_type =
|
84
|
-
"IP" # Inner Product for normalized vectors = cosine similarity
|
85
|
-
)
|
83
|
+
metric_type = "IP" # Inner Product for normalized vectors = cosine similarity
|
86
84
|
else:
|
87
85
|
metric_type = "IP" # Default to IP for GPU
|
88
86
|
|
@@ -120,9 +118,7 @@ def get_optimal_index_config(
|
|
120
118
|
"index_type": "IVF_FLAT",
|
121
119
|
"metric_type": metric_type,
|
122
120
|
"params": {
|
123
|
-
"nlist": min(
|
124
|
-
1024, max(64, embedding_dim // 8)
|
125
|
-
) # Dynamic nlist based on dimension
|
121
|
+
"nlist": min(1024, max(64, embedding_dim // 8)) # Dynamic nlist based on dimension
|
126
122
|
},
|
127
123
|
}
|
128
124
|
|
@@ -136,7 +132,7 @@ def get_optimal_index_config(
|
|
136
132
|
|
137
133
|
|
138
134
|
def log_index_configuration(
|
139
|
-
index_params:
|
135
|
+
index_params: dict[str, Any], search_params: dict[str, Any], use_cosine: bool = True
|
140
136
|
) -> None:
|
141
137
|
"""Log the selected index configuration for debugging."""
|
142
138
|
index_type = index_params.get("index_type", "Unknown")
|
@@ -5,7 +5,7 @@ Rerank chunks instead of papers following traditional RAG pipeline
|
|
5
5
|
|
6
6
|
import logging
|
7
7
|
import os
|
8
|
-
from typing import Any
|
8
|
+
from typing import Any
|
9
9
|
|
10
10
|
from langchain_core.documents import Document
|
11
11
|
from langchain_nvidia_ai_endpoints import NVIDIARerank
|
@@ -18,8 +18,8 @@ logger.setLevel(getattr(logging, log_level))
|
|
18
18
|
|
19
19
|
|
20
20
|
def rerank_chunks(
|
21
|
-
chunks:
|
22
|
-
) ->
|
21
|
+
chunks: list[Document], query: str, config: Any, top_k: int = 25
|
22
|
+
) -> list[Document]:
|
23
23
|
"""
|
24
24
|
Rerank chunks by relevance to the query using NVIDIA's reranker.
|
25
25
|
|
@@ -68,7 +68,7 @@ def rerank_chunks(
|
|
68
68
|
# Log chunk metadata for debugging
|
69
69
|
logger.debug(
|
70
70
|
"Reranking chunks from papers: %s",
|
71
|
-
list(
|
71
|
+
list({chunk.metadata.get("paper_id", "unknown") for chunk in chunks})[:5],
|
72
72
|
)
|
73
73
|
|
74
74
|
# Rerank the chunks
|
@@ -3,7 +3,7 @@ Paper loading utilities for managing PDF documents in vector store.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any
|
6
|
+
from typing import Any
|
7
7
|
|
8
8
|
from .batch_processor import add_papers_batch
|
9
9
|
|
@@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
|
|
12
12
|
|
13
13
|
def load_all_papers(
|
14
14
|
vector_store: Any, # The Vectorstore instance
|
15
|
-
articles:
|
15
|
+
articles: dict[str, Any],
|
16
16
|
call_id: str,
|
17
17
|
config: Any,
|
18
18
|
has_gpu: bool,
|
@@ -3,8 +3,7 @@ RAG pipeline for retrieving and reranking chunks from a vector store.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any
|
7
|
-
|
6
|
+
from typing import Any
|
8
7
|
|
9
8
|
# Import our GPU detection utility
|
10
9
|
from .nvidia_nim_reranker import rerank_chunks
|
@@ -15,7 +14,7 @@ logger = logging.getLogger(__name__)
|
|
15
14
|
|
16
15
|
def retrieve_and_rerank_chunks(
|
17
16
|
vector_store: Any, query: str, config: Any, call_id: str, has_gpu: bool
|
18
|
-
) ->
|
17
|
+
) -> list[Any]:
|
19
18
|
"""
|
20
19
|
Traditional RAG pipeline: retrieve chunks from all papers, then rerank.
|
21
20
|
Optimized for GPU/CPU hardware.
|
@@ -38,12 +37,8 @@ def retrieve_and_rerank_chunks(
|
|
38
37
|
# Adjust initial retrieval count based on hardware
|
39
38
|
if has_gpu:
|
40
39
|
# GPU can handle larger initial retrieval efficiently
|
41
|
-
initial_chunks_count = config.get(
|
42
|
-
|
43
|
-
) # Increased for GPU
|
44
|
-
mmr_diversity = config.get(
|
45
|
-
"mmr_diversity", 0.75
|
46
|
-
) # Slightly more diverse for larger sets
|
40
|
+
initial_chunks_count = config.get("initial_retrieval_k", 150) # Increased for GPU
|
41
|
+
mmr_diversity = config.get("mmr_diversity", 0.75) # Slightly more diverse for larger sets
|
47
42
|
else:
|
48
43
|
# CPU - use conservative settings
|
49
44
|
initial_chunks_count = config.get("initial_retrieval_k", 100) # Original
|
@@ -72,9 +67,7 @@ def retrieve_and_rerank_chunks(
|
|
72
67
|
"%s: Retrieved %d chunks from %d unique papers using %s",
|
73
68
|
call_id,
|
74
69
|
len(retrieved_chunks),
|
75
|
-
len(
|
76
|
-
set(chunk.metadata.get("paper_id", "unknown") for chunk in retrieved_chunks)
|
77
|
-
),
|
70
|
+
len({chunk.metadata.get("paper_id", "unknown") for chunk in retrieved_chunks}),
|
78
71
|
hardware_mode,
|
79
72
|
)
|
80
73
|
|
@@ -95,9 +88,7 @@ def retrieve_and_rerank_chunks(
|
|
95
88
|
)
|
96
89
|
|
97
90
|
# Log final results with hardware info
|
98
|
-
final_papers = len(
|
99
|
-
set(chunk.metadata.get("paper_id", "unknown") for chunk in reranked_chunks)
|
100
|
-
)
|
91
|
+
final_papers = len({chunk.metadata.get("paper_id", "unknown") for chunk in reranked_chunks})
|
101
92
|
|
102
93
|
logger.info(
|
103
94
|
"%s: Reranking complete using %s. Final %d chunks from %d unique papers",
|
@@ -6,11 +6,9 @@ With automatic GPU/CPU search parameter optimization.
|
|
6
6
|
|
7
7
|
import logging
|
8
8
|
import os
|
9
|
-
from typing import List, Optional
|
10
9
|
|
11
10
|
from langchain_core.documents import Document
|
12
11
|
|
13
|
-
|
14
12
|
# Set up logging with configurable level
|
15
13
|
log_level = os.environ.get("LOG_LEVEL", "INFO")
|
16
14
|
logging.basicConfig(level=getattr(logging, log_level))
|
@@ -21,10 +19,10 @@ logger.setLevel(getattr(logging, log_level))
|
|
21
19
|
def retrieve_relevant_chunks(
|
22
20
|
vector_store,
|
23
21
|
query: str,
|
24
|
-
paper_ids:
|
22
|
+
paper_ids: list[str] | None = None,
|
25
23
|
top_k: int = 100, # Increased default to cast wider net before reranking
|
26
24
|
mmr_diversity: float = 0.8, # Slightly reduced for better diversity
|
27
|
-
) ->
|
25
|
+
) -> list[Document]:
|
28
26
|
"""
|
29
27
|
Retrieve the most relevant chunks for a query using maximal marginal relevance.
|
30
28
|
Automatically uses GPU-optimized search parameters if GPU is available.
|
@@ -104,9 +102,7 @@ def retrieve_relevant_chunks(
|
|
104
102
|
filter=filter_dict,
|
105
103
|
)
|
106
104
|
|
107
|
-
logger.info(
|
108
|
-
"Retrieved %d chunks using %s MMR from Milvus", len(results), search_mode
|
109
|
-
)
|
105
|
+
logger.info("Retrieved %d chunks using %s MMR from Milvus", len(results), search_mode)
|
110
106
|
|
111
107
|
# Log some details about retrieved chunks for debugging
|
112
108
|
if results and logger.isEnabledFor(logging.DEBUG):
|
@@ -132,10 +128,10 @@ def retrieve_relevant_chunks(
|
|
132
128
|
def retrieve_relevant_chunks_with_scores(
|
133
129
|
vector_store,
|
134
130
|
query: str,
|
135
|
-
paper_ids:
|
131
|
+
paper_ids: list[str] | None = None,
|
136
132
|
top_k: int = 100,
|
137
133
|
score_threshold: float = 0.0,
|
138
|
-
) ->
|
134
|
+
) -> list[tuple[Document, float]]:
|
139
135
|
"""
|
140
136
|
Retrieve chunks with similarity scores, optimized for GPU/CPU.
|
141
137
|
|
@@ -186,9 +182,7 @@ def retrieve_relevant_chunks_with_scores(
|
|
186
182
|
)
|
187
183
|
|
188
184
|
# Filter by score threshold
|
189
|
-
filtered_results = [
|
190
|
-
(doc, score) for doc, score in results if score >= score_threshold
|
191
|
-
]
|
185
|
+
filtered_results = [(doc, score) for doc, score in results if score >= score_threshold]
|
192
186
|
|
193
187
|
logger.info(
|
194
188
|
"%s search with scores retrieved %d/%d chunks above threshold %.3f",
|
@@ -200,6 +194,4 @@ def retrieve_relevant_chunks_with_scores(
|
|
200
194
|
|
201
195
|
return filtered_results
|
202
196
|
|
203
|
-
raise NotImplementedError(
|
204
|
-
"Vector store does not support similarity_search_with_score"
|
205
|
-
)
|
197
|
+
raise NotImplementedError("Vector store does not support similarity_search_with_score")
|
@@ -6,7 +6,7 @@ Handles connection reuse, event loops, and GPU detection caching.
|
|
6
6
|
import asyncio
|
7
7
|
import logging
|
8
8
|
import threading
|
9
|
-
from typing import Any
|
9
|
+
from typing import Any
|
10
10
|
|
11
11
|
from langchain_core.embeddings import Embeddings
|
12
12
|
from langchain_milvus import Milvus
|
@@ -105,7 +105,7 @@ class VectorstoreSingleton:
|
|
105
105
|
self,
|
106
106
|
collection_name: str,
|
107
107
|
embedding_model: Embeddings,
|
108
|
-
connection_args:
|
108
|
+
connection_args: dict[str, Any],
|
109
109
|
) -> Milvus:
|
110
110
|
"""Get or create a vector store for a collection."""
|
111
111
|
if collection_name not in self._vector_stores:
|
@@ -3,8 +3,7 @@ Helper class for question and answer tool in PDF processing.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any
|
7
|
-
|
6
|
+
from typing import Any
|
8
7
|
|
9
8
|
from .get_vectorstore import get_vectorstore
|
10
9
|
|
@@ -29,7 +28,7 @@ class QAToolHelper:
|
|
29
28
|
self.call_id = call_id
|
30
29
|
logger.debug("QAToolHelper started call %s", call_id)
|
31
30
|
|
32
|
-
def get_state_models_and_data(self, state: dict) -> tuple[Any, Any,
|
31
|
+
def get_state_models_and_data(self, state: dict) -> tuple[Any, Any, dict[str, Any]]:
|
33
32
|
"""Retrieve embedding model, LLM, and article data from agent state."""
|
34
33
|
text_emb = state.get("text_embedding_model")
|
35
34
|
if not text_emb:
|
@@ -78,7 +77,7 @@ class QAToolHelper:
|
|
78
77
|
|
79
78
|
return vs
|
80
79
|
|
81
|
-
def get_hardware_stats(self) ->
|
80
|
+
def get_hardware_stats(self) -> dict[str, Any]:
|
82
81
|
"""Get current hardware configuration stats for monitoring."""
|
83
82
|
return {
|
84
83
|
"gpu_available": self.has_gpu,
|
@@ -5,7 +5,6 @@ and use IP (Inner Product) distance instead.
|
|
5
5
|
"""
|
6
6
|
|
7
7
|
import logging
|
8
|
-
from typing import List, Union
|
9
8
|
|
10
9
|
import numpy as np
|
11
10
|
from langchain_core.embeddings import Embeddings
|
@@ -13,7 +12,7 @@ from langchain_core.embeddings import Embeddings
|
|
13
12
|
logger = logging.getLogger(__name__)
|
14
13
|
|
15
14
|
|
16
|
-
def normalize_vector(vector:
|
15
|
+
def normalize_vector(vector: list[float] | np.ndarray) -> list[float]:
|
17
16
|
"""
|
18
17
|
Normalize a single vector to unit length.
|
19
18
|
|
@@ -34,7 +33,7 @@ def normalize_vector(vector: Union[List[float], np.ndarray]) -> List[float]:
|
|
34
33
|
return normalized.tolist()
|
35
34
|
|
36
35
|
|
37
|
-
def normalize_vectors_batch(vectors:
|
36
|
+
def normalize_vectors_batch(vectors: list[list[float]]) -> list[list[float]]:
|
38
37
|
"""
|
39
38
|
Normalize a batch of vectors to unit length.
|
40
39
|
|
@@ -56,9 +55,7 @@ def normalize_vectors_batch(vectors: List[List[float]]) -> List[List[float]]:
|
|
56
55
|
# Handle zero vectors
|
57
56
|
zero_mask = norms.flatten() == 0
|
58
57
|
if np.any(zero_mask):
|
59
|
-
logger.warning(
|
60
|
-
"Found %d zero vectors during batch normalization", np.sum(zero_mask)
|
61
|
-
)
|
58
|
+
logger.warning("Found %d zero vectors during batch normalization", np.sum(zero_mask))
|
62
59
|
norms[zero_mask] = 1.0 # Avoid division by zero
|
63
60
|
|
64
61
|
# Normalize
|
@@ -85,11 +82,9 @@ class NormalizingEmbeddings(Embeddings):
|
|
85
82
|
self.normalize_for_gpu = normalize_for_gpu
|
86
83
|
|
87
84
|
if normalize_for_gpu:
|
88
|
-
logger.info(
|
89
|
-
"Embedding model wrapped with normalization for GPU compatibility"
|
90
|
-
)
|
85
|
+
logger.info("Embedding model wrapped with normalization for GPU compatibility")
|
91
86
|
|
92
|
-
def embed_documents(self, texts:
|
87
|
+
def embed_documents(self, texts: list[str]) -> list[list[float]]:
|
93
88
|
"""Embed documents and optionally normalize."""
|
94
89
|
embeddings = self.embedding_model.embed_documents(texts)
|
95
90
|
|
@@ -99,7 +94,7 @@ class NormalizingEmbeddings(Embeddings):
|
|
99
94
|
|
100
95
|
return embeddings
|
101
96
|
|
102
|
-
def embed_query(self, text: str) ->
|
97
|
+
def embed_query(self, text: str) -> list[float]:
|
103
98
|
"""Embed query and optionally normalize."""
|
104
99
|
embedding = self.embedding_model.embed_query(text)
|
105
100
|
|
@@ -128,13 +123,9 @@ def should_normalize_vectors(has_gpu: bool, use_cosine: bool) -> bool:
|
|
128
123
|
needs_normalization = has_gpu and use_cosine
|
129
124
|
|
130
125
|
if needs_normalization:
|
131
|
-
logger.info(
|
132
|
-
"Vector normalization ENABLED: GPU detected with COSINE similarity request"
|
133
|
-
)
|
126
|
+
logger.info("Vector normalization ENABLED: GPU detected with COSINE similarity request")
|
134
127
|
else:
|
135
|
-
logger.info(
|
136
|
-
"Vector normalization DISABLED: GPU=%s, COSINE=%s", has_gpu, use_cosine
|
137
|
-
)
|
128
|
+
logger.info("Vector normalization DISABLED: GPU=%s, COSINE=%s", has_gpu, use_cosine)
|
138
129
|
|
139
130
|
return needs_normalization
|
140
131
|
|
@@ -8,7 +8,7 @@ Supports both GPU and CPU configurations.
|
|
8
8
|
import logging
|
9
9
|
import os
|
10
10
|
import time
|
11
|
-
from typing import Any
|
11
|
+
from typing import Any
|
12
12
|
|
13
13
|
from langchain_core.documents import Document
|
14
14
|
from langchain_core.embeddings import Embeddings
|
@@ -39,7 +39,7 @@ class Vectorstore:
|
|
39
39
|
def __init__(
|
40
40
|
self,
|
41
41
|
embedding_model: Embeddings,
|
42
|
-
metadata_fields:
|
42
|
+
metadata_fields: list[str] | None = None,
|
43
43
|
config: Any = None,
|
44
44
|
):
|
45
45
|
"""
|
@@ -96,12 +96,8 @@ class Vectorstore:
|
|
96
96
|
|
97
97
|
# Initialize Milvus connection parameters with environment variable fallback
|
98
98
|
self.connection_args = {
|
99
|
-
"host": (
|
100
|
-
|
101
|
-
),
|
102
|
-
"port": (
|
103
|
-
config.milvus.port if config else int(os.getenv("MILVUS_PORT", "19530"))
|
104
|
-
),
|
99
|
+
"host": (config.milvus.host if config else os.getenv("MILVUS_HOST", "127.0.0.1")),
|
100
|
+
"port": (config.milvus.port if config else int(os.getenv("MILVUS_PORT", "19530"))),
|
105
101
|
}
|
106
102
|
# Log the connection parameters being used
|
107
103
|
logger.info(
|
@@ -109,9 +105,7 @@ class Vectorstore:
|
|
109
105
|
self.connection_args["host"],
|
110
106
|
self.connection_args["port"],
|
111
107
|
)
|
112
|
-
self.collection_name =
|
113
|
-
config.milvus.collection_name if config else "pdf_rag_documents"
|
114
|
-
)
|
108
|
+
self.collection_name = config.milvus.collection_name if config else "pdf_rag_documents"
|
115
109
|
self.db_name = config.milvus.db_name if config else "pdf_rag_db"
|
116
110
|
|
117
111
|
# Get singleton instance
|
@@ -139,8 +133,8 @@ class Vectorstore:
|
|
139
133
|
self._ensure_collection_loaded()
|
140
134
|
|
141
135
|
# Store for document metadata (keeping for compatibility)
|
142
|
-
self.documents:
|
143
|
-
self.paper_metadata:
|
136
|
+
self.documents: dict[str, Document] = {}
|
137
|
+
self.paper_metadata: dict[str, dict[str, Any]] = {}
|
144
138
|
|
145
139
|
# Log final configuration
|
146
140
|
metric_info = (
|
@@ -182,9 +176,7 @@ class Vectorstore:
|
|
182
176
|
langchain_collection = getattr(self.vector_store, "collection", None)
|
183
177
|
|
184
178
|
if langchain_collection is None:
|
185
|
-
logger.warning(
|
186
|
-
"No LangChain collection found, proceeding with empty loaded_papers"
|
187
|
-
)
|
179
|
+
logger.warning("No LangChain collection found, proceeding with empty loaded_papers")
|
188
180
|
return
|
189
181
|
|
190
182
|
# Force flush and check entity count
|
@@ -204,14 +196,14 @@ class Vectorstore:
|
|
204
196
|
)
|
205
197
|
|
206
198
|
# Extract unique paper IDs
|
207
|
-
existing_paper_ids =
|
199
|
+
existing_paper_ids = {result["paper_id"] for result in results}
|
208
200
|
self.loaded_papers.update(existing_paper_ids)
|
209
201
|
|
210
202
|
logger.info("Found %d unique papers in collection", len(existing_paper_ids))
|
211
203
|
else:
|
212
204
|
logger.info("Collection is empty - no existing papers")
|
213
205
|
|
214
|
-
def similarity_search(self, query: str, **kwargs: Any) ->
|
206
|
+
def similarity_search(self, query: str, **kwargs: Any) -> list[Document]:
|
215
207
|
"""
|
216
208
|
Perform similarity search on the vector store.
|
217
209
|
Query embedding will be automatically normalized if using GPU with COSINE.
|
@@ -222,7 +214,7 @@ class Vectorstore:
|
|
222
214
|
"""
|
223
215
|
# Extract our parameters
|
224
216
|
k: int = kwargs.pop("k", 4)
|
225
|
-
filter_:
|
217
|
+
filter_: dict[str, Any] | None = kwargs.pop("filter", None)
|
226
218
|
|
227
219
|
# Build Milvus expr from filter_, if present
|
228
220
|
expr = None
|
@@ -232,22 +224,16 @@ class Vectorstore:
|
|
232
224
|
if isinstance(value, str):
|
233
225
|
conditions.append(f'{key} == "{value}"')
|
234
226
|
elif isinstance(value, list):
|
235
|
-
vals = ", ".join(
|
236
|
-
f'"{v}"' if isinstance(v, str) else str(v) for v in value
|
237
|
-
)
|
227
|
+
vals = ", ".join(f'"{v}"' if isinstance(v, str) else str(v) for v in value)
|
238
228
|
conditions.append(f"{key} in [{vals}]")
|
239
229
|
else:
|
240
230
|
conditions.append(f"{key} == {value}")
|
241
231
|
expr = " and ".join(conditions)
|
242
232
|
|
243
233
|
# Delegate to the wrapped store
|
244
|
-
return self.vector_store.similarity_search(
|
245
|
-
query=query, k=k, expr=expr, **kwargs
|
246
|
-
)
|
234
|
+
return self.vector_store.similarity_search(query=query, k=k, expr=expr, **kwargs)
|
247
235
|
|
248
|
-
def max_marginal_relevance_search(
|
249
|
-
self, query: str, **kwargs: Any
|
250
|
-
) -> List[Document]:
|
236
|
+
def max_marginal_relevance_search(self, query: str, **kwargs: Any) -> list[Document]:
|
251
237
|
"""
|
252
238
|
Perform MMR search on the vector store.
|
253
239
|
Query embedding will be automatically normalized if using GPU with COSINE.
|
@@ -262,7 +248,7 @@ class Vectorstore:
|
|
262
248
|
k: int = kwargs.pop("k", 4)
|
263
249
|
fetch_k: int = kwargs.pop("fetch_k", 20)
|
264
250
|
lambda_mult: float = kwargs.pop("lambda_mult", 0.5)
|
265
|
-
filter_:
|
251
|
+
filter_: dict[str, Any] | None = kwargs.pop("filter", None)
|
266
252
|
|
267
253
|
# Build Milvus expr from filter_, if present
|
268
254
|
expr = None
|
@@ -272,9 +258,7 @@ class Vectorstore:
|
|
272
258
|
if isinstance(value, str):
|
273
259
|
conditions.append(f'{key} == "{value}"')
|
274
260
|
elif isinstance(value, list):
|
275
|
-
vals = ", ".join(
|
276
|
-
f'"{v}"' if isinstance(v, str) else str(v) for v in value
|
277
|
-
)
|
261
|
+
vals = ", ".join(f'"{v}"' if isinstance(v, str) else str(v) for v in value)
|
278
262
|
conditions.append(f"{key} in [{vals}]")
|
279
263
|
else:
|
280
264
|
conditions.append(f"{key} == {value}")
|
@@ -330,7 +314,7 @@ class Vectorstore:
|
|
330
314
|
else:
|
331
315
|
logger.info("Collection is empty, skipping load operation")
|
332
316
|
|
333
|
-
def get_embedding_info(self) ->
|
317
|
+
def get_embedding_info(self) -> dict[str, Any]:
|
334
318
|
"""Get information about the embedding configuration."""
|
335
319
|
return {
|
336
320
|
"has_gpu": self.has_gpu,
|
@@ -2,12 +2,14 @@
|
|
2
2
|
This file is used to import all the modules in the package.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from . import
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
from . import (
|
6
|
+
display_dataframe,
|
7
|
+
multi_paper_rec,
|
8
|
+
query_dataframe,
|
9
|
+
retrieve_semantic_scholar_paper_id,
|
10
|
+
search,
|
11
|
+
single_paper_rec,
|
12
|
+
)
|
11
13
|
|
12
14
|
__all__ = [
|
13
15
|
"display_dataframe",
|
@@ -12,17 +12,15 @@ only displays the existing list. If no papers are available, it raises NoPapersF
|
|
12
12
|
to signal that a search or recommendation must be executed first.
|
13
13
|
"""
|
14
14
|
|
15
|
-
|
16
15
|
import logging
|
17
|
-
|
18
16
|
from typing import Annotated
|
19
|
-
|
17
|
+
|
20
18
|
from langchain_core.messages import ToolMessage
|
21
19
|
from langchain_core.tools import tool
|
22
20
|
from langchain_core.tools.base import InjectedToolCallId
|
23
21
|
from langgraph.prebuilt import InjectedState
|
24
22
|
from langgraph.types import Command
|
25
|
-
|
23
|
+
from pydantic import BaseModel, Field
|
26
24
|
|
27
25
|
# Configure logging
|
28
26
|
logging.basicConfig(level=logging.INFO)
|
@@ -97,9 +95,7 @@ def display_dataframe(
|
|
97
95
|
artifact = state.get(context_val)
|
98
96
|
if not artifact:
|
99
97
|
logger.info("No papers found in state, raising NoPapersFoundError")
|
100
|
-
raise NoPapersFoundError(
|
101
|
-
"No papers found. A search/rec needs to be performed first."
|
102
|
-
)
|
98
|
+
raise NoPapersFoundError("No papers found. A search/rec needs to be performed first.")
|
103
99
|
content = f"{len(artifact)} papers found. Papers are attached as an artifact."
|
104
100
|
return Command(
|
105
101
|
update={
|
@@ -9,14 +9,15 @@ of recommended papers.
|
|
9
9
|
"""
|
10
10
|
|
11
11
|
import logging
|
12
|
-
from typing import Annotated, Any
|
12
|
+
from typing import Annotated, Any
|
13
|
+
|
13
14
|
from langchain_core.messages import ToolMessage
|
14
15
|
from langchain_core.tools import tool
|
15
16
|
from langchain_core.tools.base import InjectedToolCallId
|
16
17
|
from langgraph.types import Command
|
17
18
|
from pydantic import BaseModel, Field
|
18
|
-
from .utils.multi_helper import MultiPaperRecData
|
19
19
|
|
20
|
+
from .utils.multi_helper import MultiPaperRecData
|
20
21
|
|
21
22
|
# Configure logging
|
22
23
|
logging.basicConfig(level=logging.INFO)
|
@@ -34,7 +35,7 @@ class MultiPaperRecInput(BaseModel):
|
|
34
35
|
tool_call_id: Internal tool call identifier injected by the system.
|
35
36
|
"""
|
36
37
|
|
37
|
-
paper_ids:
|
38
|
+
paper_ids: list[str] = Field(
|
38
39
|
description="List of 40-character Semantic Scholar Paper IDs"
|
39
40
|
"(at least two) to base recommendations on"
|
40
41
|
)
|
@@ -44,7 +45,7 @@ class MultiPaperRecInput(BaseModel):
|
|
44
45
|
ge=1,
|
45
46
|
le=500,
|
46
47
|
)
|
47
|
-
year:
|
48
|
+
year: str | None = Field(
|
48
49
|
default=None,
|
49
50
|
description="Publication year filter; supports formats:"
|
50
51
|
"'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'",
|
@@ -59,10 +60,10 @@ class MultiPaperRecInput(BaseModel):
|
|
59
60
|
parse_docstring=True,
|
60
61
|
)
|
61
62
|
def get_multi_paper_recommendations(
|
62
|
-
paper_ids:
|
63
|
+
paper_ids: list[str],
|
63
64
|
tool_call_id: Annotated[str, InjectedToolCallId],
|
64
65
|
limit: int = 10,
|
65
|
-
year:
|
66
|
+
year: str | None = None,
|
66
67
|
) -> Command[Any]:
|
67
68
|
"""
|
68
69
|
Recommend related research papers using the Semantic Scholar API.
|