aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +4 -8
- aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
- aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
- aiagents4pharma/talk2biomodels/api/ols.py +13 -10
- aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
- aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
- aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
- aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
- aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
- aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
- aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
- aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
- aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
- aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
- aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +4 -5
- aiagents4pharma/talk2cells/agents/__init__.py +3 -2
- aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
- aiagents4pharma/talk2cells/states/__init__.py +3 -2
- aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
- aiagents4pharma/talk2cells/tools/__init__.py +3 -2
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +334 -216
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +160 -97
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +87 -13
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +145 -142
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +14 -34
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
- aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
- aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
- aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
- aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
- aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
- aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
- aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
- aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
- aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
- aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
- aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
- aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
- aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
- aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
- aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
- aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
- aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
- aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
- aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
- aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
- aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
- aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
- aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
- aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
- aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/METADATA +115 -51
- aiagents4pharma-1.45.1.dist-info/RECORD +324 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/WHEEL +1 -2
- aiagents4pharma-1.44.0.dist-info/RECORD +0 -293
- aiagents4pharma-1.44.0.dist-info/top_level.txt +0 -1
- /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/licenses/LICENSE +0 -0
@@ -2,13 +2,15 @@
|
|
2
2
|
Exctraction of multimodal subgraph using Prize-Collecting Steiner Tree (PCST) algorithm.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from typing import
|
5
|
+
from typing import NamedTuple
|
6
|
+
|
6
7
|
import numpy as np
|
7
8
|
import pandas as pd
|
8
|
-
import torch
|
9
9
|
import pcst_fast
|
10
|
+
import torch
|
10
11
|
from torch_geometric.data.data import Data
|
11
12
|
|
13
|
+
|
12
14
|
class MultimodalPCSTPruning(NamedTuple):
|
13
15
|
"""
|
14
16
|
Prize-Collecting Steiner Tree (PCST) pruning algorithm implementation inspired by G-Retriever
|
@@ -27,6 +29,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
27
29
|
pruning: The pruning strategy to use.
|
28
30
|
verbosity_level: The verbosity level.
|
29
31
|
"""
|
32
|
+
|
30
33
|
topk: int = 3
|
31
34
|
topk_e: int = 3
|
32
35
|
cost_e: float = 0.5
|
@@ -37,10 +40,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
37
40
|
verbosity_level: int = 0
|
38
41
|
use_description: bool = False
|
39
42
|
|
40
|
-
def _compute_node_prizes(self,
|
41
|
-
graph: Data,
|
42
|
-
query_emb: torch.Tensor,
|
43
|
-
modality: str) :
|
43
|
+
def _compute_node_prizes(self, graph: Data, query_emb: torch.Tensor, modality: str):
|
44
44
|
"""
|
45
45
|
Compute the node prizes based on the cosine similarity between the query and nodes.
|
46
46
|
|
@@ -54,25 +54,28 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
54
54
|
The prizes of the nodes.
|
55
55
|
"""
|
56
56
|
# Convert PyG graph to a DataFrame
|
57
|
-
graph_df = pd.DataFrame(
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
57
|
+
graph_df = pd.DataFrame(
|
58
|
+
{
|
59
|
+
"node_type": graph.node_type,
|
60
|
+
"desc_x": [x.tolist() for x in graph.desc_x],
|
61
|
+
"x": [list(x) for x in graph.x],
|
62
|
+
"score": [0.0 for _ in range(len(graph.node_id))],
|
63
|
+
}
|
64
|
+
)
|
63
65
|
|
64
66
|
# Calculate cosine similarity for text features and update the score
|
65
67
|
if self.use_description:
|
66
68
|
graph_df.loc[:, "score"] = torch.nn.CosineSimilarity(dim=-1)(
|
67
|
-
|
68
|
-
|
69
|
-
|
69
|
+
query_emb,
|
70
|
+
torch.tensor(list(graph_df.desc_x.values)), # Using textual description features
|
71
|
+
).tolist()
|
70
72
|
else:
|
71
|
-
graph_df.loc[graph_df["node_type"] == modality,
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
graph_df.loc[graph_df["node_type"] == modality, "score"] = torch.nn.CosineSimilarity(
|
74
|
+
dim=-1
|
75
|
+
)(
|
76
|
+
query_emb,
|
77
|
+
torch.tensor(list(graph_df[graph_df["node_type"] == modality].x.values)),
|
78
|
+
).tolist()
|
76
79
|
|
77
80
|
# Set the prizes for nodes based on the similarity scores
|
78
81
|
n_prizes = torch.tensor(graph_df.score.values, dtype=torch.float32)
|
@@ -84,9 +87,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
84
87
|
|
85
88
|
return n_prizes
|
86
89
|
|
87
|
-
def _compute_edge_prizes(self,
|
88
|
-
graph: Data,
|
89
|
-
text_emb: torch.Tensor) :
|
90
|
+
def _compute_edge_prizes(self, graph: Data, text_emb: torch.Tensor):
|
90
91
|
"""
|
91
92
|
Compute the node prizes based on the cosine similarity between the query and nodes.
|
92
93
|
|
@@ -106,20 +107,22 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
106
107
|
e_prizes[e_prizes < topk_e_values[-1]] = 0.0
|
107
108
|
last_topk_e_value = topk_e
|
108
109
|
for k in range(topk_e):
|
109
|
-
indices =
|
110
|
-
unique_prizes == topk_e_values[k]
|
111
|
-
)
|
110
|
+
indices = (
|
111
|
+
inverse_indices == (unique_prizes == topk_e_values[k]).nonzero(as_tuple=True)[0]
|
112
|
+
)
|
112
113
|
value = min((topk_e - k) / indices.sum().item(), last_topk_e_value)
|
113
114
|
e_prizes[indices] = value
|
114
115
|
last_topk_e_value = value * (1 - self.c_const)
|
115
116
|
|
116
117
|
return e_prizes
|
117
118
|
|
118
|
-
def compute_prizes(
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
119
|
+
def compute_prizes(
|
120
|
+
self,
|
121
|
+
graph: Data,
|
122
|
+
text_emb: torch.Tensor,
|
123
|
+
query_emb: torch.Tensor,
|
124
|
+
modality: str,
|
125
|
+
):
|
123
126
|
"""
|
124
127
|
Compute the node prizes based on the cosine similarity between the query and nodes,
|
125
128
|
as well as the edge prizes based on the cosine similarity between the query and edges.
|
@@ -144,9 +147,9 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
144
147
|
|
145
148
|
return {"nodes": n_prizes, "edges": e_prizes}
|
146
149
|
|
147
|
-
def compute_subgraph_costs(
|
148
|
-
|
149
|
-
|
150
|
+
def compute_subgraph_costs(
|
151
|
+
self, graph: Data, prizes: dict
|
152
|
+
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
150
153
|
"""
|
151
154
|
Compute the costs in constructing the subgraph proposed by G-Retriever paper.
|
152
155
|
|
@@ -204,7 +207,11 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
204
207
|
return edges_dict, prizes, costs, mapping
|
205
208
|
|
206
209
|
def get_subgraph_nodes_edges(
|
207
|
-
self,
|
210
|
+
self,
|
211
|
+
graph: Data,
|
212
|
+
vertices: np.ndarray,
|
213
|
+
edges_dict: dict,
|
214
|
+
mapping: dict,
|
208
215
|
) -> dict:
|
209
216
|
"""
|
210
217
|
Get the selected nodes and edges of the subgraph based on the vertices and edges computed
|
@@ -234,18 +241,18 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
234
241
|
subgraph_edges = np.array(subgraph_edges + virtual_edges)
|
235
242
|
edge_index = graph.edge_index[:, subgraph_edges]
|
236
243
|
subgraph_nodes = np.unique(
|
237
|
-
np.concatenate(
|
238
|
-
[subgraph_nodes, edge_index[0].numpy(), edge_index[1].numpy()]
|
239
|
-
)
|
244
|
+
np.concatenate([subgraph_nodes, edge_index[0].numpy(), edge_index[1].numpy()])
|
240
245
|
)
|
241
246
|
|
242
247
|
return {"nodes": subgraph_nodes, "edges": subgraph_edges}
|
243
248
|
|
244
|
-
def extract_subgraph(
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
+
def extract_subgraph(
|
250
|
+
self,
|
251
|
+
graph: Data,
|
252
|
+
text_emb: torch.Tensor,
|
253
|
+
query_emb: torch.Tensor,
|
254
|
+
modality: str,
|
255
|
+
) -> dict:
|
249
256
|
"""
|
250
257
|
Perform the Prize-Collecting Steiner Tree (PCST) algorithm to extract the subgraph.
|
251
258
|
|
@@ -268,9 +275,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
268
275
|
prizes = self.compute_prizes(graph, text_emb, query_emb, modality)
|
269
276
|
|
270
277
|
# Compute costs in constructing the subgraph
|
271
|
-
edges_dict, prizes, costs, mapping = self.compute_subgraph_costs(
|
272
|
-
graph, prizes
|
273
|
-
)
|
278
|
+
edges_dict, prizes, costs, mapping = self.compute_subgraph_costs(graph, prizes)
|
274
279
|
|
275
280
|
# Retrieve the subgraph using the PCST algorithm
|
276
281
|
result_vertices, result_edges = pcst_fast.pcst_fast(
|
@@ -287,6 +292,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
287
292
|
graph,
|
288
293
|
result_vertices,
|
289
294
|
{"edges": result_edges, "num_prior_edges": edges_dict["num_prior_edges"]},
|
290
|
-
mapping
|
295
|
+
mapping,
|
296
|
+
)
|
291
297
|
|
292
298
|
return subgraph
|
@@ -2,12 +2,14 @@
|
|
2
2
|
Exctraction of subgraph using Prize-Collecting Steiner Tree (PCST) algorithm.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from typing import
|
5
|
+
from typing import NamedTuple
|
6
|
+
|
6
7
|
import numpy as np
|
7
|
-
import torch
|
8
8
|
import pcst_fast
|
9
|
+
import torch
|
9
10
|
from torch_geometric.data.data import Data
|
10
11
|
|
12
|
+
|
11
13
|
class PCSTPruning(NamedTuple):
|
12
14
|
"""
|
13
15
|
Prize-Collecting Steiner Tree (PCST) pruning algorithm implementation inspired by G-Retriever
|
@@ -26,6 +28,7 @@ class PCSTPruning(NamedTuple):
|
|
26
28
|
pruning: The pruning strategy to use.
|
27
29
|
verbosity_level: The verbosity level.
|
28
30
|
"""
|
31
|
+
|
29
32
|
topk: int = 3
|
30
33
|
topk_e: int = 3
|
31
34
|
cost_e: float = 0.5
|
@@ -76,9 +79,9 @@ class PCSTPruning(NamedTuple):
|
|
76
79
|
e_prizes[e_prizes < topk_e_values[-1]] = 0.0
|
77
80
|
last_topk_e_value = topk_e
|
78
81
|
for k in range(topk_e):
|
79
|
-
indices =
|
80
|
-
unique_prizes == topk_e_values[k]
|
81
|
-
)
|
82
|
+
indices = (
|
83
|
+
inverse_indices == (unique_prizes == topk_e_values[k]).nonzero(as_tuple=True)[0]
|
84
|
+
)
|
82
85
|
value = min((topk_e - k) / indices.sum().item(), last_topk_e_value)
|
83
86
|
e_prizes[indices] = value
|
84
87
|
last_topk_e_value = value * (1 - self.c_const)
|
@@ -87,7 +90,7 @@ class PCSTPruning(NamedTuple):
|
|
87
90
|
|
88
91
|
def compute_subgraph_costs(
|
89
92
|
self, graph: Data, prizes: dict
|
90
|
-
) ->
|
93
|
+
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
91
94
|
"""
|
92
95
|
Compute the costs in constructing the subgraph proposed by G-Retriever paper.
|
93
96
|
|
@@ -145,7 +148,11 @@ class PCSTPruning(NamedTuple):
|
|
145
148
|
return edges_dict, prizes, costs, mapping
|
146
149
|
|
147
150
|
def get_subgraph_nodes_edges(
|
148
|
-
self,
|
151
|
+
self,
|
152
|
+
graph: Data,
|
153
|
+
vertices: np.ndarray,
|
154
|
+
edges_dict: dict,
|
155
|
+
mapping: dict,
|
149
156
|
) -> dict:
|
150
157
|
"""
|
151
158
|
Get the selected nodes and edges of the subgraph based on the vertices and edges computed
|
@@ -175,9 +182,7 @@ class PCSTPruning(NamedTuple):
|
|
175
182
|
subgraph_edges = np.array(subgraph_edges + virtual_edges)
|
176
183
|
edge_index = graph.edge_index[:, subgraph_edges]
|
177
184
|
subgraph_nodes = np.unique(
|
178
|
-
np.concatenate(
|
179
|
-
[subgraph_nodes, edge_index[0].numpy(), edge_index[1].numpy()]
|
180
|
-
)
|
185
|
+
np.concatenate([subgraph_nodes, edge_index[0].numpy(), edge_index[1].numpy()])
|
181
186
|
)
|
182
187
|
|
183
188
|
return {"nodes": subgraph_nodes, "edges": subgraph_edges}
|
@@ -201,9 +206,7 @@ class PCSTPruning(NamedTuple):
|
|
201
206
|
prizes = self.compute_prizes(graph, query_emb)
|
202
207
|
|
203
208
|
# Compute costs in constructing the subgraph
|
204
|
-
edges_dict, prizes, costs, mapping = self.compute_subgraph_costs(
|
205
|
-
graph, prizes
|
206
|
-
)
|
209
|
+
edges_dict, prizes, costs, mapping = self.compute_subgraph_costs(graph, prizes)
|
207
210
|
|
208
211
|
# Retrieve the subgraph using the PCST algorithm
|
209
212
|
result_vertices, result_edges = pcst_fast.pcst_fast(
|
@@ -220,6 +223,7 @@ class PCSTPruning(NamedTuple):
|
|
220
223
|
graph,
|
221
224
|
result_vertices,
|
222
225
|
{"edges": result_edges, "num_prior_edges": edges_dict["num_prior_edges"]},
|
223
|
-
mapping
|
226
|
+
mapping,
|
227
|
+
)
|
224
228
|
|
225
229
|
return subgraph
|
@@ -1,12 +1,12 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
|
3
|
-
|
3
|
+
"""A utility module for knowledge graph operations"""
|
4
4
|
|
5
|
-
from typing import Tuple
|
6
5
|
import networkx as nx
|
7
6
|
import pandas as pd
|
8
7
|
|
9
|
-
|
8
|
+
|
9
|
+
def kg_to_df_pandas(kg: nx.DiGraph) -> tuple[pd.DataFrame, pd.DataFrame]:
|
10
10
|
"""
|
11
11
|
Convert a directed knowledge graph to a pandas DataFrame.
|
12
12
|
|
@@ -19,20 +19,17 @@ def kg_to_df_pandas(kg: nx.DiGraph) -> Tuple[pd.DataFrame, pd.DataFrame]:
|
|
19
19
|
"""
|
20
20
|
|
21
21
|
# Create a pandas DataFrame of the nodes
|
22
|
-
df_nodes = pd.DataFrame.from_dict(kg.nodes, orient=
|
22
|
+
df_nodes = pd.DataFrame.from_dict(kg.nodes, orient="index")
|
23
23
|
|
24
24
|
# Create a pandas DataFrame of the edges
|
25
|
-
df_edges = nx.to_pandas_edgelist(kg,
|
26
|
-
source='node_source',
|
27
|
-
target='node_target')
|
25
|
+
df_edges = nx.to_pandas_edgelist(kg, source="node_source", target="node_target")
|
28
26
|
|
29
27
|
return df_nodes, df_edges
|
30
28
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
) -> nx.DiGraph:
|
29
|
+
|
30
|
+
def df_pandas_to_kg(
|
31
|
+
df: pd.DataFrame, df_nodes_attrs: pd.DataFrame, node_source: str, node_target: str
|
32
|
+
) -> nx.DiGraph:
|
36
33
|
"""
|
37
34
|
Convert a pandas DataFrame to a directed knowledge graph.
|
38
35
|
|
@@ -47,22 +44,24 @@ def df_pandas_to_kg(df: pd.DataFrame,
|
|
47
44
|
"""
|
48
45
|
|
49
46
|
# Assert if the columns node_source and node_target are in the df
|
50
|
-
assert node_source in df.columns, f
|
51
|
-
assert node_target in df.columns, f
|
47
|
+
assert node_source in df.columns, f"{node_source} not in df"
|
48
|
+
assert node_target in df.columns, f"{node_target} not in df"
|
52
49
|
|
53
50
|
# Assert that the nodes in the index of the df_nodes_attrs
|
54
51
|
# are present in the source and target columns of the df
|
55
|
-
assert set(df_nodes_attrs.index).issubset(set(df[node_source])
|
56
|
-
|
57
|
-
|
52
|
+
assert set(df_nodes_attrs.index).issubset(set(df[node_source]).union(set(df[node_target]))), (
|
53
|
+
"Nodes in index of df_nodes not found in df_edges"
|
54
|
+
)
|
58
55
|
|
59
56
|
# Create a knowledge graph from the dataframes
|
60
57
|
# Add edges and nodes to the knowledge graph
|
61
|
-
kg = nx.from_pandas_edgelist(
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
58
|
+
kg = nx.from_pandas_edgelist(
|
59
|
+
df,
|
60
|
+
source=node_source,
|
61
|
+
target=node_target,
|
62
|
+
create_using=nx.DiGraph,
|
63
|
+
edge_attr=True,
|
64
|
+
)
|
65
|
+
kg.add_nodes_from(df_nodes_attrs.to_dict("index").items())
|
67
66
|
|
68
67
|
return kg
|
@@ -5,13 +5,15 @@ Enrichment class for enriching PubChem IDs with their STRINGS representation.
|
|
5
5
|
"""
|
6
6
|
|
7
7
|
import logging
|
8
|
-
|
8
|
+
|
9
9
|
import hydra
|
10
|
+
import requests
|
10
11
|
|
11
12
|
# Initialize logger
|
12
13
|
logging.basicConfig(level=logging.INFO)
|
13
14
|
logger = logging.getLogger(__name__)
|
14
15
|
|
16
|
+
|
15
17
|
def cas_rn2pubchem_cid(casrn):
|
16
18
|
"""
|
17
19
|
Convert CAS RN to PubChem CID.
|
@@ -24,8 +26,7 @@ def cas_rn2pubchem_cid(casrn):
|
|
24
26
|
"""
|
25
27
|
# Load Hydra configuration for PubChem ID conversion
|
26
28
|
with hydra.initialize(version_base=None, config_path="../configs"):
|
27
|
-
cfg = hydra.compose(config_name=
|
28
|
-
overrides=['utils/pubchem_utils=default'])
|
29
|
+
cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
|
29
30
|
cfg = cfg.utils.pubchem_utils
|
30
31
|
# Prepare the URL
|
31
32
|
pubchem_url_for_drug = f"{cfg.pubchem_casrn2cid_url}{casrn}/record/JSON"
|
@@ -43,6 +44,7 @@ def cas_rn2pubchem_cid(casrn):
|
|
43
44
|
break
|
44
45
|
return cid
|
45
46
|
|
47
|
+
|
46
48
|
def external_id2pubchem_cid(db, db_id):
|
47
49
|
"""
|
48
50
|
Convert external DB ID to PubChem CID.
|
@@ -59,8 +61,7 @@ def external_id2pubchem_cid(db, db_id):
|
|
59
61
|
"""
|
60
62
|
# Load Hydra configuration for PubChem ID conversion
|
61
63
|
with hydra.initialize(version_base=None, config_path="../configs"):
|
62
|
-
cfg = hydra.compose(config_name=
|
63
|
-
overrides=['utils/pubchem_utils=default'])
|
64
|
+
cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
|
64
65
|
cfg = cfg.utils.pubchem_utils
|
65
66
|
# Prepare the URL
|
66
67
|
pubchem_url_for_drug = f"{cfg.pubchem_cid_base_url}/{db}/{db_id}/JSON"
|
@@ -76,6 +77,7 @@ def external_id2pubchem_cid(db, db_id):
|
|
76
77
|
break
|
77
78
|
return cid
|
78
79
|
|
80
|
+
|
79
81
|
def pubchem_cid_description(cid):
|
80
82
|
"""
|
81
83
|
Get the description of a PubChem CID.
|
@@ -88,8 +90,7 @@ def pubchem_cid_description(cid):
|
|
88
90
|
"""
|
89
91
|
# Load Hydra configuration for PubChem CID description
|
90
92
|
with hydra.initialize(version_base=None, config_path="../configs"):
|
91
|
-
cfg = hydra.compose(config_name=
|
92
|
-
overrides=['utils/pubchem_utils=default'])
|
93
|
+
cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
|
93
94
|
cfg = cfg.utils.pubchem_utils
|
94
95
|
# Prepare the URL
|
95
96
|
pubchem_url_for_descpription = f"{cfg.pubchem_cid_description_url}/{cid}/description/JSON"
|
@@ -97,7 +98,7 @@ def pubchem_cid_description(cid):
|
|
97
98
|
response = requests.get(pubchem_url_for_descpription, timeout=60)
|
98
99
|
data = response.json()
|
99
100
|
# Extract the PubChem CID description
|
100
|
-
description =
|
101
|
-
for information in data["InformationList"][
|
102
|
-
description += information.get("Description",
|
101
|
+
description = ""
|
102
|
+
for information in data["InformationList"]["Information"]:
|
103
|
+
description += information.get("Description", "")
|
103
104
|
return description
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# syntax=docker/dockerfile:1
|
2
|
+
|
3
|
+
# Dockerfile for the talk2scholars application
|
4
|
+
# Multi-stage build for optimized image size with UV package manager
|
5
|
+
|
6
|
+
ARG BASE_IMAGE=ubuntu:24.04
|
7
|
+
ARG PYTHON_VERSION=3.12
|
8
|
+
|
9
|
+
FROM ${BASE_IMAGE} AS dev-base
|
10
|
+
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
11
|
+
build-essential \
|
12
|
+
ca-certificates \
|
13
|
+
cmake \
|
14
|
+
curl \
|
15
|
+
g++ \
|
16
|
+
libopenblas-dev \
|
17
|
+
libomp-dev \
|
18
|
+
ninja-build \
|
19
|
+
wget \
|
20
|
+
&& rm -rf /var/lib/apt/lists/*
|
21
|
+
|
22
|
+
FROM dev-base AS python-install
|
23
|
+
ARG PYTHON_VERSION=3.12
|
24
|
+
|
25
|
+
# Install Python (available in Ubuntu 24.04 default repos)
|
26
|
+
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
27
|
+
python${PYTHON_VERSION} \
|
28
|
+
python${PYTHON_VERSION}-dev \
|
29
|
+
python${PYTHON_VERSION}-venv \
|
30
|
+
python3-pip \
|
31
|
+
&& rm -rf /var/lib/apt/lists/* \
|
32
|
+
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
|
33
|
+
&& update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
|
34
|
+
|
35
|
+
FROM python-install AS uv-install
|
36
|
+
WORKDIR /app
|
37
|
+
|
38
|
+
# Install UV package manager and dependencies
|
39
|
+
COPY pyproject.toml uv.lock* ./
|
40
|
+
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
41
|
+
export PATH="/root/.local/bin:$PATH" && \
|
42
|
+
export UV_PROJECT_ENVIRONMENT="/opt/venv" && \
|
43
|
+
uv sync --frozen --extra dev --no-install-project --python python${PYTHON_VERSION} && \
|
44
|
+
. /opt/venv/bin/activate && \
|
45
|
+
# RAPIDS packages (commented out - will be added in future if needed)
|
46
|
+
# uv pip install \
|
47
|
+
# --extra-index-url=https://pypi.nvidia.com \
|
48
|
+
# --index-strategy unsafe-best-match \
|
49
|
+
# cudf-cu12 dask-cudf-cu12 && \
|
50
|
+
uv cache clean
|
51
|
+
|
52
|
+
FROM ${BASE_IMAGE} AS runtime
|
53
|
+
ARG PYTHON_VERSION=3.12
|
54
|
+
LABEL maintainer="talk2scholars"
|
55
|
+
LABEL version="1.0.0"
|
56
|
+
LABEL description="AI Agents for Pharma - Scholars Application"
|
57
|
+
|
58
|
+
# Install runtime dependencies
|
59
|
+
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
60
|
+
ca-certificates \
|
61
|
+
curl \
|
62
|
+
libmagic1 \
|
63
|
+
libopenblas0 \
|
64
|
+
libomp5 \
|
65
|
+
python${PYTHON_VERSION} \
|
66
|
+
&& rm -rf /var/lib/apt/lists/* \
|
67
|
+
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
|
68
|
+
&& update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
|
69
|
+
|
70
|
+
# Copy UV virtual environment from build stage
|
71
|
+
COPY --from=uv-install /opt/venv /opt/venv
|
72
|
+
|
73
|
+
# Set environment variables
|
74
|
+
ENV PATH="/opt/venv/bin:$PATH"
|
75
|
+
ENV PYTHONPATH="/app"
|
76
|
+
ENV PYTHONUNBUFFERED=1
|
77
|
+
ENV PYTHONDONTWRITEBYTECODE=1
|
78
|
+
ENV STREAMLIT_SERVER_HEADLESS=true
|
79
|
+
ENV STREAMLIT_SERVER_ENABLE_CORS=false
|
80
|
+
|
81
|
+
# Set working directory and create necessary directories
|
82
|
+
WORKDIR /app
|
83
|
+
|
84
|
+
# Copy application code
|
85
|
+
COPY aiagents4pharma/talk2scholars /app/aiagents4pharma/talk2scholars
|
86
|
+
COPY docs /app/docs
|
87
|
+
COPY app /app/app
|
88
|
+
|
89
|
+
# Copy and set up the entrypoint script (commented out - will be added in future if needed)
|
90
|
+
# COPY aiagents4pharma/talk2knowledgegraphs/entrypoint.sh /usr/local/bin/entrypoint.sh
|
91
|
+
# RUN chmod +x /usr/local/bin/entrypoint.sh
|
92
|
+
|
93
|
+
# Health check for production monitoring
|
94
|
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
95
|
+
CMD curl -f http://localhost:8501/health || exit 1
|
96
|
+
|
97
|
+
# Expose the default Streamlit port
|
98
|
+
EXPOSE 8501
|
99
|
+
|
100
|
+
# Set the entrypoint (commented out - will be added in future if needed)
|
101
|
+
# ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
|
102
|
+
|
103
|
+
# Default command (can be overridden)
|
104
|
+
CMD ["streamlit", "run", "/app/app/frontend/streamlit_app_talk2scholars.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
@@ -0,0 +1 @@
|
|
1
|
+
Please check out the README file in the root folder for more information.
|
@@ -2,11 +2,7 @@
|
|
2
2
|
This file is used to import all the modules in the package.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from . import main_agent
|
6
|
-
from . import s2_agent
|
7
|
-
from . import paper_download_agent
|
8
|
-
from . import zotero_agent
|
9
|
-
from . import pdf_agent
|
5
|
+
from . import main_agent, paper_download_agent, pdf_agent, s2_agent, zotero_agent
|
10
6
|
|
11
7
|
__all__ = [
|
12
8
|
"main_agent",
|
@@ -12,15 +12,17 @@ Functions:
|
|
12
12
|
"""
|
13
13
|
|
14
14
|
import logging
|
15
|
+
|
15
16
|
import hydra
|
16
|
-
from langgraph_supervisor import create_supervisor
|
17
|
-
from langchain_openai import ChatOpenAI
|
18
17
|
from langchain_core.language_models.chat_models import BaseChatModel
|
18
|
+
from langchain_openai import ChatOpenAI
|
19
19
|
from langgraph.checkpoint.memory import MemorySaver
|
20
|
+
from langgraph_supervisor import create_supervisor
|
21
|
+
|
22
|
+
from ..agents.paper_download_agent import get_app as get_app_paper_download
|
23
|
+
from ..agents.pdf_agent import get_app as get_app_pdf
|
20
24
|
from ..agents.s2_agent import get_app as get_app_s2
|
21
25
|
from ..agents.zotero_agent import get_app as get_app_zotero
|
22
|
-
from ..agents.pdf_agent import get_app as get_app_pdf
|
23
|
-
from ..agents.paper_download_agent import get_app as get_app_paper_download
|
24
26
|
from ..state.state_talk2scholars import Talk2Scholars
|
25
27
|
|
26
28
|
# Initialize logger
|
@@ -5,17 +5,18 @@ paper details and PDFs. It is part of the Talk2Scholars project.
|
|
5
5
|
"""
|
6
6
|
|
7
7
|
import logging
|
8
|
-
from typing import Any
|
8
|
+
from typing import Any
|
9
|
+
|
9
10
|
import hydra
|
10
11
|
from langchain_core.language_models.chat_models import BaseChatModel
|
12
|
+
from langgraph.checkpoint.memory import MemorySaver
|
11
13
|
from langgraph.graph import START, StateGraph
|
12
14
|
from langgraph.prebuilt.chat_agent_executor import create_react_agent
|
13
15
|
from langgraph.prebuilt.tool_node import ToolNode
|
14
|
-
|
16
|
+
|
15
17
|
from ..state.state_talk2scholars import Talk2Scholars
|
16
18
|
from ..tools.paper_download.paper_downloader import download_papers
|
17
19
|
|
18
|
-
|
19
20
|
# Initialize logger
|
20
21
|
logging.basicConfig(level=logging.INFO)
|
21
22
|
logger = logging.getLogger(__name__)
|
@@ -67,7 +68,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
67
68
|
checkpointer=MemorySaver(),
|
68
69
|
)
|
69
70
|
|
70
|
-
def paper_download_agent_node(state: Talk2Scholars) ->
|
71
|
+
def paper_download_agent_node(state: Talk2Scholars) -> dict[str, Any]:
|
71
72
|
"""
|
72
73
|
Processes the current state to fetch the research paper from arXiv, BioRxiv, or MedRxiv.
|
73
74
|
"""
|
@@ -12,11 +12,13 @@ Usage:
|
|
12
12
|
"""
|
13
13
|
|
14
14
|
import logging
|
15
|
+
|
15
16
|
import hydra
|
16
17
|
from langchain_core.language_models.chat_models import BaseChatModel
|
17
|
-
from langgraph.graph import START, StateGraph
|
18
|
-
from langgraph.prebuilt import create_react_agent, ToolNode
|
19
18
|
from langgraph.checkpoint.memory import MemorySaver
|
19
|
+
from langgraph.graph import START, StateGraph
|
20
|
+
from langgraph.prebuilt import ToolNode, create_react_agent
|
21
|
+
|
20
22
|
from ..state.state_talk2scholars import Talk2Scholars
|
21
23
|
from ..tools.pdf.question_and_answer import question_and_answer
|
22
24
|
|
@@ -5,7 +5,7 @@ Agent for interacting with Semantic Scholar
|
|
5
5
|
"""
|
6
6
|
|
7
7
|
import logging
|
8
|
-
from typing import Any
|
8
|
+
from typing import Any
|
9
9
|
|
10
10
|
import hydra
|
11
11
|
from langchain_core.language_models.chat_models import BaseChatModel
|
@@ -54,7 +54,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
54
54
|
>>> result = app.invoke(initial_state)
|
55
55
|
"""
|
56
56
|
|
57
|
-
def s2_agent_node(state: Talk2Scholars) ->
|
57
|
+
def s2_agent_node(state: Talk2Scholars) -> dict[str, Any]:
|
58
58
|
"""
|
59
59
|
Processes the user query and retrieves relevant research papers.
|
60
60
|
|