aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +4 -8
- aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
- aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
- aiagents4pharma/talk2biomodels/api/ols.py +13 -10
- aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
- aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
- aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
- aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
- aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
- aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
- aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
- aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
- aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
- aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
- aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +4 -5
- aiagents4pharma/talk2cells/agents/__init__.py +3 -2
- aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
- aiagents4pharma/talk2cells/states/__init__.py +3 -2
- aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
- aiagents4pharma/talk2cells/tools/__init__.py +3 -2
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +334 -216
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +160 -97
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +87 -13
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +145 -142
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +14 -34
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
- aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
- aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
- aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
- aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
- aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
- aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
- aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
- aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
- aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
- aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
- aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
- aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
- aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
- aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
- aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
- aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
- aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
- aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
- aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
- aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
- aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
- aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
- aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
- aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
- aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/METADATA +115 -51
- aiagents4pharma-1.45.1.dist-info/RECORD +324 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/WHEEL +1 -2
- aiagents4pharma-1.44.0.dist-info/RECORD +0 -293
- aiagents4pharma-1.44.0.dist-info/top_level.txt +0 -1
- /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/licenses/LICENSE +0 -0
@@ -2,23 +2,25 @@
|
|
2
2
|
Tool for performing multimodal subgraph extraction.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from typing import Type, Annotated
|
6
5
|
import logging
|
7
6
|
import pickle
|
8
|
-
|
9
|
-
|
7
|
+
from typing import Annotated
|
8
|
+
|
10
9
|
import hydra
|
11
10
|
import networkx as nx
|
12
|
-
|
13
|
-
|
11
|
+
import numpy as np
|
12
|
+
import pandas as pd
|
13
|
+
import torch
|
14
14
|
from langchain_core.messages import ToolMessage
|
15
|
+
from langchain_core.tools import BaseTool
|
15
16
|
from langchain_core.tools.base import InjectedToolCallId
|
16
|
-
from langgraph.types import Command
|
17
17
|
from langgraph.prebuilt import InjectedState
|
18
|
-
import
|
18
|
+
from langgraph.types import Command
|
19
|
+
from pydantic import BaseModel, Field
|
19
20
|
from torch_geometric.data import Data
|
20
|
-
|
21
|
+
|
21
22
|
from ..utils.embeddings.ollama import EmbeddingWithOllama
|
23
|
+
from ..utils.extractions.multimodal_pcst import MultimodalPCSTPruning
|
22
24
|
from .load_arguments import ArgumentData
|
23
25
|
|
24
26
|
# Initialize logger
|
@@ -38,14 +40,10 @@ class MultimodalSubgraphExtractionInput(BaseModel):
|
|
38
40
|
arg_data: Argument for analytical process over graph data.
|
39
41
|
"""
|
40
42
|
|
41
|
-
tool_call_id: Annotated[str, InjectedToolCallId] = Field(
|
42
|
-
description="Tool call ID."
|
43
|
-
)
|
43
|
+
tool_call_id: Annotated[str, InjectedToolCallId] = Field(description="Tool call ID.")
|
44
44
|
state: Annotated[dict, InjectedState] = Field(description="Injected state.")
|
45
45
|
prompt: str = Field(description="Prompt to interact with the backend.")
|
46
|
-
arg_data: ArgumentData = Field(
|
47
|
-
description="Experiment over graph data.", default=None
|
48
|
-
)
|
46
|
+
arg_data: ArgumentData = Field(description="Experiment over graph data.", default=None)
|
49
47
|
|
50
48
|
|
51
49
|
class MultimodalSubgraphExtractionTool(BaseTool):
|
@@ -56,12 +54,11 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
56
54
|
|
57
55
|
name: str = "subgraph_extraction"
|
58
56
|
description: str = "A tool for subgraph extraction based on user's prompt."
|
59
|
-
args_schema:
|
57
|
+
args_schema: type[BaseModel] = MultimodalSubgraphExtractionInput
|
60
58
|
|
61
|
-
def _prepare_query_modalities(
|
62
|
-
|
63
|
-
|
64
|
-
pyg_graph: Data) -> pd.DataFrame:
|
59
|
+
def _prepare_query_modalities(
|
60
|
+
self, prompt_emb: list, state: Annotated[dict, InjectedState], pyg_graph: Data
|
61
|
+
) -> pd.DataFrame:
|
65
62
|
"""
|
66
63
|
Prepare the modality-specific query for subgraph extraction.
|
67
64
|
|
@@ -75,77 +72,90 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
75
72
|
"""
|
76
73
|
# Initialize dataframes
|
77
74
|
multimodal_df = pd.DataFrame({"name": []})
|
78
|
-
query_df = pd.DataFrame(
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
75
|
+
query_df = pd.DataFrame(
|
76
|
+
{
|
77
|
+
"node_id": [],
|
78
|
+
"node_type": [],
|
79
|
+
"x": [],
|
80
|
+
"desc_x": [],
|
81
|
+
"use_description": [],
|
82
|
+
}
|
83
|
+
)
|
83
84
|
|
84
85
|
# Loop over the uploaded files and find multimodal files
|
85
86
|
for i in range(len(state["uploaded_files"])):
|
86
87
|
# Check if multimodal file is uploaded
|
87
88
|
if state["uploaded_files"][i]["file_type"] == "multimodal":
|
88
89
|
# Read the Excel file
|
89
|
-
multimodal_df = pd.read_excel(
|
90
|
-
|
90
|
+
multimodal_df = pd.read_excel(
|
91
|
+
state["uploaded_files"][i]["file_path"], sheet_name=None
|
92
|
+
)
|
91
93
|
|
92
94
|
# Check if the multimodal_df is empty
|
93
95
|
if len(multimodal_df) > 0:
|
94
96
|
# Merge all obtained dataframes into a single dataframe
|
95
97
|
multimodal_df = pd.concat(multimodal_df).reset_index()
|
96
98
|
multimodal_df.drop(columns=["level_1"], inplace=True)
|
97
|
-
multimodal_df.rename(
|
98
|
-
|
99
|
+
multimodal_df.rename(
|
100
|
+
columns={"level_0": "q_node_type", "name": "q_node_name"}, inplace=True
|
101
|
+
)
|
99
102
|
# Since an excel sheet name could not contain a `/`,
|
100
103
|
# but the node type can be 'gene/protein' as exists in the PrimeKG
|
101
104
|
multimodal_df["q_node_type"] = multimodal_df.q_node_type.apply(
|
102
|
-
lambda x: x.replace(
|
105
|
+
lambda x: x.replace("-", "/")
|
103
106
|
)
|
104
107
|
|
105
108
|
# Convert PyG graph to a DataFrame for easier filtering
|
106
|
-
graph_df = pd.DataFrame(
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
109
|
+
graph_df = pd.DataFrame(
|
110
|
+
{
|
111
|
+
"node_id": pyg_graph.node_id,
|
112
|
+
"node_name": pyg_graph.node_name,
|
113
|
+
"node_type": pyg_graph.node_type,
|
114
|
+
"x": pyg_graph.x,
|
115
|
+
"desc_x": pyg_graph.desc_x.tolist(),
|
116
|
+
}
|
117
|
+
)
|
113
118
|
|
114
119
|
# Make a query dataframe by merging the graph_df and multimodal_df
|
115
|
-
query_df = graph_df.merge(multimodal_df, how=
|
120
|
+
query_df = graph_df.merge(multimodal_df, how="cross")
|
116
121
|
query_df = query_df[
|
117
122
|
query_df.apply(
|
118
|
-
lambda x:
|
119
|
-
(x[
|
120
|
-
|
121
|
-
axis=1
|
123
|
+
lambda x: (x["q_node_name"].lower() in x["node_name"].lower()) # node name
|
124
|
+
& (x["node_type"] == x["q_node_type"]), # node type
|
125
|
+
axis=1,
|
122
126
|
)
|
123
127
|
]
|
124
|
-
query_df = query_df[[
|
125
|
-
query_df[
|
128
|
+
query_df = query_df[["node_id", "node_type", "x", "desc_x"]].reset_index(drop=True)
|
129
|
+
query_df["use_description"] = False # set to False for modal-specific embeddings
|
126
130
|
|
127
131
|
# Update the state by adding the the selected node IDs
|
128
132
|
state["selections"] = query_df.groupby("node_type")["node_id"].apply(list).to_dict()
|
129
133
|
|
130
134
|
# Append a user prompt to the query dataframe
|
131
|
-
query_df = pd.concat(
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
135
|
+
query_df = pd.concat(
|
136
|
+
[
|
137
|
+
query_df,
|
138
|
+
pd.DataFrame(
|
139
|
+
{
|
140
|
+
"node_id": "user_prompt",
|
141
|
+
"node_type": "prompt",
|
142
|
+
"x": prompt_emb,
|
143
|
+
"desc_x": prompt_emb,
|
144
|
+
"use_description": True, # set to True for user prompt embedding
|
145
|
+
}
|
146
|
+
),
|
147
|
+
]
|
148
|
+
).reset_index(drop=True)
|
141
149
|
|
142
150
|
return query_df
|
143
151
|
|
144
|
-
def _perform_subgraph_extraction(
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
152
|
+
def _perform_subgraph_extraction(
|
153
|
+
self,
|
154
|
+
state: Annotated[dict, InjectedState],
|
155
|
+
cfg: dict,
|
156
|
+
pyg_graph: Data,
|
157
|
+
query_df: pd.DataFrame,
|
158
|
+
) -> dict:
|
149
159
|
"""
|
150
160
|
Perform multimodal subgraph extraction based on modal-specific embeddings.
|
151
161
|
|
@@ -176,11 +186,13 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
176
186
|
num_clusters=cfg.num_clusters,
|
177
187
|
pruning=cfg.pruning,
|
178
188
|
verbosity_level=cfg.verbosity_level,
|
179
|
-
use_description=q[1][
|
180
|
-
).extract_subgraph(
|
181
|
-
|
182
|
-
|
183
|
-
|
189
|
+
use_description=q[1]["use_description"],
|
190
|
+
).extract_subgraph(
|
191
|
+
pyg_graph,
|
192
|
+
torch.tensor(q[1]["desc_x"]), # description embedding
|
193
|
+
torch.tensor(q[1]["x"]), # modal-specific embedding
|
194
|
+
q[1]["node_type"],
|
195
|
+
)
|
184
196
|
|
185
197
|
# Append the extracted subgraph to the dictionary
|
186
198
|
subgraphs["nodes"].append(subgraph["nodes"].tolist())
|
@@ -196,11 +208,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
196
208
|
|
197
209
|
return subgraphs
|
198
210
|
|
199
|
-
def _prepare_final_subgraph(
|
200
|
-
|
201
|
-
|
202
|
-
graph: dict,
|
203
|
-
cfg) -> dict:
|
211
|
+
def _prepare_final_subgraph(
|
212
|
+
self, state: Annotated[dict, InjectedState], subgraph: dict, graph: dict, cfg
|
213
|
+
) -> dict:
|
204
214
|
"""
|
205
215
|
Prepare the subgraph based on the extracted subgraph.
|
206
216
|
|
@@ -227,14 +237,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
227
237
|
# Edge features
|
228
238
|
edge_index=torch.LongTensor(
|
229
239
|
[
|
230
|
-
[
|
231
|
-
|
232
|
-
for i in graph["pyg"].edge_index[:, subgraph["edges"]][0].tolist()
|
233
|
-
],
|
234
|
-
[
|
235
|
-
mapping[i]
|
236
|
-
for i in graph["pyg"].edge_index[:, subgraph["edges"]][1].tolist()
|
237
|
-
],
|
240
|
+
[mapping[i] for i in graph["pyg"].edge_index[:, subgraph["edges"]][0].tolist()],
|
241
|
+
[mapping[i] for i in graph["pyg"].edge_index[:, subgraph["edges"]][1].tolist()],
|
238
242
|
]
|
239
243
|
),
|
240
244
|
edge_attr=graph["pyg"].edge_attr[subgraph["edges"]],
|
@@ -247,8 +251,9 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
247
251
|
# Networkx DiGraph construction to be visualized in the frontend
|
248
252
|
nx_graph = nx.DiGraph()
|
249
253
|
# Add nodes with attributes
|
250
|
-
node_colors = {
|
251
|
-
|
254
|
+
node_colors = {
|
255
|
+
n: cfg.node_colors_dict[k] for k, v in state["selections"].items() for n in v
|
256
|
+
}
|
252
257
|
for n in pyg_graph.node_name:
|
253
258
|
nx_graph.add_node(n, color=node_colors.get(n, None))
|
254
259
|
|
@@ -256,7 +261,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
256
261
|
edges = zip(
|
257
262
|
pyg_graph.edge_index[0].tolist(),
|
258
263
|
pyg_graph.edge_index[1].tolist(),
|
259
|
-
pyg_graph.edge_type
|
264
|
+
pyg_graph.edge_type,
|
265
|
+
strict=False,
|
260
266
|
)
|
261
267
|
for src, dst, edge_type in edges:
|
262
268
|
nx_graph.add_edge(
|
@@ -303,7 +309,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
303
309
|
# Load hydra configuration
|
304
310
|
with hydra.initialize(version_base=None, config_path="../configs"):
|
305
311
|
cfg = hydra.compose(
|
306
|
-
config_name="config",
|
312
|
+
config_name="config",
|
313
|
+
overrides=["tools/multimodal_subgraph_extraction=default"],
|
307
314
|
)
|
308
315
|
cfg = cfg.tools.multimodal_subgraph_extraction
|
309
316
|
|
@@ -322,20 +329,14 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
322
329
|
query_df = self._prepare_query_modalities(
|
323
330
|
[EmbeddingWithOllama(model_name=cfg.ollama_embeddings[0]).embed_query(prompt)],
|
324
331
|
state,
|
325
|
-
initial_graph["pyg"]
|
332
|
+
initial_graph["pyg"],
|
326
333
|
)
|
327
334
|
|
328
335
|
# Perform subgraph extraction
|
329
|
-
subgraphs = self._perform_subgraph_extraction(state,
|
330
|
-
cfg,
|
331
|
-
initial_graph["pyg"],
|
332
|
-
query_df)
|
336
|
+
subgraphs = self._perform_subgraph_extraction(state, cfg, initial_graph["pyg"], query_df)
|
333
337
|
|
334
338
|
# Prepare subgraph as a NetworkX graph and textualized graph
|
335
|
-
final_subgraph = self._prepare_final_subgraph(state,
|
336
|
-
subgraphs,
|
337
|
-
initial_graph,
|
338
|
-
cfg)
|
339
|
+
final_subgraph = self._prepare_final_subgraph(state, subgraphs, initial_graph, cfg)
|
339
340
|
|
340
341
|
# Prepare the dictionary of extracted graph
|
341
342
|
dic_extracted_graph = {
|
@@ -362,7 +363,8 @@ class MultimodalSubgraphExtractionTool(BaseTool):
|
|
362
363
|
|
363
364
|
# Return the updated state of the tool
|
364
365
|
return Command(
|
365
|
-
update=dic_updated_state_for_model
|
366
|
+
update=dic_updated_state_for_model
|
367
|
+
| {
|
366
368
|
# update the message history
|
367
369
|
"messages": [
|
368
370
|
ToolMessage(
|
@@ -2,29 +2,31 @@
|
|
2
2
|
Tool for performing subgraph extraction.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from typing import Type, Annotated
|
6
5
|
import logging
|
7
6
|
import pickle
|
8
|
-
|
9
|
-
|
7
|
+
from typing import Annotated
|
8
|
+
|
10
9
|
import hydra
|
11
10
|
import networkx as nx
|
12
|
-
|
13
|
-
|
11
|
+
import numpy as np
|
12
|
+
import pandas as pd
|
13
|
+
import torch
|
14
14
|
from langchain.chains.combine_documents import create_stuff_documents_chain
|
15
|
+
from langchain.chains.retrieval import create_retrieval_chain
|
16
|
+
from langchain_community.document_loaders import PyPDFLoader
|
17
|
+
from langchain_core.messages import ToolMessage
|
15
18
|
from langchain_core.prompts import ChatPromptTemplate
|
16
|
-
from langchain_core.vectorstores import InMemoryVectorStore
|
17
19
|
from langchain_core.tools import BaseTool
|
18
|
-
from langchain_core.messages import ToolMessage
|
19
20
|
from langchain_core.tools.base import InjectedToolCallId
|
20
|
-
from
|
21
|
+
from langchain_core.vectorstores import InMemoryVectorStore
|
21
22
|
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
22
|
-
from langgraph.types import Command
|
23
23
|
from langgraph.prebuilt import InjectedState
|
24
|
-
import
|
24
|
+
from langgraph.types import Command
|
25
|
+
from pydantic import BaseModel, Field
|
25
26
|
from torch_geometric.data import Data
|
26
|
-
|
27
|
+
|
27
28
|
from ..utils.embeddings.ollama import EmbeddingWithOllama
|
29
|
+
from ..utils.extractions.pcst import PCSTPruning
|
28
30
|
from .load_arguments import ArgumentData
|
29
31
|
|
30
32
|
# Initialize logger
|
@@ -43,14 +45,10 @@ class SubgraphExtractionInput(BaseModel):
|
|
43
45
|
arg_data: Argument for analytical process over graph data.
|
44
46
|
"""
|
45
47
|
|
46
|
-
tool_call_id: Annotated[str, InjectedToolCallId] = Field(
|
47
|
-
description="Tool call ID."
|
48
|
-
)
|
48
|
+
tool_call_id: Annotated[str, InjectedToolCallId] = Field(description="Tool call ID.")
|
49
49
|
state: Annotated[dict, InjectedState] = Field(description="Injected state.")
|
50
50
|
prompt: str = Field(description="Prompt to interact with the backend.")
|
51
|
-
arg_data: ArgumentData = Field(
|
52
|
-
description="Experiment over graph data.", default=None
|
53
|
-
)
|
51
|
+
arg_data: ArgumentData = Field(description="Experiment over graph data.", default=None)
|
54
52
|
|
55
53
|
|
56
54
|
class SubgraphExtractionTool(BaseTool):
|
@@ -61,7 +59,7 @@ class SubgraphExtractionTool(BaseTool):
|
|
61
59
|
|
62
60
|
name: str = "subgraph_extraction"
|
63
61
|
description: str = "A tool for subgraph extraction based on user's prompt."
|
64
|
-
args_schema:
|
62
|
+
args_schema: type[BaseModel] = SubgraphExtractionInput
|
65
63
|
|
66
64
|
def perform_endotype_filtering(
|
67
65
|
self,
|
@@ -98,9 +96,7 @@ class SubgraphExtractionTool(BaseTool):
|
|
98
96
|
]
|
99
97
|
)
|
100
98
|
|
101
|
-
qa_chain = create_stuff_documents_chain(
|
102
|
-
state["llm_model"], prompt_template
|
103
|
-
)
|
99
|
+
qa_chain = create_stuff_documents_chain(state["llm_model"], prompt_template)
|
104
100
|
rag_chain = create_retrieval_chain(
|
105
101
|
InMemoryVectorStore.from_documents(
|
106
102
|
documents=splits, embedding=state["embedding_model"]
|
@@ -119,16 +115,13 @@ class SubgraphExtractionTool(BaseTool):
|
|
119
115
|
|
120
116
|
# Prepare the prompt
|
121
117
|
if len(all_genes) > 0:
|
122
|
-
prompt = " ".join(
|
123
|
-
[prompt, cfg.prompt_endotype_addition, ", ".join(all_genes)]
|
124
|
-
)
|
118
|
+
prompt = " ".join([prompt, cfg.prompt_endotype_addition, ", ".join(all_genes)])
|
125
119
|
|
126
120
|
return prompt
|
127
121
|
|
128
|
-
def prepare_final_subgraph(
|
129
|
-
|
130
|
-
|
131
|
-
textualized_graph: pd.DataFrame) -> dict:
|
122
|
+
def prepare_final_subgraph(
|
123
|
+
self, subgraph: dict, pyg_graph: Data, textualized_graph: pd.DataFrame
|
124
|
+
) -> dict:
|
132
125
|
"""
|
133
126
|
Prepare the subgraph based on the extracted subgraph.
|
134
127
|
|
@@ -153,14 +146,8 @@ class SubgraphExtractionTool(BaseTool):
|
|
153
146
|
# Edge features
|
154
147
|
edge_index=torch.LongTensor(
|
155
148
|
[
|
156
|
-
[
|
157
|
-
|
158
|
-
for i in pyg_graph.edge_index[:, subgraph["edges"]][0].tolist()
|
159
|
-
],
|
160
|
-
[
|
161
|
-
mapping[i]
|
162
|
-
for i in pyg_graph.edge_index[:, subgraph["edges"]][1].tolist()
|
163
|
-
],
|
149
|
+
[mapping[i] for i in pyg_graph.edge_index[:, subgraph["edges"]][0].tolist()],
|
150
|
+
[mapping[i] for i in pyg_graph.edge_index[:, subgraph["edges"]][1].tolist()],
|
164
151
|
]
|
165
152
|
),
|
166
153
|
edge_attr=pyg_graph.edge_attr[subgraph["edges"]],
|
@@ -293,7 +280,8 @@ class SubgraphExtractionTool(BaseTool):
|
|
293
280
|
|
294
281
|
# Return the updated state of the tool
|
295
282
|
return Command(
|
296
|
-
update=dic_updated_state_for_model
|
283
|
+
update=dic_updated_state_for_model
|
284
|
+
| {
|
297
285
|
# update the message history
|
298
286
|
"messages": [
|
299
287
|
ToolMessage(
|
@@ -3,16 +3,17 @@ Tool for performing subgraph summarization.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import
|
7
|
-
|
6
|
+
from typing import Annotated
|
7
|
+
|
8
|
+
import hydra
|
9
|
+
from langchain_core.messages import ToolMessage
|
8
10
|
from langchain_core.output_parsers import StrOutputParser
|
9
11
|
from langchain_core.prompts import ChatPromptTemplate
|
10
|
-
from langchain_core.messages import ToolMessage
|
11
|
-
from langchain_core.tools.base import InjectedToolCallId
|
12
12
|
from langchain_core.tools import BaseTool
|
13
|
-
from
|
13
|
+
from langchain_core.tools.base import InjectedToolCallId
|
14
14
|
from langgraph.prebuilt import InjectedState
|
15
|
-
import
|
15
|
+
from langgraph.types import Command
|
16
|
+
from pydantic import BaseModel, Field
|
16
17
|
|
17
18
|
# Initialize logger
|
18
19
|
logging.basicConfig(level=logging.INFO)
|
@@ -31,9 +32,7 @@ class SubgraphSummarizationInput(BaseModel):
|
|
31
32
|
extraction_name: Name assigned to the subgraph extraction process
|
32
33
|
"""
|
33
34
|
|
34
|
-
tool_call_id: Annotated[str, InjectedToolCallId] = Field(
|
35
|
-
description="Tool call ID."
|
36
|
-
)
|
35
|
+
tool_call_id: Annotated[str, InjectedToolCallId] = Field(description="Tool call ID.")
|
37
36
|
state: Annotated[dict, InjectedState] = Field(description="Injected state.")
|
38
37
|
prompt: str = Field(description="Prompt to interact with the backend.")
|
39
38
|
extraction_name: str = Field(
|
@@ -51,7 +50,7 @@ class SubgraphSummarizationTool(BaseTool):
|
|
51
50
|
name: str = "subgraph_summarization"
|
52
51
|
description: str = """A tool to perform subgraph summarization over textualized graph
|
53
52
|
for responding to user's follow-up prompt(s)."""
|
54
|
-
args_schema:
|
53
|
+
args_schema: type[BaseModel] = SubgraphSummarizationInput
|
55
54
|
|
56
55
|
def _run(
|
57
56
|
self,
|
@@ -69,9 +68,7 @@ class SubgraphSummarizationTool(BaseTool):
|
|
69
68
|
prompt: The prompt to interact with the backend.
|
70
69
|
extraction_name: The name assigned to the subgraph extraction process.
|
71
70
|
"""
|
72
|
-
logger.log(
|
73
|
-
logging.INFO, "Invoking subgraph_summarization tool for %s", extraction_name
|
74
|
-
)
|
71
|
+
logger.log(logging.INFO, "Invoking subgraph_summarization tool for %s", extraction_name)
|
75
72
|
|
76
73
|
# Load hydra configuration
|
77
74
|
with hydra.initialize(version_base=None, config_path="../configs"):
|
@@ -1,8 +1,5 @@
|
|
1
|
-
|
1
|
+
"""
|
2
2
|
This file is used to import all the models in the package.
|
3
|
-
|
4
|
-
|
5
|
-
from . import enrichments
|
6
|
-
from . import extractions
|
7
|
-
from . import kg_utils
|
8
|
-
from . import pubchem_utils
|
3
|
+
"""
|
4
|
+
|
5
|
+
from . import embeddings, enrichments, extractions, kg_utils, pubchem_utils
|
@@ -1,8 +1,5 @@
|
|
1
|
-
|
1
|
+
"""
|
2
2
|
This file is used to import all the models in the package.
|
3
|
-
|
4
|
-
|
5
|
-
from . import sentence_transformer
|
6
|
-
from . import huggingface
|
7
|
-
from . import ollama
|
8
|
-
from . import nim_molmim
|
3
|
+
"""
|
4
|
+
|
5
|
+
from . import embeddings, huggingface, nim_molmim, ollama, sentence_transformer
|
@@ -2,9 +2,12 @@
|
|
2
2
|
Embeddings interface from LangChain Core.
|
3
3
|
https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/embeddings/embeddings.py
|
4
4
|
"""
|
5
|
+
|
5
6
|
from abc import ABC, abstractmethod
|
7
|
+
|
6
8
|
from langchain_core.runnables.config import run_in_executor
|
7
9
|
|
10
|
+
|
8
11
|
class Embeddings(ABC):
|
9
12
|
"""Interface for embedding models.
|
10
13
|
|
@@ -32,6 +35,7 @@ class Embeddings(ABC):
|
|
32
35
|
however, implementations may choose to override the asynchronous methods with
|
33
36
|
an async native implementation for performance reasons.
|
34
37
|
"""
|
38
|
+
|
35
39
|
@abstractmethod
|
36
40
|
def embed_documents(self, texts: list[str]) -> list[list[float]]:
|
37
41
|
"""Embed search docs.
|
@@ -2,11 +2,12 @@
|
|
2
2
|
Embedding class using HuggingFace model based on LangChain Embeddings class.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from typing import List
|
6
5
|
import torch
|
7
|
-
from transformers import AutoModel, AutoTokenizer
|
6
|
+
from transformers import AutoConfig, AutoModel, AutoTokenizer
|
7
|
+
|
8
8
|
from .embeddings import Embeddings
|
9
9
|
|
10
|
+
|
10
11
|
class EmbeddingWithHuggingFace(Embeddings):
|
11
12
|
"""
|
12
13
|
Embedding class using HuggingFace model based on LangChain Embeddings class.
|
@@ -39,18 +40,14 @@ class EmbeddingWithHuggingFace(Embeddings):
|
|
39
40
|
# Try to load the model from HuggingFace Hub
|
40
41
|
try:
|
41
42
|
AutoConfig.from_pretrained(self.model_name)
|
42
|
-
except
|
43
|
-
raise ValueError(
|
44
|
-
f"Model {self.model_name} is not available on HuggingFace Hub."
|
45
|
-
) from e
|
43
|
+
except OSError as e:
|
44
|
+
raise ValueError(f"Model {self.model_name} is not available on HuggingFace Hub.") from e
|
46
45
|
|
47
46
|
# Load HuggingFace tokenizer and model
|
48
47
|
self.tokenizer = AutoTokenizer.from_pretrained(
|
49
48
|
self.model_name, cache_dir=self.model_cache_dir
|
50
49
|
)
|
51
|
-
self.model = AutoModel.from_pretrained(
|
52
|
-
self.model_name, cache_dir=self.model_cache_dir
|
53
|
-
)
|
50
|
+
self.model = AutoModel.from_pretrained(self.model_name, cache_dir=self.model_cache_dir)
|
54
51
|
|
55
52
|
def meanpooling(self, output, mask) -> torch.Tensor:
|
56
53
|
"""
|
@@ -62,11 +59,11 @@ class EmbeddingWithHuggingFace(Embeddings):
|
|
62
59
|
output: The output of the model.
|
63
60
|
mask: The mask of the model.
|
64
61
|
"""
|
65
|
-
embeddings = output[0]
|
62
|
+
embeddings = output[0] # First element of model_output contains all token embeddings
|
66
63
|
mask = mask.unsqueeze(-1).expand(embeddings.size()).float()
|
67
64
|
return torch.sum(embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)
|
68
65
|
|
69
|
-
def embed_documents(self, texts:
|
66
|
+
def embed_documents(self, texts: list[str]) -> list[float]:
|
70
67
|
"""
|
71
68
|
Generate embedding for a list of input texts using HuggingFace model.
|
72
69
|
|
@@ -86,11 +83,11 @@ class EmbeddingWithHuggingFace(Embeddings):
|
|
86
83
|
return_tensors="pt",
|
87
84
|
).to(self.device)
|
88
85
|
outputs = self.model.to(self.device)(**inputs)
|
89
|
-
embeddings = self.meanpooling(outputs, inputs[
|
86
|
+
embeddings = self.meanpooling(outputs, inputs["attention_mask"]).cpu()
|
90
87
|
|
91
88
|
return embeddings
|
92
89
|
|
93
|
-
def embed_query(self, text: str) ->
|
90
|
+
def embed_query(self, text: str) -> list[float]:
|
94
91
|
"""
|
95
92
|
Generate embeddings for an input text using HuggingFace model.
|
96
93
|
|
@@ -109,6 +106,6 @@ class EmbeddingWithHuggingFace(Embeddings):
|
|
109
106
|
return_tensors="pt",
|
110
107
|
).to(self.device)
|
111
108
|
outputs = self.model.to(self.device)(**inputs)
|
112
|
-
embeddings = self.meanpooling(outputs, inputs[
|
109
|
+
embeddings = self.meanpooling(outputs, inputs["attention_mask"]).cpu()[0]
|
113
110
|
|
114
111
|
return embeddings
|
@@ -3,14 +3,17 @@ Embedding class using MOLMIM model from NVIDIA NIM.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
import json
|
6
|
-
|
6
|
+
|
7
7
|
import requests
|
8
|
+
|
8
9
|
from .embeddings import Embeddings
|
9
10
|
|
11
|
+
|
10
12
|
class EmbeddingWithMOLMIM(Embeddings):
|
11
13
|
"""
|
12
14
|
Embedding class using MOLMIM model from NVIDIA NIM
|
13
15
|
"""
|
16
|
+
|
14
17
|
def __init__(self, base_url: str):
|
15
18
|
"""
|
16
19
|
Initialize the EmbeddingWithMOLMIM class.
|
@@ -21,7 +24,7 @@ class EmbeddingWithMOLMIM(Embeddings):
|
|
21
24
|
# Set base URL
|
22
25
|
self.base_url = base_url
|
23
26
|
|
24
|
-
def embed_documents(self, texts:
|
27
|
+
def embed_documents(self, texts: list[str]) -> list[float]:
|
25
28
|
"""
|
26
29
|
Generate embedding for a list of SMILES strings using MOLMIM model.
|
27
30
|
|
@@ -31,16 +34,13 @@ class EmbeddingWithMOLMIM(Embeddings):
|
|
31
34
|
Returns:
|
32
35
|
The list of embeddings for the given SMILES strings.
|
33
36
|
"""
|
34
|
-
headers = {
|
35
|
-
'accept': 'application/json',
|
36
|
-
'Content-Type': 'application/json'
|
37
|
-
}
|
37
|
+
headers = {"accept": "application/json", "Content-Type": "application/json"}
|
38
38
|
data = json.dumps({"sequences": texts})
|
39
39
|
response = requests.post(self.base_url, headers=headers, data=data, timeout=60)
|
40
40
|
embeddings = response.json()["embeddings"]
|
41
41
|
return embeddings
|
42
42
|
|
43
|
-
def embed_query(self, text: str) ->
|
43
|
+
def embed_query(self, text: str) -> list[float]:
|
44
44
|
"""
|
45
45
|
Generate embeddings for an input query using MOLMIM model.
|
46
46
|
|