aiagents4pharma 1.44.0__py3-none-any.whl → 1.45.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +105 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +4 -5
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +24 -23
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +2 -2
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +1 -1
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +127 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +3 -2
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +5 -3
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +2 -2
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +72 -50
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +4 -8
- aiagents4pharma/talk2biomodels/agents/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +47 -42
- aiagents4pharma/talk2biomodels/api/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/api/kegg.py +14 -10
- aiagents4pharma/talk2biomodels/api/ols.py +13 -10
- aiagents4pharma/talk2biomodels/api/uniprot.py +7 -6
- aiagents4pharma/talk2biomodels/configs/__init__.py +3 -4
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/config.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +1 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +1 -1
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +4 -4
- aiagents4pharma/talk2biomodels/models/basico_model.py +36 -28
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +13 -10
- aiagents4pharma/talk2biomodels/states/__init__.py +3 -2
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +12 -8
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +2 -2
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +18 -14
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +8 -9
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +15 -9
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +54 -55
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +28 -27
- aiagents4pharma/talk2biomodels/tests/test_integration.py +21 -33
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +14 -11
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +21 -20
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +129 -29
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +9 -13
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +16 -15
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +12 -22
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +33 -29
- aiagents4pharma/talk2biomodels/tools/__init__.py +15 -12
- aiagents4pharma/talk2biomodels/tools/ask_question.py +42 -32
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +51 -43
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +99 -75
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +57 -51
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +52 -32
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +8 -2
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +107 -90
- aiagents4pharma/talk2biomodels/tools/query_article.py +14 -13
- aiagents4pharma/talk2biomodels/tools/search_models.py +37 -26
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +47 -37
- aiagents4pharma/talk2biomodels/tools/steady_state.py +76 -58
- aiagents4pharma/talk2biomodels/tools/utils.py +4 -3
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +4 -5
- aiagents4pharma/talk2cells/agents/__init__.py +3 -2
- aiagents4pharma/talk2cells/agents/scp_agent.py +21 -19
- aiagents4pharma/talk2cells/states/__init__.py +3 -2
- aiagents4pharma/talk2cells/states/state_talk2cells.py +4 -2
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +8 -9
- aiagents4pharma/talk2cells/tools/__init__.py +3 -2
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +4 -4
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +5 -3
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +21 -22
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +103 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +40 -30
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +3 -6
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +8 -8
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +2 -2
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +4 -6
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +115 -67
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +2 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +35 -24
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +29 -21
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +190 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +140 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +31 -65
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +65 -40
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +54 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +17 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +33 -24
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +116 -69
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +334 -216
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +22 -15
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +19 -12
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +95 -48
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +13 -18
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +10 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +4 -3
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +3 -2
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +9 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +6 -6
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +160 -97
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +3 -4
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +87 -13
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +10 -7
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +15 -20
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +145 -142
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +92 -90
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +25 -37
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +10 -13
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +4 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +4 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +11 -14
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +7 -7
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +12 -6
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +8 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +9 -6
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +15 -9
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +23 -20
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +12 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +16 -10
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +26 -18
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +4 -5
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +14 -34
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +53 -47
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +18 -14
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +22 -23
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +11 -10
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +1 -5
- aiagents4pharma/talk2scholars/agents/main_agent.py +6 -4
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -2
- aiagents4pharma/talk2scholars/agents/s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +10 -11
- aiagents4pharma/talk2scholars/configs/__init__.py +1 -3
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -4
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -8
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +7 -7
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +8 -6
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +8 -8
- aiagents4pharma/talk2scholars/tests/{test_main_agent.py → test_agents_main_agent.py} +41 -23
- aiagents4pharma/talk2scholars/tests/{test_paper_download_agent.py → test_agents_paper_agents_download_agent.py} +10 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_agent.py → test_agents_pdf_agent.py} +6 -10
- aiagents4pharma/talk2scholars/tests/{test_s2_agent.py → test_agents_s2_agent.py} +8 -16
- aiagents4pharma/talk2scholars/tests/{test_zotero_agent.py → test_agents_zotero_agent.py} +5 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_display_dataframe.py → test_s2_tools_display_dataframe.py} +6 -7
- aiagents4pharma/talk2scholars/tests/{test_s2_query_dataframe.py → test_s2_tools_query_dataframe.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_paper_downloader.py → test_tools_paper_downloader.py} +25 -63
- aiagents4pharma/talk2scholars/tests/{test_question_and_answer_tool.py → test_tools_question_and_answer_tool.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_multi.py → test_tools_s2_multi.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_retrieve.py → test_tools_s2_retrieve.py} +2 -1
- aiagents4pharma/talk2scholars/tests/{test_s2_search.py → test_tools_s2_search.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_s2_single.py → test_tools_s2_single.py} +5 -5
- aiagents4pharma/talk2scholars/tests/{test_arxiv_downloader.py → test_utils_arxiv_downloader.py} +16 -25
- aiagents4pharma/talk2scholars/tests/{test_base_paper_downloader.py → test_utils_base_paper_downloader.py} +25 -47
- aiagents4pharma/talk2scholars/tests/{test_biorxiv_downloader.py → test_utils_biorxiv_downloader.py} +14 -42
- aiagents4pharma/talk2scholars/tests/{test_medrxiv_downloader.py → test_utils_medrxiv_downloader.py} +15 -49
- aiagents4pharma/talk2scholars/tests/{test_nvidia_nim_reranker.py → test_utils_nvidia_nim_reranker.py} +6 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_answer_formatter.py → test_utils_pdf_answer_formatter.py} +1 -0
- aiagents4pharma/talk2scholars/tests/{test_pdf_batch_processor.py → test_utils_pdf_batch_processor.py} +6 -15
- aiagents4pharma/talk2scholars/tests/{test_pdf_collection_manager.py → test_utils_pdf_collection_manager.py} +34 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_document_processor.py → test_utils_pdf_document_processor.py} +2 -3
- aiagents4pharma/talk2scholars/tests/{test_pdf_generate_answer.py → test_utils_pdf_generate_answer.py} +3 -6
- aiagents4pharma/talk2scholars/tests/{test_pdf_gpu_detection.py → test_utils_pdf_gpu_detection.py} +5 -16
- aiagents4pharma/talk2scholars/tests/{test_pdf_rag_pipeline.py → test_utils_pdf_rag_pipeline.py} +7 -17
- aiagents4pharma/talk2scholars/tests/{test_pdf_retrieve_chunks.py → test_utils_pdf_retrieve_chunks.py} +4 -11
- aiagents4pharma/talk2scholars/tests/{test_pdf_singleton_manager.py → test_utils_pdf_singleton_manager.py} +26 -23
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_normalization.py → test_utils_pdf_vector_normalization.py} +1 -1
- aiagents4pharma/talk2scholars/tests/{test_pdf_vector_store.py → test_utils_pdf_vector_store.py} +27 -55
- aiagents4pharma/talk2scholars/tests/{test_pubmed_downloader.py → test_utils_pubmed_downloader.py} +31 -91
- aiagents4pharma/talk2scholars/tests/{test_read_helper_utils.py → test_utils_read_helper_utils.py} +2 -6
- aiagents4pharma/talk2scholars/tests/{test_s2_utils_ext_ids.py → test_utils_s2_utils_ext_ids.py} +5 -15
- aiagents4pharma/talk2scholars/tests/{test_zotero_human_in_the_loop.py → test_utils_zotero_human_in_the_loop.py} +6 -13
- aiagents4pharma/talk2scholars/tests/{test_zotero_path.py → test_utils_zotero_path.py} +53 -45
- aiagents4pharma/talk2scholars/tests/{test_zotero_read.py → test_utils_zotero_read.py} +30 -91
- aiagents4pharma/talk2scholars/tests/{test_zotero_write.py → test_utils_zotero_write.py} +6 -16
- aiagents4pharma/talk2scholars/tools/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +20 -35
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +7 -5
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +9 -11
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +14 -21
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +14 -22
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +11 -13
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +14 -28
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +4 -8
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +16 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +15 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +5 -5
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +2 -6
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +5 -9
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +4 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +6 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +7 -15
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +2 -2
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +3 -4
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +8 -17
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +17 -33
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +8 -6
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +3 -7
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +7 -6
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +5 -12
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +2 -4
- aiagents4pharma/talk2scholars/tools/s2/search.py +6 -6
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +1 -3
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +12 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +11 -18
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +11 -16
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +1 -4
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +21 -39
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +2 -6
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +8 -11
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +4 -12
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +13 -27
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +4 -7
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +8 -10
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +3 -2
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/METADATA +115 -51
- aiagents4pharma-1.45.1.dist-info/RECORD +324 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/WHEEL +1 -2
- aiagents4pharma-1.44.0.dist-info/RECORD +0 -293
- aiagents4pharma-1.44.0.dist-info/top_level.txt +0 -1
- /aiagents4pharma/talk2scholars/tests/{test_state.py → test_states_state.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_pdf_paper_loader.py → test_utils_pdf_paper_loader.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_tool_helper_utils.py → test_utils_tool_helper_utils.py} +0 -0
- /aiagents4pharma/talk2scholars/tests/{test_zotero_pdf_downloader_utils.py → test_utils_zotero_pdf_downloader_utils.py} +0 -0
- {aiagents4pharma-1.44.0.dist-info → aiagents4pharma-1.45.1.dist-info}/licenses/LICENSE +0 -0
@@ -2,17 +2,20 @@
|
|
2
2
|
Embedding class using Ollama model based on LangChain Embeddings class.
|
3
3
|
"""
|
4
4
|
|
5
|
-
import time
|
6
|
-
from typing import List
|
7
5
|
import subprocess
|
6
|
+
import time
|
7
|
+
|
8
8
|
import ollama
|
9
9
|
from langchain_ollama import OllamaEmbeddings
|
10
|
+
|
10
11
|
from .embeddings import Embeddings
|
11
12
|
|
13
|
+
|
12
14
|
class EmbeddingWithOllama(Embeddings):
|
13
15
|
"""
|
14
16
|
Embedding class using Ollama model based on LangChain Embeddings class.
|
15
17
|
"""
|
18
|
+
|
16
19
|
def __init__(self, model_name: str):
|
17
20
|
"""
|
18
21
|
Initialize the EmbeddingWithOllama class.
|
@@ -38,18 +41,21 @@ class EmbeddingWithOllama(Embeddings):
|
|
38
41
|
"""
|
39
42
|
try:
|
40
43
|
models_list = ollama.list()["models"]
|
41
|
-
if model_name not in [m[
|
44
|
+
if model_name not in [m["model"].replace(":latest", "") for m in models_list]:
|
42
45
|
ollama.pull(model_name)
|
43
46
|
time.sleep(30)
|
44
47
|
raise ValueError(f"Pulled {model_name} model")
|
45
48
|
except Exception as e:
|
46
49
|
with subprocess.Popen(
|
47
|
-
"ollama serve",
|
50
|
+
"ollama serve",
|
51
|
+
shell=True,
|
52
|
+
stdout=subprocess.PIPE,
|
53
|
+
stderr=subprocess.PIPE,
|
48
54
|
):
|
49
55
|
time.sleep(10)
|
50
56
|
raise ValueError(f"Error: {e} and restarted Ollama server.") from e
|
51
57
|
|
52
|
-
def embed_documents(self, texts:
|
58
|
+
def embed_documents(self, texts: list[str]) -> list[float]:
|
53
59
|
"""
|
54
60
|
Generate embedding for a list of input texts using Ollama model.
|
55
61
|
|
@@ -65,7 +71,7 @@ class EmbeddingWithOllama(Embeddings):
|
|
65
71
|
|
66
72
|
return embeddings
|
67
73
|
|
68
|
-
def embed_query(self, text: str) ->
|
74
|
+
def embed_query(self, text: str) -> list[float]:
|
69
75
|
"""
|
70
76
|
Generate embeddings for an input text using Ollama model.
|
71
77
|
|
@@ -4,8 +4,8 @@
|
|
4
4
|
Embedding class using SentenceTransformer model based on LangChain Embeddings class.
|
5
5
|
"""
|
6
6
|
|
7
|
-
from typing import List
|
8
7
|
from sentence_transformers import SentenceTransformer
|
8
|
+
|
9
9
|
from .embeddings import Embeddings
|
10
10
|
|
11
11
|
|
@@ -35,11 +35,13 @@ class EmbeddingWithSentenceTransformer(Embeddings):
|
|
35
35
|
self.trust_remote_code = trust_remote_code
|
36
36
|
|
37
37
|
# Load the model
|
38
|
-
self.model = SentenceTransformer(
|
39
|
-
|
40
|
-
|
38
|
+
self.model = SentenceTransformer(
|
39
|
+
self.model_name,
|
40
|
+
cache_folder=self.model_cache_dir,
|
41
|
+
trust_remote_code=self.trust_remote_code,
|
42
|
+
)
|
41
43
|
|
42
|
-
def embed_documents(self, texts:
|
44
|
+
def embed_documents(self, texts: list[str]) -> list[float]:
|
43
45
|
"""
|
44
46
|
Generate embedding for a list of input texts using SentenceTransformer model.
|
45
47
|
|
@@ -55,7 +57,7 @@ class EmbeddingWithSentenceTransformer(Embeddings):
|
|
55
57
|
|
56
58
|
return embeddings
|
57
59
|
|
58
|
-
def embed_query(self, text: str) ->
|
60
|
+
def embed_query(self, text: str) -> list[float]:
|
59
61
|
"""
|
60
62
|
Generate embeddings for an input text using SentenceTransformer model.
|
61
63
|
|
@@ -1,9 +1,12 @@
|
|
1
1
|
"""
|
2
2
|
This package contains modules to use the enrichment model
|
3
3
|
"""
|
4
|
-
|
5
|
-
from . import
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
|
5
|
+
from . import (
|
6
|
+
enrichments,
|
7
|
+
ollama,
|
8
|
+
ols_terms,
|
9
|
+
pubchem_strings,
|
10
|
+
reactome_pathways,
|
11
|
+
uniprot_proteins,
|
12
|
+
)
|
@@ -4,20 +4,23 @@
|
|
4
4
|
Enrichment class using Ollama model based on LangChain Enrichment class.
|
5
5
|
"""
|
6
6
|
|
7
|
-
import time
|
8
|
-
from typing import List
|
9
|
-
import subprocess
|
10
7
|
import ast
|
8
|
+
import subprocess
|
9
|
+
import time
|
10
|
+
|
11
11
|
import ollama
|
12
|
-
from langchain_ollama import ChatOllama
|
13
|
-
from langchain_core.prompts import ChatPromptTemplate
|
14
12
|
from langchain_core.output_parsers import StrOutputParser
|
13
|
+
from langchain_core.prompts import ChatPromptTemplate
|
14
|
+
from langchain_ollama import ChatOllama
|
15
|
+
|
15
16
|
from .enrichments import Enrichments
|
16
17
|
|
18
|
+
|
17
19
|
class EnrichmentWithOllama(Enrichments):
|
18
20
|
"""
|
19
21
|
Enrichment class using Ollama model based on the Enrichment abstract class.
|
20
22
|
"""
|
23
|
+
|
21
24
|
def __init__(
|
22
25
|
self,
|
23
26
|
model_name: str,
|
@@ -67,18 +70,21 @@ class EnrichmentWithOllama(Enrichments):
|
|
67
70
|
"""
|
68
71
|
try:
|
69
72
|
models_list = ollama.list()["models"]
|
70
|
-
if model_name not in [m[
|
73
|
+
if model_name not in [m["model"].replace(":latest", "") for m in models_list]:
|
71
74
|
ollama.pull(model_name)
|
72
75
|
time.sleep(30)
|
73
76
|
raise ValueError(f"Pulled {model_name} model")
|
74
77
|
except Exception as e:
|
75
78
|
with subprocess.Popen(
|
76
|
-
"ollama serve",
|
79
|
+
"ollama serve",
|
80
|
+
shell=True,
|
81
|
+
stdout=subprocess.PIPE,
|
82
|
+
stderr=subprocess.PIPE,
|
77
83
|
):
|
78
84
|
time.sleep(10)
|
79
85
|
raise ValueError(f"Error: {e} and restarted Ollama server.") from e
|
80
86
|
|
81
|
-
def enrich_documents(self, texts:
|
87
|
+
def enrich_documents(self, texts: list[str]) -> list[str]:
|
82
88
|
"""
|
83
89
|
Enrich a list of input texts with additional textual features using OLLAMA model.
|
84
90
|
Important: Make sure the input is a list of texts based on the defined prompt template
|
@@ -116,7 +122,7 @@ class EnrichmentWithOllama(Enrichments):
|
|
116
122
|
Args:
|
117
123
|
texts: The list of texts to be enriched.
|
118
124
|
docs: The list of reference documents to enrich the input texts.
|
119
|
-
|
125
|
+
|
120
126
|
Returns:
|
121
127
|
The list of enriched texts
|
122
128
|
"""
|
@@ -4,22 +4,25 @@
|
|
4
4
|
Enrichment class for enriching OLS terms with textual descriptions
|
5
5
|
"""
|
6
6
|
|
7
|
-
from typing import List
|
8
|
-
import logging
|
9
7
|
import json
|
8
|
+
import logging
|
9
|
+
|
10
10
|
import hydra
|
11
11
|
import requests
|
12
|
+
|
12
13
|
from .enrichments import Enrichments
|
13
14
|
|
14
15
|
# Initialize logger
|
15
16
|
logging.basicConfig(level=logging.INFO)
|
16
17
|
logger = logging.getLogger(__name__)
|
17
18
|
|
19
|
+
|
18
20
|
class EnrichmentWithOLS(Enrichments):
|
19
21
|
"""
|
20
22
|
Enrichment class using OLS terms
|
21
23
|
"""
|
22
|
-
|
24
|
+
|
25
|
+
def enrich_documents(self, texts: list[str]) -> list[str]:
|
23
26
|
"""
|
24
27
|
Enrich a list of input OLS terms
|
25
28
|
|
@@ -32,41 +35,41 @@ class EnrichmentWithOLS(Enrichments):
|
|
32
35
|
|
33
36
|
ols_ids = texts
|
34
37
|
|
35
|
-
logger.log(logging.INFO,
|
36
|
-
"Load Hydra configuration for OLS enrichments.")
|
38
|
+
logger.log(logging.INFO, "Load Hydra configuration for OLS enrichments.")
|
37
39
|
with hydra.initialize(version_base=None, config_path="../../configs"):
|
38
|
-
cfg = hydra.compose(
|
39
|
-
|
40
|
+
cfg = hydra.compose(
|
41
|
+
config_name="config", overrides=["utils/enrichments/ols_terms=default"]
|
42
|
+
)
|
40
43
|
cfg = cfg.utils.enrichments.ols_terms
|
41
44
|
|
42
45
|
descriptions = []
|
43
46
|
for ols_id in ols_ids:
|
44
|
-
params = {
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
47
|
+
params = {"short_form": ols_id}
|
48
|
+
r = requests.get(
|
49
|
+
cfg.base_url,
|
50
|
+
headers={"Accept": "application/json"},
|
51
|
+
params=params,
|
52
|
+
timeout=cfg.timeout,
|
53
|
+
)
|
51
54
|
response_body = json.loads(r.text)
|
52
55
|
# if the response body is empty
|
53
|
-
if
|
56
|
+
if "_embedded" not in response_body:
|
54
57
|
descriptions.append(None)
|
55
58
|
continue
|
56
59
|
# Add the description to the list
|
57
60
|
description = []
|
58
|
-
for term in response_body[
|
61
|
+
for term in response_body["_embedded"]["terms"]:
|
59
62
|
# If the term has a description, add it to the list
|
60
|
-
description += term.get(
|
63
|
+
description += term.get("description", [])
|
61
64
|
# Add synonyms to the description
|
62
|
-
description += term.get(
|
65
|
+
description += term.get("synonyms", [])
|
63
66
|
# Add the label to the description
|
64
67
|
# Label is not provided as list, so we need to convert it to a list
|
65
|
-
description += [term.get(
|
68
|
+
description += [term.get("label", [])]
|
66
69
|
# Make unique the description
|
67
70
|
description = list(set(description))
|
68
71
|
# Join the description with new line
|
69
|
-
description =
|
72
|
+
description = "\n".join(description)
|
70
73
|
# Add the description to the list
|
71
74
|
descriptions.append(description)
|
72
75
|
return descriptions
|
@@ -5,21 +5,24 @@ Enrichment class for enriching PubChem IDs with their STRINGS representation and
|
|
5
5
|
"""
|
6
6
|
|
7
7
|
import logging
|
8
|
-
|
9
|
-
import requests
|
8
|
+
|
10
9
|
import hydra
|
11
|
-
|
10
|
+
import requests
|
11
|
+
|
12
12
|
from ..pubchem_utils import pubchem_cid_description
|
13
|
+
from .enrichments import Enrichments
|
13
14
|
|
14
15
|
# Initialize logger
|
15
16
|
logging.basicConfig(level=logging.INFO)
|
16
17
|
logger = logging.getLogger(__name__)
|
17
18
|
|
19
|
+
|
18
20
|
class EnrichmentWithPubChem(Enrichments):
|
19
21
|
"""
|
20
22
|
Enrichment class using PubChem
|
21
23
|
"""
|
22
|
-
|
24
|
+
|
25
|
+
def enrich_documents(self, texts: list[str]) -> list[str]:
|
23
26
|
"""
|
24
27
|
Enrich a list of input PubChem IDs with their STRINGS representation.
|
25
28
|
|
@@ -35,8 +38,7 @@ class EnrichmentWithPubChem(Enrichments):
|
|
35
38
|
|
36
39
|
# Load Hydra configuration to get the base URL for PubChem
|
37
40
|
with hydra.initialize(version_base=None, config_path="../../configs"):
|
38
|
-
cfg = hydra.compose(config_name=
|
39
|
-
overrides=['utils/pubchem_utils=default'])
|
41
|
+
cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
|
40
42
|
cfg = cfg.utils.pubchem_utils
|
41
43
|
# Iterate over each PubChem ID in the input list
|
42
44
|
pubchem_cids = texts
|
@@ -47,11 +49,11 @@ class EnrichmentWithPubChem(Enrichments):
|
|
47
49
|
response = requests.get(pubchem_url, timeout=60)
|
48
50
|
data = response.json()
|
49
51
|
# Extract the PubChem CID SMILES
|
50
|
-
smiles =
|
51
|
-
description =
|
52
|
+
smiles = ""
|
53
|
+
description = ""
|
52
54
|
if "PropertyTable" in data:
|
53
|
-
for prop in data["PropertyTable"][
|
54
|
-
smiles = prop.get("SMILES",
|
55
|
+
for prop in data["PropertyTable"]["Properties"]:
|
56
|
+
smiles = prop.get("SMILES", "")
|
55
57
|
description = pubchem_cid_description(pubchem_cid)
|
56
58
|
else:
|
57
59
|
# If the PubChem ID is not found, set smiles and description to None
|
@@ -4,21 +4,24 @@
|
|
4
4
|
Enrichment class for enriching Reactome pathways with textual descriptions
|
5
5
|
"""
|
6
6
|
|
7
|
-
from typing import List
|
8
7
|
import logging
|
8
|
+
|
9
9
|
import hydra
|
10
10
|
import requests
|
11
|
+
|
11
12
|
from .enrichments import Enrichments
|
12
13
|
|
13
14
|
# Initialize logger
|
14
15
|
logging.basicConfig(level=logging.INFO)
|
15
16
|
logger = logging.getLogger(__name__)
|
16
17
|
|
18
|
+
|
17
19
|
class EnrichmentWithReactome(Enrichments):
|
18
20
|
"""
|
19
21
|
Enrichment class using Reactome pathways
|
20
22
|
"""
|
21
|
-
|
23
|
+
|
24
|
+
def enrich_documents(self, texts: list[str]) -> list[str]:
|
22
25
|
"""
|
23
26
|
Enrich a list of input Reactome pathways
|
24
27
|
|
@@ -31,25 +34,28 @@ class EnrichmentWithReactome(Enrichments):
|
|
31
34
|
|
32
35
|
reactome_pathways_ids = texts
|
33
36
|
|
34
|
-
logger.log(logging.INFO,
|
35
|
-
"Load Hydra configuration for reactome enrichment")
|
37
|
+
logger.log(logging.INFO, "Load Hydra configuration for reactome enrichment")
|
36
38
|
with hydra.initialize(version_base=None, config_path="../../configs"):
|
37
|
-
cfg = hydra.compose(
|
38
|
-
|
39
|
+
cfg = hydra.compose(
|
40
|
+
config_name="config",
|
41
|
+
overrides=["utils/enrichments/reactome_pathways=default"],
|
42
|
+
)
|
39
43
|
cfg = cfg.utils.enrichments.reactome_pathways
|
40
44
|
|
41
45
|
descriptions = []
|
42
46
|
for reactome_pathway_id in reactome_pathways_ids:
|
43
|
-
r = requests.get(
|
44
|
-
|
45
|
-
|
47
|
+
r = requests.get(
|
48
|
+
cfg.base_url + reactome_pathway_id + "/summation",
|
49
|
+
headers={"Accept": "text/plain"},
|
50
|
+
timeout=cfg.timeout,
|
51
|
+
)
|
46
52
|
# if the response is not ok
|
47
53
|
if not r.ok:
|
48
54
|
descriptions.append(None)
|
49
55
|
continue
|
50
56
|
response_body = r.text
|
51
57
|
# if the response is ok
|
52
|
-
descriptions.append(response_body.split(
|
58
|
+
descriptions.append(response_body.split("\t")[1])
|
53
59
|
return descriptions
|
54
60
|
|
55
61
|
def enrich_documents_with_rag(self, texts, docs):
|
@@ -4,22 +4,25 @@
|
|
4
4
|
Enrichment class for enriching Gene names with their function and sequence using UniProt.
|
5
5
|
"""
|
6
6
|
|
7
|
-
from typing import List
|
8
|
-
import logging
|
9
7
|
import json
|
8
|
+
import logging
|
9
|
+
|
10
10
|
import hydra
|
11
11
|
import requests
|
12
|
+
|
12
13
|
from .enrichments import Enrichments
|
13
14
|
|
14
15
|
# Initialize logger
|
15
16
|
logging.basicConfig(level=logging.INFO)
|
16
17
|
logger = logging.getLogger(__name__)
|
17
18
|
|
19
|
+
|
18
20
|
class EnrichmentWithUniProt(Enrichments):
|
19
21
|
"""
|
20
22
|
Enrichment class using UniProt
|
21
23
|
"""
|
22
|
-
|
24
|
+
|
25
|
+
def enrich_documents(self, texts: list[str]) -> list[str]:
|
23
26
|
"""
|
24
27
|
Enrich a list of input UniProt gene names with their function and sequence.
|
25
28
|
|
@@ -32,14 +35,17 @@ class EnrichmentWithUniProt(Enrichments):
|
|
32
35
|
|
33
36
|
enriched_gene_names = texts
|
34
37
|
|
35
|
-
logger.log(
|
36
|
-
|
38
|
+
logger.log(
|
39
|
+
logging.INFO,
|
40
|
+
"Load Hydra configuration for Gene enrichment with description and sequence.",
|
41
|
+
)
|
37
42
|
with hydra.initialize(version_base=None, config_path="../../configs"):
|
38
|
-
cfg = hydra.compose(
|
39
|
-
|
43
|
+
cfg = hydra.compose(
|
44
|
+
config_name="config",
|
45
|
+
overrides=["utils/enrichments/uniprot_proteins=default"],
|
46
|
+
)
|
40
47
|
cfg = cfg.utils.enrichments.uniprot_proteins
|
41
48
|
|
42
|
-
|
43
49
|
descriptions = []
|
44
50
|
sequences = []
|
45
51
|
for gene in enriched_gene_names:
|
@@ -52,10 +58,12 @@ class EnrichmentWithUniProt(Enrichments):
|
|
52
58
|
# https://www.uniprot.org/help/taxonomy
|
53
59
|
}
|
54
60
|
|
55
|
-
r = requests.get(
|
56
|
-
|
57
|
-
|
58
|
-
|
61
|
+
r = requests.get(
|
62
|
+
cfg.uniprot_url,
|
63
|
+
headers={"Accept": "application/json"},
|
64
|
+
params=params,
|
65
|
+
timeout=cfg.timeout,
|
66
|
+
)
|
59
67
|
# if the response is not ok
|
60
68
|
if not r.ok:
|
61
69
|
descriptions.append(None)
|
@@ -67,12 +75,12 @@ class EnrichmentWithUniProt(Enrichments):
|
|
67
75
|
descriptions.append(None)
|
68
76
|
sequences.append(None)
|
69
77
|
continue
|
70
|
-
description =
|
71
|
-
for comment in response_body[0][
|
72
|
-
if comment[
|
73
|
-
for value in comment[
|
74
|
-
description += value[
|
75
|
-
sequence = response_body[0][
|
78
|
+
description = ""
|
79
|
+
for comment in response_body[0]["comments"]:
|
80
|
+
if comment["type"] == "FUNCTION":
|
81
|
+
for value in comment["text"]:
|
82
|
+
description += value["value"]
|
83
|
+
sequence = response_body[0]["sequence"]["sequence"]
|
76
84
|
descriptions.append(description)
|
77
85
|
sequences.append(sequence)
|
78
86
|
return descriptions, sequences
|
@@ -16,6 +16,7 @@ from pymilvus import Collection
|
|
16
16
|
try:
|
17
17
|
import cudf
|
18
18
|
import cupy as cp
|
19
|
+
|
19
20
|
CUDF_AVAILABLE = True
|
20
21
|
except ImportError:
|
21
22
|
CUDF_AVAILABLE = False
|
@@ -34,9 +35,7 @@ class SystemDetector:
|
|
34
35
|
self.os_type = platform.system().lower() # 'windows', 'linux', 'darwin'
|
35
36
|
self.architecture = platform.machine().lower() # 'x86_64', 'arm64', etc.
|
36
37
|
self.has_nvidia_gpu = self._detect_nvidia_gpu()
|
37
|
-
self.use_gpu =
|
38
|
-
self.has_nvidia_gpu and self.os_type != "darwin"
|
39
|
-
) # No CUDA on macOS
|
38
|
+
self.use_gpu = self.has_nvidia_gpu and self.os_type != "darwin" # No CUDA on macOS
|
40
39
|
|
41
40
|
logger.info("System Detection Results:")
|
42
41
|
logger.info(" OS: %s", self.os_type)
|
@@ -232,9 +231,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
232
231
|
"""
|
233
232
|
# Initialize several variables
|
234
233
|
topk = min(self.topk, colls["nodes"].num_entities)
|
235
|
-
n_prizes = self.loader.py.zeros(
|
236
|
-
colls["nodes"].num_entities, dtype=self.loader.py.float32
|
237
|
-
)
|
234
|
+
n_prizes = self.loader.py.zeros(colls["nodes"].num_entities, dtype=self.loader.py.float32)
|
238
235
|
|
239
236
|
# Get the actual metric type to use
|
240
237
|
actual_metric_type = self.metric_type or self.loader.metric_type
|
@@ -279,9 +276,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
279
276
|
"""
|
280
277
|
# Initialize several variables
|
281
278
|
topk_e = min(self.topk_e, colls["edges"].num_entities)
|
282
|
-
e_prizes = self.loader.py.zeros(
|
283
|
-
colls["edges"].num_entities, dtype=self.loader.py.float32
|
284
|
-
)
|
279
|
+
e_prizes = self.loader.py.zeros(colls["edges"].num_entities, dtype=self.loader.py.float32)
|
285
280
|
|
286
281
|
# Get the actual metric type to use
|
287
282
|
actual_metric_type = self.metric_type or self.loader.metric_type
|
@@ -299,15 +294,11 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
299
294
|
e_prizes[[r.id for r in res[0]]] = [r.score for r in res[0]]
|
300
295
|
|
301
296
|
# Further process the edge_prizes
|
302
|
-
unique_prizes, inverse_indices = self.loader.py.unique(
|
303
|
-
e_prizes, return_inverse=True
|
304
|
-
)
|
297
|
+
unique_prizes, inverse_indices = self.loader.py.unique(e_prizes, return_inverse=True)
|
305
298
|
topk_e_values = unique_prizes[self.loader.py.argsort(-unique_prizes)[:topk_e]]
|
306
299
|
last_topk_e_value = topk_e
|
307
300
|
for k in range(topk_e):
|
308
|
-
indices = (
|
309
|
-
inverse_indices == (unique_prizes == topk_e_values[k]).nonzero()[0]
|
310
|
-
)
|
301
|
+
indices = inverse_indices == (unique_prizes == topk_e_values[k]).nonzero()[0]
|
311
302
|
value = min((topk_e - k) / indices.sum().item(), last_topk_e_value)
|
312
303
|
e_prizes[indices] = value
|
313
304
|
last_topk_e_value = value * (1 - self.c_const)
|
@@ -381,7 +372,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
381
372
|
# Edge index mapping: local real edge idx -> original global index
|
382
373
|
logger.log(logging.INFO, "Creating mapping for real edges")
|
383
374
|
mapping_edges = dict(
|
384
|
-
zip(range(len(real_["indices"])), self.loader.to_list(real_["indices"]))
|
375
|
+
zip(range(len(real_["indices"])), self.loader.to_list(real_["indices"]), strict=False)
|
385
376
|
)
|
386
377
|
|
387
378
|
# Virtual edge handling
|
@@ -398,15 +389,9 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
398
389
|
|
399
390
|
# Virtual edges: (src → virtual), (virtual → dst)
|
400
391
|
logger.log(logging.INFO, "Creating virtual edges")
|
401
|
-
virt_["edges_1"] = self.loader.py.stack(
|
402
|
-
|
403
|
-
)
|
404
|
-
virt_["edges_2"] = self.loader.py.stack(
|
405
|
-
[virt_["node_ids"], virt_["dst"]], axis=1
|
406
|
-
)
|
407
|
-
virt_["edges"] = self.loader.py.concatenate(
|
408
|
-
[virt_["edges_1"], virt_["edges_2"]], axis=0
|
409
|
-
)
|
392
|
+
virt_["edges_1"] = self.loader.py.stack([virt_["src"], virt_["node_ids"]], axis=1)
|
393
|
+
virt_["edges_2"] = self.loader.py.stack([virt_["node_ids"], virt_["dst"]], axis=1)
|
394
|
+
virt_["edges"] = self.loader.py.concatenate([virt_["edges_1"], virt_["edges_2"]], axis=0)
|
410
395
|
virt_["costs"] = self.loader.py.zeros(
|
411
396
|
(virt_["edges"].shape[0],), dtype=real_["costs"].dtype
|
412
397
|
)
|
@@ -418,9 +403,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
418
403
|
|
419
404
|
# Final prizes
|
420
405
|
logger.log(logging.INFO, "Getting final prizes")
|
421
|
-
final_prizes = self.loader.py.concatenate(
|
422
|
-
[prizes["nodes"], virt_["prizes"]], axis=0
|
423
|
-
)
|
406
|
+
final_prizes = self.loader.py.concatenate([prizes["nodes"], virt_["prizes"]], axis=0)
|
424
407
|
|
425
408
|
# Mapping virtual node ID -> edge index in original graph
|
426
409
|
logger.log(logging.INFO, "Creating mapping for virtual nodes")
|
@@ -428,6 +411,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
428
411
|
zip(
|
429
412
|
self.loader.to_list(virt_["node_ids"]),
|
430
413
|
self.loader.to_list(virt_["indices"]),
|
414
|
+
strict=False,
|
431
415
|
)
|
432
416
|
)
|
433
417
|
|
@@ -466,9 +450,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
466
450
|
|
467
451
|
# Retrieve the selected nodes and edges based on the given vertices and edges
|
468
452
|
subgraph_nodes = vertices[vertices < num_nodes]
|
469
|
-
subgraph_edges = [
|
470
|
-
mapping["edges"][e.item()] for e in edges if e < num_prior_edges
|
471
|
-
]
|
453
|
+
subgraph_edges = [mapping["edges"][e.item()] for e in edges if e < num_prior_edges]
|
472
454
|
virtual_vertices = vertices[vertices >= num_nodes]
|
473
455
|
if len(virtual_vertices) > 0:
|
474
456
|
virtual_edges = [mapping["nodes"][i.item()] for i in virtual_vertices]
|
@@ -480,9 +462,7 @@ class MultimodalPCSTPruning(NamedTuple):
|
|
480
462
|
|
481
463
|
return {"nodes": subgraph_nodes, "edges": subgraph_edges}
|
482
464
|
|
483
|
-
def extract_subgraph(
|
484
|
-
self, text_emb: list, query_emb: list, modality: str, cfg: dict
|
485
|
-
) -> dict:
|
465
|
+
def extract_subgraph(self, text_emb: list, query_emb: list, modality: str, cfg: dict) -> dict:
|
486
466
|
"""
|
487
467
|
Perform the Prize-Collecting Steiner Tree (PCST) algorithm to extract the subgraph.
|
488
468
|
|