aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (336)
  1. aiagents4pharma/__init__.py +11 -0
  2. aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
  3. aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
  4. aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
  5. aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
  6. aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
  7. aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
  8. aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
  9. aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
  10. aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
  11. aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
  12. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
  13. aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
  14. aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
  15. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
  16. aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
  17. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
  18. aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
  19. aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
  20. aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
  21. aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
  22. aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
  23. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
  24. aiagents4pharma/talk2biomodels/.dockerignore +13 -0
  25. aiagents4pharma/talk2biomodels/Dockerfile +104 -0
  26. aiagents4pharma/talk2biomodels/README.md +1 -0
  27. aiagents4pharma/talk2biomodels/__init__.py +5 -0
  28. aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
  29. aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
  30. aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
  31. aiagents4pharma/talk2biomodels/api/ols.py +75 -0
  32. aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
  33. aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
  34. aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
  35. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
  36. aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
  37. aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
  38. aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
  39. aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
  40. aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
  41. aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
  42. aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
  43. aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
  44. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
  45. aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
  46. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
  47. aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
  48. aiagents4pharma/talk2biomodels/install.md +63 -0
  49. aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
  50. aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
  51. aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
  52. aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
  53. aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
  54. aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
  55. aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
  56. aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
  57. aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
  58. aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
  59. aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
  60. aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
  61. aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
  62. aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
  63. aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
  64. aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
  65. aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
  66. aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
  67. aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
  68. aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
  69. aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
  70. aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
  71. aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
  72. aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
  73. aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
  74. aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
  75. aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
  76. aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
  77. aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
  78. aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
  79. aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
  80. aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
  81. aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
  82. aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
  83. aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
  84. aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
  85. aiagents4pharma/talk2cells/README.md +1 -0
  86. aiagents4pharma/talk2cells/__init__.py +5 -0
  87. aiagents4pharma/talk2cells/agents/__init__.py +6 -0
  88. aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
  89. aiagents4pharma/talk2cells/states/__init__.py +6 -0
  90. aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
  91. aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
  92. aiagents4pharma/talk2cells/tools/__init__.py +6 -0
  93. aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
  94. aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
  95. aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
  96. aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
  97. aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
  98. aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
  99. aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
  100. aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
  101. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
  102. aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
  103. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
  104. aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
  105. aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
  106. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
  107. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
  108. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
  109. aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
  110. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
  111. aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
  112. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  113. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
  114. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
  115. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
  116. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
  117. aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
  118. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
  119. aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
  120. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
  121. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
  122. aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
  123. aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
  124. aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
  125. aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
  126. aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
  127. aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
  128. aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
  129. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
  130. aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
  131. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
  132. aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
  133. aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
  134. aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
  135. aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
  136. aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
  137. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
  138. aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
  139. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
  140. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
  141. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
  142. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
  143. aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
  144. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
  145. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
  146. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
  147. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
  148. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
  149. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
  150. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
  151. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
  152. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
  153. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
  154. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
  155. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
  156. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
  157. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
  158. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
  159. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
  160. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
  161. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
  162. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
  163. aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
  164. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
  165. aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
  166. aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
  167. aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
  168. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  169. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
  170. aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
  171. aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
  172. aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
  173. aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
  174. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
  175. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
  176. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
  177. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
  178. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
  179. aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
  180. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
  181. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
  182. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
  183. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
  184. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
  185. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
  186. aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
  187. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
  188. aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
  189. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
  190. aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
  191. aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
  192. aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
  193. aiagents4pharma/talk2scholars/.dockerignore +13 -0
  194. aiagents4pharma/talk2scholars/Dockerfile +104 -0
  195. aiagents4pharma/talk2scholars/README.md +1 -0
  196. aiagents4pharma/talk2scholars/__init__.py +7 -0
  197. aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
  198. aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
  199. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
  200. aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
  201. aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
  202. aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
  203. aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
  204. aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
  205. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
  206. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
  207. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
  208. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
  209. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
  210. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
  211. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
  212. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
  213. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
  214. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
  215. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
  216. aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
  217. aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
  218. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
  219. aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
  220. aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
  221. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
  222. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
  223. aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
  224. aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
  225. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
  226. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
  227. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
  228. aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
  229. aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
  230. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
  231. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
  232. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
  233. aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
  234. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
  235. aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
  236. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  237. aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
  238. aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
  239. aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
  240. aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
  241. aiagents4pharma/talk2scholars/install.md +122 -0
  242. aiagents4pharma/talk2scholars/state/__init__.py +7 -0
  243. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
  244. aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
  245. aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
  246. aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
  247. aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
  248. aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
  249. aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
  250. aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
  251. aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
  252. aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
  253. aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
  254. aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
  255. aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
  256. aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
  257. aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
  258. aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
  259. aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
  260. aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
  261. aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
  262. aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
  263. aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
  264. aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
  265. aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
  266. aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
  267. aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
  268. aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
  269. aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
  270. aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
  271. aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
  272. aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
  273. aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
  274. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
  275. aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
  276. aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
  277. aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
  278. aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
  279. aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
  280. aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
  281. aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
  282. aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
  283. aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
  284. aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
  285. aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
  286. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
  287. aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
  288. aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
  289. aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
  290. aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
  291. aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
  292. aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
  293. aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
  294. aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
  295. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
  296. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
  297. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  298. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
  299. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  300. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  301. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  302. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
  303. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
  304. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
  305. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  306. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
  307. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
  308. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  309. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
  310. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
  311. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
  312. aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
  313. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
  314. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
  315. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
  316. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
  317. aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
  318. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
  319. aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
  320. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
  321. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
  322. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
  323. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
  324. aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
  325. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
  326. aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
  327. aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
  328. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
  329. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
  330. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
  331. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
  332. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
  333. aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
  334. aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
  335. aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
  336. aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,78 @@
+ #!/usr/bin/env python3
+
+ """
+ A tool to fetch studies from the Single Cell Portal.
+ """
+
+ import logging
+ from typing import Annotated
+
+ import pandas as pd
+ import requests
+ from langchain_core.messages import ToolMessage
+ from langchain_core.tools import tool
+ from langchain_core.tools.base import InjectedToolCallId
+ from langgraph.types import Command
+
+ # Initialize logger
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ @tool("search_studies")
+ def search_studies(
+     search_term: str, tool_call_id: Annotated[str, InjectedToolCallId], limit: int = 5
+ ):
+     """
+     Fetch studies from the Single Cell Portal.
+
+     Args:
+         search_term (str): The search term to use. Example: "COVID-19", "cancer", etc.
+         limit (int): The number of studies to return. Default is 5.
+
+     """
+     logger.log(logging.INFO, "Calling the tool search_studies")
+     scp_endpoint = "https://singlecell.broadinstitute.org/single_cell/api/v1/search?type=study"
+     # params = {'terms': search_term, 'facets': 'MONDO_0005011'}
+     params = {"terms": search_term}
+     status_code = 0
+     while status_code != 200:
+         # Make a GET request to the Single Cell Portal
+         search_response = requests.get(scp_endpoint, params=params, timeout=10, verify=False)
+         status_code = search_response.status_code
+         logger.log(logging.INFO, "Status code %s received from SCP", status_code)
+
+     # Select the columns to display in the table
+     selected_columns = ["study_source", "name", "study_url", "gene_count", "cell_count"]
+
+     # Extract the data from the response
+     # with the selected columns
+     df = pd.DataFrame(search_response.json()["studies"])[selected_columns]
+
+     # Convert column 'Study Name' into clickable
+     # hyperlinks from the column 'Study URL'
+     scp_api_url = "https://singlecell.broadinstitute.org"
+     df["name"] = df.apply(
+         lambda x: f'<a href="{scp_api_url}/{x["study_url"]}">{x["name"]}</a>', axis=1
+     )
+
+     # Exclude the column 'Study URL' from the dataframe
+     df = df.drop(columns=["study_url"])
+
+     # Add a new column at the beginning of the dataframe with row numbers
+     df.insert(0, "S/N", range(1, 1 + len(df)))
+
+     # Update the state key 'search_table' with the dataframe in markdown format
+     return Command(
+         update={
+             # update the state keys
+             "search_table": df.to_markdown(tablefmt="grid"),
+             # update the message history
+             "messages": [
+                 ToolMessage(
+                     f"Successfully fetched {limit} studies on {search_term}.",
+                     tool_call_id=tool_call_id,
+                 )
+             ],
+         }
+     )
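For orientation, here is a minimal sketch (not part of the package) of the table transformation this tool applies to the SCP response, using dummy rows; note that `df.to_markdown(tablefmt="grid")` requires the optional `tabulate` dependency.

```python
# Minimal sketch: mimics how search_studies turns the SCP JSON response into
# the markdown table stored under the 'search_table' state key. Dummy data only.
import pandas as pd  # to_markdown() additionally needs the 'tabulate' package

# Stand-in for search_response.json()["studies"]
studies = [
    {"study_source": "SCP", "name": "Example study", "study_url": "study/SCP1",
     "gene_count": 20000, "cell_count": 5000},
]
df = pd.DataFrame(studies)[["study_source", "name", "study_url", "gene_count", "cell_count"]]

scp_api_url = "https://singlecell.broadinstitute.org"
df["name"] = df.apply(lambda x: f'<a href="{scp_api_url}/{x["study_url"]}">{x["name"]}</a>', axis=1)
df = df.drop(columns=["study_url"])
df.insert(0, "S/N", range(1, 1 + len(df)))

print(df.to_markdown(tablefmt="grid"))  # what ends up in state["search_table"]
```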
@@ -0,0 +1,13 @@
+ __pycache__/
+ *.pyc
+ *.log
+ *.csv
+ *.pt
+ *.pkl
+ models/
+ data/
+ env/
+ .venv/
+ .git/
+ .env
+ .cufile.log
@@ -0,0 +1,131 @@
+ # syntax=docker/dockerfile:1
+
+ # Dockerfile for the talk2knowledgegraphs application
+ # Multi-stage build for optimized image size with UV package manager
+
+ ARG BASE_IMAGE=ubuntu:24.04
+ ARG PYTHON_VERSION=3.12
+
+ FROM ${BASE_IMAGE} AS dev-base
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+     build-essential \
+     ca-certificates \
+     cmake \
+     curl \
+     g++ \
+     libopenblas-dev \
+     libomp-dev \
+     ninja-build \
+     wget \
+     && rm -rf /var/lib/apt/lists/*
+
+ FROM dev-base AS python-install
+ ARG PYTHON_VERSION=3.12
+
+ # Install Python (available in Ubuntu 24.04 default repos)
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+     python${PYTHON_VERSION} \
+     python${PYTHON_VERSION}-dev \
+     python${PYTHON_VERSION}-venv \
+     python3-pip \
+     && rm -rf /var/lib/apt/lists/* \
+     && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+     && update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
+
+ FROM python-install AS uv-install
+ ARG INSTALL_CUDA=true
+ WORKDIR /app
+
+ # Install UV package manager and dependencies
+ COPY pyproject.toml uv.lock* ./
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
+     export PATH="/root/.local/bin:$PATH" && \
+     export UV_PROJECT_ENVIRONMENT="/opt/venv" && \
+     uv sync --frozen --extra dev --no-install-project --python python${PYTHON_VERSION} && \
+     . /opt/venv/bin/activate && \
+     if [ "$INSTALL_CUDA" = "true" ]; then \
+         uv pip install \
+             --extra-index-url=https://pypi.nvidia.com \
+             --index-strategy unsafe-best-match \
+             cudf-cu12 dask-cudf-cu12; \
+     else \
+         echo "Skipping RAPIDS packages for CPU build"; \
+     fi && \
+     uv cache clean
+
+ FROM ${BASE_IMAGE} AS runtime
+ ARG PYTHON_VERSION=3.12
+ ARG INSTALL_CUDA=true
+ LABEL maintainer="talk2knowledgegraphs"
+ LABEL version="1.0.0"
+ LABEL description="AI Agents for Pharma - Knowledge Graphs Application"
+
+ # Install runtime dependencies
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+     ca-certificates \
+     curl \
+     gnupg \
+     libmagic1 \
+     libopenblas0 \
+     libomp5 \
+     python${PYTHON_VERSION} \
+     && rm -rf /var/lib/apt/lists/* \
+     && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+     && update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
+
+ # Install CUDA runtime libraries required by cudf/cupy (optional)
+ RUN if [ "$INSTALL_CUDA" = "true" ]; then \
+         curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/3bf863cc.pub \
+             | gpg --dearmor -o /usr/share/keyrings/nvidia-cuda-keyring.gpg && \
+         echo "deb [signed-by=/usr/share/keyrings/nvidia-cuda-keyring.gpg] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/ /" \
+             > /etc/apt/sources.list.d/nvidia-cuda.list && \
+         apt-get update && \
+         DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+             cuda-cudart-12-6 \
+             cuda-cudart-dev-12-6 \
+             cuda-nvrtc-12-6 \
+             cuda-nvrtc-dev-12-6 \
+             libcublas-12-6 \
+             libcusparse-12-6 \
+         && rm -rf /var/lib/apt/lists/*; \
+     else \
+         echo "Skipping CUDA installation"; \
+     fi
+
+ ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64
+
+ # Copy UV virtual environment from build stage
+ COPY --from=uv-install /opt/venv /opt/venv
+
+ # Set environment variables
+ ENV PATH="/opt/venv/bin:$PATH"
+ ENV PYTHONPATH="/app"
+ ENV PYTHONUNBUFFERED=1
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV STREAMLIT_SERVER_HEADLESS=true
+ ENV STREAMLIT_SERVER_ENABLE_CORS=false
+
+ # Set working directory and create necessary directories
+ WORKDIR /app
+
+ # Copy application code
+ COPY aiagents4pharma/talk2knowledgegraphs /app/aiagents4pharma/talk2knowledgegraphs
+ COPY docs /app/docs
+ COPY app /app/app
+
+ # Copy and set up the entrypoint script
+ COPY aiagents4pharma/talk2knowledgegraphs/entrypoint.sh /usr/local/bin/entrypoint.sh
+ RUN chmod +x /usr/local/bin/entrypoint.sh
+
+ # Health check for production monitoring
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+     CMD curl -f http://localhost:8501/health || exit 1
+
+ # Expose the default Streamlit port
+ EXPOSE 8501
+
+ # Set the entrypoint
+ ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
+
+ # Default command (can be overridden)
+ CMD ["streamlit", "run", "/app/app/frontend/streamlit_app_talk2knowledgegraphs.py", "--server.port=8501", "--server.address=0.0.0.0"]
@@ -0,0 +1 @@
+ Please check out the README file in the root folder for more information.
@@ -0,0 +1,5 @@
+ """
+ This file is used to import the agents, datasets, states, tools, and utils.
+ """
+
+ from . import agents, datasets, states, tools, utils
@@ -0,0 +1,5 @@
+ """
+ This file is used to import all the agents in the package.
+ """
+
+ from . import t2kg_agent
@@ -0,0 +1,99 @@
+ """
+ This is the agent file for the Talk2KnowledgeGraphs agent.
+ """
+
+ import logging
+ from typing import Annotated
+
+ import hydra
+ from langchain_core.language_models.chat_models import BaseChatModel
+ from langgraph.checkpoint.memory import MemorySaver
+ from langgraph.graph import START, StateGraph
+ from langgraph.prebuilt import InjectedState, ToolNode, create_react_agent
+
+ from ..states.state_talk2knowledgegraphs import Talk2KnowledgeGraphs
+ from ..tools.graphrag_reasoning import GraphRAGReasoningTool
+
+ # from ..tools.multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
+ from ..tools.milvus_multimodal_subgraph_extraction import (
+     MultimodalSubgraphExtractionTool,
+ )
+
+ # from ..tools.cu2_multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
+ # from ..tools.gsfs_multimodal_subgraph_extraction import MultimodalSubgraphExtractionTool
+ from ..tools.subgraph_summarization import SubgraphSummarizationTool
+
+ # Initialize logger
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ def get_app(uniq_id, llm_model: BaseChatModel):
+     """
+     This function returns the langgraph app.
+     """
+
+     def agent_t2kg_node(state: Annotated[dict, InjectedState]):
+         """
+         This function calls the model.
+         """
+         logger.log(logging.INFO, "Calling t2kg_agent node with thread_id %s", uniq_id)
+         response = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
+
+         return response
+
+     # Load hydra configuration
+     logger.log(logging.INFO, "Load Hydra configuration for Talk2KnowledgeGraphs agent.")
+     with hydra.initialize(version_base=None, config_path="../configs"):
+         cfg = hydra.compose(config_name="config", overrides=["agents/t2kg_agent=default"])
+         cfg = cfg.agents.t2kg_agent
+
+     # Define the tools
+     subgraph_extraction = MultimodalSubgraphExtractionTool()
+     subgraph_summarization = SubgraphSummarizationTool()
+     graphrag_reasoning = GraphRAGReasoningTool()
+     tools = ToolNode(
+         [
+             subgraph_extraction,
+             subgraph_summarization,
+             graphrag_reasoning,
+         ]
+     )
+
+     # Create the agent
+     model = create_react_agent(
+         llm_model,
+         tools=tools,
+         state_schema=Talk2KnowledgeGraphs,
+         prompt=cfg.state_modifier,
+         version="v2",
+         checkpointer=MemorySaver(),
+     )
+
+     # Define a new graph
+     workflow = StateGraph(Talk2KnowledgeGraphs)
+
+     # Define the agent node of the graph
+     workflow.add_node("agent_t2kg", agent_t2kg_node)
+
+     # Set the entrypoint as the first node
+     # This means that this node is the first one called
+     workflow.add_edge(START, "agent_t2kg")
+
+     # Initialize memory to persist state between graph runs
+     checkpointer = MemorySaver()
+
+     # Finally, we compile it!
+     # This compiles it into a LangChain Runnable,
+     # meaning you can use it as you would any other runnable.
+     # Note that we're (optionally) passing the memory
+     # when compiling the graph
+     app = workflow.compile(checkpointer=checkpointer, name="T2KG_Agent")
+     logger.log(
+         logging.INFO,
+         "Compiled the graph with thread_id %s and llm_model %s",
+         uniq_id,
+         llm_model,
+     )
+
+     return app
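A hedged usage sketch (not shipped with the package): one way `get_app` might be wired to an OpenAI chat model and invoked. The model name, thread id, and the assumption that a bare message list satisfies the `Talk2KnowledgeGraphs` state are illustrative only; the tools also expect a reachable Milvus backend and API keys in the environment.

```python
# Hypothetical usage sketch for get_app; assumes OPENAI_API_KEY is set and that
# the underlying tools can reach their configured backends.
import uuid

from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI

from aiagents4pharma.talk2knowledgegraphs.agents.t2kg_agent import get_app

thread_id = str(uuid.uuid4())
app = get_app(thread_id, llm_model=ChatOpenAI(model="gpt-4o-mini", temperature=0.1))

# Invoke the compiled LangGraph app; MemorySaver keeps state per thread_id.
result = app.invoke(
    {"messages": [HumanMessage(content="Summarize the subgraph around IL6.")]},
    config={"configurable": {"thread_id": thread_id}},
)
print(result["messages"][-1].content)
```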
@@ -0,0 +1,5 @@
+ """
+ Import all the modules in the package
+ """
+
+ from . import agents, app, tools, utils
@@ -0,0 +1,3 @@
+ """
+ Import all the modules in the package
+ """
@@ -0,0 +1,62 @@
+ _target_: agents.t2kg_agent.get_app
+ state_modifier: >
+   You are the talk2knowledgegraphs agent, a helpful assistant for reasoning over knowledge graphs.
+   Users can ask questions related to the knowledge graphs, and you will provide the answers using
+   the provided tools as follows (if necessary):
+   [`subgraph_extraction`, `subgraph_summarization`, `graphrag_reasoning`].
+
+   **Tool Descriptions**:
+   - `subgraph_extraction`: Extract a subgraph from the knowledge graph that contains the relevant
+   information to answer the user's query. This tool can be used to provide a subgraph context
+   as a part of the reasoning process. The extracted subgraph should contain the most relevant
+   nodes and edges to the user's query in the form of a textualized subgraph.
+   - `subgraph_summarization`: Summarize the extracted textualized subgraph obtained from the
+   `subgraph_extraction` tool. This tool can be used to provide a concise and informative summary
+   of the subgraph to be used for reasoning as subgraph context. This tool highlights the most
+   important nodes and edges in the subgraph to respond to the user's request.
+   - `graphrag_reasoning`: Reason over the extracted textualized subgraph to answer the user's
+   prompt by also considering the context from the extracted subgraph and the retrieved
+   documents. The user may also have a set of uploaded files that can be used to provide additional
+   information for reasoning. The history of previous conversations should be considered as well,
+   and you as an agent should decide which conversations to include as chat history.
+
+   As an agent, you should approach each request by first understanding the user's query and then
+   following the appropriate steps to provide the best answer possible.
+
+   **Execution Steps**:
+   - Understand thoroughly the user's query and think over the best approach to answer it.
+   - You may not need to call any tool for each user's query. Use the related tool(s) as needed.
+   Think deeply whether it is necessary to call any tool to respond to the user's request.
+   - Call `subgraph_extraction` if there is any indication that the user needs to get the
+   information from the knowledge graph, which is not directly available as context in the prompt or
+   in the previously extracted subgraph.
+   If the user asks for subgraph extraction, suggest a value for the `extraction_name` argument.
+   You should always follow it with `subgraph_summarization` as the next tool to be invoked.
+   - Call the `subgraph_summarization` tool to summarize the extracted subgraph and provide
+   useful insights over the subgraph. This tool also has the ability to filter endotypes
+   in the form of differentially expressed genes that are relevant to the input query. Make sure
+   to include the most relevant genes if the user provides endotype-related documents.
+   The summary of the subgraph will be stored as `graph_summary` in the state, which you can use
+   for reasoning over the subgraph in the `graphrag_reasoning` tool afterwards.
+   - If the user asks follow-up questions related to the extracted subgraph, you should
+   call `subgraph_summarization` followed by `graphrag_reasoning` tools if you think
+   the answer can be retrieved from the previously extracted subgraph.
+   - Call the `graphrag_reasoning` tool to reason over the extracted subgraph and documents.
+   Always perform reasoning over the extracted subgraph and documents to provide
+   the best possible answer to the user's query. Before calling this tool,
+   make sure you have access to the summarized subgraph obtained from the `subgraph_summarization` tool.
+   - By default, if the user asks a specific question about the extracted graph, you should
+   call `subgraph_summarization` followed by `graphrag_reasoning` if the most recent subgraphs
+   contain the relevant information to answer the user's question.
+   Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
+   - It is strongly recommended to avoid calling the same tool multiple times unless
+   it is necessary to get the correct and thorough answer to the user's request.
+
+   **Tool Calling Workflow Examples**:
+   - `subgraph_extraction` -> `subgraph_summarization` -> `graphrag_reasoning` when the user asks
+   for specific instructions to extract the subgraph and reason over it. Follow this order to
+   provide the most accurate and relevant information if you think the currently available context
+   is not enough to answer the user's question.
+   - `subgraph_summarization` -> `graphrag_reasoning` when the user asks about the previously extracted
+   subgraph. Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
+   - Do not call the `graphrag_reasoning` tool without calling the `subgraph_summarization` tool first.
@@ -0,0 +1,5 @@
+ """
+ Import all the modules in the package
+ """
+
+ from . import frontend
@@ -0,0 +1,3 @@
+ """
+ Import all the modules in the package
+ """
@@ -0,0 +1,79 @@
+ _target_: app.frontend.streamlit_app_talk2knowledgegraphs
+ default_user: "talk2kg_user"
+ data_package_allowed_file_types:
+   - "pdf"
+ multimodal_allowed_file_types:
+   - "xls"
+   - "xlsx"
+ upload_data_dir: "../files"
+ kg_name: "BioBridge-PrimeKG"
+ kg_node_types:
+   - "gene/protein"
+   - "molecular_function"
+   - "cellular_component"
+   - "biological_process"
+   - "drug"
+   - "disease"
+ # kg_nodes_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_nodes.parquet.gzip"
+ # kg_edges_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_edges.parquet.gzip"
+ kg_pyg_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal_pyg_graph.pkl"
+ kg_text_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/biobridge_multimodal_text_graph.pkl"
+ openai_api_key: ${oc.env:OPENAI_API_KEY}
+ # OpenAI configuration - can use custom base_url for enterprise/Azure deployments
+ openai_base_url: ${oc.env:OPENAI_BASE_URL,null} # Optional: custom OpenAI endpoint
+ openai_llms:
+   - "OpenAI/gpt-4o-mini"
+ openai_embeddings:
+   - "text-embedding-ada-002"
+   - "text-embedding-3-small"
+ # Rate limiting and retry configuration
+ llm_max_retries: 5 # Number of retries on rate limit or transient errors
+ llm_timeout: 60 # Timeout in seconds for LLM requests
+ embedding_max_retries: 3 # Number of retries for embedding requests
+ embedding_timeout: 30 # Timeout in seconds for embedding requests
+ # Azure OpenAI configuration
+ azure_openai_endpoint: ${oc.env:AZURE_OPENAI_ENDPOINT,null} # Azure OpenAI endpoint
+ azure_openai_deployment: ${oc.env:AZURE_OPENAI_DEPLOYMENT,null} # Azure deployment name
+ azure_openai_api_version: ${oc.env:AZURE_OPENAI_API_VERSION,"2024-02-01"} # Azure API version
+ azure_openai_model_name: ${oc.env:AZURE_OPENAI_MODEL_NAME,null} # Model name for analytics
+ azure_openai_model_version: ${oc.env:AZURE_OPENAI_MODEL_VERSION,null} # Model version
+ # Azure AD authentication (uses AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET)
+ azure_client_id: ${oc.env:AZURE_CLIENT_ID,null}
+ azure_tenant_id: ${oc.env:AZURE_TENANT_ID,null}
+ azure_client_secret: ${oc.env:AZURE_CLIENT_SECRET,null}
+ # NVIDIA configuration
+ nvidia_api_key: ${oc.env:NVIDIA_API_KEY}
+ nvidia_llms:
+   - "NVIDIA/llama-3.3-70b-instruct"
+   - "NVIDIA/llama-3.1-405b-instruct"
+   - "NVIDIA/llama-3.1-70b-instruct"
+ nvidia_embeddings:
+   - "NVIDIA/llama-3.2-nv-embedqa-1b-v2"
+
+ azure_openai_llms:
+   - "Azure/gpt-4o-mini" # Will map to Azure deployment
+ azure_openai_embeddings:
+   - "Azure/text-embedding-ada-002"
+
+ # Ollama configuration (for local deployment)
+ ollama_llms:
+   - "Ollama/llama3.1:8b"
+ ollama_embeddings:
+   - "nomic-embed-text"
+ default_embedding_model: "openai"
+ temperature: 0.1
+ streaming: False
+ reasoning_subgraph_topk_nodes: 15
+ reasoning_subgraph_topk_nodes_min: 1
+ reasoning_subgraph_topk_nodes_max: 50
+ reasoning_subgraph_topk_edges: 15
+ reasoning_subgraph_topk_edges_min: 1
+ reasoning_subgraph_topk_edges_max: 50
+ # Logo configuration
+ logo_paths:
+   container: "/app/docs/assets/VPE.png"
+   local: "docs/assets/VPE.png"
+   relative: "../../docs/assets/VPE.png"
+ logo_link: "https://github.com/VirtualPatientEngine"
+ # Database configuration moved to configs/utils/database/milvus/default.yaml
+ # This frontend config now only contains frontend-specific settings
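The `${oc.env:VAR,default}` entries above are OmegaConf environment-variable resolvers that are resolved lazily when a value is accessed. A small standalone sketch of that behavior (the keys below are copied from this config; everything else is illustrative):

```python
# Standalone sketch of OmegaConf's oc.env resolver, as used in this config.
import os
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "openai_base_url": "${oc.env:OPENAI_BASE_URL,null}",
        "azure_openai_api_version": '${oc.env:AZURE_OPENAI_API_VERSION,"2024-02-01"}',
    }
)

os.environ.pop("OPENAI_BASE_URL", None)  # unset -> falls back to the default (None)
print(OmegaConf.to_container(cfg, resolve=True))
# {'openai_base_url': None, 'azure_openai_api_version': '2024-02-01'}

os.environ["OPENAI_BASE_URL"] = "https://example.internal/v1"
print(cfg.openai_base_url)  # resolved lazily on access -> value set above
```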
@@ -0,0 +1,13 @@
+ defaults:
+   - _self_
+   - utils/database/milvus: default
+   - agents/t2kg_agent: default
+   - tools/subgraph_extraction: default
+   - tools/multimodal_subgraph_extraction: default
+   - tools/subgraph_summarization: default
+   - tools/graphrag_reasoning: default
+   - utils/pubchem_utils: default
+   - utils/enrichments/uniprot_proteins: default
+   - utils/enrichments/ols_terms: default
+   - utils/enrichments/reactome_pathways: default
+   - app/frontend: default
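As a sketch of how this defaults list composes into one config tree (assumptions: the snippet sits next to the package's `configs` directory, since Hydra resolves `config_path` relative to the calling file, and group configs are packaged under their group paths, Hydra's default behavior):

```python
# Hypothetical sketch: compose the talk2knowledgegraphs config tree with Hydra,
# mirroring the pattern used in t2kg_agent.py.
import hydra
from omegaconf import OmegaConf

with hydra.initialize(version_base=None, config_path="../configs"):
    cfg = hydra.compose(config_name="config")

# Each entry in the defaults list becomes a node in the composed config.
print(cfg.agents.t2kg_agent["_target_"])          # agents.t2kg_agent.get_app
print(list(cfg.tools.subgraph_summarization))     # prompt_subgraph_summarization, ...
print(OmegaConf.to_yaml(cfg.app.frontend)[:200])  # frontend settings as YAML
```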
@@ -0,0 +1,5 @@
+ """
+ Import all the modules in the package
+ """
+
+ from . import graphrag_reasoning, subgraph_extraction, subgraph_summarization
@@ -0,0 +1,3 @@
+ """
+ Import all the modules in the package
+ """
@@ -0,0 +1,24 @@
+ _target_: talk2knowledgegraphs.tools.graphrag_reasoning
+ splitter_chunk_size: 1024
+ splitter_chunk_overlap: 256
+ retriever_search_type: "mmr"
+ retriever_k: 3
+ retriever_fetch_k: 10
+ retriever_lambda_mult: 0.3
+ prompt_graphrag_w_docs_context: >
+   Given a chat history and the latest user question, which might reference context
+   in the chat history, formulate a standalone question that can be understood
+   without the chat history. Do NOT answer the question, just reformulate it if needed
+   and otherwise return it as is.
+
+   Question: {input}
+ prompt_graphrag_w_docs: >
+   You are talk2knowledgegraphs, a helpful assistant performing retrieval-augmented generation (RAG)
+   over knowledge graphs.
+   One of your tasks is to answer react-based questions by using the following pieces of
+   retrieved context to answer the question. You can leverage a summarization of the subgraph
+   and the retrieved documents to provide the best possible answer to the user's query.
+
+   Subgraph Summary: {subgraph_summary}
+   Context: {context}
+   Question: {input}
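The `splitter_*` and `retriever_*` values above correspond to standard LangChain building blocks; here is a hedged sketch of how such settings are typically wired. The FAISS store, embeddings model, and document are placeholders (extra dependencies: `faiss-cpu`, an OpenAI key), not what the tool itself necessarily uses.

```python
# Illustrative sketch only: maps the splitter_*/retriever_* values above onto
# generic LangChain components. The actual tool may wire them differently.
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=256)
chunks = splitter.split_documents([Document(page_content="uploaded document text " * 200)])

vectorstore = FAISS.from_documents(chunks, OpenAIEmbeddings())  # placeholder store
retriever = vectorstore.as_retriever(
    search_type="mmr",                                 # retriever_search_type
    search_kwargs={"k": 3, "fetch_k": 10, "lambda_mult": 0.3},
)
docs = retriever.invoke("What does the subgraph say about IL6?")
```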
@@ -0,0 +1,33 @@
+ _target_: talk2knowledgegraphs.tools.multimodal_subgraph_extraction
+ ollama_embeddings:
+   - "nomic-embed-text"
+ temperature: 0.1
+ streaming: False
+
+ # PCST Algorithm Parameters
+ topk: 15
+ topk_e: 15
+ cost_e: 0.5
+ c_const: 0.01
+ root: -1
+ num_clusters: 1
+ pruning: "gw"
+ verbosity_level: 0
+
+ # Hardware-Specific Vector Processing
+ vector_processing:
+   # Enable dynamic metric type selection based on hardware
+   dynamic_metrics: true
+
+ # Tool-specific configuration only
+ # Database configuration moved to configs/utils/database/milvus/default.yaml
+
+ ## Important: node_colors_dict is added in order to pass the tests for the
+ ## old multimodal_subgraph_extraction tool; that tool, along with the ollama configs, will be removed later
+ node_colors_dict:
+   "gene/protein": "#6a79f7"
+   "molecular_function": "#82cafc"
+   "cellular_component": "#3f9b0b"
+   "biological_process": "#c5c9c7"
+   "drug": "#c4a661"
+   "disease": "#80013f"
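`node_colors_dict` maps node types to hex colors for rendering extracted subgraphs. A purely illustrative sketch of applying such a mapping with networkx/matplotlib (the graph, node names, and `node_type` attribute are made up, not the tool's own rendering code):

```python
# Illustrative sketch only: color an extracted subgraph by node type using the
# node_colors_dict mapping above.
import matplotlib.pyplot as plt
import networkx as nx

node_colors_dict = {"gene/protein": "#6a79f7", "drug": "#c4a661", "disease": "#80013f"}

G = nx.Graph()
G.add_node("IL6", node_type="gene/protein")
G.add_node("tocilizumab", node_type="drug")
G.add_node("rheumatoid arthritis", node_type="disease")
G.add_edges_from([("tocilizumab", "IL6"), ("IL6", "rheumatoid arthritis")])

colors = [node_colors_dict[G.nodes[n]["node_type"]] for n in G.nodes]
nx.draw(G, node_color=colors, with_labels=True)
plt.show()
```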
@@ -0,0 +1,3 @@
+ """
+ Import all the modules in the package
+ """
@@ -0,0 +1,43 @@
+ _target_: talk2knowledgegraphs.tools.subgraph_extraction
+ ollama_embeddings:
+   - "nomic-embed-text"
+ temperature: 0.1
+ streaming: False
+ topk: 5
+ topk_e: 5
+ cost_e: 0.5
+ c_const: 0.01
+ root: -1
+ num_clusters: 1
+ pruning: "gw"
+ verbosity_level: 0
+ node_id_column: "node_id"
+ node_attr_column: "node_attr"
+ edge_src_column: "edge_src"
+ edge_attr_column: "edge_attr"
+ edge_dst_column: "edge_dst"
+ prompt_endotype_filtering: >
+   You are the talk2knowledgegraphs agent, a helpful assistant in filtering the most relevant endotype
+   for the subgraph extraction process.
+   Given the retrieved endotype documents, you need to filter the most relevant
+   endotype that will be used for the following reasoning process.
+   Only include a list of genes that exist in the provided documents
+   that are relevant to the input query.
+   For this task, you may modify your prompt to optimize the filtering process
+   based on factual information between each gene in the documents and the input query.
+   Discover as many genes as possible that are relevant for enriching the subgraph extraction process.
+
+   You do not need to include any other information in the output.
+   Use the following output format:
+   [gene_1, gene_2, ..., gene_n]
+
+   {context}
+   Input: {input}
+ prompt_endotype_addition: >
+   Include the following endotype for the subgraph extraction process:
+ splitter_chunk_size: 64
+ splitter_chunk_overlap: 16
+ retriever_search_type: "mmr"
+ retriever_k: 3
+ retriever_fetch_k: 10
+ retriever_lambda_mult: 0.3
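The parameters `root`, `num_clusters`, `pruning`, and `verbosity_level` match the argument names of the `pcst_fast` prize-collecting Steiner tree solver, which the extraction utilities appear to build on; a toy, hypothetical example of how such parameters would feed that solver (the graph and prizes are invented):

```python
# Toy sketch (assumption: the extraction utilities use the pcst_fast package,
# as the parameter names above suggest). Solves PCST on a 3-node path graph.
import numpy as np
from pcst_fast import pcst_fast

edges = np.array([[0, 1], [1, 2]])    # undirected edge list
prizes = np.array([1.0, 0.0, 1.0])    # node prizes (e.g., query relevance)
costs = np.array([0.5, 0.5])          # edge costs (cf. cost_e: 0.5)

vertices, selected_edges = pcst_fast(
    edges, prizes, costs,
    -1,      # root: -1 -> unrooted problem
    1,       # num_clusters
    "gw",    # pruning strategy
    0,       # verbosity_level
)
print(vertices, selected_edges)  # indices of the kept nodes and edges
```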
@@ -0,0 +1,3 @@
+ """
+ Import all the modules in the package
+ """
@@ -0,0 +1,9 @@
+ _target_: talk2knowledgegraphs.tools.subgraph_summarization
+ prompt_subgraph_summarization: >
+   You are the talk2knowledgegraphs agent, a helpful assistant in reasoning over a biomedical knowledge graph.
+   Your task is to summarize the extracted textualized subgraph to provide a concise and informative
+   summary of the subgraph to be used for reasoning as subgraph context. You are responsible for
+   highlighting the most important nodes and edges in the subgraph to respond to the user's question.
+
+   Textualized Subgraph: {textualized_subgraph}
+   Question: {input}
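A small sketch of how a prompt like this might be rendered and sent to a chat model; the model choice, the chain wiring, and the abbreviated prompt text are assumptions rather than the tool's actual implementation.

```python
# Hypothetical sketch: fill the subgraph-summarization prompt and call an LLM.
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

prompt_subgraph_summarization = (
    "You are the talk2knowledgegraphs agent, a helpful assistant in reasoning over "
    "a biomedical knowledge graph.\n"
    "Your task is to summarize the extracted textualized subgraph...\n\n"
    "Textualized Subgraph: {textualized_subgraph}\n"
    "Question: {input}"
)

chain = ChatPromptTemplate.from_template(prompt_subgraph_summarization) | ChatOpenAI(
    model="gpt-4o-mini", temperature=0.1
)
summary = chain.invoke(
    {
        "textualized_subgraph": "IL6 -(associated_with)-> rheumatoid arthritis ...",
        "input": "Which genes are linked to rheumatoid arthritis?",
    }
)
print(summary.content)
```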
@@ -0,0 +1,3 @@
+ """
+ Import all the modules in the package
+ """