PyPI - aiagents4pharma - Versions diffs - 0.0.0__py3-none-any.whl - Mend

aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (336) hide show

aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py ADDED Viewed

@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+"""
+Utility functions for resolving PubChem CIDs from external identifiers and fetching CID descriptions.
+"""
+import logging
+import hydra
+import requests
+# Initialize logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def cas_rn2pubchem_cid(casrn):
+    """
+    Convert CAS RN to PubChem CID.
+    Args:
+        casrn: The CAS RN of the drug.
+    Returns:
+        The PubChem CID of the drug.
+    """
+    # Load Hydra configuration for PubChem ID conversion
+    with hydra.initialize(version_base=None, config_path="../configs"):
+        cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
+        cfg = cfg.utils.pubchem_utils
+    # Prepare the URL
+    pubchem_url_for_drug = f"{cfg.pubchem_casrn2cid_url}{casrn}/record/JSON"
+    # Get the data
+    response = requests.get(pubchem_url_for_drug, timeout=60)
+    data = response.json()
+    # Extract the PubChem CID
+    cid = None
+    for substance in data.get("PC_Substances", []):
+        for compound in substance.get("compound", []):
+            if "id" in compound and "type" in compound["id"] and compound["id"]["type"] == 1:
+                cid = compound["id"].get("id", {}).get("cid")
+                break
+        if cid is not None:
+            break
+    return cid
+def external_id2pubchem_cid(db, db_id):
+    """
+    Convert external DB ID to PubChem CID.
+    Please refer to the following URL for more information
+    on data sources:
+    https://pubchem.ncbi.nlm.nih.gov/sources/
+    Args:
+        db: The database name.
+        db_id: The database ID of the drug.
+    Returns:
+        The PubChem CID of the drug.
+    """
+    # Load Hydra configuration for PubChem ID conversion
+    with hydra.initialize(version_base=None, config_path="../configs"):
+        cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
+        cfg = cfg.utils.pubchem_utils
+    # Prepare the URL
+    pubchem_url_for_drug = f"{cfg.pubchem_cid_base_url}/{db}/{db_id}/JSON"
+    # Get the data
+    response = requests.get(pubchem_url_for_drug, timeout=60)
+    data = response.json()
+    # Extract the PubChem CID
+    cid = None
+    for substance in data.get("PC_Substances", []):
+        for compound in substance.get("compound", []):
+            if "id" in compound and "type" in compound["id"] and compound["id"]["type"] == 1:
+                cid = compound["id"].get("id", {}).get("cid")
+                break
+    return cid
+def pubchem_cid_description(cid):
+    """
+    Get the description of a PubChem CID.
+    Args:
+        cid: The PubChem CID of the drug.
+    Returns:
+        The description of the PubChem CID.
+    """
+    # Load Hydra configuration for PubChem CID description
+    with hydra.initialize(version_base=None, config_path="../configs"):
+        cfg = hydra.compose(config_name="config", overrides=["utils/pubchem_utils=default"])
+        cfg = cfg.utils.pubchem_utils
+    # Prepare the URL
+    pubchem_url_for_descpription = f"{cfg.pubchem_cid_description_url}/{cid}/description/JSON"
+    # Get the data
+    response = requests.get(pubchem_url_for_descpription, timeout=60)
+    data = response.json()
+    # Extract the PubChem CID description
+    description = ""
+    for information in data["InformationList"]["Information"]:
+        description += information.get("Description", "")
+    return description

aiagents4pharma/talk2scholars/.dockerignore ADDED Viewed

@@ -0,0 +1,13 @@
+__pycache__/
+*.pyc
+*.log
+*.csv
+*.pt
+*.pkl
+models/
+data/
+env/
+.venv/
+.git/
+.env
+.cufile.log

aiagents4pharma/talk2scholars/Dockerfile ADDED Viewed

@@ -0,0 +1,104 @@
+# syntax=docker/dockerfile:1
+# Dockerfile for the talk2scholars application
+# Multi-stage build for optimized image size with UV package manager
+# Global build arguments (declared before the first FROM); stages that use
+# PYTHON_VERSION re-declare it below.
+ARG BASE_IMAGE=ubuntu:24.04
+ARG PYTHON_VERSION=3.12
+# Stage "dev-base": compiler toolchain and native libraries that the
+# python-install and uv-install stages build on. The runtime stage starts
+# again from BASE_IMAGE, so these packages are not part of the final image.
+FROM ${BASE_IMAGE} AS dev-base
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+  build-essential \
+  ca-certificates \
+  cmake \
+  curl \
+  g++ \
+  libopenblas-dev \
+  libomp-dev \
+  ninja-build \
+  wget \
+  && rm -rf /var/lib/apt/lists/*
+# Stage "python-install": adds the Python interpreter, headers, venv support and pip
+# on top of the build toolchain.
+FROM dev-base AS python-install
+ARG PYTHON_VERSION=3.12
+# Install Python (available in Ubuntu 24.04 default repos)
+# and register it as both the `python3` and the `python` command.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+  python${PYTHON_VERSION} \
+  python${PYTHON_VERSION}-dev \
+  python${PYTHON_VERSION}-venv \
+  python3-pip \
+  && rm -rf /var/lib/apt/lists/* \
+  && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+  && update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
+# Stage "uv-install": installs uv and resolves the project's dependencies into
+# /opt/venv, which the runtime stage later copies.
+FROM python-install AS uv-install
+WORKDIR /app
+# Install UV package manager and dependencies
+# NOTE(review): the `uv.lock*` glob lets COPY succeed without a lockfile, but
+# `uv sync --frozen` below presumably requires one - confirm.
+COPY pyproject.toml uv.lock* ./
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
+  export PATH="/root/.local/bin:$PATH" && \
+  export UV_PROJECT_ENVIRONMENT="/opt/venv" && \
+  uv sync --frozen --extra dev --no-install-project --python python${PYTHON_VERSION} && \
+  . /opt/venv/bin/activate && \
+  # RAPIDS packages (commented out - will be added in future if needed)
+  # uv pip install \
+  # --extra-index-url=https://pypi.nvidia.com \
+  # --index-strategy unsafe-best-match \
+  # cudf-cu12 dask-cudf-cu12 && \
+  uv cache clean
+# Stage "runtime": final image. Starts from the plain base image and only
+# receives the prebuilt virtual environment plus the application sources.
+FROM ${BASE_IMAGE} AS runtime
+ARG PYTHON_VERSION=3.12
+LABEL maintainer="talk2scholars"
+LABEL version="1.0.0"
+LABEL description="AI Agents for Pharma - Scholars Application"
+# Install runtime dependencies
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+  ca-certificates \
+  curl \
+  libmagic1 \
+  libopenblas0 \
+  libomp5 \
+  python${PYTHON_VERSION} \
+  && rm -rf /var/lib/apt/lists/* \
+  && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+  && update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
+# Copy UV virtual environment from build stage
+COPY --from=uv-install /opt/venv /opt/venv
+# Set environment variables
+ENV PATH="/opt/venv/bin:$PATH"
+ENV PYTHONPATH="/app"
+ENV PYTHONUNBUFFERED=1
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV STREAMLIT_SERVER_HEADLESS=true
+ENV STREAMLIT_SERVER_ENABLE_CORS=false
+# Set working directory
+WORKDIR /app
+# Copy application code
+COPY aiagents4pharma/talk2scholars /app/aiagents4pharma/talk2scholars
+COPY docs /app/docs
+COPY app /app/app
+# Copy and set up the entrypoint script (commented out - will be added in future if needed)
+# COPY aiagents4pharma/talk2knowledgegraphs/entrypoint.sh /usr/local/bin/entrypoint.sh
+# RUN chmod +x /usr/local/bin/entrypoint.sh
+# Health check for production monitoring.
+# Probes Streamlit's dedicated health endpoint rather than an arbitrary path,
+# so the check reflects the server's own readiness signal.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+  CMD curl -f http://localhost:8501/_stcore/health || exit 1
+# Expose the default Streamlit port
+EXPOSE 8501
+# Set the entrypoint (commented out - will be added in future if needed)
+# ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
+# Default command (can be overridden)
+CMD ["streamlit", "run", "/app/app/frontend/streamlit_app_talk2scholars.py", "--server.port=8501", "--server.address=0.0.0.0"]

aiagents4pharma/talk2scholars/README.md ADDED Viewed

	@@ -0,0 +1 @@
1	+ Please check out the README file in the root folder for more information.

aiagents4pharma/talk2scholars/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""
+This file is used to import all the modules in the package.
+"""
+from . import agents, configs, state, tests, tools
+__all__ = ["agents", "configs", "state", "tests", "tools"]

aiagents4pharma/talk2scholars/agents/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""
+This file is used to import all the modules in the package.
+"""
+from . import main_agent, paper_download_agent, pdf_agent, s2_agent, zotero_agent
+__all__ = [
+    "main_agent",
+    "s2_agent",
+    "paper_download_agent",
+    "zotero_agent",
+    "pdf_agent",
+]

aiagents4pharma/talk2scholars/agents/main_agent.py ADDED Viewed

@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+"""
+Main agent module for initializing and running the Talk2Scholars application.
+This module sets up the hierarchical agent system using LangGraph and integrates
+various sub-agents for handling different tasks such as semantic scholar, zotero,
+PDF processing, and paper downloading.
+Functions:
+- get_app: Initializes and returns the LangGraph-based hierarchical agent system.
+"""
+import logging
+import hydra
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_openai import ChatOpenAI
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph_supervisor import create_supervisor
+from ..agents.paper_download_agent import get_app as get_app_paper_download
+from ..agents.pdf_agent import get_app as get_app_pdf
+from ..agents.s2_agent import get_app as get_app_s2
+from ..agents.zotero_agent import get_app as get_app_zotero
+from ..state.state_talk2scholars import Talk2Scholars
+# Initialize logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def get_app(uniq_id, llm_model: BaseChatModel):
+    """
+    Initializes and returns the LangGraph-based hierarchical agent system.
+    This function builds the four sub-agents (Semantic Scholar, Zotero, paper
+    download, PDF), places them under a supervisor created with
+    `create_supervisor`, and compiles the resulting workflow with an in-memory
+    checkpointer.
+    Args:
+        uniq_id (str): A unique session identifier; it is forwarded to every
+            sub-agent and logged as the thread id.
+        llm_model (BaseChatModel): The language model used by the supervisor and
+            all sub-agents. A model whose `model_name` is "gpt-4o-mini" is replaced
+            by a `ChatOpenAI` instance configured with temperature 0 and
+            `parallel_tool_calls` disabled.
+    Returns:
+        The compiled supervisor application, named "Talk2Scholars_MainAgent".
+    Example:
+        >>> app = get_app("thread_123", llm_model)
+        >>> result = app.invoke(initial_state)
+    """
+    # Replace placeholder mini model with a configured ChatOpenAI instance
+    if getattr(llm_model, "model_name", None) == "gpt-4o-mini":
+        llm_model = ChatOpenAI(
+            model="gpt-4o-mini",
+            temperature=0,
+            model_kwargs={"parallel_tool_calls": False},
+        )
+    # Load hydra configuration
+    logger.log(logging.INFO, "Launching Talk2Scholars with thread_id %s", uniq_id)
+    with hydra.initialize(version_base=None, config_path="../configs/"):
+        cfg = hydra.compose(
+            config_name="config", overrides=["agents/talk2scholars/main_agent=default"]
+        )
+        cfg = cfg.agents.talk2scholars.main_agent
+    logger.log(logging.INFO, "System_prompt of Talk2Scholars: %s", cfg.system_prompt)
+    # Create supervisor workflow
+    workflow = create_supervisor(
+        [
+            get_app_s2(uniq_id, llm_model),  # semantic scholar
+            get_app_zotero(uniq_id, llm_model),  # zotero
+            get_app_paper_download(uniq_id, llm_model),  # paper download
+            get_app_pdf(uniq_id, llm_model),  # pdf
+        ],
+        model=llm_model,
+        state_schema=Talk2Scholars,
+        # Full history is needed to extract
+        # the tool artifacts
+        output_mode="full_history",
+        # Allow supervisor to resume control and chain multiple sub-agent calls
+        add_handoff_back_messages=True,
+        prompt=cfg.system_prompt,
+    )
+    # Compile the workflow; state is checkpointed in memory only
+    app = workflow.compile(checkpointer=MemorySaver(), name="Talk2Scholars_MainAgent")
+    return app

aiagents4pharma/talk2scholars/agents/paper_download_agent.py ADDED Viewed

@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+"""
+This module defines the paper download agent that connects to the arXiv API to fetch
+paper details and PDFs. It is part of the Talk2Scholars project.
+"""
+import logging
+from typing import Any
+import hydra
+from langchain_core.language_models.chat_models import BaseChatModel
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, StateGraph
+from langgraph.prebuilt.chat_agent_executor import create_react_agent
+from langgraph.prebuilt.tool_node import ToolNode
+from ..state.state_talk2scholars import Talk2Scholars
+from ..tools.paper_download.paper_downloader import download_papers
+# Initialize logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def get_app(uniq_id, llm_model: BaseChatModel):
+    """
+    Initializes and returns the LangGraph application for the Talk2Scholars paper download agent.
+    This agent supports downloading scientific papers from multiple preprint servers, including
+    arXiv, BioRxiv, and MedRxiv. It can intelligently handle user queries by extracting or resolving
+    necessary identifiers (e.g., arXiv ID or DOI) from the paper title and routing the request to
+    the appropriate download tool.
+    Args:
+        uniq_id (str): A unique identifier for tracking the current session.
+        llm_model (BaseChatModel, optional): The language model to be used by the agent.
+        Defaults to ChatOpenAI(model="gpt-4o-mini", temperature=0.5).
+    Returns:
+        StateGraph: A compiled LangGraph application that enables the paper download agent to
+        process user queries and retrieve research papers from arXiv (using arXiv ID),
+        BioRxiv and MedRxiv (using DOI resolved from the paper title or provided directly).
+    """
+    # Load Hydra configuration
+    logger.info("Loading Hydra configuration for Talk2Scholars paper download agent")
+    with hydra.initialize(version_base=None, config_path="../configs"):
+        cfg = hydra.compose(
+            config_name="config",
+            overrides=["agents/talk2scholars/paper_download_agent=default"],
+        )
+        cfg = cfg.agents.talk2scholars.paper_download_agent
+    # Define tools properly
+    tools = ToolNode(
+        [
+            download_papers,
+        ]
+    )
+    # Define the model
+    logger.info("Using OpenAI model %s", llm_model)
+    model = create_react_agent(
+        llm_model,
+        tools=tools,
+        state_schema=Talk2Scholars,
+        prompt=cfg.paper_download_agent,
+        checkpointer=MemorySaver(),
+    )
+    def paper_download_agent_node(state: Talk2Scholars) -> dict[str, Any]:
+        """
+        Processes the current state to fetch the research paper from arXiv, BioRxiv, or MedRxiv.
+        """
+        logger.info("Creating paper download agent node with thread_id: %s", uniq_id)
+        result = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
+        return result
+    # Define new graph
+    workflow = StateGraph(Talk2Scholars)
+    # Adding node for paper download agent
+    workflow.add_node("paper_download_agent", paper_download_agent_node)
+    # Entering into the agent
+    workflow.add_edge(START, "paper_download_agent")
+    # Memory management for states between graph runs
+    checkpointer = MemorySaver()
+    # Compile the graph
+    app = workflow.compile(checkpointer=checkpointer, name="paper_download_agent")
+    # Logging the information and returning the app
+    logger.info("Compiled the graph")
+    return app

aiagents4pharma/talk2scholars/agents/pdf_agent.py ADDED Viewed

@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+"""
+Agent for interacting with PDF documents via question and answer.
+This module initializes and compiles a LangGraph application that enables users to query PDF
+documents using a question_and_answer tool. It integrates a language model and follows
+the ReAct pattern to process and answer queries related to PDF content.
+Usage:
+    >>> app = get_app("unique_thread_id")
+    >>> response = app.invoke(initial_state)
+"""
+import logging
+import hydra
+from langchain_core.language_models.chat_models import BaseChatModel
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, StateGraph
+from langgraph.prebuilt import ToolNode, create_react_agent
+from ..state.state_talk2scholars import Talk2Scholars
+from ..tools.pdf.question_and_answer import question_and_answer
+# Initialize logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def get_app(uniq_id, llm_model: BaseChatModel):
+    """
+    Initializes and returns the LangGraph application for the PDF agent.
+    This function sets up the PDF agent by loading configuration settings via Hydra,
+    initializing a model, and creating a workflow graph that incorporates
+    PDF-specific tools. The agent is built using the ReAct pattern to facilitate interactive
+    querying and processing of PDF documents.
+    Args:
+        uniq_id (str): A unique identifier for the current conversation session or thread.
+        llm_model (BaseChatModel, optional): The language model instance to be used.
+            Defaults to ChatOpenAI(model="gpt-4o-mini", temperature=0).
+    Returns:
+        StateGraph: A compiled LangGraph application capable of handling PDF interactions.
+    Example:
+        >>> app = get_app("thread_123")
+        >>> result = app.invoke(initial_state)
+    """
+    # Load configuration using Hydra.
+    with hydra.initialize(version_base=None, config_path="../configs"):
+        cfg = hydra.compose(
+            config_name="config",
+            overrides=["agents/talk2scholars/pdf_agent=default"],
+        )
+        cfg = cfg.agents.talk2scholars.pdf_agent
+        logger.info("Loaded pdf_agent configuration.")
+    def pdf_agent_node(state: Talk2Scholars):
+        """
+        Processes the current state by invoking the language model for PDF question and answer.
+        Args:
+            state (Talk2Scholars): The current conversation state containing query details
+            and context.
+        Returns:
+            Any: The response generated by the language model after processing the state.
+        """
+        logger.info("Creating Agent_PDF node with thread_id %s", uniq_id)
+        response = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
+        return response
+    # Define the tool node that includes the PDF QnA tool.
+    tools = ToolNode([question_and_answer])
+    logger.info("Using OpenAI model %s", llm_model)
+    # Create the agent using the provided BaseChatModel instance.
+    model = create_react_agent(
+        llm_model,
+        tools=tools,
+        state_schema=Talk2Scholars,
+        prompt=cfg.pdf_agent,
+        checkpointer=MemorySaver(),
+    )
+    # Define a new workflow graph with the state schema.
+    workflow = StateGraph(Talk2Scholars)
+    workflow.add_node("pdf_agent", pdf_agent_node)
+    workflow.add_edge(START, "pdf_agent")
+    # Initialize memory to persist state between runs.
+    checkpointer = MemorySaver()
+    # Compile the graph into a runnable app.
+    app = workflow.compile(checkpointer=checkpointer, name="pdf_agent")
+    logger.info("Compiled the PDF agent graph.")
+    return app

aiagents4pharma/talk2scholars/agents/s2_agent.py ADDED Viewed

@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+"""
+Agent for interacting with Semantic Scholar
+"""
+import logging
+from typing import Any
+import hydra
+from langchain_core.language_models.chat_models import BaseChatModel
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, StateGraph
+from langgraph.prebuilt import ToolNode, create_react_agent
+from ..state.state_talk2scholars import Talk2Scholars
+from ..tools.s2.display_dataframe import display_dataframe
+from ..tools.s2.multi_paper_rec import (
+    get_multi_paper_recommendations,
+)
+from ..tools.s2.query_dataframe import query_dataframe
+from ..tools.s2.retrieve_semantic_scholar_paper_id import (
+    retrieve_semantic_scholar_paper_id,
+)
+from ..tools.s2.search import search_tool
+from ..tools.s2.single_paper_rec import (
+    get_single_paper_recommendations,
+)
+# Initialize logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def get_app(uniq_id, llm_model: BaseChatModel):
+    """
+    Initializes and returns the LangGraph application for the Semantic Scholar (S2) agent.
+    This function sets up the S2 agent, which integrates various tools to search, retrieve,
+    and display research papers from Semantic Scholar. The agent follows the ReAct pattern
+    for structured interaction.
+    Args:
+        uniq_id (str): Unique identifier for the current conversation session.
+        llm_model (BaseChatModel, optional): The language model to be used by the agent.
+            Defaults to `ChatOpenAI(model="gpt-4o-mini", temperature=0)`.
+    Returns:
+        StateGraph: A compiled LangGraph application that enables the S2 agent to process
+            user queries and retrieve research papers.
+    Example:
+        >>> app = get_app("thread_123")
+        >>> result = app.invoke(initial_state)
+    """
+    def s2_agent_node(state: Talk2Scholars) -> dict[str, Any]:
+        """
+        Processes the user query and retrieves relevant research papers.
+        This function calls the language model using the configured `ReAct` agent to analyze
+        the state and generate an appropriate response. The function then returns control
+        to the main supervisor.
+        Args:
+            state (Talk2Scholars): The current conversation state, including messages exchanged
+                and any previously retrieved research papers.
+        Returns:
+            Dict[str, Any]: A dictionary containing the updated conversation state.
+        Example:
+            >>> result = s2_agent_node(current_state)
+            >>> papers = result.get("papers", [])
+        """
+        logger.log(logging.INFO, "Creating Agent_S2 node with thread_id %s", uniq_id)
+        result = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
+        return result
+    logger.log(logging.INFO, "thread_id, llm_model: %s, %s", uniq_id, llm_model)
+    # Load hydra configuration
+    logger.log(logging.INFO, "Load Hydra configuration for Talk2Scholars S2 agent.")
+    with hydra.initialize(version_base=None, config_path="../configs"):
+        cfg = hydra.compose(
+            config_name="config", overrides=["agents/talk2scholars/s2_agent=default"]
+        )
+        cfg = cfg.agents.talk2scholars.s2_agent
+        logger.log(logging.INFO, "Loaded configuration for S2 agent")
+    # Define the tools
+    tools = ToolNode(
+        [
+            search_tool,
+            display_dataframe,
+            query_dataframe,
+            retrieve_semantic_scholar_paper_id,
+            get_single_paper_recommendations,
+            get_multi_paper_recommendations,
+        ]
+    )
+    # Define the model
+    logger.log(logging.INFO, "Using OpenAI model %s", llm_model)
+    # Create the agent
+    model = create_react_agent(
+        llm_model,
+        tools=tools,
+        state_schema=Talk2Scholars,
+        prompt=cfg.s2_agent,
+        checkpointer=MemorySaver(),
+    )
+    workflow = StateGraph(Talk2Scholars)
+    workflow.add_node("s2_agent", s2_agent_node)
+    workflow.add_edge(START, "s2_agent")
+    # Initialize memory to persist state between graph runs
+    checkpointer = MemorySaver()
+    # Finally, we compile it!
+    # This compiles it into a LangChain Runnable,
+    # meaning you can use it as you would any other runnable.
+    # Note that we're (optionally) passing the memory when compiling the graph
+    app = workflow.compile(checkpointer=checkpointer, name="s2_agent")
+    logger.log(
+        logging.INFO,
+        "Compiled the graph with thread_id %s and llm_model %s",
+        uniq_id,
+        llm_model,
+    )
+    return app