PyPI - aiagents4pharma - Versions diffs - 0.0.0__py3-none-any.whl - Mend

aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (336) hide show

aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py ADDED Viewed

@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+"""
+Query the metadata table of the most recently displayed papers.
+This tool loads `state['last_displayed_papers']` into a pandas DataFrame and uses an
+LLM-driven DataFrame agent to execute metadata-level queries. It supports both
+natural-language prompts (e.g., “list titles by author X”) and direct Python expressions
+over the DataFrame.
+Capabilities
+- Filter, sort, and aggregate rows using metadata columns (e.g., Title, Authors, Venue, Year).
+- Extract paper identifiers from a designated column (default: 'paper_ids'),
+  optionally for a single row.
+- Return the DataFrame agent’s textual result as a ToolMessage.
+Requirements
+- `state['llm_model']`: model used to instantiate the DataFrame agent.
+- `state['last_displayed_papers']`: dictionary mapping row keys → metadata records.
+Notes
+- Operates strictly on the metadata table; it does not parse or read PDF content.
+- When `extract_ids=True`, the tool constructs a Python expression for the agent to evaluate
+  and return identifiers from `id_column`. If `row_number` is provided (1-based), only that row’s
+  first identifier is returned; otherwise a list is returned from all rows that have values.
+"""
+import logging
+from typing import Annotated, Any
+import pandas as pd
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langchain_experimental.agents import create_pandas_dataframe_agent
+from langgraph.prebuilt import InjectedState
+from langgraph.types import Command
+from pydantic import BaseModel, Field
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class NoPapersFoundError(Exception):
+    """
+    Exception raised when no papers are found in the state.
+    Raised by `query_dataframe` when `state['last_displayed_papers']` is missing or empty,
+    signalling that a search needs to be performed first.
+    """
+class QueryDataFrameInput(BaseModel):
+    """
+    Input schema for querying the last displayed papers metadata DataFrame.
+    Fields:
+      question (str):
+        The query to execute. Accepts natural language (e.g., "List titles from 2024")
+        or a Python expression over the DataFrame (e.g., "df['Title'].tolist()").
+      extract_ids (bool, default=False):
+        When True, the tool prepares a Python expression for the DataFrame agent to extract
+        identifiers from `id_column`. Use to obtain IDs from the metadata table.
+      id_column (str, default="paper_ids"):
+        Name of the column that contains per-row lists of identifiers (e.g., ["arxiv:2301.12345"]).
+        Used only when `extract_ids=True`.
+      row_number (int | None, default=None):
+        1-based row index. When provided with `extract_ids=True`, returns only that row’s first
+        identifier. When omitted, returns a list of first identifiers from each applicable row.
+      tool_call_id (InjectedToolCallId):
+        Internal identifier for tracing the tool invocation.
+      state (dict):
+        Agent state containing:
+          - 'last_displayed_papers': dict with the current results table (rows → metadata)
+          - 'llm_model': model object or reference for the DataFrame agent
+    """
+    question: str = Field(
+        description=(
+            "The metadata query to run over the papers DataFrame. Can be natural language "
+            "(e.g., 'List all titles by author X') or Python code "
+            "(e.g., df['arxiv_id'].dropna().tolist())."
+        )
+    )
+    extract_ids: bool = Field(
+        default=False,
+        description=(
+            "If true, instruct the DataFrame agent to extract values from the"
+            "specified ID column via a Python expression."
+        ),
+    )
+    id_column: str = Field(
+        default="paper_ids",
+        description=(
+            "Name of the metadata column containing a list of paper IDs to"
+            "extract when extract_ids=True."
+        ),
+    )
+    row_number: int | None = Field(
+        default=None,
+        description=(
+            "1-based index of the ID to extract from the list; if provided, returns only"
+            "that single ID."
+        ),
+    )
+    tool_call_id: Annotated[str, InjectedToolCallId]
+    state: Annotated[dict, InjectedState]
+@tool(
+    "query_dataframe",
+    args_schema=QueryDataFrameInput,
+    parse_docstring=True,
+)
+def query_dataframe(
+    question: str,
+    state: Annotated[dict, InjectedState],
+    tool_call_id: str,
+    **kwargs: Any,
+) -> Command:
+    """
+    Execute a metadata query against the DataFrame built from `last_displayed_papers`.
+    Behavior
+    - Builds a pandas DataFrame from `state['last_displayed_papers']`.
+    - Instantiates a pandas DataFrame agent with `state['llm_model']`.
+    - Runs either:
+        • the provided natural-language prompt, or
+        • a constructed Python expression when `extract_ids=True`
+          (optionally scoped to `row_number`, 1-based).
+    - Returns the DataFrame agent’s output text in a ToolMessage.
+    Parameters
+      question (str):
+        Natural-language query or Python expression to run on the DataFrame.
+      state (dict):
+        Must provide 'llm_model' and 'last_displayed_papers'.
+      tool_call_id (str):
+        Internal identifier for the tool call.
+      **kwargs:
+        extract_ids (bool): Enable ID extraction from `id_column`.
+        id_column (str): Column containing lists of identifiers (default: "paper_ids").
+        row_number (int | None): 1-based index for a single-row extraction.
+    Returns
+      Command:
+        update = {
+          "messages": [
+            ToolMessage(
+              content=<text result from the DataFrame agent>,
+              tool_call_id=<tool_call_id>
+            )
+          ]
+        }
+    Errors
+    - Raises `ValueError` if 'llm_model' is missing in `state`.
+    - Raises `NoPapersFoundError` if `state['last_displayed_papers']` is missing or empty.
+    - Raises `ValueError` if a required argument for the chosen mode is invalid
+      (e.g., no `id_column` when `extract_ids=True`).
+    Examples
+    - Natural language:
+        question="List titles where Year >= 2023"
+    - Python list of titles:
+        question="df.query('Year >= 2023')['Title'].tolist()"
+    - Extract first ID from row 1:
+        extract_ids=True, row_number=1
+    - Extract first IDs from all rows:
+        extract_ids=True
+    """
+    logger.info("Querying last displayed papers with question: %s", question)
+    llm_model = state.get("llm_model")
+    if llm_model is None:
+        raise ValueError("Missing 'llm_model' in state.")
+    context_val = state.get("last_displayed_papers")
+    if not context_val:
+        logger.info("No papers displayed so far, raising NoPapersFoundError")
+        raise NoPapersFoundError("No papers found. A search needs to be performed first.")
+    # Resolve the paper dictionary
+    if isinstance(context_val, dict):
+        dic_papers = context_val
+    else:
+        dic_papers = state.get(context_val)
+    if not isinstance(dic_papers, dict):
+        raise ValueError(
+            "Could not resolve a valid metadata dictionary from 'last_displayed_papers'"
+        )
+    df_papers = pd.DataFrame.from_dict(dic_papers, orient="index")
+    # Prepare the query: if extracting IDs, let the DataFrame agent handle it via Python code
+    extract_ids_flag = kwargs.get("extract_ids", False)
+    id_column = kwargs.get("id_column", "paper_ids")
+    row_number = kwargs.get("row_number")
+    question_to_agent = question
+    if extract_ids_flag:
+        if not id_column:
+            raise ValueError("Must specify 'id_column' when extract_ids=True.")
+        if row_number is not None:
+            question_to_agent = f"df['{id_column}'].dropna().str[0].tolist()[{row_number - 1}]"
+        else:
+            question_to_agent = f"df['{id_column}'].dropna().str[0].tolist()"
+        logger.info("extract_ids enabled: asking agent to run expression: %s", question_to_agent)
+    df_agent = create_pandas_dataframe_agent(
+        llm_model,
+        allow_dangerous_code=True,
+        agent_type="tool-calling",
+        df=df_papers,
+        max_iterations=5,
+        include_df_in_prompt=True,
+        number_of_head_rows=df_papers.shape[0],
+        verbose=True,
+    )
+    llm_result = df_agent.invoke({"input": question_to_agent}, stream_mode=None)
+    response_text = llm_result["output"]
+    return Command(
+        update={
+            "messages": [
+                ToolMessage(
+                    content=response_text,
+                    tool_call_id=tool_call_id,
+                )
+            ],
+        }
+    )

aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py ADDED Viewed

@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+"""
+Resolve a paper title to a Semantic Scholar paperId.
+This module provides a tool that queries the Semantic Scholar API for the best match to a
+given paper title (full or partial) and returns the corresponding `paperId` string.
+Configuration is loaded via Hydra and the top ranked result is returned.
+"""
+import logging
+from typing import Annotated, Any
+import hydra
+import requests
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langgraph.types import Command
+from pydantic import BaseModel, Field
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class RetrieveSemanticScholarPaperIdInput(BaseModel):
+    """
+    Input schema for title→paperId resolution.
+    Fields
+    -------
+    paper_title : str
+        Paper title to search. Accepts full titles or informative partial titles.
+    tool_call_id : InjectedToolCallId
+        Runtime-injected identifier for tracing the tool invocation.
+    """
+    # NOTE(review): pydantic can expose this class docstring as the schema description,
+    # so rewording it may change what consumers of the schema see — confirm before editing.
+    # Required field (`...` means no default): the title text sent as the search query.
+    paper_title: str = Field(..., description="The paper title to search for on Semantic Scholar.")
+    # Injected by the runtime rather than supplied by the model.
+    tool_call_id: Annotated[str, InjectedToolCallId]
+@tool(
+    "retrieve_semantic_scholar_paper_id",
+    args_schema=RetrieveSemanticScholarPaperIdInput,
+    parse_docstring=True,
+)
+def retrieve_semantic_scholar_paper_id(
+    paper_title: str,
+    tool_call_id: str,
+) -> Command[Any]:
+    """
+    Look up a Semantic Scholar paperId from a paper title.
+    Behavior
+    --------
+    - Loads Hydra config from `tools.retrieve_semantic_scholar_paper_id`.
+    - Sends a search request with `query=<paper_title>`, `limit=1`, and requested fields.
+    - Parses the top hit and returns its `paperId` as the ToolMessage content (plain string).
+    Parameters
+    ----------
+    paper_title : str
+        Title or informative partial title to resolve.
+    tool_call_id : str
+        Runtime-injected identifier for the tool call.
+    Returns
+    -------
+    Command
+        update = {
+          "messages": [
+            ToolMessage(
+              content="<paperId>",  # Semantic Scholar paperId string
+              tool_call_id=<tool_call_id>
+            )
+          ]
+        }
+    Exceptions
+    ----------
+    ValueError
+        Raised when no match is found for the provided title.
+    requests.RequestException
+        Raised on network/HTTP errors (timeout, connection issues, etc.).
+    Examples
+    --------
+    >>> retrieve_semantic_scholar_paper_id("Attention Is All You Need", "tc_123")
+    """
+    # Load hydra configuration
+    with hydra.initialize(version_base=None, config_path="../../configs"):
+        cfg = hydra.compose(
+            config_name="config",
+            overrides=["tools/retrieve_semantic_scholar_paper_id=default"],
+        )
+        cfg = cfg.tools.retrieve_semantic_scholar_paper_id
+        logger.info("Loaded configuration for Semantic Scholar paper ID retrieval tool")
+    logger.info("Retrieving ID of paper with title: %s", paper_title)
+    endpoint = cfg.api_endpoint
+    params = {
+        "query": paper_title,
+        "limit": 1,
+        "fields": ",".join(cfg.api_fields),
+    }
+    response = requests.get(endpoint, params=params, timeout=10)
+    data = response.json()
+    papers = data.get("data", [])
+    logger.info("Received %d papers", len(papers))
+    if not papers:
+        logger.error("No papers found for query: %s", paper_title)
+        raise ValueError(f"No papers found for query: {paper_title}. Try again.")
+    # Extract the paper ID from the top result
+    paper_id = papers[0]["paperId"]
+    logger.info("Found paper ID: %s", paper_id)
+    # Prepare the response content (just the ID)
+    response_text = paper_id
+    return Command(
+        update={
+            "messages": [
+                ToolMessage(
+                    content=response_text,
+                    tool_call_id=tool_call_id,
+                )
+            ],
+        }
+    )

aiagents4pharma/talk2scholars/tools/s2/search.py ADDED Viewed

@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+"""
+Search for academic papers on Semantic Scholar by title or keywords.
+Given a text query, this tool retrieves relevant papers from Semantic Scholar,
+optionally filtered by publication year.
+"""
+import logging
+from typing import Annotated, Any
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langgraph.types import Command
+from pydantic import BaseModel, Field
+from .utils.search_helper import SearchData
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class SearchInput(BaseModel):
+    """Defines the input schema for the paper search tool.
+    Attributes:
+        query: Full or partial paper title or keywords to search for.
+        limit: Maximum number of search results to return (1-100).
+        year: Optional publication year filter; supports 'YYYY',
+        'YYYY-', '-YYYY', 'YYYY:YYYY'.
+        tool_call_id: Internal tool call identifier injected by the system.
+    """
+    query: str = Field(description="Full or partial paper title or keywords to search for")
+    limit: int = Field(
+        default=10,
+        description="Maximum number of search results to return (1-100)",
+        ge=1,
+        le=100,
+    )
+    year: str | None = Field(
+        default=None,
+        description="Publication year filter; supports formats:"
+        "'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'",
+    )
+    tool_call_id: Annotated[str, InjectedToolCallId]
+@tool(
+    "search_tool",
+    args_schema=SearchInput,
+    parse_docstring=True,
+)
+def search_tool(
+    query: str,
+    tool_call_id: Annotated[str, InjectedToolCallId],
+    limit: int = 10,
+    year: str | None = None,
+) -> Command[Any]:
+    """
+    Return academic papers from Semantic Scholar matching a title or keyword query.
+    This tool searches Semantic Scholar for papers whose titles or keywords
+    match the given text, optionally filtered by publication year.
+    Args:
+        query (str): Full or partial paper title or keywords to search for.
+        tool_call_id (str): Internal tool call identifier injected by the system.
+        limit (int, optional): Maximum number of search results to return. Defaults to 5.
+        year (str, optional): Publication year filter; supports 'YYYY',
+        'YYYY-', '-YYYY', 'YYYY:YYYY'. Defaults to None.
+    Returns:
+        Command: A Command object containing:
+            - papers: List of matching papers.
+            - last_displayed_papers: Same list for display purposes.
+            - messages: List containing a ToolMessage with search results details.
+    """
+    # Create search data object to organize variables
+    search_data = SearchData(query, limit, year, tool_call_id)
+    # Process the search
+    results = search_data.process_search()
+    return Command(
+        update={
+            "papers": results["papers"],
+            # Store the latest results mapping directly for display
+            "last_displayed_papers": results["papers"],
+            "messages": [
+                ToolMessage(
+                    content=results["content"],
+                    tool_call_id=tool_call_id,
+                    artifact=results["papers"],
+                )
+            ],
+        }
+    )

aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py ADDED Viewed

@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+"""
+Recommend research papers related to a single input paper using Semantic Scholar.
+Given a Semantic Scholar paper ID, this tool retrieves related works
+(citations and references) and returns a curated list of recommended papers.
+"""
+import logging
+from typing import Annotated, Any
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langgraph.types import Command
+from pydantic import BaseModel, Field
+from .utils.single_helper import SinglePaperRecData
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class SinglePaperRecInput(BaseModel):
+    """Defines the input schema for the single-paper recommendation tool.
+    Attributes:
+        paper_id: 40-character Semantic Scholar Paper ID to base recommendations on.
+        limit: Maximum number of recommendations to return (1-500).
+        year: Optional publication year filter; supports 'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'.
+        tool_call_id: Internal tool call identifier injected by the system.
+    """
+    paper_id: str = Field(
+        description="40-character Semantic Scholar Paper ID to base recommendations on"
+    )
+    limit: int = Field(
+        default=10,
+        description="Maximum number of recommendations to return (1-500)",
+        ge=1,
+        le=500,
+    )
+    year: str | None = Field(
+        default=None,
+        description="Publication year filter; supports formats::"
+        "'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'",
+    )
+    tool_call_id: Annotated[str, InjectedToolCallId]
+    model_config = {"arbitrary_types_allowed": True}
+@tool(
+    args_schema=SinglePaperRecInput,
+    parse_docstring=True,
+)
+def get_single_paper_recommendations(
+    paper_id: str,
+    tool_call_id: Annotated[str, InjectedToolCallId],
+    limit: int = 10,
+    year: str | None = None,
+) -> Command[Any]:
+    """
+    Recommend related research papers using the Semantic Scholar API for a single paper ID.
+    This tool is designed to suggest relevant papers based on one input Semantic Scholar paper ID.
+    It fetches citations and references for the given paper and returns a set of recommended works.
+    Args:
+        paper_id (str): 40-character Semantic Scholar paper ID.
+        tool_call_id (str): Internal tool call identifier injected by the system.
+        limit (int, optional): Maximum number of recommendations to return. Defaults to 5.
+        year (str, optional): Filter recommendations by publication year.
+            Supports formats: 'YYYY', 'YYYY-', '-YYYY', or 'YYYY:YYYY'. Defaults to None.
+    Returns:
+        Command: A Command object containing:
+            - papers: List of recommended papers.
+            - last_displayed_papers: Same list for display purposes.
+            - messages: List containing a ToolMessage with recommendation details.
+    """
+    # Create recommendation data object to organize variables
+    rec_data = SinglePaperRecData(paper_id, limit, year, tool_call_id)
+    # Process the recommendations
+    results = rec_data.process_recommendations()
+    return Command(
+        update={
+            "papers": results["papers"],
+            # Store the latest single-paper results mapping directly for display
+            "last_displayed_papers": results["papers"],
+            "messages": [
+                ToolMessage(
+                    content=results["content"],
+                    tool_call_id=tool_call_id,
+                    artifact=results["papers"],
+                )
+            ],
+        }
+    )

aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""This module contains utility functions for the Semantic Scholar search tool."""
+from . import multi_helper, search_helper, single_helper
+__all__ = ["search_helper", "single_helper", "multi_helper"]