PyPI - aiagents4pharma - Versions diffs - 1.9.0__py3-none-any.whl → 1.15.0__py3-none-any.whl - Mend

aiagents4pharma 1.9.0__py3-none-any.whl → 1.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

aiagents4pharma/talk2competitors/tools/s2/multi_paper_rec.py ADDED Viewed

@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+"""
+multi_paper_rec: Tool for getting recommendations
+                based on multiple papers
+"""
+import json
+import logging
+from typing import Annotated, Any, Dict, List, Optional
+import pandas as pd
+import requests
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langgraph.types import Command
+from pydantic import BaseModel, Field
+class MultiPaperRecInput(BaseModel):
+    """Input schema for multiple paper recommendations tool."""
+    # Model settings are declared first so the fields below read as one group.
+    model_config = {"arbitrary_types_allowed": True}
+    # Papers the recommendations are based on.
+    paper_ids: List[str] = Field(
+        description="List of Semantic Scholar Paper IDs to get recommendations for",
+    )
+    # Constrained to the range 1..500 by ge/le.
+    limit: int = Field(
+        ge=1,
+        le=500,
+        default=2,
+        description="Maximum total number of recommendations to return",
+    )
+    # Optional publication-year filter; the accepted formats are listed in the description.
+    year: Optional[str] = Field(
+        None,
+        description=(
+            "Year range in format: YYYY for specific year, "
+            "YYYY- for papers after year, -YYYY for papers before year, "
+            "or YYYY:YYYY for range"
+        ),
+    )
+    # Marked with InjectedToolCallId; echoed back in the tool's ToolMessage.
+    tool_call_id: Annotated[str, InjectedToolCallId]
+@tool(args_schema=MultiPaperRecInput)
+def get_multi_paper_recommendations(
+    paper_ids: List[str],
+    tool_call_id: Annotated[str, InjectedToolCallId],
+    limit: int = 2,
+    year: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Get paper recommendations based on multiple papers.
+    Args:
+        paper_ids (List[str]): The list of paper IDs to base recommendations on.
+        tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
+        limit (int, optional): The maximum number of recommendations to return. Defaults to 2.
+        year (str, optional): Year range for papers.
+        Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.
+    Returns:
+        Dict[str, Any]: The recommendations and related information.
+    """
+    logging.info("Starting multi-paper recommendations search.")
+    # Query-string options; the limit is clamped to 500 before it is sent.
+    query_params = {
+        "limit": min(limit, 500),
+        "fields": "paperId,title,abstract,year,authors,citationCount,url",
+    }
+    # The year filter is only sent when the caller supplied one.
+    if year:
+        query_params["year"] = year
+    # The seed papers travel in the JSON body; no negative examples are sent.
+    request_body = json.dumps({"positivePaperIds": paper_ids, "negativePaperIds": []})
+    response = requests.post(
+        "https://api.semanticscholar.org/recommendations/v1/papers",
+        headers={"Content-Type": "application/json"},
+        params=query_params,
+        data=request_body,
+        timeout=10,
+    )
+    logging.info(
+        "API Response Status for multi-paper recommendations: %s", response.status_code
+    )
+    # Keep only entries that carry both a title and a paper ID, keyed by paper ID.
+    filtered_papers = {}
+    for paper in response.json().get("recommendedPapers", []):
+        if not (paper.get("title") and paper.get("paperId")):
+            continue
+        filtered_papers[paper["paperId"]] = {
+            "Title": paper.get("title", "N/A"),
+            "Abstract": paper.get("abstract", "N/A"),
+            "Year": paper.get("year", "N/A"),
+            "Citation Count": paper.get("citationCount", "N/A"),
+            "URL": paper.get("url", "N/A"),
+        }
+    # One DataFrame row per paper (orient="index").
+    df = pd.DataFrame.from_dict(filtered_papers, orient="index")
+    logging.info("Created DataFrame with results: %s", df)
+    # Plain-text rendering of each paper; only used for the log line below.
+    papers = []
+    for rec_id, rec in filtered_papers.items():
+        papers.append(
+            f"Paper ID: {rec_id}\n"
+            f"Title: {rec['Title']}\n"
+            f"Abstract: {rec['Abstract']}\n"
+            f"Year: {rec['Year']}\n"
+            f"Citations: {rec['Citation Count']}\n"
+            f"URL: {rec['URL']}\n"
+        )
+    markdown_table = df.to_markdown(tablefmt="grid")
+    logging.info("Search results: %s", papers)
+    # State update: the paper dictionary plus a ToolMessage carrying the table.
+    tool_message = ToolMessage(content=markdown_table, tool_call_id=tool_call_id)
+    return Command(update={"papers": filtered_papers, "messages": [tool_message]})

aiagents4pharma/talk2competitors/tools/s2/search.py ADDED Viewed

@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+"""
+This tool is used to search for academic papers on Semantic Scholar.
+"""
+import logging
+from typing import Annotated, Any, Dict, Optional
+import pandas as pd
+import requests
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langgraph.types import Command
+from pydantic import BaseModel, Field
+class SearchInput(BaseModel):
+    """Input schema for the search papers tool."""
+    query: str = Field(
+        # Adjacent literals are concatenated as-is, so the first one must end
+        # with a space; otherwise the text reads "papers.Be specific".
+        description="Search query string to find academic papers. "
+        "Be specific and include relevant academic terms."
+    )
+    # Constrained to the range 1..100 by ge/le.
+    limit: int = Field(
+        default=2, description="Maximum number of results to return", ge=1, le=100
+    )
+    # Optional publication-year filter; the accepted formats are listed in the description.
+    year: Optional[str] = Field(
+        default=None,
+        description="Year range in format: YYYY for specific year, "
+        "YYYY- for papers after year, -YYYY for papers before year, or YYYY:YYYY for range",
+    )
+    # Marked with InjectedToolCallId; echoed back in the tool's ToolMessage.
+    tool_call_id: Annotated[str, InjectedToolCallId]
+@tool(args_schema=SearchInput)
+def search_tool(
+    query: str,
+    tool_call_id: Annotated[str, InjectedToolCallId],
+    limit: int = 2,
+    year: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Search for academic papers on Semantic Scholar.
+    Args:
+        query (str): The search query string to find academic papers.
+        tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
+        limit (int, optional): The maximum number of results to return. Defaults to 2.
+        year (str, optional): Year range for papers.
+        Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.
+    Returns:
+        Dict[str, Any]: The search results and related information.
+    """
+    # Report progress through logging rather than printing to stdout from
+    # library code; the rest of this function already logs its results.
+    logging.info("Starting paper search...")
+    endpoint = "https://api.semanticscholar.org/graph/v1/paper/search"
+    params = {
+        "query": query,
+        "limit": min(limit, 100),  # clamped to 100 before it is sent
+        "fields": "paperId,title,abstract,year,authors,citationCount,url",
+    }
+    # Add year parameter if provided
+    if year:
+        params["year"] = year
+    response = requests.get(endpoint, params=params, timeout=10)
+    data = response.json()
+    papers = data.get("data", [])
+    # Keep only entries that have a title and authors, keyed by paper ID.
+    filtered_papers = {
+        paper["paperId"]: {
+            "Title": paper.get("title", "N/A"),
+            "Abstract": paper.get("abstract", "N/A"),
+            "Year": paper.get("year", "N/A"),
+            "Citation Count": paper.get("citationCount", "N/A"),
+            "URL": paper.get("url", "N/A"),
+        }
+        for paper in papers
+        if paper.get("title") and paper.get("authors")
+    }
+    # NOTE(review): built this way, the paper IDs become the DataFrame columns;
+    # confirm that a one-column-per-paper table is the intended layout.
+    df = pd.DataFrame(filtered_papers)
+    # Plain-text rendering of each paper; only used for the log line below.
+    papers = [
+        f"Paper ID: {paper_id}\n"
+        f"Title: {paper_data['Title']}\n"
+        f"Abstract: {paper_data['Abstract']}\n"
+        f"Year: {paper_data['Year']}\n"
+        f"Citations: {paper_data['Citation Count']}\n"
+        f"URL: {paper_data['URL']}\n"
+        for paper_id, paper_data in filtered_papers.items()
+    ]
+    markdown_table = df.to_markdown(tablefmt="grid")
+    logging.info("Search results: %s", papers)
+    # State update: the paper dictionary plus a ToolMessage carrying the table.
+    return Command(
+        update={
+            "papers": filtered_papers,
+            "messages": [
+                ToolMessage(content=markdown_table, tool_call_id=tool_call_id)
+            ],
+        }
+    )

aiagents4pharma/talk2competitors/tools/s2/single_paper_rec.py ADDED Viewed

@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+"""
+This tool is used to return recommendations for a single paper.
+"""
+import logging
+from typing import Annotated, Any, Dict, Optional
+import pandas as pd
+import requests
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langgraph.types import Command
+from pydantic import BaseModel, Field
+# Configure logging
+# NOTE(review): basicConfig runs when this module is imported, so the import
+# itself sets up root logging at INFO level (if nothing configured it earlier)
+# - confirm this side effect is intended for a library module.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+class SinglePaperRecInput(BaseModel):
+    """Input schema for single paper recommendation tool."""
+    # Model settings are declared first so the fields below read as one group.
+    model_config = {"arbitrary_types_allowed": True}
+    # Paper the recommendations are based on.
+    paper_id: str = Field(
+        description=(
+            "Semantic Scholar Paper ID to get recommendations for "
+            "(40-character string)"
+        ),
+    )
+    # Constrained to the range 1..500 by ge/le.
+    limit: int = Field(
+        ge=1,
+        le=500,
+        default=2,
+        description="Maximum number of recommendations to return",
+    )
+    # Optional publication-year filter; the accepted formats are listed in the description.
+    year: Optional[str] = Field(
+        None,
+        description=(
+            "Year range in format: YYYY for specific year, "
+            "YYYY- for papers after year, -YYYY for papers before year, "
+            "or YYYY:YYYY for range"
+        ),
+    )
+    # Marked with InjectedToolCallId; echoed back in the tool's ToolMessage.
+    tool_call_id: Annotated[str, InjectedToolCallId]
+@tool(args_schema=SinglePaperRecInput)
+def get_single_paper_recommendations(
+    paper_id: str,
+    tool_call_id: Annotated[str, InjectedToolCallId],
+    limit: int = 2,
+    year: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Get paper recommendations based on a single paper.
+    Args:
+        paper_id (str): The Semantic Scholar Paper ID to get recommendations for.
+        tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
+        limit (int, optional): The maximum number of recommendations to return. Defaults to 2.
+        year (str, optional): Year range for papers.
+        Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.
+    Returns:
+        Dict[str, Any]: The recommendations and related information.
+    """
+    logger.info("Starting single paper recommendations search.")
+    endpoint = (
+        f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}"
+    )
+    params = {
+        "limit": min(limit, 500),  # Max 500 per API docs
+        "fields": "paperId,title,abstract,year,authors,citationCount,url",
+        "from": "all-cs",  # Using all-cs pool as specified in docs
+    }
+    # Add year parameter if provided
+    if year:
+        params["year"] = year
+    # Send the request exactly once. An earlier version issued the same GET
+    # twice and parsed the first response under the "data" key, a result that
+    # was never used; that only doubled the latency and the API quota spent.
+    response = requests.get(endpoint, params=params, timeout=10)
+    logging.info(
+        "API Response Status for recommendations of paper %s: %s",
+        paper_id,
+        response.status_code,
+    )
+    logging.info("Request params: %s", params)
+    data = response.json()
+    recommendations = data.get("recommendedPapers", [])
+    # Keep only entries that have a title and authors, keyed by paper ID.
+    filtered_papers = {
+        paper["paperId"]: {
+            "Title": paper.get("title", "N/A"),
+            "Abstract": paper.get("abstract", "N/A"),
+            "Year": paper.get("year", "N/A"),
+            "Citation Count": paper.get("citationCount", "N/A"),
+            "URL": paper.get("url", "N/A"),
+        }
+        for paper in recommendations
+        if paper.get("title") and paper.get("authors")
+    }
+    # Create a DataFrame for pretty printing
+    df = pd.DataFrame(filtered_papers)
+    # Plain-text rendering of each paper; only used for the log line below.
+    # The loop variable is named rec_id so it cannot be confused with the
+    # paper_id argument of this function.
+    papers = [
+        f"Paper ID: {rec_id}\n"
+        f"Title: {paper_data['Title']}\n"
+        f"Abstract: {paper_data['Abstract']}\n"
+        f"Year: {paper_data['Year']}\n"
+        f"Citations: {paper_data['Citation Count']}\n"
+        f"URL: {paper_data['URL']}\n"
+        for rec_id, paper_data in filtered_papers.items()
+    ]
+    # Convert DataFrame to markdown table
+    markdown_table = df.to_markdown(tablefmt="grid")
+    logging.info("Search results: %s", papers)
+    # State update: the paper dictionary plus a ToolMessage carrying the table.
+    return Command(
+        update={
+            "papers": filtered_papers,
+            "messages": [
+                ToolMessage(content=markdown_table, tool_call_id=tool_call_id)
+            ],
+        }
+    )

aiagents4pharma/talk2knowledgegraphs/__init__.py CHANGED Viewed

@@ -1,4 +1,5 @@
 '''
-This file is used to import the datasets, utils, and tools.
+This file is used to import the datasets and utils.
 '''
 from . import datasets
+from . import utils

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""
+Test cases for utils/enrichments/enrichments.py
+"""
+from ..utils.enrichments.enrichments import Enrichments
+class TestEnrichments(Enrichments):
+    """Test implementation of the Enrichments interface for testing purposes."""
+    def enrich_documents(self, texts: list[str]) -> list[str]:
+        # Produces one templated description string per input text, in order.
+        return [
+            f"Additional text description of {text} as the input." for text in texts
+        ]
+    def enrich_documents_with_rag(self, texts: list[str], docs: list[str]) -> list[str]:
+        # Currently we don't have a RAG model to test this method.
+        # Thus, we will just call the enrich_documents method instead.
+        # The docs argument is therefore ignored.
+        return self.enrich_documents(texts)
+def test_enrich_documents():
+    """Check that enrich_documents yields one templated description per input text."""
+    expected = [
+        "Additional text description of text1 as the input.",
+        "Additional text description of text2 as the input.",
+    ]
+    enriched = TestEnrichments().enrich_documents(["text1", "text2"])
+    assert enriched == expected
+def test_enrich_documents_with_rag():
+    """Check that the RAG variant yields one templated description per input text."""
+    expected = [
+        "Additional text description of text1 as the input.",
+        "Additional text description of text2 as the input.",
+    ]
+    # The reference documents are passed through the interface as-is.
+    enriched = TestEnrichments().enrich_documents_with_rag(
+        ["text1", "text2"], ["doc1", "doc2"]
+    )
+    assert enriched == expected

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py ADDED Viewed

@@ -0,0 +1,117 @@
+"""
+Test cases for utils/enrichments/ollama.py
+"""
+import pytest
+import ollama
+from ..utils.enrichments.ollama import EnrichmentWithOllama
+@pytest.fixture(name="ollama_config")
+def fixture_ollama_config():
+    """Provide the Ollama settings shared by the tests in this module."""
+    # Prompt text handed to the enrichment model; it ends with an "{input}"
+    # placeholder (presumably filled in by the model wrapper - TODO confirm).
+    prompt_template = """
+            Given the input as a list of strings, please return the list of addditional information of
+            each input terms using your prior knowledge.
+            Example:
+            Input: ['acetaminophen', 'aspirin']
+            Ouput: ['acetaminophen is a medication used to treat pain and fever',
+            'aspirin is a medication used to treat pain, fever, and inflammation']
+            Do not include any pretext as the output, only the list of strings enriched.
+            Input: {input}
+        """
+    return dict(
+        model_name="smollm2:360m",
+        prompt_enrichment=prompt_template,
+        temperature=0.0,
+        streaming=False,
+    )
+def test_no_model_ollama(ollama_config):
+    """Test the case when the Ollama model is not available."""
+    import re  # local import: only needed to escape the expected message
+    cfg = ollama_config
+    cfg_model = "smollm2:135m"  # Choose a small model
+    # Delete the Ollama model so the constructor starts without it.
+    try:
+        ollama.delete(cfg_model)
+    except ollama.ResponseError:
+        # Deliberate best-effort: the delete call failing here is tolerated.
+        pass
+    # pytest.raises treats `match` as a regular expression, so the literal
+    # message (which contains "." characters) is escaped to be matched verbatim.
+    expected_message = f"Error: Pulled {cfg_model} model and restarted Ollama server."
+    with pytest.raises(ValueError, match=re.escape(expected_message)):
+        EnrichmentWithOllama(
+            model_name=cfg_model,
+            prompt_enrichment=cfg["prompt_enrichment"],
+            temperature=cfg["temperature"],
+            streaming=cfg["streaming"],
+        )
+    # Remove the model again (the expected error message says it was pulled).
+    ollama.delete(cfg_model)
+def test_enrich_nodes_ollama(ollama_config):
+    """Test the Ollama textual enrichment class for node enrichment."""
+    # Build the enrichment model from the shared fixture settings
+    enr_model = EnrichmentWithOllama(
+        model_name=ollama_config["model_name"],
+        prompt_enrichment=ollama_config["prompt_enrichment"],
+        temperature=ollama_config["temperature"],
+        streaming=ollama_config["streaming"],
+    )
+    # Enrich two node names
+    nodes = ["Adalimumab", "Infliximab"]
+    enriched_nodes = enr_model.enrich_documents(nodes)
+    # Expect one output per input, each different from the text it came from
+    assert len(enriched_nodes) == 2
+    assert all(enriched != node for enriched, node in zip(enriched_nodes, nodes))
+def test_enrich_relations_ollama(ollama_config):
+    """Test the Ollama textual enrichment class for relation enrichment."""
+    # Build the enrichment model from the shared fixture settings
+    enr_model = EnrichmentWithOllama(
+        model_name=ollama_config["model_name"],
+        prompt_enrichment=ollama_config["prompt_enrichment"],
+        temperature=ollama_config["temperature"],
+        streaming=ollama_config["streaming"],
+    )
+    # Enrich two relation strings
+    relations = [
+        "IL23R-gene causation disease-inflammatory bowel diseases",
+        "NOD2-gene causation disease-inflammatory bowel diseases",
+    ]
+    enriched_relations = enr_model.enrich_documents(relations)
+    # Expect one output per input, each different from the text it came from
+    assert len(enriched_relations) == 2
+    assert all(
+        enriched != relation
+        for enriched, relation in zip(enriched_relations, relations)
+    )
+def test_enrich_ollama_rag(ollama_config):
+    """Test the Ollama textual enrichment class for enrichment with RAG (not implemented)."""
+    # Build the enrichment model from the shared fixture settings
+    enr_model = EnrichmentWithOllama(
+        model_name=ollama_config["model_name"],
+        prompt_enrichment=ollama_config["prompt_enrichment"],
+        temperature=ollama_config["temperature"],
+        streaming=ollama_config["streaming"],
+    )
+    # Enrich two node names, passing placeholder document paths alongside them
+    nodes = ["Adalimumab", "Infliximab"]
+    docs = [r"\path\to\doc1", r"\path\to\doc2"]
+    enriched_nodes = enr_model.enrich_documents_with_rag(nodes, docs)
+    # Expect one output per input, each different from the text it came from
+    assert len(enriched_nodes) == 2
+    assert all(enriched != node for enriched, node in zip(enriched_nodes, nodes))

aiagents4pharma/talk2knowledgegraphs/utils/__init__.py CHANGED Viewed

@@ -0,0 +1,5 @@
+'''
+This file is used to import utilities.
+'''
+from . import enrichments
+from . import embeddings

aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""
+This package contains modules to use the enrichment model
+"""
+from . import enrichments
+from . import ollama

aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py ADDED Viewed

@@ -0,0 +1,36 @@
+"""
+Enrichments interface
+"""
+from abc import ABC, abstractmethod
+class Enrichments(ABC):
+    """Interface for enrichment models.
+    This is an interface meant for implementing text enrichment models.
+    Enrichment models are used to enrich node or relation features in a given knowledge graph.
+    Both methods are abstract, so a concrete subclass must implement each of them.
+    """
+    # The return type is list[str] (one enriched text per input), matching the
+    # docstring below and the return type of enrich_documents_with_rag.
+    @abstractmethod
+    def enrich_documents(self, texts: list[str]) -> list[str]:
+        """Enrich documents.
+        Args:
+            texts: List of documents to enrich.
+        Returns:
+            List of enriched documents.
+        """
+    @abstractmethod
+    def enrich_documents_with_rag(self, texts: list[str], docs: list[str]) -> list[str]:
+        """Enrich documents with RAG.
+        Args:
+            texts: List of documents to enrich.
+            docs: List of reference documents to enrich the input texts.
+        Returns:
+            List of enriched documents with RAG.
+        """

aiagents4pharma 1.9.0__py3-none-any.whl → 1.15.0__py3-none-any.whl

aiagents4pharma 1.9.0__py3-none-any.whl → 1.15.0__py3-none-any.whl