aiagents4pharma 1.20.1__py3-none-any.whl → 1.22.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +64 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +33 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +16 -0
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +49 -0
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +42 -0
- aiagents4pharma/talk2scholars/agents/main_agent.py +90 -91
- aiagents4pharma/talk2scholars/agents/s2_agent.py +61 -17
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +31 -10
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +8 -16
- aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +11 -9
- aiagents4pharma/talk2scholars/configs/config.yaml +1 -0
- aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +2 -0
- aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +1 -0
- aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +1 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +36 -7
- aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +58 -0
- aiagents4pharma/talk2scholars/tests/test_main_agent.py +98 -122
- aiagents4pharma/talk2scholars/tests/test_s2_agent.py +95 -29
- aiagents4pharma/talk2scholars/tests/test_s2_tools.py +158 -22
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +4 -2
- aiagents4pharma/talk2scholars/tools/s2/display_results.py +60 -21
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +35 -8
- aiagents4pharma/talk2scholars/tools/s2/query_results.py +61 -0
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +79 -0
- aiagents4pharma/talk2scholars/tools/s2/search.py +34 -10
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +39 -9
- {aiagents4pharma-1.20.1.dist-info → aiagents4pharma-1.22.0.dist-info}/METADATA +2 -1
- {aiagents4pharma-1.20.1.dist-info → aiagents4pharma-1.22.0.dist-info}/RECORD +38 -29
- aiagents4pharma/talk2scholars/tests/test_integration.py +0 -237
- {aiagents4pharma-1.20.1.dist-info → aiagents4pharma-1.22.0.dist-info}/LICENSE +0 -0
- {aiagents4pharma-1.20.1.dist-info → aiagents4pharma-1.22.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.20.1.dist-info → aiagents4pharma-1.22.0.dist-info}/top_level.txt +0 -0

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+
+"""
+Test cases for utils/embeddings/nim_molmim.py
+"""
+
+import unittest
+from unittest.mock import patch, MagicMock
+from ..utils.embeddings.nim_molmim import EmbeddingWithMOLMIM
+
+class TestEmbeddingWithMOLMIM(unittest.TestCase):
+    """
+    Test cases for EmbeddingWithMOLMIM class.
+    """
+    def setUp(self):
+        self.base_url = "https://fake-nim-api.com/embeddings"
+        self.embeddings_model = EmbeddingWithMOLMIM(self.base_url)
+        self.test_texts = ["CCO", "CCC", "C=O"]
+        self.test_query = "CCO"
+        self.mock_response = {
+            "embeddings": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
+        }
+
+    @patch("requests.post")
+    def test_embed_documents(self, mock_post):
+        '''
+        Test the embed_documents method.
+        '''
+        # Mock the response from requests.post
+        mock_post.return_value = MagicMock()
+        mock_post.return_value.json.return_value = self.mock_response
+        embeddings = self.embeddings_model.embed_documents(self.test_texts)
+        # Assertions
+        self.assertEqual(embeddings, self.mock_response["embeddings"])
+        mock_post.assert_called_once_with(
+            self.base_url,
+            headers={
+                'accept': 'application/json',
+                'Content-Type': 'application/json'
+            },
+            data='{"sequences": ["CCO", "CCC", "C=O"]}',
+            timeout=60
+        )
+
+    @patch("requests.post")
+    def test_embed_query(self, mock_post):
+        '''
+        Test the embed_query method.
+        '''
+        # Mock the response from requests.post
+        mock_post.return_value = MagicMock()
+        mock_post.return_value.json.return_value = {"embeddings": [[0.1, 0.2, 0.3]]}
+        embedding = self.embeddings_model.embed_query(self.test_query)
+        # Assertions
+        self.assertEqual(embedding, [[0.1, 0.2, 0.3]])
+        mock_post.assert_called_once_with(
+            self.base_url,
+            headers={
+                'accept': 'application/json',
+                'Content-Type': 'application/json'
+            },
+            data='{"sequences": ["CCO"]}',
+            timeout=60
+        )

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+
+"""
+Test cases for utils/enrichments/pubchem_strings.py
+"""
+
+import pytest
+from ..utils.enrichments.pubchem_strings import EnrichmentWithPubChem
+
+# In this test, we will consider 2 examples:
+# 1. PubChem ID: 5311000 (Alclometasone)
+# 2. PubChem ID: 1X (Fake ID)
+# The expected SMILES representation for the first PubChem ID is:
+SMILES_FIRST = 'C[C@@H]1C[C@H]2[C@@H]3[C@@H](CC4=CC(=O)C=C[C@@]'
+SMILES_FIRST += '4([C@H]3[C@H](C[C@@]2([C@]1(C(=O)CO)O)C)O)C)Cl'
+# The expected SMILES representation for the second PubChem ID is None.
+
+@pytest.fixture(name="enrich_obj")
+def fixture_pubchem_config():
+    """Return a dictionary with the configuration for the PubChem enrichment."""
+    return EnrichmentWithPubChem()
+
+def test_enrich_documents(enrich_obj):
+    """Test the enrich_documents method."""
+    pubchem_ids = ["5311000", "1X"]
+    enriched_strings = enrich_obj.enrich_documents(pubchem_ids)
+    assert enriched_strings == [SMILES_FIRST, None]
+
+def test_enrich_documents_with_rag(enrich_obj):
+    """Test the enrich_documents_with_rag method."""
+    pubchem_ids = ["5311000", "1X"]
+    enriched_strings = enrich_obj.enrich_documents_with_rag(pubchem_ids, None)
+    assert enriched_strings == [SMILES_FIRST, None]

aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py
@@ -0,0 +1,16 @@
+"""
+Test cases for utils/pubchem_utils.py
+"""
+
+from ..utils import pubchem_utils
+
+def test_drugbank_id2pubchem_cid():
+    """
+    Test the drugbank_id2pubchem_cid method.
+
+    The DrugBank ID for Alclometasone is DB00240.
+    The PubChem CID for Alclometasone is 5311000.
+    """
+    drugbank_id = "DB00240"
+    pubchem_cid = pubchem_utils.drugbank_id2pubchem_cid(drugbank_id)
+    assert pubchem_cid == 5311000

aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py
@@ -0,0 +1,54 @@
+"""
+Embedding class using MOLMIM model from NVIDIA NIM.
+"""
+
+import json
+from typing import List
+import requests
+from .embeddings import Embeddings
+
+class EmbeddingWithMOLMIM(Embeddings):
+    """
+    Embedding class using MOLMIM model from NVIDIA NIM
+    """
+    def __init__(self, base_url: str):
+        """
+        Initialize the EmbeddingWithMOLMIM class.
+
+        Args:
+            base_url: The base URL for the NIM/MOLMIM model.
+        """
+        # Set base URL
+        self.base_url = base_url
+
+    def embed_documents(self, texts: List[str]) -> List[float]:
+        """
+        Generate embedding for a list of SMILES strings using MOLMIM model.
+
+        Args:
+            texts: The list of SMILES strings to be embedded.
+
+        Returns:
+            The list of embeddings for the given SMILES strings.
+        """
+        headers = {
+            'accept': 'application/json',
+            'Content-Type': 'application/json'
+        }
+        data = json.dumps({"sequences": texts})
+        response = requests.post(self.base_url, headers=headers, data=data, timeout=60)
+        embeddings = response.json()["embeddings"]
+        return embeddings
+
+    def embed_query(self, text: str) -> List[float]:
+        """
+        Generate embeddings for an input query using MOLMIM model.
+
+        Args:
+            text: A query to be embedded.
+        Returns:
+            The embeddings for the given query.
+        """
+        # Generate the embedding
+        embeddings = self.embed_documents([text])
+        return embeddings

aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+
+"""
+Enrichment class for enriching PubChem IDs with their STRINGS representation.
+"""
+
+from typing import List
+import pubchempy as pcp
+from .enrichments import Enrichments
+
+class EnrichmentWithPubChem(Enrichments):
+    """
+    Enrichment class using PubChem
+    """
+    def enrich_documents(self, texts: List[str]) -> List[str]:
+        """
+        Enrich a list of input PubChem IDs with their STRINGS representation.
+
+        Args:
+            texts: The list of pubchem IDs to be enriched.
+
+        Returns:
+            The list of enriched STRINGS
+        """
+
+        enriched_pubchem_ids = []
+        pubchem_cids = texts
+        for pubchem_cid in pubchem_cids:
+            try:
+                c = pcp.Compound.from_cid(pubchem_cid)
+            except pcp.BadRequestError:
+                enriched_pubchem_ids.append(None)
+                continue
+            enriched_pubchem_ids.append(c.isomeric_smiles)
+
+        return enriched_pubchem_ids
+
+    def enrich_documents_with_rag(self, texts, docs):
+        """
+        Enrich a list of input PubChem IDs with their STRINGS representation.
+
+        Args:
+            texts: The list of pubchem IDs to be enriched.
+            docs: None
+
+        Returns:
+            The list of enriched STRINGS
+        """
+        return self.enrich_documents(texts)

aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+"""
+Enrichment class for enriching PubChem IDs with their STRINGS representation.
+"""
+
+import logging
+import requests
+import hydra
+
+# Initialize logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+def drugbank_id2pubchem_cid(drugbank_id):
+    """
+    Convert DrugBank ID to PubChem CID.
+
+    Args:
+        drugbank_id: The DrugBank ID of the drug.
+
+    Returns:
+        The PubChem CID of the drug.
+    """
+    logger.log(logging.INFO, "Load Hydra configuration for PubChem ID conversion.")
+    with hydra.initialize(version_base=None, config_path="../configs"):
+        cfg = hydra.compose(config_name='config',
+                            overrides=['utils/pubchem_utils=default'])
+        cfg = cfg.utils.pubchem_utils
+    # Prepare the URL
+    pubchem_url_for_drug = cfg.drugbank_id_to_pubchem_cid_url + drugbank_id + '/JSON'
+    # Get the data
+    response = requests.get(pubchem_url_for_drug, timeout=60)
+    data = response.json()
+    # Extract the PubChem CID
+    cid = None
+    for substance in data.get("PC_Substances", []):
+        for compound in substance.get("compound", []):
+            if "id" in compound and "type" in compound["id"] and compound["id"]["type"] == 1:
+                cid = compound["id"].get("id", {}).get("cid")
+                break
+    return cid

aiagents4pharma/talk2scholars/agents/main_agent.py
@@ -6,28 +6,17 @@ Main agent for the talk2scholars app using ReAct pattern.
 This module implements a hierarchical agent system where a supervisor agent
 routes queries to specialized sub-agents. It follows the LangGraph patterns
 for multi-agent systems and implements proper state management.
-
-The main components are:
-1. Supervisor node with ReAct pattern for intelligent routing.
-2. S2 agent node for handling academic paper queries.
-3. Shared state management via Talk2Scholars.
-4. Hydra-based configuration system.
-
-Example:
-    app = get_app("thread_123", "gpt-4o-mini")
-    result = app.invoke({
-        "messages": [("human", "Find papers about AI agents")]
-    })
 """
 
 import logging
 from typing import Literal, Callable
+from pydantic import BaseModel
 import hydra
 from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
 from langchain_openai import ChatOpenAI
 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.graph import END, START, StateGraph
-from langgraph.prebuilt import create_react_agent
 from langgraph.types import Command
 from ..agents import s2_agent
 from ..state.state_talk2scholars import Talk2Scholars
@@ -39,13 +28,13 @@ logger = logging.getLogger(__name__)
 
 def get_hydra_config():
     """
-    Loads
+    Loads the Hydra configuration for the main agent.
 
-    This function
-    agent, ensuring that all required parameters are
+    This function initializes the Hydra configuration system and retrieves the settings
+    for the `Talk2Scholars` agent, ensuring that all required parameters are loaded.
 
     Returns:
-
+        DictConfig: The configuration object containing parameters for the main agent.
     """
     with hydra.initialize(version_base=None, config_path="../configs"):
         cfg = hydra.compose(
@@ -54,116 +43,127 @@ def get_hydra_config():
     return cfg.agents.talk2scholars.main_agent
 
 
-def make_supervisor_node(
+def make_supervisor_node(llm_model: BaseChatModel, thread_id: str) -> Callable:
     """
-    Creates
+    Creates the supervisor node responsible for routing user queries to the appropriate sub-agents.
 
-    This function initializes
-
-
+    This function initializes the routing logic by leveraging the system and router prompts defined
+    in the Hydra configuration. The supervisor determines whether to
+    call a sub-agent (like `s2_agent`)
+    or directly generate a response using the language model.
 
     Args:
-
-        thread_id (str): Unique identifier for the conversation session.
+        llm_model (BaseChatModel): The language model used for decision-making.
+        thread_id (str): Unique identifier for the current conversation session.
 
     Returns:
-        Callable:
-
-    Example:
-        supervisor = make_supervisor_node(llm, "thread_123")
-        workflow.add_node("supervisor", supervisor)
+        Callable: The supervisor node function that processes user queries and
+            decides the next step.
     """
-    logger.info("Loading Hydra configuration for Talk2Scholars main agent.")
     cfg = get_hydra_config()
-    logger.info("Hydra configuration
+    logger.info("Hydra configuration for Talk2Scholars main agent loaded: %s", cfg)
+    members = ["s2_agent"]
+    options = ["FINISH"] + members
+    # Define system prompt for general interactions
+    system_prompt = cfg.system_prompt
+    # Define router prompt for routing to sub-agents
+    router_prompt = cfg.router_prompt
+
+    class Router(BaseModel):
+        """Worker to route to next. If no workers needed, route to FINISH."""
 
-
-    supervisor_agent = create_react_agent(
-        llm,
-        tools=[],  # Will add sub-agents later
-        state_modifier=cfg.main_agent,
-        state_schema=Talk2Scholars,
-        checkpointer=MemorySaver(),
-    )
+        next: Literal[*options]
 
     def supervisor_node(
         state: Talk2Scholars,
-    ) -> Command
+    ) -> Command:
         """
-
+        Handles the routing logic for the supervisor agent.
 
-        This function
-
+        This function determines the next agent to invoke based on the router prompt response.
+        If no further processing is required, it generates an AI response using the system prompt.
 
         Args:
-            state (Talk2Scholars): The current state
-
+            state (Talk2Scholars): The current conversation state, including messages
+                exchanged so far.
 
         Returns:
-            Command:
-
-        Example:
-            result = supervisor_node(current_state)
-            next_step = result.goto
+            Command: A command dictating whether to invoke a sub-agent or generate a final response.
         """
-
-
-
-
-
-
-
-
-
-
-
-
+        messages = [SystemMessage(content=router_prompt)] + state["messages"]
+        structured_llm = llm_model.with_structured_output(Router)
+        response = structured_llm.invoke(messages)
+        goto = response.next
+        logger.info("Routing to: %s, Thread ID: %s", goto, thread_id)
+        if goto == "FINISH":
+            goto = END  # Using END from langgraph.graph
+            # If no agents were called, and the last message was
+            # from the user, call the LLM to respond to the user
+            # with a slightly different system prompt.
+            if isinstance(messages[-1], HumanMessage):
+                response = llm_model.invoke(
+                    [
+                        SystemMessage(content=system_prompt),
+                    ]
+                    + messages[1:]
+                )
+                return Command(
+                    goto=goto, update={"messages": AIMessage(content=response.content)}
+                )
+        # Go to the requested agent
         return Command(goto=goto)
 
     return supervisor_node
 
 
-def get_app(
+def get_app(
+    thread_id: str,
+    llm_model: BaseChatModel = ChatOpenAI(model="gpt-4o-mini", temperature=0),
+):
     """
-    Initializes and returns the LangGraph
+    Initializes and returns the LangGraph-based hierarchical agent system.
 
-    This function
-    and sub-agents
+    This function constructs the agent workflow by defining nodes for the supervisor
+    and sub-agents. It compiles the graph using `StateGraph` to enable structured
+    conversational workflows.
 
     Args:
-        thread_id (str):
-        llm_model (
+        thread_id (str): A unique session identifier for tracking conversation state.
+        llm_model (BaseChatModel, optional): The language model used for query processing.
+            Defaults to `ChatOpenAI(model="gpt-4o-mini", temperature=0)`.
 
     Returns:
-        StateGraph: A compiled LangGraph application
+        StateGraph: A compiled LangGraph application that can process user queries.
 
     Example:
-        app = get_app("thread_123")
-        result = app.invoke(initial_state)
+        >>> app = get_app("thread_123")
+        >>> result = app.invoke(initial_state)
     """
     cfg = get_hydra_config()
 
     def call_s2_agent(
         state: Talk2Scholars,
-    ) -> Command[Literal["supervisor"
+    ) -> Command[Literal["supervisor"]]:
         """
-
+        Invokes the Semantic Scholar (S2) agent to retrieve relevant research papers.
 
-        This function
-
+        This function calls the `s2_agent` and updates the conversation state with retrieved
+        academic papers. The agent uses Semantic Scholar's API to find papers based on
+        user queries.
 
         Args:
-            state (Talk2Scholars): The current conversation
-                and any
+            state (Talk2Scholars): The current state of the conversation, containing messages
+                and any previous search results.
 
         Returns:
-            Command:
+            Command: A command to update the conversation state with the retrieved papers
+                and return control to the supervisor node.
 
         Example:
-            result = call_s2_agent(current_state)
-            next_step = result.goto
+            >>> result = call_s2_agent(current_state)
+            >>> next_step = result.goto
         """
-        logger.info("Calling S2 agent
+        logger.info("Calling S2 agent")
         app = s2_agent.get_app(thread_id, llm_model)
 
         # Invoke the S2 agent, passing state,
@@ -177,31 +177,30 @@ def get_app(thread_id: str, llm_model: str = "gpt-4o-mini") -> StateGraph:
                 }
             },
         )
-        logger.info("S2 agent completed with response
-
+        logger.info("S2 agent completed with response")
         return Command(
-            goto=END,
             update={
                 "messages": response["messages"],
                 "papers": response.get("papers", {}),
                 "multi_papers": response.get("multi_papers", {}),
+                "last_displayed_papers": response.get("last_displayed_papers", {}),
             },
+            # Always return to supervisor
+            goto="supervisor",
         )
 
     # Initialize LLM
-    logger.info("Using
-    llm = ChatOpenAI(model=llm_model, temperature=cfg.temperature)
+    logger.info("Using model %s with temperature %s", llm_model, cfg.temperature)
 
     # Build the graph
     workflow = StateGraph(Talk2Scholars)
-    supervisor = make_supervisor_node(
-
+    supervisor = make_supervisor_node(llm_model, thread_id)
+    # Add nodes
     workflow.add_node("supervisor", supervisor)
     workflow.add_node("s2_agent", call_s2_agent)
+    # Add edges
     workflow.add_edge(START, "supervisor")
-    workflow
-
-    # Compile the graph without initial state
+    # Compile the workflow
     app = workflow.compile(checkpointer=MemorySaver())
     logger.info("Main agent workflow compiled")
     return app