PyPI - aiagents4pharma - Versions diffs - 1.8.0__py3-none-any.whl → 1.15.0__py3-none-any.whl - Mend

aiagents4pharma 1.8.0py3-none-any.whl → 1.15.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85) hide show

aiagents4pharma/__init__.py CHANGED Viewed

@@ -1,7 +1,11 @@
-'''
+"""
 This file is used to import aiagents4pharma modules.
-'''
+"""
-from . import talk2biomodels
-from . import talk2cells
-from . import talk2knowledgegraphs
+from . import (
+    configs,
+    talk2biomodels,
+    talk2cells,
+    talk2competitors,
+    talk2knowledgegraphs,
+)

aiagents4pharma/configs/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+'''
+Import all the modules in the package
+'''
+from . import talk2biomodels

aiagents4pharma/configs/config.yaml ADDED Viewed

@@ -0,0 +1,4 @@
+defaults:
+  - _self_
+  - talk2biomodels/agents/t2b_agent: default
+  - talk2biomodels/tools/ask_question: default

aiagents4pharma/configs/talk2biomodels/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+'''
+Import all the modules in the package
+'''
+from . import agents
+from . import tools

aiagents4pharma/configs/talk2biomodels/agents/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+'''
+Import all the modules in the package
+'''
+from . import t2b_agent

aiagents4pharma/configs/talk2biomodels/agents/t2b_agent/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+'''
+Import all the modules in the package
+'''

aiagents4pharma/configs/talk2biomodels/agents/t2b_agent/default.yaml ADDED Viewed

@@ -0,0 +1,14 @@
+_target_: talk2biomodels.agents.t2b_agent.get_app
+state_modifier: >
+  You are Talk2BioModels agent.
+  If the user asks for the uploaded model,
+  then pass the use_uploaded_model argument
+  as True. If the user asks for simulation
+  or param_scan or steady state, suggest a
+  value for the `experiment_name` argument.
+  If the user asks question related to the
+  uploaded document/pdf/article/document,
+  use the tool `query_article` to answer the
+  question. Please note that the `experiment_name`
+  argument may be unrelated to the question asked.

aiagents4pharma/configs/talk2biomodels/tools/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+'''
+Import all the modules in the package
+'''
+from . import ask_question

aiagents4pharma/configs/talk2biomodels/tools/ask_question/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+'''
+Import all the modules in the package
+'''

aiagents4pharma/talk2biomodels/__init__.py CHANGED Viewed

@@ -3,3 +3,6 @@ This file is used to import the models and tools.
 '''
 from . import models
 from . import tools
+from . import agents
+from . import states
+from . import api

aiagents4pharma/talk2biomodels/agents/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+'''
+This file is used to import all the modules in the package.
+'''
+# import everything from the module
+from . import t2b_agent

aiagents4pharma/talk2biomodels/agents/t2b_agent.py ADDED Viewed

@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+'''
+This is the agent file for the Talk2BioModels agent.
+'''
+import logging
+from typing import Annotated
+import hydra
+from langchain_openai import ChatOpenAI
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, StateGraph
+from langgraph.prebuilt import create_react_agent, ToolNode, InjectedState
+from ..tools.search_models import SearchModelsTool
+from ..tools.get_modelinfo import GetModelInfoTool
+from ..tools.simulate_model import SimulateModelTool
+from ..tools.custom_plotter import CustomPlotterTool
+from ..tools.get_annotation import GetAnnotationTool
+from ..tools.ask_question import AskQuestionTool
+from ..tools.parameter_scan import ParameterScanTool
+from ..tools.steady_state import SteadyStateTool
+from ..tools.query_article import QueryArticle
+from ..states.state_talk2biomodels import Talk2Biomodels
+# Initialize logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def get_app(uniq_id, llm_model='gpt-4o-mini'):
+    '''
+    Build and compile the LangGraph app for the Talk2BioModels agent.
+
+    Args:
+        uniq_id: Identifier passed as the `thread_id` whenever the inner
+            ReAct agent is invoked.
+        llm_model: Model name handed to `ChatOpenAI`.
+
+    Returns:
+        The compiled graph returned by `workflow.compile(...)`.
+    '''
+    def agent_t2b_node(state: Annotated[dict, InjectedState]):
+        '''
+        Graph node that forwards the current state to the ReAct agent.
+
+        `model` is bound further down in the enclosing function; it is
+        only looked up when this node is actually executed.
+        '''
+        logger.log(logging.INFO, "Calling t2b_agent node with thread_id %s", uniq_id)
+        response = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
+        return response
+    # Wrap every tool the agent may call in a single ToolNode
+    tools = ToolNode([
+                    SimulateModelTool(),
+                    AskQuestionTool(),
+                    CustomPlotterTool(),
+                    SearchModelsTool(),
+                    GetModelInfoTool(),
+                    SteadyStateTool(),
+                    ParameterScanTool(),
+                    GetAnnotationTool(),
+                    QueryArticle()
+                ])
+    # Define the LLM (temperature 0)
+    llm = ChatOpenAI(model=llm_model, temperature=0)
+    # Load hydra configuration
+    logger.log(logging.INFO, "Load Hydra configuration for Talk2BioModels agent.")
+    with hydra.initialize(version_base=None, config_path="../../configs"):
+        cfg = hydra.compose(config_name='config',
+                            overrides=['talk2biomodels/agents/t2b_agent=default'])
+        # Keep only the sub-tree that configures this agent
+        cfg = cfg.talk2biomodels.agents.t2b_agent
+    logger.log(logging.INFO, "state_modifier: %s", cfg.state_modifier)
+    # Create the ReAct agent; it gets its own MemorySaver checkpointer
+    model = create_react_agent(
+                llm,
+                tools=tools,
+                state_schema=Talk2Biomodels,
+                state_modifier=cfg.state_modifier,
+                checkpointer=MemorySaver()
+            )
+    # Define a new graph over the Talk2Biomodels state
+    workflow = StateGraph(Talk2Biomodels)
+    # Register the single node of this graph
+    workflow.add_node("agent_t2b", agent_t2b_node)
+    # Set the entrypoint as the first node
+    # This means that this node is the first one called
+    workflow.add_edge(START, "agent_t2b")
+    # Initialize memory to persist state between graph runs.
+    # This is a second MemorySaver, separate from the one given
+    # to create_react_agent above.
+    checkpointer = MemorySaver()
+    # Finally, we compile it!
+    # This compiles it into a LangChain Runnable,
+    # meaning you can use it as you would any other runnable.
+    # The checkpointer is passed in when compiling the graph.
+    app = workflow.compile(checkpointer=checkpointer)
+    logger.log(logging.INFO,
+               "Compiled the graph with thread_id %s and llm_model %s",
+               uniq_id,
+               llm_model)
+    return app

aiagents4pharma/talk2biomodels/api/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+'''
+This file is used to import the modules in the package.
+'''
+from . import uniprot
+from . import ols
+from . import kegg

aiagents4pharma/talk2biomodels/api/kegg.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""
+This module contains the API for fetching Kegg database
+"""
+import re
+from typing import List, Dict
+import requests
+def fetch_from_api(base_url: str, query: str) -> str:
+    """
+    GET `base_url + query` and return the response body as text.
+
+    Any `requests` failure (including a non-2xx status) is printed and
+    reported to the caller as an empty string.
+    """
+    url = base_url + query
+    try:
+        reply = requests.get(url, timeout=10)
+        reply.raise_for_status()
+        payload = reply.text
+    except requests.exceptions.RequestException as err:
+        print(f"Error fetching data for query {query}: {err}")
+        payload = ""
+    return payload
+def fetch_kegg_names(ids: List[str], batch_size: int = 10) -> Dict[str, str]:
+    """
+    Fetch the names of multiple KEGG entries using the KEGG REST API in batches.
+
+    Records that cannot be parsed (no `ENTRY` line carrying an ID, or no
+    `NAME` line) are skipped, so a failed request or an unexpected
+    record simply contributes nothing to the result.
+
+    Args:
+        ids (List[str]): List of KEGG IDs.
+        batch_size (int): Maximum number of IDs to include in a single request.
+    Returns:
+        Dict[str, str]: A mapping of KEGG IDs to their names.
+    """
+    if not ids:
+        return {}
+    base_url = "https://rest.kegg.jp/get/"
+    entry_name_map = {}
+    # Process IDs in batches
+    for i in range(0, len(ids), batch_size):
+        query = "+".join(ids[i:i + batch_size])
+        # fetch_from_api returns "" when the request fails
+        entry_data = fetch_from_api(base_url, query)
+        # Records in the response are separated by "///"
+        for entry in entry_data.split("///"):
+            lines = entry.strip().split("\n")
+            entry_line = next((line for line in lines
+                               if line.startswith("ENTRY")), None)
+            name_line = next((line for line in lines
+                              if line.startswith("NAME")), None)
+            entry_fields = entry_line.split() if entry_line else []
+            # Skip blank or malformed records instead of crashing on None
+            if len(entry_fields) < 2 or name_line is None:
+                continue
+            # Drop only the leading "NAME" tag; a blanket replace would
+            # also strip "NAME" from inside the names themselves.
+            raw_names = name_line[len("NAME"):].strip()
+            # Split multiple names in the NAME field and clean them
+            names = [
+                re.sub(r'[^a-zA-Z0-9\s]', '', name).strip()
+                for name in raw_names.split(";")
+            ]
+            # Join cleaned names into a single string
+            entry_name_map[entry_fields[1]] = " ".join(names).strip()
+    return entry_name_map
+def fetch_kegg_annotations(data: List[Dict[str, str]],
+                           batch_size: int = 10) -> Dict[str, Dict[str, str]]:
+    """
+    Look up KEGG names for a list of records, grouped by database.
+
+    Each record must provide a "Database" and an "Id" key. IDs are
+    bucketed by lower-cased database name and each bucket is resolved
+    with `fetch_kegg_names`.
+    """
+    ids_by_db = {}
+    for record in data:
+        db_key = record["Database"].lower()
+        if db_key not in ids_by_db:
+            ids_by_db[db_key] = []
+        ids_by_db[db_key].append(record["Id"])
+    return {
+        db_key: fetch_kegg_names(db_ids, batch_size=batch_size)
+        for db_key, db_ids in ids_by_db.items()
+    }
+# def get_protein_name_or_label(data: List[Dict[str, str]],
+#                               batch_size: int = 10) -> Dict[str, Dict[str, str]]:
+#     """Fetch descriptions for KEGG-related identifiers."""
+#     return fetch_kegg_annotations(data, batch_size=batch_size)

aiagents4pharma/talk2biomodels/api/ols.py ADDED Viewed

@@ -0,0 +1,72 @@
+"""
+This module contains the API for fetching ols database
+"""
+from typing import List, Dict
+import requests
+def fetch_from_ols(term: str) -> str:
+    """
+    Fetch the label for a single term from OLS.
+
+    Args:
+        term (str): The term in the format "ONTOLOGY:TERM_ID".
+    Returns:
+        str: The label for the term, '-' when OLS returns no matching
+        term, or a message starting with "Error:" when the term is
+        malformed or the request fails.
+    """
+    # Validate before touching the network so a malformed term yields
+    # the documented error string instead of an unpacking ValueError.
+    ontology, separator, term_id = term.partition(":")
+    if not separator or not ontology or not term_id:
+        return f"Error: invalid term '{term}', expected 'ONTOLOGY:TERM_ID'"
+    try:
+        base_url = f"https://www.ebi.ac.uk/ols4/api/ontologies/{ontology.lower()}/terms"
+        params = {"obo_id": term}
+        response = requests.get(
+            base_url,
+            params=params,
+            headers={"Accept": "application/json"},
+            timeout=10
+        )
+        response.raise_for_status()
+        data = response.json()
+        label = '-'
+        # Extract and return the label of the first matching term
+        if "_embedded" in data and "terms" in data["_embedded"] \
+             and len(data["_embedded"]["terms"]) > 0:
+            label = data["_embedded"]["terms"][0].get("label", "Label not found")
+        return label
+    # ValueError covers JSON decoding failures on requests versions whose
+    # decode error is not a RequestException subclass.
+    except (requests.exceptions.RequestException,
+            KeyError, IndexError, ValueError) as e:
+        return f"Error: {str(e)}"
+def fetch_ols_labels(terms: List[str]) -> Dict[str, str]:
+    """
+    Resolve several OLS terms, one `fetch_from_ols` call per term.
+
+    Args:
+        terms (List[str]): Terms in the format "ONTOLOGY:TERM_ID".
+    Returns:
+        Dict[str, str]: Each term mapped to whatever `fetch_from_ols`
+        returned for it (a label or an error message).
+    """
+    return {term: fetch_from_ols(term) for term in terms}
+def search_ols_labels(data: List[Dict[str, str]]) -> Dict[str, Dict[str, str]]:
+    """
+    Look up OLS labels for a list of records, grouped by ontology.
+
+    Args:
+        data (List[Dict[str, str]]): Records containing 'Id' and 'Database'.
+    Returns:
+        Dict[str, Dict[str, str]]: Lower-cased ontology name mapped to the
+        result of `fetch_ols_labels` for that ontology's IDs.
+    """
+    terms_by_ontology = {}
+    for record in data:
+        key = record["Database"].lower()
+        if key not in terms_by_ontology:
+            terms_by_ontology[key] = []
+        terms_by_ontology[key].append(record["Id"])
+    return {
+        key: fetch_ols_labels(term_ids)
+        for key, term_ids in terms_by_ontology.items()
+    }

aiagents4pharma/talk2biomodels/api/uniprot.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""
+This module contains the API for fetching uniprot database
+"""
+from typing import List, Dict
+import requests
+def search_uniprot_labels(identifiers: List[str]) -> Dict[str, str]:
+    """
+    Resolve UniProt identifiers to protein names, one request per identifier.
+
+    Args:
+        identifiers (List[str]): A list of UniProt identifiers.
+    Returns:
+        Dict[str, str]: Each identifier mapped to the recommended full
+        name from the JSON response, 'Name not found' when that field is
+        absent, or an "Error: ..." message when the request fails.
+    """
+    endpoint = "https://www.uniprot.org/uniprot/"
+    labels = {}
+    for accession in identifiers:
+        target = f"{endpoint}{accession}.json"
+        try:
+            reply = requests.get(target, timeout=10)
+            reply.raise_for_status()
+            payload = reply.json()
+            # Walk proteinDescription -> recommendedName -> fullName -> value,
+            # falling back to an empty dict at every missing level.
+            description = payload.get('proteinDescription', {})
+            recommended = description.get('recommendedName', {})
+            full_name = recommended.get('fullName', {})
+            labels[accession] = full_name.get('value', 'Name not found')
+        except requests.exceptions.RequestException as err:
+            labels[accession] = f"Error: {str(err)}"
+    return labels

aiagents4pharma/talk2biomodels/models/basico_model.py CHANGED Viewed

@@ -48,52 +48,49 @@ class BasicoModel(SysBioModel):
             self.name = basico.model_info.get_model_name(model=self.copasi_model)
         return self
-    def simulate(self,
-                 parameters: Optional[Dict[str, Union[float, int]]] = None,
-                 duration: Union[int, float] = 10,
-                 interval: int = 10
-                 ) -> pd.DataFrame:
+    def update_parameters(self, parameters: Dict[str, Union[float, int]]) -> None:
+        """
+        Update model parameters with new values.
+
+        A name that matches a global parameter of the COPASI model gets a
+        new initial value; any other name is treated as a species and
+        gets a new initial concentration. `None` keys are ignored.
+
+        Args:
+            parameters: Mapping of parameter or species name to new value.
+                An empty or `None` mapping is a no-op.
+        """
+        if not parameters:
+            return
+        # The set of global parameter names does not change while values
+        # are being assigned, so look it up once instead of per item.
+        df_all_params = basico.model_info.get_parameters(model=self.copasi_model)
+        # basico returns None (not an empty frame) when nothing matches
+        known_params = set() if df_all_params is None else set(df_all_params.index)
+        for param_name, param_value in parameters.items():
+            # check if the param_name is not None
+            if param_name is None:
+                continue
+            # if param is a kinetic parameter
+            if param_name in known_params:
+                basico.model_info.set_parameters(name=param_name,
+                                            exact=True,
+                                            initial_value=param_value,
+                                            model=self.copasi_model)
+            # if param is a species
+            else:
+                basico.model_info.set_species(name=param_name,
+                                            exact=True,
+                                            initial_concentration=param_value,
+                                            model=self.copasi_model)
+    def simulate(self, duration: Union[int, float] = 10, interval: int = 10) -> pd.DataFrame:
         """
         Simulate the COPASI model over a specified range of time points.
         Args:
-            parameters: Dictionary of model parameters to update before simulation.
             duration: Duration of the simulation in time units.
             interval: Interval between time points in the simulation.
         Returns:
             Pandas DataFrame with time-course simulation results.
         """
-        # Update parameters in the model
-        if parameters:
-            for param_name, param_value in parameters.items():
-                # check if the param_name is not None
-                if param_name is None:
-                    continue
-                # if param is a kinectic parameter
-                df_all_params = basico.model_info.get_parameters(model=self.copasi_model)
-                if param_name in df_all_params.index.tolist():
-                    basico.model_info.set_parameters(name=param_name,
-                                                exact=True,
-                                                initial_value=param_value,
-                                                model=self.copasi_model)
-                # if param is a species
-                else:
-                    basico.model_info.set_species(name=param_name,
-                                                exact=True,
-                                                initial_concentration=param_value,
-                                                model=self.copasi_model)
         # Run the simulation and return results
         df_result = basico.run_time_course(model=self.copasi_model,
                                         intervals=interval,
                                         duration=duration)
-        # Replace curly braces in column headers with square brackets
-        # Because curly braces in the world of LLMS are used for
-        # structured output
-        df_result.columns = df_result.columns.str.replace('{', '[', regex=False).\
-                    str.replace('}', ']', regex=False)
+        # # Replace curly braces in column headers with square brackets
+        # # Because curly braces in the world of LLMS are used for
+        # # structured output
+        # df_result.columns = df_result.columns.str.replace('{', '[', regex=False).\
+        #             str.replace('}', ']', regex=False)
         # Reset the index
         df_result.reset_index(inplace=True)
         # Store the simulation results

aiagents4pharma/talk2biomodels/models/sys_bio_model.py CHANGED Viewed

@@ -35,18 +35,21 @@ class SysBioModel(ABC, BaseModel):
         Returns:
             dict: Dictionary with model metadata
         """
+    @abstractmethod
+    def update_parameters(self, parameters: Dict[str, Union[float, int]]) -> None:
+        """
+        Abstract method to update model parameters.
+
+        Subclasses must implement this method.
+
+        Args:
+            parameters: Dictionary mapping parameter names to their
+                new numeric values.
+        """
     @abstractmethod
-    def simulate(self,
-                 parameters: Dict[str, Union[float, int]],
-                 duration: Union[int, float]) -> List[float]:
+    def simulate(self, duration: Union[int, float]) -> List[float]:
         """
         Abstract method to run a simulation of the model.
-        This method should be implemented to simulate model
-        behavior based on the provided parameters.
         Args:
-            parameters: Dictionary of parameter values.
             duration: Duration of the simulation.
         Returns:

aiagents4pharma/talk2biomodels/states/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+'''
+This file is used to import all the modules in the package.
+'''
+# import everything from the module
+from . import state_talk2biomodels

aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py ADDED Viewed

@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+'''
+This is the state file for the Talk2BioModels agent.
+'''
+from typing import Annotated
+import operator
+from langgraph.prebuilt.chat_agent_executor import AgentState
+def add_data(data1: list[dict], data2: list[dict]) -> list[dict]:
+    """
+    A reducer function to merge two lists of dictionaries by 'name'.
+
+    Every item must carry a 'name' key. An item of `data2` whose name is
+    already present replaces the earlier item in place; otherwise it is
+    appended. Neither input list is modified.
+
+    Args:
+        data1: The existing items.
+        data2: The incoming items; later items win on a name clash.
+    Returns:
+        A new list holding the merged items.
+    """
+    merged = data1.copy()
+    idx_by_name = {item['name']: idx for idx, item in enumerate(merged)}
+    for item in data2:
+        idx = idx_by_name.get(item['name'])
+        if idx is not None:
+            merged[idx] = item
+        else:
+            # Record the new position so that a later item in data2 with
+            # the same name replaces this one instead of duplicating it.
+            idx_by_name[item['name']] = len(merged)
+            merged.append(item)
+    return merged
+class Talk2Biomodels(AgentState):
+    """
+    The state for the Talk2BioModels agent.
+
+    Extends `AgentState` with the fields declared below.
+    """
+    llm_model: str
+    pdf_file_name: str
+    # A StateGraph may receive concurrent updates to the same key,
+    # which it does not support by default. Hence each of the
+    # fields below is annotated with a reducer function that
+    # combines the existing value with the incoming one.
+    # https://langchain-ai.github.io/langgraph/troubleshooting/errors/INVALID_CONCURRENT_GRAPH_UPDATE/
+    # Combined with operator.add
+    model_id: Annotated[list, operator.add]
+    sbml_file_path: Annotated[list, operator.add]
+    # Combined with add_data
+    dic_simulated_data: Annotated[list[dict], add_data]
+    dic_scanned_data: Annotated[list[dict], add_data]
+    dic_steady_state_data: Annotated[list[dict], add_data]
+    dic_annotations_data : Annotated[list[dict], add_data]

aiagents4pharma/talk2biomodels/tests/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+'''
+This module contains the test cases.
+'''

aiagents4pharma/talk2biomodels/tests/test_api.py ADDED Viewed

@@ -0,0 +1,57 @@
+'''
+Test cases for Talk2Biomodels.
+'''
+from ..api.uniprot import search_uniprot_labels
+from ..api.ols import fetch_from_ols
+from ..api.kegg import fetch_kegg_names, fetch_from_api
+def test_search_uniprot_labels():
+    '''
+    Test the search_uniprot_labels function.
+
+    search_uniprot_labels issues real HTTP requests, so this test
+    needs network access.
+    '''
+    # "P61764" = Positive result, "P0000Q" = negative result
+    identifiers = ["P61764", "P0000Q"]
+    results = search_uniprot_labels(identifiers)
+    assert results["P61764"] == "Syntaxin-binding protein 1"
+    # A failed request is reported as an "Error: ..." string
+    assert results["P0000Q"].startswith("Error: 400")
+def test_fetch_from_ols():
+    '''
+    Test the fetch_from_ols function.
+
+    fetch_from_ols issues real HTTP requests, so this test needs
+    network access.
+    '''
+    term_1 = "GO:0005886" # Positive result
+    term_2 = "GO:ABC123" # Negative result
+    label_1 = fetch_from_ols(term_1)
+    label_2 = fetch_from_ols(term_2)
+    # Both the success and the failure path return a string
+    assert isinstance(label_1, str), f"Expected string, got {type(label_1)}"
+    assert isinstance(label_2, str), f"Expected string, got {type(label_2)}"
+    assert label_1 == "plasma membrane"
+    assert label_2.startswith("Error: 404")
+def test_fetch_kegg_names():
+    '''
+    Test the fetch_kegg_names function.
+
+    The non-empty case goes through fetch_from_api and therefore
+    needs network access.
+    '''
+    ids = ["C00001", "C00002"]
+    results = fetch_kegg_names(ids)
+    assert results["C00001"] == "H2O"
+    assert results["C00002"] == "ATP"
+    # An empty list short-circuits to an empty mapping
+    results = fetch_kegg_names([])
+    assert not results
+def test_fetch_from_api():
+    '''
+    Test the fetch_from_api function.
+
+    fetch_from_api issues real HTTP requests, so this test needs
+    network access.
+    '''
+    base_url = "https://rest.kegg.jp/get/"
+    query = "C00001"
+    entry_data = fetch_from_api(base_url, query)
+    assert entry_data.startswith("ENTRY       C00001")
+    # An invalid query makes fetch_from_api return an empty string
+    query = "C0000Q"
+    entry_data = fetch_from_api(base_url, query)
+    assert not entry_data

aiagents4pharma/talk2biomodels/tests/test_ask_question.py ADDED Viewed

@@ -0,0 +1,44 @@
+'''
+Test cases for Talk2Biomodels.
+'''
+from langchain_core.messages import HumanMessage, ToolMessage
+from ..agents.t2b_agent import get_app
+def test_ask_question_tool():
+    '''
+    Test the ask_question tool without the simulation results.
+    '''
+    unique_id = 12345
+    app = get_app(unique_id, llm_model='gpt-4o-mini')
+    config = {"configurable": {"thread_id": unique_id}}
+    ##########################################
+    # Test ask_question tool when simulation
+    # results are not available i.e. the
+    # simulation has not been run. In this
+    # case, the tool should return an error
+    ##########################################
+    # Update state
+    app.update_state(config, {"llm_model": "gpt-4o-mini"})
+    # Define the prompt
+    prompt = "Call the ask_question tool to answer the "
+    prompt += "question: What is the concentration of CRP "
+    prompt += "in serum at 1000 hours? The simulation name "
+    prompt += "is `simulation_name`."
+    # Invoke the tool
+    app.invoke(
+            {"messages": [HumanMessage(content=prompt)]},
+            config=config
+        )
+    # Get the messages from the current state
+    # and reverse the order
+    current_state = app.get_state(config)
+    reversed_messages = current_state.values["messages"][::-1]
+    # Walk all messages, newest first, and check every
+    # ToolMessage; the loop does not stop at the first one.
+    # NOTE(review): if the state holds no ToolMessage at all,
+    # nothing is asserted and the test passes vacuously.
+    for msg in reversed_messages:
+        # Each ToolMessage must carry the status "error"
+        if isinstance(msg, ToolMessage):
+            assert msg.status == "error"

aiagents4pharma 1.8.0__py3-none-any.whl → 1.15.0__py3-none-any.whl

aiagents4pharma 1.8.0py3-none-any.whl → 1.15.0py3-none-any.whl