PyPI - aiagents4pharma - Versions diffs - 1.13.1__tar.gz → 1.14.1__tar.gz - Mend

aiagents4pharma 1.13.1tar.gz → 1.14.1tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

{aiagents4pharma-1.13.1 → aiagents4pharma-1.14.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: aiagents4pharma
-Version: 1.13.1
+Version: 1.14.1
 Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License

aiagents4pharma-1.14.1/aiagents4pharma/configs/config.yaml ADDED Viewed

@@ -0,0 +1,4 @@
+defaults:
+  - _self_
+  - talk2biomodels/agents/t2b_agent: default
+  - talk2biomodels/tools/ask_question: default

{aiagents4pharma-1.13.1 → aiagents4pharma-1.14.1}/aiagents4pharma/configs/talk2biomodels/__init__.py RENAMED Viewed

@@ -3,3 +3,4 @@ Import all the modules in the package
 '''
 from . import agents
+from . import tools

{aiagents4pharma-1.13.1 → aiagents4pharma-1.14.1}/aiagents4pharma/configs/talk2biomodels/agents/t2b_agent/default.yaml RENAMED Viewed

@@ -4,6 +4,5 @@ state_modifier: >
   If the user asks for the uploaded model,
   then pass the use_uploaded_model argument
   as True. If the user asks for simulation
-  or steady state, suggest a value for the
-  `simulation_name` or `steadystate_name`
-  argument.
+  or param_scan or steady state, suggest a
+  value for the `experiment_name` argument.

aiagents4pharma-1.14.1/aiagents4pharma/configs/talk2biomodels/tools/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+'''
+Import all the modules in the package
+'''
+from . import ask_question

aiagents4pharma-1.14.1/aiagents4pharma/configs/talk2biomodels/tools/ask_question/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+'''
+Import all the modules in the package
+'''

{aiagents4pharma-1.13.1 → aiagents4pharma-1.14.1}/aiagents4pharma/talk2biomodels/__init__.py RENAMED Viewed

@@ -5,3 +5,4 @@ from . import models
 from . import tools
 from . import agents
 from . import states
+from . import api

{aiagents4pharma-1.13.1 → aiagents4pharma-1.14.1}/aiagents4pharma/talk2biomodels/agents/t2b_agent.py RENAMED Viewed

@@ -15,6 +15,7 @@ from ..tools.search_models import SearchModelsTool
 from ..tools.get_modelinfo import GetModelInfoTool
 from ..tools.simulate_model import SimulateModelTool
 from ..tools.custom_plotter import CustomPlotterTool
+from ..tools.get_annotation import GetAnnotationTool
 from ..tools.ask_question import AskQuestionTool
 from ..tools.parameter_scan import ParameterScanTool
 from ..tools.steady_state import SteadyStateTool
@@ -44,8 +45,9 @@ def get_app(uniq_id, llm_model='gpt-4o-mini'):
                     SearchModelsTool(),
                     GetModelInfoTool(),
                     SteadyStateTool(),
-                    ParameterScanTool()
-                    ])
+                    ParameterScanTool(),
+                    GetAnnotationTool()
+                ])
     # Define the model
     llm = ChatOpenAI(model=llm_model, temperature=0)

aiagents4pharma-1.14.1/aiagents4pharma/talk2biomodels/api/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+'''
+This file is used to import the modules in the package.
+'''
+from . import uniprot
+from . import ols
+from . import kegg

aiagents4pharma-1.14.1/aiagents4pharma/talk2biomodels/api/kegg.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""
+This module contains the API for fetching Kegg database
+"""
+import re
+from typing import List, Dict
+import requests
+def fetch_from_api(base_url: str, query: str) -> str:
+    """Fetch data from the given API endpoint."""
+    try:
+        response = requests.get(base_url + query, timeout=10)
+        response.raise_for_status()
+        return response.text
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching data for query {query}: {e}")
+        return ""
+def fetch_kegg_names(ids: List[str], batch_size: int = 10) -> Dict[str, str]:
+    """
+    Fetch the names of multiple KEGG entries using the KEGG REST API in batches.
+    Args:
+        ids (List[str]): List of KEGG IDs.
+        batch_size (int): Maximum number of IDs to include in a single request.
+    Returns:
+        Dict[str, str]: A mapping of KEGG IDs to their names.
+    """
+    if not ids:
+        return {}
+    base_url = "https://rest.kegg.jp/get/"
+    entry_name_map = {}
+    # Process IDs in batches
+    for i in range(0, len(ids), batch_size):
+        batch = ids[i:i + batch_size]
+        query = "+".join(batch)
+        entry_data = fetch_from_api(base_url, query)
+        # if not entry_data:
+        #     continue
+        entries = entry_data.split("///")
+        for entry in entries:
+            if not entry.strip():
+                continue
+            lines = entry.strip().split("\n")
+            entry_line = next((line for line in lines
+                                if line.startswith("ENTRY")), None)
+            name_line = next((line for line in lines
+                                if line.startswith("NAME")), None)
+            # if not entry_line and not name_line:
+            #     continue
+            entry_id = entry_line.split()[1]
+            # Split multiple names in the NAME field and clean them
+            names = [
+                re.sub(r'[^a-zA-Z0-9\s]', '', name).strip()
+                for name in name_line.replace("NAME", "").strip().split(";")
+            ]
+            # Join cleaned names into a single string
+            entry_name_map[entry_id] = " ".join(names).strip()
+    return entry_name_map
+def fetch_kegg_annotations(data: List[Dict[str, str]],
+                           batch_size: int = 10) -> Dict[str, Dict[str, str]]:
+    """Fetch KEGG entry descriptions grouped by database type."""
+    grouped_data = {}
+    for entry in data:
+        db_type = entry["Database"].lower()
+        grouped_data.setdefault(db_type, []).append(entry["Id"])
+    results = {}
+    for db_type, ids in grouped_data.items():
+        results[db_type] = fetch_kegg_names(ids, batch_size=batch_size)
+    return results
+# def get_protein_name_or_label(data: List[Dict[str, str]],
+#                               batch_size: int = 10) -> Dict[str, Dict[str, str]]:
+#     """Fetch descriptions for KEGG-related identifiers."""
+#     return fetch_kegg_annotations(data, batch_size=batch_size)

aiagents4pharma-1.14.1/aiagents4pharma/talk2biomodels/api/ols.py ADDED Viewed

@@ -0,0 +1,72 @@
+"""
+This module contains the API for fetching ols database
+"""
+from typing import List, Dict
+import requests
+def fetch_from_ols(term: str) -> str:
+    """
+    Fetch the label for a single term from OLS.
+    Args:
+        term (str): The term in the format "ONTOLOGY:TERM_ID".
+    Returns:
+        str: The label for the term or an error message.
+    """
+    try:
+        ontology, _ = term.split(":")
+        base_url = f"https://www.ebi.ac.uk/ols4/api/ontologies/{ontology.lower()}/terms"
+        params = {"obo_id": term}
+        response = requests.get(
+            base_url,
+            params=params,
+            headers={"Accept": "application/json"},
+            timeout=10
+        )
+        response.raise_for_status()
+        data = response.json()
+        label = '-'
+        # Extract and return the label
+        if "_embedded" in data and "terms" in data["_embedded"] \
+             and len(data["_embedded"]["terms"]) > 0:
+            label = data["_embedded"]["terms"][0].get("label", "Label not found")
+        return label
+    except (requests.exceptions.RequestException, KeyError, IndexError) as e:
+        return f"Error: {str(e)}"
+def fetch_ols_labels(terms: List[str]) -> Dict[str, str]:
+    """
+    Fetch labels for multiple terms from OLS.
+    Args:
+        terms (List[str]): A list of terms in the format "ONTOLOGY:TERM_ID".
+    Returns:
+        Dict[str, str]: A mapping of term IDs to their labels or error messages.
+    """
+    results = {}
+    for term in terms:
+        results[term] = fetch_from_ols(term)
+    return results
+def search_ols_labels(data: List[Dict[str, str]]) -> Dict[str, Dict[str, str]]:
+    """
+    Fetch OLS annotations grouped by ontology type.
+    Args:
+        data (List[Dict[str, str]]): A list of dictionaries containing 'Id' and 'Database'.
+    Returns:
+        Dict[str, Dict[str, str]]: A mapping of ontology type to term labels.
+    """
+    grouped_data = {}
+    for entry in data:
+        ontology = entry["Database"].lower()
+        grouped_data.setdefault(ontology, []).append(entry["Id"])
+    results = {}
+    for ontology, terms in grouped_data.items():
+        results[ontology] = fetch_ols_labels(terms)
+    return results

aiagents4pharma-1.14.1/aiagents4pharma/talk2biomodels/api/uniprot.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""
+This module contains the API for fetching uniprot database
+"""
+from typing import List, Dict
+import requests
+def search_uniprot_labels(identifiers: List[str]) -> Dict[str, str]:
+    """
+    Fetch protein names or labels for a list of UniProt identifiers by making sequential requests.
+    Args:
+        identifiers (List[str]): A list of UniProt identifiers.
+    Returns:
+        Dict[str, str]: A mapping of UniProt identifiers to their protein names or error messages.
+    """
+    results = {}
+    base_url = "https://www.uniprot.org/uniprot/"
+    for identifier in identifiers:
+        url = f"{base_url}{identifier}.json"
+        try:
+            response = requests.get(url, timeout=10)
+            response.raise_for_status()
+            data = response.json()
+            protein_name = (
+                data.get('proteinDescription', {})
+                .get('recommendedName', {})
+                .get('fullName', {})
+                .get('value', 'Name not found')
+            )
+            results[identifier] = protein_name
+        except requests.exceptions.RequestException as e:
+            results[identifier] = f"Error: {str(e)}"
+    return results

aiagents4pharma-1.14.1/aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py ADDED Viewed

@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+'''
+This is the state file for the Talk2BioModels agent.
+'''
+from typing import Annotated
+import operator
+from langgraph.prebuilt.chat_agent_executor import AgentState
+def add_data(data1: dict, data2: dict) -> dict:
+    """
+    A reducer function to merge two dictionaries.
+    """
+    left_idx_by_name = {data['name']: idx for idx, data in enumerate(data1)}
+    merged = data1.copy()
+    for data in data2:
+        idx = left_idx_by_name.get(data['name'])
+        if idx is not None:
+            merged[idx] = data
+        else:
+            merged.append(data)
+    return merged
+class Talk2Biomodels(AgentState):
+    """
+    The state for the Talk2BioModels agent.
+    """
+    llm_model: str
+    # A StateGraph may receive concurrent updates to the same key, which
+    # it does not support on its own. Hence, each key below is annotated
+    # with a reducer function that combines the existing value with the
+    # incoming one.
+    # https://langchain-ai.github.io/langgraph/troubleshooting/errors/INVALID_CONCURRENT_GRAPH_UPDATE/
+    # model_id and sbml_file_path use operator.add as their reducer.
+    model_id: Annotated[list, operator.add]
+    sbml_file_path: Annotated[list, operator.add]
+    # The dic_* keys use add_data as their reducer.
+    dic_simulated_data: Annotated[list[dict], add_data]
+    dic_scanned_data: Annotated[list[dict], add_data]
+    dic_steady_state_data: Annotated[list[dict], add_data]
+    dic_annotations_data : Annotated[list[dict], add_data]

aiagents4pharma-1.14.1/aiagents4pharma/talk2biomodels/tests/test_api.py ADDED Viewed

@@ -0,0 +1,57 @@
+'''
+Test cases for Talk2Biomodels.
+'''
+from ..api.uniprot import search_uniprot_labels
+from ..api.ols import fetch_from_ols
+from ..api.kegg import fetch_kegg_names, fetch_from_api
+def test_search_uniprot_labels():
+    '''
+    Test the search_uniprot_labels function.
+    '''
+    # "P61764" = Positive result, "P0000Q" = negative result
+    identifiers = ["P61764", "P0000Q"]
+    results = search_uniprot_labels(identifiers)
+    assert results["P61764"] == "Syntaxin-binding protein 1"
+    assert results["P0000Q"].startswith("Error: 400")
+def test_fetch_from_ols():
+    '''
+    Test the fetch_from_ols function.
+    '''
+    term_1 = "GO:0005886" #Positive result
+    term_2 = "GO:ABC123" #Negative result
+    label_1 = fetch_from_ols(term_1)
+    label_2 = fetch_from_ols(term_2)
+    assert isinstance(label_1, str), f"Expected string, got {type(label_1)}"
+    assert isinstance(label_2, str), f"Expected string, got {type(label_2)}"
+    assert label_1 == "plasma membrane"
+    assert label_2.startswith("Error: 404")
+def test_fetch_kegg_names():
+    '''
+    Test the fetch_kegg_names function.
+    '''
+    ids = ["C00001", "C00002"]
+    results = fetch_kegg_names(ids)
+    assert results["C00001"] == "H2O"
+    assert results["C00002"] == "ATP"
+    # Try with an empty list
+    results = fetch_kegg_names([])
+    assert not results
+def test_fetch_from_api():
+    '''
+    Test the fetch_from_api function.
+    '''
+    base_url = "https://rest.kegg.jp/get/"
+    query = "C00001"
+    entry_data = fetch_from_api(base_url, query)
+    assert entry_data.startswith("ENTRY       C00001")
+    # Try with an invalid query
+    query = "C0000Q"
+    entry_data = fetch_from_api(base_url, query)
+    assert not entry_data

aiagents4pharma-1.14.1/aiagents4pharma/talk2biomodels/tests/test_ask_question.py ADDED Viewed

@@ -0,0 +1,44 @@
+'''
+Test cases for Talk2Biomodels.
+'''
+from langchain_core.messages import HumanMessage, ToolMessage
+from ..agents.t2b_agent import get_app
+def test_ask_question_tool():
+    '''
+    Test the ask_question tool without the simulation results.
+    '''
+    unique_id = 12345
+    app = get_app(unique_id, llm_model='gpt-4o-mini')
+    config = {"configurable": {"thread_id": unique_id}}
+    ##########################################
+    # Test ask_question tool when simulation
+    # results are not available i.e. the
+    # simulation has not been run. In this
+    # case, the tool should return an error
+    ##########################################
+    # Update state
+    app.update_state(config, {"llm_model": "gpt-4o-mini"})
+    # Define the prompt
+    prompt = "Call the ask_question tool to answer the "
+    prompt += "question: What is the concentration of CRP "
+    prompt += "in serum at 1000 hours? The simulation name "
+    prompt += "is `simulation_name`."
+    # Invoke the agent with the prompt
+    app.invoke(
+            {"messages": [HumanMessage(content=prompt)]},
+            config=config
+        )
+    # Get the messages from the current state
+    # and reverse the order
+    current_state = app.get_state(config)
+    reversed_messages = current_state.values["messages"][::-1]
+    # Check every ToolMessage in the history; messages
+    # of any other type are ignored.
+    # NOTE(review): the loop does not stop at the first
+    # ToolMessage, and the test passes without asserting
+    # anything when no ToolMessage is present.
+    for msg in reversed_messages:
+        # Each ToolMessage must have the status "error"
+        if isinstance(msg, ToolMessage):
+            assert msg.status == "error"

aiagents4pharma-1.14.1/aiagents4pharma/talk2biomodels/tests/test_get_annotation.py ADDED Viewed

@@ -0,0 +1,171 @@
+'''
+Test cases for Talk2Biomodels get_annotation tool.
+'''
+import random
+import pytest
+from langchain_core.messages import HumanMessage, ToolMessage
+from ..agents.t2b_agent import get_app
+from ..tools.get_annotation import prepare_content_msg
+@pytest.fixture(name="make_graph")
+def make_graph_fixture():
+    '''
+    Create an instance of the talk2biomodels agent.
+    '''
+    unique_id = random.randint(1000, 9999)
+    graph = get_app(unique_id)
+    config = {"configurable": {"thread_id": unique_id}}
+    return graph, config
+def test_no_model_provided(make_graph):
+    '''
+    Test the tool by not specifying any model.
+    We are testing a condition where the user
+    asks for annotations of all species without
+    specifying a model.
+    '''
+    # make_graph provides the agent and the config for its thread.
+    app, config = make_graph
+    prompt = "Extract annotations of all species. Call the tool get_annotation."
+    app.invoke({"messages": [HumanMessage(content=prompt)]},
+                        config=config
+                    )
+    current_state = app.get_state(config)
+    # Assert that the state key model_id is empty.
+    assert current_state.values["model_id"] == []
+def test_specific_species_provided(make_graph):
+    '''
+    Test the tool by providing a specific species name.
+    We are testing a condition where the user asks for annotations
+    of a specific species in a specific model.
+    '''
+    # Test with a valid species name
+    app, config = make_graph
+    prompt = "Extract annotations of species IL6 in model 537."
+    app.invoke(
+                {"messages": [HumanMessage(content=prompt)]},
+                config=config
+            )
+    current_state = app.get_state(config)
+    dic_annotations_data = current_state.values["dic_annotations_data"]
+    # The first "Species Name" of the first annotation entry must be IL6.
+    assert dic_annotations_data[0]['data']["Species Name"][0] == "IL6"
+    # Test with an invalid species name
+    # make_graph is unpacked again, so the same app and config
+    # (and therefore the same thread) are used as above.
+    app, config = make_graph
+    prompt = "Extract annotations of species NADH in model 537."
+    app.invoke(
+        {"messages": [HumanMessage(content=prompt)]},
+        config=config
+    )
+    current_state = app.get_state(config)
+    reversed_messages = current_state.values["messages"][::-1]
+    # Walk the messages from newest to oldest and look for a
+    # get_annotation ToolMessage that rejects NADH.
+    test_condition = False
+    for msg in reversed_messages:
+        # Only ToolMessages produced by get_annotation are considered.
+        if isinstance(msg, ToolMessage) and msg.name == "get_annotation":
+            # The check succeeds when the artifact is None and the
+            # message content mentions the species NADH.
+            if msg.artifact is None and 'NADH' in msg.content:
+                # presumably a None artifact means no annotation was
+                # found for the species; verify against the tool
+                test_condition = True
+                break
+    assert test_condition, "Expected rejection message for NADH but did not find it."
+    # Test with an invalid species name and a valid species name
+    app, config = make_graph
+    prompt = "Extract annotations of species NADH, NAD, and IL7 in model 64."
+    app.invoke(
+        {"messages": [HumanMessage(content=prompt)]},
+        config=config
+    )
+    current_state = app.get_state(config)
+    reversed_messages = current_state.values["messages"][::-1]
+    # Walk the messages from newest to oldest and look for a
+    # get_annotation ToolMessage that reports IL7.
+    # NOTE(review): despite its name, artifact_was_none is set when the
+    # artifact is True, not when it is None.
+    artifact_was_none = False
+    for msg in reversed_messages:
+        # Only ToolMessages produced by get_annotation are considered.
+        if isinstance(msg, ToolMessage) and msg.name == "get_annotation":
+            # The check succeeds when the artifact is True and the
+            # message content mentions IL7.
+            if msg.artifact is True and 'IL7' in msg.content:
+                artifact_was_none = True
+                break
+    assert artifact_was_none
+def test_all_species_annotations(make_graph):
+    '''
+    Test the tool by asking for annotations of all species in specific models.
+    Here, we test the tool with three models since they have different use cases:
+        - model 12 contains a species with no URL provided.
+        - model 20 contains a species without description.
+        - model 56 contains a species with database outside of KEGG, UniProt, and OLS.
+    We are testing a condition where the user asks for annotations
+    of all species in a specific model.
+    '''
+    # Loop through the models and test the tool
+    # for each model's unique use case.
+    for model_id in [12, 20, 56]:
+        # make_graph is unpacked in every iteration, so the same app and
+        # config (and therefore the same thread) serve all three models.
+        app, config = make_graph
+        prompt = f"Extract annotations of all species model {model_id}."
+        # Run the agent with the prompt
+        app.invoke({"messages": [HumanMessage(content=prompt)]},
+                            config=config
+                        )
+        current_state = app.get_state(config)
+        reversed_messages = current_state.values["messages"][::-1]
+        # Set to True once a message matches the expected outcome
+        # for the current model.
+        test_condition = False
+        for msg in reversed_messages:
+            # Skip messages that are not ToolMessages and those that are not
+            # from the get_annotation tool.
+            if not isinstance(msg, ToolMessage) or msg.name != "get_annotation":
+                continue
+            if model_id == 12:
+                # Extract the first and second description from the first
+                # annotation entry (the LacI protein, per the original comment).
+                # We already know that the first or second description is missing ('-')
+                dic_annotations_data = current_state.values["dic_annotations_data"][0]
+                first_descp_laci_protein = dic_annotations_data['data']['Description'][0]
+                second_descp_laci_protein = dic_annotations_data['data']['Description'][1]
+                # Expect a successful extraction (artifact is True) and that the content
+                # matches what is returned by prepare_content_msg([],[]).
+                # And that the first or second description of the LacI protein is missing.
+                if (msg.artifact is True and msg.content == prepare_content_msg([],[])
+                    and msg.status=="success" and (first_descp_laci_protein == '-' or
+                                                    second_descp_laci_protein == '-')):
+                    test_condition = True
+                    break
+            if model_id == 20:
+                # Expect an error message containing a note
+                # that species extraction failed.
+                if ("Unable to extract species from the model"
+                    in msg.content and msg.status == "error"):
+                    test_condition = True
+                    break
+            if model_id == 56:
+                # Expect a successful extraction (artifact is True) and that the content
+                # matches what prepare_content_msg([],['ORI']) returns.
+                if (msg.artifact is True and
+                msg.content == prepare_content_msg([],['ORI'])
+                and msg.status == "success"):
+                    test_condition = True
+                    break
+        assert test_condition # Expected output is validated

aiagents4pharma-1.14.1/aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py ADDED Viewed

@@ -0,0 +1,26 @@
+'''
+Test cases for Talk2Biomodels get_modelinfo tool.
+'''
+from langchain_core.messages import HumanMessage
+from ..agents.t2b_agent import get_app
+def test_get_modelinfo_tool():
+    '''
+    Test the get_modelinfo tool.
+    '''
+    unique_id = 12345
+    app = get_app(unique_id)
+    config = {"configurable": {"thread_id": unique_id}}
+    # Put the path of an SBML file into the state before invoking the
+    # agent, so that the prompt below can refer to "the uploaded model".
+    app.update_state(config,
+      {"sbml_file_path": ["aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml"]})
+    prompt = "Extract all relevant information from the uploaded model."
+    # Invoke the agent with the prompt
+    response = app.invoke(
+                        {"messages": [HumanMessage(content=prompt)]},
+                        config=config
+                    )
+    # The content of the last message in the response
+    assistant_msg = response["messages"][-1].content
+    # Check if the assistant message is a string
+    assert isinstance(assistant_msg, str)

aiagents4pharma 1.13.1__tar.gz → 1.14.1__tar.gz

aiagents4pharma 1.13.1tar.gz → 1.14.1tar.gz