PyPI - aiagents4pharma - Versions diffs - 1.18.0__py3-none-any.whl → 1.19.0__py3-none-any.whl - Mend

aiagents4pharma 1.18.0py3-none-any.whl → 1.19.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

aiagents4pharma/talk2knowledgegraphs/__init__.py CHANGED Viewed

@@ -1,5 +1,8 @@
 '''
 This file is used to import the datasets and utils.
 '''
+from . import agents
 from . import datasets
+from . import states
+from . import tools
 from . import utils

aiagents4pharma/talk2knowledgegraphs/agents/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+'''
+This file is used to import all the models in the package.
+'''
+from . import t2kg_agent

aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py ADDED Viewed

@@ -0,0 +1,85 @@
+'''
+This is the agent file for the Talk2KnowledgeGraphs agent.
+'''
+import logging
+from typing import Annotated
+import hydra
+from langchain_ollama import ChatOllama
+from langchain_core.language_models.chat_models import BaseChatModel
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, StateGraph
+from langgraph.prebuilt import create_react_agent, ToolNode, InjectedState
+from ..tools.subgraph_extraction import SubgraphExtractionTool
+from ..tools.subgraph_summarization import SubgraphSummarizationTool
+from ..tools.graphrag_reasoning import GraphRAGReasoningTool
+from ..states.state_talk2knowledgegraphs import Talk2KnowledgeGraphs
+# Initialize logger
+# NOTE: basicConfig is executed when this module is imported and sets the root
+# logger level to INFO (it is a no-op if the root logger already has handlers).
+logging.basicConfig(level=logging.INFO)
+# Module-level logger named after this module; used by get_app below.
+logger = logging.getLogger(__name__)
+def get_app(uniq_id, llm_model: "BaseChatModel | None" = None):
+    '''
+    Build and compile the LangGraph app for the Talk2KnowledgeGraphs agent.
+    Args:
+        uniq_id: Identifier passed as `thread_id` when the inner agent is invoked.
+        llm_model: Chat model that drives the ReAct agent. When omitted (None),
+            a `ChatOllama(model='llama3.2:1b', temperature=0.0)` instance is created
+            on demand. It is deliberately not built in the signature: a call in a
+            default argument runs once at import time and is shared by every call.
+    Returns:
+        The compiled graph, containing the single node `agent_t2kg`.
+    '''
+    if llm_model is None:
+        llm_model = ChatOllama(model='llama3.2:1b', temperature=0.0)
+    def agent_t2kg_node(state: Annotated[dict, InjectedState]):
+        '''
+        Graph node: forward the current state to the ReAct agent and return its response.
+        '''
+        logger.info("Calling t2kg_agent node with thread_id %s", uniq_id)
+        # `model` is bound further down in get_app; it is resolved when the node runs.
+        response = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
+        return response
+    # Load hydra configuration
+    logger.info("Load Hydra configuration for Talk2KnowledgeGraphs agent.")
+    with hydra.initialize(version_base=None, config_path="../configs"):
+        cfg = hydra.compose(config_name='config',
+                            overrides=['agents/t2kg_agent=default'])
+        # Keep only the section that belongs to this agent
+        cfg = cfg.agents.t2kg_agent
+    # Define the tools
+    subgraph_extraction = SubgraphExtractionTool()
+    subgraph_summarization = SubgraphSummarizationTool()
+    graphrag_reasoning = GraphRAGReasoningTool()
+    tools = ToolNode([
+                    subgraph_extraction,
+                    subgraph_summarization,
+                    graphrag_reasoning,
+                    ])
+    # Create the agent; the system prompt comes from the hydra config (state_modifier)
+    model = create_react_agent(
+                llm_model,
+                tools=tools,
+                state_schema=Talk2KnowledgeGraphs,
+                state_modifier=cfg.state_modifier,
+                checkpointer=MemorySaver()
+            )
+    # Define a new graph
+    workflow = StateGraph(Talk2KnowledgeGraphs)
+    # Register the single node of this graph, which wraps the ReAct agent
+    workflow.add_node("agent_t2kg", agent_t2kg_node)
+    # Set the entrypoint as the first node
+    # This means that this node is the first one called
+    workflow.add_edge(START, "agent_t2kg")
+    # Initialize memory to persist state between graph runs
+    checkpointer = MemorySaver()
+    # Finally, we compile it!
+    # This compiles it into a LangChain Runnable,
+    # meaning you can use it as you would any other runnable.
+    # Note that we're (optionally) passing the memory
+    # when compiling the graph
+    app = workflow.compile(checkpointer=checkpointer)
+    logger.info("Compiled the graph with thread_id %s and llm_model %s",
+                uniq_id,
+                llm_model)
+    return app

aiagents4pharma/talk2knowledgegraphs/configs/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+'''
+Import all the modules in the package
+'''
+from . import agents
+from . import tools
+from . import app

aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+'''
+Import all the modules in the package
+'''

aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml ADDED Viewed

@@ -0,0 +1,62 @@
+_target_: agents.t2kg_agent.get_app
+state_modifier: >
+  You are talk2knowledgegraphs agent, a helpful assistant for reasoning over knowledge graphs.
+  User can ask questions related to the knowledge graphs, and you will provide the answers using
+  the provided tools as follows (if necessary):
+  [`subgraph_extraction`, `subgraph_summarization`, `graphrag_reasoning`].
+  **Tools Descriptions**:
+  - `subgraph_extraction`: Extract a subgraph from the knowledge graph that contains the relevant
+    information to answer the user's query. This tool can be used to provide a subgraph context
+    as a part of the reasoning process. The extracted subgraph should contain the most relevant
+    nodes and edges to the user's query in the form of a textualized subgraph.
+  - `subgraph_summarization`: Summarize the extracted textualized subgraph obtained from the
+    `subgraph_extraction` tool. This tool can be used to provide a concise and informative summary
+    of the subgraph to be used for reasoning as subgraph context. This tool highlights the most
+    important nodes and edges in the subgraph to respond to the user's request.
+  - `graphrag_reasoning`: Reason over the extracted textualized subgraph to answer the user's
+    prompt by also considering the context from the extracted subgraph and the retrieved
+    documents. User may also have a set of uploaded files that can be used to provide additional
+    information for reasoning. The history of previous conversations should be considered as well,
+    and you as an agent should provide which conversations can be included as chat history.
+  As an agent, you should approach each request by first understanding the user's query and then
+  following the appropriate steps to provide the best answer possible.
+  **Execution Steps**:
+  - Understand thoroughly the user's query and think over the best approach to answer it.
+  - You may not need to call any tool for each user's query. Use the related tool(s) as needed.
+    Think deeply whether it is necessary to call any tool to respond to the user's request.
+  - Call `subgraph_extraction` if there is any indication that the user needs to get the
+    information from the knowledge graph, which is not directly available as context in the prompt or
+    in the previous extracted subgraph.
+    If the user asks for subgraph extraction, suggest a value for the `extraction_name` argument.
+    You should always follow it with `subgraph_summarization` as the next tool to be invoked.
+  - Call `subgraph_summarization` tool to summarize the extracted subgraph and provide
+    useful insights over the subgraph. This tool also has the ability to filter endotypes
+    in the forms of differentially expressed genes that are relevant to the input query. Make sure
+    to include the most relevant genes if the user provides endotype-related documents.
+    The summary of the subgraph will be stored as `graph_summary` in the state in which you can use
+    it for reasoning over the subgraph in the `graphrag_reasoning` tool afterwards.
+  - If the user asks follow-up questions related to the extracted subgraph, you should
+    call `subgraph_summarization` followed by `graphrag_reasoning` tools if you think
+    the answer can be retrieved from the previously extracted subgraph.
+  - Call `graphrag_reasoning` tool to reason over the extracted subgraph and documents.
+    Always perform reasoning over the extracted subgraph and documents to provide
+    the best possible answer to the user's query. Before calling this tool,
+    make sure you have access to the summarized subgraph obtained from `subgraph_summarization` tool.
+  - By default, if the user asks for a specific question about the extracted graph, you should
+    call `subgraph_summarization` followed by `graphrag_reasoning` if the most recent subgraphs
+    contain the relevant information to answer the user's question.
+    Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
+  - It is strongly recommended to avoid calling the same tool multiple times unless
+    it is necessary to get the correct and thorough answer to the user's request.
+  **Tool Calling Workflow Examples**:
+  - `subgraph_extraction` -> `subgraph_summarization` -> `graphrag_reasoning` when the user asks
+  for specific instructions to extract the subgraph and reason over it. Follow this order to
+  provide the most accurate and relevant information if you think the currently available context
+  is not enough to answer the user's question.
+  - `subgraph_summarization` -> `graphrag_reasoning` when the user asks for the previously extracted
+  subgraph. Use the summarized subgraph as the subgraph context in the `graphrag_reasoning` tool.
+  - Do not call `graphrag_reasoning` tool without calling `subgraph_summarization` tool first.

aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+'''
+Import all the modules in the package
+'''
+from . import frontend

aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+'''
+Import all the modules in the package
+'''

aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml ADDED Viewed

@@ -0,0 +1,31 @@
+_target_: app.frontend.streamlit_app_talk2knowledgegraphs
+default_user: "talk2kg_user"
+data_package_allowed_file_types:
+  - "pdf"
+endotype_allowed_file_types:
+  - "pdf"
+upload_data_dir: "../files"
+kg_name: "PrimeKG"
+kg_pyg_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/primekg_ibd_pyg_graph.pkl"
+kg_text_path: "aiagents4pharma/talk2knowledgegraphs/tests/files/primekg_ibd_text_graph.pkl"
+openai_api_key: ${oc.env:OPENAI_API_KEY}
+openai_llms:
+  - "gpt-4o-mini"
+  - "gpt-4-turbo"
+  - "gpt-3.5-turbo"
+openai_embeddings:
+  - "text-embedding-3-small"
+ollama_llms:
+  - "llama3.2:1b"
+  - "llama3.2"
+  - "llama3.1"
+ollama_embeddings:
+  - "nomic-embed-text"
+temperature: 0.1
+streaming: False
+reasoning_subgraph_topk_nodes: 10
+reasoning_subgraph_topk_nodes_min: 1
+reasoning_subgraph_topk_nodes_max: 100
+reasoning_subgraph_topk_edges: 10
+reasoning_subgraph_topk_edges_min: 1
+reasoning_subgraph_topk_edges_max: 100

aiagents4pharma/talk2knowledgegraphs/configs/config.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+defaults:
+  - _self_
+  - agents/t2kg_agent: default
+  - tools/subgraph_extraction: default
+  - tools/subgraph_summarization: default
+  - tools/graphrag_reasoning: default
+  - app/frontend: default

aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+'''
+Import all the modules in the package
+'''
+from . import subgraph_extraction
+from . import subgraph_summarization
+from . import graphrag_reasoning

aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+'''
+Import all the modules in the package
+'''

aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml ADDED Viewed

@@ -0,0 +1,24 @@
+_target_: talk2knowledgegraphs.tools.graphrag_reasoning
+splitter_chunk_size: 1024
+splitter_chunk_overlap: 256
+retriever_search_type: "mmr"
+retriever_k: 3
+retriever_fetch_k: 10
+retriever_lambda_mult: 0.3
+prompt_graphrag_w_docs_context: >
+  Given a chat history and the latest user question, which might reference context
+  in the chat history, formulate a standalone question that can be understood
+  without the chat history. Do NOT answer the question, just reformulate it if needed
+  and otherwise return it as is.
+  Question: {input}
+prompt_graphrag_w_docs: >
+  You are talk2knowledgegraphs, a helpful assistant performing retrieval-augmented generation (RAG)
+  over knowledge graphs.
+  One of your tasks is to answer react-based questions by using the following pieces of
+  retrieved context to answer the question. You can leverage a summarization of the subgraph
+  and the retrieved documents to provide the best possible answer to the user's query.
+  Subgraph Summary: {subgraph_summary}
+  Context: {context}
+  Question: {input}

aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+'''
+Import all the modules in the package
+'''

aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml ADDED Viewed

@@ -0,0 +1,43 @@
+_target_: talk2knowledgegraphs.tools.subgraph_extraction
+ollama_embeddings:
+  - "nomic-embed-text"
+temperature: 0.1
+streaming: False
+topk: 5
+topk_e: 5
+cost_e: 0.5
+c_const: 0.01
+root: -1
+num_clusters: 1
+pruning: "gw"
+verbosity_level: 0
+node_id_column: "node_id"
+node_attr_column: "node_attr"
+edge_src_column: "edge_src"
+edge_attr_column: "edge_attr"
+edge_dst_column: "edge_dst"
+prompt_endotype_filtering: >
+  You are talk2knowledgegraphs agent, a helpful assistant in filtering the most relevant endotype
+  for the subgraph extraction process.
+  Given the retrieved endotype documents, you need to filter the most relevant
+  endotype that will be used for the following reasoning process.
+  Only include a list of genes that exist in the provided documents
+  that are relevant to the input query.
+  For this task, you may modify your prompt to optimize the filtering process
+  based on factual information between each gene in the documents and the input query.
+  Discover as many genes as possible that are relevant for enriching the subgraph extraction process.
+  You do not need to include any other information in the output.
+  Use the following output format:
+  [gene_1, gene_2, ..., gene_n]
+  {context}
+  Input: {input}
+prompt_endotype_addition: >
+   Include the following endotype for the subgraph extraction process:
+splitter_chunk_size: 64
+splitter_chunk_overlap: 16
+retriever_search_type: "mmr"
+retriever_k: 3
+retriever_fetch_k: 10
+retriever_lambda_mult: 0.3

aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+'''
+Import all the modules in the package
+'''

aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml ADDED Viewed

@@ -0,0 +1,9 @@
+_target_: talk2knowledgegraphs.tools.subgraph_summarization
+prompt_subgraph_summarization: >
+  You are talk2knowledgegraphs agent, a helpful assistant in reasoning over biomedical knowledge graph.
+  Your task is to summarize the extracted textualized subgraph to provide a concise and informative
+  summary of the subgraph to be used for reasoning as subgraph context. You are responsible for
+  highlighting the most important nodes and edges in the subgraph to respond to the user's question.
+  Textualized Subgraph: {textualized_subgraph}
+  Question: {input}

aiagents4pharma/talk2knowledgegraphs/states/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+'''
+This file is used to import all the models in the package.
+'''
+from . import state_talk2knowledgegraphs

aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""
+This is the state file for the Talk2KnowledgeGraphs agent.
+"""
+from typing import Annotated
+# import operator
+from langchain_core.embeddings.embeddings import Embeddings
+from langchain_core.language_models.chat_models import BaseChatModel
+from langgraph.prebuilt.chat_agent_executor import AgentState
+def add_data(data1: list[dict], data2: list[dict]) -> list[dict]:
+    """
+    A reducer function to merge two lists of dictionaries by their "name" entry.
+    Args:
+        data1: Existing entries; each one must have a "name" key. Not modified.
+        data2: Incoming entries; each one must have a "name" key.
+    Returns:
+        A new list holding the entries of `data1` in their original order, where
+        every entry whose name also occurs in `data2` is replaced by the incoming
+        one, followed by the entries of `data2` whose names are new. When a name
+        occurs more than once in `data2`, the last occurrence wins.
+    Raises:
+        KeyError: If an entry has no "name" key.
+    """
+    # Shallow copy so that the caller's list is left untouched
+    merged = list(data1)
+    idx_by_name = {entry["name"]: idx for idx, entry in enumerate(merged)}
+    for entry in data2:
+        idx = idx_by_name.get(entry["name"])
+        if idx is not None:
+            merged[idx] = entry
+        else:
+            # Record the position of the appended entry so that a later entry of
+            # `data2` with the same name replaces it instead of being duplicated
+            idx_by_name[entry["name"]] = len(merged)
+            merged.append(entry)
+    return merged
+class Talk2KnowledgeGraphs(AgentState):
+    """
+    The state for the Talk2KnowledgeGraphs agent.
+    Extends `AgentState` with the model handles, the uploaded files, the top-k
+    settings and the source/extracted graph records declared below.
+    """
+    # Chat model kept in the state
+    llm_model: BaseChatModel
+    # Embedding model kept in the state
+    embedding_model: Embeddings
+    # Uploaded files (presumably one dict of file metadata per file; verify against callers)
+    uploaded_files: list
+    # Top-k settings for nodes and edges (presumably read by the subgraph tools; TODO confirm)
+    topk_nodes: int
+    topk_edges: int
+    # Graph records; `add_data` is attached as the reducer, so updates are merged
+    # by the "name" key of each record instead of overwriting the whole list
+    dic_source_graph: Annotated[list[dict], add_data]
+    dic_extracted_graph: Annotated[list[dict], add_data]

aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py ADDED Viewed

@@ -0,0 +1,110 @@
+"""
+Test cases for agents/t2kg_agent.py
+"""
+import pytest
+from langchain_core.messages import HumanMessage
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+from ..agents.t2kg_agent import get_app
+# Define the data path
+# Relative directory holding the files referenced by the fixture below (PDF
+# documents and pickled graphs); it resolves against the current working directory.
+DATA_PATH = "aiagents4pharma/talk2knowledgegraphs/tests/files"
+@pytest.fixture(name="input_dict")
+def input_dict_fixture():
+    """
+    Provide the initial state dictionary shared by the test cases.
+    The models are left as None; each test case fills them in.
+    """
+    # (file name, file type) of every uploaded document
+    documents = (
+        ("adalimumab.pdf", "drug_data"),
+        ("DGE_human_Colon_UC-vs-Colon_Control.pdf", "endotype"),
+    )
+    uploaded_files = [
+        {
+            "file_name": doc_name,
+            "file_path": f"{DATA_PATH}/{doc_name}",
+            "file_type": doc_type,
+            "uploaded_by": "VPEUser",
+            "uploaded_timestamp": "2024-11-05 00:00:00",
+        }
+        for doc_name, doc_type in documents
+    ]
+    # The only source graph available to the agent
+    source_graph = {
+        "name": "PrimeKG",
+        "kg_pyg_path": f"{DATA_PATH}/primekg_ibd_pyg_graph.pkl",
+        "kg_text_path": f"{DATA_PATH}/primekg_ibd_text_graph.pkl",
+    }
+    return {
+        "llm_model": None,  # TBA for each test case
+        "embedding_model": None,  # TBA for each test case
+        "uploaded_files": uploaded_files,
+        "topk_nodes": 3,
+        "topk_edges": 3,
+        "dic_source_graph": [source_graph],
+        "dic_extracted_graph": [],
+    }
+def test_t2kg_agent_openai(input_dict):
+    """
+    Test the T2KG agent end to end using OpenAI models.
+    The agent is asked to extract, summarize and reason over a subgraph, after
+    which both the final assistant message and the stored extracted subgraph
+    are checked.
+    NOTE(review): instantiates ChatOpenAI and OpenAIEmbeddings, so this test
+    presumably needs OpenAI credentials and network access - confirm.
+    Args:
+        input_dict: Input dictionary
+    """
+    # Prepare LLM and embedding model
+    input_dict["llm_model"] = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
+    input_dict["embedding_model"] = OpenAIEmbeddings(model="text-embedding-3-small")
+    # Setup the app
+    unique_id = 12345
+    app = get_app(unique_id, llm_model=input_dict["llm_model"])
+    # The same thread_id is used for the state update, the invocation and the state read
+    config = {"configurable": {"thread_id": unique_id}}
+    # Update state
+    app.update_state(
+        config,
+        input_dict,
+    )
+    prompt = """
+    Adalimumab is a fully human monoclonal antibody (IgG1)
+    that specifically binds to tumor necrosis factor-alpha (TNF-α), a pro-inflammatory cytokine.
+    I would like to get evidence from the knowledge graph about the mechanism of actions related to
+    Adalimumab in treating inflammatory bowel disease
+    (IBD). Please follow these steps:
+    - Extract a subgraph from the PrimeKG that contains information about Adalimumab.
+    - Summarize the extracted subgraph.
+    - Reason about the mechanism of action of Adalimumab in treating IBD.
+    Please set the extraction name for the extraction process as `subkg_12345`.
+    """
+    # Invoke the agent with the prompt
+    response = app.invoke({"messages": [HumanMessage(content=prompt)]}, config=config)
+    # Check assistant message
+    assistant_msg = response["messages"][-1].content
+    assert isinstance(assistant_msg, str)
+    # Check extracted subgraph dictionary
+    current_state = app.get_state(config)
+    # The fixture starts with an empty list, so the first entry is the one produced by this run
+    dic_extracted_graph = current_state.values["dic_extracted_graph"][0]
+    assert isinstance(dic_extracted_graph, dict)
+    # The name must match the extraction name requested in the prompt
+    assert dic_extracted_graph["name"] == "subkg_12345"
+    assert dic_extracted_graph["graph_source"] == "PrimeKG"
+    # The top-k values must match the ones set in the fixture
+    assert dic_extracted_graph["topk_nodes"] == 3
+    assert dic_extracted_graph["topk_edges"] == 3
+    assert isinstance(dic_extracted_graph["graph_dict"], dict)
+    assert len(dic_extracted_graph["graph_dict"]["nodes"]) > 0
+    assert len(dic_extracted_graph["graph_dict"]["edges"]) > 0
+    assert isinstance(dic_extracted_graph["graph_text"], str)
+    # Check summarized subgraph
+    assert isinstance(dic_extracted_graph["graph_summary"], str)
+    # Check reasoning results
+    assert "Adalimumab" in assistant_msg
+    assert "TNF" in assistant_msg

aiagents4pharma 1.18.0__py3-none-any.whl → 1.19.0__py3-none-any.whl

aiagents4pharma 1.18.0py3-none-any.whl → 1.19.0py3-none-any.whl