PyPI - agent-starter-pack - Versions diffs - 0.3.3__py3-none-any.whl → 0.21.0__py3-none-any.whl - Mend

agent-starter-pack 0.3.3-py3-none-any.whl → 0.21.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (255) hide show

agents/langgraph_base_react/notebooks/evaluating_langgraph_agent.ipynb → agent_starter_pack/agents/adk_base/notebooks/evaluating_adk_agent.ipynb RENAMED Viewed

@@ -8,7 +8,7 @@
       },
       "outputs": [],
       "source": [
-        "# Copyright 2024 Google LLC\n",
+        "# Copyright 2025 Google LLC\n",
         "#\n",
         "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
         "# you may not use this file except in compliance with the License.\n",
@@ -29,26 +29,26 @@
         "id": "JAPoU8Sm5E6e"
       },
       "source": [
-        "# Evaluating Agents - Evaluate a LangGraph agent with Vertex AI Gen AI Evaluation\n",
+        "# Evaluate your ADK agent using Vertex AI Gen AI Evaluation service\n",
         "\n",
         "<table align=\"left\">\n",
         "  <td style=\"text-align: center\">\n",
-        "    <a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_langgraph_agent.ipynb\">\n",
+        "    <a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\">\n",
         "      <img width=\"32px\" src=\"https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg\" alt=\"Google Colaboratory logo\"><br> Open in Colab\n",
         "    </a>\n",
         "  </td>\n",
         "  <td style=\"text-align: center\">\n",
-        "    <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fevaluation%2Fevaluating_langgraph_agent.ipynb\">\n",
+        "    <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fevaluation%2Fevaluating_adk_agent.ipynb\">\n",
         "      <img width=\"32px\" src=\"https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN\" alt=\"Google Cloud Colab Enterprise logo\"><br> Open in Colab Enterprise\n",
         "    </a>\n",
         "  </td>\n",
         "  <td style=\"text-align: center\">\n",
-        "    <a href=\"https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/evaluation/evaluating_langgraph_agent.ipynb\">\n",
+        "    <a href=\"https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/evaluation/evaluating_adk_agent.ipynb\">\n",
         "      <img src=\"https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg\" alt=\"Vertex AI logo\"><br> Open in Vertex AI Workbench\n",
         "    </a>\n",
         "  </td>\n",
         "  <td style=\"text-align: center\">\n",
-        "    <a href=\"https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_langgraph_agent.ipynb\">\n",
+        "    <a href=\"https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\">\n",
         "      <img width=\"32px\" src=\"https://www.svgrepo.com/download/217753/github.svg\" alt=\"GitHub logo\"><br> View on GitHub\n",
         "    </a>\n",
         "  </td>\n",
@@ -58,23 +58,23 @@
         "\n",
         "<b>Share to:</b>\n",
         "\n",
-        "<a href=\"https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_langgraph_agent.ipynb\" target=\"_blank\">\n",
+        "<a href=\"https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\" target=\"_blank\">\n",
         "  <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg\" alt=\"LinkedIn logo\">\n",
         "</a>\n",
         "\n",
-        "<a href=\"https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_langgraph_agent.ipynb\" target=\"_blank\">\n",
+        "<a href=\"https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\" target=\"_blank\">\n",
         "  <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg\" alt=\"Bluesky logo\">\n",
         "</a>\n",
         "\n",
-        "<a href=\"https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_langgraph_agent.ipynb\" target=\"_blank\">\n",
+        "<a href=\"https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\" target=\"_blank\">\n",
         "  <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg\" alt=\"X logo\">\n",
         "</a>\n",
         "\n",
-        "<a href=\"https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_langgraph_agent.ipynb\" target=\"_blank\">\n",
+        "<a href=\"https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\" target=\"_blank\">\n",
         "  <img width=\"20px\" src=\"https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png\" alt=\"Reddit logo\">\n",
         "</a>\n",
         "\n",
-        "<a href=\"https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_langgraph_agent.ipynb\" target=\"_blank\">\n",
+        "<a href=\"https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\" target=\"_blank\">\n",
         "  <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg\" alt=\"Facebook logo\">\n",
         "</a>"
       ]
@@ -85,9 +85,9 @@
         "id": "84f0f73a0f76"
       },
       "source": [
-        "| | |\n",
-        "|-|-|\n",
-        "| Authors | [Ivan Nardini](https://github.com/inardini) [Naveksha Sood](https://github.com/navekshasood)|"
+        "| Author(s) |\n",
+        "| --- |\n",
+        "| [Ivan Nardini](https://github.com/inardini) |"
       ]
     },
     {
@@ -98,23 +98,17 @@
       "source": [
         "## Overview\n",
         "\n",
-        "Just like any Generative AI application, AI agents require thorough evaluation to ensure they perform reliably and effectively. This evaluation should happen both in real-time (online) and on large datasets of test cases (offline). Developers building agent applications face a significant challenge in evaluating their performance. Both subjective (human feedback) and objective (measurable metrics) evaluations are essential for building trust in agent behavior.\n",
+        "Agent Development Kit (ADK) is a flexible and modular open-source framework for developing and deploying AI agents. While ADK has its own evaluation module, Vertex AI Gen AI Evaluation provides a toolkit of quality-controlled and explainable methods and metrics to evaluate any generative model or application, including agents, and to benchmark the evaluation results against your own judgment, using your own evaluation criteria.\n",
         "\n",
-        "Vertex AI Model Evaluation provides a toolkit of quality controlled and explainable methods and metrics to evaluate any generative model or application, including agents, and benchmark the evaluation results against your own judgment, using your own evaluation criteria.\n",
-        "\n",
-        "This tutorial shows how to evaluate a LangGraph agent using Vertex AI Gen AI Evaluation for agent evaluation.\n",
-        "\n",
-        "The tutorial uses the following Google Cloud services and resources:\n",
-        "\n",
-        "*  Vertex AI Gen AI Evaluation\n",
+        "This tutorial shows how to evaluate an ADK agent using Vertex AI Gen AI Evaluation.\n",
         "\n",
         "The steps performed include:\n",
         "\n",
-        "* Build local agent using LangGraph\n",
+        "* Build local agent using ADK\n",
         "* Prepare Agent Evaluation dataset\n",
         "* Single tool usage evaluation\n",
         "* Trajectory evaluation\n",
-        "* Response evaluation\n"
+        "* Response evaluation"
       ]
     },
     {
@@ -132,7 +126,7 @@
         "id": "No17Cw5hgx12"
       },
       "source": [
-        "### Install Vertex AI SDK and other required packages\n"
+        "### Install ADK, the Vertex AI SDK, and other required packages\n"
       ]
     },
     {
@@ -143,50 +137,8 @@
       },
       "outputs": [],
       "source": [
-        "%pip install --upgrade --user --quiet \"google-cloud-aiplatform[evaluation]\" \\\n",
-        "    \"langchain_google_vertexai\" \\\n",
-        "    \"langgraph\" \\\n",
-        "    \"cloudpickle==3.0.0\" \\\n",
-        "    \"pydantic==2.7.4\" \\\n",
-        "    \"requests\""
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "R5Xep4W9lq-Z"
-      },
-      "source": [
-        "### Restart runtime\n",
-        "\n",
-        "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.\n",
-        "\n",
-        "The restart might take a minute or longer. After it's restarted, continue to the next step."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "XRvKdaPDTznN"
-      },
-      "outputs": [],
-      "source": [
-        "import IPython\n",
-        "\n",
-        "app = IPython.Application.instance()\n",
-        "app.kernel.do_shutdown(True)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "SbmM4z7FOBpM"
-      },
-      "source": [
-        "<div class=\"alert alert-block alert-warning\">\n",
-        "<b>⚠️ The kernel is going to restart. In Colab or Colab Enterprise, you might see an error message that says \"Your session crashed for an unknown reason.\" This is expected. Wait until it's finished before continuing to the next step. ⚠️</b>\n",
-        "</div>\n"
+        "%pip install --upgrade --quiet 'google-adk'\n",
+        "%pip install --upgrade --quiet 'google-cloud-aiplatform[evaluation]'"
       ]
     },
     {
@@ -222,7 +174,7 @@
         "id": "DF4l8DTdWgPY"
       },
       "source": [
-        "### Set Google Cloud project information and initialize Vertex AI SDK\n",
+        "### Set Google Cloud project information\n",
         "\n",
         "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n",
         "\n",
@@ -243,13 +195,21 @@
         "import vertexai\n",
         "\n",
         "PROJECT_ID = \"[your-project-id]\"  # @param {type: \"string\", placeholder: \"[your-project-id]\", isTemplate: true}\n",
-        "\n",
         "if not PROJECT_ID or PROJECT_ID == \"[your-project-id]\":\n",
         "    PROJECT_ID = str(os.environ.get(\"GOOGLE_CLOUD_PROJECT\"))\n",
         "\n",
         "LOCATION = os.environ.get(\"GOOGLE_CLOUD_REGION\", \"us-central1\")\n",
         "\n",
-        "EXPERIMENT_NAME = \"evaluate-langgraph-agent\"  # @param {type:\"string\"}\n",
+        "BUCKET_NAME = \"[your-bucket-name]\"  # @param {type: \"string\", placeholder: \"[your-bucket-name]\", isTemplate: true}\n",
+        "BUCKET_URI = f\"gs://{BUCKET_NAME}\"\n",
+        "\n",
+        "!gsutil mb -l {LOCATION} {BUCKET_URI}\n",
+        "\n",
+        "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = PROJECT_ID\n",
+        "os.environ[\"GOOGLE_CLOUD_LOCATION\"] = LOCATION\n",
+        "os.environ[\"GOOGLE_GENAI_USE_VERTEXAI\"] = \"True\"\n",
+        "\n",
+        "EXPERIMENT_NAME = \"evaluate-adk-agent\"  # @param {type:\"string\"}\n",
         "\n",
         "vertexai.init(project=PROJECT_ID, location=LOCATION, experiment=EXPERIMENT_NAME)"
       ]
@@ -273,23 +233,25 @@
       },
       "outputs": [],
       "source": [
+        "import json\n",
+        "import asyncio\n",
+        "\n",
         "# General\n",
         "import random\n",
         "import string\n",
-        "from typing import Literal\n",
+        "from typing import Any\n",
         "\n",
         "from IPython.display import HTML, Markdown, display\n",
+        "from google.adk.agents import Agent\n",
+        "\n",
+        "# Build agent with adk\n",
+        "from google.adk.events import Event\n",
+        "from google.adk.runners import Runner\n",
+        "from google.adk.sessions import InMemorySessionService\n",
         "\n",
         "# Evaluate agent\n",
         "from google.cloud import aiplatform\n",
-        "from langchain.load import dump as langchain_load_dump\n",
-        "\n",
-        "# Build agent\n",
-        "from langchain_core.messages import BaseMessage, HumanMessage\n",
-        "from langchain_core.tools import tool\n",
-        "from langchain_google_vertexai import ChatVertexAI\n",
-        "from langgraph.graph import END, MessageGraph\n",
-        "from langgraph.prebuilt import ToolNode\n",
+        "from google.genai import types\n",
         "import pandas as pd\n",
         "import plotly.graph_objects as go\n",
         "from vertexai.preview.evaluation import EvalTask\n",
@@ -324,65 +286,62 @@
         "    return \"\".join(random.choices(string.ascii_lowercase + string.digits, k=length))\n",
         "\n",
         "\n",
-        "def parse_messages_to_output_dictionary(messages: list[dict]) -> dict:\n",
-        "    \"\"\"Parse response and function calls from a list of messages in the constructor format.\"\"\"\n",
+        "def parse_adk_output_to_dictionary(events: list[Event], *, as_json: bool = False):\n",
+        "    \"\"\"\n",
+        "    Parse ADK event output into a structured dictionary format,\n",
+        "    with the predicted trajectory returned as a list, or as a JSON string when as_json is True.\n",
         "\n",
-        "    final_output = {\n",
-        "        \"response\": \"No AI response found in the message history.\",\n",
-        "        \"predicted_trajectory\": [],\n",
-        "    }\n",
+        "    \"\"\"\n",
         "\n",
-        "    # Process each message\n",
-        "    function_calls = []\n",
-        "    for message in messages:\n",
-        "        # Check if it's a Tool message which contains the actual response\n",
-        "        if message.get(\"type\") == \"constructor\" and \"ToolMessage\" in message.get(\n",
-        "            \"id\", []\n",
-        "        ):\n",
-        "            final_output[\"response\"] = message[\"kwargs\"][\"content\"]\n",
+        "    final_response = \"\"\n",
+        "    trajectory = []\n",
         "\n",
-        "        # Check if it's an AI message to get tool calls\n",
-        "        elif message.get(\"type\") == \"constructor\" and \"AIMessage\" in message.get(\n",
-        "            \"id\", []\n",
-        "        ):\n",
-        "            tool_calls = message[\"kwargs\"].get(\"tool_calls\", [])\n",
-        "            for tool_call in tool_calls:\n",
-        "                if tool_call:\n",
-        "                    function_calls.append(\n",
-        "                        {\n",
-        "                            \"tool_name\": tool_call.get(\"name\"),\n",
-        "                            \"tool_input\": tool_call.get(\"args\"),\n",
-        "                        }\n",
-        "                    )\n",
+        "    for event in events:\n",
+        "        if not getattr(event, \"content\", None) or not getattr(event.content, \"parts\", None):\n",
+        "            continue\n",
+        "        for part in event.content.parts:\n",
+        "            if getattr(part, \"function_call\", None):\n",
+        "                info = {\n",
+        "                    \"tool_name\": part.function_call.name,\n",
+        "                    \"tool_input\": dict(part.function_call.args),\n",
+        "                }\n",
+        "                if info not in trajectory:\n",
+        "                    trajectory.append(info)\n",
+        "            if event.content.role == \"model\" and getattr(part, \"text\", None):\n",
+        "                final_response = part.text.strip()\n",
+        "\n",
+        "    if as_json:\n",
+        "        trajectory_out = json.dumps(trajectory)\n",
+        "    else:\n",
+        "        trajectory_out = trajectory\n",
         "\n",
-        "    final_output[\"predicted_trajectory\"] = function_calls\n",
-        "    return final_output\n",
+        "    return {\"response\": final_response, \"predicted_trajectory\": trajectory_out}\n",
         "\n",
         "\n",
         "def format_output_as_markdown(output: dict) -> str:\n",
         "    \"\"\"Convert the output dictionary to a formatted markdown string.\"\"\"\n",
-        "    markdown = \"### AI Response\\n\"\n",
-        "    markdown += f\"{output['response']}\\n\\n\"\n",
-        "\n",
+        "    markdown = \"### AI Response\\n\" + output[\"response\"] + \"\\n\\n\"\n",
         "    if output[\"predicted_trajectory\"]:\n",
         "        markdown += \"### Function Calls\\n\"\n",
         "        for call in output[\"predicted_trajectory\"]:\n",
         "            markdown += f\"- **Function**: `{call['tool_name']}`\\n\"\n",
-        "            markdown += \"  - **Arguments**:\\n\"\n",
+        "            markdown += \"  - **Arguments**\\n\"\n",
         "            for key, value in call[\"tool_input\"].items():\n",
         "                markdown += f\"    - `{key}`: `{value}`\\n\"\n",
-        "\n",
         "    return markdown\n",
         "\n",
         "\n",
         "def display_eval_report(eval_result: pd.DataFrame) -> None:\n",
         "    \"\"\"Display the evaluation results.\"\"\"\n",
-        "    metrics_df = pd.DataFrame.from_dict(eval_result.summary_metrics, orient=\"index\").T\n",
         "    display(Markdown(\"### Summary Metrics\"))\n",
-        "    display(metrics_df)\n",
-        "\n",
-        "    display(Markdown(f\"### Row-wise Metrics\"))\n",
-        "    display(eval_result.metrics_table)\n",
+        "    display(\n",
+        "        pd.DataFrame(\n",
+        "            eval_result.summary_metrics.items(), columns=[\"metric\", \"value\"]\n",
+        "        )\n",
+        "    )\n",
+        "    if getattr(eval_result, \"metrics_table\", None) is not None:\n",
+        "        display(Markdown(\"### Row‑wise Metrics\"))\n",
+        "        display(eval_result.metrics_table.head())\n",
         "\n",
         "\n",
         "def display_drilldown(row: pd.Series) -> None:\n",
@@ -528,9 +487,9 @@
         "id": "bDaa2Mtsifmq"
       },
       "source": [
-        "## Build LangGraph agent\n",
+        "## Build ADK agent\n",
         "\n",
-        "Build your application using LangGraph, including the Gemini model, custom tools that you define and a router to control the conversational flow."
+        "Build your application using ADK, including the Gemini model and custom tools that you define."
       ]
     },
     {
@@ -552,7 +511,6 @@
       },
       "outputs": [],
       "source": [
-        "@tool\n",
         "def get_product_details(product_name: str):\n",
         "    \"\"\"Gathers basic details about a product.\"\"\"\n",
         "    details = {\n",
@@ -565,7 +523,6 @@
         "    return details.get(product_name, \"Product details not found.\")\n",
         "\n",
         "\n",
-        "@tool\n",
         "def get_product_price(product_name: str):\n",
         "    \"\"\"Gathers price about a product.\"\"\"\n",
         "    details = {\n",
@@ -578,45 +535,6 @@
         "    return details.get(product_name, \"Product price not found.\")"
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "be70714d9fae"
-      },
-      "source": [
-        "### Define router\n",
-        "\n",
-        "Set up a router to direct conversation flow by selecting the appropriate tool based on user input or interaction state.\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "516b5108d327"
-      },
-      "outputs": [],
-      "source": [
-        "def router(\n",
-        "    state: list[BaseMessage],\n",
-        ") -> Literal[\"get_product_details\", \"get_product_price\", \"__end__\"]:\n",
-        "    \"\"\"Initiates product details or price retrieval if the user asks for a product.\"\"\"\n",
-        "    # Get the tool_calls from the last message in the conversation history.\n",
-        "    tool_calls = state[-1].tool_calls\n",
-        "\n",
-        "    # If there are any tool_calls\n",
-        "    if tool_calls:\n",
-        "        # Check the function name in the first tool call\n",
-        "        function_name = tool_calls[0].get(\"name\")\n",
-        "        if function_name == \"get_product_price\":\n",
-        "            return \"get_product_price\"\n",
-        "        else:\n",
-        "            return \"get_product_details\"\n",
-        "    else:\n",
-        "        # End the conversation flow.\n",
-        "        return \"__end__\""
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -636,7 +554,7 @@
       },
       "outputs": [],
       "source": [
-        "llm = \"gemini-1.5-pro\""
+        "model = \"gemini-2.0-flash\""
       ]
     },
     {
@@ -656,30 +574,53 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "id": "dAFdi7SujGP8"
+        "id": "gD5OB44g4sc3"
       },
       "outputs": [],
       "source": [
-        "def agent_parsed_outcome(input):\n",
-        "\n",
-        "    model = ChatVertexAI(model=llm)\n",
-        "    builder = MessageGraph()\n",
-        "\n",
-        "    model_with_tools = model.bind_tools([get_product_details, get_product_price])\n",
-        "    builder.add_node(\"tools\", model_with_tools)\n",
-        "\n",
-        "    tool_node = ToolNode([get_product_details, get_product_price])\n",
-        "    builder.add_node(\"get_product_details\", tool_node)\n",
-        "    builder.add_node(\"get_product_price\", tool_node)\n",
-        "    builder.add_edge(\"get_product_details\", END)\n",
-        "    builder.add_edge(\"get_product_price\", END)\n",
-        "\n",
-        "    builder.set_entry_point(\"tools\")\n",
-        "    builder.add_conditional_edges(\"tools\", router)\n",
-        "\n",
-        "    app = builder.compile()\n",
-        "    chat_history = langchain_load_dump.dumpd(app.invoke(HumanMessage(input)))\n",
-        "    return parse_messages_to_output_dictionary(chat_history)"
+        "async def agent_parsed_outcome(query):\n",
+        "   app_name = \"product_research_app\"\n",
+        "   user_id = \"user_one\"\n",
+        "   session_id = \"session_one\"\n",
+        "   \n",
+        "   product_research_agent = Agent(\n",
+        "       name=\"ProductResearchAgent\",\n",
+        "       model=model,\n",
+        "       description=\"An agent that performs product research.\",\n",
+        "       instruction=f\"\"\"\n",
+        "       Analyze this user request: '{query}'.\n",
+        "       If the request is about price, use get_product_price tool.\n",
+        "       Otherwise, use get_product_details tool to get product information.\n",
+        "       \"\"\",\n",
+        "       tools=[get_product_details, get_product_price],\n",
+        "   )\n",
+        "\n",
+        "   session_service = InMemorySessionService()\n",
+        "   await session_service.create_session(\n",
+        "       app_name=app_name, user_id=user_id, session_id=session_id\n",
+        "   )\n",
+        "\n",
+        "   runner = Runner(\n",
+        "       agent=product_research_agent, app_name=app_name, session_service=session_service\n",
+        "   )\n",
+        "\n",
+        "   content = types.Content(role=\"user\", parts=[types.Part(text=query)])\n",
+        "   events = [event async for event in runner.run_async(user_id=user_id, session_id=session_id, new_message=content)]\n",
+        "   \n",
+        "   return parse_adk_output_to_dictionary(events)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Sync wrapper for Vertex AI evaluation: runs the async agent and JSON-encodes the predicted trajectory\n",
+        "def agent_parsed_outcome_sync(prompt: str):\n",
+        "    result = asyncio.run(agent_parsed_outcome(prompt))\n",
+        "    result[\"predicted_trajectory\"] = json.dumps(result[\"predicted_trajectory\"])\n",
+        "    return result"
       ]
     },
     {
@@ -701,7 +642,7 @@
       },
       "outputs": [],
       "source": [
-        "response = agent_parsed_outcome(input=\"Get product details for shoes\")\n",
+        "response = await agent_parsed_outcome(query=\"Get product details for shoes\")\n",
         "display(Markdown(format_output_as_markdown(response)))"
       ]
     },
@@ -713,7 +654,7 @@
       },
       "outputs": [],
       "source": [
-        "response = agent_parsed_outcome(input=\"Get product price for shoes\")\n",
+        "response = await agent_parsed_outcome(query=\"Get product price for shoes\")\n",
         "display(Markdown(format_output_as_markdown(response)))"
       ]
     },
@@ -723,7 +664,7 @@
         "id": "aOGPePsorpUl"
       },
       "source": [
-        "## Evaluating a LangGraph agent with Vertex AI Gen AI Evaluation\n",
+        "## Evaluating an ADK agent with Vertex AI Gen AI Evaluation\n",
         "\n",
         "When working with AI agents, it's important to keep track of their performance and how well they're working. You can look at this in two main ways: **monitoring** and **observability**.\n",
         "\n",
@@ -777,7 +718,7 @@
         "        \"Get product details and price for shoes\",\n",
         "        \"Get product details for speaker?\",\n",
         "    ],\n",
-        "    \"reference_trajectory\": [\n",
+        "    \"predicted_trajectory\": [\n",
         "        [\n",
         "            {\n",
         "                \"tool_name\": \"get_product_price\",\n",
@@ -899,10 +840,11 @@
         "    dataset=eval_sample_dataset,\n",
         "    metrics=single_tool_usage_metrics,\n",
         "    experiment=EXPERIMENT_NAME,\n",
+        "    output_uri_prefix=BUCKET_URI + \"/single-metric-eval\",\n",
         ")\n",
         "\n",
         "single_tool_call_eval_result = single_tool_call_eval_task.evaluate(\n",
-        "    runnable=agent_parsed_outcome, experiment_run_name=EXPERIMENT_RUN\n",
+        "    runnable=agent_parsed_outcome_sync, experiment_run_name=EXPERIMENT_RUN\n",
         ")\n",
         "\n",
         "display_eval_report(single_tool_call_eval_result)"
@@ -1003,11 +945,14 @@
         "EXPERIMENT_RUN = f\"trajectory-{get_id()}\"\n",
         "\n",
         "trajectory_eval_task = EvalTask(\n",
-        "    dataset=eval_sample_dataset, metrics=trajectory_metrics, experiment=EXPERIMENT_NAME\n",
+        "    dataset=eval_sample_dataset,\n",
+        "    metrics=trajectory_metrics,\n",
+        "    experiment=EXPERIMENT_NAME,\n",
+        "    output_uri_prefix=BUCKET_URI + \"/multiple-metric-eval\",\n",
         ")\n",
         "\n",
         "trajectory_eval_result = trajectory_eval_task.evaluate(\n",
-        "    runnable=agent_parsed_outcome, experiment_run_name=EXPERIMENT_RUN\n",
+        "    runnable=agent_parsed_outcome_sync, experiment_run_name=EXPERIMENT_RUN\n",
         ")\n",
         "\n",
         "display_eval_report(trajectory_eval_result)"
@@ -1028,7 +973,7 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "id": "sLVRdN5llA0h"
+        "id": "z7-LdM3mLBtk"
       },
       "outputs": [],
       "source": [
@@ -1039,7 +984,7 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "id": "erYYZEaaTNjJ"
+        "id": "sLVRdN5llA0h"
       },
       "outputs": [],
       "source": [
@@ -1107,11 +1052,14 @@
         "EXPERIMENT_RUN = f\"response-{get_id()}\"\n",
         "\n",
         "response_eval_task = EvalTask(\n",
-        "    dataset=eval_sample_dataset, metrics=response_metrics, experiment=EXPERIMENT_NAME\n",
+        "    dataset=eval_sample_dataset,\n",
+        "    metrics=response_metrics,\n",
+        "    experiment=EXPERIMENT_NAME,\n",
+        "    output_uri_prefix=BUCKET_URI + \"/response-metric-eval\",\n",
         ")\n",
         "\n",
         "response_eval_result = response_eval_task.evaluate(\n",
-        "    runnable=agent_parsed_outcome, experiment_run_name=EXPERIMENT_RUN\n",
+        "    runnable=agent_parsed_outcome_sync, experiment_run_name=EXPERIMENT_RUN\n",
         ")\n",
         "\n",
         "display_eval_report(response_eval_result)"
@@ -1120,7 +1068,7 @@
     {
       "cell_type": "markdown",
       "metadata": {
-        "id": "WOP9hW-rTUIU"
+        "id": "JtewTwiwg9qH"
       },
       "source": [
         "#### Visualize evaluation results\n",
@@ -1292,10 +1240,13 @@
         "    dataset=eval_sample_dataset,\n",
         "    metrics=response_tool_metrics,\n",
         "    experiment=EXPERIMENT_NAME,\n",
+        "    output_uri_prefix=BUCKET_URI + \"/reasoning-metric-eval\",\n",
         ")\n",
         "\n",
         "response_eval_tool_result = response_eval_tool_task.evaluate(\n",
-        "    runnable=agent_parsed_outcome, experiment_run_name=EXPERIMENT_RUN\n",
+        "    # Uncomment the line below if you are providing the agent with an unparsed dataset\n",
+        "    #runnable=agent_parsed_outcome_sync, \n",
+        "    experiment_run_name=EXPERIMENT_RUN\n",
         ")\n",
         "\n",
         "display_eval_report(response_eval_tool_result)"
@@ -1323,6 +1274,21 @@
         "display_dataframe_rows(response_eval_tool_result.metrics_table, num_rows=3)"
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "tdVhCURXMdLG"
+      },
+      "outputs": [],
+      "source": [
+        "plot_bar_plot(\n",
+        "    response_eval_tool_result,\n",
+        "    title=\"Response Metrics\",\n",
+        "    metrics=[f\"{metric}/mean\" for metric in response_tool_metrics],\n",
+        ")"
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -1436,15 +1402,22 @@
         "        ],\n",
         "    ],\n",
         "    \"response\": [\n",
-        "        500,\n",
-        "        50,\n",
+        "        \"500\",\n",
+        "        \"50\",\n",
         "        \"A super fast and light usb charger\",\n",
-        "        100,\n",
+        "        \"100\",\n",
         "        \"A voice-controlled smart speaker that plays music, sets alarms, and controls smart home devices.\",\n",
         "    ],\n",
         "}\n",
         "\n",
-        "byod_eval_sample_dataset = pd.DataFrame(byod_eval_data)"
+        "byod_eval_sample_dataset = pd.DataFrame(byod_eval_data)\n",
+        "byod_eval_sample_dataset[\"predicted_trajectory\"] = byod_eval_sample_dataset[\n",
+        "    \"predicted_trajectory\"\n",
+        "].apply(json.dumps)\n",
+        "byod_eval_sample_dataset[\"reference_trajectory\"] = byod_eval_sample_dataset[\n",
+        "    \"reference_trajectory\"\n",
+        "].apply(json.dumps)\n",
+        "byod_eval_sample_dataset[\"response\"] = byod_eval_sample_dataset[\"response\"].apply(json.dumps)"
       ]
     },
     {
@@ -1472,6 +1445,7 @@
         "    dataset=byod_eval_sample_dataset,\n",
         "    metrics=response_tool_metrics,\n",
         "    experiment=EXPERIMENT_NAME,\n",
+        "    output_uri_prefix=BUCKET_URI + \"/byod-eval\",\n",
         ")\n",
         "\n",
         "byod_response_eval_tool_result = byod_response_eval_tool_task.evaluate(\n",
@@ -1487,7 +1461,7 @@
         "id": "9eU3LG6r7q-3"
       },
       "source": [
-        "#### Visualize evaluation results\n",
+        "### Visualize evaluation results\n",
         "\n",
         "Visualize evaluation result sample."
       ]
@@ -1507,13 +1481,13 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "id": "DJr8GqQKTpUa"
+        "id": "84HiPDOkPseW"
       },
       "outputs": [],
       "source": [
         "display_radar_plot(\n",
         "    byod_response_eval_tool_result,\n",
-        "    title=\"Agent evaluation metrics\",\n",
+        "    title=\"ADK agent evaluation\",\n",
         "    metrics=[f\"{metric}/mean\" for metric in response_tool_metrics],\n",
         ")"
       ]
@@ -1521,7 +1495,7 @@
     {
       "cell_type": "markdown",
       "metadata": {
-        "id": "2a4e033321ad"
+        "id": "fIppkS2jq_Dn"
       },
       "source": [
         "## Cleaning up\n"
@@ -1548,7 +1522,7 @@
   ],
   "metadata": {
     "colab": {
-      "name": "evaluating_langgraph_agent.ipynb",
+      "name": "evaluating_adk_agent.ipynb",
       "toc_visible": true
     },
     "kernelspec": {

agent-starter-pack 0.3.3__py3-none-any.whl → 0.21.0__py3-none-any.whl

agent-starter-pack 0.3.3-py3-none-any.whl → 0.21.0-py3-none-any.whl