agent-starter-pack 0.18.2__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_starter_pack/agents/{langgraph_base_react → adk_a2a_base}/.template/templateconfig.yaml +5 -12
- agent_starter_pack/agents/adk_a2a_base/README.md +37 -0
- agent_starter_pack/{frontends/streamlit/frontend/style/app_markdown.py → agents/adk_a2a_base/app/__init__.py} +3 -23
- agent_starter_pack/agents/adk_a2a_base/app/agent.py +70 -0
- agent_starter_pack/agents/adk_a2a_base/notebooks/adk_a2a_app_testing.ipynb +583 -0
- agent_starter_pack/agents/{crewai_coding_crew/notebooks/evaluating_crewai_agent.ipynb → adk_a2a_base/notebooks/evaluating_adk_agent.ipynb} +163 -199
- agent_starter_pack/agents/adk_a2a_base/tests/integration/test_agent.py +58 -0
- agent_starter_pack/agents/adk_base/app/__init__.py +2 -2
- agent_starter_pack/agents/adk_base/app/agent.py +3 -0
- agent_starter_pack/agents/adk_base/notebooks/adk_app_testing.ipynb +13 -28
- agent_starter_pack/agents/adk_live/app/__init__.py +17 -0
- agent_starter_pack/agents/adk_live/app/agent.py +3 -0
- agent_starter_pack/agents/agentic_rag/app/__init__.py +2 -2
- agent_starter_pack/agents/agentic_rag/app/agent.py +3 -0
- agent_starter_pack/agents/agentic_rag/notebooks/adk_app_testing.ipynb +13 -28
- agent_starter_pack/agents/{crewai_coding_crew → langgraph_base}/.template/templateconfig.yaml +12 -9
- agent_starter_pack/agents/langgraph_base/README.md +30 -0
- agent_starter_pack/agents/langgraph_base/app/__init__.py +17 -0
- agent_starter_pack/agents/{langgraph_base_react → langgraph_base}/app/agent.py +4 -4
- agent_starter_pack/agents/{langgraph_base_react → langgraph_base}/tests/integration/test_agent.py +1 -1
- agent_starter_pack/base_template/.gitignore +4 -2
- agent_starter_pack/base_template/Makefile +110 -16
- agent_starter_pack/base_template/README.md +97 -12
- agent_starter_pack/base_template/deployment/terraform/dev/apis.tf +4 -6
- agent_starter_pack/base_template/deployment/terraform/dev/providers.tf +5 -1
- agent_starter_pack/base_template/deployment/terraform/dev/variables.tf +5 -3
- agent_starter_pack/base_template/deployment/terraform/dev/{% if cookiecutter.is_adk %}telemetry.tf{% else %}unused_telemetry.tf{% endif %} +193 -0
- agent_starter_pack/base_template/deployment/terraform/github.tf +16 -9
- agent_starter_pack/base_template/deployment/terraform/locals.tf +7 -7
- agent_starter_pack/base_template/deployment/terraform/providers.tf +5 -1
- agent_starter_pack/base_template/deployment/terraform/sql/completions.sql +138 -0
- agent_starter_pack/base_template/deployment/terraform/storage.tf +0 -9
- agent_starter_pack/base_template/deployment/terraform/variables.tf +15 -19
- agent_starter_pack/base_template/deployment/terraform/{% if cookiecutter.cicd_runner == 'google_cloud_build' %}build_triggers.tf{% else %}unused_build_triggers.tf{% endif %} +20 -22
- agent_starter_pack/base_template/deployment/terraform/{% if cookiecutter.is_adk %}telemetry.tf{% else %}unused_telemetry.tf{% endif %} +206 -0
- agent_starter_pack/base_template/pyproject.toml +5 -17
- agent_starter_pack/base_template/{% if cookiecutter.cicd_runner == 'github_actions' %}.github{% else %}unused_github{% endif %}/workflows/deploy-to-prod.yaml +19 -4
- agent_starter_pack/base_template/{% if cookiecutter.cicd_runner == 'github_actions' %}.github{% else %}unused_github{% endif %}/workflows/staging.yaml +36 -11
- agent_starter_pack/base_template/{% if cookiecutter.cicd_runner == 'google_cloud_build' %}.cloudbuild{% else %}unused_.cloudbuild{% endif %}/deploy-to-prod.yaml +24 -5
- agent_starter_pack/base_template/{% if cookiecutter.cicd_runner == 'google_cloud_build' %}.cloudbuild{% else %}unused_.cloudbuild{% endif %}/staging.yaml +44 -9
- agent_starter_pack/base_template/{{cookiecutter.agent_directory}}/app_utils/telemetry.py +96 -0
- agent_starter_pack/base_template/{{cookiecutter.agent_directory}}/{utils → app_utils}/typing.py +4 -6
- agent_starter_pack/{agents/crewai_coding_crew/app/crew/config/agents.yaml → base_template/{{cookiecutter.agent_directory}}/app_utils/{% if cookiecutter.is_a2a and cookiecutter.agent_name == 'langgraph_base' %}converters{% else %}unused_converters{% endif %}/__init__.py } +9 -23
- agent_starter_pack/base_template/{{cookiecutter.agent_directory}}/app_utils/{% if cookiecutter.is_a2a and cookiecutter.agent_name == 'langgraph_base' %}converters{% else %}unused_converters{% endif %}/part_converter.py +138 -0
- agent_starter_pack/base_template/{{cookiecutter.agent_directory}}/app_utils/{% if cookiecutter.is_a2a and cookiecutter.agent_name == 'langgraph_base' %}executor{% else %}unused_executor{% endif %}/__init__.py +13 -0
- agent_starter_pack/base_template/{{cookiecutter.agent_directory}}/app_utils/{% if cookiecutter.is_a2a and cookiecutter.agent_name == 'langgraph_base' %}executor{% else %}unused_executor{% endif %}/a2a_agent_executor.py +265 -0
- agent_starter_pack/base_template/{{cookiecutter.agent_directory}}/app_utils/{% if cookiecutter.is_a2a and cookiecutter.agent_name == 'langgraph_base' %}executor{% else %}unused_executor{% endif %}/task_result_aggregator.py +152 -0
- agent_starter_pack/cli/commands/create.py +40 -4
- agent_starter_pack/cli/commands/enhance.py +1 -1
- agent_starter_pack/cli/commands/register_gemini_enterprise.py +1070 -0
- agent_starter_pack/cli/main.py +2 -0
- agent_starter_pack/cli/utils/cicd.py +20 -4
- agent_starter_pack/cli/utils/template.py +257 -25
- agent_starter_pack/deployment_targets/agent_engine/tests/integration/test_agent_engine_app.py +113 -16
- agent_starter_pack/deployment_targets/agent_engine/tests/load_test/README.md +2 -2
- agent_starter_pack/deployment_targets/agent_engine/tests/load_test/load_test.py +178 -9
- agent_starter_pack/deployment_targets/agent_engine/tests/{% if cookiecutter.is_a2a %}helpers.py{% else %}unused_helpers.py{% endif %} +138 -0
- agent_starter_pack/deployment_targets/agent_engine/{{cookiecutter.agent_directory}}/agent_engine_app.py +193 -307
- agent_starter_pack/deployment_targets/agent_engine/{{cookiecutter.agent_directory}}/app_utils/deploy.py +414 -0
- agent_starter_pack/deployment_targets/agent_engine/{{cookiecutter.agent_directory}}/{utils → app_utils}/{% if cookiecutter.is_adk_live %}expose_app.py{% else %}unused_expose_app.py{% endif %} +13 -14
- agent_starter_pack/deployment_targets/cloud_run/Dockerfile +4 -1
- agent_starter_pack/deployment_targets/cloud_run/deployment/terraform/dev/service.tf +85 -86
- agent_starter_pack/deployment_targets/cloud_run/deployment/terraform/service.tf +139 -107
- agent_starter_pack/deployment_targets/cloud_run/tests/integration/test_server_e2e.py +228 -12
- agent_starter_pack/deployment_targets/cloud_run/tests/load_test/README.md +4 -4
- agent_starter_pack/deployment_targets/cloud_run/tests/load_test/load_test.py +92 -12
- agent_starter_pack/deployment_targets/cloud_run/{{cookiecutter.agent_directory}}/{server.py → fast_api_app.py} +194 -121
- agent_starter_pack/frontends/adk_live_react/frontend/package-lock.json +18 -18
- agent_starter_pack/frontends/adk_live_react/frontend/src/multimodal-live-types.ts +5 -3
- agent_starter_pack/resources/docs/adk-cheatsheet.md +198 -41
- agent_starter_pack/resources/locks/uv-adk_a2a_base-agent_engine.lock +4966 -0
- agent_starter_pack/resources/locks/uv-adk_a2a_base-cloud_run.lock +5011 -0
- agent_starter_pack/resources/locks/uv-adk_base-agent_engine.lock +1443 -709
- agent_starter_pack/resources/locks/uv-adk_base-cloud_run.lock +1058 -874
- agent_starter_pack/resources/locks/uv-adk_live-agent_engine.lock +1443 -709
- agent_starter_pack/resources/locks/uv-adk_live-cloud_run.lock +1058 -874
- agent_starter_pack/resources/locks/uv-agentic_rag-agent_engine.lock +1568 -749
- agent_starter_pack/resources/locks/uv-agentic_rag-cloud_run.lock +1123 -929
- agent_starter_pack/resources/locks/{uv-langgraph_base_react-agent_engine.lock → uv-langgraph_base-agent_engine.lock} +1714 -1689
- agent_starter_pack/resources/locks/{uv-langgraph_base_react-cloud_run.lock → uv-langgraph_base-cloud_run.lock} +1285 -2374
- agent_starter_pack/utils/watch_and_rebuild.py +1 -1
- {agent_starter_pack-0.18.2.dist-info → agent_starter_pack-0.21.0.dist-info}/METADATA +3 -6
- {agent_starter_pack-0.18.2.dist-info → agent_starter_pack-0.21.0.dist-info}/RECORD +89 -93
- agent_starter_pack-0.21.0.dist-info/entry_points.txt +2 -0
- llm.txt +4 -5
- agent_starter_pack/agents/crewai_coding_crew/README.md +0 -34
- agent_starter_pack/agents/crewai_coding_crew/app/agent.py +0 -47
- agent_starter_pack/agents/crewai_coding_crew/app/crew/config/tasks.yaml +0 -37
- agent_starter_pack/agents/crewai_coding_crew/app/crew/crew.py +0 -71
- agent_starter_pack/agents/crewai_coding_crew/tests/integration/test_agent.py +0 -47
- agent_starter_pack/agents/langgraph_base_react/README.md +0 -9
- agent_starter_pack/agents/langgraph_base_react/notebooks/evaluating_langgraph_agent.ipynb +0 -1574
- agent_starter_pack/base_template/deployment/terraform/dev/log_sinks.tf +0 -69
- agent_starter_pack/base_template/deployment/terraform/log_sinks.tf +0 -79
- agent_starter_pack/base_template/{{cookiecutter.agent_directory}}/utils/tracing.py +0 -155
- agent_starter_pack/cli/utils/register_gemini_enterprise.py +0 -406
- agent_starter_pack/deployment_targets/agent_engine/deployment/terraform/{% if not cookiecutter.is_adk_live %}service.tf{% else %}unused_service.tf{% endif %} +0 -82
- agent_starter_pack/deployment_targets/agent_engine/notebooks/intro_agent_engine.ipynb +0 -1025
- agent_starter_pack/deployment_targets/agent_engine/{{cookiecutter.agent_directory}}/utils/deployment.py +0 -99
- agent_starter_pack/frontends/streamlit/frontend/side_bar.py +0 -214
- agent_starter_pack/frontends/streamlit/frontend/streamlit_app.py +0 -265
- agent_starter_pack/frontends/streamlit/frontend/utils/chat_utils.py +0 -67
- agent_starter_pack/frontends/streamlit/frontend/utils/local_chat_history.py +0 -127
- agent_starter_pack/frontends/streamlit/frontend/utils/message_editing.py +0 -59
- agent_starter_pack/frontends/streamlit/frontend/utils/multimodal_utils.py +0 -217
- agent_starter_pack/frontends/streamlit/frontend/utils/stream_handler.py +0 -310
- agent_starter_pack/frontends/streamlit/frontend/utils/title_summary.py +0 -94
- agent_starter_pack/resources/locks/uv-crewai_coding_crew-agent_engine.lock +0 -6650
- agent_starter_pack/resources/locks/uv-crewai_coding_crew-cloud_run.lock +0 -7825
- agent_starter_pack-0.18.2.dist-info/entry_points.txt +0 -3
- /agent_starter_pack/agents/{crewai_coding_crew → langgraph_base}/notebooks/evaluating_langgraph_agent.ipynb +0 -0
- /agent_starter_pack/base_template/{{cookiecutter.agent_directory}}/{utils → app_utils}/gcs.py +0 -0
- {agent_starter_pack-0.18.2.dist-info → agent_starter_pack-0.21.0.dist-info}/WHEEL +0 -0
- {agent_starter_pack-0.18.2.dist-info → agent_starter_pack-0.21.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
},
|
|
9
9
|
"outputs": [],
|
|
10
10
|
"source": [
|
|
11
|
-
"# Copyright
|
|
11
|
+
"# Copyright 2025 Google LLC\n",
|
|
12
12
|
"#\n",
|
|
13
13
|
"# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
|
|
14
14
|
"# you may not use this file except in compliance with the License.\n",
|
|
@@ -29,26 +29,26 @@
|
|
|
29
29
|
"id": "JAPoU8Sm5E6e"
|
|
30
30
|
},
|
|
31
31
|
"source": [
|
|
32
|
-
"#
|
|
32
|
+
"# Evaluate your ADK agent using Vertex AI Gen AI Evaluation service\n",
|
|
33
33
|
"\n",
|
|
34
34
|
"<table align=\"left\">\n",
|
|
35
35
|
" <td style=\"text-align: center\">\n",
|
|
36
|
-
" <a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main
|
|
36
|
+
" <a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\">\n",
|
|
37
37
|
" <img width=\"32px\" src=\"https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg\" alt=\"Google Colaboratory logo\"><br> Open in Colab\n",
|
|
38
38
|
" </a>\n",
|
|
39
39
|
" </td>\n",
|
|
40
40
|
" <td style=\"text-align: center\">\n",
|
|
41
|
-
" <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fevaluation%
|
|
41
|
+
" <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fevaluation%2Fevaluating_adk_agent.ipynb\">\n",
|
|
42
42
|
" <img width=\"32px\" src=\"https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN\" alt=\"Google Cloud Colab Enterprise logo\"><br> Open in Colab Enterprise\n",
|
|
43
43
|
" </a>\n",
|
|
44
44
|
" </td>\n",
|
|
45
45
|
" <td style=\"text-align: center\">\n",
|
|
46
|
-
" <a href=\"https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/evaluation/
|
|
46
|
+
" <a href=\"https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/evaluation/evaluating_adk_agent.ipynb\">\n",
|
|
47
47
|
" <img src=\"https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg\" alt=\"Vertex AI logo\"><br> Open in Vertex AI Workbench\n",
|
|
48
48
|
" </a>\n",
|
|
49
49
|
" </td>\n",
|
|
50
50
|
" <td style=\"text-align: center\">\n",
|
|
51
|
-
" <a href=\"https://github.com/GoogleCloudPlatform/generative-ai/blob/main
|
|
51
|
+
" <a href=\"https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\">\n",
|
|
52
52
|
" <img width=\"32px\" src=\"https://www.svgrepo.com/download/217753/github.svg\" alt=\"GitHub logo\"><br> View on GitHub\n",
|
|
53
53
|
" </a>\n",
|
|
54
54
|
" </td>\n",
|
|
@@ -58,23 +58,23 @@
|
|
|
58
58
|
"\n",
|
|
59
59
|
"<b>Share to:</b>\n",
|
|
60
60
|
"\n",
|
|
61
|
-
"<a href=\"https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/
|
|
61
|
+
"<a href=\"https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\" target=\"_blank\">\n",
|
|
62
62
|
" <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg\" alt=\"LinkedIn logo\">\n",
|
|
63
63
|
"</a>\n",
|
|
64
64
|
"\n",
|
|
65
|
-
"<a href=\"https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/
|
|
65
|
+
"<a href=\"https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\" target=\"_blank\">\n",
|
|
66
66
|
" <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg\" alt=\"Bluesky logo\">\n",
|
|
67
67
|
"</a>\n",
|
|
68
68
|
"\n",
|
|
69
|
-
"<a href=\"https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/
|
|
69
|
+
"<a href=\"https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\" target=\"_blank\">\n",
|
|
70
70
|
" <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg\" alt=\"X logo\">\n",
|
|
71
71
|
"</a>\n",
|
|
72
72
|
"\n",
|
|
73
|
-
"<a href=\"https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/
|
|
73
|
+
"<a href=\"https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\" target=\"_blank\">\n",
|
|
74
74
|
" <img width=\"20px\" src=\"https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png\" alt=\"Reddit logo\">\n",
|
|
75
75
|
"</a>\n",
|
|
76
76
|
"\n",
|
|
77
|
-
"<a href=\"https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/
|
|
77
|
+
"<a href=\"https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_adk_agent.ipynb\" target=\"_blank\">\n",
|
|
78
78
|
" <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg\" alt=\"Facebook logo\">\n",
|
|
79
79
|
"</a>"
|
|
80
80
|
]
|
|
@@ -85,9 +85,9 @@
|
|
|
85
85
|
"id": "84f0f73a0f76"
|
|
86
86
|
},
|
|
87
87
|
"source": [
|
|
88
|
-
"|
|
|
89
|
-
"
|
|
90
|
-
"|
|
|
88
|
+
"| Author(s) |\n",
|
|
89
|
+
"| --- |\n",
|
|
90
|
+
"| [Ivan Nardini](https://github.com/inardini) |"
|
|
91
91
|
]
|
|
92
92
|
},
|
|
93
93
|
{
|
|
@@ -98,23 +98,17 @@
|
|
|
98
98
|
"source": [
|
|
99
99
|
"## Overview\n",
|
|
100
100
|
"\n",
|
|
101
|
-
"
|
|
101
|
+
"Agent Development Kit (ADK for short) is a flexible and modular open-source framework for developing and deploying AI agents. While ADK has its own evaluation module, Vertex AI Gen AI Evaluation provides a toolkit of quality-controlled and explainable methods and metrics to evaluate any generative model or application, including agents, and to benchmark the evaluation results against your own judgment, using your own evaluation criteria.\n",
|
|
102
102
|
"\n",
|
|
103
|
-
"
|
|
104
|
-
"\n",
|
|
105
|
-
"This tutorial shows how to evaluate a Crew AI agent using Vertex AI Gen AI Evaluation for agent evaluation.\n",
|
|
106
|
-
"\n",
|
|
107
|
-
"The tutorial uses the following Google Cloud services and resources:\n",
|
|
108
|
-
"\n",
|
|
109
|
-
"* Vertex AI Gen AI Evaluation\n",
|
|
103
|
+
"This tutorial shows how to evaluate an ADK agent using Vertex AI Gen AI Evaluation for agent evaluation.\n",
|
|
110
104
|
"\n",
|
|
111
105
|
"The steps performed include:\n",
|
|
112
106
|
"\n",
|
|
113
|
-
"* Build local agent using
|
|
107
|
+
"* Build local agent using ADK\n",
|
|
114
108
|
"* Prepare Agent Evaluation dataset\n",
|
|
115
109
|
"* Single tool usage evaluation\n",
|
|
116
110
|
"* Trajectory evaluation\n",
|
|
117
|
-
"* Response evaluation
|
|
111
|
+
"* Response evaluation"
|
|
118
112
|
]
|
|
119
113
|
},
|
|
120
114
|
{
|
|
@@ -132,7 +126,7 @@
|
|
|
132
126
|
"id": "No17Cw5hgx12"
|
|
133
127
|
},
|
|
134
128
|
"source": [
|
|
135
|
-
"### Install
|
|
129
|
+
"### Install Google Gen AI SDK and other required packages\n"
|
|
136
130
|
]
|
|
137
131
|
},
|
|
138
132
|
{
|
|
@@ -143,49 +137,8 @@
|
|
|
143
137
|
},
|
|
144
138
|
"outputs": [],
|
|
145
139
|
"source": [
|
|
146
|
-
"%pip install --upgrade --
|
|
147
|
-
"
|
|
148
|
-
" \"cloudpickle==3.0.0\" \\\n",
|
|
149
|
-
" \"pydantic==2.7.4\" \\\n",
|
|
150
|
-
" \"requests\""
|
|
151
|
-
]
|
|
152
|
-
},
|
|
153
|
-
{
|
|
154
|
-
"cell_type": "markdown",
|
|
155
|
-
"metadata": {
|
|
156
|
-
"id": "R5Xep4W9lq-Z"
|
|
157
|
-
},
|
|
158
|
-
"source": [
|
|
159
|
-
"### Restart runtime\n",
|
|
160
|
-
"\n",
|
|
161
|
-
"To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.\n",
|
|
162
|
-
"\n",
|
|
163
|
-
"The restart might take a minute or longer. After it's restarted, continue to the next step."
|
|
164
|
-
]
|
|
165
|
-
},
|
|
166
|
-
{
|
|
167
|
-
"cell_type": "code",
|
|
168
|
-
"execution_count": null,
|
|
169
|
-
"metadata": {
|
|
170
|
-
"id": "XRvKdaPDTznN"
|
|
171
|
-
},
|
|
172
|
-
"outputs": [],
|
|
173
|
-
"source": [
|
|
174
|
-
"import IPython\n",
|
|
175
|
-
"\n",
|
|
176
|
-
"app = IPython.Application.instance()\n",
|
|
177
|
-
"app.kernel.do_shutdown(True)"
|
|
178
|
-
]
|
|
179
|
-
},
|
|
180
|
-
{
|
|
181
|
-
"cell_type": "markdown",
|
|
182
|
-
"metadata": {
|
|
183
|
-
"id": "SbmM4z7FOBpM"
|
|
184
|
-
},
|
|
185
|
-
"source": [
|
|
186
|
-
"<div class=\"alert alert-block alert-warning\">\n",
|
|
187
|
-
"<b>⚠️ The kernel is going to restart. In Colab or Colab Enterprise, you might see an error message that says \"Your session crashed for an unknown reason.\" This is expected. Wait until it's finished before continuing to the next step. ⚠️</b>\n",
|
|
188
|
-
"</div>\n"
|
|
140
|
+
"%pip install --upgrade --quiet 'google-adk'\n",
|
|
141
|
+
"%pip install --upgrade --quiet 'google-cloud-aiplatform[evaluation]'"
|
|
189
142
|
]
|
|
190
143
|
},
|
|
191
144
|
{
|
|
@@ -221,7 +174,7 @@
|
|
|
221
174
|
"id": "DF4l8DTdWgPY"
|
|
222
175
|
},
|
|
223
176
|
"source": [
|
|
224
|
-
"### Set Google Cloud project information
|
|
177
|
+
"### Set Google Cloud project information\n",
|
|
225
178
|
"\n",
|
|
226
179
|
"To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n",
|
|
227
180
|
"\n",
|
|
@@ -242,14 +195,21 @@
|
|
|
242
195
|
"import vertexai\n",
|
|
243
196
|
"\n",
|
|
244
197
|
"PROJECT_ID = \"[your-project-id]\" # @param {type: \"string\", placeholder: \"[your-project-id]\", isTemplate: true}\n",
|
|
245
|
-
"\n",
|
|
246
198
|
"if not PROJECT_ID or PROJECT_ID == \"[your-project-id]\":\n",
|
|
247
199
|
" PROJECT_ID = str(os.environ.get(\"GOOGLE_CLOUD_PROJECT\"))\n",
|
|
248
|
-
"os.environ[\"GOOGLE_CLOUD_PROJECT\"] = PROJECT_ID\n",
|
|
249
200
|
"\n",
|
|
250
201
|
"LOCATION = os.environ.get(\"GOOGLE_CLOUD_REGION\", \"us-central1\")\n",
|
|
251
202
|
"\n",
|
|
252
|
-
"
|
|
203
|
+
"BUCKET_NAME = \"[your-bucket-name]\" # @param {type: \"string\", placeholder: \"[your-bucket-name]\", isTemplate: true}\n",
|
|
204
|
+
"BUCKET_URI = f\"gs://{BUCKET_NAME}\"\n",
|
|
205
|
+
"\n",
|
|
206
|
+
"!gsutil mb -l {LOCATION} {BUCKET_URI}\n",
|
|
207
|
+
"\n",
|
|
208
|
+
"os.environ[\"GOOGLE_CLOUD_PROJECT\"] = PROJECT_ID\n",
|
|
209
|
+
"os.environ[\"GOOGLE_CLOUD_LOCATION\"] = LOCATION\n",
|
|
210
|
+
"os.environ[\"GOOGLE_GENAI_USE_VERTEXAI\"] = \"True\"\n",
|
|
211
|
+
"\n",
|
|
212
|
+
"EXPERIMENT_NAME = \"evaluate-adk-agent\" # @param {type:\"string\"}\n",
|
|
253
213
|
"\n",
|
|
254
214
|
"vertexai.init(project=PROJECT_ID, location=LOCATION, experiment=EXPERIMENT_NAME)"
|
|
255
215
|
]
|
|
@@ -273,24 +233,27 @@
|
|
|
273
233
|
},
|
|
274
234
|
"outputs": [],
|
|
275
235
|
"source": [
|
|
236
|
+
"import json\n",
|
|
237
|
+
"import asyncio\n",
|
|
238
|
+
"\n",
|
|
276
239
|
"# General\n",
|
|
277
240
|
"import random\n",
|
|
278
241
|
"import string\n",
|
|
279
|
-
"import
|
|
242
|
+
"from typing import Any\n",
|
|
280
243
|
"\n",
|
|
281
244
|
"from IPython.display import HTML, Markdown, display\n",
|
|
282
|
-
"
|
|
283
|
-
"import plotly.graph_objects as go\n",
|
|
284
|
-
"\n",
|
|
285
|
-
"warnings.filterwarnings(\"ignore\", category=Warning, module=\"opentelemetry.trace\")\n",
|
|
245
|
+
"from google.adk.agents import Agent\n",
|
|
286
246
|
"\n",
|
|
287
|
-
"# Build agent\n",
|
|
288
|
-
"from
|
|
289
|
-
"from
|
|
290
|
-
"from
|
|
247
|
+
"# Build agent with adk\n",
|
|
248
|
+
"from google.adk.events import Event\n",
|
|
249
|
+
"from google.adk.runners import Runner\n",
|
|
250
|
+
"from google.adk.sessions import InMemorySessionService\n",
|
|
291
251
|
"\n",
|
|
292
252
|
"# Evaluate agent\n",
|
|
293
253
|
"from google.cloud import aiplatform\n",
|
|
254
|
+
"from google.genai import types\n",
|
|
255
|
+
"import pandas as pd\n",
|
|
256
|
+
"import plotly.graph_objects as go\n",
|
|
294
257
|
"from vertexai.preview.evaluation import EvalTask\n",
|
|
295
258
|
"from vertexai.preview.evaluation.metrics import (\n",
|
|
296
259
|
" PointwiseMetric,\n",
|
|
@@ -323,51 +286,62 @@
|
|
|
323
286
|
" return \"\".join(random.choices(string.ascii_lowercase + string.digits, k=length))\n",
|
|
324
287
|
"\n",
|
|
325
288
|
"\n",
|
|
326
|
-
"def
|
|
289
|
+
"def parse_adk_output_to_dictionary(events: list[Event], *, as_json: bool = False):\n",
|
|
327
290
|
" \"\"\"\n",
|
|
328
|
-
" Parse
|
|
291
|
+
" Parse ADK event output into a structured dictionary format,\n",
|
|
292
|
+
" with the predicted trajectory dumped as a JSON string.\n",
|
|
293
|
+
"\n",
|
|
329
294
|
" \"\"\"\n",
|
|
330
|
-
"
|
|
331
|
-
"\n",
|
|
332
|
-
"
|
|
333
|
-
"
|
|
334
|
-
"
|
|
335
|
-
"
|
|
336
|
-
"
|
|
337
|
-
"
|
|
295
|
+
"\n",
|
|
296
|
+
" final_response = \"\"\n",
|
|
297
|
+
" trajectory = []\n",
|
|
298
|
+
"\n",
|
|
299
|
+
" for event in events:\n",
|
|
300
|
+
" if not getattr(event, \"content\", None) or not getattr(event.content, \"parts\", None):\n",
|
|
301
|
+
" continue\n",
|
|
302
|
+
" for part in event.content.parts:\n",
|
|
303
|
+
" if getattr(part, \"function_call\", None):\n",
|
|
304
|
+
" info = {\n",
|
|
305
|
+
" \"tool_name\": part.function_call.name,\n",
|
|
306
|
+
" \"tool_input\": dict(part.function_call.args),\n",
|
|
338
307
|
" }\n",
|
|
339
|
-
"
|
|
340
|
-
"
|
|
341
|
-
"
|
|
342
|
-
"
|
|
308
|
+
" if info not in trajectory:\n",
|
|
309
|
+
" trajectory.append(info)\n",
|
|
310
|
+
" if event.content.role == \"model\" and getattr(part, \"text\", None):\n",
|
|
311
|
+
" final_response = part.text.strip()\n",
|
|
343
312
|
"\n",
|
|
344
|
-
"
|
|
313
|
+
" if as_json:\n",
|
|
314
|
+
" trajectory_out = json.dumps(trajectory)\n",
|
|
315
|
+
" else:\n",
|
|
316
|
+
" trajectory_out = trajectory\n",
|
|
317
|
+
"\n",
|
|
318
|
+
" return {\"response\": final_response, \"predicted_trajectory\": trajectory_out}\n",
|
|
345
319
|
"\n",
|
|
346
320
|
"\n",
|
|
347
321
|
"def format_output_as_markdown(output: dict) -> str:\n",
|
|
348
322
|
" \"\"\"Convert the output dictionary to a formatted markdown string.\"\"\"\n",
|
|
349
|
-
" markdown = \"### AI Response\\n\"\n",
|
|
350
|
-
" markdown += f\"{output['response']}\\n\\n\"\n",
|
|
351
|
-
"\n",
|
|
323
|
+
" markdown = \"### AI Response\\n\" + output[\"response\"] + \"\\n\\n\"\n",
|
|
352
324
|
" if output[\"predicted_trajectory\"]:\n",
|
|
353
325
|
" markdown += \"### Function Calls\\n\"\n",
|
|
354
326
|
" for call in output[\"predicted_trajectory\"]:\n",
|
|
355
327
|
" markdown += f\"- **Function**: `{call['tool_name']}`\\n\"\n",
|
|
356
|
-
" markdown += \" - **Arguments
|
|
328
|
+
" markdown += \" - **Arguments**\\n\"\n",
|
|
357
329
|
" for key, value in call[\"tool_input\"].items():\n",
|
|
358
330
|
" markdown += f\" - `{key}`: `{value}`\\n\"\n",
|
|
359
|
-
"\n",
|
|
360
331
|
" return markdown\n",
|
|
361
332
|
"\n",
|
|
362
333
|
"\n",
|
|
363
334
|
"def display_eval_report(eval_result: pd.DataFrame) -> None:\n",
|
|
364
335
|
" \"\"\"Display the evaluation results.\"\"\"\n",
|
|
365
|
-
" metrics_df = pd.DataFrame.from_dict(eval_result.summary_metrics, orient=\"index\").T\n",
|
|
366
336
|
" display(Markdown(\"### Summary Metrics\"))\n",
|
|
367
|
-
" display(
|
|
368
|
-
"\n",
|
|
369
|
-
"
|
|
370
|
-
"
|
|
337
|
+
" display(\n",
|
|
338
|
+
" pd.DataFrame(\n",
|
|
339
|
+
" eval_result.summary_metrics.items(), columns=[\"metric\", \"value\"]\n",
|
|
340
|
+
" )\n",
|
|
341
|
+
" )\n",
|
|
342
|
+
" if getattr(eval_result, \"metrics_table\", None) is not None:\n",
|
|
343
|
+
" display(Markdown(\"### Row‑wise Metrics\"))\n",
|
|
344
|
+
" display(eval_result.metrics_table.head())\n",
|
|
371
345
|
"\n",
|
|
372
346
|
"\n",
|
|
373
347
|
"def display_drilldown(row: pd.Series) -> None:\n",
|
|
@@ -513,9 +487,9 @@
|
|
|
513
487
|
"id": "bDaa2Mtsifmq"
|
|
514
488
|
},
|
|
515
489
|
"source": [
|
|
516
|
-
"## Build
|
|
490
|
+
"## Build ADK agent\n",
|
|
517
491
|
"\n",
|
|
518
|
-
"Build your application using
|
|
492
|
+
"Build your application using ADK, including the Gemini model and custom tools that you define."
|
|
519
493
|
]
|
|
520
494
|
},
|
|
521
495
|
{
|
|
@@ -537,7 +511,6 @@
|
|
|
537
511
|
},
|
|
538
512
|
"outputs": [],
|
|
539
513
|
"source": [
|
|
540
|
-
"@tool\n",
|
|
541
514
|
"def get_product_details(product_name: str):\n",
|
|
542
515
|
" \"\"\"Gathers basic details about a product.\"\"\"\n",
|
|
543
516
|
" details = {\n",
|
|
@@ -550,7 +523,6 @@
|
|
|
550
523
|
" return details.get(product_name, \"Product details not found.\")\n",
|
|
551
524
|
"\n",
|
|
552
525
|
"\n",
|
|
553
|
-
"@tool\n",
|
|
554
526
|
"def get_product_price(product_name: str):\n",
|
|
555
527
|
" \"\"\"Gathers price about a product.\"\"\"\n",
|
|
556
528
|
" details = {\n",
|
|
@@ -563,47 +535,6 @@
|
|
|
563
535
|
" return details.get(product_name, \"Product price not found.\")"
|
|
564
536
|
]
|
|
565
537
|
},
|
|
566
|
-
{
|
|
567
|
-
"cell_type": "markdown",
|
|
568
|
-
"metadata": {
|
|
569
|
-
"id": "be70714d9fae"
|
|
570
|
-
},
|
|
571
|
-
"source": [
|
|
572
|
-
"### Define router using Flow\n",
|
|
573
|
-
"\n",
|
|
574
|
-
"Set up a router to direct conversation flow by selecting the appropriate tool based on user input or interaction state.\n"
|
|
575
|
-
]
|
|
576
|
-
},
|
|
577
|
-
{
|
|
578
|
-
"cell_type": "code",
|
|
579
|
-
"execution_count": null,
|
|
580
|
-
"metadata": {
|
|
581
|
-
"id": "516b5108d327"
|
|
582
|
-
},
|
|
583
|
-
"outputs": [],
|
|
584
|
-
"source": [
|
|
585
|
-
"class ProductFlow(Flow):\n",
|
|
586
|
-
" @start\n",
|
|
587
|
-
" def begin_flow(self):\n",
|
|
588
|
-
" \"\"\"Starts the product information flow\"\"\"\n",
|
|
589
|
-
" return \"check_request\"\n",
|
|
590
|
-
"\n",
|
|
591
|
-
" @listen(\"check_request\")\n",
|
|
592
|
-
" def router(self, state: dict) -> str:\n",
|
|
593
|
-
" \"\"\"Routes the product request to appropriate handler\"\"\"\n",
|
|
594
|
-
" # Get the last message from the state\n",
|
|
595
|
-
" last_message = state.get(\"last_message\", {})\n",
|
|
596
|
-
" tool_calls = last_message.get(\"tool_calls\", [])\n",
|
|
597
|
-
"\n",
|
|
598
|
-
" if tool_calls:\n",
|
|
599
|
-
" function_name = tool_calls[0].get(\"name\")\n",
|
|
600
|
-
" if function_name == \"get_product_price\":\n",
|
|
601
|
-
" return \"get_product_price\"\n",
|
|
602
|
-
" else:\n",
|
|
603
|
-
" return \"get_product_details\"\n",
|
|
604
|
-
" return \"end\""
|
|
605
|
-
]
|
|
606
|
-
},
|
|
607
538
|
{
|
|
608
539
|
"cell_type": "markdown",
|
|
609
540
|
"metadata": {
|
|
@@ -623,7 +554,7 @@
|
|
|
623
554
|
},
|
|
624
555
|
"outputs": [],
|
|
625
556
|
"source": [
|
|
626
|
-
"model = \"
|
|
557
|
+
"model = \"gemini-2.0-flash\""
|
|
627
558
|
]
|
|
628
559
|
},
|
|
629
560
|
{
|
|
@@ -643,38 +574,53 @@
|
|
|
643
574
|
"cell_type": "code",
|
|
644
575
|
"execution_count": null,
|
|
645
576
|
"metadata": {
|
|
646
|
-
"id": "
|
|
577
|
+
"id": "gD5OB44g4sc3"
|
|
647
578
|
},
|
|
648
579
|
"outputs": [],
|
|
649
580
|
"source": [
|
|
650
|
-
"def agent_parsed_outcome(
|
|
651
|
-
"
|
|
652
|
-
"
|
|
653
|
-
"
|
|
654
|
-
"
|
|
655
|
-
"
|
|
656
|
-
"
|
|
657
|
-
"
|
|
658
|
-
"
|
|
659
|
-
"\n",
|
|
660
|
-
"
|
|
661
|
-
"
|
|
662
|
-
"
|
|
663
|
-
"
|
|
664
|
-
"
|
|
665
|
-
"
|
|
666
|
-
"
|
|
667
|
-
"
|
|
668
|
-
"\n",
|
|
669
|
-
"
|
|
670
|
-
"
|
|
671
|
-
"
|
|
672
|
-
"
|
|
673
|
-
"
|
|
674
|
-
"
|
|
675
|
-
"\n",
|
|
676
|
-
"
|
|
677
|
-
"
|
|
581
|
+
"async def agent_parsed_outcome(query):\n",
|
|
582
|
+
" app_name = \"product_research_app\"\n",
|
|
583
|
+
" user_id = \"user_one\"\n",
|
|
584
|
+
" session_id = \"session_one\"\n",
|
|
585
|
+
" \n",
|
|
586
|
+
" product_research_agent = Agent(\n",
|
|
587
|
+
" name=\"ProductResearchAgent\",\n",
|
|
588
|
+
" model=model,\n",
|
|
589
|
+
" description=\"An agent that performs product research.\",\n",
|
|
590
|
+
" instruction=f\"\"\"\n",
|
|
591
|
+
" Analyze this user request: '{query}'.\n",
|
|
592
|
+
" If the request is about price, use get_product_price tool.\n",
|
|
593
|
+
" Otherwise, use get_product_details tool to get product information.\n",
|
|
594
|
+
" \"\"\",\n",
|
|
595
|
+
" tools=[get_product_details, get_product_price],\n",
|
|
596
|
+
" )\n",
|
|
597
|
+
"\n",
|
|
598
|
+
" session_service = InMemorySessionService()\n",
|
|
599
|
+
" await session_service.create_session(\n",
|
|
600
|
+
" app_name=app_name, user_id=user_id, session_id=session_id\n",
|
|
601
|
+
" )\n",
|
|
602
|
+
"\n",
|
|
603
|
+
" runner = Runner(\n",
|
|
604
|
+
" agent=product_research_agent, app_name=app_name, session_service=session_service\n",
|
|
605
|
+
" )\n",
|
|
606
|
+
"\n",
|
|
607
|
+
" content = types.Content(role=\"user\", parts=[types.Part(text=query)])\n",
|
|
608
|
+
" events = [event async for event in runner.run_async(user_id=user_id, session_id=session_id, new_message=content)]\n",
|
|
609
|
+
" \n",
|
|
610
|
+
" return parse_adk_output_to_dictionary(events)\n"
|
|
611
|
+
]
|
|
612
|
+
},
|
|
613
|
+
{
|
|
614
|
+
"cell_type": "code",
|
|
615
|
+
"execution_count": null,
|
|
616
|
+
"metadata": {},
|
|
617
|
+
"outputs": [],
|
|
618
|
+
"source": [
|
|
619
|
+
"# --- Sync wrapper for Vertex‑AI evaluation\n",
|
|
620
|
+
"def agent_parsed_outcome_sync(prompt: str):\n",
|
|
621
|
+
" result = asyncio.run(agent_parsed_outcome(prompt))\n",
|
|
622
|
+
" result[\"predicted_trajectory\"] = json.dumps(result[\"predicted_trajectory\"])\n",
|
|
623
|
+
" return result"
|
|
678
624
|
]
|
|
679
625
|
},
|
|
680
626
|
{
|
|
@@ -696,7 +642,7 @@
|
|
|
696
642
|
},
|
|
697
643
|
"outputs": [],
|
|
698
644
|
"source": [
|
|
699
|
-
"response = agent_parsed_outcome(
|
|
645
|
+
"response = await agent_parsed_outcome(query=\"Get product details for shoes\")\n",
|
|
700
646
|
"display(Markdown(format_output_as_markdown(response)))"
|
|
701
647
|
]
|
|
702
648
|
},
|
|
@@ -708,7 +654,7 @@
|
|
|
708
654
|
},
|
|
709
655
|
"outputs": [],
|
|
710
656
|
"source": [
|
|
711
|
-
"response = agent_parsed_outcome(
|
|
657
|
+
"response = await agent_parsed_outcome(query=\"Get product price for shoes\")\n",
|
|
712
658
|
"display(Markdown(format_output_as_markdown(response)))"
|
|
713
659
|
]
|
|
714
660
|
},
|
|
@@ -718,7 +664,7 @@
|
|
|
718
664
|
"id": "aOGPePsorpUl"
|
|
719
665
|
},
|
|
720
666
|
"source": [
|
|
721
|
-
"## Evaluating a
|
|
667
|
+
"## Evaluating an ADK agent with Vertex AI Gen AI Evaluation\n",
|
|
722
668
|
"\n",
|
|
723
669
|
"When working with AI agents, it's important to keep track of their performance and how well they're working. You can look at this in two main ways: **monitoring** and **observability**.\n",
|
|
724
670
|
"\n",
|
|
@@ -772,7 +718,7 @@
|
|
|
772
718
|
" \"Get product details and price for shoes\",\n",
|
|
773
719
|
" \"Get product details for speaker?\",\n",
|
|
774
720
|
" ],\n",
|
|
775
|
-
" \"
|
|
721
|
+
" \"predicted_trajectory\": [\n",
|
|
776
722
|
" [\n",
|
|
777
723
|
" {\n",
|
|
778
724
|
" \"tool_name\": \"get_product_price\",\n",
|
|
@@ -894,10 +840,11 @@
|
|
|
894
840
|
" dataset=eval_sample_dataset,\n",
|
|
895
841
|
" metrics=single_tool_usage_metrics,\n",
|
|
896
842
|
" experiment=EXPERIMENT_NAME,\n",
|
|
843
|
+
" output_uri_prefix=BUCKET_URI + \"/single-metric-eval\",\n",
|
|
897
844
|
")\n",
|
|
898
845
|
"\n",
|
|
899
846
|
"single_tool_call_eval_result = single_tool_call_eval_task.evaluate(\n",
|
|
900
|
-
" runnable=
|
|
847
|
+
" runnable=agent_parsed_outcome_sync, experiment_run_name=EXPERIMENT_RUN\n",
|
|
901
848
|
")\n",
|
|
902
849
|
"\n",
|
|
903
850
|
"display_eval_report(single_tool_call_eval_result)"
|
|
@@ -998,11 +945,14 @@
|
|
|
998
945
|
"EXPERIMENT_RUN = f\"trajectory-{get_id()}\"\n",
|
|
999
946
|
"\n",
|
|
1000
947
|
"trajectory_eval_task = EvalTask(\n",
|
|
1001
|
-
" dataset=eval_sample_dataset
|
|
948
|
+
" dataset=eval_sample_dataset,\n",
|
|
949
|
+
" metrics=trajectory_metrics,\n",
|
|
950
|
+
" experiment=EXPERIMENT_NAME,\n",
|
|
951
|
+
" output_uri_prefix=BUCKET_URI + \"/multiple-metric-eval\",\n",
|
|
1002
952
|
")\n",
|
|
1003
953
|
"\n",
|
|
1004
954
|
"trajectory_eval_result = trajectory_eval_task.evaluate(\n",
|
|
1005
|
-
" runnable=
|
|
955
|
+
" runnable=agent_parsed_outcome_sync, experiment_run_name=EXPERIMENT_RUN\n",
|
|
1006
956
|
")\n",
|
|
1007
957
|
"\n",
|
|
1008
958
|
"display_eval_report(trajectory_eval_result)"
|
|
@@ -1102,11 +1052,14 @@
|
|
|
1102
1052
|
"EXPERIMENT_RUN = f\"response-{get_id()}\"\n",
|
|
1103
1053
|
"\n",
|
|
1104
1054
|
"response_eval_task = EvalTask(\n",
|
|
1105
|
-
" dataset=eval_sample_dataset
|
|
1055
|
+
" dataset=eval_sample_dataset,\n",
|
|
1056
|
+
" metrics=response_metrics,\n",
|
|
1057
|
+
" experiment=EXPERIMENT_NAME,\n",
|
|
1058
|
+
" output_uri_prefix=BUCKET_URI + \"/response-metric-eval\",\n",
|
|
1106
1059
|
")\n",
|
|
1107
1060
|
"\n",
|
|
1108
1061
|
"response_eval_result = response_eval_task.evaluate(\n",
|
|
1109
|
-
" runnable=
|
|
1062
|
+
" runnable=agent_parsed_outcome_sync, experiment_run_name=EXPERIMENT_RUN\n",
|
|
1110
1063
|
")\n",
|
|
1111
1064
|
"\n",
|
|
1112
1065
|
"display_eval_report(response_eval_result)"
|
|
@@ -1287,10 +1240,13 @@
|
|
|
1287
1240
|
" dataset=eval_sample_dataset,\n",
|
|
1288
1241
|
" metrics=response_tool_metrics,\n",
|
|
1289
1242
|
" experiment=EXPERIMENT_NAME,\n",
|
|
1243
|
+
" output_uri_prefix=BUCKET_URI + \"/reasoning-metric-eval\",\n",
|
|
1290
1244
|
")\n",
|
|
1291
1245
|
"\n",
|
|
1292
1246
|
"response_eval_tool_result = response_eval_tool_task.evaluate(\n",
|
|
1293
|
-
"
|
|
1247
|
+
" # Uncomment the line below if you are providing the agent with an unparsed dataset\n",
|
|
1248
|
+
" #runnable=agent_parsed_outcome_sync, \n",
|
|
1249
|
+
" experiment_run_name=EXPERIMENT_RUN\n",
|
|
1294
1250
|
")\n",
|
|
1295
1251
|
"\n",
|
|
1296
1252
|
"display_eval_report(response_eval_tool_result)"
|
|
@@ -1446,15 +1402,22 @@
|
|
|
1446
1402
|
" ],\n",
|
|
1447
1403
|
" ],\n",
|
|
1448
1404
|
" \"response\": [\n",
|
|
1449
|
-
" 500,\n",
|
|
1450
|
-
" 50,\n",
|
|
1405
|
+
" \"500\",\n",
|
|
1406
|
+
" \"50\",\n",
|
|
1451
1407
|
" \"A super fast and light usb charger\",\n",
|
|
1452
|
-
" 100,\n",
|
|
1408
|
+
" \"100\",\n",
|
|
1453
1409
|
" \"A voice-controlled smart speaker that plays music, sets alarms, and controls smart home devices.\",\n",
|
|
1454
1410
|
" ],\n",
|
|
1455
1411
|
"}\n",
|
|
1456
1412
|
"\n",
|
|
1457
|
-
"byod_eval_sample_dataset = pd.DataFrame(byod_eval_data)"
|
|
1413
|
+
"byod_eval_sample_dataset = pd.DataFrame(byod_eval_data)\n",
|
|
1414
|
+
"byod_eval_sample_dataset[\"predicted_trajectory\"] = byod_eval_sample_dataset[\n",
|
|
1415
|
+
" \"predicted_trajectory\"\n",
|
|
1416
|
+
"].apply(json.dumps)\n",
|
|
1417
|
+
"byod_eval_sample_dataset[\"reference_trajectory\"] = byod_eval_sample_dataset[\n",
|
|
1418
|
+
" \"reference_trajectory\"\n",
|
|
1419
|
+
"].apply(json.dumps)\n",
|
|
1420
|
+
"byod_eval_sample_dataset[\"response\"] = byod_eval_sample_dataset[\"response\"].apply(json.dumps)"
|
|
1458
1421
|
]
|
|
1459
1422
|
},
|
|
1460
1423
|
{
|
|
@@ -1482,6 +1445,7 @@
|
|
|
1482
1445
|
" dataset=byod_eval_sample_dataset,\n",
|
|
1483
1446
|
" metrics=response_tool_metrics,\n",
|
|
1484
1447
|
" experiment=EXPERIMENT_NAME,\n",
|
|
1448
|
+
" output_uri_prefix=BUCKET_URI + \"/byod-eval\",\n",
|
|
1485
1449
|
")\n",
|
|
1486
1450
|
"\n",
|
|
1487
1451
|
"byod_response_eval_tool_result = byod_response_eval_tool_task.evaluate(\n",
|
|
@@ -1497,7 +1461,7 @@
|
|
|
1497
1461
|
"id": "9eU3LG6r7q-3"
|
|
1498
1462
|
},
|
|
1499
1463
|
"source": [
|
|
1500
|
-
"
|
|
1464
|
+
"### Visualize evaluation results\n",
|
|
1501
1465
|
"\n",
|
|
1502
1466
|
"Visualize evaluation result sample."
|
|
1503
1467
|
]
|
|
@@ -1523,7 +1487,7 @@
|
|
|
1523
1487
|
"source": [
|
|
1524
1488
|
"display_radar_plot(\n",
|
|
1525
1489
|
" byod_response_eval_tool_result,\n",
|
|
1526
|
-
" title=\"
|
|
1490
|
+
" title=\"ADK agent evaluation\",\n",
|
|
1527
1491
|
" metrics=[f\"{metric}/mean\" for metric in response_tool_metrics],\n",
|
|
1528
1492
|
")"
|
|
1529
1493
|
]
|
|
@@ -1531,7 +1495,7 @@
|
|
|
1531
1495
|
{
|
|
1532
1496
|
"cell_type": "markdown",
|
|
1533
1497
|
"metadata": {
|
|
1534
|
-
"id": "
|
|
1498
|
+
"id": "fIppkS2jq_Dn"
|
|
1535
1499
|
},
|
|
1536
1500
|
"source": [
|
|
1537
1501
|
"## Cleaning up\n"
|
|
@@ -1558,7 +1522,7 @@
|
|
|
1558
1522
|
],
|
|
1559
1523
|
"metadata": {
|
|
1560
1524
|
"colab": {
|
|
1561
|
-
"name": "
|
|
1525
|
+
"name": "evaluating_adk_agent.ipynb",
|
|
1562
1526
|
"toc_visible": true
|
|
1563
1527
|
},
|
|
1564
1528
|
"kernelspec": {
|