google-adk 0.5.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. google/adk/agents/base_agent.py +76 -30
  2. google/adk/agents/callback_context.py +2 -6
  3. google/adk/agents/llm_agent.py +122 -30
  4. google/adk/agents/loop_agent.py +1 -1
  5. google/adk/agents/parallel_agent.py +7 -0
  6. google/adk/agents/readonly_context.py +8 -0
  7. google/adk/agents/run_config.py +1 -1
  8. google/adk/agents/sequential_agent.py +31 -0
  9. google/adk/agents/transcription_entry.py +4 -2
  10. google/adk/artifacts/gcs_artifact_service.py +1 -1
  11. google/adk/artifacts/in_memory_artifact_service.py +1 -1
  12. google/adk/auth/auth_credential.py +10 -2
  13. google/adk/auth/auth_preprocessor.py +7 -1
  14. google/adk/auth/auth_tool.py +3 -4
  15. google/adk/cli/agent_graph.py +5 -5
  16. google/adk/cli/browser/index.html +4 -4
  17. google/adk/cli/browser/{main-ULN5R5I5.js → main-PKDNKWJE.js} +59 -60
  18. google/adk/cli/browser/polyfills-B6TNHZQ6.js +17 -0
  19. google/adk/cli/cli.py +10 -9
  20. google/adk/cli/cli_deploy.py +7 -2
  21. google/adk/cli/cli_eval.py +109 -115
  22. google/adk/cli/cli_tools_click.py +179 -67
  23. google/adk/cli/fast_api.py +248 -197
  24. google/adk/cli/utils/agent_loader.py +137 -0
  25. google/adk/cli/utils/cleanup.py +40 -0
  26. google/adk/cli/utils/common.py +23 -0
  27. google/adk/cli/utils/evals.py +83 -0
  28. google/adk/cli/utils/logs.py +8 -5
  29. google/adk/code_executors/__init__.py +3 -1
  30. google/adk/code_executors/built_in_code_executor.py +52 -0
  31. google/adk/code_executors/code_execution_utils.py +2 -1
  32. google/adk/code_executors/container_code_executor.py +0 -1
  33. google/adk/code_executors/vertex_ai_code_executor.py +6 -8
  34. google/adk/evaluation/__init__.py +1 -1
  35. google/adk/evaluation/agent_evaluator.py +168 -128
  36. google/adk/evaluation/eval_case.py +104 -0
  37. google/adk/evaluation/eval_metrics.py +74 -0
  38. google/adk/evaluation/eval_result.py +86 -0
  39. google/adk/evaluation/eval_set.py +39 -0
  40. google/adk/evaluation/eval_set_results_manager.py +47 -0
  41. google/adk/evaluation/eval_sets_manager.py +43 -0
  42. google/adk/evaluation/evaluation_generator.py +88 -113
  43. google/adk/evaluation/evaluator.py +58 -0
  44. google/adk/evaluation/local_eval_set_results_manager.py +113 -0
  45. google/adk/evaluation/local_eval_sets_manager.py +264 -0
  46. google/adk/evaluation/response_evaluator.py +106 -1
  47. google/adk/evaluation/trajectory_evaluator.py +84 -2
  48. google/adk/events/event.py +6 -1
  49. google/adk/events/event_actions.py +6 -1
  50. google/adk/examples/base_example_provider.py +1 -0
  51. google/adk/examples/example_util.py +3 -2
  52. google/adk/flows/llm_flows/_code_execution.py +9 -1
  53. google/adk/flows/llm_flows/audio_transcriber.py +4 -3
  54. google/adk/flows/llm_flows/base_llm_flow.py +58 -21
  55. google/adk/flows/llm_flows/contents.py +3 -1
  56. google/adk/flows/llm_flows/functions.py +9 -8
  57. google/adk/flows/llm_flows/instructions.py +18 -80
  58. google/adk/flows/llm_flows/single_flow.py +2 -2
  59. google/adk/memory/__init__.py +1 -1
  60. google/adk/memory/_utils.py +23 -0
  61. google/adk/memory/base_memory_service.py +23 -21
  62. google/adk/memory/in_memory_memory_service.py +57 -25
  63. google/adk/memory/memory_entry.py +37 -0
  64. google/adk/memory/vertex_ai_rag_memory_service.py +38 -15
  65. google/adk/models/anthropic_llm.py +16 -9
  66. google/adk/models/base_llm.py +2 -1
  67. google/adk/models/base_llm_connection.py +2 -0
  68. google/adk/models/gemini_llm_connection.py +11 -11
  69. google/adk/models/google_llm.py +12 -2
  70. google/adk/models/lite_llm.py +80 -23
  71. google/adk/models/llm_response.py +16 -3
  72. google/adk/models/registry.py +1 -1
  73. google/adk/runners.py +98 -42
  74. google/adk/sessions/__init__.py +1 -1
  75. google/adk/sessions/_session_util.py +2 -1
  76. google/adk/sessions/base_session_service.py +6 -33
  77. google/adk/sessions/database_session_service.py +57 -67
  78. google/adk/sessions/in_memory_session_service.py +106 -24
  79. google/adk/sessions/session.py +3 -0
  80. google/adk/sessions/vertex_ai_session_service.py +44 -51
  81. google/adk/telemetry.py +7 -2
  82. google/adk/tools/__init__.py +4 -7
  83. google/adk/tools/_memory_entry_utils.py +30 -0
  84. google/adk/tools/agent_tool.py +10 -10
  85. google/adk/tools/apihub_tool/apihub_toolset.py +55 -74
  86. google/adk/tools/apihub_tool/clients/apihub_client.py +10 -3
  87. google/adk/tools/apihub_tool/clients/secret_client.py +1 -0
  88. google/adk/tools/application_integration_tool/application_integration_toolset.py +111 -85
  89. google/adk/tools/application_integration_tool/clients/connections_client.py +28 -1
  90. google/adk/tools/application_integration_tool/clients/integration_client.py +7 -5
  91. google/adk/tools/application_integration_tool/integration_connector_tool.py +69 -26
  92. google/adk/tools/base_toolset.py +96 -0
  93. google/adk/tools/bigquery/__init__.py +28 -0
  94. google/adk/tools/bigquery/bigquery_credentials.py +216 -0
  95. google/adk/tools/bigquery/bigquery_tool.py +116 -0
  96. google/adk/tools/{built_in_code_execution_tool.py → enterprise_search_tool.py} +17 -11
  97. google/adk/tools/function_parameter_parse_util.py +9 -2
  98. google/adk/tools/function_tool.py +33 -3
  99. google/adk/tools/get_user_choice_tool.py +1 -0
  100. google/adk/tools/google_api_tool/__init__.py +24 -70
  101. google/adk/tools/google_api_tool/google_api_tool.py +12 -6
  102. google/adk/tools/google_api_tool/{google_api_tool_set.py → google_api_toolset.py} +57 -55
  103. google/adk/tools/google_api_tool/google_api_toolsets.py +108 -0
  104. google/adk/tools/google_api_tool/googleapi_to_openapi_converter.py +40 -42
  105. google/adk/tools/google_search_tool.py +2 -2
  106. google/adk/tools/langchain_tool.py +96 -49
  107. google/adk/tools/load_memory_tool.py +14 -5
  108. google/adk/tools/mcp_tool/__init__.py +3 -2
  109. google/adk/tools/mcp_tool/conversion_utils.py +6 -2
  110. google/adk/tools/mcp_tool/mcp_session_manager.py +80 -69
  111. google/adk/tools/mcp_tool/mcp_tool.py +35 -32
  112. google/adk/tools/mcp_tool/mcp_toolset.py +99 -194
  113. google/adk/tools/openapi_tool/auth/credential_exchangers/base_credential_exchanger.py +1 -3
  114. google/adk/tools/openapi_tool/auth/credential_exchangers/service_account_exchanger.py +6 -7
  115. google/adk/tools/openapi_tool/common/common.py +5 -1
  116. google/adk/tools/openapi_tool/openapi_spec_parser/__init__.py +7 -2
  117. google/adk/tools/openapi_tool/openapi_spec_parser/openapi_toolset.py +27 -7
  118. google/adk/tools/openapi_tool/openapi_spec_parser/operation_parser.py +36 -32
  119. google/adk/tools/openapi_tool/openapi_spec_parser/rest_api_tool.py +11 -1
  120. google/adk/tools/openapi_tool/openapi_spec_parser/tool_auth_handler.py +1 -1
  121. google/adk/tools/preload_memory_tool.py +27 -18
  122. google/adk/tools/retrieval/__init__.py +1 -1
  123. google/adk/tools/retrieval/vertex_ai_rag_retrieval.py +1 -1
  124. google/adk/tools/toolbox_toolset.py +107 -0
  125. google/adk/tools/transfer_to_agent_tool.py +0 -1
  126. google/adk/utils/__init__.py +13 -0
  127. google/adk/utils/instructions_utils.py +131 -0
  128. google/adk/version.py +1 -1
  129. {google_adk-0.5.0.dist-info → google_adk-1.1.0.dist-info}/METADATA +18 -19
  130. google_adk-1.1.0.dist-info/RECORD +200 -0
  131. google/adk/agents/remote_agent.py +0 -50
  132. google/adk/cli/browser/polyfills-FFHMD2TL.js +0 -18
  133. google/adk/cli/fast_api.py.orig +0 -728
  134. google/adk/tools/google_api_tool/google_api_tool_sets.py +0 -112
  135. google/adk/tools/toolbox_tool.py +0 -46
  136. google_adk-0.5.0.dist-info/RECORD +0 -180
  137. {google_adk-0.5.0.dist-info → google_adk-1.1.0.dist-info}/WHEEL +0 -0
  138. {google_adk-0.5.0.dist-info → google_adk-1.1.0.dist-info}/entry_points.txt +0 -0
  139. {google_adk-0.5.0.dist-info → google_adk-1.1.0.dist-info}/licenses/LICENSE +0 -0
google/adk/evaluation/local_eval_sets_manager.py
@@ -0,0 +1,264 @@
+ # Copyright 2025 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import json
+ import logging
+ import os
+ import re
+ import time
+ from typing import Any
+ import uuid
+
+ from google.genai import types as genai_types
+ from pydantic import ValidationError
+ from typing_extensions import override
+
+ from .eval_case import EvalCase
+ from .eval_case import IntermediateData
+ from .eval_case import Invocation
+ from .eval_case import SessionInput
+ from .eval_set import EvalSet
+ from .eval_sets_manager import EvalSetsManager
+
+ logger = logging.getLogger("google_adk." + __name__)
+
+ _EVAL_SET_FILE_EXTENSION = ".evalset.json"
+
+
+ def _convert_invocation_to_pydantic_schema(
+     invocation_in_json_format: dict[str, Any],
+ ) -> Invocation:
+   """Converts an invocation from old json format to new Pydantic Schema"""
+   query = invocation_in_json_format["query"]
+   reference = invocation_in_json_format["reference"]
+   expected_tool_use = []
+   expected_intermediate_agent_responses = []
+
+   for old_tool_use in invocation_in_json_format.get("expected_tool_use", []):
+     expected_tool_use.append(
+         genai_types.FunctionCall(
+             name=old_tool_use["tool_name"], args=old_tool_use["tool_input"]
+         )
+     )
+
+   for old_intermediate_response in invocation_in_json_format.get(
+       "expected_intermediate_agent_responses", []
+   ):
+     expected_intermediate_agent_responses.append((
+         old_intermediate_response["author"],
+         [genai_types.Part.from_text(text=old_intermediate_response["text"])],
+     ))
+
+   return Invocation(
+       invocation_id=str(uuid.uuid4()),
+       user_content=genai_types.Content(
+           parts=[genai_types.Part.from_text(text=query)], role="user"
+       ),
+       final_response=genai_types.Content(
+           parts=[genai_types.Part.from_text(text=reference)], role="model"
+       ),
+       intermediate_data=IntermediateData(
+           tool_uses=expected_tool_use,
+           intermediate_responses=expected_intermediate_agent_responses,
+       ),
+       creation_timestamp=time.time(),
+   )
+
+
+ def convert_eval_set_to_pydanctic_schema(
+     eval_set_id: str,
+     eval_set_in_json_format: list[dict[str, Any]],
+ ) -> EvalSet:
+   r"""Returns an pydantic EvalSet generated from the json representation.
+
+   Args:
+     eval_set_id: Eval set id.
+     eval_set_in_json_format: Eval set specified in JSON format.
+
+   Here is a sample eval set in JSON format:
+   [
+     {
+       "name": "roll_17_sided_dice_twice",
+       "data": [
+         {
+           "query": "What can you do?",
+           "expected_tool_use": [],
+           "expected_intermediate_agent_responses": [],
+           "reference": "I can roll dice of different sizes and check if a number
+           is prime. I can also use multiple tools in parallel.\n"
+         },
+         {
+           "query": "Roll a 17 sided dice twice for me",
+           "expected_tool_use": [
+             {
+               "tool_name": "roll_die",
+               "tool_input": {
+                 "sides": 17
+               }
+             },
+             {
+               "tool_name": "roll_die",
+               "tool_input": {
+                 "sides": 17
+               }
+             }
+           ],
+           "expected_intermediate_agent_responses": [],
+           "reference": "I have rolled a 17 sided die twice. The first roll was
+           13 and the second roll was 4.\n"
+         }
+       ],
+       "initial_session": {
+         "state": {},
+         "app_name": "hello_world",
+         "user_id": "user"
+       }
+     }
+   ]
+   """
+   eval_cases = []
+   for old_eval_case in eval_set_in_json_format:
+     new_invocations = []
+
+     for old_invocation in old_eval_case["data"]:
+       new_invocations.append(
+           _convert_invocation_to_pydantic_schema(old_invocation)
+       )
+
+     session_input = None
+     if (
+         "initial_session" in old_eval_case
+         and len(old_eval_case["initial_session"]) > 0
+     ):
+       session_input = SessionInput(
+           app_name=old_eval_case["initial_session"].get("app_name", ""),
+           user_id=old_eval_case["initial_session"].get("user_id", ""),
+           state=old_eval_case["initial_session"].get("state", {}),
+       )
+
+     new_eval_case = EvalCase(
+         eval_id=old_eval_case["name"],
+         conversation=new_invocations,
+         session_input=session_input,
+         creation_timestamp=time.time(),
+     )
+     eval_cases.append(new_eval_case)
+
+   return EvalSet(
+       eval_set_id=eval_set_id,
+       name=eval_set_id,
+       creation_timestamp=time.time(),
+       eval_cases=eval_cases,
+   )
+
+
+ def load_eval_set_from_file(
+     eval_set_file_path: str, eval_set_id: str
+ ) -> EvalSet:
+   """Returns an EvalSet that is read from the given file."""
+   with open(eval_set_file_path, "r", encoding="utf-8") as f:
+     content = f.read()
+     try:
+       return EvalSet.model_validate_json(content)
+     except ValidationError:
+       # We assume that the eval data was specified in the old format and try
+       # to convert it to the new format.
+       return convert_eval_set_to_pydanctic_schema(
+           eval_set_id, json.loads(content)
+       )
+
+
+ class LocalEvalSetsManager(EvalSetsManager):
+   """An EvalSets manager that stores eval sets locally on disk."""
+
+   def __init__(self, agents_dir: str):
+     self._agents_dir = agents_dir
+
+   @override
+   def get_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
+     """Returns an EvalSet identified by an app_name and eval_set_id."""
+     # Load the eval set file data
+     eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
+     return load_eval_set_from_file(eval_set_file_path, eval_set_id)
+
+   @override
+   def create_eval_set(self, app_name: str, eval_set_id: str):
+     """Creates an empty EvalSet given the app_name and eval_set_id."""
+     self._validate_id(id_name="Eval Set Id", id_value=eval_set_id)
+
+     # Define the file path
+     new_eval_set_path = self._get_eval_set_file_path(app_name, eval_set_id)
+
+     logger.info("Creating eval set file `%s`", new_eval_set_path)
+
+     if not os.path.exists(new_eval_set_path):
+       # Write the JSON string to the file
+       logger.info("Eval set file doesn't exist, we will create a new one.")
+       new_eval_set = EvalSet(
+           eval_set_id=eval_set_id,
+           name=eval_set_id,
+           eval_cases=[],
+           creation_timestamp=time.time(),
+       )
+       self._write_eval_set(new_eval_set_path, new_eval_set)
+
+   @override
+   def list_eval_sets(self, app_name: str) -> list[str]:
+     """Returns a list of EvalSets that belong to the given app_name."""
+     eval_set_file_path = os.path.join(self._agents_dir, app_name)
+     eval_sets = []
+     for file in os.listdir(eval_set_file_path):
+       if file.endswith(_EVAL_SET_FILE_EXTENSION):
+         eval_sets.append(
+             os.path.basename(file).removesuffix(_EVAL_SET_FILE_EXTENSION)
+         )
+
+     return sorted(eval_sets)
+
+   @override
+   def add_eval_case(self, app_name: str, eval_set_id: str, eval_case: EvalCase):
+     """Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id."""
+     eval_case_id = eval_case.eval_id
+     self._validate_id(id_name="Eval Case Id", id_value=eval_case_id)
+
+     eval_set = self.get_eval_set(app_name, eval_set_id)
+
+     if [x for x in eval_set.eval_cases if x.eval_id == eval_case_id]:
+       raise ValueError(
+           f"Eval id `{eval_case_id}` already exists in `{eval_set_id}`"
+           " eval set.",
+       )
+
+     eval_set.eval_cases.append(eval_case)
+
+     eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
+     self._write_eval_set(eval_set_file_path, eval_set)
+
+   def _get_eval_set_file_path(self, app_name: str, eval_set_id: str) -> str:
+     return os.path.join(
+         self._agents_dir,
+         app_name,
+         eval_set_id + _EVAL_SET_FILE_EXTENSION,
+     )
+
+   def _validate_id(self, id_name: str, id_value: str):
+     pattern = r"^[a-zA-Z0-9_]+$"
+     if not bool(re.fullmatch(pattern, id_value)):
+       raise ValueError(
+           f"Invalid {id_name}. {id_name} should have the `{pattern}` format",
+       )
+
+   def _write_eval_set(self, eval_set_path: str, eval_set: EvalSet):
+     with open(eval_set_path, "w") as f:
+       f.write(eval_set.model_dump_json(indent=2))
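
The new LocalEvalSetsManager stores each eval set as `<eval_set_id>.evalset.json` under `<agents_dir>/<app_name>/`, and transparently upgrades files still in the pre-1.0 JSON list format via `convert_eval_set_to_pydanctic_schema`. A minimal usage sketch, not part of the diff: the `./agents` path and `my_app` name are illustrative, and a real case would carry `Invocation` objects in `conversation`.

import time

from google.adk.evaluation.eval_case import EvalCase
from google.adk.evaluation.local_eval_sets_manager import LocalEvalSetsManager

manager = LocalEvalSetsManager(agents_dir="./agents")

# Creates ./agents/my_app/smoke_test.evalset.json unless it already exists.
# Ids must match ^[a-zA-Z0-9_]+$, otherwise _validate_id raises ValueError.
manager.create_eval_set("my_app", "smoke_test")

manager.add_eval_case(
    "my_app",
    "smoke_test",
    EvalCase(
        eval_id="greeting",
        conversation=[],  # illustrative; normally a list of Invocation objects
        session_input=None,
        creation_timestamp=time.time(),
    ),
)

print(manager.list_eval_sets("my_app"))  # ['smoke_test']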
google/adk/evaluation/response_evaluator.py
@@ -13,17 +13,122 @@
  # limitations under the License.

  from typing import Any
+ from typing import Optional

+ from deprecated import deprecated
+ from google.genai import types as genai_types
  import pandas as pd
  from tabulate import tabulate
+ from typing_extensions import override
  from vertexai.preview.evaluation import EvalTask
  from vertexai.preview.evaluation import MetricPromptTemplateExamples

+ from .eval_case import IntermediateData
+ from .eval_case import Invocation
+ from .evaluator import EvalStatus
+ from .evaluator import EvaluationResult
+ from .evaluator import Evaluator
+ from .evaluator import PerInvocationResult

- class ResponseEvaluator:
+
+ class ResponseEvaluator(Evaluator):
    """Runs response evaluation for agents."""

+   def __init__(self, threshold: float, metric_name: str):
+     if "response_evaluation_score" == metric_name:
+       self._metric_name = MetricPromptTemplateExamples.Pointwise.COHERENCE
+     elif "response_match_score" == metric_name:
+       self._metric_name = "rouge_1"
+     else:
+       raise ValueError(f"`{metric_name}` is not supported.")
+
+     self._threshold = threshold
+
+   @override
+   def evaluate_invocations(
+       self,
+       actual_invocations: list[Invocation],
+       expected_invocations: list[Invocation],
+   ) -> EvaluationResult:
+     total_score = 0.0
+     num_invocations = 0
+     per_invocation_results = []
+     for actual, expected in zip(actual_invocations, expected_invocations):
+       prompt = self._get_text(expected.user_content)
+       reference = self._get_text(expected.final_response)
+       response = self._get_text(actual.final_response)
+       actual_tool_use = self._get_tool_use_trajectory(actual.intermediate_data)
+       reference_trajectory = self._get_tool_use_trajectory(
+           expected.intermediate_data
+       )
+
+       eval_case = {
+           "prompt": prompt,
+           "reference": reference,
+           "response": response,
+           "actual_tool_user": actual_tool_use,
+           "reference_trajectory": reference_trajectory,
+       }
+
+       eval_case_result = ResponseEvaluator._perform_eval(
+           pd.DataFrame([eval_case]), [self._metric_name]
+       )
+       score = self._get_score(eval_case_result)
+       per_invocation_results.append(
+           PerInvocationResult(
+               actual_invocation=actual,
+               expected_invocation=expected,
+               score=score,
+               eval_status=self._get_eval_status(score),
+           )
+       )
+       total_score += score
+       num_invocations += 1
+
+     if per_invocation_results:
+       overall_score = total_score / num_invocations
+       return EvaluationResult(
+           overall_score=overall_score,
+           overall_eval_status=self._get_eval_status(overall_score),
+           per_invocation_results=per_invocation_results,
+       )
+
+     return EvaluationResult()
+
+   def _get_text(self, content: Optional[genai_types.Content]) -> str:
+     if content and content.parts:
+       return "\n".join([p.text for p in content.parts if p.text])
+
+     return ""
+
+   def _get_tool_use_trajectory(
+       self, intermediate_data: Optional[IntermediateData]
+   ) -> list[dict[str, Any]]:
+     tool_use_trajectory = []
+     if not intermediate_data:
+       return tool_use_trajectory
+
+     for function_call in intermediate_data.tool_uses:
+       tool_use_trajectory.append({
+           "tool_name": function_call.name,
+           "tool_input": function_call.args or {},
+       })
+
+     return tool_use_trajectory
+
+   def _get_score(self, eval_result) -> float:
+     return eval_result.summary_metrics[f"{self._metric_name}/mean"].item()
+
+   def _get_eval_status(self, score: float):
+     return EvalStatus.PASSED if score >= self._threshold else EvalStatus.FAILED
+
    @staticmethod
+   @deprecated(
+       reason=(
+           "This method has been deprecated and will be removed soon. Please use"
+           " evaluate_invocations instead."
+       )
+   )
    def evaluate(
        raw_eval_dataset: list[list[dict[str, Any]]],
        evaluation_criteria: list[str],
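
ResponseEvaluator now implements the new Evaluator interface, keeping the static `evaluate` path only as a deprecated fallback. A hedged sketch of the new entry point: the `make_invocation` helper and its values are illustrative, the optional `Invocation` fields are assumed to default, and the actual scoring is delegated to `vertexai.preview.evaluation`, so Vertex AI project credentials are required at runtime.

import uuid

from google.genai import types as genai_types

from google.adk.evaluation.eval_case import Invocation
from google.adk.evaluation.response_evaluator import ResponseEvaluator


def make_invocation(query: str, answer: str) -> Invocation:
  # Sketch-only helper mirroring _convert_invocation_to_pydantic_schema above.
  return Invocation(
      invocation_id=str(uuid.uuid4()),
      user_content=genai_types.Content(
          parts=[genai_types.Part.from_text(text=query)], role="user"
      ),
      final_response=genai_types.Content(
          parts=[genai_types.Part.from_text(text=answer)], role="model"
      ),
  )


# "response_match_score" selects rouge_1; "response_evaluation_score"
# selects the Vertex AI coherence metric.
evaluator = ResponseEvaluator(threshold=0.8, metric_name="response_match_score")
result = evaluator.evaluate_invocations(
    actual_invocations=[make_invocation("Hi", "Hello there!")],
    expected_invocations=[make_invocation("Hi", "Hello!")],
)
print(result.overall_score, result.overall_eval_status)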
google/adk/evaluation/trajectory_evaluator.py
@@ -13,17 +13,98 @@
  # limitations under the License.

  from typing import Any
+ from typing import cast

+ from deprecated import deprecated
+ from google.genai import types as genai_types
  import pandas as pd
  from tabulate import tabulate
+ from typing_extensions import override

+ from .eval_case import Invocation
  from .evaluation_constants import EvalConstants
+ from .evaluator import EvalStatus
+ from .evaluator import EvaluationResult
+ from .evaluator import Evaluator
+ from .evaluator import PerInvocationResult


- class TrajectoryEvaluator:
+ class TrajectoryEvaluator(Evaluator):
    """Evaluates tool use trajectories for accuracy."""

+   def __init__(self, threshold: float):
+     self._threshold = threshold
+
+   @override
+   def evaluate_invocations(
+       self,
+       actual_invocations: list[Invocation],
+       expected_invocations: list[Invocation],
+   ) -> EvaluationResult:
+     """Returns EvaluationResult after performing evaluations using actual and expected invocations."""
+     total_tool_use_accuracy = 0.0
+     num_invocations = 0
+     per_invocation_results = []
+
+     for actual, expected in zip(actual_invocations, expected_invocations):
+       actual_tool_uses = (
+           actual.intermediate_data.tool_uses if actual.intermediate_data else []
+       )
+       expected_tool_uses = (
+           expected.intermediate_data.tool_uses
+           if expected.intermediate_data
+           else []
+       )
+       tool_use_accuracy = (
+           1.0
+           if self._are_tool_calls_equal(actual_tool_uses, expected_tool_uses)
+           else 0.0
+       )
+       per_invocation_results.append(
+           PerInvocationResult(
+               actual_invocation=actual,
+               expected_invocation=expected,
+               score=tool_use_accuracy,
+               eval_status=self._get_eval_status(tool_use_accuracy),
+           )
+       )
+       total_tool_use_accuracy += tool_use_accuracy
+       num_invocations += 1
+
+     if per_invocation_results:
+       overall_score = total_tool_use_accuracy / num_invocations
+       return EvaluationResult(
+           overall_score=overall_score,
+           overall_eval_status=self._get_eval_status(overall_score),
+           per_invocation_results=per_invocation_results,
+       )
+
+     return EvaluationResult()
+
+   def _are_tool_calls_equal(
+       self,
+       actual_tool_calls: list[genai_types.FunctionCall],
+       expected_tool_calls: list[genai_types.FunctionCall],
+   ) -> bool:
+     if len(actual_tool_calls) != len(expected_tool_calls):
+       return False
+
+     for actual, expected in zip(actual_tool_calls, expected_tool_calls):
+       if actual.name != expected.name or actual.args != expected.args:
+         return False
+
+     return True
+
+   def _get_eval_status(self, score: float):
+     return EvalStatus.PASSED if score >= self._threshold else EvalStatus.FAILED
+
    @staticmethod
+   @deprecated(
+       reason=(
+           "This method has been deprecated and will be removed soon. Please use"
+           " evaluate_invocations instead."
+       )
+   )
    def evaluate(
        eval_dataset: list[list[dict[str, Any]]],
        *,
@@ -35,7 +116,7 @@ class TrajectoryEvaluator:
      tool use trajectories. An exact match scores a 1, 0 otherwise. The final
      number is an average of these individual scores.

-     Value range: [0, 1], where 0 is means none of the too use entries aligned,
+     Value range: [0, 1], where 0 means none of the tool use entries aligned,
      and 1 would mean all of them aligned. Higher value is good.

      Args:
@@ -137,6 +218,7 @@
      return new_row, failure

    @staticmethod
+   @deprecated()
    def are_tools_equal(list_a_original, list_b_original):
      # Remove other entries that we don't want to evaluate
      list_a = [
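
TrajectoryEvaluator scores each invocation 1.0 on an exact tool-call match and 0.0 otherwise, then averages across invocations. The pairwise check behind the private `_are_tool_calls_equal` helper, restated standalone for clarity (the dice calls are illustrative):

from google.genai import types as genai_types


def are_tool_calls_equal(
    actual: list[genai_types.FunctionCall],
    expected: list[genai_types.FunctionCall],
) -> bool:
  # Same length, and name/args equality at every position.
  if len(actual) != len(expected):
    return False
  return all(
      a.name == e.name and a.args == e.args for a, e in zip(actual, expected)
  )


roll_17 = genai_types.FunctionCall(name="roll_die", args={"sides": 17})
roll_6 = genai_types.FunctionCall(name="roll_die", args={"sides": 6})

print(are_tool_calls_equal([roll_17], [roll_17]))  # True  -> score 1.0
print(are_tool_calls_equal([roll_17], [roll_6]))   # False -> score 0.0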
google/adk/events/event.py
@@ -19,6 +19,7 @@ import string
  from typing import Optional

  from google.genai import types
+ from pydantic import alias_generators
  from pydantic import ConfigDict
  from pydantic import Field

@@ -46,7 +47,11 @@ class Event(LlmResponse):
    """

    model_config = ConfigDict(
-       extra='forbid', ser_json_bytes='base64', val_json_bytes='base64'
+       extra='forbid',
+       ser_json_bytes='base64',
+       val_json_bytes='base64',
+       alias_generator=alias_generators.to_camel,
+       populate_by_name=True,
    )
    """The pydantic model config."""

google/adk/events/event_actions.py
@@ -16,6 +16,7 @@ from __future__ import annotations

  from typing import Optional

+ from pydantic import alias_generators
  from pydantic import BaseModel
  from pydantic import ConfigDict
  from pydantic import Field
@@ -26,7 +27,11 @@ from ..auth.auth_tool import AuthConfig
  class EventActions(BaseModel):
    """Represents the actions attached to an event."""

-   model_config = ConfigDict(extra='forbid')
+   model_config = ConfigDict(
+       extra='forbid',
+       alias_generator=alias_generators.to_camel,
+       populate_by_name=True,
+   )
    """The pydantic model config."""

    skip_summarization: Optional[bool] = None
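
Both Event and EventActions now serialize with camelCase aliases while snake_case construction and attribute access keep working via `populate_by_name=True`. A standalone illustration of the pydantic pattern (the `Demo` model is hypothetical; `Event` itself pulls in the rest of the runtime):

from typing import Optional

from pydantic import alias_generators
from pydantic import BaseModel
from pydantic import ConfigDict


class Demo(BaseModel):
  model_config = ConfigDict(
      alias_generator=alias_generators.to_camel,
      populate_by_name=True,
  )

  skip_summarization: Optional[bool] = None


d = Demo(skip_summarization=True)   # snake_case construction still works
print(d.model_dump(by_alias=True))  # {'skipSummarization': True}
print(Demo.model_validate({"skipSummarization": False}).skip_summarization)  # False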
google/adk/examples/base_example_provider.py
@@ -13,6 +13,7 @@
  # limitations under the License.

  import abc
+
  from .example import Example


google/adk/examples/example_util.py
@@ -15,8 +15,9 @@
  """Utility functions for converting examples to a string that can be used in system instructions in the prompt."""

  import logging
- from typing import Optional, Union
+ from typing import Optional
  from typing import TYPE_CHECKING
+ from typing import Union

  from .base_example_provider import BaseExampleProvider
  from .example import Example
@@ -24,7 +25,7 @@ from .example import Example
  if TYPE_CHECKING:
    from ..sessions.session import Session

- logger = logging.getLogger(__name__)
+ logger = logging.getLogger("google_adk." + __name__)

  # Constant parts of the example string
  _EXAMPLES_INTRO = (
google/adk/flows/llm_flows/_code_execution.py
@@ -22,7 +22,6 @@ import dataclasses
  import os
  import re
  from typing import AsyncGenerator
- from typing import Generator
  from typing import Optional
  from typing import TYPE_CHECKING

@@ -31,6 +30,7 @@ from typing_extensions import override

  from ...agents.invocation_context import InvocationContext
  from ...code_executors.base_code_executor import BaseCodeExecutor
+ from ...code_executors.built_in_code_executor import BuiltInCodeExecutor
  from ...code_executors.code_execution_utils import CodeExecutionInput
  from ...code_executors.code_execution_utils import CodeExecutionResult
  from ...code_executors.code_execution_utils import CodeExecutionUtils
@@ -174,6 +174,11 @@ async def _run_pre_processor(

    if not code_executor or not isinstance(code_executor, BaseCodeExecutor):
      return
+
+   if isinstance(code_executor, BuiltInCodeExecutor):
+     code_executor.process_llm_request(llm_request)
+     return
+
    if not code_executor.optimize_data_file:
      return

@@ -262,6 +267,9 @@ async def _run_post_processor(
    if not llm_response or not llm_response.content:
      return

+   if isinstance(code_executor, BuiltInCodeExecutor):
+     return
+
    code_executor_context = CodeExecutorContext(invocation_context.session.state)
    # Skip if the error count exceeds the max retry attempts.
    if (
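
BuiltInCodeExecutor delegates execution to the model's native code-execution capability, so `_run_pre_processor` only lets it rewrite the outgoing request and skips the local execution and retry pipeline; `_run_post_processor` likewise returns early. A hedged wiring sketch (the agent name, model id, and instruction are assumptions; this executor replaces the removed `built_in_code_execution` tool, see file 96 above):

from google.adk.agents.llm_agent import LlmAgent
from google.adk.code_executors.built_in_code_executor import BuiltInCodeExecutor

# Attaching the executor makes _run_pre_processor call
# code_executor.process_llm_request(llm_request) and return early.
agent = LlmAgent(
    name="coder",
    model="gemini-2.0-flash",  # assumed model id
    instruction="Write and run code to answer math questions.",
    code_executor=BuiltInCodeExecutor(),
)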
google/adk/flows/llm_flows/audio_transcriber.py
@@ -25,8 +25,9 @@ if TYPE_CHECKING:
  class AudioTranscriber:
    """Transcribes audio using Google Cloud Speech-to-Text."""

-   def __init__(self):
-     self.client = speech.SpeechClient()
+   def __init__(self, init_client=False):
+     if init_client:
+       self.client = speech.SpeechClient()

    def transcribe_file(
        self, invocation_context: InvocationContext
@@ -84,7 +85,7 @@ class AudioTranscriber:

      # Step2: transcription
      for speaker, data in bundled_audio:
-       if speaker == 'user':
+       if isinstance(data, genai_types.Blob):
          audio = speech.RecognitionAudio(content=data)

          config = speech.RecognitionConfig(
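
AudioTranscriber now constructs its Speech-to-Text client only on request, so creating a transcriber no longer requires Google Cloud credentials up front, and audio bundles are distinguished by payload type (`genai_types.Blob`) rather than by the `'user'` speaker label. A brief sketch of the lazy construction:

from google.adk.flows.llm_flows.audio_transcriber import AudioTranscriber

transcriber = AudioTranscriber()  # no speech.SpeechClient() is created here
eager = AudioTranscriber(init_client=True)  # builds the client immediately,
# so Google Cloud credentials must already be resolvable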