google-adk 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. google/adk/__init__.py +20 -0
  2. google/adk/agents/__init__.py +32 -0
  3. google/adk/agents/active_streaming_tool.py +38 -0
  4. google/adk/agents/base_agent.py +345 -0
  5. google/adk/agents/callback_context.py +112 -0
  6. google/adk/agents/invocation_context.py +181 -0
  7. google/adk/agents/langgraph_agent.py +140 -0
  8. google/adk/agents/live_request_queue.py +64 -0
  9. google/adk/agents/llm_agent.py +376 -0
  10. google/adk/agents/loop_agent.py +62 -0
  11. google/adk/agents/parallel_agent.py +96 -0
  12. google/adk/agents/readonly_context.py +46 -0
  13. google/adk/agents/remote_agent.py +50 -0
  14. google/adk/agents/run_config.py +87 -0
  15. google/adk/agents/sequential_agent.py +45 -0
  16. google/adk/agents/transcription_entry.py +34 -0
  17. google/adk/artifacts/__init__.py +23 -0
  18. google/adk/artifacts/base_artifact_service.py +128 -0
  19. google/adk/artifacts/gcs_artifact_service.py +195 -0
  20. google/adk/artifacts/in_memory_artifact_service.py +133 -0
  21. google/adk/auth/__init__.py +22 -0
  22. google/adk/auth/auth_credential.py +220 -0
  23. google/adk/auth/auth_handler.py +268 -0
  24. google/adk/auth/auth_preprocessor.py +116 -0
  25. google/adk/auth/auth_schemes.py +67 -0
  26. google/adk/auth/auth_tool.py +55 -0
  27. google/adk/cli/__init__.py +15 -0
  28. google/adk/cli/__main__.py +18 -0
  29. google/adk/cli/agent_graph.py +122 -0
  30. google/adk/cli/browser/adk_favicon.svg +17 -0
  31. google/adk/cli/browser/assets/audio-processor.js +51 -0
  32. google/adk/cli/browser/assets/config/runtime-config.json +3 -0
  33. google/adk/cli/browser/index.html +33 -0
  34. google/adk/cli/browser/main-XUU6OGCC.js +75 -0
  35. google/adk/cli/browser/polyfills-FFHMD2TL.js +18 -0
  36. google/adk/cli/browser/styles-4VDSPQ37.css +17 -0
  37. google/adk/cli/cli.py +181 -0
  38. google/adk/cli/cli_deploy.py +181 -0
  39. google/adk/cli/cli_eval.py +282 -0
  40. google/adk/cli/cli_tools_click.py +479 -0
  41. google/adk/cli/fast_api.py +774 -0
  42. google/adk/cli/media_streamer/__init__.py +19 -0
  43. google/adk/cli/media_streamer/index.html +228 -0
  44. google/adk/cli/utils/__init__.py +49 -0
  45. google/adk/cli/utils/envs.py +57 -0
  46. google/adk/cli/utils/evals.py +93 -0
  47. google/adk/cli/utils/logs.py +72 -0
  48. google/adk/code_executors/__init__.py +49 -0
  49. google/adk/code_executors/base_code_executor.py +97 -0
  50. google/adk/code_executors/code_execution_utils.py +256 -0
  51. google/adk/code_executors/code_executor_context.py +202 -0
  52. google/adk/code_executors/container_code_executor.py +196 -0
  53. google/adk/code_executors/unsafe_local_code_executor.py +71 -0
  54. google/adk/code_executors/vertex_ai_code_executor.py +234 -0
  55. google/adk/evaluation/__init__.py +31 -0
  56. google/adk/evaluation/agent_evaluator.py +329 -0
  57. google/adk/evaluation/evaluation_constants.py +24 -0
  58. google/adk/evaluation/evaluation_generator.py +270 -0
  59. google/adk/evaluation/response_evaluator.py +135 -0
  60. google/adk/evaluation/trajectory_evaluator.py +184 -0
  61. google/adk/events/__init__.py +21 -0
  62. google/adk/events/event.py +130 -0
  63. google/adk/events/event_actions.py +55 -0
  64. google/adk/examples/__init__.py +28 -0
  65. google/adk/examples/base_example_provider.py +35 -0
  66. google/adk/examples/example.py +27 -0
  67. google/adk/examples/example_util.py +123 -0
  68. google/adk/examples/vertex_ai_example_store.py +104 -0
  69. google/adk/flows/__init__.py +14 -0
  70. google/adk/flows/llm_flows/__init__.py +20 -0
  71. google/adk/flows/llm_flows/_base_llm_processor.py +52 -0
  72. google/adk/flows/llm_flows/_code_execution.py +458 -0
  73. google/adk/flows/llm_flows/_nl_planning.py +129 -0
  74. google/adk/flows/llm_flows/agent_transfer.py +132 -0
  75. google/adk/flows/llm_flows/audio_transcriber.py +109 -0
  76. google/adk/flows/llm_flows/auto_flow.py +49 -0
  77. google/adk/flows/llm_flows/base_llm_flow.py +559 -0
  78. google/adk/flows/llm_flows/basic.py +72 -0
  79. google/adk/flows/llm_flows/contents.py +370 -0
  80. google/adk/flows/llm_flows/functions.py +486 -0
  81. google/adk/flows/llm_flows/identity.py +47 -0
  82. google/adk/flows/llm_flows/instructions.py +137 -0
  83. google/adk/flows/llm_flows/single_flow.py +57 -0
  84. google/adk/memory/__init__.py +35 -0
  85. google/adk/memory/base_memory_service.py +74 -0
  86. google/adk/memory/in_memory_memory_service.py +62 -0
  87. google/adk/memory/vertex_ai_rag_memory_service.py +177 -0
  88. google/adk/models/__init__.py +31 -0
  89. google/adk/models/anthropic_llm.py +243 -0
  90. google/adk/models/base_llm.py +87 -0
  91. google/adk/models/base_llm_connection.py +76 -0
  92. google/adk/models/gemini_llm_connection.py +200 -0
  93. google/adk/models/google_llm.py +331 -0
  94. google/adk/models/lite_llm.py +673 -0
  95. google/adk/models/llm_request.py +98 -0
  96. google/adk/models/llm_response.py +111 -0
  97. google/adk/models/registry.py +102 -0
  98. google/adk/planners/__init__.py +23 -0
  99. google/adk/planners/base_planner.py +66 -0
  100. google/adk/planners/built_in_planner.py +75 -0
  101. google/adk/planners/plan_re_act_planner.py +208 -0
  102. google/adk/runners.py +456 -0
  103. google/adk/sessions/__init__.py +41 -0
  104. google/adk/sessions/base_session_service.py +133 -0
  105. google/adk/sessions/database_session_service.py +522 -0
  106. google/adk/sessions/in_memory_session_service.py +206 -0
  107. google/adk/sessions/session.py +54 -0
  108. google/adk/sessions/state.py +71 -0
  109. google/adk/sessions/vertex_ai_session_service.py +356 -0
  110. google/adk/telemetry.py +189 -0
  111. google/adk/tests/__init__.py +14 -0
  112. google/adk/tests/integration/.env.example +10 -0
  113. google/adk/tests/integration/__init__.py +18 -0
  114. google/adk/tests/integration/conftest.py +119 -0
  115. google/adk/tests/integration/fixture/__init__.py +14 -0
  116. google/adk/tests/integration/fixture/agent_with_config/__init__.py +15 -0
  117. google/adk/tests/integration/fixture/agent_with_config/agent.py +88 -0
  118. google/adk/tests/integration/fixture/callback_agent/__init__.py +15 -0
  119. google/adk/tests/integration/fixture/callback_agent/agent.py +105 -0
  120. google/adk/tests/integration/fixture/context_update_test/OWNERS +1 -0
  121. google/adk/tests/integration/fixture/context_update_test/__init__.py +15 -0
  122. google/adk/tests/integration/fixture/context_update_test/agent.py +43 -0
  123. google/adk/tests/integration/fixture/context_update_test/successful_test.session.json +582 -0
  124. google/adk/tests/integration/fixture/context_variable_agent/__init__.py +15 -0
  125. google/adk/tests/integration/fixture/context_variable_agent/agent.py +115 -0
  126. google/adk/tests/integration/fixture/customer_support_ma/__init__.py +15 -0
  127. google/adk/tests/integration/fixture/customer_support_ma/agent.py +172 -0
  128. google/adk/tests/integration/fixture/ecommerce_customer_service_agent/__init__.py +15 -0
  129. google/adk/tests/integration/fixture/ecommerce_customer_service_agent/agent.py +338 -0
  130. google/adk/tests/integration/fixture/ecommerce_customer_service_agent/order_query.test.json +69 -0
  131. google/adk/tests/integration/fixture/ecommerce_customer_service_agent/test_config.json +6 -0
  132. google/adk/tests/integration/fixture/flow_complex_spark/__init__.py +15 -0
  133. google/adk/tests/integration/fixture/flow_complex_spark/agent.py +182 -0
  134. google/adk/tests/integration/fixture/flow_complex_spark/sample.debug.log +243 -0
  135. google/adk/tests/integration/fixture/flow_complex_spark/sample.session.json +190 -0
  136. google/adk/tests/integration/fixture/hello_world_agent/__init__.py +15 -0
  137. google/adk/tests/integration/fixture/hello_world_agent/agent.py +95 -0
  138. google/adk/tests/integration/fixture/hello_world_agent/roll_die.test.json +24 -0
  139. google/adk/tests/integration/fixture/hello_world_agent/test_config.json +6 -0
  140. google/adk/tests/integration/fixture/home_automation_agent/__init__.py +15 -0
  141. google/adk/tests/integration/fixture/home_automation_agent/agent.py +304 -0
  142. google/adk/tests/integration/fixture/home_automation_agent/simple_test.test.json +5 -0
  143. google/adk/tests/integration/fixture/home_automation_agent/simple_test2.test.json +5 -0
  144. google/adk/tests/integration/fixture/home_automation_agent/test_config.json +5 -0
  145. google/adk/tests/integration/fixture/home_automation_agent/test_files/dependent_tool_calls.test.json +18 -0
  146. google/adk/tests/integration/fixture/home_automation_agent/test_files/memorizing_past_events/eval_data.test.json +17 -0
  147. google/adk/tests/integration/fixture/home_automation_agent/test_files/memorizing_past_events/test_config.json +6 -0
  148. google/adk/tests/integration/fixture/home_automation_agent/test_files/simple_multi_turn_conversation.test.json +18 -0
  149. google/adk/tests/integration/fixture/home_automation_agent/test_files/simple_test.test.json +17 -0
  150. google/adk/tests/integration/fixture/home_automation_agent/test_files/simple_test2.test.json +5 -0
  151. google/adk/tests/integration/fixture/home_automation_agent/test_files/test_config.json +5 -0
  152. google/adk/tests/integration/fixture/tool_agent/__init__.py +15 -0
  153. google/adk/tests/integration/fixture/tool_agent/agent.py +218 -0
  154. google/adk/tests/integration/fixture/tool_agent/files/Agent_test_plan.pdf +0 -0
  155. google/adk/tests/integration/fixture/trip_planner_agent/__init__.py +15 -0
  156. google/adk/tests/integration/fixture/trip_planner_agent/agent.py +110 -0
  157. google/adk/tests/integration/fixture/trip_planner_agent/initial.session.json +13 -0
  158. google/adk/tests/integration/fixture/trip_planner_agent/test_config.json +5 -0
  159. google/adk/tests/integration/fixture/trip_planner_agent/test_files/initial.session.json +13 -0
  160. google/adk/tests/integration/fixture/trip_planner_agent/test_files/test_config.json +5 -0
  161. google/adk/tests/integration/fixture/trip_planner_agent/test_files/trip_inquiry_sub_agent.test.json +7 -0
  162. google/adk/tests/integration/fixture/trip_planner_agent/trip_inquiry.test.json +19 -0
  163. google/adk/tests/integration/models/__init__.py +14 -0
  164. google/adk/tests/integration/models/test_google_llm.py +65 -0
  165. google/adk/tests/integration/test_callback.py +70 -0
  166. google/adk/tests/integration/test_context_variable.py +67 -0
  167. google/adk/tests/integration/test_evalute_agent_in_fixture.py +76 -0
  168. google/adk/tests/integration/test_multi_agent.py +28 -0
  169. google/adk/tests/integration/test_multi_turn.py +42 -0
  170. google/adk/tests/integration/test_single_agent.py +23 -0
  171. google/adk/tests/integration/test_sub_agent.py +26 -0
  172. google/adk/tests/integration/test_system_instruction.py +177 -0
  173. google/adk/tests/integration/test_tools.py +287 -0
  174. google/adk/tests/integration/test_with_test_file.py +34 -0
  175. google/adk/tests/integration/tools/__init__.py +14 -0
  176. google/adk/tests/integration/utils/__init__.py +16 -0
  177. google/adk/tests/integration/utils/asserts.py +75 -0
  178. google/adk/tests/integration/utils/test_runner.py +97 -0
  179. google/adk/tests/unittests/__init__.py +14 -0
  180. google/adk/tests/unittests/agents/__init__.py +14 -0
  181. google/adk/tests/unittests/agents/test_base_agent.py +407 -0
  182. google/adk/tests/unittests/agents/test_langgraph_agent.py +191 -0
  183. google/adk/tests/unittests/agents/test_llm_agent_callbacks.py +138 -0
  184. google/adk/tests/unittests/agents/test_llm_agent_fields.py +231 -0
  185. google/adk/tests/unittests/agents/test_loop_agent.py +136 -0
  186. google/adk/tests/unittests/agents/test_parallel_agent.py +92 -0
  187. google/adk/tests/unittests/agents/test_sequential_agent.py +114 -0
  188. google/adk/tests/unittests/artifacts/__init__.py +14 -0
  189. google/adk/tests/unittests/artifacts/test_artifact_service.py +276 -0
  190. google/adk/tests/unittests/auth/test_auth_handler.py +575 -0
  191. google/adk/tests/unittests/conftest.py +73 -0
  192. google/adk/tests/unittests/fast_api/__init__.py +14 -0
  193. google/adk/tests/unittests/fast_api/test_fast_api.py +269 -0
  194. google/adk/tests/unittests/flows/__init__.py +14 -0
  195. google/adk/tests/unittests/flows/llm_flows/__init__.py +14 -0
  196. google/adk/tests/unittests/flows/llm_flows/_test_examples.py +142 -0
  197. google/adk/tests/unittests/flows/llm_flows/test_agent_transfer.py +311 -0
  198. google/adk/tests/unittests/flows/llm_flows/test_functions_long_running.py +244 -0
  199. google/adk/tests/unittests/flows/llm_flows/test_functions_request_euc.py +346 -0
  200. google/adk/tests/unittests/flows/llm_flows/test_functions_sequential.py +93 -0
  201. google/adk/tests/unittests/flows/llm_flows/test_functions_simple.py +258 -0
  202. google/adk/tests/unittests/flows/llm_flows/test_identity.py +66 -0
  203. google/adk/tests/unittests/flows/llm_flows/test_instructions.py +164 -0
  204. google/adk/tests/unittests/flows/llm_flows/test_model_callbacks.py +142 -0
  205. google/adk/tests/unittests/flows/llm_flows/test_other_configs.py +46 -0
  206. google/adk/tests/unittests/flows/llm_flows/test_tool_callbacks.py +269 -0
  207. google/adk/tests/unittests/models/__init__.py +14 -0
  208. google/adk/tests/unittests/models/test_google_llm.py +224 -0
  209. google/adk/tests/unittests/models/test_litellm.py +804 -0
  210. google/adk/tests/unittests/models/test_models.py +60 -0
  211. google/adk/tests/unittests/sessions/__init__.py +14 -0
  212. google/adk/tests/unittests/sessions/test_session_service.py +227 -0
  213. google/adk/tests/unittests/sessions/test_vertex_ai_session_service.py +246 -0
  214. google/adk/tests/unittests/streaming/__init__.py +14 -0
  215. google/adk/tests/unittests/streaming/test_streaming.py +50 -0
  216. google/adk/tests/unittests/tools/__init__.py +14 -0
  217. google/adk/tests/unittests/tools/apihub_tool/clients/test_apihub_client.py +499 -0
  218. google/adk/tests/unittests/tools/apihub_tool/test_apihub_toolset.py +204 -0
  219. google/adk/tests/unittests/tools/application_integration_tool/clients/test_connections_client.py +600 -0
  220. google/adk/tests/unittests/tools/application_integration_tool/clients/test_integration_client.py +630 -0
  221. google/adk/tests/unittests/tools/application_integration_tool/test_application_integration_toolset.py +345 -0
  222. google/adk/tests/unittests/tools/google_api_tool/__init__.py +13 -0
  223. google/adk/tests/unittests/tools/google_api_tool/test_googleapi_to_openapi_converter.py +657 -0
  224. google/adk/tests/unittests/tools/openapi_tool/auth/credential_exchangers/test_auto_auth_credential_exchanger.py +145 -0
  225. google/adk/tests/unittests/tools/openapi_tool/auth/credential_exchangers/test_base_auth_credential_exchanger.py +68 -0
  226. google/adk/tests/unittests/tools/openapi_tool/auth/credential_exchangers/test_oauth2_exchanger.py +153 -0
  227. google/adk/tests/unittests/tools/openapi_tool/auth/credential_exchangers/test_service_account_exchanger.py +196 -0
  228. google/adk/tests/unittests/tools/openapi_tool/auth/test_auth_helper.py +573 -0
  229. google/adk/tests/unittests/tools/openapi_tool/common/test_common.py +436 -0
  230. google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test.yaml +1367 -0
  231. google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test_openapi_spec_parser.py +628 -0
  232. google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test_openapi_toolset.py +139 -0
  233. google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test_operation_parser.py +406 -0
  234. google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test_rest_api_tool.py +966 -0
  235. google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test_tool_auth_handler.py +201 -0
  236. google/adk/tests/unittests/tools/retrieval/__init__.py +14 -0
  237. google/adk/tests/unittests/tools/retrieval/test_vertex_ai_rag_retrieval.py +147 -0
  238. google/adk/tests/unittests/tools/test_agent_tool.py +167 -0
  239. google/adk/tests/unittests/tools/test_base_tool.py +141 -0
  240. google/adk/tests/unittests/tools/test_build_function_declaration.py +277 -0
  241. google/adk/tests/unittests/utils.py +304 -0
  242. google/adk/tools/__init__.py +51 -0
  243. google/adk/tools/_automatic_function_calling_util.py +346 -0
  244. google/adk/tools/agent_tool.py +176 -0
  245. google/adk/tools/apihub_tool/__init__.py +19 -0
  246. google/adk/tools/apihub_tool/apihub_toolset.py +209 -0
  247. google/adk/tools/apihub_tool/clients/__init__.py +13 -0
  248. google/adk/tools/apihub_tool/clients/apihub_client.py +332 -0
  249. google/adk/tools/apihub_tool/clients/secret_client.py +115 -0
  250. google/adk/tools/application_integration_tool/__init__.py +19 -0
  251. google/adk/tools/application_integration_tool/application_integration_toolset.py +230 -0
  252. google/adk/tools/application_integration_tool/clients/connections_client.py +903 -0
  253. google/adk/tools/application_integration_tool/clients/integration_client.py +253 -0
  254. google/adk/tools/base_tool.py +144 -0
  255. google/adk/tools/built_in_code_execution_tool.py +59 -0
  256. google/adk/tools/crewai_tool.py +72 -0
  257. google/adk/tools/example_tool.py +62 -0
  258. google/adk/tools/exit_loop_tool.py +23 -0
  259. google/adk/tools/function_parameter_parse_util.py +307 -0
  260. google/adk/tools/function_tool.py +87 -0
  261. google/adk/tools/get_user_choice_tool.py +28 -0
  262. google/adk/tools/google_api_tool/__init__.py +14 -0
  263. google/adk/tools/google_api_tool/google_api_tool.py +59 -0
  264. google/adk/tools/google_api_tool/google_api_tool_set.py +107 -0
  265. google/adk/tools/google_api_tool/google_api_tool_sets.py +55 -0
  266. google/adk/tools/google_api_tool/googleapi_to_openapi_converter.py +521 -0
  267. google/adk/tools/google_search_tool.py +68 -0
  268. google/adk/tools/langchain_tool.py +86 -0
  269. google/adk/tools/load_artifacts_tool.py +113 -0
  270. google/adk/tools/load_memory_tool.py +58 -0
  271. google/adk/tools/load_web_page.py +41 -0
  272. google/adk/tools/long_running_tool.py +39 -0
  273. google/adk/tools/mcp_tool/__init__.py +42 -0
  274. google/adk/tools/mcp_tool/conversion_utils.py +161 -0
  275. google/adk/tools/mcp_tool/mcp_tool.py +113 -0
  276. google/adk/tools/mcp_tool/mcp_toolset.py +272 -0
  277. google/adk/tools/openapi_tool/__init__.py +21 -0
  278. google/adk/tools/openapi_tool/auth/__init__.py +19 -0
  279. google/adk/tools/openapi_tool/auth/auth_helpers.py +498 -0
  280. google/adk/tools/openapi_tool/auth/credential_exchangers/__init__.py +25 -0
  281. google/adk/tools/openapi_tool/auth/credential_exchangers/auto_auth_credential_exchanger.py +105 -0
  282. google/adk/tools/openapi_tool/auth/credential_exchangers/base_credential_exchanger.py +55 -0
  283. google/adk/tools/openapi_tool/auth/credential_exchangers/oauth2_exchanger.py +117 -0
  284. google/adk/tools/openapi_tool/auth/credential_exchangers/service_account_exchanger.py +97 -0
  285. google/adk/tools/openapi_tool/common/__init__.py +19 -0
  286. google/adk/tools/openapi_tool/common/common.py +300 -0
  287. google/adk/tools/openapi_tool/openapi_spec_parser/__init__.py +32 -0
  288. google/adk/tools/openapi_tool/openapi_spec_parser/openapi_spec_parser.py +231 -0
  289. google/adk/tools/openapi_tool/openapi_spec_parser/openapi_toolset.py +144 -0
  290. google/adk/tools/openapi_tool/openapi_spec_parser/operation_parser.py +260 -0
  291. google/adk/tools/openapi_tool/openapi_spec_parser/rest_api_tool.py +496 -0
  292. google/adk/tools/openapi_tool/openapi_spec_parser/tool_auth_handler.py +268 -0
  293. google/adk/tools/preload_memory_tool.py +72 -0
  294. google/adk/tools/retrieval/__init__.py +36 -0
  295. google/adk/tools/retrieval/base_retrieval_tool.py +37 -0
  296. google/adk/tools/retrieval/files_retrieval.py +33 -0
  297. google/adk/tools/retrieval/llama_index_retrieval.py +41 -0
  298. google/adk/tools/retrieval/vertex_ai_rag_retrieval.py +107 -0
  299. google/adk/tools/tool_context.py +90 -0
  300. google/adk/tools/toolbox_tool.py +46 -0
  301. google/adk/tools/transfer_to_agent_tool.py +21 -0
  302. google/adk/tools/vertex_ai_search_tool.py +96 -0
  303. google/adk/version.py +16 -0
  304. google_adk-0.0.1.dist-info/LICENSE.txt → google_adk-0.0.2.dist-info/LICENSE +32 -0
  305. google_adk-0.0.2.dist-info/METADATA +73 -0
  306. google_adk-0.0.2.dist-info/RECORD +308 -0
  307. {google_adk-0.0.1.dist-info → google_adk-0.0.2.dist-info}/WHEEL +1 -2
  308. google_adk-0.0.2.dist-info/entry_points.txt +3 -0
  309. agent_kit/__init__.py +0 -0
  310. google_adk-0.0.1.dist-info/METADATA +0 -15
  311. google_adk-0.0.1.dist-info/RECORD +0 -6
  312. google_adk-0.0.1.dist-info/top_level.txt +0 -1
@@ -0,0 +1,71 @@
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from contextlib import redirect_stdout
16
+ import io
17
+
18
+ from pydantic import Field
19
+ from typing_extensions import override
20
+
21
+ from ..agents.invocation_context import InvocationContext
22
+ from .base_code_executor import BaseCodeExecutor
23
+ from .code_execution_utils import CodeExecutionInput
24
+ from .code_execution_utils import CodeExecutionResult
25
+
26
+
27
class UnsafeLocalCodeExecutor(BaseCodeExecutor):
  """A code executor that unsafely executes code in the current local context.

  The code is run with the built-in `exec` inside the calling process; this
  class applies no isolation of its own, so only use it with trusted code.
  """

  # Overrides the BaseCodeExecutor attribute: this executor cannot be stateful.
  stateful: bool = Field(default=False, frozen=True, exclude=True)

  # Overrides the BaseCodeExecutor attribute: this executor cannot
  # optimize_data_file.
  optimize_data_file: bool = Field(default=False, frozen=True, exclude=True)

  def __init__(self, **data):
    """Initializes the UnsafeLocalCodeExecutor.

    Args:
      **data: Keyword arguments forwarded to the base class initializer.

    Raises:
      ValueError: If `stateful` or `optimize_data_file` is passed with a
        truthy value.
    """
    if data.get('stateful'):
      raise ValueError('Cannot set `stateful=True` in UnsafeLocalCodeExecutor.')
    if data.get('optimize_data_file'):
      raise ValueError(
          'Cannot set `optimize_data_file=True` in UnsafeLocalCodeExecutor.'
      )
    super().__init__(**data)

  @override
  def execute_code(
      self,
      invocation_context: InvocationContext,
      code_execution_input: CodeExecutionInput,
  ) -> CodeExecutionResult:
    """Executes the given code with `exec` and captures what it prints.

    Args:
      invocation_context: Not used by this implementation.
      code_execution_input: Carries the source to run in its `code` attribute.

    Returns:
      A CodeExecutionResult whose `stdout` holds the captured standard output
      when the code finishes normally. When the code raises an `Exception`,
      `stdout` stays empty and `stderr` holds `str(exception)`. `output_files`
      is always empty.
    """
    output = ''
    error = ''
    # Use ONE namespace for both globals and locals. When `exec` receives two
    # separate dicts the code runs as if it were a class body: top-level names
    # land in the locals dict, and functions defined by the code (which resolve
    # free names through globals) cannot see each other or top-level imports.
    namespace = {}
    stdout = io.StringIO()
    try:
      with redirect_stdout(stdout):
        exec(code_execution_input.code, namespace)
      output = stdout.getvalue()
    except Exception as e:
      error = str(e)

    # Collect the final result.
    return CodeExecutionResult(
        stdout=output,
        stderr=error,
        output_files=[],
    )
@@ -0,0 +1,234 @@
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import datetime
16
+ import mimetypes
17
+ import os
18
+ from typing import Any, Optional
19
+
20
+ from typing_extensions import override
21
+ from vertexai.preview.extensions import Extension
22
+
23
+ from ..agents.invocation_context import InvocationContext
24
+ from .base_code_executor import BaseCodeExecutor
25
+ from .code_execution_utils import CodeExecutionInput
26
+ from .code_execution_utils import CodeExecutionResult
27
+ from .code_execution_utils import File
28
+
29
# File extensions of interpreter output files that are saved as plots
# (their generated name gets a 'plot_' prefix).
_SUPPORTED_IMAGE_TYPES = ['png', 'jpg', 'jpeg']
# File extensions of interpreter output files that are saved as data files
# (their generated name gets a 'data_' prefix).
_SUPPORTED_DATA_FILE_TYPES = ['csv']

# Python source that is placed in front of every snippet sent to the code
# interpreter: common imports plus the `crop` and `explore_df` helpers.
# NOTE(review): the imported packages are presumably available in the
# interpreter runtime - confirm against the extension's documentation.
_IMPORTED_LIBRARIES = '''
import io
import math
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy

def crop(s: str, max_chars: int = 64) -> str:
  """Crops a string to max_chars characters."""
  return s[: max_chars - 3] + '...' if len(s) > max_chars else s


def explore_df(df: pd.DataFrame) -> None:
  """Prints some information about a pandas DataFrame."""

  with pd.option_context(
      'display.max_columns', None, 'display.expand_frame_repr', False
  ):
    # Print the column names to never encounter KeyError when selecting one.
    df_dtypes = df.dtypes

    # Obtain information about data types and missing values.
    df_nulls = (len(df) - df.isnull().sum()).apply(
        lambda x: f'{x} / {df.shape[0]} non-null'
    )

    # Explore unique total values in columns using `.unique()`.
    df_unique_count = df.apply(lambda x: len(x.unique()))

    # Explore unique values in columns using `.unique()`.
    df_unique = df.apply(lambda x: crop(str(list(x.unique()))))

    df_info = pd.concat(
        (
            df_dtypes.rename('Dtype'),
            df_nulls.rename('Non-Null Count'),
            df_unique_count.rename('Unique Values Count'),
            df_unique.rename('Unique Values'),
        ),
        axis=1,
    )
    df_info.index.name = 'Columns'
    print(f"""Total rows: {df.shape[0]}
Total columns: {df.shape[1]}

{df_info}""")
'''
82
+
83
+
84
def _get_code_interpreter_extension(resource_name: Optional[str] = None):
  """Loads an existing code interpreter extension or creates a new one.

  Args:
    resource_name: Resource name of an existing extension. When empty, the
      `CODE_INTERPRETER_EXTENSION_NAME` environment variable is used instead.

  Returns:
    `Extension(resource_name)` when a resource name could be resolved;
    otherwise a new extension obtained via
    `Extension.from_hub('code_interpreter')`. In the latter case the new
    extension's resource name is written to `CODE_INTERPRETER_EXTENSION_NAME`
    in this process's environment so later calls pick it up.
  """
  if not resource_name:
    resource_name = os.environ.get('CODE_INTERPRETER_EXTENSION_NAME')
  if resource_name:
    return Extension(resource_name)

  # The message names the variable that is actually read above and written
  # below, so users know which one to set to reuse an extension.
  print(
      'No CODE_INTERPRETER_EXTENSION_NAME found in the environment. Create a'
      ' new one.'
  )
  new_code_interpreter = Extension.from_hub('code_interpreter')
  os.environ['CODE_INTERPRETER_EXTENSION_NAME'] = (
      new_code_interpreter.gca_resource.name
  )
  return new_code_interpreter
97
+
98
+
99
class VertexAiCodeExecutor(BaseCodeExecutor):
  """A code executor that uses Vertex Code Interpreter Extension to execute code.

  Attributes:
    resource_name: If set, load the existing resource name of the code
      interpreter extension instead of creating a new one. Format:
      projects/123/locations/us-central1/extensions/456
  """

  resource_name: Optional[str] = None
  """
  If set, load the existing resource name of the code interpreter extension
  instead of creating a new one.
  Format: projects/123/locations/us-central1/extensions/456
  """

  _code_interpreter_extension: Extension

  def __init__(
      self,
      resource_name: Optional[str] = None,
      **data,
  ):
    """Initializes the VertexAiCodeExecutor.

    Args:
      resource_name: If set, load the existing resource name of the code
        interpreter extension instead of creating a new one. Format:
        projects/123/locations/us-central1/extensions/456
      **data: Additional keyword arguments to be passed to the base class.
    """
    super().__init__(**data)
    self.resource_name = resource_name
    self._code_interpreter_extension = _get_code_interpreter_extension(
        self.resource_name
    )

  @override
  def execute_code(
      self,
      invocation_context: InvocationContext,
      code_execution_input: CodeExecutionInput,
  ) -> CodeExecutionResult:
    """Runs the code through the extension and collects its output files.

    Args:
      invocation_context: Not used by this implementation.
      code_execution_input: Supplies the `code` to run, the `input_files` to
        upload and the `execution_id`, which is forwarded as the session id.

    Returns:
      A CodeExecutionResult built from the response's `execution_result`
      (stdout), `execution_error` (stderr) and its output files, renamed to
      `<timestamp>_<index>.<extension>` with a `plot_` or `data_` prefix for
      the supported image and data file types.
    """
    # Execute the code.
    code_execution_result = self._execute_code_interpreter(
        self._get_code_with_imports(code_execution_input.code),
        code_execution_input.input_files,
        code_execution_input.execution_id,
    )

    # Save output file as artifacts.
    current_timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    file_name_prefix = f'{current_timestamp}_'
    saved_files = []
    # Read with `.get`, like the other response fields below, so a response
    # without output files does not raise KeyError.
    output_files = code_execution_result.get('output_files') or []
    # The index advances for EVERY file. Counting only supported types would
    # give several unsupported files of one extension the same name.
    for file_count, output_file in enumerate(output_files):
      file_type = output_file['name'].split('.')[-1]
      file_name = f'{file_name_prefix}{file_count}.{file_type}'
      if file_type in _SUPPORTED_IMAGE_TYPES:
        name = 'plot_' + file_name
        # 'image/jpg' is not a registered media type; JPEG is 'image/jpeg'.
        image_subtype = 'jpeg' if file_type == 'jpg' else file_type
        mime_type = f'image/{image_subtype}'
      elif file_type in _SUPPORTED_DATA_FILE_TYPES:
        name = 'data_' + file_name
        mime_type = f'text/{file_type}'
      else:
        name = file_name
        # May be None when the extension is unknown to `mimetypes`.
        mime_type, _ = mimetypes.guess_type(file_name)
      saved_files.append(
          File(
              name=name,
              content=output_file['contents'],
              mime_type=mime_type,
          )
      )

    # Collect the final result.
    return CodeExecutionResult(
        stdout=code_execution_result.get('execution_result', ''),
        stderr=code_execution_result.get('execution_error', ''),
        output_files=saved_files,
    )

  def _execute_code_interpreter(
      self,
      code: str,
      input_files: Optional[list[File]] = None,
      session_id: Optional[str] = None,
  ) -> dict[str, Any]:
    """Executes the code interpreter extension.

    Args:
      code: The code to execute.
      input_files: The input files to execute the code with.
      session_id: The session ID to execute the code with.

    Returns:
      The response from the code interpreter extension.
    """
    operation_params = {'code': code}
    # Optional parameters are only sent when they carry a value.
    if input_files:
      operation_params['files'] = [
          {'name': f.name, 'contents': f.content} for f in input_files
      ]
    if session_id:
      operation_params['session_id'] = session_id
    response = self._code_interpreter_extension.execute(
        operation_id='execute',
        operation_params=operation_params,
    )
    return response

  def _get_code_with_imports(self, code: str) -> str:
    """Builds the code string with built-in imports.

    Args:
      code: The code to execute.

    Returns:
      The code string with built-in imports.
    """
    return f"""
{_IMPORTED_LIBRARIES}

{code}
"""
@@ -0,0 +1,31 @@
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+
17
logger = logging.getLogger(__name__)

# Public names are appended only when their import succeeds.
__all__ = []

try:
    from .agent_evaluator import AgentEvaluator
except ImportError:
    # Best effort: a failed import only produces a debug-level hint, so the
    # package itself still imports without the evaluation dependencies.
    logger.debug(
        'The Vertex[eval] sdk is not installed. If you want to use the Vertex'
        ' Evaluation with agents, please install it (pip install'
        ' "google-cloud-aiplatform[evaluation]"). If not, you can ignore this'
        ' warning.'
    )
else:
    __all__.append('AgentEvaluator')
@@ -0,0 +1,329 @@
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import os
17
+ from os import path
18
+ from typing import Dict
19
+ from typing import List
20
+ from typing import Union
21
+
22
+ from .evaluation_generator import EvaluationGenerator
23
+ from .response_evaluator import ResponseEvaluator
24
+ from .trajectory_evaluator import TrajectoryEvaluator
25
+
26
# Constants for default runs and evaluation criteria.
# NUM_RUNS is the default for the `num_runs` argument of
# AgentEvaluator.evaluate.
NUM_RUNS = 2
TOOL_TRAJECTORY_SCORE_KEY = "tool_trajectory_avg_score"
# This evaluation is not very stable.
# This is always optional unless explicitly specified.
RESPONSE_EVALUATION_SCORE_KEY = "response_evaluation_score"
RESPONSE_MATCH_SCORE_KEY = "response_match_score"

# Every key that may appear in a criteria dictionary; anything else is
# rejected by AgentEvaluator._validate_input.
ALLOWED_CRITERIA = [
    TOOL_TRAJECTORY_SCORE_KEY,
    RESPONSE_EVALUATION_SCORE_KEY,
    RESPONSE_MATCH_SCORE_KEY,
]


# Keys looked up in the first entry of an eval dataset sample.
QUERY_COLUMN = "query"
REFERENCE_COLUMN = "reference"
EXPECTED_TOOL_USE_COLUMN = "expected_tool_use"


# Criteria used when no test_config.json sits next to the test file.
DEFAULT_CRITERIA = {
    TOOL_TRAJECTORY_SCORE_KEY: 1.0,  # 1-point scale; 1.0 is perfect.
    RESPONSE_MATCH_SCORE_KEY: 0.8,  # Rouge-1 text match; 0.8 is default.
}
50
+
51
+
52
def load_json(file_path: str) -> Union[Dict, List]:
    """Parse the JSON document stored at ``file_path``.

    The file is decoded as UTF-8 instead of the platform's locale default so
    that the same file parses identically on every host.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (callers in this module expect a dict or list).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)
55
+
56
+
57
class AgentEvaluator:
    """An evaluator for Agents, mainly intended for helping with test cases."""

    @staticmethod
    def find_config_for_test_file(test_file: str):
        """Find the criteria configured next to ``test_file``.

        Looks for a ``test_config.json`` file in the same folder as the test
        file.

        Args:
            test_file: Path of the eval test file.

        Returns:
            The ``criteria`` dictionary from ``test_config.json`` when that
            file exists, otherwise ``DEFAULT_CRITERIA``.

        Raises:
            ValueError: If the config file exists but does not hold a
                ``criteria`` dictionary.
        """
        test_folder = os.path.dirname(test_file)
        config_path = os.path.join(test_folder, "test_config.json")
        if not os.path.exists(config_path):
            return DEFAULT_CRITERIA

        config_data = load_json(config_path)
        # A non-dict top level (e.g. a JSON list) is reported as a format
        # error as well, instead of failing later with a TypeError.
        if isinstance(config_data, dict) and isinstance(
            config_data.get("criteria"), dict
        ):
            return config_data["criteria"]
        raise ValueError(
            f"Invalid format for test_config.json at {config_path}. Expected a"
            " 'criteria' dictionary."
        )

    @staticmethod
    def evaluate(
        agent_module,
        eval_dataset_file_path_or_dir,
        num_runs=NUM_RUNS,
        agent_name=None,
        initial_session_file=None,
    ):
        """Evaluates an Agent given eval data.

        Args:
            agent_module: The path to python module that contains the
                definition of the agent. There is convention in place here,
                where the code is going to look for 'root_agent' in the
                loaded module.
            eval_dataset_file_path_or_dir: The eval data set. This can be
                either a string representing full path to the file containing
                eval dataset, or a directory that is recursively explored for
                all files that have a `.test.json` suffix.
            num_runs: Number of times all entries in the eval dataset should
                be assessed.
            agent_name: The name of the agent.
            initial_session_file: File that contains initial session state
                that is needed by all the evals in the eval dataset. Its JSON
                content must have a top-level "state" key.

        Raises:
            ValueError: If a dataset or its criteria fail validation.
            AssertionError: If a computed score is below its threshold.
        """
        if isinstance(eval_dataset_file_path_or_dir, str) and os.path.isdir(
            eval_dataset_file_path_or_dir
        ):
            test_files = AgentEvaluator._find_test_files(
                eval_dataset_file_path_or_dir
            )
        else:
            test_files = [eval_dataset_file_path_or_dir]

        initial_session_state = {}
        if initial_session_file:
            with open(initial_session_file, "r", encoding="utf-8") as f:
                initial_session_state = json.load(f)["state"]

        for test_file in test_files:
            # Each test file is loaded on its own, so the single dataset it
            # yields is the first (and only) element of the result.
            dataset = AgentEvaluator._load_dataset(test_file)[0]
            criteria = AgentEvaluator.find_config_for_test_file(test_file)

            AgentEvaluator._validate_input([dataset], criteria)

            evaluation_response = AgentEvaluator._generate_responses(
                agent_module,
                [dataset],
                num_runs,
                agent_name=agent_name,
                initial_session={"state": initial_session_state},
            )

            if AgentEvaluator._response_evaluation_required(
                criteria, [dataset]
            ):
                AgentEvaluator._evaluate_response_scores(
                    agent_module, evaluation_response, criteria
                )

            if AgentEvaluator._trajectory_evaluation_required(
                criteria, [dataset]
            ):
                AgentEvaluator._evaluate_tool_trajectory(
                    agent_module, evaluation_response, criteria
                )

    @staticmethod
    def _find_test_files(directory: str) -> List[str]:
        """Returns every ``*.test.json`` file under ``directory``, sorted.

        Sorting makes the evaluation order independent of the order in which
        the file system happens to list entries.
        """
        return sorted(
            os.path.join(root, file_name)
            for root, _, file_names in os.walk(directory)
            for file_name in file_names
            if file_name.endswith(".test.json")
        )

    @staticmethod
    def _load_dataset(
        input_data: Union[str, List[str], List[Dict], List[List[Dict]]],
    ) -> List[List[Dict]]:
        """Loads one dataset (a list of dictionaries) per eval file.

        Args:
            input_data: A file path, a directory that is searched recursively
                for ``.test.json`` files, or a list of existing file paths.

        Returns:
            A list with one entry per loaded file; each entry is the list of
            dictionaries stored in that file.

        Raises:
            ValueError: If a path does not exist or a file does not contain a
                list of dictionaries.
            TypeError: If ``input_data`` is a list holding anything other
                than existing file paths, or is neither a string nor a list.
        """

        def load_json_file(file_path: str) -> List[Dict]:
            data = load_json(file_path)
            if not isinstance(data, list) or not all(
                isinstance(d, dict) for d in data
            ):
                raise ValueError(
                    f"{file_path} must contain a list of dictionaries."
                )
            return data

        if isinstance(input_data, str):
            if os.path.isdir(input_data):
                return [
                    load_json_file(f)
                    for f in AgentEvaluator._find_test_files(input_data)
                ]
            if os.path.isfile(input_data):
                return [load_json_file(input_data)]
            raise ValueError(f"Input path {input_data} is invalid.")

        if isinstance(input_data, list):
            if all(
                isinstance(i, str) and os.path.isfile(i) for i in input_data
            ):
                return [load_json_file(i) for i in input_data]
            raise TypeError("Input list must contain valid file paths.")

        raise TypeError("Invalid input type for dataset loading.")

    @staticmethod
    def _validate_input(eval_dataset, criteria):
        """Validates that the evaluation criteria align with the dataset.

        For efficiency, we only use first row to validate input.

        Args:
            eval_dataset: List of samples; each sample is a list of
                dictionaries.
            criteria: Mapping of criteria keys to thresholds.

        Raises:
            ValueError: If the dataset is empty, a criteria key is not in
                ``ALLOWED_CRITERIA``, the first sample is not a non-empty
                list of dictionaries, or the first entry lacks a key that a
                requested criterion needs.
        """
        if not eval_dataset:
            raise ValueError("The evaluation dataset is None or empty.")

        for key in criteria:
            if key not in ALLOWED_CRITERIA:
                raise ValueError(
                    f"Invalid criteria key: {key}. Expected one of"
                    f" {ALLOWED_CRITERIA}."
                )

        sample = eval_dataset[0]
        # Check the shape before indexing: either a non-list sample or a
        # non-dict first entry is enough to reject the dataset.
        if (
            not isinstance(sample, list)
            or not sample
            or not isinstance(sample[0], dict)
        ):
            raise ValueError(
                "Each evaluation dataset sample must be list of dictionary."
                f" But it's {eval_dataset}"
            )
        first_query = sample[0]

        if TOOL_TRAJECTORY_SCORE_KEY in criteria:
            if (
                QUERY_COLUMN not in first_query
                or EXPECTED_TOOL_USE_COLUMN not in first_query
            ):
                raise ValueError(
                    f"Samples for {TOOL_TRAJECTORY_SCORE_KEY} must include"
                    f" '{QUERY_COLUMN}' and '{EXPECTED_TOOL_USE_COLUMN}' keys."
                    f" The sample is {sample}."
                )

        if RESPONSE_EVALUATION_SCORE_KEY in criteria:
            if QUERY_COLUMN not in first_query:
                raise ValueError(
                    f"Samples for {RESPONSE_EVALUATION_SCORE_KEY} must include"
                    f" '{QUERY_COLUMN}' key. The sample is {sample}."
                )

        if RESPONSE_MATCH_SCORE_KEY in criteria:
            if (
                QUERY_COLUMN not in first_query
                or REFERENCE_COLUMN not in first_query
            ):
                raise ValueError(
                    f"Samples for {RESPONSE_MATCH_SCORE_KEY} must include"
                    f" '{QUERY_COLUMN}' and '{REFERENCE_COLUMN}' keys. The"
                    f" sample is {sample}."
                )

    @staticmethod
    def _get_infer_criteria(eval_dataset):
        """Infers evaluation criteria based on the provided dataset.

        Only the first entry of the first sample is inspected.

        Args:
            eval_dataset (list): A list of evaluation samples.

        Returns:
            dict: Inferred evaluation criteria based on dataset fields.
        """
        inferred_criteria = {}
        sample = eval_dataset[0][0]

        if QUERY_COLUMN in sample and EXPECTED_TOOL_USE_COLUMN in sample:
            inferred_criteria[TOOL_TRAJECTORY_SCORE_KEY] = DEFAULT_CRITERIA[
                TOOL_TRAJECTORY_SCORE_KEY
            ]

        if QUERY_COLUMN in sample and REFERENCE_COLUMN in sample:
            inferred_criteria[RESPONSE_MATCH_SCORE_KEY] = DEFAULT_CRITERIA[
                RESPONSE_MATCH_SCORE_KEY
            ]

        return inferred_criteria

    @staticmethod
    def _generate_responses(
        agent_module,
        eval_dataset,
        num_runs,
        agent_name=None,
        initial_session=None,
    ):
        """Generates evaluation responses by running the agent module.

        Args:
            agent_module: Module path of the agent under evaluation.
            eval_dataset: The dataset handed to the generator.
            num_runs: Passed to the generator as ``repeat_num``.
            agent_name: Optional agent name forwarded to the generator.
            initial_session: Optional initial session mapping; a fresh empty
                dict is used when omitted so no default object is shared
                between calls.

        Returns:
            Whatever ``EvaluationGenerator.generate_responses`` returns.
        """
        if initial_session is None:
            initial_session = {}
        return EvaluationGenerator.generate_responses(
            eval_dataset,
            agent_module,
            repeat_num=num_runs,
            agent_name=agent_name,
            initial_session=initial_session,
        )

    @staticmethod
    def _generate_responses_from_session(eval_dataset, session_path):
        """Generates evaluation responses from the session at session_path."""
        return EvaluationGenerator.generate_responses_from_session(
            session_path, eval_dataset
        )

    @staticmethod
    def _response_evaluation_required(criteria, eval_dataset):
        """Checks if response evaluation is needed.

        It is needed when the first dataset entry has a reference column and
        at least one response criterion is requested.
        """
        return REFERENCE_COLUMN in eval_dataset[0][0] and any(
            key in criteria
            for key in [RESPONSE_EVALUATION_SCORE_KEY, RESPONSE_MATCH_SCORE_KEY]
        )

    @staticmethod
    def _trajectory_evaluation_required(evaluation_criteria, eval_dataset):
        """Checks if tool trajectory evaluation is needed.

        It is needed when the first dataset entry has an expected tool use
        column and the tool trajectory criterion is requested.
        """
        return (
            EXPECTED_TOOL_USE_COLUMN in eval_dataset[0][0]
            and TOOL_TRAJECTORY_SCORE_KEY in evaluation_criteria
        )

    @staticmethod
    def _evaluate_response_scores(agent_module, evaluation_response, criteria):
        """Evaluates response scores and asserts they meet the criteria.

        Raises:
            AssertionError: If a response score is below its threshold.
        """
        metrics = ResponseEvaluator.evaluate(
            evaluation_response, criteria, print_detailed_results=True
        )

        AgentEvaluator._assert_score(
            metrics,
            "coherence/mean",
            criteria.get(RESPONSE_EVALUATION_SCORE_KEY),
            "Average response evaluation score",
            agent_module,
        )

        AgentEvaluator._assert_score(
            metrics,
            "rouge_1/mean",
            criteria.get(RESPONSE_MATCH_SCORE_KEY),
            "Average response match score",
            agent_module,
        )

    @staticmethod
    def _evaluate_tool_trajectory(agent_module, evaluation_response, criteria):
        """Evaluates tool trajectory scores and asserts they meet the criteria.

        Raises:
            AssertionError: If the trajectory score is below its threshold.
        """
        score = TrajectoryEvaluator.evaluate(
            evaluation_response, print_detailed_results=True
        )
        AgentEvaluator._assert_score(
            {TOOL_TRAJECTORY_SCORE_KEY: score},
            TOOL_TRAJECTORY_SCORE_KEY,
            criteria[TOOL_TRAJECTORY_SCORE_KEY],
            "Average tool trajectory evaluation score",
            agent_module,
        )

    @staticmethod
    def _assert_score(
        metrics, metric_key, threshold, description, agent_module
    ):
        """Asserts that a metric meets the specified threshold.

        Nothing is checked when the metric is absent from ``metrics`` or when
        ``threshold`` is None (the criterion was not requested).

        Args:
            metrics: Mapping of metric names to scores.
            metric_key: Name of the metric to check.
            threshold: Minimum acceptable score, or None to skip the check.
            description: Human readable metric name for the error message.
            agent_module: Agent module named in the error message.

        Raises:
            AssertionError: If the score is not >= ``threshold``.
        """
        if threshold is None or metric_key not in metrics:
            return

        actual_score = metrics[metric_key]
        # Raised explicitly so the check still runs under `python -O`, which
        # strips `assert` statements. `not >=` (rather than `<`) keeps NaN
        # scores failing, as they did with the original assert.
        if not actual_score >= threshold:
            raise AssertionError(
                f"{description} for {agent_module} is lower than expected. "
                f"Expected >= {threshold}, but got {actual_score}."
            )
@@ -0,0 +1,24 @@
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
class EvalConstants:
    """Holds the string constants naming fields used in evaluation files."""

    # Fields of an evaluation entry.
    QUERY = "query"
    EXPECTED_TOOL_USE = "expected_tool_use"
    RESPONSE = "response"
    REFERENCE = "reference"
    # Presumably the fields of each expected tool use item -- verify against
    # the evaluators that read them.
    TOOL_NAME = "tool_name"
    TOOL_INPUT = "tool_input"
    MOCK_TOOL_OUTPUT = "mock_tool_output"