google-adk 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- google/adk/__init__.py +20 -0
- google/adk/agents/__init__.py +32 -0
- google/adk/agents/active_streaming_tool.py +38 -0
- google/adk/agents/base_agent.py +345 -0
- google/adk/agents/callback_context.py +112 -0
- google/adk/agents/invocation_context.py +181 -0
- google/adk/agents/langgraph_agent.py +140 -0
- google/adk/agents/live_request_queue.py +64 -0
- google/adk/agents/llm_agent.py +376 -0
- google/adk/agents/loop_agent.py +62 -0
- google/adk/agents/parallel_agent.py +96 -0
- google/adk/agents/readonly_context.py +46 -0
- google/adk/agents/remote_agent.py +50 -0
- google/adk/agents/run_config.py +87 -0
- google/adk/agents/sequential_agent.py +45 -0
- google/adk/agents/transcription_entry.py +34 -0
- google/adk/artifacts/__init__.py +23 -0
- google/adk/artifacts/base_artifact_service.py +128 -0
- google/adk/artifacts/gcs_artifact_service.py +195 -0
- google/adk/artifacts/in_memory_artifact_service.py +133 -0
- google/adk/auth/__init__.py +22 -0
- google/adk/auth/auth_credential.py +220 -0
- google/adk/auth/auth_handler.py +268 -0
- google/adk/auth/auth_preprocessor.py +116 -0
- google/adk/auth/auth_schemes.py +67 -0
- google/adk/auth/auth_tool.py +55 -0
- google/adk/cli/__init__.py +15 -0
- google/adk/cli/__main__.py +18 -0
- google/adk/cli/agent_graph.py +122 -0
- google/adk/cli/browser/adk_favicon.svg +17 -0
- google/adk/cli/browser/assets/audio-processor.js +51 -0
- google/adk/cli/browser/assets/config/runtime-config.json +3 -0
- google/adk/cli/browser/index.html +33 -0
- google/adk/cli/browser/main-XUU6OGCC.js +75 -0
- google/adk/cli/browser/polyfills-FFHMD2TL.js +18 -0
- google/adk/cli/browser/styles-4VDSPQ37.css +17 -0
- google/adk/cli/cli.py +181 -0
- google/adk/cli/cli_deploy.py +181 -0
- google/adk/cli/cli_eval.py +282 -0
- google/adk/cli/cli_tools_click.py +479 -0
- google/adk/cli/fast_api.py +774 -0
- google/adk/cli/media_streamer/__init__.py +19 -0
- google/adk/cli/media_streamer/index.html +228 -0
- google/adk/cli/utils/__init__.py +49 -0
- google/adk/cli/utils/envs.py +57 -0
- google/adk/cli/utils/evals.py +93 -0
- google/adk/cli/utils/logs.py +72 -0
- google/adk/code_executors/__init__.py +49 -0
- google/adk/code_executors/base_code_executor.py +97 -0
- google/adk/code_executors/code_execution_utils.py +256 -0
- google/adk/code_executors/code_executor_context.py +202 -0
- google/adk/code_executors/container_code_executor.py +196 -0
- google/adk/code_executors/unsafe_local_code_executor.py +71 -0
- google/adk/code_executors/vertex_ai_code_executor.py +234 -0
- google/adk/evaluation/__init__.py +31 -0
- google/adk/evaluation/agent_evaluator.py +329 -0
- google/adk/evaluation/evaluation_constants.py +24 -0
- google/adk/evaluation/evaluation_generator.py +270 -0
- google/adk/evaluation/response_evaluator.py +135 -0
- google/adk/evaluation/trajectory_evaluator.py +184 -0
- google/adk/events/__init__.py +21 -0
- google/adk/events/event.py +130 -0
- google/adk/events/event_actions.py +55 -0
- google/adk/examples/__init__.py +28 -0
- google/adk/examples/base_example_provider.py +35 -0
- google/adk/examples/example.py +27 -0
- google/adk/examples/example_util.py +123 -0
- google/adk/examples/vertex_ai_example_store.py +104 -0
- google/adk/flows/__init__.py +14 -0
- google/adk/flows/llm_flows/__init__.py +20 -0
- google/adk/flows/llm_flows/_base_llm_processor.py +52 -0
- google/adk/flows/llm_flows/_code_execution.py +458 -0
- google/adk/flows/llm_flows/_nl_planning.py +129 -0
- google/adk/flows/llm_flows/agent_transfer.py +132 -0
- google/adk/flows/llm_flows/audio_transcriber.py +109 -0
- google/adk/flows/llm_flows/auto_flow.py +49 -0
- google/adk/flows/llm_flows/base_llm_flow.py +559 -0
- google/adk/flows/llm_flows/basic.py +72 -0
- google/adk/flows/llm_flows/contents.py +370 -0
- google/adk/flows/llm_flows/functions.py +486 -0
- google/adk/flows/llm_flows/identity.py +47 -0
- google/adk/flows/llm_flows/instructions.py +137 -0
- google/adk/flows/llm_flows/single_flow.py +57 -0
- google/adk/memory/__init__.py +35 -0
- google/adk/memory/base_memory_service.py +74 -0
- google/adk/memory/in_memory_memory_service.py +62 -0
- google/adk/memory/vertex_ai_rag_memory_service.py +177 -0
- google/adk/models/__init__.py +31 -0
- google/adk/models/anthropic_llm.py +243 -0
- google/adk/models/base_llm.py +87 -0
- google/adk/models/base_llm_connection.py +76 -0
- google/adk/models/gemini_llm_connection.py +200 -0
- google/adk/models/google_llm.py +331 -0
- google/adk/models/lite_llm.py +673 -0
- google/adk/models/llm_request.py +98 -0
- google/adk/models/llm_response.py +111 -0
- google/adk/models/registry.py +102 -0
- google/adk/planners/__init__.py +23 -0
- google/adk/planners/base_planner.py +66 -0
- google/adk/planners/built_in_planner.py +75 -0
- google/adk/planners/plan_re_act_planner.py +208 -0
- google/adk/runners.py +456 -0
- google/adk/sessions/__init__.py +41 -0
- google/adk/sessions/base_session_service.py +133 -0
- google/adk/sessions/database_session_service.py +522 -0
- google/adk/sessions/in_memory_session_service.py +206 -0
- google/adk/sessions/session.py +54 -0
- google/adk/sessions/state.py +71 -0
- google/adk/sessions/vertex_ai_session_service.py +356 -0
- google/adk/telemetry.py +189 -0
- google/adk/tests/__init__.py +14 -0
- google/adk/tests/integration/.env.example +10 -0
- google/adk/tests/integration/__init__.py +18 -0
- google/adk/tests/integration/conftest.py +119 -0
- google/adk/tests/integration/fixture/__init__.py +14 -0
- google/adk/tests/integration/fixture/agent_with_config/__init__.py +15 -0
- google/adk/tests/integration/fixture/agent_with_config/agent.py +88 -0
- google/adk/tests/integration/fixture/callback_agent/__init__.py +15 -0
- google/adk/tests/integration/fixture/callback_agent/agent.py +105 -0
- google/adk/tests/integration/fixture/context_update_test/OWNERS +1 -0
- google/adk/tests/integration/fixture/context_update_test/__init__.py +15 -0
- google/adk/tests/integration/fixture/context_update_test/agent.py +43 -0
- google/adk/tests/integration/fixture/context_update_test/successful_test.session.json +582 -0
- google/adk/tests/integration/fixture/context_variable_agent/__init__.py +15 -0
- google/adk/tests/integration/fixture/context_variable_agent/agent.py +115 -0
- google/adk/tests/integration/fixture/customer_support_ma/__init__.py +15 -0
- google/adk/tests/integration/fixture/customer_support_ma/agent.py +172 -0
- google/adk/tests/integration/fixture/ecommerce_customer_service_agent/__init__.py +15 -0
- google/adk/tests/integration/fixture/ecommerce_customer_service_agent/agent.py +338 -0
- google/adk/tests/integration/fixture/ecommerce_customer_service_agent/order_query.test.json +69 -0
- google/adk/tests/integration/fixture/ecommerce_customer_service_agent/test_config.json +6 -0
- google/adk/tests/integration/fixture/flow_complex_spark/__init__.py +15 -0
- google/adk/tests/integration/fixture/flow_complex_spark/agent.py +182 -0
- google/adk/tests/integration/fixture/flow_complex_spark/sample.debug.log +243 -0
- google/adk/tests/integration/fixture/flow_complex_spark/sample.session.json +190 -0
- google/adk/tests/integration/fixture/hello_world_agent/__init__.py +15 -0
- google/adk/tests/integration/fixture/hello_world_agent/agent.py +95 -0
- google/adk/tests/integration/fixture/hello_world_agent/roll_die.test.json +24 -0
- google/adk/tests/integration/fixture/hello_world_agent/test_config.json +6 -0
- google/adk/tests/integration/fixture/home_automation_agent/__init__.py +15 -0
- google/adk/tests/integration/fixture/home_automation_agent/agent.py +304 -0
- google/adk/tests/integration/fixture/home_automation_agent/simple_test.test.json +5 -0
- google/adk/tests/integration/fixture/home_automation_agent/simple_test2.test.json +5 -0
- google/adk/tests/integration/fixture/home_automation_agent/test_config.json +5 -0
- google/adk/tests/integration/fixture/home_automation_agent/test_files/dependent_tool_calls.test.json +18 -0
- google/adk/tests/integration/fixture/home_automation_agent/test_files/memorizing_past_events/eval_data.test.json +17 -0
- google/adk/tests/integration/fixture/home_automation_agent/test_files/memorizing_past_events/test_config.json +6 -0
- google/adk/tests/integration/fixture/home_automation_agent/test_files/simple_multi_turn_conversation.test.json +18 -0
- google/adk/tests/integration/fixture/home_automation_agent/test_files/simple_test.test.json +17 -0
- google/adk/tests/integration/fixture/home_automation_agent/test_files/simple_test2.test.json +5 -0
- google/adk/tests/integration/fixture/home_automation_agent/test_files/test_config.json +5 -0
- google/adk/tests/integration/fixture/tool_agent/__init__.py +15 -0
- google/adk/tests/integration/fixture/tool_agent/agent.py +218 -0
- google/adk/tests/integration/fixture/tool_agent/files/Agent_test_plan.pdf +0 -0
- google/adk/tests/integration/fixture/trip_planner_agent/__init__.py +15 -0
- google/adk/tests/integration/fixture/trip_planner_agent/agent.py +110 -0
- google/adk/tests/integration/fixture/trip_planner_agent/initial.session.json +13 -0
- google/adk/tests/integration/fixture/trip_planner_agent/test_config.json +5 -0
- google/adk/tests/integration/fixture/trip_planner_agent/test_files/initial.session.json +13 -0
- google/adk/tests/integration/fixture/trip_planner_agent/test_files/test_config.json +5 -0
- google/adk/tests/integration/fixture/trip_planner_agent/test_files/trip_inquiry_sub_agent.test.json +7 -0
- google/adk/tests/integration/fixture/trip_planner_agent/trip_inquiry.test.json +19 -0
- google/adk/tests/integration/models/__init__.py +14 -0
- google/adk/tests/integration/models/test_google_llm.py +65 -0
- google/adk/tests/integration/test_callback.py +70 -0
- google/adk/tests/integration/test_context_variable.py +67 -0
- google/adk/tests/integration/test_evalute_agent_in_fixture.py +76 -0
- google/adk/tests/integration/test_multi_agent.py +28 -0
- google/adk/tests/integration/test_multi_turn.py +42 -0
- google/adk/tests/integration/test_single_agent.py +23 -0
- google/adk/tests/integration/test_sub_agent.py +26 -0
- google/adk/tests/integration/test_system_instruction.py +177 -0
- google/adk/tests/integration/test_tools.py +287 -0
- google/adk/tests/integration/test_with_test_file.py +34 -0
- google/adk/tests/integration/tools/__init__.py +14 -0
- google/adk/tests/integration/utils/__init__.py +16 -0
- google/adk/tests/integration/utils/asserts.py +75 -0
- google/adk/tests/integration/utils/test_runner.py +97 -0
- google/adk/tests/unittests/__init__.py +14 -0
- google/adk/tests/unittests/agents/__init__.py +14 -0
- google/adk/tests/unittests/agents/test_base_agent.py +407 -0
- google/adk/tests/unittests/agents/test_langgraph_agent.py +191 -0
- google/adk/tests/unittests/agents/test_llm_agent_callbacks.py +138 -0
- google/adk/tests/unittests/agents/test_llm_agent_fields.py +231 -0
- google/adk/tests/unittests/agents/test_loop_agent.py +136 -0
- google/adk/tests/unittests/agents/test_parallel_agent.py +92 -0
- google/adk/tests/unittests/agents/test_sequential_agent.py +114 -0
- google/adk/tests/unittests/artifacts/__init__.py +14 -0
- google/adk/tests/unittests/artifacts/test_artifact_service.py +276 -0
- google/adk/tests/unittests/auth/test_auth_handler.py +575 -0
- google/adk/tests/unittests/conftest.py +73 -0
- google/adk/tests/unittests/fast_api/__init__.py +14 -0
- google/adk/tests/unittests/fast_api/test_fast_api.py +269 -0
- google/adk/tests/unittests/flows/__init__.py +14 -0
- google/adk/tests/unittests/flows/llm_flows/__init__.py +14 -0
- google/adk/tests/unittests/flows/llm_flows/_test_examples.py +142 -0
- google/adk/tests/unittests/flows/llm_flows/test_agent_transfer.py +311 -0
- google/adk/tests/unittests/flows/llm_flows/test_functions_long_running.py +244 -0
- google/adk/tests/unittests/flows/llm_flows/test_functions_request_euc.py +346 -0
- google/adk/tests/unittests/flows/llm_flows/test_functions_sequential.py +93 -0
- google/adk/tests/unittests/flows/llm_flows/test_functions_simple.py +258 -0
- google/adk/tests/unittests/flows/llm_flows/test_identity.py +66 -0
- google/adk/tests/unittests/flows/llm_flows/test_instructions.py +164 -0
- google/adk/tests/unittests/flows/llm_flows/test_model_callbacks.py +142 -0
- google/adk/tests/unittests/flows/llm_flows/test_other_configs.py +46 -0
- google/adk/tests/unittests/flows/llm_flows/test_tool_callbacks.py +269 -0
- google/adk/tests/unittests/models/__init__.py +14 -0
- google/adk/tests/unittests/models/test_google_llm.py +224 -0
- google/adk/tests/unittests/models/test_litellm.py +804 -0
- google/adk/tests/unittests/models/test_models.py +60 -0
- google/adk/tests/unittests/sessions/__init__.py +14 -0
- google/adk/tests/unittests/sessions/test_session_service.py +227 -0
- google/adk/tests/unittests/sessions/test_vertex_ai_session_service.py +246 -0
- google/adk/tests/unittests/streaming/__init__.py +14 -0
- google/adk/tests/unittests/streaming/test_streaming.py +50 -0
- google/adk/tests/unittests/tools/__init__.py +14 -0
- google/adk/tests/unittests/tools/apihub_tool/clients/test_apihub_client.py +499 -0
- google/adk/tests/unittests/tools/apihub_tool/test_apihub_toolset.py +204 -0
- google/adk/tests/unittests/tools/application_integration_tool/clients/test_connections_client.py +600 -0
- google/adk/tests/unittests/tools/application_integration_tool/clients/test_integration_client.py +630 -0
- google/adk/tests/unittests/tools/application_integration_tool/test_application_integration_toolset.py +345 -0
- google/adk/tests/unittests/tools/google_api_tool/__init__.py +13 -0
- google/adk/tests/unittests/tools/google_api_tool/test_googleapi_to_openapi_converter.py +657 -0
- google/adk/tests/unittests/tools/openapi_tool/auth/credential_exchangers/test_auto_auth_credential_exchanger.py +145 -0
- google/adk/tests/unittests/tools/openapi_tool/auth/credential_exchangers/test_base_auth_credential_exchanger.py +68 -0
- google/adk/tests/unittests/tools/openapi_tool/auth/credential_exchangers/test_oauth2_exchanger.py +153 -0
- google/adk/tests/unittests/tools/openapi_tool/auth/credential_exchangers/test_service_account_exchanger.py +196 -0
- google/adk/tests/unittests/tools/openapi_tool/auth/test_auth_helper.py +573 -0
- google/adk/tests/unittests/tools/openapi_tool/common/test_common.py +436 -0
- google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test.yaml +1367 -0
- google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test_openapi_spec_parser.py +628 -0
- google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test_openapi_toolset.py +139 -0
- google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test_operation_parser.py +406 -0
- google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test_rest_api_tool.py +966 -0
- google/adk/tests/unittests/tools/openapi_tool/openapi_spec_parser/test_tool_auth_handler.py +201 -0
- google/adk/tests/unittests/tools/retrieval/__init__.py +14 -0
- google/adk/tests/unittests/tools/retrieval/test_vertex_ai_rag_retrieval.py +147 -0
- google/adk/tests/unittests/tools/test_agent_tool.py +167 -0
- google/adk/tests/unittests/tools/test_base_tool.py +141 -0
- google/adk/tests/unittests/tools/test_build_function_declaration.py +277 -0
- google/adk/tests/unittests/utils.py +304 -0
- google/adk/tools/__init__.py +51 -0
- google/adk/tools/_automatic_function_calling_util.py +346 -0
- google/adk/tools/agent_tool.py +176 -0
- google/adk/tools/apihub_tool/__init__.py +19 -0
- google/adk/tools/apihub_tool/apihub_toolset.py +209 -0
- google/adk/tools/apihub_tool/clients/__init__.py +13 -0
- google/adk/tools/apihub_tool/clients/apihub_client.py +332 -0
- google/adk/tools/apihub_tool/clients/secret_client.py +115 -0
- google/adk/tools/application_integration_tool/__init__.py +19 -0
- google/adk/tools/application_integration_tool/application_integration_toolset.py +230 -0
- google/adk/tools/application_integration_tool/clients/connections_client.py +903 -0
- google/adk/tools/application_integration_tool/clients/integration_client.py +253 -0
- google/adk/tools/base_tool.py +144 -0
- google/adk/tools/built_in_code_execution_tool.py +59 -0
- google/adk/tools/crewai_tool.py +72 -0
- google/adk/tools/example_tool.py +62 -0
- google/adk/tools/exit_loop_tool.py +23 -0
- google/adk/tools/function_parameter_parse_util.py +307 -0
- google/adk/tools/function_tool.py +87 -0
- google/adk/tools/get_user_choice_tool.py +28 -0
- google/adk/tools/google_api_tool/__init__.py +14 -0
- google/adk/tools/google_api_tool/google_api_tool.py +59 -0
- google/adk/tools/google_api_tool/google_api_tool_set.py +107 -0
- google/adk/tools/google_api_tool/google_api_tool_sets.py +55 -0
- google/adk/tools/google_api_tool/googleapi_to_openapi_converter.py +521 -0
- google/adk/tools/google_search_tool.py +68 -0
- google/adk/tools/langchain_tool.py +86 -0
- google/adk/tools/load_artifacts_tool.py +113 -0
- google/adk/tools/load_memory_tool.py +58 -0
- google/adk/tools/load_web_page.py +41 -0
- google/adk/tools/long_running_tool.py +39 -0
- google/adk/tools/mcp_tool/__init__.py +42 -0
- google/adk/tools/mcp_tool/conversion_utils.py +161 -0
- google/adk/tools/mcp_tool/mcp_tool.py +113 -0
- google/adk/tools/mcp_tool/mcp_toolset.py +272 -0
- google/adk/tools/openapi_tool/__init__.py +21 -0
- google/adk/tools/openapi_tool/auth/__init__.py +19 -0
- google/adk/tools/openapi_tool/auth/auth_helpers.py +498 -0
- google/adk/tools/openapi_tool/auth/credential_exchangers/__init__.py +25 -0
- google/adk/tools/openapi_tool/auth/credential_exchangers/auto_auth_credential_exchanger.py +105 -0
- google/adk/tools/openapi_tool/auth/credential_exchangers/base_credential_exchanger.py +55 -0
- google/adk/tools/openapi_tool/auth/credential_exchangers/oauth2_exchanger.py +117 -0
- google/adk/tools/openapi_tool/auth/credential_exchangers/service_account_exchanger.py +97 -0
- google/adk/tools/openapi_tool/common/__init__.py +19 -0
- google/adk/tools/openapi_tool/common/common.py +300 -0
- google/adk/tools/openapi_tool/openapi_spec_parser/__init__.py +32 -0
- google/adk/tools/openapi_tool/openapi_spec_parser/openapi_spec_parser.py +231 -0
- google/adk/tools/openapi_tool/openapi_spec_parser/openapi_toolset.py +144 -0
- google/adk/tools/openapi_tool/openapi_spec_parser/operation_parser.py +260 -0
- google/adk/tools/openapi_tool/openapi_spec_parser/rest_api_tool.py +496 -0
- google/adk/tools/openapi_tool/openapi_spec_parser/tool_auth_handler.py +268 -0
- google/adk/tools/preload_memory_tool.py +72 -0
- google/adk/tools/retrieval/__init__.py +36 -0
- google/adk/tools/retrieval/base_retrieval_tool.py +37 -0
- google/adk/tools/retrieval/files_retrieval.py +33 -0
- google/adk/tools/retrieval/llama_index_retrieval.py +41 -0
- google/adk/tools/retrieval/vertex_ai_rag_retrieval.py +107 -0
- google/adk/tools/tool_context.py +90 -0
- google/adk/tools/toolbox_tool.py +46 -0
- google/adk/tools/transfer_to_agent_tool.py +21 -0
- google/adk/tools/vertex_ai_search_tool.py +96 -0
- google/adk/version.py +16 -0
- google_adk-0.0.1.dist-info/LICENSE.txt → google_adk-0.0.2.dist-info/LICENSE +32 -0
- google_adk-0.0.2.dist-info/METADATA +73 -0
- google_adk-0.0.2.dist-info/RECORD +308 -0
- {google_adk-0.0.1.dist-info → google_adk-0.0.2.dist-info}/WHEEL +1 -2
- google_adk-0.0.2.dist-info/entry_points.txt +3 -0
- agent_kit/__init__.py +0 -0
- google_adk-0.0.1.dist-info/METADATA +0 -15
- google_adk-0.0.1.dist-info/RECORD +0 -6
- google_adk-0.0.1.dist-info/top_level.txt +0 -1
@@ -0,0 +1,282 @@
|
|
1
|
+
# Copyright 2025 Google LLC
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from enum import Enum
|
16
|
+
import importlib.util
|
17
|
+
import json
|
18
|
+
import logging
|
19
|
+
import os
|
20
|
+
import sys
|
21
|
+
import traceback
|
22
|
+
from typing import Any
|
23
|
+
from typing import Generator
|
24
|
+
from typing import Optional
|
25
|
+
import uuid
|
26
|
+
|
27
|
+
from pydantic import BaseModel
|
28
|
+
|
29
|
+
from ..agents import Agent
|
30
|
+
|
31
|
+
logger = logging.getLogger(__name__)
|
32
|
+
|
33
|
+
|
34
|
+
class EvalStatus(Enum):
  """Outcome of evaluating a single metric, or of an eval as a whole."""

  PASSED = 1
  FAILED = 2
  NOT_EVALUATED = 3
|
38
|
+
|
39
|
+
|
40
|
+
class EvalMetric(BaseModel):
  """A metric to evaluate, paired with the threshold its score is held to."""

  # Name identifying the metric; `run_evals` dispatches on this value.
  metric_name: str
  # Threshold the score is compared against; `_get_eval_metric_result` treats
  # `score >= threshold` as a pass.
  threshold: float
|
43
|
+
|
44
|
+
|
45
|
+
class EvalMetricResult(BaseModel):
  """Score and pass/fail status produced for a single metric."""

  # Explicit `None` default: without it pydantic treats the field as required
  # (even though it is Optional), so results for metrics that were never
  # scored could not be constructed without passing a score.
  score: Optional[float] = None
  # Whether the metric passed, failed, or was not evaluated.
  eval_status: EvalStatus
|
48
|
+
|
49
|
+
|
50
|
+
class EvalResult(BaseModel):
  """Result of running one eval from an eval set file."""

  # Path of the eval set file the eval was read from.
  eval_set_file: str
  # Name of the eval inside the eval set file.
  eval_id: str
  # Status aggregated over all metric results of this eval.
  final_eval_status: EvalStatus
  # One (metric, result) pair per metric that was requested.
  eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]]
  # Id of the session the eval was run in.
  session_id: str
|
56
|
+
|
57
|
+
|
58
|
+
MISSING_EVAL_DEPENDENCIES_MESSAGE = (
    "Eval module is not installed, please install via `pip install"
    " google-adk[eval]`."
)
TOOL_TRAJECTORY_SCORE_KEY = "tool_trajectory_avg_score"
RESPONSE_MATCH_SCORE_KEY = "response_match_score"
# This evaluation is not very stable.
# This is always optional unless explicitly specified.
RESPONSE_EVALUATION_SCORE_KEY = "response_evaluation_score"

# Prefix of the session ids generated for eval runs.
EVAL_SESSION_ID_PREFIX = "___eval___session___"
# Metric thresholds used when no eval config file is supplied.
DEFAULT_CRITERIA = {
    TOOL_TRAJECTORY_SCORE_KEY: 1.0,  # 1-point scale; 1.0 is perfect.
    RESPONSE_MATCH_SCORE_KEY: 0.8,
}
|
73
|
+
|
74
|
+
|
75
|
+
def _import_from_path(module_name, file_path):
|
76
|
+
spec = importlib.util.spec_from_file_location(module_name, file_path)
|
77
|
+
module = importlib.util.module_from_spec(spec)
|
78
|
+
sys.modules[module_name] = module
|
79
|
+
spec.loader.exec_module(module)
|
80
|
+
return module
|
81
|
+
|
82
|
+
|
83
|
+
def _get_agent_module(agent_module_file_path: str):
  """Imports the `__init__.py` found in `agent_module_file_path`.

  The file is always imported under the fixed module name "agent", so each
  call re-executes it and replaces the previous `sys.modules["agent"]` entry.

  Args:
    agent_module_file_path: Directory that contains the agent's `__init__.py`.

  Returns:
    The imported module object.
  """
  file_path = os.path.join(agent_module_file_path, "__init__.py")
  module_name = "agent"
  return _import_from_path(module_name, file_path)
|
87
|
+
|
88
|
+
|
89
|
+
def get_evaluation_criteria_or_default(
    eval_config_file_path: str,
) -> dict[str, float]:
  """Returns evaluation criteria from the config file, if present.

  Otherwise a default one is returned.

  Args:
    eval_config_file_path: Path to a JSON config file holding a top-level
      "criteria" dictionary. A falsy value selects the default criteria.

  Returns:
    The "criteria" dictionary of the config file, or a copy of
    `DEFAULT_CRITERIA` when no config file path is given.

  Raises:
    ValueError: If the config file has no "criteria" dictionary.
  """
  if not eval_config_file_path:
    logger.info("No config file supplied. Using default criteria.")
    # Return a copy so callers cannot mutate the module-level defaults.
    return dict(DEFAULT_CRITERIA)

  with open(eval_config_file_path, "r", encoding="utf-8") as f:
    config_data = json.load(f)

  if "criteria" in config_data and isinstance(config_data["criteria"], dict):
    return config_data["criteria"]

  raise ValueError(
      f"Invalid format for test_config.json at {eval_config_file_path}."
      " Expected a 'criteria' dictionary."
  )
|
112
|
+
|
113
|
+
|
114
|
+
def get_root_agent(agent_module_file_path: str) -> Agent:
  """Returns the root agent given the agent module.

  Args:
    agent_module_file_path: Directory of the agent package; its `__init__.py`
      must expose an `agent` attribute that defines `root_agent`.

  Returns:
    The `root_agent` object found at `<module>.agent.root_agent`.
  """
  agent_module = _get_agent_module(agent_module_file_path)
  return agent_module.agent.root_agent
|
119
|
+
|
120
|
+
|
121
|
+
def try_get_reset_func(agent_module_file_path: str) -> Any:
  """Returns reset function for the agent, if present, given the agent module.

  Args:
    agent_module_file_path: Directory of the agent package; its `__init__.py`
      must expose an `agent` attribute.

  Returns:
    The `reset_data` attribute of `<module>.agent`, or None if it is absent.
  """
  agent_module = _get_agent_module(agent_module_file_path)
  return getattr(agent_module.agent, "reset_data", None)
|
126
|
+
|
127
|
+
|
128
|
+
def parse_and_get_evals_to_run(
    eval_set_file_path: tuple[str, ...],
) -> dict[str, list[str]]:
  """Returns a dictionary of eval sets to evals that should be run.

  Each input is either `<eval_set_file>` or
  `<eval_set_file>:<eval_1>,<eval_2>,...`. Eval names given for the same file
  in several inputs are accumulated in input order.

  Args:
    eval_set_file_path: The eval set specifications to parse.

  Returns:
    A mapping from eval set file to the eval names requested for it. An empty
    list means no specific evals were named for that file.
  """
  eval_set_to_evals = {}
  for input_eval_set in eval_set_file_path:
    parts = input_eval_set.split(":")
    eval_set_file = parts[0]
    # Only the segment right after the first ":" carries eval names. Empty
    # names (e.g. from a trailing ":" or a stray ",") are dropped so they do
    # not turn into a filter that matches no eval at all.
    evals = (
        [name for name in parts[1].split(",") if name]
        if len(parts) > 1
        else []
    )
    eval_set_to_evals.setdefault(eval_set_file, []).extend(evals)

  return eval_set_to_evals
|
147
|
+
|
148
|
+
|
149
|
+
def run_evals(
    eval_set_to_evals: dict[str, list[str]],
    root_agent: Agent,
    reset_func: Optional[Any],
    eval_metrics: list[EvalMetric],
    session_service=None,
    artifact_service=None,
    print_detailed_results=False,
) -> Generator[EvalResult, None, None]:
  """Runs the selected evals and yields one `EvalResult` per completed eval.

  Progress and per-metric results are printed to stdout. An exception raised
  while running or scoring a single eval is printed and logged, and that eval
  yields no result; the remaining evals still run.

  Args:
    eval_set_to_evals: Maps an eval set file to the eval names to run from it.
      An empty list runs every eval in the file.
    root_agent: Agent the eval queries are run against.
    reset_func: Optional reset callable forwarded to the evaluation generator.
    eval_metrics: Metrics (with thresholds) to compute for every eval.
    session_service: Optional session service forwarded to the generator.
    artifact_service: Optional artifact service forwarded to the generator.
    print_detailed_results: Forwarded to the evaluators.

  Yields:
    An `EvalResult` for every eval that ran to completion.

  Raises:
    ModuleNotFoundError: If the evaluation modules cannot be imported.
  """
  try:
    from ..evaluation.agent_evaluator import EvaluationGenerator
    from ..evaluation.response_evaluator import ResponseEvaluator
    from ..evaluation.trajectory_evaluator import TrajectoryEvaluator
  except ModuleNotFoundError as e:
    raise ModuleNotFoundError(MISSING_EVAL_DEPENDENCIES_MESSAGE) from e

  for eval_set_file, evals_to_run in eval_set_to_evals.items():
    with open(eval_set_file, "r", encoding="utf-8") as file:
      eval_items = json.load(file)  # Load JSON into a list

    assert eval_items, f"No eval data found in eval set file: {eval_set_file}"

    for eval_item in eval_items:
      eval_name = eval_item["name"]
      eval_data = eval_item["data"]
      initial_session = eval_item.get("initial_session", {})

      # An empty selection means "run everything in this eval set".
      if evals_to_run and eval_name not in evals_to_run:
        continue

      try:
        print(f"Running Eval: {eval_set_file}:{eval_name}")
        session_id = f"{EVAL_SESSION_ID_PREFIX}{str(uuid.uuid4())}"

        scrape_result = EvaluationGenerator._process_query_with_root_agent(
            data=eval_data,
            root_agent=root_agent,
            reset_func=reset_func,
            initial_session=initial_session,
            session_id=session_id,
            session_service=session_service,
            artifact_service=artifact_service,
        )

        eval_metric_results = []
        for eval_metric in eval_metrics:
          if eval_metric.metric_name == TOOL_TRAJECTORY_SCORE_KEY:
            score = TrajectoryEvaluator.evaluate(
                [scrape_result], print_detailed_results=print_detailed_results
            )
            eval_metric_result = _get_eval_metric_result(eval_metric, score)
          elif eval_metric.metric_name == RESPONSE_MATCH_SCORE_KEY:
            score = ResponseEvaluator.evaluate(
                [scrape_result],
                [RESPONSE_MATCH_SCORE_KEY],
                print_detailed_results=print_detailed_results,
            )
            eval_metric_result = _get_eval_metric_result(
                eval_metric, score["rouge_1/mean"].item()
            )
          elif eval_metric.metric_name == RESPONSE_EVALUATION_SCORE_KEY:
            score = ResponseEvaluator.evaluate(
                [scrape_result],
                [RESPONSE_EVALUATION_SCORE_KEY],
                print_detailed_results=print_detailed_results,
            )
            eval_metric_result = _get_eval_metric_result(
                eval_metric, score["coherence/mean"].item()
            )
          else:
            logger.warning("`%s` is not supported.", eval_metric.metric_name)
            # Record the unsupported metric exactly once, as not evaluated.
            # The score is passed explicitly so this does not depend on the
            # field having a default.
            eval_metric_result = EvalMetricResult(
                score=None, eval_status=EvalStatus.NOT_EVALUATED
            )

          eval_metric_results.append((eval_metric, eval_metric_result))
          _print_eval_metric_result(eval_metric, eval_metric_result)

        final_eval_status = EvalStatus.NOT_EVALUATED

        # Go over all the eval statuses and mark the final eval status as
        # passed if all of the evaluated ones pass; a single failure marks the
        # final eval status as failed. Not-evaluated metrics are ignored.
        for _, metric_result in eval_metric_results:
          eval_status = metric_result.eval_status
          if eval_status == EvalStatus.PASSED:
            final_eval_status = EvalStatus.PASSED
          elif eval_status == EvalStatus.NOT_EVALUATED:
            continue
          elif eval_status == EvalStatus.FAILED:
            final_eval_status = EvalStatus.FAILED
            break
          else:
            raise ValueError("Unknown eval status.")

        yield EvalResult(
            eval_set_file=eval_set_file,
            eval_id=eval_name,
            final_eval_status=final_eval_status,
            eval_metric_results=eval_metric_results,
            session_id=session_id,
        )

        # Anything other than PASSED (including NOT_EVALUATED) is reported as
        # failed in the console summary.
        if final_eval_status == EvalStatus.PASSED:
          result = "✅ Passed"
        else:
          result = "❌ Failed"

        print(f"Result: {result}\n")

      except Exception as e:
        # Best effort: report the failure and move on to the next eval.
        print(f"Error: {e}")
        logger.info("Error: %s", str(traceback.format_exc()))
|
268
|
+
|
269
|
+
|
270
|
+
def _get_eval_metric_result(eval_metric, score):
  """Builds the `EvalMetricResult` for `score` against the metric threshold.

  Args:
    eval_metric: Metric whose `threshold` the score is compared to.
    score: Score computed for the metric, or None if no score is available.

  Returns:
    A result that is PASSED when `score >= eval_metric.threshold`, FAILED
    when it is lower, and NOT_EVALUATED when `score` is None.
  """
  if score is None:
    # Nothing to compare against the threshold; previously this raised a
    # TypeError on the `>=` comparison.
    return EvalMetricResult(score=None, eval_status=EvalStatus.NOT_EVALUATED)

  eval_status = (
      EvalStatus.PASSED if score >= eval_metric.threshold else EvalStatus.FAILED
  )
  return EvalMetricResult(score=score, eval_status=eval_status)
|
275
|
+
|
276
|
+
|
277
|
+
def _print_eval_metric_result(eval_metric, eval_metric_result):
|
278
|
+
print(
|
279
|
+
f"Metric: {eval_metric.metric_name}\tStatus:"
|
280
|
+
f" {eval_metric_result.eval_status}\tScore:"
|
281
|
+
f" {eval_metric_result.score}\tThreshold: {eval_metric.threshold}"
|
282
|
+
)
|