PyPI - google-adk - Versions diffs - 0.5.0__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

google-adk 0.5.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (139) hide show

google/adk/agents/base_agent.py +76 -30
google/adk/agents/callback_context.py +2 -6
google/adk/agents/llm_agent.py +122 -30
google/adk/agents/loop_agent.py +1 -1
google/adk/agents/parallel_agent.py +7 -0
google/adk/agents/readonly_context.py +8 -0
google/adk/agents/run_config.py +1 -1
google/adk/agents/sequential_agent.py +31 -0
google/adk/agents/transcription_entry.py +4 -2
google/adk/artifacts/gcs_artifact_service.py +1 -1
google/adk/artifacts/in_memory_artifact_service.py +1 -1
google/adk/auth/auth_credential.py +10 -2
google/adk/auth/auth_preprocessor.py +7 -1
google/adk/auth/auth_tool.py +3 -4
google/adk/cli/agent_graph.py +5 -5
google/adk/cli/browser/index.html +4 -4
google/adk/cli/browser/{main-ULN5R5I5.js → main-PKDNKWJE.js} +59 -60
google/adk/cli/browser/polyfills-B6TNHZQ6.js +17 -0
google/adk/cli/cli.py +10 -9
google/adk/cli/cli_deploy.py +7 -2
google/adk/cli/cli_eval.py +109 -115
google/adk/cli/cli_tools_click.py +179 -67
google/adk/cli/fast_api.py +248 -197
google/adk/cli/utils/agent_loader.py +137 -0
google/adk/cli/utils/cleanup.py +40 -0
google/adk/cli/utils/common.py +23 -0
google/adk/cli/utils/evals.py +83 -0
google/adk/cli/utils/logs.py +8 -5
google/adk/code_executors/__init__.py +3 -1
google/adk/code_executors/built_in_code_executor.py +52 -0
google/adk/code_executors/code_execution_utils.py +2 -1
google/adk/code_executors/container_code_executor.py +0 -1
google/adk/code_executors/vertex_ai_code_executor.py +6 -8
google/adk/evaluation/__init__.py +1 -1
google/adk/evaluation/agent_evaluator.py +168 -128
google/adk/evaluation/eval_case.py +104 -0
google/adk/evaluation/eval_metrics.py +74 -0
google/adk/evaluation/eval_result.py +86 -0
google/adk/evaluation/eval_set.py +39 -0
google/adk/evaluation/eval_set_results_manager.py +47 -0
google/adk/evaluation/eval_sets_manager.py +43 -0
google/adk/evaluation/evaluation_generator.py +88 -113
google/adk/evaluation/evaluator.py +58 -0
google/adk/evaluation/local_eval_set_results_manager.py +113 -0
google/adk/evaluation/local_eval_sets_manager.py +264 -0
google/adk/evaluation/response_evaluator.py +106 -1
google/adk/evaluation/trajectory_evaluator.py +84 -2
google/adk/events/event.py +6 -1
google/adk/events/event_actions.py +6 -1
google/adk/examples/base_example_provider.py +1 -0
google/adk/examples/example_util.py +3 -2
google/adk/flows/llm_flows/_code_execution.py +9 -1
google/adk/flows/llm_flows/audio_transcriber.py +4 -3
google/adk/flows/llm_flows/base_llm_flow.py +58 -21
google/adk/flows/llm_flows/contents.py +3 -1
google/adk/flows/llm_flows/functions.py +9 -8
google/adk/flows/llm_flows/instructions.py +18 -80
google/adk/flows/llm_flows/single_flow.py +2 -2
google/adk/memory/__init__.py +1 -1
google/adk/memory/_utils.py +23 -0
google/adk/memory/base_memory_service.py +23 -21
google/adk/memory/in_memory_memory_service.py +57 -25
google/adk/memory/memory_entry.py +37 -0
google/adk/memory/vertex_ai_rag_memory_service.py +38 -15
google/adk/models/anthropic_llm.py +16 -9
google/adk/models/base_llm.py +2 -1
google/adk/models/base_llm_connection.py +2 -0
google/adk/models/gemini_llm_connection.py +11 -11
google/adk/models/google_llm.py +12 -2
google/adk/models/lite_llm.py +80 -23
google/adk/models/llm_response.py +16 -3
google/adk/models/registry.py +1 -1
google/adk/runners.py +98 -42
google/adk/sessions/__init__.py +1 -1
google/adk/sessions/_session_util.py +2 -1
google/adk/sessions/base_session_service.py +6 -33
google/adk/sessions/database_session_service.py +57 -67
google/adk/sessions/in_memory_session_service.py +106 -24
google/adk/sessions/session.py +3 -0
google/adk/sessions/vertex_ai_session_service.py +44 -51
google/adk/telemetry.py +7 -2
google/adk/tools/__init__.py +4 -7
google/adk/tools/_memory_entry_utils.py +30 -0
google/adk/tools/agent_tool.py +10 -10
google/adk/tools/apihub_tool/apihub_toolset.py +55 -74
google/adk/tools/apihub_tool/clients/apihub_client.py +10 -3
google/adk/tools/apihub_tool/clients/secret_client.py +1 -0
google/adk/tools/application_integration_tool/application_integration_toolset.py +111 -85
google/adk/tools/application_integration_tool/clients/connections_client.py +28 -1
google/adk/tools/application_integration_tool/clients/integration_client.py +7 -5
google/adk/tools/application_integration_tool/integration_connector_tool.py +69 -26
google/adk/tools/base_toolset.py +96 -0
google/adk/tools/bigquery/__init__.py +28 -0
google/adk/tools/bigquery/bigquery_credentials.py +216 -0
google/adk/tools/bigquery/bigquery_tool.py +116 -0
google/adk/tools/{built_in_code_execution_tool.py → enterprise_search_tool.py} +17 -11
google/adk/tools/function_parameter_parse_util.py +9 -2
google/adk/tools/function_tool.py +33 -3
google/adk/tools/get_user_choice_tool.py +1 -0
google/adk/tools/google_api_tool/__init__.py +24 -70
google/adk/tools/google_api_tool/google_api_tool.py +12 -6
google/adk/tools/google_api_tool/{google_api_tool_set.py → google_api_toolset.py} +57 -55
google/adk/tools/google_api_tool/google_api_toolsets.py +108 -0
google/adk/tools/google_api_tool/googleapi_to_openapi_converter.py +40 -42
google/adk/tools/google_search_tool.py +2 -2
google/adk/tools/langchain_tool.py +96 -49
google/adk/tools/load_memory_tool.py +14 -5
google/adk/tools/mcp_tool/__init__.py +3 -2
google/adk/tools/mcp_tool/conversion_utils.py +6 -2
google/adk/tools/mcp_tool/mcp_session_manager.py +80 -69
google/adk/tools/mcp_tool/mcp_tool.py +35 -32
google/adk/tools/mcp_tool/mcp_toolset.py +99 -194
google/adk/tools/openapi_tool/auth/credential_exchangers/base_credential_exchanger.py +1 -3
google/adk/tools/openapi_tool/auth/credential_exchangers/service_account_exchanger.py +6 -7
google/adk/tools/openapi_tool/common/common.py +5 -1
google/adk/tools/openapi_tool/openapi_spec_parser/__init__.py +7 -2
google/adk/tools/openapi_tool/openapi_spec_parser/openapi_toolset.py +27 -7
google/adk/tools/openapi_tool/openapi_spec_parser/operation_parser.py +36 -32
google/adk/tools/openapi_tool/openapi_spec_parser/rest_api_tool.py +11 -1
google/adk/tools/openapi_tool/openapi_spec_parser/tool_auth_handler.py +1 -1
google/adk/tools/preload_memory_tool.py +27 -18
google/adk/tools/retrieval/__init__.py +1 -1
google/adk/tools/retrieval/vertex_ai_rag_retrieval.py +1 -1
google/adk/tools/toolbox_toolset.py +107 -0
google/adk/tools/transfer_to_agent_tool.py +0 -1
google/adk/utils/__init__.py +13 -0
google/adk/utils/instructions_utils.py +131 -0
google/adk/version.py +1 -1
{google_adk-0.5.0.dist-info → google_adk-1.1.0.dist-info}/METADATA +18 -19
google_adk-1.1.0.dist-info/RECORD +200 -0
google/adk/agents/remote_agent.py +0 -50
google/adk/cli/browser/polyfills-FFHMD2TL.js +0 -18
google/adk/cli/fast_api.py.orig +0 -728
google/adk/tools/google_api_tool/google_api_tool_sets.py +0 -112
google/adk/tools/toolbox_tool.py +0 -46
google_adk-0.5.0.dist-info/RECORD +0 -180
{google_adk-0.5.0.dist-info → google_adk-1.1.0.dist-info}/WHEEL +0 -0
{google_adk-0.5.0.dist-info → google_adk-1.1.0.dist-info}/entry_points.txt +0 -0
{google_adk-0.5.0.dist-info → google_adk-1.1.0.dist-info}/licenses/LICENSE +0 -0

google/adk/evaluation/eval_sets_manager.py ADDED Viewed

@@ -0,0 +1,43 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from abc import ABC
+from abc import abstractmethod
+from .eval_case import EvalCase
+from .eval_set import EvalSet
+class EvalSetsManager(ABC):
+  """An interface to manage Eval Sets.
+  Every method is abstract; concrete managers must implement all four.
+  Eval sets are addressed by the pair (app_name, eval_set_id).
+  """
+  @abstractmethod
+  def get_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
+    """Returns an EvalSet identified by an app_name and eval_set_id.
+    Args:
+      app_name: Name of the app that the eval set belongs to.
+      eval_set_id: Identifier of the eval set within that app.
+    """
+    raise NotImplementedError()
+  @abstractmethod
+  def create_eval_set(self, app_name: str, eval_set_id: str):
+    """Creates an empty EvalSet given the app_name and eval_set_id.
+    Args:
+      app_name: Name of the app that the new eval set belongs to.
+      eval_set_id: Identifier to give to the new eval set.
+    """
+    # NOTE(review): no return annotation is declared here, so whether
+    # implementations return anything cannot be told from this interface.
+    raise NotImplementedError()
+  @abstractmethod
+  def list_eval_sets(self, app_name: str) -> list[str]:
+    """Returns a list of strings for the EvalSets that belong to app_name.
+    The annotation is `list[str]`, not `list[EvalSet]`; presumably the
+    strings are eval set ids -- confirm against the implementations.
+    """
+    raise NotImplementedError()
+  @abstractmethod
+  def add_eval_case(self, app_name: str, eval_set_id: str, eval_case: EvalCase):
+    """Adds the given EvalCase to an existing EvalSet.
+    Args:
+      app_name: Name of the app that the eval set belongs to.
+      eval_set_id: Identifier of the existing eval set to extend.
+      eval_case: The EvalCase to add.
+    """
+    raise NotImplementedError()

google/adk/evaluation/evaluation_generator.py CHANGED Viewed

@@ -13,19 +13,34 @@
 # limitations under the License.
 import importlib
+from typing import Any
+from typing import Optional
 import uuid
-from google.genai import types
+from pydantic import BaseModel
-from ..agents.base_agent import BaseAgent
 from ..agents.llm_agent import Agent
-from ..agents.llm_agent import BeforeToolCallback
-from ..agents.llm_agent import LlmAgent
+from ..artifacts.base_artifact_service import BaseArtifactService
 from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
 from ..runners import Runner
+from ..sessions.base_session_service import BaseSessionService
 from ..sessions.in_memory_session_service import InMemorySessionService
 from ..sessions.session import Session
-from .evaluation_constants import EvalConstants
+from .eval_case import EvalCase
+from .eval_case import IntermediateData
+from .eval_case import Invocation
+from .eval_case import SessionInput
+from .eval_set import EvalSet
+class EvalCaseResponses(BaseModel):
+  """Contains multiple responses associated with an EvalCase.
+  Multiple responses are a result of repeated requests to generate inferences.
+  """
+  # The eval case that the responses below were generated for.
+  eval_case: EvalCase
+  # Outer list: one entry per repeated run. Inner list: the Invocations
+  # produced by that run.
+  responses: list[list[Invocation]]
 class EvaluationGenerator:
@@ -33,12 +48,11 @@ class EvaluationGenerator:
   @staticmethod
   async def generate_responses(
-      eval_dataset,
-      agent_module_path,
-      repeat_num=3,
-      agent_name=None,
-      initial_session={},
-  ):
+      eval_set: EvalSet,
+      agent_module_path: str,
+      repeat_num: int = 3,
+      agent_name: str = None,
+  ) -> list[EvalCaseResponses]:
     """Returns evaluation responses for the given dataset and agent.
     Args:
@@ -48,17 +62,23 @@ class EvaluationGenerator:
         usually done to remove uncertainty that a single run may bring.
       agent_name: The name of the agent that should be evaluated. This is
         usually the sub-agent.
-      initial_session: Initial session for the eval data.
     """
     results = []
-    for _ in range(repeat_num):
-      for data in eval_dataset:
-        results.append(
-            EvaluationGenerator._process_query(
-                data, agent_module_path, agent_name, initial_session
-            )
+    for eval_case in eval_set.eval_cases:
+      responses = []
+      for _ in range(repeat_num):
+        response_invocations = await EvaluationGenerator._process_query(
+            eval_case.conversation,
+            agent_module_path,
+            agent_name,
+            eval_case.session_input,
         )
+        responses.append(response_invocations)
+      results.append(
+          EvalCaseResponses(eval_case=eval_case, responses=responses)
+      )
     return results
@@ -89,7 +109,12 @@ class EvaluationGenerator:
     return results
   @staticmethod
-  def _process_query(data, module_name, agent_name=None, initial_session={}):
+  async def _process_query(
+      invocations: list[Invocation],
+      module_name: str,
+      agent_name: Optional[str] = None,
+      initial_session: Optional[SessionInput] = None,
+  ) -> list[Invocation]:
     """Process a query using the agent and evaluation dataset."""
     module_path = f"{module_name}"
     agent_module = importlib.import_module(module_path)
@@ -102,56 +127,40 @@ class EvaluationGenerator:
       agent_to_evaluate = root_agent.find_agent(agent_name)
       assert agent_to_evaluate, f"Sub-Agent `{agent_name}` not found."
-    return EvaluationGenerator._process_query_with_root_agent(
-        data, agent_to_evaluate, reset_func, initial_session
+    return await EvaluationGenerator._generate_inferences_from_root_agent(
+        invocations, agent_to_evaluate, reset_func, initial_session
     )
   @staticmethod
-  async def _process_query_with_root_agent(
-      data,
-      root_agent,
-      reset_func,
-      initial_session={},
-      session_id=None,
-      session_service=None,
-      artifact_service=None,
-  ):
-    """Process a query using the agent and evaluation dataset."""
-    # we don't know which tools belong to which agent
-    # so we just apply to any agents that has certain tool outputs
-    all_mock_tools = set()
-    for eval_entry in data:
-      expected_tool_use = eval_entry.get(EvalConstants.EXPECTED_TOOL_USE, [])
-      for expected in expected_tool_use:
-        if EvalConstants.MOCK_TOOL_OUTPUT in expected:
-          all_mock_tools.add(expected[EvalConstants.TOOL_NAME])
-    eval_data_copy = data.copy()
-    await EvaluationGenerator.apply_before_tool_callback(
-        root_agent,
-        lambda *args: EvaluationGenerator.before_tool_callback(
-            *args, eval_dataset=eval_data_copy
-        ),
-        all_mock_tools,
-    )
+  async def _generate_inferences_from_root_agent(
+      invocations: list[Invocation],
+      root_agent: Agent,
+      reset_func: Any,
+      initial_session: Optional[SessionInput] = None,
+      session_id: Optional[str] = None,
+      session_service: Optional[BaseSessionService] = None,
+      artifact_service: Optional[BaseArtifactService] = None,
+  ) -> list[Invocation]:
+    """Scrapes the root agent given the list of Invocations."""
     if not session_service:
       session_service = InMemorySessionService()
-    app_name = initial_session.get("app_name", "EvaluationGenerator")
-    user_id = initial_session.get("user_id", "test_user_id")
+    app_name = (
+        initial_session.app_name if initial_session else "EvaluationGenerator"
+    )
+    user_id = initial_session.user_id if initial_session else "test_user_id"
     session_id = session_id if session_id else str(uuid.uuid4())
-    _ = session_service.create_session(
+    _ = await session_service.create_session(
         app_name=app_name,
         user_id=user_id,
-        state=initial_session.get("state", {}),
+        state=initial_session.state if initial_session else {},
         session_id=session_id,
     )
     if not artifact_service:
       artifact_service = InMemoryArtifactService()
     runner = Runner(
         app_name=app_name,
         agent=root_agent,
@@ -163,30 +172,37 @@ class EvaluationGenerator:
     if callable(reset_func):
       reset_func()
-    responses = data.copy()
+    response_invocations = []
-    for index, eval_entry in enumerate(responses):
-      response = None
-      query = eval_entry["query"]
-      content = types.Content(role="user", parts=[types.Part(text=query)])
-      turn_actual_tool_uses = []
+    for invocation in invocations:
+      final_response = None
+      user_content = invocation.user_content
+      tool_uses = []
+      invocation_id = ""
       for event in runner.run(
-          user_id=user_id, session_id=session_id, new_message=content
+          user_id=user_id, session_id=session_id, new_message=user_content
       ):
+        invocation_id = (
+            event.invocation_id if not invocation_id else invocation_id
+        )
         if event.is_final_response() and event.content and event.content.parts:
-          response = event.content.parts[0].text
+          final_response = event.content
         elif event.get_function_calls():
           for call in event.get_function_calls():
-            turn_actual_tool_uses.append({
-                EvalConstants.TOOL_NAME: call.name,
-                EvalConstants.TOOL_INPUT: call.args,
-            })
+            tool_uses.append(call)
+      response_invocations.append(
+          Invocation(
+              invocation_id=invocation_id,
+              user_content=user_content,
+              final_response=final_response,
+              intermediate_data=IntermediateData(tool_uses=tool_uses),
+          )
+      )
-      responses[index]["actual_tool_use"] = turn_actual_tool_uses
-      responses[index]["response"] = response
-    return responses
+    return response_invocations
   @staticmethod
   def _process_query_with_session(session_data, data):
@@ -225,46 +241,5 @@ class EvaluationGenerator:
       responses[index]["actual_tool_use"] = actual_tool_uses
       responses[index]["response"] = response
     return responses
-  @staticmethod
-  def before_tool_callback(tool, args, tool_context, eval_dataset):
-    """Intercept specific tool calls and return predefined outputs
-    from eval_dataset.
-    """
-    for index, eval_entry in enumerate(eval_dataset):
-      expected_tool_use = eval_entry.get("expected_tool_use", [])
-      for expected in expected_tool_use:
-        if (
-            EvalConstants.MOCK_TOOL_OUTPUT in expected
-            and tool.name == expected[EvalConstants.TOOL_NAME]
-            and args == expected.get(EvalConstants.TOOL_INPUT, {})
-        ):
-          # pop the matched entry so we don't rematch again
-          eval_dataset.pop(index)
-          return {"result": expected[EvalConstants.MOCK_TOOL_OUTPUT]}
-    return None
-  @staticmethod
-  async def apply_before_tool_callback(
-      agent: BaseAgent,
-      callback: BeforeToolCallback,
-      all_mock_tools: set[str],
-  ):
-    """Recursively apply the before_tool_callback to the root agent and all its subagents."""
-    # Check if the agent has tools that are defined by evalset.
-    # We use function names to check if tools match
-    if not isinstance(agent, Agent) and not isinstance(agent, LlmAgent):
-      return
-    for tool in agent.canonical_tools:
-      tool_name = tool.name
-      if tool_name in all_mock_tools:
-        agent.before_tool_callback = callback
-    # Apply recursively to subagents if they exist
-    for sub_agent in agent.sub_agents:
-      await EvaluationGenerator.apply_before_tool_callback(
-          sub_agent, callback, all_mock_tools
-      )
+    return responses
+    return responses

google/adk/evaluation/evaluator.py ADDED Viewed

@@ -0,0 +1,58 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from abc import ABC
+from enum import Enum
+from typing import Optional
+from pydantic import BaseModel
+from .eval_case import Invocation
+class EvalStatus(Enum):
+  """Outcome of an evaluation: passed, failed, or not evaluated."""
+  PASSED = 1
+  FAILED = 2
+  NOT_EVALUATED = 3
+class PerInvocationResult(BaseModel):
+  """Metric evaluation score per invocation."""
+  # The invocation that was actually produced.
+  actual_invocation: Invocation
+  # The invocation that the actual one is compared against.
+  expected_invocation: Invocation
+  # Score for this invocation; None until a score has been set.
+  score: Optional[float] = None
+  # Status for this invocation; defaults to NOT_EVALUATED.
+  eval_status: EvalStatus = EvalStatus.NOT_EVALUATED
+class EvaluationResult(BaseModel):
+  """Aggregate result of an evaluation plus its per-invocation results."""
+  overall_score: Optional[float] = None
+  """Overall score, based on each invocation."""
+  overall_eval_status: EvalStatus = EvalStatus.NOT_EVALUATED
+  """Overall status, based on each invocation."""
+  # Individual results, one PerInvocationResult each; empty by default.
+  per_invocation_results: list[PerInvocationResult] = []
+class Evaluator(ABC):
+  """A metrics evaluator interface."""
+  # NOTE(review): this method is not decorated with @abstractmethod, so a
+  # subclass that does not override it can still be instantiated and will
+  # raise NotImplementedError only when the method is called.
+  def evaluate_invocations(
+      self,
+      actual_invocations: list[Invocation],
+      expected_invocations: list[Invocation],
+  ) -> EvaluationResult:
+    """Returns EvaluationResult after performing evaluations using actual and expected invocations.
+    Args:
+      actual_invocations: The invocations to evaluate.
+      expected_invocations: The invocations to evaluate against.
+    Raises:
+      NotImplementedError: Always, in this base class; subclasses override.
+    """
+    raise NotImplementedError()

google/adk/evaluation/local_eval_set_results_manager.py ADDED Viewed

@@ -0,0 +1,113 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+import json
+import logging
+import os
+import time
+from typing_extensions import override
+from .eval_result import EvalCaseResult
+from .eval_result import EvalSetResult
+from .eval_set_results_manager import EvalSetResultsManager
+# Module logger, namespaced under the "google_adk." prefix.
+logger = logging.getLogger("google_adk." + __name__)
+# Directory, relative to an app's own directory, that holds saved results.
+_ADK_EVAL_HISTORY_DIR = ".adk/eval_history"
+# File name suffix that marks a file as a saved eval set result.
+_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
+def _sanitize_eval_set_result_name(eval_set_result_name: str) -> str:
+  """Returns the name with every "/" replaced by "_".
+  Only forward slashes are replaced; all other characters are kept as is.
+  """
+  return eval_set_result_name.replace("/", "_")
+class LocalEvalSetResultsManager(EvalSetResultsManager):
+  """An EvalSetResult manager that stores eval set results locally on disk.
+  Each result is written to its own file under
+  `<agents_dir>/<app_name>/.adk/eval_history/`, with the
+  `.evalset_result.json` file extension.
+  """
+  def __init__(self, agents_dir: str):
+    # Root directory under which each app has its own sub-directory.
+    self._agents_dir = agents_dir
+  @override
+  def save_eval_set_result(
+      self,
+      app_name: str,
+      eval_set_id: str,
+      eval_case_results: list[EvalCaseResult],
+  ) -> None:
+    """Creates and saves a new EvalSetResult given eval_case_results.
+    Args:
+      app_name: Name of the app; selects the history directory and is the
+        first component of the generated result id.
+      eval_set_id: Id of the eval set that the results belong to.
+      eval_case_results: The per-case results to store.
+    """
+    # The id is "<app_name>_<eval_set_id>_<timestamp>"; the timestamp is the
+    # current wall-clock time in seconds as a float.
+    timestamp = time.time()
+    eval_set_result_id = app_name + "_" + eval_set_id + "_" + str(timestamp)
+    # The name is the id with "/" replaced by "_"; it is used as the file name.
+    eval_set_result_name = _sanitize_eval_set_result_name(eval_set_result_id)
+    eval_set_result = EvalSetResult(
+        eval_set_result_id=eval_set_result_id,
+        eval_set_result_name=eval_set_result_name,
+        eval_set_id=eval_set_id,
+        eval_case_results=eval_case_results,
+        creation_timestamp=timestamp,
+    )
+    # Write eval result file, with eval_set_result_name.
+    app_eval_history_dir = self._get_eval_history_dir(app_name)
+    if not os.path.exists(app_eval_history_dir):
+      os.makedirs(app_eval_history_dir)
+    # Convert to json and write to file.
+    # NOTE(review): model_dump_json() already returns a JSON string, and
+    # json.dumps() below encodes that string a second time. The file therefore
+    # contains a single JSON string literal rather than a JSON object.
+    # get_eval_set_result() reverses both steps, so the round trip works.
+    eval_set_result_json = eval_set_result.model_dump_json()
+    eval_set_result_file_path = os.path.join(
+        app_eval_history_dir,
+        eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
+    )
+    logger.info("Writing eval result to file: %s", eval_set_result_file_path)
+    # NOTE(review): no encoding is passed, so the platform default is used.
+    with open(eval_set_result_file_path, "w") as f:
+      f.write(json.dumps(eval_set_result_json, indent=2))
+  @override
+  def get_eval_set_result(
+      self, app_name: str, eval_set_result_id: str
+  ) -> EvalSetResult:
+    """Returns an EvalSetResult identified by app_name and eval_set_result_id.
+    Args:
+      app_name: Name of the app whose history directory is searched.
+      eval_set_result_id: Used verbatim as the file name stem.
+    Raises:
+      ValueError: If no result file exists for eval_set_result_id.
+    """
+    # Load the eval set result file data.
+    # NOTE(review): files are saved under the sanitized name, but the lookup
+    # uses the id as given. The two differ whenever the id contains "/", so
+    # such a result would have to be requested by its sanitized name.
+    maybe_eval_result_file_path = (
+        os.path.join(
+            self._get_eval_history_dir(app_name),
+            eval_set_result_id,
+        )
+        + _EVAL_SET_RESULT_FILE_EXTENSION
+    )
+    if not os.path.exists(maybe_eval_result_file_path):
+      raise ValueError(
+          f"Eval set result `{eval_set_result_id}` does not exist."
+      )
+    # NOTE(review): no encoding is passed, so the platform default is used.
+    with open(maybe_eval_result_file_path, "r") as file:
+      # First decode: yields the JSON string written by save_eval_set_result.
+      eval_result_data = json.load(file)
+    # Second decode: parses that string into an EvalSetResult.
+    return EvalSetResult.model_validate_json(eval_result_data)
+  @override
+  def list_eval_set_results(self, app_name: str) -> list[str]:
+    """Returns the eval result ids that belong to the given app_name.
+    The values are the result file names with the extension removed, in the
+    order returned by os.listdir. An empty list is returned when the app has
+    no history directory.
+    """
+    app_eval_history_directory = self._get_eval_history_dir(app_name)
+    if not os.path.exists(app_eval_history_directory):
+      return []
+    # Keep only result files and strip the extension from each name.
+    eval_result_files = [
+        file.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
+        for file in os.listdir(app_eval_history_directory)
+        if file.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
+    ]
+    return eval_result_files
+  def _get_eval_history_dir(self, app_name: str) -> str:
+    """Returns `<agents_dir>/<app_name>/.adk/eval_history`."""
+    return os.path.join(self._agents_dir, app_name, _ADK_EVAL_HISTORY_DIR)

google-adk 0.5.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

google-adk 0.5.0__py3-none-any.whl → 1.1.0__py3-none-any.whl