PyPI - google-adk - Versions diffs - 0.4.0__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

google-adk 0.4.0py3-none-any.whl → 1.0.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (129) hide show

google/adk/agents/active_streaming_tool.py +1 -0
google/adk/agents/base_agent.py +91 -47
google/adk/agents/base_agent.py.orig +330 -0
google/adk/agents/callback_context.py +4 -9
google/adk/agents/invocation_context.py +1 -0
google/adk/agents/langgraph_agent.py +1 -0
google/adk/agents/live_request_queue.py +1 -0
google/adk/agents/llm_agent.py +172 -35
google/adk/agents/loop_agent.py +1 -1
google/adk/agents/parallel_agent.py +7 -0
google/adk/agents/readonly_context.py +7 -1
google/adk/agents/run_config.py +5 -1
google/adk/agents/sequential_agent.py +31 -0
google/adk/agents/transcription_entry.py +5 -2
google/adk/artifacts/base_artifact_service.py +5 -10
google/adk/artifacts/gcs_artifact_service.py +9 -9
google/adk/artifacts/in_memory_artifact_service.py +6 -6
google/adk/auth/auth_credential.py +9 -5
google/adk/auth/auth_preprocessor.py +7 -1
google/adk/auth/auth_tool.py +3 -4
google/adk/cli/agent_graph.py +5 -5
google/adk/cli/browser/index.html +2 -2
google/adk/cli/browser/{main-HWIBUY2R.js → main-QOEMUXM4.js} +58 -58
google/adk/cli/cli.py +7 -7
google/adk/cli/cli_deploy.py +7 -2
google/adk/cli/cli_eval.py +181 -106
google/adk/cli/cli_tools_click.py +147 -62
google/adk/cli/fast_api.py +340 -158
google/adk/cli/fast_api.py.orig +822 -0
google/adk/cli/utils/common.py +23 -0
google/adk/cli/utils/evals.py +83 -1
google/adk/cli/utils/logs.py +13 -5
google/adk/code_executors/__init__.py +3 -1
google/adk/code_executors/built_in_code_executor.py +52 -0
google/adk/evaluation/__init__.py +1 -1
google/adk/evaluation/agent_evaluator.py +168 -128
google/adk/evaluation/eval_case.py +102 -0
google/adk/evaluation/eval_set.py +37 -0
google/adk/evaluation/eval_sets_manager.py +42 -0
google/adk/evaluation/evaluation_constants.py +1 -0
google/adk/evaluation/evaluation_generator.py +89 -114
google/adk/evaluation/evaluator.py +56 -0
google/adk/evaluation/local_eval_sets_manager.py +264 -0
google/adk/evaluation/response_evaluator.py +107 -3
google/adk/evaluation/trajectory_evaluator.py +83 -2
google/adk/events/event.py +7 -1
google/adk/events/event_actions.py +7 -1
google/adk/examples/example.py +1 -0
google/adk/examples/example_util.py +3 -2
google/adk/flows/__init__.py +0 -1
google/adk/flows/llm_flows/_code_execution.py +19 -11
google/adk/flows/llm_flows/audio_transcriber.py +4 -3
google/adk/flows/llm_flows/base_llm_flow.py +86 -22
google/adk/flows/llm_flows/basic.py +3 -0
google/adk/flows/llm_flows/functions.py +10 -9
google/adk/flows/llm_flows/instructions.py +28 -9
google/adk/flows/llm_flows/single_flow.py +1 -1
google/adk/memory/__init__.py +1 -1
google/adk/memory/_utils.py +23 -0
google/adk/memory/base_memory_service.py +25 -21
google/adk/memory/base_memory_service.py.orig +76 -0
google/adk/memory/in_memory_memory_service.py +59 -27
google/adk/memory/memory_entry.py +37 -0
google/adk/memory/vertex_ai_rag_memory_service.py +40 -17
google/adk/models/anthropic_llm.py +36 -11
google/adk/models/base_llm.py +45 -4
google/adk/models/gemini_llm_connection.py +15 -2
google/adk/models/google_llm.py +9 -44
google/adk/models/google_llm.py.orig +305 -0
google/adk/models/lite_llm.py +94 -38
google/adk/models/llm_request.py +1 -1
google/adk/models/llm_response.py +15 -3
google/adk/models/registry.py +1 -1
google/adk/runners.py +68 -44
google/adk/sessions/__init__.py +1 -1
google/adk/sessions/_session_util.py +14 -0
google/adk/sessions/base_session_service.py +8 -32
google/adk/sessions/database_session_service.py +58 -61
google/adk/sessions/in_memory_session_service.py +108 -26
google/adk/sessions/session.py +4 -0
google/adk/sessions/vertex_ai_session_service.py +23 -45
google/adk/telemetry.py +3 -0
google/adk/tools/__init__.py +4 -7
google/adk/tools/{built_in_code_execution_tool.py → _built_in_code_execution_tool.py} +11 -0
google/adk/tools/_memory_entry_utils.py +30 -0
google/adk/tools/agent_tool.py +16 -13
google/adk/tools/apihub_tool/apihub_toolset.py +55 -74
google/adk/tools/application_integration_tool/application_integration_toolset.py +107 -85
google/adk/tools/application_integration_tool/clients/connections_client.py +29 -25
google/adk/tools/application_integration_tool/clients/integration_client.py +6 -6
google/adk/tools/application_integration_tool/integration_connector_tool.py +69 -26
google/adk/tools/base_toolset.py +58 -0
google/adk/tools/enterprise_search_tool.py +65 -0
google/adk/tools/function_parameter_parse_util.py +2 -2
google/adk/tools/google_api_tool/__init__.py +18 -70
google/adk/tools/google_api_tool/google_api_tool.py +11 -5
google/adk/tools/google_api_tool/google_api_toolset.py +126 -0
google/adk/tools/google_api_tool/google_api_toolsets.py +102 -0
google/adk/tools/google_api_tool/googleapi_to_openapi_converter.py +40 -42
google/adk/tools/langchain_tool.py +96 -49
google/adk/tools/load_artifacts_tool.py +4 -4
google/adk/tools/load_memory_tool.py +16 -5
google/adk/tools/mcp_tool/__init__.py +3 -2
google/adk/tools/mcp_tool/conversion_utils.py +1 -1
google/adk/tools/mcp_tool/mcp_session_manager.py +167 -16
google/adk/tools/mcp_tool/mcp_session_manager.py.orig +322 -0
google/adk/tools/mcp_tool/mcp_tool.py +12 -12
google/adk/tools/mcp_tool/mcp_toolset.py +155 -195
google/adk/tools/openapi_tool/common/common.py +2 -5
google/adk/tools/openapi_tool/openapi_spec_parser/openapi_toolset.py +32 -7
google/adk/tools/openapi_tool/openapi_spec_parser/operation_parser.py +43 -33
google/adk/tools/openapi_tool/openapi_spec_parser/tool_auth_handler.py +1 -1
google/adk/tools/preload_memory_tool.py +27 -18
google/adk/tools/retrieval/__init__.py +1 -1
google/adk/tools/retrieval/vertex_ai_rag_retrieval.py +1 -1
google/adk/tools/tool_context.py +4 -4
google/adk/tools/toolbox_toolset.py +79 -0
google/adk/tools/transfer_to_agent_tool.py +0 -1
google/adk/version.py +1 -1
{google_adk-0.4.0.dist-info → google_adk-1.0.0.dist-info}/METADATA +7 -5
google_adk-1.0.0.dist-info/RECORD +195 -0
google/adk/agents/remote_agent.py +0 -50
google/adk/tools/google_api_tool/google_api_tool_set.py +0 -110
google/adk/tools/google_api_tool/google_api_tool_sets.py +0 -112
google/adk/tools/toolbox_tool.py +0 -46
google_adk-0.4.0.dist-info/RECORD +0 -179
{google_adk-0.4.0.dist-info → google_adk-1.0.0.dist-info}/WHEEL +0 -0
{google_adk-0.4.0.dist-info → google_adk-1.0.0.dist-info}/entry_points.txt +0 -0
{google_adk-0.4.0.dist-info → google_adk-1.0.0.dist-info}/licenses/LICENSE +0 -0

google/adk/evaluation/eval_set.py ADDED Viewed

@@ -0,0 +1,37 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Optional
+from pydantic import BaseModel
+from .eval_case import EvalCase
+class EvalSet(BaseModel):
+  """A set of eval cases.
+  Only `eval_set_id` and `eval_cases` are required; every other field has a
+  default.
+  """
+  eval_set_id: str
+  """Unique identifier for the eval set."""
+  name: Optional[str] = None
+  """Name of the dataset."""
+  description: Optional[str] = None
+  """Description of the dataset."""
+  eval_cases: list[EvalCase]
+  """List of eval cases in the dataset. Each case represents a single
+  interaction to be evaluated."""
+  # Defaults to 0.0 when the creator does not supply a value.
+  creation_timestamp: float = 0.0
+  """The time at which this eval set was created."""

google/adk/evaluation/eval_sets_manager.py ADDED Viewed

@@ -0,0 +1,42 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from abc import ABC, abstractmethod
+from .eval_case import EvalCase
+from .eval_set import EvalSet
+class EvalSetsManager(ABC):
+  """An interface to manage Eval Sets.
+  Every method is abstract and is addressed by the app_name that the eval
+  set belongs to.
+  """
+  @abstractmethod
+  def get_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
+    """Returns an EvalSet identified by an app_name and eval_set_id."""
+    raise NotImplementedError()
+  @abstractmethod
+  def create_eval_set(self, app_name: str, eval_set_id: str):
+    """Creates an empty EvalSet given the app_name and eval_set_id."""
+    raise NotImplementedError()
+  @abstractmethod
+  def list_eval_sets(self, app_name: str) -> list[str]:
+    """Returns a list of strings, one per EvalSet that belongs to the given app_name."""
+    raise NotImplementedError()
+  @abstractmethod
+  def add_eval_case(self, app_name: str, eval_set_id: str, eval_case: EvalCase):
+    """Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id."""
+    raise NotImplementedError()

google/adk/evaluation/evaluation_constants.py CHANGED Viewed

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 class EvalConstants:
   """Holds constants for evaluation file constants."""

google/adk/evaluation/evaluation_generator.py CHANGED Viewed

@@ -13,32 +13,46 @@
 # limitations under the License.
 import importlib
+from typing import Any
+from typing import Optional
 import uuid
-from google.genai import types
+from pydantic import BaseModel
-from ..agents.base_agent import BaseAgent
 from ..agents.llm_agent import Agent
-from ..agents.llm_agent import BeforeToolCallback
-from ..agents.llm_agent import LlmAgent
+from ..artifacts.base_artifact_service import BaseArtifactService
 from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
 from ..runners import Runner
+from ..sessions.base_session_service import BaseSessionService
 from ..sessions.in_memory_session_service import InMemorySessionService
 from ..sessions.session import Session
-from .evaluation_constants import EvalConstants
+from .eval_case import EvalCase
+from .eval_case import IntermediateData
+from .eval_case import Invocation
+from .eval_case import SessionInput
+from .eval_set import EvalSet
+class EvalCaseResponses(BaseModel):
+  """Contains multiple responses associated with an EvalCase.
+  Multiple responses are a result of repeated requests to generate inferences.
+  """
+  # The eval case the responses below were produced for.
+  eval_case: EvalCase
+  # One inner list of Invocations per repeated run of the eval case.
+  responses: list[list[Invocation]]
 class EvaluationGenerator:
   """Generates evaluation responses for agents."""
   @staticmethod
-  def generate_responses(
-      eval_dataset,
-      agent_module_path,
-      repeat_num=3,
-      agent_name=None,
-      initial_session={},
-  ):
+  async def generate_responses(
+      eval_set: EvalSet,
+      agent_module_path: str,
+      repeat_num: int = 3,
+      agent_name: str = None,
+  ) -> list[EvalCaseResponses]:
     """Returns evaluation responses for the given dataset and agent.
     Args:
@@ -48,17 +62,23 @@ class EvaluationGenerator:
         usually done to remove uncertainty that a single run may bring.
       agent_name: The name of the agent that should be evaluated. This is
         usually the sub-agent.
-      initial_session: Initial session for the eval data.
     """
     results = []
-    for _ in range(repeat_num):
-      for data in eval_dataset:
-        results.append(
-            EvaluationGenerator._process_query(
-                data, agent_module_path, agent_name, initial_session
-            )
+    for eval_case in eval_set.eval_cases:
+      responses = []
+      for _ in range(repeat_num):
+        response_invocations = await EvaluationGenerator._process_query(
+            eval_case.conversation,
+            agent_module_path,
+            agent_name,
+            eval_case.session_input,
         )
+        responses.append(response_invocations)
+      results.append(
+          EvalCaseResponses(eval_case=eval_case, responses=responses)
+      )
     return results
@@ -89,7 +109,12 @@ class EvaluationGenerator:
     return results
   @staticmethod
-  def _process_query(data, module_name, agent_name=None, initial_session={}):
+  async def _process_query(
+      invocations: list[Invocation],
+      module_name: str,
+      agent_name: Optional[str] = None,
+      initial_session: Optional[SessionInput] = None,
+  ) -> list[Invocation]:
     """Process a query using the agent and evaluation dataset."""
     module_path = f"{module_name}"
     agent_module = importlib.import_module(module_path)
@@ -102,56 +127,40 @@ class EvaluationGenerator:
       agent_to_evaluate = root_agent.find_agent(agent_name)
       assert agent_to_evaluate, f"Sub-Agent `{agent_name}` not found."
-    return EvaluationGenerator._process_query_with_root_agent(
-        data, agent_to_evaluate, reset_func, initial_session
+    return await EvaluationGenerator._generate_inferences_from_root_agent(
+        invocations, agent_to_evaluate, reset_func, initial_session
     )
   @staticmethod
-  def _process_query_with_root_agent(
-      data,
-      root_agent,
-      reset_func,
-      initial_session={},
-      session_id=None,
-      session_service=None,
-      artifact_service=None,
-  ):
-    """Process a query using the agent and evaluation dataset."""
-    # we don't know which tools belong to which agent
-    # so we just apply to any agents that has certain tool outputs
-    all_mock_tools = set()
-    for eval_entry in data:
-      expected_tool_use = eval_entry.get(EvalConstants.EXPECTED_TOOL_USE, [])
-      for expected in expected_tool_use:
-        if EvalConstants.MOCK_TOOL_OUTPUT in expected:
-          all_mock_tools.add(expected[EvalConstants.TOOL_NAME])
-    eval_data_copy = data.copy()
-    EvaluationGenerator.apply_before_tool_callback(
-        root_agent,
-        lambda *args: EvaluationGenerator.before_tool_callback(
-            *args, eval_dataset=eval_data_copy
-        ),
-        all_mock_tools,
-    )
+  async def _generate_inferences_from_root_agent(
+      invocations: list[Invocation],
+      root_agent: Agent,
+      reset_func: Any,
+      initial_session: Optional[SessionInput] = None,
+      session_id: Optional[str] = None,
+      session_service: Optional[BaseSessionService] = None,
+      artifact_service: Optional[BaseArtifactService] = None,
+  ) -> list[Invocation]:
+    """Scrapes the root agent given the list of Invocations."""
     if not session_service:
       session_service = InMemorySessionService()
-    app_name = initial_session.get("app_name", "EvaluationGenerator")
-    user_id = initial_session.get("user_id", "test_user_id")
+    app_name = (
+        initial_session.app_name if initial_session else "EvaluationGenerator"
+    )
+    user_id = initial_session.user_id if initial_session else "test_user_id"
     session_id = session_id if session_id else str(uuid.uuid4())
-    _ = session_service.create_session(
+    _ = await session_service.create_session(
         app_name=app_name,
         user_id=user_id,
-        state=initial_session.get("state", {}),
+        state=initial_session.state if initial_session else {},
         session_id=session_id,
     )
     if not artifact_service:
       artifact_service = InMemoryArtifactService()
     runner = Runner(
         app_name=app_name,
         agent=root_agent,
@@ -163,30 +172,37 @@ class EvaluationGenerator:
     if callable(reset_func):
       reset_func()
-    responses = data.copy()
+    response_invocations = []
-    for index, eval_entry in enumerate(responses):
-      response = None
-      query = eval_entry["query"]
-      content = types.Content(role="user", parts=[types.Part(text=query)])
-      turn_actual_tool_uses = []
+    for invocation in invocations:
+      final_response = None
+      user_content = invocation.user_content
+      tool_uses = []
+      invocation_id = ""
       for event in runner.run(
-          user_id=user_id, session_id=session_id, new_message=content
+          user_id=user_id, session_id=session_id, new_message=user_content
       ):
+        invocation_id = (
+            event.invocation_id if not invocation_id else invocation_id
+        )
         if event.is_final_response() and event.content and event.content.parts:
-          response = event.content.parts[0].text
+          final_response = event.content
         elif event.get_function_calls():
           for call in event.get_function_calls():
-            turn_actual_tool_uses.append({
-                EvalConstants.TOOL_NAME: call.name,
-                EvalConstants.TOOL_INPUT: call.args,
-            })
+            tool_uses.append(call)
+      response_invocations.append(
+          Invocation(
+              invocation_id=invocation_id,
+              user_content=user_content,
+              final_response=final_response,
+              intermediate_data=IntermediateData(tool_uses=tool_uses),
+          )
+      )
-      responses[index]["actual_tool_use"] = turn_actual_tool_uses
-      responses[index]["response"] = response
-    return responses
+    return response_invocations
   @staticmethod
   def _process_query_with_session(session_data, data):
@@ -225,46 +241,5 @@ class EvaluationGenerator:
       responses[index]["actual_tool_use"] = actual_tool_uses
       responses[index]["response"] = response
     return responses
-  @staticmethod
-  def before_tool_callback(tool, args, tool_context, eval_dataset):
-    """Intercept specific tool calls and return predefined outputs
-    from eval_dataset.
-    """
-    for index, eval_entry in enumerate(eval_dataset):
-      expected_tool_use = eval_entry.get("expected_tool_use", [])
-      for expected in expected_tool_use:
-        if (
-            EvalConstants.MOCK_TOOL_OUTPUT in expected
-            and tool.name == expected[EvalConstants.TOOL_NAME]
-            and args == expected.get(EvalConstants.TOOL_INPUT, {})
-        ):
-          # pop the matched entry so we don't rematch again
-          eval_dataset.pop(index)
-          return {"result": expected[EvalConstants.MOCK_TOOL_OUTPUT]}
-    return None
-  @staticmethod
-  def apply_before_tool_callback(
-      agent: BaseAgent,
-      callback: BeforeToolCallback,
-      all_mock_tools: set[str],
-  ):
-    """Recursively apply the before_tool_callback to the root agent and all its subagents."""
-    # Check if the agent has tools that are defined by evalset.
-    # We use function names to check if tools match
-    if not isinstance(agent, Agent) and not isinstance(agent, LlmAgent):
-      return
-    for tool in agent.canonical_tools:
-      tool_name = tool.name
-      if tool_name in all_mock_tools:
-        agent.before_tool_callback = callback
-    # Apply recursively to subagents if they exist
-    for sub_agent in agent.sub_agents:
-      EvaluationGenerator.apply_before_tool_callback(
-          sub_agent, callback, all_mock_tools
-      )
+    return responses
+    return responses

google/adk/evaluation/evaluator.py ADDED Viewed

@@ -0,0 +1,56 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from abc import ABC
+from enum import Enum
+from typing import Optional
+from pydantic import BaseModel
+from .eval_case import Invocation
+class EvalStatus(Enum):
+  """Status of an evaluation."""
+  PASSED = 1
+  FAILED = 2
+  # Used as the default status by the result models in this module.
+  NOT_EVALUATED = 3
+class PerInvocationResult(BaseModel):
+  """Metric evaluation score per invocation."""
+  # The invocation that was actually produced.
+  actual_invocation: Invocation
+  # The invocation the actual one is compared against.
+  expected_invocation: Invocation
+  # No score and NOT_EVALUATED until a value is supplied.
+  score: Optional[float] = None
+  eval_status: EvalStatus = EvalStatus.NOT_EVALUATED
+class EvaluationResult(BaseModel):
+  """Overall evaluation outcome together with the per-invocation results."""
+  overall_score: Optional[float] = None
+  """Overall score, based on each invocation."""
+  overall_eval_status: EvalStatus = EvalStatus.NOT_EVALUATED
+  """Overall status, based on each invocation."""
+  # Empty when no per-invocation results are supplied.
+  per_invocation_results: list[PerInvocationResult] = []
+class Evaluator(ABC):
+  """A metrics evaluator interface."""
+  # NOTE(review): this method is not decorated with @abstractmethod, so a
+  # subclass that does not override it can still be instantiated and only
+  # fails with NotImplementedError when the method is called.
+  def evaluate_invocations(
+      self,
+      actual_invocations: list[Invocation],
+      expected_invocations: list[Invocation],
+  ) -> EvaluationResult:
+    """Returns EvaluationResult after performing evaluations using actual and expected invocations."""
+    raise NotImplementedError()

google/adk/evaluation/local_eval_sets_manager.py ADDED Viewed

@@ -0,0 +1,264 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import logging
+import os
+import re
+import time
+from typing import Any
+import uuid
+from google.genai import types as genai_types
+from pydantic import ValidationError
+from typing_extensions import override
+from .eval_case import EvalCase
+from .eval_case import IntermediateData
+from .eval_case import Invocation
+from .eval_case import SessionInput
+from .eval_set import EvalSet
+from .eval_sets_manager import EvalSetsManager
+logger = logging.getLogger("google_adk." + __name__)
+_EVAL_SET_FILE_EXTENSION = ".evalset.json"
+def _convert_invocation_to_pydantic_schema(
+    invocation_in_json_format: dict[str, Any],
+) -> Invocation:
+  """Converts an invocation from the old json format to the new Pydantic schema.
+  Args:
+    invocation_in_json_format: One old-format invocation. "query" is
+      required; "reference", "expected_tool_use" and
+      "expected_intermediate_agent_responses" are optional.
+  Returns:
+    An Invocation with a newly generated invocation_id and the current time
+    as its creation_timestamp.
+  Raises:
+    KeyError: If "query" is missing, if an expected tool use has no
+      "tool_name", or if an intermediate response lacks "author" or "text".
+  """
+  query = invocation_in_json_format["query"]
+  # A missing reference is treated as an empty expected response rather than
+  # aborting the conversion of the whole eval set.
+  reference = invocation_in_json_format.get("reference", "")
+  expected_tool_use = [
+      genai_types.FunctionCall(
+          name=old_tool_use["tool_name"],
+          # A tool use without "tool_input" is read as a call with no args.
+          args=old_tool_use.get("tool_input", {}),
+      )
+      for old_tool_use in invocation_in_json_format.get("expected_tool_use", [])
+  ]
+  # Each intermediate response becomes an (author, parts) tuple.
+  expected_intermediate_agent_responses = [
+      (
+          old_intermediate_response["author"],
+          [genai_types.Part.from_text(text=old_intermediate_response["text"])],
+      )
+      for old_intermediate_response in invocation_in_json_format.get(
+          "expected_intermediate_agent_responses", []
+      )
+  ]
+  return Invocation(
+      invocation_id=str(uuid.uuid4()),
+      user_content=genai_types.Content(
+          parts=[genai_types.Part.from_text(text=query)], role="user"
+      ),
+      final_response=genai_types.Content(
+          parts=[genai_types.Part.from_text(text=reference)], role="model"
+      ),
+      intermediate_data=IntermediateData(
+          tool_uses=expected_tool_use,
+          intermediate_responses=expected_intermediate_agent_responses,
+      ),
+      creation_timestamp=time.time(),
+  )
+def convert_eval_set_to_pydanctic_schema(
+    eval_set_id: str,
+    eval_set_in_json_format: list[dict[str, Any]],
+) -> EvalSet:
+  r"""Returns a pydantic EvalSet generated from the json representation.
+    Args:
+      eval_set_id: Eval set id. Also used as the name of the returned EvalSet.
+      eval_set_in_json_format: Eval set specified in JSON format.
+    Here is a sample eval set in JSON format:
+  [
+    {
+      "name": "roll_17_sided_dice_twice",
+      "data": [
+        {
+          "query": "What can you do?",
+          "expected_tool_use": [],
+          "expected_intermediate_agent_responses": [],
+          "reference": "I can roll dice of different sizes and check if a number
+            is prime. I can also use multiple tools in parallel.\n"
+        },
+        {
+          "query": "Roll a 17 sided dice twice for me",
+          "expected_tool_use": [
+            {
+              "tool_name": "roll_die",
+              "tool_input": {
+                "sides": 17
+              }
+            },
+            {
+              "tool_name": "roll_die",
+              "tool_input": {
+                "sides": 17
+              }
+            }
+          ],
+          "expected_intermediate_agent_responses": [],
+          "reference": "I have rolled a 17 sided die twice. The first roll was
+            13 and the second roll was 4.\n"
+        }
+      ],
+      "initial_session": {
+        "state": {},
+        "app_name": "hello_world",
+        "user_id": "user"
+      }
+    }
+  ]
+  """
+  converted_cases = []
+  for legacy_case in eval_set_in_json_format:
+    conversation = [
+        _convert_invocation_to_pydantic_schema(legacy_invocation)
+        for legacy_invocation in legacy_case["data"]
+    ]
+    # Only a non-empty "initial_session" produces a SessionInput.
+    session_input = None
+    if "initial_session" in legacy_case:
+      legacy_session = legacy_case["initial_session"]
+      if len(legacy_session) > 0:
+        session_input = SessionInput(
+            app_name=legacy_session.get("app_name", ""),
+            user_id=legacy_session.get("user_id", ""),
+            state=legacy_session.get("state", {}),
+        )
+    converted_cases.append(
+        EvalCase(
+            eval_id=legacy_case["name"],
+            conversation=conversation,
+            session_input=session_input,
+            creation_timestamp=time.time(),
+        )
+    )
+  return EvalSet(
+      eval_set_id=eval_set_id,
+      name=eval_set_id,
+      creation_timestamp=time.time(),
+      eval_cases=converted_cases,
+  )
+def load_eval_set_from_file(
+    eval_set_file_path: str, eval_set_id: str
+) -> EvalSet:
+  """Reads the given file and returns the EvalSet it describes.
+  The content is first validated as a serialized EvalSet. If that validation
+  fails, the content is parsed as JSON and converted from the old format,
+  with eval_set_id used as the id of the resulting EvalSet.
+  """
+  with open(eval_set_file_path, "r", encoding="utf-8") as eval_set_file:
+    raw_content = eval_set_file.read()
+  try:
+    eval_set = EvalSet.model_validate_json(raw_content)
+  except ValidationError:
+    # We assume that the eval data was specified in the old format and try
+    # to convert it to the new format.
+    legacy_cases = json.loads(raw_content)
+    eval_set = convert_eval_set_to_pydanctic_schema(eval_set_id, legacy_cases)
+  return eval_set
+class LocalEvalSetsManager(EvalSetsManager):
+  """An EvalSets manager that stores eval sets locally on disk.
+  Each eval set lives in the file
+  `<agent_dir>/<app_name>/<eval_set_id>.evalset.json`.
+  """
+  def __init__(self, agent_dir: str):
+    self._agent_dir = agent_dir
+  @override
+  def get_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
+    """Returns an EvalSet identified by an app_name and eval_set_id."""
+    # Load the eval set file data
+    eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
+    return load_eval_set_from_file(eval_set_file_path, eval_set_id)
+  @override
+  def create_eval_set(self, app_name: str, eval_set_id: str):
+    """Creates an empty EvalSet given the app_name and eval_set_id.
+    An eval set file that already exists is left untouched.
+    Raises:
+      ValueError: If eval_set_id contains characters other than letters,
+        digits and underscores.
+    """
+    self._validate_id(id_name="Eval Set Id", id_value=eval_set_id)
+    # Define the file path
+    new_eval_set_path = self._get_eval_set_file_path(app_name, eval_set_id)
+    logger.info("Creating eval set file `%s`", new_eval_set_path)
+    if not os.path.exists(new_eval_set_path):
+      # Write the JSON string to the file
+      logger.info("Eval set file doesn't exist, we will create a new one.")
+      new_eval_set = EvalSet(
+          eval_set_id=eval_set_id,
+          name=eval_set_id,
+          eval_cases=[],
+          creation_timestamp=time.time(),
+      )
+      self._write_eval_set(new_eval_set_path, new_eval_set)
+  @override
+  def list_eval_sets(self, app_name: str) -> list[str]:
+    """Returns the sorted ids of the eval sets that belong to the given app_name.
+    The ids are the names of the eval set files in the app's directory with
+    the eval set file extension removed.
+    """
+    eval_set_file_path = os.path.join(self._agent_dir, app_name)
+    return sorted(
+        os.path.basename(file).removesuffix(_EVAL_SET_FILE_EXTENSION)
+        for file in os.listdir(eval_set_file_path)
+        if file.endswith(_EVAL_SET_FILE_EXTENSION)
+    )
+  @override
+  def add_eval_case(self, app_name: str, eval_set_id: str, eval_case: EvalCase):
+    """Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id.
+    Raises:
+      ValueError: If the eval case id is malformed or an eval case with the
+        same id is already part of the eval set.
+    """
+    eval_case_id = eval_case.eval_id
+    self._validate_id(id_name="Eval Case Id", id_value=eval_case_id)
+    eval_set = self.get_eval_set(app_name, eval_set_id)
+    if any(x.eval_id == eval_case_id for x in eval_set.eval_cases):
+      raise ValueError(
+          f"Eval id `{eval_case_id}` already exists in `{eval_set_id}`"
+          " eval set.",
+      )
+    eval_set.eval_cases.append(eval_case)
+    eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
+    self._write_eval_set(eval_set_file_path, eval_set)
+  def _get_eval_set_file_path(self, app_name: str, eval_set_id: str) -> str:
+    """Returns the path of the file that backs the given eval set."""
+    return os.path.join(
+        self._agent_dir,
+        app_name,
+        eval_set_id + _EVAL_SET_FILE_EXTENSION,
+    )
+  def _validate_id(self, id_name: str, id_value: str):
+    """Raises ValueError unless id_value consists only of letters, digits and underscores."""
+    pattern = r"^[a-zA-Z0-9_]+$"
+    if not re.fullmatch(pattern, id_value):
+      raise ValueError(
+          f"Invalid {id_name}. {id_name} should have the `{pattern}` format",
+      )
+  def _write_eval_set(self, eval_set_path: str, eval_set: EvalSet):
+    """Serializes the eval set as indented JSON and writes it to eval_set_path."""
+    # Write with the same encoding load_eval_set_from_file reads with, so the
+    # file round-trips regardless of the platform's default encoding.
+    with open(eval_set_path, "w", encoding="utf-8") as f:
+      f.write(eval_set.model_dump_json(indent=2))

google-adk 0.4.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

google-adk 0.4.0py3-none-any.whl → 1.0.0py3-none-any.whl