google-adk 1.7.0__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- google/adk/a2a/converters/request_converter.py +1 -2
- google/adk/a2a/logs/log_utils.py +1 -2
- google/adk/a2a/utils/__init__.py +0 -0
- google/adk/a2a/utils/agent_card_builder.py +544 -0
- google/adk/a2a/utils/agent_to_a2a.py +118 -0
- google/adk/agents/base_agent.py +6 -1
- google/adk/agents/config_schemas/AgentConfig.json +22 -0
- google/adk/agents/live_request_queue.py +15 -0
- google/adk/agents/llm_agent.py +11 -0
- google/adk/agents/loop_agent.py +6 -1
- google/adk/agents/remote_a2a_agent.py +2 -2
- google/adk/artifacts/gcs_artifact_service.py +86 -18
- google/adk/cli/browser/index.html +2 -2
- google/adk/cli/browser/{main-SRBSE46V.js → main-W7QZBYAR.js} +139 -139
- google/adk/cli/cli_eval.py +87 -12
- google/adk/cli/cli_tools_click.py +143 -82
- google/adk/cli/fast_api.py +136 -95
- google/adk/evaluation/eval_metrics.py +4 -0
- google/adk/evaluation/eval_sets_manager.py +5 -1
- google/adk/evaluation/final_response_match_v2.py +2 -2
- google/adk/evaluation/gcs_eval_sets_manager.py +2 -1
- google/adk/evaluation/local_eval_service.py +2 -2
- google/adk/evaluation/local_eval_set_results_manager.py +2 -2
- google/adk/evaluation/local_eval_sets_manager.py +1 -1
- google/adk/evaluation/metric_evaluator_registry.py +16 -6
- google/adk/evaluation/vertex_ai_eval_facade.py +7 -1
- google/adk/events/event.py +7 -2
- google/adk/flows/llm_flows/base_llm_flow.py +25 -6
- google/adk/flows/llm_flows/functions.py +13 -19
- google/adk/memory/in_memory_memory_service.py +1 -1
- google/adk/memory/vertex_ai_memory_bank_service.py +12 -10
- google/adk/models/anthropic_llm.py +2 -1
- google/adk/models/base_llm_connection.py +2 -0
- google/adk/models/gemini_llm_connection.py +17 -6
- google/adk/models/google_llm.py +35 -5
- google/adk/models/lite_llm.py +31 -18
- google/adk/sessions/database_session_service.py +25 -24
- google/adk/sessions/vertex_ai_session_service.py +13 -5
- google/adk/tools/__init__.py +2 -0
- google/adk/tools/_automatic_function_calling_util.py +20 -2
- google/adk/tools/agent_tool.py +14 -3
- google/adk/tools/base_toolset.py +22 -0
- google/adk/tools/bigquery/metadata_tool.py +2 -0
- google/adk/tools/bigquery/query_tool.py +15 -1
- google/adk/tools/computer_use/__init__.py +13 -0
- google/adk/tools/computer_use/base_computer.py +265 -0
- google/adk/tools/computer_use/computer_use_tool.py +166 -0
- google/adk/tools/computer_use/computer_use_toolset.py +220 -0
- google/adk/tools/exit_loop_tool.py +1 -0
- google/adk/tools/langchain_tool.py +14 -3
- google/adk/tools/openapi_tool/openapi_spec_parser/openapi_spec_parser.py +5 -0
- google/adk/version.py +1 -1
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/METADATA +2 -1
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/RECORD +57 -50
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/WHEEL +0 -0
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/entry_points.txt +0 -0
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/licenses/LICENSE +0 -0
google/adk/cli/cli_eval.py
CHANGED
@@ -15,6 +15,7 @@
 from __future__ import annotations
 
 import importlib.util
+import inspect
 import json
 import logging
 import os
@@ -24,13 +25,22 @@ from typing import AsyncGenerator
 from typing import Optional
 import uuid
 
+from typing_extensions import deprecated
+
 from ..agents import Agent
 from ..artifacts.base_artifact_service import BaseArtifactService
+from ..evaluation.base_eval_service import BaseEvalService
+from ..evaluation.base_eval_service import EvaluateConfig
+from ..evaluation.base_eval_service import EvaluateRequest
+from ..evaluation.base_eval_service import InferenceConfig
+from ..evaluation.base_eval_service import InferenceRequest
+from ..evaluation.base_eval_service import InferenceResult
 from ..evaluation.constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
 from ..evaluation.eval_case import EvalCase
 from ..evaluation.eval_metrics import EvalMetric
 from ..evaluation.eval_metrics import EvalMetricResult
 from ..evaluation.eval_metrics import EvalMetricResultPerInvocation
+from ..evaluation.eval_metrics import JudgeModelOptions
 from ..evaluation.eval_result import EvalCaseResult
 from ..evaluation.evaluator import EvalStatus
 from ..evaluation.evaluator import Evaluator
@@ -42,6 +52,7 @@ logger = logging.getLogger("google_adk." + __name__)
 TOOL_TRAJECTORY_SCORE_KEY = "tool_trajectory_avg_score"
 RESPONSE_MATCH_SCORE_KEY = "response_match_score"
 SAFETY_V1_KEY = "safety_v1"
+FINAL_RESPONSE_MATCH_V2 = "final_response_match_v2"
 # This evaluation is not very stable.
 # This is always optional unless explicitly specified.
 RESPONSE_EVALUATION_SCORE_KEY = "response_evaluation_score"
@@ -107,26 +118,80 @@ def try_get_reset_func(agent_module_file_path: str) -> Any:
 
 
 def parse_and_get_evals_to_run(
-
+    evals_to_run_info: list[str],
 ) -> dict[str, list[str]]:
-  """Returns a dictionary of eval
+  """Returns a dictionary of eval set info to evals that should be run.
+
+  Args:
+    evals_to_run_info: While the structure is quite simple, a list of string,
+      each string actually is formatted with the following convention:
+      <eval_set_file_path | eval_set_id>:[comma separated eval case ids]
+  """
   eval_set_to_evals = {}
-  for input_eval_set in
+  for input_eval_set in evals_to_run_info:
     evals = []
     if ":" not in input_eval_set:
-
+      # We don't have any eval cases specified. This would be the case where the
+      # the user wants to run all eval cases in the eval set.
+      eval_set = input_eval_set
     else:
-
+      # There are eval cases that we need to parse. The user wants to run
+      # specific eval cases from the eval set.
+      eval_set = input_eval_set.split(":")[0]
       evals = input_eval_set.split(":")[1].split(",")
+      evals = [s for s in evals if s.strip()]
 
-    if
-      eval_set_to_evals[
+    if eval_set not in eval_set_to_evals:
+      eval_set_to_evals[eval_set] = []
 
-    eval_set_to_evals[
+    eval_set_to_evals[eval_set].extend(evals)
 
   return eval_set_to_evals
 
 
+async def _collect_inferences(
+    inference_requests: list[InferenceRequest],
+    eval_service: BaseEvalService,
+) -> list[InferenceResult]:
+  """Simple utility methods to collect inferences from an eval service.
+
+  The method is intentionally kept private to prevent general usage.
+  """
+  inference_results = []
+  for inference_request in inference_requests:
+    async for inference_result in eval_service.perform_inference(
+        inference_request=inference_request
+    ):
+      inference_results.append(inference_result)
+  return inference_results
+
+
+async def _collect_eval_results(
+    inference_results: list[InferenceResult],
+    eval_service: BaseEvalService,
+    eval_metrics: list[EvalMetric],
+) -> list[EvalCaseResult]:
+  """Simple utility methods to collect eval results from an eval service.
+
+  The method is intentionally kept private to prevent general usage.
+  """
+  eval_results = []
+  evaluate_request = EvaluateRequest(
+      inference_results=inference_results,
+      evaluate_config=EvaluateConfig(eval_metrics=eval_metrics),
+  )
+  async for eval_result in eval_service.evaluate(
+      evaluate_request=evaluate_request
+  ):
+    eval_results.append(eval_result)
+
+  return eval_results
+
+
+@deprecated(
+    "This method is deprecated and will be removed in fututre release. Please"
+    " use LocalEvalService to define your custom evals."
+)
 async def run_evals(
     eval_cases_by_eval_set_id: dict[str, list[EvalCase]],
     root_agent: Agent,
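Note: the `<eval_set_file_path | eval_set_id>:[comma separated eval case ids]` convention described in the new docstring maps directly onto the dictionary returned by `parse_and_get_evals_to_run`. A minimal sketch of the expected behavior, assuming google-adk 1.8.0 is installed; the paths are placeholders, not files shipped in this package:

    from google.adk.cli.cli_eval import parse_and_get_evals_to_run

    specs = [
        "tests/sample_eval_set_file.json:eval_1,eval_2",  # run only eval_1 and eval_2
        "tests/other_eval_set_file.json",  # no ":" suffix, so run every eval case
    ]
    print(parse_and_get_evals_to_run(specs))
    # Expected, per the code above:
    # {'tests/sample_eval_set_file.json': ['eval_1', 'eval_2'],
    #  'tests/other_eval_set_file.json': []}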
@@ -191,10 +256,16 @@ async def run_evals(
         for eval_metric in eval_metrics:
           metric_evaluator = _get_evaluator(eval_metric)
 
-
-
-
-
+          if inspect.iscoroutinefunction(metric_evaluator.evaluate_invocations):
+            evaluation_result = await metric_evaluator.evaluate_invocations(
+                actual_invocations=inference_result,
+                expected_invocations=eval_case.conversation,
+            )
+          else:
+            evaluation_result = metric_evaluator.evaluate_invocations(
+                actual_invocations=inference_result,
+                expected_invocations=eval_case.conversation,
+            )
 
           overall_eval_metric_results.append(
               EvalMetricResult(
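Note: this hunk lets `run_evals` work with evaluators whose `evaluate_invocations` is either a plain method or a coroutine. A standalone sketch of the same dispatch pattern; the evaluator classes below are illustrative stand-ins, not ADK classes:

    import asyncio
    import inspect

    class SyncEvaluator:
      def evaluate_invocations(self, actual_invocations, expected_invocations):
        return {"overall_score": 1.0}

    class AsyncEvaluator:
      async def evaluate_invocations(self, actual_invocations, expected_invocations):
        return {"overall_score": 0.5}

    async def evaluate(evaluator, actual, expected):
      # Await only when the method is a coroutine function, mirroring the
      # inspect.iscoroutinefunction() check in the hunk above.
      if inspect.iscoroutinefunction(evaluator.evaluate_invocations):
        return await evaluator.evaluate_invocations(
            actual_invocations=actual, expected_invocations=expected
        )
      return evaluator.evaluate_invocations(
          actual_invocations=actual, expected_invocations=expected
      )

    print(asyncio.run(evaluate(SyncEvaluator(), [], [])))   # {'overall_score': 1.0}
    print(asyncio.run(evaluate(AsyncEvaluator(), [], [])))  # {'overall_score': 0.5}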
@@ -260,6 +331,7 @@
 
 def _get_evaluator(eval_metric: EvalMetric) -> Evaluator:
   try:
+    from ..evaluation.final_response_match_v2 import FinalResponseMatchV2Evaluator
     from ..evaluation.response_evaluator import ResponseEvaluator
     from ..evaluation.safety_evaluator import SafetyEvaluatorV1
     from ..evaluation.trajectory_evaluator import TrajectoryEvaluator
@@ -276,5 +348,8 @@ def _get_evaluator(eval_metric: EvalMetric) -> Evaluator:
     )
   elif eval_metric.metric_name == SAFETY_V1_KEY:
     return SafetyEvaluatorV1(eval_metric)
+  elif eval_metric.metric_name == FINAL_RESPONSE_MATCH_V2:
+    eval_metric.judge_model_options = JudgeModelOptions()
+    return FinalResponseMatchV2Evaluator(eval_metric)
 
   raise ValueError(f"Unsupported eval metric: {eval_metric}")
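Note: together with the import added above, this hunk routes the new `final_response_match_v2` metric name to `FinalResponseMatchV2Evaluator` and attaches default `JudgeModelOptions`. A hedged sketch of exercising the dispatcher; it assumes `EvalMetric` accepts `metric_name` and `threshold` keyword arguments, which this diff does not confirm:

    from google.adk.cli.cli_eval import FINAL_RESPONSE_MATCH_V2, _get_evaluator
    from google.adk.evaluation.eval_metrics import EvalMetric

    # Assumed constructor fields; adjust to the actual EvalMetric model.
    metric = EvalMetric(metric_name=FINAL_RESPONSE_MATCH_V2, threshold=0.8)
    evaluator = _get_evaluator(metric)  # FinalResponseMatchV2Evaluator, per the hunk above
    print(type(evaluator).__name__)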
google/adk/cli/cli_tools_click.py
CHANGED
@@ -32,10 +32,6 @@ from . import cli_create
 from . import cli_deploy
 from .. import version
 from ..evaluation.constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
-from ..evaluation.gcs_eval_set_results_manager import GcsEvalSetResultsManager
-from ..evaluation.gcs_eval_sets_manager import GcsEvalSetsManager
-from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
-from ..sessions.in_memory_session_service import InMemorySessionService
 from .cli import run_cli
 from .fast_api import get_fast_api_app
 from .utils import envs
@@ -276,7 +272,7 @@ def cli_run(
         exists=True, dir_okay=True, file_okay=False, resolve_path=True
     ),
 )
-@click.argument("
+@click.argument("eval_set_file_path_or_id", nargs=-1)
 @click.option("--config_file_path", help="Optional. The path to config file.")
 @click.option(
     "--print_detailed_results",
@@ -296,7 +292,7 @@ def cli_run(
 )
 def cli_eval(
     agent_module_file_path: str,
-
+    eval_set_file_path_or_id: list[str],
     config_file_path: str,
     print_detailed_results: bool,
     eval_storage_uri: Optional[str] = None,
@@ -306,20 +302,51 @@
   AGENT_MODULE_FILE_PATH: The path to the __init__.py file that contains a
   module by the name "agent". "agent" module contains a root_agent.
 
-
+  EVAL_SET_FILE_PATH_OR_ID: You can specify one or more eval set file paths or
+    eval set id.
 
+  Mixing of eval set file paths with eval set ids is not allowed.
+
+  *Eval Set File Path*
   For each file, all evals will be run by default.
 
   If you want to run only specific evals from a eval set, first create a comma
   separated list of eval names and then add that as a suffix to the eval set
   file name, demarcated by a `:`.
 
-  For example,
+  For example, we have `sample_eval_set_file.json` file that has following the
+  eval cases:
+    sample_eval_set_file.json:
+      |....... eval_1
+      |....... eval_2
+      |....... eval_3
+      |....... eval_4
+      |....... eval_5
 
   sample_eval_set_file.json:eval_1,eval_2,eval_3
 
   This will only run eval_1, eval_2 and eval_3 from sample_eval_set_file.json.
 
+  *Eval Set Id*
+  For each eval set, all evals will be run by default.
+
+  If you want to run only specific evals from a eval set, first create a comma
+  separated list of eval names and then add that as a suffix to the eval set
+  file name, demarcated by a `:`.
+
+  For example, we have `sample_eval_set_id` that has following the eval cases:
+    sample_eval_set_id:
+      |....... eval_1
+      |....... eval_2
+      |....... eval_3
+      |....... eval_4
+      |....... eval_5
+
+  If we did:
+  sample_eval_set_id:eval_1,eval_2,eval_3
+
+  This will only run eval_1, eval_2 and eval_3 from sample_eval_set_id.
+
   CONFIG_FILE_PATH: The path to config file.
 
   PRINT_DETAILED_RESULTS: Prints detailed results on the console.
@@ -327,17 +354,23 @@ def cli_eval(
   envs.load_dotenv_for_agent(agent_module_file_path, ".")
 
   try:
+    from ..evaluation.base_eval_service import InferenceConfig
+    from ..evaluation.base_eval_service import InferenceRequest
+    from ..evaluation.eval_metrics import EvalMetric
+    from ..evaluation.eval_result import EvalCaseResult
+    from ..evaluation.evaluator import EvalStatus
+    from ..evaluation.in_memory_eval_sets_manager import InMemoryEvalSetsManager
+    from ..evaluation.local_eval_service import LocalEvalService
+    from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
     from ..evaluation.local_eval_sets_manager import load_eval_set_from_file
-    from .
-    from .cli_eval import
-    from .cli_eval import
+    from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
+    from .cli_eval import _collect_eval_results
+    from .cli_eval import _collect_inferences
     from .cli_eval import get_evaluation_criteria_or_default
     from .cli_eval import get_root_agent
     from .cli_eval import parse_and_get_evals_to_run
-
-
-  except ModuleNotFoundError:
-    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE)
+  except ModuleNotFoundError as mnf:
+    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
 
   evaluation_criteria = get_evaluation_criteria_or_default(config_file_path)
   eval_metrics = []
@@ -349,81 +382,104 @@ def cli_eval(
     print(f"Using evaluation criteria: {evaluation_criteria}")
 
   root_agent = get_root_agent(agent_module_file_path)
-
-
-
+  app_name = os.path.basename(agent_module_file_path)
+  agents_dir = os.path.dirname(agent_module_file_path)
+  eval_sets_manager = None
   eval_set_results_manager = None
+
   if eval_storage_uri:
     gcs_eval_managers = evals.create_gcs_eval_managers_from_uri(
         eval_storage_uri
    )
-
+    eval_sets_manager = gcs_eval_managers.eval_sets_manager
    eval_set_results_manager = gcs_eval_managers.eval_set_results_manager
  else:
-    eval_set_results_manager = LocalEvalSetResultsManager(
-
-
-
-
-
-
-
-
-
-
-    )
-
+    eval_set_results_manager = LocalEvalSetResultsManager(agents_dir=agents_dir)
+
+  inference_requests = []
+  eval_set_file_or_id_to_evals = parse_and_get_evals_to_run(
+      eval_set_file_path_or_id
+  )
+
+  # Check if the first entry is a file that exists, if it does then we assume
+  # rest of the entries are also files. We enforce this assumption in the if
+  # block.
+  if eval_set_file_or_id_to_evals and os.path.exists(
+      list(eval_set_file_or_id_to_evals.keys())[0]
+  ):
+    eval_sets_manager = InMemoryEvalSetsManager()
+
+    # Read the eval_set files and get the cases.
+    for (
+        eval_set_file_path,
+        eval_case_ids,
+    ) in eval_set_file_or_id_to_evals.items():
+      try:
+        eval_set = load_eval_set_from_file(
+            eval_set_file_path, eval_set_file_path
+        )
+      except FileNotFoundError as fne:
         raise click.ClickException(
-            f"
+            f"`{eval_set_file_path}` should be a valid eval set file."
+        ) from fne
+
+      eval_sets_manager.create_eval_set(
+          app_name=app_name, eval_set_id=eval_set.eval_set_id
+      )
+      for eval_case in eval_set.eval_cases:
+        eval_sets_manager.add_eval_case(
+            app_name=app_name,
+            eval_set_id=eval_set.eval_set_id,
+            eval_case=eval_case,
        )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-          user_id=eval_case_result.user_id,
-          session_id=eval_case_result.session_id,
+      inference_requests.append(
+          InferenceRequest(
+              app_name=app_name,
+              eval_set_id=eval_set.eval_set_id,
+              eval_case_ids=eval_case_ids,
+              inference_config=InferenceConfig(),
+          )
+      )
+  else:
+    # We assume that what we have are eval set ids instead.
+    eval_sets_manager = (
+        eval_sets_manager
+        if eval_storage_uri
+        else LocalEvalSetsManager(agents_dir=agents_dir)
+    )
+
+    for eval_set_id_key, eval_case_ids in eval_set_file_or_id_to_evals.items():
+      inference_requests.append(
+          InferenceRequest(
+              app_name=app_name,
+              eval_set_id=eval_set_id_key,
+              eval_case_ids=eval_case_ids,
+              inference_config=InferenceConfig(),
+          )
      )
-    eval_case_results.append(eval_case_result)
-  return eval_case_results
 
   try:
-
-
-
-
-    # Write eval set results.
-    eval_set_id_to_eval_results = collections.defaultdict(list)
-    for eval_case_result in eval_results:
-      eval_set_id = eval_case_result.eval_set_id
-      eval_set_id_to_eval_results[eval_set_id].append(eval_case_result)
-
-    for eval_set_id, eval_case_results in eval_set_id_to_eval_results.items():
-      eval_set_results_manager.save_eval_set_result(
-          app_name=os.path.basename(agent_module_file_path),
-          eval_set_id=eval_set_id,
-          eval_case_results=eval_case_results,
+    eval_service = LocalEvalService(
+        root_agent=root_agent,
+        eval_sets_manager=eval_sets_manager,
+        eval_set_results_manager=eval_set_results_manager,
    )
 
+    inference_results = asyncio.run(
+        _collect_inferences(
+            inference_requests=inference_requests, eval_service=eval_service
+        )
+    )
+    eval_results = asyncio.run(
+        _collect_eval_results(
+            inference_results=inference_results,
+            eval_service=eval_service,
+            eval_metrics=eval_metrics,
+        )
+    )
+  except ModuleNotFoundError as mnf:
+    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
+
   print("*********************************************************************")
   eval_run_summary = {}
 
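Note: this hunk replaces the old inline eval loop in `cli_eval` with the `LocalEvalService` pipeline: build `InferenceRequest`s, collect `InferenceResult`s, then score them in a second pass. A condensed sketch of that flow outside the CLI, using only constructors and methods that appear in this diff; the agent, managers, and metrics are assumed to be built elsewhere:

    from google.adk.evaluation.base_eval_service import EvaluateConfig
    from google.adk.evaluation.base_eval_service import EvaluateRequest
    from google.adk.evaluation.base_eval_service import InferenceConfig
    from google.adk.evaluation.base_eval_service import InferenceRequest
    from google.adk.evaluation.local_eval_service import LocalEvalService

    async def run_eval_pipeline(root_agent, eval_sets_manager,
                                eval_set_results_manager, app_name,
                                eval_set_id, eval_metrics):
      eval_service = LocalEvalService(
          root_agent=root_agent,
          eval_sets_manager=eval_sets_manager,
          eval_set_results_manager=eval_set_results_manager,
      )
      request = InferenceRequest(
          app_name=app_name,
          eval_set_id=eval_set_id,
          eval_case_ids=[],  # the CLI passes an empty list when no cases are named
          inference_config=InferenceConfig(),
      )
      # Inference pass: run the agent against the selected eval cases.
      inference_results = [
          r async for r in eval_service.perform_inference(inference_request=request)
      ]
      # Evaluation pass: score the collected inferences with the chosen metrics.
      evaluate_request = EvaluateRequest(
          inference_results=inference_results,
          evaluate_config=EvaluateConfig(eval_metrics=eval_metrics),
      )
      return [
          r async for r in eval_service.evaluate(evaluate_request=evaluate_request)
      ]

    # The CLI drives this with two asyncio.run(...) calls, as in the hunk above.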
@@ -461,7 +517,10 @@ def adk_services_options():
       "--session_service_uri",
       help=(
           """Optional. The URI of the session service.
-          - Use 'agentengine://<
+          - Use 'agentengine://<agent_engine>' to connect to Agent Engine
+            sessions. <agent_engine> can either be the full qualified resource
+            name 'projects/abc/locations/us-central1/reasoningEngines/123' or
+            the resource id '123'.
           - Use 'sqlite://<path_to_sqlite_file>' to connect to a SQLite DB.
           - See https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls for more details on supported database URIs."""
       ),
@@ -487,11 +546,12 @@ def adk_services_options():
   @click.option(
       "--memory_service_uri",
       type=str,
-      help=(
-          """Optional. The URI of the memory service.
+      help=("""Optional. The URI of the memory service.
           - Use 'rag://<rag_corpus_id>' to connect to Vertex AI Rag Memory Service.
-          - Use 'agentengine://<
-
+          - Use 'agentengine://<agent_engine>' to connect to Agent Engine
+            sessions. <agent_engine> can either be the full qualified resource
+            name 'projects/abc/locations/us-central1/reasoningEngines/123' or
+            the resource id '123'."""),
       default=None,
   )
   @functools.wraps(func)
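Note: both option help texts above accept the same two spellings for <agent_engine>. For reference, the equivalent URI values the flags document; the project, location, and engine id below are the placeholders from the help text, not real resources:

    full_resource_name = (
        "agentengine://projects/abc/locations/us-central1/reasoningEngines/123"
    )
    resource_id_only = "agentengine://123"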
@@ -1021,7 +1081,8 @@ def cli_deploy_agent_engine(
   Example:
 
     adk deploy agent_engine --project=[project] --region=[region]
-    --staging_bucket=[staging_bucket] --display_name=[app_name]
+    --staging_bucket=[staging_bucket] --display_name=[app_name]
+    path/to/my_agent
   """
   try:
     cli_deploy.to_agent_engine(