google-adk 0.5.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- google/adk/agents/base_agent.py +76 -30
- google/adk/agents/base_agent.py.orig +330 -0
- google/adk/agents/callback_context.py +0 -5
- google/adk/agents/llm_agent.py +122 -30
- google/adk/agents/loop_agent.py +1 -1
- google/adk/agents/parallel_agent.py +7 -0
- google/adk/agents/readonly_context.py +7 -1
- google/adk/agents/run_config.py +1 -1
- google/adk/agents/sequential_agent.py +31 -0
- google/adk/agents/transcription_entry.py +4 -2
- google/adk/artifacts/gcs_artifact_service.py +1 -1
- google/adk/artifacts/in_memory_artifact_service.py +1 -1
- google/adk/auth/auth_credential.py +6 -1
- google/adk/auth/auth_preprocessor.py +7 -1
- google/adk/auth/auth_tool.py +3 -4
- google/adk/cli/agent_graph.py +5 -5
- google/adk/cli/browser/index.html +2 -2
- google/adk/cli/browser/{main-ULN5R5I5.js → main-QOEMUXM4.js} +44 -45
- google/adk/cli/cli.py +7 -7
- google/adk/cli/cli_deploy.py +7 -2
- google/adk/cli/cli_eval.py +172 -99
- google/adk/cli/cli_tools_click.py +147 -64
- google/adk/cli/fast_api.py +330 -148
- google/adk/cli/fast_api.py.orig +174 -80
- google/adk/cli/utils/common.py +23 -0
- google/adk/cli/utils/evals.py +83 -1
- google/adk/cli/utils/logs.py +13 -5
- google/adk/code_executors/__init__.py +3 -1
- google/adk/code_executors/built_in_code_executor.py +52 -0
- google/adk/evaluation/__init__.py +1 -1
- google/adk/evaluation/agent_evaluator.py +168 -128
- google/adk/evaluation/eval_case.py +102 -0
- google/adk/evaluation/eval_set.py +37 -0
- google/adk/evaluation/eval_sets_manager.py +42 -0
- google/adk/evaluation/evaluation_generator.py +88 -113
- google/adk/evaluation/evaluator.py +56 -0
- google/adk/evaluation/local_eval_sets_manager.py +264 -0
- google/adk/evaluation/response_evaluator.py +106 -2
- google/adk/evaluation/trajectory_evaluator.py +83 -2
- google/adk/events/event.py +6 -1
- google/adk/events/event_actions.py +6 -1
- google/adk/examples/example_util.py +3 -2
- google/adk/flows/llm_flows/_code_execution.py +9 -1
- google/adk/flows/llm_flows/audio_transcriber.py +4 -3
- google/adk/flows/llm_flows/base_llm_flow.py +54 -15
- google/adk/flows/llm_flows/functions.py +9 -8
- google/adk/flows/llm_flows/instructions.py +13 -5
- google/adk/flows/llm_flows/single_flow.py +1 -1
- google/adk/memory/__init__.py +1 -1
- google/adk/memory/_utils.py +23 -0
- google/adk/memory/base_memory_service.py +23 -21
- google/adk/memory/base_memory_service.py.orig +76 -0
- google/adk/memory/in_memory_memory_service.py +57 -25
- google/adk/memory/memory_entry.py +37 -0
- google/adk/memory/vertex_ai_rag_memory_service.py +38 -15
- google/adk/models/anthropic_llm.py +16 -9
- google/adk/models/gemini_llm_connection.py +11 -11
- google/adk/models/google_llm.py +9 -2
- google/adk/models/google_llm.py.orig +305 -0
- google/adk/models/lite_llm.py +77 -21
- google/adk/models/llm_response.py +14 -2
- google/adk/models/registry.py +1 -1
- google/adk/runners.py +65 -41
- google/adk/sessions/__init__.py +1 -1
- google/adk/sessions/base_session_service.py +6 -33
- google/adk/sessions/database_session_service.py +58 -65
- google/adk/sessions/in_memory_session_service.py +106 -24
- google/adk/sessions/session.py +3 -0
- google/adk/sessions/vertex_ai_session_service.py +23 -45
- google/adk/telemetry.py +3 -0
- google/adk/tools/__init__.py +4 -7
- google/adk/tools/{built_in_code_execution_tool.py → _built_in_code_execution_tool.py} +11 -0
- google/adk/tools/_memory_entry_utils.py +30 -0
- google/adk/tools/agent_tool.py +9 -9
- google/adk/tools/apihub_tool/apihub_toolset.py +55 -74
- google/adk/tools/application_integration_tool/application_integration_toolset.py +107 -85
- google/adk/tools/application_integration_tool/clients/connections_client.py +20 -0
- google/adk/tools/application_integration_tool/clients/integration_client.py +6 -6
- google/adk/tools/application_integration_tool/integration_connector_tool.py +69 -26
- google/adk/tools/base_toolset.py +58 -0
- google/adk/tools/enterprise_search_tool.py +65 -0
- google/adk/tools/function_parameter_parse_util.py +2 -2
- google/adk/tools/google_api_tool/__init__.py +18 -70
- google/adk/tools/google_api_tool/google_api_tool.py +11 -5
- google/adk/tools/google_api_tool/google_api_toolset.py +126 -0
- google/adk/tools/google_api_tool/google_api_toolsets.py +102 -0
- google/adk/tools/google_api_tool/googleapi_to_openapi_converter.py +40 -42
- google/adk/tools/langchain_tool.py +96 -49
- google/adk/tools/load_memory_tool.py +14 -5
- google/adk/tools/mcp_tool/__init__.py +3 -2
- google/adk/tools/mcp_tool/mcp_session_manager.py +153 -16
- google/adk/tools/mcp_tool/mcp_session_manager.py.orig +322 -0
- google/adk/tools/mcp_tool/mcp_tool.py +12 -12
- google/adk/tools/mcp_tool/mcp_toolset.py +155 -195
- google/adk/tools/openapi_tool/openapi_spec_parser/openapi_toolset.py +32 -7
- google/adk/tools/openapi_tool/openapi_spec_parser/operation_parser.py +31 -31
- google/adk/tools/openapi_tool/openapi_spec_parser/tool_auth_handler.py +1 -1
- google/adk/tools/preload_memory_tool.py +27 -18
- google/adk/tools/retrieval/__init__.py +1 -1
- google/adk/tools/retrieval/vertex_ai_rag_retrieval.py +1 -1
- google/adk/tools/toolbox_toolset.py +79 -0
- google/adk/tools/transfer_to_agent_tool.py +0 -1
- google/adk/version.py +1 -1
- {google_adk-0.5.0.dist-info → google_adk-1.0.0.dist-info}/METADATA +7 -5
- google_adk-1.0.0.dist-info/RECORD +195 -0
- google/adk/agents/remote_agent.py +0 -50
- google/adk/tools/google_api_tool/google_api_tool_set.py +0 -110
- google/adk/tools/google_api_tool/google_api_tool_sets.py +0 -112
- google/adk/tools/toolbox_tool.py +0 -46
- google_adk-0.5.0.dist-info/RECORD +0 -180
- {google_adk-0.5.0.dist-info → google_adk-1.0.0.dist-info}/WHEEL +0 -0
- {google_adk-0.5.0.dist-info → google_adk-1.0.0.dist-info}/entry_points.txt +0 -0
- {google_adk-0.5.0.dist-info → google_adk-1.0.0.dist-info}/licenses/LICENSE +0 -0
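Taken together, the file list shows the main breaking-change theme of 1.0.0: the session, memory, and eval services move to async APIs, several tool modules are renamed or made private (`built_in_code_execution_tool.py` → `_built_in_code_execution_tool.py`, `toolbox_tool.py` replaced by `toolbox_toolset.py`), and a structured evaluation layer (`eval_case.py`, `eval_set.py`, `evaluator.py`) is introduced. A minimal sketch of the async migration for session-service callers, assuming only the `InMemorySessionService` API visible in the diffs below (the surrounding script is illustrative, not from the package):

```python
import asyncio

from google.adk.sessions import InMemorySessionService


async def main() -> None:
  service = InMemorySessionService()

  # In 0.5.0 these were plain synchronous calls; in 1.0.0 the session
  # service methods are coroutines and must be awaited.
  session = await service.create_session(app_name='my_app', user_id='user1')
  fetched = await service.get_session(
      app_name='my_app', user_id='user1', session_id=session.id
  )
  print(fetched.id)


asyncio.run(main())
```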
google/adk/cli/cli.py
CHANGED
```diff
@@ -55,7 +55,7 @@ async def run_input_file(
     input_file = InputFile.model_validate_json(f.read())
   input_file.state['_time'] = datetime.now()
 
-  session = session_service.create_session(
+  session = await session_service.create_session(
       app_name=app_name, user_id=user_id, state=input_file.state
   )
   for query in input_file.queries:
@@ -105,6 +105,7 @@ async def run_cli(
     input_file: Optional[str] = None,
     saved_session_file: Optional[str] = None,
     save_session: bool,
+    session_id: Optional[str] = None,
 ) -> None:
   """Runs an interactive CLI for a certain agent.
 
@@ -118,6 +119,7 @@ async def run_cli(
     saved_session_file: Optional[str], the absolute path to the json file that
       contains a previously saved session, exclusive with input_file.
     save_session: bool, whether to save the session on exit.
+    session_id: Optional[str], the session ID to save the session to on exit.
   """
   if agent_parent_dir not in sys.path:
     sys.path.append(agent_parent_dir)
@@ -128,7 +130,7 @@ async def run_cli(
   agent_module_path = os.path.join(agent_parent_dir, agent_folder_name)
   agent_module = importlib.import_module(agent_folder_name)
   user_id = 'test_user'
-  session = session_service.create_session(
+  session = await session_service.create_session(
       app_name=agent_folder_name, user_id=user_id
   )
   root_agent = agent_module.agent.root_agent
@@ -143,14 +145,12 @@ async def run_cli(
         input_path=input_file,
     )
   elif saved_session_file:
-
-    loaded_session = None
     with open(saved_session_file, 'r') as f:
       loaded_session = Session.model_validate_json(f.read())
 
     if loaded_session:
       for event in loaded_session.events:
-        session_service.append_event(session, event)
+        await session_service.append_event(session, event)
         content = event.content
         if not content or not content.parts or not content.parts[0].text:
           continue
@@ -175,11 +175,11 @@ async def run_cli(
   )
 
   if save_session:
-    session_id = input('Session ID to save: ')
+    session_id = session_id or input('Session ID to save: ')
     session_path = f'{agent_module_path}/{session_id}.session.json'
 
     # Fetch the session again to get all the details.
-    session = session_service.get_session(
+    session = await session_service.get_session(
         app_name=session.app_name,
         user_id=session.user_id,
         session_id=session.id,
```
google/adk/cli/cli_deploy.py
CHANGED
```diff
@@ -42,7 +42,7 @@ ENV GOOGLE_CLOUD_LOCATION={gcp_region}
 # Set up environment variables - End
 
 # Install ADK - Start
-RUN pip install google-adk
+RUN pip install google-adk=={adk_version}
 # Install ADK - End
 
 # Copy agent - Start
@@ -54,7 +54,7 @@ COPY "agents/{app_name}/" "/app/agents/{app_name}/"
 
 EXPOSE {port}
 
-CMD adk {command} --port={port} {session_db_option} {trace_to_cloud_option} "/app/agents"
+CMD adk {command} --port={port} {host_option} {session_db_option} {trace_to_cloud_option} "/app/agents"
 """
 
 
@@ -86,6 +86,7 @@ def to_cloud_run(
     with_ui: bool,
     verbosity: str,
     session_db_url: str,
+    adk_version: str,
 ):
   """Deploys an agent to Google Cloud Run.
 
@@ -114,6 +115,7 @@ def to_cloud_run(
     with_ui: Whether to deploy with UI.
     verbosity: The verbosity level of the CLI.
     session_db_url: The database URL to connect the session.
+    adk_version: The ADK version to use in Cloud Run.
   """
   app_name = app_name or os.path.basename(agent_folder)
 
@@ -139,6 +141,7 @@ def to_cloud_run(
 
   # create Dockerfile
   click.echo('Creating Dockerfile...')
+  host_option = '--host=0.0.0.0' if adk_version > '0.5.0' else ''
   dockerfile_content = _DOCKERFILE_TEMPLATE.format(
       gcp_project_id=project,
       gcp_region=region,
@@ -150,6 +153,8 @@ def to_cloud_run(
       if session_db_url
       else '',
       trace_to_cloud_option='--trace_to_cloud' if trace_to_cloud else '',
+      adk_version=adk_version,
+      host_option=host_option,
   )
   dockerfile_path = os.path.join(temp_folder, 'Dockerfile')
   os.makedirs(temp_folder, exist_ok=True)
```
google/adk/cli/cli_eval.py
CHANGED
```diff
@@ -12,47 +12,107 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from enum import Enum
 import importlib.util
 import json
 import logging
 import os
 import sys
-import traceback
 from typing import Any
 from typing import AsyncGenerator
 from typing import Optional
 import uuid
 
 from pydantic import BaseModel
+from pydantic import Field
 
 from ..agents import Agent
+from ..artifacts.base_artifact_service import BaseArtifactService
+from ..evaluation.eval_case import EvalCase
+from ..evaluation.eval_case import Invocation
+from ..evaluation.evaluator import EvalStatus
+from ..evaluation.evaluator import Evaluator
+from ..sessions.base_session_service import BaseSessionService
+from ..sessions.session import Session
+from .utils import common
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("google_adk." + __name__)
 
 
-class EvalStatus(Enum):
-  PASSED = 1
-  FAILED = 2
-  NOT_EVALUATED = 3
+class EvalMetric(common.BaseModel):
+  """A metric used to evaluate a particular aspect of an eval case."""
 
-
-class EvalMetric(BaseModel):
   metric_name: str
+  """The name of the metric."""
+
   threshold: float
+  """A threshold value. Each metric decides how to interpret this threshold."""
+
 
+class EvalMetricResult(EvalMetric):
+  """The actual computed score/value of a particular EvalMetric."""
 
-class EvalMetricResult(BaseModel):
-  score: Optional[float]
+  score: Optional[float] = None
   eval_status: EvalStatus
 
 
-class EvalResult(BaseModel):
-  eval_set_file: str
-  eval_id: str
+class EvalMetricResultPerInvocation(common.BaseModel):
+  """Eval metric results per invocation."""
+
+  actual_invocation: Invocation
+  """The actual invocation, usually obtained by inferencing the agent."""
+
+  expected_invocation: Invocation
+  """The expected invocation, usually the reference or golden invocation."""
+
+  eval_metric_results: list[EvalMetricResult] = []
+  """Eval resutls for each applicable metric."""
+
+
+class EvalCaseResult(common.BaseModel):
+  """Case-level evaluation results."""
+
+  eval_set_file: str = Field(
+      deprecated=True,
+      description="This field is deprecated, use eval_set_id instead.",
+  )
+  eval_set_id: str = ""
+  """The eval set id."""
+
+  eval_id: str = ""
+  """The eval case id."""
+
   final_eval_status: EvalStatus
-  eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]]
+  """Final evalu status for this eval case."""
+
+  eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field(
+      deprecated=True,
+      description=(
+          "This field is deprecated, use overall_eval_metric_results instead."
+      ),
+  )
+
+  overall_eval_metric_results: list[EvalMetricResult]
+  """Overall result for each metric for the entire eval case."""
+
+  eval_metric_result_per_invocation: list[EvalMetricResultPerInvocation]
+  """Result for each metric on a per invocation basis."""
+
   session_id: str
+  """Session id of the session generated as result of inferencing/scraping stage of the eval."""
+
+  session_details: Optional[Session] = None
+  """Session generated as result of inferencing/scraping stage of the eval."""
+
+  user_id: Optional[str] = None
+  """User id used during inferencing/scraping stage of the eval."""
+
+
+class EvalSetResult(common.BaseModel):
+  eval_set_result_id: str
+  eval_set_result_name: str
+  eval_set_id: str
+  eval_case_results: list[EvalCaseResult] = Field(default_factory=list)
+  creation_timestamp: float = 0.0
 
 
 MISSING_EVAL_DEPENDENCIES_MESSAGE = (
```
```diff
@@ -147,14 +207,25 @@ def parse_and_get_evals_to_run(
 
 
 async def run_evals(
-    eval_set_to_evals: dict[str, list[str]],
+    eval_cases_by_eval_set_id: dict[str, list[EvalCase]],
     root_agent: Agent,
     reset_func: Optional[Any],
     eval_metrics: list[EvalMetric],
-    session_service=None,
-    artifact_service=None,
-    print_detailed_results=False,
-):
+    session_service: Optional[BaseSessionService] = None,
+    artifact_service: Optional[BaseArtifactService] = None,
+) -> AsyncGenerator[EvalCaseResult, None]:
+  """Returns a stream of EvalCaseResult for each eval case that was evaluated.
+
+  Args:
+    eval_cases_by_eval_set_id: Eval cases categorized by eval set id to which
+      they belong.
+    root_agent: Agent to use for inferencing.
+    reset_func: If present, this will be called before invoking the agent before
+      every inferencing step.
+    eval_metrics: A list of metrics that should be used during evaluation.
+    session_service: The session service to use during inferencing.
+    artifact_service: The artifact service to use during inferencing.
+  """
   try:
     from ..evaluation.agent_evaluator import EvaluationGenerator
     from ..evaluation.response_evaluator import ResponseEvaluator
```
```diff
@@ -162,28 +233,19 @@ async def run_evals(
   except ModuleNotFoundError as e:
     raise ModuleNotFoundError(MISSING_EVAL_DEPENDENCIES_MESSAGE) from e
 
-  for eval_set_file, evals_to_run in eval_set_to_evals.items():
-
-    with open(eval_set_file, "r", encoding="utf-8") as file:
-      eval_items = json.load(file)
-
-    assert eval_items, f"No eval data found in eval set file: {eval_set_file}"
-
-    for eval_item in eval_items:
-      eval_name = eval_item["name"]
-      eval_data = eval_item["data"]
-      initial_session = eval_item.get("initial_session", {})
-
-      if evals_to_run and eval_name not in evals_to_run:
-        continue
+  for eval_set_id, eval_cases in eval_cases_by_eval_set_id.items():
+    for eval_case in eval_cases:
+      eval_name = eval_case.eval_id
+      initial_session = eval_case.session_input
+      user_id = initial_session.user_id if initial_session else "test_user_id"
 
       try:
-        print(f"Running Eval: {eval_set_file}:{eval_name}")
+        print(f"Running Eval: {eval_set_id}:{eval_name}")
         session_id = f"{EVAL_SESSION_ID_PREFIX}{str(uuid.uuid4())}"
 
-        scrape_result = (
-            await EvaluationGenerator._process_query_with_root_agent(
-                data=eval_data,
+        inference_result = (
+            await EvaluationGenerator._generate_inferences_from_root_agent(
+                invocations=eval_case.conversation,
                 root_agent=root_agent,
                 reset_func=reset_func,
                 initial_session=initial_session,
```
```diff
@@ -193,68 +255,74 @@ async def run_evals(
             )
         )
 
-        eval_metric_results = []
+        # Initialize the per-invocation metric results to an empty list.
+        # We will fill this as we evaluate each metric.
+        eval_metric_result_per_invocation = []
+        for actual, expected in zip(inference_result, eval_case.conversation):
+          eval_metric_result_per_invocation.append(
+              EvalMetricResultPerInvocation(
+                  actual_invocation=actual,
+                  expected_invocation=expected,
+                  eval_metric_results=[],
+              )
+          )
+
+        overall_eval_metric_results = []
+
         for eval_metric in eval_metrics:
-          if eval_metric.metric_name == TOOL_TRAJECTORY_SCORE_KEY:
-            score = TrajectoryEvaluator.evaluate(
-                [scrape_result],
-                print_detailed_results=print_detailed_results,
+          metric_evaluator = _get_evaluator(eval_metric)
+
+          evaluation_result = metric_evaluator.evaluate_invocations(
+              actual_invocations=inference_result,
+              expected_invocations=eval_case.conversation,
+          )
+
+          overall_eval_metric_results.append(
+              EvalMetricResult(
+                  metric_name=eval_metric.metric_name,
+                  threshold=eval_metric.threshold,
+                  score=evaluation_result.overall_score,
+                  eval_status=evaluation_result.overall_eval_status,
+              )
+          )
+          for index, per_invocation_result in enumerate(
+              evaluation_result.per_invocation_results
+          ):
+            eval_metric_result_per_invocation[index].eval_metric_results.append(
+                EvalMetricResult(
+                    metric_name=eval_metric.metric_name,
+                    threshold=eval_metric.threshold,
+                    score=per_invocation_result.score,
+                    eval_status=per_invocation_result.eval_status,
+                )
             )
-            eval_metric_result = _get_eval_metric_result(eval_metric, score)
-          elif eval_metric.metric_name == RESPONSE_MATCH_SCORE_KEY:
-            score = ResponseEvaluator.evaluate(
-                [scrape_result],
-                [RESPONSE_MATCH_SCORE_KEY],
-                print_detailed_results=print_detailed_results,
-            )
-            eval_metric_result = _get_eval_metric_result(
-                eval_metric, score["rouge_1/mean"].item()
-            )
-          elif eval_metric.metric_name == RESPONSE_EVALUATION_SCORE_KEY:
-            score = ResponseEvaluator.evaluate(
-                [scrape_result],
-                [RESPONSE_EVALUATION_SCORE_KEY],
-                print_detailed_results=print_detailed_results,
-            )
-            eval_metric_result = _get_eval_metric_result(
-                eval_metric, score["coherence/mean"].item()
-            )
-          else:
-            logger.warning("`%s` is not supported.", eval_metric.metric_name)
-            eval_metric_results.append((
-                eval_metric,
-                EvalMetricResult(eval_status=EvalStatus.NOT_EVALUATED),
-            ))
-
-          eval_metric_results.append((
-              eval_metric,
-              eval_metric_result,
-          ))
-          _print_eval_metric_result(eval_metric, eval_metric_result)
 
         final_eval_status = EvalStatus.NOT_EVALUATED
-
         # Go over the all the eval statuses and mark the final eval status as
         # passed if all of them pass, otherwise mark the final eval status to
         # failed.
-        for eval_metric, eval_metric_result in eval_metric_results:
-          eval_status = eval_metric_result.eval_status
-          if eval_status == EvalStatus.PASSED:
+        for overall_eval_metric_result in overall_eval_metric_results:
+          overall_eval_status = overall_eval_metric_result.eval_status
+          if overall_eval_status == EvalStatus.PASSED:
            final_eval_status = EvalStatus.PASSED
-          elif eval_status == EvalStatus.NOT_EVALUATED:
+          elif overall_eval_status == EvalStatus.NOT_EVALUATED:
            continue
-          elif eval_status == EvalStatus.FAILED:
+          elif overall_eval_status == EvalStatus.FAILED:
            final_eval_status = EvalStatus.FAILED
            break
          else:
            raise ValueError("Unknown eval status.")
 
-        yield EvalResult(
-            eval_set_file=eval_set_file,
+        yield EvalCaseResult(
+            eval_set_file=eval_set_id,
+            eval_set_id=eval_set_id,
             eval_id=eval_name,
             final_eval_status=final_eval_status,
-            eval_metric_results=eval_metric_results,
+            eval_metric_results=[],
+            overall_eval_metric_results=overall_eval_metric_results,
+            eval_metric_result_per_invocation=eval_metric_result_per_invocation,
             session_id=session_id,
+            user_id=user_id,
         )
 
         if final_eval_status == EvalStatus.PASSED:
```
```diff
@@ -264,21 +332,26 @@ async def run_evals(
 
         print(f"Result: {result}\n")
 
-      except Exception as e:
-        print(f"Error: {e}")
-        logger.info("Error: %s", str(traceback.format_exc()))
-
+      except Exception:
+        # Catching the general exception, so that we don't block other eval
+        # cases.
+        logger.exception(f"Eval failed for `{eval_set_id}:{eval_name}`")
 
-def _get_eval_metric_result(eval_metric, score):
-  eval_status = (
-      EvalStatus.PASSED if score >= eval_metric.threshold else EvalStatus.FAILED
-  )
-  return EvalMetricResult(score=score, eval_status=eval_status)
 
-
-def _print_eval_metric_result(eval_metric, eval_metric_result):
-  print(
-      f"Metric: {eval_metric.metric_name}\tStatus: {eval_metric_result.eval_status}\t"
-      f"Score: {eval_metric_result.score}\tThreshold: {eval_metric.threshold}"
-  )
-
+def _get_evaluator(eval_metric: EvalMetric) -> Evaluator:
+  try:
+    from ..evaluation.response_evaluator import ResponseEvaluator
+    from ..evaluation.trajectory_evaluator import TrajectoryEvaluator
+  except ModuleNotFoundError as e:
+    raise ModuleNotFoundError(MISSING_EVAL_DEPENDENCIES_MESSAGE) from e
+  if eval_metric.metric_name == TOOL_TRAJECTORY_SCORE_KEY:
+    return TrajectoryEvaluator(threshold=eval_metric.threshold)
+  elif (
+      eval_metric.metric_name == RESPONSE_MATCH_SCORE_KEY
+      or eval_metric.metric_name == RESPONSE_EVALUATION_SCORE_KEY
+  ):
+    return ResponseEvaluator(
+        threshold=eval_metric.threshold, metric_name=eval_metric.metric_name
+    )
+
+  raise ValueError(f"Unsupported eval metric: {eval_metric}")
```