google-adk 1.7.0__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- google/adk/a2a/converters/request_converter.py +1 -2
- google/adk/a2a/logs/log_utils.py +1 -2
- google/adk/a2a/utils/__init__.py +0 -0
- google/adk/a2a/utils/agent_card_builder.py +544 -0
- google/adk/a2a/utils/agent_to_a2a.py +118 -0
- google/adk/agents/base_agent.py +6 -1
- google/adk/agents/config_schemas/AgentConfig.json +22 -0
- google/adk/agents/live_request_queue.py +15 -0
- google/adk/agents/llm_agent.py +11 -0
- google/adk/agents/loop_agent.py +6 -1
- google/adk/agents/remote_a2a_agent.py +2 -2
- google/adk/artifacts/gcs_artifact_service.py +86 -18
- google/adk/cli/browser/index.html +2 -2
- google/adk/cli/browser/{main-SRBSE46V.js → main-W7QZBYAR.js} +139 -139
- google/adk/cli/cli_eval.py +87 -12
- google/adk/cli/cli_tools_click.py +143 -82
- google/adk/cli/fast_api.py +136 -95
- google/adk/evaluation/eval_metrics.py +4 -0
- google/adk/evaluation/eval_sets_manager.py +5 -1
- google/adk/evaluation/final_response_match_v2.py +2 -2
- google/adk/evaluation/gcs_eval_sets_manager.py +2 -1
- google/adk/evaluation/local_eval_service.py +2 -2
- google/adk/evaluation/local_eval_set_results_manager.py +2 -2
- google/adk/evaluation/local_eval_sets_manager.py +1 -1
- google/adk/evaluation/metric_evaluator_registry.py +16 -6
- google/adk/evaluation/vertex_ai_eval_facade.py +7 -1
- google/adk/events/event.py +7 -2
- google/adk/flows/llm_flows/base_llm_flow.py +25 -6
- google/adk/flows/llm_flows/functions.py +13 -19
- google/adk/memory/in_memory_memory_service.py +1 -1
- google/adk/memory/vertex_ai_memory_bank_service.py +12 -10
- google/adk/models/anthropic_llm.py +2 -1
- google/adk/models/base_llm_connection.py +2 -0
- google/adk/models/gemini_llm_connection.py +17 -6
- google/adk/models/google_llm.py +35 -5
- google/adk/models/lite_llm.py +31 -18
- google/adk/sessions/database_session_service.py +25 -24
- google/adk/sessions/vertex_ai_session_service.py +13 -5
- google/adk/tools/__init__.py +2 -0
- google/adk/tools/_automatic_function_calling_util.py +20 -2
- google/adk/tools/agent_tool.py +14 -3
- google/adk/tools/base_toolset.py +22 -0
- google/adk/tools/bigquery/metadata_tool.py +2 -0
- google/adk/tools/bigquery/query_tool.py +15 -1
- google/adk/tools/computer_use/__init__.py +13 -0
- google/adk/tools/computer_use/base_computer.py +265 -0
- google/adk/tools/computer_use/computer_use_tool.py +166 -0
- google/adk/tools/computer_use/computer_use_toolset.py +220 -0
- google/adk/tools/exit_loop_tool.py +1 -0
- google/adk/tools/langchain_tool.py +14 -3
- google/adk/tools/openapi_tool/openapi_spec_parser/openapi_spec_parser.py +5 -0
- google/adk/version.py +1 -1
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/METADATA +2 -1
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/RECORD +57 -50
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/WHEEL +0 -0
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/entry_points.txt +0 -0
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/licenses/LICENSE +0 -0
google/adk/cli/cli_eval.py
CHANGED
@@ -15,6 +15,7 @@
 from __future__ import annotations
 
 import importlib.util
+import inspect
 import json
 import logging
 import os
@@ -24,13 +25,22 @@ from typing import AsyncGenerator
 from typing import Optional
 import uuid
 
+from typing_extensions import deprecated
+
 from ..agents import Agent
 from ..artifacts.base_artifact_service import BaseArtifactService
+from ..evaluation.base_eval_service import BaseEvalService
+from ..evaluation.base_eval_service import EvaluateConfig
+from ..evaluation.base_eval_service import EvaluateRequest
+from ..evaluation.base_eval_service import InferenceConfig
+from ..evaluation.base_eval_service import InferenceRequest
+from ..evaluation.base_eval_service import InferenceResult
 from ..evaluation.constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
 from ..evaluation.eval_case import EvalCase
 from ..evaluation.eval_metrics import EvalMetric
 from ..evaluation.eval_metrics import EvalMetricResult
 from ..evaluation.eval_metrics import EvalMetricResultPerInvocation
+from ..evaluation.eval_metrics import JudgeModelOptions
 from ..evaluation.eval_result import EvalCaseResult
 from ..evaluation.evaluator import EvalStatus
 from ..evaluation.evaluator import Evaluator
@@ -42,6 +52,7 @@ logger = logging.getLogger("google_adk." + __name__)
 TOOL_TRAJECTORY_SCORE_KEY = "tool_trajectory_avg_score"
 RESPONSE_MATCH_SCORE_KEY = "response_match_score"
 SAFETY_V1_KEY = "safety_v1"
+FINAL_RESPONSE_MATCH_V2 = "final_response_match_v2"
 # This evaluation is not very stable.
 # This is always optional unless explicitly specified.
 RESPONSE_EVALUATION_SCORE_KEY = "response_evaluation_score"
@@ -107,26 +118,80 @@ def try_get_reset_func(agent_module_file_path: str) -> Any:
 
 
 def parse_and_get_evals_to_run(
-
+    evals_to_run_info: list[str],
 ) -> dict[str, list[str]]:
-  """Returns a dictionary of eval
+  """Returns a dictionary of eval set info to evals that should be run.
+
+  Args:
+    evals_to_run_info: While the structure is quite simple, a list of string,
+      each string actually is formatted with the following convention:
+      <eval_set_file_path | eval_set_id>:[comma separated eval case ids]
+  """
   eval_set_to_evals = {}
-  for input_eval_set in
+  for input_eval_set in evals_to_run_info:
     evals = []
     if ":" not in input_eval_set:
-
+      # We don't have any eval cases specified. This would be the case where the
+      # the user wants to run all eval cases in the eval set.
+      eval_set = input_eval_set
     else:
-
+      # There are eval cases that we need to parse. The user wants to run
+      # specific eval cases from the eval set.
+      eval_set = input_eval_set.split(":")[0]
       evals = input_eval_set.split(":")[1].split(",")
+      evals = [s for s in evals if s.strip()]
 
-    if
-      eval_set_to_evals[
+    if eval_set not in eval_set_to_evals:
+      eval_set_to_evals[eval_set] = []
 
-    eval_set_to_evals[
+    eval_set_to_evals[eval_set].extend(evals)
 
   return eval_set_to_evals
 
 
+async def _collect_inferences(
+    inference_requests: list[InferenceRequest],
+    eval_service: BaseEvalService,
+) -> list[InferenceResult]:
+  """Simple utility methods to collect inferences from an eval service.
+
+  The method is intentionally kept private to prevent general usage.
+  """
+  inference_results = []
+  for inference_request in inference_requests:
+    async for inference_result in eval_service.perform_inference(
+        inference_request=inference_request
+    ):
+      inference_results.append(inference_result)
+  return inference_results
+
+
+async def _collect_eval_results(
+    inference_results: list[InferenceResult],
+    eval_service: BaseEvalService,
+    eval_metrics: list[EvalMetric],
+) -> list[EvalCaseResult]:
+  """Simple utility methods to collect eval results from an eval service.
+
+  The method is intentionally kept private to prevent general usage.
+  """
+  eval_results = []
+  evaluate_request = EvaluateRequest(
+      inference_results=inference_results,
+      evaluate_config=EvaluateConfig(eval_metrics=eval_metrics),
+  )
+  async for eval_result in eval_service.evaluate(
+      evaluate_request=evaluate_request
+  ):
+    eval_results.append(eval_result)
+
+  return eval_results
+
+
+@deprecated(
+    "This method is deprecated and will be removed in fututre release. Please"
+    " use LocalEvalService to define your custom evals."
+)
 async def run_evals(
     eval_cases_by_eval_set_id: dict[str, list[EvalCase]],
     root_agent: Agent,
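Note: the `<eval_set_file_path | eval_set_id>:[comma separated eval case ids]` convention described in the new docstring maps directly onto the dictionary returned by `parse_and_get_evals_to_run`. A minimal sketch of the expected behavior, assuming google-adk 1.8.0 is installed; the paths are placeholders, not files shipped in this package:

    from google.adk.cli.cli_eval import parse_and_get_evals_to_run

    specs = [
        "tests/sample_eval_set_file.json:eval_1,eval_2",  # run only eval_1 and eval_2
        "tests/other_eval_set_file.json",  # no ":" suffix, so run every eval case
    ]
    print(parse_and_get_evals_to_run(specs))
    # Expected, per the code above:
    # {'tests/sample_eval_set_file.json': ['eval_1', 'eval_2'],
    #  'tests/other_eval_set_file.json': []}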
@@ -191,10 +256,16 @@ async def run_evals(
         for eval_metric in eval_metrics:
           metric_evaluator = _get_evaluator(eval_metric)
 
-
-
-
-
+          if inspect.iscoroutinefunction(metric_evaluator.evaluate_invocations):
+            evaluation_result = await metric_evaluator.evaluate_invocations(
+                actual_invocations=inference_result,
+                expected_invocations=eval_case.conversation,
+            )
+          else:
+            evaluation_result = metric_evaluator.evaluate_invocations(
+                actual_invocations=inference_result,
+                expected_invocations=eval_case.conversation,
+            )
 
           overall_eval_metric_results.append(
               EvalMetricResult(
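Note: this hunk lets `run_evals` work with evaluators whose `evaluate_invocations` is either a plain method or a coroutine. A standalone sketch of the same dispatch pattern; the evaluator classes below are illustrative stand-ins, not ADK classes:

    import asyncio
    import inspect

    class SyncEvaluator:
      def evaluate_invocations(self, actual_invocations, expected_invocations):
        return {"overall_score": 1.0}

    class AsyncEvaluator:
      async def evaluate_invocations(self, actual_invocations, expected_invocations):
        return {"overall_score": 0.5}

    async def evaluate(evaluator, actual, expected):
      # Await only when the method is a coroutine function, mirroring the
      # inspect.iscoroutinefunction() check in the hunk above.
      if inspect.iscoroutinefunction(evaluator.evaluate_invocations):
        return await evaluator.evaluate_invocations(
            actual_invocations=actual, expected_invocations=expected
        )
      return evaluator.evaluate_invocations(
          actual_invocations=actual, expected_invocations=expected
      )

    print(asyncio.run(evaluate(SyncEvaluator(), [], [])))   # {'overall_score': 1.0}
    print(asyncio.run(evaluate(AsyncEvaluator(), [], [])))  # {'overall_score': 0.5}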
@@ -260,6 +331,7 @@
 
 def _get_evaluator(eval_metric: EvalMetric) -> Evaluator:
   try:
+    from ..evaluation.final_response_match_v2 import FinalResponseMatchV2Evaluator
     from ..evaluation.response_evaluator import ResponseEvaluator
     from ..evaluation.safety_evaluator import SafetyEvaluatorV1
     from ..evaluation.trajectory_evaluator import TrajectoryEvaluator
@@ -276,5 +348,8 @@ def _get_evaluator(eval_metric: EvalMetric) -> Evaluator:
     )
   elif eval_metric.metric_name == SAFETY_V1_KEY:
     return SafetyEvaluatorV1(eval_metric)
+  elif eval_metric.metric_name == FINAL_RESPONSE_MATCH_V2:
+    eval_metric.judge_model_options = JudgeModelOptions()
+    return FinalResponseMatchV2Evaluator(eval_metric)
 
   raise ValueError(f"Unsupported eval metric: {eval_metric}")
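Note: together with the import added above, this hunk routes the new `final_response_match_v2` metric name to `FinalResponseMatchV2Evaluator` and attaches default `JudgeModelOptions`. A hedged sketch of exercising the dispatcher; it assumes `EvalMetric` accepts `metric_name` and `threshold` keyword arguments, which this diff does not confirm:

    from google.adk.cli.cli_eval import FINAL_RESPONSE_MATCH_V2, _get_evaluator
    from google.adk.evaluation.eval_metrics import EvalMetric

    # Assumed constructor fields; adjust to the actual EvalMetric model.
    metric = EvalMetric(metric_name=FINAL_RESPONSE_MATCH_V2, threshold=0.8)
    evaluator = _get_evaluator(metric)  # FinalResponseMatchV2Evaluator, per the hunk above
    print(type(evaluator).__name__)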
google/adk/cli/cli_tools_click.py
CHANGED
@@ -32,10 +32,6 @@ from . import cli_create
 from . import cli_deploy
 from .. import version
 from ..evaluation.constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
-from ..evaluation.gcs_eval_set_results_manager import GcsEvalSetResultsManager
-from ..evaluation.gcs_eval_sets_manager import GcsEvalSetsManager
-from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
-from ..sessions.in_memory_session_service import InMemorySessionService
 from .cli import run_cli
 from .fast_api import get_fast_api_app
 from .utils import envs
@@ -276,7 +272,7 @@ def cli_run(
         exists=True, dir_okay=True, file_okay=False, resolve_path=True
     ),
 )
-@click.argument("
+@click.argument("eval_set_file_path_or_id", nargs=-1)
 @click.option("--config_file_path", help="Optional. The path to config file.")
 @click.option(
     "--print_detailed_results",
@@ -296,7 +292,7 @@ def cli_run(
 )
 def cli_eval(
     agent_module_file_path: str,
-
+    eval_set_file_path_or_id: list[str],
     config_file_path: str,
     print_detailed_results: bool,
     eval_storage_uri: Optional[str] = None,
@@ -306,20 +302,51 @@
   AGENT_MODULE_FILE_PATH: The path to the __init__.py file that contains a
   module by the name "agent". "agent" module contains a root_agent.
 
-
+  EVAL_SET_FILE_PATH_OR_ID: You can specify one or more eval set file paths or
+    eval set id.
 
+  Mixing of eval set file paths with eval set ids is not allowed.
+
+  *Eval Set File Path*
   For each file, all evals will be run by default.
 
   If you want to run only specific evals from a eval set, first create a comma
   separated list of eval names and then add that as a suffix to the eval set
   file name, demarcated by a `:`.
 
-  For example,
+  For example, we have `sample_eval_set_file.json` file that has following the
+  eval cases:
+    sample_eval_set_file.json:
+      |....... eval_1
+      |....... eval_2
+      |....... eval_3
+      |....... eval_4
+      |....... eval_5
 
   sample_eval_set_file.json:eval_1,eval_2,eval_3
 
   This will only run eval_1, eval_2 and eval_3 from sample_eval_set_file.json.
 
+  *Eval Set Id*
+  For each eval set, all evals will be run by default.
+
+  If you want to run only specific evals from a eval set, first create a comma
+  separated list of eval names and then add that as a suffix to the eval set
+  file name, demarcated by a `:`.
+
+  For example, we have `sample_eval_set_id` that has following the eval cases:
+    sample_eval_set_id:
+      |....... eval_1
+      |....... eval_2
+      |....... eval_3
+      |....... eval_4
+      |....... eval_5
+
+  If we did:
+  sample_eval_set_id:eval_1,eval_2,eval_3
+
+  This will only run eval_1, eval_2 and eval_3 from sample_eval_set_id.
+
   CONFIG_FILE_PATH: The path to config file.
 
   PRINT_DETAILED_RESULTS: Prints detailed results on the console.
@@ -327,17 +354,23 @@ def cli_eval(
   envs.load_dotenv_for_agent(agent_module_file_path, ".")
 
   try:
+    from ..evaluation.base_eval_service import InferenceConfig
+    from ..evaluation.base_eval_service import InferenceRequest
+    from ..evaluation.eval_metrics import EvalMetric
+    from ..evaluation.eval_result import EvalCaseResult
+    from ..evaluation.evaluator import EvalStatus
+    from ..evaluation.in_memory_eval_sets_manager import InMemoryEvalSetsManager
+    from ..evaluation.local_eval_service import LocalEvalService
+    from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
     from ..evaluation.local_eval_sets_manager import load_eval_set_from_file
-    from .
-    from .cli_eval import
-    from .cli_eval import
+    from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
+    from .cli_eval import _collect_eval_results
+    from .cli_eval import _collect_inferences
     from .cli_eval import get_evaluation_criteria_or_default
     from .cli_eval import get_root_agent
     from .cli_eval import parse_and_get_evals_to_run
-
-
-  except ModuleNotFoundError:
-    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE)
+  except ModuleNotFoundError as mnf:
+    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
 
   evaluation_criteria = get_evaluation_criteria_or_default(config_file_path)
   eval_metrics = []
@@ -349,81 +382,104 @@ def cli_eval(
     print(f"Using evaluation criteria: {evaluation_criteria}")
 
   root_agent = get_root_agent(agent_module_file_path)
-
-
-
+  app_name = os.path.basename(agent_module_file_path)
+  agents_dir = os.path.dirname(agent_module_file_path)
+  eval_sets_manager = None
   eval_set_results_manager = None
+
   if eval_storage_uri:
     gcs_eval_managers = evals.create_gcs_eval_managers_from_uri(
         eval_storage_uri
    )
-
+    eval_sets_manager = gcs_eval_managers.eval_sets_manager
    eval_set_results_manager = gcs_eval_managers.eval_set_results_manager
  else:
-    eval_set_results_manager = LocalEvalSetResultsManager(
-
-
-
-
-
-
-
-
-
-
-    )
-
+    eval_set_results_manager = LocalEvalSetResultsManager(agents_dir=agents_dir)
+
+  inference_requests = []
+  eval_set_file_or_id_to_evals = parse_and_get_evals_to_run(
+      eval_set_file_path_or_id
+  )
+
+  # Check if the first entry is a file that exists, if it does then we assume
+  # rest of the entries are also files. We enforce this assumption in the if
+  # block.
+  if eval_set_file_or_id_to_evals and os.path.exists(
+      list(eval_set_file_or_id_to_evals.keys())[0]
+  ):
+    eval_sets_manager = InMemoryEvalSetsManager()
+
+    # Read the eval_set files and get the cases.
+    for (
+        eval_set_file_path,
+        eval_case_ids,
+    ) in eval_set_file_or_id_to_evals.items():
+      try:
+        eval_set = load_eval_set_from_file(
+            eval_set_file_path, eval_set_file_path
+        )
+      except FileNotFoundError as fne:
         raise click.ClickException(
-            f"
+            f"`{eval_set_file_path}` should be a valid eval set file."
+        ) from fne
+
+      eval_sets_manager.create_eval_set(
+          app_name=app_name, eval_set_id=eval_set.eval_set_id
+      )
+      for eval_case in eval_set.eval_cases:
+        eval_sets_manager.add_eval_case(
+            app_name=app_name,
+            eval_set_id=eval_set.eval_set_id,
+            eval_case=eval_case,
        )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-          user_id=eval_case_result.user_id,
-          session_id=eval_case_result.session_id,
+      inference_requests.append(
+          InferenceRequest(
+              app_name=app_name,
+              eval_set_id=eval_set.eval_set_id,
+              eval_case_ids=eval_case_ids,
+              inference_config=InferenceConfig(),
+          )
+      )
+  else:
+    # We assume that what we have are eval set ids instead.
+    eval_sets_manager = (
+        eval_sets_manager
+        if eval_storage_uri
+        else LocalEvalSetsManager(agents_dir=agents_dir)
+    )
+
+    for eval_set_id_key, eval_case_ids in eval_set_file_or_id_to_evals.items():
+      inference_requests.append(
+          InferenceRequest(
+              app_name=app_name,
+              eval_set_id=eval_set_id_key,
+              eval_case_ids=eval_case_ids,
+              inference_config=InferenceConfig(),
+          )
      )
-    eval_case_results.append(eval_case_result)
-  return eval_case_results
 
   try:
-
-
-
-
-    # Write eval set results.
-    eval_set_id_to_eval_results = collections.defaultdict(list)
-    for eval_case_result in eval_results:
-      eval_set_id = eval_case_result.eval_set_id
-      eval_set_id_to_eval_results[eval_set_id].append(eval_case_result)
-
-    for eval_set_id, eval_case_results in eval_set_id_to_eval_results.items():
-      eval_set_results_manager.save_eval_set_result(
-          app_name=os.path.basename(agent_module_file_path),
-          eval_set_id=eval_set_id,
-          eval_case_results=eval_case_results,
+    eval_service = LocalEvalService(
+        root_agent=root_agent,
+        eval_sets_manager=eval_sets_manager,
+        eval_set_results_manager=eval_set_results_manager,
    )
 
+    inference_results = asyncio.run(
+        _collect_inferences(
+            inference_requests=inference_requests, eval_service=eval_service
+        )
+    )
+    eval_results = asyncio.run(
+        _collect_eval_results(
+            inference_results=inference_results,
+            eval_service=eval_service,
+            eval_metrics=eval_metrics,
+        )
+    )
+  except ModuleNotFoundError as mnf:
+    raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
+
   print("*********************************************************************")
   eval_run_summary = {}
 
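Note: this hunk replaces the old inline eval loop in `cli_eval` with the `LocalEvalService` pipeline: build `InferenceRequest`s, collect `InferenceResult`s, then score them in a second pass. A condensed sketch of that flow outside the CLI, using only constructors and methods that appear in this diff; the agent, managers, and metrics are assumed to be built elsewhere:

    from google.adk.evaluation.base_eval_service import EvaluateConfig
    from google.adk.evaluation.base_eval_service import EvaluateRequest
    from google.adk.evaluation.base_eval_service import InferenceConfig
    from google.adk.evaluation.base_eval_service import InferenceRequest
    from google.adk.evaluation.local_eval_service import LocalEvalService

    async def run_eval_pipeline(root_agent, eval_sets_manager,
                                eval_set_results_manager, app_name,
                                eval_set_id, eval_metrics):
      eval_service = LocalEvalService(
          root_agent=root_agent,
          eval_sets_manager=eval_sets_manager,
          eval_set_results_manager=eval_set_results_manager,
      )
      request = InferenceRequest(
          app_name=app_name,
          eval_set_id=eval_set_id,
          eval_case_ids=[],  # the CLI passes an empty list when no cases are named
          inference_config=InferenceConfig(),
      )
      # Inference pass: run the agent against the selected eval cases.
      inference_results = [
          r async for r in eval_service.perform_inference(inference_request=request)
      ]
      # Evaluation pass: score the collected inferences with the chosen metrics.
      evaluate_request = EvaluateRequest(
          inference_results=inference_results,
          evaluate_config=EvaluateConfig(eval_metrics=eval_metrics),
      )
      return [
          r async for r in eval_service.evaluate(evaluate_request=evaluate_request)
      ]

    # The CLI drives this with two asyncio.run(...) calls, as in the hunk above.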
@@ -461,7 +517,10 @@ def adk_services_options():
       "--session_service_uri",
       help=(
           """Optional. The URI of the session service.
-          - Use 'agentengine://<
+          - Use 'agentengine://<agent_engine>' to connect to Agent Engine
+            sessions. <agent_engine> can either be the full qualified resource
+            name 'projects/abc/locations/us-central1/reasoningEngines/123' or
+            the resource id '123'.
           - Use 'sqlite://<path_to_sqlite_file>' to connect to a SQLite DB.
           - See https://docs.sqlalchemy.org/en/20/core/engines.html#backend-specific-urls for more details on supported database URIs."""
       ),
@@ -487,11 +546,12 @@ def adk_services_options():
   @click.option(
       "--memory_service_uri",
       type=str,
-      help=(
-          """Optional. The URI of the memory service.
+      help=("""Optional. The URI of the memory service.
           - Use 'rag://<rag_corpus_id>' to connect to Vertex AI Rag Memory Service.
-          - Use 'agentengine://<
-
+          - Use 'agentengine://<agent_engine>' to connect to Agent Engine
+            sessions. <agent_engine> can either be the full qualified resource
+            name 'projects/abc/locations/us-central1/reasoningEngines/123' or
+            the resource id '123'."""),
       default=None,
   )
   @functools.wraps(func)
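Note: both option help texts above accept the same two spellings for <agent_engine>. For reference, the equivalent URI values the flags document; the project, location, and engine id below are the placeholders from the help text, not real resources:

    full_resource_name = (
        "agentengine://projects/abc/locations/us-central1/reasoningEngines/123"
    )
    resource_id_only = "agentengine://123"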
@@ -1021,7 +1081,8 @@ def cli_deploy_agent_engine(
   Example:
 
     adk deploy agent_engine --project=[project] --region=[region]
-    --staging_bucket=[staging_bucket] --display_name=[app_name]
+    --staging_bucket=[staging_bucket] --display_name=[app_name]
+    path/to/my_agent
   """
   try:
     cli_deploy.to_agent_engine(