agentevals-cli 0.9.3__tar.gz → 0.9.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/Dockerfile +1 -1
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/PKG-INFO +4 -2
- agentevals_cli-0.9.4/charts/agentevals/templates/rbac.yaml +33 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/charts/agentevals/values.yaml +14 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/adk/run.py +1 -1
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/pyproject.toml +8 -1
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/builtin_metrics.py +77 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/config.py +8 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/custom_evaluators.py +2 -0
- agentevals_cli-0.9.4/src/agentevals/resolvers/__init__.py +167 -0
- agentevals_cli-0.9.4/src/agentevals/resolvers/kubernetes.py +62 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/run/worker.py +10 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/storage/models.py +9 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/integration/test_live_agents.py +7 -0
- agentevals_cli-0.9.4/tests/resolvers/test_kubernetes.py +63 -0
- agentevals_cli-0.9.4/tests/resolvers/test_registry.py +145 -0
- agentevals_cli-0.9.4/tests/storage/__init__.py +0 -0
- agentevals_cli-0.9.4/tests/test_credential_injection.py +122 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_mcp_server.py +2 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/uv.lock +176 -802
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.claude/skills/eval/SKILL.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.claude/skills/eval/evals/evals.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.claude/skills/inspect/SKILL.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.claude/skills/inspect/evals/evals.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.dockerignore +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.github/workflows/ci.yml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.github/workflows/publish-evaluator-sdk.yml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.github/workflows/release.yml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.gitignore +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/.mcp.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/CONTRIBUTING.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/DEVELOPMENT.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/LICENSE +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/Makefile +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/README.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/charts/agentevals/Chart.yaml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/charts/agentevals/templates/NOTES.txt +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/charts/agentevals/templates/_helpers.tpl +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/charts/agentevals/templates/deployment.yaml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/charts/agentevals/templates/postgresql-secret.yaml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/charts/agentevals/templates/postgresql.yaml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/charts/agentevals/templates/service.yaml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/charts/agentevals/templates/serviceaccount.yaml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/docs/assets/logo-color-on-transparent.svg +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/docs/assets/logo-color.png +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/docs/assets/logo-dark-on-transparent.svg +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/docs/custom-evaluators.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/docs/eval-set-format.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/docs/otel-compatibility.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/docs/streaming.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/README.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/custom_evaluators/eval_config.yaml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/custom_evaluators/eval_config_openai_eval.yaml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/custom_evaluators/response_quality.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/custom_evaluators/tool_call_checker.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/custom_sink/README.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/custom_sink/agentevals_example_custom_sink/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/custom_sink/agentevals_example_custom_sink/sink.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/custom_sink/pyproject.toml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/dice_agent/README.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/dice_agent/agent.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/dice_agent/eval_set.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/dice_agent/main.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/dice_agent/test_streaming.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/kubernetes/README.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/langchain_agent/README.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/langchain_agent/agent.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/langchain_agent/eval_set.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/langchain_agent/main.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/langchain_agent/requirements.txt +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/langchain_agent/test_streaming.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/sdk_example/async_example.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/sdk_example/context_manager_example.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/sdk_example/decorator_example.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/sdk_example/requirements.txt +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/strands_agent/agent.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/strands_agent/eval_set.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/strands_agent/main.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/strands_agent/requirements.txt +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/adk/requirements.txt +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/langchain/requirements.txt +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/langchain/run.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/ollama/requirements.txt +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/ollama/run.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/openai-agents/requirements.txt +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/openai-agents/run.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/pydantic-ai/requirements.txt +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/pydantic-ai/run.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/strands/requirements.txt +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/examples/zero-code-examples/strands/run.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/flake.lock +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/flake.nix +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/packages/evaluator-sdk-py/README.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/packages/evaluator-sdk-py/pyproject.toml +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/types.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/samples/eval_set_helm.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/samples/evalset_helm_3_2026-02-23.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/samples/evalset_k8s_2026-02-20.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/samples/helm.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/samples/helm_2.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/samples/helm_3.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/samples/k8s.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/samples/tempo_export_with_batches.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/_protocol.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/_static/assets/index-BqibLiHO.css +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/_static/assets/index-RIquRPno.js +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/_static/index.html +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/_static/logo.svg +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/_static/vite.svg +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/app.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/debug_routes.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/dependencies.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/models.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/otlp_app.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/otlp_grpc.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/otlp_processing.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/otlp_routes.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/routes.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/runs_routes.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/api/streaming_routes.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/cli.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/converter.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/eval_config_loader.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/evaluator/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/evaluator/resolver.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/evaluator/sources.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/evaluator/templates.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/evaluator/venv.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/extraction.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/genai_converter.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/loader/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/loader/auto.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/loader/base.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/loader/jaeger.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/loader/otlp.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/mcp_server.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/openai_eval_backend.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/output.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/run/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/run/fetcher.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/run/result_builder.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/run/service.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/run/sinks.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/runner.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/sdk.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/storage/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/storage/config.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/storage/postgres/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/storage/postgres/migrations/000001_init.down.sql +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/storage/postgres/migrations/000001_init.up.sql +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/storage/postgres/migrator.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/storage/postgres/pool.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/storage/repos/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/storage/repos/memory.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/storage/repos/postgres.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/streaming/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/streaming/incremental_processor.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/streaming/processor.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/streaming/session.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/streaming/ws_server.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/trace_attrs.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/trace_metrics.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/utils/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/utils/genai_messages.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/utils/log_buffer.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/src/agentevals/utils/log_enrichment.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/api/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/api/test_evaluate_persistence.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/api/test_runs_routes.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/integration/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/integration/conftest.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/integration/test_evaluation_pipeline.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/integration/test_otlp_grpc_receiver.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/integration/test_session_grouping.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/integration/test_timing_stress.py +0 -0
- {agentevals_cli-0.9.3/tests/run → agentevals_cli-0.9.4/tests/resolvers}/__init__.py +0 -0
- {agentevals_cli-0.9.3/tests/storage → agentevals_cli-0.9.4/tests/run}/__init__.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/run/test_fetcher.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/run/test_result_builder.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/run/test_service.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/run/test_sinks.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/storage/test_config.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/storage/test_memory_repos.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/storage/test_migrator.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/storage/test_models.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_api.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_cli.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_converter.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_eval_config_loader.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_extraction.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_genai_converter.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_jaeger_loader.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_loader_auto.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_log_enrichment.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_openai_eval_backend.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_otlp_loader.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_otlp_receiver.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_output.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_protocol.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_runner.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_sdk.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/tests/test_trace_metrics.py +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/.gitignore +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/README.md +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/eslint.config.js +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/index.html +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/package-lock.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/package.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/public/logo.svg +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/public/vite.svg +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/App.css +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/App.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/api/client.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/assets/react.svg +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/annotation-queue/AnnotationDetailPanel.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/annotation-queue/AnnotationQueueView.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/annotation-queue/AnnotationTable.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/bug-report/BugReportModal.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/builder/BuilderHeader.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/builder/BuilderView.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/builder/EvalCaseCard.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/builder/EvalCasesList.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/builder/InvocationEditor.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/builder/JsonPreview.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/builder/MetadataEditor.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/builder/TraceUploadZone.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/builder/index.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/dashboard/DashboardView.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/dashboard/MetricScoreCard.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/dashboard/PerformanceCard.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/dashboard/PerformanceCharts.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/dashboard/SummaryStats.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/dashboard/TraceCard.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/dashboard/TraceTable.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/ComparisonPanel.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/DataSection.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/InspectorHeader.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/InspectorLayout.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/InspectorView.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/InvocationCard.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/InvocationSummaryPanel.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/MetricResultsSection.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/MetricsComparisonSection.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/PerformanceSection.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/ToolCallList.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/inspector/TrajectoryComparisonDetails.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/sidebar/Sidebar.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/streaming/LiveConversationPanel.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/streaming/LiveMessage.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/streaming/LiveStreamingView.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/streaming/SessionCard.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/streaming/SessionMetadata.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/upload/EvalSetEditorDrawer.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/upload/FileDropZone.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/upload/MetricSelector.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/upload/RawJsonPreview.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/upload/TraceEditorDrawer.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/upload/UploadView.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/components/welcome/WelcomeView.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/config.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/context/TraceContext.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/context/TraceProvider.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/index.css +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/lib/console-capture.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/lib/eval-config.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/lib/evalset-builder.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/lib/network-capture.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/lib/trace-helpers.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/lib/trace-loader.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/lib/trace-metadata.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/lib/trace-patcher.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/lib/types.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/lib/utils.ts +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/src/main.tsx +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/tsconfig.app.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/tsconfig.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/tsconfig.node.json +0 -0
- {agentevals_cli-0.9.3 → agentevals_cli-0.9.4}/ui/vite.config.ts +0 -0
|
@@ -31,7 +31,7 @@ COPY --from=ui /build/ui/dist ./src/agentevals/_static
|
|
|
31
31
|
ARG VERSION
|
|
32
32
|
ENV SETUPTOOLS_SCM_PRETEND_VERSION=${VERSION}
|
|
33
33
|
|
|
34
|
-
RUN uv sync --frozen --no-dev --extra live --extra postgres \
|
|
34
|
+
RUN uv sync --frozen --no-dev --extra live --extra postgres --extra kubernetes \
|
|
35
35
|
&& groupadd --gid 1000 app \
|
|
36
36
|
&& useradd --uid 1000 --gid app --home-dir /app --no-log-init app \
|
|
37
37
|
&& chown -R app:app /app
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentevals-cli
|
|
3
|
-
Version: 0.9.3
|
|
3
|
+
Version: 0.9.4
|
|
4
4
|
Summary: Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Python: >=3.11
|
|
7
7
|
Requires-Dist: click>=8.0
|
|
8
8
|
Requires-Dist: fastapi>=0.115.0
|
|
9
|
-
Requires-Dist: google-adk[eval]
|
|
9
|
+
Requires-Dist: google-adk[eval]<2.2,>=2.1.0
|
|
10
10
|
Requires-Dist: httpx>=0.27.0
|
|
11
11
|
Requires-Dist: opentelemetry-proto>=1.36.0
|
|
12
12
|
Requires-Dist: python-dotenv>=1.0.0
|
|
@@ -14,6 +14,8 @@ Requires-Dist: python-multipart>=0.0.12
|
|
|
14
14
|
Requires-Dist: pyyaml>=6.0
|
|
15
15
|
Requires-Dist: tabulate>=0.9.0
|
|
16
16
|
Requires-Dist: uvicorn[standard]>=0.32.0
|
|
17
|
+
Provides-Extra: kubernetes
|
|
18
|
+
Requires-Dist: kubernetes>=36.0.0; extra == 'kubernetes'
|
|
17
19
|
Provides-Extra: live
|
|
18
20
|
Requires-Dist: httpx>=0.27.0; extra == 'live'
|
|
19
21
|
Requires-Dist: mcp>=1.26.0; extra == 'live'
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{{- if .Values.rbac.create -}}
|
|
2
|
+
apiVersion: rbac.authorization.k8s.io/v1
|
|
3
|
+
kind: Role
|
|
4
|
+
metadata:
|
|
5
|
+
name: {{ include "agentevals.fullname" . }}
|
|
6
|
+
namespace: {{ include "agentevals.namespace" . }}
|
|
7
|
+
labels:
|
|
8
|
+
{{- include "agentevals.labels" . | nindent 4 }}
|
|
9
|
+
rules:
|
|
10
|
+
- apiGroups: [""]
|
|
11
|
+
resources: ["secrets"]
|
|
12
|
+
verbs: ["get"]
|
|
13
|
+
{{- with .Values.rbac.secretNames }}
|
|
14
|
+
resourceNames:
|
|
15
|
+
{{- toYaml . | nindent 6 }}
|
|
16
|
+
{{- end }}
|
|
17
|
+
---
|
|
18
|
+
apiVersion: rbac.authorization.k8s.io/v1
|
|
19
|
+
kind: RoleBinding
|
|
20
|
+
metadata:
|
|
21
|
+
name: {{ include "agentevals.fullname" . }}
|
|
22
|
+
namespace: {{ include "agentevals.namespace" . }}
|
|
23
|
+
labels:
|
|
24
|
+
{{- include "agentevals.labels" . | nindent 4 }}
|
|
25
|
+
roleRef:
|
|
26
|
+
apiGroup: rbac.authorization.k8s.io
|
|
27
|
+
kind: Role
|
|
28
|
+
name: {{ include "agentevals.fullname" . }}
|
|
29
|
+
subjects:
|
|
30
|
+
- kind: ServiceAccount
|
|
31
|
+
name: {{ include "agentevals.serviceAccountName" . }}
|
|
32
|
+
namespace: {{ include "agentevals.namespace" . }}
|
|
33
|
+
{{- end }}
|
|
@@ -57,6 +57,20 @@ serviceAccount:
|
|
|
57
57
|
# -- ServiceAccount name override
|
|
58
58
|
name: ""
|
|
59
59
|
|
|
60
|
+
# ==============================================================================
|
|
61
|
+
# RBAC
|
|
62
|
+
# ==============================================================================
|
|
63
|
+
|
|
64
|
+
# -- Namespaced Role + RoleBinding granting the pod's ServiceAccount read
|
|
65
|
+
# access to Secrets. Enable this when the kubernetes secret resolver reads
|
|
66
|
+
# provider credentials from Secrets via in-cluster config.
|
|
67
|
+
rbac:
|
|
68
|
+
# -- Create the Role and RoleBinding
|
|
69
|
+
create: false
|
|
70
|
+
# -- Restrict the Role to these Secret names. Empty grants get on all
|
|
71
|
+
# Secrets in the release namespace.
|
|
72
|
+
secretNames: []
|
|
73
|
+
|
|
60
74
|
# ==============================================================================
|
|
61
75
|
# Pod
|
|
62
76
|
# ==============================================================================
|
|
@@ -74,7 +74,7 @@ async def main():
|
|
|
74
74
|
|
|
75
75
|
agent_response = ""
|
|
76
76
|
async for event in runner.run_async(user_id=user_id, session_id=session.id, new_message=content):
|
|
77
|
-
if event.content.parts and event.content.parts[0].text:
|
|
77
|
+
if event.content and event.content.parts and event.content.parts[0].text:
|
|
78
78
|
agent_response = event.content.parts[0].text
|
|
79
79
|
|
|
80
80
|
print(f" Agent: {agent_response}")
|
|
@@ -9,7 +9,7 @@ description = "Standalone framework to evaluate agent correctness based on porta
|
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
11
11
|
dependencies = [
|
|
12
|
-
"google-adk[eval]>=1.
|
|
12
|
+
"google-adk[eval]>=2.1.0,<2.2",
|
|
13
13
|
"click>=8.0",
|
|
14
14
|
"tabulate>=0.9.0",
|
|
15
15
|
"fastapi>=0.115.0",
|
|
@@ -36,10 +36,17 @@ openai = [
|
|
|
36
36
|
postgres = [
|
|
37
37
|
"asyncpg>=0.30.0",
|
|
38
38
|
]
|
|
39
|
+
kubernetes = [
|
|
40
|
+
"kubernetes>=36.0.0",
|
|
41
|
+
]
|
|
39
42
|
|
|
40
43
|
[project.scripts]
|
|
41
44
|
agentevals = "agentevals.cli:main"
|
|
42
45
|
|
|
46
|
+
[project.entry-points."agentevals.secret_resolvers"]
|
|
47
|
+
env = "agentevals.resolvers:create_env_resolver"
|
|
48
|
+
kubernetes = "agentevals.resolvers.kubernetes:create_kubernetes_resolver"
|
|
49
|
+
|
|
43
50
|
[tool.hatch.version]
|
|
44
51
|
source = "vcs"
|
|
45
52
|
|
|
@@ -27,6 +27,8 @@ from google.adk.evaluation.eval_metrics import (
|
|
|
27
27
|
from google.adk.evaluation.eval_rubrics import Rubric, RubricContent
|
|
28
28
|
from google.adk.evaluation.evaluator import EvaluationResult, Evaluator
|
|
29
29
|
|
|
30
|
+
from .resolvers import get_resolved_credential
|
|
31
|
+
|
|
30
32
|
logger = logging.getLogger(__name__)
|
|
31
33
|
|
|
32
34
|
METRICS_NEEDING_EXPECTED = {
|
|
@@ -267,6 +269,67 @@ def get_evaluator(eval_metric: EvalMetric) -> Evaluator:
|
|
|
267
269
|
return DEFAULT_METRIC_EVALUATOR_REGISTRY.get_evaluator(eval_metric)
|
|
268
270
|
|
|
269
271
|
|
|
272
|
+
def _build_judge_model(model_id: str, api_key: str, base_url: str | None = None):
    """Construct a judge ``BaseLlm`` that authenticates with *api_key* instead of env vars.

    The construction path is chosen by the model class that ADK's ``LLMRegistry``
    resolves for *model_id*:

    * LiteLlm-backed models receive ``api_key`` (and ``base_url`` when given) as
      constructor keyword arguments.
    * Anything else is built as the Gemini-native model, which takes no ``api_key``;
      a ``google.genai`` client built from the key is planted in the instance's
      ``api_client`` slot instead.

    Per the original author's note, the evaluator has already resolved this same
    *model_id* through ``_setup_auto_rater`` at construction time, so the registry
    lookup here is not expected to fail or disagree.

    Args:
        model_id: Judge model identifier understood by ``LLMRegistry``.
        api_key: Resolved secret value used to authenticate the judge.
        base_url: Optional endpoint override; ignored when falsy.

    Returns:
        A ``LiteLlm`` or ``Gemini`` instance carrying the supplied credential.
    """
    from google.adk.models.lite_llm import LiteLlm
    from google.adk.models.registry import LLMRegistry

    resolved_cls = LLMRegistry().resolve(model_id)
    if issubclass(resolved_cls, LiteLlm):
        lite_kwargs: dict[str, Any] = {"api_key": api_key}
        if base_url:
            lite_kwargs["base_url"] = base_url
        return LiteLlm(model=model_id, **lite_kwargs)

    # Gemini-only imports stay below the LiteLlm branch so that path never loads them.
    from google.adk.models.google_llm import Gemini
    from google.genai import Client
    from google.genai import types as genai_types

    gemini = Gemini(model=model_id)
    genai_kwargs: dict[str, Any] = {"api_key": api_key}
    if base_url:
        genai_kwargs["http_options"] = genai_types.HttpOptions(base_url=base_url)
    # ``api_client`` is a functools.cached_property, which memoizes into the instance
    # ``__dict__``. Writing that slot first means the lazily-built default client is
    # never created and the judge talks through the client holding the resolved key.
    gemini.__dict__["api_client"] = Client(**genai_kwargs)
    return gemini
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _inject_judge_credential(evaluator: Evaluator, api_key: str, base_url: str | None = None) -> None:
    """Swap a judge evaluator's auto-rater model for one built from *api_key*.

    Detection is attribute-based rather than class-based: the evaluator must expose
    the ADK private attributes ``_judge_model_options`` and ``_judge_model`` (set by
    ``LlmAsJudge._setup_auto_rater``). Per the original notes this one path covers
    ``FinalResponseMatchV2Evaluator``, the ``rubric_based_*_v1`` evaluators, and
    ``HallucinationsV1Evaluator``, which exposes the same attributes without
    subclassing ``LlmAsJudge``. ``get_evaluator`` hands back a fresh instance per
    evaluation, so mutating it here shares no state between concurrent runs.

    When the evaluator is not judge-backed, or its options carry no ``judge_model``,
    a warning is logged and the evaluator is left untouched.

    TODO(upstream): propose that ADK ``JudgeModelOptions`` carry a credential or a prebuilt
    model instance, so judge auth no longer depends on this private seam or process env.

    Args:
        evaluator: Evaluator instance to mutate in place.
        api_key: Resolved secret value for the judge provider.
        base_url: Optional judge endpoint override.
    """
    evaluator_name = type(evaluator).__name__

    judge_options = getattr(evaluator, "_judge_model_options", None)
    if judge_options is None or not hasattr(evaluator, "_judge_model"):
        logger.warning("evaluator %s is not judge-backed; cannot inject credential", evaluator_name)
        return

    judge_model_id = getattr(judge_options, "judge_model", None)
    if not judge_model_id:
        logger.warning("evaluator %s has no resolved judge_model; skipping credential injection", evaluator_name)
        return

    evaluator._judge_model = _build_judge_model(judge_model_id, api_key, base_url)
|
|
331
|
+
|
|
332
|
+
|
|
270
333
|
def extract_trajectory_details(eval_result: EvaluationResult) -> dict[str, Any]:
|
|
271
334
|
"""Extract expected vs actual tool call details from trajectory evaluation."""
|
|
272
335
|
comparisons = []
|
|
@@ -305,6 +368,8 @@ async def evaluate_builtin_metric(
|
|
|
305
368
|
judge_model: str | None,
|
|
306
369
|
threshold: float | None,
|
|
307
370
|
match_type: str | None = None,
|
|
371
|
+
credential_ref: str | None = None,
|
|
372
|
+
judge_base_url: str | None = None,
|
|
308
373
|
) -> dict[str, Any]:
|
|
309
374
|
"""Evaluate a single built-in ADK metric.
|
|
310
375
|
|
|
@@ -326,6 +391,18 @@ async def evaluate_builtin_metric(
|
|
|
326
391
|
eval_metric = build_eval_metric(metric_name, judge_model, threshold, match_type=match_type)
|
|
327
392
|
evaluator: Evaluator = get_evaluator(eval_metric)
|
|
328
393
|
|
|
394
|
+
if credential_ref:
|
|
395
|
+
api_key = get_resolved_credential(credential_ref)
|
|
396
|
+
if api_key is None:
|
|
397
|
+
return MetricResult(
|
|
398
|
+
metric_name=metric_name,
|
|
399
|
+
error=(
|
|
400
|
+
f"Metric '{metric_name}' references credential '{credential_ref}', "
|
|
401
|
+
f"which was not provided in the run's credentialRefs."
|
|
402
|
+
),
|
|
403
|
+
)
|
|
404
|
+
_inject_judge_credential(evaluator, api_key, judge_base_url)
|
|
405
|
+
|
|
329
406
|
if metric_name in _METRICS_NEEDING_INVOCATION_EVENTS:
|
|
330
407
|
actual_invocations = _enrich_app_details([_to_invocation_events(inv) for inv in actual_invocations])
|
|
331
408
|
if expected_invocations is not None:
|
|
@@ -27,6 +27,14 @@ class BuiltinMetricDef(BaseModel):
|
|
|
27
27
|
threshold: float | None = Field(default=None, ge=0, le=1)
|
|
28
28
|
judge_model: str | None = None
|
|
29
29
|
trajectory_match_type: str | None = None
|
|
30
|
+
credential_ref: str | None = Field(
|
|
31
|
+
default=None,
|
|
32
|
+
description="Logical name of a RunSpec.credential_refs entry whose resolved value is the judge API key.",
|
|
33
|
+
)
|
|
34
|
+
judge_base_url: str | None = Field(
|
|
35
|
+
default=None,
|
|
36
|
+
description="Optional base URL for the judge endpoint (e.g. an OpenAI-compatible proxy).",
|
|
37
|
+
)
|
|
30
38
|
|
|
31
39
|
@field_validator("trajectory_match_type")
|
|
32
40
|
@classmethod
|
|
@@ -453,6 +453,8 @@ async def evaluate_custom_evaluator(
|
|
|
453
453
|
judge_model=evaluator_def.judge_model,
|
|
454
454
|
threshold=evaluator_def.threshold,
|
|
455
455
|
match_type=evaluator_def.trajectory_match_type,
|
|
456
|
+
credential_ref=evaluator_def.credential_ref,
|
|
457
|
+
judge_base_url=evaluator_def.judge_base_url,
|
|
456
458
|
)
|
|
457
459
|
|
|
458
460
|
if isinstance(evaluator_def, OpenAIEvalDef):
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Secret resolvers — a generic, pluggable layer for resolving secret references.
|
|
2
|
+
|
|
3
|
+
A host attaches *secret references* to a run (``RunSpec.credential_refs``); each
|
|
4
|
+
reference is a ``dict`` with a ``kind`` plus kind-specific locator fields. At run
|
|
5
|
+
time the worker resolves every reference once to its secret value and stashes the
|
|
6
|
+
``logical-name -> value`` map in a :class:`contextvars.ContextVar` scoped to that
|
|
7
|
+
run's asyncio task. Consumers (e.g. judge construction) read the value they need
|
|
8
|
+
with no ``os.environ`` mutation and no shared state across concurrently running
|
|
9
|
+
evaluations.
|
|
10
|
+
|
|
11
|
+
This layer is deliberately consumer-agnostic: a resolver turns a reference into a
|
|
12
|
+
secret value and nothing more. How that value is used — which provider it
|
|
13
|
+
authenticates, what base URL it pairs with — is the consumer's concern, configured
|
|
14
|
+
where the consumer is built (for judges, on the evaluator definition).
|
|
15
|
+
|
|
16
|
+
**Plugins:** third-party packages declare setuptools entry points in group
|
|
17
|
+
``agentevals.secret_resolvers`` (entry **name** = ``kind`` string; **value** =
|
|
18
|
+
``module:factory`` callable ``factory(spec: dict) -> SecretResolver``). The
|
|
19
|
+
zero-dependency ``env`` resolver ships with agentevals through this same group so
|
|
20
|
+
the discovery path is exercised in OSS. Hosts may replace any kind via
|
|
21
|
+
:func:`register_resolver_factory` (highest precedence).
|
|
22
|
+
|
|
23
|
+
Tests may call :func:`clear_resolver_plugin_registry` to drop programmatic
|
|
24
|
+
registrations.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import logging
|
|
30
|
+
import os
|
|
31
|
+
from collections.abc import Callable
|
|
32
|
+
from contextvars import ContextVar, Token
|
|
33
|
+
from importlib.metadata import entry_points
|
|
34
|
+
from typing import Any, Protocol, cast
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
SECRET_RESOLVER_ENTRY_POINT_GROUP = "agentevals.secret_resolvers"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SecretResolver(Protocol):
    """Structural interface for objects that turn a secret reference into its value."""

    async def resolve(self, ref: dict[str, Any]) -> str:
        """Return the secret value located by the reference dict *ref*."""
        ...
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
SecretResolverFactory = Callable[[dict[str, Any]], SecretResolver]
|
|
46
|
+
|
|
47
|
+
_PLUGIN_FACTORIES: dict[str, SecretResolverFactory] = {}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class EnvSecretResolver:
    """Secret resolver backed by the process environment.

    Handles references shaped like ``{"kind": "env", "name": "OPENAI_API_KEY"}``.
    """

    async def resolve(self, ref: dict[str, Any]) -> str:
        """Return the value of the environment variable named by ``ref["name"]``.

        Raises:
            ValueError: If the reference has no (or an empty) ``name``, or the
                variable is not present in ``os.environ``.
        """
        var_name = ref.get("name")
        if not var_name:
            raise ValueError("env secret reference requires a 'name' field")
        if (secret := os.environ.get(var_name)) is None:
            raise ValueError(f"environment variable {var_name!r} is not set")
        return secret
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def create_env_resolver(spec: dict[str, Any]) -> EnvSecretResolver:
    """Entry-point factory for the ``env`` kind.

    *spec* is accepted to satisfy the factory signature but is not read; every call
    returns a new :class:`EnvSecretResolver`.
    """
    return EnvSecretResolver()
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def register_resolver_factory(kind: str, factory: SecretResolverFactory) -> None:
    """Install *factory* as the programmatic factory for ``kind``, replacing any earlier one.

    Programmatic registrations are merged last when factories are looked up, so they
    win over built-ins and entry points for the same ``kind``. Intended to be called
    during process startup, before run workers consume specs.

    Args:
        kind: The reference ``kind`` string this factory handles.
        factory: Callable that receives the full reference dict (including ``kind``)
            and returns a :class:`SecretResolver`.
    """
    _PLUGIN_FACTORIES[kind] = factory
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def clear_resolver_plugin_registry() -> None:
    """Forget every factory added through :func:`register_resolver_factory`.

    Provided for tests; entry-point factories are unaffected because they are not
    stored in this registry.
    """
    _PLUGIN_FACTORIES.clear()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _builtin_factories() -> dict[str, SecretResolverFactory]:
|
|
82
|
+
"""No hardcoded built-ins: ``env`` ships via the entry-point group."""
|
|
83
|
+
return {}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _merge_resolver_factories() -> dict[str, SecretResolverFactory]:
    """Assemble the effective ``kind -> factory`` table.

    Layers are applied in this order: built-ins first, then entry points from
    :data:`SECRET_RESOLVER_ENTRY_POINT_GROUP` (which never shadow a kind that is
    already present), then programmatic registrations, which override everything.
    Entry points that fail to load are logged with a traceback and skipped; entry
    points that load to a non-callable object are skipped with a warning.
    """
    factories: dict[str, SecretResolverFactory] = dict(_builtin_factories())

    for entry in entry_points(group=SECRET_RESOLVER_ENTRY_POINT_GROUP):
        if entry.name in factories:
            logger.debug("skipping resolver entry point %r; built-in kind takes precedence", entry.name)
            continue
        try:
            candidate = entry.load()
        except Exception:
            # A broken plugin must not take the other resolver kinds down with it.
            logger.exception("failed to load resolver entry point %r", entry.name)
            continue
        if callable(candidate):
            factories[entry.name] = cast(SecretResolverFactory, candidate)
        else:
            logger.warning("resolver entry point %r is not callable; skipping", entry.name)

    factories.update(_PLUGIN_FACTORIES)
    return factories
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def registered_resolver_kinds() -> tuple[str, ...]:
    """Return, in sorted order, every ``kind`` that :func:`build_resolver` could serve right now.

    The result reflects the merged factory table: built-ins, entry points from
    :data:`SECRET_RESOLVER_ENTRY_POINT_GROUP` that loaded successfully, and
    registrations made through :func:`register_resolver_factory`.
    """
    available_kinds = sorted(_merge_resolver_factories())
    return tuple(available_kinds)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def build_resolver(ref: dict[str, Any]) -> SecretResolver:
    """Build the :class:`SecretResolver` that handles ``ref["kind"]``.

    The factory is looked up in the merged table: built-ins, then entry points in
    group ``agentevals.secret_resolvers`` for kinds not already built in, then
    :func:`register_resolver_factory` registrations, which override any earlier
    factory for the same ``kind``. The chosen factory is called with the full
    reference dict.

    Raises:
        ValueError: If *ref* has no (or an empty) ``kind``, or no factory is
            registered for that kind.
    """
    kind = ref.get("kind")
    if not kind:
        raise ValueError("secret reference is missing a 'kind' field")

    available = _merge_resolver_factories()
    make_resolver = available.get(kind)
    if make_resolver is not None:
        return make_resolver(ref)

    known_kinds = ", ".join(sorted(available)) or "(none)"
    raise ValueError(f"unknown secret resolver kind '{kind}'. Available: {known_kinds}")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
async def resolve_credential_refs(refs: dict[str, dict[str, Any]]) -> dict[str, str]:
    """Turn a ``logical-name -> reference`` map into a ``logical-name -> secret value`` map.

    References are resolved one at a time, in the mapping's iteration order; the
    first resolver error propagates and aborts the rest. Each resolver reads only its
    own kind-specific locator fields, so any extra fields a host places on a
    reference are ignored here. Consumer-specific configuration belongs with the
    consumer (for judges, on the evaluator definition).
    """
    return {logical_name: await build_resolver(ref).resolve(ref) for logical_name, ref in refs.items()}
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
_RESOLVED: ContextVar[dict[str, str] | None] = ContextVar("agentevals_resolved_credentials", default=None)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def set_resolved_credentials(mapping: dict[str, str]) -> Token:
    """Bind a ``logical-name -> secret value`` map to the current context.

    The map is stored in a context variable, so per the module design it is scoped to
    the asyncio task running the evaluation. Pass the returned token to
    :func:`reset_resolved_credentials` to restore the previous value.
    """
    return _RESOLVED.set(mapping)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def reset_resolved_credentials(token: Token) -> None:
    """Restore the credential map that was active before the ``set`` call that produced *token*."""
    _RESOLVED.reset(token)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def get_resolved_credential(logical_name: str) -> str | None:
    """Return the secret value bound to *logical_name* for the current run.

    Returns ``None`` when no credential map is set in this context, when the map is
    empty, or when it has no entry for *logical_name*.
    """
    current = _RESOLVED.get()
    return current.get(logical_name) if current else None
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Kubernetes Secret resolver — an optional :class:`SecretResolver` plugin.
|
|
2
|
+
|
|
3
|
+
Resolves a reference of the form ``{"kind": "kubernetes", "namespace": ..., "name": ...,
|
|
4
|
+
"key": ...}`` by reading the named Secret and base64-decoding the requested key. Ships
|
|
5
|
+
behind the ``kubernetes`` extra and is wired through the ``agentevals.secret_resolvers``
|
|
6
|
+
entry-point group; the ``kubernetes`` package is imported lazily inside the factory so
|
|
7
|
+
installing agentevals without the extra never breaks import or plugin discovery.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import base64
|
|
14
|
+
import logging
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class KubernetesSecretResolver:
    """Reads a key out of a Kubernetes Secret via a ``CoreV1Api`` client."""

    def __init__(self, core_v1_api: Any) -> None:
        """Store the client whose ``read_namespaced_secret`` method performs the read."""
        self._core_v1 = core_v1_api

    async def resolve(self, ref: dict[str, Any]) -> str:
        """Return the decoded value of ``ref["key"]`` in Secret ``ref["namespace"]/ref["name"]``.

        The client call is dispatched with :func:`asyncio.to_thread` (the client is
        presumably blocking) so the event loop stays responsive during the read.

        Raises:
            ValueError: If ``namespace``, ``name`` or ``key`` is missing or empty, if
                the Secret has no such key, or if the stored value is not
                base64-encoded UTF-8 text.
        """
        namespace = ref.get("namespace")
        name = ref.get("name")
        key = ref.get("key")
        missing = [f for f, v in (("namespace", namespace), ("name", name), ("key", key)) if not v]
        if missing:
            raise ValueError(f"kubernetes secret reference is missing required field(s): {', '.join(missing)}")

        secret = await asyncio.to_thread(self._core_v1.read_namespaced_secret, name, namespace)
        data = secret.data or {}
        if key not in data:
            # Only key names are listed; values must never reach an error message.
            available = ", ".join(sorted(data)) or "(none)"
            raise ValueError(f"key '{key}' not found in Secret {namespace}/{name}; available keys: {available}")

        try:
            return base64.b64decode(data[key]).decode("utf-8")
        except ValueError:
            # binascii.Error and UnicodeDecodeError are both ValueError subclasses. The
            # raw UnicodeDecodeError text quotes a byte of the secret and names no
            # Secret, so replace it with a message that locates the problem and leaks
            # nothing; ``from None`` keeps the original out of the chained traceback.
            raise ValueError(
                f"key '{key}' in Secret {namespace}/{name} is not valid base64-encoded UTF-8 text"
            ) from None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def create_kubernetes_resolver(spec: dict[str, Any]) -> KubernetesSecretResolver:
    """Entry-point factory for the ``kubernetes`` kind.

    The ``kubernetes`` package is imported inside this function, not at module load,
    so the plugin stays discoverable when the optional extra is absent. Cluster
    configuration is loaded in-cluster first (pods with a mounted service account)
    and falls back to the local kubeconfig, which suits development machines.
    *spec* is accepted to satisfy the factory signature but is not read.

    Raises:
        RuntimeError: If the ``kubernetes`` package cannot be imported.
    """
    try:
        from kubernetes import client as k8s_client
        from kubernetes import config as k8s_config
    except ImportError as exc:
        raise RuntimeError(
            "the kubernetes secret resolver requires the 'kubernetes' extra; install agentevals-cli[kubernetes]"
        ) from exc

    try:
        k8s_config.load_incluster_config()
    except k8s_config.ConfigException:
        k8s_config.load_kube_config()

    core_v1 = k8s_client.CoreV1Api()
    return KubernetesSecretResolver(core_v1)
|
|
@@ -21,6 +21,11 @@ from uuid import UUID
|
|
|
21
21
|
from google.adk.evaluation.eval_set import EvalSet
|
|
22
22
|
|
|
23
23
|
from ..config import EvalParams
|
|
24
|
+
from ..resolvers import (
|
|
25
|
+
reset_resolved_credentials,
|
|
26
|
+
resolve_credential_refs,
|
|
27
|
+
set_resolved_credentials,
|
|
28
|
+
)
|
|
24
29
|
from ..runner import RunResult, TraceResult, run_evaluation_from_traces
|
|
25
30
|
from ..storage.config import StorageSettings
|
|
26
31
|
from ..storage.models import Run, RunStatus
|
|
@@ -107,7 +112,10 @@ class AsyncRunWorker:
|
|
|
107
112
|
cancel_event = asyncio.Event()
|
|
108
113
|
hb_task = asyncio.create_task(self._heartbeat(run.run_id, worker_id, cancel_event))
|
|
109
114
|
sinks = build_sinks(run.spec.sinks or [])
|
|
115
|
+
cred_token = None
|
|
110
116
|
try:
|
|
117
|
+
if run.spec.credential_refs:
|
|
118
|
+
cred_token = set_resolved_credentials(await resolve_credential_refs(run.spec.credential_refs))
|
|
111
119
|
await self._run_evaluation(run, sinks, cancel_event)
|
|
112
120
|
except asyncio.CancelledError:
|
|
113
121
|
await self._runs.update_status(run.run_id, RunStatus.CANCELLED, error="worker cancelled")
|
|
@@ -126,6 +134,8 @@ class AsyncRunWorker:
|
|
|
126
134
|
await self._runs.update_status(run.run_id, RunStatus.FAILED, error=str(exc))
|
|
127
135
|
await sinks.emit_error(run.run_id, str(exc), run.attempt)
|
|
128
136
|
finally:
|
|
137
|
+
if cred_token is not None:
|
|
138
|
+
reset_resolved_credentials(cred_token)
|
|
129
139
|
hb_task.cancel()
|
|
130
140
|
try:
|
|
131
141
|
await hb_task
|
|
@@ -84,6 +84,15 @@ class RunSpec(BaseModel):
|
|
|
84
84
|
eval_config: dict[str, Any] = Field(default_factory=dict)
|
|
85
85
|
sinks: list[dict[str, Any]] = Field(default_factory=list)
|
|
86
86
|
context: dict[str, Any] = Field(default_factory=dict)
|
|
87
|
+
credential_refs: dict[str, dict[str, Any]] | None = Field(
|
|
88
|
+
default=None,
|
|
89
|
+
description=(
|
|
90
|
+
"Map of logical credential name to a secret reference dict. Each reference has a "
|
|
91
|
+
"'kind' (the resolver to use) plus that kind's locator fields. Resolved per run to its "
|
|
92
|
+
"secret value; never written to the process environment. How a value is used (e.g. which "
|
|
93
|
+
"judge provider it authenticates) is configured on the consumer, not the reference."
|
|
94
|
+
),
|
|
95
|
+
)
|
|
87
96
|
|
|
88
97
|
|
|
89
98
|
class Run(BaseModel):
|
|
@@ -15,6 +15,7 @@ Tests are synchronous because:
|
|
|
15
15
|
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
|
+
import importlib.util
|
|
18
19
|
import os
|
|
19
20
|
import subprocess
|
|
20
21
|
import sys
|
|
@@ -38,6 +39,11 @@ _skip_no_google = pytest.mark.skipif(
|
|
|
38
39
|
reason="GOOGLE_API_KEY not set",
|
|
39
40
|
)
|
|
40
41
|
|
|
42
|
+
_skip_no_pydantic_ai = pytest.mark.skipif(
|
|
43
|
+
importlib.util.find_spec("pydantic_ai") is None,
|
|
44
|
+
reason="pydantic_ai SDK not installed",
|
|
45
|
+
)
|
|
46
|
+
|
|
41
47
|
|
|
42
48
|
def _run_agent(
|
|
43
49
|
script: str,
|
|
@@ -305,6 +311,7 @@ class TestOpenAIAgentsZeroCode:
|
|
|
305
311
|
assert session_name in session_ids
|
|
306
312
|
|
|
307
313
|
|
|
314
|
+
@_skip_no_pydantic_ai
|
|
308
315
|
@_skip_no_openai
|
|
309
316
|
class TestPydanticAIZeroCode:
|
|
310
317
|
"""Run the Pydantic AI zero-code OTLP example and verify session grouping."""
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Kubernetes secret resolver tests.
|
|
2
|
+
|
|
3
|
+
The kubernetes client is mocked, so these run whether or not the optional
|
|
4
|
+
``kubernetes`` extra is installed.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import base64
|
|
10
|
+
import sys
|
|
11
|
+
from unittest.mock import MagicMock
|
|
12
|
+
|
|
13
|
+
import pytest
|
|
14
|
+
|
|
15
|
+
from agentevals.resolvers.kubernetes import KubernetesSecretResolver, create_kubernetes_resolver
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _client_returning(data: dict[str, str]) -> MagicMock:
|
|
19
|
+
client = MagicMock()
|
|
20
|
+
client.read_namespaced_secret.return_value = MagicMock(data=data)
|
|
21
|
+
return client
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _b64(value: str) -> str:
|
|
25
|
+
return base64.b64encode(value.encode()).decode()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TestResolve:
    """Behavior of ``KubernetesSecretResolver.resolve`` against a mocked client."""

    async def test_reads_and_base64_decodes_value(self):
        api = _client_returning({"api-key": _b64("sk-secret-value")})
        ref = {"namespace": "ns", "name": "creds", "key": "api-key"}

        resolved = await KubernetesSecretResolver(api).resolve(ref)

        assert resolved == "sk-secret-value"
        # The client takes (name, namespace) positionally, in that order.
        api.read_namespaced_secret.assert_called_once_with("creds", "ns")

    async def test_missing_fields_raise(self):
        resolver = KubernetesSecretResolver(MagicMock())
        incomplete_ref = {"kind": "kubernetes"}
        with pytest.raises(ValueError, match="namespace, name, key"):
            await resolver.resolve(incomplete_ref)

    async def test_key_not_found_lists_names_not_values(self):
        encoded_secret = _b64("sk-do-not-leak")
        api = _client_returning({"api-key": encoded_secret, "tls.crt": _b64("cert")})
        resolver = KubernetesSecretResolver(api)

        with pytest.raises(ValueError) as excinfo:
            await resolver.resolve({"namespace": "ns", "name": "creds", "key": "wrong"})

        error_text = str(excinfo.value)
        assert "api-key" in error_text
        assert "tls.crt" in error_text
        # Listing the available keys must never echo the base64-encoded secret values.
        assert encoded_secret not in error_text
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class TestFactory:
    """Behavior of the ``create_kubernetes_resolver`` entry-point factory."""

    def test_requires_kubernetes_extra(self, monkeypatch):
        # A ``None`` entry in ``sys.modules`` makes the lazy ``kubernetes`` import fail,
        # so the missing-extra path runs even when the extra is installed in the venv.
        monkeypatch.setitem(sys.modules, "kubernetes", None)
        ref = {"kind": "kubernetes"}
        with pytest.raises(RuntimeError, match="kubernetes"):
            create_kubernetes_resolver(ref)
|