agentevals-cli 0.8.2__tar.gz → 0.8.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/PKG-INFO +17 -1
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/README.md +16 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/charts/agentevals/templates/deployment.yaml +14 -2
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/charts/agentevals/values.yaml +16 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/pyproject.toml +1 -1
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/app.py +40 -3
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/uv.lock +1 -1
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.claude/skills/eval/SKILL.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.claude/skills/eval/evals/evals.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.claude/skills/inspect/SKILL.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.claude/skills/inspect/evals/evals.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.dockerignore +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.github/workflows/ci.yml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.github/workflows/publish-evaluator-sdk.yml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.github/workflows/release.yml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.gitignore +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/.mcp.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/CONTRIBUTING.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/DEVELOPMENT.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/Dockerfile +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/LICENSE +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/Makefile +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/charts/agentevals/Chart.yaml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/charts/agentevals/templates/NOTES.txt +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/charts/agentevals/templates/_helpers.tpl +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/charts/agentevals/templates/postgresql-secret.yaml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/charts/agentevals/templates/postgresql.yaml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/charts/agentevals/templates/service.yaml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/charts/agentevals/templates/serviceaccount.yaml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/docs/assets/logo-color-on-transparent.svg +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/docs/assets/logo-color.png +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/docs/assets/logo-dark-on-transparent.svg +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/docs/custom-evaluators.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/docs/eval-set-format.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/docs/otel-compatibility.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/docs/streaming.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/README.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/custom_evaluators/eval_config.yaml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/custom_evaluators/response_quality.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/custom_evaluators/tool_call_checker.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/custom_sink/README.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/custom_sink/agentevals_example_custom_sink/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/custom_sink/agentevals_example_custom_sink/sink.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/custom_sink/pyproject.toml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/dice_agent/README.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/dice_agent/agent.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/dice_agent/eval_set.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/dice_agent/main.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/dice_agent/test_streaming.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/kubernetes/README.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/langchain_agent/README.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/langchain_agent/agent.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/langchain_agent/eval_set.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/langchain_agent/main.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/langchain_agent/requirements.txt +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/langchain_agent/test_streaming.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/sdk_example/async_example.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/sdk_example/context_manager_example.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/sdk_example/decorator_example.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/sdk_example/requirements.txt +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/strands_agent/agent.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/strands_agent/eval_set.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/strands_agent/main.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/strands_agent/requirements.txt +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/adk/requirements.txt +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/adk/run.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/langchain/requirements.txt +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/langchain/run.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/ollama/requirements.txt +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/ollama/run.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/openai-agents/requirements.txt +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/openai-agents/run.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/pydantic-ai/requirements.txt +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/pydantic-ai/run.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/strands/requirements.txt +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/strands/run.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/flake.lock +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/flake.nix +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/packages/evaluator-sdk-py/README.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/packages/evaluator-sdk-py/pyproject.toml +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/types.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/samples/eval_set_helm.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/samples/evalset_helm_3_2026-02-23.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/samples/evalset_k8s_2026-02-20.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/samples/helm.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/samples/helm_2.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/samples/helm_3.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/samples/k8s.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/samples/tempo_export_with_batches.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/_protocol.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/_static/assets/index-BqibLiHO.css +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/_static/assets/index-Cl6S2lcn.js +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/_static/index.html +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/_static/logo.svg +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/_static/vite.svg +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/debug_routes.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/dependencies.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/models.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/otlp_app.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/otlp_grpc.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/otlp_processing.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/otlp_routes.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/routes.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/runs_routes.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/api/streaming_routes.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/builtin_metrics.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/cli.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/config.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/converter.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/custom_evaluators.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/eval_config_loader.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/evaluator/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/evaluator/resolver.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/evaluator/sources.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/evaluator/templates.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/evaluator/venv.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/extraction.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/genai_converter.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/loader/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/loader/auto.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/loader/base.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/loader/jaeger.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/loader/otlp.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/mcp_server.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/openai_eval_backend.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/output.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/run/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/run/fetcher.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/run/result_builder.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/run/service.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/run/sinks.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/run/worker.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/runner.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/sdk.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/storage/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/storage/config.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/storage/models.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/storage/postgres/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/storage/postgres/migrations/000001_init.down.sql +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/storage/postgres/migrations/000001_init.up.sql +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/storage/postgres/migrator.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/storage/postgres/pool.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/storage/repos/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/storage/repos/memory.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/storage/repos/postgres.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/streaming/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/streaming/incremental_processor.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/streaming/processor.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/streaming/session.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/streaming/ws_server.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/trace_attrs.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/trace_metrics.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/utils/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/utils/genai_messages.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/utils/log_buffer.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/utils/log_enrichment.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/api/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/api/test_evaluate_persistence.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/api/test_runs_routes.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/integration/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/integration/conftest.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/integration/test_evaluation_pipeline.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/integration/test_live_agents.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/integration/test_otlp_grpc_receiver.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/integration/test_session_grouping.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/integration/test_timing_stress.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/run/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/run/test_fetcher.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/run/test_result_builder.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/run/test_service.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/run/test_sinks.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/storage/__init__.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/storage/test_config.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/storage/test_memory_repos.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/storage/test_migrator.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/storage/test_models.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_api.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_cli.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_converter.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_extraction.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_genai_converter.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_jaeger_loader.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_loader_auto.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_log_enrichment.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_mcp_server.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_otlp_loader.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_otlp_receiver.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_output.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_protocol.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_runner.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_sdk.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/tests/test_trace_metrics.py +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/.gitignore +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/README.md +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/eslint.config.js +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/index.html +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/package-lock.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/package.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/public/logo.svg +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/public/vite.svg +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/App.css +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/App.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/api/client.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/assets/react.svg +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/annotation-queue/AnnotationDetailPanel.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/annotation-queue/AnnotationQueueView.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/annotation-queue/AnnotationTable.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/bug-report/BugReportModal.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/builder/BuilderHeader.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/builder/BuilderView.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/builder/EvalCaseCard.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/builder/EvalCasesList.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/builder/InvocationEditor.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/builder/JsonPreview.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/builder/MetadataEditor.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/builder/TraceUploadZone.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/builder/index.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/dashboard/DashboardView.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/dashboard/MetricScoreCard.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/dashboard/PerformanceCard.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/dashboard/PerformanceCharts.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/dashboard/SummaryStats.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/dashboard/TraceCard.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/dashboard/TraceTable.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/ComparisonPanel.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/DataSection.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/InspectorHeader.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/InspectorLayout.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/InspectorView.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/InvocationCard.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/InvocationSummaryPanel.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/MetricResultsSection.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/MetricsComparisonSection.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/PerformanceSection.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/ToolCallList.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/inspector/TrajectoryComparisonDetails.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/sidebar/Sidebar.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/streaming/LiveConversationPanel.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/streaming/LiveMessage.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/streaming/LiveStreamingView.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/streaming/SessionCard.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/streaming/SessionMetadata.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/upload/EvalSetEditorDrawer.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/upload/FileDropZone.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/upload/MetricSelector.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/upload/RawJsonPreview.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/upload/TraceEditorDrawer.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/upload/UploadView.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/components/welcome/WelcomeView.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/config.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/context/TraceContext.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/context/TraceProvider.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/index.css +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/lib/console-capture.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/lib/evalset-builder.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/lib/network-capture.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/lib/trace-helpers.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/lib/trace-loader.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/lib/trace-metadata.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/lib/trace-patcher.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/lib/types.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/lib/utils.ts +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/src/main.tsx +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/tsconfig.app.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/tsconfig.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/tsconfig.node.json +0 -0
- {agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/ui/vite.config.ts +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentevals-cli
|
|
3
|
-
Version: 0.8.2
|
|
3
|
+
Version: 0.8.3
|
|
4
4
|
Summary: Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Python: >=3.11
|
|
@@ -425,6 +425,18 @@ Yes. A custom evaluator is any program that reads JSON from stdin and writes a s
|
|
|
425
425
|
|
|
426
426
|
Yes. The OTLP receiver on port 4318 accepts standard `http/protobuf` and `http/json` trace exports, so it slots into any OpenTelemetry pipeline as just another exporter destination. If your pipeline uses gRPC (port 4317), place an [OTel Collector](https://opentelemetry.io/docs/collector/) in front to bridge gRPC to HTTP. The [Kubernetes example](examples/kubernetes/README.md) shows this pattern.
|
|
427
427
|
|
|
428
|
+
**Can I use agentevals to evaluate Claude Code, Codex, or OpenCode?**
|
|
429
|
+
|
|
430
|
+
Not today. agentevals scores agent behavior from OpenTelemetry GenAI traces (spans for model calls, tool calls, agent invocations following the [OTel GenAI semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/)). The major coding agents do not currently emit telemetry in that shape:
|
|
431
|
+
|
|
432
|
+
- **Claude Code** ships OTel telemetry as logs, not GenAI spans. A prior proof of concept on a feature branch made it work by stitching hook events into synthetic traces. Reviving that path is on the backlog, not a near-term commitment.
|
|
433
|
+
- **Codex** exposes OTel, but in a different shape we have not yet validated against the GenAI semconv.
|
|
434
|
+
- **OpenCode** did not have OTel support merged the last time we checked.
|
|
435
|
+
|
|
436
|
+
Retrofitting agentevals to ingest each harness's bespoke telemetry is multiple thousands of lines of glue code per agent, for a use case where the dominant signal is "did the final output feel right," not "did the agent call the right tool with the right arguments in the right order." That kind of vibes evaluation is interesting work for harness and coding-agent vendors themselves, but it is not what agentevals is optimized for.
|
|
437
|
+
|
|
438
|
+
agentevals is built for the opposite end of the spectrum: smaller, purpose-built, properly instrumented agents (kagent, agentregistry, custom Strands / ADK / LangChain / OpenAI Agents SDK flows) running in cloud native environments, where success is measurable through tool trajectories, response matching, and deterministic pass/fail gates. If that is your use case, we are a good fit. If you are evaluating long-running coding sessions end to end, you probably want a tool built specifically for that shape.
|
|
439
|
+
|
|
428
440
|
**How does this compare to ADK's evaluations?**
|
|
429
441
|
|
|
430
442
|
Unlike ADK's eval method, which couples agent execution with evaluation, agentevals only handles scoring: it takes pre-recorded traces and compares them against expected behavior using metrics like tool trajectory matching, response quality, and LLM-based judgments.
|
|
@@ -448,3 +460,7 @@ Langfuse is a full observability platform (requires Postgres, ClickHouse, Redis,
|
|
|
448
460
|
**How does this compare to Opik?**
|
|
449
461
|
|
|
450
462
|
Opik's primary evaluation path re-runs your application code against a dataset, incurring additional LLM costs per eval run. It also supports online evaluation rules that auto-score production traces. While Opik supports OpenTelemetry ingestion alongside its own SDK, its evaluation workflow still centers on re-execution against datasets. agentevals evaluates pre-recorded OTel traces from any framework without re-execution, and runs entirely locally with no cloud dependency.
|
|
463
|
+
|
|
464
|
+
## Acknowledgements
|
|
465
|
+
|
|
466
|
+
agentevals is built on top of [Google's Agent Development Kit](https://github.com/google/adk-python). ADK provides the evaluator protocol and the canonical eval data model (`Invocation`, `EvalSet`, `Evaluator`, prebuilt metrics) that this project extends. `google-adk` is licensed under [Apache 2.0](https://github.com/google/adk-python/blob/main/LICENSE), the same license as agentevals. Thanks to the ADK team and contributors.
|
|
@@ -397,6 +397,18 @@ Yes. A custom evaluator is any program that reads JSON from stdin and writes a s
|
|
|
397
397
|
|
|
398
398
|
Yes. The OTLP receiver on port 4318 accepts standard `http/protobuf` and `http/json` trace exports, so it slots into any OpenTelemetry pipeline as just another exporter destination. If your pipeline uses gRPC (port 4317), place an [OTel Collector](https://opentelemetry.io/docs/collector/) in front to bridge gRPC to HTTP. The [Kubernetes example](examples/kubernetes/README.md) shows this pattern.
|
|
399
399
|
|
|
400
|
+
**Can I use agentevals to evaluate Claude Code, Codex, or OpenCode?**
|
|
401
|
+
|
|
402
|
+
Not today. agentevals scores agent behavior from OpenTelemetry GenAI traces (spans for model calls, tool calls, agent invocations following the [OTel GenAI semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/)). The major coding agents do not currently emit telemetry in that shape:
|
|
403
|
+
|
|
404
|
+
- **Claude Code** ships OTel telemetry as logs, not GenAI spans. A prior proof of concept on a feature branch made it work by stitching hook events into synthetic traces. Reviving that path is on the backlog, not a near-term commitment.
|
|
405
|
+
- **Codex** exposes OTel, but in a different shape we have not yet validated against the GenAI semconv.
|
|
406
|
+
- **OpenCode** did not have OTel support merged the last time we checked.
|
|
407
|
+
|
|
408
|
+
Retrofitting agentevals to ingest each harness's bespoke telemetry is multiple thousands of lines of glue code per agent, for a use case where the dominant signal is "did the final output feel right," not "did the agent call the right tool with the right arguments in the right order." That kind of vibes evaluation is interesting work for harness and coding-agent vendors themselves, but it is not what agentevals is optimized for.
|
|
409
|
+
|
|
410
|
+
agentevals is built for the opposite end of the spectrum: smaller, purpose-built, properly instrumented agents (kagent, agentregistry, custom Strands / ADK / LangChain / OpenAI Agents SDK flows) running in cloud native environments, where success is measurable through tool trajectories, response matching, and deterministic pass/fail gates. If that is your use case, we are a good fit. If you are evaluating long-running coding sessions end to end, you probably want a tool built specifically for that shape.
|
|
411
|
+
|
|
400
412
|
**How does this compare to ADK's evaluations?**
|
|
401
413
|
|
|
402
414
|
Unlike ADK's eval method, which couples agent execution with evaluation, agentevals only handles scoring: it takes pre-recorded traces and compares them against expected behavior using metrics like tool trajectory matching, response quality, and LLM-based judgments.
|
|
@@ -420,3 +432,7 @@ Langfuse is a full observability platform (requires Postgres, ClickHouse, Redis,
|
|
|
420
432
|
**How does this compare to Opik?**
|
|
421
433
|
|
|
422
434
|
Opik's primary evaluation path re-runs your application code against a dataset, incurring additional LLM costs per eval run. It also supports online evaluation rules that auto-score production traces. While Opik supports OpenTelemetry ingestion alongside its own SDK, its evaluation workflow still centers on re-execution against datasets. agentevals evaluates pre-recorded OTel traces from any framework without re-execution, and runs entirely locally with no cloud dependency.
|
|
435
|
+
|
|
436
|
+
## Acknowledgements
|
|
437
|
+
|
|
438
|
+
agentevals is built on top of [Google's Agent Development Kit](https://github.com/google/adk-python). ADK provides the evaluator protocol and the canonical eval data model (`Invocation`, `EvalSet`, `Evaluator`, prebuilt metrics) that this project extends. `google-adk` is licensed under [Apache 2.0](https://github.com/google/adk-python/blob/main/LICENSE), the same license as agentevals. Thanks to the ADK team and contributors.
|
|
@@ -29,8 +29,9 @@ spec:
|
|
|
29
29
|
securityContext:
|
|
30
30
|
{{- toYaml .Values.podSecurityContext | nindent 8 }}
|
|
31
31
|
serviceAccountName: {{ include "agentevals.serviceAccountName" . }}
|
|
32
|
-
{{- if .Values.ephemeralVolume.enabled }}
|
|
32
|
+
{{- if or .Values.ephemeralVolume.enabled .Values.extraVolumes }}
|
|
33
33
|
volumes:
|
|
34
|
+
{{- if .Values.ephemeralVolume.enabled }}
|
|
34
35
|
- name: agentevals-tmp
|
|
35
36
|
{{- if or .Values.ephemeralVolume.sizeLimit (eq .Values.ephemeralVolume.medium "Memory") }}
|
|
36
37
|
emptyDir:
|
|
@@ -43,6 +44,10 @@ spec:
|
|
|
43
44
|
{{- else }}
|
|
44
45
|
emptyDir: {}
|
|
45
46
|
{{- end }}
|
|
47
|
+
{{- end }}
|
|
48
|
+
{{- with .Values.extraVolumes }}
|
|
49
|
+
{{- toYaml . | nindent 8 }}
|
|
50
|
+
{{- end }}
|
|
46
51
|
{{- end }}
|
|
47
52
|
containers:
|
|
48
53
|
- name: agentevals
|
|
@@ -70,6 +75,8 @@ spec:
|
|
|
70
75
|
value: "postgres"
|
|
71
76
|
- name: AGENTEVALS_DATABASE_SCHEMA
|
|
72
77
|
value: {{ .Values.database.postgres.schema | quote }}
|
|
78
|
+
- name: AGENTEVALS_AUTO_MIGRATE
|
|
79
|
+
value: {{ .Values.database.postgres.autoMigrate | quote }}
|
|
73
80
|
{{- if .Values.database.postgres.urlFile }}
|
|
74
81
|
- name: AGENTEVALS_DATABASE_URL_FILE
|
|
75
82
|
value: {{ .Values.database.postgres.urlFile | quote }}
|
|
@@ -135,10 +142,15 @@ spec:
|
|
|
135
142
|
port: http
|
|
136
143
|
initialDelaySeconds: 15
|
|
137
144
|
periodSeconds: 20
|
|
138
|
-
{{- if .Values.ephemeralVolume.enabled }}
|
|
145
|
+
{{- if or .Values.ephemeralVolume.enabled .Values.extraVolumeMounts }}
|
|
139
146
|
volumeMounts:
|
|
147
|
+
{{- if .Values.ephemeralVolume.enabled }}
|
|
140
148
|
- name: agentevals-tmp
|
|
141
149
|
mountPath: /tmp
|
|
150
|
+
{{- end }}
|
|
151
|
+
{{- with .Values.extraVolumeMounts }}
|
|
152
|
+
{{- toYaml . | nindent 12 }}
|
|
153
|
+
{{- end }}
|
|
142
154
|
{{- end }}
|
|
143
155
|
{{- with .Values.nodeSelector }}
|
|
144
156
|
nodeSelector:
|
|
@@ -159,6 +159,16 @@ env: []
|
|
|
159
159
|
# -- Extra envFrom sources (ConfigMapRef, SecretRef)
|
|
160
160
|
envFrom: []
|
|
161
161
|
|
|
162
|
+
# -- Extra volumes appended to the pod spec. Use this to mount additional
|
|
163
|
+
# config files or secrets (e.g. result-sink credentials) into the pod.
|
|
164
|
+
extraVolumes: []
|
|
165
|
+
|
|
166
|
+
# -- Extra volumeMounts appended to the main container. Pair with
|
|
167
|
+
# extraVolumes by name. securityContext.readOnlyRootFilesystem is true by
|
|
168
|
+
# default; that only makes the root filesystem read-only, mounted paths
|
|
169
|
+
# themselves are unaffected, so a writable extraVolumes entry works fine.
|
|
170
|
+
extraVolumeMounts: []
|
|
171
|
+
|
|
162
172
|
# ==============================================================================
|
|
163
173
|
# STORAGE (preview feature)
|
|
164
174
|
#
|
|
@@ -195,6 +205,12 @@ database:
|
|
|
195
205
|
urlFile: ""
|
|
196
206
|
# -- Postgres schema to use for agentevals tables.
|
|
197
207
|
schema: agentevals
|
|
208
|
+
# -- Apply pending database migrations during server startup before the
|
|
209
|
+
# HTTP listener opens. The Postgres advisory lock serialises concurrent
|
|
210
|
+
# replica starts so this is safe with replicaCount > 1. When set to
|
|
211
|
+
# false the server refuses to start if the schema is behind or dirty;
|
|
212
|
+
# run "agentevals migrate up" manually in that case.
|
|
213
|
+
autoMigrate: true
|
|
198
214
|
# -- Bundled Postgres instance for development and evaluation only.
|
|
199
215
|
# Not suitable for production. Deployed when enabled is true and url /
|
|
200
216
|
# urlFile are not set.
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "agentevals-cli"
|
|
7
|
-
version = "0.8.2"
|
|
7
|
+
version = "0.8.3"
|
|
8
8
|
description = "Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -20,7 +20,7 @@ from ..run.service import RunService
|
|
|
20
20
|
from ..run.sinks import log_registered_sinks
|
|
21
21
|
from ..run.worker import AsyncRunWorker
|
|
22
22
|
from ..storage import StorageSettings, build_repos
|
|
23
|
-
from ..storage.postgres.migrator import Migrator
|
|
23
|
+
from ..storage.postgres.migrator import Migrator, discover_migrations
|
|
24
24
|
from ..utils.log_buffer import log_buffer
|
|
25
25
|
from .debug_routes import debug_router
|
|
26
26
|
from .routes import router
|
|
@@ -31,6 +31,22 @@ if TYPE_CHECKING:
|
|
|
31
31
|
|
|
32
32
|
logger = logging.getLogger(__name__)
|
|
33
33
|
|
|
34
|
+
_TRUE_VALUES = {"true", "1", "yes", "on"}
|
|
35
|
+
_FALSE_VALUES = {"false", "0", "no", "off"}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _env_bool(name: str, *, default: bool) -> bool:
|
|
39
|
+
raw = os.getenv(name)
|
|
40
|
+
if raw is None or raw == "":
|
|
41
|
+
return default
|
|
42
|
+
val = raw.strip().lower()
|
|
43
|
+
if val in _TRUE_VALUES:
|
|
44
|
+
return True
|
|
45
|
+
if val in _FALSE_VALUES:
|
|
46
|
+
return False
|
|
47
|
+
raise ValueError(f"{name} must be one of true/false/1/0/yes/no/on/off (got: {raw!r})")
|
|
48
|
+
|
|
49
|
+
|
|
34
50
|
try:
|
|
35
51
|
from dotenv import load_dotenv
|
|
36
52
|
|
|
@@ -68,13 +84,34 @@ def _build_lifespan():
|
|
|
68
84
|
logger.error("Storage configuration invalid; /api/runs will not be available: %s", exc)
|
|
69
85
|
|
|
70
86
|
if storage_settings is not None and storage_settings.backend == "postgres":
|
|
71
|
-
logger.info("Applying any pending migrations to schema '%s'", storage_settings.schema_name)
|
|
72
87
|
migrator = Migrator(
|
|
73
88
|
dsn=storage_settings.database_url or "",
|
|
74
89
|
schema=storage_settings.schema_name,
|
|
75
90
|
lock_timeout_s=storage_settings.migrate_lock_timeout_s,
|
|
76
91
|
)
|
|
77
|
-
|
|
92
|
+
if _env_bool("AGENTEVALS_AUTO_MIGRATE", default=True):
|
|
93
|
+
logger.info("Applying any pending migrations to schema '%s'", storage_settings.schema_name)
|
|
94
|
+
await migrator.up()
|
|
95
|
+
else:
|
|
96
|
+
logger.info(
|
|
97
|
+
"AGENTEVALS_AUTO_MIGRATE is disabled; verifying schema '%s' is up to date",
|
|
98
|
+
storage_settings.schema_name,
|
|
99
|
+
)
|
|
100
|
+
status = await migrator.status()
|
|
101
|
+
if status.dirty:
|
|
102
|
+
raise RuntimeError(
|
|
103
|
+
f"schema_migrations is dirty at version {status.version}. "
|
|
104
|
+
"Resolve manually and run 'agentevals migrate force <version>', "
|
|
105
|
+
"or set AGENTEVALS_AUTO_MIGRATE=true to retry on startup."
|
|
106
|
+
)
|
|
107
|
+
current = status.version
|
|
108
|
+
pending = [m.version for m in discover_migrations() if current is None or m.version > current]
|
|
109
|
+
if pending:
|
|
110
|
+
raise RuntimeError(
|
|
111
|
+
f"Database schema is behind: pending migrations {pending}. "
|
|
112
|
+
"Run 'agentevals migrate up' to apply them, "
|
|
113
|
+
"or set AGENTEVALS_AUTO_MIGRATE=true to apply on startup."
|
|
114
|
+
)
|
|
78
115
|
|
|
79
116
|
repos = await build_repos(storage_settings)
|
|
80
117
|
app.state.storage_settings = storage_settings
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/charts/agentevals/templates/postgresql-secret.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/charts/agentevals/templates/serviceaccount.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/custom_evaluators/response_quality.py
RENAMED
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/custom_evaluators/tool_call_checker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/sdk_example/context_manager_example.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/adk/requirements.txt
RENAMED
|
File without changes
|
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/langchain/requirements.txt
RENAMED
|
File without changes
|
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/ollama/requirements.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/openai-agents/run.py
RENAMED
|
File without changes
|
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/pydantic-ai/run.py
RENAMED
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/examples/zero-code-examples/strands/requirements.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/_static/assets/index-BqibLiHO.css
RENAMED
|
File without changes
|
{agentevals_cli-0.8.2 → agentevals_cli-0.8.3}/src/agentevals/_static/assets/index-Cl6S2lcn.js
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|