agentevals-cli 0.9.0__tar.gz → 0.9.2__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.github/workflows/ci.yml +4 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.github/workflows/publish-evaluator-sdk.yml +3 -1
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.github/workflows/release.yml +2 -6
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/Makefile +1 -1
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/PKG-INFO +1 -1
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/docs/custom-evaluators.md +20 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/custom_evaluators/eval_config.yaml +1 -0
- agentevals_cli-0.9.2/examples/custom_evaluators/eval_config_openai_eval.yaml +18 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/pyproject.toml +5 -2
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/__init__.py +1 -1
- agentevals_cli-0.9.0/src/agentevals/_static/assets/index-f8LUVQc3.js → agentevals_cli-0.9.2/src/agentevals/_static/assets/index-RIquRPno.js +1 -1
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/_static/index.html +1 -1
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/config.py +15 -7
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/converter.py +19 -15
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/extraction.py +38 -8
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/openai_eval_backend.py +40 -19
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_cli.py +18 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_converter.py +131 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_extraction.py +50 -0
- agentevals_cli-0.9.2/tests/test_openai_eval_backend.py +116 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/package-lock.json +10 -10
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/sidebar/Sidebar.tsx +1 -1
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/uv.lock +1 -2
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.claude/skills/eval/SKILL.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.claude/skills/eval/evals/evals.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.claude/skills/inspect/SKILL.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.claude/skills/inspect/evals/evals.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.dockerignore +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.gitignore +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/.mcp.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/CONTRIBUTING.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/DEVELOPMENT.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/Dockerfile +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/LICENSE +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/README.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/charts/agentevals/Chart.yaml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/charts/agentevals/templates/NOTES.txt +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/charts/agentevals/templates/_helpers.tpl +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/charts/agentevals/templates/deployment.yaml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/charts/agentevals/templates/postgresql-secret.yaml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/charts/agentevals/templates/postgresql.yaml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/charts/agentevals/templates/service.yaml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/charts/agentevals/templates/serviceaccount.yaml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/charts/agentevals/values.yaml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/docs/assets/logo-color-on-transparent.svg +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/docs/assets/logo-color.png +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/docs/assets/logo-dark-on-transparent.svg +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/docs/eval-set-format.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/docs/otel-compatibility.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/docs/streaming.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/README.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/custom_evaluators/response_quality.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/custom_evaluators/tool_call_checker.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/custom_sink/README.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/custom_sink/agentevals_example_custom_sink/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/custom_sink/agentevals_example_custom_sink/sink.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/custom_sink/pyproject.toml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/dice_agent/README.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/dice_agent/agent.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/dice_agent/eval_set.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/dice_agent/main.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/dice_agent/test_streaming.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/kubernetes/README.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/langchain_agent/README.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/langchain_agent/agent.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/langchain_agent/eval_set.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/langchain_agent/main.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/langchain_agent/requirements.txt +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/langchain_agent/test_streaming.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/sdk_example/async_example.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/sdk_example/context_manager_example.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/sdk_example/decorator_example.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/sdk_example/requirements.txt +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/strands_agent/agent.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/strands_agent/eval_set.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/strands_agent/main.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/strands_agent/requirements.txt +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/adk/requirements.txt +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/adk/run.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/langchain/requirements.txt +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/langchain/run.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/ollama/requirements.txt +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/ollama/run.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/openai-agents/requirements.txt +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/openai-agents/run.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/pydantic-ai/requirements.txt +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/pydantic-ai/run.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/strands/requirements.txt +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/examples/zero-code-examples/strands/run.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/flake.lock +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/flake.nix +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/packages/evaluator-sdk-py/README.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/packages/evaluator-sdk-py/pyproject.toml +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/types.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/samples/eval_set_helm.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/samples/evalset_helm_3_2026-02-23.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/samples/evalset_k8s_2026-02-20.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/samples/helm.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/samples/helm_2.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/samples/helm_3.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/samples/k8s.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/samples/tempo_export_with_batches.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/_protocol.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/_static/assets/index-BqibLiHO.css +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/_static/logo.svg +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/_static/vite.svg +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/app.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/debug_routes.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/dependencies.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/models.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/otlp_app.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/otlp_grpc.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/otlp_processing.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/otlp_routes.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/routes.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/runs_routes.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/api/streaming_routes.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/builtin_metrics.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/cli.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/custom_evaluators.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/eval_config_loader.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/evaluator/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/evaluator/resolver.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/evaluator/sources.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/evaluator/templates.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/evaluator/venv.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/genai_converter.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/loader/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/loader/auto.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/loader/base.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/loader/jaeger.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/loader/otlp.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/mcp_server.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/output.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/run/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/run/fetcher.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/run/result_builder.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/run/service.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/run/sinks.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/run/worker.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/runner.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/sdk.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/storage/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/storage/config.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/storage/models.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/storage/postgres/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/storage/postgres/migrations/000001_init.down.sql +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/storage/postgres/migrations/000001_init.up.sql +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/storage/postgres/migrator.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/storage/postgres/pool.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/storage/repos/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/storage/repos/memory.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/storage/repos/postgres.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/streaming/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/streaming/incremental_processor.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/streaming/processor.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/streaming/session.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/streaming/ws_server.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/trace_attrs.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/trace_metrics.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/utils/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/utils/genai_messages.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/utils/log_buffer.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/src/agentevals/utils/log_enrichment.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/api/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/api/test_evaluate_persistence.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/api/test_runs_routes.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/integration/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/integration/conftest.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/integration/test_evaluation_pipeline.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/integration/test_live_agents.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/integration/test_otlp_grpc_receiver.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/integration/test_session_grouping.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/integration/test_timing_stress.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/run/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/run/test_fetcher.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/run/test_result_builder.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/run/test_service.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/run/test_sinks.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/storage/__init__.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/storage/test_config.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/storage/test_memory_repos.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/storage/test_migrator.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/storage/test_models.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_api.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_eval_config_loader.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_genai_converter.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_jaeger_loader.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_loader_auto.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_log_enrichment.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_mcp_server.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_otlp_loader.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_otlp_receiver.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_output.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_protocol.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_runner.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_sdk.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/tests/test_trace_metrics.py +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/.gitignore +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/README.md +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/eslint.config.js +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/index.html +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/package.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/public/logo.svg +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/public/vite.svg +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/App.css +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/App.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/api/client.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/assets/react.svg +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/annotation-queue/AnnotationDetailPanel.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/annotation-queue/AnnotationQueueView.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/annotation-queue/AnnotationTable.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/bug-report/BugReportModal.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/builder/BuilderHeader.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/builder/BuilderView.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/builder/EvalCaseCard.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/builder/EvalCasesList.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/builder/InvocationEditor.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/builder/JsonPreview.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/builder/MetadataEditor.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/builder/TraceUploadZone.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/builder/index.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/dashboard/DashboardView.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/dashboard/MetricScoreCard.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/dashboard/PerformanceCard.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/dashboard/PerformanceCharts.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/dashboard/SummaryStats.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/dashboard/TraceCard.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/dashboard/TraceTable.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/ComparisonPanel.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/DataSection.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/InspectorHeader.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/InspectorLayout.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/InspectorView.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/InvocationCard.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/InvocationSummaryPanel.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/MetricResultsSection.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/MetricsComparisonSection.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/PerformanceSection.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/ToolCallList.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/inspector/TrajectoryComparisonDetails.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/streaming/LiveConversationPanel.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/streaming/LiveMessage.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/streaming/LiveStreamingView.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/streaming/SessionCard.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/streaming/SessionMetadata.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/upload/EvalSetEditorDrawer.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/upload/FileDropZone.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/upload/MetricSelector.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/upload/RawJsonPreview.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/upload/TraceEditorDrawer.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/upload/UploadView.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/components/welcome/WelcomeView.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/config.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/context/TraceContext.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/context/TraceProvider.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/index.css +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/lib/console-capture.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/lib/eval-config.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/lib/evalset-builder.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/lib/network-capture.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/lib/trace-helpers.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/lib/trace-loader.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/lib/trace-metadata.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/lib/trace-patcher.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/lib/types.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/lib/utils.ts +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/src/main.tsx +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/tsconfig.app.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/tsconfig.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/tsconfig.node.json +0 -0
- {agentevals_cli-0.9.0 → agentevals_cli-0.9.2}/ui/vite.config.ts +0 -0
|
@@ -22,6 +22,8 @@ jobs:
|
|
|
22
22
|
runs-on: ubuntu-latest
|
|
23
23
|
steps:
|
|
24
24
|
- uses: actions/checkout@v6
|
|
25
|
+
with:
|
|
26
|
+
fetch-depth: 0
|
|
25
27
|
|
|
26
28
|
- uses: astral-sh/setup-uv@v7
|
|
27
29
|
with:
|
|
@@ -46,6 +48,8 @@ jobs:
|
|
|
46
48
|
python-version: ["3.11", "3.12", "3.13"]
|
|
47
49
|
steps:
|
|
48
50
|
- uses: actions/checkout@v6
|
|
51
|
+
with:
|
|
52
|
+
fetch-depth: 0
|
|
49
53
|
|
|
50
54
|
- uses: astral-sh/setup-uv@v7
|
|
51
55
|
with:
|
|
@@ -20,6 +20,7 @@ jobs:
|
|
|
20
20
|
- uses: actions/checkout@v6
|
|
21
21
|
with:
|
|
22
22
|
ref: ${{ github.event.inputs.tag || github.ref_name }}
|
|
23
|
+
fetch-depth: 0
|
|
23
24
|
|
|
24
25
|
- uses: astral-sh/setup-uv@v7
|
|
25
26
|
with:
|
|
@@ -31,9 +32,6 @@ jobs:
|
|
|
31
32
|
cache: npm
|
|
32
33
|
cache-dependency-path: ui/package-lock.json
|
|
33
34
|
|
|
34
|
-
- name: Set version from tag
|
|
35
|
-
run: uv version "${{ github.event.inputs.tag || github.ref_name }}" --package agentevals-cli
|
|
36
|
-
|
|
37
35
|
- name: Build core and bundled wheels
|
|
38
36
|
run: make release
|
|
39
37
|
|
|
@@ -51,6 +49,7 @@ jobs:
|
|
|
51
49
|
- uses: actions/checkout@v6
|
|
52
50
|
with:
|
|
53
51
|
ref: ${{ github.event.inputs.tag || github.ref_name }}
|
|
52
|
+
fetch-depth: 0
|
|
54
53
|
|
|
55
54
|
- uses: astral-sh/setup-uv@v7
|
|
56
55
|
with:
|
|
@@ -65,11 +64,8 @@ jobs:
|
|
|
65
64
|
# Same bundle as `make release` / `build-bundle`: wheel must include ui/dist in src/agentevals/_static
|
|
66
65
|
# (see [tool.hatch.build] artifacts in pyproject.toml).
|
|
67
66
|
- name: Release Python package (wheel + sdist with bundled UI)
|
|
68
|
-
env:
|
|
69
|
-
VERSION: ${{ github.event.inputs.tag || github.ref_name }}
|
|
70
67
|
run: |
|
|
71
68
|
uv sync --package agentevals-cli --all-extras
|
|
72
|
-
uv version "$VERSION" --package agentevals-cli
|
|
73
69
|
|
|
74
70
|
make build-ui
|
|
75
71
|
rm -rf src/agentevals/_static
|
|
@@ -317,6 +317,26 @@ The `grader.evaluation_metric` field selects the similarity algorithm:
|
|
|
317
317
|
| `rouge_1` through `rouge_5` | Unigram through 5-gram overlap (F-measure) |
|
|
318
318
|
| `rouge_l` | Longest common subsequence overlap (F-measure) |
|
|
319
319
|
|
|
320
|
+
### Label Model Grader
|
|
321
|
+
|
|
322
|
+
Scores responses without a golden reference. The grader model reads each response and assigns it one label from the fixed `labels` list. The labels that count as passing are listed in `passing_labels` in the config.
|
|
323
|
+
|
|
324
|
+
```yaml
|
|
325
|
+
evaluators:
|
|
326
|
+
- name: quality_check
|
|
327
|
+
type: openai_eval
|
|
328
|
+
grader:
|
|
329
|
+
type: label_model
|
|
330
|
+
model: gpt-4o-mini
|
|
331
|
+
input:
|
|
332
|
+
- role: user
|
|
333
|
+
content: "Rate this response: {{ item.actual_response }}"
|
|
334
|
+
labels: [good, bad]
|
|
335
|
+
passing_labels: [good]
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
The `threshold` field is not used for `label_model`. A response passes if its assigned label is in `passing_labels`.
|
|
339
|
+
|
|
320
340
|
### How it works
|
|
321
341
|
|
|
322
342
|
Under the hood, agentevals creates an ephemeral eval on OpenAI, submits the actual and expected responses as JSONL items, polls for results, and cleans up. The agent's response and the golden reference are both placed in the `item` namespace (with `include_sample_schema: false`), so OpenAI only grades the provided text without generating any model outputs.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Eval config using OpenAI Evals API graders.
|
|
2
|
+
# Requires OPENAI_API_KEY to be set.
|
|
3
|
+
#
|
|
4
|
+
# Run with:
|
|
5
|
+
# agentevals run samples/helm.json \
|
|
6
|
+
# --config examples/custom_evaluators/eval_config_openai_eval.yaml
|
|
7
|
+
|
|
8
|
+
evaluators:
|
|
9
|
+
- name: quality_check
|
|
10
|
+
type: openai_eval
|
|
11
|
+
grader:
|
|
12
|
+
type: label_model
|
|
13
|
+
model: gpt-4o-mini
|
|
14
|
+
input:
|
|
15
|
+
- role: user
|
|
16
|
+
content: "Rate this response: {{ item.actual_response }}"
|
|
17
|
+
labels: [good, bad]
|
|
18
|
+
passing_labels: [good]
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
[build-system]
|
|
2
|
-
requires = ["hatchling"]
|
|
2
|
+
requires = ["hatchling", "hatch-vcs"]
|
|
3
3
|
build-backend = "hatchling.build"
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "agentevals-cli"
|
|
7
|
-
|
|
7
|
+
dynamic = ["version"]
|
|
8
8
|
description = "Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -40,6 +40,9 @@ postgres = [
|
|
|
40
40
|
[project.scripts]
|
|
41
41
|
agentevals = "agentevals.cli:main"
|
|
42
42
|
|
|
43
|
+
[tool.hatch.version]
|
|
44
|
+
source = "vcs"
|
|
45
|
+
|
|
43
46
|
[tool.hatch.build]
|
|
44
47
|
artifacts = ["src/agentevals/_static/**"]
|
|
45
48
|
|