agentevals-cli 0.7.1__tar.gz → 0.7.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/workflows/ci.yml +8 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/workflows/release.yml +51 -25
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/Makefile +35 -1
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/PKG-INFO +7 -3
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/README.md +6 -2
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/Chart.yaml +2 -2
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/streaming.md +12 -6
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/kubernetes/README.md +1 -2
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/pyproject.toml +1 -1
- agentevals_cli-0.7.3/samples/tempo_export_with_batches.json +1 -0
- agentevals_cli-0.7.1/src/agentevals/_static/assets/index-7YPfPT4N.js → agentevals_cli-0.7.3/src/agentevals/_static/assets/index-Cl6S2lcn.js +64 -65
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/_static/index.html +1 -1
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/routes.py +4 -26
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/cli.py +4 -3
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/config.py +7 -4
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/converter.py +19 -6
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/eval_config_loader.py +1 -1
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/extraction.py +51 -2
- agentevals_cli-0.7.3/src/agentevals/loader/__init__.py +29 -0
- agentevals_cli-0.7.3/src/agentevals/loader/auto.py +108 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/loader/otlp.py +38 -12
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/mcp_server.py +5 -6
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/runner.py +3 -15
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_api.py +50 -6
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_converter.py +33 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_extraction.py +74 -0
- agentevals_cli-0.7.3/tests/test_loader_auto.py +241 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_otlp_loader.py +125 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/api/client.ts +1 -4
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/TraceUploadZone.tsx +1 -1
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/TraceEditorDrawer.tsx +2 -2
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/UploadView.tsx +2 -1
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/context/TraceProvider.tsx +20 -2
- agentevals_cli-0.7.3/ui/src/lib/trace-loader.ts +320 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/uv.lock +1 -1
- agentevals_cli-0.7.1/src/agentevals/loader/__init__.py +0 -7
- agentevals_cli-0.7.1/ui/src/lib/trace-loader.ts +0 -249
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.claude/skills/eval/SKILL.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.claude/skills/eval/evals/evals.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.claude/skills/inspect/SKILL.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.claude/skills/inspect/evals/evals.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.dockerignore +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/workflows/publish-evaluator-sdk.yml +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.gitignore +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.mcp.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/CONTRIBUTING.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/DEVELOPMENT.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/Dockerfile +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/LICENSE +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/templates/NOTES.txt +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/templates/_helpers.tpl +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/templates/deployment.yaml +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/templates/service.yaml +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/templates/serviceaccount.yaml +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/values.yaml +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/assets/logo-color-on-transparent.svg +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/assets/logo-color.png +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/assets/logo-dark-on-transparent.svg +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/custom-evaluators.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/eval-set-format.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/otel-compatibility.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/README.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/custom_evaluators/eval_config.yaml +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/custom_evaluators/response_quality.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/custom_evaluators/tool_call_checker.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/dice_agent/README.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/dice_agent/agent.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/dice_agent/eval_set.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/dice_agent/main.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/dice_agent/test_streaming.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/README.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/agent.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/eval_set.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/main.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/requirements.txt +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/test_streaming.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/sdk_example/async_example.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/sdk_example/context_manager_example.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/sdk_example/decorator_example.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/sdk_example/requirements.txt +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/strands_agent/agent.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/strands_agent/eval_set.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/strands_agent/main.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/strands_agent/requirements.txt +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/adk/requirements.txt +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/adk/run.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/langchain/requirements.txt +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/langchain/run.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/ollama/requirements.txt +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/ollama/run.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/openai-agents/requirements.txt +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/openai-agents/run.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/pydantic-ai/requirements.txt +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/pydantic-ai/run.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/strands/requirements.txt +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/strands/run.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/flake.lock +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/flake.nix +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/packages/evaluator-sdk-py/README.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/packages/evaluator-sdk-py/pyproject.toml +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/types.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/eval_set_helm.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/evalset_helm_3_2026-02-23.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/evalset_k8s_2026-02-20.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/helm.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/helm_2.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/helm_3.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/k8s.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/__init__.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/_protocol.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/_static/assets/index-BqibLiHO.css +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/_static/logo.svg +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/_static/vite.svg +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/__init__.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/app.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/debug_routes.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/dependencies.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/models.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/otlp_app.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/otlp_grpc.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/otlp_processing.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/otlp_routes.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/streaming_routes.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/builtin_metrics.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/custom_evaluators.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/evaluator/__init__.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/evaluator/resolver.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/evaluator/sources.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/evaluator/templates.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/evaluator/venv.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/genai_converter.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/loader/base.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/loader/jaeger.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/openai_eval_backend.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/output.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/sdk.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/streaming/__init__.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/streaming/incremental_processor.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/streaming/processor.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/streaming/session.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/streaming/ws_server.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/trace_attrs.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/trace_metrics.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/utils/__init__.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/utils/genai_messages.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/utils/log_buffer.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/utils/log_enrichment.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/__init__.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/conftest.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/test_evaluation_pipeline.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/test_live_agents.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/test_otlp_grpc_receiver.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/test_session_grouping.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/test_timing_stress.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_cli.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_genai_converter.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_jaeger_loader.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_log_enrichment.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_mcp_server.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_otlp_receiver.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_output.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_protocol.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_runner.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_sdk.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_trace_metrics.py +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/.gitignore +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/README.md +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/eslint.config.js +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/index.html +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/package-lock.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/package.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/public/logo.svg +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/public/vite.svg +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/App.css +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/App.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/assets/react.svg +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/annotation-queue/AnnotationDetailPanel.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/annotation-queue/AnnotationQueueView.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/annotation-queue/AnnotationTable.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/bug-report/BugReportModal.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/BuilderHeader.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/BuilderView.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/EvalCaseCard.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/EvalCasesList.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/InvocationEditor.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/JsonPreview.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/MetadataEditor.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/index.ts +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/DashboardView.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/MetricScoreCard.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/PerformanceCard.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/PerformanceCharts.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/SummaryStats.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/TraceCard.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/TraceTable.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/ComparisonPanel.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/DataSection.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/InspectorHeader.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/InspectorLayout.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/InspectorView.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/InvocationCard.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/InvocationSummaryPanel.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/MetricResultsSection.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/MetricsComparisonSection.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/PerformanceSection.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/ToolCallList.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/TrajectoryComparisonDetails.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/sidebar/Sidebar.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/streaming/LiveConversationPanel.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/streaming/LiveMessage.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/streaming/LiveStreamingView.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/streaming/SessionCard.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/streaming/SessionMetadata.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/EvalSetEditorDrawer.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/FileDropZone.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/MetricSelector.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/RawJsonPreview.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/welcome/WelcomeView.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/config.ts +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/context/TraceContext.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/index.css +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/console-capture.ts +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/evalset-builder.ts +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/network-capture.ts +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/trace-helpers.ts +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/trace-metadata.ts +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/trace-patcher.ts +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/types.ts +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/utils.ts +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/main.tsx +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/tsconfig.app.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/tsconfig.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/tsconfig.node.json +0 -0
- {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/vite.config.ts +0 -0
|
@@ -18,6 +18,8 @@ jobs:
|
|
|
18
18
|
runs-on: ubuntu-latest
|
|
19
19
|
steps:
|
|
20
20
|
- uses: actions/checkout@v6
|
|
21
|
+
with:
|
|
22
|
+
ref: ${{ github.event.inputs.tag || github.ref_name }}
|
|
21
23
|
|
|
22
24
|
- uses: astral-sh/setup-uv@v7
|
|
23
25
|
with:
|
|
@@ -42,29 +44,13 @@ jobs:
|
|
|
42
44
|
dist/core/*.whl
|
|
43
45
|
dist/bundle/*.whl
|
|
44
46
|
|
|
45
|
-
github-release:
|
|
46
|
-
needs: build
|
|
47
|
-
runs-on: ubuntu-latest
|
|
48
|
-
permissions:
|
|
49
|
-
contents: write
|
|
50
|
-
|
|
51
|
-
steps:
|
|
52
|
-
- uses: actions/download-artifact@v8
|
|
53
|
-
with:
|
|
54
|
-
name: wheels
|
|
55
|
-
path: dist/
|
|
56
|
-
|
|
57
|
-
- uses: softprops/action-gh-release@v2.5.0
|
|
58
|
-
with:
|
|
59
|
-
tag_name: ${{ github.event.inputs.tag || github.ref_name }}
|
|
60
|
-
files: dist/**/*.whl
|
|
61
|
-
generate_release_notes: true
|
|
62
|
-
|
|
63
47
|
publish:
|
|
64
48
|
needs: build
|
|
65
49
|
runs-on: ubuntu-latest
|
|
66
50
|
steps:
|
|
67
51
|
- uses: actions/checkout@v6
|
|
52
|
+
with:
|
|
53
|
+
ref: ${{ github.event.inputs.tag || github.ref_name }}
|
|
68
54
|
|
|
69
55
|
- uses: astral-sh/setup-uv@v7
|
|
70
56
|
with:
|
|
@@ -93,13 +79,34 @@ jobs:
|
|
|
93
79
|
uv publish dist/* --token ${{ secrets.PYPI_TOKEN }}
|
|
94
80
|
rm -rf src/agentevals/_static
|
|
95
81
|
|
|
82
|
+
github-release:
|
|
83
|
+
needs: publish
|
|
84
|
+
runs-on: ubuntu-latest
|
|
85
|
+
permissions:
|
|
86
|
+
contents: write
|
|
87
|
+
|
|
88
|
+
steps:
|
|
89
|
+
- uses: actions/download-artifact@v8
|
|
90
|
+
with:
|
|
91
|
+
name: wheels
|
|
92
|
+
path: dist/
|
|
93
|
+
|
|
94
|
+
- uses: softprops/action-gh-release@v2.5.0
|
|
95
|
+
with:
|
|
96
|
+
tag_name: ${{ github.event.inputs.tag || github.ref_name }}
|
|
97
|
+
files: dist/**/*.whl
|
|
98
|
+
generate_release_notes: true
|
|
99
|
+
|
|
96
100
|
push-docker:
|
|
101
|
+
needs: github-release
|
|
97
102
|
runs-on: ubuntu-latest
|
|
98
103
|
permissions:
|
|
99
104
|
contents: read
|
|
100
105
|
packages: write
|
|
101
106
|
steps:
|
|
102
107
|
- uses: actions/checkout@v6
|
|
108
|
+
with:
|
|
109
|
+
ref: ${{ github.event.inputs.tag || github.ref_name }}
|
|
103
110
|
|
|
104
111
|
- name: Login to GitHub Container Registry
|
|
105
112
|
uses: docker/login-action@v4
|
|
@@ -114,13 +121,6 @@ jobs:
|
|
|
114
121
|
- name: Set up Docker Buildx
|
|
115
122
|
uses: docker/setup-buildx-action@v4
|
|
116
123
|
|
|
117
|
-
- name: Set appVersion in Chart.yaml
|
|
118
|
-
run: |
|
|
119
|
-
VERSION="${TAG#v}"
|
|
120
|
-
sed -i "s/^appVersion:.*/appVersion: \"$VERSION\"/" charts/agentevals/Chart.yaml
|
|
121
|
-
env:
|
|
122
|
-
TAG: ${{ github.event.inputs.tag || github.ref_name }}
|
|
123
|
-
|
|
124
124
|
- name: Build and push
|
|
125
125
|
run: |
|
|
126
126
|
VERSION="${TAG#v}"
|
|
@@ -129,3 +129,29 @@ jobs:
|
|
|
129
129
|
DOCKER_TAG="$VERSION"
|
|
130
130
|
env:
|
|
131
131
|
TAG: ${{ github.event.inputs.tag || github.ref_name }}
|
|
132
|
+
|
|
133
|
+
push-helm-chart:
|
|
134
|
+
needs: push-docker
|
|
135
|
+
runs-on: ubuntu-latest
|
|
136
|
+
permissions:
|
|
137
|
+
contents: read
|
|
138
|
+
packages: write
|
|
139
|
+
steps:
|
|
140
|
+
- uses: actions/checkout@v6
|
|
141
|
+
with:
|
|
142
|
+
ref: ${{ github.event.inputs.tag || github.ref_name }}
|
|
143
|
+
|
|
144
|
+
- name: Login to GitHub Container Registry
|
|
145
|
+
uses: docker/login-action@v4
|
|
146
|
+
with:
|
|
147
|
+
registry: ghcr.io
|
|
148
|
+
username: ${{ github.actor }}
|
|
149
|
+
password: ${{ secrets.GITHUB_TOKEN }}
|
|
150
|
+
|
|
151
|
+
- name: Publish Helm chart to GHCR (OCI)
|
|
152
|
+
env:
|
|
153
|
+
TAG: ${{ github.event.inputs.tag || github.ref_name }}
|
|
154
|
+
HELM_REPO: oci://ghcr.io/${{ github.repository }}
|
|
155
|
+
run: |
|
|
156
|
+
export HELM_CHART_VERSION="${TAG#v}"
|
|
157
|
+
make helm-publish
|
|
@@ -9,7 +9,13 @@ DOCKER_IMAGE_REF := $(if $(DOCKER_REGISTRY),$(DOCKER_REGISTRY:%/=%)/$(DOCKER_IMA
|
|
|
9
9
|
# Multi-arch build (requires docker buildx). Manifest lists must be pushed — use build-docker-local for a single-arch --load.
|
|
10
10
|
PLATFORMS ?= linux/amd64,linux/arm64
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
HELM_REPO ?= oci://ghcr.io/agentevals-dev/agentevals
|
|
13
|
+
HELM_DIST_FOLDER ?= dist/helm
|
|
14
|
+
HELM_CHART_DIR ?= charts/agentevals
|
|
15
|
+
HELM_CHART_OCI_URL ?= $(HELM_REPO)/helm
|
|
16
|
+
HELM_CHART_VERSION ?= $(VERSION)
|
|
17
|
+
|
|
18
|
+
.PHONY: build build-bundle build-docker build-ui release clean dev-backend dev-frontend dev-bundle test test-unit test-integration test-e2e helm-lint helm-template helm-test helm-cleanup helm-package helm-publish
|
|
13
19
|
|
|
14
20
|
build:
|
|
15
21
|
uv build
|
|
@@ -70,3 +76,31 @@ test-e2e:
|
|
|
70
76
|
clean:
|
|
71
77
|
rm -rf dist/ build/ src/agentevals/_static/ ui/dist/
|
|
72
78
|
find . -name '*.egg-info' -type d -exec rm -rf {} + 2>/dev/null || true
|
|
79
|
+
|
|
80
|
+
.PHONY: helm-lint
|
|
81
|
+
helm-lint:
|
|
82
|
+
helm lint "$(HELM_CHART_DIR)"
|
|
83
|
+
|
|
84
|
+
# Render templates to catch YAML/Helm errors (default values + ephemeralVolume disabled path).
|
|
85
|
+
.PHONY: helm-template
|
|
86
|
+
helm-template:
|
|
87
|
+
helm template agentevals "$(HELM_CHART_DIR)" --namespace agentevals >/dev/null
|
|
88
|
+
helm template agentevals "$(HELM_CHART_DIR)" --namespace agentevals \
|
|
89
|
+
--set ephemeralVolume.enabled=false >/dev/null
|
|
90
|
+
|
|
91
|
+
.PHONY: helm-test
|
|
92
|
+
helm-test: helm-lint helm-template
|
|
93
|
+
|
|
94
|
+
.PHONY: helm-cleanup
|
|
95
|
+
helm-cleanup:
|
|
96
|
+
rm -f $(HELM_DIST_FOLDER)/agentevals-*.tgz
|
|
97
|
+
|
|
98
|
+
.PHONY: helm-package
|
|
99
|
+
helm-package: helm-cleanup
|
|
100
|
+
mkdir -p $(HELM_DIST_FOLDER)
|
|
101
|
+
helm package "$(HELM_CHART_DIR)" -d "$(HELM_DIST_FOLDER)" \
|
|
102
|
+
--version "$(HELM_CHART_VERSION)" --app-version "$(HELM_CHART_VERSION)"
|
|
103
|
+
|
|
104
|
+
.PHONY: helm-publish
|
|
105
|
+
helm-publish: helm-package
|
|
106
|
+
helm push "$(HELM_DIST_FOLDER)/agentevals-$(HELM_CHART_VERSION).tgz" "$(HELM_CHART_OCI_URL)"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentevals-cli
|
|
3
|
-
Version: 0.7.1
|
|
3
|
+
Version: 0.7.3
|
|
4
4
|
Summary: Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Python: >=3.11
|
|
@@ -300,12 +300,16 @@ docker run -p 8001:8001 -p 4317:4317 -p 4318:4318 agentevals
|
|
|
300
300
|
|
|
301
301
|
### Helm
|
|
302
302
|
|
|
303
|
-
|
|
303
|
+
The Helm chart is published as an OCI artifact to GitHub Container Registry:
|
|
304
304
|
|
|
305
305
|
```bash
|
|
306
|
-
helm install agentevals
|
|
306
|
+
helm install agentevals oci://ghcr.io/agentevals-dev/agentevals/helm/agentevals
|
|
307
307
|
```
|
|
308
308
|
|
|
309
|
+
Pass `--version <x.y.z>` to pin to a specific release. Available versions are listed under [packages](https://github.com/agentevals-dev/agentevals/pkgs/container/agentevals%2Fhelm%2Fagentevals).
|
|
310
|
+
|
|
311
|
+
The source for the chart lives in [`charts/agentevals/`](charts/agentevals/) if you want to install from a local checkout instead.
|
|
312
|
+
|
|
309
313
|
See the [Kubernetes example](examples/kubernetes/README.md) for an end-to-end walkthrough deploying agentevals alongside kagent and an OTel Collector on Kubernetes.
|
|
310
314
|
|
|
311
315
|
## MCP Server
|
|
@@ -274,12 +274,16 @@ docker run -p 8001:8001 -p 4317:4317 -p 4318:4318 agentevals
|
|
|
274
274
|
|
|
275
275
|
### Helm
|
|
276
276
|
|
|
277
|
-
|
|
277
|
+
The Helm chart is published as an OCI artifact to GitHub Container Registry:
|
|
278
278
|
|
|
279
279
|
```bash
|
|
280
|
-
helm install agentevals
|
|
280
|
+
helm install agentevals oci://ghcr.io/agentevals-dev/agentevals/helm/agentevals
|
|
281
281
|
```
|
|
282
282
|
|
|
283
|
+
Pass `--version <x.y.z>` to pin to a specific release. Available versions are listed under [packages](https://github.com/agentevals-dev/agentevals/pkgs/container/agentevals%2Fhelm%2Fagentevals).
|
|
284
|
+
|
|
285
|
+
The source for the chart lives in [`charts/agentevals/`](charts/agentevals/) if you want to install from a local checkout instead.
|
|
286
|
+
|
|
283
287
|
See the [Kubernetes example](examples/kubernetes/README.md) for an end-to-end walkthrough deploying agentevals alongside kagent and an OTel Collector on Kubernetes.
|
|
284
288
|
|
|
285
289
|
## MCP Server
|
|
@@ -66,13 +66,18 @@ See [examples/README.md](../examples/README.md) for details on supported instrum
|
|
|
66
66
|
|
|
67
67
|
### OTLP/JSON Support
|
|
68
68
|
|
|
69
|
-
Native OpenTelemetry format
|
|
69
|
+
Native OpenTelemetry format. The CLI auto-detects Jaeger vs OTLP from
|
|
70
|
+
file contents, so `.json` and `.jsonl` exports from Tempo, Jaeger, or
|
|
71
|
+
the OTel collector all work without a `--format` flag:
|
|
70
72
|
|
|
71
73
|
```bash
|
|
72
|
-
# Load
|
|
73
|
-
agentevals run trace.otlp.json --
|
|
74
|
+
# Load any trace file directly; format is auto-detected
|
|
75
|
+
agentevals run trace.otlp.json --eval-set eval.json
|
|
74
76
|
```
|
|
75
77
|
|
|
78
|
+
Pass `--format otlp-json` (or `jaeger-json`) only as an override when
|
|
79
|
+
auto-detection fails on a non-standard export.
|
|
80
|
+
|
|
76
81
|
### Real-time Span Streaming
|
|
77
82
|
|
|
78
83
|
The `AgentEvalsStreamingProcessor` is an OTel `SpanProcessor` that streams spans over WebSocket as they complete:
|
|
@@ -311,6 +316,7 @@ This installs `opentelemetry-sdk>=1.20.0`. Agent code also needs `websockets` fo
|
|
|
311
316
|
## Compatibility
|
|
312
317
|
|
|
313
318
|
All existing workflows continue to work:
|
|
314
|
-
-
|
|
315
|
-
|
|
316
|
-
-
|
|
319
|
+
- Trace files (Jaeger or OTLP, including Tempo exports) auto-detect by
|
|
320
|
+
content: `agentevals run trace.json --eval-set ...`
|
|
321
|
+
- Pass `--format` only to override detection on non-standard exports.
|
|
322
|
+
- Web UI upload flow unchanged.
|
|
@@ -25,8 +25,7 @@ kagent (gRPC :4317) --> OTel Collector( optional ) --> agentevals (gRPC :4317 /
|
|
|
25
25
|
### 1. agentevals
|
|
26
26
|
|
|
27
27
|
```bash
|
|
28
|
-
helm install agentevals
|
|
29
|
-
--set tag=0.6.3
|
|
28
|
+
helm install agentevals oci://ghcr.io/agentevals-dev/agentevals/helm/agentevals
|
|
30
29
|
```
|
|
31
30
|
|
|
32
31
|
This creates a single pod exposing:
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "agentevals-cli"
|
|
7
|
-
version = "0.7.1"
|
|
7
|
+
version = "0.7.3"
|
|
8
8
|
description = "Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|