agentevals-cli 0.6.4__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/PKG-INFO +16 -37
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/README.md +14 -35
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/docs/otel-compatibility.md +104 -10
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/README.md +4 -1
- agentevals_cli-0.7.1/examples/zero-code-examples/pydantic-ai/requirements.txt +5 -0
- agentevals_cli-0.7.1/examples/zero-code-examples/pydantic-ai/run.py +105 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/flake.lock +0 -21
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/flake.nix +21 -13
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/pyproject.toml +14 -2
- agentevals_cli-0.6.4/src/agentevals/_static/assets/index-X7q-J7YQ.js → agentevals_cli-0.7.1/src/agentevals/_static/assets/index-7YPfPT4N.js +12 -12
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/_static/index.html +1 -1
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/api/app.py +2 -5
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/api/models.py +10 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/api/routes.py +191 -2
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/builtin_metrics.py +123 -1
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/config.py +35 -18
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/extraction.py +3 -3
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/loader/otlp.py +55 -13
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/output.py +77 -19
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/runner.py +98 -31
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/trace_metrics.py +60 -14
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/integration/test_live_agents.py +60 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_api.py +315 -5
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_extraction.py +2 -2
- agentevals_cli-0.7.1/tests/test_otlp_loader.py +454 -0
- agentevals_cli-0.7.1/tests/test_trace_metrics.py +519 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/upload/MetricSelector.tsx +46 -31
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/lib/types.ts +31 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/uv.lock +764 -27
- agentevals_cli-0.6.4/tests/test_otlp_loader.py +0 -210
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.claude/skills/eval/SKILL.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.claude/skills/eval/evals/evals.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.claude/skills/inspect/SKILL.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.claude/skills/inspect/evals/evals.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.dockerignore +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.github/workflows/ci.yml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.github/workflows/publish-evaluator-sdk.yml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.github/workflows/release.yml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.gitignore +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/.mcp.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/CONTRIBUTING.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/DEVELOPMENT.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/Dockerfile +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/LICENSE +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/Makefile +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/charts/agentevals/Chart.yaml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/charts/agentevals/templates/NOTES.txt +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/charts/agentevals/templates/_helpers.tpl +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/charts/agentevals/templates/deployment.yaml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/charts/agentevals/templates/service.yaml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/charts/agentevals/templates/serviceaccount.yaml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/charts/agentevals/values.yaml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/docs/assets/logo-color-on-transparent.svg +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/docs/assets/logo-color.png +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/docs/assets/logo-dark-on-transparent.svg +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/docs/custom-evaluators.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/docs/eval-set-format.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/docs/streaming.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/custom_evaluators/eval_config.yaml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/custom_evaluators/response_quality.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/custom_evaluators/tool_call_checker.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/dice_agent/README.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/dice_agent/agent.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/dice_agent/eval_set.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/dice_agent/main.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/dice_agent/test_streaming.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/kubernetes/README.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/langchain_agent/README.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/langchain_agent/agent.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/langchain_agent/eval_set.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/langchain_agent/main.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/langchain_agent/requirements.txt +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/langchain_agent/test_streaming.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/sdk_example/async_example.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/sdk_example/context_manager_example.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/sdk_example/decorator_example.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/sdk_example/requirements.txt +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/strands_agent/agent.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/strands_agent/eval_set.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/strands_agent/main.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/strands_agent/requirements.txt +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/zero-code-examples/adk/requirements.txt +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/zero-code-examples/adk/run.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/zero-code-examples/langchain/requirements.txt +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/zero-code-examples/langchain/run.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/zero-code-examples/ollama/requirements.txt +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/zero-code-examples/ollama/run.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/zero-code-examples/openai-agents/requirements.txt +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/zero-code-examples/openai-agents/run.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/zero-code-examples/strands/requirements.txt +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/examples/zero-code-examples/strands/run.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/packages/evaluator-sdk-py/README.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/packages/evaluator-sdk-py/pyproject.toml +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/types.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/samples/eval_set_helm.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/samples/evalset_helm_3_2026-02-23.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/samples/evalset_k8s_2026-02-20.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/samples/helm.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/samples/helm_2.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/samples/helm_3.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/samples/k8s.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/__init__.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/_protocol.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/_static/assets/index-BqibLiHO.css +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/_static/logo.svg +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/_static/vite.svg +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/api/__init__.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/api/debug_routes.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/api/dependencies.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/api/otlp_app.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/api/otlp_grpc.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/api/otlp_processing.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/api/otlp_routes.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/api/streaming_routes.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/cli.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/converter.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/custom_evaluators.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/eval_config_loader.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/evaluator/__init__.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/evaluator/resolver.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/evaluator/sources.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/evaluator/templates.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/evaluator/venv.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/genai_converter.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/loader/__init__.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/loader/base.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/loader/jaeger.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/mcp_server.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/openai_eval_backend.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/sdk.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/streaming/__init__.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/streaming/incremental_processor.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/streaming/processor.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/streaming/session.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/streaming/ws_server.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/trace_attrs.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/utils/__init__.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/utils/genai_messages.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/utils/log_buffer.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/src/agentevals/utils/log_enrichment.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/integration/__init__.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/integration/conftest.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/integration/test_evaluation_pipeline.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/integration/test_otlp_grpc_receiver.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/integration/test_session_grouping.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/integration/test_timing_stress.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_cli.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_converter.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_genai_converter.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_jaeger_loader.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_log_enrichment.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_mcp_server.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_otlp_receiver.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_output.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_protocol.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_runner.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/tests/test_sdk.py +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/.gitignore +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/README.md +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/eslint.config.js +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/index.html +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/package-lock.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/package.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/public/logo.svg +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/public/vite.svg +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/App.css +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/App.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/api/client.ts +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/assets/react.svg +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/annotation-queue/AnnotationDetailPanel.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/annotation-queue/AnnotationQueueView.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/annotation-queue/AnnotationTable.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/bug-report/BugReportModal.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/builder/BuilderHeader.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/builder/BuilderView.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/builder/EvalCaseCard.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/builder/EvalCasesList.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/builder/InvocationEditor.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/builder/JsonPreview.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/builder/MetadataEditor.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/builder/TraceUploadZone.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/builder/index.ts +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/dashboard/DashboardView.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/dashboard/MetricScoreCard.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/dashboard/PerformanceCard.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/dashboard/PerformanceCharts.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/dashboard/SummaryStats.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/dashboard/TraceCard.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/dashboard/TraceTable.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/ComparisonPanel.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/DataSection.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/InspectorHeader.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/InspectorLayout.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/InspectorView.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/InvocationCard.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/InvocationSummaryPanel.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/MetricResultsSection.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/MetricsComparisonSection.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/PerformanceSection.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/ToolCallList.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/inspector/TrajectoryComparisonDetails.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/sidebar/Sidebar.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/streaming/LiveConversationPanel.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/streaming/LiveMessage.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/streaming/LiveStreamingView.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/streaming/SessionCard.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/streaming/SessionMetadata.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/upload/EvalSetEditorDrawer.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/upload/FileDropZone.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/upload/RawJsonPreview.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/upload/TraceEditorDrawer.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/upload/UploadView.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/components/welcome/WelcomeView.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/config.ts +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/context/TraceContext.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/context/TraceProvider.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/index.css +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/lib/console-capture.ts +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/lib/evalset-builder.ts +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/lib/network-capture.ts +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/lib/trace-helpers.ts +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/lib/trace-loader.ts +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/lib/trace-metadata.ts +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/lib/trace-patcher.ts +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/lib/utils.ts +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/src/main.tsx +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/tsconfig.app.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/tsconfig.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/tsconfig.node.json +0 -0
- {agentevals_cli-0.6.4 → agentevals_cli-0.7.1}/ui/vite.config.ts +0 -0
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentevals-cli
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.7.1
|
|
4
4
|
Summary: Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Python: >=3.11
|
|
7
7
|
Requires-Dist: click>=8.0
|
|
8
8
|
Requires-Dist: fastapi>=0.115.0
|
|
9
|
-
Requires-Dist: google-adk[eval]>=1.
|
|
9
|
+
Requires-Dist: google-adk[eval]>=1.30.0
|
|
10
10
|
Requires-Dist: httpx>=0.27.0
|
|
11
11
|
Requires-Dist: opentelemetry-proto>=1.36.0
|
|
12
12
|
Requires-Dist: python-dotenv>=1.0.0
|
|
@@ -59,34 +59,16 @@ agentevals scores performance and inference quality from OpenTelemetry traces. N
|
|
|
59
59
|
|
|
60
60
|
## What is agentevals?
|
|
61
61
|
|
|
62
|
-
agentevals is a framework-agnostic evaluation solution that scores AI agent behavior directly from [OpenTelemetry](https://opentelemetry.io/) traces. Record your agent's actions once, then evaluate as many times as you want
|
|
62
|
+
agentevals is a framework-agnostic evaluation solution that scores AI agent behavior directly from [OpenTelemetry](https://opentelemetry.io/) traces. Record your agent's actions once, then evaluate as many times as you want without re-executing or burning extra tokens.
|
|
63
63
|
|
|
64
64
|
It works with any OTel-instrumented framework (LangChain, Strands, Google ADK, OpenAI Agents SDK, and others), supports Jaeger JSON and native OTLP trace formats, and ships with built-in evaluators, custom evaluator support, and LLM-based judges.
|
|
65
65
|
|
|
66
|
-
- **CLI** for scripting and CI pipelines
|
|
67
|
-
- **Web UI** for visual inspection and local developer experience
|
|
68
|
-
- **Kubernetes and OTel support** so you can deploy right next to your agents; works natively in your OpenTelemetry pipeline
|
|
69
|
-
- **MCP server** so MCP clients can run evaluations from a conversation
|
|
70
|
-
|
|
71
|
-
## Why agentevals?
|
|
72
|
-
|
|
73
|
-
Most evaluation tools require you to **re-execute your agent** for every test, burning tokens, time, and money on duplicate LLM calls. agentevals takes a different approach:
|
|
74
|
-
|
|
75
66
|
- **No re-execution**: score agents from existing traces without replaying expensive LLM calls
|
|
76
|
-
- **Framework-agnostic**: works with any agent framework that emits OpenTelemetry spans
|
|
77
67
|
- **Golden eval sets**: compare actual behavior against defined expected behaviors for deterministic pass/fail gating
|
|
78
68
|
- **Custom evaluators**: write scoring logic in Python, JavaScript, or any language, or offload scoring to OpenAI Eval API
|
|
79
69
|
- **CI/CD ready**: gate deployments on quality thresholds directly in your pipeline
|
|
80
70
|
- **Local-first**: no cloud dependency required; everything runs on your machine
|
|
81
|
-
|
|
82
|
-
## How It Works
|
|
83
|
-
|
|
84
|
-
agentevals follows three simple steps:
|
|
85
|
-
|
|
86
|
-
1. **Collect traces**: Instrument your agent with OpenTelemetry (or export traces from your tracing backend). Point the OTLP exporter at the agentevals receiver, or load trace files directly.
|
|
87
|
-
2. **Define eval sets**: Create golden evaluation sets that describe expected agent behavior: which tools should be called, in what order, and what the output should look like.
|
|
88
|
-
3. **Run evaluations**: Use the CLI, Web UI, or MCP server to score traces against your eval sets. Get per-metric scores, pass/fail results, and detailed span-level breakdowns.
|
|
89
|
-
|
|
71
|
+
- **Multiple interfaces**: CLI for scripting and CI, Web UI for visual inspection, MCP server for conversational evaluation, Helm chart for Kubernetes environments
|
|
90
72
|
|
|
91
73
|
> [!IMPORTANT]
|
|
92
74
|
> This project is under active development. Expect breaking changes.
|
|
@@ -95,7 +77,7 @@ agentevals follows three simple steps:
|
|
|
95
77
|
|
|
96
78
|
- [Installation](#installation)
|
|
97
79
|
- [Quick Start](#quick-start)
|
|
98
|
-
- [
|
|
80
|
+
- [Use-cases and Integrations](#use-cases-and-integrations)
|
|
99
81
|
- [CLI](#cli)
|
|
100
82
|
- [Custom Evaluators](#custom-evaluators)
|
|
101
83
|
- [Web UI](#web-ui)
|
|
@@ -194,14 +176,14 @@ agentevals serve
|
|
|
194
176
|
# opens http://localhost:8001
|
|
195
177
|
```
|
|
196
178
|
|
|
197
|
-
You can also point any OTel-instrumented agent directly at the built-in receiver (`OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318`). The UI streams tool calls, inputs, and outputs live as your agent runs. For production setups, the same receiver slots into a Kubernetes OTel Collector pipeline as an exporter destination. See [
|
|
179
|
+
You can also point any OTel-instrumented agent directly at the built-in receiver (`OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318`). The UI streams tool calls, inputs, and outputs live as your agent runs. For production setups, the same receiver slots into a Kubernetes OTel Collector pipeline as an exporter destination. See [Use-cases and Integrations](#use-cases-and-integrations) and the [Kubernetes example](examples/kubernetes/README.md) for walkthroughs.
|
|
198
180
|
|
|
199
181
|
**Next steps:**
|
|
200
182
|
|
|
201
183
|
- `agentevals evaluator list` to see all built-in and community evaluators
|
|
202
184
|
- [Custom Evaluators](#custom-evaluators) to write your own scoring logic
|
|
203
185
|
|
|
204
|
-
## Use-cases and
|
|
186
|
+
## Use-cases and Integrations
|
|
205
187
|
|
|
206
188
|
### Zero-Code (Recommended)
|
|
207
189
|
|
|
@@ -243,7 +225,7 @@ with app.session(eval_set_id="my-eval"):
|
|
|
243
225
|
|
|
244
226
|
Requires `pip install "agentevals-cli[streaming]"`. See [examples/sdk_example/](examples/sdk_example/) for framework-specific patterns.
|
|
245
227
|
|
|
246
|
-
## CLI
|
|
228
|
+
## CLI
|
|
247
229
|
|
|
248
230
|
```bash
|
|
249
231
|
# Multiple traces, JSON output
|
|
@@ -306,12 +288,13 @@ A `Dockerfile` is included at the project root. The image bundles the API, web U
|
|
|
306
288
|
|
|
307
289
|
```bash
|
|
308
290
|
docker build -t agentevals .
|
|
309
|
-
docker run -p 8001:8001 -p 4318:4318 agentevals
|
|
291
|
+
docker run -p 8001:8001 -p 4317:4317 -p 4318:4318 agentevals
|
|
310
292
|
```
|
|
311
293
|
|
|
312
294
|
| Port | Purpose |
|
|
313
295
|
|------|---------|
|
|
314
296
|
| 8001 | Web UI and REST API |
|
|
297
|
+
| 4317 | OTLP gRPC receiver (traces and logs) |
|
|
315
298
|
| 4318 | OTLP HTTP receiver (traces and logs) |
|
|
316
299
|
| 8080 | MCP (Streamable HTTP) |
|
|
317
300
|
|
|
@@ -389,7 +372,7 @@ See [DEVELOPMENT.md](DEVELOPMENT.md) for build tiers, Makefile targets, and Nix
|
|
|
389
372
|
|
|
390
373
|
**Do I need a database or any infrastructure to run agentevals?**
|
|
391
374
|
|
|
392
|
-
No. agentevals is a single `pip install` with no database, no message queue, and no external services. The CLI evaluates trace files directly from disk. The web UI and live streaming use in-memory session state.
|
|
375
|
+
No. agentevals is a single `pip install` with no database, no message queue, and no external services. The CLI evaluates trace files directly from disk. The web UI and live streaming use in-memory session state.
|
|
393
376
|
|
|
394
377
|
**Does the CLI require a running server?**
|
|
395
378
|
|
|
@@ -397,23 +380,19 @@ No. `agentevals run` evaluates trace files entirely offline. The server (`agente
|
|
|
397
380
|
|
|
398
381
|
**Can I use agentevals in CI/CD?**
|
|
399
382
|
|
|
400
|
-
Yes.
|
|
383
|
+
Yes. Pass trace files and an eval set, set a threshold, and let the exit code gate your deployment. Combine with `--output json` for machine-readable results. No server process needed.
|
|
401
384
|
|
|
402
385
|
**What if I switch agent frameworks?**
|
|
403
386
|
|
|
404
|
-
Because agentevals uses OpenTelemetry as its universal interface, switching frameworks
|
|
387
|
+
Because agentevals uses OpenTelemetry as its universal interface, switching frameworks does not require changing your evaluation setup. As long as your new framework emits OTel spans, the same eval sets and metrics work as before.
|
|
405
388
|
|
|
406
389
|
**Can I write evaluators in my own language?**
|
|
407
390
|
|
|
408
|
-
Yes. A custom evaluator is any program that reads JSON from stdin and writes a score to stdout. Python and JavaScript have first-class scaffolding support (`agentevals evaluator init`), but any language works.
|
|
391
|
+
Yes. A custom evaluator is any program that reads JSON from stdin and writes a score to stdout. Python and JavaScript have first-class scaffolding support (`agentevals evaluator init`), but any language works.
|
|
409
392
|
|
|
410
393
|
**Can I plug agentevals into an existing OTel pipeline?**
|
|
411
394
|
|
|
412
|
-
Yes. The OTLP receiver on port 4318 accepts standard `http/protobuf` and `http/json` trace exports, so it slots into any OpenTelemetry pipeline as just another exporter destination. If your pipeline uses gRPC (port 4317), place an [OTel Collector](https://opentelemetry.io/docs/collector/) in front to bridge gRPC to HTTP. The [Kubernetes example](examples/kubernetes/README.md) shows this
|
|
413
|
-
|
|
414
|
-
**Can I deploy agentevals on Kubernetes?**
|
|
415
|
-
|
|
416
|
-
Yes. A Dockerfile and a [Helm chart](charts/agentevals/) are included. A single pod exposes the web UI (8001), OTLP receiver (4318), and MCP server (8080). See the [Kubernetes example](examples/kubernetes/README.md) for a full walkthrough deploying agentevals alongside kagent and an OTel Collector.
|
|
395
|
+
Yes. The OTLP receiver on port 4318 accepts standard `http/protobuf` and `http/json` trace exports, so it slots into any OpenTelemetry pipeline as just another exporter destination. If your pipeline uses gRPC (port 4317), place an [OTel Collector](https://opentelemetry.io/docs/collector/) in front to bridge gRPC to HTTP. The [Kubernetes example](examples/kubernetes/README.md) shows this pattern.
|
|
417
396
|
|
|
418
397
|
**How does this compare to ADK's evaluations?**
|
|
419
398
|
|
|
@@ -425,7 +404,7 @@ However, if you're iterating on your agents locally, you can point your agents t
|
|
|
425
404
|
|
|
426
405
|
AgentCore's evaluation integration (via `strands-agents-evals`) also couples agent execution with evaluation. It re-invokes the agent for each test case, converts the resulting OTel spans to AWS's ADOT format, and scores them against 4 built-in evaluators (Helpfulness, Accuracy, Harmfulness, Relevance) via a cloud API call. This means you need an AWS account, valid credentials, and network access for every evaluation.
|
|
427
406
|
|
|
428
|
-
agentevals
|
|
407
|
+
agentevals scores pre-recorded traces locally without re-running anything. It works with standard Jaeger JSON and OTLP formats from any framework, supports open-ended metrics (tool trajectory matching, LLM-based judges, custom scorers), and ships with a CLI, web UI, and MCP server. No cloud dependency required, though we do include all ADK's GCP-based evals as of now.
|
|
429
408
|
|
|
430
409
|
**How does this compare to LangSmith?**
|
|
431
410
|
|
|
@@ -33,34 +33,16 @@ agentevals scores performance and inference quality from OpenTelemetry traces. N
|
|
|
33
33
|
|
|
34
34
|
## What is agentevals?
|
|
35
35
|
|
|
36
|
-
agentevals is a framework-agnostic evaluation solution that scores AI agent behavior directly from [OpenTelemetry](https://opentelemetry.io/) traces. Record your agent's actions once, then evaluate as many times as you want
|
|
36
|
+
agentevals is a framework-agnostic evaluation solution that scores AI agent behavior directly from [OpenTelemetry](https://opentelemetry.io/) traces. Record your agent's actions once, then evaluate as many times as you want without re-executing or burning extra tokens.
|
|
37
37
|
|
|
38
38
|
It works with any OTel-instrumented framework (LangChain, Strands, Google ADK, OpenAI Agents SDK, and others), supports Jaeger JSON and native OTLP trace formats, and ships with built-in evaluators, custom evaluator support, and LLM-based judges.
|
|
39
39
|
|
|
40
|
-
- **CLI** for scripting and CI pipelines
|
|
41
|
-
- **Web UI** for visual inspection and local developer experience
|
|
42
|
-
- **Kubernetes and OTel support** so you can deploy right next to your agents; works natively in your OpenTelemetry pipeline
|
|
43
|
-
- **MCP server** so MCP clients can run evaluations from a conversation
|
|
44
|
-
|
|
45
|
-
## Why agentevals?
|
|
46
|
-
|
|
47
|
-
Most evaluation tools require you to **re-execute your agent** for every test, burning tokens, time, and money on duplicate LLM calls. agentevals takes a different approach:
|
|
48
|
-
|
|
49
40
|
- **No re-execution**: score agents from existing traces without replaying expensive LLM calls
|
|
50
|
-
- **Framework-agnostic**: works with any agent framework that emits OpenTelemetry spans
|
|
51
41
|
- **Golden eval sets**: compare actual behavior against defined expected behaviors for deterministic pass/fail gating
|
|
52
42
|
- **Custom evaluators**: write scoring logic in Python, JavaScript, or any language, or offload scoring to OpenAI Eval API
|
|
53
43
|
- **CI/CD ready**: gate deployments on quality thresholds directly in your pipeline
|
|
54
44
|
- **Local-first**: no cloud dependency required; everything runs on your machine
|
|
55
|
-
|
|
56
|
-
## How It Works
|
|
57
|
-
|
|
58
|
-
agentevals follows three simple steps:
|
|
59
|
-
|
|
60
|
-
1. **Collect traces**: Instrument your agent with OpenTelemetry (or export traces from your tracing backend). Point the OTLP exporter at the agentevals receiver, or load trace files directly.
|
|
61
|
-
2. **Define eval sets**: Create golden evaluation sets that describe expected agent behavior: which tools should be called, in what order, and what the output should look like.
|
|
62
|
-
3. **Run evaluations**: Use the CLI, Web UI, or MCP server to score traces against your eval sets. Get per-metric scores, pass/fail results, and detailed span-level breakdowns.
|
|
63
|
-
|
|
45
|
+
- **Multiple interfaces**: CLI for scripting and CI, Web UI for visual inspection, MCP server for conversational evaluation, Helm chart for Kubernetes environments
|
|
64
46
|
|
|
65
47
|
> [!IMPORTANT]
|
|
66
48
|
> This project is under active development. Expect breaking changes.
|
|
@@ -69,7 +51,7 @@ agentevals follows three simple steps:
|
|
|
69
51
|
|
|
70
52
|
- [Installation](#installation)
|
|
71
53
|
- [Quick Start](#quick-start)
|
|
72
|
-
- [
|
|
54
|
+
- [Use-cases and Integrations](#use-cases-and-integrations)
|
|
73
55
|
- [CLI](#cli)
|
|
74
56
|
- [Custom Evaluators](#custom-evaluators)
|
|
75
57
|
- [Web UI](#web-ui)
|
|
@@ -168,14 +150,14 @@ agentevals serve
|
|
|
168
150
|
# opens http://localhost:8001
|
|
169
151
|
```
|
|
170
152
|
|
|
171
|
-
You can also point any OTel-instrumented agent directly at the built-in receiver (`OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318`). The UI streams tool calls, inputs, and outputs live as your agent runs. For production setups, the same receiver slots into a Kubernetes OTel Collector pipeline as an exporter destination. See [
|
|
153
|
+
You can also point any OTel-instrumented agent directly at the built-in receiver (`OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318`). The UI streams tool calls, inputs, and outputs live as your agent runs. For production setups, the same receiver slots into a Kubernetes OTel Collector pipeline as an exporter destination. See [Use-cases and Integrations](#use-cases-and-integrations) and the [Kubernetes example](examples/kubernetes/README.md) for walkthroughs.
|
|
172
154
|
|
|
173
155
|
**Next steps:**
|
|
174
156
|
|
|
175
157
|
- `agentevals evaluator list` to see all built-in and community evaluators
|
|
176
158
|
- [Custom Evaluators](#custom-evaluators) to write your own scoring logic
|
|
177
159
|
|
|
178
|
-
## Use-cases and
|
|
160
|
+
## Use-cases and Integrations
|
|
179
161
|
|
|
180
162
|
### Zero-Code (Recommended)
|
|
181
163
|
|
|
@@ -217,7 +199,7 @@ with app.session(eval_set_id="my-eval"):
|
|
|
217
199
|
|
|
218
200
|
Requires `pip install "agentevals-cli[streaming]"`. See [examples/sdk_example/](examples/sdk_example/) for framework-specific patterns.
|
|
219
201
|
|
|
220
|
-
## CLI
|
|
202
|
+
## CLI
|
|
221
203
|
|
|
222
204
|
```bash
|
|
223
205
|
# Multiple traces, JSON output
|
|
@@ -280,12 +262,13 @@ A `Dockerfile` is included at the project root. The image bundles the API, web U
|
|
|
280
262
|
|
|
281
263
|
```bash
|
|
282
264
|
docker build -t agentevals .
|
|
283
|
-
docker run -p 8001:8001 -p 4318:4318 agentevals
|
|
265
|
+
docker run -p 8001:8001 -p 4317:4317 -p 4318:4318 agentevals
|
|
284
266
|
```
|
|
285
267
|
|
|
286
268
|
| Port | Purpose |
|
|
287
269
|
|------|---------|
|
|
288
270
|
| 8001 | Web UI and REST API |
|
|
271
|
+
| 4317 | OTLP gRPC receiver (traces and logs) |
|
|
289
272
|
| 4318 | OTLP HTTP receiver (traces and logs) |
|
|
290
273
|
| 8080 | MCP (Streamable HTTP) |
|
|
291
274
|
|
|
@@ -363,7 +346,7 @@ See [DEVELOPMENT.md](DEVELOPMENT.md) for build tiers, Makefile targets, and Nix
|
|
|
363
346
|
|
|
364
347
|
**Do I need a database or any infrastructure to run agentevals?**
|
|
365
348
|
|
|
366
|
-
No. agentevals is a single `pip install` with no database, no message queue, and no external services. The CLI evaluates trace files directly from disk. The web UI and live streaming use in-memory session state.
|
|
349
|
+
No. agentevals is a single `pip install` with no database, no message queue, and no external services. The CLI evaluates trace files directly from disk. The web UI and live streaming use in-memory session state.
|
|
367
350
|
|
|
368
351
|
**Does the CLI require a running server?**
|
|
369
352
|
|
|
@@ -371,23 +354,19 @@ No. `agentevals run` evaluates trace files entirely offline. The server (`agente
|
|
|
371
354
|
|
|
372
355
|
**Can I use agentevals in CI/CD?**
|
|
373
356
|
|
|
374
|
-
Yes.
|
|
357
|
+
Yes. Pass trace files and an eval set, set a threshold, and let the exit code gate your deployment. Combine with `--output json` for machine-readable results. No server process needed.
|
|
375
358
|
|
|
376
359
|
**What if I switch agent frameworks?**
|
|
377
360
|
|
|
378
|
-
Because agentevals uses OpenTelemetry as its universal interface, switching frameworks
|
|
361
|
+
Because agentevals uses OpenTelemetry as its universal interface, switching frameworks does not require changing your evaluation setup. As long as your new framework emits OTel spans, the same eval sets and metrics work as before.
|
|
379
362
|
|
|
380
363
|
**Can I write evaluators in my own language?**
|
|
381
364
|
|
|
382
|
-
Yes. A custom evaluator is any program that reads JSON from stdin and writes a score to stdout. Python and JavaScript have first-class scaffolding support (`agentevals evaluator init`), but any language works.
|
|
365
|
+
Yes. A custom evaluator is any program that reads JSON from stdin and writes a score to stdout. Python and JavaScript have first-class scaffolding support (`agentevals evaluator init`), but any language works.
|
|
383
366
|
|
|
384
367
|
**Can I plug agentevals into an existing OTel pipeline?**
|
|
385
368
|
|
|
386
|
-
Yes. The OTLP receiver on port 4318 accepts standard `http/protobuf` and `http/json` trace exports, so it slots into any OpenTelemetry pipeline as just another exporter destination. If your pipeline uses gRPC (port 4317), place an [OTel Collector](https://opentelemetry.io/docs/collector/) in front to bridge gRPC to HTTP. The [Kubernetes example](examples/kubernetes/README.md) shows this
|
|
387
|
-
|
|
388
|
-
**Can I deploy agentevals on Kubernetes?**
|
|
389
|
-
|
|
390
|
-
Yes. A Dockerfile and a [Helm chart](charts/agentevals/) are included. A single pod exposes the web UI (8001), OTLP receiver (4318), and MCP server (8080). See the [Kubernetes example](examples/kubernetes/README.md) for a full walkthrough deploying agentevals alongside kagent and an OTel Collector.
|
|
369
|
+
Yes. The OTLP receiver on port 4318 accepts standard `http/protobuf` and `http/json` trace exports, so it slots into any OpenTelemetry pipeline as just another exporter destination. If your pipeline uses gRPC (port 4317), place an [OTel Collector](https://opentelemetry.io/docs/collector/) in front to bridge gRPC to HTTP. The [Kubernetes example](examples/kubernetes/README.md) shows this pattern.
|
|
391
370
|
|
|
392
371
|
**How does this compare to ADK's evaluations?**
|
|
393
372
|
|
|
@@ -399,7 +378,7 @@ However, if you're iterating on your agents locally, you can point your agents t
|
|
|
399
378
|
|
|
400
379
|
AgentCore's evaluation integration (via `strands-agents-evals`) also couples agent execution with evaluation. It re-invokes the agent for each test case, converts the resulting OTel spans to AWS's ADOT format, and scores them against 4 built-in evaluators (Helpfulness, Accuracy, Harmfulness, Relevance) via a cloud API call. This means you need an AWS account, valid credentials, and network access for every evaluation.
|
|
401
380
|
|
|
402
|
-
agentevals
|
|
381
|
+
agentevals scores pre-recorded traces locally without re-running anything. It works with standard Jaeger JSON and OTLP formats from any framework, supports open-ended metrics (tool trajectory matching, LLM-based judges, custom scorers), and ships with a CLI, web UI, and MCP server. No cloud dependency required, though we do include all ADK's GCP-based evals as of now.
|
|
403
382
|
|
|
404
383
|
**How does this compare to LangSmith?**
|
|
405
384
|
|
|
@@ -8,7 +8,9 @@ agentevals consumes OpenTelemetry traces to evaluate AI agents. This document co
|
|
|
8
8
|
|
|
9
9
|
The [GenAI semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/) define standard span attributes for LLM interactions. agentevals auto-detects this format when spans contain `gen_ai.request.model` or `gen_ai.input.messages`.
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
This format works with LangChain, Strands, OpenAI instrumentation, Anthropic instrumentation, and any framework that follows the GenAI semantic conventions.
|
|
12
|
+
|
|
13
|
+
#### Core attributes
|
|
12
14
|
|
|
13
15
|
| Attribute | Description |
|
|
14
16
|
|-----------|-------------|
|
|
@@ -18,9 +20,51 @@ Supported attributes:
|
|
|
18
20
|
| `gen_ai.response.finish_reasons` | Why the model stopped generating |
|
|
19
21
|
| `gen_ai.usage.input_tokens` | Input token count |
|
|
20
22
|
| `gen_ai.usage.output_tokens` | Output token count |
|
|
21
|
-
| `gen_ai.system` | AI system identifier (e.g. `openai`, `anthropic`) |
|
|
22
23
|
|
|
23
|
-
|
|
24
|
+
#### Provider and response metadata (v1.37.0+)
|
|
25
|
+
|
|
26
|
+
| Attribute | Description |
|
|
27
|
+
|-----------|-------------|
|
|
28
|
+
| `gen_ai.provider.name` | LLM provider (e.g. `openai`, `anthropic`). Replaces the deprecated `gen_ai.system`. |
|
|
29
|
+
| `gen_ai.response.model` | Model name returned in the response |
|
|
30
|
+
| `gen_ai.response.id` | Unique response identifier |
|
|
31
|
+
|
|
32
|
+
#### Request parameters (v1.40.0)
|
|
33
|
+
|
|
34
|
+
| Attribute | Description |
|
|
35
|
+
|-----------|-------------|
|
|
36
|
+
| `gen_ai.request.temperature` | Temperature sampling parameter |
|
|
37
|
+
| `gen_ai.request.max_tokens` | Maximum output tokens limit |
|
|
38
|
+
| `gen_ai.request.top_p` | Top-P (nucleus) sampling parameter |
|
|
39
|
+
| `gen_ai.request.top_k` | Top-K sampling parameter |
|
|
40
|
+
|
|
41
|
+
#### Cache token usage
|
|
42
|
+
|
|
43
|
+
| Attribute | Description |
|
|
44
|
+
|-----------|-------------|
|
|
45
|
+
| `gen_ai.usage.cache_creation.input_tokens` | Tokens spent creating a prompt cache entry |
|
|
46
|
+
| `gen_ai.usage.cache_read.input_tokens` | Tokens served from an existing cache entry |
|
|
47
|
+
|
|
48
|
+
These are relevant for providers that support prompt caching (Anthropic, OpenAI). agentevals aggregates these across LLM spans and displays them in the performance summary.
|
|
49
|
+
|
|
50
|
+
#### Agent and tool metadata (v1.31.0+)
|
|
51
|
+
|
|
52
|
+
| Attribute | Description |
|
|
53
|
+
|-----------|-------------|
|
|
54
|
+
| `gen_ai.agent.id` | Unique agent identifier |
|
|
55
|
+
| `gen_ai.agent.description` | Agent description |
|
|
56
|
+
| `gen_ai.tool.description` | Tool description |
|
|
57
|
+
| `gen_ai.tool.type` | Tool type classification |
|
|
58
|
+
|
|
59
|
+
#### Opt-in attributes (v1.37.0+)
|
|
60
|
+
|
|
61
|
+
These may contain large payloads and are typically gated behind instrumentation flags:
|
|
62
|
+
|
|
63
|
+
| Attribute | Description |
|
|
64
|
+
|-----------|-------------|
|
|
65
|
+
| `gen_ai.system_instructions` | System prompt text |
|
|
66
|
+
| `gen_ai.tool.definitions` | Tool schema definitions (JSON) |
|
|
67
|
+
| `gen_ai.output.type` | Classification of output content |
|
|
24
68
|
|
|
25
69
|
### Google ADK (framework-native)
|
|
26
70
|
|
|
@@ -30,9 +74,33 @@ Google ADK emits spans under the `gcp.vertex.agent` OTel scope with proprietary
|
|
|
30
74
|
|
|
31
75
|
Format detection is automatic. When a trace contains both ADK and GenAI attributes, ADK takes priority because it provides richer structured data. The detection logic lives in `src/agentevals/converter.py` (`get_extractor()`).
|
|
32
76
|
|
|
77
|
+
## Message Formats
|
|
78
|
+
|
|
79
|
+
GenAI message content (`gen_ai.input.messages`, `gen_ai.output.messages`) can use two JSON schemas. agentevals supports both and normalizes them internally.
|
|
80
|
+
|
|
81
|
+
### Content-based format
|
|
82
|
+
|
|
83
|
+
Used by OpenAI and LangChain instrumentors (v2):
|
|
84
|
+
|
|
85
|
+
```json
|
|
86
|
+
{"role": "user", "content": "Hello"}
|
|
87
|
+
{"role": "assistant", "content": "...", "tool_calls": [{"type": "function", "function": {"name": "get_weather", "arguments": "{\"city\": \"NYC\"}"}}]}
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Parts-based format (v1.36.0+)
|
|
91
|
+
|
|
92
|
+
Used by newer instrumentors that follow the GenAI semconv parts schema:
|
|
93
|
+
|
|
94
|
+
```json
|
|
95
|
+
{"role": "user", "parts": [{"type": "text", "content": "Hello"}]}
|
|
96
|
+
{"role": "assistant", "parts": [{"type": "tool_call", "name": "get_weather", "arguments": {"city": "NYC"}}]}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Both formats are auto-detected per message. Tool calls are normalized to `{name, id, arguments}` regardless of source format.
|
|
100
|
+
|
|
33
101
|
## Message Content Delivery
|
|
34
102
|
|
|
35
|
-
GenAI message content
|
|
103
|
+
GenAI message content can arrive through three mechanisms. agentevals supports all of them:
|
|
36
104
|
|
|
37
105
|
### 1. Span attributes (simplest)
|
|
38
106
|
|
|
@@ -80,18 +148,44 @@ If you maintain an OTel-instrumented agent framework and want to align with the
|
|
|
80
148
|
|
|
81
149
|
## OTLP Receiver
|
|
82
150
|
|
|
83
|
-
agentevals runs:
|
|
151
|
+
agentevals runs two OTLP receivers:
|
|
152
|
+
|
|
153
|
+
- **gRPC** on port 4317 (standard OTLP gRPC port, configurable via `--otlp-grpc-port`)
|
|
154
|
+
- **HTTP** on port 4318 (standard OTLP HTTP port)
|
|
84
155
|
|
|
85
|
-
|
|
86
|
-
- OTLP gRPC receiver on port 4317 (standard OTLP gRPC port).
|
|
156
|
+
Both accept traces and logs and feed into the same session manager.
|
|
87
157
|
|
|
88
|
-
OTLP HTTP
|
|
158
|
+
### OTLP HTTP
|
|
89
159
|
|
|
90
160
|
| Endpoint | Content Types |
|
|
91
161
|
|----------|--------------|
|
|
92
162
|
| `/v1/traces` | `application/json`, `application/x-protobuf` |
|
|
93
163
|
| `/v1/logs` | `application/json`, `application/x-protobuf` |
|
|
94
164
|
|
|
95
|
-
|
|
96
|
-
|
|
165
|
+
### OTLP gRPC
|
|
166
|
+
|
|
167
|
+
Implements the standard `TraceService/Export` and `LogsService/Export` RPCs. Configuration:
|
|
168
|
+
|
|
169
|
+
| Setting | Default |
|
|
170
|
+
|---------|---------|
|
|
171
|
+
| Max message size | 8 MB |
|
|
172
|
+
| Max concurrent RPCs | 32 |
|
|
173
|
+
| Compression | gzip |
|
|
174
|
+
| TLS | off (insecure) |
|
|
175
|
+
|
|
176
|
+
### Client configuration
|
|
177
|
+
|
|
178
|
+
For HTTP exporters:
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
For gRPC exporters:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
|
|
188
|
+
export OTEL_EXPORTER_OTLP_PROTOCOL=grpc
|
|
189
|
+
```
|
|
190
|
+
|
|
97
191
|
Traces and logs stream into agentevals automatically. See [examples/README.md](../examples/README.md) for zero-code setup instructions.
|
|
@@ -29,6 +29,7 @@ agentevals accepts OTLP/HTTP on port 4318 (`http/protobuf` and `http/json`) and
|
|
|
29
29
|
| [zero-code-examples/ollama/](./zero-code-examples/ollama/) | LangChain | Ollama |
|
|
30
30
|
| [zero-code-examples/strands/](./zero-code-examples/strands/) | Strands | OpenAI |
|
|
31
31
|
| [zero-code-examples/adk/](./zero-code-examples/adk/) | Google ADK | Gemini |
|
|
32
|
+
| [zero-code-examples/pydantic-ai/](./zero-code-examples/pydantic-ai/) | Pydantic AI | OpenAI |
|
|
32
33
|
|
|
33
34
|
This approach works with any framework that has OTel instrumentation: LangChain, Strands, Google ADK, etc. If your framework already emits OTel spans, you only need to add `OTLPSpanExporter` (and `OTLPLogExporter` if it uses GenAI log-based content delivery).
|
|
34
35
|
|
|
@@ -103,6 +104,7 @@ Detection checks for `gen_ai.request.model` / `gen_ai.input.messages` (GenAI sem
|
|
|
103
104
|
| [zero-code-examples/ollama/](./zero-code-examples/ollama/) | LangChain | Ollama | GenAI semconv (logs) | Standard OTLP export |
|
|
104
105
|
| [zero-code-examples/strands/](./zero-code-examples/strands/) | Strands | OpenAI | GenAI semconv (events*) | Standard OTLP export |
|
|
105
106
|
| [zero-code-examples/adk/](./zero-code-examples/adk/) | Google ADK | Gemini | ADK built-in | Standard OTLP export |
|
|
107
|
+
| [zero-code-examples/pydantic-ai/](./zero-code-examples/pydantic-ai/) | Pydantic AI | OpenAI | GenAI semconv (span attrs) | Standard OTLP export |
|
|
106
108
|
| [langchain_agent](./langchain_agent/) | LangChain | OpenAI | GenAI semconv (logs) | SDK WebSocket |
|
|
107
109
|
| [strands_agent](./strands_agent/) | Strands | OpenAI | GenAI semconv (events*) | SDK WebSocket |
|
|
108
110
|
| [dice_agent](./dice_agent/) | Google ADK | Gemini | ADK built-in | SDK WebSocket |
|
|
@@ -217,6 +219,7 @@ python examples/zero-code-examples/langchain/run.py
|
|
|
217
219
|
python examples/zero-code-examples/ollama/run.py
|
|
218
220
|
python examples/zero-code-examples/strands/run.py
|
|
219
221
|
python examples/zero-code-examples/adk/run.py
|
|
222
|
+
python examples/zero-code-examples/pydantic-ai/run.py
|
|
220
223
|
|
|
221
224
|
# SDK examples:
|
|
222
225
|
python examples/sdk_example/context_manager_example.py
|
|
@@ -232,7 +235,7 @@ python examples/strands_agent/main.py
|
|
|
232
235
|
Traces stream to the dev server in real-time. Evaluation runs automatically when the session completes.
|
|
233
236
|
|
|
234
237
|
See each example's README for prerequisites and detailed instructions:
|
|
235
|
-
- [zero-code-examples/](./zero-code-examples/) (LangChain
|
|
238
|
+
- [zero-code-examples/](./zero-code-examples/) (LangChain, Strands, ADK, OpenAI Agents, Pydantic AI — standard OTLP)
|
|
236
239
|
- [dice_agent/README.md](./dice_agent/README.md) (Google ADK + Gemini)
|
|
237
240
|
- [langchain_agent/README.md](./langchain_agent/README.md) (LangChain + OpenAI, SDK)
|
|
238
241
|
- [strands_agent/](./strands_agent/) (Strands + OpenAI, SDK)
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Run a dice-rolling Pydantic AI agent with OTLP export — no agentevals SDK.
|
|
2
|
+
|
|
3
|
+
Demonstrates zero-code integration: any OTel-instrumented agent streams
|
|
4
|
+
traces to agentevals by pointing the OTLP exporter at the receiver.
|
|
5
|
+
|
|
6
|
+
Pydantic AI has built-in OTel support via Agent.instrument_all(). By default
|
|
7
|
+
it uses version 2 of the GenAI semconv format, storing message content in span
|
|
8
|
+
attributes — only a TracerProvider is needed.
|
|
9
|
+
No separate instrumentation library is required.
|
|
10
|
+
|
|
11
|
+
Prerequisites:
|
|
12
|
+
1. pip install -r requirements.txt
|
|
13
|
+
2. agentevals serve --dev
|
|
14
|
+
3. export OPENAI_API_KEY="your-key-here"
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
python examples/zero-code-examples/pydantic-ai/run.py
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
import random
|
|
22
|
+
|
|
23
|
+
from dotenv import load_dotenv
|
|
24
|
+
from opentelemetry import trace
|
|
25
|
+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
|
26
|
+
from opentelemetry.sdk.resources import Resource
|
|
27
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
28
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
29
|
+
from pydantic_ai import Agent
|
|
30
|
+
|
|
31
|
+
load_dotenv(override=True)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def roll_die(sides: int) -> int:
    """Roll a die with the given number of sides and return the result."""
    # random.randrange(sides) draws uniformly from 0..sides-1; shifting by one
    # gives 1..sides — identical to random.randint(1, sides), and it likewise
    # raises ValueError when sides < 1.
    return 1 + random.randrange(sides)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def check_prime(number: int) -> bool:
    """Return True if the number is prime, False otherwise."""
    # Anything below 2 (including 0, 1, and negatives) is not prime.
    if number < 2:
        return False
    # Trial division: prime iff no divisor exists up to floor(sqrt(number)).
    return all(number % divisor for divisor in range(2, int(number**0.5) + 1))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def main() -> None:
    """Run a short multi-turn dice-agent conversation, exporting traces via OTLP.

    Sets up an OpenTelemetry TracerProvider with a batching OTLP/HTTP span
    exporter, enables Pydantic AI's built-in instrumentation, then runs three
    scripted queries through the agent, carrying message history forward.
    """
    # The agent needs an OpenAI key; bail out early with a hint instead of
    # failing deep inside the first model call.
    if not os.getenv("OPENAI_API_KEY"):
        print("OPENAI_API_KEY not set.")
        return

    # Printed for operator visibility only; OTLPSpanExporter reads the same
    # env var itself (defaulting to localhost:4318 when unset).
    endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
    print(f"OTLP endpoint: {endpoint}")

    # Tag the OTel resource so agentevals can associate this run with an eval
    # set and session. setdefault keeps any value the user already exported —
    # NOTE(review): if the user set OTEL_RESOURCE_ATTRIBUTES to something else,
    # these agentevals keys will not be added.
    os.environ.setdefault(
        "OTEL_RESOURCE_ATTRIBUTES",
        "agentevals.eval_set_id=pydantic_ai_eval,agentevals.session_name=pydantic-ai-zero-code",
    )

    # Resource.create() picks up OTEL_RESOURCE_ATTRIBUTES set above.
    resource = Resource.create()

    # Batch processor with a short (1s) schedule delay so spans appear in the
    # agentevals UI promptly during an interactive demo.
    tracer_provider = TracerProvider(resource=resource)
    tracer_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(), schedule_delay_millis=1000))
    trace.set_tracer_provider(tracer_provider)

    # Enable Pydantic AI's built-in OTel instrumentation. This one call
    # wires up all agents globally — no framework-specific instrumentor
    # library (like opentelemetry-instrumentation-openai-v2) is needed.
    Agent.instrument_all()

    agent = Agent(
        "openai:gpt-4o-mini",
        instructions="You are a helpful assistant. You can roll dice and check if numbers are prime.",
    )
    # tool_plain registers plain functions (no RunContext) as agent tools;
    # their docstrings become the tool descriptions sent to the model.
    agent.tool_plain(roll_die)
    agent.tool_plain(check_prime)

    # Three turns designed to exercise both tools: greeting, a tool call,
    # and a follow-up that depends on the previous tool result.
    test_queries = [
        "Hi! Can you help me?",
        "Roll a 20-sided die for me",
        "Is the number you rolled prime?",
    ]

    message_history = []

    try:
        for i, query in enumerate(test_queries, 1):
            print(f"\n[{i}/{len(test_queries)}] User: {query}")

            result = agent.run_sync(query, message_history=message_history)

            print(f" Agent: {result.output}")

            # Pass the full message history forward for multi-turn conversation.
            message_history = result.all_messages()
    finally:
        print()
        # Flush buffered spans even if a turn raised, so the receiver gets a
        # complete trace. (The SDK's atexit hook handles final shutdown.)
        tracer_provider.force_flush()
        print("All traces flushed to OTLP receiver.")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
|
@@ -1,25 +1,5 @@
|
|
|
1
1
|
{
|
|
2
2
|
"nodes": {
|
|
3
|
-
"devshell": {
|
|
4
|
-
"inputs": {
|
|
5
|
-
"nixpkgs": [
|
|
6
|
-
"nixpkgs"
|
|
7
|
-
]
|
|
8
|
-
},
|
|
9
|
-
"locked": {
|
|
10
|
-
"lastModified": 1768818222,
|
|
11
|
-
"narHash": "sha256-460jc0+CZfyaO8+w8JNtlClB2n4ui1RbHfPTLkpwhU8=",
|
|
12
|
-
"owner": "numtide",
|
|
13
|
-
"repo": "devshell",
|
|
14
|
-
"rev": "255a2b1725a20d060f566e4755dbf571bbbb5f76",
|
|
15
|
-
"type": "github"
|
|
16
|
-
},
|
|
17
|
-
"original": {
|
|
18
|
-
"owner": "numtide",
|
|
19
|
-
"repo": "devshell",
|
|
20
|
-
"type": "github"
|
|
21
|
-
}
|
|
22
|
-
},
|
|
23
3
|
"flake-utils": {
|
|
24
4
|
"inputs": {
|
|
25
5
|
"systems": "systems"
|
|
@@ -102,7 +82,6 @@
|
|
|
102
82
|
},
|
|
103
83
|
"root": {
|
|
104
84
|
"inputs": {
|
|
105
|
-
"devshell": "devshell",
|
|
106
85
|
"flake-utils": "flake-utils",
|
|
107
86
|
"nixpkgs": "nixpkgs",
|
|
108
87
|
"pyproject-build-systems": "pyproject-build-systems",
|