agentevals-cli 0.6.0__tar.gz → 0.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentevals_cli-0.6.1/.dockerignore +16 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/workflows/release.yml +40 -0
- agentevals_cli-0.6.1/Dockerfile +38 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/Makefile +12 -1
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/PKG-INFO +1 -1
- agentevals_cli-0.6.1/charts/agentevals/Chart.yaml +6 -0
- agentevals_cli-0.6.1/charts/agentevals/templates/NOTES.txt +12 -0
- agentevals_cli-0.6.1/charts/agentevals/templates/_helpers.tpl +57 -0
- agentevals_cli-0.6.1/charts/agentevals/templates/deployment.yaml +128 -0
- agentevals_cli-0.6.1/charts/agentevals/templates/service.yaml +24 -0
- agentevals_cli-0.6.1/charts/agentevals/templates/serviceaccount.yaml +14 -0
- agentevals_cli-0.6.1/charts/agentevals/values.yaml +153 -0
- agentevals_cli-0.6.1/examples/zero-code-examples/openai-agents/requirements.txt +6 -0
- agentevals_cli-0.6.1/examples/zero-code-examples/openai-agents/run.py +105 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/pyproject.toml +1 -1
- agentevals_cli-0.6.1/src/agentevals/_static/assets/index-lHPO8TkI.js +342 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/_static/index.html +1 -1
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/app.py +14 -18
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/debug_routes.py +19 -25
- agentevals_cli-0.6.1/src/agentevals/api/dependencies.py +23 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/models.py +20 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/otlp_app.py +4 -4
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/otlp_routes.py +34 -40
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/routes.py +140 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/streaming_routes.py +67 -51
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/cli.py +62 -7
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/converter.py +35 -61
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/extraction.py +25 -2
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/genai_converter.py +37 -98
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/mcp_server.py +3 -2
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/conftest.py +8 -10
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/test_live_agents.py +57 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_api.py +7 -15
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_extraction.py +11 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_otlp_receiver.py +25 -49
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/api/client.ts +29 -1
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/TraceUploadZone.tsx +12 -12
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/TraceCard.tsx +11 -20
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/InspectorHeader.tsx +11 -20
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/InspectorView.tsx +10 -39
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/TraceEditorDrawer.tsx +11 -14
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/context/TraceProvider.tsx +23 -13
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/evalset-builder.ts +10 -36
- agentevals_cli-0.6.1/ui/src/lib/trace-helpers.ts +73 -0
- agentevals_cli-0.6.1/ui/src/lib/trace-metadata.ts +12 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/trace-patcher.ts +1 -1
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/types.ts +21 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/uv.lock +1 -1
- agentevals_cli-0.6.0/src/agentevals/_static/assets/index-Dz2NgC8m.js +0 -343
- agentevals_cli-0.6.0/ui/src/lib/trace-converter.ts +0 -734
- agentevals_cli-0.6.0/ui/src/lib/trace-metadata.ts +0 -391
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.claude/skills/eval/SKILL.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.claude/skills/eval/evals/evals.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.claude/skills/inspect/SKILL.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.claude/skills/inspect/evals/evals.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/workflows/ci.yml +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/workflows/publish-evaluator-sdk.yml +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.gitignore +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.mcp.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/CONTRIBUTING.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/DEVELOPMENT.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/LICENSE +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/README.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/assets/logo-color-on-transparent.svg +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/assets/logo-color.png +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/assets/logo-dark-on-transparent.svg +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/custom-evaluators.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/eval-set-format.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/otel-compatibility.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/streaming.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/README.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/custom_evaluators/eval_config.yaml +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/custom_evaluators/response_quality.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/custom_evaluators/tool_call_checker.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/dice_agent/README.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/dice_agent/agent.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/dice_agent/eval_set.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/dice_agent/main.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/dice_agent/test_streaming.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/README.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/agent.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/eval_set.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/main.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/requirements.txt +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/test_streaming.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/sdk_example/async_example.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/sdk_example/context_manager_example.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/sdk_example/decorator_example.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/sdk_example/requirements.txt +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/strands_agent/agent.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/strands_agent/eval_set.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/strands_agent/main.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/strands_agent/requirements.txt +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/adk/requirements.txt +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/adk/run.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/langchain/requirements.txt +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/langchain/run.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/strands/requirements.txt +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/strands/run.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/flake.lock +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/flake.nix +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/README.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/pyproject.toml +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/types.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/eval_set_helm.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/evalset_helm_3_2026-02-23.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/evalset_k8s_2026-02-20.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/helm.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/helm_2.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/helm_3.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/k8s.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/__init__.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/_protocol.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/_static/assets/index-BqibLiHO.css +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/_static/logo.svg +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/_static/vite.svg +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/__init__.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/builtin_metrics.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/config.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/custom_evaluators.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/eval_config_loader.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/evaluator/__init__.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/evaluator/resolver.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/evaluator/sources.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/evaluator/templates.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/evaluator/venv.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/loader/__init__.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/loader/base.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/loader/jaeger.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/loader/otlp.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/openai_eval_backend.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/output.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/runner.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/sdk.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/streaming/__init__.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/streaming/incremental_processor.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/streaming/processor.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/streaming/session.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/streaming/ws_server.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/trace_attrs.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/trace_metrics.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/utils/__init__.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/utils/genai_messages.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/utils/log_buffer.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/utils/log_enrichment.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/__init__.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/test_evaluation_pipeline.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/test_session_grouping.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/test_timing_stress.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_converter.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_genai_converter.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_jaeger_loader.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_log_enrichment.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_otlp_loader.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_output.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_protocol.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_runner.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_sdk.py +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/.gitignore +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/README.md +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/eslint.config.js +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/index.html +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/package-lock.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/package.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/public/logo.svg +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/public/vite.svg +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/App.css +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/App.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/assets/react.svg +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/annotation-queue/AnnotationDetailPanel.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/annotation-queue/AnnotationQueueView.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/annotation-queue/AnnotationTable.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/bug-report/BugReportModal.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/BuilderHeader.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/BuilderView.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/EvalCaseCard.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/EvalCasesList.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/InvocationEditor.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/JsonPreview.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/MetadataEditor.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/index.ts +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/DashboardView.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/MetricScoreCard.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/PerformanceCard.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/PerformanceCharts.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/SummaryStats.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/TraceTable.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/ComparisonPanel.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/DataSection.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/InspectorLayout.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/InvocationCard.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/InvocationSummaryPanel.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/MetricResultsSection.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/MetricsComparisonSection.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/PerformanceSection.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/ToolCallList.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/TrajectoryComparisonDetails.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/sidebar/Sidebar.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/streaming/LiveConversationPanel.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/streaming/LiveMessage.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/streaming/LiveStreamingView.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/streaming/SessionCard.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/streaming/SessionMetadata.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/EvalSetEditorDrawer.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/FileDropZone.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/MetricSelector.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/RawJsonPreview.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/UploadView.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/welcome/WelcomeView.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/config.ts +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/context/TraceContext.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/index.css +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/console-capture.ts +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/network-capture.ts +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/trace-loader.ts +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/utils.ts +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/main.tsx +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/tsconfig.app.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/tsconfig.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/tsconfig.node.json +0 -0
- {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/vite.config.ts +0 -0
|
@@ -29,6 +29,9 @@ jobs:
|
|
|
29
29
|
cache: npm
|
|
30
30
|
cache-dependency-path: ui/package-lock.json
|
|
31
31
|
|
|
32
|
+
- name: Set version from tag
|
|
33
|
+
run: uv version "${{ github.event.inputs.tag || github.ref_name }}" --package agentevals-cli
|
|
34
|
+
|
|
32
35
|
- name: Build core and bundled wheels
|
|
33
36
|
run: make release
|
|
34
37
|
|
|
@@ -89,3 +92,40 @@ jobs:
|
|
|
89
92
|
uv build --package agentevals-cli
|
|
90
93
|
uv publish dist/* --token ${{ secrets.PYPI_TOKEN }}
|
|
91
94
|
rm -rf src/agentevals/_static
|
|
95
|
+
|
|
96
|
+
push-docker:
|
|
97
|
+
runs-on: ubuntu-latest
|
|
98
|
+
permissions:
|
|
99
|
+
contents: read
|
|
100
|
+
packages: write
|
|
101
|
+
steps:
|
|
102
|
+
- uses: actions/checkout@v6
|
|
103
|
+
|
|
104
|
+
- name: Login to GitHub Container Registry
|
|
105
|
+
uses: docker/login-action@v4
|
|
106
|
+
with:
|
|
107
|
+
registry: ghcr.io
|
|
108
|
+
username: ${{ github.actor }}
|
|
109
|
+
password: ${{ secrets.GITHUB_TOKEN }}
|
|
110
|
+
|
|
111
|
+
- name: Set up QEMU
|
|
112
|
+
uses: docker/setup-qemu-action@v4
|
|
113
|
+
|
|
114
|
+
- name: Set up Docker Buildx
|
|
115
|
+
uses: docker/setup-buildx-action@v4
|
|
116
|
+
|
|
117
|
+
- name: Set appVersion in Chart.yaml
|
|
118
|
+
run: |
|
|
119
|
+
VERSION="${TAG#v}"
|
|
120
|
+
sed -i "s/^appVersion:.*/appVersion: \"$VERSION\"/" charts/agentevals/Chart.yaml
|
|
121
|
+
env:
|
|
122
|
+
TAG: ${{ github.event.inputs.tag || github.ref_name }}
|
|
123
|
+
|
|
124
|
+
- name: Build and push
|
|
125
|
+
run: |
|
|
126
|
+
VERSION="${TAG#v}"
|
|
127
|
+
make build-docker \
|
|
128
|
+
DOCKER_REGISTRY="ghcr.io/${{ github.repository_owner }}" \
|
|
129
|
+
DOCKER_TAG="$VERSION"
|
|
130
|
+
env:
|
|
131
|
+
TAG: ${{ github.event.inputs.tag || github.ref_name }}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# syntax=docker/dockerfile:1
|
|
2
|
+
|
|
3
|
+
FROM node:25-bookworm-slim AS ui
|
|
4
|
+
WORKDIR /build/ui
|
|
5
|
+
COPY ui/package.json ui/package-lock.json ./
|
|
6
|
+
# Skip lifecycle scripts during ci, then rebuild esbuild in its own layer — avoids ETXTBSY when
|
|
7
|
+
# install.js execs the binary while overlayfs still has the file busy (common with BuildKit).
|
|
8
|
+
RUN npm ci --ignore-scripts
|
|
9
|
+
RUN npm rebuild esbuild
|
|
10
|
+
COPY ui/ ./
|
|
11
|
+
RUN npm run build
|
|
12
|
+
|
|
13
|
+
FROM python:3.14-slim-bookworm
|
|
14
|
+
|
|
15
|
+
WORKDIR /app
|
|
16
|
+
|
|
17
|
+
# Install uv binary only (no pip); same approach as astral-sh/uv's Dockerfile.
|
|
18
|
+
# https://github.com/astral-sh/uv/blob/6d889fd53d5c108d304c5a4085eb3140ec6a9cdb/Dockerfile#L21
|
|
19
|
+
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
|
20
|
+
|
|
21
|
+
COPY pyproject.toml uv.lock README.md ./
|
|
22
|
+
COPY packages ./packages
|
|
23
|
+
COPY src ./src
|
|
24
|
+
|
|
25
|
+
COPY --from=ui /build/ui/dist ./src/agentevals/_static
|
|
26
|
+
|
|
27
|
+
RUN uv sync --frozen --no-dev --extra live \
|
|
28
|
+
&& groupadd --gid 1000 app \
|
|
29
|
+
&& useradd --uid 1000 --gid app --home-dir /app --no-log-init app \
|
|
30
|
+
&& chown -R app:app /app
|
|
31
|
+
|
|
32
|
+
USER app
|
|
33
|
+
ENV PATH="/app/.venv/bin:$PATH"
|
|
34
|
+
ENV AGENTEVALS_SERVER_URL=http://127.0.0.1:8001
|
|
35
|
+
|
|
36
|
+
EXPOSE 8001 4318 8080
|
|
37
|
+
|
|
38
|
+
CMD ["agentevals", "serve", "--host", "0.0.0.0", "--port", "8001", "--otlp-port", "4318", "--mcp-port", "8080"]
|
|
@@ -1,11 +1,22 @@
|
|
|
1
1
|
VERSION := $(shell grep '^version' pyproject.toml | cut -d'"' -f2)
|
|
2
2
|
WHEEL := dist/agentevals_cli-$(VERSION)-py3-none-any.whl
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
DOCKER_REGISTRY ?= soloio
|
|
5
|
+
DOCKER_IMAGE ?= agentevals
|
|
6
|
+
DOCKER_TAG ?= $(VERSION)
|
|
7
|
+
DOCKER_IMAGE_REF := $(if $(DOCKER_REGISTRY),$(DOCKER_REGISTRY:%/=%)/$(DOCKER_IMAGE),$(DOCKER_IMAGE))
|
|
8
|
+
|
|
9
|
+
# Multi-arch build (requires docker buildx). Manifest lists must be pushed — use build-docker-local for a single-arch --load.
|
|
10
|
+
PLATFORMS ?= linux/amd64,linux/arm64
|
|
11
|
+
|
|
12
|
+
.PHONY: build build-bundle build-docker build-ui release clean dev-backend dev-frontend dev-bundle test test-unit test-integration test-e2e
|
|
5
13
|
|
|
6
14
|
build:
|
|
7
15
|
uv build
|
|
8
16
|
|
|
17
|
+
build-docker:
|
|
18
|
+
docker buildx build --platform $(PLATFORMS) -t $(DOCKER_IMAGE_REF):$(DOCKER_TAG) --push .
|
|
19
|
+
|
|
9
20
|
build-ui:
|
|
10
21
|
cd ui && npm ci && npm run build
|
|
11
22
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
1. UI and API are available at port {{ .Values.service.http.port }} (Service port name: http).
|
|
2
|
+
2. OTLP HTTP receiver: port {{ .Values.service.otlpHttp.port }} (OTEL_EXPORTER_OTLP_ENDPOINT=http://<service>:{{ .Values.service.otlpHttp.port }}).
|
|
3
|
+
3. MCP (Streamable HTTP): port {{ .Values.service.mcp.port }}, path /mcp (e.g. http://<service>:{{ .Values.service.mcp.port }}/mcp).
|
|
4
|
+
{{- if .Values.ephemeralVolume.enabled }}
|
|
5
|
+
4. An emptyDir is mounted at /tmp with HOME=/tmp/agentevals-home (ephemeral; lost on pod restart). Set ephemeralVolume.enabled=false and readOnlyRootFilesystem=false if you need a writable root without this mount.
|
|
6
|
+
{{- end }}
|
|
7
|
+
|
|
8
|
+
Get the Service URL:
|
|
9
|
+
export POD_NAME=$(kubectl get pods --namespace {{ include "agentevals.namespace" . }} -l "app.kubernetes.io/name={{ include "agentevals.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
|
10
|
+
kubectl --namespace {{ include "agentevals.namespace" . }} port-forward $POD_NAME {{ .Values.service.http.port }}:{{ .Values.service.http.port }}
|
|
11
|
+
|
|
12
|
+
Health check: GET http://<pod-ip>:{{ .Values.service.http.containerPort }}/api/health
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{{- define "agentevals.name" -}}
|
|
2
|
+
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
|
3
|
+
{{- end }}
|
|
4
|
+
|
|
5
|
+
{{- define "agentevals.fullname" -}}
|
|
6
|
+
{{- if .Values.fullnameOverride }}
|
|
7
|
+
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
|
8
|
+
{{- else }}
|
|
9
|
+
{{- $name := default .Chart.Name .Values.nameOverride }}
|
|
10
|
+
{{- if contains $name .Release.Name }}
|
|
11
|
+
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
|
12
|
+
{{- else }}
|
|
13
|
+
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
|
14
|
+
{{- end }}
|
|
15
|
+
{{- end }}
|
|
16
|
+
{{- end }}
|
|
17
|
+
|
|
18
|
+
{{- define "agentevals.namespace" -}}
|
|
19
|
+
{{- default .Release.Namespace .Values.namespaceOverride }}
|
|
20
|
+
{{- end }}
|
|
21
|
+
|
|
22
|
+
{{- define "agentevals.chart" -}}
|
|
23
|
+
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
|
24
|
+
{{- end }}
|
|
25
|
+
|
|
26
|
+
{{- define "agentevals.image" -}}
|
|
27
|
+
{{- $registry := .Values.image.registry | default .Values.registry -}}
|
|
28
|
+
{{- $tag := .Values.image.tag | default .Values.tag | default .Chart.AppVersion -}}
|
|
29
|
+
{{- if $registry -}}
|
|
30
|
+
{{- printf "%s/%s:%s" $registry .Values.image.repository $tag -}}
|
|
31
|
+
{{- else -}}
|
|
32
|
+
{{- printf "%s:%s" .Values.image.repository $tag -}}
|
|
33
|
+
{{- end -}}
|
|
34
|
+
{{- end }}
|
|
35
|
+
|
|
36
|
+
{{- define "agentevals.labels" -}}
|
|
37
|
+
helm.sh/chart: {{ include "agentevals.chart" . }}
|
|
38
|
+
{{ include "agentevals.selectorLabels" . }}
|
|
39
|
+
{{- if .Chart.AppVersion }}
|
|
40
|
+
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
|
41
|
+
{{- end }}
|
|
42
|
+
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
|
43
|
+
app.kubernetes.io/part-of: agentevals
|
|
44
|
+
{{- end }}
|
|
45
|
+
|
|
46
|
+
{{- define "agentevals.selectorLabels" -}}
|
|
47
|
+
app.kubernetes.io/name: {{ include "agentevals.name" . }}
|
|
48
|
+
app.kubernetes.io/instance: {{ .Release.Name }}
|
|
49
|
+
{{- end }}
|
|
50
|
+
|
|
51
|
+
{{- define "agentevals.serviceAccountName" -}}
|
|
52
|
+
{{- if .Values.serviceAccount.create }}
|
|
53
|
+
{{- default (include "agentevals.fullname" .) .Values.serviceAccount.name }}
|
|
54
|
+
{{- else }}
|
|
55
|
+
{{- default "default" .Values.serviceAccount.name }}
|
|
56
|
+
{{- end }}
|
|
57
|
+
{{- end }}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
apiVersion: apps/v1
|
|
2
|
+
kind: Deployment
|
|
3
|
+
metadata:
|
|
4
|
+
name: {{ include "agentevals.fullname" . }}
|
|
5
|
+
namespace: {{ include "agentevals.namespace" . }}
|
|
6
|
+
labels:
|
|
7
|
+
{{- include "agentevals.labels" . | nindent 4 }}
|
|
8
|
+
spec:
|
|
9
|
+
replicas: {{ .Values.replicaCount }}
|
|
10
|
+
selector:
|
|
11
|
+
matchLabels:
|
|
12
|
+
{{- include "agentevals.selectorLabels" . | nindent 6 }}
|
|
13
|
+
template:
|
|
14
|
+
metadata:
|
|
15
|
+
{{- with .Values.podAnnotations }}
|
|
16
|
+
annotations:
|
|
17
|
+
{{- toYaml . | nindent 8 }}
|
|
18
|
+
{{- end }}
|
|
19
|
+
labels:
|
|
20
|
+
{{- include "agentevals.selectorLabels" . | nindent 8 }}
|
|
21
|
+
{{- with .Values.podLabels }}
|
|
22
|
+
{{- toYaml . | nindent 8 }}
|
|
23
|
+
{{- end }}
|
|
24
|
+
spec:
|
|
25
|
+
{{- with .Values.imagePullSecrets }}
|
|
26
|
+
imagePullSecrets:
|
|
27
|
+
{{- toYaml . | nindent 8 }}
|
|
28
|
+
{{- end }}
|
|
29
|
+
securityContext:
|
|
30
|
+
{{- toYaml .Values.podSecurityContext | nindent 8 }}
|
|
31
|
+
serviceAccountName: {{ include "agentevals.serviceAccountName" . }}
|
|
32
|
+
{{- if .Values.ephemeralVolume.enabled }}
|
|
33
|
+
volumes:
|
|
34
|
+
- name: agentevals-tmp
|
|
35
|
+
{{- if or .Values.ephemeralVolume.sizeLimit (eq .Values.ephemeralVolume.medium "Memory") }}
|
|
36
|
+
emptyDir:
|
|
37
|
+
{{- if eq .Values.ephemeralVolume.medium "Memory" }}
|
|
38
|
+
medium: Memory
|
|
39
|
+
{{- end }}
|
|
40
|
+
{{- with .Values.ephemeralVolume.sizeLimit }}
|
|
41
|
+
sizeLimit: {{ . }}
|
|
42
|
+
{{- end }}
|
|
43
|
+
{{- else }}
|
|
44
|
+
emptyDir: {}
|
|
45
|
+
{{- end }}
|
|
46
|
+
{{- end }}
|
|
47
|
+
containers:
|
|
48
|
+
- name: agentevals
|
|
49
|
+
image: {{ include "agentevals.image" . | quote }}
|
|
50
|
+
imagePullPolicy: {{ .Values.image.pullPolicy | default .Values.imagePullPolicy }}
|
|
51
|
+
{{- if .Values.command }}
|
|
52
|
+
command:
|
|
53
|
+
{{- toYaml .Values.command | nindent 12 }}
|
|
54
|
+
{{- end }}
|
|
55
|
+
{{- if .Values.args }}
|
|
56
|
+
args:
|
|
57
|
+
{{- toYaml .Values.args | nindent 12 }}
|
|
58
|
+
{{- end }}
|
|
59
|
+
env:
|
|
60
|
+
- name: AGENTEVALS_SERVER_URL
|
|
61
|
+
value: "http://127.0.0.1:{{ .Values.service.http.containerPort }}"
|
|
62
|
+
{{- if .Values.ephemeralVolume.enabled }}
|
|
63
|
+
- name: TMPDIR
|
|
64
|
+
value: "/tmp"
|
|
65
|
+
- name: HOME
|
|
66
|
+
value: "/tmp/agentevals-home"
|
|
67
|
+
{{- end }}
|
|
68
|
+
{{- with .Values.env }}
|
|
69
|
+
{{- toYaml . | nindent 12 }}
|
|
70
|
+
{{- end }}
|
|
71
|
+
{{- with .Values.envFrom }}
|
|
72
|
+
envFrom:
|
|
73
|
+
{{- toYaml . | nindent 12 }}
|
|
74
|
+
{{- end }}
|
|
75
|
+
ports:
|
|
76
|
+
- name: http
|
|
77
|
+
containerPort: {{ .Values.service.http.containerPort }}
|
|
78
|
+
protocol: TCP
|
|
79
|
+
- name: otlp-http
|
|
80
|
+
containerPort: {{ .Values.service.otlpHttp.containerPort }}
|
|
81
|
+
protocol: TCP
|
|
82
|
+
- name: mcp
|
|
83
|
+
containerPort: {{ .Values.service.mcp.containerPort }}
|
|
84
|
+
protocol: TCP
|
|
85
|
+
resources:
|
|
86
|
+
{{- toYaml .Values.resources | nindent 12 }}
|
|
87
|
+
securityContext:
|
|
88
|
+
{{- $sc := deepCopy .Values.securityContext }}
|
|
89
|
+
{{- if not .Values.ephemeralVolume.enabled }}
|
|
90
|
+
{{- $_ := set $sc "readOnlyRootFilesystem" false }}
|
|
91
|
+
{{- end }}
|
|
92
|
+
{{- toYaml $sc | nindent 12 }}
|
|
93
|
+
startupProbe:
|
|
94
|
+
httpGet:
|
|
95
|
+
path: /api/health
|
|
96
|
+
port: http
|
|
97
|
+
failureThreshold: 60
|
|
98
|
+
periodSeconds: 10
|
|
99
|
+
timeoutSeconds: 5
|
|
100
|
+
readinessProbe:
|
|
101
|
+
httpGet:
|
|
102
|
+
path: /api/health
|
|
103
|
+
port: http
|
|
104
|
+
initialDelaySeconds: 5
|
|
105
|
+
periodSeconds: 10
|
|
106
|
+
livenessProbe:
|
|
107
|
+
httpGet:
|
|
108
|
+
path: /api/health
|
|
109
|
+
port: http
|
|
110
|
+
initialDelaySeconds: 15
|
|
111
|
+
periodSeconds: 20
|
|
112
|
+
{{- if .Values.ephemeralVolume.enabled }}
|
|
113
|
+
volumeMounts:
|
|
114
|
+
- name: agentevals-tmp
|
|
115
|
+
mountPath: /tmp
|
|
116
|
+
{{- end }}
|
|
117
|
+
{{- with .Values.nodeSelector }}
|
|
118
|
+
nodeSelector:
|
|
119
|
+
{{- toYaml . | nindent 8 }}
|
|
120
|
+
{{- end }}
|
|
121
|
+
{{- with .Values.affinity }}
|
|
122
|
+
affinity:
|
|
123
|
+
{{- toYaml . | nindent 8 }}
|
|
124
|
+
{{- end }}
|
|
125
|
+
{{- with .Values.tolerations }}
|
|
126
|
+
tolerations:
|
|
127
|
+
{{- toYaml . | nindent 8 }}
|
|
128
|
+
{{- end }}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
apiVersion: v1
|
|
2
|
+
kind: Service
|
|
3
|
+
metadata:
|
|
4
|
+
name: {{ include "agentevals.fullname" . }}
|
|
5
|
+
namespace: {{ include "agentevals.namespace" . }}
|
|
6
|
+
labels:
|
|
7
|
+
{{- include "agentevals.labels" . | nindent 4 }}
|
|
8
|
+
spec:
|
|
9
|
+
type: {{ .Values.service.type }}
|
|
10
|
+
ports:
|
|
11
|
+
- name: http
|
|
12
|
+
port: {{ .Values.service.http.port }}
|
|
13
|
+
targetPort: http
|
|
14
|
+
protocol: TCP
|
|
15
|
+
- name: otlp-http
|
|
16
|
+
port: {{ .Values.service.otlpHttp.port }}
|
|
17
|
+
targetPort: otlp-http
|
|
18
|
+
protocol: TCP
|
|
19
|
+
- name: mcp
|
|
20
|
+
port: {{ .Values.service.mcp.port }}
|
|
21
|
+
targetPort: mcp
|
|
22
|
+
protocol: TCP
|
|
23
|
+
selector:
|
|
24
|
+
{{- include "agentevals.selectorLabels" . | nindent 4 }}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{{- if .Values.serviceAccount.create -}}
|
|
2
|
+
apiVersion: v1
|
|
3
|
+
kind: ServiceAccount
|
|
4
|
+
metadata:
|
|
5
|
+
name: {{ include "agentevals.serviceAccountName" . }}
|
|
6
|
+
namespace: {{ include "agentevals.namespace" . }}
|
|
7
|
+
labels:
|
|
8
|
+
{{- include "agentevals.labels" . | nindent 4 }}
|
|
9
|
+
{{- with .Values.serviceAccount.annotations }}
|
|
10
|
+
annotations:
|
|
11
|
+
{{- toYaml . | nindent 4 }}
|
|
12
|
+
{{- end }}
|
|
13
|
+
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
|
|
14
|
+
{{- end }}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Global
|
|
3
|
+
# ==============================================================================
|
|
4
|
+
|
|
5
|
+
# -- Number of replicas. Only 1 is supported (no shared job state across pods).
|
|
6
|
+
replicaCount: 1
|
|
7
|
+
|
|
8
|
+
# -- Global container image registry (prepended to image.repository)
|
|
9
|
+
registry: ghcr.io
|
|
10
|
+
|
|
11
|
+
# -- Global image tag override (defaults to Chart.appVersion)
|
|
12
|
+
tag: ""
|
|
13
|
+
|
|
14
|
+
# -- Global image pull policy
|
|
15
|
+
imagePullPolicy: IfNotPresent
|
|
16
|
+
|
|
17
|
+
# -- Image pull secrets
|
|
18
|
+
imagePullSecrets: []
|
|
19
|
+
|
|
20
|
+
# -- Override the chart name
|
|
21
|
+
nameOverride: ""
|
|
22
|
+
|
|
23
|
+
# -- Override the full resource name
|
|
24
|
+
fullnameOverride: ""
|
|
25
|
+
|
|
26
|
+
# -- Override the release namespace
|
|
27
|
+
namespaceOverride: ""
|
|
28
|
+
|
|
29
|
+
# ==============================================================================
|
|
30
|
+
# Image
|
|
31
|
+
# ==============================================================================
|
|
32
|
+
|
|
33
|
+
image:
|
|
34
|
+
# -- Container image registry (overrides global registry)
|
|
35
|
+
registry: ""
|
|
36
|
+
# -- Container image repository (org/name, without registry prefix)
|
|
37
|
+
repository: agentevals-dev/agentevals
|
|
38
|
+
# -- Container image tag (defaults to global tag, then Chart.appVersion)
|
|
39
|
+
tag: ""
|
|
40
|
+
# -- Container image pull policy (defaults to global imagePullPolicy)
|
|
41
|
+
pullPolicy: ""
|
|
42
|
+
|
|
43
|
+
# ==============================================================================
|
|
44
|
+
# Service Account
|
|
45
|
+
# ==============================================================================
|
|
46
|
+
|
|
47
|
+
serviceAccount:
|
|
48
|
+
# -- Create a ServiceAccount
|
|
49
|
+
create: false
|
|
50
|
+
# -- Automount the service account token
|
|
51
|
+
automount: true
|
|
52
|
+
# -- ServiceAccount annotations
|
|
53
|
+
annotations: {}
|
|
54
|
+
# -- ServiceAccount name override
|
|
55
|
+
name: ""
|
|
56
|
+
|
|
57
|
+
# ==============================================================================
|
|
58
|
+
# Pod
|
|
59
|
+
# ==============================================================================
|
|
60
|
+
|
|
61
|
+
# -- Pod annotations
|
|
62
|
+
podAnnotations: {}
|
|
63
|
+
|
|
64
|
+
# -- Additional pod labels
|
|
65
|
+
podLabels: {}
|
|
66
|
+
|
|
67
|
+
# -- Pod security context
|
|
68
|
+
podSecurityContext:
|
|
69
|
+
fsGroup: 1000
|
|
70
|
+
|
|
71
|
+
# -- Container security context.
|
|
72
|
+
# When ephemeralVolume.enabled is true, emptyDir at /tmp keeps the root
|
|
73
|
+
# filesystem read-only safely. When ephemeralVolume.enabled is false the chart
|
|
74
|
+
# forces readOnlyRootFilesystem to false so /tmp stays writable.
|
|
75
|
+
securityContext:
|
|
76
|
+
allowPrivilegeEscalation: false
|
|
77
|
+
capabilities:
|
|
78
|
+
drop:
|
|
79
|
+
- ALL
|
|
80
|
+
readOnlyRootFilesystem: true
|
|
81
|
+
runAsNonRoot: true
|
|
82
|
+
runAsUser: 1000
|
|
83
|
+
|
|
84
|
+
# ==============================================================================
|
|
85
|
+
# Ephemeral Volume
|
|
86
|
+
# ==============================================================================
|
|
87
|
+
|
|
88
|
+
# -- Writable scratch space at /tmp (trace uploads, MCP temp files, streaming
|
|
89
|
+
# JSONL). HOME is set to /tmp/agentevals-home so Path.home()/.cache stays
|
|
90
|
+
# writable. When disabled the chart sets readOnlyRootFilesystem to false.
|
|
91
|
+
ephemeralVolume:
|
|
92
|
+
# -- Enable emptyDir mount at /tmp
|
|
93
|
+
enabled: true
|
|
94
|
+
# -- Size limit for the emptyDir (Kubernetes 1.22+), e.g. "2Gi"
|
|
95
|
+
sizeLimit: ""
|
|
96
|
+
# -- Use "Memory" for tmpfs (faster, counts against memory limits); leave "" for node disk
|
|
97
|
+
medium: ""
|
|
98
|
+
|
|
99
|
+
# ==============================================================================
|
|
100
|
+
# Service
|
|
101
|
+
# ==============================================================================
|
|
102
|
+
|
|
103
|
+
service:
|
|
104
|
+
# -- Service type
|
|
105
|
+
type: ClusterIP
|
|
106
|
+
# -- UI / API HTTP port
|
|
107
|
+
http:
|
|
108
|
+
port: 8001
|
|
109
|
+
containerPort: 8001
|
|
110
|
+
# -- OTLP HTTP receiver port
|
|
111
|
+
otlpHttp:
|
|
112
|
+
port: 4318
|
|
113
|
+
containerPort: 4318
|
|
114
|
+
# -- MCP (Streamable HTTP) port
|
|
115
|
+
mcp:
|
|
116
|
+
port: 8080
|
|
117
|
+
containerPort: 8080
|
|
118
|
+
|
|
119
|
+
# ==============================================================================
|
|
120
|
+
# Resources
|
|
121
|
+
# ==============================================================================
|
|
122
|
+
|
|
123
|
+
# -- Container resource requests and limits
|
|
124
|
+
resources: {}
|
|
125
|
+
|
|
126
|
+
# ==============================================================================
|
|
127
|
+
# Scheduling
|
|
128
|
+
# ==============================================================================
|
|
129
|
+
|
|
130
|
+
# -- Node selector
|
|
131
|
+
nodeSelector: {}
|
|
132
|
+
|
|
133
|
+
# -- Tolerations
|
|
134
|
+
tolerations: []
|
|
135
|
+
|
|
136
|
+
# -- Affinity rules
|
|
137
|
+
affinity: {}
|
|
138
|
+
|
|
139
|
+
# ==============================================================================
|
|
140
|
+
# Overrides
|
|
141
|
+
# ==============================================================================
|
|
142
|
+
|
|
143
|
+
# -- Override the image entrypoint
|
|
144
|
+
command: []
|
|
145
|
+
|
|
146
|
+
# -- Override the image arguments
|
|
147
|
+
args: []
|
|
148
|
+
|
|
149
|
+
# -- Extra environment variables appended to the container env block
|
|
150
|
+
env: []
|
|
151
|
+
|
|
152
|
+
# -- Extra envFrom sources (ConfigMapRef, SecretRef)
|
|
153
|
+
envFrom: []
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Run a dice-rolling OpenAI Agents SDK agent with OTLP export — no agentevals SDK.
|
|
2
|
+
|
|
3
|
+
Demonstrates zero-code integration: any OTel-instrumented agent streams
|
|
4
|
+
traces to agentevals by pointing the OTLP exporter at the receiver.
|
|
5
|
+
|
|
6
|
+
Unlike the LangChain and Strands examples, this one is fully self-contained:
|
|
7
|
+
the agent code lives inline with no cross-folder imports.
|
|
8
|
+
|
|
9
|
+
Prerequisites:
|
|
10
|
+
1. pip install -r requirements.txt
|
|
11
|
+
2. agentevals serve --dev
|
|
12
|
+
3. export OPENAI_API_KEY="your-key-here"
|
|
13
|
+
|
|
14
|
+
Usage:
|
|
15
|
+
python examples/zero-code-examples/openai-agents/run.py
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
import random
|
|
20
|
+
|
|
21
|
+
from agents import Agent, Runner, function_tool
|
|
22
|
+
from dotenv import load_dotenv
|
|
23
|
+
from opentelemetry import trace
|
|
24
|
+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
|
25
|
+
from opentelemetry.instrumentation.openai_agents import OpenAIAgentsInstrumentor
|
|
26
|
+
from opentelemetry.sdk.resources import Resource
|
|
27
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
28
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
29
|
+
|
|
30
|
+
load_dotenv(override=True)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@function_tool
def roll_die(sides: int) -> int:
    """Roll a die with the given number of sides and return the result.

    Args:
        sides: Number of faces on the die; must be at least 1.

    Returns:
        A random integer N with 1 <= N <= sides.

    Raises:
        ValueError: If ``sides`` is less than 1.
    """
    if sides < 1:
        # random.randint(1, sides) would otherwise fail with an opaque
        # "empty range" ValueError; raise the same exception type with a
        # message that names the offending argument.
        raise ValueError(f"sides must be at least 1, got {sides}")
    return random.randint(1, sides)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@function_tool
def check_prime(number: int) -> bool:
    """Return True if the number is prime, False otherwise.

    Args:
        number: Integer to test. Values below 2 are reported as not prime.

    Returns:
        True when no integer in ``[2, isqrt(number)]`` divides ``number``.
    """
    # Function-scope import keeps this block self-contained.
    import math

    if number < 2:
        return False
    # math.isqrt is exact for arbitrarily large ints, whereas
    # int(number ** 0.5) goes through a float and can land below the true
    # root (skipping the last candidate divisor) or overflow outright.
    limit = math.isqrt(number)
    return all(number % divisor for divisor in range(2, limit + 1))
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def main():
    """Run the dice agent through a scripted conversation and export traces over OTLP.

    Prints a notice and returns early when OPENAI_API_KEY is not set.
    Otherwise it configures a tracer provider with a batching OTLP span
    exporter, instruments the OpenAI Agents SDK, sends each test query to the
    agent while carrying the conversation history forward, and finally
    flushes pending spans, reporting whether the flush completed.
    """
    if not os.getenv("OPENAI_API_KEY"):
        print("OPENAI_API_KEY not set.")
        return

    # Informational only: the exporter below is constructed without
    # arguments, so this value is reported but not passed to it.
    # NOTE(review): relies on the exporter resolving the same env var/default.
    endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
    print(f"OTLP endpoint: {endpoint}")

    # setdefault keeps any value the caller already exported.
    os.environ.setdefault("OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "span_and_event")

    os.environ.setdefault(
        "OTEL_RESOURCE_ATTRIBUTES",
        "agentevals.eval_set_id=openai_agents_eval,agentevals.session_name=openai-agents-zero-code",
    )

    # Created after the environment defaults above are in place; presumably
    # Resource.create() picks up OTEL_RESOURCE_ATTRIBUTES -- verify against
    # the OpenTelemetry SDK documentation.
    resource = Resource.create()

    tracer_provider = TracerProvider(resource=resource)
    tracer_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(), schedule_delay_millis=1000))
    trace.set_tracer_provider(tracer_provider)

    OpenAIAgentsInstrumentor().instrument()

    agent = Agent(
        name="Dice Agent",
        instructions="You are a helpful assistant. You can roll dice and check if numbers are prime.",
        tools=[roll_die, check_prime],
    )

    test_queries = [
        "Hi! Can you help me?",
        "Roll a 20-sided die for me",
        "Is the number you rolled prime?",
    ]

    conversation_input: list = []

    try:
        for i, query in enumerate(test_queries, 1):
            print(f"\n[{i}/{len(test_queries)}] User: {query}")

            conversation_input.append({"role": "user", "content": query})
            result = Runner.run_sync(agent, conversation_input)

            agent_response = result.final_output or ""
            print(f" Agent: {agent_response}")

            # Carry the full exchange into the next turn so follow-up
            # questions can refer to earlier results.
            conversation_input = result.to_input_list()
    finally:
        print()
        # force_flush() reports whether the flush completed; only claim
        # success when it did, instead of printing it unconditionally.
        if tracer_provider.force_flush():
            print("All traces flushed to OTLP receiver.")
        else:
            print("Warning: trace flush to OTLP receiver did not complete.")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
if __name__ == "__main__":
|
|
105
|
+
main()
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "agentevals-cli"
|
|
7
|
-
version = "0.6.0"
|
|
7
|
+
version = "0.6.1"
|
|
8
8
|
description = "Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|