agentevals-cli 0.6.0__tar.gz → 0.6.1__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (226)
  1. agentevals_cli-0.6.1/.dockerignore +16 -0
  2. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/workflows/release.yml +40 -0
  3. agentevals_cli-0.6.1/Dockerfile +38 -0
  4. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/Makefile +12 -1
  5. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/PKG-INFO +1 -1
  6. agentevals_cli-0.6.1/charts/agentevals/Chart.yaml +6 -0
  7. agentevals_cli-0.6.1/charts/agentevals/templates/NOTES.txt +12 -0
  8. agentevals_cli-0.6.1/charts/agentevals/templates/_helpers.tpl +57 -0
  9. agentevals_cli-0.6.1/charts/agentevals/templates/deployment.yaml +128 -0
  10. agentevals_cli-0.6.1/charts/agentevals/templates/service.yaml +24 -0
  11. agentevals_cli-0.6.1/charts/agentevals/templates/serviceaccount.yaml +14 -0
  12. agentevals_cli-0.6.1/charts/agentevals/values.yaml +153 -0
  13. agentevals_cli-0.6.1/examples/zero-code-examples/openai-agents/requirements.txt +6 -0
  14. agentevals_cli-0.6.1/examples/zero-code-examples/openai-agents/run.py +105 -0
  15. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/pyproject.toml +1 -1
  16. agentevals_cli-0.6.1/src/agentevals/_static/assets/index-lHPO8TkI.js +342 -0
  17. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/_static/index.html +1 -1
  18. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/app.py +14 -18
  19. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/debug_routes.py +19 -25
  20. agentevals_cli-0.6.1/src/agentevals/api/dependencies.py +23 -0
  21. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/models.py +20 -0
  22. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/otlp_app.py +4 -4
  23. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/otlp_routes.py +34 -40
  24. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/routes.py +140 -0
  25. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/streaming_routes.py +67 -51
  26. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/cli.py +62 -7
  27. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/converter.py +35 -61
  28. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/extraction.py +25 -2
  29. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/genai_converter.py +37 -98
  30. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/mcp_server.py +3 -2
  31. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/conftest.py +8 -10
  32. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/test_live_agents.py +57 -0
  33. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_api.py +7 -15
  34. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_extraction.py +11 -0
  35. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_otlp_receiver.py +25 -49
  36. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/api/client.ts +29 -1
  37. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/TraceUploadZone.tsx +12 -12
  38. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/TraceCard.tsx +11 -20
  39. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/InspectorHeader.tsx +11 -20
  40. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/InspectorView.tsx +10 -39
  41. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/TraceEditorDrawer.tsx +11 -14
  42. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/context/TraceProvider.tsx +23 -13
  43. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/evalset-builder.ts +10 -36
  44. agentevals_cli-0.6.1/ui/src/lib/trace-helpers.ts +73 -0
  45. agentevals_cli-0.6.1/ui/src/lib/trace-metadata.ts +12 -0
  46. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/trace-patcher.ts +1 -1
  47. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/types.ts +21 -0
  48. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/uv.lock +1 -1
  49. agentevals_cli-0.6.0/src/agentevals/_static/assets/index-Dz2NgC8m.js +0 -343
  50. agentevals_cli-0.6.0/ui/src/lib/trace-converter.ts +0 -734
  51. agentevals_cli-0.6.0/ui/src/lib/trace-metadata.ts +0 -391
  52. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.claude/skills/eval/SKILL.md +0 -0
  53. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.claude/skills/eval/evals/evals.json +0 -0
  54. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.claude/skills/inspect/SKILL.md +0 -0
  55. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.claude/skills/inspect/evals/evals.json +0 -0
  56. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  57. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  58. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  59. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/workflows/ci.yml +0 -0
  60. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.github/workflows/publish-evaluator-sdk.yml +0 -0
  61. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.gitignore +0 -0
  62. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/.mcp.json +0 -0
  63. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/CONTRIBUTING.md +0 -0
  64. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/DEVELOPMENT.md +0 -0
  65. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/LICENSE +0 -0
  66. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/README.md +0 -0
  67. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/assets/logo-color-on-transparent.svg +0 -0
  68. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/assets/logo-color.png +0 -0
  69. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/assets/logo-dark-on-transparent.svg +0 -0
  70. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/custom-evaluators.md +0 -0
  71. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/eval-set-format.md +0 -0
  72. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/otel-compatibility.md +0 -0
  73. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/docs/streaming.md +0 -0
  74. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/README.md +0 -0
  75. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/custom_evaluators/eval_config.yaml +0 -0
  76. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/custom_evaluators/response_quality.py +0 -0
  77. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/custom_evaluators/tool_call_checker.py +0 -0
  78. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/dice_agent/README.md +0 -0
  79. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/dice_agent/agent.py +0 -0
  80. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/dice_agent/eval_set.json +0 -0
  81. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/dice_agent/main.py +0 -0
  82. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/dice_agent/test_streaming.py +0 -0
  83. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/README.md +0 -0
  84. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/agent.py +0 -0
  85. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/eval_set.json +0 -0
  86. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/main.py +0 -0
  87. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/requirements.txt +0 -0
  88. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/langchain_agent/test_streaming.py +0 -0
  89. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/sdk_example/async_example.py +0 -0
  90. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/sdk_example/context_manager_example.py +0 -0
  91. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/sdk_example/decorator_example.py +0 -0
  92. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/sdk_example/requirements.txt +0 -0
  93. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/strands_agent/agent.py +0 -0
  94. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/strands_agent/eval_set.json +0 -0
  95. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/strands_agent/main.py +0 -0
  96. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/strands_agent/requirements.txt +0 -0
  97. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/adk/requirements.txt +0 -0
  98. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/adk/run.py +0 -0
  99. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/langchain/requirements.txt +0 -0
  100. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/langchain/run.py +0 -0
  101. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/strands/requirements.txt +0 -0
  102. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/examples/zero-code-examples/strands/run.py +0 -0
  103. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/flake.lock +0 -0
  104. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/flake.nix +0 -0
  105. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/README.md +0 -0
  106. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/pyproject.toml +0 -0
  107. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py +0 -0
  108. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py +0 -0
  109. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/types.py +0 -0
  110. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/eval_set_helm.json +0 -0
  111. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/evalset_helm_3_2026-02-23.json +0 -0
  112. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/evalset_k8s_2026-02-20.json +0 -0
  113. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/helm.json +0 -0
  114. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/helm_2.json +0 -0
  115. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/helm_3.json +0 -0
  116. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/samples/k8s.json +0 -0
  117. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/__init__.py +0 -0
  118. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/_protocol.py +0 -0
  119. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/_static/assets/index-BqibLiHO.css +0 -0
  120. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/_static/logo.svg +0 -0
  121. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/_static/vite.svg +0 -0
  122. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/api/__init__.py +0 -0
  123. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/builtin_metrics.py +0 -0
  124. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/config.py +0 -0
  125. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/custom_evaluators.py +0 -0
  126. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/eval_config_loader.py +0 -0
  127. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/evaluator/__init__.py +0 -0
  128. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/evaluator/resolver.py +0 -0
  129. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/evaluator/sources.py +0 -0
  130. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/evaluator/templates.py +0 -0
  131. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/evaluator/venv.py +0 -0
  132. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/loader/__init__.py +0 -0
  133. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/loader/base.py +0 -0
  134. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/loader/jaeger.py +0 -0
  135. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/loader/otlp.py +0 -0
  136. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/openai_eval_backend.py +0 -0
  137. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/output.py +0 -0
  138. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/runner.py +0 -0
  139. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/sdk.py +0 -0
  140. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/streaming/__init__.py +0 -0
  141. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/streaming/incremental_processor.py +0 -0
  142. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/streaming/processor.py +0 -0
  143. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/streaming/session.py +0 -0
  144. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/streaming/ws_server.py +0 -0
  145. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/trace_attrs.py +0 -0
  146. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/trace_metrics.py +0 -0
  147. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/utils/__init__.py +0 -0
  148. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/utils/genai_messages.py +0 -0
  149. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/utils/log_buffer.py +0 -0
  150. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/src/agentevals/utils/log_enrichment.py +0 -0
  151. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/__init__.py +0 -0
  152. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/test_evaluation_pipeline.py +0 -0
  153. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/test_session_grouping.py +0 -0
  154. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/integration/test_timing_stress.py +0 -0
  155. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_converter.py +0 -0
  156. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_genai_converter.py +0 -0
  157. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_jaeger_loader.py +0 -0
  158. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_log_enrichment.py +0 -0
  159. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_otlp_loader.py +0 -0
  160. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_output.py +0 -0
  161. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_protocol.py +0 -0
  162. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_runner.py +0 -0
  163. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/tests/test_sdk.py +0 -0
  164. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/.gitignore +0 -0
  165. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/README.md +0 -0
  166. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/eslint.config.js +0 -0
  167. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/index.html +0 -0
  168. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/package-lock.json +0 -0
  169. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/package.json +0 -0
  170. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/public/logo.svg +0 -0
  171. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/public/vite.svg +0 -0
  172. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/App.css +0 -0
  173. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/App.tsx +0 -0
  174. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/assets/react.svg +0 -0
  175. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/annotation-queue/AnnotationDetailPanel.tsx +0 -0
  176. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/annotation-queue/AnnotationQueueView.tsx +0 -0
  177. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/annotation-queue/AnnotationTable.tsx +0 -0
  178. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/bug-report/BugReportModal.tsx +0 -0
  179. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/BuilderHeader.tsx +0 -0
  180. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/BuilderView.tsx +0 -0
  181. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/EvalCaseCard.tsx +0 -0
  182. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/EvalCasesList.tsx +0 -0
  183. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/InvocationEditor.tsx +0 -0
  184. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/JsonPreview.tsx +0 -0
  185. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/MetadataEditor.tsx +0 -0
  186. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/builder/index.ts +0 -0
  187. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/DashboardView.tsx +0 -0
  188. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/MetricScoreCard.tsx +0 -0
  189. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/PerformanceCard.tsx +0 -0
  190. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/PerformanceCharts.tsx +0 -0
  191. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/SummaryStats.tsx +0 -0
  192. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/dashboard/TraceTable.tsx +0 -0
  193. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/ComparisonPanel.tsx +0 -0
  194. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/DataSection.tsx +0 -0
  195. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/InspectorLayout.tsx +0 -0
  196. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/InvocationCard.tsx +0 -0
  197. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/InvocationSummaryPanel.tsx +0 -0
  198. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/MetricResultsSection.tsx +0 -0
  199. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/MetricsComparisonSection.tsx +0 -0
  200. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/PerformanceSection.tsx +0 -0
  201. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/ToolCallList.tsx +0 -0
  202. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/inspector/TrajectoryComparisonDetails.tsx +0 -0
  203. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/sidebar/Sidebar.tsx +0 -0
  204. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/streaming/LiveConversationPanel.tsx +0 -0
  205. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/streaming/LiveMessage.tsx +0 -0
  206. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/streaming/LiveStreamingView.tsx +0 -0
  207. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/streaming/SessionCard.tsx +0 -0
  208. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/streaming/SessionMetadata.tsx +0 -0
  209. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/EvalSetEditorDrawer.tsx +0 -0
  210. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/FileDropZone.tsx +0 -0
  211. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/MetricSelector.tsx +0 -0
  212. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/RawJsonPreview.tsx +0 -0
  213. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/upload/UploadView.tsx +0 -0
  214. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/components/welcome/WelcomeView.tsx +0 -0
  215. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/config.ts +0 -0
  216. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/context/TraceContext.tsx +0 -0
  217. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/index.css +0 -0
  218. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/console-capture.ts +0 -0
  219. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/network-capture.ts +0 -0
  220. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/trace-loader.ts +0 -0
  221. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/lib/utils.ts +0 -0
  222. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/src/main.tsx +0 -0
  223. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/tsconfig.app.json +0 -0
  224. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/tsconfig.json +0 -0
  225. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/tsconfig.node.json +0 -0
  226. {agentevals_cli-0.6.0 → agentevals_cli-0.6.1}/ui/vite.config.ts +0 -0
@@ -0,0 +1,16 @@
1
+ .venv
2
+ **/__pycache__
3
+ *.py[cod]
4
+ .git
5
+ .gitignore
6
+ tests
7
+ .pytest_cache
8
+ .ruff_cache
9
+ htmlcov
10
+ .coverage
11
+ ui/node_modules
12
+ dist
13
+ *.egg-info
14
+ **/*.egg-info
15
+ agents.md
16
+ .cursor
@@ -29,6 +29,9 @@ jobs:
29
29
  cache: npm
30
30
  cache-dependency-path: ui/package-lock.json
31
31
 
32
+ - name: Set version from tag
33
+ run: uv version "${{ github.event.inputs.tag || github.ref_name }}" --package agentevals-cli
34
+
32
35
  - name: Build core and bundled wheels
33
36
  run: make release
34
37
 
@@ -89,3 +92,40 @@ jobs:
89
92
  uv build --package agentevals-cli
90
93
  uv publish dist/* --token ${{ secrets.PYPI_TOKEN }}
91
94
  rm -rf src/agentevals/_static
95
+
96
+ push-docker:
97
+ runs-on: ubuntu-latest
98
+ permissions:
99
+ contents: read
100
+ packages: write
101
+ steps:
102
+ - uses: actions/checkout@v6
103
+
104
+ - name: Login to GitHub Container Registry
105
+ uses: docker/login-action@v4
106
+ with:
107
+ registry: ghcr.io
108
+ username: ${{ github.actor }}
109
+ password: ${{ secrets.GITHUB_TOKEN }}
110
+
111
+ - name: Set up QEMU
112
+ uses: docker/setup-qemu-action@v4
113
+
114
+ - name: Set up Docker Buildx
115
+ uses: docker/setup-buildx-action@v4
116
+
117
+ - name: Set appVersion in Chart.yaml
118
+ run: |
119
+ VERSION="${TAG#v}"
120
+ sed -i "s/^appVersion:.*/appVersion: \"$VERSION\"/" charts/agentevals/Chart.yaml
121
+ env:
122
+ TAG: ${{ github.event.inputs.tag || github.ref_name }}
123
+
124
+ - name: Build and push
125
+ run: |
126
+ VERSION="${TAG#v}"
127
+ make build-docker \
128
+ DOCKER_REGISTRY="ghcr.io/${{ github.repository_owner }}" \
129
+ DOCKER_TAG="$VERSION"
130
+ env:
131
+ TAG: ${{ github.event.inputs.tag || github.ref_name }}
@@ -0,0 +1,38 @@
1
+ # syntax=docker/dockerfile:1
2
+
3
+ FROM node:25-bookworm-slim AS ui
4
+ WORKDIR /build/ui
5
+ COPY ui/package.json ui/package-lock.json ./
6
+ # Skip lifecycle scripts during ci, then rebuild esbuild in its own layer — avoids ETXTBSY when
7
+ # install.js execs the binary while overlayfs still has the file busy (common with BuildKit).
8
+ RUN npm ci --ignore-scripts
9
+ RUN npm rebuild esbuild
10
+ COPY ui/ ./
11
+ RUN npm run build
12
+
13
+ FROM python:3.14-slim-bookworm
14
+
15
+ WORKDIR /app
16
+
17
+ # Install uv binary only (no pip); same approach as astral-sh/uv's Dockerfile.
18
+ # https://github.com/astral-sh/uv/blob/6d889fd53d5c108d304c5a4085eb3140ec6a9cdb/Dockerfile#L21
19
+ COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
20
+
21
+ COPY pyproject.toml uv.lock README.md ./
22
+ COPY packages ./packages
23
+ COPY src ./src
24
+
25
+ COPY --from=ui /build/ui/dist ./src/agentevals/_static
26
+
27
+ RUN uv sync --frozen --no-dev --extra live \
28
+ && groupadd --gid 1000 app \
29
+ && useradd --uid 1000 --gid app --home-dir /app --no-log-init app \
30
+ && chown -R app:app /app
31
+
32
+ USER app
33
+ ENV PATH="/app/.venv/bin:$PATH"
34
+ ENV AGENTEVALS_SERVER_URL=http://127.0.0.1:8001
35
+
36
+ EXPOSE 8001 4318 8080
37
+
38
+ CMD ["agentevals", "serve", "--host", "0.0.0.0", "--port", "8001", "--otlp-port", "4318", "--mcp-port", "8080"]
@@ -1,11 +1,22 @@
1
1
  VERSION := $(shell grep '^version' pyproject.toml | cut -d'"' -f2)
2
2
  WHEEL := dist/agentevals_cli-$(VERSION)-py3-none-any.whl
3
3
 
4
- .PHONY: build build-bundle build-ui release clean dev-backend dev-frontend dev-bundle test test-unit test-integration test-e2e
4
+ DOCKER_REGISTRY ?= soloio
5
+ DOCKER_IMAGE ?= agentevals
6
+ DOCKER_TAG ?= $(VERSION)
7
+ DOCKER_IMAGE_REF := $(if $(DOCKER_REGISTRY),$(DOCKER_REGISTRY:%/=%)/$(DOCKER_IMAGE),$(DOCKER_IMAGE))
8
+
9
+ # Multi-arch build (requires docker buildx). Manifest lists must be pushed — use build-docker-local for a single-arch --load.
10
+ PLATFORMS ?= linux/amd64,linux/arm64
11
+
12
+ .PHONY: build build-bundle build-docker build-ui release clean dev-backend dev-frontend dev-bundle test test-unit test-integration test-e2e
5
13
 
6
14
  build:
7
15
  uv build
8
16
 
17
+ build-docker:
18
+ docker buildx build --platform $(PLATFORMS) -t $(DOCKER_IMAGE_REF):$(DOCKER_TAG) --push .
19
+
9
20
  build-ui:
10
21
  cd ui && npm ci && npm run build
11
22
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentevals-cli
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.11
@@ -0,0 +1,6 @@
1
+ apiVersion: v2
2
+ name: agentevals
3
+ description: agentevals web UI, OTLP HTTP receiver, and MCP (Streamable HTTP)
4
+ type: application
5
+ version: 0.1.0
6
+ appVersion: "0.5.2"
@@ -0,0 +1,12 @@
1
+ 1. UI and API are available at port {{ .Values.service.http.port }} (Service port name: http).
2
+ 2. OTLP HTTP receiver: port {{ .Values.service.otlpHttp.port }} (OTEL_EXPORTER_OTLP_ENDPOINT=http://<service>:{{ .Values.service.otlpHttp.port }}).
3
+ 3. MCP (Streamable HTTP): port {{ .Values.service.mcp.port }}, path /mcp (e.g. http://<service>:{{ .Values.service.mcp.port }}/mcp).
4
+ {{- if .Values.ephemeralVolume.enabled }}
5
+ 4. An emptyDir is mounted at /tmp with HOME=/tmp/agentevals-home (ephemeral; lost on pod restart). Set ephemeralVolume.enabled=false and readOnlyRootFilesystem=false if you need a writable root without this mount.
6
+ {{- end }}
7
+
8
+ Get the Service URL:
9
+ export POD_NAME=$(kubectl get pods --namespace {{ include "agentevals.namespace" . }} -l "app.kubernetes.io/name={{ include "agentevals.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
10
+ kubectl --namespace {{ include "agentevals.namespace" . }} port-forward $POD_NAME {{ .Values.service.http.port }}:{{ .Values.service.http.port }}
11
+
12
+ Health check: GET http://<pod-ip>:{{ .Values.service.http.containerPort }}/api/health
@@ -0,0 +1,57 @@
1
+ {{- define "agentevals.name" -}}
2
+ {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
3
+ {{- end }}
4
+
5
+ {{- define "agentevals.fullname" -}}
6
+ {{- if .Values.fullnameOverride }}
7
+ {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
8
+ {{- else }}
9
+ {{- $name := default .Chart.Name .Values.nameOverride }}
10
+ {{- if contains $name .Release.Name }}
11
+ {{- .Release.Name | trunc 63 | trimSuffix "-" }}
12
+ {{- else }}
13
+ {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
14
+ {{- end }}
15
+ {{- end }}
16
+ {{- end }}
17
+
18
+ {{- define "agentevals.namespace" -}}
19
+ {{- default .Release.Namespace .Values.namespaceOverride }}
20
+ {{- end }}
21
+
22
+ {{- define "agentevals.chart" -}}
23
+ {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
24
+ {{- end }}
25
+
26
+ {{- define "agentevals.image" -}}
27
+ {{- $registry := .Values.image.registry | default .Values.registry -}}
28
+ {{- $tag := .Values.image.tag | default .Values.tag | default .Chart.AppVersion -}}
29
+ {{- if $registry -}}
30
+ {{- printf "%s/%s:%s" $registry .Values.image.repository $tag -}}
31
+ {{- else -}}
32
+ {{- printf "%s:%s" .Values.image.repository $tag -}}
33
+ {{- end -}}
34
+ {{- end }}
35
+
36
+ {{- define "agentevals.labels" -}}
37
+ helm.sh/chart: {{ include "agentevals.chart" . }}
38
+ {{ include "agentevals.selectorLabels" . }}
39
+ {{- if .Chart.AppVersion }}
40
+ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41
+ {{- end }}
42
+ app.kubernetes.io/managed-by: {{ .Release.Service }}
43
+ app.kubernetes.io/part-of: agentevals
44
+ {{- end }}
45
+
46
+ {{- define "agentevals.selectorLabels" -}}
47
+ app.kubernetes.io/name: {{ include "agentevals.name" . }}
48
+ app.kubernetes.io/instance: {{ .Release.Name }}
49
+ {{- end }}
50
+
51
+ {{- define "agentevals.serviceAccountName" -}}
52
+ {{- if .Values.serviceAccount.create }}
53
+ {{- default (include "agentevals.fullname" .) .Values.serviceAccount.name }}
54
+ {{- else }}
55
+ {{- default "default" .Values.serviceAccount.name }}
56
+ {{- end }}
57
+ {{- end }}
@@ -0,0 +1,128 @@
1
+ apiVersion: apps/v1
2
+ kind: Deployment
3
+ metadata:
4
+ name: {{ include "agentevals.fullname" . }}
5
+ namespace: {{ include "agentevals.namespace" . }}
6
+ labels:
7
+ {{- include "agentevals.labels" . | nindent 4 }}
8
+ spec:
9
+ replicas: {{ .Values.replicaCount }}
10
+ selector:
11
+ matchLabels:
12
+ {{- include "agentevals.selectorLabels" . | nindent 6 }}
13
+ template:
14
+ metadata:
15
+ {{- with .Values.podAnnotations }}
16
+ annotations:
17
+ {{- toYaml . | nindent 8 }}
18
+ {{- end }}
19
+ labels:
20
+ {{- include "agentevals.selectorLabels" . | nindent 8 }}
21
+ {{- with .Values.podLabels }}
22
+ {{- toYaml . | nindent 8 }}
23
+ {{- end }}
24
+ spec:
25
+ {{- with .Values.imagePullSecrets }}
26
+ imagePullSecrets:
27
+ {{- toYaml . | nindent 8 }}
28
+ {{- end }}
29
+ securityContext:
30
+ {{- toYaml .Values.podSecurityContext | nindent 8 }}
31
+ serviceAccountName: {{ include "agentevals.serviceAccountName" . }}
32
+ {{- if .Values.ephemeralVolume.enabled }}
33
+ volumes:
34
+ - name: agentevals-tmp
35
+ {{- if or .Values.ephemeralVolume.sizeLimit (eq .Values.ephemeralVolume.medium "Memory") }}
36
+ emptyDir:
37
+ {{- if eq .Values.ephemeralVolume.medium "Memory" }}
38
+ medium: Memory
39
+ {{- end }}
40
+ {{- with .Values.ephemeralVolume.sizeLimit }}
41
+ sizeLimit: {{ . }}
42
+ {{- end }}
43
+ {{- else }}
44
+ emptyDir: {}
45
+ {{- end }}
46
+ {{- end }}
47
+ containers:
48
+ - name: agentevals
49
+ image: {{ include "agentevals.image" . | quote }}
50
+ imagePullPolicy: {{ .Values.image.pullPolicy | default .Values.imagePullPolicy }}
51
+ {{- if .Values.command }}
52
+ command:
53
+ {{- toYaml .Values.command | nindent 12 }}
54
+ {{- end }}
55
+ {{- if .Values.args }}
56
+ args:
57
+ {{- toYaml .Values.args | nindent 12 }}
58
+ {{- end }}
59
+ env:
60
+ - name: AGENTEVALS_SERVER_URL
61
+ value: "http://127.0.0.1:{{ .Values.service.http.containerPort }}"
62
+ {{- if .Values.ephemeralVolume.enabled }}
63
+ - name: TMPDIR
64
+ value: "/tmp"
65
+ - name: HOME
66
+ value: "/tmp/agentevals-home"
67
+ {{- end }}
68
+ {{- with .Values.env }}
69
+ {{- toYaml . | nindent 12 }}
70
+ {{- end }}
71
+ {{- with .Values.envFrom }}
72
+ envFrom:
73
+ {{- toYaml . | nindent 12 }}
74
+ {{- end }}
75
+ ports:
76
+ - name: http
77
+ containerPort: {{ .Values.service.http.containerPort }}
78
+ protocol: TCP
79
+ - name: otlp-http
80
+ containerPort: {{ .Values.service.otlpHttp.containerPort }}
81
+ protocol: TCP
82
+ - name: mcp
83
+ containerPort: {{ .Values.service.mcp.containerPort }}
84
+ protocol: TCP
85
+ resources:
86
+ {{- toYaml .Values.resources | nindent 12 }}
87
+ securityContext:
88
+ {{- $sc := deepCopy .Values.securityContext }}
89
+ {{- if not .Values.ephemeralVolume.enabled }}
90
+ {{- $_ := set $sc "readOnlyRootFilesystem" false }}
91
+ {{- end }}
92
+ {{- toYaml $sc | nindent 12 }}
93
+ startupProbe:
94
+ httpGet:
95
+ path: /api/health
96
+ port: http
97
+ failureThreshold: 60
98
+ periodSeconds: 10
99
+ timeoutSeconds: 5
100
+ readinessProbe:
101
+ httpGet:
102
+ path: /api/health
103
+ port: http
104
+ initialDelaySeconds: 5
105
+ periodSeconds: 10
106
+ livenessProbe:
107
+ httpGet:
108
+ path: /api/health
109
+ port: http
110
+ initialDelaySeconds: 15
111
+ periodSeconds: 20
112
+ {{- if .Values.ephemeralVolume.enabled }}
113
+ volumeMounts:
114
+ - name: agentevals-tmp
115
+ mountPath: /tmp
116
+ {{- end }}
117
+ {{- with .Values.nodeSelector }}
118
+ nodeSelector:
119
+ {{- toYaml . | nindent 8 }}
120
+ {{- end }}
121
+ {{- with .Values.affinity }}
122
+ affinity:
123
+ {{- toYaml . | nindent 8 }}
124
+ {{- end }}
125
+ {{- with .Values.tolerations }}
126
+ tolerations:
127
+ {{- toYaml . | nindent 8 }}
128
+ {{- end }}
@@ -0,0 +1,24 @@
1
+ apiVersion: v1
2
+ kind: Service
3
+ metadata:
4
+ name: {{ include "agentevals.fullname" . }}
5
+ namespace: {{ include "agentevals.namespace" . }}
6
+ labels:
7
+ {{- include "agentevals.labels" . | nindent 4 }}
8
+ spec:
9
+ type: {{ .Values.service.type }}
10
+ ports:
11
+ - name: http
12
+ port: {{ .Values.service.http.port }}
13
+ targetPort: http
14
+ protocol: TCP
15
+ - name: otlp-http
16
+ port: {{ .Values.service.otlpHttp.port }}
17
+ targetPort: otlp-http
18
+ protocol: TCP
19
+ - name: mcp
20
+ port: {{ .Values.service.mcp.port }}
21
+ targetPort: mcp
22
+ protocol: TCP
23
+ selector:
24
+ {{- include "agentevals.selectorLabels" . | nindent 4 }}
@@ -0,0 +1,14 @@
1
+ {{- if .Values.serviceAccount.create -}}
2
+ apiVersion: v1
3
+ kind: ServiceAccount
4
+ metadata:
5
+ name: {{ include "agentevals.serviceAccountName" . }}
6
+ namespace: {{ include "agentevals.namespace" . }}
7
+ labels:
8
+ {{- include "agentevals.labels" . | nindent 4 }}
9
+ {{- with .Values.serviceAccount.annotations }}
10
+ annotations:
11
+ {{- toYaml . | nindent 4 }}
12
+ {{- end }}
13
+ automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
14
+ {{- end }}
@@ -0,0 +1,153 @@
1
+ # ==============================================================================
2
+ # Global
3
+ # ==============================================================================
4
+
5
+ # -- Number of replicas. Only 1 is supported (no shared job state across pods).
6
+ replicaCount: 1
7
+
8
+ # -- Global container image registry (prepended to image.repository)
9
+ registry: ghcr.io
10
+
11
+ # -- Global image tag override (defaults to Chart.appVersion)
12
+ tag: ""
13
+
14
+ # -- Global image pull policy
15
+ imagePullPolicy: IfNotPresent
16
+
17
+ # -- Image pull secrets
18
+ imagePullSecrets: []
19
+
20
+ # -- Override the chart name
21
+ nameOverride: ""
22
+
23
+ # -- Override the full resource name
24
+ fullnameOverride: ""
25
+
26
+ # -- Override the release namespace
27
+ namespaceOverride: ""
28
+
29
+ # ==============================================================================
30
+ # Image
31
+ # ==============================================================================
32
+
33
+ image:
34
+ # -- Container image registry (overrides global registry)
35
+ registry: ""
36
+ # -- Container image repository (org/name, without registry prefix)
37
+ repository: agentevals-dev/agentevals
38
+ # -- Container image tag (defaults to global tag, then Chart.appVersion)
39
+ tag: ""
40
+ # -- Container image pull policy (defaults to global imagePullPolicy)
41
+ pullPolicy: ""
42
+
43
+ # ==============================================================================
44
+ # Service Account
45
+ # ==============================================================================
46
+
47
+ serviceAccount:
48
+ # -- Create a ServiceAccount
49
+ create: false
50
+ # -- Automount the service account token
51
+ automount: true
52
+ # -- ServiceAccount annotations
53
+ annotations: {}
54
+ # -- ServiceAccount name override
55
+ name: ""
56
+
57
+ # ==============================================================================
58
+ # Pod
59
+ # ==============================================================================
60
+
61
+ # -- Pod annotations
62
+ podAnnotations: {}
63
+
64
+ # -- Additional pod labels
65
+ podLabels: {}
66
+
67
+ # -- Pod security context
68
+ podSecurityContext:
69
+ fsGroup: 1000
70
+
71
+ # -- Container security context.
72
+ # When ephemeralVolume.enabled is true, the emptyDir at /tmp supplies writable
73
+ # scratch space so the root filesystem can stay read-only. When it is false, the
74
+ # chart forces readOnlyRootFilesystem to false so /tmp stays writable.
75
+ securityContext:
76
+ allowPrivilegeEscalation: false
77
+ capabilities:
78
+ drop:
79
+ - ALL
80
+ readOnlyRootFilesystem: true
81
+ runAsNonRoot: true
82
+ runAsUser: 1000
83
+
84
+ # ==============================================================================
85
+ # Ephemeral Volume
86
+ # ==============================================================================
87
+
88
+ # -- Writable scratch space at /tmp (trace uploads, MCP temp files, streaming
89
+ # JSONL). HOME is set to /tmp/agentevals-home so Path.home()/.cache stays
90
+ # writable. When disabled the chart sets readOnlyRootFilesystem to false.
91
+ ephemeralVolume:
92
+ # -- Enable emptyDir mount at /tmp
93
+ enabled: true
94
+ # -- Size limit for the emptyDir (Kubernetes 1.22+), e.g. "2Gi"
95
+ sizeLimit: ""
96
+ # -- Use "Memory" for tmpfs (faster, counts against memory limits); leave "" for node disk
97
+ medium: ""
98
+
99
+ # ==============================================================================
100
+ # Service
101
+ # ==============================================================================
102
+
103
+ service:
104
+ # -- Service type
105
+ type: ClusterIP
106
+ # -- UI / API HTTP port
107
+ http:
108
+ port: 8001
109
+ containerPort: 8001
110
+ # -- OTLP HTTP receiver port
111
+ otlpHttp:
112
+ port: 4318
113
+ containerPort: 4318
114
+ # -- MCP (Streamable HTTP) port
115
+ mcp:
116
+ port: 8080
117
+ containerPort: 8080
118
+
119
+ # ==============================================================================
120
+ # Resources
121
+ # ==============================================================================
122
+
123
+ # -- Container resource requests and limits
124
+ resources: {}
125
+
126
+ # ==============================================================================
127
+ # Scheduling
128
+ # ==============================================================================
129
+
130
+ # -- Node selector
131
+ nodeSelector: {}
132
+
133
+ # -- Tolerations
134
+ tolerations: []
135
+
136
+ # -- Affinity rules
137
+ affinity: {}
138
+
139
+ # ==============================================================================
140
+ # Overrides
141
+ # ==============================================================================
142
+
143
+ # -- Override the image entrypoint
144
+ command: []
145
+
146
+ # -- Override the image arguments
147
+ args: []
148
+
149
+ # -- Extra environment variables appended to the container env block
150
+ env: []
151
+
152
+ # -- Extra envFrom sources (ConfigMapRef, SecretRef)
153
+ envFrom: []
@@ -0,0 +1,6 @@
1
+ openai-agents>=0.3.3
2
+ opentelemetry-instrumentation-openai-agents-v2>=0.1.0
3
+
4
+ opentelemetry-sdk>=1.36.0
5
+ opentelemetry-exporter-otlp-proto-http>=1.36.0
6
+ python-dotenv>=1.0.0
@@ -0,0 +1,105 @@
1
+ """Run a dice-rolling OpenAI Agents SDK agent with OTLP export — no agentevals SDK.
2
+
3
+ Demonstrates zero-code integration: any OTel-instrumented agent streams
4
+ traces to agentevals by pointing the OTLP exporter at the receiver.
5
+
6
+ Unlike the LangChain and Strands examples, this one is fully self-contained:
7
+ the agent code lives inline with no cross-folder imports.
8
+
9
+ Prerequisites:
10
+ 1. pip install -r requirements.txt
11
+ 2. agentevals serve --dev
12
+ 3. export OPENAI_API_KEY="your-key-here"
13
+
14
+ Usage:
15
+ python examples/zero-code-examples/openai-agents/run.py
16
+ """
17
+
18
+ import os
19
+ import random
20
+
21
+ from agents import Agent, Runner, function_tool
22
+ from dotenv import load_dotenv
23
+ from opentelemetry import trace
24
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
25
+ from opentelemetry.instrumentation.openai_agents import OpenAIAgentsInstrumentor
26
+ from opentelemetry.sdk.resources import Resource
27
+ from opentelemetry.sdk.trace import TracerProvider
28
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
29
+
30
+ load_dotenv(override=True)
31
+
32
+
33
@function_tool
def roll_die(sides: int) -> int:
    """Roll a die with the given number of sides and return the result.

    Args:
        sides: Number of faces on the die; must be at least 1.

    Returns:
        A random integer in the inclusive range 1..sides.

    Raises:
        ValueError: If ``sides`` is less than 1.
    """
    # NOTE(review): the value is presumably supplied by the model through a tool
    # call, so reject impossible dice explicitly rather than surfacing randint's
    # less helpful "empty range" error.
    if sides < 1:
        raise ValueError(f"sides must be at least 1, got {sides}")
    return random.randint(1, sides)
37
+
38
+
39
@function_tool
def check_prime(number: int) -> bool:
    """Return True if the number is prime, False otherwise.

    Args:
        number: Integer to test; values below 2 are never prime.

    Returns:
        True when no integer in 2..isqrt(number) divides ``number``.
    """
    import math  # local import so the block stays self-contained in this script

    if number < 2:
        return False
    # math.isqrt is exact for arbitrarily large ints, whereas int(number**0.5)
    # goes through a float and can round below the true root (missing a
    # divisor) or overflow for very large values.
    limit = math.isqrt(number)
    return all(number % divisor for divisor in range(2, limit + 1))
48
+
49
+
50
def main():
    """Run a short scripted conversation with the dice agent and export traces.

    Returns early (after printing a notice) when OPENAI_API_KEY is unset.
    Otherwise it configures an OpenTelemetry tracer provider with a batching
    OTLP/HTTP span exporter, instruments the OpenAI Agents SDK, sends three
    user queries through the agent while carrying the conversation history
    forward, and finally flushes pending spans.
    """
    if not os.getenv("OPENAI_API_KEY"):
        print("OPENAI_API_KEY not set.")
        return

    # Only used for the informational print below; the exporter is built with
    # no arguments. NOTE(review): presumably it resolves the same environment
    # variable on its own — confirm against the exporter documentation.
    endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
    print(f"OTLP endpoint: {endpoint}")

    # setdefault keeps any value the user already exported.
    os.environ.setdefault("OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "span_and_event")

    os.environ.setdefault(
        "OTEL_RESOURCE_ATTRIBUTES",
        "agentevals.eval_set_id=openai_agents_eval,agentevals.session_name=openai-agents-zero-code",
    )

    # Created after the environment defaults above are in place.
    resource = Resource.create()

    tracer_provider = TracerProvider(resource=resource)
    tracer_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(), schedule_delay_millis=1000))
    trace.set_tracer_provider(tracer_provider)

    OpenAIAgentsInstrumentor().instrument()

    agent = Agent(
        name="Dice Agent",
        instructions="You are a helpful assistant. You can roll dice and check if numbers are prime.",
        tools=[roll_die, check_prime],
    )

    test_queries = [
        "Hi! Can you help me?",
        "Roll a 20-sided die for me",
        "Is the number you rolled prime?",
    ]

    conversation_input: list = []

    try:
        for i, query in enumerate(test_queries, 1):
            print(f"\n[{i}/{len(test_queries)}] User: {query}")

            conversation_input.append({"role": "user", "content": query})
            result = Runner.run_sync(agent, conversation_input)

            agent_response = result.final_output or ""
            print(f"  Agent: {agent_response}")

            # Carry the full history (including tool calls) into the next turn.
            conversation_input = result.to_input_list()
    finally:
        print()
        # force_flush returns a bool; only claim success when it reports one,
        # so a timed-out or failed flush is not announced as delivered.
        if tracer_provider.force_flush():
            print("All traces flushed to OTLP receiver.")
        else:
            print("Warning: trace flush did not complete; some spans may not have reached the OTLP receiver.")


if __name__ == "__main__":
    main()
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "agentevals-cli"
7
- version = "0.6.0"
7
+ version = "0.6.1"
8
8
  description = "Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"