agentevals-cli 0.5.3__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226)
  1. agentevals_cli-0.6.1/.dockerignore +16 -0
  2. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.github/workflows/release.yml +40 -0
  3. agentevals_cli-0.6.1/Dockerfile +38 -0
  4. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/Makefile +12 -1
  5. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/PKG-INFO +59 -5
  6. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/README.md +56 -4
  7. agentevals_cli-0.6.1/charts/agentevals/Chart.yaml +6 -0
  8. agentevals_cli-0.6.1/charts/agentevals/templates/NOTES.txt +12 -0
  9. agentevals_cli-0.6.1/charts/agentevals/templates/_helpers.tpl +57 -0
  10. agentevals_cli-0.6.1/charts/agentevals/templates/deployment.yaml +128 -0
  11. agentevals_cli-0.6.1/charts/agentevals/templates/service.yaml +24 -0
  12. agentevals_cli-0.6.1/charts/agentevals/templates/serviceaccount.yaml +14 -0
  13. agentevals_cli-0.6.1/charts/agentevals/values.yaml +153 -0
  14. agentevals_cli-0.6.1/docs/assets/logo-color-on-transparent.svg +13 -0
  15. agentevals_cli-0.6.1/docs/assets/logo-dark-on-transparent.svg +13 -0
  16. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/docs/custom-evaluators.md +82 -35
  17. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/custom_evaluators/eval_config.yaml +0 -1
  18. agentevals_cli-0.6.1/examples/zero-code-examples/openai-agents/requirements.txt +6 -0
  19. agentevals_cli-0.6.1/examples/zero-code-examples/openai-agents/run.py +105 -0
  20. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/pyproject.toml +4 -1
  21. agentevals_cli-0.6.1/src/agentevals/_static/assets/index-lHPO8TkI.js +342 -0
  22. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/_static/index.html +1 -1
  23. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/api/app.py +14 -18
  24. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/api/debug_routes.py +19 -25
  25. agentevals_cli-0.6.1/src/agentevals/api/dependencies.py +23 -0
  26. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/api/models.py +20 -0
  27. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/api/otlp_app.py +4 -4
  28. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/api/otlp_routes.py +34 -40
  29. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/api/routes.py +142 -0
  30. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/api/streaming_routes.py +67 -51
  31. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/cli.py +62 -7
  32. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/config.py +41 -1
  33. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/converter.py +35 -61
  34. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/custom_evaluators.py +45 -11
  35. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/eval_config_loader.py +3 -1
  36. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/evaluator/sources.py +23 -3
  37. agentevals_cli-0.6.1/src/agentevals/evaluator/venv.py +119 -0
  38. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/extraction.py +25 -2
  39. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/genai_converter.py +37 -98
  40. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/mcp_server.py +3 -2
  41. agentevals_cli-0.6.1/src/agentevals/openai_eval_backend.py +246 -0
  42. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/output.py +21 -4
  43. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/runner.py +6 -0
  44. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/integration/conftest.py +8 -10
  45. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/integration/test_live_agents.py +57 -0
  46. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/test_api.py +7 -15
  47. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/test_extraction.py +11 -0
  48. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/test_otlp_receiver.py +25 -49
  49. agentevals_cli-0.6.1/tests/test_output.py +112 -0
  50. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/test_runner.py +4 -0
  51. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/api/client.ts +29 -1
  52. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/builder/TraceUploadZone.tsx +12 -12
  53. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/dashboard/TraceCard.tsx +11 -20
  54. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/InspectorHeader.tsx +11 -20
  55. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/InspectorView.tsx +10 -39
  56. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/upload/TraceEditorDrawer.tsx +11 -14
  57. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/context/TraceProvider.tsx +23 -13
  58. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/lib/evalset-builder.ts +10 -36
  59. agentevals_cli-0.6.1/ui/src/lib/trace-helpers.ts +73 -0
  60. agentevals_cli-0.6.1/ui/src/lib/trace-metadata.ts +12 -0
  61. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/lib/trace-patcher.ts +1 -1
  62. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/lib/types.ts +21 -0
  63. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/uv.lock +6 -2
  64. agentevals_cli-0.5.3/src/agentevals/_static/assets/index-Dz2NgC8m.js +0 -343
  65. agentevals_cli-0.5.3/ui/src/lib/trace-converter.ts +0 -734
  66. agentevals_cli-0.5.3/ui/src/lib/trace-metadata.ts +0 -391
  67. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.claude/skills/eval/SKILL.md +0 -0
  68. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.claude/skills/eval/evals/evals.json +0 -0
  69. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.claude/skills/inspect/SKILL.md +0 -0
  70. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.claude/skills/inspect/evals/evals.json +0 -0
  71. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  72. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  73. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  74. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.github/workflows/ci.yml +0 -0
  75. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.github/workflows/publish-evaluator-sdk.yml +0 -0
  76. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.gitignore +0 -0
  77. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/.mcp.json +0 -0
  78. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/CONTRIBUTING.md +0 -0
  79. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/DEVELOPMENT.md +0 -0
  80. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/LICENSE +0 -0
  81. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/docs/assets/logo-color.png +0 -0
  82. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/docs/eval-set-format.md +0 -0
  83. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/docs/otel-compatibility.md +0 -0
  84. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/docs/streaming.md +0 -0
  85. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/README.md +0 -0
  86. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/custom_evaluators/response_quality.py +0 -0
  87. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/custom_evaluators/tool_call_checker.py +0 -0
  88. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/dice_agent/README.md +0 -0
  89. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/dice_agent/agent.py +0 -0
  90. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/dice_agent/eval_set.json +0 -0
  91. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/dice_agent/main.py +0 -0
  92. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/dice_agent/test_streaming.py +0 -0
  93. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/langchain_agent/README.md +0 -0
  94. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/langchain_agent/agent.py +0 -0
  95. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/langchain_agent/eval_set.json +0 -0
  96. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/langchain_agent/main.py +0 -0
  97. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/langchain_agent/requirements.txt +0 -0
  98. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/langchain_agent/test_streaming.py +0 -0
  99. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/sdk_example/async_example.py +0 -0
  100. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/sdk_example/context_manager_example.py +0 -0
  101. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/sdk_example/decorator_example.py +0 -0
  102. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/sdk_example/requirements.txt +0 -0
  103. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/strands_agent/agent.py +0 -0
  104. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/strands_agent/eval_set.json +0 -0
  105. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/strands_agent/main.py +0 -0
  106. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/strands_agent/requirements.txt +0 -0
  107. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/zero-code-examples/adk/requirements.txt +0 -0
  108. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/zero-code-examples/adk/run.py +0 -0
  109. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/zero-code-examples/langchain/requirements.txt +0 -0
  110. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/zero-code-examples/langchain/run.py +0 -0
  111. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/zero-code-examples/strands/requirements.txt +0 -0
  112. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/examples/zero-code-examples/strands/run.py +0 -0
  113. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/flake.lock +0 -0
  114. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/flake.nix +0 -0
  115. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/README.md +0 -0
  116. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/pyproject.toml +0 -0
  117. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py +0 -0
  118. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py +0 -0
  119. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/types.py +0 -0
  120. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/samples/eval_set_helm.json +0 -0
  121. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/samples/evalset_helm_3_2026-02-23.json +0 -0
  122. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/samples/evalset_k8s_2026-02-20.json +0 -0
  123. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/samples/helm.json +0 -0
  124. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/samples/helm_2.json +0 -0
  125. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/samples/helm_3.json +0 -0
  126. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/samples/k8s.json +0 -0
  127. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/__init__.py +0 -0
  128. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/_protocol.py +0 -0
  129. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/_static/assets/index-BqibLiHO.css +0 -0
  130. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/_static/logo.svg +0 -0
  131. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/_static/vite.svg +0 -0
  132. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/api/__init__.py +0 -0
  133. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/builtin_metrics.py +0 -0
  134. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/evaluator/__init__.py +0 -0
  135. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/evaluator/resolver.py +0 -0
  136. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/evaluator/templates.py +0 -0
  137. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/loader/__init__.py +0 -0
  138. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/loader/base.py +0 -0
  139. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/loader/jaeger.py +0 -0
  140. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/loader/otlp.py +0 -0
  141. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/sdk.py +0 -0
  142. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/streaming/__init__.py +0 -0
  143. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/streaming/incremental_processor.py +0 -0
  144. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/streaming/processor.py +0 -0
  145. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/streaming/session.py +0 -0
  146. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/streaming/ws_server.py +0 -0
  147. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/trace_attrs.py +0 -0
  148. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/trace_metrics.py +0 -0
  149. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/utils/__init__.py +0 -0
  150. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/utils/genai_messages.py +0 -0
  151. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/utils/log_buffer.py +0 -0
  152. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/src/agentevals/utils/log_enrichment.py +0 -0
  153. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/integration/__init__.py +0 -0
  154. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/integration/test_evaluation_pipeline.py +0 -0
  155. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/integration/test_session_grouping.py +0 -0
  156. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/integration/test_timing_stress.py +0 -0
  157. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/test_converter.py +0 -0
  158. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/test_genai_converter.py +0 -0
  159. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/test_jaeger_loader.py +0 -0
  160. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/test_log_enrichment.py +0 -0
  161. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/test_otlp_loader.py +0 -0
  162. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/test_protocol.py +0 -0
  163. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/tests/test_sdk.py +0 -0
  164. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/.gitignore +0 -0
  165. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/README.md +0 -0
  166. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/eslint.config.js +0 -0
  167. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/index.html +0 -0
  168. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/package-lock.json +0 -0
  169. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/package.json +0 -0
  170. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/public/logo.svg +0 -0
  171. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/public/vite.svg +0 -0
  172. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/App.css +0 -0
  173. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/App.tsx +0 -0
  174. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/assets/react.svg +0 -0
  175. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/annotation-queue/AnnotationDetailPanel.tsx +0 -0
  176. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/annotation-queue/AnnotationQueueView.tsx +0 -0
  177. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/annotation-queue/AnnotationTable.tsx +0 -0
  178. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/bug-report/BugReportModal.tsx +0 -0
  179. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/builder/BuilderHeader.tsx +0 -0
  180. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/builder/BuilderView.tsx +0 -0
  181. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/builder/EvalCaseCard.tsx +0 -0
  182. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/builder/EvalCasesList.tsx +0 -0
  183. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/builder/InvocationEditor.tsx +0 -0
  184. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/builder/JsonPreview.tsx +0 -0
  185. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/builder/MetadataEditor.tsx +0 -0
  186. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/builder/index.ts +0 -0
  187. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/dashboard/DashboardView.tsx +0 -0
  188. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/dashboard/MetricScoreCard.tsx +0 -0
  189. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/dashboard/PerformanceCard.tsx +0 -0
  190. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/dashboard/PerformanceCharts.tsx +0 -0
  191. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/dashboard/SummaryStats.tsx +0 -0
  192. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/dashboard/TraceTable.tsx +0 -0
  193. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/ComparisonPanel.tsx +0 -0
  194. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/DataSection.tsx +0 -0
  195. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/InspectorLayout.tsx +0 -0
  196. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/InvocationCard.tsx +0 -0
  197. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/InvocationSummaryPanel.tsx +0 -0
  198. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/MetricResultsSection.tsx +0 -0
  199. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/MetricsComparisonSection.tsx +0 -0
  200. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/PerformanceSection.tsx +0 -0
  201. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/ToolCallList.tsx +0 -0
  202. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/inspector/TrajectoryComparisonDetails.tsx +0 -0
  203. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/sidebar/Sidebar.tsx +0 -0
  204. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/streaming/LiveConversationPanel.tsx +0 -0
  205. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/streaming/LiveMessage.tsx +0 -0
  206. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/streaming/LiveStreamingView.tsx +0 -0
  207. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/streaming/SessionCard.tsx +0 -0
  208. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/streaming/SessionMetadata.tsx +0 -0
  209. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/upload/EvalSetEditorDrawer.tsx +0 -0
  210. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/upload/FileDropZone.tsx +0 -0
  211. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/upload/MetricSelector.tsx +0 -0
  212. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/upload/RawJsonPreview.tsx +0 -0
  213. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/upload/UploadView.tsx +0 -0
  214. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/components/welcome/WelcomeView.tsx +0 -0
  215. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/config.ts +0 -0
  216. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/context/TraceContext.tsx +0 -0
  217. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/index.css +0 -0
  218. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/lib/console-capture.ts +0 -0
  219. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/lib/network-capture.ts +0 -0
  220. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/lib/trace-loader.ts +0 -0
  221. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/lib/utils.ts +0 -0
  222. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/src/main.tsx +0 -0
  223. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/tsconfig.app.json +0 -0
  224. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/tsconfig.json +0 -0
  225. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/tsconfig.node.json +0 -0
  226. {agentevals_cli-0.5.3 → agentevals_cli-0.6.1}/ui/vite.config.ts +0 -0
@@ -0,0 +1,16 @@
1
+ .venv
2
+ **/__pycache__
3
+ *.py[cod]
4
+ .git
5
+ .gitignore
6
+ tests
7
+ .pytest_cache
8
+ .ruff_cache
9
+ htmlcov
10
+ .coverage
11
+ ui/node_modules
12
+ dist
13
+ *.egg-info
14
+ **/*.egg-info
15
+ agents.md
16
+ .cursor
@@ -29,6 +29,9 @@ jobs:
29
29
  cache: npm
30
30
  cache-dependency-path: ui/package-lock.json
31
31
 
32
+ - name: Set version from tag
33
+ run: uv version "${{ github.event.inputs.tag || github.ref_name }}" --package agentevals-cli
34
+
32
35
  - name: Build core and bundled wheels
33
36
  run: make release
34
37
 
@@ -89,3 +92,40 @@ jobs:
89
92
  uv build --package agentevals-cli
90
93
  uv publish dist/* --token ${{ secrets.PYPI_TOKEN }}
91
94
  rm -rf src/agentevals/_static
95
+
96
+ push-docker:
97
+ runs-on: ubuntu-latest
98
+ permissions:
99
+ contents: read
100
+ packages: write
101
+ steps:
102
+ - uses: actions/checkout@v6
103
+
104
+ - name: Login to GitHub Container Registry
105
+ uses: docker/login-action@v4
106
+ with:
107
+ registry: ghcr.io
108
+ username: ${{ github.actor }}
109
+ password: ${{ secrets.GITHUB_TOKEN }}
110
+
111
+ - name: Set up QEMU
112
+ uses: docker/setup-qemu-action@v4
113
+
114
+ - name: Set up Docker Buildx
115
+ uses: docker/setup-buildx-action@v4
116
+
117
+ - name: Set appVersion in Chart.yaml
118
+ run: |
119
+ VERSION="${TAG#v}"
120
+ sed -i "s/^appVersion:.*/appVersion: \"$VERSION\"/" charts/agentevals/Chart.yaml
121
+ env:
122
+ TAG: ${{ github.event.inputs.tag || github.ref_name }}
123
+
124
+ - name: Build and push
125
+ run: |
126
+ VERSION="${TAG#v}"
127
+ make build-docker \
128
+ DOCKER_REGISTRY="ghcr.io/${{ github.repository_owner }}" \
129
+ DOCKER_TAG="$VERSION"
130
+ env:
131
+ TAG: ${{ github.event.inputs.tag || github.ref_name }}
@@ -0,0 +1,38 @@
1
+ # syntax=docker/dockerfile:1
2
+
3
+ FROM node:25-bookworm-slim AS ui
4
+ WORKDIR /build/ui
5
+ COPY ui/package.json ui/package-lock.json ./
6
+ # Skip lifecycle scripts during ci, then rebuild esbuild in its own layer — avoids ETXTBSY when
7
+ # install.js execs the binary while overlayfs still has the file busy (common with BuildKit).
8
+ RUN npm ci --ignore-scripts
9
+ RUN npm rebuild esbuild
10
+ COPY ui/ ./
11
+ RUN npm run build
12
+
13
+ FROM python:3.14-slim-bookworm
14
+
15
+ WORKDIR /app
16
+
17
+ # Install uv binary only (no pip); same approach as astral-sh/uv's Dockerfile.
18
+ # https://github.com/astral-sh/uv/blob/6d889fd53d5c108d304c5a4085eb3140ec6a9cdb/Dockerfile#L21
19
+ COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
20
+
21
+ COPY pyproject.toml uv.lock README.md ./
22
+ COPY packages ./packages
23
+ COPY src ./src
24
+
25
+ COPY --from=ui /build/ui/dist ./src/agentevals/_static
26
+
27
+ RUN uv sync --frozen --no-dev --extra live \
28
+ && groupadd --gid 1000 app \
29
+ && useradd --uid 1000 --gid app --home-dir /app --no-log-init app \
30
+ && chown -R app:app /app
31
+
32
+ USER app
33
+ ENV PATH="/app/.venv/bin:$PATH"
34
+ ENV AGENTEVALS_SERVER_URL=http://127.0.0.1:8001
35
+
36
+ EXPOSE 8001 4318 8080
37
+
38
+ CMD ["agentevals", "serve", "--host", "0.0.0.0", "--port", "8001", "--otlp-port", "4318", "--mcp-port", "8080"]
@@ -1,11 +1,22 @@
1
1
  VERSION := $(shell grep '^version' pyproject.toml | cut -d'"' -f2)
2
2
  WHEEL := dist/agentevals_cli-$(VERSION)-py3-none-any.whl
3
3
 
4
- .PHONY: build build-bundle build-ui release clean dev-backend dev-frontend dev-bundle test test-unit test-integration test-e2e
4
+ DOCKER_REGISTRY ?= soloio
5
+ DOCKER_IMAGE ?= agentevals
6
+ DOCKER_TAG ?= $(VERSION)
7
+ DOCKER_IMAGE_REF := $(if $(DOCKER_REGISTRY),$(DOCKER_REGISTRY:%/=%)/$(DOCKER_IMAGE),$(DOCKER_IMAGE))
8
+
9
+ # Multi-arch build (requires docker buildx). Manifest lists must be pushed — use build-docker-local for a single-arch --load.
10
+ PLATFORMS ?= linux/amd64,linux/arm64
11
+
12
+ .PHONY: build build-bundle build-docker build-ui release clean dev-backend dev-frontend dev-bundle test test-unit test-integration test-e2e
5
13
 
6
14
  build:
7
15
  uv build
8
16
 
17
+ build-docker:
18
+ docker buildx build --platform $(PLATFORMS) -t $(DOCKER_IMAGE_REF):$(DOCKER_TAG) --push .
19
+
9
20
  build-ui:
10
21
  cd ui && npm ci && npm run build
11
22
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentevals-cli
3
- Version: 0.5.3
3
+ Version: 0.6.1
4
4
  Summary: Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.11
@@ -17,23 +17,76 @@ Requires-Dist: uvicorn[standard]>=0.32.0
17
17
  Provides-Extra: live
18
18
  Requires-Dist: httpx>=0.27.0; extra == 'live'
19
19
  Requires-Dist: mcp>=1.26.0; extra == 'live'
20
+ Provides-Extra: openai
21
+ Requires-Dist: openai>=2.0; extra == 'openai'
20
22
  Provides-Extra: streaming
21
23
  Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'streaming'
22
24
  Requires-Dist: websockets>=12.0; extra == 'streaming'
23
25
  Description-Content-Type: text/markdown
24
26
 
25
27
  <p align="center">
26
- <img src="docs/assets/logo-color.png" alt="agentevals" width="420" />
28
+ <picture>
29
+ <source media="(prefers-color-scheme: dark)" srcset="docs/assets/logo-color-on-transparent.svg">
30
+ <source media="(prefers-color-scheme: light)" srcset="docs/assets/logo-dark-on-transparent.svg">
31
+ <img src="docs/assets/logo-color-on-transparent.svg" alt="agentevals" width="420" />
32
+ </picture>
27
33
  </p>
28
34
 
29
- `agentevals` evaluates AI agent behavior from OpenTelemetry traces, without re-running the agent. Record once, score as many times as you want.
35
+ <h1 align="center">Ship Agents Reliably</h1>
30
36
 
31
- Works with any OTel-instrumented framework (LangChain, Strands, Google ADK, and others). Supports Jaeger JSON and OTLP trace formats, built-in and custom evaluators, and LLM-based judges.
37
+ <p align="center">
38
+ Benchmark your agents before they hit production.<br>
39
+ agentevals scores performance and inference quality from OpenTelemetry traces — no re-runs, no guesswork.
40
+ </p>
41
+
42
+ <p align="center">
43
+ <a href="https://github.com/agentevals-dev/agentevals/stargazers"><img src="https://img.shields.io/github/stars/agentevals-dev/agentevals?style=social" alt="GitHub Stars"></a>
44
+ &nbsp;
45
+ <a href="https://discord.gg/cpveEn8Ah2"><img src="https://img.shields.io/discord/1435836734666707190?label=Discord&logo=discord&logoColor=white&color=5865F2" alt="Discord"></a>
46
+ &nbsp;
47
+ <a href="https://github.com/agentevals-dev/agentevals/releases"><img src="https://img.shields.io/github/v/release/agentevals-dev/agentevals?label=Release" alt="Release"></a>
48
+ &nbsp;
49
+ <a href="https://github.com/agentevals-dev/agentevals/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-green.svg" alt="License"></a>
50
+ &nbsp;
51
+ <a href="https://pypi.org/project/agentevals-cli/"><img src="https://img.shields.io/pypi/v/agentevals-cli?label=PyPI&color=blue" alt="PyPI"></a>
52
+ </p>
53
+
54
+ <p align="center">
55
+ <a href="#installation">Install</a> · <a href="#quick-start">Quick Start</a> · <a href="https://github.com/agentevals-dev/agentevals/releases">Releases</a> · <a href="CONTRIBUTING.md">Contributing</a> · <a href="https://discord.gg/cpveEn8Ah2">Discord</a>
56
+ </p>
57
+
58
+ ---
59
+
60
+ ## What is agentevals?
61
+
62
+ agentevals is a framework-agnostic evaluation solution that scores AI agent behavior directly from [OpenTelemetry](https://opentelemetry.io/) traces. Record your agent's actions once, then evaluate as many times as you want — no re-runs, no guesswork.
63
+
64
+ It works with any OTel-instrumented framework (LangChain, Strands, Google ADK, and others), supports Jaeger JSON and OTLP trace formats, and ships with built-in evaluators, custom evaluator support, and LLM-based judges.
32
65
 
33
66
  - **CLI** for scripting and CI pipelines
34
67
  - **Web UI** for visual inspection and local developer experience
35
68
  - **MCP server** so MCP clients can run evaluations from a conversation
36
69
 
70
+ ## Why agentevals?
71
+
72
+ Most evaluation tools require you to **re-execute your agent** for every test — burning tokens, time, and money on duplicate LLM calls. agentevals takes a different approach:
73
+
74
+ - **No re-execution** — score agents from existing traces without replaying expensive LLM calls
75
+ - **Framework-agnostic** — works with any agent framework that emits OpenTelemetry spans
76
+ - **Golden eval sets** — compare actual behavior against defined expected behaviors for deterministic pass/fail gating
77
+ - **Custom evaluators** — write scoring logic in Python, JavaScript, or any language
78
+ - **CI/CD ready** — gate deployments on quality thresholds directly in your pipeline
79
+ - **Local-first** — no cloud dependency required; everything runs on your machine
80
+
81
+ ## How It Works
82
+
83
+ agentevals follows three simple steps:
84
+
85
+ 1. **Collect traces** — Instrument your agent with OpenTelemetry (or export traces from your tracing backend). Point the OTLP exporter at the agentevals receiver, or load trace files directly.
86
+ 2. **Define eval sets** — Create golden evaluation sets that describe expected agent behavior: which tools should be called, in what order, and what the output should look like.
87
+ 3. **Run evaluations** — Use the CLI, Web UI, or MCP server to score traces against your eval sets. Get per-metric scores, pass/fail results, and detailed span-level breakdowns.
88
+
89
+
37
90
  > [!IMPORTANT]
38
91
  > This project is under active development. Expect breaking changes.
39
92
 
@@ -64,6 +117,7 @@ Optional extras:
64
117
 
65
118
  ```bash
66
119
  pip install "agentevals-cli[live]" # MCP server support
120
+ pip install "agentevals-cli[openai]" # OpenAI Evals API graders
67
121
  ```
68
122
 
69
123
  **GitHub [releases](../../releases)** also ship **core** wheels (CLI and API only) and **bundle** wheels (with the embedded UI) if you need a specific version or offline `pip install ./path/to.whl`.
@@ -188,7 +242,7 @@ evaluators:
188
242
  agentevals run trace.json --config eval_config.yaml --eval-set eval_set.json
189
243
  ```
190
244
 
191
- Community evaluators can be referenced directly from a shared GitHub repository using `type: remote`. See the [Custom Evaluators guide](docs/custom-evaluators.md) for the full protocol reference, SDK usage, and how to contribute evaluators.
245
+ Community evaluators can be referenced directly from a shared GitHub repository using `type: remote`. You can also delegate grading to the [OpenAI Evals API](https://developers.openai.com/api/reference/resources/evals/methods/create) using `type: openai_eval` (requires `pip install "agentevals-cli[openai]"` and `OPENAI_API_KEY`). See the [Custom Evaluators guide](docs/custom-evaluators.md) for the full protocol reference, SDK usage, and how to contribute evaluators.
192
246
 
193
247
  ## Web UI
194
248
 
@@ -1,15 +1,66 @@
1
1
  <p align="center">
2
- <img src="docs/assets/logo-color.png" alt="agentevals" width="420" />
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="docs/assets/logo-color-on-transparent.svg">
4
+ <source media="(prefers-color-scheme: light)" srcset="docs/assets/logo-dark-on-transparent.svg">
5
+ <img src="docs/assets/logo-color-on-transparent.svg" alt="agentevals" width="420" />
6
+ </picture>
3
7
  </p>
4
8
 
5
- `agentevals` evaluates AI agent behavior from OpenTelemetry traces, without re-running the agent. Record once, score as many times as you want.
9
+ <h1 align="center">Ship Agents Reliably</h1>
6
10
 
7
- Works with any OTel-instrumented framework (LangChain, Strands, Google ADK, and others). Supports Jaeger JSON and OTLP trace formats, built-in and custom evaluators, and LLM-based judges.
11
+ <p align="center">
12
+ Benchmark your agents before they hit production.<br>
13
+ agentevals scores performance and inference quality from OpenTelemetry traces — no re-runs, no guesswork.
14
+ </p>
15
+
16
+ <p align="center">
17
+ <a href="https://github.com/agentevals-dev/agentevals/stargazers"><img src="https://img.shields.io/github/stars/agentevals-dev/agentevals?style=social" alt="GitHub Stars"></a>
18
+ &nbsp;
19
+ <a href="https://discord.gg/cpveEn8Ah2"><img src="https://img.shields.io/discord/1435836734666707190?label=Discord&logo=discord&logoColor=white&color=5865F2" alt="Discord"></a>
20
+ &nbsp;
21
+ <a href="https://github.com/agentevals-dev/agentevals/releases"><img src="https://img.shields.io/github/v/release/agentevals-dev/agentevals?label=Release" alt="Release"></a>
22
+ &nbsp;
23
+ <a href="https://github.com/agentevals-dev/agentevals/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-green.svg" alt="License"></a>
24
+ &nbsp;
25
+ <a href="https://pypi.org/project/agentevals-cli/"><img src="https://img.shields.io/pypi/v/agentevals-cli?label=PyPI&color=blue" alt="PyPI"></a>
26
+ </p>
27
+
28
+ <p align="center">
29
+ <a href="#installation">Install</a> · <a href="#quick-start">Quick Start</a> · <a href="https://github.com/agentevals-dev/agentevals/releases">Releases</a> · <a href="CONTRIBUTING.md">Contributing</a> · <a href="https://discord.gg/cpveEn8Ah2">Discord</a>
30
+ </p>
31
+
32
+ ---
33
+
34
+ ## What is agentevals?
35
+
36
+ agentevals is a framework-agnostic evaluation solution that scores AI agent behavior directly from [OpenTelemetry](https://opentelemetry.io/) traces. Record your agent's actions once, then evaluate as many times as you want — no re-runs, no guesswork.
37
+
38
+ It works with any OTel-instrumented framework (LangChain, Strands, Google ADK, and others), supports Jaeger JSON and OTLP trace formats, and ships with built-in evaluators, custom evaluator support, and LLM-based judges.
8
39
 
9
40
  - **CLI** for scripting and CI pipelines
10
41
  - **Web UI** for visual inspection and local developer experience
11
42
  - **MCP server** so MCP clients can run evaluations from a conversation
12
43
 
44
+ ## Why agentevals?
45
+
46
+ Most evaluation tools require you to **re-execute your agent** for every test — burning tokens, time, and money on duplicate LLM calls. agentevals takes a different approach:
47
+
48
+ - **No re-execution** — score agents from existing traces without replaying expensive LLM calls
49
+ - **Framework-agnostic** — works with any agent framework that emits OpenTelemetry spans
50
+ - **Golden eval sets** — compare actual behavior against defined expected behaviors for deterministic pass/fail gating
51
+ - **Custom evaluators** — write scoring logic in Python, JavaScript, or any language
52
+ - **CI/CD ready** — gate deployments on quality thresholds directly in your pipeline
53
+ - **Local-first** — no cloud dependency required; everything runs on your machine
54
+
55
+ ## How It Works
56
+
57
+ agentevals follows three simple steps:
58
+
59
+ 1. **Collect traces** — Instrument your agent with OpenTelemetry (or export traces from your tracing backend). Point the OTLP exporter at the agentevals receiver, or load trace files directly.
60
+ 2. **Define eval sets** — Create golden evaluation sets that describe expected agent behavior: which tools should be called, in what order, and what the output should look like.
61
+ 3. **Run evaluations** — Use the CLI, Web UI, or MCP server to score traces against your eval sets. Get per-metric scores, pass/fail results, and detailed span-level breakdowns.
62
+
63
+
13
64
  > [!IMPORTANT]
14
65
  > This project is under active development. Expect breaking changes.
15
66
 
@@ -40,6 +91,7 @@ Optional extras:
40
91
 
41
92
  ```bash
42
93
  pip install "agentevals-cli[live]" # MCP server support
94
+ pip install "agentevals-cli[openai]" # OpenAI Evals API graders
43
95
  ```
44
96
 
45
97
  **GitHub [releases](../../releases)** also ship **core** wheels (CLI and API only) and **bundle** wheels (with the embedded UI) if you need a specific version or offline `pip install ./path/to.whl`.
@@ -164,7 +216,7 @@ evaluators:
164
216
  agentevals run trace.json --config eval_config.yaml --eval-set eval_set.json
165
217
  ```
166
218
 
167
- Community evaluators can be referenced directly from a shared GitHub repository using `type: remote`. See the [Custom Evaluators guide](docs/custom-evaluators.md) for the full protocol reference, SDK usage, and how to contribute evaluators.
219
+ Community evaluators can be referenced directly from a shared GitHub repository using `type: remote`. You can also delegate grading to the [OpenAI Evals API](https://developers.openai.com/api/reference/resources/evals/methods/create) using `type: openai_eval` (requires `pip install "agentevals-cli[openai]"` and `OPENAI_API_KEY`). See the [Custom Evaluators guide](docs/custom-evaluators.md) for the full protocol reference, SDK usage, and how to contribute evaluators.
168
220
 
169
221
  ## Web UI
170
222
 
@@ -0,0 +1,6 @@
1
+ apiVersion: v2
2
+ name: agentevals
3
+ description: agentevals web UI, OTLP HTTP receiver, and MCP (Streamable HTTP)
4
+ type: application
5
+ version: 0.1.0
6
+ appVersion: "0.5.2"
@@ -0,0 +1,12 @@
1
+ 1. UI and API are available at port {{ .Values.service.http.port }} (Service port name: http).
2
+ 2. OTLP HTTP receiver: port {{ .Values.service.otlpHttp.port }} (OTEL_EXPORTER_OTLP_ENDPOINT=http://<service>:{{ .Values.service.otlpHttp.port }}).
3
+ 3. MCP (Streamable HTTP): port {{ .Values.service.mcp.port }}, path /mcp (e.g. http://<service>:{{ .Values.service.mcp.port }}/mcp).
4
+ {{- if .Values.ephemeralVolume.enabled }}
5
+ 4. An emptyDir is mounted at /tmp with HOME=/tmp/agentevals-home (ephemeral; contents are lost when the pod is deleted or rescheduled). Set ephemeralVolume.enabled=false if you need a writable root filesystem without this mount; the chart then forces securityContext.readOnlyRootFilesystem to false.
6
+ {{- end }}
7
+
8
+ Forward the HTTP port to your machine to reach the UI and API locally:
9
+ export POD_NAME=$(kubectl get pods --namespace {{ include "agentevals.namespace" . }} -l "app.kubernetes.io/name={{ include "agentevals.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
10
+ kubectl --namespace {{ include "agentevals.namespace" . }} port-forward $POD_NAME {{ .Values.service.http.port }}:{{ .Values.service.http.containerPort }}
11
+
12
+ Health check: GET http://<pod-ip>:{{ .Values.service.http.containerPort }}/api/health
@@ -0,0 +1,57 @@
1
+ {{- define "agentevals.name" -}}
2
+ {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
3
+ {{- end }}
4
+
5
+ {{- define "agentevals.fullname" -}}
6
+ {{- if .Values.fullnameOverride }}
7
+ {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
8
+ {{- else }}
9
+ {{- $name := default .Chart.Name .Values.nameOverride }}
10
+ {{- if contains $name .Release.Name }}
11
+ {{- .Release.Name | trunc 63 | trimSuffix "-" }}
12
+ {{- else }}
13
+ {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
14
+ {{- end }}
15
+ {{- end }}
16
+ {{- end }}
17
+
18
+ {{- define "agentevals.namespace" -}}
19
+ {{- default .Release.Namespace .Values.namespaceOverride }}
20
+ {{- end }}
21
+
22
+ {{- define "agentevals.chart" -}}
23
+ {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
24
+ {{- end }}
25
+
26
+ {{- define "agentevals.image" -}}
27
+ {{- $registry := .Values.image.registry | default .Values.registry -}}
28
+ {{- $tag := .Values.image.tag | default .Values.tag | default .Chart.AppVersion -}}
29
+ {{- if $registry -}}
30
+ {{- printf "%s/%s:%s" $registry .Values.image.repository $tag -}}
31
+ {{- else -}}
32
+ {{- printf "%s:%s" .Values.image.repository $tag -}}
33
+ {{- end -}}
34
+ {{- end }}
35
+
36
+ {{- define "agentevals.labels" -}}
37
+ helm.sh/chart: {{ include "agentevals.chart" . }}
38
+ {{ include "agentevals.selectorLabels" . }}
39
+ {{- if .Chart.AppVersion }}
40
+ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41
+ {{- end }}
42
+ app.kubernetes.io/managed-by: {{ .Release.Service }}
43
+ app.kubernetes.io/part-of: agentevals
44
+ {{- end }}
45
+
46
+ {{- define "agentevals.selectorLabels" -}}
47
+ app.kubernetes.io/name: {{ include "agentevals.name" . }}
48
+ app.kubernetes.io/instance: {{ .Release.Name }}
49
+ {{- end }}
50
+
51
+ {{- define "agentevals.serviceAccountName" -}}
52
+ {{- if .Values.serviceAccount.create }}
53
+ {{- default (include "agentevals.fullname" .) .Values.serviceAccount.name }}
54
+ {{- else }}
55
+ {{- default "default" .Values.serviceAccount.name }}
56
+ {{- end }}
57
+ {{- end }}
@@ -0,0 +1,128 @@
1
+ apiVersion: apps/v1
2
+ kind: Deployment
3
+ metadata:
4
+ name: {{ include "agentevals.fullname" . }}
5
+ namespace: {{ include "agentevals.namespace" . }}
6
+ labels:
7
+ {{- include "agentevals.labels" . | nindent 4 }}
8
+ spec:
9
+ replicas: {{ .Values.replicaCount }}
10
+ selector:
11
+ matchLabels:
12
+ {{- include "agentevals.selectorLabels" . | nindent 6 }}
13
+ template:
14
+ metadata:
15
+ {{- with .Values.podAnnotations }}
16
+ annotations:
17
+ {{- toYaml . | nindent 8 }}
18
+ {{- end }}
19
+ labels:
20
+ {{- include "agentevals.selectorLabels" . | nindent 8 }}
21
+ {{- with .Values.podLabels }}
22
+ {{- toYaml . | nindent 8 }}
23
+ {{- end }}
24
+ spec:
25
+ {{- with .Values.imagePullSecrets }}
26
+ imagePullSecrets:
27
+ {{- toYaml . | nindent 8 }}
28
+ {{- end }}
29
+ securityContext:
30
+ {{- toYaml .Values.podSecurityContext | nindent 8 }}
31
+ serviceAccountName: {{ include "agentevals.serviceAccountName" . }}
32
+ {{- if .Values.ephemeralVolume.enabled }}
33
+ volumes:
34
+ - name: agentevals-tmp
35
+ {{- if or .Values.ephemeralVolume.sizeLimit (eq .Values.ephemeralVolume.medium "Memory") }}
36
+ emptyDir:
37
+ {{- if eq .Values.ephemeralVolume.medium "Memory" }}
38
+ medium: Memory
39
+ {{- end }}
40
+ {{- with .Values.ephemeralVolume.sizeLimit }}
41
+ sizeLimit: {{ . }}
42
+ {{- end }}
43
+ {{- else }}
44
+ emptyDir: {}
45
+ {{- end }}
46
+ {{- end }}
47
+ containers:
48
+ - name: agentevals
49
+ image: {{ include "agentevals.image" . | quote }}
50
+ imagePullPolicy: {{ .Values.image.pullPolicy | default .Values.imagePullPolicy }}
51
+ {{- if .Values.command }}
52
+ command:
53
+ {{- toYaml .Values.command | nindent 12 }}
54
+ {{- end }}
55
+ {{- if .Values.args }}
56
+ args:
57
+ {{- toYaml .Values.args | nindent 12 }}
58
+ {{- end }}
59
+ env:
60
+ - name: AGENTEVALS_SERVER_URL
61
+ value: "http://127.0.0.1:{{ .Values.service.http.containerPort }}"
62
+ {{- if .Values.ephemeralVolume.enabled }}
63
+ - name: TMPDIR
64
+ value: "/tmp"
65
+ - name: HOME
66
+ value: "/tmp/agentevals-home"
67
+ {{- end }}
68
+ {{- with .Values.env }}
69
+ {{- toYaml . | nindent 12 }}
70
+ {{- end }}
71
+ {{- with .Values.envFrom }}
72
+ envFrom:
73
+ {{- toYaml . | nindent 12 }}
74
+ {{- end }}
75
+ ports:
76
+ - name: http
77
+ containerPort: {{ .Values.service.http.containerPort }}
78
+ protocol: TCP
79
+ - name: otlp-http
80
+ containerPort: {{ .Values.service.otlpHttp.containerPort }}
81
+ protocol: TCP
82
+ - name: mcp
83
+ containerPort: {{ .Values.service.mcp.containerPort }}
84
+ protocol: TCP
85
+ resources:
86
+ {{- toYaml .Values.resources | nindent 12 }}
87
+ securityContext:
88
+ {{- $sc := deepCopy .Values.securityContext }}
89
+ {{- if not .Values.ephemeralVolume.enabled }}
90
+ {{- $_ := set $sc "readOnlyRootFilesystem" false }}
91
+ {{- end }}
92
+ {{- toYaml $sc | nindent 12 }}
93
+ startupProbe:
94
+ httpGet:
95
+ path: /api/health
96
+ port: http
97
+ failureThreshold: 60
98
+ periodSeconds: 10
99
+ timeoutSeconds: 5
100
+ readinessProbe:
101
+ httpGet:
102
+ path: /api/health
103
+ port: http
104
+ initialDelaySeconds: 5
105
+ periodSeconds: 10
106
+ livenessProbe:
107
+ httpGet:
108
+ path: /api/health
109
+ port: http
110
+ initialDelaySeconds: 15
111
+ periodSeconds: 20
112
+ {{- if .Values.ephemeralVolume.enabled }}
113
+ volumeMounts:
114
+ - name: agentevals-tmp
115
+ mountPath: /tmp
116
+ {{- end }}
117
+ {{- with .Values.nodeSelector }}
118
+ nodeSelector:
119
+ {{- toYaml . | nindent 8 }}
120
+ {{- end }}
121
+ {{- with .Values.affinity }}
122
+ affinity:
123
+ {{- toYaml . | nindent 8 }}
124
+ {{- end }}
125
+ {{- with .Values.tolerations }}
126
+ tolerations:
127
+ {{- toYaml . | nindent 8 }}
128
+ {{- end }}
@@ -0,0 +1,24 @@
1
+ apiVersion: v1
2
+ kind: Service
3
+ metadata:
4
+ name: {{ include "agentevals.fullname" . }}
5
+ namespace: {{ include "agentevals.namespace" . }}
6
+ labels:
7
+ {{- include "agentevals.labels" . | nindent 4 }}
8
+ spec:
9
+ type: {{ .Values.service.type }}
10
+ ports:
11
+ - name: http
12
+ port: {{ .Values.service.http.port }}
13
+ targetPort: http
14
+ protocol: TCP
15
+ - name: otlp-http
16
+ port: {{ .Values.service.otlpHttp.port }}
17
+ targetPort: otlp-http
18
+ protocol: TCP
19
+ - name: mcp
20
+ port: {{ .Values.service.mcp.port }}
21
+ targetPort: mcp
22
+ protocol: TCP
23
+ selector:
24
+ {{- include "agentevals.selectorLabels" . | nindent 4 }}
@@ -0,0 +1,14 @@
1
+ {{- if .Values.serviceAccount.create -}}
2
+ apiVersion: v1
3
+ kind: ServiceAccount
4
+ metadata:
5
+ name: {{ include "agentevals.serviceAccountName" . }}
6
+ namespace: {{ include "agentevals.namespace" . }}
7
+ labels:
8
+ {{- include "agentevals.labels" . | nindent 4 }}
9
+ {{- with .Values.serviceAccount.annotations }}
10
+ annotations:
11
+ {{- toYaml . | nindent 4 }}
12
+ {{- end }}
13
+ automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
14
+ {{- end }}