agentevals-cli 0.7.1__tar.gz → 0.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/workflows/ci.yml +8 -0
  2. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/workflows/release.yml +51 -25
  3. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/Makefile +35 -1
  4. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/PKG-INFO +7 -3
  5. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/README.md +6 -2
  6. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/Chart.yaml +2 -2
  7. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/streaming.md +12 -6
  8. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/kubernetes/README.md +1 -2
  9. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/pyproject.toml +1 -1
  10. agentevals_cli-0.7.3/samples/tempo_export_with_batches.json +1 -0
  11. agentevals_cli-0.7.1/src/agentevals/_static/assets/index-7YPfPT4N.js → agentevals_cli-0.7.3/src/agentevals/_static/assets/index-Cl6S2lcn.js +64 -65
  12. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/_static/index.html +1 -1
  13. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/routes.py +4 -26
  14. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/cli.py +4 -3
  15. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/config.py +7 -4
  16. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/converter.py +19 -6
  17. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/eval_config_loader.py +1 -1
  18. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/extraction.py +51 -2
  19. agentevals_cli-0.7.3/src/agentevals/loader/__init__.py +29 -0
  20. agentevals_cli-0.7.3/src/agentevals/loader/auto.py +108 -0
  21. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/loader/otlp.py +38 -12
  22. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/mcp_server.py +5 -6
  23. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/runner.py +3 -15
  24. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_api.py +50 -6
  25. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_converter.py +33 -0
  26. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_extraction.py +74 -0
  27. agentevals_cli-0.7.3/tests/test_loader_auto.py +241 -0
  28. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_otlp_loader.py +125 -0
  29. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/api/client.ts +1 -4
  30. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/TraceUploadZone.tsx +1 -1
  31. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/TraceEditorDrawer.tsx +2 -2
  32. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/UploadView.tsx +2 -1
  33. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/context/TraceProvider.tsx +20 -2
  34. agentevals_cli-0.7.3/ui/src/lib/trace-loader.ts +320 -0
  35. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/uv.lock +1 -1
  36. agentevals_cli-0.7.1/src/agentevals/loader/__init__.py +0 -7
  37. agentevals_cli-0.7.1/ui/src/lib/trace-loader.ts +0 -249
  38. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.claude/skills/eval/SKILL.md +0 -0
  39. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.claude/skills/eval/evals/evals.json +0 -0
  40. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.claude/skills/inspect/SKILL.md +0 -0
  41. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.claude/skills/inspect/evals/evals.json +0 -0
  42. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.dockerignore +0 -0
  43. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  44. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  45. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  46. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.github/workflows/publish-evaluator-sdk.yml +0 -0
  47. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.gitignore +0 -0
  48. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/.mcp.json +0 -0
  49. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/CONTRIBUTING.md +0 -0
  50. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/DEVELOPMENT.md +0 -0
  51. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/Dockerfile +0 -0
  52. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/LICENSE +0 -0
  53. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/templates/NOTES.txt +0 -0
  54. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/templates/_helpers.tpl +0 -0
  55. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/templates/deployment.yaml +0 -0
  56. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/templates/service.yaml +0 -0
  57. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/templates/serviceaccount.yaml +0 -0
  58. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/charts/agentevals/values.yaml +0 -0
  59. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/assets/logo-color-on-transparent.svg +0 -0
  60. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/assets/logo-color.png +0 -0
  61. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/assets/logo-dark-on-transparent.svg +0 -0
  62. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/custom-evaluators.md +0 -0
  63. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/eval-set-format.md +0 -0
  64. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/docs/otel-compatibility.md +0 -0
  65. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/README.md +0 -0
  66. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/custom_evaluators/eval_config.yaml +0 -0
  67. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/custom_evaluators/response_quality.py +0 -0
  68. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/custom_evaluators/tool_call_checker.py +0 -0
  69. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/dice_agent/README.md +0 -0
  70. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/dice_agent/agent.py +0 -0
  71. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/dice_agent/eval_set.json +0 -0
  72. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/dice_agent/main.py +0 -0
  73. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/dice_agent/test_streaming.py +0 -0
  74. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/README.md +0 -0
  75. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/agent.py +0 -0
  76. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/eval_set.json +0 -0
  77. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/main.py +0 -0
  78. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/requirements.txt +0 -0
  79. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/langchain_agent/test_streaming.py +0 -0
  80. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/sdk_example/async_example.py +0 -0
  81. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/sdk_example/context_manager_example.py +0 -0
  82. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/sdk_example/decorator_example.py +0 -0
  83. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/sdk_example/requirements.txt +0 -0
  84. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/strands_agent/agent.py +0 -0
  85. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/strands_agent/eval_set.json +0 -0
  86. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/strands_agent/main.py +0 -0
  87. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/strands_agent/requirements.txt +0 -0
  88. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/adk/requirements.txt +0 -0
  89. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/adk/run.py +0 -0
  90. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/langchain/requirements.txt +0 -0
  91. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/langchain/run.py +0 -0
  92. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/ollama/requirements.txt +0 -0
  93. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/ollama/run.py +0 -0
  94. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/openai-agents/requirements.txt +0 -0
  95. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/openai-agents/run.py +0 -0
  96. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/pydantic-ai/requirements.txt +0 -0
  97. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/pydantic-ai/run.py +0 -0
  98. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/strands/requirements.txt +0 -0
  99. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/examples/zero-code-examples/strands/run.py +0 -0
  100. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/flake.lock +0 -0
  101. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/flake.nix +0 -0
  102. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/packages/evaluator-sdk-py/README.md +0 -0
  103. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/packages/evaluator-sdk-py/pyproject.toml +0 -0
  104. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py +0 -0
  105. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py +0 -0
  106. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/types.py +0 -0
  107. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/eval_set_helm.json +0 -0
  108. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/evalset_helm_3_2026-02-23.json +0 -0
  109. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/evalset_k8s_2026-02-20.json +0 -0
  110. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/helm.json +0 -0
  111. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/helm_2.json +0 -0
  112. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/helm_3.json +0 -0
  113. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/samples/k8s.json +0 -0
  114. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/__init__.py +0 -0
  115. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/_protocol.py +0 -0
  116. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/_static/assets/index-BqibLiHO.css +0 -0
  117. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/_static/logo.svg +0 -0
  118. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/_static/vite.svg +0 -0
  119. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/__init__.py +0 -0
  120. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/app.py +0 -0
  121. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/debug_routes.py +0 -0
  122. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/dependencies.py +0 -0
  123. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/models.py +0 -0
  124. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/otlp_app.py +0 -0
  125. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/otlp_grpc.py +0 -0
  126. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/otlp_processing.py +0 -0
  127. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/otlp_routes.py +0 -0
  128. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/api/streaming_routes.py +0 -0
  129. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/builtin_metrics.py +0 -0
  130. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/custom_evaluators.py +0 -0
  131. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/evaluator/__init__.py +0 -0
  132. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/evaluator/resolver.py +0 -0
  133. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/evaluator/sources.py +0 -0
  134. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/evaluator/templates.py +0 -0
  135. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/evaluator/venv.py +0 -0
  136. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/genai_converter.py +0 -0
  137. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/loader/base.py +0 -0
  138. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/loader/jaeger.py +0 -0
  139. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/openai_eval_backend.py +0 -0
  140. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/output.py +0 -0
  141. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/sdk.py +0 -0
  142. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/streaming/__init__.py +0 -0
  143. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/streaming/incremental_processor.py +0 -0
  144. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/streaming/processor.py +0 -0
  145. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/streaming/session.py +0 -0
  146. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/streaming/ws_server.py +0 -0
  147. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/trace_attrs.py +0 -0
  148. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/trace_metrics.py +0 -0
  149. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/utils/__init__.py +0 -0
  150. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/utils/genai_messages.py +0 -0
  151. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/utils/log_buffer.py +0 -0
  152. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/src/agentevals/utils/log_enrichment.py +0 -0
  153. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/__init__.py +0 -0
  154. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/conftest.py +0 -0
  155. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/test_evaluation_pipeline.py +0 -0
  156. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/test_live_agents.py +0 -0
  157. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/test_otlp_grpc_receiver.py +0 -0
  158. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/test_session_grouping.py +0 -0
  159. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/integration/test_timing_stress.py +0 -0
  160. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_cli.py +0 -0
  161. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_genai_converter.py +0 -0
  162. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_jaeger_loader.py +0 -0
  163. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_log_enrichment.py +0 -0
  164. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_mcp_server.py +0 -0
  165. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_otlp_receiver.py +0 -0
  166. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_output.py +0 -0
  167. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_protocol.py +0 -0
  168. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_runner.py +0 -0
  169. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_sdk.py +0 -0
  170. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/tests/test_trace_metrics.py +0 -0
  171. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/.gitignore +0 -0
  172. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/README.md +0 -0
  173. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/eslint.config.js +0 -0
  174. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/index.html +0 -0
  175. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/package-lock.json +0 -0
  176. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/package.json +0 -0
  177. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/public/logo.svg +0 -0
  178. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/public/vite.svg +0 -0
  179. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/App.css +0 -0
  180. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/App.tsx +0 -0
  181. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/assets/react.svg +0 -0
  182. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/annotation-queue/AnnotationDetailPanel.tsx +0 -0
  183. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/annotation-queue/AnnotationQueueView.tsx +0 -0
  184. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/annotation-queue/AnnotationTable.tsx +0 -0
  185. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/bug-report/BugReportModal.tsx +0 -0
  186. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/BuilderHeader.tsx +0 -0
  187. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/BuilderView.tsx +0 -0
  188. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/EvalCaseCard.tsx +0 -0
  189. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/EvalCasesList.tsx +0 -0
  190. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/InvocationEditor.tsx +0 -0
  191. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/JsonPreview.tsx +0 -0
  192. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/MetadataEditor.tsx +0 -0
  193. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/builder/index.ts +0 -0
  194. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/DashboardView.tsx +0 -0
  195. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/MetricScoreCard.tsx +0 -0
  196. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/PerformanceCard.tsx +0 -0
  197. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/PerformanceCharts.tsx +0 -0
  198. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/SummaryStats.tsx +0 -0
  199. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/TraceCard.tsx +0 -0
  200. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/dashboard/TraceTable.tsx +0 -0
  201. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/ComparisonPanel.tsx +0 -0
  202. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/DataSection.tsx +0 -0
  203. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/InspectorHeader.tsx +0 -0
  204. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/InspectorLayout.tsx +0 -0
  205. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/InspectorView.tsx +0 -0
  206. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/InvocationCard.tsx +0 -0
  207. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/InvocationSummaryPanel.tsx +0 -0
  208. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/MetricResultsSection.tsx +0 -0
  209. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/MetricsComparisonSection.tsx +0 -0
  210. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/PerformanceSection.tsx +0 -0
  211. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/ToolCallList.tsx +0 -0
  212. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/inspector/TrajectoryComparisonDetails.tsx +0 -0
  213. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/sidebar/Sidebar.tsx +0 -0
  214. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/streaming/LiveConversationPanel.tsx +0 -0
  215. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/streaming/LiveMessage.tsx +0 -0
  216. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/streaming/LiveStreamingView.tsx +0 -0
  217. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/streaming/SessionCard.tsx +0 -0
  218. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/streaming/SessionMetadata.tsx +0 -0
  219. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/EvalSetEditorDrawer.tsx +0 -0
  220. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/FileDropZone.tsx +0 -0
  221. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/MetricSelector.tsx +0 -0
  222. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/upload/RawJsonPreview.tsx +0 -0
  223. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/components/welcome/WelcomeView.tsx +0 -0
  224. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/config.ts +0 -0
  225. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/context/TraceContext.tsx +0 -0
  226. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/index.css +0 -0
  227. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/console-capture.ts +0 -0
  228. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/evalset-builder.ts +0 -0
  229. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/network-capture.ts +0 -0
  230. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/trace-helpers.ts +0 -0
  231. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/trace-metadata.ts +0 -0
  232. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/trace-patcher.ts +0 -0
  233. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/types.ts +0 -0
  234. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/lib/utils.ts +0 -0
  235. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/src/main.tsx +0 -0
  236. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/tsconfig.app.json +0 -0
  237. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/tsconfig.json +0 -0
  238. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/tsconfig.node.json +0 -0
  239. {agentevals_cli-0.7.1 → agentevals_cli-0.7.3}/ui/vite.config.ts +0 -0
@@ -10,6 +10,14 @@ permissions:
10
10
  contents: read
11
11
 
12
12
  jobs:
13
+ helm-chart:
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/checkout@v6
17
+
18
+ - name: Lint + template
19
+ run: make helm-test
20
+
13
21
  lint:
14
22
  runs-on: ubuntu-latest
15
23
  steps:
@@ -18,6 +18,8 @@ jobs:
18
18
  runs-on: ubuntu-latest
19
19
  steps:
20
20
  - uses: actions/checkout@v6
21
+ with:
22
+ ref: ${{ github.event.inputs.tag || github.ref_name }}
21
23
 
22
24
  - uses: astral-sh/setup-uv@v7
23
25
  with:
@@ -42,29 +44,13 @@ jobs:
42
44
  dist/core/*.whl
43
45
  dist/bundle/*.whl
44
46
 
45
- github-release:
46
- needs: build
47
- runs-on: ubuntu-latest
48
- permissions:
49
- contents: write
50
-
51
- steps:
52
- - uses: actions/download-artifact@v8
53
- with:
54
- name: wheels
55
- path: dist/
56
-
57
- - uses: softprops/action-gh-release@v2.5.0
58
- with:
59
- tag_name: ${{ github.event.inputs.tag || github.ref_name }}
60
- files: dist/**/*.whl
61
- generate_release_notes: true
62
-
63
47
  publish:
64
48
  needs: build
65
49
  runs-on: ubuntu-latest
66
50
  steps:
67
51
  - uses: actions/checkout@v6
52
+ with:
53
+ ref: ${{ github.event.inputs.tag || github.ref_name }}
68
54
 
69
55
  - uses: astral-sh/setup-uv@v7
70
56
  with:
@@ -93,13 +79,34 @@ jobs:
93
79
  uv publish dist/* --token ${{ secrets.PYPI_TOKEN }}
94
80
  rm -rf src/agentevals/_static
95
81
 
82
+ github-release:
83
+ needs: publish
84
+ runs-on: ubuntu-latest
85
+ permissions:
86
+ contents: write
87
+
88
+ steps:
89
+ - uses: actions/download-artifact@v8
90
+ with:
91
+ name: wheels
92
+ path: dist/
93
+
94
+ - uses: softprops/action-gh-release@v2.5.0
95
+ with:
96
+ tag_name: ${{ github.event.inputs.tag || github.ref_name }}
97
+ files: dist/**/*.whl
98
+ generate_release_notes: true
99
+
96
100
  push-docker:
101
+ needs: github-release
97
102
  runs-on: ubuntu-latest
98
103
  permissions:
99
104
  contents: read
100
105
  packages: write
101
106
  steps:
102
107
  - uses: actions/checkout@v6
108
+ with:
109
+ ref: ${{ github.event.inputs.tag || github.ref_name }}
103
110
 
104
111
  - name: Login to GitHub Container Registry
105
112
  uses: docker/login-action@v4
@@ -114,13 +121,6 @@ jobs:
114
121
  - name: Set up Docker Buildx
115
122
  uses: docker/setup-buildx-action@v4
116
123
 
117
- - name: Set appVersion in Chart.yaml
118
- run: |
119
- VERSION="${TAG#v}"
120
- sed -i "s/^appVersion:.*/appVersion: \"$VERSION\"/" charts/agentevals/Chart.yaml
121
- env:
122
- TAG: ${{ github.event.inputs.tag || github.ref_name }}
123
-
124
124
  - name: Build and push
125
125
  run: |
126
126
  VERSION="${TAG#v}"
@@ -129,3 +129,29 @@ jobs:
129
129
  DOCKER_TAG="$VERSION"
130
130
  env:
131
131
  TAG: ${{ github.event.inputs.tag || github.ref_name }}
132
+
133
+ push-helm-chart:
134
+ needs: push-docker
135
+ runs-on: ubuntu-latest
136
+ permissions:
137
+ contents: read
138
+ packages: write
139
+ steps:
140
+ - uses: actions/checkout@v6
141
+ with:
142
+ ref: ${{ github.event.inputs.tag || github.ref_name }}
143
+
144
+ - name: Login to GitHub Container Registry
145
+ uses: docker/login-action@v4
146
+ with:
147
+ registry: ghcr.io
148
+ username: ${{ github.actor }}
149
+ password: ${{ secrets.GITHUB_TOKEN }}
150
+
151
+ - name: Publish Helm chart to GHCR (OCI)
152
+ env:
153
+ TAG: ${{ github.event.inputs.tag || github.ref_name }}
154
+ HELM_REPO: oci://ghcr.io/${{ github.repository }}
155
+ run: |
156
+ export HELM_CHART_VERSION="${TAG#v}"
157
+ make helm-publish
@@ -9,7 +9,13 @@ DOCKER_IMAGE_REF := $(if $(DOCKER_REGISTRY),$(DOCKER_REGISTRY:%/=%)/$(DOCKER_IMA
9
9
  # Multi-arch build (requires docker buildx). Manifest lists must be pushed — use build-docker-local for a single-arch --load.
10
10
  PLATFORMS ?= linux/amd64,linux/arm64
11
11
 
12
- .PHONY: build build-bundle build-docker build-ui release clean dev-backend dev-frontend dev-bundle test test-unit test-integration test-e2e
12
+ HELM_REPO ?= oci://ghcr.io/agentevals-dev/agentevals
13
+ HELM_DIST_FOLDER ?= dist/helm
14
+ HELM_CHART_DIR ?= charts/agentevals
15
+ HELM_CHART_OCI_URL ?= $(HELM_REPO)/helm
16
+ HELM_CHART_VERSION ?= $(VERSION)
17
+
18
+ .PHONY: build build-bundle build-docker build-ui release clean dev-backend dev-frontend dev-bundle test test-unit test-integration test-e2e helm-lint helm-template helm-test helm-cleanup helm-package helm-publish
13
19
 
14
20
  build:
15
21
  uv build
@@ -70,3 +76,31 @@ test-e2e:
70
76
  clean:
71
77
  rm -rf dist/ build/ src/agentevals/_static/ ui/dist/
72
78
  find . -name '*.egg-info' -type d -exec rm -rf {} + 2>/dev/null || true
79
+
80
+ .PHONY: helm-lint
81
+ helm-lint:
82
+ helm lint "$(HELM_CHART_DIR)"
83
+
84
+ # Render templates to catch YAML/Helm errors (default values + ephemeralVolume disabled path).
85
+ .PHONY: helm-template
86
+ helm-template:
87
+ helm template agentevals "$(HELM_CHART_DIR)" --namespace agentevals >/dev/null
88
+ helm template agentevals "$(HELM_CHART_DIR)" --namespace agentevals \
89
+ --set ephemeralVolume.enabled=false >/dev/null
90
+
91
+ .PHONY: helm-test
92
+ helm-test: helm-lint helm-template
93
+
94
+ .PHONY: helm-cleanup
95
+ helm-cleanup:
96
+ rm -f $(HELM_DIST_FOLDER)/agentevals-*.tgz
97
+
98
+ .PHONY: helm-package
99
+ helm-package: helm-cleanup
100
+ mkdir -p $(HELM_DIST_FOLDER)
101
+ helm package "$(HELM_CHART_DIR)" -d "$(HELM_DIST_FOLDER)" \
102
+ --version "$(HELM_CHART_VERSION)" --app-version "$(HELM_CHART_VERSION)"
103
+
104
+ .PHONY: helm-publish
105
+ helm-publish: helm-package
106
+ helm push "$(HELM_DIST_FOLDER)/agentevals-$(HELM_CHART_VERSION).tgz" "$(HELM_CHART_OCI_URL)"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentevals-cli
3
- Version: 0.7.1
3
+ Version: 0.7.3
4
4
  Summary: Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.11
@@ -300,12 +300,16 @@ docker run -p 8001:8001 -p 4317:4317 -p 4318:4318 agentevals
300
300
 
301
301
  ### Helm
302
302
 
303
- A Helm chart is available in [`charts/agentevals/`](charts/agentevals/):
303
+ The Helm chart is published as an OCI artifact to GitHub Container Registry:
304
304
 
305
305
  ```bash
306
- helm install agentevals ./charts/agentevals
306
+ helm install agentevals oci://ghcr.io/agentevals-dev/agentevals/helm/agentevals
307
307
  ```
308
308
 
309
+ Pass `--version <x.y.z>` to pin to a specific release. Available versions are listed under [packages](https://github.com/agentevals-dev/agentevals/pkgs/container/agentevals%2Fhelm%2Fagentevals).
310
+
311
+ The source for the chart lives in [`charts/agentevals/`](charts/agentevals/) if you want to install from a local checkout instead.
312
+
309
313
  See the [Kubernetes example](examples/kubernetes/README.md) for an end-to-end walkthrough deploying agentevals alongside kagent and an OTel Collector on Kubernetes.
310
314
 
311
315
  ## MCP Server
@@ -274,12 +274,16 @@ docker run -p 8001:8001 -p 4317:4317 -p 4318:4318 agentevals
274
274
 
275
275
  ### Helm
276
276
 
277
- A Helm chart is available in [`charts/agentevals/`](charts/agentevals/):
277
+ The Helm chart is published as an OCI artifact to GitHub Container Registry:
278
278
 
279
279
  ```bash
280
- helm install agentevals ./charts/agentevals
280
+ helm install agentevals oci://ghcr.io/agentevals-dev/agentevals/helm/agentevals
281
281
  ```
282
282
 
283
+ Pass `--version <x.y.z>` to pin to a specific release. Available versions are listed under [packages](https://github.com/agentevals-dev/agentevals/pkgs/container/agentevals%2Fhelm%2Fagentevals).
284
+
285
+ The source for the chart lives in [`charts/agentevals/`](charts/agentevals/) if you want to install from a local checkout instead.
286
+
283
287
  See the [Kubernetes example](examples/kubernetes/README.md) for an end-to-end walkthrough deploying agentevals alongside kagent and an OTel Collector on Kubernetes.
284
288
 
285
289
  ## MCP Server
@@ -2,5 +2,5 @@ apiVersion: v2
2
2
  name: agentevals
3
3
  description: agentevals web UI, OTLP HTTP+gRPC receivers, and MCP (Streamable HTTP)
4
4
  type: application
5
- version: 0.1.0
6
- appVersion: "0.5.2"
5
+ version: 0.7.1
6
+ appVersion: "0.7.1"
@@ -66,13 +66,18 @@ See [examples/README.md](../examples/README.md) for details on supported instrum
66
66
 
67
67
  ### OTLP/JSON Support
68
68
 
69
- Native OpenTelemetry format no conversion to Jaeger needed:
69
+ Native OpenTelemetry format. The CLI auto-detects Jaeger vs OTLP from
70
+ file contents, so `.json` and `.jsonl` exports from Tempo, Jaeger, or
71
+ the OTel collector all work without a `--format` flag:
70
72
 
71
73
  ```bash
72
- # Load OTLP files directly
73
- agentevals run trace.otlp.json --format otlp-json --eval-set eval.json
74
+ # Load any trace file directly; format is auto-detected
75
+ agentevals run trace.otlp.json --eval-set eval.json
74
76
  ```
75
77
 
78
+ Pass `--format otlp-json` (or `jaeger-json`) only as an override when
79
+ auto-detection fails on a non-standard export.
80
+
76
81
  ### Real-time Span Streaming
77
82
 
78
83
  The `AgentEvalsStreamingProcessor` is an OTel `SpanProcessor` that streams spans over WebSocket as they complete:
@@ -311,6 +316,7 @@ This installs `opentelemetry-sdk>=1.20.0`. Agent code also needs `websockets` fo
311
316
  ## Compatibility
312
317
 
313
318
  All existing workflows continue to work:
314
- - Jaeger JSON files still supported: `agentevals run trace.json --eval-set ...`
315
- - OTLP/JSON files: `agentevals run trace.otlp.json --format otlp-json --eval-set ...`
316
- - Web UI upload flow unchanged
319
+ - Trace files (Jaeger or OTLP, including Tempo exports) auto-detect by
320
+ content: `agentevals run trace.json --eval-set ...`
321
+ - Pass `--format` only to override detection on non-standard exports.
322
+ - Web UI upload flow unchanged.
@@ -25,8 +25,7 @@ kagent (gRPC :4317) --> OTel Collector( optional ) --> agentevals (gRPC :4317 /
25
25
  ### 1. agentevals
26
26
 
27
27
  ```bash
28
- helm install agentevals ./charts/agentevals \
29
- --set tag=0.6.3
28
+ helm install agentevals oci://ghcr.io/agentevals-dev/agentevals/helm/agentevals
30
29
  ```
31
30
 
32
31
  This creates a single pod exposing:
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "agentevals-cli"
7
- version = "0.7.1"
7
+ version = "0.7.3"
8
8
  description = "Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"