agentevals-cli 0.7.3__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/DEVELOPMENT.md +29 -1
  2. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/Dockerfile +1 -1
  3. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/Makefile +32 -1
  4. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/PKG-INFO +31 -4
  5. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/README.md +28 -3
  6. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/charts/agentevals/templates/NOTES.txt +8 -0
  7. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/charts/agentevals/templates/_helpers.tpl +33 -0
  8. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/charts/agentevals/templates/deployment.yaml +25 -2
  9. agentevals_cli-0.8.0/charts/agentevals/templates/postgresql-secret.yaml +13 -0
  10. agentevals_cli-0.8.0/charts/agentevals/templates/postgresql.yaml +142 -0
  11. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/charts/agentevals/templates/service.yaml +1 -1
  12. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/charts/agentevals/values.yaml +74 -1
  13. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/pyproject.toml +7 -1
  14. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/api/app.py +52 -0
  15. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/api/routes.py +93 -0
  16. agentevals_cli-0.8.0/src/agentevals/api/runs_routes.py +130 -0
  17. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/cli.py +145 -0
  18. agentevals_cli-0.8.0/src/agentevals/run/__init__.py +9 -0
  19. agentevals_cli-0.8.0/src/agentevals/run/fetcher.py +83 -0
  20. agentevals_cli-0.8.0/src/agentevals/run/result_builder.py +125 -0
  21. agentevals_cli-0.8.0/src/agentevals/run/service.py +128 -0
  22. agentevals_cli-0.8.0/src/agentevals/run/sinks.py +230 -0
  23. agentevals_cli-0.8.0/src/agentevals/run/worker.py +184 -0
  24. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/runner.py +1 -0
  25. agentevals_cli-0.8.0/src/agentevals/storage/__init__.py +48 -0
  26. agentevals_cli-0.8.0/src/agentevals/storage/config.py +95 -0
  27. agentevals_cli-0.8.0/src/agentevals/storage/models.py +123 -0
  28. agentevals_cli-0.8.0/src/agentevals/storage/postgres/__init__.py +5 -0
  29. agentevals_cli-0.8.0/src/agentevals/storage/postgres/migrations/000001_init.down.sql +5 -0
  30. agentevals_cli-0.8.0/src/agentevals/storage/postgres/migrations/000001_init.up.sql +110 -0
  31. agentevals_cli-0.8.0/src/agentevals/storage/postgres/migrator.py +287 -0
  32. agentevals_cli-0.8.0/src/agentevals/storage/postgres/pool.py +80 -0
  33. agentevals_cli-0.8.0/src/agentevals/storage/repos/__init__.py +90 -0
  34. agentevals_cli-0.8.0/src/agentevals/storage/repos/memory.py +179 -0
  35. agentevals_cli-0.8.0/src/agentevals/storage/repos/postgres.py +406 -0
  36. agentevals_cli-0.8.0/tests/api/test_evaluate_persistence.py +173 -0
  37. agentevals_cli-0.8.0/tests/api/test_runs_routes.py +185 -0
  38. agentevals_cli-0.8.0/tests/integration/__init__.py +0 -0
  39. agentevals_cli-0.8.0/tests/run/__init__.py +0 -0
  40. agentevals_cli-0.8.0/tests/run/test_fetcher.py +79 -0
  41. agentevals_cli-0.8.0/tests/run/test_result_builder.py +184 -0
  42. agentevals_cli-0.8.0/tests/run/test_service.py +169 -0
  43. agentevals_cli-0.8.0/tests/run/test_sinks.py +248 -0
  44. agentevals_cli-0.8.0/tests/storage/__init__.py +0 -0
  45. agentevals_cli-0.8.0/tests/storage/test_config.py +90 -0
  46. agentevals_cli-0.8.0/tests/storage/test_memory_repos.py +226 -0
  47. agentevals_cli-0.8.0/tests/storage/test_migrator.py +122 -0
  48. agentevals_cli-0.8.0/tests/storage/test_models.py +96 -0
  49. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/uv.lock +54 -2
  50. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.claude/skills/eval/SKILL.md +0 -0
  51. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.claude/skills/eval/evals/evals.json +0 -0
  52. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.claude/skills/inspect/SKILL.md +0 -0
  53. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.claude/skills/inspect/evals/evals.json +0 -0
  54. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.dockerignore +0 -0
  55. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  56. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  57. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  58. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.github/workflows/ci.yml +0 -0
  59. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.github/workflows/publish-evaluator-sdk.yml +0 -0
  60. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.github/workflows/release.yml +0 -0
  61. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.gitignore +0 -0
  62. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/.mcp.json +0 -0
  63. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/CONTRIBUTING.md +0 -0
  64. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/LICENSE +0 -0
  65. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/charts/agentevals/Chart.yaml +0 -0
  66. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/charts/agentevals/templates/serviceaccount.yaml +0 -0
  67. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/docs/assets/logo-color-on-transparent.svg +0 -0
  68. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/docs/assets/logo-color.png +0 -0
  69. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/docs/assets/logo-dark-on-transparent.svg +0 -0
  70. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/docs/custom-evaluators.md +0 -0
  71. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/docs/eval-set-format.md +0 -0
  72. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/docs/otel-compatibility.md +0 -0
  73. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/docs/streaming.md +0 -0
  74. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/README.md +0 -0
  75. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/custom_evaluators/eval_config.yaml +0 -0
  76. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/custom_evaluators/response_quality.py +0 -0
  77. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/custom_evaluators/tool_call_checker.py +0 -0
  78. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/dice_agent/README.md +0 -0
  79. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/dice_agent/agent.py +0 -0
  80. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/dice_agent/eval_set.json +0 -0
  81. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/dice_agent/main.py +0 -0
  82. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/dice_agent/test_streaming.py +0 -0
  83. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/kubernetes/README.md +0 -0
  84. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/langchain_agent/README.md +0 -0
  85. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/langchain_agent/agent.py +0 -0
  86. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/langchain_agent/eval_set.json +0 -0
  87. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/langchain_agent/main.py +0 -0
  88. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/langchain_agent/requirements.txt +0 -0
  89. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/langchain_agent/test_streaming.py +0 -0
  90. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/sdk_example/async_example.py +0 -0
  91. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/sdk_example/context_manager_example.py +0 -0
  92. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/sdk_example/decorator_example.py +0 -0
  93. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/sdk_example/requirements.txt +0 -0
  94. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/strands_agent/agent.py +0 -0
  95. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/strands_agent/eval_set.json +0 -0
  96. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/strands_agent/main.py +0 -0
  97. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/strands_agent/requirements.txt +0 -0
  98. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/adk/requirements.txt +0 -0
  99. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/adk/run.py +0 -0
  100. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/langchain/requirements.txt +0 -0
  101. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/langchain/run.py +0 -0
  102. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/ollama/requirements.txt +0 -0
  103. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/ollama/run.py +0 -0
  104. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/openai-agents/requirements.txt +0 -0
  105. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/openai-agents/run.py +0 -0
  106. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/pydantic-ai/requirements.txt +0 -0
  107. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/pydantic-ai/run.py +0 -0
  108. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/strands/requirements.txt +0 -0
  109. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/examples/zero-code-examples/strands/run.py +0 -0
  110. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/flake.lock +0 -0
  111. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/flake.nix +0 -0
  112. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/packages/evaluator-sdk-py/README.md +0 -0
  113. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/packages/evaluator-sdk-py/pyproject.toml +0 -0
  114. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py +0 -0
  115. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py +0 -0
  116. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/types.py +0 -0
  117. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/samples/eval_set_helm.json +0 -0
  118. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/samples/evalset_helm_3_2026-02-23.json +0 -0
  119. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/samples/evalset_k8s_2026-02-20.json +0 -0
  120. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/samples/helm.json +0 -0
  121. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/samples/helm_2.json +0 -0
  122. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/samples/helm_3.json +0 -0
  123. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/samples/k8s.json +0 -0
  124. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/samples/tempo_export_with_batches.json +0 -0
  125. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/__init__.py +0 -0
  126. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/_protocol.py +0 -0
  127. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/_static/assets/index-BqibLiHO.css +0 -0
  128. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/_static/assets/index-Cl6S2lcn.js +0 -0
  129. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/_static/index.html +0 -0
  130. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/_static/logo.svg +0 -0
  131. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/_static/vite.svg +0 -0
  132. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/api/__init__.py +0 -0
  133. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/api/debug_routes.py +0 -0
  134. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/api/dependencies.py +0 -0
  135. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/api/models.py +0 -0
  136. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/api/otlp_app.py +0 -0
  137. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/api/otlp_grpc.py +0 -0
  138. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/api/otlp_processing.py +0 -0
  139. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/api/otlp_routes.py +0 -0
  140. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/api/streaming_routes.py +0 -0
  141. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/builtin_metrics.py +0 -0
  142. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/config.py +0 -0
  143. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/converter.py +0 -0
  144. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/custom_evaluators.py +0 -0
  145. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/eval_config_loader.py +0 -0
  146. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/evaluator/__init__.py +0 -0
  147. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/evaluator/resolver.py +0 -0
  148. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/evaluator/sources.py +0 -0
  149. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/evaluator/templates.py +0 -0
  150. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/evaluator/venv.py +0 -0
  151. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/extraction.py +0 -0
  152. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/genai_converter.py +0 -0
  153. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/loader/__init__.py +0 -0
  154. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/loader/auto.py +0 -0
  155. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/loader/base.py +0 -0
  156. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/loader/jaeger.py +0 -0
  157. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/loader/otlp.py +0 -0
  158. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/mcp_server.py +0 -0
  159. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/openai_eval_backend.py +0 -0
  160. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/output.py +0 -0
  161. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/sdk.py +0 -0
  162. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/streaming/__init__.py +0 -0
  163. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/streaming/incremental_processor.py +0 -0
  164. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/streaming/processor.py +0 -0
  165. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/streaming/session.py +0 -0
  166. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/streaming/ws_server.py +0 -0
  167. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/trace_attrs.py +0 -0
  168. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/trace_metrics.py +0 -0
  169. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/utils/__init__.py +0 -0
  170. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/utils/genai_messages.py +0 -0
  171. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/utils/log_buffer.py +0 -0
  172. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/src/agentevals/utils/log_enrichment.py +0 -0
  173. {agentevals_cli-0.7.3/tests/integration → agentevals_cli-0.8.0/tests/api}/__init__.py +0 -0
  174. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/integration/conftest.py +0 -0
  175. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/integration/test_evaluation_pipeline.py +0 -0
  176. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/integration/test_live_agents.py +0 -0
  177. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/integration/test_otlp_grpc_receiver.py +0 -0
  178. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/integration/test_session_grouping.py +0 -0
  179. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/integration/test_timing_stress.py +0 -0
  180. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_api.py +0 -0
  181. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_cli.py +0 -0
  182. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_converter.py +0 -0
  183. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_extraction.py +0 -0
  184. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_genai_converter.py +0 -0
  185. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_jaeger_loader.py +0 -0
  186. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_loader_auto.py +0 -0
  187. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_log_enrichment.py +0 -0
  188. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_mcp_server.py +0 -0
  189. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_otlp_loader.py +0 -0
  190. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_otlp_receiver.py +0 -0
  191. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_output.py +0 -0
  192. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_protocol.py +0 -0
  193. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_runner.py +0 -0
  194. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_sdk.py +0 -0
  195. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/tests/test_trace_metrics.py +0 -0
  196. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/.gitignore +0 -0
  197. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/README.md +0 -0
  198. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/eslint.config.js +0 -0
  199. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/index.html +0 -0
  200. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/package-lock.json +0 -0
  201. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/package.json +0 -0
  202. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/public/logo.svg +0 -0
  203. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/public/vite.svg +0 -0
  204. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/App.css +0 -0
  205. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/App.tsx +0 -0
  206. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/api/client.ts +0 -0
  207. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/assets/react.svg +0 -0
  208. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/annotation-queue/AnnotationDetailPanel.tsx +0 -0
  209. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/annotation-queue/AnnotationQueueView.tsx +0 -0
  210. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/annotation-queue/AnnotationTable.tsx +0 -0
  211. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/bug-report/BugReportModal.tsx +0 -0
  212. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/builder/BuilderHeader.tsx +0 -0
  213. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/builder/BuilderView.tsx +0 -0
  214. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/builder/EvalCaseCard.tsx +0 -0
  215. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/builder/EvalCasesList.tsx +0 -0
  216. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/builder/InvocationEditor.tsx +0 -0
  217. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/builder/JsonPreview.tsx +0 -0
  218. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/builder/MetadataEditor.tsx +0 -0
  219. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/builder/TraceUploadZone.tsx +0 -0
  220. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/builder/index.ts +0 -0
  221. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/dashboard/DashboardView.tsx +0 -0
  222. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/dashboard/MetricScoreCard.tsx +0 -0
  223. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/dashboard/PerformanceCard.tsx +0 -0
  224. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/dashboard/PerformanceCharts.tsx +0 -0
  225. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/dashboard/SummaryStats.tsx +0 -0
  226. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/dashboard/TraceCard.tsx +0 -0
  227. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/dashboard/TraceTable.tsx +0 -0
  228. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/ComparisonPanel.tsx +0 -0
  229. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/DataSection.tsx +0 -0
  230. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/InspectorHeader.tsx +0 -0
  231. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/InspectorLayout.tsx +0 -0
  232. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/InspectorView.tsx +0 -0
  233. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/InvocationCard.tsx +0 -0
  234. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/InvocationSummaryPanel.tsx +0 -0
  235. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/MetricResultsSection.tsx +0 -0
  236. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/MetricsComparisonSection.tsx +0 -0
  237. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/PerformanceSection.tsx +0 -0
  238. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/ToolCallList.tsx +0 -0
  239. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/inspector/TrajectoryComparisonDetails.tsx +0 -0
  240. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/sidebar/Sidebar.tsx +0 -0
  241. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/streaming/LiveConversationPanel.tsx +0 -0
  242. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/streaming/LiveMessage.tsx +0 -0
  243. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/streaming/LiveStreamingView.tsx +0 -0
  244. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/streaming/SessionCard.tsx +0 -0
  245. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/streaming/SessionMetadata.tsx +0 -0
  246. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/upload/EvalSetEditorDrawer.tsx +0 -0
  247. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/upload/FileDropZone.tsx +0 -0
  248. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/upload/MetricSelector.tsx +0 -0
  249. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/upload/RawJsonPreview.tsx +0 -0
  250. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/upload/TraceEditorDrawer.tsx +0 -0
  251. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/upload/UploadView.tsx +0 -0
  252. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/components/welcome/WelcomeView.tsx +0 -0
  253. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/config.ts +0 -0
  254. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/context/TraceContext.tsx +0 -0
  255. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/context/TraceProvider.tsx +0 -0
  256. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/index.css +0 -0
  257. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/lib/console-capture.ts +0 -0
  258. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/lib/evalset-builder.ts +0 -0
  259. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/lib/network-capture.ts +0 -0
  260. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/lib/trace-helpers.ts +0 -0
  261. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/lib/trace-loader.ts +0 -0
  262. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/lib/trace-metadata.ts +0 -0
  263. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/lib/trace-patcher.ts +0 -0
  264. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/lib/types.ts +0 -0
  265. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/lib/utils.ts +0 -0
  266. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/src/main.tsx +0 -0
  267. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/tsconfig.app.json +0 -0
  268. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/tsconfig.json +0 -0
  269. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/tsconfig.node.json +0 -0
  270. {agentevals_cli-0.7.3 → agentevals_cli-0.8.0}/ui/vite.config.ts +0 -0
@@ -23,10 +23,38 @@ make dev-frontend # start Vite dev server (port 5173) with HMR
23
23
  make dev-bundle # build UI, serve full bundled experience at port 8001 via uv run
24
24
  ```
25
25
 
26
- Standard development uses `dev-backend` + `dev-frontend` in separate terminals. The Vite dev server proxies nothing the frontend calls the backend at `http://localhost:8001` directly via CORS.
26
+ Standard development uses `dev-backend` + `dev-frontend` in separate terminals. The Vite dev server proxies nothing; the frontend calls the backend at `http://localhost:8001` directly via CORS.
27
27
 
28
28
  `dev-bundle` is useful for testing the bundled UI experience without building a wheel. It copies `ui/dist` into the source tree temporarily and cleans up when the server exits.
29
29
 
30
+ ### Postgres backend (optional, for `/api/runs`)
31
+
32
+ > **Preview.** The schema, the CLI surface, and `/api/runs` shape are still
33
+ > stabilizing. Recreate the agentevals schema between minor version upgrades
34
+ > until further notice; do not depend on persisted data surviving a
35
+ > `git pull` of agentevals itself.
36
+
37
+ The default in-memory backend keeps `make dev-backend` zero-config. To exercise the async run pipeline locally, bring up a Postgres alongside the app:
38
+
39
+ ```bash
40
+ make pg-up # start postgres:18.3-alpine in a docker container (port 5432, ephemeral via --rm)
41
+ make migrate # apply the agentevals schema
42
+ make dev-backend-pg # pg-up + migrate + serve --dev with backend=postgres wired up
43
+ make pg-down # stop the container; data is discarded with --rm
44
+ ```
45
+
46
+ Override the defaults via `PG_PORT=5433 make pg-up` etc. The `migrate` target is idempotent (a second invocation is a no-op).
47
+
48
+ Once running, submit a run with:
49
+
50
+ ```bash
51
+ curl -X POST http://localhost:8001/api/runs \
52
+ -H 'content-type: application/json' \
53
+ -d '{"spec": {"approach": "trace_replay", "target": {"kind": "inline", "inline": {...}}, "evalConfig": {"metrics": ["tool_trajectory_avg_score"]}}}'
54
+ ```
55
+
56
+ Then poll `GET /api/runs/{runId}` and `GET /api/runs/{runId}/results`. Without `storage.backend=postgres`, the `/api/runs` endpoints return 503 with a hint pointing at the env var.
57
+
30
58
  ### Building
31
59
 
32
60
  ```bash
@@ -24,7 +24,7 @@ COPY src ./src
24
24
 
25
25
  COPY --from=ui /build/ui/dist ./src/agentevals/_static
26
26
 
27
- RUN uv sync --frozen --no-dev --extra live \
27
+ RUN uv sync --frozen --no-dev --extra live --extra postgres \
28
28
  && groupadd --gid 1000 app \
29
29
  && useradd --uid 1000 --gid app --home-dir /app --no-log-init app \
30
30
  && chown -R app:app /app
@@ -15,7 +15,14 @@ HELM_CHART_DIR ?= charts/agentevals
15
15
  HELM_CHART_OCI_URL ?= $(HELM_REPO)/helm
16
16
  HELM_CHART_VERSION ?= $(VERSION)
17
17
 
18
- .PHONY: build build-bundle build-docker build-ui release clean dev-backend dev-frontend dev-bundle test test-unit test-integration test-e2e helm-lint helm-template helm-test helm-cleanup helm-package helm-publish
18
+ .PHONY: build build-bundle build-docker build-ui release clean dev-backend dev-backend-pg dev-frontend dev-bundle pg-up pg-down migrate test test-unit test-integration test-e2e helm-lint helm-template helm-test helm-cleanup helm-package helm-publish
19
+
20
+ PG_CONTAINER ?= agentevals-pg
21
+ PG_PORT ?= 5432
22
+ PG_USER ?= agentevals
23
+ PG_PASSWORD ?= agentevals
24
+ PG_DATABASE ?= agentevals
25
+ PG_DSN ?= postgresql://$(PG_USER):$(PG_PASSWORD)@localhost:$(PG_PORT)/$(PG_DATABASE)
19
26
 
20
27
  build:
21
28
  uv build
@@ -53,6 +60,30 @@ release: clean build-ui
53
60
  dev-backend:
54
61
  uv run agentevals serve --dev
55
62
 
63
+ pg-up:
64
+ @if [ -z "$$(docker ps -q -f name=^/$(PG_CONTAINER)$$)" ]; then \
65
+ docker run -d --rm --name $(PG_CONTAINER) \
66
+ -e POSTGRES_USER=$(PG_USER) \
67
+ -e POSTGRES_PASSWORD=$(PG_PASSWORD) \
68
+ -e POSTGRES_DB=$(PG_DATABASE) \
69
+ -p $(PG_PORT):5432 postgres:18.3-alpine; \
70
+ else \
71
+ echo "container $(PG_CONTAINER) already running"; \
72
+ fi
73
+ @until docker exec $(PG_CONTAINER) pg_isready -U $(PG_USER) >/dev/null 2>&1; do sleep 1; done
74
+ @echo "Postgres ready at $(PG_DSN)"
75
+
76
+ pg-down:
77
+ -docker stop $(PG_CONTAINER)
78
+
79
+ migrate:
80
+ AGENTEVALS_DATABASE_URL=$(PG_DSN) uv run agentevals migrate up
81
+
82
+ dev-backend-pg: pg-up migrate
83
+ AGENTEVALS_STORAGE_BACKEND=postgres \
84
+ AGENTEVALS_DATABASE_URL=$(PG_DSN) \
85
+ uv run agentevals serve --dev
86
+
56
87
  dev-frontend:
57
88
  cd ui && npm run dev
58
89
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentevals-cli
3
- Version: 0.7.3
3
+ Version: 0.8.0
4
4
  Summary: Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.11
@@ -19,6 +19,8 @@ Requires-Dist: httpx>=0.27.0; extra == 'live'
19
19
  Requires-Dist: mcp>=1.26.0; extra == 'live'
20
20
  Provides-Extra: openai
21
21
  Requires-Dist: openai>=2.0; extra == 'openai'
22
+ Provides-Extra: postgres
23
+ Requires-Dist: asyncpg>=0.30.0; extra == 'postgres'
22
24
  Provides-Extra: streaming
23
25
  Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'streaming'
24
26
  Requires-Dist: websockets>=12.0; extra == 'streaming'
@@ -26,9 +28,9 @@ Description-Content-Type: text/markdown
26
28
 
27
29
  <p align="center">
28
30
  <picture>
29
- <source media="(prefers-color-scheme: dark)" srcset="docs/assets/logo-color-on-transparent.svg">
30
- <source media="(prefers-color-scheme: light)" srcset="docs/assets/logo-dark-on-transparent.svg">
31
- <img src="docs/assets/logo-color-on-transparent.svg" alt="agentevals" width="420" />
31
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/agentevals-dev/agentevals/main/docs/assets/logo-color-on-transparent.svg">
32
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/agentevals-dev/agentevals/main/docs/assets/logo-dark-on-transparent.svg">
33
+ <img src="https://raw.githubusercontent.com/agentevals-dev/agentevals/main/docs/assets/logo-color-on-transparent.svg" alt="agentevals" width="420" />
32
34
  </picture>
33
35
  </p>
34
36
 
@@ -312,6 +314,31 @@ The source for the chart lives in [`charts/agentevals/`](charts/agentevals/) if
312
314
 
313
315
  See the [Kubernetes example](examples/kubernetes/README.md) for an end-to-end walkthrough deploying agentevals alongside kagent and an OTel Collector on Kubernetes.
314
316
 
317
+ #### Postgres backend (`/api/runs`)
318
+
319
+ > **Preview.** Persistent run history backed by Postgres is under active
320
+ > development. The `storage.*` and `database.postgres.*` chart values, the
321
+ > `/api/runs` HTTP surface, and the database schema may change incompatibly
322
+ > in upcoming releases. Operators evaluating this feature should plan to
323
+ > recreate the agentevals schema when upgrading between minor versions.
324
+ > Default in-memory mode is unaffected.
325
+
326
+ By default the chart deploys agentevals with an in-memory backend; runs and results are not persisted. To enable the async `POST /api/runs` pipeline with durable Postgres-backed state:
327
+
328
+ ```bash
329
+ # Bundled Postgres (dev / evaluation only):
330
+ helm install agentevals oci://ghcr.io/agentevals-dev/agentevals/helm/agentevals \
331
+ --set storage.backend=postgres \
332
+ --set database.postgres.bundled.enabled=true
333
+
334
+ # Or supply an external Postgres DSN:
335
+ helm install agentevals oci://ghcr.io/agentevals-dev/agentevals/helm/agentevals \
336
+ --set storage.backend=postgres \
337
+ --set database.postgres.url='postgresql://user:pass@host:5432/dbname'
338
+ ```
339
+
340
+ When `storage.backend=postgres` the app applies any pending schema migrations on startup (advisory-lock protected, safe across replicas) and starts an in-process worker that processes the run queue. Without `storage.backend=postgres` the `/api/runs` endpoints return 503 with a hint pointing at the env var.
341
+
315
342
  ## MCP Server
316
343
 
317
344
  Exposes evaluation tools to MCP clients. A `.mcp.json` at the project root lets Claude Code pick it up automatically.
@@ -1,8 +1,8 @@
1
1
  <p align="center">
2
2
  <picture>
3
- <source media="(prefers-color-scheme: dark)" srcset="docs/assets/logo-color-on-transparent.svg">
4
- <source media="(prefers-color-scheme: light)" srcset="docs/assets/logo-dark-on-transparent.svg">
5
- <img src="docs/assets/logo-color-on-transparent.svg" alt="agentevals" width="420" />
3
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/agentevals-dev/agentevals/main/docs/assets/logo-color-on-transparent.svg">
4
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/agentevals-dev/agentevals/main/docs/assets/logo-dark-on-transparent.svg">
5
+ <img src="https://raw.githubusercontent.com/agentevals-dev/agentevals/main/docs/assets/logo-color-on-transparent.svg" alt="agentevals" width="420" />
6
6
  </picture>
7
7
  </p>
8
8
 
@@ -286,6 +286,31 @@ The source for the chart lives in [`charts/agentevals/`](charts/agentevals/) if
286
286
 
287
287
  See the [Kubernetes example](examples/kubernetes/README.md) for an end-to-end walkthrough deploying agentevals alongside kagent and an OTel Collector on Kubernetes.
288
288
 
289
+ #### Postgres backend (`/api/runs`)
290
+
291
+ > **Preview.** Persistent run history backed by Postgres is under active
292
+ > development. The `storage.*` and `database.postgres.*` chart values, the
293
+ > `/api/runs` HTTP surface, and the database schema may change incompatibly
294
+ > in upcoming releases. Operators evaluating this feature should plan to
295
+ > recreate the agentevals schema when upgrading between minor versions.
296
+ > The default in-memory mode is unaffected.
297
+
298
+ By default the chart deploys agentevals with an in-memory backend; runs and results are not persisted. To enable the async `POST /api/runs` pipeline with durable Postgres-backed state:
299
+
300
+ ```bash
301
+ # Bundled Postgres (dev / evaluation only):
302
+ helm install agentevals oci://ghcr.io/agentevals-dev/agentevals/helm/agentevals \
303
+ --set storage.backend=postgres \
304
+ --set database.postgres.bundled.enabled=true
305
+
306
+ # Or supply an external Postgres DSN:
307
+ helm install agentevals oci://ghcr.io/agentevals-dev/agentevals/helm/agentevals \
308
+ --set storage.backend=postgres \
309
+ --set database.postgres.url='postgresql://user:pass@host:5432/dbname'
310
+ ```
311
+
312
+ When `storage.backend=postgres`, the app applies any pending schema migrations on startup (advisory-lock protected, safe across replicas) and starts an in-process worker that processes the run queue. Without `storage.backend=postgres`, the `/api/runs` endpoints return 503 with a hint pointing at the storage-backend environment variable (`AGENTEVALS_STORAGE_BACKEND`, which the chart sets when `storage.backend=postgres`).
313
+
289
314
  ## MCP Server
290
315
 
291
316
  Exposes evaluation tools to MCP clients. A `.mcp.json` at the project root lets Claude Code pick it up automatically.
@@ -11,3 +11,11 @@ Get the Service URL:
11
11
  kubectl --namespace {{ include "agentevals.namespace" . }} port-forward $POD_NAME {{ .Values.service.http.port }}:{{ .Values.service.http.port }}
12
12
 
13
13
  Health check: GET http://<pod-ip>:{{ .Values.service.http.containerPort }}/api/health
14
+
15
+ {{- if eq .Values.storage.backend "postgres" }}
16
+
17
+ NOTE: Postgres-backed storage is a preview feature. The storage.* and
18
+ database.postgres.* values, the /api/runs HTTP surface, and the database
19
+ schema may change incompatibly in upcoming releases. Recreate the
20
+ agentevals schema when upgrading between minor versions.
21
+ {{- end }}
@@ -48,6 +48,17 @@ app.kubernetes.io/name: {{ include "agentevals.name" . }}
48
48
  app.kubernetes.io/instance: {{ .Release.Name }}
49
49
  {{- end }}
50
50
 
51
+ {{- /*
52
+ Selector labels scoped to the main app Pod and its Service. Carries the
53
+ ``app.kubernetes.io/component: agentevals`` discriminator so the agentevals
54
+ Service does not also match the bundled Postgres Pod (which carries
55
+ ``app.kubernetes.io/component: database`` instead).
56
+ */ -}}
57
+ {{- define "agentevals.app.selectorLabels" -}}
58
+ {{ include "agentevals.selectorLabels" . }}
59
+ app.kubernetes.io/component: agentevals
60
+ {{- end }}
61
+
51
62
  {{- define "agentevals.serviceAccountName" -}}
52
63
  {{- if .Values.serviceAccount.create }}
53
64
  {{- default (include "agentevals.fullname" .) .Values.serviceAccount.name }}
@@ -55,3 +66,25 @@ app.kubernetes.io/instance: {{ .Release.Name }}
55
66
  {{- default "default" .Values.serviceAccount.name }}
56
67
  {{- end }}
57
68
  {{- end }}
69
+
70
+ {{/*
71
+ Service name for the bundled Postgres instance.
72
+ */}}
73
+ {{- define "agentevals.postgresqlServiceName" -}}
74
+ {{- printf "%s-postgresql" (include "agentevals.fullname" .) -}}
75
+ {{- end -}}
76
+
77
+ {{/*
78
+ Bundled Postgres image reference (registry/repository/name:tag).
79
+ */}}
80
+ {{- define "agentevals.postgresql.image" -}}
81
+ {{- $pg := .Values.database.postgres.bundled -}}
82
+ {{- printf "%s/%s/%s:%s" $pg.image.registry $pg.image.repository $pg.image.name $pg.image.tag -}}
83
+ {{- end -}}
84
+
85
+ {{/*
86
+ Secret name holding POSTGRES_PASSWORD for the bundled Postgres instance.
87
+ */}}
88
+ {{- define "agentevals.passwordSecretName" -}}
89
+ {{- printf "%s-postgresql" (include "agentevals.fullname" .) -}}
90
+ {{- end -}}
@@ -9,7 +9,7 @@ spec:
9
9
  replicas: {{ .Values.replicaCount }}
10
10
  selector:
11
11
  matchLabels:
12
- {{- include "agentevals.selectorLabels" . | nindent 6 }}
12
+ {{- include "agentevals.app.selectorLabels" . | nindent 6 }}
13
13
  template:
14
14
  metadata:
15
15
  {{- with .Values.podAnnotations }}
@@ -17,7 +17,7 @@ spec:
17
17
  {{- toYaml . | nindent 8 }}
18
18
  {{- end }}
19
19
  labels:
20
- {{- include "agentevals.selectorLabels" . | nindent 8 }}
20
+ {{- include "agentevals.app.selectorLabels" . | nindent 8 }}
21
21
  {{- with .Values.podLabels }}
22
22
  {{- toYaml . | nindent 8 }}
23
23
  {{- end }}
@@ -65,6 +65,29 @@ spec:
65
65
  - name: HOME
66
66
  value: "/tmp/agentevals-home"
67
67
  {{- end }}
68
+ {{- if eq .Values.storage.backend "postgres" }}
69
+ - name: AGENTEVALS_STORAGE_BACKEND
70
+ value: "postgres"
71
+ - name: AGENTEVALS_DATABASE_SCHEMA
72
+ value: {{ .Values.database.postgres.schema | quote }}
73
+ {{- if .Values.database.postgres.urlFile }}
74
+ - name: AGENTEVALS_DATABASE_URL_FILE
75
+ value: {{ .Values.database.postgres.urlFile | quote }}
76
+ {{- else if .Values.database.postgres.url }}
77
+ - name: AGENTEVALS_DATABASE_URL
78
+ value: {{ .Values.database.postgres.url | quote }}
79
+ {{- else if .Values.database.postgres.bundled.enabled }}
80
+ - name: POSTGRES_PASSWORD
81
+ valueFrom:
82
+ secretKeyRef:
83
+ name: {{ include "agentevals.passwordSecretName" . }}
84
+ key: POSTGRES_PASSWORD
85
+ - name: AGENTEVALS_DATABASE_URL
86
+ value: {{ printf "postgresql://agentevals:$(POSTGRES_PASSWORD)@%s.%s.svc.cluster.local:5432/agentevals?sslmode=disable" (include "agentevals.postgresqlServiceName" .) (include "agentevals.namespace" .) | quote }}
87
+ {{- else }}
88
+ {{ fail "storage.backend=postgres requires database.postgres.url, database.postgres.urlFile, or database.postgres.bundled.enabled=true" }}
89
+ {{- end }}
90
+ {{- end }}
68
91
  {{- with .Values.env }}
69
92
  {{- toYaml . | nindent 12 }}
70
93
  {{- end }}
@@ -0,0 +1,13 @@
1
+ {{- if and (eq .Values.storage.backend "postgres") .Values.database.postgres.bundled.enabled (not .Values.database.postgres.url) (not .Values.database.postgres.urlFile) }}
2
+ apiVersion: v1
3
+ kind: Secret
4
+ metadata:
5
+ name: {{ include "agentevals.passwordSecretName" . }}
6
+ namespace: {{ include "agentevals.namespace" . }}
7
+ labels:
8
+ {{- include "agentevals.labels" . | nindent 4 }}
9
+ app.kubernetes.io/component: database
10
+ type: Opaque
11
+ data:
12
+ POSTGRES_PASSWORD: {{ "agentevals" | b64enc | quote }}
13
+ {{- end }}
@@ -0,0 +1,142 @@
1
+ {{- if and (eq .Values.storage.backend "postgres") .Values.database.postgres.bundled.enabled (not .Values.database.postgres.url) (not .Values.database.postgres.urlFile) }}
2
+ {{- $pg := .Values.database.postgres.bundled }}
3
+ {{- $fullname := include "agentevals.postgresqlServiceName" . }}
4
+ ---
5
+ apiVersion: v1
6
+ kind: ServiceAccount
7
+ metadata:
8
+ name: {{ $fullname }}
9
+ namespace: {{ include "agentevals.namespace" . }}
10
+ labels:
11
+ {{- include "agentevals.labels" . | nindent 4 }}
12
+ app.kubernetes.io/component: database
13
+ ---
14
+ apiVersion: v1
15
+ kind: PersistentVolumeClaim
16
+ metadata:
17
+ name: {{ $fullname }}
18
+ namespace: {{ include "agentevals.namespace" . }}
19
+ labels:
20
+ {{- include "agentevals.labels" . | nindent 4 }}
21
+ app.kubernetes.io/component: database
22
+ spec:
23
+ accessModes:
24
+ - ReadWriteOnce
25
+ {{- if $pg.storageClassName }}
26
+ storageClassName: {{ $pg.storageClassName | quote }}
27
+ {{- end }}
28
+ resources:
29
+ requests:
30
+ storage: {{ $pg.storage | quote }}
31
+ ---
32
+ apiVersion: apps/v1
33
+ kind: Deployment
34
+ metadata:
35
+ name: {{ $fullname }}
36
+ namespace: {{ include "agentevals.namespace" . }}
37
+ labels:
38
+ {{- include "agentevals.labels" . | nindent 4 }}
39
+ app.kubernetes.io/component: database
40
+ spec:
41
+ replicas: 1
42
+ strategy:
43
+ type: Recreate
44
+ selector:
45
+ matchLabels:
46
+ {{- include "agentevals.selectorLabels" . | nindent 6 }}
47
+ app.kubernetes.io/component: database
48
+ template:
49
+ metadata:
50
+ labels:
51
+ {{- include "agentevals.selectorLabels" . | nindent 8 }}
52
+ app.kubernetes.io/component: database
53
+ spec:
54
+ {{- with .Values.imagePullSecrets }}
55
+ imagePullSecrets:
56
+ {{- toYaml . | nindent 8 }}
57
+ {{- end }}
58
+ serviceAccountName: {{ $fullname }}
59
+ securityContext:
60
+ fsGroup: 999
61
+ runAsUser: 999
62
+ runAsGroup: 999
63
+ runAsNonRoot: true
64
+ containers:
65
+ - name: postgresql
66
+ image: {{ include "agentevals.postgresql.image" . }}
67
+ imagePullPolicy: {{ $pg.image.pullPolicy }}
68
+ securityContext:
69
+ allowPrivilegeEscalation: false
70
+ ports:
71
+ - name: postgresql
72
+ containerPort: 5432
73
+ protocol: TCP
74
+ env:
75
+ - name: POSTGRES_DB
76
+ value: "agentevals"
77
+ - name: POSTGRES_USER
78
+ value: "agentevals"
79
+ - name: POSTGRES_PASSWORD
80
+ valueFrom:
81
+ secretKeyRef:
82
+ name: {{ include "agentevals.passwordSecretName" . }}
83
+ key: POSTGRES_PASSWORD
84
+ - name: PGDATA
85
+ value: /var/lib/postgresql/data/pgdata
86
+ livenessProbe:
87
+ exec:
88
+ command:
89
+ - pg_isready
90
+ - -U
91
+ - agentevals
92
+ - -d
93
+ - agentevals
94
+ initialDelaySeconds: 20
95
+ periodSeconds: 10
96
+ timeoutSeconds: 5
97
+ failureThreshold: 6
98
+ successThreshold: 1
99
+ readinessProbe:
100
+ exec:
101
+ command:
102
+ - pg_isready
103
+ - -U
104
+ - agentevals
105
+ - -d
106
+ - agentevals
107
+ initialDelaySeconds: 5
108
+ periodSeconds: 5
109
+ timeoutSeconds: 3
110
+ failureThreshold: 3
111
+ successThreshold: 1
112
+ {{- with $pg.resources }}
113
+ resources:
114
+ {{- toYaml . | nindent 12 }}
115
+ {{- end }}
116
+ volumeMounts:
117
+ - name: data
118
+ mountPath: /var/lib/postgresql/data
119
+ volumes:
120
+ - name: data
121
+ persistentVolumeClaim:
122
+ claimName: {{ $fullname }}
123
+ ---
124
+ apiVersion: v1
125
+ kind: Service
126
+ metadata:
127
+ name: {{ $fullname }}
128
+ namespace: {{ include "agentevals.namespace" . }}
129
+ labels:
130
+ {{- include "agentevals.labels" . | nindent 4 }}
131
+ app.kubernetes.io/component: database
132
+ spec:
133
+ type: ClusterIP
134
+ ports:
135
+ - name: postgresql
136
+ port: 5432
137
+ targetPort: postgresql
138
+ protocol: TCP
139
+ selector:
140
+ {{- include "agentevals.selectorLabels" . | nindent 4 }}
141
+ app.kubernetes.io/component: database
142
+ {{- end }}
@@ -25,4 +25,4 @@ spec:
25
25
  targetPort: mcp
26
26
  protocol: TCP
27
27
  selector:
28
- {{- include "agentevals.selectorLabels" . | nindent 4 }}
28
+ {{- include "agentevals.app.selectorLabels" . | nindent 4 }}
@@ -2,7 +2,10 @@
2
2
  # Global
3
3
  # ==============================================================================
4
4
 
5
- # -- Number of replicas. Only 1 is supported (no shared job state across pods).
5
+ # -- Number of replicas. The default in-memory backend has no shared state, so
6
+ # scale beyond 1 only when storage.backend is "postgres" (durable runs/results
7
+ # in Postgres are safe to share across replicas via SELECT FOR UPDATE SKIP
8
+ # LOCKED claim semantics).
6
9
  replicaCount: 1
7
10
 
8
11
  # -- Global container image registry (prepended to image.repository)
@@ -155,3 +158,73 @@ env: []
155
158
 
156
159
  # -- Extra envFrom sources (ConfigMapRef, SecretRef)
157
160
  envFrom: []
161
+
162
+ # ==============================================================================
163
+ # STORAGE (preview feature)
164
+ #
165
+ # Persistent run history backed by Postgres is under active development.
166
+ # storage.* and database.postgres.* keys, and the underlying schema, may
167
+ # change incompatibly in upcoming releases. Treat persisted runs and
168
+ # results as ephemeral; recreate the agentevals schema when upgrading
169
+ # between minor versions. The default in-memory backend is unaffected.
170
+ # ==============================================================================
171
+
172
+ storage:
173
+ # -- Storage backend. "memory" (default) keeps the developer experience
174
+ # zero-config: nothing persisted, restarts lose in-flight state. "postgres"
175
+ # enables /api/runs and persists runs + results in Postgres (preview).
176
+ backend: memory
177
+
178
+ # ==============================================================================
179
+ # DATABASE CONFIGURATION
180
+ # ==============================================================================
181
+ # Used only when storage.backend is "postgres". Priority order (first match wins):
182
+ # 1. database.postgres.urlFile -- file-based DSN (workload identity friendly)
183
+ # 2. database.postgres.url -- literal DSN
184
+ # 3. database.postgres.bundled -- chart-bundled Postgres (dev/eval only)
185
+ # If none is configured the chart fails to render.
186
+
187
+ database:
188
+ postgres:
189
+ # -- External Postgres connection string.
190
+ # When set, takes precedence over the bundled instance regardless of
191
+ # database.postgres.bundled.enabled.
192
+ url: ""
193
+ # -- Path to a file containing the connection string. Takes precedence
194
+ # over url when set. Useful for projected workload-identity tokens.
195
+ urlFile: ""
196
+ # -- Postgres schema to use for agentevals tables.
197
+ schema: agentevals
198
+ # -- Bundled Postgres instance for development and evaluation only.
199
+ # Not suitable for production. Deployed when enabled is true and url /
200
+ # urlFile are not set.
201
+ bundled:
202
+ # -- Set to true to deploy a chart-managed Postgres alongside the app.
203
+ # Off by default so the zero-config install stays in-memory.
204
+ enabled: false
205
+ image:
206
+ # -- Bundled Postgres image registry
207
+ registry: docker.io
208
+ # -- Bundled Postgres image repository (org/namespace)
209
+ repository: library
210
+ # -- Bundled Postgres image name
211
+ name: postgres
212
+ # -- Bundled Postgres image tag
213
+ tag: "18.3-alpine"
214
+ # -- Bundled Postgres image pull policy
215
+ pullPolicy: IfNotPresent
216
+ # -- PersistentVolumeClaim size for the bundled Postgres data
217
+ storage: 1Gi
218
+ # -- StorageClass for the PVC. Defaults to the cluster default when empty.
219
+ storageClassName: ""
220
+ # The database name, user, and password are hardcoded for the bundled
221
+ # instance (all: "agentevals"). This is intentional for a dev/eval
222
+ # setup. Switch to an external database for production.
223
+ # -- Resource requests/limits for the bundled Postgres container
224
+ resources:
225
+ requests:
226
+ cpu: 250m
227
+ memory: 256Mi
228
+ limits:
229
+ cpu: 500m
230
+ memory: 512Mi
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "agentevals-cli"
7
- version = "0.7.3"
7
+ version = "0.8.0"
8
8
  description = "Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -33,6 +33,9 @@ streaming = [
33
33
  openai = [
34
34
  "openai>=2.0",
35
35
  ]
36
+ postgres = [
37
+ "asyncpg>=0.30.0",
38
+ ]
36
39
 
37
40
  [project.scripts]
38
41
  agentevals = "agentevals.cli:main"
@@ -43,6 +46,9 @@ artifacts = ["src/agentevals/_static/**"]
43
46
  [tool.hatch.build.targets.wheel]
44
47
  packages = ["src/agentevals"]
45
48
 
49
+ [tool.hatch.build.targets.wheel.force-include]
50
+ "src/agentevals/storage/postgres/migrations" = "agentevals/storage/postgres/migrations"
51
+
46
52
  [tool.uv.workspace]
47
53
  members = ["packages/evaluator-sdk-py"]
48
54
 
@@ -16,13 +16,20 @@ from fastapi.responses import StreamingResponse
16
16
 
17
17
  from agentevals import __version__
18
18
 
19
+ from ..run.service import RunService
20
+ from ..run.worker import AsyncRunWorker
21
+ from ..storage import StorageSettings, build_repos
22
+ from ..storage.postgres.migrator import Migrator
19
23
  from ..utils.log_buffer import log_buffer
20
24
  from .debug_routes import debug_router
21
25
  from .routes import router
26
+ from .runs_routes import runs_router
22
27
 
23
28
  if TYPE_CHECKING:
24
29
  from ..streaming.ws_server import StreamingTraceManager
25
30
 
31
+ logger = logging.getLogger(__name__)
32
+
26
33
  try:
27
34
  from dotenv import load_dotenv
28
35
 
@@ -51,7 +58,39 @@ def _build_lifespan():
51
58
  mgr = getattr(app.state, "trace_manager", None)
52
59
  if mgr:
53
60
  mgr.start_cleanup_task()
61
+
62
+ storage_settings: StorageSettings | None = None
63
+ worker: AsyncRunWorker | None = None
64
+ try:
65
+ storage_settings = StorageSettings.from_env()
66
+ except Exception as exc:
67
+ logger.error("Storage configuration invalid; /api/runs will not be available: %s", exc)
68
+
69
+ if storage_settings is not None and storage_settings.backend == "postgres":
70
+ logger.info("Applying any pending migrations to schema '%s'", storage_settings.schema_name)
71
+ migrator = Migrator(
72
+ dsn=storage_settings.database_url or "",
73
+ schema=storage_settings.schema_name,
74
+ lock_timeout_s=storage_settings.migrate_lock_timeout_s,
75
+ )
76
+ await migrator.up()
77
+
78
+ repos = await build_repos(storage_settings)
79
+ app.state.storage_settings = storage_settings
80
+ app.state.repos = repos
81
+ app.state.run_service = RunService(repos.runs, repos.results)
82
+
83
+ worker = AsyncRunWorker(runs=repos.runs, results=repos.results, settings=storage_settings)
84
+ await worker.start()
85
+ app.state.run_worker = worker
86
+
54
87
  yield
88
+
89
+ if worker is not None:
90
+ await worker.stop()
91
+ repos = getattr(app.state, "repos", None)
92
+ if repos is not None:
93
+ await repos.close()
55
94
  if mgr:
56
95
  await mgr.shutdown()
57
96
  ae_logger.removeHandler(log_buffer)
@@ -70,6 +109,18 @@ def create_app(
70
109
  version=__version__,
71
110
  description="REST API for evaluating agent traces using ADK's scoring framework",
72
111
  lifespan=_build_lifespan(),
112
+ openapi_tags=[
113
+ {
114
+ "name": "runs",
115
+ "description": (
116
+ "**Preview.** Async run pipeline backed by Postgres. The shape of "
117
+ "submission, responses, and persisted run / result data may change "
118
+ "incompatibly in upcoming releases. Operators evaluating this "
119
+ "surface should plan to recreate persisted data when upgrading "
120
+ "agentevals between minor versions."
121
+ ),
122
+ },
123
+ ],
73
124
  )
74
125
 
75
126
  app.add_middleware(
@@ -83,6 +134,7 @@ def create_app(
83
134
 
84
135
  app.include_router(router, prefix="/api")
85
136
  app.include_router(debug_router, prefix="/api/debug")
137
+ app.include_router(runs_router, prefix="/api")
86
138
 
87
139
  if trace_manager is not None:
88
140
  app.state.trace_manager = trace_manager