agentevals-cli 0.5.2__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.github/workflows/release.yml +18 -6
  2. agentevals_cli-0.6.0/PKG-INFO +333 -0
  3. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/README.md +78 -19
  4. agentevals_cli-0.6.0/docs/assets/logo-color-on-transparent.svg +13 -0
  5. agentevals_cli-0.6.0/docs/assets/logo-dark-on-transparent.svg +13 -0
  6. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/docs/custom-evaluators.md +82 -35
  7. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/custom_evaluators/eval_config.yaml +0 -1
  8. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/pyproject.toml +5 -1
  9. agentevals_cli-0.6.0/src/agentevals/_static/assets/index-BqibLiHO.css +1 -0
  10. agentevals_cli-0.6.0/src/agentevals/_static/assets/index-Dz2NgC8m.js +343 -0
  11. agentevals_cli-0.6.0/src/agentevals/_static/index.html +14 -0
  12. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/api/routes.py +2 -0
  13. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/config.py +41 -1
  14. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/custom_evaluators.py +45 -11
  15. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/eval_config_loader.py +3 -1
  16. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/evaluator/sources.py +23 -3
  17. agentevals_cli-0.6.0/src/agentevals/evaluator/venv.py +119 -0
  18. agentevals_cli-0.6.0/src/agentevals/openai_eval_backend.py +246 -0
  19. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/output.py +21 -4
  20. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/runner.py +6 -0
  21. agentevals_cli-0.6.0/tests/test_output.py +112 -0
  22. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/test_runner.py +4 -0
  23. agentevals_cli-0.6.0/ui/public/logo.svg +13 -0
  24. agentevals_cli-0.6.0/ui/public/vite.svg +1 -0
  25. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/uv.lock +6 -2
  26. agentevals_cli-0.5.2/PKG-INFO +0 -22
  27. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.claude/skills/eval/SKILL.md +0 -0
  28. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.claude/skills/eval/evals/evals.json +0 -0
  29. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.claude/skills/inspect/SKILL.md +0 -0
  30. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.claude/skills/inspect/evals/evals.json +0 -0
  31. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  32. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  33. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  34. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.github/workflows/ci.yml +0 -0
  35. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.github/workflows/publish-evaluator-sdk.yml +0 -0
  36. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.gitignore +0 -0
  37. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/.mcp.json +0 -0
  38. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/CONTRIBUTING.md +0 -0
  39. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/DEVELOPMENT.md +0 -0
  40. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/LICENSE +0 -0
  41. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/Makefile +0 -0
  42. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/docs/assets/logo-color.png +0 -0
  43. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/docs/eval-set-format.md +0 -0
  44. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/docs/otel-compatibility.md +0 -0
  45. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/docs/streaming.md +0 -0
  46. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/README.md +0 -0
  47. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/custom_evaluators/response_quality.py +0 -0
  48. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/custom_evaluators/tool_call_checker.py +0 -0
  49. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/dice_agent/README.md +0 -0
  50. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/dice_agent/agent.py +0 -0
  51. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/dice_agent/eval_set.json +0 -0
  52. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/dice_agent/main.py +0 -0
  53. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/dice_agent/test_streaming.py +0 -0
  54. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/langchain_agent/README.md +0 -0
  55. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/langchain_agent/agent.py +0 -0
  56. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/langchain_agent/eval_set.json +0 -0
  57. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/langchain_agent/main.py +0 -0
  58. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/langchain_agent/requirements.txt +0 -0
  59. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/langchain_agent/test_streaming.py +0 -0
  60. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/sdk_example/async_example.py +0 -0
  61. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/sdk_example/context_manager_example.py +0 -0
  62. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/sdk_example/decorator_example.py +0 -0
  63. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/sdk_example/requirements.txt +0 -0
  64. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/strands_agent/agent.py +0 -0
  65. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/strands_agent/eval_set.json +0 -0
  66. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/strands_agent/main.py +0 -0
  67. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/strands_agent/requirements.txt +0 -0
  68. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/zero-code-examples/adk/requirements.txt +0 -0
  69. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/zero-code-examples/adk/run.py +0 -0
  70. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/zero-code-examples/langchain/requirements.txt +0 -0
  71. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/zero-code-examples/langchain/run.py +0 -0
  72. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/zero-code-examples/strands/requirements.txt +0 -0
  73. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/examples/zero-code-examples/strands/run.py +0 -0
  74. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/flake.lock +0 -0
  75. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/flake.nix +0 -0
  76. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/packages/evaluator-sdk-py/README.md +0 -0
  77. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/packages/evaluator-sdk-py/pyproject.toml +0 -0
  78. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/__init__.py +0 -0
  79. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/decorator.py +0 -0
  80. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/packages/evaluator-sdk-py/src/agentevals_evaluator_sdk/types.py +0 -0
  81. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/samples/eval_set_helm.json +0 -0
  82. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/samples/evalset_helm_3_2026-02-23.json +0 -0
  83. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/samples/evalset_k8s_2026-02-20.json +0 -0
  84. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/samples/helm.json +0 -0
  85. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/samples/helm_2.json +0 -0
  86. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/samples/helm_3.json +0 -0
  87. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/samples/k8s.json +0 -0
  88. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/__init__.py +0 -0
  89. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/_protocol.py +0 -0
  90. {agentevals_cli-0.5.2/ui/public → agentevals_cli-0.6.0/src/agentevals/_static}/logo.svg +0 -0
  91. {agentevals_cli-0.5.2/ui/public → agentevals_cli-0.6.0/src/agentevals/_static}/vite.svg +0 -0
  92. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/api/__init__.py +0 -0
  93. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/api/app.py +0 -0
  94. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/api/debug_routes.py +0 -0
  95. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/api/models.py +0 -0
  96. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/api/otlp_app.py +0 -0
  97. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/api/otlp_routes.py +0 -0
  98. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/api/streaming_routes.py +0 -0
  99. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/builtin_metrics.py +0 -0
  100. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/cli.py +0 -0
  101. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/converter.py +0 -0
  102. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/evaluator/__init__.py +0 -0
  103. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/evaluator/resolver.py +0 -0
  104. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/evaluator/templates.py +0 -0
  105. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/extraction.py +0 -0
  106. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/genai_converter.py +0 -0
  107. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/loader/__init__.py +0 -0
  108. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/loader/base.py +0 -0
  109. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/loader/jaeger.py +0 -0
  110. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/loader/otlp.py +0 -0
  111. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/mcp_server.py +0 -0
  112. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/sdk.py +0 -0
  113. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/streaming/__init__.py +0 -0
  114. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/streaming/incremental_processor.py +0 -0
  115. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/streaming/processor.py +0 -0
  116. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/streaming/session.py +0 -0
  117. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/streaming/ws_server.py +0 -0
  118. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/trace_attrs.py +0 -0
  119. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/trace_metrics.py +0 -0
  120. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/utils/__init__.py +0 -0
  121. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/utils/genai_messages.py +0 -0
  122. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/utils/log_buffer.py +0 -0
  123. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/src/agentevals/utils/log_enrichment.py +0 -0
  124. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/integration/__init__.py +0 -0
  125. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/integration/conftest.py +0 -0
  126. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/integration/test_evaluation_pipeline.py +0 -0
  127. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/integration/test_live_agents.py +0 -0
  128. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/integration/test_session_grouping.py +0 -0
  129. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/integration/test_timing_stress.py +0 -0
  130. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/test_api.py +0 -0
  131. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/test_converter.py +0 -0
  132. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/test_extraction.py +0 -0
  133. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/test_genai_converter.py +0 -0
  134. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/test_jaeger_loader.py +0 -0
  135. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/test_log_enrichment.py +0 -0
  136. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/test_otlp_loader.py +0 -0
  137. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/test_otlp_receiver.py +0 -0
  138. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/test_protocol.py +0 -0
  139. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/tests/test_sdk.py +0 -0
  140. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/.gitignore +0 -0
  141. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/README.md +0 -0
  142. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/eslint.config.js +0 -0
  143. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/index.html +0 -0
  144. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/package-lock.json +0 -0
  145. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/package.json +0 -0
  146. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/App.css +0 -0
  147. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/App.tsx +0 -0
  148. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/api/client.ts +0 -0
  149. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/assets/react.svg +0 -0
  150. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/annotation-queue/AnnotationDetailPanel.tsx +0 -0
  151. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/annotation-queue/AnnotationQueueView.tsx +0 -0
  152. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/annotation-queue/AnnotationTable.tsx +0 -0
  153. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/bug-report/BugReportModal.tsx +0 -0
  154. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/builder/BuilderHeader.tsx +0 -0
  155. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/builder/BuilderView.tsx +0 -0
  156. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/builder/EvalCaseCard.tsx +0 -0
  157. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/builder/EvalCasesList.tsx +0 -0
  158. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/builder/InvocationEditor.tsx +0 -0
  159. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/builder/JsonPreview.tsx +0 -0
  160. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/builder/MetadataEditor.tsx +0 -0
  161. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/builder/TraceUploadZone.tsx +0 -0
  162. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/builder/index.ts +0 -0
  163. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/dashboard/DashboardView.tsx +0 -0
  164. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/dashboard/MetricScoreCard.tsx +0 -0
  165. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/dashboard/PerformanceCard.tsx +0 -0
  166. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/dashboard/PerformanceCharts.tsx +0 -0
  167. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/dashboard/SummaryStats.tsx +0 -0
  168. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/dashboard/TraceCard.tsx +0 -0
  169. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/dashboard/TraceTable.tsx +0 -0
  170. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/ComparisonPanel.tsx +0 -0
  171. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/DataSection.tsx +0 -0
  172. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/InspectorHeader.tsx +0 -0
  173. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/InspectorLayout.tsx +0 -0
  174. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/InspectorView.tsx +0 -0
  175. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/InvocationCard.tsx +0 -0
  176. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/InvocationSummaryPanel.tsx +0 -0
  177. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/MetricResultsSection.tsx +0 -0
  178. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/MetricsComparisonSection.tsx +0 -0
  179. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/PerformanceSection.tsx +0 -0
  180. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/ToolCallList.tsx +0 -0
  181. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/inspector/TrajectoryComparisonDetails.tsx +0 -0
  182. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/sidebar/Sidebar.tsx +0 -0
  183. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/streaming/LiveConversationPanel.tsx +0 -0
  184. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/streaming/LiveMessage.tsx +0 -0
  185. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/streaming/LiveStreamingView.tsx +0 -0
  186. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/streaming/SessionCard.tsx +0 -0
  187. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/streaming/SessionMetadata.tsx +0 -0
  188. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/upload/EvalSetEditorDrawer.tsx +0 -0
  189. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/upload/FileDropZone.tsx +0 -0
  190. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/upload/MetricSelector.tsx +0 -0
  191. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/upload/RawJsonPreview.tsx +0 -0
  192. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/upload/TraceEditorDrawer.tsx +0 -0
  193. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/upload/UploadView.tsx +0 -0
  194. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/components/welcome/WelcomeView.tsx +0 -0
  195. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/config.ts +0 -0
  196. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/context/TraceContext.tsx +0 -0
  197. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/context/TraceProvider.tsx +0 -0
  198. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/index.css +0 -0
  199. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/lib/console-capture.ts +0 -0
  200. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/lib/evalset-builder.ts +0 -0
  201. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/lib/network-capture.ts +0 -0
  202. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/lib/trace-converter.ts +0 -0
  203. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/lib/trace-loader.ts +0 -0
  204. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/lib/trace-metadata.ts +0 -0
  205. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/lib/trace-patcher.ts +0 -0
  206. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/lib/types.ts +0 -0
  207. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/lib/utils.ts +0 -0
  208. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/src/main.tsx +0 -0
  209. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/tsconfig.app.json +0 -0
  210. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/tsconfig.json +0 -0
  211. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/tsconfig.node.json +0 -0
  212. {agentevals_cli-0.5.2 → agentevals_cli-0.6.0}/ui/vite.config.ts +0 -0
@@ -61,19 +61,31 @@ jobs:
61
61
  needs: build
62
62
  runs-on: ubuntu-latest
63
63
  steps:
64
- - name: 'Checkout GitHub Action'
65
- uses: actions/checkout@main
64
+ - uses: actions/checkout@v6
65
+
66
+ - uses: astral-sh/setup-uv@v7
67
+ with:
68
+ enable-cache: true
66
69
 
67
- - name: Install uv
68
- uses: astral-sh/setup-uv@v6
70
+ - uses: actions/setup-node@v6
71
+ with:
72
+ node-version: '22'
73
+ cache: npm
74
+ cache-dependency-path: ui/package-lock.json
69
75
 
70
- # Repo root cwd: uv build puts artifacts in ./dist; uv publish looks for dist/* relative to cwd.
71
- - name: 'Release Python Packages'
76
+ # Same bundle as `make release` / `build-bundle`: wheel must include ui/dist in src/agentevals/_static
77
+ # (see [tool.hatch.build] artifacts in pyproject.toml).
78
+ - name: Release Python package (wheel + sdist with bundled UI)
72
79
  env:
73
80
  VERSION: ${{ github.event.inputs.tag || github.ref_name }}
74
81
  run: |
75
82
  uv sync --package agentevals-cli --all-extras
76
83
  uv version "$VERSION" --package agentevals-cli
77
84
 
85
+ make build-ui
86
+ rm -rf src/agentevals/_static
87
+ cp -r ui/dist src/agentevals/_static
88
+ rm -rf dist
78
89
  uv build --package agentevals-cli
79
90
  uv publish dist/* --token ${{ secrets.PYPI_TOKEN }}
91
+ rm -rf src/agentevals/_static
@@ -0,0 +1,333 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentevals-cli
3
+ Version: 0.6.0
4
+ Summary: Standalone framework to evaluate agent correctness based on portable OpenTelemetry traces
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: click>=8.0
8
+ Requires-Dist: fastapi>=0.115.0
9
+ Requires-Dist: google-adk[eval]>=1.25.0
10
+ Requires-Dist: httpx>=0.27.0
11
+ Requires-Dist: opentelemetry-proto>=1.36.0
12
+ Requires-Dist: python-dotenv>=1.0.0
13
+ Requires-Dist: python-multipart>=0.0.12
14
+ Requires-Dist: pyyaml>=6.0
15
+ Requires-Dist: tabulate>=0.9.0
16
+ Requires-Dist: uvicorn[standard]>=0.32.0
17
+ Provides-Extra: live
18
+ Requires-Dist: httpx>=0.27.0; extra == 'live'
19
+ Requires-Dist: mcp>=1.26.0; extra == 'live'
20
+ Provides-Extra: openai
21
+ Requires-Dist: openai>=2.0; extra == 'openai'
22
+ Provides-Extra: streaming
23
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'streaming'
24
+ Requires-Dist: websockets>=12.0; extra == 'streaming'
25
+ Description-Content-Type: text/markdown
26
+
27
+ <p align="center">
28
+ <picture>
29
+ <source media="(prefers-color-scheme: dark)" srcset="docs/assets/logo-color-on-transparent.svg">
30
+ <source media="(prefers-color-scheme: light)" srcset="docs/assets/logo-dark-on-transparent.svg">
31
+ <img src="docs/assets/logo-color-on-transparent.svg" alt="agentevals" width="420" />
32
+ </picture>
33
+ </p>
34
+
35
+ <h1 align="center">Ship Agents Reliably</h1>
36
+
37
+ <p align="center">
38
+ Benchmark your agents before they hit production.<br>
39
+ agentevals scores performance and inference quality from OpenTelemetry traces — no re-runs, no guesswork.
40
+ </p>
41
+
42
+ <p align="center">
43
+ <a href="https://github.com/agentevals-dev/agentevals/stargazers"><img src="https://img.shields.io/github/stars/agentevals-dev/agentevals?style=social" alt="GitHub Stars"></a>
44
+ &nbsp;
45
+ <a href="https://discord.gg/cpveEn8Ah2"><img src="https://img.shields.io/discord/1435836734666707190?label=Discord&logo=discord&logoColor=white&color=5865F2" alt="Discord"></a>
46
+ &nbsp;
47
+ <a href="https://github.com/agentevals-dev/agentevals/releases"><img src="https://img.shields.io/github/v/release/agentevals-dev/agentevals?label=Release" alt="Release"></a>
48
+ &nbsp;
49
+ <a href="https://github.com/agentevals-dev/agentevals/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-green.svg" alt="License"></a>
50
+ &nbsp;
51
+ <a href="https://pypi.org/project/agentevals-cli/"><img src="https://img.shields.io/pypi/v/agentevals-cli?label=PyPI&color=blue" alt="PyPI"></a>
52
+ </p>
53
+
54
+ <p align="center">
55
+ <a href="#installation">Install</a> · <a href="#quick-start">Quick Start</a> · <a href="https://github.com/agentevals-dev/agentevals/releases">Releases</a> · <a href="CONTRIBUTING.md">Contributing</a> · <a href="https://discord.gg/cpveEn8Ah2">Discord</a>
56
+ </p>
57
+
58
+ ---
59
+
60
+ ## What is agentevals?
61
+
62
+ agentevals is a framework-agnostic evaluation solution that scores AI agent behavior directly from [OpenTelemetry](https://opentelemetry.io/) traces. Record your agent's actions once, then evaluate as many times as you want — no re-runs, no guesswork.
63
+
64
+ It works with any OTel-instrumented framework (LangChain, Strands, Google ADK, and others), supports Jaeger JSON and OTLP trace formats, and ships with built-in evaluators, custom evaluator support, and LLM-based judges.
65
+
66
+ - **CLI** for scripting and CI pipelines
67
+ - **Web UI** for visual inspection and local developer experience
68
+ - **MCP server** so MCP clients can run evaluations from a conversation
69
+
70
+ ## Why agentevals?
71
+
72
+ Most evaluation tools require you to **re-execute your agent** for every test — burning tokens, time, and money on duplicate LLM calls. agentevals takes a different approach:
73
+
74
+ - **No re-execution** — score agents from existing traces without replaying expensive LLM calls
75
+ - **Framework-agnostic** — works with any agent framework that emits OpenTelemetry spans
76
+ - **Golden eval sets** — compare actual behavior against defined expected behaviors for deterministic pass/fail gating
77
+ - **Custom evaluators** — write scoring logic in Python, JavaScript, or any language
78
+ - **CI/CD ready** — gate deployments on quality thresholds directly in your pipeline
79
+ - **Local-first** — no cloud dependency required; everything runs on your machine
80
+
81
+ ## How It Works
82
+
83
+ agentevals follows three simple steps:
84
+
85
+ 1. **Collect traces** — Instrument your agent with OpenTelemetry (or export traces from your tracing backend). Point the OTLP exporter at the agentevals receiver, or load trace files directly.
86
+ 2. **Define eval sets** — Create golden evaluation sets that describe expected agent behavior: which tools should be called, in what order, and what the output should look like.
87
+ 3. **Run evaluations** — Use the CLI, Web UI, or MCP server to score traces against your eval sets. Get per-metric scores, pass/fail results, and detailed span-level breakdowns.
88
+
89
+
90
+ > [!IMPORTANT]
91
+ > This project is under active development. Expect breaking changes.
92
+
93
+ ## Contents
94
+
95
+ - [Installation](#installation)
96
+ - [Quick Start](#quick-start)
97
+ - [Integration](#integration)
98
+ - [CLI](#cli)
99
+ - [Custom Evaluators](#custom-evaluators)
100
+ - [Web UI](#web-ui)
101
+ - [REST API Reference](#rest-api-reference)
102
+ - [MCP Server](#mcp-server)
103
+ - [Claude Code Skills](#claude-code-skills)
104
+ - [Docs](#docs)
105
+ - [Development](#development)
106
+ - [FAQ](#faq)
107
+
108
+ ## Installation
109
+
110
+ **From PyPI** (recommended): the published package includes the **CLI**, **REST API**, and **embedded web UI**.
111
+
112
+ ```bash
113
+ pip install agentevals-cli
114
+ ```
115
+
116
+ Optional extras:
117
+
118
+ ```bash
119
+ pip install "agentevals-cli[live]" # MCP server support
120
+ pip install "agentevals-cli[openai]" # OpenAI Evals API graders
121
+ ```
122
+
123
+ **GitHub [releases](https://github.com/agentevals-dev/agentevals/releases)** also ship **core** wheels (CLI and API only) and **bundle** wheels (with the embedded UI) if you need a specific version or offline `pip install ./path/to.whl`.
124
+
125
+ **From source** with `uv` or Nix:
126
+
127
+ ```bash
128
+ uv sync
129
+ # or: nix develop .
130
+ ```
131
+
132
+ See [DEVELOPMENT.md](DEVELOPMENT.md) for build instructions.
133
+
134
+ ## Quick Start
135
+
136
+ Examples use `agentevals` on your PATH after `pip install agentevals-cli`. If you are working from a clone of this repo, use `uv run agentevals` instead.
137
+
138
+ Run an evaluation against a sample trace:
139
+
140
+ ```bash
141
+ agentevals run samples/helm.json \
142
+ --eval-set samples/eval_set_helm.json \
143
+ -m tool_trajectory_avg_score
144
+ ```
145
+
146
+ List available evaluators:
147
+
148
+ ```bash
149
+ agentevals evaluator list
150
+ ```
151
+
152
+ ## Integration
153
+
154
+ ### Zero-Code (Recommended)
155
+
156
+ Point any OTel-instrumented agent at the receiver. No SDK, no code changes:
157
+
158
+ ```bash
159
+ # Terminal 1
160
+ agentevals serve --dev
161
+
162
+ # Terminal 2
163
+ export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
164
+ export OTEL_RESOURCE_ATTRIBUTES="agentevals.session_name=my-agent"
165
+ python your_agent.py
166
+ ```
167
+
168
+ Traces stream to the UI in real time. Works with LangChain, Strands, Google ADK, or any framework that emits OTel spans (`http/protobuf` and `http/json` supported). Sessions are auto-created and grouped by `agentevals.session_name`. Set `agentevals.eval_set_id` to associate traces with an eval set.
169
+
170
+ See [examples/zero-code-examples/](examples/zero-code-examples/) for working examples.
171
+
172
+ ### SDK
173
+
174
+ For programmatic session lifecycle control and the decorator API:
175
+
176
+ ```python
177
+ from agentevals import AgentEvals
178
+
179
+ app = AgentEvals()
180
+
181
+ with app.session(eval_set_id="my-eval"):
182
+ agent.invoke("Roll a 20-sided die for me")
183
+ ```
184
+
185
+ Requires `pip install "agentevals-cli[streaming]"`. See [examples/sdk_example/](examples/sdk_example/) for framework-specific patterns.
186
+
187
+ ## CLI
188
+
189
+ ```bash
190
+ # Single trace
191
+ agentevals run samples/helm.json \
192
+ --eval-set samples/eval_set_helm.json \
193
+ -m tool_trajectory_avg_score
194
+
195
+ # Multiple traces
196
+ agentevals run samples/helm.json samples/k8s.json \
197
+ --eval-set samples/eval_set_helm.json \
198
+ -m tool_trajectory_avg_score
199
+
200
+ # JSON output
201
+ agentevals run samples/helm.json \
202
+ --eval-set samples/eval_set_helm.json \
203
+ --output json
204
+
205
+ # List available evaluators (builtin + community)
206
+ agentevals evaluator list
207
+
208
+ # List only builtin evaluators
209
+ agentevals evaluator list --source builtin
210
+ ```
211
+
212
+ ## Custom Evaluators
213
+
214
+ Beyond the built-in metrics, you can write your own evaluators in Python, JavaScript, or any language. An evaluator is any program that reads JSON from stdin and writes a score to stdout.
215
+
216
+ ```bash
217
+ agentevals evaluator init my_evaluator
218
+ ```
219
+
220
+ This scaffolds a directory with boilerplate and a manifest. You can also list supported runtimes and generate config snippets:
221
+
222
+ ```bash
223
+ agentevals evaluator runtimes # show supported languages
224
+ agentevals evaluator config my_evaluator --path ./evaluators/my_evaluator.py
225
+ ```
226
+
227
+ Implement your scoring logic, then reference it in an eval config:
228
+
229
+ ```yaml
230
+ # eval_config.yaml
231
+ evaluators:
232
+ - name: tool_trajectory_avg_score
233
+ type: builtin
234
+
235
+ - name: my_evaluator
236
+ type: code
237
+ path: ./evaluators/my_evaluator.py
238
+ threshold: 0.7
239
+ ```
240
+
241
+ ```bash
242
+ agentevals run trace.json --config eval_config.yaml --eval-set eval_set.json
243
+ ```
244
+
245
+ Community evaluators can be referenced directly from a shared GitHub repository using `type: remote`. You can also delegate grading to the [OpenAI Evals API](https://developers.openai.com/api/reference/resources/evals/methods/create) using `type: openai_eval` (requires `pip install "agentevals-cli[openai]"` and `OPENAI_API_KEY`). See the [Custom Evaluators guide](docs/custom-evaluators.md) for the full protocol reference, SDK usage, and how to contribute evaluators.
246
+
247
+ ## Web UI
248
+
249
+ **Installed bundle** (port 8001):
250
+
251
+ ```bash
252
+ agentevals serve
253
+ ```
254
+
255
+ **From source** (two terminals):
256
+
257
+ ```bash
258
+ uv run agentevals serve --dev # Terminal 1
259
+ cd ui && npm install && npm run dev # Terminal 2 → http://localhost:5173
260
+ ```
261
+
262
+ Upload traces and eval sets, select metrics, and view results with interactive span trees. Live-streamed traces appear in the "Local Dev" tab, grouped by session ID.
263
+
264
+ ## REST API Reference
265
+
266
+ While the server is running, interactive API documentation is available at:
267
+
268
+ | Endpoint | Description |
269
+ |----------|-------------|
270
+ | [`/docs`](http://localhost:8001/docs) | Swagger UI with interactive request builder |
271
+ | [`/redoc`](http://localhost:8001/redoc) | ReDoc reference documentation |
272
+ | [`/openapi.json`](http://localhost:8001/openapi.json) | Raw OpenAPI 3.x schema (for code generation or CI) |
273
+
274
+ The OTLP receiver (port 4318) serves its own docs at `http://localhost:4318/docs`.
275
+
276
+ ## MCP Server
277
+
278
+ Exposes evaluation tools to MCP clients. A `.mcp.json` at the project root lets Claude Code pick it up automatically.
279
+
280
+ | Tool | Requires `serve` | Description |
281
+ |------|:---:|-------------|
282
+ | `list_metrics` | yes | List available metrics |
283
+ | `evaluate_traces` | no | Evaluate local trace files (OTLP or Jaeger) |
284
+ | `list_sessions` | yes | List streaming sessions |
285
+ | `summarize_session` | yes | Structured summary of a session's tool calls |
286
+ | `evaluate_sessions` | yes | Evaluate sessions against a golden reference |
287
+
288
+ ```bash
289
+ # Custom server URL (requires pip install "agentevals-cli[live]")
290
+ AGENTEVALS_SERVER_URL=http://localhost:9000 agentevals mcp
291
+ ```
292
+
293
+ The React UI and MCP server share the same in-memory session state and can run simultaneously.
294
+
295
+ ## Claude Code Skills
296
+
297
+ Two slash-command workflows in `.claude/skills/`, available automatically in this repo:
298
+
299
+ | Skill | What it does |
300
+ |-------|-------------|
301
+ | `/eval` | Score traces or compare sessions against a golden reference |
302
+ | `/inspect` | Turn-by-turn narrative of a live session with anomaly detection |
303
+
304
+ ## Docs
305
+
306
+ | Guide | Description |
307
+ |-------|-------------|
308
+ | [Eval Set Format](docs/eval-set-format.md) | Schema, field reference, and examples for golden eval set JSON files |
309
+ | [Custom Evaluators](docs/custom-evaluators.md) | Write your own scoring logic in Python, JavaScript, or any language |
310
+ | [Live Streaming](docs/streaming.md) | Real-time trace streaming, dev server setup, and session management |
311
+ | [OpenTelemetry Compatibility](docs/otel-compatibility.md) | Supported OTel conventions, message delivery mechanisms, and OTLP receiver |
312
+
313
+ ## Development
314
+
315
+ ```bash
316
+ uv run pytest # run tests
317
+ uv run agentevals serve --dev # backend
318
+ cd ui && npm run dev # frontend (separate terminal)
319
+ ```
320
+
321
+ See [DEVELOPMENT.md](DEVELOPMENT.md) for build tiers, Makefile targets, and Nix setup. To contribute, see [CONTRIBUTING.md](CONTRIBUTING.md).
322
+
323
+ ## FAQ
324
+
325
+ **How does this compare to ADK's evaluations?**
326
+ Unlike ADK's LocalEvalService, which couples agent execution with evaluation, agentevals only handles scoring: it takes pre-recorded traces and compares them against expected behavior using metrics like tool trajectory matching, response quality, and LLM-based judgments.
327
+
328
+ However, if you're iterating on your agents locally, you can point your agents to agentevals and you will see rich runtime information in your browser. For more details, use the bundle wheel and explore the "Local Dev" tab in the UI.
329
+
330
+ **How does this compare to Bedrock AgentCore's evaluation?**
331
+ AgentCore's evaluation integration (via `strands-agents-evals`) also couples agent execution with evaluation. It re-invokes the agent for each test case, converts the resulting OTel spans to AWS's ADOT format, and scores them against 4 built-in evaluators (Helpfulness, Accuracy, Harmfulness, Relevance) via a cloud API call. This means you need an AWS account, valid credentials, and network access for every evaluation.
332
+
333
+ agentevals takes a different approach: it scores pre-recorded traces locally without re-running anything. It works with standard Jaeger JSON and OTLP formats from any framework, supports open-ended metrics (tool trajectory matching, LLM-based judges, custom scorers), and ships with a CLI, web UI, and MCP server. No cloud dependency is required, though for now we do include all of ADK's GCP-based evals.
@@ -1,15 +1,66 @@
1
1
  <p align="center">
2
- <img src="docs/assets/logo-color.png" alt="agentevals" width="420" />
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="docs/assets/logo-color-on-transparent.svg">
4
+ <source media="(prefers-color-scheme: light)" srcset="docs/assets/logo-dark-on-transparent.svg">
5
+ <img src="docs/assets/logo-color-on-transparent.svg" alt="agentevals" width="420" />
6
+ </picture>
3
7
  </p>
4
8
 
5
- `agentevals` evaluates AI agent behavior from OpenTelemetry traces, without re-running the agent. Record once, score as many times as you want.
9
+ <h1 align="center">Ship Agents Reliably</h1>
6
10
 
7
- Works with any OTel-instrumented framework (LangChain, Strands, Google ADK, and others). Supports Jaeger JSON and OTLP trace formats, built-in and custom evaluators, and LLM-based judges.
11
+ <p align="center">
12
+ Benchmark your agents before they hit production.<br>
13
+ agentevals scores performance and inference quality from OpenTelemetry traces — no re-runs, no guesswork.
14
+ </p>
15
+
16
+ <p align="center">
17
+ <a href="https://github.com/agentevals-dev/agentevals/stargazers"><img src="https://img.shields.io/github/stars/agentevals-dev/agentevals?style=social" alt="GitHub Stars"></a>
18
+ &nbsp;
19
+ <a href="https://discord.gg/cpveEn8Ah2"><img src="https://img.shields.io/discord/1435836734666707190?label=Discord&logo=discord&logoColor=white&color=5865F2" alt="Discord"></a>
20
+ &nbsp;
21
+ <a href="https://github.com/agentevals-dev/agentevals/releases"><img src="https://img.shields.io/github/v/release/agentevals-dev/agentevals?label=Release" alt="Release"></a>
22
+ &nbsp;
23
+ <a href="https://github.com/agentevals-dev/agentevals/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-Apache%202.0-green.svg" alt="License"></a>
24
+ &nbsp;
25
+ <a href="https://pypi.org/project/agentevals-cli/"><img src="https://img.shields.io/pypi/v/agentevals-cli?label=PyPI&color=blue" alt="PyPI"></a>
26
+ </p>
27
+
28
+ <p align="center">
29
+ <a href="#installation">Install</a> · <a href="#quick-start">Quick Start</a> · <a href="https://github.com/agentevals-dev/agentevals/releases">Releases</a> · <a href="CONTRIBUTING.md">Contributing</a> · <a href="https://discord.gg/cpveEn8Ah2">Discord</a>
30
+ </p>
31
+
32
+ ---
33
+
34
+ ## What is agentevals?
35
+
36
+ agentevals is a framework-agnostic evaluation solution that scores AI agent behavior directly from [OpenTelemetry](https://opentelemetry.io/) traces. Record your agent's actions once, then evaluate as many times as you want — no re-runs, no guesswork.
37
+
38
+ It works with any OTel-instrumented framework (LangChain, Strands, Google ADK, and others), supports Jaeger JSON and OTLP trace formats, and ships with built-in evaluators, custom evaluator support, and LLM-based judges.
8
39
 
9
40
  - **CLI** for scripting and CI pipelines
10
41
  - **Web UI** for visual inspection and local developer experience
11
42
  - **MCP server** so MCP clients can run evaluations from a conversation
12
43
 
44
+ ## Why agentevals?
45
+
46
+ Most evaluation tools require you to **re-execute your agent** for every test — burning tokens, time, and money on duplicate LLM calls. agentevals takes a different approach:
47
+
48
+ - **No re-execution** — score agents from existing traces without replaying expensive LLM calls
49
+ - **Framework-agnostic** — works with any agent framework that emits OpenTelemetry spans
50
+ - **Golden eval sets** — compare actual behavior against defined expected behaviors for deterministic pass/fail gating
51
+ - **Custom evaluators** — write scoring logic in Python, JavaScript, or any language
52
+ - **CI/CD ready** — gate deployments on quality thresholds directly in your pipeline
53
+ - **Local-first** — no cloud dependency required; everything runs on your machine
54
+
55
+ ## How It Works
56
+
57
+ agentevals follows three simple steps:
58
+
59
+ 1. **Collect traces** — Instrument your agent with OpenTelemetry (or export traces from your tracing backend). Point the OTLP exporter at the agentevals receiver, or load trace files directly.
60
+ 2. **Define eval sets** — Create golden evaluation sets that describe expected agent behavior: which tools should be called, in what order, and what the output should look like.
61
+ 3. **Run evaluations** — Use the CLI, Web UI, or MCP server to score traces against your eval sets. Get per-metric scores, pass/fail results, and detailed span-level breakdowns.
62
+
63
+
13
64
  > [!IMPORTANT]
14
65
  > This project is under active development. Expect breaking changes.
15
66
 
@@ -30,15 +81,21 @@ Works with any OTel-instrumented framework (LangChain, Strands, Google ADK, and
30
81
 
31
82
  ## Installation
32
83
 
33
- Grab a wheel from the [releases page](../../releases). The **core** wheel has the CLI and REST API. The **bundle** wheel adds streaming and the embedded web UI.
84
+ **From PyPI** (recommended): the published package includes the **CLI**, **REST API**, and **embedded web UI**.
34
85
 
35
86
  ```bash
36
- pip install agentevals-<version>-py3-none-any.whl
87
+ pip install agentevals-cli
88
+ ```
89
+
90
+ Optional extras:
37
91
 
38
- # For MCP server support:
39
- pip install "agentevals-<version>-py3-none-any.whl[live]"
92
+ ```bash
93
+ pip install "agentevals-cli[live]" # MCP server support
94
+ pip install "agentevals-cli[openai]" # OpenAI Evals API graders
40
95
  ```
41
96
 
97
+ **GitHub [releases](../../releases)** also ship **core** wheels (CLI and API only) and **bundle** wheels (with the embedded UI), in case you need a specific version or want to install offline with `pip install ./path/to.whl`.
98
+
42
99
  **From source** with `uv` or Nix:
43
100
 
44
101
  ```bash
@@ -50,10 +107,12 @@ See [DEVELOPMENT.md](DEVELOPMENT.md) for build instructions.
50
107
 
51
108
  ## Quick Start
52
109
 
110
+ Examples use `agentevals` on your PATH after `pip install agentevals-cli`. If you are working from a clone of this repo, use `uv run agentevals` instead.
111
+
53
112
  Run an evaluation against a sample trace:
54
113
 
55
114
  ```bash
56
- uv run agentevals run samples/helm.json \
115
+ agentevals run samples/helm.json \
57
116
  --eval-set samples/eval_set_helm.json \
58
117
  -m tool_trajectory_avg_score
59
118
  ```
@@ -61,7 +120,7 @@ uv run agentevals run samples/helm.json \
61
120
  List available evaluators:
62
121
 
63
122
  ```bash
64
- uv run agentevals evaluator list
123
+ agentevals evaluator list
65
124
  ```
66
125
 
67
126
  ## Integration
@@ -72,7 +131,7 @@ Point any OTel-instrumented agent at the receiver. No SDK, no code changes:
72
131
 
73
132
  ```bash
74
133
  # Terminal 1
75
- uv run agentevals serve --dev
134
+ agentevals serve --dev
76
135
 
77
136
  # Terminal 2
78
137
  export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
@@ -97,31 +156,31 @@ with app.session(eval_set_id="my-eval"):
97
156
  agent.invoke("Roll a 20-sided die for me")
98
157
  ```
99
158
 
100
- Requires `pip install "agentevals[streaming]"`. See [examples/sdk_example/](examples/sdk_example/) for framework-specific patterns.
159
+ Requires `pip install "agentevals-cli[streaming]"`. See [examples/sdk_example/](examples/sdk_example/) for framework-specific patterns.
101
160
 
102
161
  ## CLI
103
162
 
104
163
  ```bash
105
164
  # Single trace
106
- uv run agentevals run samples/helm.json \
165
+ agentevals run samples/helm.json \
107
166
  --eval-set samples/eval_set_helm.json \
108
167
  -m tool_trajectory_avg_score
109
168
 
110
169
  # Multiple traces
111
- uv run agentevals run samples/helm.json samples/k8s.json \
170
+ agentevals run samples/helm.json samples/k8s.json \
112
171
  --eval-set samples/eval_set_helm.json \
113
172
  -m tool_trajectory_avg_score
114
173
 
115
174
  # JSON output
116
- uv run agentevals run samples/helm.json \
175
+ agentevals run samples/helm.json \
117
176
  --eval-set samples/eval_set_helm.json \
118
177
  --output json
119
178
 
120
179
  # List available evaluators (builtin + community)
121
- uv run agentevals evaluator list
180
+ agentevals evaluator list
122
181
 
123
182
  # List only builtin evaluators
124
- uv run agentevals evaluator list --source builtin
183
+ agentevals evaluator list --source builtin
125
184
  ```
126
185
 
127
186
  ## Custom Evaluators
@@ -157,7 +216,7 @@ evaluators:
157
216
  agentevals run trace.json --config eval_config.yaml --eval-set eval_set.json
158
217
  ```
159
218
 
160
- Community evaluators can be referenced directly from a shared GitHub repository using `type: remote`. See the [Custom Evaluators guide](docs/custom-evaluators.md) for the full protocol reference, SDK usage, and how to contribute evaluators.
219
+ Community evaluators can be referenced directly from a shared GitHub repository using `type: remote`. You can also delegate grading to the [OpenAI Evals API](https://developers.openai.com/api/reference/resources/evals/methods/create) using `type: openai_eval` (requires `pip install "agentevals-cli[openai]"` and `OPENAI_API_KEY`). See the [Custom Evaluators guide](docs/custom-evaluators.md) for the full protocol reference, SDK usage, and how to contribute evaluators.
161
220
 
162
221
  ## Web UI
163
222
 
@@ -201,8 +260,8 @@ Exposes evaluation tools to MCP clients. A `.mcp.json` at the project root lets
201
260
  | `evaluate_sessions` | yes | Evaluate sessions against a golden reference |
202
261
 
203
262
  ```bash
204
- # Custom server URL
205
- AGENTEVALS_SERVER_URL=http://localhost:9000 uv run agentevals mcp
263
+ # Custom server URL (requires pip install "agentevals-cli[live]")
264
+ AGENTEVALS_SERVER_URL=http://localhost:9000 agentevals mcp
206
265
  ```
207
266
 
208
267
  The React UI and MCP server share the same in-memory session state and can run simultaneously.
@@ -0,0 +1,13 @@
1
+ <svg width="3302" height="1066" viewBox="0 0 3302 1066" fill="none" xmlns="http://www.w3.org/2000/svg">
2
+ <path d="M518.695 264C560.958 264 595.207 298.274 595.207 340.537C595.207 382.8 560.958 417.048 518.695 417.048C454.983 417.048 403.305 468.548 403 532.184V533.304C403.306 596.94 454.983 648.438 518.695 648.438H518.722C560.985 648.439 595.232 682.687 595.232 724.95C595.232 767.213 560.984 801.461 518.722 801.461C476.459 801.461 442.21 767.213 442.21 724.95V724.67C442.057 661.008 390.482 609.408 326.795 609.255H326.515C284.252 609.255 250.004 575.006 250.004 532.743C250.004 490.48 284.252 456.232 326.515 456.232H326.642C390.431 456.156 442.108 404.453 442.185 340.664V340.512C442.185 298.249 476.432 264 518.695 264ZM492.436 469.353C527.452 454.848 567.596 471.476 582.101 506.492C596.605 541.508 579.976 581.653 544.96 596.157C509.944 610.661 469.8 594.033 455.296 559.017C440.792 524.001 457.42 483.857 492.436 469.353Z" fill="#8023C3"/>
3
+ <path d="M1029.16 401.476V655.084H982.736L976.321 616.93C956.253 644.357 928.75 658.054 893.878 658.054C870.849 658.054 850.254 652.839 832.16 642.443C814.066 632.046 799.887 617.029 789.688 597.359C779.49 577.721 774.391 554.683 774.391 528.247C774.391 501.81 779.589 479.796 789.952 460.125C800.315 440.487 814.56 425.305 832.654 414.546C850.748 403.819 871.178 398.439 893.878 398.439C912.301 398.439 928.454 401.839 942.271 408.605C956.089 415.371 967.274 424.711 975.861 436.593V401.41H1029.19L1029.16 401.476ZM902.76 612.97C924.802 612.97 942.567 605.214 956.089 589.701C969.577 574.189 976.321 554.023 976.321 529.27C976.321 504.516 969.577 483.294 956.089 467.617C942.6 451.94 924.802 444.085 902.76 444.085C880.718 444.085 862.92 451.94 849.432 467.617C835.944 483.294 829.199 503.526 829.199 528.28C829.199 553.033 835.944 573.76 849.432 589.47C862.92 605.148 880.685 613.003 902.76 613.003V612.97Z" fill="white"/>
4
+ <path d="M2723.45 401.476V655.084H2677.03L2670.61 616.93C2650.54 644.357 2623.04 658.054 2588.17 658.054C2565.14 658.054 2544.54 652.839 2526.45 642.443C2508.36 632.046 2494.18 617.029 2483.98 597.359C2473.78 577.721 2468.68 554.683 2468.68 528.247C2468.68 501.81 2473.88 479.796 2484.24 460.125C2494.6 440.487 2508.85 425.305 2526.94 414.546C2545.04 403.819 2565.47 398.439 2588.17 398.439C2606.59 398.439 2622.74 401.839 2636.56 408.605C2650.38 415.371 2661.56 424.711 2670.15 436.593V401.41H2723.48L2723.45 401.476ZM2597.05 612.97C2619.09 612.97 2636.86 605.214 2650.38 589.701C2663.87 574.189 2670.61 554.023 2670.61 529.27C2670.61 504.516 2663.87 483.294 2650.38 467.617C2636.89 451.94 2619.09 444.085 2597.05 444.085C2575.01 444.085 2557.21 451.94 2543.72 467.617C2530.23 483.294 2523.49 503.526 2523.49 528.28C2523.49 553.033 2530.23 573.76 2543.72 589.47C2557.21 605.148 2574.97 613.003 2597.05 613.003V612.97Z" fill="white"/>
5
+ <path d="M1308.72 401.47V644.682C1308.72 680.36 1298.19 707.985 1277.14 727.655C1256.08 747.293 1223.48 757.129 1179.36 757.129C1145.11 757.129 1117.32 749.439 1095.93 734.091C1074.51 718.744 1062.67 697.027 1060.37 668.94H1114.68C1117.97 683.132 1125.54 694.123 1137.38 701.879C1149.23 709.635 1164.52 713.529 1183.31 713.529C1231.7 713.529 1255.88 689.898 1255.88 642.701V614.482C1237.46 642.206 1209.96 656.101 1173.44 656.101C1150.41 656.101 1129.82 650.887 1111.72 640.49C1093.63 630.094 1079.45 615.242 1069.25 595.901C1059.05 576.593 1053.95 553.721 1053.95 527.284C1053.95 500.847 1059.15 479.394 1069.51 459.922C1079.88 440.449 1094.12 425.333 1112.22 414.606C1130.31 403.88 1150.74 398.5 1173.44 398.5C1192.52 398.5 1209 402.296 1222.82 409.887C1236.64 417.478 1247.82 427.907 1256.41 441.109L1262.33 401.47H1308.75H1308.72ZM1182.32 610.984C1204.36 610.984 1222.13 603.294 1235.65 587.947C1249.14 572.6 1255.88 552.698 1255.88 528.274C1255.88 503.851 1249.14 482.794 1235.65 467.084C1222.16 451.406 1204.36 443.551 1182.32 443.551C1160.28 443.551 1142.48 451.307 1128.99 466.82C1115.51 482.332 1108.76 502.498 1108.76 527.251C1108.76 552.005 1115.51 572.17 1128.99 587.683C1142.48 603.195 1160.25 610.951 1182.32 610.951V610.984Z" fill="white"/>
6
+ <path d="M1324.01 528.769C1324.01 502.696 1329.21 479.823 1339.57 460.153C1349.94 440.515 1364.41 425.333 1383.03 414.573C1401.62 403.847 1422.94 398.467 1446.99 398.467C1471.03 398.467 1492.81 403.417 1511.43 413.319C1530.02 423.22 1544.66 437.28 1555.39 455.433C1566.08 473.585 1571.61 494.906 1571.93 519.33C1571.93 525.931 1571.44 532.697 1570.45 539.628H1379.87V542.598C1381.19 564.711 1388.1 582.237 1400.6 595.109C1413.1 607.98 1429.71 614.416 1450.47 614.416C1466.92 614.416 1480.74 610.555 1491.96 602.766C1503.14 595.01 1510.55 584.019 1514.16 569.827H1567.49C1562.89 595.571 1550.45 616.727 1530.22 633.229C1509.99 649.731 1484.72 657.982 1454.42 657.982C1428.07 657.982 1405.14 652.636 1385.53 641.876C1365.96 631.15 1350.79 616.034 1340.1 596.561C1329.41 577.088 1324.04 554.447 1324.04 528.703L1324.01 528.769ZM1517.55 500.517C1515.25 482.035 1507.91 467.579 1495.58 457.182C1483.24 446.786 1467.68 441.571 1448.93 441.571C1431.49 441.571 1416.42 446.951 1403.76 457.677C1391.09 468.404 1383.76 482.695 1381.78 500.517H1517.55Z" fill="white"/>
7
+ <path d="M1587.2 401.47H1633.61L1639.54 434.673C1658.62 410.58 1685.63 398.5 1720.5 398.5C1735.3 398.5 1748.96 400.711 1761.49 405.2C1773.99 409.656 1784.78 416.686 1793.83 426.257C1802.88 435.828 1809.88 447.974 1814.82 462.661C1819.75 477.348 1822.22 494.94 1822.22 515.402V655.078H1768.4V518.373C1768.4 494.28 1763.3 475.929 1753.1 463.387C1742.9 450.845 1727.93 444.574 1708.16 444.574C1687.11 444.574 1670.56 451.935 1658.55 466.622C1646.54 481.309 1640.52 501.541 1640.52 527.317V655.111H1587.2V401.47Z" fill="white"/>
8
+ <path d="M1845.75 401.47V330.609H1899.57V401.437H1960.3V448.502H1899.57V580.752C1899.57 590.653 1901.54 597.683 1905.49 601.809C1909.44 605.934 1916.18 608.014 1925.72 608.014H1966.22V655.078H1914.87C1890.85 655.078 1873.31 649.566 1862.29 638.477C1851.27 627.42 1845.75 609.994 1845.75 586.23V448.535" fill="white"/>
9
+ <path d="M1976.12 528.769C1976.12 502.696 1981.32 479.823 1991.69 460.153C2002.05 440.515 2016.52 425.333 2035.14 414.573C2053.73 403.847 2075.05 398.467 2099.1 398.467C2123.15 398.467 2144.93 403.417 2163.51 413.319C2182.1 423.22 2196.77 437.28 2207.47 455.433C2218.16 473.585 2223.69 494.906 2224.01 519.33C2224.01 525.931 2223.52 532.697 2222.53 539.628H2031.95V542.598C2033.27 564.711 2040.18 582.237 2052.68 595.109C2065.18 607.98 2081.83 614.416 2102.55 614.416C2119 614.416 2132.85 610.555 2144.04 602.766C2155.22 595.01 2162.63 584.019 2166.24 569.827H2219.57C2214.97 595.571 2202.53 616.727 2182.3 633.229C2162.07 649.731 2136.8 657.982 2106.5 657.982C2080.15 657.982 2057.19 652.636 2037.61 641.876C2018.04 631.15 2002.87 616.034 1992.18 596.561C1981.49 577.088 1976.12 554.447 1976.12 528.703V528.769ZM2169.67 500.517C2167.36 482.035 2160.03 467.579 2147.69 457.182C2135.35 446.786 2119.79 441.571 2101.04 441.571C2083.6 441.571 2068.54 446.951 2055.87 457.677C2043.2 468.404 2035.87 482.695 2033.89 500.517H2169.67Z" fill="white"/>
10
+ <path d="M2216.86 401.475H2274.14L2343.75 597.621L2412.38 401.475H2468.67L2375.33 655.082H2310.16L2216.83 401.475H2216.86Z" fill="white"/>
11
+ <path d="M2754.43 308.332H2807.76V655.079H2754.43V308.332Z" fill="white"/>
12
+ <path d="M2882.6 571.352C2883.59 584.554 2889.77 595.379 2901.12 603.796C2912.47 612.212 2927.21 616.436 2945.31 616.436C2961.43 616.436 2974.52 613.4 2984.55 607.261C2994.59 601.155 2999.62 592.97 2999.62 582.739C2999.62 574.157 2997.32 567.721 2992.71 563.431C2988.11 559.14 2981.92 556.104 2974.19 554.256C2966.46 552.44 2954.52 550.559 2938.4 548.546C2916.36 545.905 2898.16 542.341 2883.85 537.885C2869.54 533.43 2857.99 526.334 2849.28 516.597C2840.56 506.861 2836.18 493.725 2836.18 477.223C2836.18 461.711 2840.56 447.915 2849.28 435.868C2857.99 423.821 2870 414.481 2885.33 407.88C2900.63 401.279 2918 397.979 2937.41 397.979C2969.32 397.979 2995.25 405.075 3015.18 419.267C3035.09 433.459 3045.88 453.459 3047.52 479.203H2995.67C2994.36 467.651 2988.6 458.146 2978.4 450.72C2968.2 443.294 2955.37 439.564 2939.88 439.564C2924.38 439.564 2911.92 442.535 2902.34 448.476C2892.8 454.416 2888.03 462.503 2888.03 472.734C2888.03 480.325 2890.4 486.035 2895.2 489.83C2899.97 493.626 2905.99 496.266 2913.23 497.752C2920.47 499.237 2932.15 500.986 2948.3 502.966C2970.01 505.277 2988.31 508.841 3003.11 513.627C3017.91 518.413 3029.76 526.004 3038.67 536.4C3047.56 546.797 3052 560.923 3052 578.745C3052 594.587 3047.39 608.548 3038.18 620.595C3028.97 632.642 3016.27 641.883 3000.15 648.319C2984.03 654.755 2965.9 657.989 2945.83 657.989C2911.92 657.989 2884.51 650.299 2863.62 634.952C2842.73 619.605 2831.94 598.383 2831.28 571.286H2882.64L2882.6 571.352Z" fill="white"/>
13
+ </svg>
@@ -0,0 +1,13 @@
1
+ <svg width="3302" height="1066" viewBox="0 0 3302 1066" fill="none" xmlns="http://www.w3.org/2000/svg">
2
+ <path d="M518.695 264C560.958 264 595.207 298.274 595.207 340.537C595.207 382.8 560.958 417.048 518.695 417.048C454.983 417.048 403.305 468.548 403 532.184V533.304C403.306 596.94 454.983 648.438 518.695 648.438H518.722C560.985 648.439 595.232 682.687 595.232 724.95C595.232 767.213 560.984 801.461 518.722 801.461C476.459 801.461 442.21 767.213 442.21 724.95V724.67C442.057 661.008 390.482 609.408 326.795 609.255H326.515C284.252 609.255 250.004 575.006 250.004 532.743C250.004 490.48 284.252 456.232 326.515 456.232H326.642C390.431 456.156 442.108 404.453 442.185 340.664V340.512C442.185 298.249 476.432 264 518.695 264ZM492.436 469.353C527.452 454.848 567.596 471.476 582.101 506.492C596.605 541.508 579.976 581.653 544.96 596.157C509.944 610.661 469.8 594.033 455.296 559.017C440.792 524.001 457.42 483.857 492.436 469.353Z" fill="#151927"/>
3
+ <path d="M1029.16 401.476V655.084H982.736L976.321 616.93C956.253 644.357 928.75 658.054 893.878 658.054C870.849 658.054 850.254 652.839 832.16 642.443C814.066 632.046 799.887 617.029 789.688 597.359C779.49 577.721 774.391 554.683 774.391 528.247C774.391 501.81 779.589 479.796 789.952 460.125C800.315 440.487 814.56 425.305 832.654 414.546C850.748 403.819 871.178 398.439 893.878 398.439C912.301 398.439 928.454 401.839 942.271 408.605C956.089 415.371 967.274 424.711 975.861 436.593V401.41H1029.19L1029.16 401.476ZM902.76 612.97C924.802 612.97 942.567 605.214 956.089 589.701C969.577 574.189 976.321 554.023 976.321 529.27C976.321 504.516 969.577 483.294 956.089 467.617C942.6 451.94 924.802 444.085 902.76 444.085C880.718 444.085 862.92 451.94 849.432 467.617C835.944 483.294 829.199 503.526 829.199 528.28C829.199 553.033 835.944 573.76 849.432 589.47C862.92 605.148 880.685 613.003 902.76 613.003V612.97Z" fill="#151927"/>
4
+ <path d="M2723.45 401.476V655.084H2677.03L2670.61 616.93C2650.54 644.357 2623.04 658.054 2588.17 658.054C2565.14 658.054 2544.54 652.839 2526.45 642.443C2508.36 632.046 2494.18 617.029 2483.98 597.359C2473.78 577.721 2468.68 554.683 2468.68 528.247C2468.68 501.81 2473.88 479.796 2484.24 460.125C2494.6 440.487 2508.85 425.305 2526.94 414.546C2545.04 403.819 2565.47 398.439 2588.17 398.439C2606.59 398.439 2622.74 401.839 2636.56 408.605C2650.38 415.371 2661.56 424.711 2670.15 436.593V401.41H2723.48L2723.45 401.476ZM2597.05 612.97C2619.09 612.97 2636.86 605.214 2650.38 589.701C2663.87 574.189 2670.61 554.023 2670.61 529.27C2670.61 504.516 2663.87 483.294 2650.38 467.617C2636.89 451.94 2619.09 444.085 2597.05 444.085C2575.01 444.085 2557.21 451.94 2543.72 467.617C2530.23 483.294 2523.49 503.526 2523.49 528.28C2523.49 553.033 2530.23 573.76 2543.72 589.47C2557.21 605.148 2574.97 613.003 2597.05 613.003V612.97Z" fill="#151927"/>
5
+ <path d="M1308.72 401.47V644.682C1308.72 680.36 1298.19 707.985 1277.14 727.655C1256.08 747.293 1223.48 757.129 1179.36 757.129C1145.11 757.129 1117.32 749.439 1095.93 734.091C1074.51 718.744 1062.67 697.027 1060.37 668.94H1114.68C1117.97 683.132 1125.54 694.123 1137.38 701.879C1149.23 709.635 1164.52 713.529 1183.31 713.529C1231.7 713.529 1255.88 689.898 1255.88 642.701V614.482C1237.46 642.206 1209.96 656.101 1173.44 656.101C1150.41 656.101 1129.82 650.887 1111.72 640.49C1093.63 630.094 1079.45 615.242 1069.25 595.901C1059.05 576.593 1053.95 553.721 1053.95 527.284C1053.95 500.847 1059.15 479.394 1069.51 459.922C1079.88 440.449 1094.12 425.333 1112.22 414.606C1130.31 403.88 1150.74 398.5 1173.44 398.5C1192.52 398.5 1209 402.296 1222.82 409.887C1236.64 417.478 1247.82 427.907 1256.41 441.109L1262.33 401.47H1308.75H1308.72ZM1182.32 610.984C1204.36 610.984 1222.13 603.294 1235.65 587.947C1249.14 572.6 1255.88 552.698 1255.88 528.274C1255.88 503.851 1249.14 482.794 1235.65 467.084C1222.16 451.406 1204.36 443.551 1182.32 443.551C1160.28 443.551 1142.48 451.307 1128.99 466.82C1115.51 482.332 1108.76 502.498 1108.76 527.251C1108.76 552.005 1115.51 572.17 1128.99 587.683C1142.48 603.195 1160.25 610.951 1182.32 610.951V610.984Z" fill="#151927"/>
6
+ <path d="M1324.01 528.769C1324.01 502.696 1329.21 479.823 1339.57 460.153C1349.94 440.515 1364.41 425.333 1383.03 414.573C1401.62 403.847 1422.94 398.467 1446.99 398.467C1471.03 398.467 1492.81 403.417 1511.43 413.319C1530.02 423.22 1544.66 437.28 1555.39 455.433C1566.08 473.585 1571.61 494.906 1571.93 519.33C1571.93 525.931 1571.44 532.697 1570.45 539.628H1379.87V542.598C1381.19 564.711 1388.1 582.237 1400.6 595.109C1413.1 607.98 1429.71 614.416 1450.47 614.416C1466.92 614.416 1480.74 610.555 1491.96 602.766C1503.14 595.01 1510.55 584.019 1514.16 569.827H1567.49C1562.89 595.571 1550.45 616.727 1530.22 633.229C1509.99 649.731 1484.72 657.982 1454.42 657.982C1428.07 657.982 1405.14 652.636 1385.53 641.876C1365.96 631.15 1350.79 616.034 1340.1 596.561C1329.41 577.088 1324.04 554.447 1324.04 528.703L1324.01 528.769ZM1517.55 500.517C1515.25 482.035 1507.91 467.579 1495.58 457.182C1483.24 446.786 1467.68 441.571 1448.93 441.571C1431.49 441.571 1416.42 446.951 1403.76 457.677C1391.09 468.404 1383.76 482.695 1381.78 500.517H1517.55Z" fill="#151927"/>
7
+ <path d="M1587.2 401.47H1633.61L1639.54 434.673C1658.62 410.58 1685.63 398.5 1720.5 398.5C1735.3 398.5 1748.96 400.711 1761.49 405.2C1773.99 409.656 1784.78 416.686 1793.83 426.257C1802.88 435.828 1809.88 447.974 1814.82 462.661C1819.75 477.348 1822.22 494.94 1822.22 515.402V655.078H1768.4V518.373C1768.4 494.28 1763.3 475.929 1753.1 463.387C1742.9 450.845 1727.93 444.574 1708.16 444.574C1687.11 444.574 1670.56 451.935 1658.55 466.622C1646.54 481.309 1640.52 501.541 1640.52 527.317V655.111H1587.2V401.47Z" fill="#151927"/>
8
+ <path d="M1845.75 401.47V330.609H1899.57V401.437H1960.3V448.502H1899.57V580.752C1899.57 590.653 1901.54 597.683 1905.49 601.809C1909.44 605.934 1916.18 608.014 1925.72 608.014H1966.22V655.078H1914.87C1890.85 655.078 1873.31 649.566 1862.29 638.477C1851.27 627.42 1845.75 609.994 1845.75 586.23V448.535" fill="#151927"/>
9
+ <path d="M1976.12 528.769C1976.12 502.696 1981.32 479.823 1991.69 460.153C2002.05 440.515 2016.52 425.333 2035.14 414.573C2053.73 403.847 2075.05 398.467 2099.1 398.467C2123.15 398.467 2144.93 403.417 2163.51 413.319C2182.1 423.22 2196.77 437.28 2207.47 455.433C2218.16 473.585 2223.69 494.906 2224.01 519.33C2224.01 525.931 2223.52 532.697 2222.53 539.628H2031.95V542.598C2033.27 564.711 2040.18 582.237 2052.68 595.109C2065.18 607.98 2081.83 614.416 2102.55 614.416C2119 614.416 2132.85 610.555 2144.04 602.766C2155.22 595.01 2162.63 584.019 2166.24 569.827H2219.57C2214.97 595.571 2202.53 616.727 2182.3 633.229C2162.07 649.731 2136.8 657.982 2106.5 657.982C2080.15 657.982 2057.19 652.636 2037.61 641.876C2018.04 631.15 2002.87 616.034 1992.18 596.561C1981.49 577.088 1976.12 554.447 1976.12 528.703V528.769ZM2169.67 500.517C2167.36 482.035 2160.03 467.579 2147.69 457.182C2135.35 446.786 2119.79 441.571 2101.04 441.571C2083.6 441.571 2068.54 446.951 2055.87 457.677C2043.2 468.404 2035.87 482.695 2033.89 500.517H2169.67Z" fill="#151927"/>
10
+ <path d="M2216.86 401.475H2274.14L2343.75 597.621L2412.38 401.475H2468.67L2375.33 655.082H2310.16L2216.83 401.475H2216.86Z" fill="#151927"/>
11
+ <path d="M2754.43 308.332H2807.76V655.079H2754.43V308.332Z" fill="#151927"/>
12
+ <path d="M2882.6 571.352C2883.59 584.554 2889.77 595.379 2901.12 603.796C2912.47 612.212 2927.21 616.436 2945.31 616.436C2961.43 616.436 2974.52 613.4 2984.55 607.261C2994.59 601.155 2999.62 592.97 2999.62 582.739C2999.62 574.157 2997.32 567.721 2992.71 563.431C2988.11 559.14 2981.92 556.104 2974.19 554.256C2966.46 552.44 2954.52 550.559 2938.4 548.546C2916.36 545.905 2898.16 542.341 2883.85 537.885C2869.54 533.43 2857.99 526.334 2849.28 516.597C2840.56 506.861 2836.18 493.725 2836.18 477.223C2836.18 461.711 2840.56 447.915 2849.28 435.868C2857.99 423.821 2870 414.481 2885.33 407.88C2900.63 401.279 2918 397.979 2937.41 397.979C2969.32 397.979 2995.25 405.075 3015.18 419.267C3035.09 433.459 3045.88 453.459 3047.52 479.203H2995.67C2994.36 467.651 2988.6 458.146 2978.4 450.72C2968.2 443.294 2955.37 439.564 2939.88 439.564C2924.38 439.564 2911.92 442.535 2902.34 448.476C2892.8 454.416 2888.03 462.503 2888.03 472.734C2888.03 480.325 2890.4 486.035 2895.2 489.83C2899.97 493.626 2905.99 496.266 2913.23 497.752C2920.47 499.237 2932.15 500.986 2948.3 502.966C2970.01 505.277 2988.31 508.841 3003.11 513.627C3017.91 518.413 3029.76 526.004 3038.67 536.4C3047.56 546.797 3052 560.923 3052 578.745C3052 594.587 3047.39 608.548 3038.18 620.595C3028.97 632.642 3016.27 641.883 3000.15 648.319C2984.03 654.755 2965.9 657.989 2945.83 657.989C2911.92 657.989 2884.51 650.299 2863.62 634.952C2842.73 619.605 2831.94 598.383 2831.28 571.286H2882.64L2882.6 571.352Z" fill="#151927"/>
13
+ </svg>