genoma-evolution 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (445)
  1. package/.brv/.obsidian/app.json +1 -0
  2. package/.brv/.obsidian/appearance.json +1 -0
  3. package/.brv/.obsidian/core-plugins.json +33 -0
  4. package/.brv/.obsidian/graph.json +22 -0
  5. package/.brv/.obsidian/workspace.json +195 -0
  6. package/.brv/Sin título 1.canvas +1 -0
  7. package/.brv/Sin título 2.canvas +1 -0
  8. package/.brv/Sin título.canvas +1 -0
  9. package/.brv/_queue_status.json +1 -0
  10. package/.brv/config.json +5 -0
  11. package/.brv/context-tree/_index.md +60 -0
  12. package/.brv/context-tree/_manifest.json +165 -0
  13. package/.brv/context-tree/backend/_index.md +24 -0
  14. package/.brv/context-tree/backend/backend/_index.md +40 -0
  15. package/.brv/context-tree/backend/backend/init.abstract.md +0 -0
  16. package/.brv/context-tree/backend/backend/init.md +27 -0
  17. package/.brv/context-tree/backend/backend/init.overview.md +29 -0
  18. package/.brv/context-tree/backend/backend/job_tracker.abstract.md +1 -0
  19. package/.brv/context-tree/backend/backend/job_tracker.md +273 -0
  20. package/.brv/context-tree/backend/backend/job_tracker.overview.md +31 -0
  21. package/.brv/context-tree/backend/backend/main.abstract.md +0 -0
  22. package/.brv/context-tree/backend/backend/main.md +1292 -0
  23. package/.brv/context-tree/backend/backend/main.overview.md +30 -0
  24. package/.brv/context-tree/backend/backend/requirements.abstract.md +1 -0
  25. package/.brv/context-tree/backend/backend/requirements.md +37 -0
  26. package/.brv/context-tree/backend/backend/requirements.overview.md +28 -0
  27. package/.brv/context-tree/docs/_index.md +37 -0
  28. package/.brv/context-tree/docs/api/_index.md +54 -0
  29. package/.brv/context-tree/docs/api/context.md +11 -0
  30. package/.brv/context-tree/docs/api/hermes_api_openapi_specification.abstract.md +0 -0
  31. package/.brv/context-tree/docs/api/hermes_api_openapi_specification.md +468 -0
  32. package/.brv/context-tree/docs/api/hermes_api_openapi_specification.overview.md +44 -0
  33. package/.brv/context-tree/frontend/_index.md +48 -0
  34. package/.brv/context-tree/frontend/hermes_dashboard/_index.md +31 -0
  35. package/.brv/context-tree/frontend/hermes_dashboard/architecture_overview.abstract.md +0 -0
  36. package/.brv/context-tree/frontend/hermes_dashboard/architecture_overview.md +41 -0
  37. package/.brv/context-tree/frontend/hermes_dashboard/architecture_overview.overview.md +34 -0
  38. package/.brv/context-tree/frontend/src/_index.md +53 -0
  39. package/.brv/context-tree/frontend/src/components/_index.md +52 -0
  40. package/.brv/context-tree/frontend/src/components/sidebar_navigation_component.abstract.md +0 -0
  41. package/.brv/context-tree/frontend/src/components/sidebar_navigation_component.md +161 -0
  42. package/.brv/context-tree/frontend/src/components/sidebar_navigation_component.overview.md +32 -0
  43. package/.brv/context-tree/frontend/src/context.md +10 -0
  44. package/.brv/context-tree/frontend/src/functioncallingpage.abstract.md +0 -0
  45. package/.brv/context-tree/frontend/src/functioncallingpage.md +34 -0
  46. package/.brv/context-tree/frontend/src/functioncallingpage.overview.md +26 -0
  47. package/.brv/context-tree/frontend/src/lib/_index.md +48 -0
  48. package/.brv/context-tree/frontend/src/lib/api_client_library.abstract.md +1 -0
  49. package/.brv/context-tree/frontend/src/lib/api_client_library.md +403 -0
  50. package/.brv/context-tree/frontend/src/lib/api_client_library.overview.md +69 -0
  51. package/.brv/context-tree/frontend/src/page.abstract.md +0 -0
  52. package/.brv/context-tree/frontend/src/page.md +103 -0
  53. package/.brv/context-tree/frontend/src/page.overview.md +7 -0
  54. package/.brv/context-tree/frontend/src/settingspage.abstract.md +0 -0
  55. package/.brv/context-tree/frontend/src/settingspage.md +124 -0
  56. package/.brv/context-tree/frontend/src/settingspage.overview.md +34 -0
  57. package/.brv/context-tree/frontend/src/sidebar.abstract.md +0 -0
  58. package/.brv/context-tree/frontend/src/sidebar.md +170 -0
  59. package/.brv/context-tree/frontend/src/sidebar.overview.md +25 -0
  60. package/.brv/context-tree/meta/_index.md +24 -0
  61. package/.brv/context-tree/meta/curation_context/_index.md +24 -0
  62. package/.brv/context-tree/meta/curation_context/empty_context.abstract.md +4 -0
  63. package/.brv/context-tree/meta/curation_context/empty_context.md +35 -0
  64. package/.brv/context-tree/meta/curation_context/empty_context.overview.md +20 -0
  65. package/.brv/dream-log/drm-1777341062653.json +33 -0
  66. package/.brv/dream-state.json +8 -0
  67. package/.brv/dream.lock +0 -0
  68. package/.brv/review-backups/docs/api/hermes_api_openapi_specification.md +468 -0
  69. package/.claude/settings.local.json +7 -0
  70. package/.claude/worktrees/phase-2-mcp/.brv/.obsidian/app.json +1 -0
  71. package/.claude/worktrees/phase-2-mcp/.brv/.obsidian/appearance.json +1 -0
  72. package/.claude/worktrees/phase-2-mcp/.brv/.obsidian/core-plugins.json +33 -0
  73. package/.claude/worktrees/phase-2-mcp/.brv/.obsidian/graph.json +22 -0
  74. package/.claude/worktrees/phase-2-mcp/.brv/.obsidian/workspace.json +195 -0
  75. package/.claude/worktrees/phase-2-mcp/.brv/Sin título 1.canvas +1 -0
  76. package/.claude/worktrees/phase-2-mcp/.brv/Sin título 2.canvas +1 -0
  77. package/.claude/worktrees/phase-2-mcp/.brv/Sin título.canvas +1 -0
  78. package/.claude/worktrees/phase-2-mcp/.brv/_queue_status.json +1 -0
  79. package/.claude/worktrees/phase-2-mcp/.brv/config.json +5 -0
  80. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/_index.md +60 -0
  81. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/_manifest.json +165 -0
  82. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/_index.md +24 -0
  83. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/_index.md +40 -0
  84. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/init.abstract.md +0 -0
  85. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/init.md +27 -0
  86. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/init.overview.md +29 -0
  87. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/job_tracker.abstract.md +1 -0
  88. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/job_tracker.md +273 -0
  89. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/job_tracker.overview.md +31 -0
  90. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/main.abstract.md +0 -0
  91. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/main.md +1292 -0
  92. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/main.overview.md +30 -0
  93. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/requirements.abstract.md +1 -0
  94. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/requirements.md +37 -0
  95. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/backend/backend/requirements.overview.md +28 -0
  96. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/docs/_index.md +37 -0
  97. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/docs/api/_index.md +54 -0
  98. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/docs/api/context.md +11 -0
  99. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/docs/api/hermes_api_openapi_specification.abstract.md +0 -0
  100. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/docs/api/hermes_api_openapi_specification.md +468 -0
  101. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/docs/api/hermes_api_openapi_specification.overview.md +44 -0
  102. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/_index.md +48 -0
  103. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/hermes_dashboard/_index.md +31 -0
  104. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/hermes_dashboard/architecture_overview.abstract.md +0 -0
  105. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/hermes_dashboard/architecture_overview.md +41 -0
  106. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/hermes_dashboard/architecture_overview.overview.md +34 -0
  107. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/_index.md +53 -0
  108. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/components/_index.md +52 -0
  109. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/components/sidebar_navigation_component.abstract.md +0 -0
  110. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/components/sidebar_navigation_component.md +161 -0
  111. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/components/sidebar_navigation_component.overview.md +32 -0
  112. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/context.md +10 -0
  113. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/functioncallingpage.abstract.md +0 -0
  114. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/functioncallingpage.md +34 -0
  115. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/functioncallingpage.overview.md +26 -0
  116. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/lib/_index.md +48 -0
  117. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/lib/api_client_library.abstract.md +1 -0
  118. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/lib/api_client_library.md +403 -0
  119. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/lib/api_client_library.overview.md +69 -0
  120. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/page.abstract.md +0 -0
  121. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/page.md +103 -0
  122. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/page.overview.md +7 -0
  123. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/settingspage.abstract.md +0 -0
  124. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/settingspage.md +124 -0
  125. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/settingspage.overview.md +34 -0
  126. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/sidebar.abstract.md +0 -0
  127. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/sidebar.md +170 -0
  128. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/frontend/src/sidebar.overview.md +25 -0
  129. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/meta/_index.md +24 -0
  130. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/meta/curation_context/_index.md +24 -0
  131. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/meta/curation_context/empty_context.abstract.md +4 -0
  132. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/meta/curation_context/empty_context.md +35 -0
  133. package/.claude/worktrees/phase-2-mcp/.brv/context-tree/meta/curation_context/empty_context.overview.md +20 -0
  134. package/.claude/worktrees/phase-2-mcp/.brv/dream-log/drm-1777341062653.json +33 -0
  135. package/.claude/worktrees/phase-2-mcp/.brv/dream-state.json +8 -0
  136. package/.claude/worktrees/phase-2-mcp/.brv/dream.lock +0 -0
  137. package/.claude/worktrees/phase-2-mcp/.brv/review-backups/docs/api/hermes_api_openapi_specification.md +468 -0
  138. package/.claude/worktrees/phase-2-mcp/.claude/settings.local.json +13 -0
  139. package/.claude/worktrees/phase-2-mcp/.kilocode/package-lock.json +378 -0
  140. package/.claude/worktrees/phase-2-mcp/.kilocode/package.json +5 -0
  141. package/.claude/worktrees/phase-2-mcp/AGENTS.md +5 -0
  142. package/.claude/worktrees/phase-2-mcp/CLAUDE.md +29 -0
  143. package/.claude/worktrees/phase-2-mcp/QA_AUDIT_PLAN.md +156 -0
  144. package/.claude/worktrees/phase-2-mcp/README.md +316 -0
  145. package/.claude/worktrees/phase-2-mcp/agent-agnostic-evolution-dashboard.md +405 -0
  146. package/.claude/worktrees/phase-2-mcp/backend/__init__.py +0 -0
  147. package/.claude/worktrees/phase-2-mcp/backend/collectors/__init__.py +0 -0
  148. package/.claude/worktrees/phase-2-mcp/backend/collectors/claude_code_collector.py +277 -0
  149. package/.claude/worktrees/phase-2-mcp/backend/collectors/hermes_collector.py +68 -0
  150. package/.claude/worktrees/phase-2-mcp/backend/curator.py +512 -0
  151. package/.claude/worktrees/phase-2-mcp/backend/eval/__init__.py +19 -0
  152. package/.claude/worktrees/phase-2-mcp/backend/eval/engine.py +116 -0
  153. package/.claude/worktrees/phase-2-mcp/backend/eval/scorers.py +201 -0
  154. package/.claude/worktrees/phase-2-mcp/backend/generate_dataset.py +86 -0
  155. package/.claude/worktrees/phase-2-mcp/backend/job_tracker.py +232 -0
  156. package/.claude/worktrees/phase-2-mcp/backend/main.py +1746 -0
  157. package/.claude/worktrees/phase-2-mcp/backend/mcp_server.py +250 -0
  158. package/.claude/worktrees/phase-2-mcp/backend/promethean/__init__.py +24 -0
  159. package/.claude/worktrees/phase-2-mcp/backend/promethean/cycle_orchestrator.py +270 -0
  160. package/.claude/worktrees/phase-2-mcp/backend/promethean/delta_validator.py +191 -0
  161. package/.claude/worktrees/phase-2-mcp/backend/promethean/dspy_compiler.py +315 -0
  162. package/.claude/worktrees/phase-2-mcp/backend/promethean/gepa_strategist.py +213 -0
  163. package/.claude/worktrees/phase-2-mcp/backend/promethean/models.py +260 -0
  164. package/.claude/worktrees/phase-2-mcp/backend/promethean/skill_deployer.py +195 -0
  165. package/.claude/worktrees/phase-2-mcp/backend/promethean/trace_ingestion.py +142 -0
  166. package/.claude/worktrees/phase-2-mcp/backend/requirements.txt +6 -0
  167. package/.claude/worktrees/phase-2-mcp/backend/sdd_evolve.py +459 -0
  168. package/.claude/worktrees/phase-2-mcp/backend/skill_detector.py +227 -0
  169. package/.claude/worktrees/phase-2-mcp/backend/skill_registry.py +289 -0
  170. package/.claude/worktrees/phase-2-mcp/backend/storage/__init__.py +5 -0
  171. package/.claude/worktrees/phase-2-mcp/backend/storage/run_store.py +393 -0
  172. package/.claude/worktrees/phase-2-mcp/backend/storage/schema.sql +99 -0
  173. package/.claude/worktrees/phase-2-mcp/backend/validate_evolution.py +267 -0
  174. package/.claude/worktrees/phase-2-mcp/components.json +28 -0
  175. package/.claude/worktrees/phase-2-mcp/docs/api/hermes-api.openapi.yaml +438 -0
  176. package/.claude/worktrees/phase-2-mcp/docs/hero.svg +148 -0
  177. package/.claude/worktrees/phase-2-mcp/eslint.config.mjs +18 -0
  178. package/.claude/worktrees/phase-2-mcp/install.sh +245 -0
  179. package/.claude/worktrees/phase-2-mcp/next-env.d.ts +6 -0
  180. package/.claude/worktrees/phase-2-mcp/next.config.ts +32 -0
  181. package/.claude/worktrees/phase-2-mcp/package-lock.json +11936 -0
  182. package/.claude/worktrees/phase-2-mcp/package.json +41 -0
  183. package/.claude/worktrees/phase-2-mcp/pnpm-workspace.yaml +4 -0
  184. package/.claude/worktrees/phase-2-mcp/postcss.config.mjs +7 -0
  185. package/.claude/worktrees/phase-2-mcp/public/file.svg +1 -0
  186. package/.claude/worktrees/phase-2-mcp/public/fonts/SF-Pro-Display-Bold.otf +0 -0
  187. package/.claude/worktrees/phase-2-mcp/public/fonts/SF-Pro-Display-Heavy.otf +0 -0
  188. package/.claude/worktrees/phase-2-mcp/public/fonts/SF-Pro-Display-Medium.otf +0 -0
  189. package/.claude/worktrees/phase-2-mcp/public/fonts/SF-Pro-Display-Regular.otf +0 -0
  190. package/.claude/worktrees/phase-2-mcp/public/fonts/SF-Pro-Display-Semibold.otf +0 -0
  191. package/.claude/worktrees/phase-2-mcp/public/fonts/SF-Pro-Text-Bold.otf +0 -0
  192. package/.claude/worktrees/phase-2-mcp/public/fonts/SF-Pro-Text-Heavy.otf +0 -0
  193. package/.claude/worktrees/phase-2-mcp/public/fonts/SF-Pro-Text-Medium.otf +0 -0
  194. package/.claude/worktrees/phase-2-mcp/public/fonts/SF-Pro-Text-Regular.otf +0 -0
  195. package/.claude/worktrees/phase-2-mcp/public/fonts/SF-Pro-Text-Semibold.otf +0 -0
  196. package/.claude/worktrees/phase-2-mcp/public/globe.svg +1 -0
  197. package/.claude/worktrees/phase-2-mcp/public/next.svg +1 -0
  198. package/.claude/worktrees/phase-2-mcp/public/theme-preview.html +257 -0
  199. package/.claude/worktrees/phase-2-mcp/public/vercel.svg +1 -0
  200. package/.claude/worktrees/phase-2-mcp/public/window.svg +1 -0
  201. package/.claude/worktrees/phase-2-mcp/run.sh +26 -0
  202. package/.claude/worktrees/phase-2-mcp/skills-lock.json +10 -0
  203. package/.claude/worktrees/phase-2-mcp/specs/event-schema.md +223 -0
  204. package/.claude/worktrees/phase-2-mcp/specs/examples/run.jsonl +3 -0
  205. package/.claude/worktrees/phase-2-mcp/src/app/api/[...path]/route.ts +55 -0
  206. package/.claude/worktrees/phase-2-mcp/src/app/api/auth/token/route.ts +22 -0
  207. package/.claude/worktrees/phase-2-mcp/src/app/evolution/page.tsx +589 -0
  208. package/.claude/worktrees/phase-2-mcp/src/app/favicon.ico +0 -0
  209. package/.claude/worktrees/phase-2-mcp/src/app/globals.css +321 -0
  210. package/.claude/worktrees/phase-2-mcp/src/app/layout.tsx +63 -0
  211. package/.claude/worktrees/phase-2-mcp/src/app/page.tsx +70 -0
  212. package/.claude/worktrees/phase-2-mcp/src/app/skills/page.tsx +369 -0
  213. package/.claude/worktrees/phase-2-mcp/src/components/ApiConfigCard.tsx +199 -0
  214. package/.claude/worktrees/phase-2-mcp/src/components/ColorBends.css +1 -0
  215. package/.claude/worktrees/phase-2-mcp/src/components/ColorBends.d.ts +1 -0
  216. package/.claude/worktrees/phase-2-mcp/src/components/ColorBends.jsx +1 -0
  217. package/.claude/worktrees/phase-2-mcp/src/components/CoreLoopToggle.tsx +111 -0
  218. package/.claude/worktrees/phase-2-mcp/src/components/EnvironmentStatus.tsx +176 -0
  219. package/.claude/worktrees/phase-2-mcp/src/components/EvolutionBackground.tsx +1 -0
  220. package/.claude/worktrees/phase-2-mcp/src/components/ReactQueryProvider.tsx +24 -0
  221. package/.claude/worktrees/phase-2-mcp/src/components/Sidebar.tsx +247 -0
  222. package/.claude/worktrees/phase-2-mcp/src/components/SkillDiffViewer.tsx +154 -0
  223. package/.claude/worktrees/phase-2-mcp/src/components/ThemeAwareBackground.tsx +67 -0
  224. package/.claude/worktrees/phase-2-mcp/src/components/ThemeToggle.tsx +54 -0
  225. package/.claude/worktrees/phase-2-mcp/src/components/WelcomeHero.tsx +77 -0
  226. package/.claude/worktrees/phase-2-mcp/src/components/bits/ClickSpark.tsx +116 -0
  227. package/.claude/worktrees/phase-2-mcp/src/components/bits/CountUp.tsx +98 -0
  228. package/.claude/worktrees/phase-2-mcp/src/components/bits/DarkSelect.tsx +95 -0
  229. package/.claude/worktrees/phase-2-mcp/src/components/bits/DecryptedText.tsx +161 -0
  230. package/.claude/worktrees/phase-2-mcp/src/components/bits/ElectricBorder.tsx +184 -0
  231. package/.claude/worktrees/phase-2-mcp/src/components/bits/GlitchText.tsx +34 -0
  232. package/.claude/worktrees/phase-2-mcp/src/components/bits/ShinyText.tsx +55 -0
  233. package/.claude/worktrees/phase-2-mcp/src/components/bits/SpotlightCard.tsx +42 -0
  234. package/.claude/worktrees/phase-2-mcp/src/components/bits/TextType.tsx +95 -0
  235. package/.claude/worktrees/phase-2-mcp/src/components/bits/index.ts +9 -0
  236. package/.claude/worktrees/phase-2-mcp/src/components/pages/CuratorPage.tsx +632 -0
  237. package/.claude/worktrees/phase-2-mcp/src/components/pages/DatasetPage.tsx +271 -0
  238. package/.claude/worktrees/phase-2-mcp/src/components/pages/EvolutionPage.tsx +676 -0
  239. package/.claude/worktrees/phase-2-mcp/src/components/pages/FunctionCallingPage.tsx +1 -0
  240. package/.claude/worktrees/phase-2-mcp/src/components/pages/LogsPage.tsx +272 -0
  241. package/.claude/worktrees/phase-2-mcp/src/components/pages/MetricsPage.tsx +246 -0
  242. package/.claude/worktrees/phase-2-mcp/src/components/pages/OverviewPage.tsx +420 -0
  243. package/.claude/worktrees/phase-2-mcp/src/components/pages/SettingsPage.tsx +88 -0
  244. package/.claude/worktrees/phase-2-mcp/src/components/pages/SkillStudioPage.tsx +376 -0
  245. package/.claude/worktrees/phase-2-mcp/src/components/ui/animated-theme-toggler.tsx +97 -0
  246. package/.claude/worktrees/phase-2-mcp/src/components/ui/button.tsx +67 -0
  247. package/.claude/worktrees/phase-2-mcp/src/components/ui/card.tsx +103 -0
  248. package/.claude/worktrees/phase-2-mcp/src/components/ui/input.tsx +19 -0
  249. package/.claude/worktrees/phase-2-mcp/src/components/ui/separator.tsx +28 -0
  250. package/.claude/worktrees/phase-2-mcp/src/components/ui/sheet.tsx +147 -0
  251. package/.claude/worktrees/phase-2-mcp/src/components/ui/sidebar.tsx +702 -0
  252. package/.claude/worktrees/phase-2-mcp/src/components/ui/skeleton.tsx +13 -0
  253. package/.claude/worktrees/phase-2-mcp/src/components/ui/theme-toggle.tsx +272 -0
  254. package/.claude/worktrees/phase-2-mcp/src/components/ui/tooltip.tsx +57 -0
  255. package/.claude/worktrees/phase-2-mcp/src/hooks/use-mobile.ts +19 -0
  256. package/.claude/worktrees/phase-2-mcp/src/lib/api.ts +455 -0
  257. package/.claude/worktrees/phase-2-mcp/src/lib/queryClient.ts +12 -0
  258. package/.claude/worktrees/phase-2-mcp/src/lib/utils.ts +6 -0
  259. package/.claude/worktrees/phase-2-mcp/stitch/agent_dashboard/DESIGN_SPEC.md +521 -0
  260. package/.claude/worktrees/phase-2-mcp/stitch/agent_dashboard/prototype.html +676 -0
  261. package/.claude/worktrees/phase-2-mcp/stitch/curator_workspace/code.html +448 -0
  262. package/.claude/worktrees/phase-2-mcp/stitch/curator_workspace/screen.png +0 -0
  263. package/.claude/worktrees/phase-2-mcp/stitch/datasets/code.html +479 -0
  264. package/.claude/worktrees/phase-2-mcp/stitch/datasets/screen.png +0 -0
  265. package/.claude/worktrees/phase-2-mcp/stitch/evolution_history/code.html +461 -0
  266. package/.claude/worktrees/phase-2-mcp/stitch/evolution_history/screen.png +0 -0
  267. package/.claude/worktrees/phase-2-mcp/stitch/hermes_dashboard/DESIGN.md +192 -0
  268. package/.claude/worktrees/phase-2-mcp/stitch/hermes_dashboard/DESIGN_SPEC.md +455 -0
  269. package/.claude/worktrees/phase-2-mcp/stitch/hermes_overview/code.html +399 -0
  270. package/.claude/worktrees/phase-2-mcp/stitch/hermes_overview/screen.png +0 -0
  271. package/.claude/worktrees/phase-2-mcp/stitch/live_logs/code.html +324 -0
  272. package/.claude/worktrees/phase-2-mcp/stitch/live_logs/screen.png +0 -0
  273. package/.claude/worktrees/phase-2-mcp/stitch/skill_hub/code.html +596 -0
  274. package/.claude/worktrees/phase-2-mcp/stitch/skill_hub/screen.png +0 -0
  275. package/.claude/worktrees/phase-2-mcp/stitch/system_metrics/code.html +527 -0
  276. package/.claude/worktrees/phase-2-mcp/stitch/system_metrics/screen.png +0 -0
  277. package/.claude/worktrees/phase-2-mcp/stitch/system_settings/code.html +257 -0
  278. package/.claude/worktrees/phase-2-mcp/stitch/system_settings/screen.png +0 -0
  279. package/.claude/worktrees/phase-2-mcp/test_dashboard.py +201 -0
  280. package/.claude/worktrees/phase-2-mcp/tests/collectors/__init__.py +0 -0
  281. package/.claude/worktrees/phase-2-mcp/tests/collectors/fixtures/sample_session.jsonl +7 -0
  282. package/.claude/worktrees/phase-2-mcp/tests/collectors/test_claude_code_collector.py +171 -0
  283. package/.claude/worktrees/phase-2-mcp/tests/collectors/test_hermes_collector.py +167 -0
  284. package/.claude/worktrees/phase-2-mcp/tests/eval/test_engine.py +234 -0
  285. package/.claude/worktrees/phase-2-mcp/tests/eval/test_scorers.py +249 -0
  286. package/.claude/worktrees/phase-2-mcp/tests/storage/__init__.py +0 -0
  287. package/.claude/worktrees/phase-2-mcp/tests/storage/test_run_store.py +359 -0
  288. package/.claude/worktrees/phase-2-mcp/tests/test_curator.py +559 -0
  289. package/.claude/worktrees/phase-2-mcp/tests/test_mcp_server.py +114 -0
  290. package/.claude/worktrees/phase-2-mcp/tsconfig.json +34 -0
  291. package/.env.example +72 -0
  292. package/.kilocode/package-lock.json +378 -0
  293. package/.kilocode/package.json +5 -0
  294. package/AGENTS.md +5 -0
  295. package/CLAUDE.md +29 -0
  296. package/QA_AUDIT_PLAN.md +156 -0
  297. package/README.md +355 -0
  298. package/agent-agnostic-evolution-dashboard.md +405 -0
  299. package/backend/__init__.py +0 -0
  300. package/backend/collectors/__init__.py +0 -0
  301. package/backend/collectors/claude_code_collector.py +277 -0
  302. package/backend/collectors/hermes_collector.py +68 -0
  303. package/backend/curator.py +512 -0
  304. package/backend/eval/__init__.py +19 -0
  305. package/backend/eval/engine.py +116 -0
  306. package/backend/eval/scorers.py +201 -0
  307. package/backend/generate_dataset.py +86 -0
  308. package/backend/job_tracker.py +232 -0
  309. package/backend/main.py +1746 -0
  310. package/backend/mcp_server.py +250 -0
  311. package/backend/promethean/__init__.py +24 -0
  312. package/backend/promethean/cycle_orchestrator.py +270 -0
  313. package/backend/promethean/delta_validator.py +191 -0
  314. package/backend/promethean/dspy_compiler.py +315 -0
  315. package/backend/promethean/gepa_strategist.py +213 -0
  316. package/backend/promethean/models.py +260 -0
  317. package/backend/promethean/skill_deployer.py +195 -0
  318. package/backend/promethean/trace_ingestion.py +142 -0
  319. package/backend/requirements.txt +6 -0
  320. package/backend/sdd_evolve.py +459 -0
  321. package/backend/skill_detector.py +227 -0
  322. package/backend/skill_registry.py +289 -0
  323. package/backend/storage/__init__.py +5 -0
  324. package/backend/storage/run_store.py +393 -0
  325. package/backend/storage/schema.sql +99 -0
  326. package/backend/validate_evolution.py +267 -0
  327. package/bin/genoma.js +250 -0
  328. package/components.json +28 -0
  329. package/docs/api/hermes-api.openapi.yaml +438 -0
  330. package/docs/hero.svg +148 -0
  331. package/eslint.config.mjs +18 -0
  332. package/install.sh +245 -0
  333. package/next-env.d.ts +6 -0
  334. package/next.config.ts +32 -0
  335. package/package.json +46 -0
  336. package/pnpm-workspace.yaml +4 -0
  337. package/postcss.config.mjs +7 -0
  338. package/public/file.svg +1 -0
  339. package/public/fonts/SF-Pro-Display-Bold.otf +0 -0
  340. package/public/fonts/SF-Pro-Display-Heavy.otf +0 -0
  341. package/public/fonts/SF-Pro-Display-Medium.otf +0 -0
  342. package/public/fonts/SF-Pro-Display-Regular.otf +0 -0
  343. package/public/fonts/SF-Pro-Display-Semibold.otf +0 -0
  344. package/public/fonts/SF-Pro-Text-Bold.otf +0 -0
  345. package/public/fonts/SF-Pro-Text-Heavy.otf +0 -0
  346. package/public/fonts/SF-Pro-Text-Medium.otf +0 -0
  347. package/public/fonts/SF-Pro-Text-Regular.otf +0 -0
  348. package/public/fonts/SF-Pro-Text-Semibold.otf +0 -0
  349. package/public/globe.svg +1 -0
  350. package/public/next.svg +1 -0
  351. package/public/theme-preview.html +257 -0
  352. package/public/vercel.svg +1 -0
  353. package/public/window.svg +1 -0
  354. package/run.sh +26 -0
  355. package/scripts/postinstall.js +50 -0
  356. package/skills-lock.json +10 -0
  357. package/specs/event-schema.md +223 -0
  358. package/specs/examples/run.jsonl +3 -0
  359. package/src/app/api/[...path]/route.ts +55 -0
  360. package/src/app/api/auth/token/route.ts +22 -0
  361. package/src/app/evolution/page.tsx +589 -0
  362. package/src/app/favicon.ico +0 -0
  363. package/src/app/globals.css +321 -0
  364. package/src/app/layout.tsx +63 -0
  365. package/src/app/page.tsx +70 -0
  366. package/src/app/skills/page.tsx +369 -0
  367. package/src/components/ApiConfigCard.tsx +199 -0
  368. package/src/components/ColorBends.css +1 -0
  369. package/src/components/ColorBends.d.ts +1 -0
  370. package/src/components/ColorBends.jsx +1 -0
  371. package/src/components/CoreLoopToggle.tsx +111 -0
  372. package/src/components/EnvironmentStatus.tsx +176 -0
  373. package/src/components/EvolutionBackground.tsx +1 -0
  374. package/src/components/ReactQueryProvider.tsx +24 -0
  375. package/src/components/Sidebar.tsx +247 -0
  376. package/src/components/SkillDiffViewer.tsx +154 -0
  377. package/src/components/ThemeAwareBackground.tsx +67 -0
  378. package/src/components/ThemeToggle.tsx +54 -0
  379. package/src/components/WelcomeHero.tsx +77 -0
  380. package/src/components/bits/ClickSpark.tsx +116 -0
  381. package/src/components/bits/CountUp.tsx +98 -0
  382. package/src/components/bits/DarkSelect.tsx +95 -0
  383. package/src/components/bits/DecryptedText.tsx +161 -0
  384. package/src/components/bits/ElectricBorder.tsx +184 -0
  385. package/src/components/bits/GlitchText.tsx +34 -0
  386. package/src/components/bits/ShinyText.tsx +55 -0
  387. package/src/components/bits/SpotlightCard.tsx +42 -0
  388. package/src/components/bits/TextType.tsx +95 -0
  389. package/src/components/bits/index.ts +9 -0
  390. package/src/components/pages/CuratorPage.tsx +632 -0
  391. package/src/components/pages/DatasetPage.tsx +271 -0
  392. package/src/components/pages/EvolutionPage.tsx +676 -0
  393. package/src/components/pages/FunctionCallingPage.tsx +1 -0
  394. package/src/components/pages/LogsPage.tsx +272 -0
  395. package/src/components/pages/MetricsPage.tsx +246 -0
  396. package/src/components/pages/OverviewPage.tsx +420 -0
  397. package/src/components/pages/SettingsPage.tsx +88 -0
  398. package/src/components/pages/SkillStudioPage.tsx +376 -0
  399. package/src/components/ui/animated-theme-toggler.tsx +97 -0
  400. package/src/components/ui/button.tsx +67 -0
  401. package/src/components/ui/card.tsx +103 -0
  402. package/src/components/ui/input.tsx +19 -0
  403. package/src/components/ui/separator.tsx +28 -0
  404. package/src/components/ui/sheet.tsx +147 -0
  405. package/src/components/ui/sidebar.tsx +702 -0
  406. package/src/components/ui/skeleton.tsx +13 -0
  407. package/src/components/ui/theme-toggle.tsx +272 -0
  408. package/src/components/ui/tooltip.tsx +57 -0
  409. package/src/hooks/use-mobile.ts +19 -0
  410. package/src/lib/api.ts +455 -0
  411. package/src/lib/queryClient.ts +12 -0
  412. package/src/lib/utils.ts +6 -0
  413. package/stitch/agent_dashboard/DESIGN_SPEC.md +521 -0
  414. package/stitch/agent_dashboard/prototype.html +676 -0
  415. package/stitch/curator_workspace/code.html +448 -0
  416. package/stitch/curator_workspace/screen.png +0 -0
  417. package/stitch/datasets/code.html +479 -0
  418. package/stitch/datasets/screen.png +0 -0
  419. package/stitch/evolution_history/code.html +461 -0
  420. package/stitch/evolution_history/screen.png +0 -0
  421. package/stitch/hermes_dashboard/DESIGN.md +192 -0
  422. package/stitch/hermes_dashboard/DESIGN_SPEC.md +455 -0
  423. package/stitch/hermes_overview/code.html +399 -0
  424. package/stitch/hermes_overview/screen.png +0 -0
  425. package/stitch/live_logs/code.html +324 -0
  426. package/stitch/live_logs/screen.png +0 -0
  427. package/stitch/skill_hub/code.html +596 -0
  428. package/stitch/skill_hub/screen.png +0 -0
  429. package/stitch/system_metrics/code.html +527 -0
  430. package/stitch/system_metrics/screen.png +0 -0
  431. package/stitch/system_settings/code.html +257 -0
  432. package/stitch/system_settings/screen.png +0 -0
  433. package/test_dashboard.py +201 -0
  434. package/tests/collectors/__init__.py +0 -0
  435. package/tests/collectors/fixtures/sample_session.jsonl +7 -0
  436. package/tests/collectors/test_claude_code_collector.py +171 -0
  437. package/tests/collectors/test_hermes_collector.py +167 -0
  438. package/tests/eval/test_engine.py +234 -0
  439. package/tests/eval/test_scorers.py +249 -0
  440. package/tests/storage/__init__.py +0 -0
  441. package/tests/storage/test_run_store.py +359 -0
  442. package/tests/test_curator.py +559 -0
  443. package/tests/test_e2e_npm.py +621 -0
  444. package/tests/test_mcp_server.py +114 -0
  445. package/tsconfig.json +34 -0
@@ -0,0 +1,167 @@
1
+ """Tests for HermesCollector — Phase 2 verification."""
2
+
3
+ import pytest
4
+ from backend.promethean.models import TraceRecord, CanonicalRun
5
+ from backend.collectors.hermes_collector import HermesCollector
6
+
7
+
8
@pytest.fixture
def collector():
    """Provide a freshly constructed HermesCollector to each test."""
    hermes_collector = HermesCollector()
    return hermes_collector
12
+
13
+
14
@pytest.fixture
def sample_trace():
    """Build a successful hermes TraceRecord that carries a context dict."""
    fields = {
        "agent": "hermes",
        "agent_version": "2.1.143",
        "timestamp": "2026-05-19T14:32:00Z",
        "task": "Implement user authentication",
        "outcome": "success",
        "error_signature": None,
        "context": {"skill_name": "auth-middleware"},
        "trace_id": "hermes-001",
    }
    return TraceRecord(**fields)
27
+
28
+
29
@pytest.fixture
def failed_trace():
    """Build a failing hermes TraceRecord with an error signature and resolution."""
    fields = {
        "agent": "hermes",
        "agent_version": "2.1.143",
        "timestamp": "2026-05-19T14:32:00Z",
        "task": "Test task",
        "outcome": "failure",
        "error_signature": "TypeError: Cannot read property 'foo' of undefined",
        "resolution": "Rolled back changes",
        "trace_id": "hermes-002",
    }
    return TraceRecord(**fields)
42
+
43
+
44
class TestHermesCollectorBasic:
    """Core conversion behaviour of HermesCollector."""

    def test_collector_instantiation(self, collector):
        """The fixture yields a collector exposing the expected class constants."""
        assert collector is not None
        assert collector.VERSION == "0.1.0"
        assert collector.AGENT_NAME == "hermes"

    def test_collect_from_trace_success(self, collector, sample_trace):
        """A successful TraceRecord maps onto the expected CanonicalRun fields."""
        run = collector.collect_from_trace(sample_trace)
        assert isinstance(run, CanonicalRun)

        expected_fields = {
            "run_id": "hermes-001",
            "agent_name": "hermes",
            "collector": "hermes-trace-ingestor",
            "outcome": "success",
            "task_name": "Implement user authentication",
            "started_at": "2026-05-19T14:32:00Z",
            "provider": "hermes",
        }
        for attr, wanted in expected_fields.items():
            assert getattr(run, attr) == wanted

    def test_collect_from_trace_with_error(self, collector, failed_trace):
        """A failed TraceRecord yields one error entry plus its resolution."""
        run = collector.collect_from_trace(failed_trace)

        assert run.outcome == "failure"
        assert len(run.errors) == 1
        only_error = run.errors[0]
        assert only_error["signature"] == "TypeError: Cannot read property 'foo' of undefined"
        assert run.resolution == "Rolled back changes"

    def test_collect_from_trace_no_error(self, collector, sample_trace):
        """Without an error_signature the resulting errors list is empty."""
        run = collector.collect_from_trace(sample_trace)

        assert run.errors == []

    def test_collect_batch(self, collector, sample_trace, failed_trace):
        """collect_batch() converts each trace, keeping input order."""
        converted = collector.collect_batch([sample_trace, failed_trace])

        assert len(converted) == 2
        first, second = converted[0], converted[1]
        assert first.run_id == "hermes-001"
        assert second.run_id == "hermes-002"
        assert first.outcome == "success"
        assert second.outcome == "failure"
91
+
92
+
93
class TestCanonicalRunSerialization:
    """Serialization and field-preservation checks for CanonicalRun."""

    def test_to_dict(self, collector, sample_trace):
        """to_dict() returns a dict holding the identifying fields."""
        payload = collector.collect_from_trace(sample_trace).to_dict()

        assert isinstance(payload, dict)
        assert payload["run_id"] == "hermes-001"
        assert payload["agent_name"] == "hermes"
        for key in ("collector", "started_at"):
            assert key in payload

    def test_to_json(self, collector, sample_trace):
        """to_json() emits text that json.loads() can parse back."""
        import json

        serialized = collector.collect_from_trace(sample_trace).to_json()

        # json.loads raises if the text is not valid JSON.
        decoded = json.loads(serialized)
        assert decoded["run_id"] == "hermes-001"
        assert decoded["agent_name"] == "hermes"

    def test_from_dict_roundtrip(self, collector, sample_trace):
        """from_dict(to_dict()) keeps the key identifying attributes."""
        before = collector.collect_from_trace(sample_trace)
        after = CanonicalRun.from_dict(before.to_dict())

        for attr in ("run_id", "agent_name", "outcome", "task_name"):
            assert getattr(after, attr) == getattr(before, attr)

    def test_context_preservation(self, collector, sample_trace):
        """The TraceRecord context dict is carried over unchanged."""
        run = collector.collect_from_trace(sample_trace)

        assert run.context == {"skill_name": "auth-middleware"}

    def test_minimal_trace_conversion(self, collector):
        """A TraceRecord without optional fields still converts."""
        bare_fields = {
            "agent": "hermes",
            "agent_version": "2.0.0",
            "timestamp": "2026-05-19T10:00:00Z",
            "task": "Minimal task",
            "outcome": "unknown",
            "trace_id": "minimal-001",
        }
        run = collector.collect_from_trace(TraceRecord(**bare_fields))

        assert run.run_id == "minimal-001"
        assert run.outcome == "unknown"
        assert run.errors == []
        assert run.context == {}
        assert run.resolution is None
154
+
155
+
156
class TestCanonicalRunRequiredFields:
    """Guard that mandatory CanonicalRun fields are always populated."""

    def test_required_fields_present(self, collector, sample_trace):
        """Each required field is neither None nor an empty string."""
        run = collector.collect_from_trace(sample_trace)

        must_have = {"run_id", "agent_name", "collector", "started_at", "task_name", "outcome"}
        for field in must_have:
            current = getattr(run, field)
            assert current is not None, f"Required field '{field}' is None"
            # Compare via str() so non-string values are checked the same way.
            assert len(str(current)) > 0, f"Required field '{field}' is empty string"
@@ -0,0 +1,234 @@
1
+ """Tests for evaluation engine — Phase 5 verification."""
2
+
3
+ import tempfile
4
+ from pathlib import Path
5
+
6
+ import pytest
7
+
8
+ from backend.eval.engine import EvaluationEngine
9
+ from backend.promethean.models import CanonicalRun, RunMetrics
10
+ from backend.storage import RunStore
11
+
12
+
13
@pytest.fixture
def temp_db():
    """Yield a database path inside a temporary directory.

    The directory is removed when the test finishes and the ``with`` block exits.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        db_file = Path(scratch_dir).joinpath("test.db")
        yield db_file
18
+
19
+
20
@pytest.fixture
def store(temp_db):
    """Provide a RunStore pointed at the temporary database path."""
    run_store = RunStore(db_path=temp_db)
    return run_store
24
+
25
+
26
@pytest.fixture
def engine(store):
    """Provide an EvaluationEngine backed by the temporary store."""
    evaluation_engine = EvaluationEngine(store=store)
    return evaluation_engine
30
+
31
+
32
@pytest.fixture
def sample_run(store):
    """Create a successful run, persist it in the store, and return it."""
    token_metrics = RunMetrics(input_tokens=1000, output_tokens=500, tool_call_count=2)
    stored = CanonicalRun(
        run_id="run-001",
        agent_name="hermes",
        collector="hermes-trace-ingestor",
        started_at="2026-05-19T10:00:00Z",
        task_name="Task",
        outcome="success",
        metrics=token_metrics,
        errors=[],
    )
    store.upsert_run(stored)
    return stored
47
+
48
+
49
@pytest.fixture
def failed_run(store):
    """Create a failed run with one error, persist it in the store, and return it."""
    token_metrics = RunMetrics(input_tokens=2000, output_tokens=800, tool_call_count=0)
    error_entries = [{"signature": "Error", "message": "Failed"}]
    stored = CanonicalRun(
        run_id="run-002",
        agent_name="hermes",
        collector="hermes-trace-ingestor",
        started_at="2026-05-19T11:00:00Z",
        task_name="Task",
        outcome="failure",
        metrics=token_metrics,
        errors=error_entries,
    )
    store.upsert_run(stored)
    return stored
64
+
65
+
66
class TestEvaluationEngine:
    """Scoring, batching, and regression-detection checks for EvaluationEngine."""

    @staticmethod
    def _store_pair(store, baseline_outcome, evolved_outcome):
        """Persist a 'baseline' and an 'evolved' hermes run with the given outcomes."""
        specs = (
            ("baseline", "2026-05-19T10:00:00Z", baseline_outcome),
            ("evolved", "2026-05-19T11:00:00Z", evolved_outcome),
        )
        # Build both runs before writing either, then upsert baseline first.
        runs = [
            CanonicalRun(
                run_id=run_id,
                agent_name="hermes",
                collector="hermes-trace-ingestor",
                started_at=started_at,
                task_name="Task",
                outcome=outcome,
            )
            for run_id, started_at, outcome in specs
        ]
        for run in runs:
            store.upsert_run(run)

    def test_engine_instantiation(self, engine):
        """A default engine is created with five scorers."""
        assert engine is not None
        assert len(engine.scorers) == 5  # 5 default scorers

    def test_evaluate_single_run(self, engine, sample_run):
        """evaluate() yields score objects with values inside [0, 1]."""
        results = engine.evaluate(sample_run)

        assert len(results) > 0  # At least some scorers apply
        assert all(hasattr(item, "score") for item in results)
        assert all(hasattr(item, "passed") for item in results)
        assert all(0.0 <= item.score <= 1.0 for item in results)

    def test_evaluate_batch(self, engine, sample_run, failed_run):
        """evaluate_batch() reports totals for every run it was given."""
        summary = engine.evaluate_batch([sample_run, failed_run])

        assert summary["total"] == 2
        assert summary["evaluated"] == 2
        assert summary["errors"] == 0

    def test_get_aggregate_score(self, engine, sample_run):
        """get_aggregate_score() stays in [0, 1] and exceeds 0.5 for a success."""
        aggregate = engine.get_aggregate_score(sample_run)

        assert 0.0 <= aggregate <= 1.0
        # Success run should score > 0.5
        assert aggregate > 0.5

    def test_aggregate_score_consistency(self, engine, sample_run):
        """Two aggregate computations on the same run are equal."""
        first = engine.get_aggregate_score(sample_run)
        second = engine.get_aggregate_score(sample_run)

        assert first == second

    def test_detect_regression_improvement(self, engine, store):
        """partial -> success is reported as an improvement."""
        self._store_pair(store, "partial", "success")

        verdict = engine.detect_regression("baseline", "evolved", threshold=0.05)

        assert verdict["improvement"] is True
        assert verdict["regression"] is False
        assert verdict["delta"] > 0

    def test_detect_regression_failure(self, engine, store):
        """success -> failure is reported as a regression."""
        self._store_pair(store, "success", "failure")

        verdict = engine.detect_regression("baseline", "evolved", threshold=0.05)

        assert verdict["regression"] is True
        assert verdict["improvement"] is False
        assert verdict["delta"] < 0

    def test_detect_regression_neutral(self, engine, store):
        """success -> success is reported as neutral."""
        self._store_pair(store, "success", "success")

        verdict = engine.detect_regression("baseline", "evolved", threshold=0.05)

        assert verdict["neutral"] is True
        assert verdict["improvement"] is False
        assert verdict["regression"] is False

    def test_detect_regression_missing_run(self, engine):
        """Unknown run ids produce an error result flagging both as not found."""
        verdict = engine.detect_regression("nonexistent-1", "nonexistent-2")

        assert "error" in verdict
        assert verdict["baseline_found"] is False
        assert verdict["evolved_found"] is False
195
+
196
+
197
class TestEngineWithCustomScorers:
    """EvaluationEngine behaviour when the scorer list is supplied explicitly."""

    @staticmethod
    def _unsaved_success_run():
        """Build a successful hermes run that is not written to any store."""
        return CanonicalRun(
            run_id="test",
            agent_name="hermes",
            collector="hermes-trace-ingestor",
            started_at="2026-05-19T10:00:00Z",
            task_name="Task",
            outcome="success",
        )

    def test_custom_scorers(self, store):
        """With a single OutcomeScorer, evaluate() returns exactly its score."""
        from backend.eval.scorers import OutcomeScorer

        only_outcome = [OutcomeScorer()]
        custom_engine = EvaluationEngine(store=store, scorers=only_outcome)

        results = custom_engine.evaluate(self._unsaved_success_run())
        assert len(results) == 1
        assert results[0].scorer == "outcome"

    def test_empty_scorers(self, store):
        """With no scorers configured, evaluate() returns nothing."""
        bare_engine = EvaluationEngine(store=store, scorers=[])

        results = bare_engine.evaluate(self._unsaved_success_run())
        assert len(results) == 0
@@ -0,0 +1,249 @@
1
+ """Tests for evaluation scorers — Phase 5 verification."""
2
+
3
+ import pytest
4
+
5
+ from backend.eval.scorers import (
6
+ OutcomeScorer,
7
+ ToolEfficiencyScorer,
8
+ TokenCostScorer,
9
+ ErrorRecoveryScorer,
10
+ )
11
+ from backend.promethean.models import CanonicalRun, ToolCallRecord, RunMetrics
12
+
13
+
14
@pytest.fixture
def success_run():
    """Build a successful hermes run with an empty errors list."""
    fields = {
        "run_id": "run-success",
        "agent_name": "hermes",
        "collector": "hermes-trace-ingestor",
        "started_at": "2026-05-19T10:00:00Z",
        "task_name": "Task",
        "outcome": "success",
        "errors": [],
    }
    return CanonicalRun(**fields)
26
+
27
+
28
@pytest.fixture
def failure_run():
    """Build a failed hermes run carrying a single TypeError entry."""
    fields = {
        "run_id": "run-failure",
        "agent_name": "hermes",
        "collector": "hermes-trace-ingestor",
        "started_at": "2026-05-19T10:00:00Z",
        "task_name": "Task",
        "outcome": "failure",
        "errors": [{"signature": "TypeError", "message": "Type error"}],
    }
    return CanonicalRun(**fields)
40
+
41
+
42
@pytest.fixture
def run_with_tools():
    """Build a successful claude-code run with three tool calls (Read, Read, Edit)."""
    calls = [
        ToolCallRecord(id="1", name="Read"),
        ToolCallRecord(id="2", name="Read"),
        ToolCallRecord(id="3", name="Edit"),
    ]
    return CanonicalRun(
        run_id="run-tools",
        agent_name="claude-code",
        collector="claude-code-session-collector",
        started_at="2026-05-19T10:00:00Z",
        task_name="Task",
        outcome="success",
        tool_calls=calls,
    )
58
+
59
+
60
@pytest.fixture
def run_with_metrics():
    """Build a successful hermes run that has token metrics attached."""
    token_metrics = RunMetrics(
        input_tokens=5000,
        output_tokens=2000,
        cache_tokens=500,
        tool_call_count=0,
    )
    return CanonicalRun(
        run_id="run-metrics",
        agent_name="hermes",
        collector="hermes-trace-ingestor",
        started_at="2026-05-19T10:00:00Z",
        task_name="Task",
        outcome="success",
        metrics=token_metrics,
    )
77
+
78
+
79
class TestOutcomeScorer:
    """Expected OutcomeScorer results for each outcome value."""

    def test_success_outcome(self, success_run):
        """A 'success' outcome scores 1.0 and passes."""
        result = OutcomeScorer().score(success_run)

        assert result.score == 1.0
        assert result.passed is True
        assert result.scorer == "outcome"

    def test_failure_outcome(self, failure_run):
        """A 'failure' outcome scores 0.0 and does not pass."""
        result = OutcomeScorer().score(failure_run)

        assert result.score == 0.0
        assert result.passed is False

    def test_partial_outcome(self, success_run):
        """A 'partial' outcome scores 0.5 and does not pass."""
        # Re-label the fixture run before it is scored.
        success_run.outcome = "partial"
        result = OutcomeScorer().score(success_run)

        assert result.score == 0.5
        assert result.passed is False

    def test_unknown_outcome(self, success_run):
        """An 'unknown' outcome scores 0.3 and does not pass."""
        # Re-label the fixture run before it is scored.
        success_run.outcome = "unknown"
        result = OutcomeScorer().score(success_run)

        assert result.score == 0.3
        assert result.passed is False

    def test_applies_to_all(self, success_run):
        """applies_to() is True for a plain successful run."""
        outcome_scorer = OutcomeScorer()
        assert outcome_scorer.applies_to(success_run) is True
121
+
122
+
123
class TestToolEfficiencyScorer:
    """Expected ToolEfficiencyScorer results with and without tool calls."""

    def test_no_tools(self, success_run):
        """A run without tool calls scores 1.0 and passes."""
        result = ToolEfficiencyScorer().score(success_run)

        assert result.score == 1.0
        assert result.passed is True

    def test_efficient_tools(self, run_with_tools):
        """Two unique tools over three calls clears the 0.3 bar and passes."""
        result = ToolEfficiencyScorer().score(run_with_tools)

        assert result.score > 0.3  # Passes threshold
        assert result.passed is True
        breakdown = result.details
        assert breakdown["unique_tools"] == 2
        assert breakdown["total_calls"] == 3

    def test_applies_only_with_tools(self, success_run, run_with_tools):
        """applies_to() is False without tool calls and True with them."""
        tool_scorer = ToolEfficiencyScorer()
        assert tool_scorer.applies_to(success_run) is False
        assert tool_scorer.applies_to(run_with_tools) is True
149
+
150
+
151
class TestTokenCostScorer:
    """Expected TokenCostScorer results across token volumes."""

    def test_low_tokens(self, run_with_metrics):
        """The fixture's token counts score above 0.5 and pass."""
        result = TokenCostScorer().score(run_with_metrics)

        assert result.score > 0.5
        assert result.passed is True

    def test_high_tokens(self, run_with_metrics):
        """Inflated token counts score below 0.3 and do not pass."""
        # Raise the fixture's token usage before scoring.
        usage = run_with_metrics.metrics
        usage.input_tokens = 40000
        usage.output_tokens = 15000
        result = TokenCostScorer().score(run_with_metrics)

        assert result.score < 0.3
        assert result.passed is False

    def test_no_metrics(self, success_run):
        """A run built without metrics scores 1.0 and passes."""
        result = TokenCostScorer().score(success_run)

        assert result.score == 1.0
        assert result.passed is True

    def test_applies_only_with_metrics(self, success_run, run_with_metrics):
        """applies_to() is False without metrics and True with them."""
        cost_scorer = TokenCostScorer()
        assert cost_scorer.applies_to(success_run) is False
        assert cost_scorer.applies_to(run_with_metrics) is True
185
+
186
+
187
class TestErrorRecoveryScorer:
    """Expected ErrorRecoveryScorer results for success and failure runs."""

    def test_success_no_errors(self, success_run):
        """A success without errors scores 1.0 and passes."""
        result = ErrorRecoveryScorer().score(success_run)

        assert result.score == 1.0
        assert result.passed is True

    def test_success_with_errors(self, success_run):
        """A success that recorded an error scores 0.8 and still passes."""
        # Attach an error entry to the otherwise clean fixture run.
        success_run.errors = [{"signature": "Warning"}]
        result = ErrorRecoveryScorer().score(success_run)

        assert result.score == 0.8
        assert result.passed is True

    def test_failure(self, failure_run):
        """A failed run scores 0.0 and does not pass."""
        result = ErrorRecoveryScorer().score(failure_run)

        assert result.score == 0.0
        assert result.passed is False

    def test_applies_to_all(self, success_run):
        """applies_to() is True for a plain successful run."""
        recovery_scorer = ErrorRecoveryScorer()
        assert recovery_scorer.applies_to(success_run) is True
219
+
220
+
221
class TestScorerDeterminism:
    """Scoring the same run twice must give the same result."""

    @staticmethod
    def _assert_repeatable(scorer, run):
        """Score *run* twice with one scorer and compare score and passed."""
        first = scorer.score(run)
        second = scorer.score(run)

        assert first.score == second.score
        assert first.passed == second.passed

    def test_outcome_scorer_determinism(self, success_run):
        """OutcomeScorer gives identical results on repeated scoring."""
        self._assert_repeatable(OutcomeScorer(), success_run)

    def test_tool_scorer_determinism(self, run_with_tools):
        """ToolEfficiencyScorer gives identical results on repeated scoring."""
        self._assert_repeatable(ToolEfficiencyScorer(), run_with_tools)

    def test_token_scorer_determinism(self, run_with_metrics):
        """TokenCostScorer gives identical results on repeated scoring."""
        self._assert_repeatable(TokenCostScorer(), run_with_metrics)