@useatlas/create 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (515)
  1. package/README.md +231 -0
  2. package/index.ts +829 -0
  3. package/package.json +38 -0
  4. package/templates/docker/.env.example +67 -0
  5. package/templates/docker/Dockerfile +52 -0
  6. package/templates/docker/bin/__tests__/benchmark.test.ts +598 -0
  7. package/templates/docker/bin/__tests__/duckdb-ingest.test.ts +171 -0
  8. package/templates/docker/bin/__tests__/eval.test.ts +434 -0
  9. package/templates/docker/bin/__tests__/matview-partition.test.ts +615 -0
  10. package/templates/docker/bin/__tests__/multi-source.test.ts +113 -0
  11. package/templates/docker/bin/__tests__/plugin-cli.test.ts +322 -0
  12. package/templates/docker/bin/__tests__/profiler-heuristics.test.ts +608 -0
  13. package/templates/docker/bin/__tests__/query.test.ts +240 -0
  14. package/templates/docker/bin/__tests__/schema-drift.test.ts +542 -0
  15. package/templates/docker/bin/__tests__/view-yaml-generation.test.ts +146 -0
  16. package/templates/docker/bin/atlas.ts +5044 -0
  17. package/templates/docker/bin/benchmark.ts +695 -0
  18. package/templates/docker/bin/enrich.ts +559 -0
  19. package/templates/docker/bin/eval.ts +770 -0
  20. package/templates/docker/bin/smoke.ts +438 -0
  21. package/templates/docker/data/.gitkeep +0 -0
  22. package/templates/docker/data/cybersec.sql +1961 -0
  23. package/templates/docker/data/demo-semantic/catalog.yml +40 -0
  24. package/templates/docker/data/demo-semantic/entities/accounts.yml +170 -0
  25. package/templates/docker/data/demo-semantic/entities/companies.yml +207 -0
  26. package/templates/docker/data/demo-semantic/entities/people.yml +145 -0
  27. package/templates/docker/data/demo-semantic/glossary.yml +22 -0
  28. package/templates/docker/data/demo-semantic/metrics/accounts.yml +38 -0
  29. package/templates/docker/data/demo-semantic/metrics/companies.yml +89 -0
  30. package/templates/docker/data/demo.sql +373 -0
  31. package/templates/docker/data/ecommerce.sql +1690 -0
  32. package/templates/docker/data/init-demo-db.sql +8 -0
  33. package/templates/docker/docker-compose.yml +34 -0
  34. package/templates/docker/docs/deploy.md +390 -0
  35. package/templates/docker/eslint.config.mjs +18 -0
  36. package/templates/docker/gitignore +5 -0
  37. package/templates/docker/next.config.ts +9 -0
  38. package/templates/docker/package.json +59 -0
  39. package/templates/docker/postcss.config.mjs +8 -0
  40. package/templates/docker/public/.gitkeep +0 -0
  41. package/templates/docker/public/favicon.svg +4 -0
  42. package/templates/docker/railway.json +13 -0
  43. package/templates/docker/render.yaml +34 -0
  44. package/templates/docker/semantic/catalog.yml +5 -0
  45. package/templates/docker/semantic/entities/.gitkeep +0 -0
  46. package/templates/docker/semantic/glossary.yml +6 -0
  47. package/templates/docker/semantic/metrics/.gitkeep +0 -0
  48. package/templates/docker/sidecar/Dockerfile +28 -0
  49. package/templates/docker/sidecar/railway.json +14 -0
  50. package/templates/docker/sidecar/server.ts +188 -0
  51. package/templates/docker/src/api/__tests__/actions.test.ts +683 -0
  52. package/templates/docker/src/api/__tests__/admin.test.ts +820 -0
  53. package/templates/docker/src/api/__tests__/auth.test.ts +165 -0
  54. package/templates/docker/src/api/__tests__/chat.test.ts +376 -0
  55. package/templates/docker/src/api/__tests__/conversations.test.ts +555 -0
  56. package/templates/docker/src/api/__tests__/cors.test.ts +135 -0
  57. package/templates/docker/src/api/__tests__/health-plugin.test.ts +169 -0
  58. package/templates/docker/src/api/__tests__/health.test.ts +261 -0
  59. package/templates/docker/src/api/__tests__/query.test.ts +891 -0
  60. package/templates/docker/src/api/__tests__/scheduled-tasks.test.ts +601 -0
  61. package/templates/docker/src/api/__tests__/slack.test.ts +847 -0
  62. package/templates/docker/src/api/index.ts +117 -0
  63. package/templates/docker/src/api/routes/actions.ts +274 -0
  64. package/templates/docker/src/api/routes/admin.ts +757 -0
  65. package/templates/docker/src/api/routes/auth.ts +48 -0
  66. package/templates/docker/src/api/routes/chat.ts +465 -0
  67. package/templates/docker/src/api/routes/conversations.ts +266 -0
  68. package/templates/docker/src/api/routes/health.ts +287 -0
  69. package/templates/docker/src/api/routes/openapi.ts +390 -0
  70. package/templates/docker/src/api/routes/query.ts +318 -0
  71. package/templates/docker/src/api/routes/scheduled-tasks.ts +467 -0
  72. package/templates/docker/src/api/routes/slack.ts +611 -0
  73. package/templates/docker/src/api/server.ts +226 -0
  74. package/templates/docker/src/app/api/[...route]/route.ts +33 -0
  75. package/templates/docker/src/app/error.tsx +24 -0
  76. package/templates/docker/src/app/globals.css +126 -0
  77. package/templates/docker/src/app/layout.tsx +19 -0
  78. package/templates/docker/src/app/page.tsx +14 -0
  79. package/templates/docker/src/global.d.ts +1 -0
  80. package/templates/docker/src/lib/__tests__/agent-cache.test.ts +437 -0
  81. package/templates/docker/src/lib/__tests__/agent-dialect.test.ts +114 -0
  82. package/templates/docker/src/lib/__tests__/agent-health-annotations.test.ts +164 -0
  83. package/templates/docker/src/lib/__tests__/agent-integration.test.ts +514 -0
  84. package/templates/docker/src/lib/__tests__/config-actions.test.ts +166 -0
  85. package/templates/docker/src/lib/__tests__/config.test.ts +1063 -0
  86. package/templates/docker/src/lib/__tests__/conversations.test.ts +589 -0
  87. package/templates/docker/src/lib/__tests__/errors.test.ts +256 -0
  88. package/templates/docker/src/lib/__tests__/logger.test.ts +200 -0
  89. package/templates/docker/src/lib/__tests__/providers.test.ts +99 -0
  90. package/templates/docker/src/lib/__tests__/rls.test.ts +435 -0
  91. package/templates/docker/src/lib/__tests__/scheduled-task-types.test.ts +124 -0
  92. package/templates/docker/src/lib/__tests__/scheduled-tasks.test.ts +550 -0
  93. package/templates/docker/src/lib/__tests__/semantic-index.test.ts +547 -0
  94. package/templates/docker/src/lib/__tests__/semantic-multisource.test.ts +544 -0
  95. package/templates/docker/src/lib/__tests__/semantic.test.ts +363 -0
  96. package/templates/docker/src/lib/__tests__/startup-actions.test.ts +452 -0
  97. package/templates/docker/src/lib/__tests__/startup.test.ts +465 -0
  98. package/templates/docker/src/lib/__tests__/tracing.test.ts +28 -0
  99. package/templates/docker/src/lib/action-types.ts +95 -0
  100. package/templates/docker/src/lib/agent-query.ts +178 -0
  101. package/templates/docker/src/lib/agent.ts +505 -0
  102. package/templates/docker/src/lib/api-url.ts +2 -0
  103. package/templates/docker/src/lib/auth/__tests__/audit.test.ts +418 -0
  104. package/templates/docker/src/lib/auth/__tests__/byot-integration.test.ts +222 -0
  105. package/templates/docker/src/lib/auth/__tests__/byot.test.ts +366 -0
  106. package/templates/docker/src/lib/auth/__tests__/detect.test.ts +190 -0
  107. package/templates/docker/src/lib/auth/__tests__/managed.test.ts +173 -0
  108. package/templates/docker/src/lib/auth/__tests__/middleware.test.ts +456 -0
  109. package/templates/docker/src/lib/auth/__tests__/migrate.test.ts +201 -0
  110. package/templates/docker/src/lib/auth/__tests__/permissions.test.ts +225 -0
  111. package/templates/docker/src/lib/auth/__tests__/server.test.ts +34 -0
  112. package/templates/docker/src/lib/auth/__tests__/simple-key.test.ts +176 -0
  113. package/templates/docker/src/lib/auth/__tests__/types.test.ts +44 -0
  114. package/templates/docker/src/lib/auth/audit.ts +89 -0
  115. package/templates/docker/src/lib/auth/byot.ts +158 -0
  116. package/templates/docker/src/lib/auth/client.ts +35 -0
  117. package/templates/docker/src/lib/auth/detect.ts +83 -0
  118. package/templates/docker/src/lib/auth/managed.ts +73 -0
  119. package/templates/docker/src/lib/auth/middleware.ts +208 -0
  120. package/templates/docker/src/lib/auth/migrate.ts +111 -0
  121. package/templates/docker/src/lib/auth/permissions.ts +156 -0
  122. package/templates/docker/src/lib/auth/server.ts +142 -0
  123. package/templates/docker/src/lib/auth/simple-key.ts +92 -0
  124. package/templates/docker/src/lib/auth/types.ts +49 -0
  125. package/templates/docker/src/lib/config.ts +704 -0
  126. package/templates/docker/src/lib/conversation-types.ts +29 -0
  127. package/templates/docker/src/lib/conversations.ts +270 -0
  128. package/templates/docker/src/lib/db/__tests__/connection.test.ts +69 -0
  129. package/templates/docker/src/lib/db/__tests__/duckdb.test.ts +141 -0
  130. package/templates/docker/src/lib/db/__tests__/internal.test.ts +387 -0
  131. package/templates/docker/src/lib/db/__tests__/registry-health.test.ts +207 -0
  132. package/templates/docker/src/lib/db/__tests__/registry-pool-limits.test.ts +156 -0
  133. package/templates/docker/src/lib/db/__tests__/registry.test.ts +595 -0
  134. package/templates/docker/src/lib/db/__tests__/salesforce.test.ts +339 -0
  135. package/templates/docker/src/lib/db/__tests__/snowflake.test.ts +217 -0
  136. package/templates/docker/src/lib/db/__tests__/source-rate-limit.test.ts +130 -0
  137. package/templates/docker/src/lib/db/connection.ts +753 -0
  138. package/templates/docker/src/lib/db/duckdb.ts +122 -0
  139. package/templates/docker/src/lib/db/internal.ts +273 -0
  140. package/templates/docker/src/lib/db/salesforce.ts +342 -0
  141. package/templates/docker/src/lib/db/source-rate-limit.ts +191 -0
  142. package/templates/docker/src/lib/errors.ts +154 -0
  143. package/templates/docker/src/lib/logger.ts +98 -0
  144. package/templates/docker/src/lib/plugins/__tests__/hooks-integration.test.ts +202 -0
  145. package/templates/docker/src/lib/plugins/__tests__/hooks.test.ts +529 -0
  146. package/templates/docker/src/lib/plugins/__tests__/migrate.test.ts +521 -0
  147. package/templates/docker/src/lib/plugins/__tests__/registry.test.ts +346 -0
  148. package/templates/docker/src/lib/plugins/__tests__/tools.test.ts +49 -0
  149. package/templates/docker/src/lib/plugins/__tests__/wiring.test.ts +585 -0
  150. package/templates/docker/src/lib/plugins/hooks.ts +162 -0
  151. package/templates/docker/src/lib/plugins/index.ts +9 -0
  152. package/templates/docker/src/lib/plugins/migrate.ts +309 -0
  153. package/templates/docker/src/lib/plugins/registry.ts +231 -0
  154. package/templates/docker/src/lib/plugins/tools.ts +39 -0
  155. package/templates/docker/src/lib/plugins/wiring.ts +291 -0
  156. package/templates/docker/src/lib/providers.ts +102 -0
  157. package/templates/docker/src/lib/rls.ts +321 -0
  158. package/templates/docker/src/lib/scheduled-task-types.ts +132 -0
  159. package/templates/docker/src/lib/scheduled-tasks.ts +475 -0
  160. package/templates/docker/src/lib/scheduler/__tests__/delivery.test.ts +192 -0
  161. package/templates/docker/src/lib/scheduler/__tests__/engine.test.ts +248 -0
  162. package/templates/docker/src/lib/scheduler/__tests__/format-email.test.ts +96 -0
  163. package/templates/docker/src/lib/scheduler/__tests__/format-slack.test.ts +78 -0
  164. package/templates/docker/src/lib/scheduler/__tests__/format-webhook.test.ts +78 -0
  165. package/templates/docker/src/lib/scheduler/delivery.ts +248 -0
  166. package/templates/docker/src/lib/scheduler/engine.ts +317 -0
  167. package/templates/docker/src/lib/scheduler/executor.ts +73 -0
  168. package/templates/docker/src/lib/scheduler/format-email.ts +109 -0
  169. package/templates/docker/src/lib/scheduler/format-slack.ts +35 -0
  170. package/templates/docker/src/lib/scheduler/format-webhook.ts +37 -0
  171. package/templates/docker/src/lib/scheduler/index.ts +7 -0
  172. package/templates/docker/src/lib/security.ts +11 -0
  173. package/templates/docker/src/lib/semantic-index.ts +503 -0
  174. package/templates/docker/src/lib/semantic.ts +387 -0
  175. package/templates/docker/src/lib/sidecar-types.ts +16 -0
  176. package/templates/docker/src/lib/slack/__tests__/api.test.ts +160 -0
  177. package/templates/docker/src/lib/slack/__tests__/format.test.ts +237 -0
  178. package/templates/docker/src/lib/slack/__tests__/store.test.ts +188 -0
  179. package/templates/docker/src/lib/slack/__tests__/threads.test.ts +112 -0
  180. package/templates/docker/src/lib/slack/__tests__/verify.test.ts +111 -0
  181. package/templates/docker/src/lib/slack/api.ts +102 -0
  182. package/templates/docker/src/lib/slack/format.ts +209 -0
  183. package/templates/docker/src/lib/slack/store.ts +107 -0
  184. package/templates/docker/src/lib/slack/threads.ts +64 -0
  185. package/templates/docker/src/lib/slack/verify.ts +71 -0
  186. package/templates/docker/src/lib/startup.ts +730 -0
  187. package/templates/docker/src/lib/tools/__tests__/action-permissions.test.ts +594 -0
  188. package/templates/docker/src/lib/tools/__tests__/custom-validation.test.ts +238 -0
  189. package/templates/docker/src/lib/tools/__tests__/explore-backend.test.ts +267 -0
  190. package/templates/docker/src/lib/tools/__tests__/explore-nsjail.test.ts +492 -0
  191. package/templates/docker/src/lib/tools/__tests__/explore-plugin.test.ts +374 -0
  192. package/templates/docker/src/lib/tools/__tests__/explore-sdk-compat.test.ts +82 -0
  193. package/templates/docker/src/lib/tools/__tests__/explore-sidecar.test.ts +208 -0
  194. package/templates/docker/src/lib/tools/__tests__/registry-actions.test.ts +144 -0
  195. package/templates/docker/src/lib/tools/__tests__/registry.test.ts +235 -0
  196. package/templates/docker/src/lib/tools/__tests__/salesforce-tool.test.ts +154 -0
  197. package/templates/docker/src/lib/tools/__tests__/soql-validation.test.ts +303 -0
  198. package/templates/docker/src/lib/tools/__tests__/sql-audit.test.ts +225 -0
  199. package/templates/docker/src/lib/tools/__tests__/sql-connection-whitelist.test.ts +98 -0
  200. package/templates/docker/src/lib/tools/__tests__/sql-duckdb.test.ts +233 -0
  201. package/templates/docker/src/lib/tools/__tests__/sql-ratelimit.test.ts +225 -0
  202. package/templates/docker/src/lib/tools/__tests__/sql.test.ts +1012 -0
  203. package/templates/docker/src/lib/tools/actions/__tests__/audit.test.ts +211 -0
  204. package/templates/docker/src/lib/tools/actions/__tests__/email.test.ts +378 -0
  205. package/templates/docker/src/lib/tools/actions/__tests__/handler.test.ts +681 -0
  206. package/templates/docker/src/lib/tools/actions/__tests__/jira.test.ts +427 -0
  207. package/templates/docker/src/lib/tools/actions/audit.ts +47 -0
  208. package/templates/docker/src/lib/tools/actions/email.ts +191 -0
  209. package/templates/docker/src/lib/tools/actions/handler.ts +591 -0
  210. package/templates/docker/src/lib/tools/actions/index.ts +23 -0
  211. package/templates/docker/src/lib/tools/actions/jira.ts +220 -0
  212. package/templates/docker/src/lib/tools/explore-nsjail.ts +343 -0
  213. package/templates/docker/src/lib/tools/explore-sandbox.ts +264 -0
  214. package/templates/docker/src/lib/tools/explore-sidecar.ts +163 -0
  215. package/templates/docker/src/lib/tools/explore.ts +379 -0
  216. package/templates/docker/src/lib/tools/registry.ts +221 -0
  217. package/templates/docker/src/lib/tools/salesforce.ts +138 -0
  218. package/templates/docker/src/lib/tools/soql-validation.ts +172 -0
  219. package/templates/docker/src/lib/tools/sql.ts +680 -0
  220. package/templates/docker/src/lib/tracing.ts +40 -0
  221. package/templates/docker/src/lib/utils.ts +6 -0
  222. package/templates/docker/src/test-setup.ts +38 -0
  223. package/templates/docker/src/types/vercel-sandbox.d.ts +54 -0
  224. package/templates/docker/src/ui/components/actions/action-approval-card.tsx +295 -0
  225. package/templates/docker/src/ui/components/actions/action-status-badge.tsx +50 -0
  226. package/templates/docker/src/ui/components/admin/admin-layout.tsx +26 -0
  227. package/templates/docker/src/ui/components/admin/admin-sidebar.tsx +96 -0
  228. package/templates/docker/src/ui/components/admin/empty-state.tsx +24 -0
  229. package/templates/docker/src/ui/components/admin/entity-detail.tsx +233 -0
  230. package/templates/docker/src/ui/components/admin/entity-list.tsx +96 -0
  231. package/templates/docker/src/ui/components/admin/error-banner.tsx +22 -0
  232. package/templates/docker/src/ui/components/admin/feature-disabled.tsx +44 -0
  233. package/templates/docker/src/ui/components/admin/health-badge.tsx +30 -0
  234. package/templates/docker/src/ui/components/admin/loading-state.tsx +14 -0
  235. package/templates/docker/src/ui/components/admin/stat-card.tsx +32 -0
  236. package/templates/docker/src/ui/components/atlas-chat.tsx +370 -0
  237. package/templates/docker/src/ui/components/chart/chart-detection.ts +261 -0
  238. package/templates/docker/src/ui/components/chart/result-chart.tsx +375 -0
  239. package/templates/docker/src/ui/components/chat/api-key-bar.tsx +66 -0
  240. package/templates/docker/src/ui/components/chat/copy-button.tsx +25 -0
  241. package/templates/docker/src/ui/components/chat/data-table.tsx +102 -0
  242. package/templates/docker/src/ui/components/chat/error-banner.tsx +32 -0
  243. package/templates/docker/src/ui/components/chat/explore-card.tsx +41 -0
  244. package/templates/docker/src/ui/components/chat/loading-card.tsx +10 -0
  245. package/templates/docker/src/ui/components/chat/managed-auth-card.tsx +116 -0
  246. package/templates/docker/src/ui/components/chat/markdown.tsx +72 -0
  247. package/templates/docker/src/ui/components/chat/sql-block.tsx +30 -0
  248. package/templates/docker/src/ui/components/chat/sql-result-card.tsx +144 -0
  249. package/templates/docker/src/ui/components/chat/starter-prompts.ts +6 -0
  250. package/templates/docker/src/ui/components/chat/tool-part.tsx +40 -0
  251. package/templates/docker/src/ui/components/chat/typing-indicator.tsx +19 -0
  252. package/templates/docker/src/ui/components/conversations/conversation-item.tsx +120 -0
  253. package/templates/docker/src/ui/components/conversations/conversation-list.tsx +66 -0
  254. package/templates/docker/src/ui/components/conversations/conversation-sidebar.tsx +78 -0
  255. package/templates/docker/src/ui/components/conversations/delete-confirmation.tsx +27 -0
  256. package/templates/docker/src/ui/context.tsx +78 -0
  257. package/templates/docker/src/ui/hooks/use-admin-fetch.ts +104 -0
  258. package/templates/docker/src/ui/hooks/use-conversations.ts +184 -0
  259. package/templates/docker/src/ui/hooks/use-dark-mode.ts +17 -0
  260. package/templates/docker/src/ui/lib/action-types.ts +63 -0
  261. package/templates/docker/src/ui/lib/helpers.ts +104 -0
  262. package/templates/docker/src/ui/lib/types.ts +145 -0
  263. package/templates/docker/tsconfig.json +41 -0
  264. package/templates/docker/vercel.json +3 -0
  265. package/templates/nextjs-standalone/.env.example +68 -0
  266. package/templates/nextjs-standalone/bin/__tests__/benchmark.test.ts +598 -0
  267. package/templates/nextjs-standalone/bin/__tests__/duckdb-ingest.test.ts +171 -0
  268. package/templates/nextjs-standalone/bin/__tests__/eval.test.ts +434 -0
  269. package/templates/nextjs-standalone/bin/__tests__/matview-partition.test.ts +615 -0
  270. package/templates/nextjs-standalone/bin/__tests__/multi-source.test.ts +113 -0
  271. package/templates/nextjs-standalone/bin/__tests__/plugin-cli.test.ts +322 -0
  272. package/templates/nextjs-standalone/bin/__tests__/profiler-heuristics.test.ts +608 -0
  273. package/templates/nextjs-standalone/bin/__tests__/query.test.ts +240 -0
  274. package/templates/nextjs-standalone/bin/__tests__/schema-drift.test.ts +542 -0
  275. package/templates/nextjs-standalone/bin/__tests__/view-yaml-generation.test.ts +146 -0
  276. package/templates/nextjs-standalone/bin/atlas.ts +5044 -0
  277. package/templates/nextjs-standalone/bin/benchmark.ts +695 -0
  278. package/templates/nextjs-standalone/bin/enrich.ts +559 -0
  279. package/templates/nextjs-standalone/bin/eval.ts +770 -0
  280. package/templates/nextjs-standalone/bin/smoke.ts +438 -0
  281. package/templates/nextjs-standalone/data/.gitkeep +0 -0
  282. package/templates/nextjs-standalone/data/cybersec.sql +1961 -0
  283. package/templates/nextjs-standalone/data/demo-semantic/catalog.yml +40 -0
  284. package/templates/nextjs-standalone/data/demo-semantic/entities/accounts.yml +170 -0
  285. package/templates/nextjs-standalone/data/demo-semantic/entities/companies.yml +207 -0
  286. package/templates/nextjs-standalone/data/demo-semantic/entities/people.yml +145 -0
  287. package/templates/nextjs-standalone/data/demo-semantic/glossary.yml +22 -0
  288. package/templates/nextjs-standalone/data/demo-semantic/metrics/accounts.yml +38 -0
  289. package/templates/nextjs-standalone/data/demo-semantic/metrics/companies.yml +89 -0
  290. package/templates/nextjs-standalone/data/demo.sql +373 -0
  291. package/templates/nextjs-standalone/data/ecommerce.sql +1690 -0
  292. package/templates/nextjs-standalone/data/init-demo-db.sql +8 -0
  293. package/templates/nextjs-standalone/docs/deploy.md +390 -0
  294. package/templates/nextjs-standalone/eslint.config.mjs +18 -0
  295. package/templates/nextjs-standalone/gitignore +5 -0
  296. package/templates/nextjs-standalone/next.config.ts +10 -0
  297. package/templates/nextjs-standalone/package.json +63 -0
  298. package/templates/nextjs-standalone/postcss.config.mjs +8 -0
  299. package/templates/nextjs-standalone/semantic/catalog.yml +5 -0
  300. package/templates/nextjs-standalone/semantic/entities/.gitkeep +0 -0
  301. package/templates/nextjs-standalone/semantic/glossary.yml +6 -0
  302. package/templates/nextjs-standalone/semantic/metrics/.gitkeep +0 -0
  303. package/templates/nextjs-standalone/src/api/__tests__/actions.test.ts +683 -0
  304. package/templates/nextjs-standalone/src/api/__tests__/admin.test.ts +820 -0
  305. package/templates/nextjs-standalone/src/api/__tests__/auth.test.ts +165 -0
  306. package/templates/nextjs-standalone/src/api/__tests__/chat.test.ts +376 -0
  307. package/templates/nextjs-standalone/src/api/__tests__/conversations.test.ts +555 -0
  308. package/templates/nextjs-standalone/src/api/__tests__/cors.test.ts +135 -0
  309. package/templates/nextjs-standalone/src/api/__tests__/health-plugin.test.ts +169 -0
  310. package/templates/nextjs-standalone/src/api/__tests__/health.test.ts +261 -0
  311. package/templates/nextjs-standalone/src/api/__tests__/query.test.ts +891 -0
  312. package/templates/nextjs-standalone/src/api/__tests__/scheduled-tasks.test.ts +601 -0
  313. package/templates/nextjs-standalone/src/api/__tests__/slack.test.ts +847 -0
  314. package/templates/nextjs-standalone/src/api/index.ts +117 -0
  315. package/templates/nextjs-standalone/src/api/routes/actions.ts +274 -0
  316. package/templates/nextjs-standalone/src/api/routes/admin.ts +757 -0
  317. package/templates/nextjs-standalone/src/api/routes/auth.ts +48 -0
  318. package/templates/nextjs-standalone/src/api/routes/chat.ts +465 -0
  319. package/templates/nextjs-standalone/src/api/routes/conversations.ts +266 -0
  320. package/templates/nextjs-standalone/src/api/routes/health.ts +287 -0
  321. package/templates/nextjs-standalone/src/api/routes/openapi.ts +390 -0
  322. package/templates/nextjs-standalone/src/api/routes/query.ts +318 -0
  323. package/templates/nextjs-standalone/src/api/routes/scheduled-tasks.ts +467 -0
  324. package/templates/nextjs-standalone/src/api/routes/slack.ts +611 -0
  325. package/templates/nextjs-standalone/src/api/server.ts +226 -0
  326. package/templates/nextjs-standalone/src/app/api/[...route]/route.ts +33 -0
  327. package/templates/nextjs-standalone/src/app/error.tsx +24 -0
  328. package/templates/nextjs-standalone/src/app/global-error.tsx +68 -0
  329. package/templates/nextjs-standalone/src/app/globals.css +126 -0
  330. package/templates/nextjs-standalone/src/app/layout.tsx +19 -0
  331. package/templates/nextjs-standalone/src/app/page.tsx +14 -0
  332. package/templates/nextjs-standalone/src/lib/__tests__/agent-cache.test.ts +437 -0
  333. package/templates/nextjs-standalone/src/lib/__tests__/agent-dialect.test.ts +114 -0
  334. package/templates/nextjs-standalone/src/lib/__tests__/agent-health-annotations.test.ts +164 -0
  335. package/templates/nextjs-standalone/src/lib/__tests__/agent-integration.test.ts +514 -0
  336. package/templates/nextjs-standalone/src/lib/__tests__/config-actions.test.ts +166 -0
  337. package/templates/nextjs-standalone/src/lib/__tests__/config.test.ts +1063 -0
  338. package/templates/nextjs-standalone/src/lib/__tests__/conversations.test.ts +589 -0
  339. package/templates/nextjs-standalone/src/lib/__tests__/errors.test.ts +256 -0
  340. package/templates/nextjs-standalone/src/lib/__tests__/logger.test.ts +200 -0
  341. package/templates/nextjs-standalone/src/lib/__tests__/providers.test.ts +99 -0
  342. package/templates/nextjs-standalone/src/lib/__tests__/rls.test.ts +435 -0
  343. package/templates/nextjs-standalone/src/lib/__tests__/scheduled-task-types.test.ts +124 -0
  344. package/templates/nextjs-standalone/src/lib/__tests__/scheduled-tasks.test.ts +550 -0
  345. package/templates/nextjs-standalone/src/lib/__tests__/semantic-index.test.ts +547 -0
  346. package/templates/nextjs-standalone/src/lib/__tests__/semantic-multisource.test.ts +544 -0
  347. package/templates/nextjs-standalone/src/lib/__tests__/semantic.test.ts +363 -0
  348. package/templates/nextjs-standalone/src/lib/__tests__/startup-actions.test.ts +452 -0
  349. package/templates/nextjs-standalone/src/lib/__tests__/startup.test.ts +465 -0
  350. package/templates/nextjs-standalone/src/lib/__tests__/tracing.test.ts +28 -0
  351. package/templates/nextjs-standalone/src/lib/action-types.ts +95 -0
  352. package/templates/nextjs-standalone/src/lib/agent-query.ts +178 -0
  353. package/templates/nextjs-standalone/src/lib/agent.ts +505 -0
  354. package/templates/nextjs-standalone/src/lib/api-url.ts +3 -0
  355. package/templates/nextjs-standalone/src/lib/auth/__tests__/audit.test.ts +418 -0
  356. package/templates/nextjs-standalone/src/lib/auth/__tests__/byot-integration.test.ts +222 -0
  357. package/templates/nextjs-standalone/src/lib/auth/__tests__/byot.test.ts +366 -0
  358. package/templates/nextjs-standalone/src/lib/auth/__tests__/detect.test.ts +190 -0
  359. package/templates/nextjs-standalone/src/lib/auth/__tests__/managed.test.ts +173 -0
  360. package/templates/nextjs-standalone/src/lib/auth/__tests__/middleware.test.ts +456 -0
  361. package/templates/nextjs-standalone/src/lib/auth/__tests__/migrate.test.ts +201 -0
  362. package/templates/nextjs-standalone/src/lib/auth/__tests__/permissions.test.ts +225 -0
  363. package/templates/nextjs-standalone/src/lib/auth/__tests__/server.test.ts +34 -0
  364. package/templates/nextjs-standalone/src/lib/auth/__tests__/simple-key.test.ts +176 -0
  365. package/templates/nextjs-standalone/src/lib/auth/__tests__/types.test.ts +44 -0
  366. package/templates/nextjs-standalone/src/lib/auth/audit.ts +89 -0
  367. package/templates/nextjs-standalone/src/lib/auth/byot.ts +158 -0
  368. package/templates/nextjs-standalone/src/lib/auth/client.ts +23 -0
  369. package/templates/nextjs-standalone/src/lib/auth/detect.ts +83 -0
  370. package/templates/nextjs-standalone/src/lib/auth/managed.ts +73 -0
  371. package/templates/nextjs-standalone/src/lib/auth/middleware.ts +208 -0
  372. package/templates/nextjs-standalone/src/lib/auth/migrate.ts +111 -0
  373. package/templates/nextjs-standalone/src/lib/auth/permissions.ts +156 -0
  374. package/templates/nextjs-standalone/src/lib/auth/server.ts +142 -0
  375. package/templates/nextjs-standalone/src/lib/auth/simple-key.ts +92 -0
  376. package/templates/nextjs-standalone/src/lib/auth/types.ts +49 -0
  377. package/templates/nextjs-standalone/src/lib/config.ts +704 -0
  378. package/templates/nextjs-standalone/src/lib/conversation-types.ts +29 -0
  379. package/templates/nextjs-standalone/src/lib/conversations.ts +270 -0
  380. package/templates/nextjs-standalone/src/lib/db/__tests__/connection.test.ts +69 -0
  381. package/templates/nextjs-standalone/src/lib/db/__tests__/duckdb.test.ts +141 -0
  382. package/templates/nextjs-standalone/src/lib/db/__tests__/internal.test.ts +387 -0
  383. package/templates/nextjs-standalone/src/lib/db/__tests__/registry-health.test.ts +207 -0
  384. package/templates/nextjs-standalone/src/lib/db/__tests__/registry-pool-limits.test.ts +156 -0
  385. package/templates/nextjs-standalone/src/lib/db/__tests__/registry.test.ts +595 -0
  386. package/templates/nextjs-standalone/src/lib/db/__tests__/salesforce.test.ts +339 -0
  387. package/templates/nextjs-standalone/src/lib/db/__tests__/snowflake.test.ts +217 -0
  388. package/templates/nextjs-standalone/src/lib/db/__tests__/source-rate-limit.test.ts +130 -0
  389. package/templates/nextjs-standalone/src/lib/db/connection.ts +753 -0
  390. package/templates/nextjs-standalone/src/lib/db/duckdb.ts +122 -0
  391. package/templates/nextjs-standalone/src/lib/db/internal.ts +273 -0
  392. package/templates/nextjs-standalone/src/lib/db/salesforce.ts +342 -0
  393. package/templates/nextjs-standalone/src/lib/db/source-rate-limit.ts +191 -0
  394. package/templates/nextjs-standalone/src/lib/errors.ts +154 -0
  395. package/templates/nextjs-standalone/src/lib/logger.ts +98 -0
  396. package/templates/nextjs-standalone/src/lib/plugins/__tests__/hooks-integration.test.ts +202 -0
  397. package/templates/nextjs-standalone/src/lib/plugins/__tests__/hooks.test.ts +529 -0
  398. package/templates/nextjs-standalone/src/lib/plugins/__tests__/migrate.test.ts +521 -0
  399. package/templates/nextjs-standalone/src/lib/plugins/__tests__/registry.test.ts +346 -0
  400. package/templates/nextjs-standalone/src/lib/plugins/__tests__/tools.test.ts +49 -0
  401. package/templates/nextjs-standalone/src/lib/plugins/__tests__/wiring.test.ts +585 -0
  402. package/templates/nextjs-standalone/src/lib/plugins/hooks.ts +162 -0
  403. package/templates/nextjs-standalone/src/lib/plugins/index.ts +9 -0
  404. package/templates/nextjs-standalone/src/lib/plugins/migrate.ts +309 -0
  405. package/templates/nextjs-standalone/src/lib/plugins/registry.ts +231 -0
  406. package/templates/nextjs-standalone/src/lib/plugins/tools.ts +39 -0
  407. package/templates/nextjs-standalone/src/lib/plugins/wiring.ts +291 -0
  408. package/templates/nextjs-standalone/src/lib/providers.ts +102 -0
  409. package/templates/nextjs-standalone/src/lib/rls.ts +321 -0
  410. package/templates/nextjs-standalone/src/lib/scheduled-task-types.ts +132 -0
  411. package/templates/nextjs-standalone/src/lib/scheduled-tasks.ts +475 -0
  412. package/templates/nextjs-standalone/src/lib/scheduler/__tests__/delivery.test.ts +192 -0
  413. package/templates/nextjs-standalone/src/lib/scheduler/__tests__/engine.test.ts +248 -0
  414. package/templates/nextjs-standalone/src/lib/scheduler/__tests__/format-email.test.ts +96 -0
  415. package/templates/nextjs-standalone/src/lib/scheduler/__tests__/format-slack.test.ts +78 -0
  416. package/templates/nextjs-standalone/src/lib/scheduler/__tests__/format-webhook.test.ts +78 -0
  417. package/templates/nextjs-standalone/src/lib/scheduler/delivery.ts +248 -0
  418. package/templates/nextjs-standalone/src/lib/scheduler/engine.ts +317 -0
  419. package/templates/nextjs-standalone/src/lib/scheduler/executor.ts +73 -0
  420. package/templates/nextjs-standalone/src/lib/scheduler/format-email.ts +109 -0
  421. package/templates/nextjs-standalone/src/lib/scheduler/format-slack.ts +35 -0
  422. package/templates/nextjs-standalone/src/lib/scheduler/format-webhook.ts +37 -0
  423. package/templates/nextjs-standalone/src/lib/scheduler/index.ts +7 -0
  424. package/templates/nextjs-standalone/src/lib/security.ts +11 -0
  425. package/templates/nextjs-standalone/src/lib/semantic-index.ts +503 -0
  426. package/templates/nextjs-standalone/src/lib/semantic.ts +387 -0
  427. package/templates/nextjs-standalone/src/lib/sidecar-types.ts +16 -0
  428. package/templates/nextjs-standalone/src/lib/slack/__tests__/api.test.ts +160 -0
  429. package/templates/nextjs-standalone/src/lib/slack/__tests__/format.test.ts +237 -0
  430. package/templates/nextjs-standalone/src/lib/slack/__tests__/store.test.ts +188 -0
  431. package/templates/nextjs-standalone/src/lib/slack/__tests__/threads.test.ts +112 -0
  432. package/templates/nextjs-standalone/src/lib/slack/__tests__/verify.test.ts +111 -0
  433. package/templates/nextjs-standalone/src/lib/slack/api.ts +102 -0
  434. package/templates/nextjs-standalone/src/lib/slack/format.ts +209 -0
  435. package/templates/nextjs-standalone/src/lib/slack/store.ts +107 -0
  436. package/templates/nextjs-standalone/src/lib/slack/threads.ts +64 -0
  437. package/templates/nextjs-standalone/src/lib/slack/verify.ts +71 -0
  438. package/templates/nextjs-standalone/src/lib/startup.ts +730 -0
  439. package/templates/nextjs-standalone/src/lib/tools/__tests__/action-permissions.test.ts +594 -0
  440. package/templates/nextjs-standalone/src/lib/tools/__tests__/custom-validation.test.ts +238 -0
  441. package/templates/nextjs-standalone/src/lib/tools/__tests__/explore-backend.test.ts +267 -0
  442. package/templates/nextjs-standalone/src/lib/tools/__tests__/explore-nsjail.test.ts +492 -0
  443. package/templates/nextjs-standalone/src/lib/tools/__tests__/explore-plugin.test.ts +374 -0
  444. package/templates/nextjs-standalone/src/lib/tools/__tests__/explore-sdk-compat.test.ts +82 -0
  445. package/templates/nextjs-standalone/src/lib/tools/__tests__/explore-sidecar.test.ts +208 -0
  446. package/templates/nextjs-standalone/src/lib/tools/__tests__/registry-actions.test.ts +144 -0
  447. package/templates/nextjs-standalone/src/lib/tools/__tests__/registry.test.ts +235 -0
  448. package/templates/nextjs-standalone/src/lib/tools/__tests__/salesforce-tool.test.ts +154 -0
  449. package/templates/nextjs-standalone/src/lib/tools/__tests__/soql-validation.test.ts +303 -0
  450. package/templates/nextjs-standalone/src/lib/tools/__tests__/sql-audit.test.ts +225 -0
  451. package/templates/nextjs-standalone/src/lib/tools/__tests__/sql-connection-whitelist.test.ts +98 -0
  452. package/templates/nextjs-standalone/src/lib/tools/__tests__/sql-duckdb.test.ts +233 -0
  453. package/templates/nextjs-standalone/src/lib/tools/__tests__/sql-ratelimit.test.ts +225 -0
  454. package/templates/nextjs-standalone/src/lib/tools/__tests__/sql.test.ts +1012 -0
  455. package/templates/nextjs-standalone/src/lib/tools/actions/__tests__/audit.test.ts +211 -0
  456. package/templates/nextjs-standalone/src/lib/tools/actions/__tests__/email.test.ts +378 -0
  457. package/templates/nextjs-standalone/src/lib/tools/actions/__tests__/handler.test.ts +681 -0
  458. package/templates/nextjs-standalone/src/lib/tools/actions/__tests__/jira.test.ts +427 -0
  459. package/templates/nextjs-standalone/src/lib/tools/actions/audit.ts +47 -0
  460. package/templates/nextjs-standalone/src/lib/tools/actions/email.ts +191 -0
  461. package/templates/nextjs-standalone/src/lib/tools/actions/handler.ts +591 -0
  462. package/templates/nextjs-standalone/src/lib/tools/actions/index.ts +23 -0
  463. package/templates/nextjs-standalone/src/lib/tools/actions/jira.ts +220 -0
  464. package/templates/nextjs-standalone/src/lib/tools/explore-nsjail.ts +343 -0
  465. package/templates/nextjs-standalone/src/lib/tools/explore-sandbox.ts +264 -0
  466. package/templates/nextjs-standalone/src/lib/tools/explore-sidecar.ts +163 -0
  467. package/templates/nextjs-standalone/src/lib/tools/explore.ts +379 -0
  468. package/templates/nextjs-standalone/src/lib/tools/registry.ts +221 -0
  469. package/templates/nextjs-standalone/src/lib/tools/salesforce.ts +138 -0
  470. package/templates/nextjs-standalone/src/lib/tools/soql-validation.ts +172 -0
  471. package/templates/nextjs-standalone/src/lib/tools/sql.ts +680 -0
  472. package/templates/nextjs-standalone/src/lib/tracing.ts +40 -0
  473. package/templates/nextjs-standalone/src/lib/utils.ts +6 -0
  474. package/templates/nextjs-standalone/src/test-setup.ts +38 -0
  475. package/templates/nextjs-standalone/src/ui/components/actions/action-approval-card.tsx +295 -0
  476. package/templates/nextjs-standalone/src/ui/components/actions/action-status-badge.tsx +50 -0
  477. package/templates/nextjs-standalone/src/ui/components/admin/admin-layout.tsx +26 -0
  478. package/templates/nextjs-standalone/src/ui/components/admin/admin-sidebar.tsx +96 -0
  479. package/templates/nextjs-standalone/src/ui/components/admin/empty-state.tsx +24 -0
  480. package/templates/nextjs-standalone/src/ui/components/admin/entity-detail.tsx +233 -0
  481. package/templates/nextjs-standalone/src/ui/components/admin/entity-list.tsx +96 -0
  482. package/templates/nextjs-standalone/src/ui/components/admin/error-banner.tsx +22 -0
  483. package/templates/nextjs-standalone/src/ui/components/admin/feature-disabled.tsx +44 -0
  484. package/templates/nextjs-standalone/src/ui/components/admin/health-badge.tsx +30 -0
  485. package/templates/nextjs-standalone/src/ui/components/admin/loading-state.tsx +14 -0
  486. package/templates/nextjs-standalone/src/ui/components/admin/stat-card.tsx +32 -0
  487. package/templates/nextjs-standalone/src/ui/components/atlas-chat.tsx +370 -0
  488. package/templates/nextjs-standalone/src/ui/components/chart/chart-detection.ts +261 -0
  489. package/templates/nextjs-standalone/src/ui/components/chart/result-chart.tsx +375 -0
  490. package/templates/nextjs-standalone/src/ui/components/chat/api-key-bar.tsx +66 -0
  491. package/templates/nextjs-standalone/src/ui/components/chat/copy-button.tsx +25 -0
  492. package/templates/nextjs-standalone/src/ui/components/chat/data-table.tsx +102 -0
  493. package/templates/nextjs-standalone/src/ui/components/chat/error-banner.tsx +32 -0
  494. package/templates/nextjs-standalone/src/ui/components/chat/explore-card.tsx +41 -0
  495. package/templates/nextjs-standalone/src/ui/components/chat/loading-card.tsx +10 -0
  496. package/templates/nextjs-standalone/src/ui/components/chat/managed-auth-card.tsx +116 -0
  497. package/templates/nextjs-standalone/src/ui/components/chat/markdown.tsx +72 -0
  498. package/templates/nextjs-standalone/src/ui/components/chat/sql-block.tsx +30 -0
  499. package/templates/nextjs-standalone/src/ui/components/chat/sql-result-card.tsx +144 -0
  500. package/templates/nextjs-standalone/src/ui/components/chat/starter-prompts.ts +6 -0
  501. package/templates/nextjs-standalone/src/ui/components/chat/tool-part.tsx +40 -0
  502. package/templates/nextjs-standalone/src/ui/components/chat/typing-indicator.tsx +19 -0
  503. package/templates/nextjs-standalone/src/ui/components/conversations/conversation-item.tsx +120 -0
  504. package/templates/nextjs-standalone/src/ui/components/conversations/conversation-list.tsx +66 -0
  505. package/templates/nextjs-standalone/src/ui/components/conversations/conversation-sidebar.tsx +78 -0
  506. package/templates/nextjs-standalone/src/ui/components/conversations/delete-confirmation.tsx +27 -0
  507. package/templates/nextjs-standalone/src/ui/context.tsx +78 -0
  508. package/templates/nextjs-standalone/src/ui/hooks/use-admin-fetch.ts +104 -0
  509. package/templates/nextjs-standalone/src/ui/hooks/use-conversations.ts +184 -0
  510. package/templates/nextjs-standalone/src/ui/hooks/use-dark-mode.ts +17 -0
  511. package/templates/nextjs-standalone/src/ui/lib/action-types.ts +63 -0
  512. package/templates/nextjs-standalone/src/ui/lib/helpers.ts +104 -0
  513. package/templates/nextjs-standalone/src/ui/lib/types.ts +145 -0
  514. package/templates/nextjs-standalone/tsconfig.json +32 -0
  515. package/templates/nextjs-standalone/vercel.json +4 -0
@@ -0,0 +1,770 @@
1
+ /**
2
+ * Atlas eval pipeline — run curated YAML eval cases against demo Postgres schemas,
3
+ * compare agent output against gold SQL, and detect regressions.
4
+ *
5
+ * Usage:
6
+ * bun run atlas -- eval # Run all cases
7
+ * bun run atlas -- eval --schema cybersec # Filter by schema
8
+ * bun run atlas -- eval --category aggregation # Filter by category
9
+ * bun run atlas -- eval --difficulty simple # Filter by difficulty
10
+ * bun run atlas -- eval --id cs-001 # Single case
11
+ * bun run atlas -- eval --limit 5 # Max N cases
12
+ * bun run atlas -- eval --resume results.jsonl # Resume from JSONL
13
+ * bun run atlas -- eval --baseline # Save results as new baseline
14
+ * bun run atlas -- eval --compare <file.jsonl> # Diff against baseline (exit 1 on regression)
15
+ * bun run atlas -- eval --csv # CSV output
16
+ * bun run atlas -- eval --json # JSON summary output
17
+ */
18
+
19
+ import * as fs from "fs";
20
+ import * as path from "path";
21
+ import * as yaml from "js-yaml";
22
+ import { getFlag, seedDemoPostgres, type DemoDataset } from "./atlas";
23
+ import { explainMismatch } from "../lib/compare";
24
+ import { connections } from "@atlas/api/lib/db/connection";
25
+ import { _resetWhitelists } from "@atlas/api/lib/semantic";
26
+ import { invalidateExploreBackend } from "@atlas/api/lib/tools/explore";
27
+
28
+ // --- Types ---
29
+
/** A single curated eval case, as loaded from one YAML file under the cases directory. */
export interface EvalCase {
  /** Case identifier; loadEvalCases rejects duplicates across all case files. */
  id: string;
  /** Natural-language question handed to the agent. */
  question: string;
  /** Demo dataset the case runs against; must be one of VALID_SCHEMAS. */
  schema: DemoDataset;
  difficulty: "simple" | "medium" | "complex";
  category: string;
  /** Labels copied from the YAML; an empty array when the YAML omits them. */
  tags: string[];
  /** Reference query (stored trimmed) whose result set the agent output is compared against. */
  gold_sql: string;
  /** When truthy, filterCases drops the case before applying any other filter. */
  skip?: boolean;
  // NOTE(review): expected_rows and notes are loaded but not read by the visible code.
  expected_rows?: number;
  notes?: string;
}
42
+
/** Outcome of running one eval case; serialized as one JSON object per line in the results file. */
export interface EvalResult {
  id: string;
  schema: string;
  question: string;
  category: string;
  difficulty: string;
  tags: string[];
  gold_sql: string;
  /** Last SQL statement reported by the agent, or null when it reported none (or failed). */
  predicted_sql: string | null;
  /** True only when the agent's last result set compared equal to the gold result set. */
  match: boolean;
  /** Failure description (agent error, gold SQL error, schema setup failure), or null. */
  error: string | null;
  /** Wall-clock time spent in the agent call, in milliseconds. */
  latency_ms: number;
  /** Total tokens reported by the agent; 0 when the agent call failed. */
  tokens: number;
  /** Step count reported by the agent; 0 when the agent call failed. */
  steps: number;
}
58
+
/** Aggregate statistics over a list of results, as produced by computeSummary. */
interface EvalSummary {
  total: number;
  /** Number of results whose `match` is truthy. */
  correct: number;
  /** Number of results carrying a non-empty `error`. */
  errors: number;
  /** Percentage of correct results (0 when there are no results). */
  accuracy: number;
  /** Per-group tallies keyed by the result's schema, category and difficulty respectively. */
  bySchema: Map<string, { total: number; correct: number }>;
  byCategory: Map<string, { total: number; correct: number }>;
  byDifficulty: Map<string, { total: number; correct: number }>;
  totalTokens: number;
  totalLatencyMs: number;
  /** Mean latency rounded to the nearest millisecond (0 when there are no results). */
  avgLatencyMs: number;
}
71
+
/** Result of comparing a current run against a baseline, as produced by detectRegressions. */
interface RegressionReport {
  /** Cases that matched in the baseline but do not match now. */
  regressions: EvalResult[];
  /** Cases that did not match in the baseline but match now. */
  newPasses: EvalResult[];
  /** Cases present in the current run but absent from the baseline. */
  newCases: EvalResult[];
  /** Count of cases whose match status is unchanged. */
  stable: number;
}
78
+
79
+ // --- YAML case loading ---
80
+
// All paths below are resolved against the current working directory.
const EVAL_DIR = path.resolve("eval");
const CASES_DIR = path.join(EVAL_DIR, "cases");
const SCHEMAS_DIR = path.join(EVAL_DIR, "schemas");
const BASELINES_DIR = path.join(EVAL_DIR, "baselines");
// Live semantic layer directory; it is swapped per schema during an eval run.
const SEMANTIC_DIR = path.resolve("semantic");
// Where the pre-eval copy of SEMANTIC_DIR is kept until it is restored.
const BACKUP_DIR = path.resolve(".semantic-backup-eval");

// Fields every case document must define with a truthy value.
const REQUIRED_CASE_FIELDS = ["id", "question", "schema", "difficulty", "category", "gold_sql"] as const;
const VALID_DIFFICULTIES = ["simple", "medium", "complex"] as const;
const VALID_SCHEMAS: DemoDataset[] = ["simple", "cybersec", "ecommerce"];
91
+
/**
 * Load and validate every eval case found under `casesDir`.
 *
 * The directory is expected to contain one sub-directory per schema, each
 * holding `.yml` / `.yaml` files with a single case document.
 *
 * @param casesDir Root directory of the case files (defaults to CASES_DIR).
 * @returns All cases, ordered by sub-directory name and then file name.
 * @throws Error when the directory is missing, a file does not contain a YAML
 *   mapping, a case fails validation, or two cases share the same id.
 */
export function loadEvalCases(casesDir: string = CASES_DIR): EvalCase[] {
  if (!fs.existsSync(casesDir)) {
    throw new Error(`Eval cases directory not found: ${casesDir}`);
  }

  const cases: EvalCase[] = [];
  const seenIds = new Set<string>();
  // readdir order is platform-dependent; sort so that --limit and the
  // progress output see the same case order on every machine.
  const schemaDirs = fs.readdirSync(casesDir, { withFileTypes: true })
    .filter(d => d.isDirectory())
    .map(d => d.name)
    .sort();

  for (const schemaDir of schemaDirs) {
    const dirPath = path.join(casesDir, schemaDir);
    const files = fs.readdirSync(dirPath)
      .filter(f => f.endsWith(".yml") || f.endsWith(".yaml"))
      .sort();

    for (const file of files) {
      const filePath = path.join(dirPath, file);
      const content = fs.readFileSync(filePath, "utf-8");
      const parsed: unknown = yaml.load(content);

      // An empty file parses to undefined and a list or scalar is not a case;
      // report those with the file name instead of failing on property access.
      if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new Error(`Invalid eval case in ${filePath}: expected a YAML mapping`);
      }
      const doc = parsed as Record<string, unknown>;

      validateCase(doc, filePath);

      const caseId = doc.id as string;
      if (seenIds.has(caseId)) {
        throw new Error(`Duplicate eval case id "${caseId}" in ${filePath}`);
      }
      seenIds.add(caseId);

      cases.push({
        id: caseId,
        question: doc.question as string,
        schema: doc.schema as DemoDataset,
        difficulty: doc.difficulty as EvalCase["difficulty"],
        category: doc.category as string,
        tags: (doc.tags as string[]) ?? [],
        gold_sql: (doc.gold_sql as string).trim(),
        skip: doc.skip as boolean | undefined,
        expected_rows: doc.expected_rows as number | undefined,
        notes: doc.notes as string | undefined,
      });
    }
  }

  return cases;
}
137
+
/**
 * Validate the raw document parsed from an eval case file.
 *
 * @param doc Parsed YAML document; callers cast it, so its shape is re-checked here.
 * @param filePath Path of the source file, used only in error messages.
 * @throws Error naming `filePath` when the document is not a mapping, a required
 *   field is missing or falsy, the schema or difficulty is not an allowed value,
 *   or id / question / gold_sql is not a non-empty string.
 */
export function validateCase(doc: Record<string, unknown>, filePath: string): void {
  // Empty YAML files parse to undefined and list documents parse to arrays;
  // without this guard the field lookups below fail with an opaque TypeError.
  if (doc === null || typeof doc !== "object" || Array.isArray(doc)) {
    throw new Error(`Invalid eval case in ${filePath}: expected a YAML mapping`);
  }

  for (const field of REQUIRED_CASE_FIELDS) {
    if (!doc[field]) {
      throw new Error(`Missing required field "${field}" in ${filePath}`);
    }
  }

  if (!VALID_SCHEMAS.includes(doc.schema as DemoDataset)) {
    throw new Error(`Invalid schema "${doc.schema}" in ${filePath}. Valid: ${VALID_SCHEMAS.join(", ")}`);
  }

  if (!VALID_DIFFICULTIES.includes(doc.difficulty as typeof VALID_DIFFICULTIES[number])) {
    throw new Error(`Invalid difficulty "${doc.difficulty}" in ${filePath}. Valid: ${VALID_DIFFICULTIES.join(", ")}`);
  }

  if (typeof doc.id !== "string" || !doc.id.trim()) {
    throw new Error(`Invalid id in ${filePath}: must be a non-empty string`);
  }

  if (typeof doc.question !== "string" || !doc.question.trim()) {
    throw new Error(`Invalid question in ${filePath}: must be a non-empty string`);
  }

  if (typeof doc.gold_sql !== "string" || !doc.gold_sql.trim()) {
    throw new Error(`Invalid gold_sql in ${filePath}: must be a non-empty string`);
  }
}
165
+
/**
 * Select the cases to run.
 *
 * Skipped cases are always removed. Each filter that is set (non-empty) must
 * match exactly; `limit`, when positive, caps the number of cases returned.
 *
 * @returns A new array; the input is not modified.
 */
export function filterCases(
  cases: EvalCase[],
  filters: {
    schema?: string;
    category?: string;
    difficulty?: string;
    id?: string;
    limit?: number;
  },
): EvalCase[] {
  const { id, schema, category, difficulty, limit } = filters;

  const selected = cases.filter(candidate => {
    if (candidate.skip) return false;
    if (id && candidate.id !== id) return false;
    if (schema && candidate.schema !== schema) return false;
    if (category && candidate.category !== category) return false;
    if (difficulty && candidate.difficulty !== difficulty) return false;
    return true;
  });

  const hasLimit = Boolean(limit) && (limit as number) > 0;
  return hasLimit ? selected.slice(0, limit) : selected;
}
196
+
197
+ // --- Semantic layer management ---
198
+
/**
 * Copy the live semantic layer to BACKUP_DIR before eval replaces it.
 *
 * Does nothing when there is no semantic layer to protect.
 *
 * @throws Error when a backup from an earlier run is still present, or when
 *   the copy fails. In both cases eval must not proceed.
 */
function backupSemanticLayer(): void {
  // A leftover backup means an earlier run never restored it. By then the
  // live directory may already hold an eval schema, so overwriting the backup
  // could destroy the only copy of the user's real semantic layer.
  if (fs.existsSync(BACKUP_DIR)) {
    throw new Error(
      `Found a leftover semantic layer backup at ${BACKUP_DIR} from an interrupted eval run. ` +
        `It may hold your original semantic/ directory. ` +
        `Restore it to ${SEMANTIC_DIR} or remove it, then run eval again.`,
    );
  }

  if (!fs.existsSync(SEMANTIC_DIR)) return;

  try {
    fs.cpSync(SEMANTIC_DIR, BACKUP_DIR, { recursive: true });
  } catch (err) {
    // Do not leave a partial copy behind that could later be mistaken for a
    // complete backup. Cleanup is best-effort; the copy error is what matters.
    try {
      fs.rmSync(BACKUP_DIR, { recursive: true, force: true });
    } catch {
      // ignored: reported through the error thrown below
    }
    throw new Error(
      `Failed to backup semantic layer before eval: ${err instanceof Error ? err.message : String(err)}. ` +
        `Refusing to proceed — your semantic/ directory would be at risk.`,
      { cause: err },
    );
  }
}
215
+
/**
 * Put the backed-up semantic layer back in place and delete the backup.
 *
 * A no-op when no backup exists. Failures are reported on stderr, with manual
 * recovery instructions, rather than thrown.
 */
function restoreSemanticLayer(): void {
  const backupPresent = fs.existsSync(BACKUP_DIR);
  if (backupPresent) {
    try {
      const liveLayerPresent = fs.existsSync(SEMANTIC_DIR);
      if (liveLayerPresent) {
        fs.rmSync(SEMANTIC_DIR, { recursive: true });
      }
      fs.cpSync(BACKUP_DIR, SEMANTIC_DIR, { recursive: true });
      fs.rmSync(BACKUP_DIR, { recursive: true });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      const report =
        `\nCRITICAL: Failed to restore semantic layer: ${reason}\n` +
        `Your original semantic layer was backed up to: ${BACKUP_DIR}\n` +
        `To restore manually: rm -rf ${SEMANTIC_DIR} && cp -r ${BACKUP_DIR} ${SEMANTIC_DIR}\n`;
      process.stderr.write(report);
    }
  }
}
233
+
/**
 * Replace the live semantic layer with the one shipped for `schema`.
 *
 * @param schema Name of the sub-directory of SCHEMAS_DIR to install.
 * @throws Error when no semantic layer exists for the schema.
 */
function installSchemaSemanticLayer(schema: string): void {
  const source = path.join(SCHEMAS_DIR, schema);
  const sourceExists = fs.existsSync(source);
  if (!sourceExists) {
    throw new Error(`Semantic layer not found for schema "${schema}" at ${source}`);
  }

  // Remove the previous layer first so no stale files survive the copy.
  const hasPrevious = fs.existsSync(SEMANTIC_DIR);
  if (hasPrevious) {
    fs.rmSync(SEMANTIC_DIR, { recursive: true });
  }
  fs.cpSync(source, SEMANTIC_DIR, { recursive: true });
}
245
+
/**
 * Reset module-level state in the API library. The eval loop calls this after
 * seeding a schema and installing its semantic layer.
 */
// NOTE(review): presumably these helpers drop cached connections, table
// whitelists and the explore backend — confirm against their definitions.
function resetCaches(): void {
  connections._reset();
  _resetWhitelists();
  invalidateExploreBackend();
}
251
+
252
+ // --- Case evaluation ---
253
+
/**
 * Run one eval case end to end.
 *
 * Steps: ask the agent the case's question, execute the gold SQL on the
 * default connection, then compare the gold result set with the last result
 * set the agent produced.
 *
 * Never throws for agent or gold-SQL failures; those are reported through the
 * `error` field of the returned result (and logged to stderr).
 */
async function evaluateCase(
  evalCase: EvalCase,
): Promise<EvalResult> {
  const startedAt = Date.now();
  const { id, schema, question, category, difficulty, tags, gold_sql } = evalCase;
  const common = { id, schema, question, category, difficulty, tags, gold_sql };
  const messageOf = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  // 1) Run agent
  let agentResult: Awaited<ReturnType<typeof import("@atlas/api/lib/agent-query").executeAgentQuery>>;
  try {
    const agentModule = await import("@atlas/api/lib/agent-query");
    agentResult = await agentModule.executeAgentQuery(question);
  } catch (err) {
    const detail = err instanceof Error && err.stack ? err.stack : String(err);
    process.stderr.write(`\n AGENT ERROR ${id}: ${detail}\n`);
    return {
      ...common,
      predicted_sql: null,
      match: false,
      error: `Agent error: ${messageOf(err)}`,
      latency_ms: Date.now() - startedAt,
      tokens: 0,
      steps: 0,
    };
  }

  // Latency covers the agent call only, not the gold query or the comparison.
  const latencyMs = Date.now() - startedAt;
  const sqlHistory = agentResult.sql;
  const predictedSql = sqlHistory.length === 0 ? null : sqlHistory[sqlHistory.length - 1];
  const tokens = agentResult.usage.totalTokens;
  const steps = agentResult.steps;

  // Every outcome past this point shares the same agent-derived fields.
  const finish = (match: boolean, error: string | null): EvalResult => ({
    ...common,
    predicted_sql: predictedSql,
    match,
    error,
    latency_ms: latencyMs,
    tokens,
    steps,
  });

  // 2) Execute gold SQL directly (bypasses validation — developer-authored queries only)
  const normalizedGold = gold_sql.trim().toUpperCase();
  const isReadQuery = normalizedGold.startsWith("SELECT") || normalizedGold.startsWith("WITH");
  if (!isReadQuery) {
    return finish(false, `Gold SQL for ${id} is not a SELECT/WITH statement`);
  }

  let goldResult: { columns: string[]; rows: Record<string, unknown>[] };
  try {
    goldResult = await connections.getDefault().query(gold_sql, 60000);
  } catch (err) {
    process.stderr.write(`\n GOLD SQL ERROR ${id}: ${messageOf(err)}\n`);
    return finish(false, `Gold SQL error (test harness bug): ${messageOf(err)}`);
  }

  // 3) Compare result sets
  const dataHistory = agentResult.data;
  const predictedResult = dataHistory.length === 0 ? null : dataHistory[dataHistory.length - 1];
  if (!predictedResult) {
    // The agent produced no result set: a plain failure, not an error.
    return finish(false, null);
  }

  const mismatchReason = explainMismatch(goldResult, predictedResult);
  if (mismatchReason) {
    process.stderr.write(`\n MISMATCH ${id}: ${mismatchReason}\n`);
  }
  return finish(mismatchReason === null, null);
}
345
+
346
+ // --- Summary ---
347
+
/**
 * Aggregate a list of results into overall and per-group statistics.
 *
 * @param results Results to summarize (may be empty).
 * @returns Totals, accuracy as a percentage, per-schema / per-category /
 *   per-difficulty tallies, and token and latency totals.
 */
export function computeSummary(results: EvalResult[]): EvalSummary {
  type Tally = { total: number; correct: number };

  const bySchema = new Map<string, Tally>();
  const byCategory = new Map<string, Tally>();
  const byDifficulty = new Map<string, Tally>();

  // Count one result in the bucket for `key`, creating the bucket on first use.
  const record = (buckets: Map<string, Tally>, key: string, matched: boolean): void => {
    let tally = buckets.get(key);
    if (!tally) {
      tally = { total: 0, correct: 0 };
      buckets.set(key, tally);
    }
    tally.total += 1;
    if (matched) tally.correct += 1;
  };

  let correct = 0;
  let errors = 0;
  let totalTokens = 0;
  let totalLatencyMs = 0;

  for (const result of results) {
    if (result.match) correct += 1;
    if (result.error) errors += 1;
    totalTokens += result.tokens;
    totalLatencyMs += result.latency_ms;

    record(bySchema, result.schema, result.match);
    record(byCategory, result.category, result.match);
    record(byDifficulty, result.difficulty, result.match);
  }

  const total = results.length;
  const hasResults = total > 0;

  return {
    total,
    correct,
    errors,
    accuracy: hasResults ? (correct / total) * 100 : 0,
    bySchema,
    byCategory,
    byDifficulty,
    totalTokens,
    totalLatencyMs,
    avgLatencyMs: hasResults ? Math.round(totalLatencyMs / total) : 0,
  };
}
388
+
/**
 * Print the human-readable summary to stdout: headline totals, then a
 * breakdown for each grouping that has more than one group, then token and
 * latency figures.
 */
function printSummary(summary: EvalSummary): void {
  const rule = "=".repeat(60);

  // Print one alphabetically sorted breakdown section; skipped when there is
  // nothing to compare (zero or one group).
  const printBreakdown = (
    heading: string,
    groups: Map<string, { total: number; correct: number }>,
    labelWidth: number,
  ): void => {
    if (groups.size <= 1) return;
    console.log(heading);
    const ordered = [...groups.entries()].sort(([left], [right]) => left.localeCompare(right));
    for (const [label, { correct, total }] of ordered) {
      const pct = ((correct / total) * 100).toFixed(1);
      console.log(` ${label.padEnd(labelWidth)} ${correct}/${total} (${pct}%)`);
    }
  };

  console.log("\n" + rule);
  console.log("Atlas Eval Results");
  console.log(rule);
  console.log(`Total: ${summary.total}`);
  console.log(`Correct: ${summary.correct}`);
  console.log(`Errors: ${summary.errors}`);
  console.log(`Accuracy: ${summary.accuracy.toFixed(1)}%`);

  printBreakdown("\nPer-schema:", summary.bySchema, 20);
  printBreakdown("\nPer-category:", summary.byCategory, 20);
  printBreakdown("\nPer-difficulty:", summary.byDifficulty, 15);

  console.log(`\nTotal tokens: ${summary.totalTokens.toLocaleString()}`);
  console.log(`Avg latency: ${summary.avgLatencyMs.toLocaleString()}ms`);
  console.log(rule);
}
426
+
/**
 * Print the results to stdout as CSV: a header line, then one row per result.
 *
 * SQL and error columns are always quoted when non-empty (empty or null
 * becomes an empty field). Identifier-like columns are quoted only when they
 * contain a comma, quote or line break, so ordinary values print unchanged.
 */
function printCSV(results: EvalResult[]): void {
  // Keep every record on one physical line, for all line-ending styles.
  const flatten = (s: string): string => s.replace(/\r\n|\r|\n/g, " ");
  const quote = (s: string): string => `"${flatten(s).replace(/"/g, '""')}"`;
  const csvSafe = (s: string | null): string => (s ? quote(s) : "");
  // Free-form labels such as category may contain delimiters; quote on demand.
  const csvLabel = (s: string): string => (/[",\r\n]/.test(s) ? quote(s) : s);

  console.log("id,schema,difficulty,category,match,predicted_sql,gold_sql,error,latency_ms,tokens,steps");
  for (const r of results) {
    console.log(
      [
        csvLabel(r.id),
        csvLabel(r.schema),
        csvLabel(r.difficulty),
        csvLabel(r.category),
        r.match,
        csvSafe(r.predicted_sql),
        csvSafe(r.gold_sql),
        csvSafe(r.error),
        r.latency_ms,
        r.tokens,
        r.steps,
      ].join(","),
    );
  }
}
449
+
/**
 * Print the run as one pretty-printed JSON document on stdout: headline
 * summary, the three per-group breakdowns as plain objects, and all results.
 */
function printJSON(results: EvalResult[], summary: EvalSummary): void {
  const { total, correct, errors, accuracy, totalTokens, avgLatencyMs } = summary;

  // Accuracy is reported with one decimal place, as a number rather than a string.
  const roundedAccuracy = Number(accuracy.toFixed(1));
  const headline = {
    total,
    correct,
    errors,
    accuracy: roundedAccuracy,
    totalTokens,
    avgLatencyMs,
  };

  const document = {
    summary: headline,
    bySchema: Object.fromEntries(summary.bySchema),
    byCategory: Object.fromEntries(summary.byCategory),
    byDifficulty: Object.fromEntries(summary.byDifficulty),
    results,
  };

  console.log(JSON.stringify(document, null, 2));
}
467
+
468
+ // --- Regression detection ---
469
+
/**
 * Classify each current result against the baseline result with the same id.
 *
 * - no baseline entry: new case
 * - matched before, not now: regression
 * - did not match before, matches now: new pass
 * - otherwise: stable (counted only)
 *
 * When the baseline holds several entries for one id, the last one wins.
 * Baseline entries that have no current result are not reported.
 */
export function detectRegressions(
  current: EvalResult[],
  baseline: EvalResult[],
): RegressionReport {
  const previousById = new Map<string, EvalResult>(
    baseline.map((entry): [string, EvalResult] => [entry.id, entry]),
  );

  const report: RegressionReport = {
    regressions: [],
    newPasses: [],
    newCases: [],
    stable: 0,
  };

  for (const result of current) {
    const previous = previousById.get(result.id);
    if (!previous) {
      report.newCases.push(result);
      continue;
    }

    const passedBefore = Boolean(previous.match);
    const passesNow = Boolean(result.match);
    if (passedBefore === passesNow) {
      report.stable += 1;
    } else if (passedBefore) {
      report.regressions.push(result);
    } else {
      report.newPasses.push(result);
    }
  }

  return report;
}
499
+
/**
 * Print a regression report to stdout. Regressions are shown in red and new
 * passes in green; empty sections are omitted, and the stable count is always
 * printed.
 */
function printRegressionReport(report: RegressionReport): void {
  const rule = "=".repeat(60);
  // One line per case: status, id, schema/category, and the first 60 characters of the question.
  const caseLine = (status: string, r: EvalResult): string =>
    ` ${status} ${r.id} [${r.schema}/${r.category}] ${r.question.slice(0, 60)}`;

  console.log("\n" + rule);
  console.log("Regression Report");
  console.log(rule);

  const { regressions, newPasses, newCases } = report;

  if (regressions.length > 0) {
    console.log(`\n\x1b[31mREGRESSIONS (${regressions.length}):\x1b[0m`);
    regressions.forEach(r => {
      console.log(caseLine("FAIL", r));
      if (r.error) console.log(` Error: ${r.error}`);
    });
  }

  if (newPasses.length > 0) {
    console.log(`\n\x1b[32mNEW PASSES (${newPasses.length}):\x1b[0m`);
    newPasses.forEach(r => console.log(caseLine("PASS", r)));
  }

  if (newCases.length > 0) {
    console.log(`\nNEW CASES (${newCases.length}):`);
    newCases.forEach(r => console.log(caseLine(r.match ? "PASS" : "FAIL", r)));
  }

  console.log(`\nStable: ${report.stable}`);
  console.log(rule);
}
531
+
/**
 * Read a baseline JSONL file.
 *
 * Blank lines are ignored. A line is skipped when it is not valid JSON or
 * when it lacks a non-empty string `id` or a boolean `match`. Parse failures
 * are reported on stderr until three lines have been skipped, and a final
 * warning gives the total number skipped.
 *
 * @throws Error when the file does not exist.
 */
function loadBaseline(filePath: string): EvalResult[] {
  if (!fs.existsSync(filePath)) {
    throw new Error(`Baseline file not found: ${filePath}`);
  }

  const rawLines = fs
    .readFileSync(filePath, "utf-8")
    .split("\n")
    .filter(line => Boolean(line));
  const accepted: EvalResult[] = [];
  let skipped = 0;

  rawLines.forEach((line, index) => {
    try {
      const entry = JSON.parse(line) as EvalResult;
      const hasId = typeof entry.id === "string" && Boolean(entry.id);
      const hasMatch = typeof entry.match === "boolean";
      if (hasId && hasMatch) {
        accepted.push(entry);
      } else {
        skipped += 1;
      }
    } catch (err) {
      // Only the first few problems are spelled out to keep stderr readable.
      if (skipped < 3) {
        const reason = err instanceof Error ? err.message : String(err);
        process.stderr.write(`WARNING: Baseline line ${index + 1} is malformed: ${reason}\n`);
      }
      skipped += 1;
    }
  });

  if (skipped > 0) {
    process.stderr.write(`WARNING: ${skipped} of ${rawLines.length} baseline line(s) skipped in ${filePath}\n`);
  }

  return accepted;
}
565
+
566
+ // --- Main entry point ---
567
+
568
+ export async function handleEval(args: string[]): Promise<void> {
569
+ const schemaFilter = getFlag(args, "--schema");
570
+ const categoryFilter = getFlag(args, "--category");
571
+ const difficultyFilter = getFlag(args, "--difficulty");
572
+ const idFilter = getFlag(args, "--id");
573
+ const limitArg = getFlag(args, "--limit");
574
+ const limit = limitArg ? parseInt(limitArg, 10) : undefined;
575
+ const resumeFile = getFlag(args, "--resume");
576
+ const compareFile = getFlag(args, "--compare");
577
+ if (compareFile && !fs.existsSync(compareFile)) {
578
+ console.error(`Error: Baseline file not found: ${compareFile}`);
579
+ process.exit(1);
580
+ }
581
+ const saveBaseline = args.includes("--baseline");
582
+ const csvOutput = args.includes("--csv");
583
+ const jsonOutput = args.includes("--json");
584
+
585
+ // Load and filter cases
586
+ let allCases: EvalCase[];
587
+ try {
588
+ allCases = loadEvalCases();
589
+ } catch (err) {
590
+ console.error(`Error loading eval cases: ${err instanceof Error ? err.message : String(err)}`);
591
+ process.exit(1);
592
+ }
593
+
594
+ const cases = filterCases(allCases, {
595
+ schema: schemaFilter,
596
+ category: categoryFilter,
597
+ difficulty: difficultyFilter,
598
+ id: idFilter,
599
+ limit,
600
+ });
601
+
602
+ if (cases.length === 0) {
603
+ console.error("No eval cases match the given filters.");
604
+ process.exit(1);
605
+ }
606
+
607
+ // Load completed results for resume
608
+ const completedIds = new Set<string>();
609
+ const allResults: EvalResult[] = [];
610
+ const resultsFile = resumeFile ?? `eval-results-${Date.now()}.jsonl`;
611
+
612
+ if (resumeFile && fs.existsSync(resumeFile)) {
613
+ const lines = fs.readFileSync(resumeFile, "utf-8").split("\n").filter(Boolean);
614
+ let skippedLines = 0;
615
+ for (let i = 0; i < lines.length; i++) {
616
+ try {
617
+ const r = JSON.parse(lines[i]) as EvalResult;
618
+ if (typeof r.id !== "string" || !r.id) {
619
+ skippedLines++;
620
+ continue;
621
+ }
622
+ completedIds.add(r.id);
623
+ allResults.push(r);
624
+ } catch (err) {
625
+ if (skippedLines < 3) {
626
+ process.stderr.write(
627
+ `WARNING: Resume line ${i + 1} is malformed: ${err instanceof Error ? err.message : String(err)}\n`,
628
+ );
629
+ }
630
+ skippedLines++;
631
+ }
632
+ }
633
+ if (skippedLines > 0) {
634
+ process.stderr.write(`WARNING: ${skippedLines} malformed line(s) skipped in resume file\n`);
635
+ }
636
+ console.log(`Resuming: ${completedIds.size} cases already completed`);
637
+ }
638
+
639
+ // Group remaining cases by schema to minimize re-seeding
640
+ const bySchema = new Map<string, EvalCase[]>();
641
+ for (const c of cases) {
642
+ if (completedIds.has(c.id)) continue;
643
+ const arr = bySchema.get(c.schema) ?? [];
644
+ arr.push(c);
645
+ bySchema.set(c.schema, arr);
646
+ }
647
+
648
+ const completedInScope = cases.filter(c => completedIds.has(c.id)).length;
649
+ const remainingCount = cases.length - completedInScope;
650
+ const schemaCount = bySchema.size;
651
+
652
+ if (!csvOutput && !jsonOutput) {
653
+ console.log(
654
+ `Atlas Eval: ${cases.length} cases across ${schemaCount} schema(s)` +
655
+ (completedIds.size > 0 ? ` (${remainingCount} remaining)` : ""),
656
+ );
657
+ }
658
+
659
+ // Get connection string
660
+ const connStr = process.env.ATLAS_DATASOURCE_URL;
661
+ if (!connStr) {
662
+ console.error("Error: ATLAS_DATASOURCE_URL is required for eval");
663
+ process.exit(1);
664
+ }
665
+
666
+ // Backup semantic layer
667
+ backupSemanticLayer();
668
+
669
+ try {
670
+ let caseIdx = 0;
671
+
672
+ for (const [schema, schemaCases] of bySchema) {
673
+ if (!csvOutput && !jsonOutput) {
674
+ console.log(`\n--- Schema: ${schema} (${schemaCases.length} cases) ---`);
675
+ }
676
+
677
+ // Setup phase — errors here affect all cases in this schema
678
+ try {
679
+ await seedDemoPostgres(connStr, schema as DemoDataset);
680
+ installSchemaSemanticLayer(schema);
681
+ resetCaches();
682
+ process.env.ATLAS_DATASOURCE_URL = connStr;
683
+ } catch (schemaErr) {
684
+ console.error(
685
+ ` Error setting up schema ${schema}: ${schemaErr instanceof Error ? schemaErr.message : String(schemaErr)}`,
686
+ );
687
+ for (const c of schemaCases) {
688
+ const errResult: EvalResult = {
689
+ id: c.id,
690
+ schema: c.schema,
691
+ question: c.question,
692
+ category: c.category,
693
+ difficulty: c.difficulty,
694
+ tags: c.tags,
695
+ gold_sql: c.gold_sql,
696
+ predicted_sql: null,
697
+ match: false,
698
+ error: `Schema setup failed: ${schemaErr instanceof Error ? schemaErr.message : String(schemaErr)}`,
699
+ latency_ms: 0,
700
+ tokens: 0,
701
+ steps: 0,
702
+ };
703
+ allResults.push(errResult);
704
+ fs.appendFileSync(resultsFile, JSON.stringify(errResult) + "\n");
705
+ }
706
+ continue;
707
+ }
708
+
709
+ // Evaluation phase — individual cases handle their own errors
710
+ for (const evalCase of schemaCases) {
711
+ caseIdx++;
712
+ const progress = `[${caseIdx}/${remainingCount}]`;
713
+
714
+ if (!csvOutput && !jsonOutput) {
715
+ process.stderr.write(
716
+ `${progress} ${evalCase.id} (${evalCase.difficulty}): ${evalCase.question.slice(0, 60)}...`,
717
+ );
718
+ }
719
+
720
+ const result = await evaluateCase(evalCase);
721
+ allResults.push(result);
722
+
723
+ // Append to JSONL
724
+ fs.appendFileSync(resultsFile, JSON.stringify(result) + "\n");
725
+
726
+ const status = result.match ? "PASS" : result.error ? "ERROR" : "FAIL";
727
+ if (!csvOutput && !jsonOutput) {
728
+ process.stderr.write(` ${status} (${result.latency_ms}ms)\n`);
729
+ }
730
+ }
731
+ }
732
+ } finally {
733
+ restoreSemanticLayer();
734
+ }
735
+
736
+ // Output results
737
+ const summary = computeSummary(allResults);
738
+
739
+ if (csvOutput) {
740
+ printCSV(allResults);
741
+ } else if (jsonOutput) {
742
+ printJSON(allResults, summary);
743
+ } else {
744
+ printSummary(summary);
745
+ console.log(`\nResults saved to: ${resultsFile}`);
746
+ }
747
+
748
+ // Baseline save
749
+ if (saveBaseline) {
750
+ const provider = process.env.ATLAS_PROVIDER ?? "anthropic";
751
+ const model = (process.env.ATLAS_MODEL ?? "claude-opus-4-6").replace(/[/:]/g, "-");
752
+ const baselineFile = path.join(BASELINES_DIR, `${provider}-${model}.jsonl`);
753
+ fs.mkdirSync(BASELINES_DIR, { recursive: true });
754
+ const content = allResults.map(r => JSON.stringify(r)).join("\n") + "\n";
755
+ fs.writeFileSync(baselineFile, content);
756
+ console.log(`\nBaseline saved to: ${baselineFile}`);
757
+ }
758
+
759
+ // Regression comparison
760
+ if (compareFile) {
761
+ const baseline = loadBaseline(compareFile);
762
+ const report = detectRegressions(allResults, baseline);
763
+ printRegressionReport(report);
764
+
765
+ if (report.regressions.length > 0) {
766
+ console.error(`\n${report.regressions.length} regression(s) detected — exiting with code 1`);
767
+ process.exit(1);
768
+ }
769
+ }
770
+ }