claude-test-bench 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +474 -0
  3. package/dist/bin/ctb.d.ts +3 -0
  4. package/dist/bin/ctb.d.ts.map +1 -0
  5. package/dist/bin/ctb.js +107 -0
  6. package/dist/bin/ctb.js.map +1 -0
  7. package/dist/server/index.d.ts +13 -0
  8. package/dist/server/index.d.ts.map +1 -0
  9. package/dist/server/index.js +72 -0
  10. package/dist/server/index.js.map +1 -0
  11. package/dist/server/interfaces/evaluator.d.ts +15 -0
  12. package/dist/server/interfaces/evaluator.d.ts.map +1 -0
  13. package/dist/server/interfaces/evaluator.js +2 -0
  14. package/dist/server/interfaces/evaluator.js.map +1 -0
  15. package/dist/server/interfaces/logger.d.ts +9 -0
  16. package/dist/server/interfaces/logger.d.ts.map +1 -0
  17. package/dist/server/interfaces/logger.js +2 -0
  18. package/dist/server/interfaces/logger.js.map +1 -0
  19. package/dist/server/interfaces/runner.d.ts +9 -0
  20. package/dist/server/interfaces/runner.d.ts.map +1 -0
  21. package/dist/server/interfaces/runner.js +2 -0
  22. package/dist/server/interfaces/runner.js.map +1 -0
  23. package/dist/server/interfaces/storage.d.ts +36 -0
  24. package/dist/server/interfaces/storage.d.ts.map +1 -0
  25. package/dist/server/interfaces/storage.js +2 -0
  26. package/dist/server/interfaces/storage.js.map +1 -0
  27. package/dist/server/interfaces/workspace.d.ts +9 -0
  28. package/dist/server/interfaces/workspace.d.ts.map +1 -0
  29. package/dist/server/interfaces/workspace.js +2 -0
  30. package/dist/server/interfaces/workspace.js.map +1 -0
  31. package/dist/server/routes/eval-queue.d.ts +23 -0
  32. package/dist/server/routes/eval-queue.d.ts.map +1 -0
  33. package/dist/server/routes/eval-queue.js +45 -0
  34. package/dist/server/routes/eval-queue.js.map +1 -0
  35. package/dist/server/routes/evaluations.d.ts +8 -0
  36. package/dist/server/routes/evaluations.d.ts.map +1 -0
  37. package/dist/server/routes/evaluations.js +217 -0
  38. package/dist/server/routes/evaluations.js.map +1 -0
  39. package/dist/server/routes/providers.d.ts +5 -0
  40. package/dist/server/routes/providers.d.ts.map +1 -0
  41. package/dist/server/routes/providers.js +194 -0
  42. package/dist/server/routes/providers.js.map +1 -0
  43. package/dist/server/routes/run-queue.d.ts +17 -0
  44. package/dist/server/routes/run-queue.d.ts.map +1 -0
  45. package/dist/server/routes/run-queue.js +34 -0
  46. package/dist/server/routes/run-queue.js.map +1 -0
  47. package/dist/server/routes/run-sse.d.ts +18 -0
  48. package/dist/server/routes/run-sse.d.ts.map +1 -0
  49. package/dist/server/routes/run-sse.js +57 -0
  50. package/dist/server/routes/run-sse.js.map +1 -0
  51. package/dist/server/routes/runs.d.ts +9 -0
  52. package/dist/server/routes/runs.d.ts.map +1 -0
  53. package/dist/server/routes/runs.js +379 -0
  54. package/dist/server/routes/runs.js.map +1 -0
  55. package/dist/server/routes/scenarios.d.ts +5 -0
  56. package/dist/server/routes/scenarios.d.ts.map +1 -0
  57. package/dist/server/routes/scenarios.js +209 -0
  58. package/dist/server/routes/scenarios.js.map +1 -0
  59. package/dist/server/routes/setups.d.ts +5 -0
  60. package/dist/server/routes/setups.d.ts.map +1 -0
  61. package/dist/server/routes/setups.js +194 -0
  62. package/dist/server/routes/setups.js.map +1 -0
  63. package/dist/server/services/agent-mapper.d.ts +12 -0
  64. package/dist/server/services/agent-mapper.d.ts.map +1 -0
  65. package/dist/server/services/agent-mapper.js +75 -0
  66. package/dist/server/services/agent-mapper.js.map +1 -0
  67. package/dist/server/services/env-builder.d.ts +10 -0
  68. package/dist/server/services/env-builder.d.ts.map +1 -0
  69. package/dist/server/services/env-builder.js +50 -0
  70. package/dist/server/services/env-builder.js.map +1 -0
  71. package/dist/server/services/eval-helpers.d.ts +22 -0
  72. package/dist/server/services/eval-helpers.d.ts.map +1 -0
  73. package/dist/server/services/eval-helpers.js +75 -0
  74. package/dist/server/services/eval-helpers.js.map +1 -0
  75. package/dist/server/services/eval-parsers-debate-impl.d.ts +11 -0
  76. package/dist/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
  77. package/dist/server/services/eval-parsers-debate-impl.js +133 -0
  78. package/dist/server/services/eval-parsers-debate-impl.js.map +1 -0
  79. package/dist/server/services/eval-parsers.d.ts +24 -0
  80. package/dist/server/services/eval-parsers.d.ts.map +1 -0
  81. package/dist/server/services/eval-parsers.js +153 -0
  82. package/dist/server/services/eval-parsers.js.map +1 -0
  83. package/dist/server/services/eval-prompts.d.ts +9 -0
  84. package/dist/server/services/eval-prompts.d.ts.map +1 -0
  85. package/dist/server/services/eval-prompts.js +170 -0
  86. package/dist/server/services/eval-prompts.js.map +1 -0
  87. package/dist/server/services/evaluator.d.ts +10 -0
  88. package/dist/server/services/evaluator.d.ts.map +1 -0
  89. package/dist/server/services/evaluator.js +156 -0
  90. package/dist/server/services/evaluator.js.map +1 -0
  91. package/dist/server/services/fs-adapter.d.ts +20 -0
  92. package/dist/server/services/fs-adapter.d.ts.map +1 -0
  93. package/dist/server/services/fs-adapter.js +13 -0
  94. package/dist/server/services/fs-adapter.js.map +1 -0
  95. package/dist/server/services/instruction-parser.d.ts +26 -0
  96. package/dist/server/services/instruction-parser.d.ts.map +1 -0
  97. package/dist/server/services/instruction-parser.js +121 -0
  98. package/dist/server/services/instruction-parser.js.map +1 -0
  99. package/dist/server/services/log-rotator.d.ts +20 -0
  100. package/dist/server/services/log-rotator.d.ts.map +1 -0
  101. package/dist/server/services/log-rotator.js +60 -0
  102. package/dist/server/services/log-rotator.js.map +1 -0
  103. package/dist/server/services/logger.d.ts +15 -0
  104. package/dist/server/services/logger.d.ts.map +1 -0
  105. package/dist/server/services/logger.js +69 -0
  106. package/dist/server/services/logger.js.map +1 -0
  107. package/dist/server/services/runner.d.ts +12 -0
  108. package/dist/server/services/runner.d.ts.map +1 -0
  109. package/dist/server/services/runner.js +161 -0
  110. package/dist/server/services/runner.js.map +1 -0
  111. package/dist/server/services/seeder.d.ts +5 -0
  112. package/dist/server/services/seeder.d.ts.map +1 -0
  113. package/dist/server/services/seeder.js +79 -0
  114. package/dist/server/services/seeder.js.map +1 -0
  115. package/dist/server/services/storage-test-helpers.d.ts +21 -0
  116. package/dist/server/services/storage-test-helpers.d.ts.map +1 -0
  117. package/dist/server/services/storage-test-helpers.js +158 -0
  118. package/dist/server/services/storage-test-helpers.js.map +1 -0
  119. package/dist/server/services/storage.d.ts +35 -0
  120. package/dist/server/services/storage.d.ts.map +1 -0
  121. package/dist/server/services/storage.js +219 -0
  122. package/dist/server/services/storage.js.map +1 -0
  123. package/dist/server/services/transcript-formatter.d.ts +18 -0
  124. package/dist/server/services/transcript-formatter.d.ts.map +1 -0
  125. package/dist/server/services/transcript-formatter.js +156 -0
  126. package/dist/server/services/transcript-formatter.js.map +1 -0
  127. package/dist/server/services/workspace.d.ts +11 -0
  128. package/dist/server/services/workspace.d.ts.map +1 -0
  129. package/dist/server/services/workspace.js +113 -0
  130. package/dist/server/services/workspace.js.map +1 -0
  131. package/dist/server/types/evaluation.d.ts +108 -0
  132. package/dist/server/types/evaluation.d.ts.map +1 -0
  133. package/dist/server/types/evaluation.js +5 -0
  134. package/dist/server/types/evaluation.js.map +1 -0
  135. package/dist/server/types/index.d.ts +5 -0
  136. package/dist/server/types/index.d.ts.map +1 -0
  137. package/dist/server/types/index.js +5 -0
  138. package/dist/server/types/index.js.map +1 -0
  139. package/dist/server/types/provider.d.ts +99 -0
  140. package/dist/server/types/provider.d.ts.map +1 -0
  141. package/dist/server/types/provider.js +5 -0
  142. package/dist/server/types/provider.js.map +1 -0
  143. package/dist/server/types/run.d.ts +31 -0
  144. package/dist/server/types/run.d.ts.map +1 -0
  145. package/dist/server/types/run.js +5 -0
  146. package/dist/server/types/run.js.map +1 -0
  147. package/dist/server/types/scenario.d.ts +32 -0
  148. package/dist/server/types/scenario.d.ts.map +1 -0
  149. package/dist/server/types/scenario.js +5 -0
  150. package/dist/server/types/scenario.js.map +1 -0
  151. package/dist/server/types/setup.d.ts +99 -0
  152. package/dist/server/types/setup.d.ts.map +1 -0
  153. package/dist/server/types/setup.js +5 -0
  154. package/dist/server/types/setup.js.map +1 -0
  155. package/dist/src/server/index.d.ts +13 -0
  156. package/dist/src/server/index.d.ts.map +1 -0
  157. package/dist/src/server/index.js +72 -0
  158. package/dist/src/server/index.js.map +1 -0
  159. package/dist/src/server/interfaces/evaluator.d.ts +15 -0
  160. package/dist/src/server/interfaces/evaluator.d.ts.map +1 -0
  161. package/dist/src/server/interfaces/evaluator.js +2 -0
  162. package/dist/src/server/interfaces/evaluator.js.map +1 -0
  163. package/dist/src/server/interfaces/logger.d.ts +9 -0
  164. package/dist/src/server/interfaces/logger.d.ts.map +1 -0
  165. package/dist/src/server/interfaces/logger.js +2 -0
  166. package/dist/src/server/interfaces/logger.js.map +1 -0
  167. package/dist/src/server/interfaces/runner.d.ts +9 -0
  168. package/dist/src/server/interfaces/runner.d.ts.map +1 -0
  169. package/dist/src/server/interfaces/runner.js +2 -0
  170. package/dist/src/server/interfaces/runner.js.map +1 -0
  171. package/dist/src/server/interfaces/storage.d.ts +36 -0
  172. package/dist/src/server/interfaces/storage.d.ts.map +1 -0
  173. package/dist/src/server/interfaces/storage.js +2 -0
  174. package/dist/src/server/interfaces/storage.js.map +1 -0
  175. package/dist/src/server/interfaces/workspace.d.ts +9 -0
  176. package/dist/src/server/interfaces/workspace.d.ts.map +1 -0
  177. package/dist/src/server/interfaces/workspace.js +2 -0
  178. package/dist/src/server/interfaces/workspace.js.map +1 -0
  179. package/dist/src/server/routes/eval-queue.d.ts +23 -0
  180. package/dist/src/server/routes/eval-queue.d.ts.map +1 -0
  181. package/dist/src/server/routes/eval-queue.js +45 -0
  182. package/dist/src/server/routes/eval-queue.js.map +1 -0
  183. package/dist/src/server/routes/evaluations.d.ts +8 -0
  184. package/dist/src/server/routes/evaluations.d.ts.map +1 -0
  185. package/dist/src/server/routes/evaluations.js +217 -0
  186. package/dist/src/server/routes/evaluations.js.map +1 -0
  187. package/dist/src/server/routes/providers.d.ts +5 -0
  188. package/dist/src/server/routes/providers.d.ts.map +1 -0
  189. package/dist/src/server/routes/providers.js +194 -0
  190. package/dist/src/server/routes/providers.js.map +1 -0
  191. package/dist/src/server/routes/run-queue.d.ts +17 -0
  192. package/dist/src/server/routes/run-queue.d.ts.map +1 -0
  193. package/dist/src/server/routes/run-queue.js +34 -0
  194. package/dist/src/server/routes/run-queue.js.map +1 -0
  195. package/dist/src/server/routes/run-sse.d.ts +18 -0
  196. package/dist/src/server/routes/run-sse.d.ts.map +1 -0
  197. package/dist/src/server/routes/run-sse.js +57 -0
  198. package/dist/src/server/routes/run-sse.js.map +1 -0
  199. package/dist/src/server/routes/runs.d.ts +9 -0
  200. package/dist/src/server/routes/runs.d.ts.map +1 -0
  201. package/dist/src/server/routes/runs.js +379 -0
  202. package/dist/src/server/routes/runs.js.map +1 -0
  203. package/dist/src/server/routes/scenarios.d.ts +5 -0
  204. package/dist/src/server/routes/scenarios.d.ts.map +1 -0
  205. package/dist/src/server/routes/scenarios.js +209 -0
  206. package/dist/src/server/routes/scenarios.js.map +1 -0
  207. package/dist/src/server/routes/setups.d.ts +5 -0
  208. package/dist/src/server/routes/setups.d.ts.map +1 -0
  209. package/dist/src/server/routes/setups.js +194 -0
  210. package/dist/src/server/routes/setups.js.map +1 -0
  211. package/dist/src/server/services/agent-mapper.d.ts +12 -0
  212. package/dist/src/server/services/agent-mapper.d.ts.map +1 -0
  213. package/dist/src/server/services/agent-mapper.js +75 -0
  214. package/dist/src/server/services/agent-mapper.js.map +1 -0
  215. package/dist/src/server/services/env-builder.d.ts +10 -0
  216. package/dist/src/server/services/env-builder.d.ts.map +1 -0
  217. package/dist/src/server/services/env-builder.js +50 -0
  218. package/dist/src/server/services/env-builder.js.map +1 -0
  219. package/dist/src/server/services/eval-helpers.d.ts +22 -0
  220. package/dist/src/server/services/eval-helpers.d.ts.map +1 -0
  221. package/dist/src/server/services/eval-helpers.js +75 -0
  222. package/dist/src/server/services/eval-helpers.js.map +1 -0
  223. package/dist/src/server/services/eval-parsers-debate-impl.d.ts +11 -0
  224. package/dist/src/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
  225. package/dist/src/server/services/eval-parsers-debate-impl.js +133 -0
  226. package/dist/src/server/services/eval-parsers-debate-impl.js.map +1 -0
  227. package/dist/src/server/services/eval-parsers.d.ts +24 -0
  228. package/dist/src/server/services/eval-parsers.d.ts.map +1 -0
  229. package/dist/src/server/services/eval-parsers.js +153 -0
  230. package/dist/src/server/services/eval-parsers.js.map +1 -0
  231. package/dist/src/server/services/eval-prompts.d.ts +9 -0
  232. package/dist/src/server/services/eval-prompts.d.ts.map +1 -0
  233. package/dist/src/server/services/eval-prompts.js +170 -0
  234. package/dist/src/server/services/eval-prompts.js.map +1 -0
  235. package/dist/src/server/services/evaluator.d.ts +10 -0
  236. package/dist/src/server/services/evaluator.d.ts.map +1 -0
  237. package/dist/src/server/services/evaluator.js +156 -0
  238. package/dist/src/server/services/evaluator.js.map +1 -0
  239. package/dist/src/server/services/fs-adapter.d.ts +20 -0
  240. package/dist/src/server/services/fs-adapter.d.ts.map +1 -0
  241. package/dist/src/server/services/fs-adapter.js +13 -0
  242. package/dist/src/server/services/fs-adapter.js.map +1 -0
  243. package/dist/src/server/services/instruction-parser.d.ts +26 -0
  244. package/dist/src/server/services/instruction-parser.d.ts.map +1 -0
  245. package/dist/src/server/services/instruction-parser.js +121 -0
  246. package/dist/src/server/services/instruction-parser.js.map +1 -0
  247. package/dist/src/server/services/log-rotator.d.ts +20 -0
  248. package/dist/src/server/services/log-rotator.d.ts.map +1 -0
  249. package/dist/src/server/services/log-rotator.js +60 -0
  250. package/dist/src/server/services/log-rotator.js.map +1 -0
  251. package/dist/src/server/services/logger.d.ts +15 -0
  252. package/dist/src/server/services/logger.d.ts.map +1 -0
  253. package/dist/src/server/services/logger.js +69 -0
  254. package/dist/src/server/services/logger.js.map +1 -0
  255. package/dist/src/server/services/runner.d.ts +12 -0
  256. package/dist/src/server/services/runner.d.ts.map +1 -0
  257. package/dist/src/server/services/runner.js +161 -0
  258. package/dist/src/server/services/runner.js.map +1 -0
  259. package/dist/src/server/services/seeder.d.ts +5 -0
  260. package/dist/src/server/services/seeder.d.ts.map +1 -0
  261. package/dist/src/server/services/seeder.js +79 -0
  262. package/dist/src/server/services/seeder.js.map +1 -0
  263. package/dist/src/server/services/storage.d.ts +35 -0
  264. package/dist/src/server/services/storage.d.ts.map +1 -0
  265. package/dist/src/server/services/storage.js +219 -0
  266. package/dist/src/server/services/storage.js.map +1 -0
  267. package/dist/src/server/services/transcript-formatter.d.ts +18 -0
  268. package/dist/src/server/services/transcript-formatter.d.ts.map +1 -0
  269. package/dist/src/server/services/transcript-formatter.js +156 -0
  270. package/dist/src/server/services/transcript-formatter.js.map +1 -0
  271. package/dist/src/server/services/workspace.d.ts +11 -0
  272. package/dist/src/server/services/workspace.d.ts.map +1 -0
  273. package/dist/src/server/services/workspace.js +113 -0
  274. package/dist/src/server/services/workspace.js.map +1 -0
  275. package/dist/src/server/types/evaluation.d.ts +108 -0
  276. package/dist/src/server/types/evaluation.d.ts.map +1 -0
  277. package/dist/src/server/types/evaluation.js +5 -0
  278. package/dist/src/server/types/evaluation.js.map +1 -0
  279. package/dist/src/server/types/index.d.ts +5 -0
  280. package/dist/src/server/types/index.d.ts.map +1 -0
  281. package/dist/src/server/types/index.js +5 -0
  282. package/dist/src/server/types/index.js.map +1 -0
  283. package/dist/src/server/types/provider.d.ts +99 -0
  284. package/dist/src/server/types/provider.d.ts.map +1 -0
  285. package/dist/src/server/types/provider.js +5 -0
  286. package/dist/src/server/types/provider.js.map +1 -0
  287. package/dist/src/server/types/run.d.ts +31 -0
  288. package/dist/src/server/types/run.d.ts.map +1 -0
  289. package/dist/src/server/types/run.js +5 -0
  290. package/dist/src/server/types/run.js.map +1 -0
  291. package/dist/src/server/types/scenario.d.ts +32 -0
  292. package/dist/src/server/types/scenario.d.ts.map +1 -0
  293. package/dist/src/server/types/scenario.js +5 -0
  294. package/dist/src/server/types/scenario.js.map +1 -0
  295. package/dist/src/server/types/setup.d.ts +99 -0
  296. package/dist/src/server/types/setup.d.ts.map +1 -0
  297. package/dist/src/server/types/setup.js +5 -0
  298. package/dist/src/server/types/setup.js.map +1 -0
  299. package/dist/web/assets/index-C4dw8OpW.css +1 -0
  300. package/dist/web/assets/index-wve8IczO.js +76 -0
  301. package/dist/web/index.html +15 -0
  302. package/docs/schemas/provider-api.example.json +16 -0
  303. package/docs/schemas/provider-oauth.example.json +15 -0
  304. package/docs/schemas/provider.example.json +16 -0
  305. package/docs/schemas/scenario-baseline.example.json +35 -0
  306. package/docs/schemas/scenario-carwash-baseline.example.json +33 -0
  307. package/docs/schemas/scenario-carwash-with-claude-md.example.json +40 -0
  308. package/docs/schemas/scenario-golden-rules-baseline.example.json +51 -0
  309. package/docs/schemas/scenario-golden-rules-with-claude-md.example.json +61 -0
  310. package/docs/schemas/scenario-negative-analysis-baseline.example.json +34 -0
  311. package/docs/schemas/scenario-negative-analysis-with-claude-md.example.json +41 -0
  312. package/docs/schemas/scenario-with-claude-md.example.json +41 -0
  313. package/docs/schemas/scenario.example.json +33 -0
  314. package/package.json +92 -0
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=ctb.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ctb.d.ts","sourceRoot":"","sources":["../../bin/ctb.ts"],"names":[],"mappings":""}
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env node
2
+ import fs from 'node:fs';
3
+ import path from 'node:path';
4
+ import http from 'node:http';
5
+ import { JsonLogger } from '../src/server/services/logger.js';
6
+ import { JsonFileStorage } from '../src/server/services/storage.js';
7
+ import { WorkspaceBuilder } from '../src/server/services/workspace.js';
8
+ import { ScenarioRunner } from '../src/server/services/runner.js';
9
+ import { EvaluationOrchestrator } from '../src/server/services/evaluator.js';
10
+ import { createApp } from '../src/server/index.js';
11
+ import { seedIfEmpty } from '../src/server/services/seeder.js';
12
+ // ─── .env loader (same approach as POC) ──────────────────────────────
13
/**
 * Loads `KEY=VALUE` pairs from a dotenv-style file into `process.env`.
 *
 * Blank lines, lines starting with `#`, and lines without `=` are skipped.
 * Only the first `=` separates key from value, so values may contain `=`.
 * Entries with an empty key or an empty value are ignored, and variables that
 * already exist in the environment are never overwritten.
 *
 * A value wrapped in one matching pair of single or double quotes has those
 * quotes removed, so `KEY="some value"` yields `some value` rather than the
 * literal quoted text.
 *
 * @param envPath Path of the file to read. Nothing happens if it does not exist.
 */
function loadEnvFile(envPath) {
    if (!fs.existsSync(envPath)) {
        return;
    }
    const content = fs.readFileSync(envPath, 'utf-8');
    for (const line of content.split('\n')) {
        // trim() also removes the trailing '\r' of CRLF files.
        const trimmed = line.trim();
        if (!trimmed || trimmed.startsWith('#')) {
            continue;
        }
        const eqIdx = trimmed.indexOf('=');
        if (eqIdx === -1) {
            continue;
        }
        const key = trimmed.slice(0, eqIdx).trim();
        const raw = trimmed.slice(eqIdx + 1).trim();
        // Strip one pair of matching surrounding quotes, if present.
        const quote = raw[0];
        const isQuoted = raw.length >= 2 && (quote === '"' || quote === "'") && raw.endsWith(quote);
        const val = isQuoted ? raw.slice(1, -1) : raw;
        if (key && val && !(key in process.env)) {
            process.env[key] = val;
        }
    }
}
31
/**
 * Parses the CLI arguments into the options used by `main`.
 *
 * Recognised options (scanning starts at index 2, after the node binary and
 * script path):
 *   --port <n> | --port=<n>                 TCP port, decimal digits only, 1-65535
 *   --log-level <l> | --log-level=<l>       one of debug, info, warn, error
 *   --open / --no-open                      whether to open the browser
 *
 * Invalid or missing values leave the default in place. A following token
 * that starts with `--` is treated as the next option rather than as a value,
 * so `--port --no-open` no longer swallows `--no-open`.
 *
 * @param argv Full argument vector, e.g. `process.argv`.
 * @returns `{ port, logLevel, open }` with defaults 3847, 'info' and true.
 */
function parseArgs(argv) {
    const args = {
        port: 3847,
        logLevel: 'info',
        open: true,
    };
    const logLevels = ['debug', 'info', 'warn', 'error'];
    for (let i = 2; i < argv.length; i++) {
        const arg = argv[i];
        // Split "--flag=value" into its two halves; plain flags have no inline value.
        const eqIdx = arg.startsWith('--') ? arg.indexOf('=') : -1;
        const flag = eqIdx === -1 ? arg : arg.slice(0, eqIdx);
        let value = eqIdx === -1 ? undefined : arg.slice(eqIdx + 1);
        if (flag === '--port' || flag === '--log-level') {
            if (value === undefined) {
                const next = argv[i + 1];
                // No value available, or the next token is itself an option.
                if (next === undefined || next.startsWith('--')) {
                    continue;
                }
                value = next;
                i++;
            }
            if (flag === '--port') {
                // Digits only: parseInt would accept inputs such as "8080abc".
                const parsed = /^\d+$/.test(value) ? Number(value) : NaN;
                if (parsed > 0 && parsed < 65536) {
                    args.port = parsed;
                }
            }
            else if (logLevels.includes(value)) {
                args.logLevel = value;
            }
        }
        else if (arg === '--no-open') {
            args.open = false;
        }
        else if (arg === '--open') {
            args.open = true;
        }
    }
    return args;
}
60
+ // ─── Main ────────────────────────────────────────────────────────────
61
/**
 * CLI entry point: builds the services, creates the Express app and starts
 * the HTTP server.
 *
 * Steps, in order:
 *  1. Load `.env` from the current working directory.
 *  2. Parse CLI options (port, log level, whether to open the browser).
 *  3. Create logger and storage rooted at `<cwd>/.claude-test-bench`, then
 *     call `seedIfEmpty` on the storage.
 *  4. Create the runner and evaluator, build the app and listen on the port.
 *  5. Install SIGTERM/SIGINT handlers that close the server, forcing exit
 *     after 5 seconds.
 *
 * @returns A promise that resolves once the server and signal handlers are
 *          set up; it rejects if any step before that throws.
 */
async function main() {
    // Load .env from the user's current working directory
    loadEnvFile(path.join(process.cwd(), '.env'));
    const cliArgs = parseArgs(process.argv);
    const basePath = path.join(process.cwd(), '.claude-test-bench');
    const logFilePath = path.join(basePath, 'logs', 'ctb.log');
    const logger = new JsonLogger(cliArgs.logLevel, {}, undefined, logFilePath);
    const storage = new JsonFileStorage(basePath);
    await seedIfEmpty(storage, logger);
    const workspace = new WorkspaceBuilder();
    const runner = new ScenarioRunner(workspace, logger);
    const evaluator = new EvaluationOrchestrator();
    const app = createApp({ storage, logger, runner, evaluator });
    const server = http.createServer(app);
    const url = `http://localhost:${cliArgs.port}`;
    // Listen failures (e.g. the port is already in use) are delivered as an
    // 'error' event, not as a rejection of main(); without a listener they
    // would surface as an uncaught exception.
    server.on('error', (err) => {
        logger.error('Server error', { port: cliArgs.port, error: err.message });
        process.exit(1);
    });
    server.listen(cliArgs.port, () => {
        logger.info('Server started', { port: cliArgs.port, url });
        if (cliArgs.open) {
            // Dynamic import to handle ESM 'open' package
            import('open')
                .then((mod) => mod.default(url))
                .catch((err) => {
                logger.warn('Failed to open browser', { error: err.message });
            });
        }
    });
    // ─── Graceful shutdown ───────────────────────────────────────────
    const shutdown = () => {
        logger.info('Shutting down...');
        server.close(() => {
            logger.info('Server closed');
            process.exit(0);
        });
        // Force exit after 5 seconds; unref() keeps this timer from holding
        // the process open by itself.
        setTimeout(() => {
            logger.warn('Forced shutdown after timeout');
            process.exit(1);
        }, 5000).unref();
    };
    process.on('SIGTERM', shutdown);
    process.on('SIGINT', shutdown);
}
103
// Start the CLI. Any rejection from main() is reported on stderr and the
// process exits with status 1.
main().catch((err) => {
    // A rejection value is not guaranteed to be an Error (it may even be
    // null), so fall back to its string form when there is no message.
    const message = err?.message ?? String(err);
    console.error('Fatal error:', message);
    process.exit(1);
});
107
+ //# sourceMappingURL=ctb.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ctb.js","sourceRoot":"","sources":["../../bin/ctb.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,UAAU,EAAE,MAAM,kCAAkC,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,mCAAmC,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,qCAAqC,CAAC;AACvE,OAAO,EAAE,cAAc,EAAE,MAAM,kCAAkC,CAAC;AAClE,OAAO,EAAE,sBAAsB,EAAE,MAAM,qCAAqC,CAAC;AAC7E,OAAO,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AACnD,OAAO,EAAE,WAAW,EAAE,MAAM,kCAAkC,CAAC;AAE/D,wEAAwE;AAExE,SAAS,WAAW,CAAC,OAAe;IAClC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,OAAO;IACpC,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAClD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAClD,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACnC,IAAI,KAAK,KAAK,CAAC,CAAC;YAAE,SAAS;QAC3B,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QAC3C,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5C,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;QACzB,CAAC;IACH,CAAC;AACH,CAAC;AAUD,SAAS,SAAS,CAAC,IAAc;IAC/B,MAAM,IAAI,GAAY;QACpB,IAAI,EAAE,IAAI;QACV,QAAQ,EAAE,MAAM;QAChB,IAAI,EAAE,IAAI;KACX,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,GAAG,KAAK,QAAQ,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YAC5C,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACvC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,MAAM,GAAG,KAAK,EAAE,CAAC;gBAC1D,IAAI,CAAC,IAAI,GAAG,MAAM,CAAC;YACrB,CAAC;QACH,CAAC;aAAM,IAAI,GAAG,KAAK,aAAa,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YACxD,MAAM,KAAK,GAAG,IAAI,CAAC,EAAE,CAAC,CAAa,CAAC;YACpC,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAA
C,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACvD,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;YACxB,CAAC;QACH,CAAC;aAAM,IAAI,GAAG,KAAK,WAAW,EAAE,CAAC;YAC/B,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC;QACpB,CAAC;aAAM,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC5B,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACnB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,wEAAwE;AAExE,KAAK,UAAU,IAAI;IACjB,sDAAsD;IACtD,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,CAAC,CAAC,CAAC;IAE9C,MAAM,OAAO,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAExC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,oBAAoB,CAAC,CAAC;IAChE,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAE3D,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,EAAE,SAAS,EAAE,WAAW,CAAC,CAAC;IAE5E,MAAM,OAAO,GAAG,IAAI,eAAe,CAAC,QAAQ,CAAC,CAAC;IAE9C,MAAM,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAEnC,MAAM,SAAS,GAAG,IAAI,gBAAgB,EAAE,CAAC;IACzC,MAAM,MAAM,GAAG,IAAI,cAAc,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IACrD,MAAM,SAAS,GAAG,IAAI,sBAAsB,EAAE,CAAC;IAE/C,MAAM,GAAG,GAAG,SAAS,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAEtC,MAAM,GAAG,GAAG,oBAAoB,OAAO,CAAC,IAAI,EAAE,CAAC;IAE/C,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,EAAE;QAC/B,MAAM,CAAC,IAAI,CAAC,gBAAgB,EAAE,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC;QAE3D,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,8CAA8C;YAC9C,MAAM,CAAC,MAAM,CAAC;iBACX,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;iBAC/B,KAAK,CAAC,CAAC,GAAU,EAAE,EAAE;gBACpB,MAAM,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,KAAK,EAAE,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;YAChE,CAAC,CAAC,CAAC;QACP,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,oEAAoE;IACpE,MAAM,QAAQ,GAAG,GAAS,EAAE;QAC1B,MAAM,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAChC,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE;YAChB,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YAC7B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;QACH,6BAA6B;QAC7B,UAAU,CAAC,GAAG,EAAE;YACd,MAAM,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC,EAAE,
IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;IACnB,CAAC,CAAC;IAEF,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAChC,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;AACjC,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAU,EAAE,EAAE;IAC1B,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
@@ -0,0 +1,13 @@
1
+ import express from 'express';
2
+ import type { IStorage } from './interfaces/storage.js';
3
+ import type { ILogger } from './interfaces/logger.js';
4
+ import type { IRunner } from './interfaces/runner.js';
5
+ import type { IEvaluator } from './interfaces/evaluator.js';
6
/**
 * Dependencies handed to `createApp`.
 * `storage` and `logger` are required; `runner` and `evaluator` are optional.
 */
+ export interface AppDeps {
7
+ storage: IStorage;
8
+ logger: ILogger;
9
+ runner?: IRunner;
10
+ evaluator?: IEvaluator;
11
+ }
12
/** Builds and returns the Express application from the supplied dependencies. */
+ export declare function createApp(deps: AppDeps): express.Express;
13
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/server/index.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAK9B,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AAQ5D,MAAM,WAAW,OAAO;IACtB,OAAO,EAAE,QAAQ,CAAC;IAClB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,UAAU,CAAC;CACxB;AAED,wBAAgB,SAAS,CAAC,IAAI,EAAE,OAAO,GAAG,OAAO,CAAC,OAAO,CAgExD"}
@@ -0,0 +1,72 @@
1
+ import express from 'express';
2
+ import fs from 'node:fs';
3
+ import path from 'node:path';
4
+ import { fileURLToPath } from 'node:url';
5
+ import { createProviderRoutes } from './routes/providers.js';
6
+ import { createScenarioRoutes } from './routes/scenarios.js';
7
+ import { createRunRoutes } from './routes/runs.js';
8
+ import { createEvaluationRoutes } from './routes/evaluations.js';
9
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
/**
 * Builds the Express application.
 *
 * Middleware and routes are registered in this order:
 *  1. CORS headers on every response (preflight `OPTIONS` answered with 204).
 *  2. JSON body parsing with a 10mb limit.
 *  3. `GET /api/health`.
 *  4. `/api/providers` and `/api/scenarios`; `/api/runs` only when a runner
 *     is supplied and `/api/evaluations` only when an evaluator is supplied.
 *  5. Static files from the web build directory, plus an SPA fallback that
 *     serves `index.html` for every non-`/api/` GET.
 *  6. A final error handler that logs the error and answers 500.
 *
 * @param deps Application dependencies: `storage` and `logger` are always
 *             used; `runner` and `evaluator` enable their route groups.
 * @returns The configured Express app (not yet listening).
 */
export function createApp(deps) {
    const { storage, logger, runner, evaluator } = deps;
    const app = express();
    // ─── CORS for dev mode ─────────────────────────────────────────────
    // NOTE(review): these wildcard headers are sent unconditionally, not only
    // in dev mode — confirm that is intended for production use.
    app.use((req, res, next) => {
        res.header('Access-Control-Allow-Origin', '*');
        res.header('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
        res.header('Access-Control-Allow-Headers', 'Content-Type, Authorization');
        if (req.method === 'OPTIONS') {
            res.sendStatus(204);
            return;
        }
        next();
    });
    // ─── JSON body parser ──────────────────────────────────────────────
    app.use(express.json({ limit: '10mb' }));
    // ─── Health check ──────────────────────────────────────────────────
    app.get('/api/health', (_req, res) => {
        res.json({ status: 'ok', timestamp: new Date().toISOString() });
    });
    // ─── API routes ────────────────────────────────────────────────────
    app.use('/api/providers', createProviderRoutes(storage, logger));
    app.use('/api/scenarios', createScenarioRoutes(storage, logger));
    if (runner) {
        app.use('/api/runs', createRunRoutes(storage, runner, logger, evaluator));
    }
    if (evaluator) {
        app.use('/api/evaluations', createEvaluationRoutes(storage, evaluator, logger));
    }
    // ─── Static files (production) ─────────────────────────────────────
    // Try multiple resolution paths: dist/server/../web, or cwd/dist/web
    const webDistCandidates = [
        path.resolve(__dirname, '..', 'web'), // from dist/server/ → dist/web/
        path.resolve(process.cwd(), 'dist', 'web'), // from project root
    ];
    // First candidate that is an existing directory; if none exists, fall
    // back to the first candidate.
    const webDistPath = webDistCandidates.find((p) => {
        try {
            return fs.statSync(p).isDirectory();
        }
        catch {
            return false;
        }
    }) ?? webDistCandidates[0];
    app.use(express.static(webDistPath));
    // SPA fallback: serve index.html for non-API routes
    app.get(/^\/(?!api\/).*/, (_req, res, next) => {
        res.sendFile(path.join(webDistPath, 'index.html'), (err) => {
            if (err) {
                // index.html could not be sent: pass the request on without
                // an error so it falls through to the next handler.
                next();
            }
        });
    });
    // ─── Error handling middleware ─────────────────────────────────────
    // Express recognises an error handler by its four-parameter signature.
    app.use((err, _req, res, next) => {
        logger.error('Unhandled error', {
            error: err.message,
            stack: err.stack,
        });
        // Once the response has started, a new status/body cannot be sent;
        // delegate to Express's default handler, which closes the connection.
        if (res.headersSent) {
            next(err);
            return;
        }
        res.status(500).json({ error: 'Internal server error' });
    });
    return app;
}
72
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/server/index.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAKzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AAEjE,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAS/D,MAAM,UAAU,SAAS,CAAC,IAAa;IACrC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IACxB,MAAM,GAAG,GAAG,OAAO,EAAE,CAAC;IAEtB,sEAAsE;IACtE,GAAG,CAAC,GAAG,CAAC,CAAC,IAAa,EAAE,GAAa,EAAE,IAAkB,EAAE,EAAE;QAC3D,GAAG,CAAC,MAAM,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC;QAC/C,GAAG,CAAC,MAAM,CAAC,8BAA8B,EAAE,iCAAiC,CAAC,CAAC;QAC9E,GAAG,CAAC,MAAM,CAAC,8BAA8B,EAAE,6BAA6B,CAAC,CAAC;QAC1E,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YAC9B,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;YACpB,OAAO;QACT,CAAC;QACD,IAAI,EAAE,CAAC;IACT,CAAC,CAAC,CAAC;IAEH,sEAAsE;IACtE,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;IAEzC,sEAAsE;IACtE,GAAG,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,IAAa,EAAE,GAAa,EAAE,EAAE;QACtD,GAAG,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,wEAAwE;IACxE,GAAG,CAAC,GAAG,CAAC,gBAAgB,EAAE,oBAAoB,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAC3E,GAAG,CAAC,GAAG,CAAC,gBAAgB,EAAE,oBAAoB,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAC3E,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,eAAe,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAChG,CAAC;IACD,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;QACnB,GAAG,CAAC,GAAG,CAAC,kBAAkB,EAAE,sBAAsB,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IACjG,CAAC;IAED,sEAAsE;IACtE,qEAAqE;IACrE,MAAM,iBAAiB,GAAG;QACxB,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,KAAK,CAAC,EAAY,gCAAgC;QAChF,
IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,CAAC,EAAM,oBAAoB;KACrE,CAAC;IACF,MAAM,WAAW,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE;QAC/C,IAAI,CAAC;YAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC;YAAC,OAAO,KAAK,CAAC;QAAC,CAAC;IACtE,CAAC,CAAC,IAAI,iBAAiB,CAAC,CAAC,CAAC,CAAC;IAC3B,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC;IAErC,oDAAoD;IACpD,GAAG,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC,IAAa,EAAE,GAAa,EAAE,IAAkB,EAAE,EAAE;QAC7E,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,YAAY,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE;YACzD,IAAI,GAAG,EAAE,CAAC;gBACR,IAAI,EAAE,CAAC;YACT,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,uEAAuE;IACvE,GAAG,CAAC,GAAG,CAAC,CAAC,GAAU,EAAE,IAAa,EAAE,GAAa,EAAE,KAAmB,EAAE,EAAE;QACxE,MAAM,CAAC,KAAK,CAAC,iBAAiB,EAAE;YAC9B,KAAK,EAAE,GAAG,CAAC,OAAO;YAClB,KAAK,EAAE,GAAG,CAAC,KAAK;SACjB,CAAC,CAAC;QACH,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,OAAO,GAAG,CAAC;AACb,CAAC"}
@@ -0,0 +1,15 @@
1
+ import type { Provider, Scenario, Run, Evaluation, EvaluationRequest, EvaluationStatus, SDKMessageRecord } from '../types/index.js';
2
+ /** Metadata describing one evaluation message; passed as `info` to `EvaluationCallbacks.onMessage`. */ export interface EvalMessageInfo {
3
+ readonly phase: 'score' | 'compliance' | 'debate' | 'synthesis'; // one of four phase labels
4
+ readonly evaluatorRole: string; // NOTE(review): presumably the role of the evaluator that produced the message — confirm
5
+ readonly roundNumber: number; // NOTE(review): numbering base (0 or 1) is not visible from this declaration
6
+ }
7
+ /** Callback set accepted by `IEvaluator.evaluateRun` as its `callbacks` argument. */ export interface EvaluationCallbacks {
8
+ onStatusChange(status: EvaluationStatus): void; // receives an EvaluationStatus; presumably invoked on each status transition — confirm
9
+ onProgress(step: string, detail?: string): void; // step label plus an optional detail string
10
+ onMessage(info: EvalMessageInfo, message: SDKMessageRecord): void; // a message record together with its phase/role/round metadata
11
+ }
12
+ /** Evaluator contract: one method that takes a run with its scenario, provider, request and callbacks, and resolves to an `Evaluation`. */ export interface IEvaluator {
13
+ evaluateRun(run: Run, scenario: Scenario, provider: Provider, request: EvaluationRequest, callbacks: EvaluationCallbacks): Promise<Evaluation>; // NOTE(review): rejection behaviour on failure is not visible here
14
+ }
15
+ //# sourceMappingURL=evaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../../../src/server/interfaces/evaluator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,QAAQ,EACR,QAAQ,EACR,GAAG,EACH,UAAU,EACV,iBAAiB,EACjB,gBAAgB,EAChB,gBAAgB,EACjB,MAAM,mBAAmB,CAAC;AAE3B,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,KAAK,EAAE,OAAO,GAAG,YAAY,GAAG,QAAQ,GAAG,WAAW,CAAC;IAChE,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,mBAAmB;IAClC,cAAc,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC/C,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAChD,SAAS,CAAC,IAAI,EAAE,eAAe,EAAE,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;CACnE;AAED,MAAM,WAAW,UAAU;IACzB,WAAW,CACT,GAAG,EAAE,GAAG,EACR,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,QAAQ,EAClB,OAAO,EAAE,iBAAiB,EAC1B,SAAS,EAAE,mBAAmB,GAC7B,OAAO,CAAC,UAAU,CAAC,CAAC;CACxB"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=evaluator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../../../src/server/interfaces/evaluator.ts"],"names":[],"mappings":""}
@@ -0,0 +1,9 @@
1
+ /** The four log level names; they coincide with the level method names on `ILogger`. */ export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
2
+ /** Logger contract: one method per level taking a message and an optional attribute map, plus `child` to obtain another `ILogger`. */ export interface ILogger {
3
+ debug(msg: string, attrs?: Record<string, unknown>): void;
4
+ info(msg: string, attrs?: Record<string, unknown>): void;
5
+ warn(msg: string, attrs?: Record<string, unknown>): void;
6
+ error(msg: string, attrs?: Record<string, unknown>): void;
7
+ child(attrs: Record<string, unknown>): ILogger; // NOTE(review): presumably the returned logger attaches `attrs` to every entry — confirm
8
+ }
9
+ //# sourceMappingURL=logger.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../../src/server/interfaces/logger.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,QAAQ,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;AAE3D,MAAM,WAAW,OAAO;IACtB,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IAC1D,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACzD,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACzD,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IAC1D,KAAK,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC;CAChD"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=logger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.js","sourceRoot":"","sources":["../../../src/server/interfaces/logger.ts"],"names":[],"mappings":""}
@@ -0,0 +1,9 @@
1
+ import type { Provider, Scenario, Run, RunStatus, SDKMessageRecord } from '../types/index.js';
2
+ /** Callback set accepted by `IRunner.executeRun` as its `callbacks` argument. */ export interface RunCallbacks {
3
+ onMessage(message: SDKMessageRecord): void; // receives one SDK message record
4
+ onStatusChange(status: RunStatus): void; // receives a RunStatus; presumably invoked on each status transition — confirm
5
+ }
6
+ /** Runner contract: one method that takes a provider, scenario, run and callbacks, and resolves to a `Run`. */ export interface IRunner {
7
+ executeRun(provider: Provider, scenario: Scenario, run: Run, callbacks: RunCallbacks, abortController?: AbortController): Promise<Run>; // abortController is optional; presumably used to cancel the run — confirm
8
+ }
9
+ //# sourceMappingURL=runner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../../src/server/interfaces/runner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAE9F,MAAM,WAAW,YAAY;IAC3B,SAAS,CAAC,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC3C,cAAc,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CAAC;CACzC;AAED,MAAM,WAAW,OAAO;IACtB,UAAU,CACR,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,QAAQ,EAClB,GAAG,EAAE,GAAG,EACR,SAAS,EAAE,YAAY,EACvB,eAAe,CAAC,EAAE,eAAe,GAChC,OAAO,CAAC,GAAG,CAAC,CAAC;CACjB"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=runner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runner.js","sourceRoot":"","sources":["../../../src/server/interfaces/runner.ts"],"names":[],"mappings":""}
@@ -0,0 +1,36 @@
1
+ import type { Provider, Scenario, ScenarioCategory, Run, RunStatus, Evaluation, EvaluationStatus } from '../types/index.js';
2
+ /** Optional criteria accepted by `IStorage.listProviders`; both fields may be omitted. */ export interface ProviderFilter {
3
+ readonly provider?: 'api' | 'oauth'; // one of two literal provider kinds
4
+ readonly model?: string; // NOTE(review): exact vs. partial matching is not visible here
5
+ }
6
+ /** Optional criteria accepted by `IStorage.listScenarios`. */ export interface ScenarioFilter {
7
+ readonly category?: ScenarioCategory; // may be omitted
8
+ }
9
+ /** Optional criteria accepted by `IStorage.listRuns`; every field may be omitted. */ export interface RunFilter {
10
+ readonly providerId?: string;
11
+ readonly scenarioId?: string;
12
+ readonly status?: RunStatus; // NOTE(review): how multiple set fields combine (presumably AND) is not visible here
13
+ }
14
+ /** Optional criteria accepted by `IStorage.listEvaluations`; both fields may be omitted. */ export interface EvaluationFilter {
15
+ readonly runId?: string;
16
+ readonly status?: EvaluationStatus;
17
+ }
18
+ /** Persistence contract for providers, scenarios, runs and evaluations: each entity has get / list / save / delete methods, all returning promises. */ export interface IStorage {
19
+ getProvider(id: string): Promise<Provider | undefined>; // may resolve to undefined (presumably when no entity has this id — confirm); the other get* methods have the same shape
20
+ listProviders(filter?: ProviderFilter): Promise<readonly Provider[]>; // filter is optional; the resolved array is typed readonly
21
+ saveProvider(provider: Provider): Promise<void>; // NOTE(review): create-vs-overwrite semantics are not visible here
22
+ deleteProvider(id: string): Promise<boolean>; // NOTE(review): boolean presumably reports whether something was removed — confirm; same for the other delete* methods
23
+ getScenario(id: string): Promise<Scenario | undefined>;
24
+ listScenarios(filter?: ScenarioFilter): Promise<readonly Scenario[]>;
25
+ saveScenario(scenario: Scenario): Promise<void>;
26
+ deleteScenario(id: string): Promise<boolean>;
27
+ getRun(id: string): Promise<Run | undefined>;
28
+ listRuns(filter?: RunFilter): Promise<readonly Run[]>;
29
+ saveRun(run: Run): Promise<void>;
30
+ deleteRun(id: string): Promise<boolean>;
31
+ getEvaluation(id: string): Promise<Evaluation | undefined>;
32
+ listEvaluations(filter?: EvaluationFilter): Promise<readonly Evaluation[]>;
33
+ saveEvaluation(evaluation: Evaluation): Promise<void>;
34
+ deleteEvaluation(id: string): Promise<boolean>;
35
+ }
36
+ //# sourceMappingURL=storage.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"storage.d.ts","sourceRoot":"","sources":["../../../src/server/interfaces/storage.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,QAAQ,EACR,QAAQ,EACR,gBAAgB,EAChB,GAAG,EACH,SAAS,EACT,UAAU,EACV,gBAAgB,EACjB,MAAM,mBAAmB,CAAC;AAE3B,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,QAAQ,CAAC,EAAE,KAAK,GAAG,OAAO,CAAC;IACpC,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CACtC;AAED,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,MAAM,CAAC,EAAE,SAAS,CAAC;CAC7B;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,CAAC,EAAE,gBAAgB,CAAC;CACpC;AAED,MAAM,WAAW,QAAQ;IAEvB,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,GAAG,SAAS,CAAC,CAAC;IACvD,aAAa,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,SAAS,QAAQ,EAAE,CAAC,CAAC;IACrE,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAChD,cAAc,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAG7C,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,GAAG,SAAS,CAAC,CAAC;IACvD,aAAa,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,SAAS,QAAQ,EAAE,CAAC,CAAC;IACrE,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAChD,cAAc,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAG7C,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,GAAG,SAAS,CAAC,CAAC;IAC7C,QAAQ,CAAC,MAAM,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,EAAE,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACjC,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAGxC,aAAa,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,GAAG,SAAS,CAAC,CAAC;IAC3D,eAAe,CAAC,MAAM,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,SAAS,UAAU,EAAE,CAAC,CAAC;IAC3E,cAAc,CAAC,UAAU,EAAE,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACtD,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;CAChD"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=storage.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"storage.js","sourceRoot":"","sources":["../../../src/server/interfaces/storage.ts"],"names":[],"mappings":""}
@@ -0,0 +1,9 @@
1
+ import type { Scenario } from '../types/index.js';
2
+ /** Value resolved by `IWorkspaceBuilder.createWorkspace`: a path plus an async cleanup function. */ export interface WorkspaceResult {
3
+ readonly workspacePath: string; // NOTE(review): presumably a filesystem path to the created workspace — confirm
4
+ readonly cleanup: () => Promise<void>; // async, takes no arguments; presumably removes the workspace — confirm
5
+ }
6
+ /** Workspace builder contract: one method that takes a scenario and resolves to a `WorkspaceResult`. */ export interface IWorkspaceBuilder {
7
+ createWorkspace(scenario: Scenario): Promise<WorkspaceResult>; // NOTE(review): who is responsible for calling `cleanup` is not visible here
8
+ }
9
+ //# sourceMappingURL=workspace.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"workspace.d.ts","sourceRoot":"","sources":["../../../src/server/interfaces/workspace.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAElD,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,OAAO,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;CACvC;AAED,MAAM,WAAW,iBAAiB;IAChC,eAAe,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;CAC/D"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=workspace.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"workspace.js","sourceRoot":"","sources":["../../../src/server/interfaces/workspace.ts"],"names":[],"mappings":""}
@@ -0,0 +1,23 @@
1
+ import type { Evaluation } from '../types/index.js';
2
+ /** Validated evaluator entry from the request body (providerId-based); this is the success shape returned by `validateEvalEntry`. */
3
+ export interface EvalEntry {
4
+ readonly providerId: string; // non-empty string once validated
5
+ readonly role: string; // non-empty string once validated
6
+ }
7
+ /** Validate an evaluator entry from request body. Returns the parsed entry on success, or an error message string naming the first failed check (so callers must test `typeof result === 'string'`). */
8
+ export declare function validateEvalEntry(raw: unknown): EvalEntry | string;
9
+ /** One unit of work accepted by `EvalQueue.enqueue`. */ export interface EvalQueueEntry {
10
+ evaluation: Evaluation; // the evaluation this entry belongs to; NOTE(review): not read by the queue code visible in this package chunk
11
+ execute: () => Promise<void>; // invoked by the queue when a concurrency slot is free
12
+ }
13
+ /** Concurrency-limited queue for evaluation execution: entries are started in insertion order while fewer than `maxConcurrency` are active. */ export declare class EvalQueue {
14
+ private readonly queue;
15
+ private active;
16
+ private readonly maxConcurrency;
17
+ constructor(maxConcurrency?: number); // the emitted implementation defaults this to 1
18
+ enqueue(entry: EvalQueueEntry): void; // appends the entry and immediately tries to start work
19
+ get pendingCount(): number; // entries still waiting to start
20
+ get activeCount(): number; // entries started and not yet settled
21
+ private drain;
22
+ }
23
+ //# sourceMappingURL=eval-queue.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-queue.d.ts","sourceRoot":"","sources":["../../../src/server/routes/eval-queue.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAMpD,0EAA0E;AAC1E,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,2FAA2F;AAC3F,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,OAAO,GAAG,SAAS,GAAG,MAAM,CAMlE;AAMD,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,UAAU,CAAC;IACvB,OAAO,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;CAC9B;AAED,qBAAa,SAAS;IACpB,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAwB;IAC9C,OAAO,CAAC,MAAM,CAAK;IACnB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,cAAc,SAAI;IAI9B,OAAO,CAAC,KAAK,EAAE,cAAc,GAAG,IAAI;IAKpC,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,WAAW,IAAI,MAAM,CAExB;YAEa,KAAK;CAWpB"}
@@ -0,0 +1,45 @@
1
+ // ---------------------------------------------------------------------------
2
+ // EvalQueue — concurrency-limited queue for evaluation execution
3
+ // ---------------------------------------------------------------------------
4
+ /** Validate an evaluator entry from request body. Returns parsed entry or error string. */
5
+ export function validateEvalEntry(raw) {
6
+ if (!raw || typeof raw !== 'object')
7
+ return 'Each evaluator must be an object';
8
+ const obj = raw;
9
+ if (!obj.role || typeof obj.role !== 'string')
10
+ return 'Each evaluator must have a string role';
11
+ if (!obj.providerId || typeof obj.providerId !== 'string')
12
+ return 'Each evaluator must have a string providerId';
13
+ return { providerId: obj.providerId, role: obj.role };
14
+ }
15
+ export class EvalQueue {
16
+ queue = [];
17
+ active = 0;
18
+ maxConcurrency;
19
+ constructor(maxConcurrency = 1) {
20
+ this.maxConcurrency = maxConcurrency;
21
+ }
22
+ enqueue(entry) {
23
+ this.queue.push(entry);
24
+ void this.drain();
25
+ }
26
+ get pendingCount() {
27
+ return this.queue.length;
28
+ }
29
+ get activeCount() {
30
+ return this.active;
31
+ }
32
+ async drain() {
33
+ while (this.active < this.maxConcurrency && this.queue.length > 0) {
34
+ const next = this.queue.shift();
35
+ if (!next)
36
+ break;
37
+ this.active++;
38
+ next.execute().finally(() => {
39
+ this.active--;
40
+ void this.drain();
41
+ });
42
+ }
43
+ }
44
+ }
45
+ //# sourceMappingURL=eval-queue.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-queue.js","sourceRoot":"","sources":["../../../src/server/routes/eval-queue.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,iEAAiE;AACjE,8EAA8E;AAc9E,2FAA2F;AAC3F,MAAM,UAAU,iBAAiB,CAAC,GAAY;IAC5C,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,kCAAkC,CAAC;IAC/E,MAAM,GAAG,GAAG,GAA8B,CAAC;IAC3C,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ;QAAE,OAAO,wCAAwC,CAAC;IAC/F,IAAI,CAAC,GAAG,CAAC,UAAU,IAAI,OAAO,GAAG,CAAC,UAAU,KAAK,QAAQ;QAAE,OAAO,8CAA8C,CAAC;IACjH,OAAO,EAAE,UAAU,EAAE,GAAG,CAAC,UAAoB,EAAE,IAAI,EAAE,GAAG,CAAC,IAAc,EAAE,CAAC;AAC5E,CAAC;AAWD,MAAM,OAAO,SAAS;IACH,KAAK,GAAqB,EAAE,CAAC;IACtC,MAAM,GAAG,CAAC,CAAC;IACF,cAAc,CAAS;IAExC,YAAY,cAAc,GAAG,CAAC;QAC5B,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;IACvC,CAAC;IAED,OAAO,CAAC,KAAqB;QAC3B,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACvB,KAAK,IAAI,CAAC,KAAK,EAAE,CAAC;IACpB,CAAC;IAED,IAAI,YAAY;QACd,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;IAC3B,CAAC;IAED,IAAI,WAAW;QACb,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAEO,KAAK,CAAC,KAAK;QACjB,OAAO,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClE,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,CAAC,IAAI;gBAAE,MAAM;YACjB,IAAI,CAAC,MAAM,EAAE,CAAC;YACd,IAAI,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE;gBAC1B,IAAI,CAAC,MAAM,EAAE,CAAC;gBACd,KAAK,IAAI,CAAC,KAAK,EAAE,CAAC;YACpB,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,8 @@
1
+ import { Router } from 'express';
2
+ import type { IStorage } from '../interfaces/storage.js';
3
+ import type { ILogger } from '../interfaces/logger.js';
4
+ import type { IEvaluator } from '../interfaces/evaluator.js';
5
+ import { EvalQueue } from './eval-queue.js';
6
+ export { EvalQueue, validateEvalEntry } from './eval-queue.js';
7
+ /** Build the express `Router` for evaluation routes from a storage, an evaluator and a logger. `queue` is optional; NOTE(review): the behaviour when it is omitted is not visible from this declaration. */ export declare function createEvaluationRoutes(storage: IStorage, evaluator: IEvaluator, logger: ILogger, queue?: EvalQueue): Router;
8
+ //# sourceMappingURL=evaluations.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluations.d.ts","sourceRoot":"","sources":["../../../src/server/routes/evaluations.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAGjC,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AACzD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,KAAK,EAAE,UAAU,EAAuB,MAAM,4BAA4B,CAAC;AAIlF,OAAO,EAAE,SAAS,EAAqB,MAAM,iBAAiB,CAAC;AAI/D,OAAO,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAuB/D,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,QAAQ,EACjB,SAAS,EAAE,UAAU,EACrB,MAAM,EAAE,OAAO,EACf,KAAK,CAAC,EAAE,SAAS,GAChB,MAAM,CA4MR"}