claude-test-bench 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +474 -0
  3. package/dist/bin/ctb.d.ts +3 -0
  4. package/dist/bin/ctb.d.ts.map +1 -0
  5. package/dist/bin/ctb.js +107 -0
  6. package/dist/bin/ctb.js.map +1 -0
  7. package/dist/server/index.d.ts +13 -0
  8. package/dist/server/index.d.ts.map +1 -0
  9. package/dist/server/index.js +72 -0
  10. package/dist/server/index.js.map +1 -0
  11. package/dist/server/interfaces/evaluator.d.ts +15 -0
  12. package/dist/server/interfaces/evaluator.d.ts.map +1 -0
  13. package/dist/server/interfaces/evaluator.js +2 -0
  14. package/dist/server/interfaces/evaluator.js.map +1 -0
  15. package/dist/server/interfaces/logger.d.ts +9 -0
  16. package/dist/server/interfaces/logger.d.ts.map +1 -0
  17. package/dist/server/interfaces/logger.js +2 -0
  18. package/dist/server/interfaces/logger.js.map +1 -0
  19. package/dist/server/interfaces/runner.d.ts +9 -0
  20. package/dist/server/interfaces/runner.d.ts.map +1 -0
  21. package/dist/server/interfaces/runner.js +2 -0
  22. package/dist/server/interfaces/runner.js.map +1 -0
  23. package/dist/server/interfaces/storage.d.ts +36 -0
  24. package/dist/server/interfaces/storage.d.ts.map +1 -0
  25. package/dist/server/interfaces/storage.js +2 -0
  26. package/dist/server/interfaces/storage.js.map +1 -0
  27. package/dist/server/interfaces/workspace.d.ts +9 -0
  28. package/dist/server/interfaces/workspace.d.ts.map +1 -0
  29. package/dist/server/interfaces/workspace.js +2 -0
  30. package/dist/server/interfaces/workspace.js.map +1 -0
  31. package/dist/server/routes/eval-queue.d.ts +23 -0
  32. package/dist/server/routes/eval-queue.d.ts.map +1 -0
  33. package/dist/server/routes/eval-queue.js +45 -0
  34. package/dist/server/routes/eval-queue.js.map +1 -0
  35. package/dist/server/routes/evaluations.d.ts +8 -0
  36. package/dist/server/routes/evaluations.d.ts.map +1 -0
  37. package/dist/server/routes/evaluations.js +217 -0
  38. package/dist/server/routes/evaluations.js.map +1 -0
  39. package/dist/server/routes/providers.d.ts +5 -0
  40. package/dist/server/routes/providers.d.ts.map +1 -0
  41. package/dist/server/routes/providers.js +194 -0
  42. package/dist/server/routes/providers.js.map +1 -0
  43. package/dist/server/routes/run-queue.d.ts +17 -0
  44. package/dist/server/routes/run-queue.d.ts.map +1 -0
  45. package/dist/server/routes/run-queue.js +34 -0
  46. package/dist/server/routes/run-queue.js.map +1 -0
  47. package/dist/server/routes/run-sse.d.ts +18 -0
  48. package/dist/server/routes/run-sse.d.ts.map +1 -0
  49. package/dist/server/routes/run-sse.js +57 -0
  50. package/dist/server/routes/run-sse.js.map +1 -0
  51. package/dist/server/routes/runs.d.ts +9 -0
  52. package/dist/server/routes/runs.d.ts.map +1 -0
  53. package/dist/server/routes/runs.js +379 -0
  54. package/dist/server/routes/runs.js.map +1 -0
  55. package/dist/server/routes/scenarios.d.ts +5 -0
  56. package/dist/server/routes/scenarios.d.ts.map +1 -0
  57. package/dist/server/routes/scenarios.js +209 -0
  58. package/dist/server/routes/scenarios.js.map +1 -0
  59. package/dist/server/routes/setups.d.ts +5 -0
  60. package/dist/server/routes/setups.d.ts.map +1 -0
  61. package/dist/server/routes/setups.js +194 -0
  62. package/dist/server/routes/setups.js.map +1 -0
  63. package/dist/server/services/agent-mapper.d.ts +12 -0
  64. package/dist/server/services/agent-mapper.d.ts.map +1 -0
  65. package/dist/server/services/agent-mapper.js +75 -0
  66. package/dist/server/services/agent-mapper.js.map +1 -0
  67. package/dist/server/services/env-builder.d.ts +10 -0
  68. package/dist/server/services/env-builder.d.ts.map +1 -0
  69. package/dist/server/services/env-builder.js +50 -0
  70. package/dist/server/services/env-builder.js.map +1 -0
  71. package/dist/server/services/eval-helpers.d.ts +22 -0
  72. package/dist/server/services/eval-helpers.d.ts.map +1 -0
  73. package/dist/server/services/eval-helpers.js +75 -0
  74. package/dist/server/services/eval-helpers.js.map +1 -0
  75. package/dist/server/services/eval-parsers-debate-impl.d.ts +11 -0
  76. package/dist/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
  77. package/dist/server/services/eval-parsers-debate-impl.js +133 -0
  78. package/dist/server/services/eval-parsers-debate-impl.js.map +1 -0
  79. package/dist/server/services/eval-parsers.d.ts +24 -0
  80. package/dist/server/services/eval-parsers.d.ts.map +1 -0
  81. package/dist/server/services/eval-parsers.js +153 -0
  82. package/dist/server/services/eval-parsers.js.map +1 -0
  83. package/dist/server/services/eval-prompts.d.ts +9 -0
  84. package/dist/server/services/eval-prompts.d.ts.map +1 -0
  85. package/dist/server/services/eval-prompts.js +170 -0
  86. package/dist/server/services/eval-prompts.js.map +1 -0
  87. package/dist/server/services/evaluator.d.ts +10 -0
  88. package/dist/server/services/evaluator.d.ts.map +1 -0
  89. package/dist/server/services/evaluator.js +156 -0
  90. package/dist/server/services/evaluator.js.map +1 -0
  91. package/dist/server/services/fs-adapter.d.ts +20 -0
  92. package/dist/server/services/fs-adapter.d.ts.map +1 -0
  93. package/dist/server/services/fs-adapter.js +13 -0
  94. package/dist/server/services/fs-adapter.js.map +1 -0
  95. package/dist/server/services/instruction-parser.d.ts +26 -0
  96. package/dist/server/services/instruction-parser.d.ts.map +1 -0
  97. package/dist/server/services/instruction-parser.js +121 -0
  98. package/dist/server/services/instruction-parser.js.map +1 -0
  99. package/dist/server/services/log-rotator.d.ts +20 -0
  100. package/dist/server/services/log-rotator.d.ts.map +1 -0
  101. package/dist/server/services/log-rotator.js +60 -0
  102. package/dist/server/services/log-rotator.js.map +1 -0
  103. package/dist/server/services/logger.d.ts +15 -0
  104. package/dist/server/services/logger.d.ts.map +1 -0
  105. package/dist/server/services/logger.js +69 -0
  106. package/dist/server/services/logger.js.map +1 -0
  107. package/dist/server/services/runner.d.ts +12 -0
  108. package/dist/server/services/runner.d.ts.map +1 -0
  109. package/dist/server/services/runner.js +161 -0
  110. package/dist/server/services/runner.js.map +1 -0
  111. package/dist/server/services/seeder.d.ts +5 -0
  112. package/dist/server/services/seeder.d.ts.map +1 -0
  113. package/dist/server/services/seeder.js +79 -0
  114. package/dist/server/services/seeder.js.map +1 -0
  115. package/dist/server/services/storage-test-helpers.d.ts +21 -0
  116. package/dist/server/services/storage-test-helpers.d.ts.map +1 -0
  117. package/dist/server/services/storage-test-helpers.js +158 -0
  118. package/dist/server/services/storage-test-helpers.js.map +1 -0
  119. package/dist/server/services/storage.d.ts +35 -0
  120. package/dist/server/services/storage.d.ts.map +1 -0
  121. package/dist/server/services/storage.js +219 -0
  122. package/dist/server/services/storage.js.map +1 -0
  123. package/dist/server/services/transcript-formatter.d.ts +18 -0
  124. package/dist/server/services/transcript-formatter.d.ts.map +1 -0
  125. package/dist/server/services/transcript-formatter.js +156 -0
  126. package/dist/server/services/transcript-formatter.js.map +1 -0
  127. package/dist/server/services/workspace.d.ts +11 -0
  128. package/dist/server/services/workspace.d.ts.map +1 -0
  129. package/dist/server/services/workspace.js +113 -0
  130. package/dist/server/services/workspace.js.map +1 -0
  131. package/dist/server/types/evaluation.d.ts +108 -0
  132. package/dist/server/types/evaluation.d.ts.map +1 -0
  133. package/dist/server/types/evaluation.js +5 -0
  134. package/dist/server/types/evaluation.js.map +1 -0
  135. package/dist/server/types/index.d.ts +5 -0
  136. package/dist/server/types/index.d.ts.map +1 -0
  137. package/dist/server/types/index.js +5 -0
  138. package/dist/server/types/index.js.map +1 -0
  139. package/dist/server/types/provider.d.ts +99 -0
  140. package/dist/server/types/provider.d.ts.map +1 -0
  141. package/dist/server/types/provider.js +5 -0
  142. package/dist/server/types/provider.js.map +1 -0
  143. package/dist/server/types/run.d.ts +31 -0
  144. package/dist/server/types/run.d.ts.map +1 -0
  145. package/dist/server/types/run.js +5 -0
  146. package/dist/server/types/run.js.map +1 -0
  147. package/dist/server/types/scenario.d.ts +32 -0
  148. package/dist/server/types/scenario.d.ts.map +1 -0
  149. package/dist/server/types/scenario.js +5 -0
  150. package/dist/server/types/scenario.js.map +1 -0
  151. package/dist/server/types/setup.d.ts +99 -0
  152. package/dist/server/types/setup.d.ts.map +1 -0
  153. package/dist/server/types/setup.js +5 -0
  154. package/dist/server/types/setup.js.map +1 -0
  155. package/dist/src/server/index.d.ts +13 -0
  156. package/dist/src/server/index.d.ts.map +1 -0
  157. package/dist/src/server/index.js +72 -0
  158. package/dist/src/server/index.js.map +1 -0
  159. package/dist/src/server/interfaces/evaluator.d.ts +15 -0
  160. package/dist/src/server/interfaces/evaluator.d.ts.map +1 -0
  161. package/dist/src/server/interfaces/evaluator.js +2 -0
  162. package/dist/src/server/interfaces/evaluator.js.map +1 -0
  163. package/dist/src/server/interfaces/logger.d.ts +9 -0
  164. package/dist/src/server/interfaces/logger.d.ts.map +1 -0
  165. package/dist/src/server/interfaces/logger.js +2 -0
  166. package/dist/src/server/interfaces/logger.js.map +1 -0
  167. package/dist/src/server/interfaces/runner.d.ts +9 -0
  168. package/dist/src/server/interfaces/runner.d.ts.map +1 -0
  169. package/dist/src/server/interfaces/runner.js +2 -0
  170. package/dist/src/server/interfaces/runner.js.map +1 -0
  171. package/dist/src/server/interfaces/storage.d.ts +36 -0
  172. package/dist/src/server/interfaces/storage.d.ts.map +1 -0
  173. package/dist/src/server/interfaces/storage.js +2 -0
  174. package/dist/src/server/interfaces/storage.js.map +1 -0
  175. package/dist/src/server/interfaces/workspace.d.ts +9 -0
  176. package/dist/src/server/interfaces/workspace.d.ts.map +1 -0
  177. package/dist/src/server/interfaces/workspace.js +2 -0
  178. package/dist/src/server/interfaces/workspace.js.map +1 -0
  179. package/dist/src/server/routes/eval-queue.d.ts +23 -0
  180. package/dist/src/server/routes/eval-queue.d.ts.map +1 -0
  181. package/dist/src/server/routes/eval-queue.js +45 -0
  182. package/dist/src/server/routes/eval-queue.js.map +1 -0
  183. package/dist/src/server/routes/evaluations.d.ts +8 -0
  184. package/dist/src/server/routes/evaluations.d.ts.map +1 -0
  185. package/dist/src/server/routes/evaluations.js +217 -0
  186. package/dist/src/server/routes/evaluations.js.map +1 -0
  187. package/dist/src/server/routes/providers.d.ts +5 -0
  188. package/dist/src/server/routes/providers.d.ts.map +1 -0
  189. package/dist/src/server/routes/providers.js +194 -0
  190. package/dist/src/server/routes/providers.js.map +1 -0
  191. package/dist/src/server/routes/run-queue.d.ts +17 -0
  192. package/dist/src/server/routes/run-queue.d.ts.map +1 -0
  193. package/dist/src/server/routes/run-queue.js +34 -0
  194. package/dist/src/server/routes/run-queue.js.map +1 -0
  195. package/dist/src/server/routes/run-sse.d.ts +18 -0
  196. package/dist/src/server/routes/run-sse.d.ts.map +1 -0
  197. package/dist/src/server/routes/run-sse.js +57 -0
  198. package/dist/src/server/routes/run-sse.js.map +1 -0
  199. package/dist/src/server/routes/runs.d.ts +9 -0
  200. package/dist/src/server/routes/runs.d.ts.map +1 -0
  201. package/dist/src/server/routes/runs.js +379 -0
  202. package/dist/src/server/routes/runs.js.map +1 -0
  203. package/dist/src/server/routes/scenarios.d.ts +5 -0
  204. package/dist/src/server/routes/scenarios.d.ts.map +1 -0
  205. package/dist/src/server/routes/scenarios.js +209 -0
  206. package/dist/src/server/routes/scenarios.js.map +1 -0
  207. package/dist/src/server/routes/setups.d.ts +5 -0
  208. package/dist/src/server/routes/setups.d.ts.map +1 -0
  209. package/dist/src/server/routes/setups.js +194 -0
  210. package/dist/src/server/routes/setups.js.map +1 -0
  211. package/dist/src/server/services/agent-mapper.d.ts +12 -0
  212. package/dist/src/server/services/agent-mapper.d.ts.map +1 -0
  213. package/dist/src/server/services/agent-mapper.js +75 -0
  214. package/dist/src/server/services/agent-mapper.js.map +1 -0
  215. package/dist/src/server/services/env-builder.d.ts +10 -0
  216. package/dist/src/server/services/env-builder.d.ts.map +1 -0
  217. package/dist/src/server/services/env-builder.js +50 -0
  218. package/dist/src/server/services/env-builder.js.map +1 -0
  219. package/dist/src/server/services/eval-helpers.d.ts +22 -0
  220. package/dist/src/server/services/eval-helpers.d.ts.map +1 -0
  221. package/dist/src/server/services/eval-helpers.js +75 -0
  222. package/dist/src/server/services/eval-helpers.js.map +1 -0
  223. package/dist/src/server/services/eval-parsers-debate-impl.d.ts +11 -0
  224. package/dist/src/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
  225. package/dist/src/server/services/eval-parsers-debate-impl.js +133 -0
  226. package/dist/src/server/services/eval-parsers-debate-impl.js.map +1 -0
  227. package/dist/src/server/services/eval-parsers.d.ts +24 -0
  228. package/dist/src/server/services/eval-parsers.d.ts.map +1 -0
  229. package/dist/src/server/services/eval-parsers.js +153 -0
  230. package/dist/src/server/services/eval-parsers.js.map +1 -0
  231. package/dist/src/server/services/eval-prompts.d.ts +9 -0
  232. package/dist/src/server/services/eval-prompts.d.ts.map +1 -0
  233. package/dist/src/server/services/eval-prompts.js +170 -0
  234. package/dist/src/server/services/eval-prompts.js.map +1 -0
  235. package/dist/src/server/services/evaluator.d.ts +10 -0
  236. package/dist/src/server/services/evaluator.d.ts.map +1 -0
  237. package/dist/src/server/services/evaluator.js +156 -0
  238. package/dist/src/server/services/evaluator.js.map +1 -0
  239. package/dist/src/server/services/fs-adapter.d.ts +20 -0
  240. package/dist/src/server/services/fs-adapter.d.ts.map +1 -0
  241. package/dist/src/server/services/fs-adapter.js +13 -0
  242. package/dist/src/server/services/fs-adapter.js.map +1 -0
  243. package/dist/src/server/services/instruction-parser.d.ts +26 -0
  244. package/dist/src/server/services/instruction-parser.d.ts.map +1 -0
  245. package/dist/src/server/services/instruction-parser.js +121 -0
  246. package/dist/src/server/services/instruction-parser.js.map +1 -0
  247. package/dist/src/server/services/log-rotator.d.ts +20 -0
  248. package/dist/src/server/services/log-rotator.d.ts.map +1 -0
  249. package/dist/src/server/services/log-rotator.js +60 -0
  250. package/dist/src/server/services/log-rotator.js.map +1 -0
  251. package/dist/src/server/services/logger.d.ts +15 -0
  252. package/dist/src/server/services/logger.d.ts.map +1 -0
  253. package/dist/src/server/services/logger.js +69 -0
  254. package/dist/src/server/services/logger.js.map +1 -0
  255. package/dist/src/server/services/runner.d.ts +12 -0
  256. package/dist/src/server/services/runner.d.ts.map +1 -0
  257. package/dist/src/server/services/runner.js +161 -0
  258. package/dist/src/server/services/runner.js.map +1 -0
  259. package/dist/src/server/services/seeder.d.ts +5 -0
  260. package/dist/src/server/services/seeder.d.ts.map +1 -0
  261. package/dist/src/server/services/seeder.js +79 -0
  262. package/dist/src/server/services/seeder.js.map +1 -0
  263. package/dist/src/server/services/storage.d.ts +35 -0
  264. package/dist/src/server/services/storage.d.ts.map +1 -0
  265. package/dist/src/server/services/storage.js +219 -0
  266. package/dist/src/server/services/storage.js.map +1 -0
  267. package/dist/src/server/services/transcript-formatter.d.ts +18 -0
  268. package/dist/src/server/services/transcript-formatter.d.ts.map +1 -0
  269. package/dist/src/server/services/transcript-formatter.js +156 -0
  270. package/dist/src/server/services/transcript-formatter.js.map +1 -0
  271. package/dist/src/server/services/workspace.d.ts +11 -0
  272. package/dist/src/server/services/workspace.d.ts.map +1 -0
  273. package/dist/src/server/services/workspace.js +113 -0
  274. package/dist/src/server/services/workspace.js.map +1 -0
  275. package/dist/src/server/types/evaluation.d.ts +108 -0
  276. package/dist/src/server/types/evaluation.d.ts.map +1 -0
  277. package/dist/src/server/types/evaluation.js +5 -0
  278. package/dist/src/server/types/evaluation.js.map +1 -0
  279. package/dist/src/server/types/index.d.ts +5 -0
  280. package/dist/src/server/types/index.d.ts.map +1 -0
  281. package/dist/src/server/types/index.js +5 -0
  282. package/dist/src/server/types/index.js.map +1 -0
  283. package/dist/src/server/types/provider.d.ts +99 -0
  284. package/dist/src/server/types/provider.d.ts.map +1 -0
  285. package/dist/src/server/types/provider.js +5 -0
  286. package/dist/src/server/types/provider.js.map +1 -0
  287. package/dist/src/server/types/run.d.ts +31 -0
  288. package/dist/src/server/types/run.d.ts.map +1 -0
  289. package/dist/src/server/types/run.js +5 -0
  290. package/dist/src/server/types/run.js.map +1 -0
  291. package/dist/src/server/types/scenario.d.ts +32 -0
  292. package/dist/src/server/types/scenario.d.ts.map +1 -0
  293. package/dist/src/server/types/scenario.js +5 -0
  294. package/dist/src/server/types/scenario.js.map +1 -0
  295. package/dist/src/server/types/setup.d.ts +99 -0
  296. package/dist/src/server/types/setup.d.ts.map +1 -0
  297. package/dist/src/server/types/setup.js +5 -0
  298. package/dist/src/server/types/setup.js.map +1 -0
  299. package/dist/web/assets/index-C4dw8OpW.css +1 -0
  300. package/dist/web/assets/index-wve8IczO.js +76 -0
  301. package/dist/web/index.html +15 -0
  302. package/docs/schemas/provider-api.example.json +16 -0
  303. package/docs/schemas/provider-oauth.example.json +15 -0
  304. package/docs/schemas/provider.example.json +16 -0
  305. package/docs/schemas/scenario-baseline.example.json +35 -0
  306. package/docs/schemas/scenario-carwash-baseline.example.json +33 -0
  307. package/docs/schemas/scenario-carwash-with-claude-md.example.json +40 -0
  308. package/docs/schemas/scenario-golden-rules-baseline.example.json +51 -0
  309. package/docs/schemas/scenario-golden-rules-with-claude-md.example.json +61 -0
  310. package/docs/schemas/scenario-negative-analysis-baseline.example.json +34 -0
  311. package/docs/schemas/scenario-negative-analysis-with-claude-md.example.json +41 -0
  312. package/docs/schemas/scenario-with-claude-md.example.json +41 -0
  313. package/docs/schemas/scenario.example.json +33 -0
  314. package/package.json +92 -0
@@ -0,0 +1,24 @@
1
+ import type { IndividualEvaluation, InstructionCompliance } from '../types/evaluation.js';
2
+ export { parseSynthesisResponse, parseDebateResponse } from './eval-parsers-debate-impl.js';
3
+ export type { Verdict, DebateParseResult } from './eval-parsers-debate-impl.js';
4
+ export interface ScoreParseResult {
5
+ readonly scores: Readonly<Record<string, number>>;
6
+ readonly overallCloseness: number;
7
+ readonly missedCritical: readonly string[];
8
+ readonly strengths: readonly string[];
9
+ readonly weaknesses: readonly string[];
10
+ readonly summary: string;
11
+ }
12
+ export declare function parseScoreResponse(response: string): Partial<ScoreParseResult>;
13
+ export interface ComplianceParseResult {
14
+ readonly followed: readonly string[];
15
+ readonly violated: readonly string[];
16
+ readonly notApplicable: readonly string[];
17
+ readonly overallCompliance: number;
18
+ }
19
+ export declare function parseComplianceResponse(response: string): Partial<ComplianceParseResult>;
20
+ /** Convert parsed compliance into our InstructionCompliance type. */
21
+ export declare function toInstructionCompliance(result: Partial<ComplianceParseResult>): InstructionCompliance;
22
+ /** Convert score parse result into IndividualEvaluation entries. */
23
+ export declare function toIndividualEvaluations(scores: Readonly<Record<string, number>>, role: string, reasoningMap: Readonly<Record<string, string>>): IndividualEvaluation[];
24
+ //# sourceMappingURL=eval-parsers.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-parsers.d.ts","sourceRoot":"","sources":["../../../src/server/services/eval-parsers.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,oBAAoB,EACpB,qBAAqB,EACtB,MAAM,wBAAwB,CAAC;AAGhC,OAAO,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AAC5F,YAAY,EAAE,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAMhF,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAClD,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC;IAClC,QAAQ,CAAC,cAAc,EAAE,SAAS,MAAM,EAAE,CAAC;IAC3C,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;IACvC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAa9E;AAMD,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,aAAa,EAAE,SAAS,MAAM,EAAE,CAAC;IAC1C,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;CACpC;AAED,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAMxF;AAED,qEAAqE;AACrE,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,OAAO,CAAC,qBAAqB,CAAC,GACrC,qBAAqB,CAOvB;AAMD,oEAAoE;AACpE,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,EACxC,IAAI,EAAE,MAAM,EACZ,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,GAC7C,oBAAoB,EAAE,CAOxB"}
@@ -0,0 +1,153 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Evaluation Response Parsers — extract structured data from LLM responses
3
+ // ---------------------------------------------------------------------------
4
+ // Re-export debate/synthesis parsers so existing imports continue to work
5
+ export { parseSynthesisResponse, parseDebateResponse } from './eval-parsers-debate-impl.js';
6
/**
 * Parse an evaluator's scoring response.
 *
 * JSON recovered from the response is preferred; when none can be recovered
 * the raw text is scanned for score lines instead.
 *
 * @param {string} response - Raw evaluator output.
 * @returns {object} Partial score result (scores, overallCloseness,
 *   missedCritical, strengths, weaknesses, summary).
 */
export function parseScoreResponse(response) {
    const json = tryParseJson(response);
    if (!json) {
        return parseScoreFromText(response);
    }
    const { scores, overallCloseness, missedCritical, strengths, weaknesses, summary } = json;
    // Anything that is not a string is reported as "no summary".
    const summaryText = typeof summary === 'string' ? summary : undefined;
    return {
        scores: validScores(scores),
        overallCloseness: clamp01(overallCloseness),
        missedCritical: toStringArray(missedCritical),
        strengths: toStringArray(strengths),
        weaknesses: toStringArray(weaknesses),
        summary: summaryText,
    };
}
20
/**
 * Parse an evaluator's compliance response.
 *
 * Uses the structured JSON path when JSON can be recovered from the response,
 * otherwise falls back to keyword detection on the raw text.
 *
 * @param {string} response - Raw evaluator output.
 * @returns {object} Partial compliance result.
 */
export function parseComplianceResponse(response) {
    const json = tryParseJson(response);
    return json
        ? categorizeComplianceResults(json)
        : parseComplianceFromText(response);
}
27
/**
 * Convert a (possibly partial) parsed compliance result into a fully
 * populated InstructionCompliance object.
 *
 * Missing (null/undefined) lists become empty arrays and a missing overall
 * compliance value becomes 0.
 */
export function toInstructionCompliance(result) {
    const { followed, violated, notApplicable, overallCompliance } = result;
    return {
        followed: followed ?? [],
        violated: violated ?? [],
        notApplicable: notApplicable ?? [],
        overallCompliance: overallCompliance ?? 0,
    };
}
36
+ // ---------------------------------------------------------------------------
37
+ // Helpers
38
+ // ---------------------------------------------------------------------------
39
/**
 * Expand a dimension -> score map into one IndividualEvaluation entry per
 * dimension.
 *
 * Each score is passed through clampScore; the reasoning text is looked up by
 * dimension name and defaults to an empty string when absent.
 */
export function toIndividualEvaluations(scores, role, reasoningMap) {
    const evaluations = [];
    for (const [dimension, rawScore] of Object.entries(scores)) {
        evaluations.push({
            evaluatorRole: role,
            dimension,
            score: clampScore(rawScore),
            reasoning: reasoningMap[dimension] ?? '',
        });
    }
    return evaluations;
}
48
+ // ---------------------------------------------------------------------------
49
+ // Internal: JSON parsing
50
+ // ---------------------------------------------------------------------------
51
/**
 * Best-effort JSON extraction from an LLM response.
 *
 * Candidates are tried in order and the first one that parses wins:
 *   1. the whole text,
 *   2. the contents of the first ``` / ```json fenced block,
 *   3. the span from the first "{" to the last "}".
 *
 * @param {string} text - Raw response text.
 * @returns {*} The parsed JSON value, or undefined when nothing parses.
 */
function tryParseJson(text) {
    // Wraps JSON.parse so a failed attempt can be told apart from a parsed value.
    const attempt = (candidate) => {
        try {
            return { ok: true, value: JSON.parse(candidate) };
        }
        catch {
            return { ok: false, value: undefined };
        }
    };

    const whole = attempt(text);
    if (whole.ok) {
        return whole.value;
    }

    const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/);
    if (fenced) {
        const fromFence = attempt(fenced[1]);
        if (fromFence.ok) {
            return fromFence.value;
        }
    }

    const braced = text.match(/\{[\s\S]*\}/);
    if (braced) {
        const fromBraces = attempt(braced[0]);
        if (fromBraces.ok) {
            return fromBraces.value;
        }
    }

    return undefined;
}
78
+ // ---------------------------------------------------------------------------
79
+ // Internal: Text fallback parsers
80
+ // ---------------------------------------------------------------------------
81
/**
 * Text fallback: pull "<dimension>: <number>" pairs out of a free-form
 * response when no JSON could be recovered.
 *
 * A dimension name starts with a word character and may continue with word
 * characters, spaces and tabs. The number may have a decimal part and an
 * optional "/10" suffix. Values greater than 10 are ignored. When the same
 * dimension appears more than once, the last occurrence wins.
 *
 * @param {string} text - Raw evaluator response.
 * @returns {{ scores: (Object<string, number>|undefined) }} The collected
 *   scores, or `scores: undefined` when no usable pair was found.
 */
function parseScoreFromText(text) {
    const scores = {};
    // Names may contain spaces/tabs but never line breaks; otherwise a heading
    // line would be absorbed into the label on the following line
    // ("Summary of scores\nAccuracy" instead of "Accuracy").
    const scorePattern = /(\w[\w \t]*?):\s*(\d+(?:\.\d+)?)\s*(?:\/\s*10)?/g;
    for (const [, rawName, rawValue] of text.matchAll(scorePattern)) {
        // rawValue is digits with an optional fraction, so parseFloat cannot yield NaN.
        const value = Number.parseFloat(rawValue);
        if (value <= 10) {
            scores[rawName.trim()] = value;
        }
    }
    return { scores: Object.keys(scores).length > 0 ? scores : undefined };
}
93
/**
 * Text fallback: coarse keyword detection of compliance verdicts when no JSON
 * could be recovered from the response.
 *
 * The result carries at most one placeholder entry per list, because the
 * individual instructions cannot be identified from free text.
 *
 * @param {string} text - Raw evaluator response.
 * @returns {{ followed: string[], violated: string[], notApplicable: string[],
 *   overallCompliance: undefined }} Placeholder-based partial result.
 */
function parseComplianceFromText(text) {
    const placeholder = '(extracted from text)';
    // "compliant" must not be counted when it is the tail of "non-compliant",
    // "non compliant" or "noncompliant"; otherwise a purely negative verdict
    // would be reported as followed as well as violated.
    const mentionsFollowed = /followed|(?<!\bnon[-\s]?)compliant/i.test(text);
    const mentionsViolated = /violated|non[-\s]?compliant/i.test(text);
    return {
        followed: mentionsFollowed ? [placeholder] : [],
        violated: mentionsViolated ? [placeholder] : [],
        notApplicable: [],
        overallCompliance: undefined,
    };
}
102
+ // ---------------------------------------------------------------------------
103
+ // Internal: Utilities
104
+ // ---------------------------------------------------------------------------
105
/**
 * Keep only the numeric, non-NaN entries of a score map, passing each kept
 * value through clampScore.
 *
 * Anything that is falsy or not an object yields an empty map.
 */
function validScores(scores) {
    const cleaned = {};
    const usable = Boolean(scores) && typeof scores === 'object';
    if (!usable) {
        return cleaned;
    }
    Object.entries(scores).forEach(([name, value]) => {
        const isRealNumber = typeof value === 'number' && !isNaN(value);
        if (isRealNumber) {
            cleaned[name] = clampScore(value);
        }
    });
    return cleaned;
}
115
/**
 * Clamp a value into the range [0, 1].
 *
 * undefined and anything the global isNaN reports as NaN map to 0.
 */
function clamp01(val) {
    // Global isNaN is intentional: it coerces, so non-numeric input also yields 0.
    const unusable = val === undefined || isNaN(val);
    return unusable ? 0 : Math.max(0, Math.min(1, val));
}
120
/**
 * Clamp a score into the range [0, 10].
 *
 * undefined and anything the global isNaN reports as NaN map to 0.
 */
function clampScore(val) {
    // Global isNaN is intentional: it coerces, so non-numeric input also yields 0.
    const unusable = val === undefined || isNaN(val);
    return unusable ? 0 : Math.max(0, Math.min(10, val));
}
125
/**
 * Return the string elements of `arr`, dropping everything else.
 * A non-array input yields an empty array.
 */
function toStringArray(arr) {
    const isString = (item) => typeof item === 'string';
    return Array.isArray(arr) ? arr.filter(isString) : [];
}
130
/**
 * Sort the per-instruction entries of a parsed compliance response into
 * followed / violated / notApplicable lists.
 *
 * The input comes from untrusted LLM output, so malformed entries must not
 * throw: a null/undefined entry or a missing instruction is recorded as
 * '(unknown)', and a status that is missing, not a string, or unrecognized
 * lands in notApplicable. Status matching is case-insensitive.
 *
 * @param {object} parsed - Parsed JSON; `results` is used only when it is an
 *   array, and `overallCompliance` is passed through clamp01.
 * @returns {{ followed: Array, violated: Array, notApplicable: Array,
 *   overallCompliance: number }} Categorized result.
 */
function categorizeComplianceResults(parsed) {
    const followed = [];
    const violated = [];
    const notApplicable = [];
    const entries = Array.isArray(parsed.results) ? parsed.results : [];
    for (const entry of entries) {
        // Optional chaining guards against null entries in the results array.
        const instruction = entry?.instruction ?? '(unknown)';
        // String() guards against numeric/boolean statuses, which have no toLowerCase().
        const status = String(entry?.status ?? '').toLowerCase();
        if (status === 'followed') {
            followed.push(instruction);
        }
        else if (status === 'violated') {
            violated.push(instruction);
        }
        else {
            notApplicable.push(instruction);
        }
    }
    return {
        followed,
        violated,
        notApplicable,
        overallCompliance: clamp01(parsed.overallCompliance),
    };
}
153
+ //# sourceMappingURL=eval-parsers.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-parsers.js","sourceRoot":"","sources":["../../../src/server/services/eval-parsers.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,2EAA2E;AAC3E,8EAA8E;AAO9E,0EAA0E;AAC1E,OAAO,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AAgB5F,MAAM,UAAU,kBAAkB,CAAC,QAAgB;IACjD,MAAM,MAAM,GAAG,YAAY,CAAmB,QAAQ,CAAC,CAAC;IACxD,IAAI,MAAM,EAAE,CAAC;QACX,OAAO;YACL,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,MAAM,CAAC;YAClC,gBAAgB,EAAE,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC;YAClD,cAAc,EAAE,aAAa,CAAC,MAAM,CAAC,cAAc,CAAC;YACpD,SAAS,EAAE,aAAa,CAAC,MAAM,CAAC,SAAS,CAAC;YAC1C,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,UAAU,CAAC;YAC5C,OAAO,EAAE,OAAO,MAAM,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;SACzE,CAAC;IACJ,CAAC;IACD,OAAO,kBAAkB,CAAC,QAAQ,CAAC,CAAC;AACtC,CAAC;AAaD,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,MAAM,MAAM,GAAG,YAAY,CAAwB,QAAQ,CAAC,CAAC;IAC7D,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,2BAA2B,CAAC,MAAM,CAAC,CAAC;IAC7C,CAAC;IACD,OAAO,uBAAuB,CAAC,QAAQ,CAAC,CAAC;AAC3C,CAAC;AAED,qEAAqE;AACrE,MAAM,UAAU,uBAAuB,CACrC,MAAsC;IAEtC,OAAO;QACL,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,EAAE;QAC/B,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,EAAE;QAC/B,aAAa,EAAE,MAAM,CAAC,aAAa,IAAI,EAAE;QACzC,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,CAAC;KACjD,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,oEAAoE;AACpE,MAAM,UAAU,uBAAuB,CACrC,MAAwC,EACxC,IAAY,EACZ,YAA8C;IAE9C,OAAO,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QACzD,aAAa,EAAE,IAAI;QACnB,SAAS;QACT,KAAK,EAAE,UAAU,CAAC,KAAK,CAAC;QACxB,SAAS,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE;KACzC,CAAC,CAAC,CAAC;AACN,CAAC;AAED,8EAA8E;AAC9E,yBAAyB;AACzB,8EAA8E;AAE9E,SAAS,YAAY,CAAI,IAAY;IACnC,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAM,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,eAAe;IACjB,CAAC;IACD,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;IAC7D,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAM,CAAC;QACvC,CAAC;QAAC,MAAM,CAAC;YACP,eAAe;QACjB,CAAC;IACH,CAAC;IACD,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAA
C,CAAC;IAC7C,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAM,CAAC;QACxC,CAAC;QAAC,MAAM,CAAC;YACP,eAAe;QACjB,CAAC;IACH,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,8EAA8E;AAC9E,kCAAkC;AAClC,8EAA8E;AAE9E,SAAS,kBAAkB,CAAC,IAAY;IACtC,MAAM,MAAM,GAA2B,EAAE,CAAC;IAC1C,MAAM,YAAY,GAAG,iDAAiD,CAAC;IACvE,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAClD,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACjC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,EAAE;YAAE,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;IAClD,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC;AACzE,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAY;IAC3C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,QAAQ,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;IAC7E,IAAI,yBAAyB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,QAAQ,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;IACjF,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,EAAE,EAAE,iBAAiB,EAAE,SAAS,EAAE,CAAC;AACjF,CAAC;AAoBD,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,SAAS,WAAW,CAClB,MAA0C;IAE1C,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAC;IACrD,MAAM,MAAM,GAA2B,EAAE,CAAC;IAC1C,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5C,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;YAAE,MAAM,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IACpE,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,OAAO,CAAC,GAAuB;IACtC,IAAI,GAAG,KAAK,SAAS,IAAI,KAAK,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAC9C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;AACvC,CAAC;AAED,SAAS,UAAU,CAAC,GAAuB;IACzC,IAAI,GAAG,KAAK,SAAS,IAAI,KAAK,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAC9C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC;AACxC,CAAC;AAED,SAAS,aAAa,CAAC,GAAY;IACjC,IAAI,CAAC,KAAK
,CAAC,OAAO,CAAC,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IACnC,OAAO,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC;AAC/D,CAAC;AAED,SAAS,2BAA2B,CAClC,MAA6B;IAE7B,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,aAAa,GAAa,EAAE,CAAC;IAEnC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,WAAW,CAAC;YAC1C,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;YAC9C,IAAI,MAAM,KAAK,UAAU;gBAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC1C,IAAI,MAAM,KAAK,UAAU;gBAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;;gBAC/C,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,OAAO;QACL,QAAQ;QACR,QAAQ;QACR,aAAa;QACb,iBAAiB,EAAE,OAAO,CAAC,MAAM,CAAC,iBAAiB,CAAC;KACrD,CAAC;AACJ,CAAC"}
@@ -0,0 +1,9 @@
1
+ import type { Scenario, Provider } from '../types/index.js';
2
+ import type { IndividualEvaluation } from '../types/evaluation.js';
3
+ import type { TranscriptSummary } from './transcript-formatter.js';
4
+ import type { InstructionBlock } from './instruction-parser.js';
5
+ export declare function buildScorePrompt(transcript: string, scenario: Scenario, summary: TranscriptSummary): string;
6
+ export declare function buildCompliancePrompt(transcript: string, scenario: Scenario, instructions: readonly InstructionBlock[]): string;
7
+ export declare function buildDebatePrompt(myPreviousAssessment: string, otherAssessments: readonly string[], roundNumber: number): string;
8
+ export declare function buildSynthesisPrompt(allEvaluations: readonly IndividualEvaluation[], scenario: Scenario, provider: Provider): string;
9
+ //# sourceMappingURL=eval-prompts.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-prompts.d.ts","sourceRoot":"","sources":["../../../src/server/services/eval-prompts.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAoB,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAC9E,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AACnE,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AACnE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAMhE,wBAAgB,gBAAgB,CAC9B,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,QAAQ,EAClB,OAAO,EAAE,iBAAiB,GACzB,MAAM,CA+CR;AAMD,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,QAAQ,EAClB,YAAY,EAAE,SAAS,gBAAgB,EAAE,GACxC,MAAM,CAmCR;AAMD,wBAAgB,iBAAiB,CAC/B,oBAAoB,EAAE,MAAM,EAC5B,gBAAgB,EAAE,SAAS,MAAM,EAAE,EACnC,WAAW,EAAE,MAAM,GAClB,MAAM,CAyBR;AAMD,wBAAgB,oBAAoB,CAClC,cAAc,EAAE,SAAS,oBAAoB,EAAE,EAC/C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,QAAQ,GACjB,MAAM,CAkCR"}
@@ -0,0 +1,170 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Evaluation Prompt Builders
3
+ // ---------------------------------------------------------------------------
4
+ // ---------------------------------------------------------------------------
5
+ // Query 1: Score + answer comparison prompt
6
+ // ---------------------------------------------------------------------------
7
/**
 * Builds the scoring prompt (query 1): the evaluator is asked to score each
 * dimension, rate closeness to the expected answer, and list missed critical
 * requirements, strengths and weaknesses as JSON.
 *
 * @param transcript - Formatted transcript text, embedded verbatim.
 * @param scenario - Supplies prompt, expectedAnswer, criticalRequirements,
 *   gradingGuidelines and scoringDimensions.
 * @param summary - Supplies toolCallSequence, filesRead, filesModified,
 *   commandFailures and askedClarifyingQuestions.
 * @returns The complete prompt string.
 */
export function buildScorePrompt(transcript, scenario, summary) {
  const dimensionList = formatDimensions(scenario.scoringDimensions);

  let toolLine = 'No tool calls recorded.';
  if (summary.toolCallSequence.length > 0) {
    toolLine = `Tool call sequence: ${summary.toolCallSequence.join(' → ')}`;
  }

  // Comma-separated list, or the literal "none" when the list is empty.
  const listOrNone = (items) => (items.length > 0 ? items.join(', ') : 'none');

  return `You are an expert evaluator assessing an AI agent's performance on a task.

## Task Description
${scenario.prompt}

## Expected Answer
${scenario.expectedAnswer}

## Critical Requirements
${formatCriticalRequirements(scenario.criticalRequirements)}

## Grading Guidelines
${scenario.gradingGuidelines || 'No specific grading guidelines provided.'}

## Scoring Dimensions
${dimensionList}

## Agent Behavior Summary
${toolLine}
Files read: ${listOrNone(summary.filesRead)}
Files modified: ${listOrNone(summary.filesModified)}
Command failures: ${summary.commandFailures.length}
Asked clarifying questions: ${summary.askedClarifyingQuestions ? 'yes' : 'no'}

## Full Transcript
${transcript}

## Instructions
Evaluate the agent's output. For each scoring dimension, provide a score from 0-10.
Also assess how closely the agent's final answer matches the expected answer (0.0-1.0).
Identify any critical requirements that were missed, as well as strengths and weaknesses.

Respond with valid JSON matching this structure:
{
"scores": { "<dimension_name>": <0-10>, ... },
"overallCloseness": <0.0-1.0>,
"missedCritical": ["<requirement that was not met>", ...],
"strengths": ["<strength>", ...],
"weaknesses": ["<weakness>", ...],
"summary": "<brief overall assessment>"
}`;
}
54
+ // ---------------------------------------------------------------------------
55
+ // Query 2: Instruction compliance prompt
56
+ // ---------------------------------------------------------------------------
57
/**
 * Builds the instruction-compliance prompt (query 2): the evaluator is asked
 * to classify every configured instruction as followed / violated /
 * not_applicable and to rate overall compliance, answering in JSON.
 *
 * @param transcript - Formatted transcript text, embedded verbatim.
 * @param scenario - Supplies the `skills` and `subagents` lists (names only are used).
 * @param instructions - Instruction blocks; each contributes its `source` and `text`.
 * @returns The complete prompt string.
 */
export function buildCompliancePrompt(transcript, scenario, instructions) {
  const numberedInstructions = instructions
    .map(({ source, text }, index) => `${index + 1}. [${source}] ${text}`)
    .join('\n');
  const skills = scenario.skills.map(({ name }) => name);
  const subagents = scenario.subagents.map(({ name }) => name);

  // Comma-separated names, or the literal "none" when nothing is configured.
  const namesOrNone = (names) => (names.length > 0 ? names.join(', ') : 'none');

  return `You are an expert evaluator checking whether an AI agent followed its configured instructions.

## Agent Configuration
Skills: ${namesOrNone(skills)}
Subagents: ${namesOrNone(subagents)}

## Instructions to Check
${numberedInstructions || 'No instructions configured.'}

## Agent Transcript
${transcript}

## Instructions
For each instruction listed above, determine if the agent:
- "followed" it (clear evidence of compliance)
- "violated" it (clear evidence of non-compliance)
- "not_applicable" (instruction was not relevant to this task)

Also rate overall compliance from 0.0 to 1.0.

Respond with valid JSON:
{
"results": [
{ "instruction": "<instruction text>", "status": "followed|violated|not_applicable", "evidence": "<brief evidence>" }
],
"overallCompliance": <0.0-1.0>
}`;
}
91
+ // ---------------------------------------------------------------------------
92
+ // Multi-round debate prompt
93
+ // ---------------------------------------------------------------------------
94
/**
 * Builds the prompt for one debate round: the evaluator sees its own previous
 * assessment next to the other evaluators' assessments and is asked for a
 * verdict, updated scores, critiques and reasoning as JSON.
 *
 * @param myPreviousAssessment - This evaluator's previous assessment text.
 * @param otherAssessments - Assessment texts from the other evaluators; may be empty.
 * @param roundNumber - Round number shown in the prompt's opening line.
 * @returns The complete prompt string.
 */
export function buildDebatePrompt(myPreviousAssessment, otherAssessments, roundNumber) {
  const othersFormatted = otherAssessments
    .map((assessment, index) => `### Other Evaluator ${index + 1}\n${assessment}`)
    .join('\n\n');

  // An empty list would otherwise leave a blank section under the heading;
  // say so explicitly, the same way the compliance prompt does for
  // missing instructions.
  const othersSection = othersFormatted || 'No other evaluator assessments were provided.';

  return `You are participating in round ${roundNumber} of a multi-evaluator review.

## Your Previous Assessment
${myPreviousAssessment}

## Other Evaluators' Assessments
${othersSection}

## Instructions
Review the other evaluators' assessments and compare them with your own.
Determine if you agree, partially agree, or disagree with the emerging consensus.
Provide your updated scores if you have changed your mind, and explain why.

Respond with valid JSON:
{
"verdict": "AGREE|DISAGREE|PARTIAL",
"updatedScores": { "<dimension_name>": <0-10>, ... },
"critiques": ["<specific point of disagreement>", ...],
"reasoning": "<why you agree/disagree>"
}`;
}
119
+ // ---------------------------------------------------------------------------
120
+ // Synthesis prompt (final aggregation)
121
+ // ---------------------------------------------------------------------------
122
/**
 * Builds the final synthesis prompt: all individual evaluations are listed and
 * the synthesizer is asked for per-dimension scores, a weighted total, a
 * confidence value and points of dissent as JSON.
 *
 * @param allEvaluations - Individual evaluations; each contributes
 *   evaluatorRole, dimension, score and reasoning.
 * @param scenario - Supplies name, prompt and scoringDimensions.
 * @param provider - Supplies `name` and `provider.model`.
 * @returns The complete prompt string.
 */
export function buildSynthesisPrompt(allEvaluations, scenario, provider) {
  const describeEvaluation = ({ evaluatorRole, dimension, score, reasoning }) =>
    `[${evaluatorRole}] ${dimension}: ${score}/10 — ${reasoning}`;
  const evaluationLines = allEvaluations.map((evaluation) => describeEvaluation(evaluation)).join('\n');
  const dimensionList = formatDimensions(scenario.scoringDimensions);

  return `You are the final synthesizer for a multi-evaluator assessment.

## Scenario
${scenario.name}: ${scenario.prompt}

## Scoring Dimensions & Weights
${dimensionList}

## Provider
Name: ${provider.name}
Model: ${provider.provider.model}

## All Individual Evaluations
${evaluationLines}

## Instructions
Synthesize all evaluations into final scores. Weight each dimension according to the scoring
dimensions defined above. Identify areas of evaluator consensus and disagreement.
Provide a confidence level (0.0-1.0) based on evaluator agreement.

Respond with valid JSON:
{
"dimensionScores": { "<dimension_name>": <0-10>, ... },
"weightedTotal": <weighted average 0-10>,
"confidence": <0.0-1.0>,
"dissenting": ["<areas where evaluators disagreed>", ...]
}`;
}
155
+ // ---------------------------------------------------------------------------
156
+ // Internal helpers
157
+ // ---------------------------------------------------------------------------
158
/**
 * Renders scoring dimensions as a bullet list, one
 * `- name (weight: w): description` line per dimension.
 *
 * @param dims - Scoring dimensions; a missing (null/undefined) or empty list
 *   yields the "no dimensions" fallback sentence instead of throwing.
 * @returns The bullet list, or the fallback sentence.
 */
function formatDimensions(dims) {
  if (!Array.isArray(dims) || dims.length === 0) {
    return 'No dimensions defined. Use general quality assessment.';
  }
  return dims
    .map((d) => `- ${d.name} (weight: ${d.weight}): ${d.description}`)
    .join('\n');
}
165
/**
 * Renders critical requirements as a 1-based numbered list.
 *
 * @param reqs - Requirement strings; a missing (null/undefined) or empty list
 *   yields "None specified." instead of throwing.
 * @returns The numbered list, or the fallback sentence.
 */
function formatCriticalRequirements(reqs) {
  if (!Array.isArray(reqs) || reqs.length === 0) {
    return 'None specified.';
  }
  return reqs.map((r, i) => `${i + 1}. ${r}`).join('\n');
}
170
+ //# sourceMappingURL=eval-prompts.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-prompts.js","sourceRoot":"","sources":["../../../src/server/services/eval-prompts.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6BAA6B;AAC7B,8EAA8E;AAO9E,8EAA8E;AAC9E,4CAA4C;AAC5C,8EAA8E;AAE9E,MAAM,UAAU,gBAAgB,CAC9B,UAAkB,EAClB,QAAkB,EAClB,OAA0B;IAE1B,MAAM,UAAU,GAAG,gBAAgB,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;IAChE,MAAM,YAAY,GAAG,OAAO,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC;QACtD,CAAC,CAAC,uBAAuB,OAAO,CAAC,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE;QAC/D,CAAC,CAAC,yBAAyB,CAAC;IAE9B,OAAO;;;EAGP,QAAQ,CAAC,MAAM;;;EAGf,QAAQ,CAAC,cAAc;;;EAGvB,0BAA0B,CAAC,QAAQ,CAAC,oBAAoB,CAAC;;;EAGzD,QAAQ,CAAC,iBAAiB,IAAI,0CAA0C;;;EAGxE,UAAU;;;EAGV,YAAY;cACA,OAAO,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM;kBAChE,OAAO,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM;oBAC1E,OAAO,CAAC,eAAe,CAAC,MAAM;8BACpB,OAAO,CAAC,wBAAwB,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI;;;EAG3E,UAAU;;;;;;;;;;;;;;;EAeV,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,yCAAyC;AACzC,8EAA8E;AAE9E,MAAM,UAAU,qBAAqB,CACnC,UAAkB,EAClB,QAAkB,EAClB,YAAyC;IAEzC,MAAM,eAAe,GAAG,YAAY;SACjC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;SAClD,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACtD,MAAM,aAAa,GAAG,QAAQ,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAE5D,OAAO;;;UAGC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM;aACnD,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM;;;EAGvE,eAAe,IAAI,6BAA6B;;;EAGhD,UAAU;;;;;;;;;;;;;;;;EAgBV,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,MAAM,UAAU,iBAAiB,CAC/B,oBAA4B,EAC5B,gBAAmC,EACnC,WAAmB;IAEnB,MAAM,eAAe,GAAG,gBAAgB;SACrC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,uBAAuB,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;SA
CnD,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,OAAO,kCAAkC,WAAW;;;EAGpD,oBAAoB;;;EAGpB,eAAe;;;;;;;;;;;;;EAaf,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,uCAAuC;AACvC,8EAA8E;AAE9E,MAAM,UAAU,oBAAoB,CAClC,cAA+C,EAC/C,QAAkB,EAClB,QAAkB;IAElB,MAAM,aAAa,GAAG,cAAc;SACjC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,aAAa,KAAK,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC,SAAS,EAAE,CAAC;SACjF,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,MAAM,UAAU,GAAG,gBAAgB,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;IAEhE,OAAO;;;EAGP,QAAQ,CAAC,IAAI,KAAK,QAAQ,CAAC,MAAM;;;EAGjC,UAAU;;;QAGJ,QAAQ,CAAC,IAAI;SACZ,QAAQ,CAAC,QAAQ,CAAC,KAAK;;;EAG9B,aAAa;;;;;;;;;;;;;EAab,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E,SAAS,gBAAgB,CAAC,IAAiC;IACzD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,wDAAwD,CAAC;IACvF,OAAO,IAAI;SACR,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,aAAa,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;SACjE,IAAI,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAED,SAAS,0BAA0B,CAAC,IAAuB;IACzD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,iBAAiB,CAAC;IAChD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACzD,CAAC"}
@@ -0,0 +1,10 @@
1
+ import type { IEvaluator, EvaluationCallbacks } from '../interfaces/evaluator.js';
2
+ import type { Run, Scenario, Provider, Evaluation, EvaluationRequest } from '../types/index.js';
3
+ export declare class EvaluationOrchestrator implements IEvaluator {
4
+ evaluateRun(run: Run, scenario: Scenario, provider: Provider, request: EvaluationRequest, callbacks: EvaluationCallbacks): Promise<Evaluation>;
5
+ private runRound1;
6
+ private runDebateRound;
7
+ private runSynthesis;
8
+ private runQuery;
9
+ }
10
+ //# sourceMappingURL=evaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../../../src/server/services/evaluator.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,UAAU,EAAE,mBAAmB,EAAmB,MAAM,4BAA4B,CAAC;AACnG,OAAO,KAAK,EACV,GAAG,EACH,QAAQ,EACR,QAAQ,EACR,UAAU,EACV,iBAAiB,EAMlB,MAAM,mBAAmB,CAAC;AAyC3B,qBAAa,sBAAuB,YAAW,UAAU;IACjD,WAAW,CACf,GAAG,EAAE,GAAG,EACR,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,QAAQ,EAClB,OAAO,EAAE,iBAAiB,EAC1B,SAAS,EAAE,mBAAmB,GAC7B,OAAO,CAAC,UAAU,CAAC;YAyER,SAAS;YA6BT,cAAc;YAyBd,YAAY;YAiBZ,QAAQ;CA4CvB"}
@@ -0,0 +1,156 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Evaluator — orchestrates evaluation pipeline via SDK query()
3
+ // ---------------------------------------------------------------------------
4
+ import { query } from '@anthropic-ai/claude-agent-sdk';
5
+ import { buildRunEnv } from './env-builder.js';
6
+ import { formatTranscript } from './transcript-formatter.js';
7
+ import { parseAllInstructions } from './instruction-parser.js';
8
+ import { buildScorePrompt, buildCompliancePrompt, buildDebatePrompt, buildSynthesisPrompt, } from './eval-prompts.js';
9
+ import { parseScoreResponse, parseComplianceResponse, parseSynthesisResponse, parseDebateResponse, toIndividualEvaluations, } from './eval-parsers.js';
10
+ import { checkConsensus, buildAnswerComparison, buildCriticalResults, mergeCompliance, } from './eval-helpers.js';
11
+ // ---------------------------------------------------------------------------
12
+ // Implementation
13
+ // ---------------------------------------------------------------------------
14
/**
 * Runs the evaluation pipeline for one run: a first round of score and
 * compliance queries per evaluator, optional debate rounds until consensus or
 * `request.maxRounds`, and a final synthesis query issued by the last
 * configured evaluator.
 */
export class EvaluationOrchestrator {
  /**
   * Evaluates a run and assembles the final evaluation record.
   *
   * @param run - The run; `run.messages` is formatted into the transcript and `run.id` is echoed back.
   * @param scenario - Scenario whose CLAUDE.md files and rules are parsed into instructions.
   * @param provider - Provider under evaluation (only passed on to the synthesis prompt).
   * @param request - Supplies `evaluators` (at least one) and `maxRounds`.
   * @param callbacks - Receives status changes, progress updates and every SDK message.
   * @returns The evaluation object with `id` left as an empty string.
   * @throws {Error} If no evaluators are configured or an SDK query does not succeed.
   */
  async evaluateRun(run, scenario, provider, request, callbacks) {
    // Fail fast: without this guard an empty list only surfaces later as an
    // opaque TypeError when the synthesizer (last evaluator) is dereferenced.
    if (!Array.isArray(request.evaluators) || request.evaluators.length === 0) {
      throw new Error('Evaluation requires at least one evaluator.');
    }

    callbacks.onStatusChange('running');
    callbacks.onProgress('preparing', 'Formatting transcript and parsing instructions...');
    const { text: transcript, summary } = formatTranscript(run.messages);
    const instructions = parseAllInstructions([
      ...scenario.claudeMdFiles.map((c) => ({ content: c.content, source: `CLAUDE.md (${c.role})` })),
      ...scenario.rules.map((r) => ({ content: r.content, source: `rule:${r.name}` })),
    ]);

    // One accumulator per evaluator, index-aligned with request.evaluators.
    const accumulators = request.evaluators.map((e) => ({
      role: e.role,
      costUsd: 0,
      tokensIn: 0,
      tokensOut: 0,
      rounds: 0,
      scoreResult: {},
      complianceResult: {},
      assessmentText: '',
    }));

    // ── Round 1 ──────────────────────────────────────────────────────
    callbacks.onProgress(
      'scoring',
      `Running score and compliance queries (${request.evaluators.length} evaluator${request.evaluators.length > 1 ? 's' : ''})...`,
    );
    const round1Evals = await this.runRound1(
      request.evaluators, accumulators, transcript, scenario, provider, instructions, summary, callbacks,
    );
    // With a single allowed round there is nothing to debate, so round 1 counts as consensus.
    const round1Consensus = request.maxRounds <= 1 || checkConsensus(accumulators);
    const rounds = [{
      roundNumber: 1,
      evaluations: round1Evals,
      consensusReached: round1Consensus,
      timestamp: new Date().toISOString(),
    }];

    // ── Multi-round debate ───────────────────────────────────────────
    if (request.maxRounds > 1 && !round1Consensus) {
      for (let roundNum = 2; roundNum <= request.maxRounds; roundNum++) {
        callbacks.onProgress('debate', `Debate round ${roundNum} of ${request.maxRounds}...`);
        const debateEvals = await this.runDebateRound(request.evaluators, accumulators, roundNum, callbacks);
        const consensus = checkConsensus(accumulators);
        rounds.push({
          roundNumber: roundNum,
          evaluations: debateEvals,
          consensusReached: consensus,
          timestamp: new Date().toISOString(),
        });
        if (consensus) {
          break;
        }
      }
    }

    // ── Synthesis ────────────────────────────────────────────────────
    callbacks.onProgress('synthesis', 'Synthesizing final scores and confidence...');
    const latestEvals = rounds[rounds.length - 1].evaluations;
    const synthesizer = request.evaluators[request.evaluators.length - 1];
    const synthInfo = { phase: 'synthesis', evaluatorRole: synthesizer.role, roundNumber: rounds.length };
    const synthResult = await this.runSynthesis(
      synthesizer, latestEvals, scenario, provider, accumulators, callbacks, synthInfo,
    );

    // ── Assemble final evaluation ────────────────────────────────────
    const ledger = accumulators.map((a) => ({
      evaluatorRole: a.role,
      totalCostUsd: a.costUsd,
      totalTokensIn: a.tokensIn,
      totalTokensOut: a.tokensOut,
      roundsParticipated: a.rounds,
    }));
    const now = new Date().toISOString();
    callbacks.onProgress('complete', 'Evaluation finished.');
    callbacks.onStatusChange('completed');
    return {
      id: '',
      runId: run.id,
      status: 'completed',
      evaluators: request.evaluators,
      rounds,
      answerComparison: buildAnswerComparison(accumulators),
      criticalResults: buildCriticalResults(accumulators, scenario),
      setupCompliance: { instructionCompliance: mergeCompliance(accumulators), skillUsage: [], subagentUsage: [] },
      synthesis: {
        dimensionScores: synthResult.dimensionScores ?? {},
        weightedTotal: synthResult.weightedTotal ?? 0,
        confidence: synthResult.confidence ?? 0,
        dissenting: synthResult.dissenting ?? [],
      },
      ledger,
      totalCostUsd: ledger.reduce((sum, l) => sum + l.totalCostUsd, 0),
      createdAt: now,
      updatedAt: now,
    };
  }

  // ─── Round 1: Score + Compliance ─────────────────────────────────────
  /**
   * Runs the score query and then the compliance query for every evaluator
   * (evaluators run concurrently) and stores the parsed results on the
   * matching accumulator.
   *
   * @returns The flattened individual evaluations of all evaluators.
   */
  async runRound1(evaluators, accumulators, transcript, scenario, provider, instructions, summary, callbacks) {
    const promises = evaluators.map(async (evaluator, idx) => {
      const acc = accumulators[idx];
      acc.rounds++;

      const scoreInfo = { phase: 'score', evaluatorRole: evaluator.role, roundNumber: 1 };
      const scoreResp = await this.runQuery(evaluator, buildScorePrompt(transcript, scenario, summary), callbacks, scoreInfo);
      this.recordUsage(acc, scoreResp);
      acc.scoreResult = parseScoreResponse(scoreResp.text);
      acc.assessmentText = scoreResp.text;

      const compInfo = { phase: 'compliance', evaluatorRole: evaluator.role, roundNumber: 1 };
      const compResp = await this.runQuery(evaluator, buildCompliancePrompt(transcript, scenario, instructions), callbacks, compInfo);
      this.recordUsage(acc, compResp);
      acc.complianceResult = parseComplianceResponse(compResp.text);

      return toIndividualEvaluations(acc.scoreResult.scores ?? {}, evaluator.role, {});
    });
    return (await Promise.all(promises)).flat();
  }

  // ─── Multi-round debate ──────────────────────────────────────────────
  /**
   * Runs one debate round: every evaluator is shown the other evaluators'
   * latest assessment texts and may return updated scores, which replace the
   * scores on its accumulator.
   *
   * @returns The flattened individual evaluations after this round.
   */
  async runDebateRound(evaluators, accumulators, roundNumber, callbacks) {
    const promises = evaluators.map(async (evaluator, idx) => {
      const acc = accumulators[idx];
      acc.rounds++;
      const others = accumulators.filter((_, i) => i !== idx).map((a) => a.assessmentText);
      const info = { phase: 'debate', evaluatorRole: evaluator.role, roundNumber };
      const resp = await this.runQuery(evaluator, buildDebatePrompt(acc.assessmentText, others, roundNumber), callbacks, info);
      this.recordUsage(acc, resp);
      const result = parseDebateResponse(resp.text);
      // Keep the previous scores when the evaluator returned none.
      if (result.updatedScores && Object.keys(result.updatedScores).length > 0) {
        acc.scoreResult = { ...acc.scoreResult, scores: result.updatedScores };
      }
      acc.assessmentText = resp.text;
      return toIndividualEvaluations(acc.scoreResult.scores ?? {}, evaluator.role, {});
    });
    return (await Promise.all(promises)).flat();
  }

  // ─── Synthesis ───────────────────────────────────────────────────────
  /**
   * Runs the synthesis query and bills its usage to the synthesizer's accumulator.
   *
   * @returns The parsed synthesis response.
   */
  async runSynthesis(synthesizer, allEvals, scenario, provider, accumulators, callbacks, info) {
    const resp = await this.runQuery(synthesizer, buildSynthesisPrompt(allEvals, scenario, provider), callbacks, info);
    // evaluateRun passes the LAST evaluator as synthesizer, so search from the
    // end: with duplicate role names a front-to-back search would bill the
    // wrong evaluator.
    const acc = [...accumulators].reverse().find((a) => a.role === synthesizer.role);
    if (acc) {
      this.recordUsage(acc, resp);
    }
    return parseSynthesisResponse(resp.text);
  }

  /** Adds one query's cost and token usage to an evaluator's accumulator. */
  recordUsage(acc, resp) {
    acc.costUsd += resp.costUsd;
    acc.tokensIn += resp.tokensIn ?? 0;
    acc.tokensOut += resp.tokensOut ?? 0;
  }

  // ─── SDK query wrapper ───────────────────────────────────────────────
  /**
   * Sends one prompt through the SDK `query()` with the evaluator's provider
   * environment and model, forwards every streamed message to
   * `callbacks.onMessage`, and extracts text, cost and token usage from the
   * `result` message.
   *
   * @returns `{ text, costUsd, tokensIn, tokensOut }`; text is '' and the
   *   numbers are 0 when no result message (or no value) was received.
   * @throws {Error} If the result message's subtype is not 'success'.
   */
  async runQuery(evaluator, prompt, callbacks, info) {
    const q = query({
      prompt,
      options: {
        env: buildRunEnv(evaluator.provider),
        model: evaluator.provider.model,
        tools: [],
        maxTurns: 3,
        permissionMode: 'dontAsk',
        persistSession: false,
      },
    });

    let resultText = '';
    let costUsd = 0;
    let tokensIn = 0;
    let tokensOut = 0;

    for await (const msg of q) {
      callbacks.onMessage(info, {
        timestamp: new Date().toISOString(),
        message: msg,
      });

      if (msg.type !== 'result') {
        continue;
      }

      costUsd = msg.total_cost_usd ?? 0;
      // NOTE(review): counts only usage.input_tokens / usage.output_tokens;
      // cache read/creation tokens are not included — confirm that matches
      // how the ledger totals are meant to be read.
      tokensIn = msg.usage?.input_tokens ?? 0;
      tokensOut = msg.usage?.output_tokens ?? 0;

      if (msg.subtype !== 'success') {
        // Prefer `result`; fall back to an `errors` array when the message
        // carries one, so the failure reason is not reduced to "unknown error".
        const listedErrors = Array.isArray(msg.errors) && msg.errors.length > 0
          ? msg.errors.join('; ')
          : 'unknown error';
        const errorDetail = msg.result ?? listedErrors;
        throw new Error(`SDK query failed (subtype: ${msg.subtype}): ${errorDetail}`);
      }
      resultText = msg.result ?? '';
    }

    return { text: resultText, costUsd, tokensIn, tokensOut };
  }
}
156
+ //# sourceMappingURL=evaluator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../../../src/server/services/evaluator.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,+DAA+D;AAC/D,8EAA8E;AAE9E,OAAO,EAAE,KAAK,EAAE,MAAM,gCAAgC,CAAC;AAcvD,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAC/D,OAAO,EACL,gBAAgB,EAChB,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,GACrB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,kBAAkB,EAClB,uBAAuB,EACvB,sBAAsB,EACtB,mBAAmB,EACnB,uBAAuB,GACxB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,cAAc,EACd,qBAAqB,EACrB,oBAAoB,EACpB,eAAe,GAChB,MAAM,mBAAmB,CAAC;AAe3B,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E,MAAM,OAAO,sBAAsB;IACjC,KAAK,CAAC,WAAW,CACf,GAAQ,EACR,QAAkB,EAClB,QAAkB,EAClB,OAA0B,EAC1B,SAA8B;QAE9B,SAAS,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC;QACpC,SAAS,CAAC,UAAU,CAAC,WAAW,EAAE,mDAAmD,CAAC,CAAC;QAEvF,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACrE,MAAM,YAAY,GAAG,oBAAoB,CAAC;YACxC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,cAAc,CAAC,CAAC,IAAI,GAAG,EAAE,CAAC,CAAC;YAC/F,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;SACjF,CAAC,CAAC;QAEH,MAAM,YAAY,GAA2B,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC1E,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC;YAC9D,WAAW,EAAE,EAAE,EAAE,gBAAgB,EAAE,EAAE,EAAE,cAAc,EAAE,EAAE;SAC1D,CAAC,CAAC,CAAC;QAEJ,oEAAoE;QACpE,SAAS,CAAC,UAAU,CAAC,SAAS,EAAE,yCAAyC,OAAO,CAAC,UAAU,CAAC,MAAM,aAAa,OAAO,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QAC/J,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,SAAS,CACtC,OAAO,CAAC,UAAU,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,EAAE,QAAQ,EAAE,YAAY,EAAE,OAAO,EAAE,SAAS,CACnG,CAAC;QACF,MAAM,eAAe,GAAG,OAAO,CAAC,SAAS,IAAI,CAAC,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;QAC/E,MAAM,MAAM,GAAsB,CAAC;gBACjC,WAAW,
EAAE,CAAC,EAAE,WAAW,EAAE,WAAW;gBACxC,gBAAgB,EAAE,eAAe,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACvE,CAAC,CAAC;QAEH,oEAAoE;QACpE,IAAI,OAAO,CAAC,SAAS,GAAG,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;YAC9C,KAAK,IAAI,QAAQ,GAAG,CAAC,EAAE,QAAQ,IAAI,OAAO,CAAC,SAAS,EAAE,QAAQ,EAAE,EAAE,CAAC;gBACjE,SAAS,CAAC,UAAU,CAAC,QAAQ,EAAE,gBAAgB,QAAQ,OAAO,OAAO,CAAC,SAAS,KAAK,CAAC,CAAC;gBACtF,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,UAAU,EAAE,YAAY,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;gBACrG,MAAM,SAAS,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC;gBAC/C,MAAM,CAAC,IAAI,CAAC;oBACV,WAAW,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW;oBAC/C,gBAAgB,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;iBACjE,CAAC,CAAC;gBACH,IAAI,SAAS;oBAAE,MAAM;YACvB,CAAC;QACH,CAAC;QAED,oEAAoE;QACpE,SAAS,CAAC,UAAU,CAAC,WAAW,EAAE,6CAA6C,CAAC,CAAC;QACjF,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC9C,MAAM,WAAW,GAAG,WAAW,CAAC,WAAW,CAAC;QAC5C,MAAM,WAAW,GAAG,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACtE,MAAM,SAAS,GAAoB,EAAE,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;QACvH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,WAAW,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;QAE9H,oEAAoE;QACpE,MAAM,MAAM,GAAsB,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACzD,aAAa,EAAE,CAAC,CAAC,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC,OAAO;YAC9C,aAAa,EAAE,CAAC,CAAC,QAAQ,EAAE,cAAc,EAAE,CAAC,CAAC,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC,MAAM;SACrF,CAAC,CAAC,CAAC;QACJ,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACrC,SAAS,CAAC,UAAU,CAAC,UAAU,EAAE,sBAAsB,CAAC,CAAC;QACzD,SAAS,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;QAEtC,OAAO;YACL,EAAE,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,CAAC,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,OAAO,CAAC,UAAU,EAAE,MAAM;YAClF,gBAAgB,EAAE,qBAAqB,CAAC,YAAY,CAAC;YACrD,eAAe,EAAE,oBAAoB,CAAC,YAAY,EAAE,QAAQ,CAAC;YAC7D,eAAe,EAAE,EAAE,qBAAqB,EAAE,eAAe,CAAC,YAAY,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,aAAa,EAAE,EAAE,EAAE;YAC5G,SAAS,EAAE;gBACT,eAAe,EAAE,WAAW,CAAC,eAAe,IAAI,EAAE,EAAE,aAAa,EAAE,WAAW,CAAC,aA
Aa,IAAI,CAAC;gBACjG,UAAU,EAAE,WAAW,CAAC,UAAU,IAAI,CAAC,EAAE,UAAU,EAAE,WAAW,CAAC,UAAU,IAAI,EAAE;aAClF;YACD,MAAM,EAAE,YAAY,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC;YACxE,SAAS,EAAE,GAAG,EAAE,SAAS,EAAE,GAAG;SAC/B,CAAC;IACJ,CAAC;IAED,wEAAwE;IAEhE,KAAK,CAAC,SAAS,CACrB,UAAsC,EACtC,YAAoC,EACpC,UAAkB,EAClB,QAAkB,EAClB,QAAkB,EAClB,YAAqD,EACrD,OAAuD,EACvD,SAA8B;QAE9B,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,EAAE;YACvD,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;YAC9B,GAAG,CAAC,MAAM,EAAE,CAAC;YACb,MAAM,SAAS,GAAoB,EAAE,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,SAAS,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;YACrG,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,gBAAgB,CAAC,UAAU,EAAE,QAAQ,EAAE,OAAO,CAAC,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;YACxH,GAAG,CAAC,OAAO,IAAI,SAAS,CAAC,OAAO,CAAC;YACjC,GAAG,CAAC,WAAW,GAAG,kBAAkB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACrD,GAAG,CAAC,cAAc,GAAG,SAAS,CAAC,IAAI,CAAC;YACpC,MAAM,QAAQ,GAAoB,EAAE,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;YACzG,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,qBAAqB,CAAC,UAAU,EAAE,QAAQ,EAAE,YAAY,CAAC,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;YAChI,GAAG,CAAC,OAAO,IAAI,QAAQ,CAAC,OAAO,CAAC;YAChC,GAAG,CAAC,gBAAgB,GAAG,uBAAuB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YAC9D,OAAO,uBAAuB,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,IAAI,EAAE,EAAE,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACnF,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9C,CAAC;IAED,wEAAwE;IAEhE,KAAK,CAAC,cAAc,CAC1B,UAAsC,EACtC,YAAoC,EACpC,WAAmB,EACnB,SAA8B;QAE9B,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,EAAE;YACvD,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;YAC9B,GAAG,CAAC,MAAM,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;YACrF,MAAM,IAAI,GAAoB,EAAE,KAAK,EAAE,QAAQ,EAAE,aAAa,EAAE,SAAS,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC;YAC9F,MAAM,IAAI,GAA
G,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,iBAAiB,CAAC,GAAG,CAAC,cAAc,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;YACzH,GAAG,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC;YAC5B,MAAM,MAAM,GAAG,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC9C,IAAI,MAAM,CAAC,aAAa,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzE,GAAG,CAAC,WAAW,GAAG,EAAE,GAAG,GAAG,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,CAAC,aAAa,EAAE,CAAC;YACzE,CAAC;YACD,GAAG,CAAC,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC;YAC/B,OAAO,uBAAuB,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,IAAI,EAAE,EAAE,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACnF,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9C,CAAC;IAED,wEAAwE;IAEhE,KAAK,CAAC,YAAY,CACxB,WAA4B,EAC5B,QAAyC,EACzC,QAAkB,EAClB,QAAkB,EAClB,YAAoC,EACpC,SAA8B,EAC9B,IAAqB;QAErB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,oBAAoB,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;QACnH,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,IAAI,CAAC,CAAC;QAClE,IAAI,GAAG;YAAE,GAAG,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC;QACrC,OAAO,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC;IAED,wEAAwE;IAEhE,KAAK,CAAC,QAAQ,CACpB,SAA0B,EAC1B,MAAc,EACd,SAA8B,EAC9B,IAAqB;QAErB,MAAM,CAAC,GAAG,KAAK,CAAC;YACd,MAAM;YACN,OAAO,EAAE;gBACP,GAAG,EAAE,WAAW,CAAC,SAAS,CAAC,QAAQ,CAAC;gBACpC,KAAK,EAAE,SAAS,CAAC,QAAQ,CAAC,KAAK;gBAC/B,KAAK,EAAE,EAAE;gBACT,QAAQ,EAAE,CAAC;gBACX,cAAc,EAAE,SAAS;gBACzB,cAAc,EAAE,KAAK;aACtB;SACF,CAAC,CAAC;QAEH,IAAI,UAAU,GAAG,EAAE,CAAC;QACpB,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC;YAC1B,MAAM,MAAM,GAAG,GAAyC,CAAC;YACzD,MAAM,SAAS,GAAqB;gBAClC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,OAAO,EAAE,MAAM;aAChB,CAAC;YACF,SAAS,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAErC,IAAI,MAAM,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;gBAChC,MAAM,SAAS,GAAG,MAAuC,CAAC;gBAC1D,OAAO,GAAG,SAAS,CAAC,cAAc,IAAI,CAAC,CAAC;gBACxC,IAAI,SAAS,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;oBACpC,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,IAAI,e
AAe,CAAC;oBACxD,MAAM,IAAI,KAAK,CACb,8BAA8B,SAAS,CAAC,OAAO,MAAM,WAAW,EAAE,CACnE,CAAC;gBACJ,CAAC;gBACD,UAAU,GAAG,SAAS,CAAC,MAAM,IAAI,EAAE,CAAC;YACtC,CAAC;QACH,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,CAAC;IACvC,CAAC;CACF"}
@@ -0,0 +1,20 @@
1
+ /** File system abstraction for testing */
2
+ export interface FsAdapter {
3
+ mkdir(dirPath: string, opts: {
4
+ recursive: boolean;
5
+ }): Promise<void>;
6
+ writeFile(filePath: string, data: string, opts?: {
7
+ mode?: number;
8
+ }): Promise<void>;
9
+ readFile(filePath: string, encoding: BufferEncoding): Promise<string>;
10
+ readdir(dirPath: string): Promise<string[]>;
11
+ unlink(filePath: string): Promise<void>;
12
+ rename(oldPath: string, newPath: string): Promise<void>;
13
+ stat(filePath: string): Promise<{
14
+ isFile(): boolean;
15
+ }>;
16
+ access(filePath: string): Promise<void>;
17
+ }
18
+ /** Default adapter that delegates to node:fs/promises */
19
+ export declare const defaultFs: FsAdapter;
20
+ //# sourceMappingURL=fs-adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fs-adapter.d.ts","sourceRoot":"","sources":["../../../src/server/services/fs-adapter.ts"],"names":[],"mappings":"AAEA,0CAA0C;AAC1C,MAAM,WAAW,SAAS;IACxB,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE;QAAE,SAAS,EAAE,OAAO,CAAA;KAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACpE,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE;QAAE,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACnF,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACtE,OAAO,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC5C,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACxD,IAAI,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;QAAE,MAAM,IAAI,OAAO,CAAA;KAAE,CAAC,CAAC;IACvD,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACzC;AAED,yDAAyD;AACzD,eAAO,MAAM,SAAS,EAAE,SAUvB,CAAC"}
@@ -0,0 +1,13 @@
1
+ import fs from 'node:fs/promises';
2
/**
 * Default adapter: every method forwards its arguments to the function of the
 * same name in node:fs/promises. mkdir and writeFile always resolve to
 * undefined, and readdir entries are converted to strings.
 */
export const defaultFs = {
  async mkdir(dirPath, opts) {
    // fs.mkdir may resolve to a path when recursive; the adapter discards it.
    await fs.mkdir(dirPath, opts);
  },
  async writeFile(filePath, data, opts) {
    await fs.writeFile(filePath, data, opts);
  },
  async readFile(filePath, encoding) {
    return fs.readFile(filePath, encoding);
  },
  async readdir(dirPath) {
    const entries = await fs.readdir(dirPath);
    return entries.map((entry) => String(entry));
  },
  async unlink(filePath) {
    return fs.unlink(filePath);
  },
  async rename(oldPath, newPath) {
    return fs.rename(oldPath, newPath);
  },
  async stat(filePath) {
    return fs.stat(filePath);
  },
  async access(filePath) {
    return fs.access(filePath);
  },
};
13
+ //# sourceMappingURL=fs-adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fs-adapter.js","sourceRoot":"","sources":["../../../src/server/services/fs-adapter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAclC,yDAAyD;AACzD,MAAM,CAAC,MAAM,SAAS,GAAc;IAClC,KAAK,EAAE,CAAC,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC;IACvE,SAAS,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,CAClC,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC;IAC1D,QAAQ,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC;IACjE,OAAO,EAAE,CAAC,OAAO,EAAE,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAChF,MAAM,EAAE,CAAC,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC;IACzC,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC,EAAE,CAAC,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC;IACzD,IAAI,EAAE,CAAC,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC;IACrC,MAAM,EAAE,CAAC,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC;CAC1C,CAAC"}