model-test-bench 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +389 -0
  3. package/dist/bin/mtb.d.ts +3 -0
  4. package/dist/bin/mtb.d.ts.map +1 -0
  5. package/dist/bin/mtb.js +148 -0
  6. package/dist/bin/mtb.js.map +1 -0
  7. package/dist/server/index.d.ts +13 -0
  8. package/dist/server/index.d.ts.map +1 -0
  9. package/dist/server/index.js +72 -0
  10. package/dist/server/index.js.map +1 -0
  11. package/dist/server/interfaces/evaluator.d.ts +15 -0
  12. package/dist/server/interfaces/evaluator.d.ts.map +1 -0
  13. package/dist/server/interfaces/evaluator.js +2 -0
  14. package/dist/server/interfaces/evaluator.js.map +1 -0
  15. package/dist/server/interfaces/logger.d.ts +9 -0
  16. package/dist/server/interfaces/logger.d.ts.map +1 -0
  17. package/dist/server/interfaces/logger.js +2 -0
  18. package/dist/server/interfaces/logger.js.map +1 -0
  19. package/dist/server/interfaces/runner.d.ts +9 -0
  20. package/dist/server/interfaces/runner.d.ts.map +1 -0
  21. package/dist/server/interfaces/runner.js +2 -0
  22. package/dist/server/interfaces/runner.js.map +1 -0
  23. package/dist/server/interfaces/storage.d.ts +36 -0
  24. package/dist/server/interfaces/storage.d.ts.map +1 -0
  25. package/dist/server/interfaces/storage.js +2 -0
  26. package/dist/server/interfaces/storage.js.map +1 -0
  27. package/dist/server/routes/eval-queue.d.ts +23 -0
  28. package/dist/server/routes/eval-queue.d.ts.map +1 -0
  29. package/dist/server/routes/eval-queue.js +45 -0
  30. package/dist/server/routes/eval-queue.js.map +1 -0
  31. package/dist/server/routes/evaluations.d.ts +8 -0
  32. package/dist/server/routes/evaluations.d.ts.map +1 -0
  33. package/dist/server/routes/evaluations.js +221 -0
  34. package/dist/server/routes/evaluations.js.map +1 -0
  35. package/dist/server/routes/providers.d.ts +5 -0
  36. package/dist/server/routes/providers.d.ts.map +1 -0
  37. package/dist/server/routes/providers.js +179 -0
  38. package/dist/server/routes/providers.js.map +1 -0
  39. package/dist/server/routes/run-queue.d.ts +17 -0
  40. package/dist/server/routes/run-queue.d.ts.map +1 -0
  41. package/dist/server/routes/run-queue.js +34 -0
  42. package/dist/server/routes/run-queue.js.map +1 -0
  43. package/dist/server/routes/run-sse.d.ts +18 -0
  44. package/dist/server/routes/run-sse.d.ts.map +1 -0
  45. package/dist/server/routes/run-sse.js +57 -0
  46. package/dist/server/routes/run-sse.js.map +1 -0
  47. package/dist/server/routes/runs.d.ts +9 -0
  48. package/dist/server/routes/runs.d.ts.map +1 -0
  49. package/dist/server/routes/runs.js +380 -0
  50. package/dist/server/routes/runs.js.map +1 -0
  51. package/dist/server/routes/scenarios.d.ts +5 -0
  52. package/dist/server/routes/scenarios.d.ts.map +1 -0
  53. package/dist/server/routes/scenarios.js +181 -0
  54. package/dist/server/routes/scenarios.js.map +1 -0
  55. package/dist/server/services/eval-helpers.d.ts +22 -0
  56. package/dist/server/services/eval-helpers.d.ts.map +1 -0
  57. package/dist/server/services/eval-helpers.js +75 -0
  58. package/dist/server/services/eval-helpers.js.map +1 -0
  59. package/dist/server/services/eval-parsers-debate-impl.d.ts +11 -0
  60. package/dist/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
  61. package/dist/server/services/eval-parsers-debate-impl.js +133 -0
  62. package/dist/server/services/eval-parsers-debate-impl.js.map +1 -0
  63. package/dist/server/services/eval-parsers.d.ts +24 -0
  64. package/dist/server/services/eval-parsers.d.ts.map +1 -0
  65. package/dist/server/services/eval-parsers.js +153 -0
  66. package/dist/server/services/eval-parsers.js.map +1 -0
  67. package/dist/server/services/eval-prompts.d.ts +9 -0
  68. package/dist/server/services/eval-prompts.d.ts.map +1 -0
  69. package/dist/server/services/eval-prompts.js +164 -0
  70. package/dist/server/services/eval-prompts.js.map +1 -0
  71. package/dist/server/services/evaluator.d.ts +10 -0
  72. package/dist/server/services/evaluator.d.ts.map +1 -0
  73. package/dist/server/services/evaluator.js +143 -0
  74. package/dist/server/services/evaluator.js.map +1 -0
  75. package/dist/server/services/fs-adapter.d.ts +20 -0
  76. package/dist/server/services/fs-adapter.d.ts.map +1 -0
  77. package/dist/server/services/fs-adapter.js +13 -0
  78. package/dist/server/services/fs-adapter.js.map +1 -0
  79. package/dist/server/services/instruction-parser.d.ts +26 -0
  80. package/dist/server/services/instruction-parser.d.ts.map +1 -0
  81. package/dist/server/services/instruction-parser.js +121 -0
  82. package/dist/server/services/instruction-parser.js.map +1 -0
  83. package/dist/server/services/log-rotator.d.ts +20 -0
  84. package/dist/server/services/log-rotator.d.ts.map +1 -0
  85. package/dist/server/services/log-rotator.js +60 -0
  86. package/dist/server/services/log-rotator.js.map +1 -0
  87. package/dist/server/services/logger.d.ts +15 -0
  88. package/dist/server/services/logger.d.ts.map +1 -0
  89. package/dist/server/services/logger.js +69 -0
  90. package/dist/server/services/logger.js.map +1 -0
  91. package/dist/server/services/model-factory.d.ts +10 -0
  92. package/dist/server/services/model-factory.d.ts.map +1 -0
  93. package/dist/server/services/model-factory.js +33 -0
  94. package/dist/server/services/model-factory.js.map +1 -0
  95. package/dist/server/services/runner.d.ts +9 -0
  96. package/dist/server/services/runner.d.ts.map +1 -0
  97. package/dist/server/services/runner.js +99 -0
  98. package/dist/server/services/runner.js.map +1 -0
  99. package/dist/server/services/seeder.d.ts +5 -0
  100. package/dist/server/services/seeder.d.ts.map +1 -0
  101. package/dist/server/services/seeder.js +79 -0
  102. package/dist/server/services/seeder.js.map +1 -0
  103. package/dist/server/services/storage-test-helpers.d.ts +15 -0
  104. package/dist/server/services/storage-test-helpers.d.ts.map +1 -0
  105. package/dist/server/services/storage-test-helpers.js +151 -0
  106. package/dist/server/services/storage-test-helpers.js.map +1 -0
  107. package/dist/server/services/storage.d.ts +35 -0
  108. package/dist/server/services/storage.d.ts.map +1 -0
  109. package/dist/server/services/storage.js +219 -0
  110. package/dist/server/services/storage.js.map +1 -0
  111. package/dist/server/services/tools.d.ts +6 -0
  112. package/dist/server/services/tools.d.ts.map +1 -0
  113. package/dist/server/services/tools.js +94 -0
  114. package/dist/server/services/tools.js.map +1 -0
  115. package/dist/server/services/transcript-formatter.d.ts +18 -0
  116. package/dist/server/services/transcript-formatter.d.ts.map +1 -0
  117. package/dist/server/services/transcript-formatter.js +227 -0
  118. package/dist/server/services/transcript-formatter.js.map +1 -0
  119. package/dist/server/services/update-checker.d.ts +3 -0
  120. package/dist/server/services/update-checker.d.ts.map +1 -0
  121. package/dist/server/services/update-checker.js +34 -0
  122. package/dist/server/services/update-checker.js.map +1 -0
  123. package/dist/server/types/evaluation.d.ts +94 -0
  124. package/dist/server/types/evaluation.d.ts.map +1 -0
  125. package/dist/server/types/evaluation.js +5 -0
  126. package/dist/server/types/evaluation.js.map +1 -0
  127. package/dist/server/types/index.d.ts +5 -0
  128. package/dist/server/types/index.d.ts.map +1 -0
  129. package/dist/server/types/index.js +5 -0
  130. package/dist/server/types/index.js.map +1 -0
  131. package/dist/server/types/provider.d.ts +23 -0
  132. package/dist/server/types/provider.d.ts.map +1 -0
  133. package/dist/server/types/provider.js +5 -0
  134. package/dist/server/types/provider.js.map +1 -0
  135. package/dist/server/types/run.d.ts +31 -0
  136. package/dist/server/types/run.d.ts.map +1 -0
  137. package/dist/server/types/run.js +5 -0
  138. package/dist/server/types/run.js.map +1 -0
  139. package/dist/server/types/scenario.d.ts +19 -0
  140. package/dist/server/types/scenario.d.ts.map +1 -0
  141. package/dist/server/types/scenario.js +5 -0
  142. package/dist/server/types/scenario.js.map +1 -0
  143. package/dist/src/server/index.d.ts +13 -0
  144. package/dist/src/server/index.d.ts.map +1 -0
  145. package/dist/src/server/index.js +72 -0
  146. package/dist/src/server/index.js.map +1 -0
  147. package/dist/src/server/interfaces/evaluator.d.ts +15 -0
  148. package/dist/src/server/interfaces/evaluator.d.ts.map +1 -0
  149. package/dist/src/server/interfaces/evaluator.js +2 -0
  150. package/dist/src/server/interfaces/evaluator.js.map +1 -0
  151. package/dist/src/server/interfaces/logger.d.ts +9 -0
  152. package/dist/src/server/interfaces/logger.d.ts.map +1 -0
  153. package/dist/src/server/interfaces/logger.js +2 -0
  154. package/dist/src/server/interfaces/logger.js.map +1 -0
  155. package/dist/src/server/interfaces/runner.d.ts +9 -0
  156. package/dist/src/server/interfaces/runner.d.ts.map +1 -0
  157. package/dist/src/server/interfaces/runner.js +2 -0
  158. package/dist/src/server/interfaces/runner.js.map +1 -0
  159. package/dist/src/server/interfaces/storage.d.ts +36 -0
  160. package/dist/src/server/interfaces/storage.d.ts.map +1 -0
  161. package/dist/src/server/interfaces/storage.js +2 -0
  162. package/dist/src/server/interfaces/storage.js.map +1 -0
  163. package/dist/src/server/routes/eval-queue.d.ts +23 -0
  164. package/dist/src/server/routes/eval-queue.d.ts.map +1 -0
  165. package/dist/src/server/routes/eval-queue.js +45 -0
  166. package/dist/src/server/routes/eval-queue.js.map +1 -0
  167. package/dist/src/server/routes/evaluations.d.ts +8 -0
  168. package/dist/src/server/routes/evaluations.d.ts.map +1 -0
  169. package/dist/src/server/routes/evaluations.js +221 -0
  170. package/dist/src/server/routes/evaluations.js.map +1 -0
  171. package/dist/src/server/routes/providers.d.ts +5 -0
  172. package/dist/src/server/routes/providers.d.ts.map +1 -0
  173. package/dist/src/server/routes/providers.js +179 -0
  174. package/dist/src/server/routes/providers.js.map +1 -0
  175. package/dist/src/server/routes/run-queue.d.ts +17 -0
  176. package/dist/src/server/routes/run-queue.d.ts.map +1 -0
  177. package/dist/src/server/routes/run-queue.js +34 -0
  178. package/dist/src/server/routes/run-queue.js.map +1 -0
  179. package/dist/src/server/routes/run-sse.d.ts +18 -0
  180. package/dist/src/server/routes/run-sse.d.ts.map +1 -0
  181. package/dist/src/server/routes/run-sse.js +57 -0
  182. package/dist/src/server/routes/run-sse.js.map +1 -0
  183. package/dist/src/server/routes/runs.d.ts +9 -0
  184. package/dist/src/server/routes/runs.d.ts.map +1 -0
  185. package/dist/src/server/routes/runs.js +380 -0
  186. package/dist/src/server/routes/runs.js.map +1 -0
  187. package/dist/src/server/routes/scenarios.d.ts +5 -0
  188. package/dist/src/server/routes/scenarios.d.ts.map +1 -0
  189. package/dist/src/server/routes/scenarios.js +181 -0
  190. package/dist/src/server/routes/scenarios.js.map +1 -0
  191. package/dist/src/server/services/eval-helpers.d.ts +22 -0
  192. package/dist/src/server/services/eval-helpers.d.ts.map +1 -0
  193. package/dist/src/server/services/eval-helpers.js +75 -0
  194. package/dist/src/server/services/eval-helpers.js.map +1 -0
  195. package/dist/src/server/services/eval-parsers-debate-impl.d.ts +11 -0
  196. package/dist/src/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
  197. package/dist/src/server/services/eval-parsers-debate-impl.js +133 -0
  198. package/dist/src/server/services/eval-parsers-debate-impl.js.map +1 -0
  199. package/dist/src/server/services/eval-parsers.d.ts +24 -0
  200. package/dist/src/server/services/eval-parsers.d.ts.map +1 -0
  201. package/dist/src/server/services/eval-parsers.js +153 -0
  202. package/dist/src/server/services/eval-parsers.js.map +1 -0
  203. package/dist/src/server/services/eval-prompts.d.ts +9 -0
  204. package/dist/src/server/services/eval-prompts.d.ts.map +1 -0
  205. package/dist/src/server/services/eval-prompts.js +164 -0
  206. package/dist/src/server/services/eval-prompts.js.map +1 -0
  207. package/dist/src/server/services/evaluator.d.ts +10 -0
  208. package/dist/src/server/services/evaluator.d.ts.map +1 -0
  209. package/dist/src/server/services/evaluator.js +143 -0
  210. package/dist/src/server/services/evaluator.js.map +1 -0
  211. package/dist/src/server/services/fs-adapter.d.ts +20 -0
  212. package/dist/src/server/services/fs-adapter.d.ts.map +1 -0
  213. package/dist/src/server/services/fs-adapter.js +13 -0
  214. package/dist/src/server/services/fs-adapter.js.map +1 -0
  215. package/dist/src/server/services/instruction-parser.d.ts +26 -0
  216. package/dist/src/server/services/instruction-parser.d.ts.map +1 -0
  217. package/dist/src/server/services/instruction-parser.js +121 -0
  218. package/dist/src/server/services/instruction-parser.js.map +1 -0
  219. package/dist/src/server/services/log-rotator.d.ts +20 -0
  220. package/dist/src/server/services/log-rotator.d.ts.map +1 -0
  221. package/dist/src/server/services/log-rotator.js +60 -0
  222. package/dist/src/server/services/log-rotator.js.map +1 -0
  223. package/dist/src/server/services/logger.d.ts +15 -0
  224. package/dist/src/server/services/logger.d.ts.map +1 -0
  225. package/dist/src/server/services/logger.js +69 -0
  226. package/dist/src/server/services/logger.js.map +1 -0
  227. package/dist/src/server/services/model-factory.d.ts +10 -0
  228. package/dist/src/server/services/model-factory.d.ts.map +1 -0
  229. package/dist/src/server/services/model-factory.js +33 -0
  230. package/dist/src/server/services/model-factory.js.map +1 -0
  231. package/dist/src/server/services/runner.d.ts +9 -0
  232. package/dist/src/server/services/runner.d.ts.map +1 -0
  233. package/dist/src/server/services/runner.js +99 -0
  234. package/dist/src/server/services/runner.js.map +1 -0
  235. package/dist/src/server/services/seeder.d.ts +5 -0
  236. package/dist/src/server/services/seeder.d.ts.map +1 -0
  237. package/dist/src/server/services/seeder.js +79 -0
  238. package/dist/src/server/services/seeder.js.map +1 -0
  239. package/dist/src/server/services/storage.d.ts +35 -0
  240. package/dist/src/server/services/storage.d.ts.map +1 -0
  241. package/dist/src/server/services/storage.js +219 -0
  242. package/dist/src/server/services/storage.js.map +1 -0
  243. package/dist/src/server/services/tools.d.ts +6 -0
  244. package/dist/src/server/services/tools.d.ts.map +1 -0
  245. package/dist/src/server/services/tools.js +94 -0
  246. package/dist/src/server/services/tools.js.map +1 -0
  247. package/dist/src/server/services/transcript-formatter.d.ts +18 -0
  248. package/dist/src/server/services/transcript-formatter.d.ts.map +1 -0
  249. package/dist/src/server/services/transcript-formatter.js +227 -0
  250. package/dist/src/server/services/transcript-formatter.js.map +1 -0
  251. package/dist/src/server/services/update-checker.d.ts +3 -0
  252. package/dist/src/server/services/update-checker.d.ts.map +1 -0
  253. package/dist/src/server/services/update-checker.js +34 -0
  254. package/dist/src/server/services/update-checker.js.map +1 -0
  255. package/dist/src/server/types/evaluation.d.ts +94 -0
  256. package/dist/src/server/types/evaluation.d.ts.map +1 -0
  257. package/dist/src/server/types/evaluation.js +5 -0
  258. package/dist/src/server/types/evaluation.js.map +1 -0
  259. package/dist/src/server/types/index.d.ts +5 -0
  260. package/dist/src/server/types/index.d.ts.map +1 -0
  261. package/dist/src/server/types/index.js +5 -0
  262. package/dist/src/server/types/index.js.map +1 -0
  263. package/dist/src/server/types/provider.d.ts +23 -0
  264. package/dist/src/server/types/provider.d.ts.map +1 -0
  265. package/dist/src/server/types/provider.js +5 -0
  266. package/dist/src/server/types/provider.js.map +1 -0
  267. package/dist/src/server/types/run.d.ts +31 -0
  268. package/dist/src/server/types/run.d.ts.map +1 -0
  269. package/dist/src/server/types/run.js +5 -0
  270. package/dist/src/server/types/run.js.map +1 -0
  271. package/dist/src/server/types/scenario.d.ts +19 -0
  272. package/dist/src/server/types/scenario.d.ts.map +1 -0
  273. package/dist/src/server/types/scenario.js +5 -0
  274. package/dist/src/server/types/scenario.js.map +1 -0
  275. package/dist/web/assets/index-AJu1Yn5F.js +70 -0
  276. package/dist/web/assets/index-C_ioEISr.css +1 -0
  277. package/dist/web/index.html +15 -0
  278. package/docs/schemas/provider-api.example.json +12 -0
  279. package/docs/schemas/provider-openai.example.json +11 -0
  280. package/docs/schemas/scenario-baseline.example.json +24 -0
  281. package/docs/schemas/scenario-carwash-baseline.example.json +22 -0
  282. package/docs/schemas/scenario-carwash-with-system-prompt.example.json +24 -0
  283. package/docs/schemas/scenario-golden-rules-baseline.example.json +24 -0
  284. package/docs/schemas/scenario-golden-rules-with-system-prompt.example.json +28 -0
  285. package/docs/schemas/scenario-negative-analysis-baseline.example.json +23 -0
  286. package/docs/schemas/scenario-negative-analysis-with-system-prompt.example.json +25 -0
  287. package/docs/schemas/scenario-with-system-prompt.example.json +25 -0
  288. package/package.json +97 -0
@@ -0,0 +1,227 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Transcript Formatter — converts SDK messages into evaluator-readable text
3
+ // ---------------------------------------------------------------------------
4
/** Upper bound on the size of the transcript text (line lengths plus one newline each). */
const MAX_TRANSCRIPT_CHARS = 100000;
/** Appended as the last transcript line when the size budget was exhausted. */
const TRUNCATED_MARKER = '\n[transcript truncated]';
/** Longest serialized tool input kept per tool call before it is cut short. */
const MAX_TOOL_INPUT_CHARS = 500;
/** Longest tool output kept per tool result before it is cut short. */
const MAX_TOOL_OUTPUT_CHARS = 500;
8
/**
 * Format SDK messages into a readable transcript and structured summary.
 *
 * Every record's `message` is dispatched on its `type` field. Both the
 * hyphenated message types (`text-delta`, `reasoning-delta`, `tool-call`,
 * `tool-result`, `step`) and the underscore/plain ones (`assistant`,
 * `tool_use`, `tool_result`, `result`) are handled; any other type is ignored.
 *
 * Lines are appended until the transcript size budget is exhausted; once a
 * line does not fit, processing stops and a truncation marker is added.
 *
 * @param messages Iterable of records, each exposing a `message` object.
 * @returns An object with `text` (the transcript lines joined by newlines) and
 *   `summary` (`toolCallSequence`, `filesRead`, `filesModified`,
 *   `commandFailures`, `retryPatterns`, `askedClarifyingQuestions`).
 */
export function formatTranscript(messages) {
    const lines = [];
    const toolCalls = [];
    const filesRead = new Set();
    const filesModified = new Set();
    const commandFailures = [];
    const retryPatterns = [];
    let askedClarifyingQuestions = false;
    let totalChars = 0;
    let truncated = false;
    // Append one line and keep the running size / truncation flag in sync.
    const emit = (line) => {
        const outcome = appendLine(lines, line, totalChars);
        totalChars = outcome.totalChars;
        truncated = outcome.truncated;
    };
    // Record a tool invocation in the summary, then add it to the transcript.
    // `source` is the object that carries the call's input/args for file tracking.
    const emitToolCall = (name, rawInput, source) => {
        const input = truncateStr(JSON.stringify(rawInput ?? ''), MAX_TOOL_INPUT_CHARS);
        toolCalls.push(name);
        trackFileAccess(name, source, filesRead, filesModified);
        emit(`[Tool Call] ${name}: ${input}`);
    };
    // Add a (non-error) tool result to the transcript. Structured results are
    // serialized as JSON so they do not collapse to "[object Object]".
    const emitToolResult = (rawResult) => {
        const output = truncateStr(stringifyTranscriptValue(rawResult), MAX_TOOL_OUTPUT_CHARS);
        emit(`[Tool Result] ${output}`);
    };
    for (const record of messages) {
        if (truncated)
            break;
        const msg = record.message;
        switch (msg['type']) {
            case 'text-delta': {
                // Streaming text chunk — each chunk becomes its own transcript line.
                const text = msg['text'];
                if (text) {
                    emit(`[Text] ${text}`);
                    if (looksLikeQuestion(text))
                        askedClarifyingQuestions = true;
                }
                break;
            }
            case 'reasoning-delta': {
                const text = msg['text'];
                if (text) {
                    emit(`[Thinking] ${text}`);
                }
                break;
            }
            case 'tool-call':
                emitToolCall(msg['toolName'] ?? 'unknown_tool', msg['args'], msg);
                break;
            case 'tool-result':
                emitToolResult(msg['result']);
                break;
            case 'step': {
                // AI SDK step format: { text, toolCalls, toolResults, usage }
                const stepText = msg['text'];
                if (stepText) {
                    emit(`[Assistant] ${stepText}`);
                    if (looksLikeQuestion(stepText))
                        askedClarifyingQuestions = true;
                }
                const calls = msg['toolCalls'];
                if (Array.isArray(calls)) {
                    for (const call of calls) {
                        if (truncated)
                            break;
                        emitToolCall(call['toolName'] ?? 'unknown_tool', call['args'], call);
                    }
                }
                const results = msg['toolResults'];
                if (Array.isArray(results)) {
                    for (const toolResult of results) {
                        if (truncated)
                            break;
                        emitToolResult(toolResult['result']);
                    }
                }
                break;
            }
            case 'assistant': {
                const formatted = formatAssistantMessage(msg);
                if (formatted) {
                    emit(formatted);
                    if (looksLikeQuestion(formatted))
                        askedClarifyingQuestions = true;
                }
                break;
            }
            case 'tool_use':
                emitToolCall(msg['name'] ?? 'unknown_tool', msg['input'], msg);
                break;
            case 'tool_result': {
                const output = truncateStr(extractToolResultText(msg), MAX_TOOL_OUTPUT_CHARS);
                const isError = msg['is_error'] === true;
                if (isError) {
                    // Failures feed the summary and may reveal a repeated tool call.
                    commandFailures.push(output);
                    detectRetryPattern(toolCalls, retryPatterns);
                }
                emit(`${isError ? '[Tool Error]' : '[Tool Result]'} ${output}`);
                break;
            }
            case 'result': {
                const finalResult = msg['result'] ?? '';
                if (finalResult) {
                    emit(`[Final Result] ${stringifyTranscriptValue(finalResult)}`);
                }
                break;
            }
            default:
                // Unrecognized message types contribute nothing to the transcript.
                break;
        }
    }
    if (truncated) {
        lines.push(TRUNCATED_MARKER);
    }
    return {
        text: lines.join('\n'),
        summary: {
            toolCallSequence: toolCalls,
            filesRead: [...filesRead],
            filesModified: [...filesModified],
            commandFailures,
            retryPatterns,
            askedClarifyingQuestions,
        },
    };
}
/**
 * Render an arbitrary value as transcript text.
 *
 * Strings pass through unchanged and null/undefined become the empty string.
 * Everything else is serialized with JSON.stringify; when that throws or
 * yields no string (e.g. circular structures, functions), String() is used.
 */
function stringifyTranscriptValue(value) {
    if (typeof value === 'string')
        return value;
    if (value === null || value === undefined)
        return '';
    try {
        const json = JSON.stringify(value);
        if (typeof json === 'string')
            return json;
    }
    catch {
        /* fall through to String() below */
    }
    return String(value);
}
153
+ // ---------------------------------------------------------------------------
154
+ // Internal helpers
155
+ // ---------------------------------------------------------------------------
156
/**
 * Try to add `line` to `lines` without exceeding the transcript size budget.
 *
 * @param lines Transcript lines collected so far; mutated when the line fits.
 * @param line The line to add.
 * @param totalChars Characters already accounted for.
 * @returns The updated character count with `truncated: false` when the line
 *   was added, or the unchanged count with `truncated: true` when it was not.
 */
function appendLine(lines, line, totalChars) {
    // Each line costs its own length plus the newline that joins it to the next.
    const projected = totalChars + line.length + 1;
    const overflows = projected > MAX_TRANSCRIPT_CHARS;
    if (!overflows) {
        lines.push(line);
    }
    return overflows
        ? { totalChars, truncated: true }
        : { totalChars: projected, truncated: false };
}
164
/**
 * Build the `[Assistant] …` transcript line for an assistant message.
 *
 * Reads `msg.message.content`, keeps the blocks whose `type` is 'text' and
 * whose `text` is a string, and joins their text with single spaces.
 *
 * @returns The formatted line, or undefined when there is no nested message,
 *   its content is not an array, or it holds no text blocks.
 */
function formatAssistantMessage(msg) {
    const inner = msg['message'];
    const blocks = inner ? inner['content'] : undefined;
    if (!Array.isArray(blocks)) {
        return undefined;
    }
    const texts = blocks
        .filter((block) => block['type'] === 'text' && typeof block['text'] === 'string')
        .map((block) => block['text']);
    if (texts.length === 0) {
        return undefined;
    }
    return `[Assistant] ${texts.join(' ')}`;
}
179
/**
 * Pull the textual payload out of a tool-result message's `content` field.
 *
 * @returns The content itself when it is a string; the space-joined `text` of
 *   all 'text' blocks when it is an array; otherwise the empty string.
 */
function extractToolResultText(msg) {
    const payload = msg['content'];
    if (Array.isArray(payload)) {
        return payload
            .filter((block) => block['type'] === 'text' && typeof block['text'] === 'string')
            .map((block) => block['text'])
            .join(' ');
    }
    return typeof payload === 'string' ? payload : '';
}
194
/**
 * Shorten `s` to at most `max` UTF-16 code units, appending '...' when cut.
 *
 * The cut never lands between the two halves of a surrogate pair: if it would,
 * the dangling high surrogate is dropped as well, so the result does not end
 * in a lone surrogate before the ellipsis.
 *
 * @param s The string to shorten.
 * @param max Maximum number of code units to keep.
 * @returns `s` unchanged when it already fits, otherwise the shortened prefix
 *   followed by '...'.
 */
function truncateStr(s, max) {
    if (s.length <= max) {
        return s;
    }
    let end = max;
    if (end > 0) {
        const before = s.charCodeAt(end - 1);
        const after = s.charCodeAt(end);
        const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
        if (splitsPair) {
            end -= 1;
        }
    }
    return s.slice(0, end) + '...';
}
197
/**
 * Heuristic: does the text end with a question mark once surrounding
 * whitespace is ignored?
 */
function looksLikeQuestion(text) {
    const stripped = text.trim();
    return stripped.endsWith('?');
}
200
/**
 * Record the file touched by a tool call in the read or modified set.
 *
 * The tool parameters are taken from `msg.input` (Claude SDK format) or, when
 * that is absent, `msg.args` (AI SDK format). The path is the first present of
 * `file_path`, `path`, `filename`. 'Read', 'Glob' and 'Grep' count as reads;
 * 'Edit', 'Write' and 'NotebookEdit' count as modifications; other tools are
 * ignored.
 */
function trackFileAccess(toolName, msg, filesRead, filesModified) {
    const params = msg['input'] ?? msg['args'];
    const target = params
        ? (params['file_path'] ?? params['path'] ?? params['filename'])
        : undefined;
    if (!target) {
        return;
    }
    switch (toolName) {
        case 'Read':
        case 'Glob':
        case 'Grep':
            filesRead.add(target);
            break;
        case 'Edit':
        case 'Write':
        case 'NotebookEdit':
            filesModified.add(target);
            break;
        default:
            break;
    }
}
215
/**
 * Note a retry when the two most recent tool calls used the same tool.
 *
 * Adds `Repeated <tool> after error` to `retryPatterns` unless that exact
 * entry is already present. Does nothing with fewer than two recorded calls.
 */
function detectRetryPattern(toolCalls, retryPatterns) {
    const count = toolCalls.length;
    if (count < 2) {
        return;
    }
    const [previous, latest] = toolCalls.slice(count - 2);
    if (previous !== latest) {
        return;
    }
    const label = `Repeated ${latest} after error`;
    const alreadyNoted = retryPatterns.includes(label);
    if (!alreadyNoted) {
        retryPatterns.push(label);
    }
}
227
+ //# sourceMappingURL=transcript-formatter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"transcript-formatter.js","sourceRoot":"","sources":["../../../../src/server/services/transcript-formatter.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;AAI9E,MAAM,oBAAoB,GAAG,OAAO,CAAC;AACrC,MAAM,gBAAgB,GAAG,0BAA0B,CAAC;AACpD,MAAM,oBAAoB,GAAG,GAAG,CAAC;AACjC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAkBlC,6EAA6E;AAC7E,MAAM,UAAU,gBAAgB,CAAC,QAAqC;IACpE,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,MAAM,SAAS,GAAgB,IAAI,GAAG,EAAE,CAAC;IACzC,MAAM,aAAa,GAAgB,IAAI,GAAG,EAAE,CAAC;IAC7C,MAAM,eAAe,GAAa,EAAE,CAAC;IACrC,MAAM,aAAa,GAAa,EAAE,CAAC;IACnC,IAAI,wBAAwB,GAAG,KAAK,CAAC;IACrC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,SAAS,GAAG,KAAK,CAAC;IAEtB,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC9B,IAAI,SAAS;YAAE,MAAM;QACrB,MAAM,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC;QAC3B,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,CAAuB,CAAC;QAElD,IAAI,OAAO,KAAK,YAAY,EAAE,CAAC;YAC7B,mDAAmD;YACnD,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAuB,CAAC;YAC/C,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,UAAU,IAAI,EAAE,EAAE,UAAU,CAAC,CAAC;gBAC1D,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;gBAC1B,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC;gBACxB,IAAI,iBAAiB,CAAC,IAAI,CAAC;oBAAE,wBAAwB,GAAG,IAAI,CAAC;YAC/D,CAAC;QACH,CAAC;aAAM,IAAI,OAAO,KAAK,iBAAiB,EAAE,CAAC;YACzC,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAuB,CAAC;YAC/C,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,cAAc,IAAI,EAAE,EAAE,UAAU,CAAC,CAAC;gBAC9D,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;gBAC1B,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC;YAC1B,CAAC;QACH,CAAC;aAAM,IAAI,OAAO,KAAK,WAAW,EAAE,CAAC;YACnC,MAAM,IAAI,GAAI,GAAG,CAAC,UAAU,CAAY,IAAI,cAAc,CAAC;YAC3D,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,EAAE,oBAAoB,CAAC,CAAC;YAClF,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrB,eAAe,CAAC,IAAI,EAAE,GAAG,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC;YACrD,MAAM,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,eAAe,IAAI,KAAK,IAAI,EAAE,EAAE,UAAU,CAAC,CAAC;YACxE,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;YAC1B,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC;QAC1B,CAAC;aAAM,IAAI,OAAO,KAAK,aAAa,EAAE,CAAC;YACrC,MAAM,MAAM,GAAG,WAA
W,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,EAAE,qBAAqB,CAAC,CAAC;YAC/E,MAAM,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,iBAAiB,MAAM,EAAE,EAAE,UAAU,CAAC,CAAC;YACnE,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;YAC1B,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC;QAC1B,CAAC;aAAM,IAAI,OAAO,KAAK,MAAM,EAAE,CAAC;YAC9B,8DAA8D;YAC9D,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAuB,CAAC;YACnD,IAAI,QAAQ,EAAE,CAAC;gBACb,MAAM,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,eAAe,QAAQ,EAAE,EAAE,UAAU,CAAC,CAAC;gBACnE,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;gBAC1B,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC;gBACxB,IAAI,iBAAiB,CAAC,QAAQ,CAAC;oBAAE,wBAAwB,GAAG,IAAI,CAAC;YACnE,CAAC;YACD,MAAM,KAAK,GAAG,GAAG,CAAC,WAAW,CAAuD,CAAC;YACrF,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,IAAI,SAAS;wBAAE,MAAM;oBACrB,MAAM,IAAI,GAAI,IAAI,CAAC,UAAU,CAAY,IAAI,cAAc,CAAC;oBAC5D,MAAM,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,EAAE,oBAAoB,CAAC,CAAC;oBACpF,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACrB,eAAe,CAAC,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC;oBACtD,MAAM,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,eAAe,IAAI,KAAK,KAAK,EAAE,EAAE,UAAU,CAAC,CAAC;oBACzE,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;oBAC1B,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC;gBAC1B,CAAC;YACH,CAAC;YACD,MAAM,OAAO,GAAG,GAAG,CAAC,aAAa,CAAuD,CAAC;YACzF,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC3B,KAAK,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;oBACzB,IAAI,SAAS;wBAAE,MAAM;oBACrB,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,EAAE,qBAAqB,CAAC,CAAC;oBAC9E,MAAM,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,iBAAiB,MAAM,EAAE,EAAE,UAAU,CAAC,CAAC;oBACnE,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;oBAC1B,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC;gBAC1B,CAAC;YACH,CAAC;QACH,CAAC;aAAM,IAAI,OAAO,KAAK,WAAW,EAAE,CAAC;YACnC,MAAM,SAAS,GAAG,sBAAsB,CAAC,GAAG,CAAC,CAAC;YAC9C,IAAI,SAAS,EAAE,CAAC;gBACd,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;gBACxD,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;gBAC/B,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;gBAC7B,IAAI,iBAAiB,CAAC,SAAS,CAAC,EAAE,CAAC;oBACjC,wBAAwB,GAAG,IAAI,CAAC;gBAClC,CAAC;YACH,CAAC;QACH,C
AAC;aAAM,IAAI,OAAO,KAAK,UAAU,EAAE,CAAC;YAClC,MAAM,IAAI,GAAI,GAAG,CAAC,MAAM,CAAY,IAAI,cAAc,CAAC;YACvD,MAAM,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,EAAE,oBAAoB,CAAC,CAAC;YACpF,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrB,eAAe,CAAC,IAAI,EAAE,GAAG,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC;YACrD,MAAM,IAAI,GAAG,eAAe,IAAI,KAAK,KAAK,EAAE,CAAC;YAC7C,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC;YACnD,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;YAC/B,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;QAC/B,CAAC;aAAM,IAAI,OAAO,KAAK,aAAa,EAAE,CAAC;YACrC,MAAM,MAAM,GAAG,WAAW,CAAC,qBAAqB,CAAC,GAAG,CAAC,EAAE,qBAAqB,CAAC,CAAC;YAC9E,MAAM,OAAO,GAAG,GAAG,CAAC,UAAU,CAAC,KAAK,IAAI,CAAC;YACzC,IAAI,OAAO,EAAE,CAAC;gBACZ,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC7B,kBAAkB,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;YAC/C,CAAC;YACD,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,eAAe,CAAC;YAC1D,MAAM,IAAI,GAAG,GAAG,MAAM,IAAI,MAAM,EAAE,CAAC;YACnC,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC;YACnD,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;YAC/B,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;QAC/B,CAAC;aAAM,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;YAChC,MAAM,UAAU,GAAI,GAAG,CAAC,QAAQ,CAAY,IAAI,EAAE,CAAC;YACnD,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,IAAI,GAAG,kBAAkB,UAAU,EAAE,CAAC;gBAC5C,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC;gBACnD,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;gBAC/B,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;YAC/B,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,SAAS,EAAE,CAAC;QACd,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO;QACL,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;QACtB,OAAO,EAAE;YACP,gBAAgB,EAAE,SAAS;YAC3B,SAAS,EAAE,CAAC,GAAG,SAAS,CAAC;YACzB,aAAa,EAAE,CAAC,GAAG,aAAa,CAAC;YACjC,eAAe;YACf,aAAa;YACb,wBAAwB;SACzB;KACF,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E,SAAS,UAAU,CACjB,KAAe,EACf,IAAY,EACZ,UAAkB;IAElB,MAAM,QAAQ,GAAG,UAAU,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,iBAAiB;IAChE,IAAI,QAAQ,GAAG,oBAAoB,EAAE,CAAC;QACpC,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IACzC,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACj
B,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;AACpD,CAAC;AAED,SAAS,sBAAsB,CAAC,GAAsC;IACpE,MAAM,OAAO,GAAG,GAAG,CAAC,SAAS,CAAwC,CAAC;IACtE,IAAI,CAAC,OAAO;QAAE,OAAO,SAAS,CAAC;IAE/B,MAAM,aAAa,GAAG,OAAO,CAAC,SAAS,CAAuD,CAAC;IAC/F,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,aAAa,CAAC;QAAE,OAAO,SAAS,CAAC;IAEpD,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;QAClC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,MAAM,IAAI,OAAO,KAAK,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;YAClE,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IACD,OAAO,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;AACjF,CAAC;AAED,SAAS,qBAAqB,CAAC,GAAsC;IACnE,MAAM,OAAO,GAAG,GAAG,CAAC,SAAS,CAAgE,CAAC;IAC9F,IAAI,OAAO,OAAO,KAAK,QAAQ;QAAE,OAAO,OAAO,CAAC;IAChD,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,MAAM,IAAI,OAAO,KAAK,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;gBAClE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;YAC5B,CAAC;QACH,CAAC;QACD,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACzB,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,WAAW,CAAC,CAAS,EAAE,GAAW;IACzC,OAAO,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;AACtD,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;AACpC,CAAC;AAED,SAAS,eAAe,CACtB,QAAgB,EAChB,GAAsC,EACtC,SAAsB,EACtB,aAA0B;IAE1B,0EAA0E;IAC1E,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,CAAwC,CAAC;IACnF,IAAI,CAAC,KAAK;QAAE,OAAO;IACnB,MAAM,QAAQ,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,CAAuB,CAAC;IAClG,IAAI,CAAC,QAAQ;QAAE,OAAO;IAEtB,IAAI,QAAQ,KAAK,MAAM,IAAI,QAAQ,KAAK,MAAM,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QACtE,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC1B,CAAC;SAAM,IAAI,QAAQ,KAAK,MAAM,IAAI,QAAQ,KAAK,OAAO,IAAI,QAAQ,KAAK,cAAc,EAAE,CAAC;QACtF,aAAa,CAAC,GAAG,CAAC,QAAQ,C
AAC,CAAC;IAC9B,CAAC;AACH,CAAC;AAED,SAAS,kBAAkB,CAAC,SAA4B,EAAE,aAAuB;IAC/E,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO;IACjC,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC7C,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC7C,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,MAAM,OAAO,GAAG,YAAY,IAAI,cAAc,CAAC;QAC/C,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YACrC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -0,0 +1,3 @@
1
+ export declare function isNewer(latest: string, current: string): boolean;
2
+ export declare function checkForUpdate(currentVersion: string): void;
3
+ //# sourceMappingURL=update-checker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"update-checker.d.ts","sourceRoot":"","sources":["../../../../src/server/services/update-checker.ts"],"names":[],"mappings":"AAEA,wBAAgB,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAQhE;AAED,wBAAgB,cAAc,CAAC,cAAc,EAAE,MAAM,GAAG,IAAI,CAwB3D"}
@@ -0,0 +1,34 @@
1
+ import https from 'node:https';
2
/**
 * Reports whether `latest` is a strictly newer version than `current`.
 *
 * Only the numeric `major.minor.patch` core is compared numerically. Input is
 * parsed tolerantly so that common real-world version strings do not poison
 * the comparison with `NaN`:
 *   - a leading `v` / `V` is ignored (`v1.2.3`),
 *   - build metadata after `+` is ignored (`1.2.3+build.5`),
 *   - a pre-release suffix after `-` is stripped from the core (`1.2.3-beta.1`),
 *   - missing or non-numeric segments count as `0` (`1.2` equals `1.2.0`).
 *
 * When both cores are equal, a release is considered newer than a pre-release
 * of the same core (`1.0.0` is newer than `1.0.0-rc.1`). Two pre-releases of
 * the same core are NOT ordered against each other; the result is `false`.
 *
 * @param {string} latest  Version advertised by the registry.
 * @param {string} current Version currently installed.
 * @returns {boolean} `true` only when `latest` is strictly newer than `current`.
 */
export function isNewer(latest, current) {
    // Splits a version string into its numeric core and a pre-release flag.
    const parse = (version) => {
        const text = String(version).trim().replace(/^v/i, '');
        // Build metadata never affects precedence, so drop it first.
        const plus = text.indexOf('+');
        const withoutBuild = plus === -1 ? text : text.slice(0, plus);
        const dash = withoutBuild.indexOf('-');
        const core = dash === -1 ? withoutBuild : withoutBuild.slice(0, dash);
        const parts = core.split('.').map((segment) => {
            const value = Number.parseInt(segment, 10);
            // Comparisons against NaN are always false, which would silently
            // skip the segment; normalise to 0 instead.
            return Number.isNaN(value) ? 0 : value;
        });
        return { parts, isPrerelease: dash !== -1 };
    };
    const a = parse(latest);
    const b = parse(current);
    for (let i = 0; i < 3; i++) {
        const left = a.parts[i] ?? 0;
        const right = b.parts[i] ?? 0;
        if (left !== right)
            return left > right;
    }
    // Same numeric core: only a release superseding a pre-release counts as newer.
    return b.isPrerelease && !a.isPrerelease;
}
13
/**
 * Best-effort check for a newer published version of `model-test-bench`.
 *
 * Issues a single HTTPS GET to the npm registry's `latest` document and, when
 * the advertised version is newer than `currentVersion` (per `isNewer`), prints
 * an upgrade hint to stdout. Every failure mode (network error, timeout,
 * non-200 status, malformed JSON, unexpected payload shape) is deliberately
 * silent: an update check must never disturb the CLI.
 *
 * @param {string} currentVersion Version of the running package.
 * @returns {void} Nothing; the result is reported asynchronously via `console.log`.
 */
export function checkForUpdate(currentVersion) {
    const req = https.get('https://registry.npmjs.org/model-test-bench/latest', { timeout: 3000, headers: { Accept: 'application/json' } }, (res) => {
        // Swallow stream errors (e.g. the request being destroyed by the
        // timeout below while the body is still arriving).
        res.on('error', () => { });
        if (res.statusCode !== 200) {
            // The body must still be consumed, otherwise it stays buffered
            // and the socket is held until the timeout fires.
            res.resume();
            return;
        }
        // Decode at the stream level so multi-byte UTF-8 sequences that
        // straddle chunk boundaries are not corrupted.
        res.setEncoding('utf8');
        let body = '';
        res.on('data', (chunk) => { body += chunk; });
        res.on('end', () => {
            try {
                const data = JSON.parse(body);
                const latest = data?.version;
                // The payload is untrusted: only act on a non-empty string.
                if (typeof latest === 'string' && latest && latest !== currentVersion && isNewer(latest, currentVersion)) {
                    console.log(`\n Update available: ${currentVersion} → \x1b[32m${latest}\x1b[0m` +
                        `\n Run \x1b[36mnpm install -g model-test-bench\x1b[0m to update\n`);
                }
            }
            catch { /* ignore parse errors */ }
        });
    });
    // Network failures are expected (offline, proxy, DNS) and intentionally ignored.
    req.on('error', () => { });
    // Abort the request if the socket stays idle for 3 seconds.
    req.setTimeout(3000, () => req.destroy());
}
34
+ //# sourceMappingURL=update-checker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"update-checker.js","sourceRoot":"","sources":["../../../../src/server/services/update-checker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,YAAY,CAAC;AAE/B,MAAM,UAAU,OAAO,CAAC,MAAc,EAAE,OAAe;IACrD,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACxC,MAAM,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3B,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAAE,OAAO,IAAI,CAAC;QAC3C,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;IAC9C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,cAAsB;IACnD,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CACnB,oDAAoD,EACpD,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,MAAM,EAAE,kBAAkB,EAAE,EAAE,EAC1D,CAAC,GAAG,EAAE,EAAE;QACN,IAAI,GAAG,CAAC,UAAU,KAAK,GAAG;YAAE,OAAO;QACnC,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE,GAAG,IAAI,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,GAAG,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;YACjB,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAyB,CAAC;gBACtD,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC;gBAC5B,IAAI,MAAM,IAAI,MAAM,KAAK,cAAc,IAAI,OAAO,CAAC,MAAM,EAAE,cAAc,CAAC,EAAE,CAAC;oBAC3E,OAAO,CAAC,GAAG,CACT,yBAAyB,cAAc,cAAc,MAAM,SAAS;wBACpE,oEAAoE,CACrE,CAAC;gBACJ,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;IACL,CAAC,CACF,CAAC;IACF,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE,GAA6C,CAAC,CAAC,CAAC;IACrE,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;AAC5C,CAAC"}
@@ -0,0 +1,94 @@
1
+ /** Configuration for an evaluator agent. */
2
+ export interface EvaluatorConfig {
3
+ readonly providerName: string;
4
+ readonly model: string;
5
+ readonly apiKey: string;
6
+ readonly baseUrl?: string;
7
+ readonly role: string;
8
+ }
9
+ /** Request to start an evaluation pipeline. */
10
+ export interface EvaluationRequest {
11
+ readonly runId: string;
12
+ readonly evaluators: readonly EvaluatorConfig[];
13
+ readonly maxRounds: number;
14
+ readonly maxBudgetUsd?: number;
15
+ }
16
+ /** Lifecycle status of an evaluation. */
17
+ export type EvaluationStatus = 'pending' | 'running' | 'completed' | 'failed';
18
+ /** How well the answer matches the expected answer. */
19
+ export interface AnswerComparison {
20
+ readonly matches: boolean;
21
+ readonly explanation: string;
22
+ readonly similarity: number;
23
+ }
24
+ /** Result for a single critical requirement. */
25
+ export interface CriticalPartResult {
26
+ readonly requirement: string;
27
+ readonly met: boolean;
28
+ readonly evidence: string;
29
+ }
30
+ /** Per-dimension score from one evaluator. */
31
+ export interface IndividualEvaluation {
32
+ readonly evaluatorRole: string;
33
+ readonly dimension: string;
34
+ readonly score: number;
35
+ readonly reasoning: string;
36
+ }
37
+ /** Did the agent follow the instructions in its system prompt? */
38
+ export interface InstructionCompliance {
39
+ readonly followed: readonly string[];
40
+ readonly violated: readonly string[];
41
+ readonly notApplicable: readonly string[];
42
+ readonly overallCompliance: number;
43
+ }
44
+ /** Aggregated report on instruction compliance. */
45
+ export interface SetupComplianceReport {
46
+ readonly instructionCompliance: InstructionCompliance;
47
+ }
48
+ /** Ledger tracking cost/usage per evaluator. */
49
+ export interface EvaluatorLedger {
50
+ readonly evaluatorRole: string;
51
+ readonly totalCostUsd: number;
52
+ readonly totalTokensIn: number;
53
+ readonly totalTokensOut: number;
54
+ readonly roundsParticipated: number;
55
+ }
56
+ /** A single round of evaluation (there may be multiple rounds for consensus). */
57
+ export interface EvaluationRound {
58
+ readonly roundNumber: number;
59
+ readonly evaluations: readonly IndividualEvaluation[];
60
+ readonly consensusReached: boolean;
61
+ readonly timestamp: string;
62
+ }
63
+ /** Synthesised final scores across evaluators and rounds. */
64
+ export interface EvaluationSynthesis {
65
+ readonly dimensionScores: Readonly<Record<string, number>>;
66
+ readonly weightedTotal: number;
67
+ readonly confidence: number;
68
+ readonly dissenting: readonly string[];
69
+ }
70
+ /** Overall report on how effective a setup is across evaluations. */
71
+ export interface SetupEffectivenessReport {
72
+ readonly setupId: string;
73
+ readonly averageScore: number;
74
+ readonly scenarioBreakdown: Readonly<Record<string, number>>;
75
+ readonly strengths: readonly string[];
76
+ readonly weaknesses: readonly string[];
77
+ }
78
+ /** The full evaluation record persisted after the pipeline completes. */
79
+ export interface Evaluation {
80
+ readonly id: string;
81
+ readonly runId: string;
82
+ readonly status: EvaluationStatus;
83
+ readonly evaluators: readonly EvaluatorConfig[];
84
+ readonly rounds: readonly EvaluationRound[];
85
+ readonly answerComparison: AnswerComparison;
86
+ readonly criticalResults: readonly CriticalPartResult[];
87
+ readonly setupCompliance: SetupComplianceReport;
88
+ readonly synthesis: EvaluationSynthesis;
89
+ readonly ledger: readonly EvaluatorLedger[];
90
+ readonly totalCostUsd: number;
91
+ readonly createdAt: string;
92
+ readonly updatedAt: string;
93
+ }
94
+ //# sourceMappingURL=evaluation.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluation.d.ts","sourceRoot":"","sources":["../../../../src/server/types/evaluation.ts"],"names":[],"mappings":"AAIA,4CAA4C;AAC5C,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,+CAA+C;AAC/C,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,UAAU,EAAE,SAAS,eAAe,EAAE,CAAC;IAChD,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAED,yCAAyC;AACzC,MAAM,MAAM,gBAAgB,GAAG,SAAS,GAAG,SAAS,GAAG,WAAW,GAAG,QAAQ,CAAC;AAE9E,uDAAuD;AACvD,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC;IAC1B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED,gDAAgD;AAChD,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;CAC3B;AAED,8CAA8C;AAC9C,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAED,kEAAkE;AAClE,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,aAAa,EAAE,SAAS,MAAM,EAAE,CAAC;IAC1C,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;CACpC;AAED,mDAAmD;AACnD,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,qBAAqB,EAAE,qBAAqB,CAAC;CACvD;AAED,gDAAgD;AAChD,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;CACrC;AAED,iFAAiF;AACjF,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,SAAS,oBAAoB,EAAE,CAAC;IACtD,QAAQ,CAAC,gBAAgB,EAAE,OAAO,CAAC;IACnC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAED,6DAA6D;AAC7D,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,eAAe,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAC3D,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CA
AC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;CACxC;AAED,qEAAqE;AACrE,MAAM,WAAW,wBAAwB;IACvC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,iBAAiB,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAC7D,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;CACxC;AAED,yEAAyE;AACzE,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,gBAAgB,CAAC;IAClC,QAAQ,CAAC,UAAU,EAAE,SAAS,eAAe,EAAE,CAAC;IAChD,QAAQ,CAAC,MAAM,EAAE,SAAS,eAAe,EAAE,CAAC;IAC5C,QAAQ,CAAC,gBAAgB,EAAE,gBAAgB,CAAC;IAC5C,QAAQ,CAAC,eAAe,EAAE,SAAS,kBAAkB,EAAE,CAAC;IACxD,QAAQ,CAAC,eAAe,EAAE,qBAAqB,CAAC;IAChD,QAAQ,CAAC,SAAS,EAAE,mBAAmB,CAAC;IACxC,QAAQ,CAAC,MAAM,EAAE,SAAS,eAAe,EAAE,CAAC;IAC5C,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
@@ -0,0 +1,5 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Evaluation Types
3
+ // ---------------------------------------------------------------------------
4
+ export {};
5
+ //# sourceMappingURL=evaluation.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluation.js","sourceRoot":"","sources":["../../../../src/server/types/evaluation.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E"}
@@ -0,0 +1,5 @@
1
+ export type { Provider, ScoringDimension, } from './provider.js';
2
+ export type { Scenario, ScenarioCategory } from './scenario.js';
3
+ export type { Run, RunStatus, SDKMessageRecord } from './run.js';
4
+ export type { AnswerComparison, CriticalPartResult, Evaluation, EvaluationRequest, EvaluationRound, EvaluationStatus, EvaluationSynthesis, EvaluatorConfig, EvaluatorLedger, IndividualEvaluation, InstructionCompliance, SetupComplianceReport, SetupEffectivenessReport, } from './evaluation.js';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/server/types/index.ts"],"names":[],"mappings":"AAIA,YAAY,EACV,QAAQ,EACR,gBAAgB,GACjB,MAAM,eAAe,CAAC;AAEvB,YAAY,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAEhE,YAAY,EAAE,GAAG,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjE,YAAY,EACV,gBAAgB,EAChB,kBAAkB,EAClB,UAAU,EACV,iBAAiB,EACjB,eAAe,EACf,gBAAgB,EAChB,mBAAmB,EACnB,eAAe,EACf,eAAe,EACf,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,wBAAwB,GACzB,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,5 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Central re-export for all shared types
3
+ // ---------------------------------------------------------------------------
4
+ export {};
5
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/server/types/index.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,yCAAyC;AACzC,8EAA8E"}
@@ -0,0 +1,23 @@
1
+ /** Scoring dimension used to evaluate a run. */
2
+ export interface ScoringDimension {
3
+ readonly name: string;
4
+ readonly weight: number;
5
+ readonly description: string;
6
+ }
7
+ /** A provider configuration that defines how to connect to an LLM provider. */
8
+ export interface Provider {
9
+ readonly id: string;
10
+ readonly name: string;
11
+ readonly description: string;
12
+ readonly providerName: string;
13
+ readonly model: string;
14
+ readonly apiKey: string;
15
+ readonly baseUrl?: string;
16
+ readonly temperature?: number;
17
+ readonly maxTokens?: number;
18
+ readonly topP?: number;
19
+ readonly timeoutSeconds: number;
20
+ readonly createdAt: string;
21
+ readonly updatedAt: string;
22
+ }
23
+ //# sourceMappingURL=provider.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"provider.d.ts","sourceRoot":"","sources":["../../../../src/server/types/provider.ts"],"names":[],"mappings":"AAIA,gDAAgD;AAChD,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,+EAA+E;AAC/E,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
@@ -0,0 +1,5 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Provider Types
3
+ // ---------------------------------------------------------------------------
4
+ export {};
5
+ //# sourceMappingURL=provider.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"provider.js","sourceRoot":"","sources":["../../../../src/server/types/provider.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E"}
@@ -0,0 +1,31 @@
1
+ import type { Provider } from './provider.js';
2
+ import type { Scenario } from './scenario.js';
3
+ /** A raw message captured during a run (provider-agnostic). */
4
+ export interface SDKMessageRecord {
5
+ readonly timestamp: string;
6
+ readonly message: Readonly<Record<string, unknown>>;
7
+ }
8
+ /** Lifecycle status of a run. */
9
+ export type RunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
10
+ /** A single run: one provider + one scenario + captured output. */
11
+ export interface Run {
12
+ readonly id: string;
13
+ readonly providerId: string;
14
+ readonly scenarioId: string;
15
+ readonly status: RunStatus;
16
+ readonly providerSnapshot: Provider;
17
+ readonly scenarioSnapshot: Scenario;
18
+ readonly messages: readonly SDKMessageRecord[];
19
+ readonly resultText: string;
20
+ readonly totalCostUsd: number;
21
+ readonly durationMs: number;
22
+ readonly numTurns: number;
23
+ readonly error?: string;
24
+ readonly reviewerProviderIds?: readonly string[];
25
+ readonly reviewerProviderSnapshots?: readonly Provider[];
26
+ readonly maxEvalRounds?: number;
27
+ readonly evaluationId?: string;
28
+ readonly createdAt: string;
29
+ readonly updatedAt: string;
30
+ }
31
+ //# sourceMappingURL=run.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../../src/server/types/run.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAE9C,+DAA+D;AAC/D,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,OAAO,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;CACrD;AAED,iCAAiC;AACjC,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,SAAS,GAAG,WAAW,GAAG,QAAQ,GAAG,WAAW,CAAC;AAErF,mEAAmE;AACnE,MAAM,WAAW,GAAG;IAClB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,MAAM,EAAE,SAAS,CAAC;IAC3B,QAAQ,CAAC,gBAAgB,EAAE,QAAQ,CAAC;IACpC,QAAQ,CAAC,gBAAgB,EAAE,QAAQ,CAAC;IACpC,QAAQ,CAAC,QAAQ,EAAE,SAAS,gBAAgB,EAAE,CAAC;IAC/C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAExB,QAAQ,CAAC,mBAAmB,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACjD,QAAQ,CAAC,yBAAyB,CAAC,EAAE,SAAS,QAAQ,EAAE,CAAC;IACzD,QAAQ,CAAC,aAAa,CAAC,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
@@ -0,0 +1,5 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Run Types
3
+ // ---------------------------------------------------------------------------
4
+ export {};
5
+ //# sourceMappingURL=run.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run.js","sourceRoot":"","sources":["../../../../src/server/types/run.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E"}
@@ -0,0 +1,19 @@
1
+ import type { ScoringDimension } from './provider.js';
2
+ /** Built-in category labels for scenarios (behavior/planning focused). */
3
+ export type ScenarioCategory = 'planning' | 'instruction-following' | 'reasoning' | 'tool-strategy' | 'error-handling' | 'ambiguity-handling' | 'scope-management' | 'custom';
4
+ /** A scenario defines what to test and how to grade. */
5
+ export interface Scenario {
6
+ readonly id: string;
7
+ readonly name: string;
8
+ readonly category: ScenarioCategory;
9
+ readonly prompt: string;
10
+ readonly systemPrompt: string;
11
+ readonly enabledTools: readonly string[];
12
+ readonly expectedAnswer: string;
13
+ readonly criticalRequirements: readonly string[];
14
+ readonly gradingGuidelines: string;
15
+ readonly scoringDimensions: readonly ScoringDimension[];
16
+ readonly createdAt: string;
17
+ readonly updatedAt: string;
18
+ }
19
+ //# sourceMappingURL=scenario.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scenario.d.ts","sourceRoot":"","sources":["../../../../src/server/types/scenario.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAEtD,0EAA0E;AAC1E,MAAM,MAAM,gBAAgB,GACxB,UAAU,GACV,uBAAuB,GACvB,WAAW,GACX,eAAe,GACf,gBAAgB,GAChB,oBAAoB,GACpB,kBAAkB,GAClB,QAAQ,CAAC;AAEb,wDAAwD;AACxD,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,QAAQ,EAAE,gBAAgB,CAAC;IAEpC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAE9B,QAAQ,CAAC,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IAEzC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;IACjD,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,iBAAiB,EAAE,SAAS,gBAAgB,EAAE,CAAC;IACxD,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
@@ -0,0 +1,5 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Scenario Types
3
+ // ---------------------------------------------------------------------------
4
+ export {};
5
+ //# sourceMappingURL=scenario.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scenario.js","sourceRoot":"","sources":["../../../../src/server/types/scenario.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E"}