elasticdash-sdk 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349)
  1. package/LICENSE +21 -0
  2. package/README.md +775 -0
  3. package/dist/browser-ui.d.ts +43 -0
  4. package/dist/browser-ui.d.ts.map +1 -0
  5. package/dist/browser-ui.js +246 -0
  6. package/dist/browser-ui.js.map +1 -0
  7. package/dist/capture/event.d.ts +33 -0
  8. package/dist/capture/event.d.ts.map +1 -0
  9. package/dist/capture/event.js +2 -0
  10. package/dist/capture/event.js.map +1 -0
  11. package/dist/capture/index.d.ts +4 -0
  12. package/dist/capture/index.d.ts.map +1 -0
  13. package/dist/capture/index.js +4 -0
  14. package/dist/capture/index.js.map +1 -0
  15. package/dist/capture/recorder.d.ts +24 -0
  16. package/dist/capture/recorder.d.ts.map +1 -0
  17. package/dist/capture/recorder.js +46 -0
  18. package/dist/capture/recorder.js.map +1 -0
  19. package/dist/capture/replay.d.ts +20 -0
  20. package/dist/capture/replay.d.ts.map +1 -0
  21. package/dist/capture/replay.js +47 -0
  22. package/dist/capture/replay.js.map +1 -0
  23. package/dist/ci/api-client.d.ts +38 -0
  24. package/dist/ci/api-client.d.ts.map +1 -0
  25. package/dist/ci/api-client.js +96 -0
  26. package/dist/ci/api-client.js.map +1 -0
  27. package/dist/ci/benchmark.d.ts +33 -0
  28. package/dist/ci/benchmark.d.ts.map +1 -0
  29. package/dist/ci/benchmark.js +213 -0
  30. package/dist/ci/benchmark.js.map +1 -0
  31. package/dist/ci/ed-runner.d.ts +48 -0
  32. package/dist/ci/ed-runner.d.ts.map +1 -0
  33. package/dist/ci/ed-runner.js +260 -0
  34. package/dist/ci/ed-runner.js.map +1 -0
  35. package/dist/ci/executor.d.ts +13 -0
  36. package/dist/ci/executor.d.ts.map +1 -0
  37. package/dist/ci/executor.js +542 -0
  38. package/dist/ci/executor.js.map +1 -0
  39. package/dist/ci/git-info.d.ts +17 -0
  40. package/dist/ci/git-info.d.ts.map +1 -0
  41. package/dist/ci/git-info.js +102 -0
  42. package/dist/ci/git-info.js.map +1 -0
  43. package/dist/ci/index.d.ts +6 -0
  44. package/dist/ci/index.d.ts.map +1 -0
  45. package/dist/ci/index.js +4 -0
  46. package/dist/ci/index.js.map +1 -0
  47. package/dist/ci/measurement.d.ts +9 -0
  48. package/dist/ci/measurement.d.ts.map +1 -0
  49. package/dist/ci/measurement.js +15 -0
  50. package/dist/ci/measurement.js.map +1 -0
  51. package/dist/ci/replay.d.ts +31 -0
  52. package/dist/ci/replay.d.ts.map +1 -0
  53. package/dist/ci/replay.js +96 -0
  54. package/dist/ci/replay.js.map +1 -0
  55. package/dist/ci/reporters/default.d.ts +8 -0
  56. package/dist/ci/reporters/default.d.ts.map +1 -0
  57. package/dist/ci/reporters/default.js +46 -0
  58. package/dist/ci/reporters/default.js.map +1 -0
  59. package/dist/ci/reporters/index.d.ts +8 -0
  60. package/dist/ci/reporters/index.d.ts.map +1 -0
  61. package/dist/ci/reporters/index.js +14 -0
  62. package/dist/ci/reporters/index.js.map +1 -0
  63. package/dist/ci/reporters/json.d.ts +8 -0
  64. package/dist/ci/reporters/json.d.ts.map +1 -0
  65. package/dist/ci/reporters/json.js +14 -0
  66. package/dist/ci/reporters/json.js.map +1 -0
  67. package/dist/ci/reporters/junit.d.ts +8 -0
  68. package/dist/ci/reporters/junit.d.ts.map +1 -0
  69. package/dist/ci/reporters/junit.js +48 -0
  70. package/dist/ci/reporters/junit.js.map +1 -0
  71. package/dist/ci/runner.d.ts +3 -0
  72. package/dist/ci/runner.d.ts.map +1 -0
  73. package/dist/ci/runner.js +187 -0
  74. package/dist/ci/runner.js.map +1 -0
  75. package/dist/ci/test-discovery.d.ts +5 -0
  76. package/dist/ci/test-discovery.d.ts.map +1 -0
  77. package/dist/ci/test-discovery.js +11 -0
  78. package/dist/ci/test-discovery.js.map +1 -0
  79. package/dist/ci/test-loader.d.ts +19 -0
  80. package/dist/ci/test-loader.d.ts.map +1 -0
  81. package/dist/ci/test-loader.js +149 -0
  82. package/dist/ci/test-loader.js.map +1 -0
  83. package/dist/ci/test-registry.d.ts +42 -0
  84. package/dist/ci/test-registry.d.ts.map +1 -0
  85. package/dist/ci/test-registry.js +18 -0
  86. package/dist/ci/test-registry.js.map +1 -0
  87. package/dist/ci/trace-schema.d.ts +30 -0
  88. package/dist/ci/trace-schema.d.ts.map +1 -0
  89. package/dist/ci/trace-schema.js +66 -0
  90. package/dist/ci/trace-schema.js.map +1 -0
  91. package/dist/ci/trace-writer.d.ts +16 -0
  92. package/dist/ci/trace-writer.d.ts.map +1 -0
  93. package/dist/ci/trace-writer.js +108 -0
  94. package/dist/ci/trace-writer.js.map +1 -0
  95. package/dist/ci/types.d.ts +108 -0
  96. package/dist/ci/types.d.ts.map +1 -0
  97. package/dist/ci/types.js +3 -0
  98. package/dist/ci/types.js.map +1 -0
  99. package/dist/ci/upload-client.d.ts +74 -0
  100. package/dist/ci/upload-client.d.ts.map +1 -0
  101. package/dist/ci/upload-client.js +195 -0
  102. package/dist/ci/upload-client.js.map +1 -0
  103. package/dist/cli.d.ts +3 -0
  104. package/dist/cli.d.ts.map +1 -0
  105. package/dist/cli.js +716 -0
  106. package/dist/cli.js.map +1 -0
  107. package/dist/core/agent-state.d.ts +47 -0
  108. package/dist/core/agent-state.d.ts.map +1 -0
  109. package/dist/core/agent-state.js +137 -0
  110. package/dist/core/agent-state.js.map +1 -0
  111. package/dist/core/judge-utils.d.ts +22 -0
  112. package/dist/core/judge-utils.d.ts.map +1 -0
  113. package/dist/core/judge-utils.js +211 -0
  114. package/dist/core/judge-utils.js.map +1 -0
  115. package/dist/core/registry.d.ts +28 -0
  116. package/dist/core/registry.d.ts.map +1 -0
  117. package/dist/core/registry.js +52 -0
  118. package/dist/core/registry.js.map +1 -0
  119. package/dist/dashboard-server.d.ts +65 -0
  120. package/dist/dashboard-server.d.ts.map +1 -0
  121. package/dist/dashboard-server.js +3940 -0
  122. package/dist/dashboard-server.js.map +1 -0
  123. package/dist/execution/tool-runner.d.ts +26 -0
  124. package/dist/execution/tool-runner.d.ts.map +1 -0
  125. package/dist/execution/tool-runner.js +316 -0
  126. package/dist/execution/tool-runner.js.map +1 -0
  127. package/dist/html/dashboard.html +2218 -0
  128. package/dist/http.d.ts +14 -0
  129. package/dist/http.d.ts.map +1 -0
  130. package/dist/http.js +13 -0
  131. package/dist/http.js.map +1 -0
  132. package/dist/index.cjs +8102 -0
  133. package/dist/index.d.ts +61 -0
  134. package/dist/index.d.ts.map +1 -0
  135. package/dist/index.js +67 -0
  136. package/dist/index.js.map +1 -0
  137. package/dist/interceptors/ai-interceptor.d.ts +26 -0
  138. package/dist/interceptors/ai-interceptor.d.ts.map +1 -0
  139. package/dist/interceptors/ai-interceptor.js +756 -0
  140. package/dist/interceptors/ai-interceptor.js.map +1 -0
  141. package/dist/interceptors/db-auto.d.ts +8 -0
  142. package/dist/interceptors/db-auto.d.ts.map +1 -0
  143. package/dist/interceptors/db-auto.js +217 -0
  144. package/dist/interceptors/db-auto.js.map +1 -0
  145. package/dist/interceptors/db.d.ts +23 -0
  146. package/dist/interceptors/db.d.ts.map +1 -0
  147. package/dist/interceptors/db.js +137 -0
  148. package/dist/interceptors/db.js.map +1 -0
  149. package/dist/interceptors/http.d.ts +28 -0
  150. package/dist/interceptors/http.d.ts.map +1 -0
  151. package/dist/interceptors/http.js +356 -0
  152. package/dist/interceptors/http.js.map +1 -0
  153. package/dist/interceptors/side-effects.d.ts +7 -0
  154. package/dist/interceptors/side-effects.d.ts.map +1 -0
  155. package/dist/interceptors/side-effects.js +72 -0
  156. package/dist/interceptors/side-effects.js.map +1 -0
  157. package/dist/interceptors/telemetry-push.d.ts +142 -0
  158. package/dist/interceptors/telemetry-push.d.ts.map +1 -0
  159. package/dist/interceptors/telemetry-push.js +463 -0
  160. package/dist/interceptors/telemetry-push.js.map +1 -0
  161. package/dist/interceptors/tool.d.ts +2 -0
  162. package/dist/interceptors/tool.d.ts.map +1 -0
  163. package/dist/interceptors/tool.js +274 -0
  164. package/dist/interceptors/tool.js.map +1 -0
  165. package/dist/interceptors/workflow-ai.d.ts +5 -0
  166. package/dist/interceptors/workflow-ai.d.ts.map +1 -0
  167. package/dist/interceptors/workflow-ai.js +382 -0
  168. package/dist/interceptors/workflow-ai.js.map +1 -0
  169. package/dist/internals/conditional-recorder.d.ts +21 -0
  170. package/dist/internals/conditional-recorder.d.ts.map +1 -0
  171. package/dist/internals/conditional-recorder.js +54 -0
  172. package/dist/internals/conditional-recorder.js.map +1 -0
  173. package/dist/internals/mock-resolver.d.ts +146 -0
  174. package/dist/internals/mock-resolver.d.ts.map +1 -0
  175. package/dist/internals/mock-resolver.js +427 -0
  176. package/dist/internals/mock-resolver.js.map +1 -0
  177. package/dist/matchers/index.d.ts +96 -0
  178. package/dist/matchers/index.d.ts.map +1 -0
  179. package/dist/matchers/index.js +668 -0
  180. package/dist/matchers/index.js.map +1 -0
  181. package/dist/observability.d.ts +82 -0
  182. package/dist/observability.d.ts.map +1 -0
  183. package/dist/observability.js +471 -0
  184. package/dist/observability.js.map +1 -0
  185. package/dist/portal-executor.d.ts +30 -0
  186. package/dist/portal-executor.d.ts.map +1 -0
  187. package/dist/portal-executor.js +324 -0
  188. package/dist/portal-executor.js.map +1 -0
  189. package/dist/portal-server.d.ts +3 -0
  190. package/dist/portal-server.d.ts.map +1 -0
  191. package/dist/portal-server.js +279 -0
  192. package/dist/portal-server.js.map +1 -0
  193. package/dist/proxy/llm-capture.d.ts +14 -0
  194. package/dist/proxy/llm-capture.d.ts.map +1 -0
  195. package/dist/proxy/llm-capture.js +264 -0
  196. package/dist/proxy/llm-capture.js.map +1 -0
  197. package/dist/reporter.d.ts +3 -0
  198. package/dist/reporter.d.ts.map +1 -0
  199. package/dist/reporter.js +72 -0
  200. package/dist/reporter.js.map +1 -0
  201. package/dist/runWorkflowSubprocess.d.ts +14 -0
  202. package/dist/runWorkflowSubprocess.d.ts.map +1 -0
  203. package/dist/runWorkflowSubprocess.js +66 -0
  204. package/dist/runWorkflowSubprocess.js.map +1 -0
  205. package/dist/runner.d.ts +16 -0
  206. package/dist/runner.d.ts.map +1 -0
  207. package/dist/runner.js +138 -0
  208. package/dist/runner.js.map +1 -0
  209. package/dist/socket-connector.d.ts +22 -0
  210. package/dist/socket-connector.d.ts.map +1 -0
  211. package/dist/socket-connector.js +104 -0
  212. package/dist/socket-connector.js.map +1 -0
  213. package/dist/telemetry-batcher.d.ts +56 -0
  214. package/dist/telemetry-batcher.d.ts.map +1 -0
  215. package/dist/telemetry-batcher.js +143 -0
  216. package/dist/telemetry-batcher.js.map +1 -0
  217. package/dist/test-setup.d.ts +12 -0
  218. package/dist/test-setup.d.ts.map +1 -0
  219. package/dist/test-setup.js +13 -0
  220. package/dist/test-setup.js.map +1 -0
  221. package/dist/tool-registry.d.ts +31 -0
  222. package/dist/tool-registry.d.ts.map +1 -0
  223. package/dist/tool-registry.js +73 -0
  224. package/dist/tool-registry.js.map +1 -0
  225. package/dist/tool-runner-worker.d.ts +2 -0
  226. package/dist/tool-runner-worker.d.ts.map +1 -0
  227. package/dist/tool-runner-worker.js +215 -0
  228. package/dist/tool-runner-worker.js.map +1 -0
  229. package/dist/trace-adapter/context.d.ts +72 -0
  230. package/dist/trace-adapter/context.d.ts.map +1 -0
  231. package/dist/trace-adapter/context.js +80 -0
  232. package/dist/trace-adapter/context.js.map +1 -0
  233. package/dist/tracing.d.ts +2 -0
  234. package/dist/tracing.d.ts.map +1 -0
  235. package/dist/tracing.js +59 -0
  236. package/dist/tracing.js.map +1 -0
  237. package/dist/trigger-executor.d.ts +12 -0
  238. package/dist/trigger-executor.d.ts.map +1 -0
  239. package/dist/trigger-executor.js +130 -0
  240. package/dist/trigger-executor.js.map +1 -0
  241. package/dist/types/portal.d.ts +76 -0
  242. package/dist/types/portal.d.ts.map +1 -0
  243. package/dist/types/portal.js +2 -0
  244. package/dist/types/portal.js.map +1 -0
  245. package/dist/utils/debug.d.ts +3 -0
  246. package/dist/utils/debug.d.ts.map +1 -0
  247. package/dist/utils/debug.js +8 -0
  248. package/dist/utils/debug.js.map +1 -0
  249. package/dist/utils/license-error.d.ts +23 -0
  250. package/dist/utils/license-error.d.ts.map +1 -0
  251. package/dist/utils/license-error.js +42 -0
  252. package/dist/utils/license-error.js.map +1 -0
  253. package/dist/utils/redact.d.ts +7 -0
  254. package/dist/utils/redact.d.ts.map +1 -0
  255. package/dist/utils/redact.js +26 -0
  256. package/dist/utils/redact.js.map +1 -0
  257. package/dist/workflow-runner-worker.d.ts +2 -0
  258. package/dist/workflow-runner-worker.d.ts.map +1 -0
  259. package/dist/workflow-runner-worker.js +329 -0
  260. package/dist/workflow-runner-worker.js.map +1 -0
  261. package/dist/workflow-runner.d.ts +14 -0
  262. package/dist/workflow-runner.d.ts.map +1 -0
  263. package/dist/workflow-runner.js +34 -0
  264. package/dist/workflow-runner.js.map +1 -0
  265. package/docs/agent-coding-instructions.md +138 -0
  266. package/docs/agent-integration-guide.md +564 -0
  267. package/docs/agents.md +140 -0
  268. package/docs/dashboard.md +394 -0
  269. package/docs/deno.md +69 -0
  270. package/docs/instrumentation.md +424 -0
  271. package/docs/langfuse-trace-structure.md +145 -0
  272. package/docs/matchers.md +173 -0
  273. package/docs/observability_contract.md +192 -0
  274. package/docs/observability_mode.md +195 -0
  275. package/docs/quickstart.md +621 -0
  276. package/docs/security-compliance.md +566 -0
  277. package/docs/test-writing-guidelines.md +444 -0
  278. package/docs/tools.md +165 -0
  279. package/docs/workflow-modes.md +253 -0
  280. package/package.json +76 -0
  281. package/src/browser-ui.ts +281 -0
  282. package/src/capture/event.ts +30 -0
  283. package/src/capture/index.ts +3 -0
  284. package/src/capture/recorder.ts +62 -0
  285. package/src/capture/replay.ts +55 -0
  286. package/src/ci/api-client.ts +136 -0
  287. package/src/ci/benchmark.ts +257 -0
  288. package/src/ci/ed-runner.ts +351 -0
  289. package/src/ci/executor.ts +671 -0
  290. package/src/ci/git-info.ts +127 -0
  291. package/src/ci/index.ts +5 -0
  292. package/src/ci/measurement.ts +25 -0
  293. package/src/ci/replay.ts +127 -0
  294. package/src/ci/reporters/default.ts +50 -0
  295. package/src/ci/reporters/index.ts +21 -0
  296. package/src/ci/reporters/json.ts +18 -0
  297. package/src/ci/reporters/junit.ts +61 -0
  298. package/src/ci/runner.ts +208 -0
  299. package/src/ci/test-discovery.ts +16 -0
  300. package/src/ci/test-loader.ts +187 -0
  301. package/src/ci/test-registry.ts +62 -0
  302. package/src/ci/trace-schema.ts +96 -0
  303. package/src/ci/trace-writer.ts +107 -0
  304. package/src/ci/types.ts +115 -0
  305. package/src/ci/upload-client.ts +300 -0
  306. package/src/cli.ts +811 -0
  307. package/src/core/agent-state.ts +162 -0
  308. package/src/core/judge-utils.ts +232 -0
  309. package/src/core/registry.ts +92 -0
  310. package/src/dashboard-server.ts +2047 -0
  311. package/src/execution/tool-runner.ts +352 -0
  312. package/src/html/dashboard.html +2218 -0
  313. package/src/http.ts +13 -0
  314. package/src/index.ts +138 -0
  315. package/src/interceptors/ai-interceptor.ts +798 -0
  316. package/src/interceptors/db-auto.ts +243 -0
  317. package/src/interceptors/db.ts +156 -0
  318. package/src/interceptors/http.ts +393 -0
  319. package/src/interceptors/side-effects.ts +83 -0
  320. package/src/interceptors/telemetry-push.ts +537 -0
  321. package/src/interceptors/tool.ts +287 -0
  322. package/src/interceptors/workflow-ai.ts +419 -0
  323. package/src/internals/conditional-recorder.ts +63 -0
  324. package/src/internals/mock-resolver.ts +492 -0
  325. package/src/matchers/index.ts +824 -0
  326. package/src/observability.ts +501 -0
  327. package/src/portal-executor.ts +355 -0
  328. package/src/portal-server.ts +304 -0
  329. package/src/proxy/llm-capture.ts +301 -0
  330. package/src/reporter.ts +81 -0
  331. package/src/runWorkflowSubprocess.ts +74 -0
  332. package/src/runner.ts +178 -0
  333. package/src/socket-connector.ts +117 -0
  334. package/src/telemetry-batcher.ts +191 -0
  335. package/src/test-setup.ts +16 -0
  336. package/src/tool-registry.ts +94 -0
  337. package/src/tool-runner-worker.ts +244 -0
  338. package/src/trace-adapter/context.ts +156 -0
  339. package/src/tracing.ts +62 -0
  340. package/src/trigger-executor.ts +171 -0
  341. package/src/types/agent.d.ts +63 -0
  342. package/src/types/expect.d.ts +81 -0
  343. package/src/types/modules.d.ts +2 -0
  344. package/src/types/portal.ts +69 -0
  345. package/src/utils/debug.ts +8 -0
  346. package/src/utils/license-error.ts +43 -0
  347. package/src/utils/redact.ts +25 -0
  348. package/src/workflow-runner-worker.ts +386 -0
  349. package/src/workflow-runner.ts +58 -0
@@ -0,0 +1,96 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ import { getOriginalFetch } from '../interceptors/http.js';
3
+ // ─── API Client ──────────────────────────────────────────────
4
+ // Uses getOriginalFetch() to bypass SDK interceptors.
5
/**
 * Normalize a server URL into a base that `/api/...` paths can be appended to.
 *
 * Strips trailing slashes, then one trailing `/api` segment (so a configured
 * `https://host/api` does not produce `/api/api/...` paths), then any slashes
 * the removed segment leaves exposed (e.g. `https://host//api`), so the result
 * never ends in `/`.
 *
 * @param serverUrl - Server URL as configured by the caller.
 * @returns The URL without trailing slashes or a trailing `/api` segment.
 */
function normalizeBase(serverUrl) {
    return serverUrl
        .replace(/\/+$/, '')
        .replace(/\/api$/, '')
        // Removing "/api" can uncover another slash ("https://host//api").
        .replace(/\/+$/, '');
}
9
/**
 * Build the default header set for a backend request.
 *
 * A new correlation ID is generated on every call, and a missing API key is
 * sent as an empty string.
 */
function headers(apiKey) {
    const key = apiKey ? apiKey : '';
    const correlationId = randomUUID();
    return {
        'Content-Type': 'application/json',
        'api-key': key,
        'X-Correlation-ID': correlationId,
    };
}
16
/**
 * Perform a JSON request against the backend and unwrap the response payload.
 *
 * The request goes through the fetch returned by getOriginalFetch() (per the
 * module header, this bypasses the SDK interceptors). Default headers are
 * merged with caller-supplied ones, with the caller's values taking
 * precedence. The request and its outcome are logged to the console.
 *
 * @param url - Request URL.
 * @param apiKey - API key sent in the `api-key` header.
 * @param options - fetch options; `method` defaults to GET.
 * @returns `body.result` when that key is present (even if its value is null),
 *   otherwise `body.data` when present, otherwise the parsed body itself.
 * @throws Error with message `API <status>: <body text or statusText>` when
 *   `res.ok` is false.
 */
async function apiRequest(url, apiKey, options = {}) {
    const method = (options.method || 'GET').toUpperCase();
    console.log(`[elasticdash ci] ${method} ${url}`);
    const res = await getOriginalFetch()(url, {
        ...options,
        headers: { ...headers(apiKey), ...(options.headers ?? {}) },
    });
    if (!res.ok) {
        // Reading the body is best-effort; an unreadable body falls back to statusText.
        const text = await res.text().catch(() => '');
        console.log(`[elasticdash ci] ${method} ${url} → ${res.status} ${text.substring(0, 200)}`);
        throw new Error(`API ${res.status}: ${text || res.statusText}`);
    }
    const json = await res.json();
    // The `in` operator throws a TypeError on null and on primitives, so the
    // envelope keys are only probed when the body is an object. Any other valid
    // JSON body (null, string, number, boolean) is returned unchanged.
    const isEnvelopeCandidate = json !== null && typeof json === 'object';
    // Backend wraps responses in generalApiResponseSender which sends { result: ... }
    // Use 'in' check instead of nullish coalescing — result:null is valid data, not "missing"
    let result = json;
    if (isEnvelopeCandidate) {
        if ('result' in json) {
            result = json.result;
        }
        else if ('data' in json) {
            result = json.data;
        }
    }
    console.log(`[elasticdash ci] ${method} ${url} → ${res.status} (result keys: ${result && typeof result === 'object' ? Object.keys(result).join(',') : typeof result})`);
    return result;
}
35
/**
 * Fetch all active test groups (with nested tests & expectations) for the
 * project scoped to the API key.
 *
 * Optional filters are forwarded as query parameters: `workflowName` and
 * `status` as given, `tags` as a single comma-separated value. Empty or
 * missing filters are omitted from the query string.
 */
export async function fetchTestGroups(serverUrl, apiKey, filters) {
    const root = normalizeBase(serverUrl);
    const query = new URLSearchParams();
    const workflowName = filters?.workflowName;
    if (workflowName) {
        query.set('workflowName', workflowName);
    }
    const tags = filters?.tags;
    if (tags?.length) {
        query.set('tags', tags.join(','));
    }
    const status = filters?.status;
    if (status) {
        query.set('status', status);
    }
    const encoded = query.toString();
    const suffix = encoded ? `?${encoded}` : '';
    return apiRequest(`${root}/api/testgroups/by-project${suffix}`, apiKey);
}
52
/**
 * Submit a test run result to the backend.
 *
 * @param serverUrl - Backend URL; normalized with normalizeBase.
 * @param apiKey - API key sent with the request.
 * @param testGroupId - ID of the test group the run belongs to. It is
 *   percent-encoded before being placed in the URL path, so characters such as
 *   `/`, `?` or `#` cannot alter the request target.
 * @param payload - Run data; serialized with JSON.stringify as the POST body.
 * @returns The unwrapped response from apiRequest.
 */
export async function submitTestRun(serverUrl, apiKey, testGroupId, payload) {
    const base = normalizeBase(serverUrl);
    const url = `${base}/api/testgroups/${encodeURIComponent(testGroupId)}/runs`;
    return apiRequest(url, apiKey, {
        method: 'POST',
        body: JSON.stringify(payload),
    });
}
63
/**
 * Register a batch that groups several test run IDs together.
 * The payload is POSTed as JSON to the batches endpoint.
 */
export async function createBatch(serverUrl, apiKey, payload) {
    const endpoint = `${normalizeBase(serverUrl)}/api/testgroups/batches`;
    const request = { method: 'POST', body: JSON.stringify(payload) };
    return apiRequest(endpoint, apiKey, request);
}
74
+ /** Maps backend llmProviderId to provider name. IDs without an entry make fetchEvaluatorConfig report a null provider. */
75
+ const LLM_PROVIDER_MAP = {
76
+ 1: 'openai',
77
+ 2: 'gemini',
78
+ 3: 'anthropic',
79
+ 4: 'moonshot',
80
+ };
81
/**
 * Retrieve the user's default evaluator configuration.
 *
 * ed-test llm_judge benchmarks use it when a test definition leaves
 * judge_provider/judge_model unset. The provider is looked up from the
 * backend's llmProviderId (null when absent or unknown), the model defaults to
 * null, and apiKey is always returned as null.
 */
export async function fetchEvaluatorConfig(serverUrl, apiKey) {
    const endpoint = `${normalizeBase(serverUrl)}/api/user/settings/llm/default-evaluator`;
    const settings = await apiRequest(endpoint, apiKey);
    let provider = null;
    if (settings?.llmProviderId) {
        provider = LLM_PROVIDER_MAP[settings.llmProviderId] ?? null;
    }
    const model = settings?.model ?? null;
    return { provider, model, apiKey: null };
}
96
+ //# sourceMappingURL=api-client.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"api-client.js","sourceRoot":"","sources":["../../src/ci/api-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAG1D,gEAAgE;AAChE,sDAAsD;AAEtD,iGAAiG;AACjG,SAAS,aAAa,CAAC,SAAiB;IACtC,OAAO,SAAS,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;AAC5D,CAAC;AAED,SAAS,OAAO,CAAC,MAAc;IAC7B,OAAO;QACL,cAAc,EAAE,kBAAkB;QAClC,SAAS,EAAE,MAAM,IAAI,EAAE;QACvB,kBAAkB,EAAE,UAAU,EAAE;KACjC,CAAA;AACH,CAAC;AAED,KAAK,UAAU,UAAU,CACvB,GAAW,EACX,MAAc,EACd,UAAuB,EAAE;IAEzB,MAAM,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,IAAI,KAAK,CAAC,CAAC,WAAW,EAAE,CAAA;IACtD,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,IAAI,GAAG,EAAE,CAAC,CAAA;IAEhD,MAAM,GAAG,GAAG,MAAM,gBAAgB,EAAE,CAAC,GAAG,EAAE;QACxC,GAAG,OAAO;QACV,OAAO,EAAE,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,EAAE,GAAG,CAAC,OAAO,CAAC,OAAiC,IAAI,EAAE,CAAC,EAAE;KACtF,CAAC,CAAA;IAEF,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAA;QAC7C,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAA;QAC1F,MAAM,IAAI,KAAK,CAAC,OAAO,GAAG,CAAC,MAAM,KAAK,IAAI,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAA;IACjE,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAA6B,CAAA;IACxD,kFAAkF;IAClF,0FAA0F;IAC1F,MAAM,MAAM,GAAG,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;IACnF,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,MAAM,kBAAkB,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,MAAM,GAAG,CAAC,CAAA;IACvK,OAAO,MAAW,CAAA;AACpB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,SAAiB,EACjB,MAAc,EACd,OAAqE;IAErE,MAAM,IAAI,GAAG,aAAa,CAAC,SAAS,CAAC,CAAA;IACrC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAA;IACpC,IAAI,OAAO,EAAE,YAAY;QAAE,MAAM,CAAC,GAAG,CAAC,cAAc,EAAE,OAAO,CAAC,YAAY,CAAC,CAAA;IAC3E,IAAI,OAAO,EAAE
,IAAI,EAAE,MAAM;QAAE,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;IACrE,IAAI,OAAO,EAAE,MAAM;QAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAA;IAEzD,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAA;IAC5B,MAAM,GAAG,GAAG,GAAG,IAAI,6BAA6B,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAA;IACpE,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,CAAC,CAAA;AAChD,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,SAAiB,EACjB,MAAc,EACd,WAAmB,EACnB,OAAgC;IAEhC,MAAM,IAAI,GAAG,aAAa,CAAC,SAAS,CAAC,CAAA;IACrC,MAAM,GAAG,GAAG,GAAG,IAAI,mBAAmB,WAAW,OAAO,CAAA;IACxD,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,EAAE;QAC7C,MAAM,EAAE,MAAM;QACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC,CAAA;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,SAAiB,EACjB,MAAc,EACd,OAAgC;IAEhC,MAAM,IAAI,GAAG,aAAa,CAAC,SAAS,CAAC,CAAA;IACrC,MAAM,GAAG,GAAG,GAAG,IAAI,yBAAyB,CAAA;IAC5C,OAAO,UAAU,CAAiB,GAAG,EAAE,MAAM,EAAE;QAC7C,MAAM,EAAE,MAAM;QACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC,CAAA;AACJ,CAAC;AAYD,mDAAmD;AACnD,MAAM,gBAAgB,GAA2B;IAC/C,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,QAAQ;IACX,CAAC,EAAE,WAAW;IACd,CAAC,EAAE,UAAU;CACd,CAAA;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,SAAiB,EACjB,MAAc;IAEd,MAAM,IAAI,GAAG,aAAa,CAAC,SAAS,CAAC,CAAA;IACrC,MAAM,GAAG,GAAG,GAAG,IAAI,0CAA0C,CAAA;IAC7D,MAAM,GAAG,GAAG,MAAM,UAAU,CAA6C,GAAG,EAAE,MAAM,CAAC,CAAA;IACrF,OAAO;QACL,QAAQ,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAC,GAAG,CAAC,aAAa,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;QACnF,KAAK,EAAE,GAAG,EAAE,KAAK,IAAI,IAAI;QACzB,MAAM,EAAE,IAAI;KACb,CAAA;AACH,CAAC"}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * benchmark.ts
3
+ *
4
+ * Compares recorded trace measurements against user-defined benchmarks.
5
+ * Supports duration, token, output-contains, and LLM-as-a-judge evaluations.
6
+ *
7
+ * Generated/updated on 2026-04-20.
8
+ */
9
+ import type { TestMeasurement } from './measurement.js';
10
+ import type { TestBenchmarks } from './test-registry.js';
11
+ import type { EvaluatorConfig } from './api-client.js';
12
+ export type MetricName = 'duration_ms' | 'tokens_total' | 'output_contains' | 'output_not_contains' | 'llm_judge'; // identifier of each check compareBenchmarks can record in BenchmarkResult.metrics
13
+ export interface MetricResult { // outcome of a single benchmark check
14
+ name: MetricName; // which check produced this entry
15
+ value: number; // measured value; the output_contains/output_not_contains checks report 1 (pass) or 0 (fail), llm_judge reports the parsed score (0 when no score could be parsed)
16
+ threshold: number; // limit the value was compared against
17
+ passed: boolean; // whether the check passed
18
+ detail?: string; // optional explanation; in the visible implementation it is set by failing output checks and by an unparsable llm_judge response
19
+ }
20
+ export interface BenchmarkResult { // aggregate result that compareBenchmarks resolves with
21
+ passed: boolean; // overall verdict; NOTE(review): presumably true only when every metric passed; the code that sets it is outside this view, confirm
22
+ failure_reason?: string; // NOTE(review): presumably the message of the first failing check (see firstFailure in benchmark.js); confirm
23
+ metrics: MetricResult[]; // one entry per check that was evaluated
24
+ }
25
+ /**
26
+ * Compare a measurement against benchmarks. Async because llm_judge requires
27
+ * an LLM call. The step's output is needed for output_contains/llm_judge checks.
28
+ * @param stepOutput - Output of the step under test; stringified for the output_contains, output_not_contains and llm_judge checks.
29
+ * @param evaluatorConfig - Optional backend evaluator config used as fallback
30
+ * when the test does not specify judge_provider/judge_model.
31
+ */
32
+ export declare function compareBenchmarks(measurement: TestMeasurement, benchmarks: TestBenchmarks, stepOutput?: unknown, evaluatorConfig?: EvaluatorConfig | null): Promise<BenchmarkResult>;
33
+ //# sourceMappingURL=benchmark.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/ci/benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AACvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAA;AACxD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAA;AAEtD,MAAM,MAAM,UAAU,GAAG,aAAa,GAAG,cAAc,GAAG,iBAAiB,GAAG,qBAAqB,GAAG,WAAW,CAAA;AAEjH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,UAAU,CAAA;IAChB,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE,MAAM,CAAA;IACjB,MAAM,EAAE,OAAO,CAAA;IACf,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,OAAO,CAAA;IACf,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,OAAO,EAAE,YAAY,EAAE,CAAA;CACxB;AAuCD;;;;;;GAMG;AACH,wBAAsB,iBAAiB,CACrC,WAAW,EAAE,eAAe,EAC5B,UAAU,EAAE,cAAc,EAC1B,UAAU,CAAC,EAAE,OAAO,EACpB,eAAe,CAAC,EAAE,eAAe,GAAG,IAAI,GACvC,OAAO,CAAC,eAAe,CAAC,CAyK1B"}
@@ -0,0 +1,213 @@
1
+ /**
2
+ * benchmark.ts
3
+ *
4
+ * Compares recorded trace measurements against user-defined benchmarks.
5
+ * Supports duration, token, output-contains, and LLM-as-a-judge evaluations.
6
+ *
7
+ * Generated/updated on 2026-04-20.
8
+ */
9
+ import { callProviderLLM } from '../matchers/index.js';
10
+ import { prepareOutputForJudge } from '../core/judge-utils.js';
11
+ /** Maps backend provider names to SDK provider names used by callProviderLLM. Names without an entry are returned unchanged by normalizeSdkProvider. */
12
+ const PROVIDER_NAME_MAP = {
13
+ anthropic: 'claude',
14
+ moonshot: 'kimi',
15
+ };
16
+ /** Default model for each provider, used when no explicit model is set or
17
+ * when the evaluator config model doesn't belong to the resolved provider. */
18
+ const DEFAULT_PROVIDER_MODELS = { // keyed by SDK provider name, i.e. the value normalizeSdkProvider returns; a provider with no entry leaves the model undefined
19
+ openai: 'gpt-4o',
20
+ claude: 'claude-sonnet-4-20250514',
21
+ gemini: 'gemini-2.0-flash',
22
+ grok: 'grok-3',
23
+ kimi: 'moonshot-v1-auto',
24
+ };
25
+ /** Known model prefixes per provider — used to check if a model belongs to a provider. Prefixes must be lowercase: isModelForProvider lowercases the model name before comparing. */
26
+ const PROVIDER_MODEL_PREFIXES = {
27
+ openai: ['gpt-', 'o1-', 'o3-', 'o4-', 'chatgpt-', 'omni-'], // NOTE(review): the trailing hyphen means bare names such as "o1" or "o3" do not match; confirm this is intended
28
+ claude: ['claude-'],
29
+ gemini: ['gemini-'],
30
+ grok: ['grok-'],
31
+ kimi: ['moonshot-', 'kimi-'],
32
+ };
33
/**
 * Check if a model name belongs to the given provider.
 *
 * @param model - Model name; compared case-insensitively.
 * @param provider - SDK provider name (a key of PROVIDER_MODEL_PREFIXES).
 * @returns true when the model name starts with one of the provider's known
 *   prefixes; false when it does not or when the provider has no prefix list.
 */
function isModelForProvider(model, provider) {
    // Own-property check: a plain lookup would resolve names such as
    // "constructor" or "toString" through Object.prototype and then fail on `.some`.
    if (!Object.prototype.hasOwnProperty.call(PROVIDER_MODEL_PREFIXES, provider)) {
        return false;
    }
    const prefixes = PROVIDER_MODEL_PREFIXES[provider];
    if (!prefixes) {
        return false;
    }
    // Lowercase once instead of once per prefix.
    const normalizedModel = model.toLowerCase();
    return prefixes.some(prefix => normalizedModel.startsWith(prefix));
}
40
/**
 * Normalize provider name from backend format to SDK format.
 *
 * @param provider - Provider name as supplied by the test definition or backend.
 * @returns The mapped SDK name when PROVIDER_NAME_MAP has its own entry for
 *   the provider; otherwise the provider unchanged.
 */
function normalizeSdkProvider(provider) {
    // Own-property check so inherited keys ("constructor", "toString", ...) are
    // passed through as plain names instead of resolving to Object.prototype members.
    if (Object.prototype.hasOwnProperty.call(PROVIDER_NAME_MAP, provider)) {
        return PROVIDER_NAME_MAP[provider] ?? provider;
    }
    return provider;
}
44
+ /**
45
+ * Compare a measurement against benchmarks. Async because llm_judge requires
46
+ * an LLM call. The step's output is needed for output_contains/llm_judge checks.
47
+ *
48
+ * @param evaluatorConfig - Optional backend evaluator config used as fallback
49
+ * when the test does not specify judge_provider/judge_model.
50
+ */
51
+ export async function compareBenchmarks(measurement, benchmarks, stepOutput, evaluatorConfig) {
52
+ const metrics = [];
53
+ let firstFailure;
54
+ if (benchmarks.max_duration_ms !== undefined) {
55
+ const passed = measurement.duration_ms <= benchmarks.max_duration_ms;
56
+ metrics.push({
57
+ name: 'duration_ms',
58
+ value: measurement.duration_ms,
59
+ threshold: benchmarks.max_duration_ms,
60
+ passed,
61
+ });
62
+ if (!passed && !firstFailure) {
63
+ firstFailure = `duration_ms (${measurement.duration_ms}) exceeded max threshold (${benchmarks.max_duration_ms})`;
64
+ }
65
+ }
66
+ if (benchmarks.max_tokens_total !== undefined) {
67
+ const value = measurement.tokens_total ?? 0;
68
+ const passed = value <= benchmarks.max_tokens_total;
69
+ metrics.push({
70
+ name: 'tokens_total',
71
+ value,
72
+ threshold: benchmarks.max_tokens_total,
73
+ passed,
74
+ });
75
+ if (!passed && !firstFailure) {
76
+ firstFailure = `tokens_total (${value}) exceeded max threshold (${benchmarks.max_tokens_total})`;
77
+ }
78
+ }
79
+ // ─── Output contains ────────────────────────────────────────
80
+ if (benchmarks.output_contains !== undefined) {
81
+ const outputStr = stringifyOutput(stepOutput);
82
+ const passed = outputStr.toLowerCase().includes(benchmarks.output_contains.toLowerCase());
83
+ metrics.push({
84
+ name: 'output_contains',
85
+ value: passed ? 1 : 0,
86
+ threshold: 1,
87
+ passed,
88
+ detail: passed ? undefined : `output does not contain "${benchmarks.output_contains}"`,
89
+ });
90
+ if (!passed && !firstFailure) {
91
+ firstFailure = `output does not contain "${benchmarks.output_contains}"`;
92
+ }
93
+ }
94
+ // ─── Output not contains ────────────────────────────────────
95
+ if (benchmarks.output_not_contains !== undefined) {
96
+ const outputStr = stringifyOutput(stepOutput);
97
+ const passed = !outputStr.toLowerCase().includes(benchmarks.output_not_contains.toLowerCase());
98
+ metrics.push({
99
+ name: 'output_not_contains',
100
+ value: passed ? 1 : 0,
101
+ threshold: 1,
102
+ passed,
103
+ detail: passed ? undefined : `output unexpectedly contains "${benchmarks.output_not_contains}"`,
104
+ });
105
+ if (!passed && !firstFailure) {
106
+ firstFailure = `output unexpectedly contains "${benchmarks.output_not_contains}"`;
107
+ }
108
+ }
109
+ // ─── LLM-as-a-judge ────────────────────────────────────────
110
+ if (benchmarks.llm_judge) {
111
+ const judge = benchmarks.llm_judge;
112
+ const outputStr = stringifyOutput(stepOutput);
113
+ const threshold = judge.judge_score_threshold ?? 7;
114
+ // Resolve provider/model: test definition takes priority, then backend
115
+ // evaluator config, then fall back to 'openai' default.
116
+ const resolvedProvider = normalizeSdkProvider(judge.judge_provider ?? evaluatorConfig?.provider ?? 'openai');
117
+ // Model resolution: judge_model > evaluatorConfig.model (if compatible) > provider default
118
+ let resolvedModel = judge.judge_model ?? undefined;
119
+ if (!resolvedModel && evaluatorConfig?.model) {
120
+ // Only use the evaluator config model if it belongs to the resolved provider
121
+ if (isModelForProvider(evaluatorConfig.model, resolvedProvider)) {
122
+ resolvedModel = evaluatorConfig.model;
123
+ }
124
+ }
125
+ if (!resolvedModel) {
126
+ resolvedModel = DEFAULT_PROVIDER_MODELS[resolvedProvider];
127
+ }
128
+ // If the backend provided an API key and we're using its provider,
129
+ // set it in the environment so callProviderLLM can pick it up.
130
+ const envKeyMap = {
131
+ openai: 'OPENAI_API_KEY',
132
+ claude: 'ANTHROPIC_API_KEY',
133
+ gemini: 'GEMINI_API_KEY',
134
+ grok: 'GROK_API_KEY',
135
+ kimi: 'KIMI_API_KEY',
136
+ };
137
+ const envKey = envKeyMap[resolvedProvider];
138
+ let restoreEnv;
139
+ if (evaluatorConfig?.apiKey && envKey && !judge.judge_provider && !process.env[envKey]) {
140
+ const prev = process.env[envKey];
141
+ process.env[envKey] = evaluatorConfig.apiKey;
142
+ restoreEnv = () => {
143
+ if (prev === undefined)
144
+ delete process.env[envKey];
145
+ else
146
+ process.env[envKey] = prev;
147
+ };
148
+ }
149
+ try {
150
+ console.log(` [llm_judge] provider=${resolvedProvider}, model=${resolvedModel ?? '(default)'}`);
151
+ const preparedOutput = prepareOutputForJudge(outputStr, judge.judge_prompt);
152
+ const evalPrompt = `${judge.judge_prompt}\n\n<output>\n${preparedOutput}\n</output>\n\nBased on the evaluation criteria above, score this output on a scale of 0-10. Respond with only the number.`;
153
+ const result = await callProviderLLM(evalPrompt, { provider: resolvedProvider, model: resolvedModel }, 'You are an expert test judge. Return only a number between 0 and 10.', 4096, 0);
154
+ restoreEnv?.();
155
+ console.log(` [llm_judge] raw response: "${result.content}"`);
156
+ const score = parseFloat(result.content.match(/-?\d+(?:\.\d+)?/)?.[0] ?? '');
157
+ if (isNaN(score)) {
158
+ metrics.push({
159
+ name: 'llm_judge',
160
+ value: 0,
161
+ threshold,
162
+ passed: false,
163
+ detail: `Could not parse score from LLM response: "${result.content}"`,
164
+ });
165
+ if (!firstFailure) {
166
+ firstFailure = `llm_judge: could not parse score from response`;
167
+ }
168
+ }
169
+ else {
170
+ const passed = score >= threshold;
171
+ metrics.push({
172
+ name: 'llm_judge',
173
+ value: score,
174
+ threshold,
175
+ passed,
176
+ detail: `Score: ${score}/${threshold}`,
177
+ });
178
+ if (!passed && !firstFailure) {
179
+ firstFailure = `llm_judge score (${score}) below threshold (${threshold})`;
180
+ }
181
+ }
182
+ }
183
+ catch (err) {
184
+ restoreEnv?.();
185
+ const errMsg = err instanceof Error ? err.message : String(err);
186
+ metrics.push({
187
+ name: 'llm_judge',
188
+ value: 0,
189
+ threshold,
190
+ passed: false,
191
+ detail: `LLM judge error: ${errMsg}`,
192
+ });
193
+ if (!firstFailure) {
194
+ firstFailure = `llm_judge error: ${errMsg}`;
195
+ }
196
+ }
197
+ }
198
+ const allPassed = metrics.every(m => m.passed);
199
+ return {
200
+ passed: allPassed,
201
+ failure_reason: firstFailure,
202
+ metrics,
203
+ };
204
+ }
205
+ /** Converts step output to a string for text-based assertions. */
206
+ function stringifyOutput(output) {
207
+ if (output === null || output === undefined)
208
+ return '';
209
+ if (typeof output === 'string')
210
+ return output;
211
+ return JSON.stringify(output);
212
+ }
213
+ //# sourceMappingURL=benchmark.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"benchmark.js","sourceRoot":"","sources":["../../src/ci/benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AACtD,OAAO,EAAE,qBAAqB,EAAE,MAAM,wBAAwB,CAAA;AAqB9D,iFAAiF;AACjF,MAAM,iBAAiB,GAA2B;IAChD,SAAS,EAAE,QAAQ;IACnB,QAAQ,EAAE,MAAM;CACjB,CAAA;AAED;+EAC+E;AAC/E,MAAM,uBAAuB,GAA2B;IACtD,MAAM,EAAE,QAAQ;IAChB,MAAM,EAAE,0BAA0B;IAClC,MAAM,EAAE,kBAAkB;IAC1B,IAAI,EAAE,QAAQ;IACd,IAAI,EAAE,kBAAkB;CACzB,CAAA;AAED,0FAA0F;AAC1F,MAAM,uBAAuB,GAA6B;IACxD,MAAM,EAAE,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,UAAU,EAAE,OAAO,CAAC;IAC1D,MAAM,EAAE,CAAC,SAAS,CAAC;IACnB,MAAM,EAAE,CAAC,SAAS,CAAC;IACnB,IAAI,EAAE,CAAC,OAAO,CAAC;IACf,IAAI,EAAE,CAAC,WAAW,EAAE,OAAO,CAAC;CAC7B,CAAA;AAED,2DAA2D;AAC3D,SAAS,kBAAkB,CAAC,KAAa,EAAE,QAAgB;IACzD,MAAM,QAAQ,GAAG,uBAAuB,CAAC,QAAQ,CAAC,CAAA;IAClD,IAAI,CAAC,QAAQ;QAAE,OAAO,KAAK,CAAA;IAC3B,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAA;AAC9D,CAAC;AAED,iEAAiE;AACjE,SAAS,oBAAoB,CAAC,QAAgB;IAC5C,OAAO,iBAAiB,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAA;AAChD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,WAA4B,EAC5B,UAA0B,EAC1B,UAAoB,EACpB,eAAwC;IAExC,MAAM,OAAO,GAAmB,EAAE,CAAA;IAClC,IAAI,YAAgC,CAAA;IAEpC,IAAI,UAAU,CAAC,eAAe,KAAK,SAAS,EAAE,CAAC;QAC7C,MAAM,MAAM,GAAG,WAAW,CAAC,WAAW,IAAI,UAAU,CAAC,eAAe,CAAA;QACpE,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,aAAa;YACnB,KAAK,EAAE,WAAW,CAAC,WAAW;YAC9B,SAAS,EAAE,UAAU,CAAC,eAAe;YACrC,MAAM;SACP,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,gBAAgB,WAAW,CAAC,WAAW,6BAA6B,UAAU,CAAC,eAAe,GAAG,CAAA;QAClH,CAAC;IACH,CAAC;IAED,IAAI,UAAU,CAAC,gBAAgB,KAAK,SAAS,EAAE,CAAC;QAC9C,MAAM,KAAK,GAAG,WAAW,CAAC,YAAY,IAAI,CAAC,CAAA;QAC3C,MAAM,MAAM,GAAG,KAAK,IAAI,UAAU,CAAC,gBAAgB,CAAA;QACnD,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,cAAc;YACpB,KAAK;YACL,SAAS,EAAE,UAAU,CAAC,gBAAgB;YACtC,MAAM;SACP,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,iBAAiB,KAAK,6BAA6B,UAAU,CAAC,gBAAgB,GAAG,CAAA;QAClG,CAAC;IACH,
CAAC;IAED,+DAA+D;IAC/D,IAAI,UAAU,CAAC,eAAe,KAAK,SAAS,EAAE,CAAC;QAC7C,MAAM,SAAS,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;QAC7C,MAAM,MAAM,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,eAAe,CAAC,WAAW,EAAE,CAAC,CAAA;QACzF,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,iBAAiB;YACvB,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrB,SAAS,EAAE,CAAC;YACZ,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,4BAA4B,UAAU,CAAC,eAAe,GAAG;SACvF,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,4BAA4B,UAAU,CAAC,eAAe,GAAG,CAAA;QAC1E,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,IAAI,UAAU,CAAC,mBAAmB,KAAK,SAAS,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;QAC7C,MAAM,MAAM,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,mBAAmB,CAAC,WAAW,EAAE,CAAC,CAAA;QAC9F,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,qBAAqB;YAC3B,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrB,SAAS,EAAE,CAAC;YACZ,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,iCAAiC,UAAU,CAAC,mBAAmB,GAAG;SAChG,CAAC,CAAA;QACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;YAC7B,YAAY,GAAG,iCAAiC,UAAU,CAAC,mBAAmB,GAAG,CAAA;QACnF,CAAC;IACH,CAAC;IAED,8DAA8D;IAC9D,IAAI,UAAU,CAAC,SAAS,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,UAAU,CAAC,SAAS,CAAA;QAClC,MAAM,SAAS,GAAG,eAAe,CAAC,UAAU,CAAC,CAAA;QAC7C,MAAM,SAAS,GAAG,KAAK,CAAC,qBAAqB,IAAI,CAAC,CAAA;QAElD,uEAAuE;QACvE,wDAAwD;QACxD,MAAM,gBAAgB,GAAG,oBAAoB,CAC3C,KAAK,CAAC,cAAc,IAAI,eAAe,EAAE,QAAQ,IAAI,QAAQ,CAC9D,CAAA;QACD,2FAA2F;QAC3F,IAAI,aAAa,GAAG,KAAK,CAAC,WAAW,IAAI,SAAS,CAAA;QAClD,IAAI,CAAC,aAAa,IAAI,eAAe,EAAE,KAAK,EAAE,CAAC;YAC7C,6EAA6E;YAC7E,IAAI,kBAAkB,CAAC,eAAe,CAAC,KAAK,EAAE,gBAAgB,CAAC,EAAE,CAAC;gBAChE,aAAa,GAAG,eAAe,CAAC,KAAK,CAAA;YACvC,CAAC;QACH,CAAC;QACD,IAAI,CAAC,aAAa,EAAE,CAAC;YACnB,aAAa,GAAG,uBAAuB,CAAC,gBAAgB,CAAC,CAAA;QAC3D,CAAC;QAED,mEAAmE;QACnE,+DAA+D;QAC/D,MAAM,SAAS,GAA2B;YACxC,MAAM,EAAE,gBAAgB;YACxB,MAAM,EAAE,mBAAmB;YAC3B,MAAM,EAAE,gBAAgB;YACxB,IAAI,EAAE,cAAc;YACpB,IAAI,EAAE,cAAc;SACrB,CAAA;QACD,MAAM,MAAM,GAAG,SAAS,CAAC,gBAAgB,CAAC,CAAA;QAC1C,IAAI,UAAoC,CAA
A;QACxC,IAAI,eAAe,EAAE,MAAM,IAAI,MAAM,IAAI,CAAC,KAAK,CAAC,cAAc,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACvF,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;YAChC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,eAAe,CAAC,MAAM,CAAA;YAC5C,UAAU,GAAG,GAAG,EAAE;gBAChB,IAAI,IAAI,KAAK,SAAS;oBAAE,OAAO,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;;oBAC7C,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,CAAA;YACjC,CAAC,CAAA;QACH,CAAC;QAED,IAAI,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,0BAA0B,gBAAgB,WAAW,aAAa,IAAI,WAAW,EAAE,CAAC,CAAA;YAChG,MAAM,cAAc,GAAG,qBAAqB,CAAC,SAAS,EAAE,KAAK,CAAC,YAAY,CAAC,CAAA;YAC3E,MAAM,UAAU,GAAG,GAAG,KAAK,CAAC,YAAY,iBAAiB,cAAc,4HAA4H,CAAA;YAEnM,MAAM,MAAM,GAAG,MAAM,eAAe,CAClC,UAAU,EACV,EAAE,QAAQ,EAAE,gBAAoE,EAAE,KAAK,EAAE,aAAa,EAAE,EACxG,sEAAsE,EACtE,IAAI,EACJ,CAAC,CACF,CAAA;YAED,UAAU,EAAE,EAAE,CAAA;YAEd,OAAO,CAAC,GAAG,CAAC,gCAAgC,MAAM,CAAC,OAAO,GAAG,CAAC,CAAA;YAC9D,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;YAC5E,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBACjB,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,WAAW;oBACjB,KAAK,EAAE,CAAC;oBACR,SAAS;oBACT,MAAM,EAAE,KAAK;oBACb,MAAM,EAAE,6CAA6C,MAAM,CAAC,OAAO,GAAG;iBACvE,CAAC,CAAA;gBACF,IAAI,CAAC,YAAY,EAAE,CAAC;oBAClB,YAAY,GAAG,gDAAgD,CAAA;gBACjE,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,MAAM,MAAM,GAAG,KAAK,IAAI,SAAS,CAAA;gBACjC,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,WAAW;oBACjB,KAAK,EAAE,KAAK;oBACZ,SAAS;oBACT,MAAM;oBACN,MAAM,EAAE,UAAU,KAAK,IAAI,SAAS,EAAE;iBACvC,CAAC,CAAA;gBACF,IAAI,CAAC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;oBAC7B,YAAY,GAAG,oBAAoB,KAAK,sBAAsB,SAAS,GAAG,CAAA;gBAC5E,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,EAAE,EAAE,CAAA;YACd,MAAM,MAAM,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;YAC/D,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,CAAC;gBACR,SAAS;gBACT,MAAM,EAAE,KAAK;gBACb,MAAM,EAAE,oBAAoB,MAAM,EAAE;aACrC,CAAC,CAAA;YACF,IAAI,CAAC,YAAY,EAAE,CAAC;gBAClB,YAAY,GAAG,oBAAoB,MAAM,EAAE,CAAA;YAC7C,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,S
AAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAA;IAC9C,OAAO;QACL,MAAM,EAAE,SAAS;QACjB,cAAc,EAAE,YAAY;QAC5B,OAAO;KACR,CAAA;AACH,CAAC;AAED,kEAAkE;AAClE,SAAS,eAAe,CAAC,MAAe;IACtC,IAAI,MAAM,KAAK,IAAI,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,EAAE,CAAA;IACtD,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAA;IAC7C,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;AAC/B,CAAC"}
@@ -0,0 +1,48 @@
1
+ import type { TestMeasurement } from './measurement.js';
2
+ import type { BenchmarkResult } from './benchmark.js';
3
/**
 * Options accepted by `runEdTests`. Every field is optional.
 *
 * NOTE(review): the meaning of `cwd`, `filter`, `failFast` and `noUpload` is
 * not visible in this declaration file; confirm against the runner
 * implementation before relying on their names alone.
 */
+ export interface EdTestRunOptions {
4
+ cwd?: string;
5
+ filter?: string;
6
+ failFast?: boolean;
7
+ noUpload?: boolean;
8
+ reporter?: 'default' | 'json' | 'junit'; // restricted to these three literal values
9
+ /** Number of times to run each test. Passes if any run succeeds. Defaults to 1. */
10
+ runs?: number;
11
+ }
12
/**
 * Result of one individual run of a test. Instances are collected in
 * `EdTestResult.singleRuns`.
 *
 * `status`, `durationMs`, `startedAt` and `finishedAt` are always present;
 * the remaining fields are optional.
 */
+ export interface EdSingleRunResult {
13
+ status: 'pass' | 'fail';
14
+ failureReason?: string;
15
+ measurement?: TestMeasurement;
16
+ benchmarkResult?: BenchmarkResult;
17
+ output?: unknown;
18
+ durationMs: number;
19
+ startedAt: string; // NOTE(review): presumably a timestamp string; format not visible here
20
+ finishedAt: string; // NOTE(review): presumably a timestamp string; format not visible here
21
+ }
22
/**
 * Result recorded for one test. Instances form the `results` array of
 * `EdTestRunResult`.
 *
 * `testId`, `testName`, `status` and `durationMs` are always present; the
 * remaining fields are optional.
 */
+ export interface EdTestResult {
23
+ testId: string;
24
+ testName: string;
25
+ status: 'pass' | 'fail';
26
+ failureReason?: string;
27
+ measurement?: TestMeasurement;
28
+ benchmarkResult?: BenchmarkResult;
29
+ traceRef?: string;
30
+ target?: {
31
+ type: string;
32
+ step_id: string;
33
+ };
34
+ input?: unknown;
35
+ output?: unknown;
36
+ durationMs: number;
37
+ /** All individual run results when --runs > 1 */
38
+ singleRuns?: EdSingleRunResult[];
39
+ }
40
/**
 * Aggregate result of a test run; the value that the promise returned by
 * `runEdTests` resolves to. All fields are required.
 */
+ export interface EdTestRunResult {
41
+ runId: string;
42
+ startedAt: string; // NOTE(review): presumably a timestamp string; format not visible here
43
+ finishedAt: string; // NOTE(review): presumably a timestamp string; format not visible here
44
+ results: EdTestResult[]; // one entry per test
45
+ sdkVersion: string;
46
+ }
47
/**
 * Runs the tests and resolves with the aggregate `EdTestRunResult`.
 *
 * @param options - Optional run settings; may be omitted entirely.
 * @returns A promise for the run result.
 */
+ export declare function runEdTests(options?: EdTestRunOptions): Promise<EdTestRunResult>;
48
+ //# sourceMappingURL=ed-runner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ed-runner.d.ts","sourceRoot":"","sources":["../../src/ci/ed-runner.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AACvD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAA;AAOrD,MAAM,WAAW,gBAAgB;IAC/B,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,QAAQ,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,OAAO,CAAA;IACvC,mFAAmF;IACnF,IAAI,CAAC,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAA;IACvB,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,WAAW,CAAC,EAAE,eAAe,CAAA;IAC7B,eAAe,CAAC,EAAE,eAAe,CAAA;IACjC,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,SAAS,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,MAAM,CAAA;CACnB;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,GAAG,MAAM,CAAA;IACvB,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,WAAW,CAAC,EAAE,eAAe,CAAA;IAC7B,eAAe,CAAC,EAAE,eAAe,CAAA;IACjC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,MAAM,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAA;IAC1C,KAAK,CAAC,EAAE,OAAO,CAAA;IACf,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,iDAAiD;IACjD,UAAU,CAAC,EAAE,iBAAiB,EAAE,CAAA;CACjC;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,MAAM,CAAA;IAClB,OAAO,EAAE,YAAY,EAAE,CAAA;IACvB,UAAU,EAAE,MAAM,CAAA;CACnB;AAID,wBAAsB,UAAU,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,eAAe,CAAC,CAyGrF"}