elasticdash-sdk 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +775 -0
  3. package/dist/browser-ui.d.ts +43 -0
  4. package/dist/browser-ui.d.ts.map +1 -0
  5. package/dist/browser-ui.js +246 -0
  6. package/dist/browser-ui.js.map +1 -0
  7. package/dist/capture/event.d.ts +33 -0
  8. package/dist/capture/event.d.ts.map +1 -0
  9. package/dist/capture/event.js +2 -0
  10. package/dist/capture/event.js.map +1 -0
  11. package/dist/capture/index.d.ts +4 -0
  12. package/dist/capture/index.d.ts.map +1 -0
  13. package/dist/capture/index.js +4 -0
  14. package/dist/capture/index.js.map +1 -0
  15. package/dist/capture/recorder.d.ts +24 -0
  16. package/dist/capture/recorder.d.ts.map +1 -0
  17. package/dist/capture/recorder.js +46 -0
  18. package/dist/capture/recorder.js.map +1 -0
  19. package/dist/capture/replay.d.ts +20 -0
  20. package/dist/capture/replay.d.ts.map +1 -0
  21. package/dist/capture/replay.js +47 -0
  22. package/dist/capture/replay.js.map +1 -0
  23. package/dist/ci/api-client.d.ts +38 -0
  24. package/dist/ci/api-client.d.ts.map +1 -0
  25. package/dist/ci/api-client.js +96 -0
  26. package/dist/ci/api-client.js.map +1 -0
  27. package/dist/ci/benchmark.d.ts +33 -0
  28. package/dist/ci/benchmark.d.ts.map +1 -0
  29. package/dist/ci/benchmark.js +213 -0
  30. package/dist/ci/benchmark.js.map +1 -0
  31. package/dist/ci/ed-runner.d.ts +48 -0
  32. package/dist/ci/ed-runner.d.ts.map +1 -0
  33. package/dist/ci/ed-runner.js +260 -0
  34. package/dist/ci/ed-runner.js.map +1 -0
  35. package/dist/ci/executor.d.ts +13 -0
  36. package/dist/ci/executor.d.ts.map +1 -0
  37. package/dist/ci/executor.js +542 -0
  38. package/dist/ci/executor.js.map +1 -0
  39. package/dist/ci/git-info.d.ts +17 -0
  40. package/dist/ci/git-info.d.ts.map +1 -0
  41. package/dist/ci/git-info.js +102 -0
  42. package/dist/ci/git-info.js.map +1 -0
  43. package/dist/ci/index.d.ts +6 -0
  44. package/dist/ci/index.d.ts.map +1 -0
  45. package/dist/ci/index.js +4 -0
  46. package/dist/ci/index.js.map +1 -0
  47. package/dist/ci/measurement.d.ts +9 -0
  48. package/dist/ci/measurement.d.ts.map +1 -0
  49. package/dist/ci/measurement.js +15 -0
  50. package/dist/ci/measurement.js.map +1 -0
  51. package/dist/ci/replay.d.ts +31 -0
  52. package/dist/ci/replay.d.ts.map +1 -0
  53. package/dist/ci/replay.js +96 -0
  54. package/dist/ci/replay.js.map +1 -0
  55. package/dist/ci/reporters/default.d.ts +8 -0
  56. package/dist/ci/reporters/default.d.ts.map +1 -0
  57. package/dist/ci/reporters/default.js +46 -0
  58. package/dist/ci/reporters/default.js.map +1 -0
  59. package/dist/ci/reporters/index.d.ts +8 -0
  60. package/dist/ci/reporters/index.d.ts.map +1 -0
  61. package/dist/ci/reporters/index.js +14 -0
  62. package/dist/ci/reporters/index.js.map +1 -0
  63. package/dist/ci/reporters/json.d.ts +8 -0
  64. package/dist/ci/reporters/json.d.ts.map +1 -0
  65. package/dist/ci/reporters/json.js +14 -0
  66. package/dist/ci/reporters/json.js.map +1 -0
  67. package/dist/ci/reporters/junit.d.ts +8 -0
  68. package/dist/ci/reporters/junit.d.ts.map +1 -0
  69. package/dist/ci/reporters/junit.js +48 -0
  70. package/dist/ci/reporters/junit.js.map +1 -0
  71. package/dist/ci/runner.d.ts +3 -0
  72. package/dist/ci/runner.d.ts.map +1 -0
  73. package/dist/ci/runner.js +187 -0
  74. package/dist/ci/runner.js.map +1 -0
  75. package/dist/ci/test-discovery.d.ts +5 -0
  76. package/dist/ci/test-discovery.d.ts.map +1 -0
  77. package/dist/ci/test-discovery.js +11 -0
  78. package/dist/ci/test-discovery.js.map +1 -0
  79. package/dist/ci/test-loader.d.ts +19 -0
  80. package/dist/ci/test-loader.d.ts.map +1 -0
  81. package/dist/ci/test-loader.js +149 -0
  82. package/dist/ci/test-loader.js.map +1 -0
  83. package/dist/ci/test-registry.d.ts +42 -0
  84. package/dist/ci/test-registry.d.ts.map +1 -0
  85. package/dist/ci/test-registry.js +18 -0
  86. package/dist/ci/test-registry.js.map +1 -0
  87. package/dist/ci/trace-schema.d.ts +30 -0
  88. package/dist/ci/trace-schema.d.ts.map +1 -0
  89. package/dist/ci/trace-schema.js +66 -0
  90. package/dist/ci/trace-schema.js.map +1 -0
  91. package/dist/ci/trace-writer.d.ts +16 -0
  92. package/dist/ci/trace-writer.d.ts.map +1 -0
  93. package/dist/ci/trace-writer.js +108 -0
  94. package/dist/ci/trace-writer.js.map +1 -0
  95. package/dist/ci/types.d.ts +108 -0
  96. package/dist/ci/types.d.ts.map +1 -0
  97. package/dist/ci/types.js +3 -0
  98. package/dist/ci/types.js.map +1 -0
  99. package/dist/ci/upload-client.d.ts +74 -0
  100. package/dist/ci/upload-client.d.ts.map +1 -0
  101. package/dist/ci/upload-client.js +195 -0
  102. package/dist/ci/upload-client.js.map +1 -0
  103. package/dist/cli.d.ts +3 -0
  104. package/dist/cli.d.ts.map +1 -0
  105. package/dist/cli.js +716 -0
  106. package/dist/cli.js.map +1 -0
  107. package/dist/core/agent-state.d.ts +47 -0
  108. package/dist/core/agent-state.d.ts.map +1 -0
  109. package/dist/core/agent-state.js +137 -0
  110. package/dist/core/agent-state.js.map +1 -0
  111. package/dist/core/judge-utils.d.ts +22 -0
  112. package/dist/core/judge-utils.d.ts.map +1 -0
  113. package/dist/core/judge-utils.js +211 -0
  114. package/dist/core/judge-utils.js.map +1 -0
  115. package/dist/core/registry.d.ts +28 -0
  116. package/dist/core/registry.d.ts.map +1 -0
  117. package/dist/core/registry.js +52 -0
  118. package/dist/core/registry.js.map +1 -0
  119. package/dist/dashboard-server.d.ts +65 -0
  120. package/dist/dashboard-server.d.ts.map +1 -0
  121. package/dist/dashboard-server.js +3940 -0
  122. package/dist/dashboard-server.js.map +1 -0
  123. package/dist/execution/tool-runner.d.ts +26 -0
  124. package/dist/execution/tool-runner.d.ts.map +1 -0
  125. package/dist/execution/tool-runner.js +316 -0
  126. package/dist/execution/tool-runner.js.map +1 -0
  127. package/dist/html/dashboard.html +2218 -0
  128. package/dist/http.d.ts +14 -0
  129. package/dist/http.d.ts.map +1 -0
  130. package/dist/http.js +13 -0
  131. package/dist/http.js.map +1 -0
  132. package/dist/index.cjs +8102 -0
  133. package/dist/index.d.ts +61 -0
  134. package/dist/index.d.ts.map +1 -0
  135. package/dist/index.js +67 -0
  136. package/dist/index.js.map +1 -0
  137. package/dist/interceptors/ai-interceptor.d.ts +26 -0
  138. package/dist/interceptors/ai-interceptor.d.ts.map +1 -0
  139. package/dist/interceptors/ai-interceptor.js +756 -0
  140. package/dist/interceptors/ai-interceptor.js.map +1 -0
  141. package/dist/interceptors/db-auto.d.ts +8 -0
  142. package/dist/interceptors/db-auto.d.ts.map +1 -0
  143. package/dist/interceptors/db-auto.js +217 -0
  144. package/dist/interceptors/db-auto.js.map +1 -0
  145. package/dist/interceptors/db.d.ts +23 -0
  146. package/dist/interceptors/db.d.ts.map +1 -0
  147. package/dist/interceptors/db.js +137 -0
  148. package/dist/interceptors/db.js.map +1 -0
  149. package/dist/interceptors/http.d.ts +28 -0
  150. package/dist/interceptors/http.d.ts.map +1 -0
  151. package/dist/interceptors/http.js +356 -0
  152. package/dist/interceptors/http.js.map +1 -0
  153. package/dist/interceptors/side-effects.d.ts +7 -0
  154. package/dist/interceptors/side-effects.d.ts.map +1 -0
  155. package/dist/interceptors/side-effects.js +72 -0
  156. package/dist/interceptors/side-effects.js.map +1 -0
  157. package/dist/interceptors/telemetry-push.d.ts +142 -0
  158. package/dist/interceptors/telemetry-push.d.ts.map +1 -0
  159. package/dist/interceptors/telemetry-push.js +463 -0
  160. package/dist/interceptors/telemetry-push.js.map +1 -0
  161. package/dist/interceptors/tool.d.ts +2 -0
  162. package/dist/interceptors/tool.d.ts.map +1 -0
  163. package/dist/interceptors/tool.js +274 -0
  164. package/dist/interceptors/tool.js.map +1 -0
  165. package/dist/interceptors/workflow-ai.d.ts +5 -0
  166. package/dist/interceptors/workflow-ai.d.ts.map +1 -0
  167. package/dist/interceptors/workflow-ai.js +382 -0
  168. package/dist/interceptors/workflow-ai.js.map +1 -0
  169. package/dist/internals/conditional-recorder.d.ts +21 -0
  170. package/dist/internals/conditional-recorder.d.ts.map +1 -0
  171. package/dist/internals/conditional-recorder.js +54 -0
  172. package/dist/internals/conditional-recorder.js.map +1 -0
  173. package/dist/internals/mock-resolver.d.ts +146 -0
  174. package/dist/internals/mock-resolver.d.ts.map +1 -0
  175. package/dist/internals/mock-resolver.js +427 -0
  176. package/dist/internals/mock-resolver.js.map +1 -0
  177. package/dist/matchers/index.d.ts +96 -0
  178. package/dist/matchers/index.d.ts.map +1 -0
  179. package/dist/matchers/index.js +668 -0
  180. package/dist/matchers/index.js.map +1 -0
  181. package/dist/observability.d.ts +82 -0
  182. package/dist/observability.d.ts.map +1 -0
  183. package/dist/observability.js +471 -0
  184. package/dist/observability.js.map +1 -0
  185. package/dist/portal-executor.d.ts +30 -0
  186. package/dist/portal-executor.d.ts.map +1 -0
  187. package/dist/portal-executor.js +324 -0
  188. package/dist/portal-executor.js.map +1 -0
  189. package/dist/portal-server.d.ts +3 -0
  190. package/dist/portal-server.d.ts.map +1 -0
  191. package/dist/portal-server.js +279 -0
  192. package/dist/portal-server.js.map +1 -0
  193. package/dist/proxy/llm-capture.d.ts +14 -0
  194. package/dist/proxy/llm-capture.d.ts.map +1 -0
  195. package/dist/proxy/llm-capture.js +264 -0
  196. package/dist/proxy/llm-capture.js.map +1 -0
  197. package/dist/reporter.d.ts +3 -0
  198. package/dist/reporter.d.ts.map +1 -0
  199. package/dist/reporter.js +72 -0
  200. package/dist/reporter.js.map +1 -0
  201. package/dist/runWorkflowSubprocess.d.ts +14 -0
  202. package/dist/runWorkflowSubprocess.d.ts.map +1 -0
  203. package/dist/runWorkflowSubprocess.js +66 -0
  204. package/dist/runWorkflowSubprocess.js.map +1 -0
  205. package/dist/runner.d.ts +16 -0
  206. package/dist/runner.d.ts.map +1 -0
  207. package/dist/runner.js +138 -0
  208. package/dist/runner.js.map +1 -0
  209. package/dist/socket-connector.d.ts +22 -0
  210. package/dist/socket-connector.d.ts.map +1 -0
  211. package/dist/socket-connector.js +104 -0
  212. package/dist/socket-connector.js.map +1 -0
  213. package/dist/telemetry-batcher.d.ts +56 -0
  214. package/dist/telemetry-batcher.d.ts.map +1 -0
  215. package/dist/telemetry-batcher.js +143 -0
  216. package/dist/telemetry-batcher.js.map +1 -0
  217. package/dist/test-setup.d.ts +12 -0
  218. package/dist/test-setup.d.ts.map +1 -0
  219. package/dist/test-setup.js +13 -0
  220. package/dist/test-setup.js.map +1 -0
  221. package/dist/tool-registry.d.ts +31 -0
  222. package/dist/tool-registry.d.ts.map +1 -0
  223. package/dist/tool-registry.js +73 -0
  224. package/dist/tool-registry.js.map +1 -0
  225. package/dist/tool-runner-worker.d.ts +2 -0
  226. package/dist/tool-runner-worker.d.ts.map +1 -0
  227. package/dist/tool-runner-worker.js +215 -0
  228. package/dist/tool-runner-worker.js.map +1 -0
  229. package/dist/trace-adapter/context.d.ts +72 -0
  230. package/dist/trace-adapter/context.d.ts.map +1 -0
  231. package/dist/trace-adapter/context.js +80 -0
  232. package/dist/trace-adapter/context.js.map +1 -0
  233. package/dist/tracing.d.ts +2 -0
  234. package/dist/tracing.d.ts.map +1 -0
  235. package/dist/tracing.js +59 -0
  236. package/dist/tracing.js.map +1 -0
  237. package/dist/trigger-executor.d.ts +12 -0
  238. package/dist/trigger-executor.d.ts.map +1 -0
  239. package/dist/trigger-executor.js +130 -0
  240. package/dist/trigger-executor.js.map +1 -0
  241. package/dist/types/portal.d.ts +76 -0
  242. package/dist/types/portal.d.ts.map +1 -0
  243. package/dist/types/portal.js +2 -0
  244. package/dist/types/portal.js.map +1 -0
  245. package/dist/utils/debug.d.ts +3 -0
  246. package/dist/utils/debug.d.ts.map +1 -0
  247. package/dist/utils/debug.js +8 -0
  248. package/dist/utils/debug.js.map +1 -0
  249. package/dist/utils/license-error.d.ts +23 -0
  250. package/dist/utils/license-error.d.ts.map +1 -0
  251. package/dist/utils/license-error.js +42 -0
  252. package/dist/utils/license-error.js.map +1 -0
  253. package/dist/utils/redact.d.ts +7 -0
  254. package/dist/utils/redact.d.ts.map +1 -0
  255. package/dist/utils/redact.js +26 -0
  256. package/dist/utils/redact.js.map +1 -0
  257. package/dist/workflow-runner-worker.d.ts +2 -0
  258. package/dist/workflow-runner-worker.d.ts.map +1 -0
  259. package/dist/workflow-runner-worker.js +329 -0
  260. package/dist/workflow-runner-worker.js.map +1 -0
  261. package/dist/workflow-runner.d.ts +14 -0
  262. package/dist/workflow-runner.d.ts.map +1 -0
  263. package/dist/workflow-runner.js +34 -0
  264. package/dist/workflow-runner.js.map +1 -0
  265. package/docs/agent-coding-instructions.md +138 -0
  266. package/docs/agent-integration-guide.md +564 -0
  267. package/docs/agents.md +140 -0
  268. package/docs/dashboard.md +394 -0
  269. package/docs/deno.md +69 -0
  270. package/docs/instrumentation.md +424 -0
  271. package/docs/langfuse-trace-structure.md +145 -0
  272. package/docs/matchers.md +173 -0
  273. package/docs/observability_contract.md +192 -0
  274. package/docs/observability_mode.md +195 -0
  275. package/docs/quickstart.md +621 -0
  276. package/docs/security-compliance.md +566 -0
  277. package/docs/test-writing-guidelines.md +444 -0
  278. package/docs/tools.md +165 -0
  279. package/docs/workflow-modes.md +253 -0
  280. package/package.json +76 -0
  281. package/src/browser-ui.ts +281 -0
  282. package/src/capture/event.ts +30 -0
  283. package/src/capture/index.ts +3 -0
  284. package/src/capture/recorder.ts +62 -0
  285. package/src/capture/replay.ts +55 -0
  286. package/src/ci/api-client.ts +136 -0
  287. package/src/ci/benchmark.ts +257 -0
  288. package/src/ci/ed-runner.ts +351 -0
  289. package/src/ci/executor.ts +671 -0
  290. package/src/ci/git-info.ts +127 -0
  291. package/src/ci/index.ts +5 -0
  292. package/src/ci/measurement.ts +25 -0
  293. package/src/ci/replay.ts +127 -0
  294. package/src/ci/reporters/default.ts +50 -0
  295. package/src/ci/reporters/index.ts +21 -0
  296. package/src/ci/reporters/json.ts +18 -0
  297. package/src/ci/reporters/junit.ts +61 -0
  298. package/src/ci/runner.ts +208 -0
  299. package/src/ci/test-discovery.ts +16 -0
  300. package/src/ci/test-loader.ts +187 -0
  301. package/src/ci/test-registry.ts +62 -0
  302. package/src/ci/trace-schema.ts +96 -0
  303. package/src/ci/trace-writer.ts +107 -0
  304. package/src/ci/types.ts +115 -0
  305. package/src/ci/upload-client.ts +300 -0
  306. package/src/cli.ts +811 -0
  307. package/src/core/agent-state.ts +162 -0
  308. package/src/core/judge-utils.ts +232 -0
  309. package/src/core/registry.ts +92 -0
  310. package/src/dashboard-server.ts +2047 -0
  311. package/src/execution/tool-runner.ts +352 -0
  312. package/src/html/dashboard.html +2218 -0
  313. package/src/http.ts +13 -0
  314. package/src/index.ts +138 -0
  315. package/src/interceptors/ai-interceptor.ts +798 -0
  316. package/src/interceptors/db-auto.ts +243 -0
  317. package/src/interceptors/db.ts +156 -0
  318. package/src/interceptors/http.ts +393 -0
  319. package/src/interceptors/side-effects.ts +83 -0
  320. package/src/interceptors/telemetry-push.ts +537 -0
  321. package/src/interceptors/tool.ts +287 -0
  322. package/src/interceptors/workflow-ai.ts +419 -0
  323. package/src/internals/conditional-recorder.ts +63 -0
  324. package/src/internals/mock-resolver.ts +492 -0
  325. package/src/matchers/index.ts +824 -0
  326. package/src/observability.ts +501 -0
  327. package/src/portal-executor.ts +355 -0
  328. package/src/portal-server.ts +304 -0
  329. package/src/proxy/llm-capture.ts +301 -0
  330. package/src/reporter.ts +81 -0
  331. package/src/runWorkflowSubprocess.ts +74 -0
  332. package/src/runner.ts +178 -0
  333. package/src/socket-connector.ts +117 -0
  334. package/src/telemetry-batcher.ts +191 -0
  335. package/src/test-setup.ts +16 -0
  336. package/src/tool-registry.ts +94 -0
  337. package/src/tool-runner-worker.ts +244 -0
  338. package/src/trace-adapter/context.ts +156 -0
  339. package/src/tracing.ts +62 -0
  340. package/src/trigger-executor.ts +171 -0
  341. package/src/types/agent.d.ts +63 -0
  342. package/src/types/expect.d.ts +81 -0
  343. package/src/types/modules.d.ts +2 -0
  344. package/src/types/portal.ts +69 -0
  345. package/src/utils/debug.ts +8 -0
  346. package/src/utils/license-error.ts +43 -0
  347. package/src/utils/redact.ts +25 -0
  348. package/src/workflow-runner-worker.ts +386 -0
  349. package/src/workflow-runner.ts +58 -0
@@ -0,0 +1,62 @@
1
+ import { AsyncLocalStorage } from 'node:async_hooks'
2
+ import { randomUUID } from 'node:crypto'
3
+ import type { WorkflowEvent, WorkflowTrace } from './event.js'
4
+ import type { ReplayController } from './replay.js'
5
+
6
/**
 * Collects workflow events in memory and hands out monotonically increasing
 * IDs for them. Asynchronous recordings can be registered with
 * {@link TraceRecorder.trackAsync} so that {@link TraceRecorder.flush} can
 * wait for them before the trace is exported.
 */
export class TraceRecorder {
  /** Events in the order they were passed to {@link TraceRecorder.record}. */
  events: WorkflowEvent[] = []
  private _counter = 0
  private _sideEffectCounter = 0
  private _pending: Set<Promise<void>> = new Set()

  /** Append an event to the in-memory trace. */
  record(event: WorkflowEvent): void {
    this.events.push(event)
  }

  /**
   * Register an in-flight async recording promise so flush() can await it.
   *
   * The promise is removed from the pending set once it settles. A rejected
   * recording is deliberately tolerated here (flush() uses allSettled), so
   * the cleanup is attached with `then(untrack, untrack)`: unlike
   * `finally()`, the derived promise then always fulfils and can never
   * surface as an unhandled rejection.
   */
  trackAsync(promise: Promise<void>): void {
    this._pending.add(promise)
    const untrack = (): void => {
      this._pending.delete(promise)
    }
    void promise.then(untrack, untrack)
  }

  /** Await all in-flight async recordings. No-op when none are pending. */
  async flush(): Promise<void> {
    await Promise.allSettled([...this._pending])
  }

  /** Next main-event ID; the first call returns 1. */
  nextId(): number {
    return ++this._counter
  }

  /** Separate counter for Date.now / Math.random — never shares IDs with main events. */
  nextSideEffectId(): number {
    return ++this._sideEffectCounter
  }

  /**
   * Snapshot the recorded events as a trace.
   *
   * @param traceId - Identifier to use; a random UUID is generated when omitted.
   * @returns A trace holding a shallow copy of the event list, so later
   *   calls to record() do not alter the returned array.
   */
  toTrace(traceId?: string): WorkflowTrace {
    return {
      traceId: traceId ?? randomUUID(),
      events: [...this.events],
    }
  }
}
43
+
44
/** State made available to code running inside a captured workflow. */
export interface CaptureContext {
  /** Recorder that receives the events of the current execution. */
  recorder: TraceRecorder
  /** Replay controller paired with the recorder. */
  replay: ReplayController
}

type CaptureStore = AsyncLocalStorage<CaptureContext | undefined>

const CAPTURE_ALS_KEY = '__elasticdash_capture_als__'
const globalSlots = globalThis as Record<string, unknown>

// Reuse a store that is already published on globalThis, otherwise create one
// and publish it. NOTE(review): presumably this lets several loaded copies of
// this module share a single store — confirm against the build setup.
const publishedStore = globalSlots[CAPTURE_ALS_KEY] as CaptureStore
const captureAls: CaptureStore =
  publishedStore ?? new AsyncLocalStorage<CaptureContext | undefined>()
if (!globalSlots[CAPTURE_ALS_KEY]) {
  globalSlots[CAPTURE_ALS_KEY] = captureAls
}

/**
 * Bind `ctx` to the current asynchronous execution via `enterWith`.
 * Passing `undefined` clears the context for that execution.
 */
export function setCaptureContext(ctx: CaptureContext | undefined): void {
  captureAls.enterWith(ctx)
}

/** Return the capture context bound to the current execution, if any. */
export function getCaptureContext(): CaptureContext | undefined {
  const current = captureAls.getStore()
  return current
}
@@ -0,0 +1,55 @@
1
+ import type { WorkflowEvent } from './event.js'
2
+
3
/**
 * Answers "should this event be replayed, and with what recorded value?"
 * from a previously recorded event history.
 */
export class ReplayController {
  /** Every history event, indexed by its `id`. */
  private historyMap: Map<number, WorkflowEvent>
  /**
   * Only the `side_effect` events, indexed by their `id`.
   * NOTE(review): callers pass a "sideEffectId" here; this lookup assumes
   * that value is what was stored in the event's `id` — confirm against the
   * code that records side effects.
   */
  private sideEffectMap: Map<number, WorkflowEvent>

  /**
   * @param replayMode - When false, every `shouldReplay*` query answers false.
   * @param checkpoint - Highest main-event ID that is still replayed.
   * @param history - Recorded events to serve results from.
   */
  constructor(
    public replayMode: boolean,
    public checkpoint: number,
    public history: WorkflowEvent[],
  ) {
    const allEvents = new Map<number, WorkflowEvent>()
    const sideEffects = new Map<number, WorkflowEvent>()
    history.forEach(event => {
      allEvents.set(event.id, event)
    })
    history.forEach(event => {
      if (event.type === 'side_effect') {
        sideEffects.set(event.id, event)
      }
    })
    this.historyMap = allEvents
    this.sideEffectMap = sideEffects
  }

  /** True when replay is on and `eventId` is at or before the checkpoint. */
  shouldReplay(eventId: number): boolean {
    if (!this.replayMode) return false
    return eventId <= this.checkpoint
  }

  /** Recorded event with this ID, or undefined when none exists. */
  getRecordedEvent(eventId: number): WorkflowEvent | undefined {
    return this.historyMap.get(eventId)
  }

  /** Recorded `output` of the event with this ID, or undefined when none exists. */
  getRecordedResult(eventId: number): unknown {
    const recorded = this.historyMap.get(eventId)
    return recorded?.output
  }

  /** Returns true if the side effect with this sideEffectId has a recorded value to replay */
  shouldReplaySideEffect(n: number): boolean {
    if (!this.replayMode) return false
    return this.sideEffectMap.has(n)
  }

  /** Recorded `output` of side effect `n`, or undefined when none exists. */
  getSideEffectResult(n: number): unknown {
    const recorded = this.sideEffectMap.get(n)
    return recorded?.output
  }

  /** Recorded side-effect event `n`, or undefined when none exists. */
  getRecordedSideEffectEvent(n: number): WorkflowEvent | undefined {
    return this.sideEffectMap.get(n)
  }

  /**
   * True when replay is on and side effect `n` was recorded under
   * `expectedName`.
   */
  shouldReplaySideEffectOfType(n: number, expectedName: string): boolean {
    if (!this.replayMode) return false
    const recorded = this.sideEffectMap.get(n)
    if (!recorded) return false
    return recorded.type === 'side_effect' && recorded.name === expectedName
  }

  /**
   * Recorded `output` of side effect `n`, but only when it was recorded
   * under `expectedName`; undefined otherwise. Does not consult `replayMode`.
   */
  getSideEffectResultOfType(n: number, expectedName: string): unknown {
    const recorded = this.sideEffectMap.get(n)
    const matches =
      !!recorded && recorded.type === 'side_effect' && recorded.name === expectedName
    return matches ? recorded.output : undefined
  }
}
@@ -0,0 +1,136 @@
1
+ import { randomUUID } from 'node:crypto'
2
+ import { getOriginalFetch } from '../interceptors/http.js'
3
+ import type { APITestGroup } from './types.js'
4
+
5
+ // ─── API Client ──────────────────────────────────────────────
6
+ // Uses getOriginalFetch() to bypass SDK interceptors.
7
+
8
/**
 * Reduce a server URL to its base: trailing slashes are removed first, then a
 * single trailing `/api` segment, so callers can append `/api/...` without
 * producing `/api/api`.
 */
function normalizeBase(serverUrl: string): string {
  const withoutTrailingSlashes = serverUrl.replace(/\/+$/, '')
  const withoutApiSuffix = withoutTrailingSlashes.replace(/\/api$/, '')
  return withoutApiSuffix
}
12
+
13
/**
 * Build the default request headers: JSON content type, the API key (empty
 * string when the key is falsy) and a fresh random correlation ID per call.
 */
function headers(apiKey: string): Record<string, string> {
  const defaults: Record<string, string> = {}
  defaults['Content-Type'] = 'application/json'
  defaults['api-key'] = apiKey || ''
  defaults['X-Correlation-ID'] = randomUUID()
  return defaults
}
20
+
21
/**
 * Perform a JSON API request with the un-intercepted fetch and unwrap the
 * backend's response envelope.
 *
 * @param url - Absolute request URL.
 * @param apiKey - API key sent in the `api-key` header.
 * @param options - Extra fetch options; `options.headers` entries override
 *   the defaults produced by `headers()`.
 * @returns `body.result` when the parsed body is an object with a `result`
 *   key, else `body.data` when it has a `data` key, else the body itself.
 * @throws Error with message `API <status>: <body or status text>` when the
 *   response status is not ok.
 */
async function apiRequest<T>(
  url: string,
  apiKey: string,
  options: RequestInit = {},
): Promise<T> {
  const method = (options.method || 'GET').toUpperCase()
  console.log(`[elasticdash ci] ${method} ${url}`)

  const res = await getOriginalFetch()(url, {
    ...options,
    headers: { ...headers(apiKey), ...(options.headers as Record<string, string> ?? {}) },
  })

  if (!res.ok) {
    // Best effort: the error body is optional context, never a second failure.
    const text = await res.text().catch(() => '')
    console.log(`[elasticdash ci] ${method} ${url} → ${res.status} ${text.substring(0, 200)}`)
    throw new Error(`API ${res.status}: ${text || res.statusText}`)
  }

  const json: unknown = await res.json()
  // Backend wraps responses in generalApiResponseSender which sends { result: ... }
  // Use 'in' check instead of nullish coalescing — result:null is valid data, not "missing".
  // The `in` operator throws a TypeError on null and primitives, so only
  // non-null objects are inspected; any other body is returned unchanged.
  let result: unknown = json
  if (json !== null && typeof json === 'object') {
    const envelope = json as Record<string, unknown>
    if ('result' in envelope) {
      result = envelope.result
    } else if ('data' in envelope) {
      result = envelope.data
    }
  }
  console.log(`[elasticdash ci] ${method} ${url} → ${res.status} (result keys: ${result && typeof result === 'object' ? Object.keys(result).join(',') : typeof result})`)
  return result as T
}
47
+
48
/**
 * Load the test groups for the project the API key belongs to.
 *
 * @param serverUrl - Backend URL, with or without a trailing slash or `/api`.
 * @param apiKey - API key identifying the project.
 * @param filters - Optional filters; only truthy values (and a non-empty
 *   `tags` list, joined with commas) are sent as query parameters.
 * @returns The groups returned by `GET /api/testgroups/by-project`.
 */
export async function fetchTestGroups(
  serverUrl: string,
  apiKey: string,
  filters?: { workflowName?: string; tags?: string[]; status?: string },
): Promise<APITestGroup[]> {
  const base = normalizeBase(serverUrl)

  const query = new URLSearchParams()
  if (filters?.workflowName) {
    query.set('workflowName', filters.workflowName)
  }
  if (filters?.tags?.length) {
    query.set('tags', filters.tags.join(','))
  }
  if (filters?.status) {
    query.set('status', filters.status)
  }

  const queryString = query.toString()
  const suffix = queryString ? `?${queryString}` : ''
  return apiRequest<APITestGroup[]>(`${base}/api/testgroups/by-project${suffix}`, apiKey)
}
67
+
68
/**
 * Send the result of one test run to the backend.
 *
 * @param serverUrl - Backend URL, with or without a trailing slash or `/api`.
 * @param apiKey - API key sent with the request.
 * @param testGroupId - Group the run belongs to; becomes part of the path.
 * @param payload - Run data, serialized as the JSON request body.
 * @returns The unwrapped response of `POST /api/testgroups/<id>/runs`.
 */
export async function submitTestRun(
  serverUrl: string,
  apiKey: string,
  testGroupId: number,
  payload: Record<string, unknown>,
): Promise<{ id: number }> {
  const endpoint = `${normalizeBase(serverUrl)}/api/testgroups/${testGroupId}/runs`
  const init: RequestInit = {
    method: 'POST',
    body: JSON.stringify(payload),
  }
  return apiRequest<{ id: number }>(endpoint, apiKey, init)
}
84
+
85
/**
 * Create a batch on the backend that groups several test runs.
 *
 * @param serverUrl - Backend URL, with or without a trailing slash or `/api`.
 * @param apiKey - API key sent with the request.
 * @param payload - Batch data, serialized as the JSON request body.
 * @returns The unwrapped response of `POST /api/testgroups/batches`.
 */
export async function createBatch(
  serverUrl: string,
  apiKey: string,
  payload: Record<string, unknown>,
): Promise<{ id: number }> {
  const endpoint = `${normalizeBase(serverUrl)}/api/testgroups/batches`
  const init: RequestInit = {
    method: 'POST',
    body: JSON.stringify(payload),
  }
  return apiRequest<{ id: number }>(endpoint, apiKey, init)
}
100
+
101
/**
 * Evaluator (LLM judge) settings resolved from the backend.
 * Each field is null when the backend response does not supply it.
 */
export interface EvaluatorConfig {
  /** Provider name mapped from the backend's numeric provider ID. */
  provider: string | null
  /** Model name as returned by the backend. */
  model: string | null
  /** API key for the evaluator. */
  apiKey: string | null
}

/** Backend `llmProviderId` → provider name. */
const LLM_PROVIDER_MAP: Record<number, string> = {
  1: 'openai',
  2: 'gemini',
  3: 'anthropic',
  4: 'moonshot',
}

/**
 * Load the user's default evaluator settings.
 * Used by ed-test llm_judge benchmarks when judge_provider/judge_model
 * are not specified in the test definition.
 *
 * @param serverUrl - Backend URL, with or without a trailing slash or `/api`.
 * @param apiKey - API key sent with the request.
 * @returns The mapped provider (null for a missing or unknown ID), the model
 *   (null when absent) and `apiKey: null` — this function never returns a key.
 */
export async function fetchEvaluatorConfig(
  serverUrl: string,
  apiKey: string,
): Promise<EvaluatorConfig> {
  const endpoint = `${normalizeBase(serverUrl)}/api/user/settings/llm/default-evaluator`
  const settings = await apiRequest<{ llmProviderId?: number; model?: string }>(endpoint, apiKey)

  const providerId = settings?.llmProviderId
  let provider: string | null = null
  if (providerId) {
    provider = LLM_PROVIDER_MAP[providerId] ?? null
  }

  return {
    provider,
    model: settings?.model ?? null,
    apiKey: null,
  }
}
@@ -0,0 +1,257 @@
1
+ /**
2
+ * benchmark.ts
3
+ *
4
+ * Compares recorded trace measurements against user-defined benchmarks.
5
+ * Supports duration, token, output-contains, and LLM-as-a-judge evaluations.
6
+ *
7
+ * Generated/updated on 2026-04-20.
8
+ */
9
+
10
+ import { callProviderLLM } from '../matchers/index.js'
11
+ import { prepareOutputForJudge } from '../core/judge-utils.js'
12
+ import type { TestMeasurement } from './measurement.js'
13
+ import type { TestBenchmarks } from './test-registry.js'
14
+ import type { EvaluatorConfig } from './api-client.js'
15
+
16
/** Identifier of each benchmark check that compareBenchmarks can report. */
export type MetricName = 'duration_ms' | 'tokens_total' | 'output_contains' | 'output_not_contains' | 'llm_judge'

/** Outcome of a single benchmark check. */
export interface MetricResult {
  /** Which check produced this result. */
  name: MetricName
  /**
   * Observed value: the measured number for duration/token checks, 1 or 0
   * (pass/fail) for the output text checks, and the parsed judge score
   * (0 when no score was obtained) for llm_judge.
   */
  value: number
  /** Limit the value was compared against (1 for the output text checks). */
  threshold: number
  /** Whether the check passed. */
  passed: boolean
  /** Optional human-readable explanation, e.g. why the check failed. */
  detail?: string
}

/** Combined outcome of all benchmark checks for one measurement. */
export interface BenchmarkResult {
  /** True when every entry in `metrics` passed. */
  passed: boolean
  /** Description of the first failing check; undefined when nothing failed. */
  failure_reason?: string
  /** One entry per evaluated check, in evaluation order. */
  metrics: MetricResult[]
}
31
+
32
/** Maps backend provider names to SDK provider names used by callProviderLLM. */
const PROVIDER_NAME_MAP: Record<string, string> = {
  anthropic: 'claude',
  moonshot: 'kimi',
}

/** Default model for each provider, used when no explicit model is set or
 * when the evaluator config model doesn't belong to the resolved provider. */
const DEFAULT_PROVIDER_MODELS: Record<string, string> = {
  openai: 'gpt-4o',
  claude: 'claude-sonnet-4-20250514',
  gemini: 'gemini-2.0-flash',
  grok: 'grok-3',
  kimi: 'moonshot-v1-auto',
}

/**
 * Known model prefixes per provider — used to check if a model belongs to a provider.
 * The OpenAI reasoning families are listed without a trailing dash so that the
 * bare model IDs ("o1", "o3") match as well as their variants ("o3-mini").
 */
const PROVIDER_MODEL_PREFIXES: Record<string, string[]> = {
  openai: ['gpt-', 'o1', 'o3', 'o4', 'chatgpt-', 'omni-'],
  claude: ['claude-'],
  gemini: ['gemini-'],
  grok: ['grok-'],
  kimi: ['moonshot-', 'kimi-'],
}

/** True when `key` is an own entry of `table` (ignores inherited names such as "constructor"). */
function hasOwnEntry(table: object, key: string): boolean {
  return Object.prototype.hasOwnProperty.call(table, key)
}

/**
 * Check if a model name belongs to the given provider.
 *
 * @param model - Model name; compared case-insensitively.
 * @param provider - SDK provider name (a key of PROVIDER_MODEL_PREFIXES).
 * @returns true when the model starts with one of the provider's known
 *   prefixes; false for unknown providers.
 */
function isModelForProvider(model: string, provider: string): boolean {
  if (!hasOwnEntry(PROVIDER_MODEL_PREFIXES, provider)) return false
  const prefixes = PROVIDER_MODEL_PREFIXES[provider]
  if (!prefixes) return false
  // Lower-case once instead of once per prefix.
  const normalizedModel = model.toLowerCase()
  return prefixes.some(prefix => normalizedModel.startsWith(prefix))
}

/**
 * Normalize provider name from backend format to SDK format.
 * Names without a mapping are returned unchanged.
 */
function normalizeSdkProvider(provider: string): string {
  if (!hasOwnEntry(PROVIDER_NAME_MAP, provider)) return provider
  return PROVIDER_NAME_MAP[provider] ?? provider
}
68
+
69
/**
 * Evaluate a measurement against every benchmark that is configured.
 *
 * Checks run in a fixed order — duration, total tokens, output contains,
 * output not contains, LLM judge — and each configured check contributes one
 * metric. The function is async only because the LLM judge performs an LLM
 * call; errors from that call are reported as a failed metric, not thrown.
 *
 * @param measurement - Recorded duration and (optional) token totals.
 * @param benchmarks - Thresholds and expectations to check against.
 * @param stepOutput - The step's output, used by the text checks and the judge.
 * @param evaluatorConfig - Optional backend evaluator config used as fallback
 *   when the test does not specify judge_provider/judge_model.
 * @returns Overall pass flag, the first failure reason and all metrics.
 */
export async function compareBenchmarks(
  measurement: TestMeasurement,
  benchmarks: TestBenchmarks,
  stepOutput?: unknown,
  evaluatorConfig?: EvaluatorConfig | null,
): Promise<BenchmarkResult> {
  const metrics: MetricResult[] = []
  let firstFailure: string | undefined

  // Store a metric; when it failed and nothing failed before, keep its reason.
  const report = (metric: MetricResult, failureReason: string): void => {
    metrics.push(metric)
    if (!metric.passed && !firstFailure) {
      firstFailure = failureReason
    }
  }

  // ─── Duration ───────────────────────────────────────────────
  const maxDuration = benchmarks.max_duration_ms
  if (maxDuration !== undefined) {
    const elapsed = measurement.duration_ms
    report(
      { name: 'duration_ms', value: elapsed, threshold: maxDuration, passed: elapsed <= maxDuration },
      `duration_ms (${elapsed}) exceeded max threshold (${maxDuration})`,
    )
  }

  // ─── Total tokens ───────────────────────────────────────────
  const maxTokens = benchmarks.max_tokens_total
  if (maxTokens !== undefined) {
    // A measurement without token data counts as zero tokens.
    const used = measurement.tokens_total ?? 0
    report(
      { name: 'tokens_total', value: used, threshold: maxTokens, passed: used <= maxTokens },
      `tokens_total (${used}) exceeded max threshold (${maxTokens})`,
    )
  }

  // ─── Output contains (case-insensitive) ─────────────────────
  const required = benchmarks.output_contains
  if (required !== undefined) {
    const found = stringifyOutput(stepOutput).toLowerCase().includes(required.toLowerCase())
    const reason = `output does not contain "${required}"`
    report(
      {
        name: 'output_contains',
        value: found ? 1 : 0,
        threshold: 1,
        passed: found,
        detail: found ? undefined : reason,
      },
      reason,
    )
  }

  // ─── Output not contains (case-insensitive) ─────────────────
  const forbidden = benchmarks.output_not_contains
  if (forbidden !== undefined) {
    const absent = !stringifyOutput(stepOutput).toLowerCase().includes(forbidden.toLowerCase())
    const reason = `output unexpectedly contains "${forbidden}"`
    report(
      {
        name: 'output_not_contains',
        value: absent ? 1 : 0,
        threshold: 1,
        passed: absent,
        detail: absent ? undefined : reason,
      },
      reason,
    )
  }

  // ─── LLM-as-a-judge ────────────────────────────────────────
  const judge = benchmarks.llm_judge
  if (judge) {
    const outputStr = stringifyOutput(stepOutput)
    const threshold = judge.judge_score_threshold ?? 7

    // Provider priority: test definition, then backend evaluator config,
    // then 'openai'.
    const resolvedProvider = normalizeSdkProvider(
      judge.judge_provider ?? evaluatorConfig?.provider ?? 'openai'
    )

    // Model priority: judge_model, then the evaluator config model (only when
    // it belongs to the resolved provider), then the provider default.
    let resolvedModel = judge.judge_model ?? undefined
    const configuredModel = evaluatorConfig?.model
    if (!resolvedModel && configuredModel && isModelForProvider(configuredModel, resolvedProvider)) {
      resolvedModel = configuredModel
    }
    if (!resolvedModel) {
      resolvedModel = DEFAULT_PROVIDER_MODELS[resolvedProvider]
    }

    // When the backend supplied an API key, the test did not pick its own
    // provider, and the matching environment variable is unset, expose the
    // key through the environment for the duration of the LLM call.
    const envKeyMap: Record<string, string> = {
      openai: 'OPENAI_API_KEY',
      claude: 'ANTHROPIC_API_KEY',
      gemini: 'GEMINI_API_KEY',
      grok: 'GROK_API_KEY',
      kimi: 'KIMI_API_KEY',
    }
    const envKey = envKeyMap[resolvedProvider]
    let restoreEnv: (() => void) | undefined
    const backendKey = evaluatorConfig?.apiKey
    if (backendKey && envKey && !judge.judge_provider && !process.env[envKey]) {
      const previous = process.env[envKey]
      process.env[envKey] = backendKey
      restoreEnv = () => {
        if (previous === undefined) {
          delete process.env[envKey]
        } else {
          process.env[envKey] = previous
        }
      }
    }

    // Report a judge metric that failed without a usable score.
    const reportJudgeFailure = (detail: string, reason: string): void => {
      report({ name: 'llm_judge', value: 0, threshold, passed: false, detail }, reason)
    }

    try {
      console.log(` [llm_judge] provider=${resolvedProvider}, model=${resolvedModel ?? '(default)'}`)
      const preparedOutput = prepareOutputForJudge(outputStr, judge.judge_prompt)
      const evalPrompt = `${judge.judge_prompt}\n\n<output>\n${preparedOutput}\n</output>\n\nBased on the evaluation criteria above, score this output on a scale of 0-10. Respond with only the number.`

      const result = await callProviderLLM(
        evalPrompt,
        { provider: resolvedProvider as 'openai' | 'claude' | 'gemini' | 'grok' | 'kimi', model: resolvedModel },
        'You are an expert test judge. Return only a number between 0 and 10.',
        4096,
        0,
      )

      // Undo the temporary environment change as soon as the call returns.
      restoreEnv?.()

      console.log(` [llm_judge] raw response: "${result.content}"`)
      // The first number in the reply is taken as the score.
      const numericToken = result.content.match(/-?\d+(?:\.\d+)?/)?.[0] ?? ''
      const score = parseFloat(numericToken)
      if (isNaN(score)) {
        reportJudgeFailure(
          `Could not parse score from LLM response: "${result.content}"`,
          `llm_judge: could not parse score from response`,
        )
      } else {
        report(
          {
            name: 'llm_judge',
            value: score,
            threshold,
            passed: score >= threshold,
            detail: `Score: ${score}/${threshold}`,
          },
          `llm_judge score (${score}) below threshold (${threshold})`,
        )
      }
    } catch (err) {
      restoreEnv?.()
      const errMsg = err instanceof Error ? err.message : String(err)
      reportJudgeFailure(`LLM judge error: ${errMsg}`, `llm_judge error: ${errMsg}`)
    }
  }

  return {
    passed: metrics.every(metric => metric.passed),
    failure_reason: firstFailure,
    metrics,
  }
}
251
+
252
/**
 * Converts step output to a string for text-based assertions.
 *
 * @param output - Any step output value.
 * @returns '' for null/undefined, the value itself for strings, otherwise its
 *   JSON text. Values JSON cannot represent fall back to `String(output)`, so
 *   the result is always a string and this function does not throw for them.
 */
function stringifyOutput(output: unknown): string {
  if (output === null || output === undefined) return ''
  if (typeof output === 'string') return output
  try {
    // JSON.stringify returns undefined (not a string) for functions and symbols.
    return JSON.stringify(output) ?? String(output)
  } catch {
    // JSON.stringify throws on BigInt values and circular structures.
    return String(output)
  }
}