elasticdash-sdk 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +775 -0
  3. package/dist/browser-ui.d.ts +43 -0
  4. package/dist/browser-ui.d.ts.map +1 -0
  5. package/dist/browser-ui.js +246 -0
  6. package/dist/browser-ui.js.map +1 -0
  7. package/dist/capture/event.d.ts +33 -0
  8. package/dist/capture/event.d.ts.map +1 -0
  9. package/dist/capture/event.js +2 -0
  10. package/dist/capture/event.js.map +1 -0
  11. package/dist/capture/index.d.ts +4 -0
  12. package/dist/capture/index.d.ts.map +1 -0
  13. package/dist/capture/index.js +4 -0
  14. package/dist/capture/index.js.map +1 -0
  15. package/dist/capture/recorder.d.ts +24 -0
  16. package/dist/capture/recorder.d.ts.map +1 -0
  17. package/dist/capture/recorder.js +46 -0
  18. package/dist/capture/recorder.js.map +1 -0
  19. package/dist/capture/replay.d.ts +20 -0
  20. package/dist/capture/replay.d.ts.map +1 -0
  21. package/dist/capture/replay.js +47 -0
  22. package/dist/capture/replay.js.map +1 -0
  23. package/dist/ci/api-client.d.ts +38 -0
  24. package/dist/ci/api-client.d.ts.map +1 -0
  25. package/dist/ci/api-client.js +96 -0
  26. package/dist/ci/api-client.js.map +1 -0
  27. package/dist/ci/benchmark.d.ts +33 -0
  28. package/dist/ci/benchmark.d.ts.map +1 -0
  29. package/dist/ci/benchmark.js +213 -0
  30. package/dist/ci/benchmark.js.map +1 -0
  31. package/dist/ci/ed-runner.d.ts +48 -0
  32. package/dist/ci/ed-runner.d.ts.map +1 -0
  33. package/dist/ci/ed-runner.js +260 -0
  34. package/dist/ci/ed-runner.js.map +1 -0
  35. package/dist/ci/executor.d.ts +13 -0
  36. package/dist/ci/executor.d.ts.map +1 -0
  37. package/dist/ci/executor.js +542 -0
  38. package/dist/ci/executor.js.map +1 -0
  39. package/dist/ci/git-info.d.ts +17 -0
  40. package/dist/ci/git-info.d.ts.map +1 -0
  41. package/dist/ci/git-info.js +102 -0
  42. package/dist/ci/git-info.js.map +1 -0
  43. package/dist/ci/index.d.ts +6 -0
  44. package/dist/ci/index.d.ts.map +1 -0
  45. package/dist/ci/index.js +4 -0
  46. package/dist/ci/index.js.map +1 -0
  47. package/dist/ci/measurement.d.ts +9 -0
  48. package/dist/ci/measurement.d.ts.map +1 -0
  49. package/dist/ci/measurement.js +15 -0
  50. package/dist/ci/measurement.js.map +1 -0
  51. package/dist/ci/replay.d.ts +31 -0
  52. package/dist/ci/replay.d.ts.map +1 -0
  53. package/dist/ci/replay.js +96 -0
  54. package/dist/ci/replay.js.map +1 -0
  55. package/dist/ci/reporters/default.d.ts +8 -0
  56. package/dist/ci/reporters/default.d.ts.map +1 -0
  57. package/dist/ci/reporters/default.js +46 -0
  58. package/dist/ci/reporters/default.js.map +1 -0
  59. package/dist/ci/reporters/index.d.ts +8 -0
  60. package/dist/ci/reporters/index.d.ts.map +1 -0
  61. package/dist/ci/reporters/index.js +14 -0
  62. package/dist/ci/reporters/index.js.map +1 -0
  63. package/dist/ci/reporters/json.d.ts +8 -0
  64. package/dist/ci/reporters/json.d.ts.map +1 -0
  65. package/dist/ci/reporters/json.js +14 -0
  66. package/dist/ci/reporters/json.js.map +1 -0
  67. package/dist/ci/reporters/junit.d.ts +8 -0
  68. package/dist/ci/reporters/junit.d.ts.map +1 -0
  69. package/dist/ci/reporters/junit.js +48 -0
  70. package/dist/ci/reporters/junit.js.map +1 -0
  71. package/dist/ci/runner.d.ts +3 -0
  72. package/dist/ci/runner.d.ts.map +1 -0
  73. package/dist/ci/runner.js +187 -0
  74. package/dist/ci/runner.js.map +1 -0
  75. package/dist/ci/test-discovery.d.ts +5 -0
  76. package/dist/ci/test-discovery.d.ts.map +1 -0
  77. package/dist/ci/test-discovery.js +11 -0
  78. package/dist/ci/test-discovery.js.map +1 -0
  79. package/dist/ci/test-loader.d.ts +19 -0
  80. package/dist/ci/test-loader.d.ts.map +1 -0
  81. package/dist/ci/test-loader.js +149 -0
  82. package/dist/ci/test-loader.js.map +1 -0
  83. package/dist/ci/test-registry.d.ts +42 -0
  84. package/dist/ci/test-registry.d.ts.map +1 -0
  85. package/dist/ci/test-registry.js +18 -0
  86. package/dist/ci/test-registry.js.map +1 -0
  87. package/dist/ci/trace-schema.d.ts +30 -0
  88. package/dist/ci/trace-schema.d.ts.map +1 -0
  89. package/dist/ci/trace-schema.js +66 -0
  90. package/dist/ci/trace-schema.js.map +1 -0
  91. package/dist/ci/trace-writer.d.ts +16 -0
  92. package/dist/ci/trace-writer.d.ts.map +1 -0
  93. package/dist/ci/trace-writer.js +108 -0
  94. package/dist/ci/trace-writer.js.map +1 -0
  95. package/dist/ci/types.d.ts +108 -0
  96. package/dist/ci/types.d.ts.map +1 -0
  97. package/dist/ci/types.js +3 -0
  98. package/dist/ci/types.js.map +1 -0
  99. package/dist/ci/upload-client.d.ts +74 -0
  100. package/dist/ci/upload-client.d.ts.map +1 -0
  101. package/dist/ci/upload-client.js +195 -0
  102. package/dist/ci/upload-client.js.map +1 -0
  103. package/dist/cli.d.ts +3 -0
  104. package/dist/cli.d.ts.map +1 -0
  105. package/dist/cli.js +716 -0
  106. package/dist/cli.js.map +1 -0
  107. package/dist/core/agent-state.d.ts +47 -0
  108. package/dist/core/agent-state.d.ts.map +1 -0
  109. package/dist/core/agent-state.js +137 -0
  110. package/dist/core/agent-state.js.map +1 -0
  111. package/dist/core/judge-utils.d.ts +22 -0
  112. package/dist/core/judge-utils.d.ts.map +1 -0
  113. package/dist/core/judge-utils.js +211 -0
  114. package/dist/core/judge-utils.js.map +1 -0
  115. package/dist/core/registry.d.ts +28 -0
  116. package/dist/core/registry.d.ts.map +1 -0
  117. package/dist/core/registry.js +52 -0
  118. package/dist/core/registry.js.map +1 -0
  119. package/dist/dashboard-server.d.ts +65 -0
  120. package/dist/dashboard-server.d.ts.map +1 -0
  121. package/dist/dashboard-server.js +3940 -0
  122. package/dist/dashboard-server.js.map +1 -0
  123. package/dist/execution/tool-runner.d.ts +26 -0
  124. package/dist/execution/tool-runner.d.ts.map +1 -0
  125. package/dist/execution/tool-runner.js +316 -0
  126. package/dist/execution/tool-runner.js.map +1 -0
  127. package/dist/html/dashboard.html +2218 -0
  128. package/dist/http.d.ts +14 -0
  129. package/dist/http.d.ts.map +1 -0
  130. package/dist/http.js +13 -0
  131. package/dist/http.js.map +1 -0
  132. package/dist/index.cjs +8102 -0
  133. package/dist/index.d.ts +61 -0
  134. package/dist/index.d.ts.map +1 -0
  135. package/dist/index.js +67 -0
  136. package/dist/index.js.map +1 -0
  137. package/dist/interceptors/ai-interceptor.d.ts +26 -0
  138. package/dist/interceptors/ai-interceptor.d.ts.map +1 -0
  139. package/dist/interceptors/ai-interceptor.js +756 -0
  140. package/dist/interceptors/ai-interceptor.js.map +1 -0
  141. package/dist/interceptors/db-auto.d.ts +8 -0
  142. package/dist/interceptors/db-auto.d.ts.map +1 -0
  143. package/dist/interceptors/db-auto.js +217 -0
  144. package/dist/interceptors/db-auto.js.map +1 -0
  145. package/dist/interceptors/db.d.ts +23 -0
  146. package/dist/interceptors/db.d.ts.map +1 -0
  147. package/dist/interceptors/db.js +137 -0
  148. package/dist/interceptors/db.js.map +1 -0
  149. package/dist/interceptors/http.d.ts +28 -0
  150. package/dist/interceptors/http.d.ts.map +1 -0
  151. package/dist/interceptors/http.js +356 -0
  152. package/dist/interceptors/http.js.map +1 -0
  153. package/dist/interceptors/side-effects.d.ts +7 -0
  154. package/dist/interceptors/side-effects.d.ts.map +1 -0
  155. package/dist/interceptors/side-effects.js +72 -0
  156. package/dist/interceptors/side-effects.js.map +1 -0
  157. package/dist/interceptors/telemetry-push.d.ts +142 -0
  158. package/dist/interceptors/telemetry-push.d.ts.map +1 -0
  159. package/dist/interceptors/telemetry-push.js +463 -0
  160. package/dist/interceptors/telemetry-push.js.map +1 -0
  161. package/dist/interceptors/tool.d.ts +2 -0
  162. package/dist/interceptors/tool.d.ts.map +1 -0
  163. package/dist/interceptors/tool.js +274 -0
  164. package/dist/interceptors/tool.js.map +1 -0
  165. package/dist/interceptors/workflow-ai.d.ts +5 -0
  166. package/dist/interceptors/workflow-ai.d.ts.map +1 -0
  167. package/dist/interceptors/workflow-ai.js +382 -0
  168. package/dist/interceptors/workflow-ai.js.map +1 -0
  169. package/dist/internals/conditional-recorder.d.ts +21 -0
  170. package/dist/internals/conditional-recorder.d.ts.map +1 -0
  171. package/dist/internals/conditional-recorder.js +54 -0
  172. package/dist/internals/conditional-recorder.js.map +1 -0
  173. package/dist/internals/mock-resolver.d.ts +146 -0
  174. package/dist/internals/mock-resolver.d.ts.map +1 -0
  175. package/dist/internals/mock-resolver.js +427 -0
  176. package/dist/internals/mock-resolver.js.map +1 -0
  177. package/dist/matchers/index.d.ts +96 -0
  178. package/dist/matchers/index.d.ts.map +1 -0
  179. package/dist/matchers/index.js +668 -0
  180. package/dist/matchers/index.js.map +1 -0
  181. package/dist/observability.d.ts +82 -0
  182. package/dist/observability.d.ts.map +1 -0
  183. package/dist/observability.js +471 -0
  184. package/dist/observability.js.map +1 -0
  185. package/dist/portal-executor.d.ts +30 -0
  186. package/dist/portal-executor.d.ts.map +1 -0
  187. package/dist/portal-executor.js +324 -0
  188. package/dist/portal-executor.js.map +1 -0
  189. package/dist/portal-server.d.ts +3 -0
  190. package/dist/portal-server.d.ts.map +1 -0
  191. package/dist/portal-server.js +279 -0
  192. package/dist/portal-server.js.map +1 -0
  193. package/dist/proxy/llm-capture.d.ts +14 -0
  194. package/dist/proxy/llm-capture.d.ts.map +1 -0
  195. package/dist/proxy/llm-capture.js +264 -0
  196. package/dist/proxy/llm-capture.js.map +1 -0
  197. package/dist/reporter.d.ts +3 -0
  198. package/dist/reporter.d.ts.map +1 -0
  199. package/dist/reporter.js +72 -0
  200. package/dist/reporter.js.map +1 -0
  201. package/dist/runWorkflowSubprocess.d.ts +14 -0
  202. package/dist/runWorkflowSubprocess.d.ts.map +1 -0
  203. package/dist/runWorkflowSubprocess.js +66 -0
  204. package/dist/runWorkflowSubprocess.js.map +1 -0
  205. package/dist/runner.d.ts +16 -0
  206. package/dist/runner.d.ts.map +1 -0
  207. package/dist/runner.js +138 -0
  208. package/dist/runner.js.map +1 -0
  209. package/dist/socket-connector.d.ts +22 -0
  210. package/dist/socket-connector.d.ts.map +1 -0
  211. package/dist/socket-connector.js +104 -0
  212. package/dist/socket-connector.js.map +1 -0
  213. package/dist/telemetry-batcher.d.ts +56 -0
  214. package/dist/telemetry-batcher.d.ts.map +1 -0
  215. package/dist/telemetry-batcher.js +143 -0
  216. package/dist/telemetry-batcher.js.map +1 -0
  217. package/dist/test-setup.d.ts +12 -0
  218. package/dist/test-setup.d.ts.map +1 -0
  219. package/dist/test-setup.js +13 -0
  220. package/dist/test-setup.js.map +1 -0
  221. package/dist/tool-registry.d.ts +31 -0
  222. package/dist/tool-registry.d.ts.map +1 -0
  223. package/dist/tool-registry.js +73 -0
  224. package/dist/tool-registry.js.map +1 -0
  225. package/dist/tool-runner-worker.d.ts +2 -0
  226. package/dist/tool-runner-worker.d.ts.map +1 -0
  227. package/dist/tool-runner-worker.js +215 -0
  228. package/dist/tool-runner-worker.js.map +1 -0
  229. package/dist/trace-adapter/context.d.ts +72 -0
  230. package/dist/trace-adapter/context.d.ts.map +1 -0
  231. package/dist/trace-adapter/context.js +80 -0
  232. package/dist/trace-adapter/context.js.map +1 -0
  233. package/dist/tracing.d.ts +2 -0
  234. package/dist/tracing.d.ts.map +1 -0
  235. package/dist/tracing.js +59 -0
  236. package/dist/tracing.js.map +1 -0
  237. package/dist/trigger-executor.d.ts +12 -0
  238. package/dist/trigger-executor.d.ts.map +1 -0
  239. package/dist/trigger-executor.js +130 -0
  240. package/dist/trigger-executor.js.map +1 -0
  241. package/dist/types/portal.d.ts +76 -0
  242. package/dist/types/portal.d.ts.map +1 -0
  243. package/dist/types/portal.js +2 -0
  244. package/dist/types/portal.js.map +1 -0
  245. package/dist/utils/debug.d.ts +3 -0
  246. package/dist/utils/debug.d.ts.map +1 -0
  247. package/dist/utils/debug.js +8 -0
  248. package/dist/utils/debug.js.map +1 -0
  249. package/dist/utils/license-error.d.ts +23 -0
  250. package/dist/utils/license-error.d.ts.map +1 -0
  251. package/dist/utils/license-error.js +42 -0
  252. package/dist/utils/license-error.js.map +1 -0
  253. package/dist/utils/redact.d.ts +7 -0
  254. package/dist/utils/redact.d.ts.map +1 -0
  255. package/dist/utils/redact.js +26 -0
  256. package/dist/utils/redact.js.map +1 -0
  257. package/dist/workflow-runner-worker.d.ts +2 -0
  258. package/dist/workflow-runner-worker.d.ts.map +1 -0
  259. package/dist/workflow-runner-worker.js +329 -0
  260. package/dist/workflow-runner-worker.js.map +1 -0
  261. package/dist/workflow-runner.d.ts +14 -0
  262. package/dist/workflow-runner.d.ts.map +1 -0
  263. package/dist/workflow-runner.js +34 -0
  264. package/dist/workflow-runner.js.map +1 -0
  265. package/docs/agent-coding-instructions.md +138 -0
  266. package/docs/agent-integration-guide.md +564 -0
  267. package/docs/agents.md +140 -0
  268. package/docs/dashboard.md +394 -0
  269. package/docs/deno.md +69 -0
  270. package/docs/instrumentation.md +424 -0
  271. package/docs/langfuse-trace-structure.md +145 -0
  272. package/docs/matchers.md +173 -0
  273. package/docs/observability_contract.md +192 -0
  274. package/docs/observability_mode.md +195 -0
  275. package/docs/quickstart.md +621 -0
  276. package/docs/security-compliance.md +566 -0
  277. package/docs/test-writing-guidelines.md +444 -0
  278. package/docs/tools.md +165 -0
  279. package/docs/workflow-modes.md +253 -0
  280. package/package.json +76 -0
  281. package/src/browser-ui.ts +281 -0
  282. package/src/capture/event.ts +30 -0
  283. package/src/capture/index.ts +3 -0
  284. package/src/capture/recorder.ts +62 -0
  285. package/src/capture/replay.ts +55 -0
  286. package/src/ci/api-client.ts +136 -0
  287. package/src/ci/benchmark.ts +257 -0
  288. package/src/ci/ed-runner.ts +351 -0
  289. package/src/ci/executor.ts +671 -0
  290. package/src/ci/git-info.ts +127 -0
  291. package/src/ci/index.ts +5 -0
  292. package/src/ci/measurement.ts +25 -0
  293. package/src/ci/replay.ts +127 -0
  294. package/src/ci/reporters/default.ts +50 -0
  295. package/src/ci/reporters/index.ts +21 -0
  296. package/src/ci/reporters/json.ts +18 -0
  297. package/src/ci/reporters/junit.ts +61 -0
  298. package/src/ci/runner.ts +208 -0
  299. package/src/ci/test-discovery.ts +16 -0
  300. package/src/ci/test-loader.ts +187 -0
  301. package/src/ci/test-registry.ts +62 -0
  302. package/src/ci/trace-schema.ts +96 -0
  303. package/src/ci/trace-writer.ts +107 -0
  304. package/src/ci/types.ts +115 -0
  305. package/src/ci/upload-client.ts +300 -0
  306. package/src/cli.ts +811 -0
  307. package/src/core/agent-state.ts +162 -0
  308. package/src/core/judge-utils.ts +232 -0
  309. package/src/core/registry.ts +92 -0
  310. package/src/dashboard-server.ts +2047 -0
  311. package/src/execution/tool-runner.ts +352 -0
  312. package/src/html/dashboard.html +2218 -0
  313. package/src/http.ts +13 -0
  314. package/src/index.ts +138 -0
  315. package/src/interceptors/ai-interceptor.ts +798 -0
  316. package/src/interceptors/db-auto.ts +243 -0
  317. package/src/interceptors/db.ts +156 -0
  318. package/src/interceptors/http.ts +393 -0
  319. package/src/interceptors/side-effects.ts +83 -0
  320. package/src/interceptors/telemetry-push.ts +537 -0
  321. package/src/interceptors/tool.ts +287 -0
  322. package/src/interceptors/workflow-ai.ts +419 -0
  323. package/src/internals/conditional-recorder.ts +63 -0
  324. package/src/internals/mock-resolver.ts +492 -0
  325. package/src/matchers/index.ts +824 -0
  326. package/src/observability.ts +501 -0
  327. package/src/portal-executor.ts +355 -0
  328. package/src/portal-server.ts +304 -0
  329. package/src/proxy/llm-capture.ts +301 -0
  330. package/src/reporter.ts +81 -0
  331. package/src/runWorkflowSubprocess.ts +74 -0
  332. package/src/runner.ts +178 -0
  333. package/src/socket-connector.ts +117 -0
  334. package/src/telemetry-batcher.ts +191 -0
  335. package/src/test-setup.ts +16 -0
  336. package/src/tool-registry.ts +94 -0
  337. package/src/tool-runner-worker.ts +244 -0
  338. package/src/trace-adapter/context.ts +156 -0
  339. package/src/tracing.ts +62 -0
  340. package/src/trigger-executor.ts +171 -0
  341. package/src/types/agent.d.ts +63 -0
  342. package/src/types/expect.d.ts +81 -0
  343. package/src/types/modules.d.ts +2 -0
  344. package/src/types/portal.ts +69 -0
  345. package/src/utils/debug.ts +8 -0
  346. package/src/utils/license-error.ts +43 -0
  347. package/src/utils/redact.ts +25 -0
  348. package/src/workflow-runner-worker.ts +386 -0
  349. package/src/workflow-runner.ts +58 -0
@@ -0,0 +1,187 @@
1
+ import { readFile } from 'node:fs/promises'
2
+ import { resolve, dirname } from 'node:path'
3
+ import { pathToFileURL } from 'node:url'
4
+ import { discoverTestFiles } from './test-discovery.js'
5
+ import { getTestRegistry, clearTestRegistry } from './test-registry.js'
6
+ import type { TestDefinition } from './test-registry.js'
7
+ import type { DiskTrace } from './trace-schema.js'
8
+
9
/**
 * A test definition that passed every check performed by `loadTests`,
 * enriched with the trace it refers to.
 */
export interface ValidatedTest extends TestDefinition {
  /** `trace` resolved against the directory of the file that defined the test. */
  resolvedTracePath: string
  /** Parsed JSON content of the file at `resolvedTracePath`. */
  traceData: DiskTrace
}
13
+
14
/**
 * A problem found while importing a test file or validating one of its
 * definitions.
 */
export interface ValidationError {
  /** Path of the test file the problem belongs to. */
  file: string
  /** Name of the offending test; absent when the file itself failed to import. */
  testName?: string
  /** Human-readable description of the problem. */
  message: string
}
19
+
20
/** Outcome of `loadTests`: the usable tests plus everything that was rejected. */
export interface LoadTestsResult {
  /** Definitions that passed validation, each with its parsed trace attached. */
  tests: ValidatedTest[]
  /** Import and validation problems, in the order they were encountered. */
  errors: ValidationError[]
}
24
+
25
/** True when `value` is a number strictly greater than zero (NaN is rejected). */
function isPositiveNumber(value: unknown): value is number {
  return typeof value === 'number' && value > 0
}

/**
 * Discover the test files under `cwd`, import each one so its `defineTest`
 * calls populate the registry, and validate every collected definition
 * against the trace file it references.
 *
 * Import failures and invalid definitions never throw: each one is reported
 * as an entry in `errors`, and the remaining files/definitions are still
 * processed.
 *
 * @param options.cwd Directory passed to test discovery. Defaults to `process.cwd()`.
 * @returns The validated tests (with `resolvedTracePath` and `traceData`
 *   attached) and the list of problems found.
 */
export async function loadTests(options?: { cwd?: string }): Promise<LoadTestsResult> {
  const cwd = options?.cwd ?? process.cwd()
  const files = await discoverTestFiles({ cwd })

  const allDefinitions: TestDefinition[] = []
  const errors: ValidationError[] = []

  // Phase 1: import each test file and collect definitions
  for (const file of files) {
    // Start from an empty registry so the definitions read back below belong to this file only.
    clearTestRegistry()
    try {
      // Cache-bust so re-imports within the same process re-execute the module
      const fileUrl = pathToFileURL(file).href + `?t=${Date.now()}`
      await import(fileUrl)
    } catch (err) {
      errors.push({
        file,
        message: `Failed to import test file: ${err instanceof Error ? err.message : String(err)}`,
      })
      continue
    }
    const defs = getTestRegistry()
    for (const def of defs) {
      def._sourceFile = file
    }
    allDefinitions.push(...defs)
  }
  clearTestRegistry()

  // Phase 2: validate definitions
  const tests: ValidatedTest[] = []
  const seenNames = new Map<string, string>() // name → source file

  for (const def of allDefinitions) {
    // Always set in phase 1 for every collected definition.
    const file = def._sourceFile!

    // Validate name
    if (!def.name || typeof def.name !== 'string' || def.name.trim().length === 0) {
      errors.push({ file, testName: def.name, message: 'Test name must be a non-empty string' })
      continue
    }

    // Check uniqueness
    if (seenNames.has(def.name)) {
      errors.push({
        file,
        testName: def.name,
        message: `Duplicate test name "${def.name}" (also defined in ${seenNames.get(def.name)})`,
      })
      continue
    }
    seenNames.set(def.name, file)

    // Validate the trace path before handing it to path.resolve, which throws
    // a TypeError on non-string arguments and would abort the whole load.
    if (typeof def.trace !== 'string' || def.trace.trim().length === 0) {
      errors.push({ file, testName: def.name, message: 'Trace must be a non-empty string path' })
      continue
    }

    // Resolve trace path relative to the test file's directory
    const resolvedTracePath = resolve(dirname(file), def.trace)

    // Read and parse trace file
    let parsedTrace: unknown
    try {
      const raw = await readFile(resolvedTracePath, 'utf-8')
      parsedTrace = JSON.parse(raw)
    } catch (err) {
      errors.push({
        file,
        testName: def.name,
        message: `Cannot read/parse trace file "${def.trace}": ${err instanceof Error ? err.message : String(err)}`,
      })
      continue
    }

    // Validate trace has steps array. Valid JSON may still be `null` or a
    // primitive, so check the container before touching `.steps`.
    if (
      typeof parsedTrace !== 'object' ||
      parsedTrace === null ||
      !Array.isArray((parsedTrace as DiskTrace).steps)
    ) {
      errors.push({ file, testName: def.name, message: `Trace file "${def.trace}" has no steps array` })
      continue
    }
    const traceData = parsedTrace as DiskTrace

    // Validate target
    if (!def.target || !def.target.step_id || !def.target.type) {
      errors.push({ file, testName: def.name, message: 'Target must have type and step_id' })
      continue
    }

    // `?.` tolerates null/primitive entries in a hand-edited steps array.
    const matchedStep = traceData.steps.find(s => s?.step_id === def.target.step_id)
    if (!matchedStep) {
      errors.push({
        file,
        testName: def.name,
        message: `step_id "${def.target.step_id}" not found in trace "${def.trace}"`,
      })
      continue
    }

    if (matchedStep.type !== def.target.type) {
      errors.push({
        file,
        testName: def.name,
        message: `target.type "${def.target.type}" does not match step type "${matchedStep.type}" for step_id "${def.target.step_id}"`,
      })
      continue
    }

    // Validate benchmarks
    if (!def.benchmarks || typeof def.benchmarks !== 'object') {
      errors.push({ file, testName: def.name, message: 'Benchmarks must be specified' })
      continue
    }

    const hasMaxDuration = def.benchmarks.max_duration_ms !== undefined
    const hasMaxTokens = def.benchmarks.max_tokens_total !== undefined
    const hasOutputContains = def.benchmarks.output_contains !== undefined
    const hasOutputNotContains = def.benchmarks.output_not_contains !== undefined
    const hasLLMJudge = def.benchmarks.llm_judge !== undefined

    if (!hasMaxDuration && !hasMaxTokens && !hasOutputContains && !hasOutputNotContains && !hasLLMJudge) {
      errors.push({ file, testName: def.name, message: 'At least one benchmark must be specified' })
      continue
    }

    // Numeric benchmark problems are all reported before the test is dropped.
    let valid = true

    if (hasMaxDuration && !isPositiveNumber(def.benchmarks.max_duration_ms)) {
      errors.push({ file, testName: def.name, message: 'max_duration_ms must be a positive number' })
      valid = false
    }

    if (hasMaxTokens) {
      if (!isPositiveNumber(def.benchmarks.max_tokens_total)) {
        errors.push({ file, testName: def.name, message: 'max_tokens_total must be a positive number' })
        valid = false
      }
      if (def.target.type !== 'ai_call') {
        errors.push({ file, testName: def.name, message: 'max_tokens_total can only be used with ai_call targets' })
        valid = false
      }
    }

    if (!valid) continue

    // Validate run (optional but must be a function if present)
    if (def.run !== undefined && typeof def.run !== 'function') {
      errors.push({ file, testName: def.name, message: 'run must be a function' })
      continue
    }

    // Validate timeout_ms (optional but must be a positive number if present)
    if (def.timeout_ms !== undefined && !isPositiveNumber(def.timeout_ms)) {
      errors.push({ file, testName: def.name, message: 'timeout_ms must be a positive number' })
      continue
    }

    tests.push({
      ...def,
      resolvedTracePath,
      traceData,
    })
  }

  return { tests, errors }
}
@@ -0,0 +1,62 @@
1
/** Identifies the single trace step a test is evaluated against. */
export interface TestTarget {
  /** Kind of step being targeted. */
  type: 'tool_call' | 'ai_call'
  /** Identifier of the targeted step within the trace's `steps` array. */
  step_id: string
}
5
+
6
/** Settings for an LLM-as-a-judge evaluation of a step's output. */
export interface LLMJudgeBenchmark {
  /** Prompt given to the judge LLM so it can evaluate the step's output. */
  judge_prompt: string
  /** Lowest score (0-10) that still counts as a pass. Defaults to 7. */
  judge_score_threshold?: number
  /** Provider of the judge LLM. Defaults to 'openai'. */
  judge_provider?: 'openai' | 'claude' | 'gemini' | 'grok' | 'kimi'
  /** Overrides the model used by the judge LLM. */
  judge_model?: string
}
16
+
17
/** Assertions a test can place on its target step; all fields are optional. */
export interface TestBenchmarks {
  /** Upper bound on the step's duration, in milliseconds. */
  max_duration_ms?: number
  /** Upper bound on the step's total token count. */
  max_tokens_total?: number
  /** The step's output must contain this substring. */
  output_contains?: string
  /** The step's output must NOT contain this substring. */
  output_not_contains?: string
  /** Quality evaluation of the step's output by an LLM judge. */
  llm_judge?: LLMJudgeBenchmark
}
27
+
28
/** A single test as registered through `defineTest`. */
export interface TestDefinition {
  /** Name of the test. */
  name: string
  /** Path to the trace file this test is evaluated against. */
  trace: string
  /** The trace step under test. */
  target: TestTarget
  /** Assertions applied to the target step. */
  benchmarks: TestBenchmarks
  /**
   * Custom input replacing the input recorded in the trace. Either a static
   * value or a (possibly async) function for dynamic resolution, e.g. fetching
   * from a database or API.
   */
  input?: unknown | (() => Promise<unknown> | unknown)
  /**
   * Invokes the workflow under test with the resolved input (custom or from
   * the trace). Required for execution (Phase 3).
   */
  run?: (input?: unknown) => Promise<void>
  /** Timeout for this test in milliseconds. Defaults to 60000. */
  timeout_ms?: number
  /** Absolute path of the source file; set internally by the loader. */
  _sourceFile?: string
}
42
+
43
// The registry array lives on globalThis. A globally installed CLI and a test
// file importing the SDK from its local node_modules may load two separate
// copies of this module; both must still push to and read from one array.
const ED_TEST_REGISTRY_KEY = '__elasticdash_ed_test_registry__'
const g = globalThis as Record<string, unknown>
const sharedRegistry = g[ED_TEST_REGISTRY_KEY] as TestDefinition[]
// Reuse the array published by another module instance, otherwise start empty.
const registry: TestDefinition[] = sharedRegistry ?? []
if (!g[ED_TEST_REGISTRY_KEY]) {
  g[ED_TEST_REGISTRY_KEY] = registry
}
51
+
52
/**
 * Register a test. A shallow copy of `def` is appended to the registry, so
 * later top-level mutations of the caller's object do not affect the stored
 * entry.
 */
export function defineTest(def: Omit<TestDefinition, '_sourceFile'>): void {
  const entry = Object.assign({}, def)
  registry.push(entry)
}
55
+
56
/**
 * Snapshot of the registered tests. The returned array is a fresh copy, but
 * its elements are the same objects held by the registry.
 */
export function getTestRegistry(): TestDefinition[] {
  return registry.slice()
}
59
+
60
/** Empty the registry in place; the array object itself is kept. */
export function clearTestRegistry(): void {
  registry.splice(0, registry.length)
}
@@ -0,0 +1,96 @@
1
+ import { readFileSync } from 'node:fs'
2
+ import { join, dirname } from 'node:path'
3
+ import { fileURLToPath } from 'node:url'
4
+ import type { WorkflowEvent } from '../capture/event.js'
5
+
6
/**
 * Version string read from the `package.json` two directories above this
 * module. Stays `'unknown'` when that file cannot be read or parsed, or when
 * it carries no usable string `version`.
 */
export let SDK_VERSION = 'unknown'
try {
  // Works in both ESM (import.meta.url) and CJS (__dirname) contexts
  const base = typeof __dirname !== 'undefined' ? __dirname : dirname(fileURLToPath(import.meta.url))
  const pkg: unknown = JSON.parse(readFileSync(join(base, '..', '..', 'package.json'), 'utf-8'))
  // Only accept a real string so SDK_VERSION never becomes undefined or
  // another non-string value that would be written into traces.
  const version = (pkg as { version?: unknown } | null)?.version
  if (typeof version === 'string' && version.length > 0) {
    SDK_VERSION = version
  }
} catch {
  // Bundled context — version not critical
}
15
+
16
/** One recorded tool or AI call inside an on-disk trace. */
export interface DiskTraceStep {
  /** Identifier of the step within its trace. */
  step_id: string
  /** Kind of call this step represents. */
  type: 'tool_call' | 'ai_call'
  /** Name of the recorded call. */
  name: string
  /** Input recorded for the call. */
  input: unknown
  /** Output recorded for the call. */
  output: unknown
  /** Start time of the call as a string. */
  started_at: string
  /** End time of the call as a string. */
  ended_at: string
  /** Duration of the call in milliseconds. */
  duration_ms: number
  /** Token usage of the call; may be absent or null. */
  tokens?: { input: number; output: number; total: number } | null
}
27
+
28
/** Shape of a trace file as stored on disk. */
export interface DiskTrace {
  /** Identifier of the trace. */
  trace_id: string
  /** Creation time of the trace as a string. */
  created_at: string
  /** Version of the SDK that produced the trace. */
  sdk_version: string
  /** Summary of the workflow the trace belongs to. */
  workflow: {
    name: string
    input: unknown
    output: unknown
  }
  /** Recorded tool and AI calls. */
  steps: DiskTraceStep[]
}
39
+
40
/**
 * Translate a workflow event type into the step type used in disk traces.
 * Returns `null` for every event type that is not recorded as a step.
 */
function mapEventType(type: string): 'tool_call' | 'ai_call' | null {
  switch (type) {
    case 'tool':
      return 'tool_call'
    case 'ai':
      return 'ai_call'
    default:
      return null
  }
}
45
+
46
/**
 * Build the on-disk trace representation from captured workflow events.
 *
 * Events whose type maps to a step type (see `mapEventType`) become steps, in
 * input order. Step ids are `<step type>_<n>`, where `n` is a zero-based
 * counter kept separately for each step type. Workflow name, input and output
 * are taken from the first event of type `'workflow'`, falling back to
 * `'unknown'` / `null` when there is none.
 *
 * @param events  Captured events.
 * @param traceId Value stored as `trace_id`.
 */
export function workflowEventsToDiskTrace(events: WorkflowEvent[], traceId: string): DiskTrace {
  const nextIndexByType: Record<string, number> = { tool_call: 0, ai_call: 0 }
  const steps: DiskTraceStep[] = []

  events.forEach(evt => {
    const stepType = mapEventType(evt.type)
    if (stepType === null) return

    const ordinal = nextIndexByType[stepType]++

    // The end time is derived from the start timestamp plus the duration.
    const startIso = new Date(evt.timestamp).toISOString()
    const endIso = new Date(evt.timestamp + evt.durationMs).toISOString()

    // Token usage is only recorded for AI calls that reported it.
    let tokenUsage: DiskTraceStep['tokens'] = null
    if (stepType === 'ai_call' && evt.usage) {
      const inputCount = evt.usage.inputTokens ?? 0
      const outputCount = evt.usage.outputTokens ?? 0
      tokenUsage = {
        input: inputCount,
        output: outputCount,
        // Fall back to the sum when the event carries no explicit total.
        total: evt.usage.totalTokens ?? (inputCount + outputCount),
      }
    }

    steps.push({
      step_id: `${stepType}_${ordinal}`,
      type: stepType,
      name: evt.name,
      input: evt.input,
      output: evt.output,
      started_at: startIso,
      ended_at: endIso,
      duration_ms: evt.durationMs,
      tokens: tokenUsage,
    })
  })

  const workflowEvent = events.find(candidate => candidate.type === 'workflow')

  return {
    trace_id: traceId,
    created_at: new Date().toISOString(),
    sdk_version: SDK_VERSION,
    workflow: {
      name: workflowEvent?.name ?? 'unknown',
      input: workflowEvent?.input ?? null,
      output: workflowEvent?.output ?? null,
    },
    steps,
  }
}
@@ -0,0 +1,107 @@
1
+ import { mkdir, writeFile } from 'node:fs/promises'
2
+ import { mkdirSync, writeFileSync } from 'node:fs'
3
+ import { join } from 'node:path'
4
+ import { randomBytes } from 'node:crypto'
5
+ import type { WorkflowEvent } from '../capture/event.js'
6
+ import { workflowEventsToDiskTrace } from './trace-schema.js'
7
+ import type { DiskTrace } from './trace-schema.js'
8
+
9
/** Name of the directory, created under the working directory, that receives trace files. */
const TRACE_DIR = '.ed_traces'

/**
 * Object keys whose values are redacted before a trace is written. The match
 * is case-insensitive and covers the WHOLE key, so `token` matches but
 * `tokens` or `x-api-key` do not.
 */
const SENSITIVE_KEYS = /^(authorization|api_key|apikey|password|secret|token)$/i
12
+
13
/** Ensures the "unexpected value" hint below is printed at most once per process. */
let _captureCheckLogged = false

/**
 * Report whether trace capture is switched on, i.e. whether the
 * `ELASTICDASH_CAPTURE_TRACE` environment variable is exactly `"1"`.
 *
 * The first time the check comes back disabled, a hint is logged if the
 * variable is set to some other value, so users can diagnose env var issues.
 */
export function isTraceCaptureEnabled(): boolean {
  const raw = process.env.ELASTICDASH_CAPTURE_TRACE
  const enabled = raw === '1'

  if (!enabled && !_captureCheckLogged) {
    _captureCheckLogged = true
    if (raw !== undefined) {
      console.log(`[elasticdash] ELASTICDASH_CAPTURE_TRACE=${JSON.stringify(raw)} (expected "1" to enable)`)
    }
  }

  return enabled
}
26
+
27
/**
 * Produce a trace file name of the form `<timestamp>_<suffix>.json`, where the
 * timestamp is the current time in ISO format without milliseconds and with
 * `:` replaced by `-`, and the suffix is four random hex characters.
 */
export function generateTraceFilename(): string {
  const iso = new Date().toISOString()
  const withoutMillis = iso.replace(/\.\d{3}Z$/, '') // remove milliseconds
  const ts = withoutMillis.replace(/:/g, '-') // filesystem-safe
  const suffix = randomBytes(2).toString('hex') // 4 hex chars
  return `${ts}_${suffix}.json`
}
35
+
36
/**
 * Return a deep copy of `obj` in which the value of every property whose key
 * matches `SENSITIVE_KEYS` is replaced by `'[REDACTED]'`.
 *
 * - Primitives, `null`, `undefined` and functions are returned unchanged.
 * - Arrays and objects are rebuilt recursively from their own enumerable
 *   properties; the input is never mutated.
 * - `Date` instances are passed through untouched. They have no own enumerable
 *   properties, so rebuilding them would turn every timestamp into `{}`.
 * - A reference back to an object that is currently being traversed is
 *   replaced by `'[Circular]'` instead of recursing until the stack overflows.
 *   An object that is merely referenced from several places is copied each time.
 *
 * @param obj Any value, typically a trace about to be serialized.
 * @returns The scrubbed copy.
 */
export function scrubSecrets(obj: unknown): unknown {
  // Objects on the current path from the root; entries are removed on the way
  // back up so that shared (non-circular) references are not misreported.
  const ancestors = new WeakSet<object>()

  const walk = (value: unknown): unknown => {
    if (value === null || typeof value !== 'object') return value
    if (value instanceof Date) return value
    if (ancestors.has(value)) return '[Circular]'

    ancestors.add(value)
    try {
      if (Array.isArray(value)) return value.map(item => walk(item))

      const result: Record<string, unknown> = {}
      for (const [key, child] of Object.entries(value as Record<string, unknown>)) {
        result[key] = SENSITIVE_KEYS.test(key) ? '[REDACTED]' : walk(child)
      }
      return result
    } finally {
      ancestors.delete(value)
    }
  }

  return walk(obj)
}
52
+
53
/**
 * Scrub a trace and write it as pretty-printed JSON to
 * `<cwd>/<TRACE_DIR>/<trace_id>.json`, creating the directory when needed.
 *
 * Never rejects because of a failed write: errors are reported with
 * `console.warn` and swallowed.
 *
 * @param trace Trace to persist; its `trace_id` determines the file name.
 * @param cwd   Base directory. Defaults to `process.cwd()`.
 */
export async function writeTraceToDisk(trace: DiskTrace, cwd?: string): Promise<void> {
  const baseDir = cwd ?? process.cwd()
  const dir = join(baseDir, TRACE_DIR)
  try {
    await mkdir(dir, { recursive: true })
    // Redact secrets before anything touches the disk.
    const scrubbed = scrubSecrets(trace) as DiskTrace
    const filepath = join(dir, `${scrubbed.trace_id}.json`)
    const payload = JSON.stringify(scrubbed, null, 2)
    await writeFile(filepath, payload, 'utf-8')
    console.log(`[elasticdash] Trace written: ${filepath} (${trace.steps.length} steps)`)
  } catch (err) {
    console.warn(`[elasticdash] Failed to write trace file: ${err instanceof Error ? err.message : String(err)}`)
  }
}
66
+
67
/**
 * Blocking counterpart of `writeTraceToDisk`, for `process.on('exit')`
 * handlers where asynchronous work is not possible. Writes the scrubbed trace
 * to `<cwd>/<TRACE_DIR>/<trace_id>.json`; unlike the async variant it logs
 * nothing on success. Failures are reported on a best-effort basis and never
 * thrown.
 *
 * @param trace Trace to persist; its `trace_id` determines the file name.
 * @param cwd   Base directory. Defaults to `process.cwd()`.
 */
export function writeTraceToDiskSync(trace: DiskTrace, cwd?: string): void {
  const baseDir = cwd ?? process.cwd()
  const dir = join(baseDir, TRACE_DIR)
  try {
    mkdirSync(dir, { recursive: true })
    const scrubbed = scrubSecrets(trace) as DiskTrace
    const filepath = join(dir, `${scrubbed.trace_id}.json`)
    const payload = JSON.stringify(scrubbed, null, 2)
    writeFileSync(filepath, payload, 'utf-8')
  } catch (err) {
    // In sync exit handler, console.warn may not flush — best effort
    try {
      console.warn(`[elasticdash] Failed to write trace file: ${err instanceof Error ? err.message : String(err)}`)
    } catch {}
  }
}
83
+
84
/**
 * Write the given events to disk as a trace, but only when trace capture is
 * enabled and there is at least one event.
 *
 * NOTE: the stored `trace_id` (and therefore the file name) is always the
 * freshly generated timestamp-based id; a caller-supplied `traceId` is
 * overwritten before the trace is written.
 *
 * @param events  Captured workflow events.
 * @param traceId Passed to the trace builder, then replaced (see note).
 * @param cwd     Base directory forwarded to `writeTraceToDisk`.
 */
export async function maybeCaptureTrace(events: WorkflowEvent[], traceId?: string, cwd?: string): Promise<void> {
  if (!isTraceCaptureEnabled() || events.length === 0) return

  // The generated file name, minus its extension, doubles as the trace id.
  const id = generateTraceFilename().replace(/\.json$/, '')
  const trace = workflowEventsToDiskTrace(events, traceId ?? id)
  trace.trace_id = id

  await writeTraceToDisk(trace, cwd)
}
94
+
95
/**
 * Synchronous version of maybeCaptureTrace for process exit handlers: writes
 * the events as a trace when capture is enabled and `events` is non-empty.
 *
 * NOTE: the stored `trace_id` (and therefore the file name) is always the
 * freshly generated timestamp-based id; a caller-supplied `traceId` is
 * overwritten before the trace is written.
 *
 * @param events  Captured workflow events.
 * @param traceId Passed to the trace builder, then replaced (see note).
 * @param cwd     Base directory forwarded to `writeTraceToDiskSync`.
 */
export function maybeCaptureTraceSync(events: WorkflowEvent[], traceId?: string, cwd?: string): void {
  if (!isTraceCaptureEnabled() || events.length === 0) return

  // The generated file name, minus its extension, doubles as the trace id.
  const id = generateTraceFilename().replace(/\.json$/, '')
  const trace = workflowEventsToDiskTrace(events, traceId ?? id)
  trace.trace_id = id

  writeTraceToDiskSync(trace, cwd)
}
@@ -0,0 +1,115 @@
1
+ // ─── CI Runner Types ─────────────────────────────────────────
2
+
3
/**
 * Settings for a CI run: server location and credentials (required), plus
 * optional workflow/tag selection and git metadata.
 *
 * NOTE(review): how each optional field is consumed is not visible in this
 * file section — confirm against the runner implementation.
 */
export interface CIRunConfig {
  /** URL of the server the run talks to. */
  serverUrl: string
  /** API key sent to the server. */
  apiKey: string
  /** Optional workflow name (presumably a filter — confirm). */
  workflowName?: string
  /** Optional tag list (presumably a filter — confirm). */
  tags?: string[]
  /** Origin of the run; only `'ci'` and `'api'` are accepted. */
  triggeredBy?: 'ci' | 'api'

  // Optional git metadata attached to the run.
  gitBranch?: string
  gitCommit?: string
  gitCommitMessage?: string
  gitPrNumber?: number
  gitPrUrl?: string
}
15
+
16
/**
 * Outcome of one test within a test group, including the individual runs
 * and the per-expectation results that were recorded for it.
 */
export interface CITestResult {
  // Identity of the test and the group it belongs to.
  testGroupId: number
  testGroupName: string
  testId: number
  /** Test name; may be `null`. */
  testName: string | null
  testType: string

  /** Overall pass/fail verdict for the test. */
  passed: boolean
  /** Run identifier; may be `null`. */
  runId: number | null
  /** Results of the individual runs of this test. */
  singleRuns: CISingleRunResult[]
  /** Results of the expectations evaluated for this test. */
  expectationResults: CIExpectationResult[]
  /** Error message, when one was recorded. */
  error?: string
  /** Duration in milliseconds. */
  durationMs: number
}
29
+
30
/**
 * Result of a single run of a test: verdict, timing, token counts, and the
 * untyped output/trace payloads.
 */
export interface CISingleRunResult {
  /** Index of this run (NOTE(review): base of the index is not visible here). */
  runIndex: number
  /** Pass/fail verdict for this run. */
  passed: boolean
  /** Duration in milliseconds. */
  durationMs: number

  // Token accounting for the run.
  inputTokens: number
  outputTokens: number
  totalTokens: number

  /** Run output; shape is not constrained by this type. */
  output: unknown
  /** Run trace; shape is not constrained by this type. */
  trace: unknown
  /** Error message, when one was recorded. */
  error?: string
}
41
+
42
/**
 * Result of evaluating one expectation, with an optional breakdown keyed by
 * a numeric index.
 */
export interface CIExpectationResult {
  /** Identifier of the expectation that was evaluated. */
  expectationId: number
  /** Expectation type. */
  type: string
  /** Pass/fail verdict for the expectation. */
  passed: boolean
  /** Optional human-readable detail. */
  detail?: string
  /** Optional per-run verdicts (presumably keyed by run index — confirm). */
  perRun?: Record<number, { passed: boolean; detail?: string }>
}
49
+
50
/**
 * Aggregate summary of a CI run: counters, total duration, and the
 * individual test results.
 */
export interface CIRunSummary {
  // Test counters.
  total: number
  passed: number
  failed: number
  skipped: number

  /** Duration in milliseconds. */
  durationMs: number
  /** Batch identifier; may be `null`. */
  batchId: number | null
  /** Per-test results included in this summary. */
  results: CITestResult[]
}
59
+
60
+ // ─── API Response Types ─────────────────────────────────────
61
+
62
/**
 * Test group as represented in API responses (snake_case field names),
 * together with the tests it contains.
 */
export interface APITestGroup {
  id: number
  name: string
  /** Description; may be `null`. */
  description: string | null
  project_id: number
  workflow_name: string
  /** Trace file payload; shape is not constrained by this type. */
  trace_file: unknown
  status: string
  tags: string[]
  /** Tests belonging to this group. */
  tests: APITestGroupTest[]
}
73
+
74
/**
 * A single test inside a test group, as represented in API responses
 * (snake_case field names), together with its expectations.
 */
export interface APITestGroupTest {
  id: number
  test_group_id: number
  /** Test name; may be `null`. */
  name: string | null
  /** Description; may be `null`. */
  description: string | null
  /** Kind of test; only these two literals are accepted. */
  test_type: 'single-step' | 'full-flow'

  // Target step selection; each field may be `null`.
  // NOTE(review): presumably only meaningful for 'single-step' tests — confirm.
  target_step_index: number | null
  target_step_type: string | null
  target_step_name: string | null

  // Inputs and mocks; the `unknown` payloads are not constrained by this type.
  mock_input: unknown
  workflow_input: unknown
  frozen_events: unknown[]
  tool_mocks: Record<string, unknown>
  prompt_mocks: Record<string, string>

  // Execution settings.
  run_count: number
  /** NOTE(review): typed as a string; its format is not visible here. */
  pass_threshold: string
  /** Timeout in milliseconds. */
  timeout_ms: number
  sort_order: number

  /** Expectations attached to this test. */
  expectations: APIExpectation[]
}
94
+
95
/**
 * An expectation attached to a test, as represented in API responses
 * (snake_case field names). Most settings are nullable.
 *
 * NOTE(review): presumably `type` determines which of the nullable settings
 * apply — confirm against the expectation evaluator.
 */
export interface APIExpectation {
  id: number
  test_group_test_id: number
  /** Expectation type. */
  type: string

  // Judge settings.
  judge_prompt: string | null
  judge_model: string | null
  judge_provider: string | null
  judge_score_threshold: number | null

  // Token and duration limits.
  max_total_tokens: number | null
  max_tokens_per_run: number | null
  max_duration_ms: number | null
  max_total_duration_ms: number | null

  // Text matching settings.
  contains_text: string | null
  not_contains_text: string | null
  case_insensitive: boolean

  /** JSON schema payload; shape is not constrained by this type. */
  json_schema: unknown
  similarity_threshold: number | null

  // Tool usage settings.
  required_tools: string[]
  forbidden_tools: string[]
  /** Tool call rules; shape is not constrained by this type. */
  tool_call_rules: unknown
}