elasticdash-sdk 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +775 -0
  3. package/dist/browser-ui.d.ts +43 -0
  4. package/dist/browser-ui.d.ts.map +1 -0
  5. package/dist/browser-ui.js +246 -0
  6. package/dist/browser-ui.js.map +1 -0
  7. package/dist/capture/event.d.ts +33 -0
  8. package/dist/capture/event.d.ts.map +1 -0
  9. package/dist/capture/event.js +2 -0
  10. package/dist/capture/event.js.map +1 -0
  11. package/dist/capture/index.d.ts +4 -0
  12. package/dist/capture/index.d.ts.map +1 -0
  13. package/dist/capture/index.js +4 -0
  14. package/dist/capture/index.js.map +1 -0
  15. package/dist/capture/recorder.d.ts +24 -0
  16. package/dist/capture/recorder.d.ts.map +1 -0
  17. package/dist/capture/recorder.js +46 -0
  18. package/dist/capture/recorder.js.map +1 -0
  19. package/dist/capture/replay.d.ts +20 -0
  20. package/dist/capture/replay.d.ts.map +1 -0
  21. package/dist/capture/replay.js +47 -0
  22. package/dist/capture/replay.js.map +1 -0
  23. package/dist/ci/api-client.d.ts +38 -0
  24. package/dist/ci/api-client.d.ts.map +1 -0
  25. package/dist/ci/api-client.js +96 -0
  26. package/dist/ci/api-client.js.map +1 -0
  27. package/dist/ci/benchmark.d.ts +33 -0
  28. package/dist/ci/benchmark.d.ts.map +1 -0
  29. package/dist/ci/benchmark.js +213 -0
  30. package/dist/ci/benchmark.js.map +1 -0
  31. package/dist/ci/ed-runner.d.ts +48 -0
  32. package/dist/ci/ed-runner.d.ts.map +1 -0
  33. package/dist/ci/ed-runner.js +260 -0
  34. package/dist/ci/ed-runner.js.map +1 -0
  35. package/dist/ci/executor.d.ts +13 -0
  36. package/dist/ci/executor.d.ts.map +1 -0
  37. package/dist/ci/executor.js +542 -0
  38. package/dist/ci/executor.js.map +1 -0
  39. package/dist/ci/git-info.d.ts +17 -0
  40. package/dist/ci/git-info.d.ts.map +1 -0
  41. package/dist/ci/git-info.js +102 -0
  42. package/dist/ci/git-info.js.map +1 -0
  43. package/dist/ci/index.d.ts +6 -0
  44. package/dist/ci/index.d.ts.map +1 -0
  45. package/dist/ci/index.js +4 -0
  46. package/dist/ci/index.js.map +1 -0
  47. package/dist/ci/measurement.d.ts +9 -0
  48. package/dist/ci/measurement.d.ts.map +1 -0
  49. package/dist/ci/measurement.js +15 -0
  50. package/dist/ci/measurement.js.map +1 -0
  51. package/dist/ci/replay.d.ts +31 -0
  52. package/dist/ci/replay.d.ts.map +1 -0
  53. package/dist/ci/replay.js +96 -0
  54. package/dist/ci/replay.js.map +1 -0
  55. package/dist/ci/reporters/default.d.ts +8 -0
  56. package/dist/ci/reporters/default.d.ts.map +1 -0
  57. package/dist/ci/reporters/default.js +46 -0
  58. package/dist/ci/reporters/default.js.map +1 -0
  59. package/dist/ci/reporters/index.d.ts +8 -0
  60. package/dist/ci/reporters/index.d.ts.map +1 -0
  61. package/dist/ci/reporters/index.js +14 -0
  62. package/dist/ci/reporters/index.js.map +1 -0
  63. package/dist/ci/reporters/json.d.ts +8 -0
  64. package/dist/ci/reporters/json.d.ts.map +1 -0
  65. package/dist/ci/reporters/json.js +14 -0
  66. package/dist/ci/reporters/json.js.map +1 -0
  67. package/dist/ci/reporters/junit.d.ts +8 -0
  68. package/dist/ci/reporters/junit.d.ts.map +1 -0
  69. package/dist/ci/reporters/junit.js +48 -0
  70. package/dist/ci/reporters/junit.js.map +1 -0
  71. package/dist/ci/runner.d.ts +3 -0
  72. package/dist/ci/runner.d.ts.map +1 -0
  73. package/dist/ci/runner.js +187 -0
  74. package/dist/ci/runner.js.map +1 -0
  75. package/dist/ci/test-discovery.d.ts +5 -0
  76. package/dist/ci/test-discovery.d.ts.map +1 -0
  77. package/dist/ci/test-discovery.js +11 -0
  78. package/dist/ci/test-discovery.js.map +1 -0
  79. package/dist/ci/test-loader.d.ts +19 -0
  80. package/dist/ci/test-loader.d.ts.map +1 -0
  81. package/dist/ci/test-loader.js +149 -0
  82. package/dist/ci/test-loader.js.map +1 -0
  83. package/dist/ci/test-registry.d.ts +42 -0
  84. package/dist/ci/test-registry.d.ts.map +1 -0
  85. package/dist/ci/test-registry.js +18 -0
  86. package/dist/ci/test-registry.js.map +1 -0
  87. package/dist/ci/trace-schema.d.ts +30 -0
  88. package/dist/ci/trace-schema.d.ts.map +1 -0
  89. package/dist/ci/trace-schema.js +66 -0
  90. package/dist/ci/trace-schema.js.map +1 -0
  91. package/dist/ci/trace-writer.d.ts +16 -0
  92. package/dist/ci/trace-writer.d.ts.map +1 -0
  93. package/dist/ci/trace-writer.js +108 -0
  94. package/dist/ci/trace-writer.js.map +1 -0
  95. package/dist/ci/types.d.ts +108 -0
  96. package/dist/ci/types.d.ts.map +1 -0
  97. package/dist/ci/types.js +3 -0
  98. package/dist/ci/types.js.map +1 -0
  99. package/dist/ci/upload-client.d.ts +74 -0
  100. package/dist/ci/upload-client.d.ts.map +1 -0
  101. package/dist/ci/upload-client.js +195 -0
  102. package/dist/ci/upload-client.js.map +1 -0
  103. package/dist/cli.d.ts +3 -0
  104. package/dist/cli.d.ts.map +1 -0
  105. package/dist/cli.js +716 -0
  106. package/dist/cli.js.map +1 -0
  107. package/dist/core/agent-state.d.ts +47 -0
  108. package/dist/core/agent-state.d.ts.map +1 -0
  109. package/dist/core/agent-state.js +137 -0
  110. package/dist/core/agent-state.js.map +1 -0
  111. package/dist/core/judge-utils.d.ts +22 -0
  112. package/dist/core/judge-utils.d.ts.map +1 -0
  113. package/dist/core/judge-utils.js +211 -0
  114. package/dist/core/judge-utils.js.map +1 -0
  115. package/dist/core/registry.d.ts +28 -0
  116. package/dist/core/registry.d.ts.map +1 -0
  117. package/dist/core/registry.js +52 -0
  118. package/dist/core/registry.js.map +1 -0
  119. package/dist/dashboard-server.d.ts +65 -0
  120. package/dist/dashboard-server.d.ts.map +1 -0
  121. package/dist/dashboard-server.js +3940 -0
  122. package/dist/dashboard-server.js.map +1 -0
  123. package/dist/execution/tool-runner.d.ts +26 -0
  124. package/dist/execution/tool-runner.d.ts.map +1 -0
  125. package/dist/execution/tool-runner.js +316 -0
  126. package/dist/execution/tool-runner.js.map +1 -0
  127. package/dist/html/dashboard.html +2218 -0
  128. package/dist/http.d.ts +14 -0
  129. package/dist/http.d.ts.map +1 -0
  130. package/dist/http.js +13 -0
  131. package/dist/http.js.map +1 -0
  132. package/dist/index.cjs +8102 -0
  133. package/dist/index.d.ts +61 -0
  134. package/dist/index.d.ts.map +1 -0
  135. package/dist/index.js +67 -0
  136. package/dist/index.js.map +1 -0
  137. package/dist/interceptors/ai-interceptor.d.ts +26 -0
  138. package/dist/interceptors/ai-interceptor.d.ts.map +1 -0
  139. package/dist/interceptors/ai-interceptor.js +756 -0
  140. package/dist/interceptors/ai-interceptor.js.map +1 -0
  141. package/dist/interceptors/db-auto.d.ts +8 -0
  142. package/dist/interceptors/db-auto.d.ts.map +1 -0
  143. package/dist/interceptors/db-auto.js +217 -0
  144. package/dist/interceptors/db-auto.js.map +1 -0
  145. package/dist/interceptors/db.d.ts +23 -0
  146. package/dist/interceptors/db.d.ts.map +1 -0
  147. package/dist/interceptors/db.js +137 -0
  148. package/dist/interceptors/db.js.map +1 -0
  149. package/dist/interceptors/http.d.ts +28 -0
  150. package/dist/interceptors/http.d.ts.map +1 -0
  151. package/dist/interceptors/http.js +356 -0
  152. package/dist/interceptors/http.js.map +1 -0
  153. package/dist/interceptors/side-effects.d.ts +7 -0
  154. package/dist/interceptors/side-effects.d.ts.map +1 -0
  155. package/dist/interceptors/side-effects.js +72 -0
  156. package/dist/interceptors/side-effects.js.map +1 -0
  157. package/dist/interceptors/telemetry-push.d.ts +142 -0
  158. package/dist/interceptors/telemetry-push.d.ts.map +1 -0
  159. package/dist/interceptors/telemetry-push.js +463 -0
  160. package/dist/interceptors/telemetry-push.js.map +1 -0
  161. package/dist/interceptors/tool.d.ts +2 -0
  162. package/dist/interceptors/tool.d.ts.map +1 -0
  163. package/dist/interceptors/tool.js +274 -0
  164. package/dist/interceptors/tool.js.map +1 -0
  165. package/dist/interceptors/workflow-ai.d.ts +5 -0
  166. package/dist/interceptors/workflow-ai.d.ts.map +1 -0
  167. package/dist/interceptors/workflow-ai.js +382 -0
  168. package/dist/interceptors/workflow-ai.js.map +1 -0
  169. package/dist/internals/conditional-recorder.d.ts +21 -0
  170. package/dist/internals/conditional-recorder.d.ts.map +1 -0
  171. package/dist/internals/conditional-recorder.js +54 -0
  172. package/dist/internals/conditional-recorder.js.map +1 -0
  173. package/dist/internals/mock-resolver.d.ts +146 -0
  174. package/dist/internals/mock-resolver.d.ts.map +1 -0
  175. package/dist/internals/mock-resolver.js +427 -0
  176. package/dist/internals/mock-resolver.js.map +1 -0
  177. package/dist/matchers/index.d.ts +96 -0
  178. package/dist/matchers/index.d.ts.map +1 -0
  179. package/dist/matchers/index.js +668 -0
  180. package/dist/matchers/index.js.map +1 -0
  181. package/dist/observability.d.ts +82 -0
  182. package/dist/observability.d.ts.map +1 -0
  183. package/dist/observability.js +471 -0
  184. package/dist/observability.js.map +1 -0
  185. package/dist/portal-executor.d.ts +30 -0
  186. package/dist/portal-executor.d.ts.map +1 -0
  187. package/dist/portal-executor.js +324 -0
  188. package/dist/portal-executor.js.map +1 -0
  189. package/dist/portal-server.d.ts +3 -0
  190. package/dist/portal-server.d.ts.map +1 -0
  191. package/dist/portal-server.js +279 -0
  192. package/dist/portal-server.js.map +1 -0
  193. package/dist/proxy/llm-capture.d.ts +14 -0
  194. package/dist/proxy/llm-capture.d.ts.map +1 -0
  195. package/dist/proxy/llm-capture.js +264 -0
  196. package/dist/proxy/llm-capture.js.map +1 -0
  197. package/dist/reporter.d.ts +3 -0
  198. package/dist/reporter.d.ts.map +1 -0
  199. package/dist/reporter.js +72 -0
  200. package/dist/reporter.js.map +1 -0
  201. package/dist/runWorkflowSubprocess.d.ts +14 -0
  202. package/dist/runWorkflowSubprocess.d.ts.map +1 -0
  203. package/dist/runWorkflowSubprocess.js +66 -0
  204. package/dist/runWorkflowSubprocess.js.map +1 -0
  205. package/dist/runner.d.ts +16 -0
  206. package/dist/runner.d.ts.map +1 -0
  207. package/dist/runner.js +138 -0
  208. package/dist/runner.js.map +1 -0
  209. package/dist/socket-connector.d.ts +22 -0
  210. package/dist/socket-connector.d.ts.map +1 -0
  211. package/dist/socket-connector.js +104 -0
  212. package/dist/socket-connector.js.map +1 -0
  213. package/dist/telemetry-batcher.d.ts +56 -0
  214. package/dist/telemetry-batcher.d.ts.map +1 -0
  215. package/dist/telemetry-batcher.js +143 -0
  216. package/dist/telemetry-batcher.js.map +1 -0
  217. package/dist/test-setup.d.ts +12 -0
  218. package/dist/test-setup.d.ts.map +1 -0
  219. package/dist/test-setup.js +13 -0
  220. package/dist/test-setup.js.map +1 -0
  221. package/dist/tool-registry.d.ts +31 -0
  222. package/dist/tool-registry.d.ts.map +1 -0
  223. package/dist/tool-registry.js +73 -0
  224. package/dist/tool-registry.js.map +1 -0
  225. package/dist/tool-runner-worker.d.ts +2 -0
  226. package/dist/tool-runner-worker.d.ts.map +1 -0
  227. package/dist/tool-runner-worker.js +215 -0
  228. package/dist/tool-runner-worker.js.map +1 -0
  229. package/dist/trace-adapter/context.d.ts +72 -0
  230. package/dist/trace-adapter/context.d.ts.map +1 -0
  231. package/dist/trace-adapter/context.js +80 -0
  232. package/dist/trace-adapter/context.js.map +1 -0
  233. package/dist/tracing.d.ts +2 -0
  234. package/dist/tracing.d.ts.map +1 -0
  235. package/dist/tracing.js +59 -0
  236. package/dist/tracing.js.map +1 -0
  237. package/dist/trigger-executor.d.ts +12 -0
  238. package/dist/trigger-executor.d.ts.map +1 -0
  239. package/dist/trigger-executor.js +130 -0
  240. package/dist/trigger-executor.js.map +1 -0
  241. package/dist/types/portal.d.ts +76 -0
  242. package/dist/types/portal.d.ts.map +1 -0
  243. package/dist/types/portal.js +2 -0
  244. package/dist/types/portal.js.map +1 -0
  245. package/dist/utils/debug.d.ts +3 -0
  246. package/dist/utils/debug.d.ts.map +1 -0
  247. package/dist/utils/debug.js +8 -0
  248. package/dist/utils/debug.js.map +1 -0
  249. package/dist/utils/license-error.d.ts +23 -0
  250. package/dist/utils/license-error.d.ts.map +1 -0
  251. package/dist/utils/license-error.js +42 -0
  252. package/dist/utils/license-error.js.map +1 -0
  253. package/dist/utils/redact.d.ts +7 -0
  254. package/dist/utils/redact.d.ts.map +1 -0
  255. package/dist/utils/redact.js +26 -0
  256. package/dist/utils/redact.js.map +1 -0
  257. package/dist/workflow-runner-worker.d.ts +2 -0
  258. package/dist/workflow-runner-worker.d.ts.map +1 -0
  259. package/dist/workflow-runner-worker.js +329 -0
  260. package/dist/workflow-runner-worker.js.map +1 -0
  261. package/dist/workflow-runner.d.ts +14 -0
  262. package/dist/workflow-runner.d.ts.map +1 -0
  263. package/dist/workflow-runner.js +34 -0
  264. package/dist/workflow-runner.js.map +1 -0
  265. package/docs/agent-coding-instructions.md +138 -0
  266. package/docs/agent-integration-guide.md +564 -0
  267. package/docs/agents.md +140 -0
  268. package/docs/dashboard.md +394 -0
  269. package/docs/deno.md +69 -0
  270. package/docs/instrumentation.md +424 -0
  271. package/docs/langfuse-trace-structure.md +145 -0
  272. package/docs/matchers.md +173 -0
  273. package/docs/observability_contract.md +192 -0
  274. package/docs/observability_mode.md +195 -0
  275. package/docs/quickstart.md +621 -0
  276. package/docs/security-compliance.md +566 -0
  277. package/docs/test-writing-guidelines.md +444 -0
  278. package/docs/tools.md +165 -0
  279. package/docs/workflow-modes.md +253 -0
  280. package/package.json +76 -0
  281. package/src/browser-ui.ts +281 -0
  282. package/src/capture/event.ts +30 -0
  283. package/src/capture/index.ts +3 -0
  284. package/src/capture/recorder.ts +62 -0
  285. package/src/capture/replay.ts +55 -0
  286. package/src/ci/api-client.ts +136 -0
  287. package/src/ci/benchmark.ts +257 -0
  288. package/src/ci/ed-runner.ts +351 -0
  289. package/src/ci/executor.ts +671 -0
  290. package/src/ci/git-info.ts +127 -0
  291. package/src/ci/index.ts +5 -0
  292. package/src/ci/measurement.ts +25 -0
  293. package/src/ci/replay.ts +127 -0
  294. package/src/ci/reporters/default.ts +50 -0
  295. package/src/ci/reporters/index.ts +21 -0
  296. package/src/ci/reporters/json.ts +18 -0
  297. package/src/ci/reporters/junit.ts +61 -0
  298. package/src/ci/runner.ts +208 -0
  299. package/src/ci/test-discovery.ts +16 -0
  300. package/src/ci/test-loader.ts +187 -0
  301. package/src/ci/test-registry.ts +62 -0
  302. package/src/ci/trace-schema.ts +96 -0
  303. package/src/ci/trace-writer.ts +107 -0
  304. package/src/ci/types.ts +115 -0
  305. package/src/ci/upload-client.ts +300 -0
  306. package/src/cli.ts +811 -0
  307. package/src/core/agent-state.ts +162 -0
  308. package/src/core/judge-utils.ts +232 -0
  309. package/src/core/registry.ts +92 -0
  310. package/src/dashboard-server.ts +2047 -0
  311. package/src/execution/tool-runner.ts +352 -0
  312. package/src/html/dashboard.html +2218 -0
  313. package/src/http.ts +13 -0
  314. package/src/index.ts +138 -0
  315. package/src/interceptors/ai-interceptor.ts +798 -0
  316. package/src/interceptors/db-auto.ts +243 -0
  317. package/src/interceptors/db.ts +156 -0
  318. package/src/interceptors/http.ts +393 -0
  319. package/src/interceptors/side-effects.ts +83 -0
  320. package/src/interceptors/telemetry-push.ts +537 -0
  321. package/src/interceptors/tool.ts +287 -0
  322. package/src/interceptors/workflow-ai.ts +419 -0
  323. package/src/internals/conditional-recorder.ts +63 -0
  324. package/src/internals/mock-resolver.ts +492 -0
  325. package/src/matchers/index.ts +824 -0
  326. package/src/observability.ts +501 -0
  327. package/src/portal-executor.ts +355 -0
  328. package/src/portal-server.ts +304 -0
  329. package/src/proxy/llm-capture.ts +301 -0
  330. package/src/reporter.ts +81 -0
  331. package/src/runWorkflowSubprocess.ts +74 -0
  332. package/src/runner.ts +178 -0
  333. package/src/socket-connector.ts +117 -0
  334. package/src/telemetry-batcher.ts +191 -0
  335. package/src/test-setup.ts +16 -0
  336. package/src/tool-registry.ts +94 -0
  337. package/src/tool-runner-worker.ts +244 -0
  338. package/src/trace-adapter/context.ts +156 -0
  339. package/src/tracing.ts +62 -0
  340. package/src/trigger-executor.ts +171 -0
  341. package/src/types/agent.d.ts +63 -0
  342. package/src/types/expect.d.ts +81 -0
  343. package/src/types/modules.d.ts +2 -0
  344. package/src/types/portal.ts +69 -0
  345. package/src/utils/debug.ts +8 -0
  346. package/src/utils/license-error.ts +43 -0
  347. package/src/utils/redact.ts +25 -0
  348. package/src/workflow-runner-worker.ts +386 -0
  349. package/src/workflow-runner.ts +58 -0
@@ -0,0 +1,156 @@
1
/** A single LLM inference recorded in a trace. */
export interface LLMStep {
  /** Identifier of the model that served the call. */
  model: string
  /** Provider name, e.g. 'openai' | 'gemini' | 'grok'; may be left undefined. */
  provider?: string
  /** Prompt text, when it was captured. */
  prompt?: string
  /** Completion text, when it was captured. */
  completion?: string
  /** NOTE(review): presumably a substring hint consumed by matchers — confirm against callers. */
  contains?: string
  /** Id of the workflow event this step belongs to, when one was assigned. */
  workflowEventId?: number
  /** Call duration in milliseconds. */
  durationMs?: number
}
10
+
11
/** A single tool invocation recorded in a trace. */
export interface ToolCall {
  /** Name of the tool that was invoked. */
  name: string
  /** Arguments the tool was called with, when captured. */
  args?: Record<string, unknown>
  /** Value the tool produced, when captured. */
  result?: unknown
  /** Id of the workflow event this call belongs to, when one was assigned. */
  workflowEventId?: number
  /** Call duration in milliseconds. */
  durationMs?: number
}
18
+
19
/** Categories a custom (non-LLM, non-tool) step can be filed under. */
export type CustomStepKind =
  | 'rag'
  | 'code'
  | 'fixed'
  | 'custom'

/** A user-defined step recorded in a trace. */
export interface CustomStep {
  /** Category of the step. */
  kind: CustomStepKind
  /** Optional step name. */
  name?: string
  /** Optional free-form tags attached to the step. */
  tags?: string[]
  /** Optional input carried by the step. */
  payload?: unknown
  /** Optional output produced by the step. */
  result?: unknown
  /** Optional extra key/value data. */
  metadata?: Record<string, unknown>
  /** NOTE(review): presumably a substring hint consumed by matchers — confirm against callers. */
  contains?: string
}
30
+
31
/** One entry of the combined trace timeline, wrapping an LLM, tool or custom step. */
export interface TraceStep {
  /** Which kind of step `data` holds. */
  type: 'llm' | 'tool' | 'custom'
  /** Clock value taken when the step was recorded. */
  timestamp: number
  /** Step duration in milliseconds. */
  durationMs: number
  /** The recorded step object, viewed as a plain record. */
  data: Record<string, unknown>
}
37
+
38
/** Read/write access to the steps recorded during one trace session. */
export interface TraceHandle {
  /** Every step recorded in this session, whatever its type. */
  getSteps(): TraceStep[]
  /** The LLM inference steps only. */
  getLLMSteps(): LLMStep[]
  /** The tool-call steps only. */
  getToolCalls(): ToolCall[]
  /** The custom steps only (RAG, code, fixed, etc.). */
  getCustomSteps(): CustomStep[]
  /** Add an LLM step; used by stubs and by a real adapter. */
  recordLLMStep(step: LLMStep): void
  /** Add a tool call; used by stubs and by a real adapter. */
  recordToolCall(call: ToolCall): void
  /** Add a custom step (e.g. RAG, code). */
  recordCustomStep(step: CustomStep): void
}
54
+
55
/** Context object built for a test; currently it only carries the trace handle. */
export interface AITestContext {
  /** Trace handle for the running test. */
  trace: TraceHandle
}
58
+
59
+ // --- AsyncLocalStorage-backed current trace (parallel-safe) ---
60
+ import { AsyncLocalStorage } from 'node:async_hooks'
61
+ import { rawDateNow } from '../interceptors/side-effects.js'
62
+
63
// Reuse the AsyncLocalStorage already stored on globalThis under TRACE_ALS_KEY, if
// there is one; otherwise create a fresh instance and publish it under that key.
const TRACE_ALS_KEY = '__elasticdash_trace_als__'
const g = globalThis as Record<string, unknown>
const traceAls: AsyncLocalStorage<TraceHandle | undefined> = (() => {
  const cached = g[TRACE_ALS_KEY] as AsyncLocalStorage<TraceHandle | undefined>
  return cached ?? new AsyncLocalStorage<TraceHandle | undefined>()
})()
if (!g[TRACE_ALS_KEY]) {
  g[TRACE_ALS_KEY] = traceAls
}
69
+
70
/**
 * Make `trace` the current trace for the calling async context.
 *
 * Uses `AsyncLocalStorage.enterWith`, so the value stays in effect for the rest of
 * the current execution and the async work started from it.
 *
 * @param trace - Handle to install, or `undefined` to clear the current trace.
 */
export function setCurrentTrace(trace: TraceHandle | undefined): void {
  const nextStore = trace
  traceAls.enterWith(nextStore)
}
73
+
74
/**
 * Look up the trace bound to the calling async context.
 *
 * @returns The current trace handle, or `undefined` when none has been set.
 */
export function getCurrentTrace(): TraceHandle | undefined {
  const current = traceAls.getStore()
  return current
}
77
+
78
/**
 * Optional callbacks a runner can invoke around each test
 * (scaffold for future backend integration). Each hook may be sync or async.
 */
export interface RunnerHooks {
  /** Called with the test name when a test starts. */
  onTestStart?(name: string): void | Promise<void>
  /** Called when a test finishes, with its outcome, duration in milliseconds and optional error. */
  onTestFinish?(name: string, passed: boolean, durationMs: number, error?: Error): void | Promise<void>
  /** Called with the test name and its trace handle once the trace is complete. */
  onTraceComplete?(name: string, trace: TraceHandle): void | Promise<void>
}
84
+
85
/**
 * Build an in-memory (stubbed) trace handle for one test execution.
 * A real ElasticDash backend call can replace this later.
 *
 * Each `record*` method appends the step to its typed list and to the combined
 * timeline. The getters return the live backing arrays, not copies.
 *
 * @returns A handle whose lists all start empty.
 */
export function createTraceHandle(): TraceHandle {
  const timeline: TraceStep[] = []
  const llmList: LLMStep[] = []
  const toolList: ToolCall[] = []
  const customList: CustomStep[] = []

  // Append one timeline entry, stamped with the raw clock at record time.
  const appendToTimeline = (type: TraceStep['type'], durationMs: number, payload: unknown): void => {
    timeline.push({
      type,
      timestamp: rawDateNow(),
      durationMs,
      data: payload as unknown as Record<string, unknown>,
    })
  }

  const handle: TraceHandle = {
    getSteps: () => timeline,
    getLLMSteps: () => llmList,
    getToolCalls: () => toolList,
    getCustomSteps: () => customList,

    recordLLMStep(step: LLMStep) {
      llmList.push(step)
      // A missing duration is recorded as 0.
      appendToTimeline('llm', step.durationMs ?? 0, step)
    },

    recordToolCall(call: ToolCall) {
      toolList.push(call)
      appendToTimeline('tool', call.durationMs ?? 0, call)
    },

    recordCustomStep(step: CustomStep) {
      customList.push(step)
      // Custom steps carry no duration field, so the timeline entry always gets 0.
      appendToTimeline('custom', 0, step)
    },
  }

  return handle
}
143
+
144
/**
 * Open a trace session ahead of a test.
 *
 * @returns The test context wrapping a fresh trace handle, plus a `finalise`
 *   callback. `finalise` currently does nothing.
 */
export function startTraceSession(): { context: AITestContext; finalise: () => void } {
  const context: AITestContext = { trace: createTraceHandle() }

  const finalise = (): void => {
    // Placeholder: flush / send to ElasticDash backend here in the future
  }

  return { context, finalise }
}
package/src/tracing.ts ADDED
@@ -0,0 +1,62 @@
1
+ // src/tracing.ts
2
+ // ElasticDash tool call recording utility
3
+
4
+ /**
5
+ * Records a tool call for workflow tracing. Safe to call in any environment.
6
+ * If not running inside the ElasticDash runner, this is a no-op.
7
+ *
8
+ * @param name - The tool name
9
+ * @param args - The tool arguments (object or array)
10
+ * @param result - The tool result (or error)
11
+ */
12
+ import { getCurrentTrace } from './trace-adapter/context.js'
13
+ import { getCaptureContext } from './capture/recorder.js'
14
+ import { rawDateNow } from './interceptors/side-effects.js'
15
+
16
const TOOL_WRAPPER_ACTIVE_KEY = '__elasticdash_tool_wrapper_active__'

/**
 * Report whether the tool-wrapper flag on `globalThis` is set.
 *
 * @returns `true` only when the flag is strictly `true`; any other value counts as inactive.
 */
function wrapperRecordingActive(): boolean {
  const globals = globalThis as Record<string, unknown>
  const flag = globals[TOOL_WRAPPER_ACTIVE_KEY]
  return flag === true
}
21
+
22
/**
 * Record a tool call on the current trace. Never throws.
 *
 * Does nothing unless `globalThis.__ELASTICDASH_WORKER__` is truthy, a current
 * trace exists, and no tool wrapper is already recording.
 *
 * - Without a capture context, the call is written to the trace as given.
 * - With a capture context, the call gets the recorder's next id. If that id is
 *   to be replayed, the recorded event is re-recorded and the trace receives the
 *   recorded result. Otherwise a new `tool` event is recorded and mirrored to the trace.
 *
 * @param name - The tool name.
 * @param args - The tool arguments (object or array).
 * @param result - The tool result, or an `Error`, which is stored as `{ error: String(result) }` on the capture path.
 * @param durationMs - Call duration in milliseconds; defaults to 0.
 */
export function recordToolCall(name: string, args: any, result: any, durationMs = 0) {
  const insideWorker = Boolean((globalThis as any).__ELASTICDASH_WORKER__)
  if (!insideWorker) return

  try {
    // Avoid double-recording when a replay-aware tool wrapper is already active.
    if (wrapperRecordingActive()) return

    const activeTrace = getCurrentTrace()
    const canRecord = Boolean(activeTrace) && typeof activeTrace!.recordToolCall === 'function'
    if (!activeTrace || !canRecord) return

    const capture = getCaptureContext()
    if (!capture) {
      activeTrace.recordToolCall({ name, args, result, durationMs })
      return
    }

    const { recorder: eventRecorder, replay: replayer } = capture
    const eventId = eventRecorder.nextId()

    if (replayer.shouldReplay(eventId)) {
      const pastEvent = replayer.getRecordedEvent(eventId)
      if (pastEvent) {
        eventRecorder.record(pastEvent)
      }
      const pastResult = replayer.getRecordedResult(eventId)
      activeTrace.recordToolCall({ name, args, result: pastResult, workflowEventId: eventId })
      return
    }

    let output = result
    if (result instanceof Error) {
      output = { error: String(result) }
    }
    eventRecorder.record({
      id: eventId,
      type: 'tool',
      name,
      input: args,
      output,
      timestamp: rawDateNow(),
      durationMs,
    })
    activeTrace.recordToolCall({ name, args, result: output, workflowEventId: eventId, durationMs })
  } catch {
    // Never throw, always swallow errors
  }
}
@@ -0,0 +1,171 @@
1
+ import type { TriggerSignal, FrozenEvent } from './telemetry-batcher.js'
2
+ import { executePortalTask, checkToolAvailability, checkAIAvailability } from './portal-executor.js'
3
+ import { getOriginalFetch } from './interceptors/http.js'
4
+ import { getObservabilityContext } from './interceptors/telemetry-push.js'
5
+ import { scanTools } from './execution/tool-runner.js'
6
+ import { debugLog } from './utils/debug.js'
7
+
8
/**
 * IDs of triggers that are executing or have already completed in this process.
 * Consulted before execution so a trigger is never run twice.
 */
const handledTriggers: Set<number> = new Set()
10
+
11
/** Outcome of one re-execution of a trigger step. */
interface StepRunResult {
  /** Zero-based index of this run within the step. */
  runIndex: number
  /** Input the step was executed with. */
  input: unknown
  /** Output produced by the run. */
  output: unknown
  /** Run duration in milliseconds. */
  durationMs: number
  /** Error text when the run failed. */
  error?: string
  /** Input token count, when usage was reported. */
  usageInputTokens?: number
  /** Output token count, when usage was reported. */
  usageOutputTokens?: number
  /** Total token count, when usage was reported. */
  usageTotalTokens?: number
}
21
+
22
/** Aggregated result of one trigger step across all of its runs. */
interface StepResult {
  /** Database id of the original event this step re-executes. */
  originalEventDbId: number
  /** Event type of the step (e.g. 'ai'). */
  eventType: string
  /** Event name of the step. */
  eventName: string
  /** Whether the step could be executed in this environment. */
  available: boolean
  /** Why the step was unavailable, when `available` is false. */
  unavailableReason?: string
  /** Per-run results; empty when the step was unavailable. */
  runs: StepRunResult[]
}
30
+
31
/**
 * Executes a trigger received from the backend's event batch response.
 *
 * The trigger is ignored when reruns are disabled through
 * `ELASTICDASH_ACCEPT_RERUNS` ('false' / '0' / 'no'), when `serverUrl` is empty,
 * or when the same trigger id has already been handled in this process.
 *
 * For each step:
 * 1. Pre-validates availability (tool exists? API key set?)
 * 2. If unavailable: reports `available: false` with reason, skips execution
 * 3. If available: re-executes `runCount` times, collects results
 * 4. POSTs each step's result individually to avoid payload size limits
 *
 * While the steps run, the observability context (if any) is flagged as a rerun.
 * The flag is always cleared afterwards, including when a step throws.
 *
 * @param serverUrl - Base URL of the backend that receives the rerun results.
 * @param apiKey - Optional API key, sent as a Bearer token.
 * @param trigger - The trigger to execute.
 * @returns A promise that resolves once every step has been processed. Failures of
 *   the result POST are logged, not thrown.
 */
export async function executeTrigger(
  serverUrl: string,
  apiKey: string | undefined,
  trigger: TriggerSignal,
): Promise<void> {
  // Feature flag: allow users to disable rerun acceptance per project
  const acceptReruns = process.env.ELASTICDASH_ACCEPT_RERUNS
  if (acceptReruns !== undefined && ['false', '0', 'no'].includes(acceptReruns.toLowerCase())) {
    debugLog(`[elasticdash] Trigger ${trigger.triggerId} rejected: ELASTICDASH_ACCEPT_RERUNS=${acceptReruns}`)
    return
  }

  // Validate the destination before claiming the trigger. A trigger whose results
  // cannot be reported must not be marked as handled, and there is no point in
  // scanning tools for it.
  if (!serverUrl) {
    debugLog(`[elasticdash] Trigger ${trigger.triggerId} aborted: serverUrl is empty`)
    return
  }

  // Dedup guard: skip if this trigger is already being handled
  if (handledTriggers.has(trigger.triggerId)) {
    debugLog(`[elasticdash] Trigger ${trigger.triggerId} already handled, skipping duplicate`)
    return
  }
  handledTriggers.add(trigger.triggerId)

  // Mark observability context as rerun so captured events are flagged
  const obsCtx = getObservabilityContext()
  if (obsCtx) obsCtx.isRerun = true

  try {
    const cwd = process.cwd()
    const tools = scanTools(cwd)
    const frozenEvents = trigger.frozenEvents || []
    const totalSteps = trigger.steps.length

    debugLog(`[elasticdash] Executing trigger ${trigger.triggerId}: ${totalSteps} steps × ${trigger.runCount} runs, ${frozenEvents.length} frozenEvents`)

    const baseUrl = serverUrl.replace(/\/$/, '')
    const headers: Record<string, string> = { 'Content-Type': 'application/json' }
    if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`

    for (let stepIndex = 0; stepIndex < totalSteps; stepIndex++) {
      const step = trigger.steps[stepIndex]

      // Pre-validate availability
      const availability = step.eventType === 'ai'
        ? checkAIAvailability(step.provider, step.model ?? step.eventName)
        : checkToolAvailability(step.eventName, cwd, tools)

      let stepResult: StepResult

      if (!availability.available) {
        debugLog(`[elasticdash] Trigger ${trigger.triggerId} step=${step.eventName} unavailable: ${availability.reason}`)
        stepResult = {
          originalEventDbId: step.originalEventDbId,
          eventType: step.eventType,
          eventName: step.eventName,
          available: false,
          unavailableReason: availability.reason,
          runs: [],
        }
      } else {
        // Execute runs sequentially; each run's outcome is kept even when it failed.
        const runs: StepRunResult[] = []

        for (let i = 0; i < trigger.runCount; i++) {
          const result = await executePortalTask(
            {
              taskId: `trigger-${trigger.triggerId}-${step.eventName}-${i}`,
              type: step.eventType === 'ai' ? 'ai' : 'tool',
              name: step.eventName,
              input: step.input,
              model: step.eventType === 'ai' ? (step.model ?? step.eventName) : undefined,
              provider: step.provider,
              frozenEvents,
            },
            cwd,
            tools,
          )

          if (!result.ok) {
            debugLog(`[elasticdash] Trigger ${trigger.triggerId} step=${step.eventName} run=${i} FAILED:\n${result.error}`)
          }

          runs.push({
            runIndex: i,
            input: step.input,
            output: result.output,
            durationMs: result.durationMs,
            error: result.error,
            usageInputTokens: result.usage?.inputTokens,
            usageOutputTokens: result.usage?.outputTokens,
            usageTotalTokens: result.usage?.totalTokens,
          })

          debugLog(`[elasticdash] Trigger ${trigger.triggerId} step=${step.eventName} run=${i} ok=${result.ok}`)
        }

        stepResult = {
          originalEventDbId: step.originalEventDbId,
          eventType: step.eventType,
          eventName: step.eventName,
          available: true,
          runs,
        }
      }

      // POST rerun results directly under the original event record.
      // The un-intercepted fetch is used for this request.
      const url = `${baseUrl}/api/observability/events/${step.originalEventDbId}/reruns`
      try {
        const res = await getOriginalFetch()(url, {
          method: 'POST',
          headers,
          body: JSON.stringify({
            triggerId: trigger.triggerId,
            eventType: stepResult.eventType,
            eventName: stepResult.eventName,
            available: stepResult.available,
            unavailableReason: stepResult.unavailableReason,
            runs: stepResult.runs,
            stepIndex,
            totalSteps,
          }),
        })
        if (res.ok) {
          debugLog(`[elasticdash] Trigger ${trigger.triggerId} step ${stepIndex + 1}/${totalSteps} posted to event ${step.originalEventDbId} (status ${res.status})`)
        } else {
          // A non-2xx answer means the backend did not accept the results.
          debugLog(`[elasticdash] Trigger ${trigger.triggerId} step ${stepIndex + 1}/${totalSteps} POST to event ${step.originalEventDbId} rejected by server (status ${res.status})`)
        }
      } catch (err) {
        debugLog(`[elasticdash] Trigger ${trigger.triggerId} step ${stepIndex + 1}/${totalSteps} POST to event ${step.originalEventDbId} failed: ${err instanceof Error ? err.message : String(err)}`)
      }
    }
  } finally {
    // Restore context after rerun execution, even when a step threw. Otherwise
    // every event captured later in this process would stay flagged as a rerun.
    if (obsCtx) obsCtx.isRerun = false
  }
}
171
+
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Agent state type definitions for mid-trace replay support.
3
+ *
4
+ * These types enable structured plan/task management for agents, allowing
5
+ * them to be resumed from any task in the plan without re-executing
6
+ * already-completed steps.
7
+ */
8
+
9
/** Lifecycle states of a single agent task. */
export type AgentTaskStatus =
  | 'pending'
  | 'in-progress'
  | 'completed'
  | 'failed'

/** One task of an agent plan. */
export interface AgentTask {
  /** Unique identifier of the task, e.g. "task-1" or "task-2". */
  id: string
  /** Where the task currently stands in its lifecycle. */
  status: AgentTaskStatus
  /** Human-readable summary of what the task does. */
  description: string
  /** Tool or service the task invokes, e.g. 'apiService' or 'queryRefinement'. */
  tool: string
  /**
   * Input parameters of the task. Values may be placeholders pointing at an
   * earlier task's output, written as { $ref: "task-N.output.fieldName" }.
   */
  input: unknown
  /** Result of the task; set once it has executed successfully. */
  output?: unknown
  /** Error message; set when the task failed. */
  error?: string
  /** Unix timestamp at which execution started. */
  startedAt?: number
  /** Unix timestamp at which execution completed. */
  completedAt?: number
}
34
+
35
/** Lifecycle states of a whole agent plan. */
export type AgentPlanStatus =
  | 'planning'
  | 'executing'
  | 'completed'
  | 'failed'
  | 'paused'

/** An ordered plan of tasks together with its execution state. */
export interface AgentPlan {
  /** Unique identifier of the plan. */
  id: string
  /** Tasks of the plan, in execution order. */
  tasks: AgentTask[]
  /** Execution status of the plan as a whole. */
  status: AgentPlanStatus
  /** Zero-based index of the task that is currently executing. */
  currentTaskIndex: number
  /** Data and variables shared by all tasks. */
  context: Record<string, unknown>
  /** Extra plan metadata (user query, session ID, etc.). */
  metadata: Record<string, unknown>
}
51
+
52
/** Snapshot needed to resume an agent part-way through its plan. */
export interface AgentState {
  /** The complete plan, holding both finished and outstanding tasks. */
  plan: AgentPlan
  /** Partial list of trace events captured by the earlier execution. */
  trace: import('../capture/event.js').WorkflowEvent[]
  /**
   * Zero-based index of the first task to run again.
   * Tasks before this index reuse the outputs cached in `plan.tasks`;
   * tasks from this index to the end are executed fresh.
   */
  resumeFromTaskIndex: number
}
@@ -0,0 +1,81 @@
1
+ import 'expect';
2
+ import type { TraceHandle, CustomStep, CustomStepKind } from '../trace-adapter/context';
3
+
4
/** Configuration object accepted by the `toHaveLLMStep` matcher. */
interface LLMStepConfig {
  model?: string;
  /** Searches prompt + completion. */
  contains?: string;
  /** Searches only in step.prompt. */
  promptContains?: string;
  /** Searches only in step.completion. */
  outputContains?: string;
  /** 'openai' | 'claude' | 'gemini' | 'grok' */
  provider?: string;
  /** Match count must equal exactly this value. */
  times?: number;
  /** Match count must be >= this value. */
  minTimes?: number;
  /** Match count must be <= this value. */
  maxTimes?: number;
}
14
+
15
/** Configuration object accepted by the `toHaveCustomStep` matcher. */
interface CustomStepConfig {
  kind?: CustomStepKind;
  name?: string;
  tag?: string;
  /** Searches payload/result/metadata stringified. */
  contains?: string;
  /** Searches result only. */
  resultContains?: string;
  /** Searches payload only. */
  payloadContains?: string;
  /** Searches metadata only. */
  metadataContains?: string;
  times?: number;
  minTimes?: number;
  maxTimes?: number;
}
27
+
28
/** Configuration object accepted by the `toHavePromptWhere` matcher. */
interface PromptWhereConfig {
  /** First filter: prompts that contain this substring. */
  filterContains: string;
  /** Then assert: filtered prompts must also contain this. */
  requireContains?: string;
  /** And must NOT contain this. */
  requireNotContains?: string;
  /** Exact count of filtered prompts. */
  times?: number;
  /** Min count of filtered prompts. */
  minTimes?: number;
  /** Max count of filtered prompts. */
  maxTimes?: number;
  /** Optional 0-based index into filtered prompts to check specifically. */
  index?: number;
  /** Optional 1-based alias for index. */
  nth?: number;
}
38
+
39
/** Provider identifiers accepted by the `provider` option of the LLM-backed matchers. */
type SupportedProvider =
  | 'openai'
  | 'claude'
  | 'gemini'
  | 'grok';
40
+
41
/** Options accepted by the `toMatchSemanticOutput` matcher. */
interface SemanticMatchOptions {
  provider?: SupportedProvider;
  model?: string;
  /** Optional user-supplied SDK instance. */
  sdk?: unknown;
  /** Optional API key override (useful for OpenAI-compatible endpoints). */
  apiKey?: string;
  /** Optional base URL override for OpenAI-compatible APIs. */
  baseURL?: string;
}
48
+
49
/** Which side of an LLM step is evaluated: its prompt or its result. */
type EvaluationTarget =
  | 'prompt'
  | 'result';
50
+
51
/** Numeric comparison bounds; every field is optional. */
interface EvaluationCondition {
  greaterThan?: number;
  lessThan?: number;
  atLeast?: number;
  atMost?: number;
  equals?: number;
}
58
+
59
/** Configuration object accepted by the `toEvaluateOutputMetric` matcher. */
interface EvaluateOutputMetricConfig {
  evaluationPrompt: string;
  /** 'prompt' or 'result'; default 'result'. */
  target?: EvaluationTarget;
  /** 0-based index into LLM steps. */
  index?: number;
  /** 1-based alias for index. */
  nth?: number;
  /** Optional; default atLeast 0.7. */
  condition?: EvaluationCondition;
  provider?: SupportedProvider;
  model?: string;
  /** Optional SDK instance. */
  sdk?: unknown;
  /** Optional API key override (useful for OpenAI-compatible endpoints). */
  apiKey?: string;
  /** Optional base URL override for OpenAI-compatible APIs. */
  baseURL?: string;
}
71
+
72
/**
 * Module augmentation: merges ElasticDash's custom matcher signatures into
 * the `Matchers<R>` interface of the 'expect' package.
 */
declare module 'expect' {
  interface Matchers<R> {
    /** Matcher taking optional LLM step criteria. */
    toHaveLLMStep(config?: LLMStepConfig): R;
    /** Matcher taking the name of a tool. */
    toCallTool(toolName: string): R;
    /** Matcher taking an expected string plus optional provider/model options. */
    toMatchSemanticOutput(expected: string, options?: SemanticMatchOptions): R;
    /** Matcher taking optional custom step criteria. */
    toHaveCustomStep(config?: CustomStepConfig): R;
    /** Matcher taking a required prompt filter/assertion config. */
    toHavePromptWhere(config: PromptWhereConfig): R;
    /** Matcher taking a required metric config; its declared return type is `Promise<R>`. */
    toEvaluateOutputMetric(config: EvaluateOutputMetricConfig): Promise<R>;
  }
}
@@ -0,0 +1,2 @@
1
// Shorthand ambient module declarations: they let `tsx/esm` and `tsx/cjs` be
// imported without bundled typings (everything imported from them is `any`).
declare module 'tsx/esm';
declare module 'tsx/cjs';
@@ -0,0 +1,69 @@
1
/** A rerun task handled by the portal. */
export interface PortalTask {
  /** Unique task ID assigned by the backend. */
  taskId: string;
  /** What to rerun. */
  type: 'tool' | 'ai';
  /** Tool name or model name. */
  name: string;
  /** Tool arguments or LLM prompt/messages. */
  input: unknown;
  /** Model name (for AI tasks). */
  model?: string;
  /** LLM provider: openai, anthropic, gemini, grok, etc. */
  provider?: string;
  /** LLM generation parameters. */
  modelParameters?: {
    temperature?: number;
    max_tokens?: number;
  };
  /** Passthrough metadata (test group ID, expectation IDs, etc.). */
  metadata?: Record<string, unknown>;
  /** HTTP/DB events from the same trace to freeze during execution. */
  frozenEvents?: Array<{
    id: number;
    type: string;
    name: string;
    input: unknown;
    output: unknown;
    timestamp: number;
    durationMs: number | null;
    streamed?: boolean;
    streamRaw?: string | null;
  }>;
}
24
+
25
/** Result record for a portal task. */
export interface PortalTaskResult {
  taskId: string;
  ok: boolean;
  output: unknown;
  error?: string;
  durationMs: number;
  usage?: {
    inputTokens?: number;
    outputTokens?: number;
    totalTokens?: number;
  };
  /** Echoed from the original task. */
  metadata?: Record<string, unknown>;
}
39
+
40
/** Options for the portal server. */
export interface PortalServerOptions {
  /** Port to listen on (default 4574). */
  port?: number;
  /** Backend URL that results are POSTed to. */
  backendUrl: string;
  /** Auth token for incoming and outgoing requests. */
  apiKey?: string;
  /** Project root directory (default process.cwd()). */
  cwd?: string;
  /**
   * Origin domains allowed to send requests to this portal.
   * By default only the `backendUrl` domain and localhost are allowed;
   * list additional origins (e.g. 'https://app.elasticdash.com') to extend.
   */
  allowedOrigins?: string[];
}
56
+
57
/** Handle exposing a portal server's port, URL and an async `close` function. */
export interface PortalServerHandle {
  port: number;
  url: string;
  close: () => Promise<void>;
}
62
+
63
/** Status snapshot of the portal: queue length, current item and counters. */
export interface PortalStatus {
  ok: boolean;
  queueLength: number;
  processing: string | null;
  completed: number;
  failed: number;
}
@@ -0,0 +1,8 @@
1
const DEBUG_KEY = 'ELASTICDASH_DEBUG'

/**
 * Debug logger gated on the ELASTICDASH_DEBUG environment variable: output is
 * produced only when it is exactly '1'. Messages go to stderr so that callers
 * parsing stdout (e.g. `elasticdash run-tool`) get a clean JSON channel.
 *
 * @param args Values forwarded unchanged to `console.error`.
 */
export function debugLog(...args: unknown[]): void {
  // No `process` global available: nothing to check, stay silent.
  if (typeof process === 'undefined') return

  const flag = process.env?.[DEBUG_KEY]
  if (flag !== '1') return

  console.error(...args)
}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * License-error notifier.
3
+ *
4
+ * The ElasticDash backend returns HTTP 402 ("Payment Required") when the
5
+ * caller's account has no available license / active plan. The SDK can hit
6
+ * this from several places (telemetry batcher, telemetry push, CI upload),
7
+ * so we centralise the user-facing message here and print it at most once
8
+ * per process to avoid spamming stderr on every retry / event.
9
+ */
10
+
11
const BUY_URL = 'https://app.elasticdash.com'

// Process-wide guard: flips to true after the message has been printed once.
let alreadyNotified = false

/**
 * Reports whether `status` is HTTP 402 and, the first time that happens,
 * prints a message pointing the user at the page where a plan can be bought.
 * Later 402s still return true but print nothing.
 *
 * @param status  HTTP status code from the failed response.
 * @param context Optional short label for the failing subsystem (e.g.
 *                "telemetry", "ci-upload"); when non-empty it is included in
 *                the printed prefix to help users locate the source.
 * @returns true when `status` is 402, false otherwise, so callers can keep
 *          their existing branching.
 */
export function notifyLicenseError(status: number, context?: string): boolean {
  const isLicenseError = status === 402

  if (isLicenseError && !alreadyNotified) {
    alreadyNotified = true
    const prefix = context ? `[elasticdash:${context}]` : '[elasticdash]'
    // Printed via console.error unconditionally (not through the debug
    // logger): a missing license is a hard blocker, not a debug detail, so it
    // must be visible regardless of the ELASTICDASH_DEBUG setting.
    const lines = [
      '',
      `${prefix} No available license for your ElasticDash account (HTTP 402).`,
      `${prefix} Please go to ${BUY_URL} to buy a plan.`,
      '',
    ]
    for (const line of lines) {
      console.error(line)
    }
  }

  return isLicenseError
}

/** Clears the one-time guard so the message can be printed again. Exposed for tests only. */
export function resetLicenseErrorNotifier(): void {
  alreadyNotified = false
}
@@ -0,0 +1,25 @@
1
/**
 * Deep-clones a value and replaces any object property whose key matches
 * one of `keys` (case-insensitive) with "[REDACTED]".
 * Returns the original value (not a clone) when `keys` is empty.
 *
 * Arrays and objects are copied recursively; primitives are returned as-is.
 * `Date` instances are copied as dates, and circular or repeated references
 * are mapped onto a single clone, so cyclic payloads are handled safely.
 *
 * @param value Payload to sanitise; may be any value.
 * @param keys  Property names to redact, compared case-insensitively.
 * @returns The redacted clone, or `value` itself when `keys` is empty.
 */
export function redactPayload(value: unknown, keys: string[]): unknown {
  if (keys.length === 0) return value
  const lowerKeys = new Set(keys.map((k) => k.toLowerCase()))
  return redact(value, lowerKeys, new WeakMap<object, unknown>())
}

/**
 * Recursive worker behind `redactPayload`.
 *
 * @param value Current node of the payload.
 * @param keys  Lower-cased property names to redact.
 * @param seen  Maps every object already visited to its clone, which both
 *              terminates cycles and preserves shared references.
 */
function redact(
  value: unknown,
  keys: Set<string>,
  seen: WeakMap<object, unknown> = new WeakMap<object, unknown>(),
): unknown {
  if (value === null || value === undefined) return value
  if (typeof value !== 'object') return value

  // Already cloned (cycle or shared reference): reuse the existing clone.
  const existing = seen.get(value)
  if (existing !== undefined) return existing

  // A Date has no own enumerable keys, so the generic object path below
  // would flatten it to `{}`; copy it as a Date instead.
  if (value instanceof Date) return new Date(value.getTime())

  if (Array.isArray(value)) {
    const copy: unknown[] = new Array(value.length)
    // Register before descending so self-references resolve to `copy`.
    seen.set(value, copy)
    // forEach skips holes, so sparse arrays stay sparse like with map().
    value.forEach((item, index) => {
      copy[index] = redact(item, keys, seen)
    })
    return copy
  }

  const out: Record<string, unknown> = {}
  seen.set(value, out)
  for (const [k, v] of Object.entries(value as Record<string, unknown>)) {
    // defineProperty rather than `out[k] = …`: plain assignment to an own
    // "__proto__" key (possible after JSON.parse) would invoke the prototype
    // setter, dropping the key and replacing the clone's prototype.
    Object.defineProperty(out, k, {
      value: keys.has(k.toLowerCase()) ? '[REDACTED]' : redact(v, keys, seen),
      enumerable: true,
      writable: true,
      configurable: true,
    })
  }
  return out
}