elasticdash-sdk 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +775 -0
  3. package/dist/browser-ui.d.ts +43 -0
  4. package/dist/browser-ui.d.ts.map +1 -0
  5. package/dist/browser-ui.js +246 -0
  6. package/dist/browser-ui.js.map +1 -0
  7. package/dist/capture/event.d.ts +33 -0
  8. package/dist/capture/event.d.ts.map +1 -0
  9. package/dist/capture/event.js +2 -0
  10. package/dist/capture/event.js.map +1 -0
  11. package/dist/capture/index.d.ts +4 -0
  12. package/dist/capture/index.d.ts.map +1 -0
  13. package/dist/capture/index.js +4 -0
  14. package/dist/capture/index.js.map +1 -0
  15. package/dist/capture/recorder.d.ts +24 -0
  16. package/dist/capture/recorder.d.ts.map +1 -0
  17. package/dist/capture/recorder.js +46 -0
  18. package/dist/capture/recorder.js.map +1 -0
  19. package/dist/capture/replay.d.ts +20 -0
  20. package/dist/capture/replay.d.ts.map +1 -0
  21. package/dist/capture/replay.js +47 -0
  22. package/dist/capture/replay.js.map +1 -0
  23. package/dist/ci/api-client.d.ts +38 -0
  24. package/dist/ci/api-client.d.ts.map +1 -0
  25. package/dist/ci/api-client.js +96 -0
  26. package/dist/ci/api-client.js.map +1 -0
  27. package/dist/ci/benchmark.d.ts +33 -0
  28. package/dist/ci/benchmark.d.ts.map +1 -0
  29. package/dist/ci/benchmark.js +213 -0
  30. package/dist/ci/benchmark.js.map +1 -0
  31. package/dist/ci/ed-runner.d.ts +48 -0
  32. package/dist/ci/ed-runner.d.ts.map +1 -0
  33. package/dist/ci/ed-runner.js +260 -0
  34. package/dist/ci/ed-runner.js.map +1 -0
  35. package/dist/ci/executor.d.ts +13 -0
  36. package/dist/ci/executor.d.ts.map +1 -0
  37. package/dist/ci/executor.js +542 -0
  38. package/dist/ci/executor.js.map +1 -0
  39. package/dist/ci/git-info.d.ts +17 -0
  40. package/dist/ci/git-info.d.ts.map +1 -0
  41. package/dist/ci/git-info.js +102 -0
  42. package/dist/ci/git-info.js.map +1 -0
  43. package/dist/ci/index.d.ts +6 -0
  44. package/dist/ci/index.d.ts.map +1 -0
  45. package/dist/ci/index.js +4 -0
  46. package/dist/ci/index.js.map +1 -0
  47. package/dist/ci/measurement.d.ts +9 -0
  48. package/dist/ci/measurement.d.ts.map +1 -0
  49. package/dist/ci/measurement.js +15 -0
  50. package/dist/ci/measurement.js.map +1 -0
  51. package/dist/ci/replay.d.ts +31 -0
  52. package/dist/ci/replay.d.ts.map +1 -0
  53. package/dist/ci/replay.js +96 -0
  54. package/dist/ci/replay.js.map +1 -0
  55. package/dist/ci/reporters/default.d.ts +8 -0
  56. package/dist/ci/reporters/default.d.ts.map +1 -0
  57. package/dist/ci/reporters/default.js +46 -0
  58. package/dist/ci/reporters/default.js.map +1 -0
  59. package/dist/ci/reporters/index.d.ts +8 -0
  60. package/dist/ci/reporters/index.d.ts.map +1 -0
  61. package/dist/ci/reporters/index.js +14 -0
  62. package/dist/ci/reporters/index.js.map +1 -0
  63. package/dist/ci/reporters/json.d.ts +8 -0
  64. package/dist/ci/reporters/json.d.ts.map +1 -0
  65. package/dist/ci/reporters/json.js +14 -0
  66. package/dist/ci/reporters/json.js.map +1 -0
  67. package/dist/ci/reporters/junit.d.ts +8 -0
  68. package/dist/ci/reporters/junit.d.ts.map +1 -0
  69. package/dist/ci/reporters/junit.js +48 -0
  70. package/dist/ci/reporters/junit.js.map +1 -0
  71. package/dist/ci/runner.d.ts +3 -0
  72. package/dist/ci/runner.d.ts.map +1 -0
  73. package/dist/ci/runner.js +187 -0
  74. package/dist/ci/runner.js.map +1 -0
  75. package/dist/ci/test-discovery.d.ts +5 -0
  76. package/dist/ci/test-discovery.d.ts.map +1 -0
  77. package/dist/ci/test-discovery.js +11 -0
  78. package/dist/ci/test-discovery.js.map +1 -0
  79. package/dist/ci/test-loader.d.ts +19 -0
  80. package/dist/ci/test-loader.d.ts.map +1 -0
  81. package/dist/ci/test-loader.js +149 -0
  82. package/dist/ci/test-loader.js.map +1 -0
  83. package/dist/ci/test-registry.d.ts +42 -0
  84. package/dist/ci/test-registry.d.ts.map +1 -0
  85. package/dist/ci/test-registry.js +18 -0
  86. package/dist/ci/test-registry.js.map +1 -0
  87. package/dist/ci/trace-schema.d.ts +30 -0
  88. package/dist/ci/trace-schema.d.ts.map +1 -0
  89. package/dist/ci/trace-schema.js +66 -0
  90. package/dist/ci/trace-schema.js.map +1 -0
  91. package/dist/ci/trace-writer.d.ts +16 -0
  92. package/dist/ci/trace-writer.d.ts.map +1 -0
  93. package/dist/ci/trace-writer.js +108 -0
  94. package/dist/ci/trace-writer.js.map +1 -0
  95. package/dist/ci/types.d.ts +108 -0
  96. package/dist/ci/types.d.ts.map +1 -0
  97. package/dist/ci/types.js +3 -0
  98. package/dist/ci/types.js.map +1 -0
  99. package/dist/ci/upload-client.d.ts +74 -0
  100. package/dist/ci/upload-client.d.ts.map +1 -0
  101. package/dist/ci/upload-client.js +195 -0
  102. package/dist/ci/upload-client.js.map +1 -0
  103. package/dist/cli.d.ts +3 -0
  104. package/dist/cli.d.ts.map +1 -0
  105. package/dist/cli.js +716 -0
  106. package/dist/cli.js.map +1 -0
  107. package/dist/core/agent-state.d.ts +47 -0
  108. package/dist/core/agent-state.d.ts.map +1 -0
  109. package/dist/core/agent-state.js +137 -0
  110. package/dist/core/agent-state.js.map +1 -0
  111. package/dist/core/judge-utils.d.ts +22 -0
  112. package/dist/core/judge-utils.d.ts.map +1 -0
  113. package/dist/core/judge-utils.js +211 -0
  114. package/dist/core/judge-utils.js.map +1 -0
  115. package/dist/core/registry.d.ts +28 -0
  116. package/dist/core/registry.d.ts.map +1 -0
  117. package/dist/core/registry.js +52 -0
  118. package/dist/core/registry.js.map +1 -0
  119. package/dist/dashboard-server.d.ts +65 -0
  120. package/dist/dashboard-server.d.ts.map +1 -0
  121. package/dist/dashboard-server.js +3940 -0
  122. package/dist/dashboard-server.js.map +1 -0
  123. package/dist/execution/tool-runner.d.ts +26 -0
  124. package/dist/execution/tool-runner.d.ts.map +1 -0
  125. package/dist/execution/tool-runner.js +316 -0
  126. package/dist/execution/tool-runner.js.map +1 -0
  127. package/dist/html/dashboard.html +2218 -0
  128. package/dist/http.d.ts +14 -0
  129. package/dist/http.d.ts.map +1 -0
  130. package/dist/http.js +13 -0
  131. package/dist/http.js.map +1 -0
  132. package/dist/index.cjs +8102 -0
  133. package/dist/index.d.ts +61 -0
  134. package/dist/index.d.ts.map +1 -0
  135. package/dist/index.js +67 -0
  136. package/dist/index.js.map +1 -0
  137. package/dist/interceptors/ai-interceptor.d.ts +26 -0
  138. package/dist/interceptors/ai-interceptor.d.ts.map +1 -0
  139. package/dist/interceptors/ai-interceptor.js +756 -0
  140. package/dist/interceptors/ai-interceptor.js.map +1 -0
  141. package/dist/interceptors/db-auto.d.ts +8 -0
  142. package/dist/interceptors/db-auto.d.ts.map +1 -0
  143. package/dist/interceptors/db-auto.js +217 -0
  144. package/dist/interceptors/db-auto.js.map +1 -0
  145. package/dist/interceptors/db.d.ts +23 -0
  146. package/dist/interceptors/db.d.ts.map +1 -0
  147. package/dist/interceptors/db.js +137 -0
  148. package/dist/interceptors/db.js.map +1 -0
  149. package/dist/interceptors/http.d.ts +28 -0
  150. package/dist/interceptors/http.d.ts.map +1 -0
  151. package/dist/interceptors/http.js +356 -0
  152. package/dist/interceptors/http.js.map +1 -0
  153. package/dist/interceptors/side-effects.d.ts +7 -0
  154. package/dist/interceptors/side-effects.d.ts.map +1 -0
  155. package/dist/interceptors/side-effects.js +72 -0
  156. package/dist/interceptors/side-effects.js.map +1 -0
  157. package/dist/interceptors/telemetry-push.d.ts +142 -0
  158. package/dist/interceptors/telemetry-push.d.ts.map +1 -0
  159. package/dist/interceptors/telemetry-push.js +463 -0
  160. package/dist/interceptors/telemetry-push.js.map +1 -0
  161. package/dist/interceptors/tool.d.ts +2 -0
  162. package/dist/interceptors/tool.d.ts.map +1 -0
  163. package/dist/interceptors/tool.js +274 -0
  164. package/dist/interceptors/tool.js.map +1 -0
  165. package/dist/interceptors/workflow-ai.d.ts +5 -0
  166. package/dist/interceptors/workflow-ai.d.ts.map +1 -0
  167. package/dist/interceptors/workflow-ai.js +382 -0
  168. package/dist/interceptors/workflow-ai.js.map +1 -0
  169. package/dist/internals/conditional-recorder.d.ts +21 -0
  170. package/dist/internals/conditional-recorder.d.ts.map +1 -0
  171. package/dist/internals/conditional-recorder.js +54 -0
  172. package/dist/internals/conditional-recorder.js.map +1 -0
  173. package/dist/internals/mock-resolver.d.ts +146 -0
  174. package/dist/internals/mock-resolver.d.ts.map +1 -0
  175. package/dist/internals/mock-resolver.js +427 -0
  176. package/dist/internals/mock-resolver.js.map +1 -0
  177. package/dist/matchers/index.d.ts +96 -0
  178. package/dist/matchers/index.d.ts.map +1 -0
  179. package/dist/matchers/index.js +668 -0
  180. package/dist/matchers/index.js.map +1 -0
  181. package/dist/observability.d.ts +82 -0
  182. package/dist/observability.d.ts.map +1 -0
  183. package/dist/observability.js +471 -0
  184. package/dist/observability.js.map +1 -0
  185. package/dist/portal-executor.d.ts +30 -0
  186. package/dist/portal-executor.d.ts.map +1 -0
  187. package/dist/portal-executor.js +324 -0
  188. package/dist/portal-executor.js.map +1 -0
  189. package/dist/portal-server.d.ts +3 -0
  190. package/dist/portal-server.d.ts.map +1 -0
  191. package/dist/portal-server.js +279 -0
  192. package/dist/portal-server.js.map +1 -0
  193. package/dist/proxy/llm-capture.d.ts +14 -0
  194. package/dist/proxy/llm-capture.d.ts.map +1 -0
  195. package/dist/proxy/llm-capture.js +264 -0
  196. package/dist/proxy/llm-capture.js.map +1 -0
  197. package/dist/reporter.d.ts +3 -0
  198. package/dist/reporter.d.ts.map +1 -0
  199. package/dist/reporter.js +72 -0
  200. package/dist/reporter.js.map +1 -0
  201. package/dist/runWorkflowSubprocess.d.ts +14 -0
  202. package/dist/runWorkflowSubprocess.d.ts.map +1 -0
  203. package/dist/runWorkflowSubprocess.js +66 -0
  204. package/dist/runWorkflowSubprocess.js.map +1 -0
  205. package/dist/runner.d.ts +16 -0
  206. package/dist/runner.d.ts.map +1 -0
  207. package/dist/runner.js +138 -0
  208. package/dist/runner.js.map +1 -0
  209. package/dist/socket-connector.d.ts +22 -0
  210. package/dist/socket-connector.d.ts.map +1 -0
  211. package/dist/socket-connector.js +104 -0
  212. package/dist/socket-connector.js.map +1 -0
  213. package/dist/telemetry-batcher.d.ts +56 -0
  214. package/dist/telemetry-batcher.d.ts.map +1 -0
  215. package/dist/telemetry-batcher.js +143 -0
  216. package/dist/telemetry-batcher.js.map +1 -0
  217. package/dist/test-setup.d.ts +12 -0
  218. package/dist/test-setup.d.ts.map +1 -0
  219. package/dist/test-setup.js +13 -0
  220. package/dist/test-setup.js.map +1 -0
  221. package/dist/tool-registry.d.ts +31 -0
  222. package/dist/tool-registry.d.ts.map +1 -0
  223. package/dist/tool-registry.js +73 -0
  224. package/dist/tool-registry.js.map +1 -0
  225. package/dist/tool-runner-worker.d.ts +2 -0
  226. package/dist/tool-runner-worker.d.ts.map +1 -0
  227. package/dist/tool-runner-worker.js +215 -0
  228. package/dist/tool-runner-worker.js.map +1 -0
  229. package/dist/trace-adapter/context.d.ts +72 -0
  230. package/dist/trace-adapter/context.d.ts.map +1 -0
  231. package/dist/trace-adapter/context.js +80 -0
  232. package/dist/trace-adapter/context.js.map +1 -0
  233. package/dist/tracing.d.ts +2 -0
  234. package/dist/tracing.d.ts.map +1 -0
  235. package/dist/tracing.js +59 -0
  236. package/dist/tracing.js.map +1 -0
  237. package/dist/trigger-executor.d.ts +12 -0
  238. package/dist/trigger-executor.d.ts.map +1 -0
  239. package/dist/trigger-executor.js +130 -0
  240. package/dist/trigger-executor.js.map +1 -0
  241. package/dist/types/portal.d.ts +76 -0
  242. package/dist/types/portal.d.ts.map +1 -0
  243. package/dist/types/portal.js +2 -0
  244. package/dist/types/portal.js.map +1 -0
  245. package/dist/utils/debug.d.ts +3 -0
  246. package/dist/utils/debug.d.ts.map +1 -0
  247. package/dist/utils/debug.js +8 -0
  248. package/dist/utils/debug.js.map +1 -0
  249. package/dist/utils/license-error.d.ts +23 -0
  250. package/dist/utils/license-error.d.ts.map +1 -0
  251. package/dist/utils/license-error.js +42 -0
  252. package/dist/utils/license-error.js.map +1 -0
  253. package/dist/utils/redact.d.ts +7 -0
  254. package/dist/utils/redact.d.ts.map +1 -0
  255. package/dist/utils/redact.js +26 -0
  256. package/dist/utils/redact.js.map +1 -0
  257. package/dist/workflow-runner-worker.d.ts +2 -0
  258. package/dist/workflow-runner-worker.d.ts.map +1 -0
  259. package/dist/workflow-runner-worker.js +329 -0
  260. package/dist/workflow-runner-worker.js.map +1 -0
  261. package/dist/workflow-runner.d.ts +14 -0
  262. package/dist/workflow-runner.d.ts.map +1 -0
  263. package/dist/workflow-runner.js +34 -0
  264. package/dist/workflow-runner.js.map +1 -0
  265. package/docs/agent-coding-instructions.md +138 -0
  266. package/docs/agent-integration-guide.md +564 -0
  267. package/docs/agents.md +140 -0
  268. package/docs/dashboard.md +394 -0
  269. package/docs/deno.md +69 -0
  270. package/docs/instrumentation.md +424 -0
  271. package/docs/langfuse-trace-structure.md +145 -0
  272. package/docs/matchers.md +173 -0
  273. package/docs/observability_contract.md +192 -0
  274. package/docs/observability_mode.md +195 -0
  275. package/docs/quickstart.md +621 -0
  276. package/docs/security-compliance.md +566 -0
  277. package/docs/test-writing-guidelines.md +444 -0
  278. package/docs/tools.md +165 -0
  279. package/docs/workflow-modes.md +253 -0
  280. package/package.json +76 -0
  281. package/src/browser-ui.ts +281 -0
  282. package/src/capture/event.ts +30 -0
  283. package/src/capture/index.ts +3 -0
  284. package/src/capture/recorder.ts +62 -0
  285. package/src/capture/replay.ts +55 -0
  286. package/src/ci/api-client.ts +136 -0
  287. package/src/ci/benchmark.ts +257 -0
  288. package/src/ci/ed-runner.ts +351 -0
  289. package/src/ci/executor.ts +671 -0
  290. package/src/ci/git-info.ts +127 -0
  291. package/src/ci/index.ts +5 -0
  292. package/src/ci/measurement.ts +25 -0
  293. package/src/ci/replay.ts +127 -0
  294. package/src/ci/reporters/default.ts +50 -0
  295. package/src/ci/reporters/index.ts +21 -0
  296. package/src/ci/reporters/json.ts +18 -0
  297. package/src/ci/reporters/junit.ts +61 -0
  298. package/src/ci/runner.ts +208 -0
  299. package/src/ci/test-discovery.ts +16 -0
  300. package/src/ci/test-loader.ts +187 -0
  301. package/src/ci/test-registry.ts +62 -0
  302. package/src/ci/trace-schema.ts +96 -0
  303. package/src/ci/trace-writer.ts +107 -0
  304. package/src/ci/types.ts +115 -0
  305. package/src/ci/upload-client.ts +300 -0
  306. package/src/cli.ts +811 -0
  307. package/src/core/agent-state.ts +162 -0
  308. package/src/core/judge-utils.ts +232 -0
  309. package/src/core/registry.ts +92 -0
  310. package/src/dashboard-server.ts +2047 -0
  311. package/src/execution/tool-runner.ts +352 -0
  312. package/src/html/dashboard.html +2218 -0
  313. package/src/http.ts +13 -0
  314. package/src/index.ts +138 -0
  315. package/src/interceptors/ai-interceptor.ts +798 -0
  316. package/src/interceptors/db-auto.ts +243 -0
  317. package/src/interceptors/db.ts +156 -0
  318. package/src/interceptors/http.ts +393 -0
  319. package/src/interceptors/side-effects.ts +83 -0
  320. package/src/interceptors/telemetry-push.ts +537 -0
  321. package/src/interceptors/tool.ts +287 -0
  322. package/src/interceptors/workflow-ai.ts +419 -0
  323. package/src/internals/conditional-recorder.ts +63 -0
  324. package/src/internals/mock-resolver.ts +492 -0
  325. package/src/matchers/index.ts +824 -0
  326. package/src/observability.ts +501 -0
  327. package/src/portal-executor.ts +355 -0
  328. package/src/portal-server.ts +304 -0
  329. package/src/proxy/llm-capture.ts +301 -0
  330. package/src/reporter.ts +81 -0
  331. package/src/runWorkflowSubprocess.ts +74 -0
  332. package/src/runner.ts +178 -0
  333. package/src/socket-connector.ts +117 -0
  334. package/src/telemetry-batcher.ts +191 -0
  335. package/src/test-setup.ts +16 -0
  336. package/src/tool-registry.ts +94 -0
  337. package/src/tool-runner-worker.ts +244 -0
  338. package/src/trace-adapter/context.ts +156 -0
  339. package/src/tracing.ts +62 -0
  340. package/src/trigger-executor.ts +171 -0
  341. package/src/types/agent.d.ts +63 -0
  342. package/src/types/expect.d.ts +81 -0
  343. package/src/types/modules.d.ts +2 -0
  344. package/src/types/portal.ts +69 -0
  345. package/src/utils/debug.ts +8 -0
  346. package/src/utils/license-error.ts +43 -0
  347. package/src/utils/redact.ts +25 -0
  348. package/src/workflow-runner-worker.ts +386 -0
  349. package/src/workflow-runner.ts +58 -0
@@ -0,0 +1,424 @@
1
+ # Instrumentation Guide — `ed_tools`, `ed_workflows`, and `ed_agents`
2
+
3
+ This document describes the three instrumentation files that connect your production business logic to the ElasticDash test SDK.
4
+
5
+ > **Note — examples are from demo projects.**
6
+ > Code samples use two reference projects: a Deno-based Pokémon API agent and a Node.js/Next.js chat application. Function names such as `getPokemon`, `generateAIResponse`, `dataService`, and `chatHandler` are **placeholders** — replace them with your own tool functions, workflow functions, and source file names. The patterns and rules apply to any project; runtime-specific differences are called out explicitly in the `ed_tools.ts` section.
7
+
8
+ ---
9
+
10
+ ## Prerequisites
11
+
12
+ Before creating any of the three `ed_` files, the following conditions must hold in your production codebase.
13
+
14
+ ### 1. Tool logic must be isolated in callable functions
15
+
16
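+ Each tool must be a standalone exported async function that takes a single typed input object and returns a plain value. It must not close over HTTP context, framework state, or database clients injected at construction time. (Tools that call clients or services imported at module level can still be wrapped — see Pattern B below.)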
17
+
18
+ ```ts
19
+ // your-tools.ts — correct shape
20
+ // Replace `getPokemon` / `GetPokemonInput` with your own tool name and input type
21
+ export async function getPokemon(input: { name_or_id: string | number }) {
22
+ const res = await fetch(`https://pokeapi.co/api/v2/pokemon/${input.name_or_id}`)
23
+ return condense(await res.json())
24
+ }
25
+ ```
26
+
27
+ A tool that reads from `ctx.request`, accepts a framework `Context`, or calls a database client injected at construction time **cannot** be wrapped directly. Extract the pure logic first.
28
+
29
+ ### 2. Tool imports must be compatible with your runtime
30
+
31
+ - **Deno projects:** tools may use `https://esm.sh/` URLs and Deno-native globals
32
+ - **Node.js projects:** tools must use npm packages — no `Deno.*`, `jsr:`, or `https://deno.land/` URLs
33
+
34
+ The global `fetch` API works in both runtimes.
35
+
36
+ ### 3. A `dispatchTool(name, args)` function must exist *(Pattern A only)*
37
+
38
+ If you use the `withTrace` HOF pattern (Pattern A below), the workflow calls tools through a dispatcher keyed by string name. This is the seam `ed_tools.ts` replaces:
39
+
40
+ ```ts
41
+ // your-tools.ts — required dispatcher shape (Pattern A only)
42
+ // Replace case values with your own tool names
43
+ export async function dispatchTool(name: string, args: Record<string, unknown>) {
44
+ switch (name) {
45
+ case 'getPokemon': return getPokemon(args as GetPokemonInput)
46
+ default: throw new Error(`Unknown tool: ${name}`)
47
+ }
48
+ }
49
+ ```
50
+
51
+ The string case values (`'getPokemon'`) must match the `function.name` fields in the LLM tool definitions array. If the LLM sees `"name": "get_pokemon"` but `dispatchTool` switches on `'getPokemon'`, no `case` matches: every call falls through to the `default` branch and throws `Unknown tool: get_pokemon` instead of running the tool.
52
+
53
+ Node.js projects using the inline async pattern (Pattern B) do not need a central dispatcher.
54
+
55
+ ### 4. Workflow logic must be a plain callable function
56
+
57
+ The agentic loop must be an exported async function whose parameters and return value are plain JSON-serialisable values — no HTTP request objects, no framework context, no live database clients.
58
+
59
+ ```ts
60
+ // your-ai.ts — correct shape
61
+ // Replace `generateAIResponse` and its parameters with your own workflow function
62
+ export async function generateAIResponse(
63
+ chatId: string,
64
+ userId: string,
65
+ history: ChatMessage[],
66
+ ): Promise<string> { ... }
67
+ ```
68
+
69
+ If the workflow currently accepts a framework `Context` or a database client, those dependencies must be extracted before `ed_workflows.ts` can re-export the function. Move them to the call site (the route handler) and pass only plain values into the workflow.
70
+
71
+ ### 5. The workflow must call tools through the dispatcher *(Pattern A only)*
72
+
73
+ For Pattern A, your workflow file must call `dispatchTool()` — not individual tool functions directly — so that swapping the import from your tools file to `ed_tools.ts` instruments all tool calls at once:
74
+
75
+ ```ts
76
+ // your-ai.ts — required pattern (Pattern A only)
77
+ const toolResult = await dispatchTool(tc.function.name, args)
78
+ ```
79
+
80
+ For Pattern B (Node.js inline), the workflow imports individual tool functions from `ed_tools.ts` directly.
81
+
82
+ ### 6. Project files for the test runner must exist
83
+
84
+ Three files are needed alongside your source:
85
+
86
+ **`package.json`**
87
+
88
+ ```json
89
+ {
90
+ "type": "module",
91
+ "scripts": {
92
+ "test": "elasticdash",
93
+ "dashboard": "elasticdash dashboard"
94
+ },
95
+ "dependencies": {
96
+ "elasticdash-sdk": "<version or local path>",
97
+ "openai": "^6.x"
98
+ }
99
+ }
100
+ ```
101
+
102
+ **`tsconfig.json`** — scoped to only the files the test runner needs:
103
+
104
+ ```json
105
+ {
106
+ "compilerOptions": {
107
+ "target": "ES2022",
108
+ "module": "ESNext",
109
+ "moduleResolution": "bundler",
110
+ "esModuleInterop": true,
111
+ "strict": true,
112
+ "skipLibCheck": true,
113
+ "types": ["node"]
114
+ },
115
+ "include": [
116
+ "your-tools.ts",
117
+ "ed_tools.ts",
118
+ "ed_workflows.ts",
119
+ "elasticdash.config.ts",
120
+ "**/*.ai.test.ts"
121
+ ]
122
+ }
123
+ ```
124
+
125
+ **`elasticdash.config.ts`**
126
+
127
+ ```ts
128
+ export default {
129
+ testMatch: ['**/*.ai.test.ts'],
130
+ traceMode: 'local' as const,
131
+ }
132
+ ```
133
+
134
+ ---
135
+
136
+ ## `ed_tools.ts` — Instrumented tool wrappers
137
+
138
+ `ed_tools.ts` wraps every tool function from your tools file with `recordToolCall()` so each invocation is captured in the active ElasticDash trace.
139
+
140
+ There are two patterns. Choose the one that fits your project structure.
141
+
142
+ > **Import style note:**
143
+ > Deno projects use `.ts` extensions in relative imports (`from './tools.ts'`).
144
+ > Node.js projects with `tsx` omit extensions (`from './services/dataService'`).
145
+
146
+ ---
147
+
148
+ ### Pattern A — `withTrace` HOF *(Deno and simple projects)*
149
+
150
+ Use this pattern when your tools are pure functions and the workflow calls them through a central `dispatchTool` dispatcher.
151
+
152
+ #### The `withTrace` helper
153
+
154
+ Copy this as-is. All tool functions go through this single private helper:
155
+
156
+ ```ts
157
+ async function withTrace<I, O>(
158
+ toolName: string,
159
+ input: I,
160
+ fn: (input: I) => Promise<O>,
161
+ ): Promise<O> {
162
+ const result = await fn(input)
163
+ try {
164
+ const { recordToolCall } = await import('elasticdash-sdk')
165
+ recordToolCall(toolName, input, result)
166
+ } catch { /* tracing must never block business logic */ }
167
+ return result
168
+ }
169
+ ```
170
+
171
+ #### Exported tool functions
172
+
173
+ Each tool gets a one-liner delegate:
174
+
175
+ ```ts
176
+ // Replace `getPokemon` / `_getPokemon` with your tool name
177
+ export function getPokemon(input: Parameters<typeof _getPokemon>[0]) {
178
+ return withTrace('getPokemon', input, _getPokemon)
179
+ }
180
+ ```
181
+
182
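+ `_getPokemon` is the raw function imported from your tools file under an alias (for example `import { getPokemon as _getPokemon } from './tools.ts'`). `Parameters<typeof _fn>[0]` keeps input types in sync with your tools file — no need to duplicate type definitions.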
183
+
184
+ #### The instrumented dispatcher
185
+
186
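+ `ed_tools.ts` exports its own `dispatchTool` that routes to the traced functions, not the raw ones. Unknown names fall through to the raw dispatcher from your tools file, imported under the alias `_dispatchTool`: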
187
+
188
+ ```ts
189
+ // Replace case strings with your own tool names
190
+ export async function dispatchTool(name: string, args: Record<string, unknown>) {
191
+ switch (name) {
192
+ case 'getPokemon': return getPokemon(args as Parameters<typeof _getPokemon>[0])
193
+ // ... one case per tool
194
+ default: return _dispatchTool(name, args) // fallback to un-traced for unknowns
195
+ }
196
+ }
197
+ ```
198
+
199
+ Your workflow file imports `dispatchTool` from `ed_tools.ts` instead of from your raw tools file. This single import change makes the entire production workflow observable by ElasticDash.
200
+
201
+ #### When you add a new tool
202
+
203
+ 1. Add a one-liner export in `ed_tools.ts` using `withTrace`
204
+ 2. Add a `case` in `ed_tools.ts`'s `dispatchTool` switch
205
+ 3. Add a `case` in your tools file's `dispatchTool` switch (the fallback)
206
+ 4. Add a matching entry in the LLM `TOOLS` array with the same name string
207
+
208
+ All four must use the same name string or tool calls will be dispatched incorrectly.
209
+
210
+ ---
211
+
212
+ ### Pattern B — Inline async with mock support *(Node.js projects)*
213
+
214
+ Use this pattern when tools call database clients or framework services that are imported at module level (not pure `fetch`), when there is no central dispatcher, or when you want mock support in the dashboard's "Validate Updated Flow with Live Data" panel.
215
+
216
+ #### The `resolveMock` and `safeRecordToolCall` helpers
217
+
218
+ Copy these as-is — no customisation needed.
219
+
220
+ `resolveMock` enables the dashboard's **Tool Mocking** panel: it checks whether a tool should return recorded data instead of calling the real service. It is a zero-cost no-op outside the ElasticDash worker subprocess. Pattern A does not support tool mocking.
221
+
222
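+ `safeRecordToolCall` records the tool call in the trace, but only when running inside the worker subprocess. When the `__ELASTICDASH_WORKER__` global is not set it returns immediately without recording anything, so it is a no-op in production.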
223
+
224
+ ```ts
225
+ function resolveMock(toolName: string): { mocked: true; result: unknown } | { mocked: false } {
226
+ const g = globalThis as any
227
+ const mocks = g.__ELASTICDASH_TOOL_MOCKS__
228
+ if (!mocks) return { mocked: false }
229
+
230
+ const entry = mocks[toolName]
231
+ if (!entry || entry.mode === 'live') return { mocked: false }
232
+
233
+ if (!g.__ELASTICDASH_TOOL_CALL_COUNTERS__) g.__ELASTICDASH_TOOL_CALL_COUNTERS__ = {}
234
+ const counters = g.__ELASTICDASH_TOOL_CALL_COUNTERS__
235
+ counters[toolName] = (counters[toolName] ?? 0) + 1
236
+ const callNumber = counters[toolName]
237
+
238
+ if (entry.mode === 'mock-all') {
239
+ const data = entry.mockData ?? {}
240
+ const result = data[callNumber] !== undefined ? data[callNumber] : data[0]
241
+ return { mocked: true, result }
242
+ }
243
+
244
+ if (entry.mode === 'mock-specific') {
245
+ const indices = entry.callIndices ?? []
246
+ if (indices.includes(callNumber)) {
247
+ return { mocked: true, result: (entry.mockData ?? {})[callNumber] }
248
+ }
249
+ return { mocked: false }
250
+ }
251
+
252
+ return { mocked: false }
253
+ }
254
+
255
+ async function safeRecordToolCall(tool: string, input: any, result: any) {
256
+ if (!(globalThis as any).__ELASTICDASH_WORKER__) return
257
+ try {
258
+ const { recordToolCall } = await import('elasticdash-sdk')
259
+ recordToolCall(tool, input, result)
260
+ } catch { /* tracing must never block business logic */ }
261
+ }
262
+ ```
263
+
264
+ #### Per-tool inline pattern
265
+
266
+ Each exported tool function follows the same shape — mock check, real call, record:
267
+
268
+ ```ts
269
+ // Replace `dataService`, `runSelectQuery`, and import path with your own
270
+ import { runSelectQuery } from './services/dataService'
271
+
272
+ export const dataService = async (input: any) => {
273
+ const mock = resolveMock('dataService')
274
+ if (mock.mocked) {
275
+ await safeRecordToolCall('dataService', input, mock.result)
276
+ return mock.result
277
+ }
278
+
279
+ const { query } = input as { query: string }
280
+ return await runSelectQuery(query)
281
+ .then(async (res: any) => {
282
+ await safeRecordToolCall('dataService', input, res)
283
+ return res
284
+ })
285
+ .catch(async (err: any) => {
286
+ await safeRecordToolCall('dataService', input, err)
287
+ throw err
288
+ })
289
+ }
290
+ ```
291
+
292
+ This pattern does **not** require a central `dispatchTool`. The workflow imports each tool directly from `ed_tools.ts`.
293
+
294
+ #### When you add a new tool
295
+
296
+ 1. Add an import for the underlying service function at the top of `ed_tools.ts`
297
+ 2. Add an exported async function with the mock check and `safeRecordToolCall` pattern
298
+ 3. Add a matching entry in the LLM `TOOLS` array with the same name string
299
+
300
+ ---
301
+
302
+ ## `ed_workflows.ts` — Workflow adapter
303
+
304
+ `ed_workflows.ts` re-exports workflow functions to give the ElasticDash runner a clean import surface. Every exported function must:
305
+
306
+ - Accept only plain JSON-serialisable inputs (strings, numbers, arrays, plain objects)
307
+ - Return only plain JSON-serialisable outputs
308
+ - Not depend on framework runtime APIs, HTTP request context, or live service clients
309
+
310
+ Simple case — direct re-export:
311
+
312
+ ```ts
313
+ // Replace `generateAIResponse` and `./ai.ts` with your workflow function and source file
314
+ export { generateAIResponse } from './ai.ts'
315
+ ```
316
+
317
+ **Node.js / Next.js projects:** Workflow logic often lives inside a framework route handler. `ed_workflows.ts` is where you strip the framework types and expose a plain-value wrapper:
318
+
319
+ ```ts
320
+ // ed_workflows.ts — Node.js/Next.js example
321
+ // Replace names and import paths with your own
322
+ import { chatHandler as _chatHandler } from './app/api/chat/route'
323
+
324
+ export async function chatHandler(input: { message: string; sessionId: string }) {
325
+ return _chatHandler(input)
326
+ }
327
+ ```
328
+
329
+ If a workflow ever acquires a non-serialisable parameter such as a database client, instantiate it here instead of passing it in:
330
+
331
+ ```ts
332
+ // ed_workflows.ts — adapter example if your workflow gained a db parameter
333
+ import { generateAIResponse as _generateAIResponse } from './ai.ts'
334
+ import { createServiceClient } from './supabase_client.ts'
335
+
336
+ export async function generateAIResponse(
337
+ chatId: string,
338
+ userId: string,
339
+ history: { role: string; content: string }[],
340
+ ) {
341
+ const db = createServiceClient() // instantiated here, not passed in
342
+ return _generateAIResponse(chatId, userId, history, db)
343
+ }
344
+ ```
345
+
346
+ ---
347
+
348
+ ## `ed_agents.ts` — Structured plan/task execution
349
+
350
+ `ed_agents.ts` exposes a three-function API for multi-step agents with mid-trace resumption.
351
+
352
+ In your project, `ed_agents.ts` is typically a thin re-export pointing to wherever `plannerAgent` and `executorAgent` live in your codebase:
353
+
354
+ ```ts
355
+ // Replace the import path with wherever your agent logic lives
356
+ export { plannerAgent, executorAgent } from './utils/aiHandler'
357
+ ```
358
+
359
+ If you haven't written your own planner/executor yet, use the SDK's reference implementations:
360
+
361
+ ```ts
362
+ export { plannerAgent, executorAgent, resumeAgentFromTrace } from 'elasticdash-sdk'
363
+ ```
364
+
365
+ ### Preconditions
366
+
367
+ **All agent tools must be exported from `ed_tools.ts`.** The executor resolves tool functions by string name from `ed_tools.ts` exports. Any tool not exported there will cause the task to fail with `Tool "x" not found in registry`.
368
+
369
+ **Task inputs and outputs must be JSON-serialisable.** Non-serialisable values (class instances, functions, circular references) will be lost when tasks wire their outputs as inputs to subsequent tasks.
370
+
371
+ ### The three functions
372
+
373
+ **`plannerAgent(userQuery, context)`**
374
+
375
+ Converts a user query and optional context into an `AgentPlan` — a typed list of `AgentTask` objects. Each task declares:
376
+
377
+ - `id` — unique string identifier
378
+ - `tool` — the string name of the tool to call (must match an export in `ed_tools.ts`)
379
+ - `input` — the input object, which may contain `$ref` placeholders resolved from earlier task outputs
380
+
381
+ ```ts
382
+ {
383
+ id: 'task-2',
384
+ tool: 'taskSelectorService', // replace with your tool name
385
+ input: {
386
+ queryEmbedding: { $ref: 'task-1.output.embedding' }, // filled from task-1's result
387
+ topK: 3,
388
+ },
389
+ }
390
+ ```
391
+
392
+ The default `plannerAgent` builds a static plan. Replace it with LLM-based planning logic to generate tasks dynamically.
393
+
394
+ **`executorAgent(plan, resumeFrom?)`**
395
+
396
+ Executes tasks sequentially. On any failure, marks the plan `'failed'` and stops — remaining tasks do not run.
397
+
398
+ The `resumeFrom` parameter (default `0`) skips all tasks before that index, using their already-recorded outputs for `$ref` resolution. This enables mid-trace resumption from any task.
399
+
400
+ **`resumeAgentFromTrace(state)`**
401
+
402
+ Resumes a partially-completed run from a serialized agent state. Use this to restart a failed or paused agent from any task without replaying steps that already succeeded.
403
+
404
+ ### Relationship between the three files
405
+
406
+ ```text
407
+ ed_agents.ts
408
+ └── executorAgent() resolves tools by name
409
+
410
+
411
+ ed_tools.ts (all exported tool functions)
412
+ ├── yourTool1() ─┐
413
+ ├── yourTool2() ─┤─ all wrapped with withTrace() or safeRecordToolCall() → recordToolCall()
414
+ └── ... ─┘
415
+
416
+
417
+ your-tools.ts / your service files (pure business logic)
418
+
419
+ ed_workflows.ts
420
+ └── re-exports your workflow function from your workflow file
421
+ └── workflow calls dispatchTool() or individual tools imported from ed_tools.ts
422
+ ```
423
+
424
+ `ed_tools.ts` is the single instrumentation point. Whether a tool call originates from the LLM-driven agentic loop or from `executorAgent`, it flows through the tracing wrapper and is recorded identically.
@@ -0,0 +1,145 @@
1
+ # Langfuse Trace Structure for ElasticDash Replay
2
+
3
+ ElasticDash reads Langfuse traces to replay and compare workflow runs. For replay to work correctly, observations in the trace must follow two structural rules.
4
+
5
+ > **Note — examples use demo names.**
6
+ > Span names like `"generateAIResponse"`, `"tool-getPokemon"`, and `"tool-listPokemon"` are from a sample Pokémon demo project. Replace them with your own workflow function name and tool names.
7
+
8
+ ---
9
+
10
+ ## Rule A — Parent workflow span
11
+
12
+ There must be a span whose `name` exactly matches the exported workflow function name. Its `input` must be the workflow function's arguments in the same order they appear in the function signature, arranged as an array when there are multiple parameters. Its `output` must be the workflow's return value.
13
+
14
+ This is what ElasticDash uses to re-invoke the workflow with the original inputs and diff the new output against the recorded one.
15
+
16
+ ```ts
17
+ // Replace "generateAIResponse" with your workflow function name
18
+ const workflowSpan = trace.span({
19
+ name: "generateAIResponse", // must equal the exported function name exactly
20
+ input: [chatId, userId, history], // all args in declaration order, wrapped in an array
21
+ startTime: workflowStartTime,
22
+ })
23
+
24
+ // ... workflow executes ...
25
+ const response = await generateAIResponse(chatId, userId, history);
26
+
27
+ workflowSpan.end({ output: response }) // must be the actual return value
28
+ ```
29
+
30
+ If the function takes a single argument, `input` can be the value directly rather than a one-element array. The only requirement is that the stored value is directly usable as the argument for re-invocation with no transformation.
31
+
32
+ ### Multiple parameters → always use an array
33
+
34
+ ```ts
35
+ // Function signature (replace with your own):
36
+ async function generateAIResponse(chatId: string, userId: string, history: ChatMessage[])
37
+
38
+ // Correct span input — preserves argument identity and order:
39
+ input: [chatId, userId, history]
40
+
41
+ // Wrong — loses argument positions, so the call cannot be reconstructed for re-invocation:
42
+ input: { chatId, userId, history }
43
+
44
+ // Wrong — loses all but the last argument:
45
+ input: history
46
+ ```
47
+
48
+ ### Output must be set before `flushAsync`
49
+
50
+ `workflowSpan.end({ output })` must be called and `await langfuse.flushAsync()` must complete before the function returns. If the span is flushed without an output, ElasticDash records the run with no output and output comparison is impossible.
51
+
52
+ ```ts
53
+ // Correct — every return path ends the span and awaits flush
54
+ workflowSpan.end({ output: response })
55
+ await langfuse.flushAsync()
56
+ return response
57
+ ```
58
+
59
+ ---
60
+
61
+ ## Rule B — Tool call spans
62
+
63
+ Every tool invocation must produce a child span of the workflow span. The span must be identifiable as a tool call by satisfying at least one of:
64
+
65
+ - Its `name` is in the format `tool-[functionName]` (e.g. `tool-getPokemon`, `tool-listPokemon`) — replace with your own tool names
66
+ - Its observation `type` is `'TOOL'` and the name is the function name (`[functionName]`)
67
+
68
+ The `[functionName]` part must match the string used in `dispatchTool` and in the LLM tool definitions array — the same name passed to `recordToolCall` in `ed_tools.ts`.
69
+
70
+ ### Input and output requirements
71
+
72
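+ The span's `input` must be the exact arguments object as passed to the tool function. The span's `output` must be the complete result returned by the tool. Neither may be truncated, summarised, or otherwise altered; serialising the complete result as JSON (as in the example below) is fine as long as nothing is dropped.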
73
+
74
+ ```ts
75
+ // Replace "tool-getPokemon" and "getPokemon" with your tool name
76
+ const toolSpan = workflowSpan.span({
77
+ name: `tool-${tc.function.name}`, // e.g. "tool-getPokemon"
78
+ input: args, // the parsed args object, unchanged
79
+ })
80
+
81
+ const toolResult = await dispatchTool(tc.function.name, args)
82
+ const result = JSON.stringify(toolResult) // full result
83
+ toolSpan.end({ output: result }) // must be complete
84
+ ```
85
+
86
+ If the result is too large to display comfortably in the Langfuse UI, that is a display concern — the span data itself must be the full value. ElasticDash uses tool span outputs to replay individual tool calls in isolation and to verify that re-runs produce consistent results.
87
+
88
+ ### Span must be a child of the workflow span
89
+
90
+ Tool spans must be created from `workflowSpan.span(...)`, not from `trace.span(...)`. A tool span attached directly to the trace root is not associated with the workflow invocation and will not be linked to the correct replay context.
91
+
92
+ ```ts
93
+ // Correct — child of workflowSpan
94
+ const toolSpan = workflowSpan.span({ name: `tool-${name}`, input: args })
95
+
96
+ // Wrong — attached to trace root, not to the workflow invocation
97
+ const toolSpan = trace.span({ name: `tool-${name}`, input: args })
98
+ ```
99
+
100
+ ---
101
+
102
+ ## What breaks if these rules are not followed
103
+
104
+ | Violation | Effect |
105
+ | --- | --- |
106
+ | Workflow span name does not match the function name | ElasticDash cannot locate the workflow entry point in the trace; replay fails entirely |
107
+ | Workflow span input is not the raw function arguments | Re-invocation uses wrong inputs; the output diff compares unrelated runs |
108
+ | Workflow span input uses an object instead of an array for multiple params | Argument positions are lost; re-invocation cannot reconstruct the call correctly |
109
+ | `workflowSpan.end()` is not called on every return path | Span is recorded as open; output is null; comparison is impossible |
110
+ | `flushAsync` is not awaited before the function returns | Spans may still be buffered when the process exits; the trace can be partially or fully lost |
111
+ | Tool span name does not match `tool-[functionName]` and type is not `'TOOL'` | Tool calls are invisible to the replay engine; tool-level diffs and replays are lost |
112
+ | Tool span is attached to the trace root instead of `workflowSpan` | Tool call is not associated with the workflow invocation; replay linkage breaks |
113
+ | Tool span input or output is truncated or transformed | Replayed tool call uses different args or an incomplete result; comparison is invalid |
114
+
115
+ ---
116
+
117
+ ## Reference — example implementation
118
+
119
+ The following example follows all rules. Replace `generateAIResponse`, `tc.function.name`, and `dispatchTool` with your own workflow function and tool dispatch logic:
120
+
121
+ ```ts
122
+ // Parent workflow span — name matches function, input is arg array, output is return value
123
+ const workflowSpan = trace.span({
124
+ name: "generateAIResponse", // your workflow function name
125
+ input: [chatId, userId, history], // your args in declaration order
126
+ startTime: workflowStartTime,
127
+ })
128
+
129
+ // Tool spans — child of workflowSpan, named tool-[functionName], full input/output
130
+ const toolSpan = workflowSpan.span({
131
+ name: `tool-${tc.function.name}`, // e.g. "tool-getPokemon" — use your tool name
132
+ input: args,
133
+ metadata: { tool_name: tc.function.name },
134
+ })
135
+ const toolResult = await dispatchTool(tc.function.name, args)
136
+ const result = JSON.stringify(toolResult)
137
+ toolSpan.end({ output: result })
138
+
139
+ // The rest of the workflow logic...
140
+
141
+ // Every return path ends the workflow span and awaits flush
142
+ workflowSpan.end({ output: response })
143
+ await langfuse.flushAsync()
144
+ return response
145
+ ```