elasticdash-sdk 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +775 -0
  3. package/dist/browser-ui.d.ts +43 -0
  4. package/dist/browser-ui.d.ts.map +1 -0
  5. package/dist/browser-ui.js +246 -0
  6. package/dist/browser-ui.js.map +1 -0
  7. package/dist/capture/event.d.ts +33 -0
  8. package/dist/capture/event.d.ts.map +1 -0
  9. package/dist/capture/event.js +2 -0
  10. package/dist/capture/event.js.map +1 -0
  11. package/dist/capture/index.d.ts +4 -0
  12. package/dist/capture/index.d.ts.map +1 -0
  13. package/dist/capture/index.js +4 -0
  14. package/dist/capture/index.js.map +1 -0
  15. package/dist/capture/recorder.d.ts +24 -0
  16. package/dist/capture/recorder.d.ts.map +1 -0
  17. package/dist/capture/recorder.js +46 -0
  18. package/dist/capture/recorder.js.map +1 -0
  19. package/dist/capture/replay.d.ts +20 -0
  20. package/dist/capture/replay.d.ts.map +1 -0
  21. package/dist/capture/replay.js +47 -0
  22. package/dist/capture/replay.js.map +1 -0
  23. package/dist/ci/api-client.d.ts +38 -0
  24. package/dist/ci/api-client.d.ts.map +1 -0
  25. package/dist/ci/api-client.js +96 -0
  26. package/dist/ci/api-client.js.map +1 -0
  27. package/dist/ci/benchmark.d.ts +33 -0
  28. package/dist/ci/benchmark.d.ts.map +1 -0
  29. package/dist/ci/benchmark.js +213 -0
  30. package/dist/ci/benchmark.js.map +1 -0
  31. package/dist/ci/ed-runner.d.ts +48 -0
  32. package/dist/ci/ed-runner.d.ts.map +1 -0
  33. package/dist/ci/ed-runner.js +260 -0
  34. package/dist/ci/ed-runner.js.map +1 -0
  35. package/dist/ci/executor.d.ts +13 -0
  36. package/dist/ci/executor.d.ts.map +1 -0
  37. package/dist/ci/executor.js +542 -0
  38. package/dist/ci/executor.js.map +1 -0
  39. package/dist/ci/git-info.d.ts +17 -0
  40. package/dist/ci/git-info.d.ts.map +1 -0
  41. package/dist/ci/git-info.js +102 -0
  42. package/dist/ci/git-info.js.map +1 -0
  43. package/dist/ci/index.d.ts +6 -0
  44. package/dist/ci/index.d.ts.map +1 -0
  45. package/dist/ci/index.js +4 -0
  46. package/dist/ci/index.js.map +1 -0
  47. package/dist/ci/measurement.d.ts +9 -0
  48. package/dist/ci/measurement.d.ts.map +1 -0
  49. package/dist/ci/measurement.js +15 -0
  50. package/dist/ci/measurement.js.map +1 -0
  51. package/dist/ci/replay.d.ts +31 -0
  52. package/dist/ci/replay.d.ts.map +1 -0
  53. package/dist/ci/replay.js +96 -0
  54. package/dist/ci/replay.js.map +1 -0
  55. package/dist/ci/reporters/default.d.ts +8 -0
  56. package/dist/ci/reporters/default.d.ts.map +1 -0
  57. package/dist/ci/reporters/default.js +46 -0
  58. package/dist/ci/reporters/default.js.map +1 -0
  59. package/dist/ci/reporters/index.d.ts +8 -0
  60. package/dist/ci/reporters/index.d.ts.map +1 -0
  61. package/dist/ci/reporters/index.js +14 -0
  62. package/dist/ci/reporters/index.js.map +1 -0
  63. package/dist/ci/reporters/json.d.ts +8 -0
  64. package/dist/ci/reporters/json.d.ts.map +1 -0
  65. package/dist/ci/reporters/json.js +14 -0
  66. package/dist/ci/reporters/json.js.map +1 -0
  67. package/dist/ci/reporters/junit.d.ts +8 -0
  68. package/dist/ci/reporters/junit.d.ts.map +1 -0
  69. package/dist/ci/reporters/junit.js +48 -0
  70. package/dist/ci/reporters/junit.js.map +1 -0
  71. package/dist/ci/runner.d.ts +3 -0
  72. package/dist/ci/runner.d.ts.map +1 -0
  73. package/dist/ci/runner.js +187 -0
  74. package/dist/ci/runner.js.map +1 -0
  75. package/dist/ci/test-discovery.d.ts +5 -0
  76. package/dist/ci/test-discovery.d.ts.map +1 -0
  77. package/dist/ci/test-discovery.js +11 -0
  78. package/dist/ci/test-discovery.js.map +1 -0
  79. package/dist/ci/test-loader.d.ts +19 -0
  80. package/dist/ci/test-loader.d.ts.map +1 -0
  81. package/dist/ci/test-loader.js +149 -0
  82. package/dist/ci/test-loader.js.map +1 -0
  83. package/dist/ci/test-registry.d.ts +42 -0
  84. package/dist/ci/test-registry.d.ts.map +1 -0
  85. package/dist/ci/test-registry.js +18 -0
  86. package/dist/ci/test-registry.js.map +1 -0
  87. package/dist/ci/trace-schema.d.ts +30 -0
  88. package/dist/ci/trace-schema.d.ts.map +1 -0
  89. package/dist/ci/trace-schema.js +66 -0
  90. package/dist/ci/trace-schema.js.map +1 -0
  91. package/dist/ci/trace-writer.d.ts +16 -0
  92. package/dist/ci/trace-writer.d.ts.map +1 -0
  93. package/dist/ci/trace-writer.js +108 -0
  94. package/dist/ci/trace-writer.js.map +1 -0
  95. package/dist/ci/types.d.ts +108 -0
  96. package/dist/ci/types.d.ts.map +1 -0
  97. package/dist/ci/types.js +3 -0
  98. package/dist/ci/types.js.map +1 -0
  99. package/dist/ci/upload-client.d.ts +74 -0
  100. package/dist/ci/upload-client.d.ts.map +1 -0
  101. package/dist/ci/upload-client.js +195 -0
  102. package/dist/ci/upload-client.js.map +1 -0
  103. package/dist/cli.d.ts +3 -0
  104. package/dist/cli.d.ts.map +1 -0
  105. package/dist/cli.js +716 -0
  106. package/dist/cli.js.map +1 -0
  107. package/dist/core/agent-state.d.ts +47 -0
  108. package/dist/core/agent-state.d.ts.map +1 -0
  109. package/dist/core/agent-state.js +137 -0
  110. package/dist/core/agent-state.js.map +1 -0
  111. package/dist/core/judge-utils.d.ts +22 -0
  112. package/dist/core/judge-utils.d.ts.map +1 -0
  113. package/dist/core/judge-utils.js +211 -0
  114. package/dist/core/judge-utils.js.map +1 -0
  115. package/dist/core/registry.d.ts +28 -0
  116. package/dist/core/registry.d.ts.map +1 -0
  117. package/dist/core/registry.js +52 -0
  118. package/dist/core/registry.js.map +1 -0
  119. package/dist/dashboard-server.d.ts +65 -0
  120. package/dist/dashboard-server.d.ts.map +1 -0
  121. package/dist/dashboard-server.js +3940 -0
  122. package/dist/dashboard-server.js.map +1 -0
  123. package/dist/execution/tool-runner.d.ts +26 -0
  124. package/dist/execution/tool-runner.d.ts.map +1 -0
  125. package/dist/execution/tool-runner.js +316 -0
  126. package/dist/execution/tool-runner.js.map +1 -0
  127. package/dist/html/dashboard.html +2218 -0
  128. package/dist/http.d.ts +14 -0
  129. package/dist/http.d.ts.map +1 -0
  130. package/dist/http.js +13 -0
  131. package/dist/http.js.map +1 -0
  132. package/dist/index.cjs +8102 -0
  133. package/dist/index.d.ts +61 -0
  134. package/dist/index.d.ts.map +1 -0
  135. package/dist/index.js +67 -0
  136. package/dist/index.js.map +1 -0
  137. package/dist/interceptors/ai-interceptor.d.ts +26 -0
  138. package/dist/interceptors/ai-interceptor.d.ts.map +1 -0
  139. package/dist/interceptors/ai-interceptor.js +756 -0
  140. package/dist/interceptors/ai-interceptor.js.map +1 -0
  141. package/dist/interceptors/db-auto.d.ts +8 -0
  142. package/dist/interceptors/db-auto.d.ts.map +1 -0
  143. package/dist/interceptors/db-auto.js +217 -0
  144. package/dist/interceptors/db-auto.js.map +1 -0
  145. package/dist/interceptors/db.d.ts +23 -0
  146. package/dist/interceptors/db.d.ts.map +1 -0
  147. package/dist/interceptors/db.js +137 -0
  148. package/dist/interceptors/db.js.map +1 -0
  149. package/dist/interceptors/http.d.ts +28 -0
  150. package/dist/interceptors/http.d.ts.map +1 -0
  151. package/dist/interceptors/http.js +356 -0
  152. package/dist/interceptors/http.js.map +1 -0
  153. package/dist/interceptors/side-effects.d.ts +7 -0
  154. package/dist/interceptors/side-effects.d.ts.map +1 -0
  155. package/dist/interceptors/side-effects.js +72 -0
  156. package/dist/interceptors/side-effects.js.map +1 -0
  157. package/dist/interceptors/telemetry-push.d.ts +142 -0
  158. package/dist/interceptors/telemetry-push.d.ts.map +1 -0
  159. package/dist/interceptors/telemetry-push.js +463 -0
  160. package/dist/interceptors/telemetry-push.js.map +1 -0
  161. package/dist/interceptors/tool.d.ts +2 -0
  162. package/dist/interceptors/tool.d.ts.map +1 -0
  163. package/dist/interceptors/tool.js +274 -0
  164. package/dist/interceptors/tool.js.map +1 -0
  165. package/dist/interceptors/workflow-ai.d.ts +5 -0
  166. package/dist/interceptors/workflow-ai.d.ts.map +1 -0
  167. package/dist/interceptors/workflow-ai.js +382 -0
  168. package/dist/interceptors/workflow-ai.js.map +1 -0
  169. package/dist/internals/conditional-recorder.d.ts +21 -0
  170. package/dist/internals/conditional-recorder.d.ts.map +1 -0
  171. package/dist/internals/conditional-recorder.js +54 -0
  172. package/dist/internals/conditional-recorder.js.map +1 -0
  173. package/dist/internals/mock-resolver.d.ts +146 -0
  174. package/dist/internals/mock-resolver.d.ts.map +1 -0
  175. package/dist/internals/mock-resolver.js +427 -0
  176. package/dist/internals/mock-resolver.js.map +1 -0
  177. package/dist/matchers/index.d.ts +96 -0
  178. package/dist/matchers/index.d.ts.map +1 -0
  179. package/dist/matchers/index.js +668 -0
  180. package/dist/matchers/index.js.map +1 -0
  181. package/dist/observability.d.ts +82 -0
  182. package/dist/observability.d.ts.map +1 -0
  183. package/dist/observability.js +471 -0
  184. package/dist/observability.js.map +1 -0
  185. package/dist/portal-executor.d.ts +30 -0
  186. package/dist/portal-executor.d.ts.map +1 -0
  187. package/dist/portal-executor.js +324 -0
  188. package/dist/portal-executor.js.map +1 -0
  189. package/dist/portal-server.d.ts +3 -0
  190. package/dist/portal-server.d.ts.map +1 -0
  191. package/dist/portal-server.js +279 -0
  192. package/dist/portal-server.js.map +1 -0
  193. package/dist/proxy/llm-capture.d.ts +14 -0
  194. package/dist/proxy/llm-capture.d.ts.map +1 -0
  195. package/dist/proxy/llm-capture.js +264 -0
  196. package/dist/proxy/llm-capture.js.map +1 -0
  197. package/dist/reporter.d.ts +3 -0
  198. package/dist/reporter.d.ts.map +1 -0
  199. package/dist/reporter.js +72 -0
  200. package/dist/reporter.js.map +1 -0
  201. package/dist/runWorkflowSubprocess.d.ts +14 -0
  202. package/dist/runWorkflowSubprocess.d.ts.map +1 -0
  203. package/dist/runWorkflowSubprocess.js +66 -0
  204. package/dist/runWorkflowSubprocess.js.map +1 -0
  205. package/dist/runner.d.ts +16 -0
  206. package/dist/runner.d.ts.map +1 -0
  207. package/dist/runner.js +138 -0
  208. package/dist/runner.js.map +1 -0
  209. package/dist/socket-connector.d.ts +22 -0
  210. package/dist/socket-connector.d.ts.map +1 -0
  211. package/dist/socket-connector.js +104 -0
  212. package/dist/socket-connector.js.map +1 -0
  213. package/dist/telemetry-batcher.d.ts +56 -0
  214. package/dist/telemetry-batcher.d.ts.map +1 -0
  215. package/dist/telemetry-batcher.js +143 -0
  216. package/dist/telemetry-batcher.js.map +1 -0
  217. package/dist/test-setup.d.ts +12 -0
  218. package/dist/test-setup.d.ts.map +1 -0
  219. package/dist/test-setup.js +13 -0
  220. package/dist/test-setup.js.map +1 -0
  221. package/dist/tool-registry.d.ts +31 -0
  222. package/dist/tool-registry.d.ts.map +1 -0
  223. package/dist/tool-registry.js +73 -0
  224. package/dist/tool-registry.js.map +1 -0
  225. package/dist/tool-runner-worker.d.ts +2 -0
  226. package/dist/tool-runner-worker.d.ts.map +1 -0
  227. package/dist/tool-runner-worker.js +215 -0
  228. package/dist/tool-runner-worker.js.map +1 -0
  229. package/dist/trace-adapter/context.d.ts +72 -0
  230. package/dist/trace-adapter/context.d.ts.map +1 -0
  231. package/dist/trace-adapter/context.js +80 -0
  232. package/dist/trace-adapter/context.js.map +1 -0
  233. package/dist/tracing.d.ts +2 -0
  234. package/dist/tracing.d.ts.map +1 -0
  235. package/dist/tracing.js +59 -0
  236. package/dist/tracing.js.map +1 -0
  237. package/dist/trigger-executor.d.ts +12 -0
  238. package/dist/trigger-executor.d.ts.map +1 -0
  239. package/dist/trigger-executor.js +130 -0
  240. package/dist/trigger-executor.js.map +1 -0
  241. package/dist/types/portal.d.ts +76 -0
  242. package/dist/types/portal.d.ts.map +1 -0
  243. package/dist/types/portal.js +2 -0
  244. package/dist/types/portal.js.map +1 -0
  245. package/dist/utils/debug.d.ts +3 -0
  246. package/dist/utils/debug.d.ts.map +1 -0
  247. package/dist/utils/debug.js +8 -0
  248. package/dist/utils/debug.js.map +1 -0
  249. package/dist/utils/license-error.d.ts +23 -0
  250. package/dist/utils/license-error.d.ts.map +1 -0
  251. package/dist/utils/license-error.js +42 -0
  252. package/dist/utils/license-error.js.map +1 -0
  253. package/dist/utils/redact.d.ts +7 -0
  254. package/dist/utils/redact.d.ts.map +1 -0
  255. package/dist/utils/redact.js +26 -0
  256. package/dist/utils/redact.js.map +1 -0
  257. package/dist/workflow-runner-worker.d.ts +2 -0
  258. package/dist/workflow-runner-worker.d.ts.map +1 -0
  259. package/dist/workflow-runner-worker.js +329 -0
  260. package/dist/workflow-runner-worker.js.map +1 -0
  261. package/dist/workflow-runner.d.ts +14 -0
  262. package/dist/workflow-runner.d.ts.map +1 -0
  263. package/dist/workflow-runner.js +34 -0
  264. package/dist/workflow-runner.js.map +1 -0
  265. package/docs/agent-coding-instructions.md +138 -0
  266. package/docs/agent-integration-guide.md +564 -0
  267. package/docs/agents.md +140 -0
  268. package/docs/dashboard.md +394 -0
  269. package/docs/deno.md +69 -0
  270. package/docs/instrumentation.md +424 -0
  271. package/docs/langfuse-trace-structure.md +145 -0
  272. package/docs/matchers.md +173 -0
  273. package/docs/observability_contract.md +192 -0
  274. package/docs/observability_mode.md +195 -0
  275. package/docs/quickstart.md +621 -0
  276. package/docs/security-compliance.md +566 -0
  277. package/docs/test-writing-guidelines.md +444 -0
  278. package/docs/tools.md +165 -0
  279. package/docs/workflow-modes.md +253 -0
  280. package/package.json +76 -0
  281. package/src/browser-ui.ts +281 -0
  282. package/src/capture/event.ts +30 -0
  283. package/src/capture/index.ts +3 -0
  284. package/src/capture/recorder.ts +62 -0
  285. package/src/capture/replay.ts +55 -0
  286. package/src/ci/api-client.ts +136 -0
  287. package/src/ci/benchmark.ts +257 -0
  288. package/src/ci/ed-runner.ts +351 -0
  289. package/src/ci/executor.ts +671 -0
  290. package/src/ci/git-info.ts +127 -0
  291. package/src/ci/index.ts +5 -0
  292. package/src/ci/measurement.ts +25 -0
  293. package/src/ci/replay.ts +127 -0
  294. package/src/ci/reporters/default.ts +50 -0
  295. package/src/ci/reporters/index.ts +21 -0
  296. package/src/ci/reporters/json.ts +18 -0
  297. package/src/ci/reporters/junit.ts +61 -0
  298. package/src/ci/runner.ts +208 -0
  299. package/src/ci/test-discovery.ts +16 -0
  300. package/src/ci/test-loader.ts +187 -0
  301. package/src/ci/test-registry.ts +62 -0
  302. package/src/ci/trace-schema.ts +96 -0
  303. package/src/ci/trace-writer.ts +107 -0
  304. package/src/ci/types.ts +115 -0
  305. package/src/ci/upload-client.ts +300 -0
  306. package/src/cli.ts +811 -0
  307. package/src/core/agent-state.ts +162 -0
  308. package/src/core/judge-utils.ts +232 -0
  309. package/src/core/registry.ts +92 -0
  310. package/src/dashboard-server.ts +2047 -0
  311. package/src/execution/tool-runner.ts +352 -0
  312. package/src/html/dashboard.html +2218 -0
  313. package/src/http.ts +13 -0
  314. package/src/index.ts +138 -0
  315. package/src/interceptors/ai-interceptor.ts +798 -0
  316. package/src/interceptors/db-auto.ts +243 -0
  317. package/src/interceptors/db.ts +156 -0
  318. package/src/interceptors/http.ts +393 -0
  319. package/src/interceptors/side-effects.ts +83 -0
  320. package/src/interceptors/telemetry-push.ts +537 -0
  321. package/src/interceptors/tool.ts +287 -0
  322. package/src/interceptors/workflow-ai.ts +419 -0
  323. package/src/internals/conditional-recorder.ts +63 -0
  324. package/src/internals/mock-resolver.ts +492 -0
  325. package/src/matchers/index.ts +824 -0
  326. package/src/observability.ts +501 -0
  327. package/src/portal-executor.ts +355 -0
  328. package/src/portal-server.ts +304 -0
  329. package/src/proxy/llm-capture.ts +301 -0
  330. package/src/reporter.ts +81 -0
  331. package/src/runWorkflowSubprocess.ts +74 -0
  332. package/src/runner.ts +178 -0
  333. package/src/socket-connector.ts +117 -0
  334. package/src/telemetry-batcher.ts +191 -0
  335. package/src/test-setup.ts +16 -0
  336. package/src/tool-registry.ts +94 -0
  337. package/src/tool-runner-worker.ts +244 -0
  338. package/src/trace-adapter/context.ts +156 -0
  339. package/src/tracing.ts +62 -0
  340. package/src/trigger-executor.ts +171 -0
  341. package/src/types/agent.d.ts +63 -0
  342. package/src/types/expect.d.ts +81 -0
  343. package/src/types/modules.d.ts +2 -0
  344. package/src/types/portal.ts +69 -0
  345. package/src/utils/debug.ts +8 -0
  346. package/src/utils/license-error.ts +43 -0
  347. package/src/utils/redact.ts +25 -0
  348. package/src/workflow-runner-worker.ts +386 -0
  349. package/src/workflow-runner.ts +58 -0
@@ -0,0 +1,173 @@
1
+ # Test Matchers
2
+
3
+ ElasticDash SDK provides AI-specific matchers for asserting on workflow traces.
4
+
5
+ ## Overview
6
+
7
+ All matchers work with `expect(ctx.trace)` after importing the test setup:
8
+
9
+ ```ts
10
+ import '../node_modules/elasticdash-sdk/dist/test-setup.js'
11
+ import { expect } from 'expect'
12
+
13
+ aiTest('my test', async (ctx) => {
14
+ // ... run your workflow
15
+ expect(ctx.trace).toHaveLLMStep({ model: 'gpt-4' })
16
+ })
17
+ ```
18
+
19
+ ---
20
+
21
+ ## `toHaveLLMStep(config?)`
22
+
23
+ Assert the trace contains at least one LLM step matching the given config. All fields are optional and combined with AND logic.
24
+
25
+ ```ts
26
+ expect(ctx.trace).toHaveLLMStep({ model: 'gpt-4' })
27
+ expect(ctx.trace).toHaveLLMStep({ contains: 'order confirmed' }) // searches prompt + completion
28
+ expect(ctx.trace).toHaveLLMStep({ promptContains: 'order status' }) // searches prompt only
29
+ expect(ctx.trace).toHaveLLMStep({ outputContains: 'order confirmed' }) // searches completion only
30
+ expect(ctx.trace).toHaveLLMStep({ provider: 'openai' })
31
+ expect(ctx.trace).toHaveLLMStep({ provider: 'openai', promptContains: 'order status' })
32
+ expect(ctx.trace).toHaveLLMStep({ promptContains: 'retry', times: 3 }) // exactly 3 matching steps
33
+ expect(ctx.trace).toHaveLLMStep({ provider: 'openai', minTimes: 2 }) // at least 2 matching steps
34
+ expect(ctx.trace).toHaveLLMStep({ outputContains: 'error', maxTimes: 1 }) // at most 1 matching step
35
+ ```
36
+
37
+ ### Configuration Options
38
+
39
+ | Field | Description |
40
+ |---|---|
41
+ | `model` | Exact model name match (e.g. `'gpt-4o'`) |
42
+ | `contains` | Substring match across prompt + completion (case-insensitive) |
43
+ | `promptContains` | Substring match in prompt only (case-insensitive) |
44
+ | `outputContains` | Substring match in completion only (case-insensitive) |
45
+ | `provider` | Provider name: `'openai'`, `'gemini'`, or `'grok'` |
46
+ | `times` | Exact match count (fails unless exactly this many steps match) |
47
+ | `minTimes` | Minimum match count (steps matching must be ≥ this value) |
48
+ | `maxTimes` | Maximum match count (steps matching must be ≤ this value) |
49
+
50
+ ---
51
+
52
+ ## `toCallTool(toolName)`
53
+
54
+ Assert the trace contains a tool call with the given name.
55
+
56
+ ```ts
57
+ expect(ctx.trace).toCallTool('chargeCard')
58
+ ```
59
+
60
+ ---
61
+
62
+ ## `toMatchSemanticOutput(expected, options?)`
63
+
64
+ Uses an LLM judge to check whether the combined LLM output semantically matches the expected string. Defaults to OpenAI GPT-4, authenticated with `OPENAI_API_KEY`.
65
+
66
+ ```ts
67
+ expect(ctx.trace).toMatchSemanticOutput('attack stat', {
68
+ provider: 'claude', // 'openai' (default) | 'claude' | 'gemini' | 'grok'
69
+ model: 'claude-3-opus-20240229', // overrides default model for the provider
70
+ sdk: myClaudeClient, // optional SDK instance (uses its chat/messages API)
71
+ })
72
+
73
+ // Minimal, using default OpenAI model
74
+ expect(ctx.trace).toMatchSemanticOutput('order confirmed')
75
+
76
+ // OpenAI-compatible endpoint (e.g., Moonshot/Kimi) via baseURL + apiKey
77
+ expect(ctx.trace).toMatchSemanticOutput('order confirmed', {
78
+ provider: 'openai',
79
+ model: 'kimi-k2-turbo-preview',
80
+ apiKey: process.env.KIMI_API_KEY,
81
+ baseURL: 'https://api.moonshot.ai/v1',
82
+ })
83
+ ```
84
+
85
+ Environment keys by provider: `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GEMINI_API_KEY` (or `GOOGLE_API_KEY`), `GROK_API_KEY`.
86
+
87
+ ---
88
+
89
+ ## `toEvaluateOutputMetric(config)`
90
+
91
+ Evaluate one LLM step's prompt or result using an LLM, which returns a numeric score in the range 0.0–1.0, and assert a condition on that score.
92
+
93
+ Defaults: `target='result'`, `condition={ atLeast: 0.7 }`, `provider='openai'`, `model='gpt-4'`.
94
+
95
+ ```ts
96
+ // Evaluate the last LLM result with your own prompt; default condition atLeast 0.7
97
+ expect(ctx.trace).toEvaluateOutputMetric({
98
+ evaluationPrompt: 'Rate how well this answers the user question.',
99
+ })
100
+
101
+ // Check a specific step (3rd LLM prompt), target the prompt text, require >= 0.8 via Claude
102
+ expect(ctx.trace).toEvaluateOutputMetric({
103
+ evaluationPrompt: 'Score coherence of this prompt between 0 and 1.',
104
+ target: 'prompt',
105
+ nth: 3,
106
+ condition: { atLeast: 0.8 },
107
+ provider: 'claude',
108
+ model: 'claude-3-opus-20240229',
109
+ })
110
+
111
+ // Custom comparator: score must be < 0.3
112
+ expect(ctx.trace).toEvaluateOutputMetric({
113
+ evaluationPrompt: 'Rate hallucination risk (0=none, 1=high).',
114
+ condition: { lessThan: 0.3 },
115
+ })
116
+ ```
117
+
118
+ ### Configuration Options
119
+
120
+ - `evaluationPrompt` (required): your scoring instructions; model is asked to return only a number between 0 and 1.
121
+ - `target`: `'result'` (default) or `'prompt'`. Evaluates that text only.
122
+ - `index` / `nth`: pick which LLM step to score (`index` is 0-based, `nth` is 1-based). Defaults to the last LLM step.
123
+ - `condition`: one of `greaterThan`, `lessThan`, `atLeast`, `atMost`, `equals`; default is `{ atLeast: 0.7 }`.
124
+ - `provider` / `model` / `sdk` / `apiKey` / `baseURL`: same shape as `toMatchSemanticOutput`.
125
+
126
+ ---
127
+
128
+ ## `toHaveCustomStep(config?)`
129
+
130
+ Assert that a recorded custom step (RAG/code/fixed/custom) matches the given filters.
131
+
132
+ ```ts
133
+ expect(ctx.trace).toHaveCustomStep({ kind: 'rag', name: 'pokemon-search' })
134
+ expect(ctx.trace).toHaveCustomStep({ tag: 'sort:asc' })
135
+ expect(ctx.trace).toHaveCustomStep({ contains: 'pikachu' })
136
+ expect(ctx.trace).toHaveCustomStep({ resultContains: '25' })
137
+ expect(ctx.trace).toHaveCustomStep({ kind: 'rag', minTimes: 1, maxTimes: 2 })
138
+ ```
139
+
140
+ ---
141
+
142
+ ## `toHavePromptWhere(config)`
143
+
144
+ Filter prompts, then assert additional constraints. Example: "all prompts containing A must also contain B".
145
+
146
+ ```ts
147
+ // Prompts that contain "order" must also contain "confirmed"
148
+ expect(ctx.trace).toHavePromptWhere({
149
+ filterContains: 'order',
150
+ requireContains: 'confirmed',
151
+ })
152
+
153
+ // Prompts containing "retry" must NOT contain "cancel"
154
+ expect(ctx.trace).toHavePromptWhere({
155
+ filterContains: 'retry',
156
+ requireNotContains: 'cancel',
157
+ })
158
+
159
+ // And control counts on the filtered subset
160
+ expect(ctx.trace).toHavePromptWhere({
161
+ filterContains: 'order',
162
+ requireContains: 'confirmed',
163
+ minTimes: 1,
164
+ maxTimes: 3,
165
+ })
166
+
167
+ // Check a specific prompt position (1-based nth or 0-based index)
168
+ expect(ctx.trace).toHavePromptWhere({
169
+ filterContains: 'order',
170
+ requireContains: 'confirmed',
171
+ nth: 3, // the 3rd prompt among those containing "order"
172
+ })
173
+ ```
@@ -0,0 +1,192 @@
1
+ # Observability SDK Contract
2
+
3
+ This document describes the event types the SDK sends and the portal (remote rerun queue) contract between the SDK and the backend.
4
+
5
+ ---
6
+
7
+ ## SDK Event Types
8
+
9
+ ### Events the SDK Sends
10
+
11
+ | `type` | `name` | When | Key fields |
12
+ |--------|--------|------|------------|
13
+ | `ai` | Model name (e.g. `gpt-4o`) | Every `wrapAI` call | `input`, `output`, `usage`, `durationMs`, `streamed` |
14
+ | `tool` | Tool name (e.g. `searchDB`) | Every `wrapTool` call | `input`, `output`, `durationMs`, `streamed` |
15
+ | `side_effect` | `__heartbeat__` | Every 30s (configurable) | `input.sessionId`, `output.uptime` |
16
+ | `side_effect` | `__session_end__` | On `shutdownObservability()` | `input.sessionId`, `output.uptime` |
17
+
18
+ ### Special Events (do not display in trace UI)
19
+
20
+ - `__heartbeat__` — update session liveness; do not store as an event
21
+ - `__session_end__` — mark the session as ended; do not store as an event
22
+
23
+ ### Streamed Events
24
+
25
+ When `streamed === true`:
26
+ - `output` is `null`
27
+ - `streamRaw` contains the full buffered text of the stream
28
+ - Display `streamRaw` as the output in the UI
29
+
30
+ ### Error Events
31
+
32
+ When a tool or AI call throws:
33
+ - `output` is `{ "error": "Error message string" }`
34
+ - `durationMs` reflects time until failure
35
+ - Display with error styling in the UI
36
+
37
+ ---
38
+
39
+ ## Portal (Remote Rerun Queue) Contract
40
+
41
+ The SDK's `elasticdash portal` command starts an HTTP server that the backend can push rerun tasks to. The backend also needs endpoints to receive results.
42
+
43
+ ### SDK Portal Endpoints (hosted on user's machine, default port 4574)
44
+
45
+ These endpoints are served by the SDK. The backend calls them.
46
+
47
+ #### `POST /api/portal/tasks` — Push a single rerun task
48
+
49
+ **Request:**
50
+ ```json
51
+ {
52
+ "taskId": "task-uuid-from-backend",
53
+ "type": "tool",
54
+ "name": "searchDB",
55
+ "input": { "query": "pikachu" },
56
+ "metadata": { "testGroupId": 42, "expectationIds": [1, 2, 3] }
57
+ }
58
+ ```
59
+
60
+ For AI tasks:
61
+ ```json
62
+ {
63
+ "taskId": "task-uuid-from-backend",
64
+ "type": "ai",
65
+ "name": "gpt-4o",
66
+ "input": { "messages": [{ "role": "user", "content": "Hello" }] },
67
+ "model": "gpt-4o",
68
+ "provider": "openai",
69
+ "modelParameters": { "temperature": 0.7, "max_tokens": 512 },
70
+ "metadata": { "testGroupId": 42 }
71
+ }
72
+ ```
73
+
74
+ **Response:** `202 Accepted`
75
+ ```json
76
+ { "ok": true, "taskId": "task-uuid-from-backend", "position": 3 }
77
+ ```
78
+
79
+ **Auth:** `Authorization: Bearer <api_key>` (validated if portal was started with `--api-key`)
80
+
81
+ #### `POST /api/portal/tasks/batch` — Push multiple tasks
82
+
83
+ **Request:**
84
+ ```json
85
+ { "tasks": [ /* PortalTask[] */ ] }
86
+ ```
87
+
88
+ **Response:** `202 Accepted`
89
+ ```json
90
+ { "ok": true, "tasks": [{ "taskId": "...", "position": 1 }, { "taskId": "...", "position": 2 }] }
91
+ ```
92
+
93
+ #### `GET /api/portal/status` — Health check
94
+
95
+ **Response:**
96
+ ```json
97
+ {
98
+ "ok": true,
99
+ "queueLength": 5,
100
+ "processing": "task-uuid-123",
101
+ "completed": 12,
102
+ "failed": 1
103
+ }
104
+ ```
105
+
106
+ #### `DELETE /api/portal/tasks/:taskId` — Cancel a pending task
107
+
108
+ **Response:** `200` if the task was removed; `404` if the task was not found or is already being processed.
109
+
110
+ ---
111
+
112
+ ### Backend Endpoints (needed for portal to work)
113
+
114
+ These endpoints must be implemented on the backend. The SDK calls them.
115
+
116
+ #### `POST /api/portal/register` — Portal registration
117
+
118
+ Called by the SDK when `elasticdash portal` starts.
119
+
120
+ **Request:**
121
+ ```json
122
+ {
123
+ "portalUrl": "http://localhost:4574"
124
+ }
125
+ ```
126
+
127
+ **Auth:** `Authorization: Bearer <api_key>`
128
+
129
+ **Response:** `200 OK`
130
+ ```json
131
+ { "ok": true }
132
+ ```
133
+
134
+ The backend should store this portal URL and use it to push tasks. The registration should be scoped to the project resolved from the API key.
135
+
136
+ #### `POST /api/portal/results/:taskId` — Receive task result
137
+
138
+ Called by the SDK after each task completes (success or failure).
139
+
140
+ **Request:**
141
+ ```json
142
+ {
143
+ "taskId": "task-uuid-from-backend",
144
+ "ok": true,
145
+ "output": "The search returned 3 results for pikachu...",
146
+ "durationMs": 245,
147
+ "usage": {
148
+ "inputTokens": 150,
149
+ "outputTokens": 45,
150
+ "totalTokens": 195
151
+ },
152
+ "metadata": { "testGroupId": 42, "expectationIds": [1, 2, 3] }
153
+ }
154
+ ```
155
+
156
+ For failed tasks:
157
+ ```json
158
+ {
159
+ "taskId": "task-uuid-from-backend",
160
+ "ok": false,
161
+ "output": null,
162
+ "error": "Tool not found: \"searchDB\". Available tools: fetchData, sendEmail",
163
+ "durationMs": 0,
164
+ "metadata": { "testGroupId": 42 }
165
+ }
166
+ ```
167
+
168
+ **Auth:** `Authorization: Bearer <api_key>`
169
+
170
+ **Response:** `200 OK`
171
+ ```json
172
+ { "ok": true }
173
+ ```
174
+
175
+ ---
176
+
177
+ ### Error Results Reference
178
+
179
+ The SDK sends these error patterns:
180
+
181
+ | Error pattern | Meaning |
182
+ |--------------|---------|
183
+ | `Tool not found: "<name>". Available tools: ...` | Tool doesn't exist in `ed_tools.ts` |
184
+ | `Cannot find ed_tools.ts/js in workspace root.` | No tools module in the project |
185
+ | `Unsupported AI provider: "<name>"` | Unknown provider string |
186
+ | `Missing API key for provider "<name>". Expected environment variable: <VAR>` | LLM API key not configured |
187
+ | `AI task input is empty; cannot execute.` | No prompt could be extracted from input |
188
+ | `AI execution failed: <message>` | LLM API call failed (rate limit, network, invalid model) |
189
+ | `Tool subprocess produced no output.` | Subprocess exited without result |
190
+ | `Failed to spawn tool subprocess: <message>` | Could not start subprocess |
191
+ | `Missing tool name on task.` | Task had no `name` field |
192
+ | `Unknown task type: <type>` | Task type was neither `tool` nor `ai` |
@@ -0,0 +1,195 @@
1
+ # Observability Mode
2
+
3
+ Observability mode turns the ElasticDash SDK into an always-on tracing instrument. When enabled, every `wrapTool` and `wrapAI` call automatically records and streams trace events to your ElasticDash backend — no test runner required.
4
+
5
+ ## Quick Start
6
+
7
+ ### Option 1: Programmatic (recommended)
8
+
9
+ Add a single call at your app's entry point:
10
+
11
+ ```typescript
12
+ // instrumentation.ts (Next.js) or server entry point
13
+ import { initObservability } from 'elasticdash-sdk/http'
14
+
15
+ const obs = initObservability({
16
+ serverUrl: 'https://server.elasticdash.com',
17
+ })
18
+
19
+ // On shutdown (optional — auto-registered on process exit)
20
+ // await obs.shutdown()
21
+ ```
22
+
23
+ ### Option 2: Environment Variables Only
24
+
25
+ If your app already uses `wrapTool` / `wrapAI`, just set the env vars:
26
+
27
+ ```bash
28
+ ELASTICDASH_API_URL=https://server.elasticdash.com \
29
+ ELASTICDASH_API_KEY=ed_key_xxx \
30
+ node server.js
31
+ ```
32
+
33
+ ### Option 3: CLI
34
+
35
+ ```bash
36
+ elasticdash observe --server https://server.elasticdash.com
37
+ ```
38
+
39
+ ## Configuration
40
+
41
+ ### `initObservability(options?)`
42
+
43
+ | Option | Env Variable | Default | Description |
44
+ |--------|-------------|---------|-------------|
45
+ | `serverUrl` | `ELASTICDASH_API_URL` | *required* | ElasticDash backend URL (provide it via the option or the env variable) |
46
+ | `apiKey` | `ELASTICDASH_API_KEY` | — | Project authentication token |
47
+ | `sessionId` | `ELASTICDASH_SESSION_ID` | auto-generated UUID | Session identifier |
48
+ | `batchIntervalMs` | — | `2000` | How often to flush events (ms) |
49
+ | `maxBatchSize` | — | `50` | Max events per batch before auto-flush |
50
+ | `heartbeatIntervalMs` | — | `30000` | Heartbeat interval (ms) |
51
+ | `sampleRate` | — | `1.0` | Fraction of events to send (0.0–1.0) |
52
+ | `redactKeys` | — | `[]` | Object keys to redact from input/output |
53
+
54
+ ### Return Value
55
+
56
+ ```typescript
57
+ interface ObservabilityHandle {
58
+ sessionId: string // The active session ID
59
+ shutdown: () => Promise<void> // Graceful shutdown
60
+ }
61
+ ```
62
+
63
+ ## Grouping Events by Workflow with `startTrace()`
64
+
65
+ By default, the SDK discovers workflow names from `ed_workflows.ts`. If exactly one workflow is exported, its name is used as the traceId prefix automatically (e.g. `chatStreamHandler::1712851200000::a1b2c3d4`). If multiple workflows are exported, the traceId prefix defaults to `unknown-workflow` until you call `startTrace()`.
66
+
67
+ To explicitly group events under a specific workflow, call `startTrace(workflowName)` at the start of each request handler:
68
+
69
+ ```typescript
70
+ import { startTrace } from 'elasticdash-sdk/http'
71
+
72
+ // In your route handler, before any wrapTool/wrapAI calls:
73
+ startTrace('chatStreamHandler')
74
+ ```
75
+
76
+ This sets the traceId to `chatStreamHandler::1712851200000::a1b2c3d4`, so all subsequent `wrapTool` / `wrapAI` / `wrapDB` / fetch calls in that request are grouped under the `chatStreamHandler` workflow in the dashboard.
77
+
78
+ **Important:** Call `startTrace()` before any tool/AI calls execute. If you're using a streaming `ReadableStream`, place it inside the `start()` callback:
79
+
80
+ ```typescript
81
+ const stream = new ReadableStream({
82
+ async start(controller) {
83
+ startTrace('chatStreamHandler')
84
+ // ... workflow logic with wrapTool/wrapAI calls ...
85
+ },
86
+ })
87
+ ```
88
+
89
+ ### Alternative: `wrapWorkflow()`
90
+
91
+ If you control the workflow function directly, you can use `wrapWorkflow()` instead. It calls `startTrace()` automatically before each invocation:
92
+
93
+ ```typescript
94
+ import { wrapWorkflow } from 'elasticdash-sdk/http'
95
+
96
+ export const chatStreamHandler = wrapWorkflow('chatStreamHandler', async (input) => {
97
+ // All tool/AI calls here are automatically grouped under 'chatStreamHandler'
98
+ const result = await fetchUser(input.userId)
99
+ return generateReply(result)
100
+ })
101
+ ```
102
+
103
+ ## How It Works
104
+
105
+ 1. **`initObservability()`** creates an `ObservabilityContext` and installs the AI interceptor
106
+ 2. Every `wrapTool(name, fn)` and `wrapAI(model, fn)` call checks for this context
107
+ 3. When active, the wrapper executes the real function, captures timing/input/output, and enqueues the event
108
+ 4. Events are batched and flushed to `POST /api/observability/events` on the backend
109
+ 5. A heartbeat event is sent every 30 seconds (configurable) so the backend knows the service is alive
110
+ 6. On process exit, remaining events are flushed and a `session_end` event is sent
111
+
112
+ ### Event Flow
113
+
114
+ ```
115
+ wrapTool("searchDB", fn) called
116
+ → fn(...args) executes normally
117
+ → WorkflowEvent created: { type: 'tool', name: 'searchDB', input, output, durationMs }
118
+ → pushTelemetryEvent(event)
119
+ → TelemetryBatcher.enqueue(event)
120
+ → Batch flushed every 2s → POST /api/observability/events
121
+ ```
122
+
123
+ ## Sampling & Redaction
124
+
125
+ ### Sampling
126
+
127
+ For high-throughput services, use `sampleRate` to reduce event volume:
128
+
129
+ ```typescript
130
+ initObservability({
131
+ serverUrl: 'https://server.elasticdash.com',
132
+ sampleRate: 0.1, // Send only 10% of events
133
+ })
134
+ ```
135
+
136
+ ### Redaction
137
+
138
+ Strip sensitive fields from input/output before sending:
139
+
140
+ ```typescript
141
+ initObservability({
142
+ serverUrl: 'https://server.elasticdash.com',
143
+ redactKeys: ['apiKey', 'password', 'ssn', 'credit_card'],
144
+ })
145
+ ```
146
+
147
+ This deep-clones the input/output and replaces the values of matching keys (matched case-insensitively) with `"[REDACTED]"` before serialization.
148
+
149
+ ## Batching & Reliability
150
+
151
+ Events are not sent individually — they are buffered and flushed in batches:
152
+
153
+ - **Interval flush**: every `batchIntervalMs` (default 2 seconds)
154
+ - **Size flush**: when the buffer reaches `maxBatchSize` (default 50 events)
155
+ - **Exit flush**: on `beforeExit`, `SIGTERM`, or `SIGINT`
156
+
157
+ Failed flushes are retried with exponential backoff (1s, 2s, 4s) up to 3 times. After max retries, events are dropped to prevent memory leaks.
158
+
159
+ ## Graceful Shutdown
160
+
161
+ Shutdown happens automatically on process exit signals. For manual control (e.g., serverless functions):
162
+
163
+ ```typescript
164
+ import { shutdownObservability } from 'elasticdash-sdk/http'
165
+
166
+ // In your cleanup handler
167
+ await shutdownObservability()
168
+ ```
169
+
170
+ This flushes all buffered events, sends a `session_end` marker, and clears the context.
171
+
172
+ ## Debug Logging
173
+
174
+ To see SDK internal logs (telemetry push status, batch flush counts, etc.), set:
175
+
176
+ ```bash
177
+ ELASTICDASH_DEBUG=1 node server.js
178
+ ```
179
+
180
+ All internal `console.log` calls are gated behind this flag and produce no output by default.
181
+
182
+ ## Comparison with Test Mode
183
+
184
+ | Feature | Test Mode | Observability Mode |
185
+ |---------|-----------|-------------------|
186
+ | Requires test runner | Yes | No |
187
+ | Mocking support | Yes | No |
188
+ | Step replay/freezing | Yes | No |
189
+ | Event delivery | Fire-and-forget per event | Batched with retry |
190
+ | Sampling | No | Yes |
191
+ | Redaction | No | Yes |
192
+ | Heartbeat | No | Yes |
193
+ | Graceful shutdown | No | Yes |
194
+
195
+ Both modes use the same `wrapTool` / `wrapAI` wrappers — the SDK detects which context is active and routes events accordingly.