elasticdash-sdk 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +775 -0
  3. package/dist/browser-ui.d.ts +43 -0
  4. package/dist/browser-ui.d.ts.map +1 -0
  5. package/dist/browser-ui.js +246 -0
  6. package/dist/browser-ui.js.map +1 -0
  7. package/dist/capture/event.d.ts +33 -0
  8. package/dist/capture/event.d.ts.map +1 -0
  9. package/dist/capture/event.js +2 -0
  10. package/dist/capture/event.js.map +1 -0
  11. package/dist/capture/index.d.ts +4 -0
  12. package/dist/capture/index.d.ts.map +1 -0
  13. package/dist/capture/index.js +4 -0
  14. package/dist/capture/index.js.map +1 -0
  15. package/dist/capture/recorder.d.ts +24 -0
  16. package/dist/capture/recorder.d.ts.map +1 -0
  17. package/dist/capture/recorder.js +46 -0
  18. package/dist/capture/recorder.js.map +1 -0
  19. package/dist/capture/replay.d.ts +20 -0
  20. package/dist/capture/replay.d.ts.map +1 -0
  21. package/dist/capture/replay.js +47 -0
  22. package/dist/capture/replay.js.map +1 -0
  23. package/dist/ci/api-client.d.ts +38 -0
  24. package/dist/ci/api-client.d.ts.map +1 -0
  25. package/dist/ci/api-client.js +96 -0
  26. package/dist/ci/api-client.js.map +1 -0
  27. package/dist/ci/benchmark.d.ts +33 -0
  28. package/dist/ci/benchmark.d.ts.map +1 -0
  29. package/dist/ci/benchmark.js +213 -0
  30. package/dist/ci/benchmark.js.map +1 -0
  31. package/dist/ci/ed-runner.d.ts +48 -0
  32. package/dist/ci/ed-runner.d.ts.map +1 -0
  33. package/dist/ci/ed-runner.js +260 -0
  34. package/dist/ci/ed-runner.js.map +1 -0
  35. package/dist/ci/executor.d.ts +13 -0
  36. package/dist/ci/executor.d.ts.map +1 -0
  37. package/dist/ci/executor.js +542 -0
  38. package/dist/ci/executor.js.map +1 -0
  39. package/dist/ci/git-info.d.ts +17 -0
  40. package/dist/ci/git-info.d.ts.map +1 -0
  41. package/dist/ci/git-info.js +102 -0
  42. package/dist/ci/git-info.js.map +1 -0
  43. package/dist/ci/index.d.ts +6 -0
  44. package/dist/ci/index.d.ts.map +1 -0
  45. package/dist/ci/index.js +4 -0
  46. package/dist/ci/index.js.map +1 -0
  47. package/dist/ci/measurement.d.ts +9 -0
  48. package/dist/ci/measurement.d.ts.map +1 -0
  49. package/dist/ci/measurement.js +15 -0
  50. package/dist/ci/measurement.js.map +1 -0
  51. package/dist/ci/replay.d.ts +31 -0
  52. package/dist/ci/replay.d.ts.map +1 -0
  53. package/dist/ci/replay.js +96 -0
  54. package/dist/ci/replay.js.map +1 -0
  55. package/dist/ci/reporters/default.d.ts +8 -0
  56. package/dist/ci/reporters/default.d.ts.map +1 -0
  57. package/dist/ci/reporters/default.js +46 -0
  58. package/dist/ci/reporters/default.js.map +1 -0
  59. package/dist/ci/reporters/index.d.ts +8 -0
  60. package/dist/ci/reporters/index.d.ts.map +1 -0
  61. package/dist/ci/reporters/index.js +14 -0
  62. package/dist/ci/reporters/index.js.map +1 -0
  63. package/dist/ci/reporters/json.d.ts +8 -0
  64. package/dist/ci/reporters/json.d.ts.map +1 -0
  65. package/dist/ci/reporters/json.js +14 -0
  66. package/dist/ci/reporters/json.js.map +1 -0
  67. package/dist/ci/reporters/junit.d.ts +8 -0
  68. package/dist/ci/reporters/junit.d.ts.map +1 -0
  69. package/dist/ci/reporters/junit.js +48 -0
  70. package/dist/ci/reporters/junit.js.map +1 -0
  71. package/dist/ci/runner.d.ts +3 -0
  72. package/dist/ci/runner.d.ts.map +1 -0
  73. package/dist/ci/runner.js +187 -0
  74. package/dist/ci/runner.js.map +1 -0
  75. package/dist/ci/test-discovery.d.ts +5 -0
  76. package/dist/ci/test-discovery.d.ts.map +1 -0
  77. package/dist/ci/test-discovery.js +11 -0
  78. package/dist/ci/test-discovery.js.map +1 -0
  79. package/dist/ci/test-loader.d.ts +19 -0
  80. package/dist/ci/test-loader.d.ts.map +1 -0
  81. package/dist/ci/test-loader.js +149 -0
  82. package/dist/ci/test-loader.js.map +1 -0
  83. package/dist/ci/test-registry.d.ts +42 -0
  84. package/dist/ci/test-registry.d.ts.map +1 -0
  85. package/dist/ci/test-registry.js +18 -0
  86. package/dist/ci/test-registry.js.map +1 -0
  87. package/dist/ci/trace-schema.d.ts +30 -0
  88. package/dist/ci/trace-schema.d.ts.map +1 -0
  89. package/dist/ci/trace-schema.js +66 -0
  90. package/dist/ci/trace-schema.js.map +1 -0
  91. package/dist/ci/trace-writer.d.ts +16 -0
  92. package/dist/ci/trace-writer.d.ts.map +1 -0
  93. package/dist/ci/trace-writer.js +108 -0
  94. package/dist/ci/trace-writer.js.map +1 -0
  95. package/dist/ci/types.d.ts +108 -0
  96. package/dist/ci/types.d.ts.map +1 -0
  97. package/dist/ci/types.js +3 -0
  98. package/dist/ci/types.js.map +1 -0
  99. package/dist/ci/upload-client.d.ts +74 -0
  100. package/dist/ci/upload-client.d.ts.map +1 -0
  101. package/dist/ci/upload-client.js +195 -0
  102. package/dist/ci/upload-client.js.map +1 -0
  103. package/dist/cli.d.ts +3 -0
  104. package/dist/cli.d.ts.map +1 -0
  105. package/dist/cli.js +716 -0
  106. package/dist/cli.js.map +1 -0
  107. package/dist/core/agent-state.d.ts +47 -0
  108. package/dist/core/agent-state.d.ts.map +1 -0
  109. package/dist/core/agent-state.js +137 -0
  110. package/dist/core/agent-state.js.map +1 -0
  111. package/dist/core/judge-utils.d.ts +22 -0
  112. package/dist/core/judge-utils.d.ts.map +1 -0
  113. package/dist/core/judge-utils.js +211 -0
  114. package/dist/core/judge-utils.js.map +1 -0
  115. package/dist/core/registry.d.ts +28 -0
  116. package/dist/core/registry.d.ts.map +1 -0
  117. package/dist/core/registry.js +52 -0
  118. package/dist/core/registry.js.map +1 -0
  119. package/dist/dashboard-server.d.ts +65 -0
  120. package/dist/dashboard-server.d.ts.map +1 -0
  121. package/dist/dashboard-server.js +3940 -0
  122. package/dist/dashboard-server.js.map +1 -0
  123. package/dist/execution/tool-runner.d.ts +26 -0
  124. package/dist/execution/tool-runner.d.ts.map +1 -0
  125. package/dist/execution/tool-runner.js +316 -0
  126. package/dist/execution/tool-runner.js.map +1 -0
  127. package/dist/html/dashboard.html +2218 -0
  128. package/dist/http.d.ts +14 -0
  129. package/dist/http.d.ts.map +1 -0
  130. package/dist/http.js +13 -0
  131. package/dist/http.js.map +1 -0
  132. package/dist/index.cjs +8102 -0
  133. package/dist/index.d.ts +61 -0
  134. package/dist/index.d.ts.map +1 -0
  135. package/dist/index.js +67 -0
  136. package/dist/index.js.map +1 -0
  137. package/dist/interceptors/ai-interceptor.d.ts +26 -0
  138. package/dist/interceptors/ai-interceptor.d.ts.map +1 -0
  139. package/dist/interceptors/ai-interceptor.js +756 -0
  140. package/dist/interceptors/ai-interceptor.js.map +1 -0
  141. package/dist/interceptors/db-auto.d.ts +8 -0
  142. package/dist/interceptors/db-auto.d.ts.map +1 -0
  143. package/dist/interceptors/db-auto.js +217 -0
  144. package/dist/interceptors/db-auto.js.map +1 -0
  145. package/dist/interceptors/db.d.ts +23 -0
  146. package/dist/interceptors/db.d.ts.map +1 -0
  147. package/dist/interceptors/db.js +137 -0
  148. package/dist/interceptors/db.js.map +1 -0
  149. package/dist/interceptors/http.d.ts +28 -0
  150. package/dist/interceptors/http.d.ts.map +1 -0
  151. package/dist/interceptors/http.js +356 -0
  152. package/dist/interceptors/http.js.map +1 -0
  153. package/dist/interceptors/side-effects.d.ts +7 -0
  154. package/dist/interceptors/side-effects.d.ts.map +1 -0
  155. package/dist/interceptors/side-effects.js +72 -0
  156. package/dist/interceptors/side-effects.js.map +1 -0
  157. package/dist/interceptors/telemetry-push.d.ts +142 -0
  158. package/dist/interceptors/telemetry-push.d.ts.map +1 -0
  159. package/dist/interceptors/telemetry-push.js +463 -0
  160. package/dist/interceptors/telemetry-push.js.map +1 -0
  161. package/dist/interceptors/tool.d.ts +2 -0
  162. package/dist/interceptors/tool.d.ts.map +1 -0
  163. package/dist/interceptors/tool.js +274 -0
  164. package/dist/interceptors/tool.js.map +1 -0
  165. package/dist/interceptors/workflow-ai.d.ts +5 -0
  166. package/dist/interceptors/workflow-ai.d.ts.map +1 -0
  167. package/dist/interceptors/workflow-ai.js +382 -0
  168. package/dist/interceptors/workflow-ai.js.map +1 -0
  169. package/dist/internals/conditional-recorder.d.ts +21 -0
  170. package/dist/internals/conditional-recorder.d.ts.map +1 -0
  171. package/dist/internals/conditional-recorder.js +54 -0
  172. package/dist/internals/conditional-recorder.js.map +1 -0
  173. package/dist/internals/mock-resolver.d.ts +146 -0
  174. package/dist/internals/mock-resolver.d.ts.map +1 -0
  175. package/dist/internals/mock-resolver.js +427 -0
  176. package/dist/internals/mock-resolver.js.map +1 -0
  177. package/dist/matchers/index.d.ts +96 -0
  178. package/dist/matchers/index.d.ts.map +1 -0
  179. package/dist/matchers/index.js +668 -0
  180. package/dist/matchers/index.js.map +1 -0
  181. package/dist/observability.d.ts +82 -0
  182. package/dist/observability.d.ts.map +1 -0
  183. package/dist/observability.js +471 -0
  184. package/dist/observability.js.map +1 -0
  185. package/dist/portal-executor.d.ts +30 -0
  186. package/dist/portal-executor.d.ts.map +1 -0
  187. package/dist/portal-executor.js +324 -0
  188. package/dist/portal-executor.js.map +1 -0
  189. package/dist/portal-server.d.ts +3 -0
  190. package/dist/portal-server.d.ts.map +1 -0
  191. package/dist/portal-server.js +279 -0
  192. package/dist/portal-server.js.map +1 -0
  193. package/dist/proxy/llm-capture.d.ts +14 -0
  194. package/dist/proxy/llm-capture.d.ts.map +1 -0
  195. package/dist/proxy/llm-capture.js +264 -0
  196. package/dist/proxy/llm-capture.js.map +1 -0
  197. package/dist/reporter.d.ts +3 -0
  198. package/dist/reporter.d.ts.map +1 -0
  199. package/dist/reporter.js +72 -0
  200. package/dist/reporter.js.map +1 -0
  201. package/dist/runWorkflowSubprocess.d.ts +14 -0
  202. package/dist/runWorkflowSubprocess.d.ts.map +1 -0
  203. package/dist/runWorkflowSubprocess.js +66 -0
  204. package/dist/runWorkflowSubprocess.js.map +1 -0
  205. package/dist/runner.d.ts +16 -0
  206. package/dist/runner.d.ts.map +1 -0
  207. package/dist/runner.js +138 -0
  208. package/dist/runner.js.map +1 -0
  209. package/dist/socket-connector.d.ts +22 -0
  210. package/dist/socket-connector.d.ts.map +1 -0
  211. package/dist/socket-connector.js +104 -0
  212. package/dist/socket-connector.js.map +1 -0
  213. package/dist/telemetry-batcher.d.ts +56 -0
  214. package/dist/telemetry-batcher.d.ts.map +1 -0
  215. package/dist/telemetry-batcher.js +143 -0
  216. package/dist/telemetry-batcher.js.map +1 -0
  217. package/dist/test-setup.d.ts +12 -0
  218. package/dist/test-setup.d.ts.map +1 -0
  219. package/dist/test-setup.js +13 -0
  220. package/dist/test-setup.js.map +1 -0
  221. package/dist/tool-registry.d.ts +31 -0
  222. package/dist/tool-registry.d.ts.map +1 -0
  223. package/dist/tool-registry.js +73 -0
  224. package/dist/tool-registry.js.map +1 -0
  225. package/dist/tool-runner-worker.d.ts +2 -0
  226. package/dist/tool-runner-worker.d.ts.map +1 -0
  227. package/dist/tool-runner-worker.js +215 -0
  228. package/dist/tool-runner-worker.js.map +1 -0
  229. package/dist/trace-adapter/context.d.ts +72 -0
  230. package/dist/trace-adapter/context.d.ts.map +1 -0
  231. package/dist/trace-adapter/context.js +80 -0
  232. package/dist/trace-adapter/context.js.map +1 -0
  233. package/dist/tracing.d.ts +2 -0
  234. package/dist/tracing.d.ts.map +1 -0
  235. package/dist/tracing.js +59 -0
  236. package/dist/tracing.js.map +1 -0
  237. package/dist/trigger-executor.d.ts +12 -0
  238. package/dist/trigger-executor.d.ts.map +1 -0
  239. package/dist/trigger-executor.js +130 -0
  240. package/dist/trigger-executor.js.map +1 -0
  241. package/dist/types/portal.d.ts +76 -0
  242. package/dist/types/portal.d.ts.map +1 -0
  243. package/dist/types/portal.js +2 -0
  244. package/dist/types/portal.js.map +1 -0
  245. package/dist/utils/debug.d.ts +3 -0
  246. package/dist/utils/debug.d.ts.map +1 -0
  247. package/dist/utils/debug.js +8 -0
  248. package/dist/utils/debug.js.map +1 -0
  249. package/dist/utils/license-error.d.ts +23 -0
  250. package/dist/utils/license-error.d.ts.map +1 -0
  251. package/dist/utils/license-error.js +42 -0
  252. package/dist/utils/license-error.js.map +1 -0
  253. package/dist/utils/redact.d.ts +7 -0
  254. package/dist/utils/redact.d.ts.map +1 -0
  255. package/dist/utils/redact.js +26 -0
  256. package/dist/utils/redact.js.map +1 -0
  257. package/dist/workflow-runner-worker.d.ts +2 -0
  258. package/dist/workflow-runner-worker.d.ts.map +1 -0
  259. package/dist/workflow-runner-worker.js +329 -0
  260. package/dist/workflow-runner-worker.js.map +1 -0
  261. package/dist/workflow-runner.d.ts +14 -0
  262. package/dist/workflow-runner.d.ts.map +1 -0
  263. package/dist/workflow-runner.js +34 -0
  264. package/dist/workflow-runner.js.map +1 -0
  265. package/docs/agent-coding-instructions.md +138 -0
  266. package/docs/agent-integration-guide.md +564 -0
  267. package/docs/agents.md +140 -0
  268. package/docs/dashboard.md +394 -0
  269. package/docs/deno.md +69 -0
  270. package/docs/instrumentation.md +424 -0
  271. package/docs/langfuse-trace-structure.md +145 -0
  272. package/docs/matchers.md +173 -0
  273. package/docs/observability_contract.md +192 -0
  274. package/docs/observability_mode.md +195 -0
  275. package/docs/quickstart.md +621 -0
  276. package/docs/security-compliance.md +566 -0
  277. package/docs/test-writing-guidelines.md +444 -0
  278. package/docs/tools.md +165 -0
  279. package/docs/workflow-modes.md +253 -0
  280. package/package.json +76 -0
  281. package/src/browser-ui.ts +281 -0
  282. package/src/capture/event.ts +30 -0
  283. package/src/capture/index.ts +3 -0
  284. package/src/capture/recorder.ts +62 -0
  285. package/src/capture/replay.ts +55 -0
  286. package/src/ci/api-client.ts +136 -0
  287. package/src/ci/benchmark.ts +257 -0
  288. package/src/ci/ed-runner.ts +351 -0
  289. package/src/ci/executor.ts +671 -0
  290. package/src/ci/git-info.ts +127 -0
  291. package/src/ci/index.ts +5 -0
  292. package/src/ci/measurement.ts +25 -0
  293. package/src/ci/replay.ts +127 -0
  294. package/src/ci/reporters/default.ts +50 -0
  295. package/src/ci/reporters/index.ts +21 -0
  296. package/src/ci/reporters/json.ts +18 -0
  297. package/src/ci/reporters/junit.ts +61 -0
  298. package/src/ci/runner.ts +208 -0
  299. package/src/ci/test-discovery.ts +16 -0
  300. package/src/ci/test-loader.ts +187 -0
  301. package/src/ci/test-registry.ts +62 -0
  302. package/src/ci/trace-schema.ts +96 -0
  303. package/src/ci/trace-writer.ts +107 -0
  304. package/src/ci/types.ts +115 -0
  305. package/src/ci/upload-client.ts +300 -0
  306. package/src/cli.ts +811 -0
  307. package/src/core/agent-state.ts +162 -0
  308. package/src/core/judge-utils.ts +232 -0
  309. package/src/core/registry.ts +92 -0
  310. package/src/dashboard-server.ts +2047 -0
  311. package/src/execution/tool-runner.ts +352 -0
  312. package/src/html/dashboard.html +2218 -0
  313. package/src/http.ts +13 -0
  314. package/src/index.ts +138 -0
  315. package/src/interceptors/ai-interceptor.ts +798 -0
  316. package/src/interceptors/db-auto.ts +243 -0
  317. package/src/interceptors/db.ts +156 -0
  318. package/src/interceptors/http.ts +393 -0
  319. package/src/interceptors/side-effects.ts +83 -0
  320. package/src/interceptors/telemetry-push.ts +537 -0
  321. package/src/interceptors/tool.ts +287 -0
  322. package/src/interceptors/workflow-ai.ts +419 -0
  323. package/src/internals/conditional-recorder.ts +63 -0
  324. package/src/internals/mock-resolver.ts +492 -0
  325. package/src/matchers/index.ts +824 -0
  326. package/src/observability.ts +501 -0
  327. package/src/portal-executor.ts +355 -0
  328. package/src/portal-server.ts +304 -0
  329. package/src/proxy/llm-capture.ts +301 -0
  330. package/src/reporter.ts +81 -0
  331. package/src/runWorkflowSubprocess.ts +74 -0
  332. package/src/runner.ts +178 -0
  333. package/src/socket-connector.ts +117 -0
  334. package/src/telemetry-batcher.ts +191 -0
  335. package/src/test-setup.ts +16 -0
  336. package/src/tool-registry.ts +94 -0
  337. package/src/tool-runner-worker.ts +244 -0
  338. package/src/trace-adapter/context.ts +156 -0
  339. package/src/tracing.ts +62 -0
  340. package/src/trigger-executor.ts +171 -0
  341. package/src/types/agent.d.ts +63 -0
  342. package/src/types/expect.d.ts +81 -0
  343. package/src/types/modules.d.ts +2 -0
  344. package/src/types/portal.ts +69 -0
  345. package/src/utils/debug.ts +8 -0
  346. package/src/utils/license-error.ts +43 -0
  347. package/src/utils/redact.ts +25 -0
  348. package/src/workflow-runner-worker.ts +386 -0
  349. package/src/workflow-runner.ts +58 -0
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Agent state serialization, deserialization, and utility functions.
3
+ *
4
+ * Enables capturing agent plan state during execution and resuming
5
+ * agents from any task in the plan without re-executing completed steps.
6
+ */
7
+
8
+ import type { AgentPlan, AgentState, AgentTask } from '../types/agent.js'
9
+ import type { WorkflowEvent } from '../capture/event.js'
10
+
11
/**
 * Builds a persistable AgentState snapshot from a plan and its trace events.
 *
 * Both the plan and the trace are deep-copied through a JSON round-trip, so
 * the snapshot shares no references with the inputs (and anything JSON cannot
 * represent is dropped or converted by that round-trip).
 *
 * `resumeFromTaskIndex` is the position of the first task whose status is not
 * 'completed'; when every task is completed it is `plan.tasks.length`.
 *
 * @param plan - The agent plan to snapshot.
 * @param trace - The workflow events captured so far.
 * @returns A snapshot containing the copied plan, the copied trace and the resume index.
 */
export function serializeAgentState(plan: AgentPlan, trace: WorkflowEvent[]): AgentState {
  // Default: nothing left to run, so resumption starts past the last task.
  let resumeFromTaskIndex = plan.tasks.length
  for (const [position, task] of plan.tasks.entries()) {
    if (task.status !== 'completed') {
      resumeFromTaskIndex = position
      break
    }
  }

  const planCopy = JSON.parse(JSON.stringify(plan)) as AgentPlan
  const traceCopy = JSON.parse(JSON.stringify(trace)) as WorkflowEvent[]

  return { plan: planCopy, trace: traceCopy, resumeFromTaskIndex }
}
28
+
29
/**
 * Validates an AgentState from a parsed JSON value and returns it.
 *
 * The returned state references the `plan` (and `trace`, when it is an array)
 * of the input directly; nothing is cloned. A missing or non-array `trace`
 * becomes `[]`, and a missing or non-numeric `resumeFromTaskIndex` becomes `0`.
 *
 * @param raw - The untrusted, already-parsed value to validate.
 * @returns The validated state.
 * @throws Error if `raw` is not an object, if `plan` is missing, if
 *   `plan.tasks` is not an array, if `plan.id` is not a string, if
 *   `resumeFromTaskIndex` is not an integer within `0..plan.tasks.length`,
 *   or if any task before `resumeFromTaskIndex` is not completed or has no output.
 */
export function deserializeAgentState(raw: unknown): AgentState {
  if (!raw || typeof raw !== 'object') {
    throw new Error('AgentState must be a non-null object')
  }
  const obj = raw as Record<string, unknown>

  if (!obj.plan || typeof obj.plan !== 'object') {
    throw new Error('AgentState.plan is required')
  }
  const plan = obj.plan as AgentPlan

  if (!Array.isArray(plan.tasks)) {
    throw new Error('AgentState.plan.tasks must be an array')
  }
  if (typeof plan.id !== 'string') {
    throw new Error('AgentState.plan.id must be a string')
  }

  const trace = Array.isArray(obj.trace) ? (obj.trace as WorkflowEvent[]) : []
  const resumeFromTaskIndex =
    typeof obj.resumeFromTaskIndex === 'number' ? obj.resumeFromTaskIndex : 0

  // Reject NaN, fractions, negatives and indices past the end of the plan.
  // Without this bound a value such as 1e300 or Infinity would keep the
  // validation loop below spinning over non-existent tasks.
  if (
    !Number.isInteger(resumeFromTaskIndex) ||
    resumeFromTaskIndex < 0 ||
    resumeFromTaskIndex > plan.tasks.length
  ) {
    throw new Error(
      `AgentState.resumeFromTaskIndex must be an integer between 0 and ${plan.tasks.length} (got ${resumeFromTaskIndex})`,
    )
  }

  // Every task that resumption will skip must be completed and carry an output,
  // because later tasks may reference those outputs.
  for (let i = 0; i < resumeFromTaskIndex; i++) {
    const task = plan.tasks[i]
    if (!task) continue // tolerate null/empty slots, as before
    if (task.status !== 'completed') {
      throw new Error(
        `Task at index ${i} (id="${task.id}") has status "${task.status}" but must be "completed" before resumeFromTaskIndex=${resumeFromTaskIndex}`,
      )
    }
    if (task.output === undefined) {
      throw new Error(
        `Task at index ${i} (id="${task.id}") is completed but has no output. Cannot resume safely.`,
      )
    }
  }

  return { plan, trace, resumeFromTaskIndex }
}
73
+
74
/**
 * Collects the outputs of completed tasks into a map keyed by task ID.
 *
 * A task contributes an entry only when its status is 'completed' and its
 * output is not `undefined`. If two tasks share an ID, the later one wins.
 *
 * @param plan - The plan whose tasks are scanned in order.
 * @returns A map from task ID to that task's output.
 */
export function extractTaskOutputs(plan: AgentPlan): Record<string, unknown> {
  const byTaskId: Record<string, unknown> = {}
  const { tasks } = plan

  for (let i = 0; i < tasks.length; i++) {
    const current = tasks[i]
    const isUsable = current.status === 'completed' && current.output !== undefined
    if (!isUsable) continue
    byTaskId[current.id] = current.output
  }

  return byTaskId
}
87
+
88
/**
 * Resolves placeholder references in a task input.
 *
 * A placeholder is an object with a string `$ref` property, e.g.
 * `{ $ref: "task-1.output.userId" }`, where "task-1" is a task ID and the
 * rest is a dot-separated path into that task's output. The whole placeholder
 * object (including any other keys it has) is replaced by the resolved value.
 *
 * Example:
 *   previousOutputs = { "task-1": { userId: "abc" } }
 *   input = { $ref: "task-1.output.userId" }
 *   → returns "abc"
 *
 * Arrays and plain objects are walked recursively and returned as new
 * containers; the input itself is not mutated. Primitives, `null` and
 * `undefined` are returned as-is.
 *
 * @param input - The task input, possibly containing placeholders at any depth.
 * @param previousOutputs - Outputs of earlier tasks, keyed by task ID.
 * @returns The input with every placeholder replaced by its resolved value.
 */
export function resolveTaskInput(
  input: unknown,
  previousOutputs: Record<string, unknown>,
): unknown {
  if (input === null || input === undefined) return input

  if (Array.isArray(input)) {
    return input.map((item) => resolveTaskInput(item, previousOutputs))
  }

  if (typeof input !== 'object') return input

  const obj = input as Record<string, unknown>

  // Placeholder: { $ref: "taskId.output.path" }
  if (typeof obj['$ref'] === 'string') {
    return resolveRef(obj['$ref'], previousOutputs)
  }

  // Object.fromEntries defines each key as an own data property. Plain
  // `copy[key] = value` would treat an own "__proto__" key (which JSON.parse
  // can produce) as a prototype assignment and lose or misplace that entry.
  return Object.fromEntries(
    Object.entries(obj).map(([key, value]) => [key, resolveTaskInput(value, previousOutputs)]),
  )
}
130
+
131
/**
 * Resolves a dot-separated reference such as "task-1.output.userId" against
 * the previousOutputs map.
 *
 * Format: `<taskId>[.output][.<path>...]`. The first segment is the task ID.
 * The literal "output" immediately after the task ID is optional and is
 * skipped, because `previousOutputs[taskId]` already IS the task's output.
 * Only that leading keyword is skipped: a later segment named "output" is
 * treated as a real field, so `task-1.output.output.x` reaches
 * `previousOutputs['task-1'].output.x`.
 *
 * Task IDs containing a dot cannot be addressed, since the reference is split on ".".
 *
 * @param ref - The reference string.
 * @param previousOutputs - Outputs of earlier tasks, keyed by task ID.
 * @returns The referenced value, or `undefined` when the task or any step of
 *   the path does not exist.
 */
function resolveRef(ref: string, previousOutputs: Record<string, unknown>): unknown {
  const [taskId, ...segments] = ref.split('.')

  // Drop the optional "output" keyword only in its leading position.
  const path = segments[0] === 'output' ? segments.slice(1) : segments

  let current: unknown = previousOutputs[taskId]
  for (const part of path) {
    if (current === null || current === undefined) return undefined
    current = (current as Record<string, unknown>)[part]
  }
  return current
}
150
+
151
/**
 * Returns a shallow copy of a task marked as completed with the given output.
 *
 * The input task is not modified. An existing `completedAt` is preserved;
 * otherwise the current time (`Date.now()`) is recorded.
 *
 * @param task - The task to copy.
 * @param output - The output to store on the completed copy.
 * @returns The completed copy of the task.
 */
export function markTaskCompleted(task: AgentTask, output: unknown): AgentTask {
  const finishedAt = task.completedAt ?? Date.now()
  const completed: AgentTask = {
    ...task,
    status: 'completed',
    output,
    completedAt: finishedAt,
  }
  return completed
}
@@ -0,0 +1,232 @@
1
+ /**
2
+ * judge-utils.ts
3
+ *
4
+ * Utilities for preprocessing outputs before sending to LLM-as-a-judge evaluators.
5
+ * Addresses two problems:
6
+ * 1. Large outputs cause slow inference
7
+ * 2. LLMs miss attributes in large JSON payloads due to attention degradation
8
+ */
9
+
10
/** Character budget used by prepareOutputForJudge when the caller passes no maxChars. */
const DEFAULT_MAX_CHARS = 8_000

/** String values longer than this are shortened by truncateJsonValues. */
const MAX_STRING_VALUE_LENGTH = 500

/** Number of items truncateJsonValues keeps at each end of an over-long array. */
const MAX_ARRAY_EDGE_ITEMS = 5
13
+
14
/**
 * Prepare an output string for LLM judge evaluation.
 *
 * - Empty output is returned unchanged.
 * - Output that parses as a JSON object or array is always run through
 *   tryJsonExtract, regardless of its size, so even a small JSON output comes
 *   back re-serialized rather than byte-identical.
 * - Anything else (non-JSON text, or a JSON primitive) passes through
 *   unchanged when it fits in `maxChars`, and is otherwise cut down by
 *   truncateHeadTail.
 *
 * @param output - The raw output string to prepare.
 * @param judgePrompt - The judge/evaluation prompt, used to identify relevant JSON keys.
 * @param maxChars - Maximum character budget for the prepared output. Default 8000.
 * @returns The prepared output string, possibly trimmed.
 */
export function prepareOutputForJudge(
  output: string,
  judgePrompt: string,
  maxChars: number = DEFAULT_MAX_CHARS,
): string {
  if (!output) return output

  // JSON-aware path first: a null result means "not a JSON object/array".
  const viaJson = tryJsonExtract(output, judgePrompt, maxChars)
  if (viaJson !== null) {
    return viaJson
  }

  // Plain-text path: leave it alone unless it is over budget.
  const overBudget = output.length > maxChars
  return overBudget ? truncateHeadTail(output, maxChars) : output
}
43
+
44
/**
 * JSON-aware preparation of a judge output.
 *
 * Parses `output`; when it is a JSON object or array, narrows it to the
 * subtrees whose keys relate to keywords from the judge prompt, shortens
 * over-long values, and pretty-prints the result (2-space indent).
 *
 * - No keywords in the prompt, or nothing relevant found: the whole parsed
 *   value is used instead of an extracted subset.
 * - Extraction succeeded and fits the budget: a trailing note is appended when
 *   top-level keys were left out.
 * - Any rendering longer than `maxChars` is cut with truncateHeadTail.
 *
 * @returns The prepared string, or `null` when `output` is not valid JSON or
 *   parses to a primitive / `null`.
 */
function tryJsonExtract(output: string, judgePrompt: string, maxChars: number): string | null {
  let data: unknown
  try {
    data = JSON.parse(output)
  } catch {
    return null
  }
  if (data === null || typeof data !== 'object') return null

  // Shared rendering steps: shorten values, pretty-print, then enforce the budget.
  const render = (value: unknown): string => JSON.stringify(truncateJsonValues(value), null, 2)
  const fit = (rendered: string): string =>
    rendered.length <= maxChars ? rendered : truncateHeadTail(rendered, maxChars)

  const keywords = extractKeywords(judgePrompt)
  if (keywords.length === 0) {
    return fit(render(data))
  }

  const relevant = extractRelevantPaths(data, keywords)
  const foundSomething =
    relevant !== undefined && Object.keys(relevant as Record<string, unknown>).length > 0
  if (!foundSomething) {
    // Nothing matched the prompt: fall back to the full (value-truncated) object.
    return fit(render(data))
  }

  const rendered = render(relevant)
  if (rendered.length > maxChars) {
    return truncateHeadTail(rendered, maxChars)
  }

  const omittedKeys = countOmittedKeys(data, relevant)
  if (omittedKeys <= 0) return rendered
  return `${rendered}\n\n[Note: ${omittedKeys} irrelevant key(s) omitted from original output for brevity. Total original size: ${output.length} chars.]`
}
93
+
94
/**
 * Derives a de-duplicated list of lowercase keywords from the judge prompt.
 *
 * The prompt is lowercased, every character other than a-z, 0-9, "_", "-"
 * and whitespace becomes a space, and the text is split on whitespace.
 * Tokens of two characters or fewer and tokens in the stop-word list are
 * dropped. Order of first appearance is preserved.
 */
function extractKeywords(prompt: string): string[] {
  const stopWords = new Set([
    'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
    'should', 'may', 'might', 'shall', 'can', 'need', 'must',
    'and', 'or', 'but', 'if', 'then', 'else', 'when', 'where', 'how',
    'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those',
    'it', 'its', 'of', 'in', 'on', 'at', 'to', 'for', 'with', 'by',
    'from', 'as', 'into', 'about', 'between', 'through', 'after', 'before',
    'not', 'no', 'nor', 'only', 'also', 'just', 'more', 'most', 'very',
    'all', 'each', 'every', 'any', 'some', 'such', 'than', 'too',
    'output', 'evaluate', 'score', 'check', 'whether', 'contains',
    'following', 'given', 'based', 'respond', 'number', 'scale',
    'text', 'result', 'response', 'answer', 'return', 'value',
  ])

  const normalized = prompt.toLowerCase().replace(/[^a-z0-9_\s-]/g, ' ')
  const tokens = normalized.split(/\s+/)

  // A Set keeps insertion order, which gives first-seen ordering for free.
  const seen = new Set<string>()
  for (const token of tokens) {
    if (token.length <= 2) continue
    if (stopWords.has(token)) continue
    seen.add(token)
  }
  return Array.from(seen)
}
122
+
123
/**
 * Recursively keeps only the parts of a JSON value whose keys relate to the keywords.
 *
 * - Object: an entry is kept whole when its lowercased key contains a keyword
 *   or a keyword contains the key. Otherwise, if the value is an object or
 *   array, it is searched recursively and kept in its narrowed form when
 *   something inside matched.
 * - Array: each item is narrowed recursively; items with no match are dropped.
 *   All matching items are kept (the array is not shortened here).
 * - Anything else has no keys and never matches.
 *
 * @returns The narrowed value, or `undefined` when nothing matched.
 */
function extractRelevantPaths(obj: unknown, keywords: string[]): unknown {
  if (obj === null || typeof obj !== 'object') return undefined

  if (Array.isArray(obj)) {
    const keptItems: unknown[] = []
    for (const element of obj) {
      const narrowed = extractRelevantPaths(element, keywords)
      if (narrowed !== undefined) keptItems.push(narrowed)
    }
    return keptItems.length > 0 ? keptItems : undefined
  }

  const kept: Record<string, unknown> = {}
  let matched = false

  for (const [key, value] of Object.entries(obj)) {
    const lowered = key.toLowerCase()
    const direct = keywords.some(kw => lowered.includes(kw) || kw.includes(lowered))

    if (direct) {
      kept[key] = value
      matched = true
      continue
    }

    // Only containers can hold further keys worth inspecting.
    if (value === null || typeof value !== 'object') continue

    const inner = extractRelevantPaths(value, keywords)
    if (inner !== undefined) {
      kept[key] = inner
      matched = true
    }
  }

  return matched ? kept : undefined
}
165
+
166
/**
 * Returns a copy of a JSON structure with over-long values shortened.
 *
 * - Strings longer than MAX_STRING_VALUE_LENGTH keep their first 60% / last
 *   40% of that length, joined by a "...[N chars truncated]..." marker.
 * - Arrays longer than twice MAX_ARRAY_EDGE_ITEMS keep that many items from
 *   each end, with a "[...N items skipped...]" string item in between.
 * - Objects and kept array items are processed recursively.
 * - Everything else (numbers, booleans, null, undefined) is returned as-is.
 */
function truncateJsonValues(obj: unknown): unknown {
  if (typeof obj === 'string') {
    const excess = obj.length - MAX_STRING_VALUE_LENGTH
    if (excess <= 0) return obj
    const headLen = Math.floor(MAX_STRING_VALUE_LENGTH * 0.6)
    const tailLen = MAX_STRING_VALUE_LENGTH - headLen
    const head = obj.slice(0, headLen)
    const tail = obj.slice(obj.length - tailLen)
    return `${head}...[${excess} chars truncated]...${tail}`
  }

  // Remaining primitives, null and undefined need no processing.
  if (obj === null || typeof obj !== 'object') return obj

  if (Array.isArray(obj)) {
    const edge = MAX_ARRAY_EDGE_ITEMS
    const skipped = obj.length - edge * 2
    if (skipped <= 0) {
      return obj.map(item => truncateJsonValues(item))
    }
    const leading = obj.slice(0, edge).map(item => truncateJsonValues(item))
    const trailing = obj.slice(obj.length - edge).map(item => truncateJsonValues(item))
    return [...leading, `[...${skipped} items skipped...]`, ...trailing]
  }

  const source = obj as Record<string, unknown>
  const copy: Record<string, unknown> = {}
  for (const key of Object.keys(source)) {
    copy[key] = truncateJsonValues(source[key])
  }
  return copy
}
201
+
202
/**
 * Counts the top-level keys of `original` that are absent from `extracted`.
 *
 * Returns 0 when either argument is not a non-null object, or when
 * `original` is an array. Only top-level keys are compared.
 */
function countOmittedKeys(original: unknown, extracted: unknown): number {
  const isObjectLike = (value: unknown): value is object =>
    typeof value === 'object' && value !== null

  if (!isObjectLike(original) || !isObjectLike(extracted) || Array.isArray(original)) {
    return 0
  }

  const keptKeys = new Set(Object.keys(extracted))
  let omitted = 0
  for (const key of Object.keys(original)) {
    if (!keptKeys.has(key)) omitted += 1
  }
  return omitted
}
214
+
215
/**
 * Truncate a string to at most `maxChars` characters by keeping its head and
 * tail and inserting a marker in the middle.
 *
 * - Text that already fits is returned unchanged.
 * - The marker states how many characters were actually removed. Because the
 *   marker itself uses part of the budget, that count is
 *   `text.length - (maxChars - marker.length)`, not `text.length - maxChars`.
 * - The kept characters are split 70% head / 30% tail.
 * - When the budget is too small to hold the marker, the result is simply the
 *   first `maxChars` characters; a zero, negative or NaN budget yields "".
 *
 * @param text - The text to shorten.
 * @param maxChars - Maximum length of the returned string.
 * @returns The shortened text, never longer than `maxChars` (for positive budgets).
 */
function truncateHeadTail(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text

  // Normalize the budget so slice() never sees a negative or NaN bound.
  const budget = maxChars > 0 ? Math.floor(maxChars) : 0

  const markerFor = (removed: number): string =>
    `\n\n[...truncated ${removed} chars out of ${text.length} total...]\n\n`

  // The removed count depends on the marker's length, and the marker's length
  // depends on how many digits the count has. Re-derive until it stops changing;
  // the count only ever grows by the marker length, so this settles in a few passes.
  let marker = markerFor(text.length - budget)
  for (let pass = 0; pass < 4; pass++) {
    const kept = Math.max(0, budget - marker.length)
    const next = markerFor(text.length - kept)
    if (next === marker) break
    marker = next
  }

  const available = budget - marker.length
  if (available <= 0) return text.slice(0, budget)

  // 70% head, 30% tail — head is usually more important
  const headSize = Math.floor(available * 0.7)
  const tailSize = available - headSize

  return text.slice(0, headSize) + marker + text.slice(text.length - tailSize)
}
@@ -0,0 +1,92 @@
1
+ import type { AITestContext } from '../trace-adapter/context.js'
2
+
3
/** A test body: receives the AI test context and may complete synchronously or asynchronously. */
export type TestFunction = (ctx: AITestContext) => Promise<void> | void

/** A registered test: its display name and the function to run. */
export interface TestEntry {
  name: string
  fn: TestFunction
}

/** Everything registered so far: the tests plus the four lifecycle hook lists. */
export interface Registry {
  tests: TestEntry[]
  beforeAllHooks: (() => Promise<void> | void)[]
  afterAllHooks: (() => Promise<void> | void)[]
  beforeEachHooks: (() => Promise<void> | void)[]
  afterEachHooks: (() => Promise<void> | void)[]
}
17
+
18
/** Property name under which the shared registry is stored on globalThis. */
const REGISTRY_KEY = '__elasticdash_registry__'

/**
 * Returns the registry stored on globalThis, creating an empty one on first use.
 *
 * Keeping the registry on globalThis (rather than in a module variable) means
 * every caller that reaches this key sees the same object.
 */
function getGlobalRegistry(): Registry {
  const host = globalThis as any
  const existing = host[REGISTRY_KEY]
  if (existing) {
    return existing as Registry
  }
  host[REGISTRY_KEY] = createEmptyRegistry()
  return host[REGISTRY_KEY] as Registry
}
26
+
27
/** Builds a registry with no tests and no hooks; each call returns fresh arrays. */
function createEmptyRegistry(): Registry {
  const blank: Registry = {
    tests: [],
    beforeAllHooks: [],
    afterAllHooks: [],
    beforeEachHooks: [],
    afterEachHooks: [],
  }
  return blank
}
36
+
37
/**
 * Replaces the shared registry on globalThis with a new empty one and logs that it did so.
 *
 * Registry objects obtained before this call are left untouched but are no
 * longer the shared registry.
 */
export function clearRegistry(): void {
  const host = globalThis as any
  host[REGISTRY_KEY] = createEmptyRegistry()
  console.log('[elasticdash] clearRegistry called. Registry reset.')
}
41
+
42
/**
 * Returns the shared registry (creating it if needed) and logs the names of
 * the tests currently registered.
 */
export function getRegistry(): Registry {
  const current = getGlobalRegistry()
  const testNames = current.tests.map(({ name }) => name)
  console.log('[elasticdash] getRegistry called. Current tests:', testNames)
  return current
}
47
+
48
/**
 * Registers a test in the shared registry and logs its name.
 *
 * @param name - Display name of the test.
 * @param fn - The test body.
 */
export function aiTest(name: string, fn: TestFunction): void {
  const entry: TestEntry = { name, fn }
  getGlobalRegistry().tests.push(entry)
  console.log(`[elasticdash] Registered test: ${name}`)
}
53
+
54
/** Appends a hook to the shared registry's `beforeAllHooks` list. */
export function beforeAll(fn: () => Promise<void> | void): void {
  getGlobalRegistry().beforeAllHooks.push(fn)
}
58
+
59
/** Appends a hook to the shared registry's `afterAllHooks` list. */
export function afterAll(fn: () => Promise<void> | void): void {
  getGlobalRegistry().afterAllHooks.push(fn)
}
63
+
64
/** Appends a hook to the shared registry's `beforeEachHooks` list. */
export function beforeEach(fn: () => Promise<void> | void): void {
  getGlobalRegistry().beforeEachHooks.push(fn)
}
68
+
69
/** Appends a hook to the shared registry's `afterEachHooks` list. */
export function afterEach(fn: () => Promise<void> | void): void {
  getGlobalRegistry().afterEachHooks.push(fn)
}
73
+
74
// Make the registration functions available as globals so test files can call
// them without an import. The ambient declarations below give those globals
// their types; the Object.assign call installs them when this module loads.
declare global {
  // eslint-disable-next-line no-var
  var aiTest: (name: string, fn: TestFunction) => void
  // eslint-disable-next-line no-var
  var beforeAll: (fn: () => Promise<void> | void) => void
  // eslint-disable-next-line no-var
  var afterAll: (fn: () => Promise<void> | void) => void
  // eslint-disable-next-line no-var
  var beforeEach: (fn: () => Promise<void> | void) => void
  // eslint-disable-next-line no-var
  var afterEach: (fn: () => Promise<void> | void) => void
}

Object.assign(globalThis, { aiTest, beforeAll, afterAll, beforeEach, afterEach })