elasticdash-sdk 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +775 -0
- package/dist/browser-ui.d.ts +43 -0
- package/dist/browser-ui.d.ts.map +1 -0
- package/dist/browser-ui.js +246 -0
- package/dist/browser-ui.js.map +1 -0
- package/dist/capture/event.d.ts +33 -0
- package/dist/capture/event.d.ts.map +1 -0
- package/dist/capture/event.js +2 -0
- package/dist/capture/event.js.map +1 -0
- package/dist/capture/index.d.ts +4 -0
- package/dist/capture/index.d.ts.map +1 -0
- package/dist/capture/index.js +4 -0
- package/dist/capture/index.js.map +1 -0
- package/dist/capture/recorder.d.ts +24 -0
- package/dist/capture/recorder.d.ts.map +1 -0
- package/dist/capture/recorder.js +46 -0
- package/dist/capture/recorder.js.map +1 -0
- package/dist/capture/replay.d.ts +20 -0
- package/dist/capture/replay.d.ts.map +1 -0
- package/dist/capture/replay.js +47 -0
- package/dist/capture/replay.js.map +1 -0
- package/dist/ci/api-client.d.ts +38 -0
- package/dist/ci/api-client.d.ts.map +1 -0
- package/dist/ci/api-client.js +96 -0
- package/dist/ci/api-client.js.map +1 -0
- package/dist/ci/benchmark.d.ts +33 -0
- package/dist/ci/benchmark.d.ts.map +1 -0
- package/dist/ci/benchmark.js +213 -0
- package/dist/ci/benchmark.js.map +1 -0
- package/dist/ci/ed-runner.d.ts +48 -0
- package/dist/ci/ed-runner.d.ts.map +1 -0
- package/dist/ci/ed-runner.js +260 -0
- package/dist/ci/ed-runner.js.map +1 -0
- package/dist/ci/executor.d.ts +13 -0
- package/dist/ci/executor.d.ts.map +1 -0
- package/dist/ci/executor.js +542 -0
- package/dist/ci/executor.js.map +1 -0
- package/dist/ci/git-info.d.ts +17 -0
- package/dist/ci/git-info.d.ts.map +1 -0
- package/dist/ci/git-info.js +102 -0
- package/dist/ci/git-info.js.map +1 -0
- package/dist/ci/index.d.ts +6 -0
- package/dist/ci/index.d.ts.map +1 -0
- package/dist/ci/index.js +4 -0
- package/dist/ci/index.js.map +1 -0
- package/dist/ci/measurement.d.ts +9 -0
- package/dist/ci/measurement.d.ts.map +1 -0
- package/dist/ci/measurement.js +15 -0
- package/dist/ci/measurement.js.map +1 -0
- package/dist/ci/replay.d.ts +31 -0
- package/dist/ci/replay.d.ts.map +1 -0
- package/dist/ci/replay.js +96 -0
- package/dist/ci/replay.js.map +1 -0
- package/dist/ci/reporters/default.d.ts +8 -0
- package/dist/ci/reporters/default.d.ts.map +1 -0
- package/dist/ci/reporters/default.js +46 -0
- package/dist/ci/reporters/default.js.map +1 -0
- package/dist/ci/reporters/index.d.ts +8 -0
- package/dist/ci/reporters/index.d.ts.map +1 -0
- package/dist/ci/reporters/index.js +14 -0
- package/dist/ci/reporters/index.js.map +1 -0
- package/dist/ci/reporters/json.d.ts +8 -0
- package/dist/ci/reporters/json.d.ts.map +1 -0
- package/dist/ci/reporters/json.js +14 -0
- package/dist/ci/reporters/json.js.map +1 -0
- package/dist/ci/reporters/junit.d.ts +8 -0
- package/dist/ci/reporters/junit.d.ts.map +1 -0
- package/dist/ci/reporters/junit.js +48 -0
- package/dist/ci/reporters/junit.js.map +1 -0
- package/dist/ci/runner.d.ts +3 -0
- package/dist/ci/runner.d.ts.map +1 -0
- package/dist/ci/runner.js +187 -0
- package/dist/ci/runner.js.map +1 -0
- package/dist/ci/test-discovery.d.ts +5 -0
- package/dist/ci/test-discovery.d.ts.map +1 -0
- package/dist/ci/test-discovery.js +11 -0
- package/dist/ci/test-discovery.js.map +1 -0
- package/dist/ci/test-loader.d.ts +19 -0
- package/dist/ci/test-loader.d.ts.map +1 -0
- package/dist/ci/test-loader.js +149 -0
- package/dist/ci/test-loader.js.map +1 -0
- package/dist/ci/test-registry.d.ts +42 -0
- package/dist/ci/test-registry.d.ts.map +1 -0
- package/dist/ci/test-registry.js +18 -0
- package/dist/ci/test-registry.js.map +1 -0
- package/dist/ci/trace-schema.d.ts +30 -0
- package/dist/ci/trace-schema.d.ts.map +1 -0
- package/dist/ci/trace-schema.js +66 -0
- package/dist/ci/trace-schema.js.map +1 -0
- package/dist/ci/trace-writer.d.ts +16 -0
- package/dist/ci/trace-writer.d.ts.map +1 -0
- package/dist/ci/trace-writer.js +108 -0
- package/dist/ci/trace-writer.js.map +1 -0
- package/dist/ci/types.d.ts +108 -0
- package/dist/ci/types.d.ts.map +1 -0
- package/dist/ci/types.js +3 -0
- package/dist/ci/types.js.map +1 -0
- package/dist/ci/upload-client.d.ts +74 -0
- package/dist/ci/upload-client.d.ts.map +1 -0
- package/dist/ci/upload-client.js +195 -0
- package/dist/ci/upload-client.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +716 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/agent-state.d.ts +47 -0
- package/dist/core/agent-state.d.ts.map +1 -0
- package/dist/core/agent-state.js +137 -0
- package/dist/core/agent-state.js.map +1 -0
- package/dist/core/judge-utils.d.ts +22 -0
- package/dist/core/judge-utils.d.ts.map +1 -0
- package/dist/core/judge-utils.js +211 -0
- package/dist/core/judge-utils.js.map +1 -0
- package/dist/core/registry.d.ts +28 -0
- package/dist/core/registry.d.ts.map +1 -0
- package/dist/core/registry.js +52 -0
- package/dist/core/registry.js.map +1 -0
- package/dist/dashboard-server.d.ts +65 -0
- package/dist/dashboard-server.d.ts.map +1 -0
- package/dist/dashboard-server.js +3940 -0
- package/dist/dashboard-server.js.map +1 -0
- package/dist/execution/tool-runner.d.ts +26 -0
- package/dist/execution/tool-runner.d.ts.map +1 -0
- package/dist/execution/tool-runner.js +316 -0
- package/dist/execution/tool-runner.js.map +1 -0
- package/dist/html/dashboard.html +2218 -0
- package/dist/http.d.ts +14 -0
- package/dist/http.d.ts.map +1 -0
- package/dist/http.js +13 -0
- package/dist/http.js.map +1 -0
- package/dist/index.cjs +8102 -0
- package/dist/index.d.ts +61 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +67 -0
- package/dist/index.js.map +1 -0
- package/dist/interceptors/ai-interceptor.d.ts +26 -0
- package/dist/interceptors/ai-interceptor.d.ts.map +1 -0
- package/dist/interceptors/ai-interceptor.js +756 -0
- package/dist/interceptors/ai-interceptor.js.map +1 -0
- package/dist/interceptors/db-auto.d.ts +8 -0
- package/dist/interceptors/db-auto.d.ts.map +1 -0
- package/dist/interceptors/db-auto.js +217 -0
- package/dist/interceptors/db-auto.js.map +1 -0
- package/dist/interceptors/db.d.ts +23 -0
- package/dist/interceptors/db.d.ts.map +1 -0
- package/dist/interceptors/db.js +137 -0
- package/dist/interceptors/db.js.map +1 -0
- package/dist/interceptors/http.d.ts +28 -0
- package/dist/interceptors/http.d.ts.map +1 -0
- package/dist/interceptors/http.js +356 -0
- package/dist/interceptors/http.js.map +1 -0
- package/dist/interceptors/side-effects.d.ts +7 -0
- package/dist/interceptors/side-effects.d.ts.map +1 -0
- package/dist/interceptors/side-effects.js +72 -0
- package/dist/interceptors/side-effects.js.map +1 -0
- package/dist/interceptors/telemetry-push.d.ts +142 -0
- package/dist/interceptors/telemetry-push.d.ts.map +1 -0
- package/dist/interceptors/telemetry-push.js +463 -0
- package/dist/interceptors/telemetry-push.js.map +1 -0
- package/dist/interceptors/tool.d.ts +2 -0
- package/dist/interceptors/tool.d.ts.map +1 -0
- package/dist/interceptors/tool.js +274 -0
- package/dist/interceptors/tool.js.map +1 -0
- package/dist/interceptors/workflow-ai.d.ts +5 -0
- package/dist/interceptors/workflow-ai.d.ts.map +1 -0
- package/dist/interceptors/workflow-ai.js +382 -0
- package/dist/interceptors/workflow-ai.js.map +1 -0
- package/dist/internals/conditional-recorder.d.ts +21 -0
- package/dist/internals/conditional-recorder.d.ts.map +1 -0
- package/dist/internals/conditional-recorder.js +54 -0
- package/dist/internals/conditional-recorder.js.map +1 -0
- package/dist/internals/mock-resolver.d.ts +146 -0
- package/dist/internals/mock-resolver.d.ts.map +1 -0
- package/dist/internals/mock-resolver.js +427 -0
- package/dist/internals/mock-resolver.js.map +1 -0
- package/dist/matchers/index.d.ts +96 -0
- package/dist/matchers/index.d.ts.map +1 -0
- package/dist/matchers/index.js +668 -0
- package/dist/matchers/index.js.map +1 -0
- package/dist/observability.d.ts +82 -0
- package/dist/observability.d.ts.map +1 -0
- package/dist/observability.js +471 -0
- package/dist/observability.js.map +1 -0
- package/dist/portal-executor.d.ts +30 -0
- package/dist/portal-executor.d.ts.map +1 -0
- package/dist/portal-executor.js +324 -0
- package/dist/portal-executor.js.map +1 -0
- package/dist/portal-server.d.ts +3 -0
- package/dist/portal-server.d.ts.map +1 -0
- package/dist/portal-server.js +279 -0
- package/dist/portal-server.js.map +1 -0
- package/dist/proxy/llm-capture.d.ts +14 -0
- package/dist/proxy/llm-capture.d.ts.map +1 -0
- package/dist/proxy/llm-capture.js +264 -0
- package/dist/proxy/llm-capture.js.map +1 -0
- package/dist/reporter.d.ts +3 -0
- package/dist/reporter.d.ts.map +1 -0
- package/dist/reporter.js +72 -0
- package/dist/reporter.js.map +1 -0
- package/dist/runWorkflowSubprocess.d.ts +14 -0
- package/dist/runWorkflowSubprocess.d.ts.map +1 -0
- package/dist/runWorkflowSubprocess.js +66 -0
- package/dist/runWorkflowSubprocess.js.map +1 -0
- package/dist/runner.d.ts +16 -0
- package/dist/runner.d.ts.map +1 -0
- package/dist/runner.js +138 -0
- package/dist/runner.js.map +1 -0
- package/dist/socket-connector.d.ts +22 -0
- package/dist/socket-connector.d.ts.map +1 -0
- package/dist/socket-connector.js +104 -0
- package/dist/socket-connector.js.map +1 -0
- package/dist/telemetry-batcher.d.ts +56 -0
- package/dist/telemetry-batcher.d.ts.map +1 -0
- package/dist/telemetry-batcher.js +143 -0
- package/dist/telemetry-batcher.js.map +1 -0
- package/dist/test-setup.d.ts +12 -0
- package/dist/test-setup.d.ts.map +1 -0
- package/dist/test-setup.js +13 -0
- package/dist/test-setup.js.map +1 -0
- package/dist/tool-registry.d.ts +31 -0
- package/dist/tool-registry.d.ts.map +1 -0
- package/dist/tool-registry.js +73 -0
- package/dist/tool-registry.js.map +1 -0
- package/dist/tool-runner-worker.d.ts +2 -0
- package/dist/tool-runner-worker.d.ts.map +1 -0
- package/dist/tool-runner-worker.js +215 -0
- package/dist/tool-runner-worker.js.map +1 -0
- package/dist/trace-adapter/context.d.ts +72 -0
- package/dist/trace-adapter/context.d.ts.map +1 -0
- package/dist/trace-adapter/context.js +80 -0
- package/dist/trace-adapter/context.js.map +1 -0
- package/dist/tracing.d.ts +2 -0
- package/dist/tracing.d.ts.map +1 -0
- package/dist/tracing.js +59 -0
- package/dist/tracing.js.map +1 -0
- package/dist/trigger-executor.d.ts +12 -0
- package/dist/trigger-executor.d.ts.map +1 -0
- package/dist/trigger-executor.js +130 -0
- package/dist/trigger-executor.js.map +1 -0
- package/dist/types/portal.d.ts +76 -0
- package/dist/types/portal.d.ts.map +1 -0
- package/dist/types/portal.js +2 -0
- package/dist/types/portal.js.map +1 -0
- package/dist/utils/debug.d.ts +3 -0
- package/dist/utils/debug.d.ts.map +1 -0
- package/dist/utils/debug.js +8 -0
- package/dist/utils/debug.js.map +1 -0
- package/dist/utils/license-error.d.ts +23 -0
- package/dist/utils/license-error.d.ts.map +1 -0
- package/dist/utils/license-error.js +42 -0
- package/dist/utils/license-error.js.map +1 -0
- package/dist/utils/redact.d.ts +7 -0
- package/dist/utils/redact.d.ts.map +1 -0
- package/dist/utils/redact.js +26 -0
- package/dist/utils/redact.js.map +1 -0
- package/dist/workflow-runner-worker.d.ts +2 -0
- package/dist/workflow-runner-worker.d.ts.map +1 -0
- package/dist/workflow-runner-worker.js +329 -0
- package/dist/workflow-runner-worker.js.map +1 -0
- package/dist/workflow-runner.d.ts +14 -0
- package/dist/workflow-runner.d.ts.map +1 -0
- package/dist/workflow-runner.js +34 -0
- package/dist/workflow-runner.js.map +1 -0
- package/docs/agent-coding-instructions.md +138 -0
- package/docs/agent-integration-guide.md +564 -0
- package/docs/agents.md +140 -0
- package/docs/dashboard.md +394 -0
- package/docs/deno.md +69 -0
- package/docs/instrumentation.md +424 -0
- package/docs/langfuse-trace-structure.md +145 -0
- package/docs/matchers.md +173 -0
- package/docs/observability_contract.md +192 -0
- package/docs/observability_mode.md +195 -0
- package/docs/quickstart.md +621 -0
- package/docs/security-compliance.md +566 -0
- package/docs/test-writing-guidelines.md +444 -0
- package/docs/tools.md +165 -0
- package/docs/workflow-modes.md +253 -0
- package/package.json +76 -0
- package/src/browser-ui.ts +281 -0
- package/src/capture/event.ts +30 -0
- package/src/capture/index.ts +3 -0
- package/src/capture/recorder.ts +62 -0
- package/src/capture/replay.ts +55 -0
- package/src/ci/api-client.ts +136 -0
- package/src/ci/benchmark.ts +257 -0
- package/src/ci/ed-runner.ts +351 -0
- package/src/ci/executor.ts +671 -0
- package/src/ci/git-info.ts +127 -0
- package/src/ci/index.ts +5 -0
- package/src/ci/measurement.ts +25 -0
- package/src/ci/replay.ts +127 -0
- package/src/ci/reporters/default.ts +50 -0
- package/src/ci/reporters/index.ts +21 -0
- package/src/ci/reporters/json.ts +18 -0
- package/src/ci/reporters/junit.ts +61 -0
- package/src/ci/runner.ts +208 -0
- package/src/ci/test-discovery.ts +16 -0
- package/src/ci/test-loader.ts +187 -0
- package/src/ci/test-registry.ts +62 -0
- package/src/ci/trace-schema.ts +96 -0
- package/src/ci/trace-writer.ts +107 -0
- package/src/ci/types.ts +115 -0
- package/src/ci/upload-client.ts +300 -0
- package/src/cli.ts +811 -0
- package/src/core/agent-state.ts +162 -0
- package/src/core/judge-utils.ts +232 -0
- package/src/core/registry.ts +92 -0
- package/src/dashboard-server.ts +2047 -0
- package/src/execution/tool-runner.ts +352 -0
- package/src/html/dashboard.html +2218 -0
- package/src/http.ts +13 -0
- package/src/index.ts +138 -0
- package/src/interceptors/ai-interceptor.ts +798 -0
- package/src/interceptors/db-auto.ts +243 -0
- package/src/interceptors/db.ts +156 -0
- package/src/interceptors/http.ts +393 -0
- package/src/interceptors/side-effects.ts +83 -0
- package/src/interceptors/telemetry-push.ts +537 -0
- package/src/interceptors/tool.ts +287 -0
- package/src/interceptors/workflow-ai.ts +419 -0
- package/src/internals/conditional-recorder.ts +63 -0
- package/src/internals/mock-resolver.ts +492 -0
- package/src/matchers/index.ts +824 -0
- package/src/observability.ts +501 -0
- package/src/portal-executor.ts +355 -0
- package/src/portal-server.ts +304 -0
- package/src/proxy/llm-capture.ts +301 -0
- package/src/reporter.ts +81 -0
- package/src/runWorkflowSubprocess.ts +74 -0
- package/src/runner.ts +178 -0
- package/src/socket-connector.ts +117 -0
- package/src/telemetry-batcher.ts +191 -0
- package/src/test-setup.ts +16 -0
- package/src/tool-registry.ts +94 -0
- package/src/tool-runner-worker.ts +244 -0
- package/src/trace-adapter/context.ts +156 -0
- package/src/tracing.ts +62 -0
- package/src/trigger-executor.ts +171 -0
- package/src/types/agent.d.ts +63 -0
- package/src/types/expect.d.ts +81 -0
- package/src/types/modules.d.ts +2 -0
- package/src/types/portal.ts +69 -0
- package/src/utils/debug.ts +8 -0
- package/src/utils/license-error.ts +43 -0
- package/src/utils/redact.ts +25 -0
- package/src/workflow-runner-worker.ts +386 -0
- package/src/workflow-runner.ts +58 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent state serialization, deserialization, and utility functions.
|
|
3
|
+
*
|
|
4
|
+
* Enables capturing agent plan state during execution and resuming
|
|
5
|
+
* agents from any task in the plan without re-executing completed steps.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { AgentPlan, AgentState, AgentTask } from '../types/agent.js'
|
|
9
|
+
import type { WorkflowEvent } from '../capture/event.js'
|
|
10
|
+
|
|
11
|
+
/**
 * Builds a persistable AgentState snapshot from a plan and its trace events.
 *
 * `plan` and `trace` are copied through a JSON round-trip, so the snapshot
 * shares no references with the arguments (and anything JSON cannot
 * represent, such as `undefined` fields, is dropped from the copies).
 *
 * `resumeFromTaskIndex` is the index of the first task whose status is not
 * `'completed'`; when every task is completed it equals `plan.tasks.length`.
 *
 * @param plan - The agent plan to snapshot.
 * @param trace - Workflow events captured so far.
 * @returns A new AgentState holding the copies and the computed resume index.
 */
export function serializeAgentState(plan: AgentPlan, trace: WorkflowEvent[]): AgentState {
  // Start from "everything is done" and pull the index back to the first
  // task that still needs to run.
  let resumeFromTaskIndex = plan.tasks.length
  for (const [index, task] of plan.tasks.entries()) {
    if (task.status !== 'completed') {
      resumeFromTaskIndex = index
      break
    }
  }

  const deepCopy = <T>(value: T): T => JSON.parse(JSON.stringify(value)) as T

  return {
    plan: deepCopy(plan),
    trace: deepCopy(trace),
    resumeFromTaskIndex,
  }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Validates and hydrates an AgentState from a parsed JSON object.
 *
 * Checks, in order:
 *  1. `raw` is a non-null object.
 *  2. `raw.plan` is an object whose `tasks` is an array and whose `id` is a string.
 *  3. `resumeFromTaskIndex` (0 when `raw.resumeFromTaskIndex` is not a number)
 *     is an integer between 0 and `plan.tasks.length` inclusive.
 *  4. Every task before `resumeFromTaskIndex` has status `'completed'` and a
 *     defined `output`.
 *
 * `trace` falls back to an empty array when `raw.trace` is not an array.
 * The returned `plan` and `trace` are the same object references found on
 * `raw`; nothing is cloned.
 *
 * @param raw - The untrusted value to validate (typically `JSON.parse` output).
 * @returns The validated state.
 * @throws Error if any of the checks above fails.
 */
export function deserializeAgentState(raw: unknown): AgentState {
  if (!raw || typeof raw !== 'object') {
    throw new Error('AgentState must be a non-null object')
  }
  const obj = raw as Record<string, unknown>

  if (!obj.plan || typeof obj.plan !== 'object') {
    throw new Error('AgentState.plan is required')
  }
  const plan = obj.plan as AgentPlan

  if (!Array.isArray(plan.tasks)) {
    throw new Error('AgentState.plan.tasks must be an array')
  }
  if (typeof plan.id !== 'string') {
    throw new Error('AgentState.plan.id must be a string')
  }

  const trace = Array.isArray(obj.trace) ? (obj.trace as WorkflowEvent[]) : []
  const resumeFromTaskIndex =
    typeof obj.resumeFromTaskIndex === 'number' ? obj.resumeFromTaskIndex : 0

  // `typeof x === 'number'` also admits NaN, Infinity, negatives and fractions.
  // None of those is a usable task index, and Infinity would make the loop
  // below spin forever, so reject them up front.
  if (
    !Number.isInteger(resumeFromTaskIndex) ||
    resumeFromTaskIndex < 0 ||
    resumeFromTaskIndex > plan.tasks.length
  ) {
    throw new Error(
      `AgentState.resumeFromTaskIndex must be an integer between 0 and ${plan.tasks.length}, got ${resumeFromTaskIndex}`,
    )
  }

  // Validate that all tasks before resumeFromTaskIndex are completed with outputs
  for (let i = 0; i < resumeFromTaskIndex; i++) {
    const task = plan.tasks[i]
    // Null or missing entries are tolerated and skipped rather than rejected.
    if (!task) continue
    if (task.status !== 'completed') {
      throw new Error(
        `Task at index ${i} (id="${task.id}") has status "${task.status}" but must be "completed" before resumeFromTaskIndex=${resumeFromTaskIndex}`,
      )
    }
    if (task.output === undefined) {
      throw new Error(
        `Task at index ${i} (id="${task.id}") is completed but has no output. Cannot resume safely.`,
      )
    }
  }

  return { plan, trace, resumeFromTaskIndex }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Collects the outputs of completed tasks into a map keyed by task id.
 *
 * A task contributes an entry only when its status is `'completed'` and its
 * `output` is not `undefined`. If two tasks share an id, the later one wins.
 *
 * @param plan - The plan whose tasks are scanned, in order.
 * @returns A new object mapping task id to that task's output.
 */
export function extractTaskOutputs(plan: AgentPlan): Record<string, unknown> {
  const byTaskId: Record<string, unknown> = {}

  for (const { id, status, output } of plan.tasks) {
    if (status !== 'completed') continue
    if (output === undefined) continue
    byTaskId[id] = output
  }

  return byTaskId
}
|
|
87
|
+
|
|
88
|
+
/**
 * Replaces placeholder references inside a task input with values taken
 * from earlier task outputs.
 *
 * A placeholder is any object whose `$ref` property is a string, e.g.
 * `{ $ref: "task-1.output.userId" }`. The whole object is replaced by the
 * value the reference resolves to; other properties on it are ignored.
 *
 * Example:
 *   previousOutputs = { "task-1": { userId: "abc" } }
 *   input = { $ref: "task-1.output.userId" }
 *   → returns "abc"
 *
 * Arrays and plain objects are walked recursively and rebuilt as new
 * containers; primitives, `null` and `undefined` are returned as-is.
 *
 * @param input - The task input to resolve.
 * @param previousOutputs - Outputs of earlier tasks, keyed by task id.
 * @returns The input with every placeholder substituted.
 */
export function resolveTaskInput(
  input: unknown,
  previousOutputs: Record<string, unknown>,
): unknown {
  // Anything that is not a container cannot hold a reference.
  if (input === null || typeof input !== 'object') return input

  if (Array.isArray(input)) {
    return input.map((element) => resolveTaskInput(element, previousOutputs))
  }

  const source = input as Record<string, unknown>

  // Placeholder: { $ref: "taskId.output.path" }
  const ref = source['$ref']
  if (typeof ref === 'string') {
    return resolveRef(ref, previousOutputs)
  }

  // Ordinary object: resolve each property value in turn.
  const resolved: Record<string, unknown> = {}
  for (const key of Object.keys(source)) {
    resolved[key] = resolveTaskInput(source[key], previousOutputs)
  }
  return resolved
}
|
|
130
|
+
|
|
131
|
+
/**
 * Resolves a dot-separated reference such as "task-1.output.userId"
 * against the previousOutputs map.
 *
 * Expected format: `<taskId>[.output][.<path>...]`. The first segment selects
 * the entry in `previousOutputs`. An `output` segment directly after the task
 * id is a keyword and is skipped, because `previousOutputs` already maps the
 * task id to its output. Only that one segment is treated as the keyword, so
 * output fields that are themselves named `output` stay addressable (for
 * example "task-1.output.data.output").
 *
 * @param ref - The reference string.
 * @param previousOutputs - Outputs of earlier tasks, keyed by task id.
 * @returns The referenced value, or `undefined` when the task id is unknown
 *   or the path runs into `null`/`undefined` before it is exhausted.
 */
function resolveRef(ref: string, previousOutputs: Record<string, unknown>): unknown {
  const [taskId = '', ...rest] = ref.split('.')

  // Drop the optional leading "output" keyword, and only that one.
  const path = rest[0] === 'output' ? rest.slice(1) : rest

  let current: unknown = previousOutputs[taskId]
  for (const key of path) {
    if (current === null || current === undefined) return undefined
    current = (current as Record<string, unknown>)[key]
  }
  return current
}
|
|
150
|
+
|
|
151
|
+
/**
 * Returns a shallow copy of `task` marked as completed with the given output.
 *
 * The input task is not modified. An existing `completedAt` on the task is
 * kept; when it is `null` or `undefined` the current time (`Date.now()`) is
 * used instead.
 *
 * @param task - The task to copy.
 * @param output - The output to record on the copy.
 * @returns The completed copy.
 */
export function markTaskCompleted(task: AgentTask, output: unknown): AgentTask {
  const completedAt = task.completedAt ?? Date.now()
  const finished: AgentTask = { ...task, status: 'completed', output, completedAt }
  return finished
}
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* judge-utils.ts
|
|
3
|
+
*
|
|
4
|
+
* Utilities for preprocessing outputs before sending to LLM-as-a-judge evaluators.
|
|
5
|
+
* Addresses two problems:
|
|
6
|
+
* 1. Large outputs cause slow inference
|
|
7
|
+
* 2. LLMs miss attributes in large JSON payloads due to attention degradation
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/** Default value of the `maxChars` budget parameter of `prepareOutputForJudge`. */
const DEFAULT_MAX_CHARS = 8000
|
|
11
|
+
/** String values longer than this many characters are shortened by `truncateJsonValues` (head and tail kept around a marker). */
const MAX_STRING_VALUE_LENGTH = 500
|
|
12
|
+
/** Number of items `truncateJsonValues` keeps at each end of an array that has more than twice this many items. */
const MAX_ARRAY_EDGE_ITEMS = 5
|
|
13
|
+
|
|
14
|
+
/**
 * Prepare an output string for LLM judge evaluation.
 *
 * - Empty output is returned as-is.
 * - Output that parses as a JSON object or array is always run through the
 *   JSON-aware path (keyword-based key extraction plus value truncation),
 *   even when it is already under `maxChars`: a small payload can still hold
 *   long arrays that bury the fields the judge needs. Such output is
 *   therefore re-serialized and may differ from the input text.
 * - Any other output passes through unchanged when it fits the budget, and
 *   is otherwise cut to a head + tail with a marker in between.
 *
 * @param output - The raw output string to prepare.
 * @param judgePrompt - The judge/evaluation prompt, used to identify relevant JSON keys.
 * @param maxChars - Maximum character budget for the prepared output. Default 8000.
 * @returns The prepared output string, possibly trimmed.
 */
export function prepareOutputForJudge(
  output: string,
  judgePrompt: string,
  maxChars: number = DEFAULT_MAX_CHARS,
): string {
  if (!output) return output

  const fromJson = tryJsonExtract(output, judgePrompt, maxChars)
  if (fromJson === null) {
    // Not JSON (or a bare JSON primitive): only truncate when over budget.
    return output.length <= maxChars ? output : truncateHeadTail(output, maxChars)
  }

  return fromJson
}
|
|
43
|
+
|
|
44
|
+
/**
 * Attempt to parse the output as JSON and reduce it to what the judge needs.
 *
 * Returns `null` when `output` is not valid JSON or parses to a primitive
 * (anything other than an object or array).
 *
 * Otherwise:
 * - With no usable keywords in the prompt, or when no key matches any
 *   keyword, the whole parsed value is value-truncated, pretty-printed and,
 *   if still over `maxChars`, cut head + tail.
 * - When matching subtrees are found, only those are value-truncated and
 *   pretty-printed. If that text fits in `maxChars` and top-level keys were
 *   dropped, a note stating how many is appended. The note is added after
 *   the budget check, so in that case the result can exceed `maxChars` by
 *   the length of the note. If the text does not fit, it is cut head + tail.
 *
 * @param output - Raw output text.
 * @param judgePrompt - Prompt from which keywords are derived.
 * @param maxChars - Character budget.
 * @returns The prepared text, or `null` if JSON handling does not apply.
 */
function tryJsonExtract(output: string, judgePrompt: string, maxChars: number): string | null {
  let parsed: unknown
  try {
    parsed = JSON.parse(output)
  } catch {
    return null
  }
  if (parsed === null || typeof parsed !== 'object') return null

  // Shared tail for the "use the whole object" paths:
  // truncate large values, serialize, then enforce the budget.
  const renderWithinBudget = (value: unknown): string => {
    const text = JSON.stringify(truncateJsonValues(value), null, 2)
    return text.length <= maxChars ? text : truncateHeadTail(text, maxChars)
  }

  const keywords = extractKeywords(judgePrompt)
  if (keywords.length === 0) {
    // No keywords to match against
    return renderWithinBudget(parsed)
  }

  const extracted = extractRelevantPaths(parsed, keywords)
  const foundRelevant =
    extracted !== undefined && Object.keys(extracted as Record<string, unknown>).length > 0
  if (!foundRelevant) {
    // Extraction found nothing relevant — fall back to the full object
    return renderWithinBudget(parsed)
  }

  const extractedStr = JSON.stringify(truncateJsonValues(extracted), null, 2)
  const fitsBudget = extractedStr.length <= maxChars
  if (!fitsBudget) {
    // Extracted content is still too large — truncate it
    return truncateHeadTail(extractedStr, maxChars)
  }

  const omittedKeys = countOmittedKeys(parsed, extracted)
  return omittedKeys > 0
    ? `${extractedStr}\n\n[Note: ${omittedKeys} irrelevant key(s) omitted from original output for brevity. Total original size: ${output.length} chars.]`
    : extractedStr
}
|
|
93
|
+
|
|
94
|
+
/**
 * Derive the list of distinct keywords from a judge prompt.
 *
 * The prompt is lowercased, every character other than `a-z`, `0-9`, `_`,
 * `-` and whitespace is replaced by a space, and the result is split on
 * whitespace. Tokens of two characters or fewer and tokens in the stop-word
 * list are dropped. Order of first appearance is preserved.
 *
 * @param prompt - The judge prompt text.
 * @returns Unique keywords in order of first occurrence.
 */
function extractKeywords(prompt: string): string[] {
  const stopWords = new Set<string>([
    // articles, auxiliaries and modals
    'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
    'should', 'may', 'might', 'shall', 'can', 'need', 'must',
    // conjunctions, question words and determiners
    'and', 'or', 'but', 'if', 'then', 'else', 'when', 'where', 'how',
    'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those',
    // pronouns and prepositions
    'it', 'its', 'of', 'in', 'on', 'at', 'to', 'for', 'with', 'by',
    'from', 'as', 'into', 'about', 'between', 'through', 'after', 'before',
    // negations, quantifiers and intensifiers
    'not', 'no', 'nor', 'only', 'also', 'just', 'more', 'most', 'very',
    'all', 'each', 'every', 'any', 'some', 'such', 'than', 'too',
    // words common in evaluation prompts themselves
    'output', 'evaluate', 'score', 'check', 'whether', 'contains',
    'following', 'given', 'based', 'respond', 'number', 'scale',
    'text', 'result', 'response', 'answer', 'return', 'value',
  ])

  const normalized = prompt.toLowerCase().replace(/[^a-z0-9_\s-]/g, ' ')

  const unique = new Set<string>()
  for (const token of normalized.split(/\s+/)) {
    if (token.length <= 2) continue
    if (stopWords.has(token)) continue
    unique.add(token)
  }

  return Array.from(unique)
}
|
|
122
|
+
|
|
123
|
+
/**
 * Recursively keep only the parts of a parsed JSON value whose keys relate
 * to the given keywords.
 *
 * - Object: a property is kept with its entire value when its lowercased key
 *   contains a keyword or a keyword contains the lowercased key. Otherwise,
 *   if the value is an object or array, it is searched recursively and kept
 *   only if something inside matched.
 * - Array: each item is processed recursively and items with no match are
 *   dropped. The number of kept items is not limited here.
 * - Primitive: never matches.
 *
 * @param obj - The value to filter.
 * @param keywords - Lowercase keywords to match keys against.
 * @returns The filtered structure, or `undefined` when nothing matched.
 */
function extractRelevantPaths(obj: unknown, keywords: string[]): unknown {
  if (Array.isArray(obj)) {
    const kept: unknown[] = []
    for (const item of obj) {
      const sub = extractRelevantPaths(item, keywords)
      if (sub !== undefined) kept.push(sub)
    }
    return kept.length > 0 ? kept : undefined
  }

  if (obj === null || typeof obj !== 'object') return undefined

  const source = obj as Record<string, unknown>
  const picked: Record<string, unknown> = {}
  let found = false

  for (const key of Object.keys(source)) {
    const value = source[key]
    const lowered = key.toLowerCase()
    const matchesDirectly = keywords.some(kw => lowered.includes(kw) || kw.includes(lowered))

    if (matchesDirectly) {
      picked[key] = value
      found = true
      continue
    }

    // Only containers can hold a match further down.
    if (value === null || typeof value !== 'object') continue

    const nested = extractRelevantPaths(value, keywords)
    if (nested === undefined) continue
    picked[key] = nested
    found = true
  }

  return found ? picked : undefined
}
|
|
165
|
+
|
|
166
|
+
/**
 * Recursively shorten oversized values inside a JSON structure, returning a
 * new structure.
 *
 * - Strings longer than MAX_STRING_VALUE_LENGTH keep their first 60% and
 *   last 40% of that length, joined by a `...[N chars truncated]...` marker.
 * - Arrays with more than 2 × MAX_ARRAY_EDGE_ITEMS items keep that many
 *   items from each end, with a `[...N items skipped...]` string in between.
 *   Kept items are processed recursively.
 * - Objects are rebuilt with every property value processed recursively.
 * - `null`, `undefined` and other primitives are returned unchanged.
 *
 * @param obj - The value to process.
 * @returns The shortened copy (or the same primitive).
 */
function truncateJsonValues(obj: unknown): unknown {
  if (obj === null || obj === undefined) return obj

  if (typeof obj === 'string') {
    const total = obj.length
    if (total <= MAX_STRING_VALUE_LENGTH) return obj
    const keepStart = Math.floor(MAX_STRING_VALUE_LENGTH * 0.6)
    const keepEnd = MAX_STRING_VALUE_LENGTH - keepStart
    const dropped = total - keepStart - keepEnd
    return `${obj.slice(0, keepStart)}...[${dropped} chars truncated]...${obj.slice(total - keepEnd)}`
  }

  if (Array.isArray(obj)) {
    const edge = MAX_ARRAY_EDGE_ITEMS
    if (obj.length <= edge * 2) {
      return obj.map(item => truncateJsonValues(item))
    }
    const leading = obj.slice(0, edge).map(item => truncateJsonValues(item))
    const trailing = obj.slice(obj.length - edge).map(item => truncateJsonValues(item))
    const skipped = obj.length - edge * 2
    return [...leading, `[...${skipped} items skipped...]`, ...trailing]
  }

  if (typeof obj !== 'object') return obj

  const source = obj as Record<string, unknown>
  const shortened: Record<string, unknown> = {}
  for (const key of Object.keys(source)) {
    shortened[key] = truncateJsonValues(source[key])
  }
  return shortened
}
|
|
201
|
+
|
|
202
|
+
/**
 * Count the top-level keys of `original` that do not appear as top-level
 * keys of `extracted`.
 *
 * Returns 0 when either argument is not a non-null object, or when
 * `original` is an array.
 *
 * @param original - The full parsed value.
 * @param extracted - The filtered value derived from it.
 * @returns Number of top-level keys missing from `extracted`.
 */
function countOmittedKeys(original: unknown, extracted: unknown): number {
  const isObjectLike = (value: unknown): value is object =>
    typeof value === 'object' && value !== null

  if (!isObjectLike(original) || !isObjectLike(extracted)) return 0
  if (Array.isArray(original)) return 0

  const keptKeys = new Set(Object.keys(extracted as Record<string, unknown>))
  let omitted = 0
  for (const key of Object.keys(original)) {
    if (!keptKeys.has(key)) omitted += 1
  }
  return omitted
}
|
|
214
|
+
|
|
215
|
+
/**
 * Truncate a string to at most `maxChars` characters by keeping its head and
 * tail and inserting a marker in the middle.
 *
 * - Text that already fits is returned unchanged.
 * - A budget that is zero, negative or NaN yields an empty string.
 * - When the marker alone would not fit in the budget, the result is simply
 *   the first `maxChars` characters with no marker.
 * - Otherwise the space left after the marker is split 70% head / 30% tail.
 *
 * The marker reports `text.length - maxChars`, i.e. how much shorter the
 * result is than the input.
 *
 * @param text - The text to shorten.
 * @param maxChars - Maximum length of the returned string.
 * @returns The possibly truncated text.
 */
function truncateHeadTail(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text

  // No usable budget. Without this guard a negative maxChars would reach
  // `text.slice(0, maxChars)` below, where a negative end index counts from
  // the end of the string and would return almost the entire text.
  if (!(maxChars > 0)) return ''

  // Reserve space for the marker
  const marker = `\n\n[...truncated ${text.length - maxChars} chars out of ${text.length} total...]\n\n`
  const available = maxChars - marker.length
  if (available <= 0) return text.slice(0, maxChars)

  // 70% head, 30% tail — head is usually more important
  const headSize = Math.floor(available * 0.7)
  const tailSize = available - headSize

  return text.slice(0, headSize) + marker + text.slice(text.length - tailSize)
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import type { AITestContext } from '../trace-adapter/context.js'
|
|
2
|
+
|
|
3
|
+
/**
 * Body of a registered test. It receives the test context and may complete
 * synchronously or return a promise.
 */
export type TestFunction = (ctx: AITestContext) => void | Promise<void>
|
|
4
|
+
|
|
5
|
+
/** A single test as stored in the registry. */
export interface TestEntry {
  /** Name the test was registered under. */
  name: string
  /** Function that was registered for the test. */
  fn: TestFunction
}
|
|
9
|
+
|
|
10
|
+
/**
 * Collection of registered tests and lifecycle hooks. Every list is appended
 * to in registration order by the corresponding registration function.
 */
export interface Registry {
  /** Tests added through `aiTest`. */
  tests: TestEntry[]
  /** Hooks added through `beforeAll`. */
  beforeAllHooks: (() => Promise<void> | void)[]
  /** Hooks added through `afterAll`. */
  afterAllHooks: (() => Promise<void> | void)[]
  /** Hooks added through `beforeEach`. */
  beforeEachHooks: (() => Promise<void> | void)[]
  /** Hooks added through `afterEach`. */
  afterEachHooks: (() => Promise<void> | void)[]
}
|
|
17
|
+
|
|
18
|
+
// Property name on `globalThis` under which the registry is stored.
const REGISTRY_KEY = '__elasticdash_registry__'

/**
 * Return the registry stored on `globalThis`, creating and storing an empty
 * one first when no truthy value is present under `REGISTRY_KEY`.
 */
function getGlobalRegistry(): Registry {
  const globalStore = globalThis as any
  const existing = globalStore[REGISTRY_KEY]
  if (existing) {
    return existing as Registry
  }

  globalStore[REGISTRY_KEY] = createEmptyRegistry()
  return globalStore[REGISTRY_KEY] as Registry
}
|
|
26
|
+
|
|
27
|
+
/**
 * Build a registry with no tests and no hooks.
 *
 * Each call returns a new object with new arrays, so registries never share
 * state with one another.
 */
function createEmptyRegistry(): Registry {
  const emptyRegistry: Registry = {
    tests: [],
    beforeAllHooks: [],
    afterAllHooks: [],
    beforeEachHooks: [],
    afterEachHooks: [],
  }
  return emptyRegistry
}
|
|
36
|
+
|
|
37
|
+
/**
 * Replace the registry stored on `globalThis` with a fresh empty one and log
 * that the reset happened.
 */
export function clearRegistry(): void {
  const freshRegistry = createEmptyRegistry()
  const globalStore = globalThis as any
  globalStore[REGISTRY_KEY] = freshRegistry
  console.log('[elasticdash] clearRegistry called. Registry reset.')
}
|
|
41
|
+
|
|
42
|
+
/**
 * Return the global registry, logging the names of the tests it currently
 * holds.
 *
 * @returns The registry object itself, not a copy.
 */
export function getRegistry(): Registry {
  const current = getGlobalRegistry()

  const testNames: string[] = []
  for (const entry of current.tests) {
    testNames.push(entry.name)
  }
  console.log('[elasticdash] getRegistry called. Current tests:', testNames)

  return current
}
|
|
47
|
+
|
|
48
|
+
/**
 * Register a test in the global registry and log its name.
 *
 * @param name - Name stored with the test and written to the log.
 * @param fn - Test body stored alongside the name.
 */
export function aiTest(name: string, fn: TestFunction): void {
  const entry: TestEntry = { name, fn }
  getGlobalRegistry().tests.push(entry)
  console.log(`[elasticdash] Registered test: ${name}`)
}
|
|
53
|
+
|
|
54
|
+
/**
 * Append a hook to the global registry's `beforeAllHooks` list.
 *
 * @param fn - Hook to store; may be synchronous or return a promise.
 */
export function beforeAll(fn: () => Promise<void> | void): void {
  getGlobalRegistry().beforeAllHooks.push(fn)
}
|
|
58
|
+
|
|
59
|
+
/**
 * Append a hook to the global registry's `afterAllHooks` list.
 *
 * @param fn - Hook to store; may be synchronous or return a promise.
 */
export function afterAll(fn: () => Promise<void> | void): void {
  getGlobalRegistry().afterAllHooks.push(fn)
}
|
|
63
|
+
|
|
64
|
+
/**
 * Append a hook to the global registry's `beforeEachHooks` list.
 *
 * @param fn - Hook to store; may be synchronous or return a promise.
 */
export function beforeEach(fn: () => Promise<void> | void): void {
  getGlobalRegistry().beforeEachHooks.push(fn)
}
|
|
68
|
+
|
|
69
|
+
/**
 * Append a hook to the global registry's `afterEachHooks` list.
 *
 * @param fn - Hook to store; may be synchronous or return a promise.
 */
export function afterEach(fn: () => Promise<void> | void): void {
  getGlobalRegistry().afterEachHooks.push(fn)
}
|
|
73
|
+
|
|
74
|
+
// Declare the registration functions as globals so test files can call them
// without importing this module. `var` is required for global declarations.
declare global {
  /* eslint-disable no-var */
  var aiTest: (name: string, fn: TestFunction) => void
  var beforeAll: (fn: () => void | Promise<void>) => void
  var afterAll: (fn: () => void | Promise<void>) => void
  var beforeEach: (fn: () => void | Promise<void>) => void
  var afterEach: (fn: () => void | Promise<void>) => void
  /* eslint-enable no-var */
}
|
|
87
|
+
|
|
88
|
+
// Install the registration functions on the global object when this module is
// loaded, in the same order as they are declared above.
Object.assign(globalThis, {
  aiTest,
  beforeAll,
  afterAll,
  beforeEach,
  afterEach,
})
|