elasticdash-sdk 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +775 -0
- package/dist/browser-ui.d.ts +43 -0
- package/dist/browser-ui.d.ts.map +1 -0
- package/dist/browser-ui.js +246 -0
- package/dist/browser-ui.js.map +1 -0
- package/dist/capture/event.d.ts +33 -0
- package/dist/capture/event.d.ts.map +1 -0
- package/dist/capture/event.js +2 -0
- package/dist/capture/event.js.map +1 -0
- package/dist/capture/index.d.ts +4 -0
- package/dist/capture/index.d.ts.map +1 -0
- package/dist/capture/index.js +4 -0
- package/dist/capture/index.js.map +1 -0
- package/dist/capture/recorder.d.ts +24 -0
- package/dist/capture/recorder.d.ts.map +1 -0
- package/dist/capture/recorder.js +46 -0
- package/dist/capture/recorder.js.map +1 -0
- package/dist/capture/replay.d.ts +20 -0
- package/dist/capture/replay.d.ts.map +1 -0
- package/dist/capture/replay.js +47 -0
- package/dist/capture/replay.js.map +1 -0
- package/dist/ci/api-client.d.ts +38 -0
- package/dist/ci/api-client.d.ts.map +1 -0
- package/dist/ci/api-client.js +96 -0
- package/dist/ci/api-client.js.map +1 -0
- package/dist/ci/benchmark.d.ts +33 -0
- package/dist/ci/benchmark.d.ts.map +1 -0
- package/dist/ci/benchmark.js +213 -0
- package/dist/ci/benchmark.js.map +1 -0
- package/dist/ci/ed-runner.d.ts +48 -0
- package/dist/ci/ed-runner.d.ts.map +1 -0
- package/dist/ci/ed-runner.js +260 -0
- package/dist/ci/ed-runner.js.map +1 -0
- package/dist/ci/executor.d.ts +13 -0
- package/dist/ci/executor.d.ts.map +1 -0
- package/dist/ci/executor.js +542 -0
- package/dist/ci/executor.js.map +1 -0
- package/dist/ci/git-info.d.ts +17 -0
- package/dist/ci/git-info.d.ts.map +1 -0
- package/dist/ci/git-info.js +102 -0
- package/dist/ci/git-info.js.map +1 -0
- package/dist/ci/index.d.ts +6 -0
- package/dist/ci/index.d.ts.map +1 -0
- package/dist/ci/index.js +4 -0
- package/dist/ci/index.js.map +1 -0
- package/dist/ci/measurement.d.ts +9 -0
- package/dist/ci/measurement.d.ts.map +1 -0
- package/dist/ci/measurement.js +15 -0
- package/dist/ci/measurement.js.map +1 -0
- package/dist/ci/replay.d.ts +31 -0
- package/dist/ci/replay.d.ts.map +1 -0
- package/dist/ci/replay.js +96 -0
- package/dist/ci/replay.js.map +1 -0
- package/dist/ci/reporters/default.d.ts +8 -0
- package/dist/ci/reporters/default.d.ts.map +1 -0
- package/dist/ci/reporters/default.js +46 -0
- package/dist/ci/reporters/default.js.map +1 -0
- package/dist/ci/reporters/index.d.ts +8 -0
- package/dist/ci/reporters/index.d.ts.map +1 -0
- package/dist/ci/reporters/index.js +14 -0
- package/dist/ci/reporters/index.js.map +1 -0
- package/dist/ci/reporters/json.d.ts +8 -0
- package/dist/ci/reporters/json.d.ts.map +1 -0
- package/dist/ci/reporters/json.js +14 -0
- package/dist/ci/reporters/json.js.map +1 -0
- package/dist/ci/reporters/junit.d.ts +8 -0
- package/dist/ci/reporters/junit.d.ts.map +1 -0
- package/dist/ci/reporters/junit.js +48 -0
- package/dist/ci/reporters/junit.js.map +1 -0
- package/dist/ci/runner.d.ts +3 -0
- package/dist/ci/runner.d.ts.map +1 -0
- package/dist/ci/runner.js +187 -0
- package/dist/ci/runner.js.map +1 -0
- package/dist/ci/test-discovery.d.ts +5 -0
- package/dist/ci/test-discovery.d.ts.map +1 -0
- package/dist/ci/test-discovery.js +11 -0
- package/dist/ci/test-discovery.js.map +1 -0
- package/dist/ci/test-loader.d.ts +19 -0
- package/dist/ci/test-loader.d.ts.map +1 -0
- package/dist/ci/test-loader.js +149 -0
- package/dist/ci/test-loader.js.map +1 -0
- package/dist/ci/test-registry.d.ts +42 -0
- package/dist/ci/test-registry.d.ts.map +1 -0
- package/dist/ci/test-registry.js +18 -0
- package/dist/ci/test-registry.js.map +1 -0
- package/dist/ci/trace-schema.d.ts +30 -0
- package/dist/ci/trace-schema.d.ts.map +1 -0
- package/dist/ci/trace-schema.js +66 -0
- package/dist/ci/trace-schema.js.map +1 -0
- package/dist/ci/trace-writer.d.ts +16 -0
- package/dist/ci/trace-writer.d.ts.map +1 -0
- package/dist/ci/trace-writer.js +108 -0
- package/dist/ci/trace-writer.js.map +1 -0
- package/dist/ci/types.d.ts +108 -0
- package/dist/ci/types.d.ts.map +1 -0
- package/dist/ci/types.js +3 -0
- package/dist/ci/types.js.map +1 -0
- package/dist/ci/upload-client.d.ts +74 -0
- package/dist/ci/upload-client.d.ts.map +1 -0
- package/dist/ci/upload-client.js +195 -0
- package/dist/ci/upload-client.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +716 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/agent-state.d.ts +47 -0
- package/dist/core/agent-state.d.ts.map +1 -0
- package/dist/core/agent-state.js +137 -0
- package/dist/core/agent-state.js.map +1 -0
- package/dist/core/judge-utils.d.ts +22 -0
- package/dist/core/judge-utils.d.ts.map +1 -0
- package/dist/core/judge-utils.js +211 -0
- package/dist/core/judge-utils.js.map +1 -0
- package/dist/core/registry.d.ts +28 -0
- package/dist/core/registry.d.ts.map +1 -0
- package/dist/core/registry.js +52 -0
- package/dist/core/registry.js.map +1 -0
- package/dist/dashboard-server.d.ts +65 -0
- package/dist/dashboard-server.d.ts.map +1 -0
- package/dist/dashboard-server.js +3940 -0
- package/dist/dashboard-server.js.map +1 -0
- package/dist/execution/tool-runner.d.ts +26 -0
- package/dist/execution/tool-runner.d.ts.map +1 -0
- package/dist/execution/tool-runner.js +316 -0
- package/dist/execution/tool-runner.js.map +1 -0
- package/dist/html/dashboard.html +2218 -0
- package/dist/http.d.ts +14 -0
- package/dist/http.d.ts.map +1 -0
- package/dist/http.js +13 -0
- package/dist/http.js.map +1 -0
- package/dist/index.cjs +8102 -0
- package/dist/index.d.ts +61 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +67 -0
- package/dist/index.js.map +1 -0
- package/dist/interceptors/ai-interceptor.d.ts +26 -0
- package/dist/interceptors/ai-interceptor.d.ts.map +1 -0
- package/dist/interceptors/ai-interceptor.js +756 -0
- package/dist/interceptors/ai-interceptor.js.map +1 -0
- package/dist/interceptors/db-auto.d.ts +8 -0
- package/dist/interceptors/db-auto.d.ts.map +1 -0
- package/dist/interceptors/db-auto.js +217 -0
- package/dist/interceptors/db-auto.js.map +1 -0
- package/dist/interceptors/db.d.ts +23 -0
- package/dist/interceptors/db.d.ts.map +1 -0
- package/dist/interceptors/db.js +137 -0
- package/dist/interceptors/db.js.map +1 -0
- package/dist/interceptors/http.d.ts +28 -0
- package/dist/interceptors/http.d.ts.map +1 -0
- package/dist/interceptors/http.js +356 -0
- package/dist/interceptors/http.js.map +1 -0
- package/dist/interceptors/side-effects.d.ts +7 -0
- package/dist/interceptors/side-effects.d.ts.map +1 -0
- package/dist/interceptors/side-effects.js +72 -0
- package/dist/interceptors/side-effects.js.map +1 -0
- package/dist/interceptors/telemetry-push.d.ts +142 -0
- package/dist/interceptors/telemetry-push.d.ts.map +1 -0
- package/dist/interceptors/telemetry-push.js +463 -0
- package/dist/interceptors/telemetry-push.js.map +1 -0
- package/dist/interceptors/tool.d.ts +2 -0
- package/dist/interceptors/tool.d.ts.map +1 -0
- package/dist/interceptors/tool.js +274 -0
- package/dist/interceptors/tool.js.map +1 -0
- package/dist/interceptors/workflow-ai.d.ts +5 -0
- package/dist/interceptors/workflow-ai.d.ts.map +1 -0
- package/dist/interceptors/workflow-ai.js +382 -0
- package/dist/interceptors/workflow-ai.js.map +1 -0
- package/dist/internals/conditional-recorder.d.ts +21 -0
- package/dist/internals/conditional-recorder.d.ts.map +1 -0
- package/dist/internals/conditional-recorder.js +54 -0
- package/dist/internals/conditional-recorder.js.map +1 -0
- package/dist/internals/mock-resolver.d.ts +146 -0
- package/dist/internals/mock-resolver.d.ts.map +1 -0
- package/dist/internals/mock-resolver.js +427 -0
- package/dist/internals/mock-resolver.js.map +1 -0
- package/dist/matchers/index.d.ts +96 -0
- package/dist/matchers/index.d.ts.map +1 -0
- package/dist/matchers/index.js +668 -0
- package/dist/matchers/index.js.map +1 -0
- package/dist/observability.d.ts +82 -0
- package/dist/observability.d.ts.map +1 -0
- package/dist/observability.js +471 -0
- package/dist/observability.js.map +1 -0
- package/dist/portal-executor.d.ts +30 -0
- package/dist/portal-executor.d.ts.map +1 -0
- package/dist/portal-executor.js +324 -0
- package/dist/portal-executor.js.map +1 -0
- package/dist/portal-server.d.ts +3 -0
- package/dist/portal-server.d.ts.map +1 -0
- package/dist/portal-server.js +279 -0
- package/dist/portal-server.js.map +1 -0
- package/dist/proxy/llm-capture.d.ts +14 -0
- package/dist/proxy/llm-capture.d.ts.map +1 -0
- package/dist/proxy/llm-capture.js +264 -0
- package/dist/proxy/llm-capture.js.map +1 -0
- package/dist/reporter.d.ts +3 -0
- package/dist/reporter.d.ts.map +1 -0
- package/dist/reporter.js +72 -0
- package/dist/reporter.js.map +1 -0
- package/dist/runWorkflowSubprocess.d.ts +14 -0
- package/dist/runWorkflowSubprocess.d.ts.map +1 -0
- package/dist/runWorkflowSubprocess.js +66 -0
- package/dist/runWorkflowSubprocess.js.map +1 -0
- package/dist/runner.d.ts +16 -0
- package/dist/runner.d.ts.map +1 -0
- package/dist/runner.js +138 -0
- package/dist/runner.js.map +1 -0
- package/dist/socket-connector.d.ts +22 -0
- package/dist/socket-connector.d.ts.map +1 -0
- package/dist/socket-connector.js +104 -0
- package/dist/socket-connector.js.map +1 -0
- package/dist/telemetry-batcher.d.ts +56 -0
- package/dist/telemetry-batcher.d.ts.map +1 -0
- package/dist/telemetry-batcher.js +143 -0
- package/dist/telemetry-batcher.js.map +1 -0
- package/dist/test-setup.d.ts +12 -0
- package/dist/test-setup.d.ts.map +1 -0
- package/dist/test-setup.js +13 -0
- package/dist/test-setup.js.map +1 -0
- package/dist/tool-registry.d.ts +31 -0
- package/dist/tool-registry.d.ts.map +1 -0
- package/dist/tool-registry.js +73 -0
- package/dist/tool-registry.js.map +1 -0
- package/dist/tool-runner-worker.d.ts +2 -0
- package/dist/tool-runner-worker.d.ts.map +1 -0
- package/dist/tool-runner-worker.js +215 -0
- package/dist/tool-runner-worker.js.map +1 -0
- package/dist/trace-adapter/context.d.ts +72 -0
- package/dist/trace-adapter/context.d.ts.map +1 -0
- package/dist/trace-adapter/context.js +80 -0
- package/dist/trace-adapter/context.js.map +1 -0
- package/dist/tracing.d.ts +2 -0
- package/dist/tracing.d.ts.map +1 -0
- package/dist/tracing.js +59 -0
- package/dist/tracing.js.map +1 -0
- package/dist/trigger-executor.d.ts +12 -0
- package/dist/trigger-executor.d.ts.map +1 -0
- package/dist/trigger-executor.js +130 -0
- package/dist/trigger-executor.js.map +1 -0
- package/dist/types/portal.d.ts +76 -0
- package/dist/types/portal.d.ts.map +1 -0
- package/dist/types/portal.js +2 -0
- package/dist/types/portal.js.map +1 -0
- package/dist/utils/debug.d.ts +3 -0
- package/dist/utils/debug.d.ts.map +1 -0
- package/dist/utils/debug.js +8 -0
- package/dist/utils/debug.js.map +1 -0
- package/dist/utils/license-error.d.ts +23 -0
- package/dist/utils/license-error.d.ts.map +1 -0
- package/dist/utils/license-error.js +42 -0
- package/dist/utils/license-error.js.map +1 -0
- package/dist/utils/redact.d.ts +7 -0
- package/dist/utils/redact.d.ts.map +1 -0
- package/dist/utils/redact.js +26 -0
- package/dist/utils/redact.js.map +1 -0
- package/dist/workflow-runner-worker.d.ts +2 -0
- package/dist/workflow-runner-worker.d.ts.map +1 -0
- package/dist/workflow-runner-worker.js +329 -0
- package/dist/workflow-runner-worker.js.map +1 -0
- package/dist/workflow-runner.d.ts +14 -0
- package/dist/workflow-runner.d.ts.map +1 -0
- package/dist/workflow-runner.js +34 -0
- package/dist/workflow-runner.js.map +1 -0
- package/docs/agent-coding-instructions.md +138 -0
- package/docs/agent-integration-guide.md +564 -0
- package/docs/agents.md +140 -0
- package/docs/dashboard.md +394 -0
- package/docs/deno.md +69 -0
- package/docs/instrumentation.md +424 -0
- package/docs/langfuse-trace-structure.md +145 -0
- package/docs/matchers.md +173 -0
- package/docs/observability_contract.md +192 -0
- package/docs/observability_mode.md +195 -0
- package/docs/quickstart.md +621 -0
- package/docs/security-compliance.md +566 -0
- package/docs/test-writing-guidelines.md +444 -0
- package/docs/tools.md +165 -0
- package/docs/workflow-modes.md +253 -0
- package/package.json +76 -0
- package/src/browser-ui.ts +281 -0
- package/src/capture/event.ts +30 -0
- package/src/capture/index.ts +3 -0
- package/src/capture/recorder.ts +62 -0
- package/src/capture/replay.ts +55 -0
- package/src/ci/api-client.ts +136 -0
- package/src/ci/benchmark.ts +257 -0
- package/src/ci/ed-runner.ts +351 -0
- package/src/ci/executor.ts +671 -0
- package/src/ci/git-info.ts +127 -0
- package/src/ci/index.ts +5 -0
- package/src/ci/measurement.ts +25 -0
- package/src/ci/replay.ts +127 -0
- package/src/ci/reporters/default.ts +50 -0
- package/src/ci/reporters/index.ts +21 -0
- package/src/ci/reporters/json.ts +18 -0
- package/src/ci/reporters/junit.ts +61 -0
- package/src/ci/runner.ts +208 -0
- package/src/ci/test-discovery.ts +16 -0
- package/src/ci/test-loader.ts +187 -0
- package/src/ci/test-registry.ts +62 -0
- package/src/ci/trace-schema.ts +96 -0
- package/src/ci/trace-writer.ts +107 -0
- package/src/ci/types.ts +115 -0
- package/src/ci/upload-client.ts +300 -0
- package/src/cli.ts +811 -0
- package/src/core/agent-state.ts +162 -0
- package/src/core/judge-utils.ts +232 -0
- package/src/core/registry.ts +92 -0
- package/src/dashboard-server.ts +2047 -0
- package/src/execution/tool-runner.ts +352 -0
- package/src/html/dashboard.html +2218 -0
- package/src/http.ts +13 -0
- package/src/index.ts +138 -0
- package/src/interceptors/ai-interceptor.ts +798 -0
- package/src/interceptors/db-auto.ts +243 -0
- package/src/interceptors/db.ts +156 -0
- package/src/interceptors/http.ts +393 -0
- package/src/interceptors/side-effects.ts +83 -0
- package/src/interceptors/telemetry-push.ts +537 -0
- package/src/interceptors/tool.ts +287 -0
- package/src/interceptors/workflow-ai.ts +419 -0
- package/src/internals/conditional-recorder.ts +63 -0
- package/src/internals/mock-resolver.ts +492 -0
- package/src/matchers/index.ts +824 -0
- package/src/observability.ts +501 -0
- package/src/portal-executor.ts +355 -0
- package/src/portal-server.ts +304 -0
- package/src/proxy/llm-capture.ts +301 -0
- package/src/reporter.ts +81 -0
- package/src/runWorkflowSubprocess.ts +74 -0
- package/src/runner.ts +178 -0
- package/src/socket-connector.ts +117 -0
- package/src/telemetry-batcher.ts +191 -0
- package/src/test-setup.ts +16 -0
- package/src/tool-registry.ts +94 -0
- package/src/tool-runner-worker.ts +244 -0
- package/src/trace-adapter/context.ts +156 -0
- package/src/tracing.ts +62 -0
- package/src/trigger-executor.ts +171 -0
- package/src/types/agent.d.ts +63 -0
- package/src/types/expect.d.ts +81 -0
- package/src/types/modules.d.ts +2 -0
- package/src/types/portal.ts +69 -0
- package/src/utils/debug.ts +8 -0
- package/src/utils/license-error.ts +43 -0
- package/src/utils/redact.ts +25 -0
- package/src/workflow-runner-worker.ts +386 -0
- package/src/workflow-runner.ts +58 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import { readFile } from 'node:fs/promises'
|
|
2
|
+
import { resolve, dirname } from 'node:path'
|
|
3
|
+
import { pathToFileURL } from 'node:url'
|
|
4
|
+
import { discoverTestFiles } from './test-discovery.js'
|
|
5
|
+
import { getTestRegistry, clearTestRegistry } from './test-registry.js'
|
|
6
|
+
import type { TestDefinition } from './test-registry.js'
|
|
7
|
+
import type { DiskTrace } from './trace-schema.js'
|
|
8
|
+
|
|
9
|
+
/**
 * A test definition that passed every check in `loadTests`, extended with the
 * trace it refers to.
 */
export interface ValidatedTest extends TestDefinition {
  /** Trace file location, resolved against the directory of the defining test file. */
  resolvedTracePath: string
  /** Parsed JSON content of the trace file at `resolvedTracePath`. */
  traceData: DiskTrace
}
|
|
13
|
+
|
|
14
|
+
/** One problem found while importing or validating a test file. */
export interface ValidationError {
  /** Path of the test file the problem belongs to. */
  file: string
  /** Name of the offending test; absent when the file itself failed to import. */
  testName?: string
  /** Human-readable description of the problem. */
  message: string
}
|
|
19
|
+
|
|
20
|
+
/** Outcome of `loadTests`: the usable tests plus every problem encountered. */
export interface LoadTestsResult {
  /** Definitions that passed validation, each with its trace attached. */
  tests: ValidatedTest[]
  /** Import failures and validation failures, in the order they were found. */
  errors: ValidationError[]
}
|
|
24
|
+
|
|
25
|
+
/**
 * Discovers test files under `cwd`, imports each one so its `defineTest` calls
 * populate the shared registry, and validates every collected definition.
 *
 * Validation never throws for a bad definition: each problem is appended to
 * `errors` and the offending test is left out of `tests`.
 *
 * @param options.cwd Directory to search; defaults to `process.cwd()`.
 * @returns The validated tests (with their parsed trace attached) and all
 *          import/validation errors.
 */
export async function loadTests(options?: { cwd?: string }): Promise<LoadTestsResult> {
  const cwd = options?.cwd ?? process.cwd()
  const files = await discoverTestFiles({ cwd })

  const allDefinitions: TestDefinition[] = []
  const errors: ValidationError[] = []

  // Phase 1: import each test file and collect definitions
  for (const file of files) {
    // Start from an empty registry so everything found afterwards belongs to `file`.
    clearTestRegistry()
    try {
      // Cache-bust so re-imports within the same process re-execute the module
      const fileUrl = pathToFileURL(file).href + `?t=${Date.now()}`
      await import(fileUrl)
    } catch (err) {
      errors.push({
        file,
        message: `Failed to import test file: ${err instanceof Error ? err.message : String(err)}`,
      })
      continue
    }
    const defs = getTestRegistry()
    for (const def of defs) {
      def._sourceFile = file
    }
    allDefinitions.push(...defs)
  }
  clearTestRegistry()

  // Phase 2: validate definitions
  const tests: ValidatedTest[] = []
  const seenNames = new Map<string, string>() // name → source file

  for (const def of allDefinitions) {
    const file = def._sourceFile!
    const fail = (message: string): void => {
      errors.push({ file, testName: typeof def.name === 'string' ? def.name : undefined, message })
    }

    // Validate name
    if (typeof def.name !== 'string' || def.name.trim().length === 0) {
      fail('Test name must be a non-empty string')
      continue
    }

    // Check uniqueness
    if (seenNames.has(def.name)) {
      fail(`Duplicate test name "${def.name}" (also defined in ${seenNames.get(def.name)})`)
      continue
    }
    seenNames.set(def.name, file)

    // Validate the trace path before handing it to `resolve`, which throws on
    // non-string arguments and would otherwise abort the whole load.
    if (typeof def.trace !== 'string' || def.trace.trim().length === 0) {
      fail('Trace path must be a non-empty string')
      continue
    }

    // Resolve trace path relative to the test file's directory
    const resolvedTracePath = resolve(dirname(file), def.trace)

    // Read and parse trace file
    let traceData: DiskTrace
    try {
      const raw = await readFile(resolvedTracePath, 'utf-8')
      traceData = JSON.parse(raw) as DiskTrace
    } catch (err) {
      fail(`Cannot read/parse trace file "${def.trace}": ${err instanceof Error ? err.message : String(err)}`)
      continue
    }

    // Validate trace has steps array (the file may legally contain `null` or a primitive)
    if (traceData === null || typeof traceData !== 'object' || !Array.isArray(traceData.steps)) {
      fail(`Trace file "${def.trace}" has no steps array`)
      continue
    }

    // Validate target
    if (!def.target || !def.target.step_id || !def.target.type) {
      fail('Target must have type and step_id')
      continue
    }

    // Tolerate malformed (null) entries in a hand-edited trace instead of crashing.
    const matchedStep = traceData.steps.find(s => s != null && s.step_id === def.target.step_id)
    if (!matchedStep) {
      fail(`step_id "${def.target.step_id}" not found in trace "${def.trace}"`)
      continue
    }

    if (matchedStep.type !== def.target.type) {
      fail(
        `target.type "${def.target.type}" does not match step type "${matchedStep.type}" for step_id "${def.target.step_id}"`,
      )
      continue
    }

    // Validate benchmarks
    if (!def.benchmarks || typeof def.benchmarks !== 'object') {
      fail('Benchmarks must be specified')
      continue
    }

    const hasMaxDuration = def.benchmarks.max_duration_ms !== undefined
    const hasMaxTokens = def.benchmarks.max_tokens_total !== undefined
    const hasOutputContains = def.benchmarks.output_contains !== undefined
    const hasOutputNotContains = def.benchmarks.output_not_contains !== undefined
    const hasLLMJudge = def.benchmarks.llm_judge !== undefined

    if (!hasMaxDuration && !hasMaxTokens && !hasOutputContains && !hasOutputNotContains && !hasLLMJudge) {
      fail('At least one benchmark must be specified')
      continue
    }

    // Numeric benchmark problems are all reported before the test is dropped.
    let valid = true

    if (hasMaxDuration && !isPositiveNumber(def.benchmarks.max_duration_ms)) {
      fail('max_duration_ms must be a positive number')
      valid = false
    }

    if (hasMaxTokens) {
      if (!isPositiveNumber(def.benchmarks.max_tokens_total)) {
        fail('max_tokens_total must be a positive number')
        valid = false
      }
      if (def.target.type !== 'ai_call') {
        fail('max_tokens_total can only be used with ai_call targets')
        valid = false
      }
    }

    if (!valid) continue

    // Validate run (optional but must be a function if present)
    if (def.run !== undefined && typeof def.run !== 'function') {
      fail('run must be a function')
      continue
    }

    // Validate timeout_ms (optional but must be a positive number if present)
    if (def.timeout_ms !== undefined && !isPositiveNumber(def.timeout_ms)) {
      fail('timeout_ms must be a positive number')
      continue
    }

    tests.push({
      ...def,
      resolvedTracePath,
      traceData,
    })
  }

  return { tests, errors }
}

/** True when `value` is a number strictly greater than zero; `NaN` is rejected. */
function isPositiveNumber(value: unknown): value is number {
  return typeof value === 'number' && value > 0
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/** Identifies the single trace step a test is evaluated against. */
export interface TestTarget {
  /** Kind of step being targeted. */
  type: 'tool_call' | 'ai_call'
  /** Identifier of the step inside the trace file. */
  step_id: string
}
|
|
5
|
+
|
|
6
|
+
/** Configuration for grading a step's output with an LLM judge. */
export interface LLMJudgeBenchmark {
  /** The prompt sent to the LLM judge to evaluate the step's output. */
  judge_prompt: string
  /** Minimum score (0-10) required to pass. Defaults to 7. */
  judge_score_threshold?: number
  /** LLM provider to use for judging. Defaults to 'openai'. */
  judge_provider?:
    | 'openai'
    | 'claude'
    | 'gemini'
    | 'grok'
    | 'kimi'
  /** Model override for the judge LLM. */
  judge_model?: string
}
|
|
16
|
+
|
|
17
|
+
/** The pass/fail criteria a test may place on its target step. All are optional. */
export interface TestBenchmarks {
  /** Upper bound on duration, in milliseconds. */
  max_duration_ms?: number
  /** Upper bound on total tokens. */
  max_tokens_total?: number
  /** Assert that the step's output contains this substring. */
  output_contains?: string
  /** Assert that the step's output does NOT contain this substring. */
  output_not_contains?: string
  /** LLM-as-a-judge evaluation of the step's output quality. */
  llm_judge?: LLMJudgeBenchmark
}
|
|
27
|
+
|
|
28
|
+
/** A single benchmark test as declared through `defineTest`. */
export interface TestDefinition {
  /** Name of the test. */
  name: string
  /** Path to the trace file this test runs against. */
  trace: string
  /** The trace step under test. */
  target: TestTarget
  /** Criteria the target step must satisfy. */
  benchmarks: TestBenchmarks
  /**
   * Custom input that overrides the trace's recorded input. Can be a static
   * value or an async function for dynamic resolution (e.g. fetching from a
   * database or API).
   */
  input?: unknown | (() => Promise<unknown> | unknown)
  /**
   * The function that invokes the workflow under test. Receives the resolved
   * input (custom or from trace) as its argument. Required for execution
   * (Phase 3).
   */
  run?: (input?: unknown) => Promise<void>
  /** Per-test timeout in milliseconds. Defaults to 60000. */
  timeout_ms?: number
  /** Set internally by the loader to the absolute path of the source file */
  _sourceFile?: string
}
|
|
42
|
+
|
|
43
|
+
// The registry array is stored on globalThis rather than in module scope.
// A globally installed CLI and a test file importing the SDK from its local
// node_modules may load two separate copies of this module; keying the array
// on globalThis lets both copies read and write the same list.
const ED_TEST_REGISTRY_KEY = '__elasticdash_ed_test_registry__'
const g = globalThis as Record<string, unknown>
const existingRegistry = g[ED_TEST_REGISTRY_KEY] as TestDefinition[] | null | undefined
// Reuse an array published by another module instance, otherwise start a new one.
const registry: TestDefinition[] = existingRegistry ?? []
if (!g[ED_TEST_REGISTRY_KEY]) {
  g[ED_TEST_REGISTRY_KEY] = registry
}
|
|
51
|
+
|
|
52
|
+
/**
 * Registers a test definition.
 *
 * A shallow copy of `def` is appended to the shared registry, so adding or
 * replacing top-level properties on the caller's object afterwards does not
 * affect the registered entry.
 */
export function defineTest(def: Omit<TestDefinition, '_sourceFile'>): void {
  const entry: TestDefinition = { ...def }
  registry.push(entry)
}
|
|
55
|
+
|
|
56
|
+
/**
 * Returns the currently registered definitions as a new array.
 *
 * The array is a copy, but its elements are the registered objects themselves.
 */
export function getTestRegistry(): TestDefinition[] {
  return Array.from(registry)
}
|
|
59
|
+
|
|
60
|
+
/**
 * Empties the shared registry in place, so every holder of the array
 * reference sees it cleared.
 */
export function clearTestRegistry(): void {
  registry.splice(0, registry.length)
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs'
|
|
2
|
+
import { join, dirname } from 'node:path'
|
|
3
|
+
import { fileURLToPath } from 'node:url'
|
|
4
|
+
import type { WorkflowEvent } from '../capture/event.js'
|
|
5
|
+
|
|
6
|
+
/**
 * Version of this SDK as read from a package.json two directories above this
 * module. Stays `'unknown'` when the file cannot be read or parsed, or when it
 * has no usable `version` string.
 */
export let SDK_VERSION = 'unknown'
try {
  // Works in both ESM (import.meta.url) and CJS (__dirname) contexts
  const base = typeof __dirname !== 'undefined' ? __dirname : dirname(fileURLToPath(import.meta.url))
  const pkg: unknown = JSON.parse(readFileSync(join(base, '..', '..', 'package.json'), 'utf-8'))
  const version = (pkg as { version?: unknown } | null)?.version
  // Only accept a real version string so SDK_VERSION can never become undefined
  // or a non-string value.
  if (typeof version === 'string' && version.length > 0) {
    SDK_VERSION = version
  }
} catch {
  // Bundled context — version not critical
}
|
|
15
|
+
|
|
16
|
+
/** One recorded tool or AI call as stored in a trace file. */
export interface DiskTraceStep {
  /** Step identifier; the converter in this file emits `<type>_<index>`, e.g. `tool_call_0`. */
  step_id: string
  /** Kind of call this step represents. */
  type: 'tool_call' | 'ai_call'
  /** Name of the originating event. */
  name: string
  /** Recorded input of the call. */
  input: unknown
  /** Recorded output of the call. */
  output: unknown
  /** ISO-8601 timestamp at which the call started. */
  started_at: string
  /** ISO-8601 timestamp at which the call ended. */
  ended_at: string
  /** Duration of the call in milliseconds. */
  duration_ms: number
  /** Token usage; the converter sets `null` for non-AI steps or when usage was not recorded. */
  tokens?: { input: number; output: number; total: number } | null
}
|
|
27
|
+
|
|
28
|
+
/** On-disk representation of one captured workflow run. */
export interface DiskTrace {
  /** Identifier of the trace. */
  trace_id: string
  /** ISO-8601 timestamp at which the trace object was created. */
  created_at: string
  /** Version of the SDK that produced the trace. */
  sdk_version: string
  /** Summary of the workflow the steps belong to. */
  workflow: {
    name: string
    input: unknown
    output: unknown
  }
  /** Recorded tool and AI calls, in event order. */
  steps: DiskTraceStep[]
}
|
|
39
|
+
|
|
40
|
+
/**
 * Translates a captured event type into the step type used in trace files.
 *
 * @returns `'tool_call'` for `'tool'`, `'ai_call'` for `'ai'`, and `null` for
 *          every other event type.
 */
function mapEventType(type: string): 'tool_call' | 'ai_call' | null {
  switch (type) {
    case 'tool':
      return 'tool_call'
    case 'ai':
      return 'ai_call'
    default:
      return null
  }
}
|
|
45
|
+
|
|
46
|
+
/**
 * Converts captured workflow events into the on-disk trace format.
 *
 * Only events that `mapEventType` recognises become steps. Step ids are
 * `<type>_<n>`, where `n` counts from zero separately for each step type.
 * The first event of type `'workflow'`, if any, supplies the workflow
 * name/input/output; otherwise those fall back to `'unknown'`/`null`/`null`.
 *
 * @param events  Captured events, in the order they should appear as steps.
 * @param traceId Value stored as `trace_id`.
 */
export function workflowEventsToDiskTrace(events: WorkflowEvent[], traceId: string): DiskTrace {
  const root = events.find(candidate => candidate.type === 'workflow')

  // Next free index for each step type.
  const nextIndex: Record<string, number> = { tool_call: 0, ai_call: 0 }
  const steps: DiskTraceStep[] = []

  events.forEach(event => {
    const stepType = mapEventType(event.type)
    if (stepType === null) return

    const ordinal = nextIndex[stepType]
    nextIndex[stepType] = ordinal + 1

    const started_at = new Date(event.timestamp).toISOString()
    const ended_at = new Date(event.timestamp + event.durationMs).toISOString()

    // Token usage is only reported for AI calls that carry usage data.
    let tokens: DiskTraceStep['tokens'] = null
    if (stepType === 'ai_call' && event.usage) {
      const usage = event.usage
      tokens = {
        input: usage.inputTokens ?? 0,
        output: usage.outputTokens ?? 0,
        // Derive the total from its parts when it was not recorded.
        total: usage.totalTokens ?? ((usage.inputTokens ?? 0) + (usage.outputTokens ?? 0)),
      }
    }

    steps.push({
      step_id: `${stepType}_${ordinal}`,
      type: stepType,
      name: event.name,
      input: event.input,
      output: event.output,
      started_at,
      ended_at,
      duration_ms: event.durationMs,
      tokens,
    })
  })

  const createdAt = new Date().toISOString()
  return {
    trace_id: traceId,
    created_at: createdAt,
    sdk_version: SDK_VERSION,
    workflow: {
      name: root?.name ?? 'unknown',
      input: root?.input ?? null,
      output: root?.output ?? null,
    },
    steps,
  }
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { mkdir, writeFile } from 'node:fs/promises'
|
|
2
|
+
import { mkdirSync, writeFileSync } from 'node:fs'
|
|
3
|
+
import { join } from 'node:path'
|
|
4
|
+
import { randomBytes } from 'node:crypto'
|
|
5
|
+
import type { WorkflowEvent } from '../capture/event.js'
|
|
6
|
+
import { workflowEventsToDiskTrace } from './trace-schema.js'
|
|
7
|
+
import type { DiskTrace } from './trace-schema.js'
|
|
8
|
+
|
|
9
|
+
/** Directory (relative to the working directory) that trace files are written to. */
const TRACE_DIR = '.ed_traces'

/**
 * Object keys whose values are replaced before a trace is written to disk.
 *
 * The match is case-insensitive and anchored to the whole key, so unrelated
 * keys such as `tokens` or `max_tokens_total` are left alone. Besides the bare
 * names it covers the common hyphen/underscore/camelCase spellings of
 * credential keys and headers (`x-api-key`, `api-key`, `access_token`,
 * `refreshToken`, `client_secret`, `cookie`, ...).
 */
const SENSITIVE_KEYS =
  /^(authorization|proxy[-_]?authorization|cookie|set[-_]?cookie|(x[-_]?)?api[-_]?key|password|passwd|secret|client[-_]?secret|token|(access|refresh|id|auth|bearer|session)[-_]?token|private[-_]?key)$/i
|
|
12
|
+
|
|
13
|
+
// Ensures the diagnostic hint below is printed at most once per process.
let _captureCheckLogged = false

/**
 * Reports whether trace capture is switched on, i.e. whether the
 * `ELASTICDASH_CAPTURE_TRACE` environment variable is exactly `"1"`.
 *
 * On the first call that finds capture disabled, a one-off hint is logged if
 * the variable is set to some other value, so users can spot a typo.
 */
export function isTraceCaptureEnabled(): boolean {
  const flag = process.env.ELASTICDASH_CAPTURE_TRACE
  const enabled = flag === '1'

  if (!enabled && !_captureCheckLogged) {
    _captureCheckLogged = true
    if (flag !== undefined) {
      console.log(`[elasticdash] ELASTICDASH_CAPTURE_TRACE=${JSON.stringify(flag)} (expected "1" to enable)`)
    }
  }

  return enabled
}
|
|
26
|
+
|
|
27
|
+
/**
 * Builds a trace file name of the form `YYYY-MM-DDTHH-MM-SS_<4 hex chars>.json`
 * from the current UTC time and a short random suffix.
 */
export function generateTraceFilename(): string {
  const iso = new Date().toISOString()
  // Drop the trailing ".sssZ" (milliseconds and zone marker).
  const toSeconds = iso.slice(0, -5)
  // Colons are swapped for dashes to keep the name filesystem-safe.
  const stamp = toSeconds.split(':').join('-')
  const randomPart = randomBytes(2).toString('hex') // 4 hex chars
  return stamp + '_' + randomPart + '.json'
}
|
|
35
|
+
|
|
36
|
+
/**
 * Returns a deep copy of `obj` in which the value of every key matching
 * `SENSITIVE_KEYS` is replaced by the string `'[REDACTED]'`.
 *
 * - Primitives, `null`, `undefined` and functions are returned unchanged.
 * - Arrays are copied element by element.
 * - Objects exposing `toJSON` (for example `Date`) are scrubbed in their
 *   serialised form, so they keep the value `JSON.stringify` would have
 *   written instead of collapsing into an empty object.
 * - A reference back to an object that is currently being traversed is
 *   replaced by `'[Circular]'` rather than recursing without end.
 *
 * The input is never mutated.
 */
export function scrubSecrets(obj: unknown): unknown {
  // Objects on the current traversal path; shared (non-cyclic) references are
  // therefore still copied normally.
  const ancestors = new WeakSet<object>()

  const scrub = (value: unknown): unknown => {
    if (value === null || typeof value !== 'object') return value
    if (ancestors.has(value)) return '[Circular]'

    ancestors.add(value)
    try {
      const toJSON = (value as { toJSON?: unknown }).toJSON
      if (typeof toJSON === 'function') {
        const serialised: unknown = toJSON.call(value, '')
        // A toJSON that returns the object itself is treated as a plain object below.
        if (serialised !== value) return scrub(serialised)
      }

      if (Array.isArray(value)) return value.map(item => scrub(item))

      const result: Record<string, unknown> = {}
      for (const [key, entry] of Object.entries(value)) {
        result[key] = SENSITIVE_KEYS.test(key) ? '[REDACTED]' : scrub(entry)
      }
      return result
    } finally {
      ancestors.delete(value)
    }
  }

  return scrub(obj)
}
|
|
52
|
+
|
|
53
|
+
/**
 * Writes `trace` as pretty-printed JSON to `<cwd>/.ed_traces/<trace_id>.json`,
 * creating the directory if needed. Secrets are scrubbed before writing.
 *
 * Best effort: any failure while creating the directory or writing the file
 * is reported with `console.warn` and not rethrown.
 *
 * @param trace Trace to persist.
 * @param cwd   Base directory; defaults to `process.cwd()`.
 */
export async function writeTraceToDisk(trace: DiskTrace, cwd?: string): Promise<void> {
  const baseDir = cwd ?? process.cwd()
  const traceDir = join(baseDir, TRACE_DIR)

  try {
    await mkdir(traceDir, { recursive: true })

    const safeTrace = scrubSecrets(trace) as DiskTrace
    const target = join(traceDir, `${safeTrace.trace_id}.json`)
    const payload = JSON.stringify(safeTrace, null, 2)
    await writeFile(target, payload, 'utf-8')

    console.log(`[elasticdash] Trace written: ${target} (${trace.steps.length} steps)`)
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err)
    console.warn(`[elasticdash] Failed to write trace file: ${reason}`)
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Blocking counterpart of writeTraceToDisk, intended for process.on('exit')
 * handlers where asynchronous work is not possible.
 *
 * Resolves TRACE_DIR under `cwd` (or the process working directory), creates it
 * if needed, scrubs the trace with scrubSecrets(), and writes it as
 * pretty-printed JSON named after the scrubbed trace's `trace_id`.
 * Nothing is logged on success. Failures are reported with console.warn on a
 * best-effort basis and never propagated.
 *
 * @param trace Trace to persist.
 * @param cwd Optional base directory; defaults to `process.cwd()`.
 */
export function writeTraceToDiskSync(trace: DiskTrace, cwd?: string): void {
  const baseDir = cwd ?? process.cwd()
  const traceDir = join(baseDir, TRACE_DIR)

  try {
    mkdirSync(traceDir, { recursive: true })

    const sanitized = scrubSecrets(trace) as DiskTrace
    const target = join(traceDir, `${sanitized.trace_id}.json`)
    const payload = JSON.stringify(sanitized, null, 2)

    writeFileSync(target, payload, 'utf-8')
  } catch (err) {
    // During process exit the warning itself may fail or never flush, so it is guarded too.
    try {
      const reason = err instanceof Error ? err.message : String(err)
      console.warn(`[elasticdash] Failed to write trace file: ${reason}`)
    } catch {}
  }
}
|
|
83
|
+
|
|
84
|
+
/**
 * Persists the given workflow events as a trace file, but only when trace
 * capture is enabled (see isTraceCaptureEnabled) and `events` is non-empty.
 *
 * A fresh id is derived from generateTraceFilename() with the `.json`
 * extension removed. The events are converted with workflowEventsToDiskTrace(),
 * which receives `traceId` when provided and the fresh id otherwise.
 *
 * NOTE(review): the resulting trace's `trace_id` is always overwritten with the
 * fresh id, even when a `traceId` was supplied — confirm this is intended.
 *
 * @param events Workflow events to record.
 * @param traceId Optional id forwarded to the event-to-trace conversion.
 * @param cwd Optional base directory forwarded to writeTraceToDisk.
 */
export async function maybeCaptureTrace(events: WorkflowEvent[], traceId?: string, cwd?: string): Promise<void> {
  if (!isTraceCaptureEnabled() || events.length === 0) return

  const generatedId = generateTraceFilename().replace(/\.json$/, '')
  const diskTrace = workflowEventsToDiskTrace(events, traceId ?? generatedId)
  // The file on disk is named after trace_id, so it is pinned to the generated id.
  diskTrace.trace_id = generatedId

  await writeTraceToDisk(diskTrace, cwd)
}
|
|
94
|
+
|
|
95
|
+
/**
 * Blocking variant of maybeCaptureTrace for process exit handlers.
 *
 * Does nothing unless trace capture is enabled (see isTraceCaptureEnabled) and
 * `events` is non-empty. Otherwise it derives a fresh id from
 * generateTraceFilename() (extension removed), converts the events with
 * workflowEventsToDiskTrace() — passing `traceId` when provided, the fresh id
 * otherwise — and writes the result with writeTraceToDiskSync().
 *
 * NOTE(review): the resulting trace's `trace_id` is always overwritten with the
 * fresh id, even when a `traceId` was supplied — confirm this is intended.
 *
 * @param events Workflow events to record.
 * @param traceId Optional id forwarded to the event-to-trace conversion.
 * @param cwd Optional base directory forwarded to writeTraceToDiskSync.
 */
export function maybeCaptureTraceSync(events: WorkflowEvent[], traceId?: string, cwd?: string): void {
  if (!isTraceCaptureEnabled() || events.length === 0) return

  const generatedId = generateTraceFilename().replace(/\.json$/, '')
  const diskTrace = workflowEventsToDiskTrace(events, traceId ?? generatedId)
  // The file on disk is named after trace_id, so it is pinned to the generated id.
  diskTrace.trace_id = generatedId

  writeTraceToDiskSync(diskTrace, cwd)
}
|
package/src/ci/types.ts
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
// ─── CI Runner Types ─────────────────────────────────────────
|
|
2
|
+
|
|
3
|
+
/**
 * Configuration object for a CI run.
 *
 * `serverUrl` and `apiKey` are the only required members; all others may be
 * omitted.
 */
export interface CIRunConfig {
  // Required
  serverUrl: string
  apiKey: string

  // Optional run attributes
  workflowName?: string
  tags?: Array<string>
  triggeredBy?: 'ci' | 'api'

  // Optional git metadata
  gitBranch?: string
  gitCommit?: string
  gitCommitMessage?: string
  gitPrNumber?: number
  gitPrUrl?: string
}
|
|
15
|
+
|
|
16
|
+
/**
 * Result record for one test within a test group.
 *
 * Carries the identifiers of the group and the test, the overall pass flag,
 * the individual runs (`singleRuns`), the per-expectation results, and the
 * elapsed time. `error` is present only when one is recorded.
 */
export interface CITestResult {
  // Identification
  testGroupId: number
  testGroupName: string
  testId: number
  testName: string | null
  testType: string

  // Outcome
  passed: boolean
  runId: number | null
  singleRuns: Array<CISingleRunResult>
  expectationResults: Array<CIExpectationResult>
  error?: string
  durationMs: number
}
|
|
29
|
+
|
|
30
|
+
/**
 * Result record for a single run of a test.
 *
 * `output` and `trace` are typed `unknown` and must be narrowed by the
 * consumer before use. `error` is present only when one is recorded.
 */
export interface CISingleRunResult {
  runIndex: number
  passed: boolean
  durationMs: number

  // Token counters
  inputTokens: number
  outputTokens: number
  totalTokens: number

  // Opaque payloads
  output: unknown
  trace: unknown

  error?: string
}
|
|
41
|
+
|
|
42
|
+
/**
 * Outcome of evaluating one expectation.
 *
 * `detail` optionally carries a textual explanation. `perRun`, when present,
 * maps a numeric key to that entry's own pass flag and optional detail.
 */
export interface CIExpectationResult {
  expectationId: number
  type: string
  passed: boolean
  detail?: string
  // NOTE(review): keys are presumably run indices — confirm against the producer.
  perRun?: Record<number, { passed: boolean; detail?: string }>
}
|
|
49
|
+
|
|
50
|
+
/**
 * Aggregate summary of a CI run: counters, total elapsed time, the batch id
 * (nullable), and the list of individual test results.
 */
export interface CIRunSummary {
  // Counters
  total: number
  passed: number
  failed: number
  skipped: number

  durationMs: number
  batchId: number | null
  results: Array<CITestResult>
}
|
|
59
|
+
|
|
60
|
+
// ─── API Response Types ─────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
/**
 * Test group as it appears in an API response (snake_case member names).
 *
 * `trace_file` is typed `unknown` and must be narrowed by the consumer before
 * use. `tests` holds the tests belonging to the group.
 */
export interface APITestGroup {
  id: number
  name: string
  description: string | null
  project_id: number
  workflow_name: string
  trace_file: unknown
  status: string
  tags: Array<string>
  tests: Array<APITestGroupTest>
}
|
|
73
|
+
|
|
74
|
+
/**
 * A single test of a test group as it appears in an API response
 * (snake_case member names).
 *
 * `test_type` is either 'single-step' or 'full-flow'. The `target_step_*`
 * members are nullable. `mock_input`, `workflow_input` and the elements of
 * `frozen_events` are typed `unknown` and must be narrowed before use.
 */
export interface APITestGroupTest {
  // Identification
  id: number
  test_group_id: number
  name: string | null
  description: string | null

  // Test kind and targeted step
  test_type: 'single-step' | 'full-flow'
  target_step_index: number | null
  target_step_type: string | null
  target_step_name: string | null

  // Inputs and mocks
  mock_input: unknown
  workflow_input: unknown
  frozen_events: Array<unknown>
  tool_mocks: Record<string, unknown>
  prompt_mocks: Record<string, string>

  // Execution settings
  run_count: number
  pass_threshold: string
  timeout_ms: number
  sort_order: number

  expectations: Array<APIExpectation>
}
|
|
94
|
+
|
|
95
|
+
/**
 * An expectation attached to a test, as it appears in an API response
 * (snake_case member names).
 *
 * `type` names the kind of expectation; most remaining members are nullable
 * settings. `json_schema` and `tool_call_rules` are typed `unknown` and must
 * be narrowed before use.
 */
export interface APIExpectation {
  id: number
  test_group_test_id: number
  type: string

  // Judge settings
  judge_prompt: string | null
  judge_model: string | null
  judge_provider: string | null
  judge_score_threshold: number | null

  // Token and duration limits
  max_total_tokens: number | null
  max_tokens_per_run: number | null
  max_duration_ms: number | null
  max_total_duration_ms: number | null

  // Text matching
  contains_text: string | null
  not_contains_text: string | null
  case_insensitive: boolean

  json_schema: unknown
  similarity_threshold: number | null

  // Tool usage
  required_tools: Array<string>
  forbidden_tools: Array<string>
  tool_call_rules: unknown
}
|