@ariaflowagents/core 0.8.1 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/capabilities/AutoRetrieveCapability.d.ts +30 -0
- package/dist/capabilities/AutoRetrieveCapability.d.ts.map +1 -0
- package/dist/capabilities/AutoRetrieveCapability.js +36 -0
- package/dist/capabilities/AutoRetrieveCapability.js.map +1 -0
- package/dist/capabilities/ExtractionCapability.d.ts +25 -0
- package/dist/capabilities/ExtractionCapability.d.ts.map +1 -0
- package/dist/capabilities/ExtractionCapability.js +74 -0
- package/dist/capabilities/ExtractionCapability.js.map +1 -0
- package/dist/capabilities/FlowCapability.d.ts +81 -0
- package/dist/capabilities/FlowCapability.d.ts.map +1 -0
- package/dist/capabilities/FlowCapability.js +482 -0
- package/dist/capabilities/FlowCapability.js.map +1 -0
- package/dist/capabilities/GuardrailCapability.d.ts +30 -0
- package/dist/capabilities/GuardrailCapability.d.ts.map +1 -0
- package/dist/capabilities/GuardrailCapability.js +38 -0
- package/dist/capabilities/GuardrailCapability.js.map +1 -0
- package/dist/capabilities/HandoffCapability.d.ts +19 -0
- package/dist/capabilities/HandoffCapability.d.ts.map +1 -0
- package/dist/capabilities/HandoffCapability.js +58 -0
- package/dist/capabilities/HandoffCapability.js.map +1 -0
- package/dist/capabilities/LivePromptAssembler.d.ts +108 -0
- package/dist/capabilities/LivePromptAssembler.d.ts.map +1 -0
- package/dist/capabilities/LivePromptAssembler.js +157 -0
- package/dist/capabilities/LivePromptAssembler.js.map +1 -0
- package/dist/capabilities/TriageCapability.d.ts +16 -0
- package/dist/capabilities/TriageCapability.d.ts.map +1 -0
- package/dist/capabilities/TriageCapability.js +61 -0
- package/dist/capabilities/TriageCapability.js.map +1 -0
- package/dist/capabilities/adapters/ai-sdk.d.ts +14 -0
- package/dist/capabilities/adapters/ai-sdk.d.ts.map +1 -0
- package/dist/capabilities/adapters/ai-sdk.js +29 -0
- package/dist/capabilities/adapters/ai-sdk.js.map +1 -0
- package/dist/capabilities/adapters/gemini.d.ts +15 -0
- package/dist/capabilities/adapters/gemini.d.ts.map +1 -0
- package/dist/capabilities/adapters/gemini.js +40 -0
- package/dist/capabilities/adapters/gemini.js.map +1 -0
- package/dist/capabilities/index.d.ts +154 -0
- package/dist/capabilities/index.d.ts.map +1 -0
- package/dist/capabilities/index.js +128 -0
- package/dist/capabilities/index.js.map +1 -0
- package/dist/eval/EvalRunner.d.ts +12 -0
- package/dist/eval/EvalRunner.d.ts.map +1 -0
- package/dist/eval/EvalRunner.js +64 -0
- package/dist/eval/EvalRunner.js.map +1 -0
- package/dist/eval/scoring.d.ts +15 -0
- package/dist/eval/scoring.d.ts.map +1 -0
- package/dist/eval/scoring.js +152 -0
- package/dist/eval/scoring.js.map +1 -0
- package/dist/eval/types.d.ts +59 -0
- package/dist/eval/types.d.ts.map +1 -0
- package/dist/eval/types.js +2 -0
- package/dist/eval/types.js.map +1 -0
- package/dist/flows/FlowGraph.d.ts +3 -1
- package/dist/flows/FlowGraph.d.ts.map +1 -1
- package/dist/flows/FlowGraph.js +5 -0
- package/dist/flows/FlowGraph.js.map +1 -1
- package/dist/flows/FlowManager.d.ts +60 -1
- package/dist/flows/FlowManager.d.ts.map +1 -1
- package/dist/flows/FlowManager.js +467 -34
- package/dist/flows/FlowManager.js.map +1 -1
- package/dist/flows/extraction.d.ts +16 -1
- package/dist/flows/extraction.d.ts.map +1 -1
- package/dist/flows/extraction.js +34 -0
- package/dist/flows/extraction.js.map +1 -1
- package/dist/flows/index.d.ts +2 -0
- package/dist/flows/index.d.ts.map +1 -1
- package/dist/flows/index.js +1 -0
- package/dist/flows/index.js.map +1 -1
- package/dist/flows/validation.d.ts +1 -1
- package/dist/flows/validation.d.ts.map +1 -1
- package/dist/flows/validation.js +13 -1
- package/dist/flows/validation.js.map +1 -1
- package/dist/hooks/HookRunner.d.ts +3 -1
- package/dist/hooks/HookRunner.d.ts.map +1 -1
- package/dist/hooks/HookRunner.js +3 -0
- package/dist/hooks/HookRunner.js.map +1 -1
- package/dist/hooks/builtin/metrics.d.ts.map +1 -1
- package/dist/hooks/builtin/metrics.js +12 -0
- package/dist/hooks/builtin/metrics.js.map +1 -1
- package/dist/hooks/builtin/observability.d.ts +21 -0
- package/dist/hooks/builtin/observability.d.ts.map +1 -0
- package/dist/hooks/builtin/observability.js +535 -0
- package/dist/hooks/builtin/observability.js.map +1 -0
- package/dist/index.d.ts +11 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -1
- package/dist/index.js.map +1 -1
- package/dist/orchestration/DefaultOrchestrationAuthority.d.ts +91 -0
- package/dist/orchestration/DefaultOrchestrationAuthority.d.ts.map +1 -0
- package/dist/orchestration/DefaultOrchestrationAuthority.js +786 -0
- package/dist/orchestration/DefaultOrchestrationAuthority.js.map +1 -0
- package/dist/orchestration/OrchestrationAuthority.d.ts +119 -0
- package/dist/orchestration/OrchestrationAuthority.d.ts.map +1 -0
- package/dist/orchestration/OrchestrationAuthority.js +2 -0
- package/dist/orchestration/OrchestrationAuthority.js.map +1 -0
- package/dist/orchestration/RealtimeExtractionRunner.d.ts +25 -0
- package/dist/orchestration/RealtimeExtractionRunner.d.ts.map +1 -0
- package/dist/orchestration/RealtimeExtractionRunner.js +62 -0
- package/dist/orchestration/RealtimeExtractionRunner.js.map +1 -0
- package/dist/orchestration/index.d.ts +5 -0
- package/dist/orchestration/index.d.ts.map +1 -0
- package/dist/orchestration/index.js +4 -0
- package/dist/orchestration/index.js.map +1 -0
- package/dist/orchestration/types.d.ts +134 -0
- package/dist/orchestration/types.d.ts.map +1 -0
- package/dist/orchestration/types.js +2 -0
- package/dist/orchestration/types.js.map +1 -0
- package/dist/realtime/RealtimeAudioClient.d.ts +105 -0
- package/dist/realtime/RealtimeAudioClient.d.ts.map +1 -0
- package/dist/realtime/RealtimeAudioClient.js +15 -0
- package/dist/realtime/RealtimeAudioClient.js.map +1 -0
- package/dist/realtime/RealtimeRuntime.d.ts +136 -0
- package/dist/realtime/RealtimeRuntime.d.ts.map +1 -0
- package/dist/realtime/RealtimeRuntime.js +270 -0
- package/dist/realtime/RealtimeRuntime.js.map +1 -0
- package/dist/realtime/index.d.ts +4 -0
- package/dist/realtime/index.d.ts.map +1 -0
- package/dist/realtime/index.js +2 -0
- package/dist/realtime/index.js.map +1 -0
- package/dist/runtime/ExtractionEngine.d.ts +2 -1
- package/dist/runtime/ExtractionEngine.d.ts.map +1 -1
- package/dist/runtime/ExtractionEngine.js +11 -0
- package/dist/runtime/ExtractionEngine.js.map +1 -1
- package/dist/runtime/FlowExecutor.d.ts +7 -5
- package/dist/runtime/FlowExecutor.d.ts.map +1 -1
- package/dist/runtime/FlowExecutor.js +71 -12
- package/dist/runtime/FlowExecutor.js.map +1 -1
- package/dist/runtime/Runtime.d.ts +22 -0
- package/dist/runtime/Runtime.d.ts.map +1 -1
- package/dist/runtime/Runtime.js +47 -0
- package/dist/runtime/Runtime.js.map +1 -1
- package/dist/runtime/pipeline/AgentExecuteStage.d.ts.map +1 -1
- package/dist/runtime/pipeline/AgentExecuteStage.js +94 -25
- package/dist/runtime/pipeline/AgentExecuteStage.js.map +1 -1
- package/dist/runtime/pipeline/ContextAssembleStage.js +1 -1
- package/dist/types/index.d.ts +61 -3
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +4 -0
- package/dist/types/index.js.map +1 -1
- package/dist/types/telemetry.d.ts +107 -0
- package/dist/types/telemetry.d.ts.map +1 -1
- package/package.json +15 -2
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import type { ZodTypeAny } from 'zod';
|
|
2
|
+
/**
|
|
3
|
+
* Backend-agnostic tool declaration. Produced by capabilities,
|
|
4
|
+
* consumed by adapters (Gemini, AI SDK, LiveKit).
|
|
5
|
+
*/
|
|
6
|
+
export interface ToolDeclaration {
|
|
7
|
+
/** Tool name; `CapabilityHost.getAllTools()` deduplicates declarations by this value. */
name: string;
|
|
8
|
+
/** Free-text description of the tool (presumably surfaced to the model by the adapters — TODO confirm). */
description: string;
|
|
9
|
+
/** Zod schema for the tool's arguments. */
parameters: ZodTypeAny;
|
|
10
|
+
/** Runs the tool; its resolved value is what `Capability.processToolResult()` receives as `result`. */
execute: (args: any, options?: unknown) => Promise<unknown>;
|
|
11
|
+
}
|
|
12
|
+
/** One piece of system-prompt text contributed by a capability through `getPromptSections()`. */
export interface PromptSection {
|
|
13
|
+
/**
|
|
14
|
+
* Semantic role that determines ordering in the final prompt.
|
|
15
|
+
* Built-in roles: 'role' > 'task' > 'state' > 'context' > 'extraction' > 'routing' > 'policy'.
|
|
16
|
+
* Custom roles sort after built-in ones.
|
|
17
|
+
*/
|
|
18
|
+
role: string;
|
|
19
|
+
/** Prompt text for this section; sections whose content is blank after trimming are left out of the assembled prompt. */
content: string;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Structured payload for the realtime model when an extraction submit
|
|
23
|
+
* advances collection but does not yet complete the node.
|
|
24
|
+
*/
|
|
25
|
+
export interface ExtractionToolResponseEnvelope {
|
|
26
|
+
/** Typed as the literal `true`: the envelope only exists for submits that were accepted. */
accepted: true;
|
|
27
|
+
/** Presumably the names of the fields captured so far — TODO confirm against the producing capability. */
fieldsReceived: string[];
|
|
28
|
+
/** Presumably the names of required fields that are still missing — TODO confirm. */
fieldsStillRequired: string[];
|
|
29
|
+
/** Free-text guidance for the realtime model (likely what to ask for next) — TODO confirm. */
instruction: string;
|
|
30
|
+
}
|
|
31
|
+
/** Optional metadata when the flow transitions to a new node (reconfigure). */
|
|
32
|
+
export interface FlowReconfigureTransition {
|
|
33
|
+
/** Presumably the id of the node the flow is leaving — TODO confirm. */
from: string;
|
|
34
|
+
/** The node the flow transitioned to. */
to: string;
|
|
35
|
+
/** Extraction submit tool that triggered transition to `to`, when applicable. */
|
|
36
|
+
triggerTool?: string;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Returned by `Capability.processToolResult()` to tell the host what to do.
|
|
40
|
+
*/
|
|
41
|
+
export type CapabilityAction = {
|
|
42
|
+
/** No special handling; `CapabilityHost.processToolResult()` also returns this when no capability claims the tool. */
type: 'continue';
|
|
43
|
+
/** Optional envelope, presumably returned to the model in place of the raw tool result — TODO confirm with the host integration. */
toolResponseOverride?: ExtractionToolResponseEnvelope;
|
|
44
|
+
} | {
|
|
45
|
+
/** Presumably asks the host to re-read tools and prompt because they changed — TODO confirm. */
type: 'reconfigure';
|
|
46
|
+
/** Optional metadata about the flow node change that caused the reconfigure. */
transition?: FlowReconfigureTransition;
|
|
47
|
+
} | {
|
|
48
|
+
/** Hand the conversation over to another agent. */
type: 'handoff';
|
|
49
|
+
/** Identifier of the agent to hand off to (presumably an agent id — TODO confirm). */
targetAgent: string;
|
|
50
|
+
/** Optional free-text explanation for the handoff. */
reason?: string;
|
|
51
|
+
} | {
|
|
52
|
+
/** Presumably tells the host to end the conversation — TODO confirm. */
type: 'end';
|
|
53
|
+
/** Optional free-text explanation for ending. */
reason?: string;
|
|
54
|
+
} | {
|
|
55
|
+
/** Signals that an extraction finished. */
type: 'extraction-complete';
|
|
56
|
+
/** Presumably the collected field values keyed by field name — TODO confirm. */
data: Record<string, unknown>;
|
|
57
|
+
};
|
|
58
|
+
/**
|
|
59
|
+
* A Capability is a self-contained unit of agent behavior that exposes
|
|
60
|
+
* its features as tools and prompt sections. Any LLM backend that supports
|
|
61
|
+
* tool calling can drive it.
|
|
62
|
+
*
|
|
63
|
+
* The Capability interface follows the Component pattern: each feature
|
|
64
|
+
* (flows, triage, extraction, handoffs, guardrails) is a capability that
|
|
65
|
+
* plugs into a CapabilityHost. The host collects tools, builds prompts,
|
|
66
|
+
* and routes tool results — the LLM backend is irrelevant.
|
|
67
|
+
*/
|
|
68
|
+
export interface Capability {
|
|
69
|
+
// CapabilityHost.getAllTools() calls this on every registered capability each time it runs.
/** What tools does this capability expose for the current state? */
|
|
70
|
+
getTools(): ToolDeclaration[];
|
|
71
|
+
// CapabilityHost.getSystemPrompt() calls this on every registered capability each time it runs.
/** What does this capability contribute to the system prompt? */
|
|
72
|
+
getPromptSections(): PromptSection[];
|
|
73
|
+
/**
|
|
74
|
+
* A tool was called and executed. Does this capability handle the result?
|
|
75
|
+
*
|
|
76
|
+
* @param toolName - The name of the tool that was called
|
|
77
|
+
* @param args - The arguments passed to the tool
|
|
78
|
+
* @param result - The result returned by the tool's execute()
|
|
79
|
+
* @returns An action telling the host what to do, or null if this
|
|
80
|
+
* capability doesn't handle this tool.
|
|
81
|
+
*/
|
|
82
|
+
// CapabilityHost asks capabilities in registration order and stops at the first truthy action.
processToolResult(toolName: string, args: unknown, result: unknown): CapabilityAction | null;
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Collects capabilities and exposes a unified interface for any LLM backend.
|
|
86
|
+
*
|
|
87
|
+
* Usage:
|
|
88
|
+
* ```typescript
|
|
89
|
+
* const host = new CapabilityHost()
|
|
90
|
+
* .use(new FlowCapability({ flow, initialNode: 'greeting' }))
|
|
91
|
+
* .use(new ExtractionCapability({ schema }))
|
|
92
|
+
* .use(new GuardrailCapability(policies));
|
|
93
|
+
*
|
|
94
|
+
* // For Gemini Live:
|
|
95
|
+
* const tools = toGeminiDeclarations(host.getAllTools());
|
|
96
|
+
* const prompt = host.getSystemPrompt(basePrompt);
|
|
97
|
+
*
|
|
98
|
+
* // For AI SDK:
|
|
99
|
+
* const tools = toAISDKTools(host.getAllTools());
|
|
100
|
+
* streamText({ system: prompt, tools });
|
|
101
|
+
* ```
|
|
102
|
+
*/
|
|
103
|
+
export declare class CapabilityHost {
|
|
104
|
+
/** Capabilities registered through `use()`, in registration order. */
private capabilities;
|
|
105
|
+
/** Tools added through `addTools()`. */
private regularTools;
|
|
106
|
+
/** Change counter; incremented by `use()`, `addTools()` and `notifyChanged()`. */
private _version;
|
|
107
|
+
/** Value of `_version` captured by the most recent `markConfigured()` call. */
private _lastCheckedVersion;
|
|
108
|
+
/** Add a capability (flow, triage, extraction, etc.). */
|
|
109
|
+
use(capability: Capability): this;
|
|
110
|
+
/** Add regular tools not managed by any capability. */
|
|
111
|
+
addTools(tools: ToolDeclaration[]): this;
|
|
112
|
+
/**
|
|
113
|
+
* Collect all tools from all capabilities + regular tools.
|
|
114
|
+
*
|
|
115
|
+
* Deduplicates by name — capability-provided tools take priority over
|
|
116
|
+
* regular (agent-level) tools. This prevents duplicate function
|
|
117
|
+
* declarations that providers like Gemini Live reject.
|
|
118
|
+
*/
|
|
119
|
+
getAllTools(): ToolDeclaration[];
|
|
120
|
+
/**
|
|
121
|
+
* Build complete system prompt from base prompt + all capability sections.
|
|
122
|
+
* Sections are ordered by role priority.
|
|
123
|
+
*/
|
|
124
|
+
getSystemPrompt(basePrompt?: string): string;
|
|
125
|
+
/**
|
|
126
|
+
* Route a tool result through capabilities. First capability to claim it wins.
|
|
127
|
+
* If no capability claims it, returns `{ type: 'continue' }` (regular tool).
|
|
128
|
+
*/
|
|
129
|
+
processToolResult(toolName: string, args: unknown, result: unknown): CapabilityAction;
|
|
130
|
+
/** Mark the current state as "seen" for reconfigure detection. */
|
|
131
|
+
markConfigured(): void;
|
|
132
|
+
/** Bump version to signal that tools or prompt have changed. */
|
|
133
|
+
notifyChanged(): void;
|
|
134
|
+
/** True if tools or prompt changed since last `markConfigured()` call. */
|
|
135
|
+
get needsReconfigure(): boolean;
|
|
136
|
+
/** Number of registered capabilities. */
|
|
137
|
+
get capabilityCount(): number;
|
|
138
|
+
}
|
|
139
|
+
export { FlowCapability } from './FlowCapability.js';
|
|
140
|
+
export type { FlowCapabilityConfig, FlowCapabilityState, FlowEvent } from './FlowCapability.js';
|
|
141
|
+
export { TriageCapability } from './TriageCapability.js';
|
|
142
|
+
export { ExtractionCapability } from './ExtractionCapability.js';
|
|
143
|
+
export type { ExtractionCapabilityConfig } from './ExtractionCapability.js';
|
|
144
|
+
export { HandoffCapability } from './HandoffCapability.js';
|
|
145
|
+
export type { HandoffTarget } from './HandoffCapability.js';
|
|
146
|
+
export { GuardrailCapability } from './GuardrailCapability.js';
|
|
147
|
+
export { AutoRetrieveCapability } from './AutoRetrieveCapability.js';
|
|
148
|
+
export type { AutoRetrieveCapabilityConfig, RetrieveProvider } from './AutoRetrieveCapability.js';
|
|
149
|
+
export { toGeminiDeclarations } from './adapters/gemini.js';
|
|
150
|
+
export type { GeminiFunctionDeclaration } from './adapters/gemini.js';
|
|
151
|
+
export { toAISDKTools } from './adapters/ai-sdk.js';
|
|
152
|
+
export { DefaultLivePromptAssembler, DEFAULT_VOICE_RULES, DEFAULT_GUARDRAILS } from './LivePromptAssembler.js';
|
|
153
|
+
export type { LivePromptAssembler, LivePromptContext, DefaultLivePromptAssemblerConfig, } from './LivePromptAssembler.js';
|
|
154
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/capabilities/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,KAAK,CAAC;AAItC;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,UAAU,CAAC;IACvB,OAAO,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,EAAE,OAAO,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;CAC7D;AAeD,MAAM,WAAW,aAAa;IAC5B;;;;OAIG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;CACjB;AAID;;;GAGG;AACH,MAAM,WAAW,8BAA8B;IAC7C,QAAQ,EAAE,IAAI,CAAC;IACf,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,mBAAmB,EAAE,MAAM,EAAE,CAAC;IAC9B,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,+EAA+E;AAC/E,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;IACX,iFAAiF;IACjF,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,MAAM,gBAAgB,GACxB;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,oBAAoB,CAAC,EAAE,8BAA8B,CAAA;CAAE,GAC3E;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,UAAU,CAAC,EAAE,yBAAyB,CAAA;CAAE,GAC/D;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GACzD;IAAE,IAAI,EAAE,KAAK,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAChC;IAAE,IAAI,EAAE,qBAAqB,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,CAAC;AAInE;;;;;;;;;GASG;AACH,MAAM,WAAW,UAAU;IACzB,oEAAoE;IACpE,QAAQ,IAAI,eAAe,EAAE,CAAC;IAE9B,iEAAiE;IACjE,iBAAiB,IAAI,aAAa,EAAE,CAAC;IAErC;;;;;;;;OAQG;IACH,iBAAiB,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,GAAG,gBAAgB,GAAG,IAAI,CAAC;CAC9F;AAID;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,YAAY,CAAyB;IAC7C,OAAO,CAAC,QAAQ,CAAK;IACrB,OAAO,CAAC,mBAAmB,CAAK;IAEhC,yDAAyD;IACzD,GAAG,CAAC,UAAU,EAAE,UAAU,GAAG,IAAI;IAMjC,uDAAuD;IACvD,QAAQ,CAAC,KAAK,EAAE,eAAe,EAAE,GAAG,IAAI;IAMxC;;;;;;OAMG;IACH,WAAW,IAAI,eAAe,EAAE;IAOhC;;;OAGG;IACH,eAAe,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM;IAK5C;;;OAGG;IACH,iBAAiB,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,GAAG,gBAAgB;IAQrF,kEAAkE;IAClE,cAAc,IAAI,IAAI;IAItB,gEAAgE;IAChE,aAAa,IAAI,IAAI;IAIrB,0EAA0E;IAC1E,IAAI,gBAAgB,IAAI,OAAO,CAE9B;I
AED,yCAAyC;IACzC,IAAI,eAAe,IAAI,MAAM,CAE5B;CACF;AA6BD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,YAAY,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAChG,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,oBAAoB,EAAE,MAAM,2BAA2B,CAAC;AACjE,YAAY,EAAE,0BAA0B,EAAE,MAAM,2BAA2B,CAAC;AAC5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3D,YAAY,EAAE,aAAa,EAAE,MAAM,wBAAwB,CAAC;AAC5D,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAC/D,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AACrE,YAAY,EAAE,4BAA4B,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAClG,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAC5D,YAAY,EAAE,yBAAyB,EAAE,MAAM,sBAAsB,CAAC;AACtE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,EAAE,0BAA0B,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC/G,YAAY,EACV,mBAAmB,EACnB,iBAAiB,EACjB,gCAAgC,GACjC,MAAM,0BAA0B,CAAC"}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
// ─── Prompt Section ──────────────────────────────────────────────────────────
|
|
2
|
+
/** Ordering priority for prompt sections (lower = earlier in prompt). */
|
|
3
|
+
const SECTION_ORDER = {
|
|
4
|
+
role: 0,
|
|
5
|
+
task: 1,
|
|
6
|
+
state: 2,
|
|
7
|
+
context: 3,
|
|
8
|
+
extraction: 4,
|
|
9
|
+
routing: 5,
|
|
10
|
+
policy: 6,
|
|
11
|
+
};
|
|
12
|
+
// ─── CapabilityHost ──────────────────────────────────────────────────────────
|
|
13
|
+
/**
|
|
14
|
+
* Collects capabilities and exposes a unified interface for any LLM backend.
|
|
15
|
+
*
|
|
16
|
+
* Usage:
|
|
17
|
+
* ```typescript
|
|
18
|
+
* const host = new CapabilityHost()
|
|
19
|
+
* .use(new FlowCapability({ flow, initialNode: 'greeting' }))
|
|
20
|
+
* .use(new ExtractionCapability({ schema }))
|
|
21
|
+
* .use(new GuardrailCapability(policies));
|
|
22
|
+
*
|
|
23
|
+
* // For Gemini Live:
|
|
24
|
+
* const tools = toGeminiDeclarations(host.getAllTools());
|
|
25
|
+
* const prompt = host.getSystemPrompt(basePrompt);
|
|
26
|
+
*
|
|
27
|
+
* // For AI SDK:
|
|
28
|
+
* const tools = toAISDKTools(host.getAllTools());
|
|
29
|
+
* streamText({ system: prompt, tools });
|
|
30
|
+
* ```
|
|
31
|
+
*/
|
|
32
|
+
export class CapabilityHost {
|
|
33
|
+
capabilities = [];
|
|
34
|
+
regularTools = [];
|
|
35
|
+
_version = 0;
|
|
36
|
+
_lastCheckedVersion = 0;
|
|
37
|
+
/** Add a capability (flow, triage, extraction, etc.). */
|
|
38
|
+
use(capability) {
|
|
39
|
+
this.capabilities.push(capability);
|
|
40
|
+
this._version++;
|
|
41
|
+
return this;
|
|
42
|
+
}
|
|
43
|
+
/** Add regular tools not managed by any capability. */
|
|
44
|
+
addTools(tools) {
|
|
45
|
+
this.regularTools.push(...tools);
|
|
46
|
+
this._version++;
|
|
47
|
+
return this;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Collect all tools from all capabilities + regular tools.
|
|
51
|
+
*
|
|
52
|
+
* Deduplicates by name — capability-provided tools take priority over
|
|
53
|
+
* regular (agent-level) tools. This prevents duplicate function
|
|
54
|
+
* declarations that providers like Gemini Live reject.
|
|
55
|
+
*/
|
|
56
|
+
getAllTools() {
|
|
57
|
+
const capabilityTools = this.capabilities.flatMap(c => c.getTools());
|
|
58
|
+
const seen = new Set(capabilityTools.map(t => t.name));
|
|
59
|
+
const dedupedRegular = this.regularTools.filter(t => !seen.has(t.name));
|
|
60
|
+
return [...capabilityTools, ...dedupedRegular];
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Build complete system prompt from base prompt + all capability sections.
|
|
64
|
+
* Sections are ordered by role priority.
|
|
65
|
+
*/
|
|
66
|
+
getSystemPrompt(basePrompt) {
|
|
67
|
+
const sections = this.capabilities.flatMap(c => c.getPromptSections());
|
|
68
|
+
return assemblePromptSections(basePrompt, sections);
|
|
69
|
+
}
|
|
70
|
+
/**
|
|
71
|
+
* Route a tool result through capabilities. First capability to claim it wins.
|
|
72
|
+
* If no capability claims it, returns `{ type: 'continue' }` (regular tool).
|
|
73
|
+
*/
|
|
74
|
+
processToolResult(toolName, args, result) {
|
|
75
|
+
for (const cap of this.capabilities) {
|
|
76
|
+
const action = cap.processToolResult(toolName, args, result);
|
|
77
|
+
if (action)
|
|
78
|
+
return action;
|
|
79
|
+
}
|
|
80
|
+
return { type: 'continue' };
|
|
81
|
+
}
|
|
82
|
+
/** Mark the current state as "seen" for reconfigure detection. */
|
|
83
|
+
markConfigured() {
|
|
84
|
+
this._lastCheckedVersion = this._version;
|
|
85
|
+
}
|
|
86
|
+
/** Bump version to signal that tools or prompt have changed. */
|
|
87
|
+
notifyChanged() {
|
|
88
|
+
this._version++;
|
|
89
|
+
}
|
|
90
|
+
/** True if tools or prompt changed since last `markConfigured()` call. */
|
|
91
|
+
get needsReconfigure() {
|
|
92
|
+
return this._version !== this._lastCheckedVersion;
|
|
93
|
+
}
|
|
94
|
+
/** Number of registered capabilities. */
|
|
95
|
+
get capabilityCount() {
|
|
96
|
+
return this.capabilities.length;
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
100
|
+
function assemblePromptSections(basePrompt, sections) {
|
|
101
|
+
const parts = [];
|
|
102
|
+
if (basePrompt) {
|
|
103
|
+
parts.push(basePrompt);
|
|
104
|
+
}
|
|
105
|
+
// Sort sections by role priority
|
|
106
|
+
const sorted = [...sections].sort((a, b) => {
|
|
107
|
+
const orderA = SECTION_ORDER[a.role] ?? 100;
|
|
108
|
+
const orderB = SECTION_ORDER[b.role] ?? 100;
|
|
109
|
+
return orderA - orderB;
|
|
110
|
+
});
|
|
111
|
+
for (const section of sorted) {
|
|
112
|
+
if (section.content.trim()) {
|
|
113
|
+
parts.push(section.content);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
return parts.join('\n\n');
|
|
117
|
+
}
|
|
118
|
+
// ─── Re-exports ──────────────────────────────────────────────────────────────
|
|
119
|
+
export { FlowCapability } from './FlowCapability.js';
|
|
120
|
+
export { TriageCapability } from './TriageCapability.js';
|
|
121
|
+
export { ExtractionCapability } from './ExtractionCapability.js';
|
|
122
|
+
export { HandoffCapability } from './HandoffCapability.js';
|
|
123
|
+
export { GuardrailCapability } from './GuardrailCapability.js';
|
|
124
|
+
export { AutoRetrieveCapability } from './AutoRetrieveCapability.js';
|
|
125
|
+
export { toGeminiDeclarations } from './adapters/gemini.js';
|
|
126
|
+
export { toAISDKTools } from './adapters/ai-sdk.js';
|
|
127
|
+
export { DefaultLivePromptAssembler, DEFAULT_VOICE_RULES, DEFAULT_GUARDRAILS } from './LivePromptAssembler.js';
|
|
128
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/capabilities/index.ts"],"names":[],"mappings":"AAeA,gFAAgF;AAEhF,yEAAyE;AACzE,MAAM,aAAa,GAA2B;IAC5C,IAAI,EAAE,CAAC;IACP,IAAI,EAAE,CAAC;IACP,KAAK,EAAE,CAAC;IACR,OAAO,EAAE,CAAC;IACV,UAAU,EAAE,CAAC;IACb,OAAO,EAAE,CAAC;IACV,MAAM,EAAE,CAAC;CACV,CAAC;AA0EF,gFAAgF;AAEhF;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,OAAO,cAAc;IACjB,YAAY,GAAiB,EAAE,CAAC;IAChC,YAAY,GAAsB,EAAE,CAAC;IACrC,QAAQ,GAAG,CAAC,CAAC;IACb,mBAAmB,GAAG,CAAC,CAAC;IAEhC,yDAAyD;IACzD,GAAG,CAAC,UAAsB;QACxB,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACnC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAChB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,uDAAuD;IACvD,QAAQ,CAAC,KAAwB;QAC/B,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;QACjC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAChB,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;;;;OAMG;IACH,WAAW;QACT,MAAM,eAAe,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;QACrE,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QACvD,MAAM,cAAc,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QACxE,OAAO,CAAC,GAAG,eAAe,EAAE,GAAG,cAAc,CAAC,CAAC;IACjD,CAAC;IAED;;;OAGG;IACH,eAAe,CAAC,UAAmB;QACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,iBAAiB,EAAE,CAAC,CAAC;QACvE,OAAO,sBAAsB,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;IACtD,CAAC;IAED;;;OAGG;IACH,iBAAiB,CAAC,QAAgB,EAAE,IAAa,EAAE,MAAe;QAChE,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACpC,MAAM,MAAM,GAAG,GAAG,CAAC,iBAAiB,CAAC,QAAQ,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;YAC7D,IAAI,MAAM;gBAAE,OAAO,MAAM,CAAC;QAC5B,CAAC;QACD,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;IAC9B,CAAC;IAED,kEAAkE;IAClE,cAAc;QACZ,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC,QAAQ,CAAC;IAC3C,CAAC;IAED,gEAAgE;IAChE,aAAa;QACX,IAAI,CAAC,QAAQ,EAAE,CAAC;IAClB,CAAC;IAED,0EAA0E;IAC1E,IAAI,gBAAgB;QAClB,OAAO,IAAI,CAAC,QAAQ,KAAK,IAAI,CAAC,mBAAmB,CAAC;IACpD,CAAC;IAED,yCAAyC;IACzC,IAAI,eAAe;QACjB,OAAO,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC;IAClC,CAAC;C
ACF;AAED,gFAAgF;AAEhF,SAAS,sBAAsB,CAAC,UAA8B,EAAE,QAAyB;IACvF,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,IAAI,UAAU,EAAE,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACzB,CAAC;IAED,iCAAiC;IACjC,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACzC,MAAM,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;QAC5C,MAAM,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;QAC5C,OAAO,MAAM,GAAG,MAAM,CAAC;IACzB,CAAC,CAAC,CAAC;IAEH,KAAK,MAAM,OAAO,IAAI,MAAM,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;YAC3B,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC;AAED,gFAAgF;AAEhF,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAErD,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,oBAAoB,EAAE,MAAM,2BAA2B,CAAC;AAEjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAE3D,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAC/D,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAErE,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAE5D,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,EAAE,0BAA0B,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { LanguageModel } from 'ai';
|
|
2
|
+
import type { HarnessHooks } from '../types/index.js';
|
|
3
|
+
import type { EvalScenario, ScenarioScore } from './types.js';
|
|
4
|
+
/** Runs an `EvalScenario` turn by turn against a `Runtime` and scores the outcome. */
export declare class EvalRunner {
|
|
5
|
+
/** Constructor options, kept for use by `runText()`. */
private config;
|
|
6
|
+
constructor(config: {
|
|
7
|
+
/** Passed to the `Runtime` as its `defaultModel`. */
model: LanguageModel;
|
|
8
|
+
/** Optional hooks forwarded to the `Runtime`. */
hooks?: HarnessHooks;
|
|
9
|
+
});
|
|
10
|
+
/**
 * Creates a fresh `Runtime` for `scenario.agent`, streams every turn in order
 * (reusing the session id reported by the previous turn), scores each turn and
 * resolves with the aggregated `ScenarioScore`.
 */
runText(scenario: EvalScenario): Promise<ScenarioScore>;
|
|
11
|
+
}
|
|
12
|
+
//# sourceMappingURL=EvalRunner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EvalRunner.d.ts","sourceRoot":"","sources":["../../src/eval/EvalRunner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAGxC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAa,MAAM,YAAY,CAAC;AAGzE,qBAAa,UAAU;IACT,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE;QAAE,KAAK,EAAE,aAAa,CAAC;QAAC,KAAK,CAAC,EAAE,YAAY,CAAA;KAAE;IAEpE,OAAO,CAAC,QAAQ,EAAE,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC;CAoE9D"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { Runtime } from '../runtime/Runtime.js';
|
|
2
|
+
import { getFlowState } from '../runtime/pipeline/sessionUtils.js';
|
|
3
|
+
import { aggregateScores, scoreTurn } from './scoring.js';
|
|
4
|
+
/**
 * Plays an evaluation scenario against a text `Runtime` and scores every turn.
 */
export class EvalRunner {
    /** Options supplied at construction time (`model` and optional `hooks`). */
    config;
    constructor(config) {
        this.config = config;
    }
    /**
     * Run all turns of a scenario in order and score them.
     *
     * A new `Runtime` is built for the scenario's agent. The session id reported
     * by a turn's `done` part is passed to the following turns so they share a
     * session.
     *
     * @param scenario - Scenario providing `agent`, `turns`, `name` and `mode`.
     * @returns The result of `aggregateScores()` over the per-turn scores.
     */
    async runText(scenario) {
        const { agent } = scenario;
        const runtime = new Runtime({
            agents: [agent],
            defaultAgentId: agent.id,
            defaultModel: this.config.model,
            hooks: this.config.hooks,
        });
        const turnScores = [];
        let sessionId;
        for (const [turnIndex, turn] of scenario.turns.entries()) {
            const toolsCalled = [];
            const flowTransitions = [];
            let response = '';
            const startedAt = Date.now();
            // Drain the stream, collecting text, tool names and flow transitions.
            for await (const part of runtime.stream({ input: turn.input, sessionId })) {
                switch (part.type) {
                    case 'text-delta':
                        response += part.text;
                        break;
                    case 'tool-call':
                        toolsCalled.push(part.toolName);
                        break;
                    case 'flow-transition':
                        flowTransitions.push({ from: part.from, to: part.to });
                        break;
                    case 'done':
                        sessionId = part.sessionId;
                        break;
                    default:
                        break;
                }
            }
            const latencyMs = Date.now() - startedAt;
            // Only read the session back when the turn has extraction expectations.
            let extractionSnapshot;
            if (sessionId && turn.expect?.extractionFields) {
                const session = await runtime.getSession(sessionId);
                const collected = session
                    ? getFlowState(session, agent.id)?.context?.collectedData
                    : undefined;
                if (collected && typeof collected === 'object') {
                    extractionSnapshot = { ...collected };
                }
            }
            const checks = scoreTurn(turn.expect, response, toolsCalled, flowTransitions, latencyMs, extractionSnapshot);
            turnScores.push({
                turnIndex,
                input: turn.input,
                response: response.trim(),
                // `every` is true for an empty list, so a turn without checks passes.
                passed: checks.every(check => check.passed),
                checks,
                latencyMs,
                toolsCalled,
                flowTransitions,
            });
        }
        return aggregateScores(scenario.name, scenario.mode, turnScores);
    }
}
|
|
64
|
+
//# sourceMappingURL=EvalRunner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EvalRunner.js","sourceRoot":"","sources":["../../src/eval/EvalRunner.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAE,MAAM,uBAAuB,CAAC;AAChD,OAAO,EAAE,YAAY,EAAE,MAAM,qCAAqC,CAAC;AAGnE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAE1D,MAAM,OAAO,UAAU;IACD;IAApB,YAAoB,MAAsD;QAAtD,WAAM,GAAN,MAAM,CAAgD;IAAG,CAAC;IAE9E,KAAK,CAAC,OAAO,CAAC,QAAsB;QAClC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC;QAC7B,MAAM,OAAO,GAAG,IAAI,OAAO,CAAC;YAC1B,MAAM,EAAE,CAAC,KAAK,CAAC;YACf,cAAc,EAAE,KAAK,CAAC,EAAE;YACxB,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;YAC/B,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;SACzB,CAAC,CAAC;QAEH,IAAI,SAA6B,CAAC;QAClC,MAAM,UAAU,GAAgB,EAAE,CAAC;QAEnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC;YAChC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,IAAI,QAAQ,GAAG,EAAE,CAAC;YAClB,MAAM,WAAW,GAAa,EAAE,CAAC;YACjC,MAAM,WAAW,GAAwC,EAAE,CAAC;YAE5D,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;gBAC1E,IAAI,IAAI,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;oBAC/B,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC;gBACxB,CAAC;gBACD,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;oBAC9B,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAClC,CAAC;gBACD,IAAI,IAAI,CAAC,IAAI,KAAK,iBAAiB,EAAE,CAAC;oBACpC,WAAW,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;gBACrD,CAAC;gBACD,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;oBACzB,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;gBAC7B,CAAC;YACH,CAAC;YAED,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACzC,IAAI,kBAAuD,CAAC;YAC5D,IAAI,SAAS,IAAI,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,CAAC;gBAC/C,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;gBACpD,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;gBACnE,MAAM,IAAI,GAAG,IAAI,EAAE,OAAO,EAAE,aAAa,CAAC;gBAC1C,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;oBACrC,kBAAkB,GAAG,EAAE,GAAG,IAAI,EAA6B,CAAC;gBAC9D,CA
AC;YACH,CAAC;YAED,MAAM,MAAM,GAAG,SAAS,CACtB,IAAI,CAAC,MAAM,EACX,QAAQ,EACR,WAAW,EACX,WAAW,EACX,SAAS,EACT,kBAAkB,CACnB,CAAC;YAEF,UAAU,CAAC,IAAI,CAAC;gBACd,SAAS,EAAE,CAAC;gBACZ,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE;gBACzB,MAAM,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;gBAChE,MAAM;gBACN,SAAS;gBACT,WAAW;gBACX,eAAe,EAAE,WAAW;aAC7B,CAAC,CAAC;QACL,CAAC;QAED,OAAO,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;IACnE,CAAC;CACF"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { EvalTurn, ScenarioScore, TurnScore } from './types.js';
|
|
2
|
+
/**
 * Score a single turn against expectations.
 *
 * @param expect - Expectations declared for the turn; when omitted no checks are produced.
 * @param response - Response text that substring expectations are matched against.
 * @param toolsCalled - Names of the tools observed during the turn.
 * @param transitions - Flow transitions (`from` → `to`) observed during the turn.
 * @param latencyMs - Measured latency of the turn, in milliseconds.
 * @param extractionSnapshot — flow `context.collectedData` after the turn (optional).
 * @returns One `{ name, passed, detail }` entry per evaluated expectation.
 */
export declare function scoreTurn(
    expect: EvalTurn['expect'],
    response: string,
    toolsCalled: string[],
    transitions: { from: string; to: string }[],
    latencyMs: number,
    extractionSnapshot?: Record<string, unknown>,
): { name: string; passed: boolean; detail: string }[];
|
|
14
|
+
/**
 * Combine per-turn scores into a single scenario-level score.
 *
 * The result carries the turn scores through unchanged and adds aggregate
 * figures (pass rate, turn counts, latency statistics, and tool-call and
 * extraction accuracy). The scenario passes only when no turn failed.
 *
 * @param scenarioName - Name reported as `scenario` in the result.
 * @param mode - Mode reported in the result.
 * @param turnScores - Scores of the individual turns.
 * @returns The aggregated scenario score.
 */
export declare function aggregateScores(scenarioName: string, mode: 'text' | 'voice', turnScores: TurnScore[]): ScenarioScore;
|
|
15
|
+
//# sourceMappingURL=scoring.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scoring.d.ts","sourceRoot":"","sources":["../../src/eval/scoring.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAUrE;;;GAGG;AACH,wBAAgB,SAAS,CACvB,MAAM,EAAE,QAAQ,CAAC,QAAQ,CAAC,EAC1B,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,MAAM,EAAE,EACrB,WAAW,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC,EAChD,SAAS,EAAE,MAAM,EACjB,kBAAkB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC3C,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,CAoG1D;AAsBD,wBAAgB,eAAe,CAC7B,YAAY,EAAE,MAAM,EACpB,IAAI,EAAE,MAAM,GAAG,OAAO,EACtB,UAAU,EAAE,SAAS,EAAE,GACtB,aAAa,CA8Bf"}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
 * Compare an observed value with an expected value.
 *
 * Identical values (`===`) are equal. Two non-null objects (arrays included)
 * are compared through their JSON serialization, with the keys of every plain
 * object sorted first so that `{ a: 1, b: 2 }` and `{ b: 2, a: 1 }` are equal.
 * The order of array elements remains significant. Any other combination
 * (for example a primitive versus an object) is unequal.
 *
 * @param a - First value.
 * @param b - Second value.
 * @returns `true` when the two values are considered equal.
 */
function valuesEqual(a, b) {
    if (a === b)
        return true;
    if (typeof a !== 'object' || a === null || typeof b !== 'object' || b === null) {
        return false;
    }
    // JSON.stringify emits keys in insertion order, which would make the
    // comparison depend on how each object happened to be built. Rebuild every
    // plain object with sorted keys before it is serialized.
    const withSortedKeys = (_key, value) => {
        if (typeof value !== 'object' || value === null || Array.isArray(value)) {
            return value;
        }
        return Object.fromEntries(Object.keys(value).sort().map((k) => [k, value[k]]));
    };
    return JSON.stringify(a, withSortedKeys) === JSON.stringify(b, withSortedKeys);
}
|
|
9
|
+
/**
 * Evaluate one conversation turn against the expectations declared for it.
 *
 * Checks are appended in a fixed order: required tools (`tool:<name>`),
 * forbidden tools (`no-tool:<name>`), flow transition (`transition`),
 * extraction fields (`extraction:<key>`, or one failing `extraction` check
 * when fields are expected but no snapshot was supplied), required substrings
 * (`contains:<phrase>`), forbidden substrings (`notContains:<phrase>`) and
 * latency (`latency`). Only expectations that are set produce checks.
 *
 * @param expect - Expectations for the turn; when falsy no checks are produced.
 * @param response - Response text searched for the expected/forbidden phrases.
 * @param toolsCalled - Names of the tools observed during the turn.
 * @param transitions - Flow transitions observed during the turn.
 * @param latencyMs - Measured latency, compared against `expect.maxLatencyMs`.
 * @param extractionSnapshot — flow `context.collectedData` after the turn (optional).
 * @returns The list of `{ name, passed, detail }` check results.
 */
export function scoreTurn(expect, response, toolsCalled, transitions, latencyMs, extractionSnapshot) {
    const results = [];
    if (!expect) {
        return results;
    }
    const report = (name, passed, detail) => {
        results.push({ name, passed, detail });
    };
    const observedTools = new Set(toolsCalled);

    // Tools that must have been called.
    for (const required of expect.toolCalls || []) {
        const wasCalled = observedTools.has(required);
        report(`tool:${required}`, wasCalled, wasCalled
            ? `Observed tool call "${required}"`
            : `Missing tool call "${required}"; got: ${Array.from(observedTools).join(', ') || '(none)'}`);
    }

    // Tools that must not have been called.
    for (const forbidden of expect.noToolCalls || []) {
        const stayedUnused = !observedTools.has(forbidden);
        report(`no-tool:${forbidden}`, stayedUnused, stayedUnused
            ? `Tool "${forbidden}" was not called`
            : `Tool "${forbidden}" was called but should not have been`);
    }

    // A specific flow transition must appear among the observed ones.
    if (expect.flowTransition) {
        const wanted = expect.flowTransition;
        const from = wanted.from;
        const to = wanted.to;
        const matched = transitions.some((step) => step.from === from && step.to === to);
        let detail;
        if (matched) {
            detail = `Saw transition ${from} → ${to}`;
        }
        else {
            const observed = transitions.length
                ? transitions.map((step) => `${step.from}→${step.to}`).join('; ')
                : '(none)';
            detail = `Expected transition ${from} → ${to}; saw: ${observed}`;
        }
        report('transition', matched, detail);
    }

    // Extracted fields: compared key by key when a snapshot exists, otherwise
    // a single failing check records that nothing could be compared.
    if (expect.extractionFields) {
        if (extractionSnapshot) {
            for (const [field, wantedValue] of Object.entries(expect.extractionFields)) {
                const observedValue = extractionSnapshot[field];
                const same = valuesEqual(observedValue, wantedValue);
                report(`extraction:${field}`, same, same
                    ? `Field "${field}" matches expected`
                    : `Field "${field}": expected ${JSON.stringify(wantedValue)}, got ${JSON.stringify(observedValue)}`);
            }
        }
        else {
            report('extraction', false, 'Expected extractionFields checks but no flow extraction snapshot was available');
        }
    }

    // Substrings the response must contain.
    for (const phrase of expect.responseContains || []) {
        const found = response.includes(phrase);
        report(`contains:${phrase}`, found, found
            ? `Response contains "${phrase}"`
            : `Response missing substring "${phrase}"`);
    }

    // Substrings the response must not contain.
    for (const phrase of expect.responseNotContains || []) {
        const absent = !response.includes(phrase);
        report(`notContains:${phrase}`, absent, absent
            ? `Response does not contain "${phrase}"`
            : `Response should not contain "${phrase}"`);
    }

    // Latency budget (inclusive upper bound).
    if (expect.maxLatencyMs !== undefined) {
        const limit = expect.maxLatencyMs;
        const withinBudget = latencyMs <= limit;
        report('latency', withinBudget, withinBudget
            ? `Latency ${latencyMs}ms within ${limit}ms`
            : `Latency ${latencyMs}ms exceeds max ${limit}ms`);
    }
    return results;
}
|
|
104
|
+
/**
 * Compute the share of passed checks among those whose name starts with one
 * of the given prefixes.
 *
 * @param turns - Turn scores; each contributes its `checks` list.
 * @param prefixes - Name prefixes selecting which checks are counted.
 * @returns `passed / total` over the selected checks, or `1` when no check
 *   matches any prefix.
 */
function accuracyForPrefix(turns, prefixes) {
    const selected = turns
        .flatMap((turn) => turn.checks)
        .filter((check) => prefixes.some((prefix) => check.name.startsWith(prefix)));
    if (selected.length === 0) {
        return 1;
    }
    const passedCount = selected.filter((check) => check.passed).length;
    return passedCount / selected.length;
}
|
|
118
|
+
/**
 * Pick a percentile from an ascending-sorted list of numbers.
 *
 * The position `(p / 100) * (length - 1)` is rounded to the nearest index and
 * the element at that index is returned (no interpolation).
 *
 * @param sorted - Values sorted in ascending order.
 * @param p - Requested percentile; values outside 0–100 are clamped to that
 *   range so the computed index always lies inside the list.
 * @returns The selected element, or `0` for an empty list.
 */
function percentile(sorted, p) {
    if (sorted.length === 0)
        return 0;
    // Without clamping, p > 100 or p < 0 would index outside the array and
    // yield undefined.
    const bounded = Math.min(100, Math.max(0, p));
    const pos = (bounded / 100) * (sorted.length - 1);
    return sorted[Math.round(pos)];
}
|
|
124
|
+
/**
 * Combine per-turn scores into a single scenario-level score.
 *
 * @param scenarioName - Name reported as `scenario` in the result.
 * @param mode - Mode reported in the result.
 * @param turnScores - Scores of the individual turns; returned unchanged as
 *   `turnScores` in the result.
 * @returns The scenario score. `passed` is true only when no turn failed.
 *   With zero turns the pass rate is `1` and the latency figures are `0`.
 */
export function aggregateScores(scenarioName, mode, turnScores) {
    const totalTurns = turnScores.length;
    const passedTurns = turnScores.filter(t => t.passed).length;
    const failedTurns = totalTurns - passedTurns;
    const passRate = totalTurns === 0 ? 1 : passedTurns / totalTurns;
    // map() produces a fresh array, so sorting it leaves turnScores untouched.
    const latencies = turnScores.map(t => t.latencyMs).sort((a, b) => a - b);
    const sum = latencies.reduce((a, b) => a + b, 0);
    const avgLatencyMs = totalTurns === 0 ? 0 : sum / totalTurns;
    const toolCallAccuracy = accuracyForPrefix(turnScores, ['tool:', 'no-tool:']);
    // scoreTurn names per-field checks `extraction:<key>` but reports a missing
    // extraction snapshot as a single failing check named `extraction` (no
    // colon). Matching on the bare `extraction` prefix counts both, so a
    // missing snapshot no longer shows up as 100% extraction accuracy.
    const extractionAccuracy = accuracyForPrefix(turnScores, ['extraction']);
    return {
        scenario: scenarioName,
        mode,
        passed: failedTurns === 0,
        turnScores,
        aggregate: {
            passRate,
            totalTurns,
            passedTurns,
            failedTurns,
            avgLatencyMs,
            p50LatencyMs: percentile(latencies, 50),
            p95LatencyMs: percentile(latencies, 95),
            toolCallAccuracy,
            extractionAccuracy,
        },
    };
}
|
|
152
|
+
//# sourceMappingURL=scoring.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scoring.js","sourceRoot":"","sources":["../../src/eval/scoring.ts"],"names":[],"mappings":"AAEA,SAAS,WAAW,CAAC,CAAU,EAAE,CAAU;IACzC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACzB,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,IAAI,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;QAC/E,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;IACjD,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,SAAS,CACvB,MAA0B,EAC1B,QAAgB,EAChB,WAAqB,EACrB,WAAgD,EAChD,SAAiB,EACjB,kBAA4C;IAE5C,MAAM,MAAM,GAA6D,EAAE,CAAC;IAC5E,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC;IAErC,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;YACpC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,QAAQ,IAAI,EAAE;gBACpB,MAAM;gBACN,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,uBAAuB,IAAI,GAAG,CAAC,CAAC,CAAC,sBAAsB,IAAI,WAAW,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,QAAQ,EAAE;aAC7H,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC;QACvB,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC;YACtC,MAAM,MAAM,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,WAAW,IAAI,EAAE;gBACvB,MAAM;gBACN,MAAM,EAAE,MAAM;oBACZ,CAAC,CAAC,SAAS,IAAI,kBAAkB;oBACjC,CAAC,CAAC,SAAS,IAAI,uCAAuC;aACzD,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;QAC1B,MAAM,EAAE,IAAI,EAAE,EAAE,EAAE,GAAG,MAAM,CAAC,cAAc,CAAC;QAC3C,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,IAAI,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;QACrE,MAAM,CAAC,IAAI,CAAC;YACV,IAAI,EAAE,YAAY;YAClB,MAAM;YACN,MAAM,EAAE,MAAM;gBACZ,CAAC,CAAC,kBAAkB,IAAI,MAAM,EAAE,EAAE;gBAClC,CAAC,CAAC,uBAAuB,IAAI,MAAM,EAAE,UACnC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,QAC9E,EAAE;SACL,CAAC,CAAC;IACL,CAAC;IAED,IAAI,MAAM
,CAAC,gBAAgB,IAAI,kBAAkB,EAAE,CAAC;QAClD,KAAK,MAAM,CAAC,GAAG,EAAE,WAAW,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC,EAAE,CAAC;YACzE,MAAM,MAAM,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;YACvC,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;YAChD,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,cAAc,GAAG,EAAE;gBACzB,MAAM;gBACN,MAAM,EAAE,MAAM;oBACZ,CAAC,CAAC,UAAU,GAAG,oBAAoB;oBACnC,CAAC,CAAC,UAAU,GAAG,eAAe,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE;aAC7F,CAAC,CAAC;QACL,CAAC;IACH,CAAC;SAAM,IAAI,MAAM,CAAC,gBAAgB,IAAI,CAAC,kBAAkB,EAAE,CAAC;QAC1D,MAAM,CAAC,IAAI,CAAC;YACV,IAAI,EAAE,YAAY;YAClB,MAAM,EAAE,KAAK;YACb,MAAM,EAAE,gFAAgF;SACzF,CAAC,CAAC;IACL,CAAC;IAED,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;QAC5B,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;YAC7C,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACzC,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,YAAY,MAAM,EAAE;gBAC1B,MAAM;gBACN,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,sBAAsB,MAAM,GAAG,CAAC,CAAC,CAAC,+BAA+B,MAAM,GAAG;aAC5F,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,mBAAmB,EAAE,CAAC;QAC/B,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,mBAAmB,EAAE,CAAC;YAChD,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC1C,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,eAAe,MAAM,EAAE;gBAC7B,MAAM;gBACN,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,8BAA8B,MAAM,GAAG,CAAC,CAAC,CAAC,gCAAgC,MAAM,GAAG;aACrG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,YAAY,KAAK,SAAS,EAAE,CAAC;QACtC,MAAM,MAAM,GAAG,SAAS,IAAI,MAAM,CAAC,YAAY,CAAC;QAChD,MAAM,CAAC,IAAI,CAAC;YACV,IAAI,EAAE,SAAS;YACf,MAAM;YACN,MAAM,EAAE,MAAM;gBACZ,CAAC,CAAC,WAAW,SAAS,aAAa,MAAM,CAAC,YAAY,IAAI;gBAC1D,CAAC,CAAC,WAAW,SAAS,kBAAkB,MAAM,CAAC,YAAY,IAAI;SAClE,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,iBAAiB,CAAC,KAAkB,EAAE,QAAkB;IAC/D,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;YACzB,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC7C,KAAK,EAAE,CAAC;gBA
CR,IAAI,CAAC,CAAC,MAAM;oBAAE,MAAM,EAAE,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC;AAC1C,CAAC;AAED,SAAS,UAAU,CAAC,MAAgB,EAAE,CAAS;IAC7C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAClC,MAAM,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC5C,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAE,CAAC;AAClC,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,YAAoB,EACpB,IAAsB,EACtB,UAAuB;IAEvB,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC;IACrC,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC5D,MAAM,WAAW,GAAG,UAAU,GAAG,WAAW,CAAC;IAC7C,MAAM,QAAQ,GAAG,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,UAAU,CAAC;IAEjE,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACzE,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IACjD,MAAM,YAAY,GAAG,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,UAAU,CAAC;IAE7D,MAAM,gBAAgB,GAAG,iBAAiB,CAAC,UAAU,EAAE,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC;IAC9E,MAAM,kBAAkB,GAAG,iBAAiB,CAAC,UAAU,EAAE,CAAC,aAAa,CAAC,CAAC,CAAC;IAE1E,OAAO;QACL,QAAQ,EAAE,YAAY;QACtB,IAAI;QACJ,MAAM,EAAE,WAAW,KAAK,CAAC;QACzB,UAAU;QACV,SAAS,EAAE;YACT,QAAQ;YACR,UAAU;YACV,WAAW;YACX,WAAW;YACX,YAAY;YACZ,YAAY,EAAE,UAAU,CAAC,SAAS,EAAE,EAAE,CAAC;YACvC,YAAY,EAAE,UAAU,CAAC,SAAS,EAAE,EAAE,CAAC;YACvC,gBAAgB;YAChB,kBAAkB;SACnB;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import type { AgentConfig, FlowAgentConfig } from '../types/index.js';
|
|
2
|
+
/** A scripted evaluation scenario: an agent configuration plus the turns to run against it. */
export interface EvalScenario {
    /** Name identifying the scenario. */
    name: string;
    /** Optional free-form description. */
    description?: string;
    /** Configuration of the agent under evaluation. */
    agent: AgentConfig | FlowAgentConfig;
    /** Turns of the scenario, each with optional expectations. */
    turns: Array<EvalTurn>;
    /** Evaluation mode. */
    mode: 'text' | 'voice';
}
|
|
9
|
+
/** One turn of an evaluation scenario, with optional expectations to score it against. */
export interface EvalTurn {
    /** Input text for the turn. */
    input: string;
    /** NOTE(review): presumably a reference to an audio fixture for voice mode — confirm against the runner. */
    audioFixture?: string;
    /** Expectations checked by `scoreTurn`; when omitted the turn produces no checks. */
    expect?: {
        /** Tool names that must each appear among the tools called during the turn. */
        toolCalls?: Array<string>;
        /** Tool names that must not appear among the tools called during the turn. */
        noToolCalls?: Array<string>;
        /** A transition that must match (both `from` and `to`) one of the observed flow transitions. */
        flowTransition?: {
            from: string;
            to: string;
        };
        /** Expected values, keyed by field name, compared against the extraction snapshot. */
        extractionFields?: Record<string, unknown>;
        /** Substrings that must each occur in the response. */
        responseContains?: Array<string>;
        /** Substrings that must not occur in the response. */
        responseNotContains?: Array<string>;
        /** Upper bound (inclusive) on the turn latency, in milliseconds. */
        maxLatencyMs?: number;
    };
}
|
|
25
|
+
/** Scenario-level result produced by `aggregateScores`. */
export interface ScenarioScore {
    /** Name of the scenario. */
    scenario: string;
    /** Mode the scenario was scored under. */
    mode: 'text' | 'voice';
    /** True only when no turn failed. */
    passed: boolean;
    /** The per-turn scores the aggregate was computed from. */
    turnScores: Array<TurnScore>;
    /** Figures computed across all turns. */
    aggregate: {
        /** `passedTurns / totalTurns`; `1` when there are no turns. */
        passRate: number;
        /** Number of turns scored. */
        totalTurns: number;
        /** Number of turns marked as passed. */
        passedTurns: number;
        /** `totalTurns - passedTurns`. */
        failedTurns: number;
        /** Mean turn latency in milliseconds; `0` when there are no turns. */
        avgLatencyMs: number;
        /** 50th-percentile turn latency in milliseconds; `0` when there are no turns. */
        p50LatencyMs: number;
        /** 95th-percentile turn latency in milliseconds; `0` when there are no turns. */
        p95LatencyMs: number;
        /** Fraction of tool-call checks that passed; `1` when there are none. */
        toolCallAccuracy: number;
        /** Fraction of extraction checks that passed; `1` when there are none. */
        extractionAccuracy: number;
    };
}
|
|
42
|
+
/** Score recorded for a single turn of a scenario. */
export interface TurnScore {
    /** Index of the turn within the scenario. */
    turnIndex: number;
    /** Input of the turn. */
    input: string;
    /** Response recorded for the turn. */
    response: string;
    /** Whether the turn counts as passed in the scenario aggregate. */
    passed: boolean;
    /** Individual check results, in the `{ name, passed, detail }` shape returned by `scoreTurn`. */
    checks: { name: string; passed: boolean; detail: string }[];
    /** Latency of the turn in milliseconds; feeds the aggregate latency statistics. */
    latencyMs: number;
    /** Names of the tools called during the turn. */
    toolsCalled: Array<string>;
    /** Flow transitions observed during the turn. */
    flowTransitions: { from: string; to: string }[];
}
|
|
59
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/eval/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEtE,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,WAAW,GAAG,eAAe,CAAC;IACrC,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE;QACP,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;QACvB,cAAc,CAAC,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,EAAE,EAAE,MAAM,CAAA;SAAE,CAAC;QAC9C,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;QAC5B,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;QAC/B,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,CAAC;CACH;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,SAAS,EAAE;QACT,QAAQ,EAAE,MAAM,CAAC;QACjB,UAAU,EAAE,MAAM,CAAC;QACnB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC;QACrB,gBAAgB,EAAE,MAAM,CAAC;QACzB,kBAAkB,EAAE,MAAM,CAAC;KAC5B,CAAC;CACH;AAED,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACjE,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,EAAE,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACtD"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/eval/types.ts"],"names":[],"mappings":""}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ContextStrategy, FlowConfig, FlowNodeConfig, FlowTransitionConfig } from '../types/index.js';
|
|
1
|
+
import type { ContextStrategy, ExtractionNodeConfig, FlowConfig, FlowNodeConfig, FlowTransitionConfig } from '../types/index.js';
|
|
2
2
|
import { FlowNode } from './FlowNode.js';
|
|
3
3
|
export declare class FlowGraph {
|
|
4
4
|
private nodes;
|
|
@@ -15,6 +15,8 @@ export declare class FlowGraph {
|
|
|
15
15
|
export declare class FlowGraphBuilder {
|
|
16
16
|
private graph;
|
|
17
17
|
node(id: string, config: Omit<FlowNodeConfig, 'id'>): this;
|
|
18
|
+
/** Add an extraction node that loops until a Zod schema is fully satisfied. */
|
|
19
|
+
extractionNode(id: string, config: Omit<ExtractionNodeConfig, 'id' | 'nodeType'>): this;
|
|
18
20
|
transition(from: string, to: string, config?: Omit<FlowTransitionConfig, 'from' | 'to'>): this;
|
|
19
21
|
defaultRolePrompt(prompt: string): this;
|
|
20
22
|
contextStrategy(strategy: ContextStrategy): this;
|