@botpress/adk 1.15.4 → 1.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-init/agent-project-generator.d.ts +1 -1
- package/dist/agent-init/agent-project-generator.d.ts.map +1 -1
- package/dist/agent-project/config-writer.d.ts +10 -0
- package/dist/agent-project/config-writer.d.ts.map +1 -1
- package/dist/agent-project/index.d.ts +1 -0
- package/dist/agent-project/index.d.ts.map +1 -1
- package/dist/config/coerce-config-value.d.ts +18 -0
- package/dist/config/coerce-config-value.d.ts.map +1 -0
- package/dist/config/index.d.ts +1 -0
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/manager.d.ts +25 -0
- package/dist/config/manager.d.ts.map +1 -1
- package/dist/eval/client.d.ts +25 -0
- package/dist/eval/client.d.ts.map +1 -0
- package/dist/eval/graders/index.d.ts +12 -0
- package/dist/eval/graders/index.d.ts.map +1 -0
- package/dist/eval/graders/llm.d.ts +18 -0
- package/dist/eval/graders/llm.d.ts.map +1 -0
- package/dist/eval/graders/match.d.ts +11 -0
- package/dist/eval/graders/match.d.ts.map +1 -0
- package/dist/eval/graders/outcome.d.ts +16 -0
- package/dist/eval/graders/outcome.d.ts.map +1 -0
- package/dist/eval/graders/response.d.ts +9 -0
- package/dist/eval/graders/response.d.ts.map +1 -0
- package/dist/eval/graders/state.d.ts +16 -0
- package/dist/eval/graders/state.d.ts.map +1 -0
- package/dist/eval/graders/tables.d.ts +8 -0
- package/dist/eval/graders/tables.d.ts.map +1 -0
- package/dist/eval/graders/tools.d.ts +7 -0
- package/dist/eval/graders/tools.d.ts.map +1 -0
- package/dist/eval/graders/workflow.d.ts +7 -0
- package/dist/eval/graders/workflow.d.ts.map +1 -0
- package/dist/eval/index.d.ts +10 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/loader.d.ts +10 -0
- package/dist/eval/loader.d.ts.map +1 -0
- package/dist/eval/runner.d.ts +15 -0
- package/dist/eval/runner.d.ts.map +1 -0
- package/dist/eval/store.d.ts +10 -0
- package/dist/eval/store.d.ts.map +1 -0
- package/dist/eval/traces.d.ts +19 -0
- package/dist/eval/traces.d.ts.map +1 -0
- package/dist/eval/types.d.ts +202 -0
- package/dist/eval/types.d.ts.map +1 -0
- package/dist/index.d.ts +7 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1569 -1616
- package/dist/index.js.map +23 -8
- package/package.json +3 -2
|
@@ -13,7 +13,7 @@ export declare class AgentProjectGenerator {
|
|
|
13
13
|
private createAgentJson;
|
|
14
14
|
private createGitIgnore;
|
|
15
15
|
private createReadme;
|
|
16
|
-
private
|
|
16
|
+
private createAIAssistantInstructions;
|
|
17
17
|
private createSourceStructure;
|
|
18
18
|
private writeFile;
|
|
19
19
|
private writeFormattedFile;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent-project-generator.d.ts","sourceRoot":"","sources":["../../src/agent-init/agent-project-generator.ts"],"names":[],"mappings":"AAUA,qBAAa,qBAAqB;IAChC,OAAO,CAAC,WAAW,CAAQ;IAC3B,OAAO,CAAC,WAAW,CAAQ;IAC3B,OAAO,CAAC,cAAc,CAAQ;IAC9B,OAAO,CAAC,QAAQ,CAAQ;gBAEZ,WAAW,EAAE,MAAM,EAAE,cAAc,GAAE,MAAc,EAAE,QAAQ,GAAE,MAAgB;IAOrF,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAY/B,OAAO,CAAC,oBAAoB;IAa5B,OAAO,CAAC,iBAAiB;IA4BzB,OAAO,CAAC,sBAAsB;YAsBhB,iBAAiB;IAmC/B,OAAO,CAAC,cAAc;IA2BtB,OAAO,CAAC,eAAe;IAKvB,OAAO,CAAC,eAAe;YAqCT,YAAY;IA+C1B,OAAO,CAAC,
|
|
1
|
+
{"version":3,"file":"agent-project-generator.d.ts","sourceRoot":"","sources":["../../src/agent-init/agent-project-generator.ts"],"names":[],"mappings":"AAUA,qBAAa,qBAAqB;IAChC,OAAO,CAAC,WAAW,CAAQ;IAC3B,OAAO,CAAC,WAAW,CAAQ;IAC3B,OAAO,CAAC,cAAc,CAAQ;IAC9B,OAAO,CAAC,QAAQ,CAAQ;gBAEZ,WAAW,EAAE,MAAM,EAAE,cAAc,GAAE,MAAc,EAAE,QAAQ,GAAE,MAAgB;IAOrF,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAY/B,OAAO,CAAC,oBAAoB;IAa5B,OAAO,CAAC,iBAAiB;IA4BzB,OAAO,CAAC,sBAAsB;YAsBhB,iBAAiB;IAmC/B,OAAO,CAAC,cAAc;IA2BtB,OAAO,CAAC,eAAe;IAKvB,OAAO,CAAC,eAAe;YAqCT,YAAY;IA+C1B,OAAO,CAAC,6BAA6B;YAMvB,qBAAqB;IAwCnC,OAAO,CAAC,SAAS;YAKH,kBAAkB;IAKhC,OAAO,CAAC,aAAa;CAGtB"}
|
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
import type { Dependencies } from './types';
|
|
2
|
+
export interface ConfigSchemaFieldUpdate {
|
|
3
|
+
action: 'add' | 'update' | 'remove';
|
|
4
|
+
field: string;
|
|
5
|
+
/** Raw TS expression for the field type, e.g. "z.string().default('sk-...')" */
|
|
6
|
+
definition?: string;
|
|
7
|
+
}
|
|
2
8
|
/**
|
|
3
9
|
* ConfigWriter handles writing updates to agent.config.ts
|
|
4
10
|
*
|
|
@@ -20,5 +26,9 @@ export declare class ConfigWriter {
|
|
|
20
26
|
* Updates the name field in agent.config.ts
|
|
21
27
|
*/
|
|
22
28
|
updateName(name: string): Promise<void>;
|
|
29
|
+
/**
|
|
30
|
+
* Adds, updates, or removes fields inside configuration.schema (the z.object({...}))
|
|
31
|
+
*/
|
|
32
|
+
updateConfiguration(updates: ConfigSchemaFieldUpdate[]): Promise<void>;
|
|
23
33
|
}
|
|
24
34
|
//# sourceMappingURL=config-writer.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config-writer.d.ts","sourceRoot":"","sources":["../../src/agent-project/config-writer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAG3C;;;;;GAKG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,UAAU,CAAQ;gBAEd,WAAW,EAAE,MAAM;IAI/B,OAAO,CAAC,UAAU;YAoBJ,UAAU;IAQxB;;;;OAIG;IACG,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAoBnE;;OAEG;IACG,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"config-writer.d.ts","sourceRoot":"","sources":["../../src/agent-project/config-writer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAG3C,MAAM,WAAW,uBAAuB;IACtC,MAAM,EAAE,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAA;IACnC,KAAK,EAAE,MAAM,CAAA;IACb,gFAAgF;IAChF,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED;;;;;GAKG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,UAAU,CAAQ;gBAEd,WAAW,EAAE,MAAM;IAI/B,OAAO,CAAC,UAAU;YAoBJ,UAAU;IAQxB;;;;OAIG;IACG,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAoBnE;;OAEG;IACG,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAW7C;;OAEG;IACG,mBAAmB,CAAC,OAAO,EAAE,uBAAuB,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;CA+D7E"}
|
|
@@ -5,5 +5,6 @@ export { ValidationErrors } from './validation-errors.js';
|
|
|
5
5
|
export { resolveAgent, hasAgentJson } from './agent-resolver.js';
|
|
6
6
|
export type { ResolveAgentOptions } from './agent-resolver.js';
|
|
7
7
|
export { ConfigWriter } from './config-writer.js';
|
|
8
|
+
export type { ConfigSchemaFieldUpdate } from './config-writer.js';
|
|
8
9
|
export * from './types.js';
|
|
9
10
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/agent-project/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,YAAY,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAA;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAA;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAA;AACzD,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAA;AAChE,YAAY,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAA;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,cAAc,YAAY,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/agent-project/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,YAAY,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAA;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAA;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAA;AACzD,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAA;AAChE,YAAY,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAA;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,YAAY,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAA;AACjE,cAAc,YAAY,CAAA"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { z } from '@botpress/sdk';
|
|
2
|
+
/**
|
|
3
|
+
* Coerce a string value from CLI/terminal input to the type expected by a Zod field schema.
|
|
4
|
+
*
|
|
5
|
+
* CLI arguments and text inputs are always strings, but config schemas may expect
|
|
6
|
+
* numbers, booleans, etc. This function inspects the Zod schema's type and converts
|
|
7
|
+
* the raw string accordingly.
|
|
8
|
+
*
|
|
9
|
+
* Returns the coerced value, or the original string if the type is unknown or unsupported
|
|
10
|
+
* (letting Zod validation catch any remaining mismatches).
|
|
11
|
+
*/
|
|
12
|
+
export declare function coerceConfigValue(value: string, fieldSchema: z.ZodTypeAny): unknown;
|
|
13
|
+
/**
|
|
14
|
+
* Unwrap optional/default/nullable wrappers to get the inner type name.
|
|
15
|
+
* e.g. z.number().optional() has typeName 'ZodOptional' but the inner type is 'ZodNumber'.
|
|
16
|
+
*/
|
|
17
|
+
export declare function getInnerTypeName(schema: z.ZodTypeAny): string;
|
|
18
|
+
//# sourceMappingURL=coerce-config-value.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"coerce-config-value.d.ts","sourceRoot":"","sources":["../../src/config/coerce-config-value.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,eAAe,CAAA;AAEtC;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC,CAAC,UAAU,GAAG,OAAO,CA0BnF;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,CAAC,CAAC,UAAU,GAAG,MAAM,CAa7D"}
|
package/dist/config/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/config/index.ts"],"names":[],"mappings":"AAAA,cAAc,cAAc,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/config/index.ts"],"names":[],"mappings":"AAAA,cAAc,cAAc,CAAA;AAC5B,OAAO,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAA"}
|
package/dist/config/manager.d.ts
CHANGED
|
@@ -2,6 +2,21 @@ import { z } from '@botpress/sdk';
|
|
|
2
2
|
export interface StoredConfig {
|
|
3
3
|
[key: string]: any;
|
|
4
4
|
}
|
|
5
|
+
export interface ConfigFieldDescriptor {
|
|
6
|
+
key: string;
|
|
7
|
+
type: 'string' | 'number' | 'boolean' | 'unknown';
|
|
8
|
+
required: boolean;
|
|
9
|
+
description?: string;
|
|
10
|
+
defaultValue?: unknown;
|
|
11
|
+
currentValue?: unknown;
|
|
12
|
+
}
|
|
13
|
+
export type SetResult = {
|
|
14
|
+
success: false;
|
|
15
|
+
error: string;
|
|
16
|
+
} | {
|
|
17
|
+
success: true;
|
|
18
|
+
data: any;
|
|
19
|
+
};
|
|
5
20
|
export declare class ConfigManager {
|
|
6
21
|
private botId;
|
|
7
22
|
private client;
|
|
@@ -44,5 +59,15 @@ export declare class ConfigManager {
|
|
|
44
59
|
* Check if configuration is valid
|
|
45
60
|
*/
|
|
46
61
|
isValid(schema: z.ZodObject<any>): Promise<boolean>;
|
|
62
|
+
/**
|
|
63
|
+
* Describe the configuration schema as a serializable list of field descriptors.
|
|
64
|
+
* Merges in current stored values so the UI can render a form without Zod access.
|
|
65
|
+
*/
|
|
66
|
+
describeSchema(schema: z.ZodObject<any>): Promise<ConfigFieldDescriptor[]>;
|
|
67
|
+
/**
|
|
68
|
+
* Validate and set a single configuration key.
|
|
69
|
+
* Handles coercion from string values (for CLI/UI text inputs).
|
|
70
|
+
*/
|
|
71
|
+
setWithValidation(key: string, value: unknown, schema: z.ZodObject<any>): Promise<SetResult>;
|
|
47
72
|
}
|
|
48
73
|
//# sourceMappingURL=manager.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"manager.d.ts","sourceRoot":"","sources":["../../src/config/manager.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,eAAe,CAAA;
|
|
1
|
+
{"version":3,"file":"manager.d.ts","sourceRoot":"","sources":["../../src/config/manager.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,eAAe,CAAA;AAKjC,MAAM,WAAW,YAAY;IAC3B,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;CACnB;AAED,MAAM,WAAW,qBAAqB;IACpC,GAAG,EAAE,MAAM,CAAA;IACX,IAAI,EAAE,QAAQ,GAAG,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAA;IACjD,QAAQ,EAAE,OAAO,CAAA;IACjB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,YAAY,CAAC,EAAE,OAAO,CAAA;IACtB,YAAY,CAAC,EAAE,OAAO,CAAA;CACvB;AAED,MAAM,MAAM,SAAS,GACjB;IACE,OAAO,EAAE,KAAK,CAAA;IACd,KAAK,EAAE,MAAM,CAAA;CACd,GACD;IACE,OAAO,EAAE,IAAI,CAAA;IACb,IAAI,EAAE,GAAG,CAAA;CACV,CAAA;AAEL,qBAAa,aAAa;IACxB,OAAO,CAAC,KAAK,CAAQ;IACrB,OAAO,CAAC,MAAM,CAAoB;gBAEtB,KAAK,EAAE,MAAM;YAIX,SAAS;IAgBvB;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,YAAY,CAAC;IAWnC;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,YAAY,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IA6B1E;;OAEG;IACG,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,GAAG,SAAS,CAAC;IAKhD;;OAEG;IACG,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAM5E;;OAEG;IACG,MAAM,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAI5C;;;OAGG;IACG,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC;QAChD,KAAK,EAAE,OAAO,CAAA;QACd,MAAM,EAAE,MAAM,EAAE,CAAA;QAChB,OAAO,EAAE,MAAM,EAAE,CAAA;KAClB,CAAC;IAsBF;;OAEG;IACG,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKjE;;OAEG;IACG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC;IAKzD;;;OAGG;IACG,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,qBAAqB,EAAE,CAAC;IAuChF;;;OAGG;IACG,iBAAiB,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,SAAS,CAAC;CAoBnG"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chat client for eval conversations.
|
|
3
|
+
* Drives conversations against a running ADK bot via @botpress/chat.
|
|
4
|
+
*/
|
|
5
|
+
import type { TurnResult } from './types';
|
|
6
|
+
/**
|
|
7
|
+
* A chat session that maintains a single client connection across turns.
|
|
8
|
+
*/
|
|
9
|
+
export declare class ChatSession {
|
|
10
|
+
private webhookId;
|
|
11
|
+
private client;
|
|
12
|
+
private conversationId;
|
|
13
|
+
constructor(webhookId: string);
|
|
14
|
+
connect(): Promise<void>;
|
|
15
|
+
get userId(): string;
|
|
16
|
+
sendMessage(message: string, options?: {
|
|
17
|
+
timeout?: number;
|
|
18
|
+
idleTimeout?: number;
|
|
19
|
+
}): Promise<TurnResult>;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Discover the chat integration's webhookId from a bot.
|
|
23
|
+
*/
|
|
24
|
+
export declare function discoverWebhookId(botId: string, token: string, apiUrl: string): Promise<string>;
|
|
25
|
+
//# sourceMappingURL=client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/eval/client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,KAAK,EAAe,UAAU,EAAE,MAAM,SAAS,CAAA;AAEtD;;GAEG;AACH,qBAAa,WAAW;IAIV,OAAO,CAAC,SAAS;IAH7B,OAAO,CAAC,MAAM,CAAY;IAC1B,OAAO,CAAC,cAAc,CAAsB;gBAExB,SAAS,EAAE,MAAM;IAE/B,OAAO;IAIb,IAAI,MAAM,IAAI,MAAM,CAKnB;IAEK,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,CAAA;KAAO,GAAG,OAAO,CAAC,UAAU,CAAC;CA+ElH;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAarG"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Grader barrel exports.
|
|
3
|
+
*/
|
|
4
|
+
export { matchValue, operatorToString } from './match';
|
|
5
|
+
export { gradeResponse } from './response';
|
|
6
|
+
export { gradeLLMJudge, initLLMJudge } from './llm';
|
|
7
|
+
export { gradeTools } from './tools';
|
|
8
|
+
export { gradeState, snapshotState } from './state';
|
|
9
|
+
export { gradeTables } from './tables';
|
|
10
|
+
export { gradeWorkflows } from './workflow';
|
|
11
|
+
export { gradeOutcome, snapshotOutcomeState } from './outcome';
|
|
12
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/eval/graders/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAA;AAC1C,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,OAAO,CAAA;AACnD,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,SAAS,CAAA;AACnD,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAA;AACtC,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAA;AAC3C,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,MAAM,WAAW,CAAA"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM judge grader using @botpress/cognitive.
|
|
3
|
+
* Replaces the OpenAI-based implementation from the standalone project.
|
|
4
|
+
*/
|
|
5
|
+
import type { GraderResult } from '../types';
|
|
6
|
+
/**
|
|
7
|
+
* Initialize the LLM judge with explicit credentials.
|
|
8
|
+
* Call this before running evals so the judge doesn't need env vars.
|
|
9
|
+
*/
|
|
10
|
+
export declare function initLLMJudge(credentials: {
|
|
11
|
+
token: string;
|
|
12
|
+
apiUrl: string;
|
|
13
|
+
botId: string;
|
|
14
|
+
}): void;
|
|
15
|
+
export declare function gradeLLMJudge(botResponse: string, criteria: string, context: {
|
|
16
|
+
userMessage: string;
|
|
17
|
+
}): Promise<GraderResult>;
|
|
18
|
+
//# sourceMappingURL=llm.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../../src/eval/graders/llm.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAyC5C;;;GAGG;AACH,wBAAgB,YAAY,CAAC,WAAW,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,QAOzF;AAED,wBAAsB,aAAa,CACjC,WAAW,EAAE,MAAM,EACnB,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE;IAAE,WAAW,EAAE,MAAM,CAAA;CAAE,GAC/B,OAAO,CAAC,YAAY,CAAC,CAwDvB"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared match utilities for assertion grading.
|
|
3
|
+
* Used by tool, state, and table graders.
|
|
4
|
+
*/
|
|
5
|
+
import type { MatchOperator } from '../types';
|
|
6
|
+
/**
|
|
7
|
+
* Evaluate a single match operator against a value.
|
|
8
|
+
*/
|
|
9
|
+
export declare function matchValue(operator: MatchOperator, actual: unknown): boolean;
|
|
10
|
+
export declare function operatorToString(operator: MatchOperator): string;
|
|
11
|
+
//# sourceMappingURL=match.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"match.d.ts","sourceRoot":"","sources":["../../../src/eval/graders/match.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AAE7C;;GAEG;AACH,wBAAgB,UAAU,CAAC,QAAQ,EAAE,aAAa,EAAE,MAAM,EAAE,OAAO,GAAG,OAAO,CAuC5E;AAED,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,aAAa,GAAG,MAAM,CAWhE"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Outcome assertion orchestrator.
|
|
3
|
+
* Runs state, table, and workflow graders after all conversation turns complete.
|
|
4
|
+
*/
|
|
5
|
+
import type { Client as BpClient } from '@botpress/client';
|
|
6
|
+
import type { EvalDefinition, EvalContext, TraceSpan, GraderResult } from '../types';
|
|
7
|
+
/**
|
|
8
|
+
* Snapshot pre-conversation state for `changed` assertions.
|
|
9
|
+
* Call this BEFORE the conversation starts.
|
|
10
|
+
*/
|
|
11
|
+
export declare function snapshotOutcomeState(client: BpClient, evalDef: EvalDefinition, ctx: EvalContext): Promise<Map<string, unknown>>;
|
|
12
|
+
/**
|
|
13
|
+
* Grade all outcome assertions after the conversation completes.
|
|
14
|
+
*/
|
|
15
|
+
export declare function gradeOutcome(client: BpClient, evalDef: EvalDefinition, ctx: EvalContext, traceSpans: TraceSpan[], preSnapshots?: Map<string, unknown>): Promise<GraderResult[]>;
|
|
16
|
+
//# sourceMappingURL=outcome.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"outcome.d.ts","sourceRoot":"","sources":["../../../src/eval/graders/outcome.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,MAAM,IAAI,QAAQ,EAAE,MAAM,kBAAkB,CAAA;AAC1D,OAAO,KAAK,EAAE,cAAc,EAAE,WAAW,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAKpF;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,MAAM,EAAE,QAAQ,EAChB,OAAO,EAAE,cAAc,EACvB,GAAG,EAAE,WAAW,GACf,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAK/B;AAED;;GAEG;AACH,wBAAsB,YAAY,CAChC,MAAM,EAAE,QAAQ,EAChB,OAAO,EAAE,cAAc,EACvB,GAAG,EAAE,WAAW,EAChB,UAAU,EAAE,SAAS,EAAE,EACvB,YAAY,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,GAClC,OAAO,CAAC,YAAY,EAAE,CAAC,CAsBzB"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Response assertion graders.
|
|
3
|
+
* Checks bot response text against expected assertions.
|
|
4
|
+
*/
|
|
5
|
+
import type { ResponseAssertion, GraderResult } from '../types';
|
|
6
|
+
export declare function gradeResponse(botResponse: string, assertions: ResponseAssertion[], context: {
|
|
7
|
+
userMessage: string;
|
|
8
|
+
}): Promise<GraderResult[]>;
|
|
9
|
+
//# sourceMappingURL=response.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"response.d.ts","sourceRoot":"","sources":["../../../src/eval/graders/response.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAG/D,wBAAsB,aAAa,CACjC,WAAW,EAAE,MAAM,EACnB,UAAU,EAAE,iBAAiB,EAAE,EAC/B,OAAO,EAAE;IAAE,WAAW,EAAE,MAAM,CAAA;CAAE,GAC/B,OAAO,CAAC,YAAY,EAAE,CAAC,CA+DzB"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* State assertion graders.
|
|
3
|
+
* Checks bot/user/conversation state via Botpress Client API.
|
|
4
|
+
*/
|
|
5
|
+
import type { Client as BpClient } from '@botpress/client';
|
|
6
|
+
import type { StateAssertion, EvalContext, GraderResult } from '../types';
|
|
7
|
+
/**
|
|
8
|
+
* Snapshot pre-conversation state values for `changed` assertions.
|
|
9
|
+
* Only snapshots bot and user state (conversation doesn't exist yet).
|
|
10
|
+
*/
|
|
11
|
+
export declare function snapshotState(client: BpClient, assertions: StateAssertion[], ctx: EvalContext): Promise<Map<string, unknown>>;
|
|
12
|
+
/**
|
|
13
|
+
* Grade state assertions.
|
|
14
|
+
*/
|
|
15
|
+
export declare function gradeState(client: BpClient, assertions: StateAssertion[], ctx: EvalContext, preSnapshots?: Map<string, unknown>): Promise<GraderResult[]>;
|
|
16
|
+
//# sourceMappingURL=state.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/eval/graders/state.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,MAAM,IAAI,QAAQ,EAAE,MAAM,kBAAkB,CAAA;AAC1D,OAAO,KAAK,EAAE,cAAc,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AA0EzE;;;GAGG;AACH,wBAAsB,aAAa,CACjC,MAAM,EAAE,QAAQ,EAChB,UAAU,EAAE,cAAc,EAAE,EAC5B,GAAG,EAAE,WAAW,GACf,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAc/B;AAED;;GAEG;AACH,wBAAsB,UAAU,CAC9B,MAAM,EAAE,QAAQ,EAChB,UAAU,EAAE,cAAc,EAAE,EAC5B,GAAG,EAAE,WAAW,EAChB,YAAY,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,GAClC,OAAO,CAAC,YAAY,EAAE,CAAC,CAiDzB"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Table assertion graders.
|
|
3
|
+
* Checks table row existence and counts via Botpress Client API.
|
|
4
|
+
*/
|
|
5
|
+
import type { Client as BpClient } from '@botpress/client';
|
|
6
|
+
import type { TableAssertion, GraderResult } from '../types';
|
|
7
|
+
export declare function gradeTables(client: BpClient, assertions: TableAssertion[]): Promise<GraderResult[]>;
|
|
8
|
+
//# sourceMappingURL=tables.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tables.d.ts","sourceRoot":"","sources":["../../../src/eval/graders/tables.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,MAAM,IAAI,QAAQ,EAAE,MAAM,kBAAkB,CAAA;AAC1D,OAAO,KAAK,EAAiB,cAAc,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAwH3E,wBAAsB,WAAW,CAAC,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAYzG"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool assertion graders.
|
|
3
|
+
* Checks tool calls from traces against expected assertions.
|
|
4
|
+
*/
|
|
5
|
+
import type { ToolAssertion, ToolCall, GraderResult } from '../types';
|
|
6
|
+
export declare function gradeTools(toolCalls: ToolCall[], assertions: ToolAssertion[]): GraderResult[];
|
|
7
|
+
//# sourceMappingURL=tools.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tools.d.ts","sourceRoot":"","sources":["../../../src/eval/graders/tools.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAGrE,wBAAgB,UAAU,CAAC,SAAS,EAAE,QAAQ,EAAE,EAAE,UAAU,EAAE,aAAa,EAAE,GAAG,YAAY,EAAE,CA6F7F"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow assertion graders.
|
|
3
|
+
* Checks workflow execution via trace spans.
|
|
4
|
+
*/
|
|
5
|
+
import type { WorkflowAssertion, TraceSpan, GraderResult } from '../types';
|
|
6
|
+
export declare function gradeWorkflows(spans: TraceSpan[], assertions: WorkflowAssertion[]): GraderResult[];
|
|
7
|
+
//# sourceMappingURL=workflow.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"workflow.d.ts","sourceRoot":"","sources":["../../../src/eval/graders/workflow.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAE1E,wBAAgB,cAAc,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,UAAU,EAAE,iBAAiB,EAAE,GAAG,YAAY,EAAE,CA6ClG"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ADK Eval Engine — public API.
|
|
3
|
+
*/
|
|
4
|
+
export type { EvalDefinition, EvalReport, EvalRunReport, EvalRunnerConfig, EvalFilter, EvalProgressEvent, EvalContext, GraderResult, TurnReport, ToolCall, TraceData, TraceSpan, BotConnection, BotResponse, TurnResult, MatchOperator, ResponseAssertion, ToolAssertion, StateAssertion, TableAssertion, WorkflowAssertion, TurnAssertions, ConversationTurn, OutcomeAssertions, } from './types';
|
|
5
|
+
export { defineEval } from './types';
|
|
6
|
+
export { loadEvalFile, loadEvalsFromDir, loadEvalByName, filterEvals } from './loader';
|
|
7
|
+
export { runEval, runEvalSuite } from './runner';
|
|
8
|
+
export { saveRunResult, loadRunResult, listRunResults, getLatestRun } from './store';
|
|
9
|
+
export { matchValue, operatorToString, gradeResponse, gradeLLMJudge, initLLMJudge, gradeTools, gradeState, snapshotState, gradeTables, gradeWorkflows, gradeOutcome, snapshotOutcomeState, } from './graders';
|
|
10
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,YAAY,EACV,cAAc,EACd,UAAU,EACV,aAAa,EACb,gBAAgB,EAChB,UAAU,EACV,iBAAiB,EACjB,WAAW,EACX,YAAY,EACZ,UAAU,EACV,QAAQ,EACR,SAAS,EACT,SAAS,EACT,aAAa,EACb,WAAW,EACX,UAAU,EACV,aAAa,EACb,iBAAiB,EACjB,aAAa,EACb,cAAc,EACd,cAAc,EACd,iBAAiB,EACjB,cAAc,EACd,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,SAAS,CAAA;AAGhB,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AAGpC,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,UAAU,CAAA;AAGtF,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAGhD,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAGpF,OAAO,EACL,UAAU,EACV,gBAAgB,EAChB,aAAa,EACb,aAAa,EACb,YAAY,EACZ,UAAU,EACV,UAAU,EACV,aAAa,EACb,WAAW,EACX,cAAc,EACd,YAAY,EACZ,oBAAoB,GACrB,MAAM,WAAW,CAAA"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Eval file loader.
|
|
3
|
+
* Loads *.eval.ts files from a directory using dynamic imports.
|
|
4
|
+
*/
|
|
5
|
+
import type { EvalDefinition, EvalFilter } from './types';
|
|
6
|
+
export declare function loadEvalFile(filePath: string): Promise<EvalDefinition>;
|
|
7
|
+
export declare function loadEvalsFromDir(dirPath: string): Promise<EvalDefinition[]>;
|
|
8
|
+
export declare function loadEvalByName(dirPath: string, name: string): Promise<EvalDefinition | null>;
|
|
9
|
+
export declare function filterEvals(evals: EvalDefinition[], filter?: EvalFilter): EvalDefinition[];
|
|
10
|
+
//# sourceMappingURL=loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loader.d.ts","sourceRoot":"","sources":["../../src/eval/loader.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,KAAK,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AAEzD,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC,CAY5E;AAED,wBAAsB,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CAcjF;AAED,wBAAsB,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC,CAYlG;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,cAAc,EAAE,EAAE,MAAM,CAAC,EAAE,UAAU,GAAG,cAAc,EAAE,CAe1F"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Eval runner — orchestrates eval execution.
|
|
3
|
+
*/
|
|
4
|
+
import type { EvalDefinition, EvalReport, EvalRunReport, EvalRunnerConfig, EvalFilter, BotConnection } from './types';
|
|
5
|
+
/**
|
|
6
|
+
* Run a single eval against a bot.
|
|
7
|
+
*/
|
|
8
|
+
export declare function runEval(evalDef: EvalDefinition, connection: BotConnection, options?: {
|
|
9
|
+
devServerUrl?: string;
|
|
10
|
+
}): Promise<EvalReport>;
|
|
11
|
+
/**
|
|
12
|
+
* Run a suite of evals.
|
|
13
|
+
*/
|
|
14
|
+
export declare function runEvalSuite(config: EvalRunnerConfig, filter?: EvalFilter): Promise<EvalRunReport>;
|
|
15
|
+
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EACV,cAAc,EACd,UAAU,EACV,aAAa,EACb,gBAAgB,EAChB,UAAU,EAIV,aAAa,EACd,MAAM,SAAS,CAAA;AAahB;;GAEG;AACH,wBAAsB,OAAO,CAC3B,OAAO,EAAE,cAAc,EACvB,UAAU,EAAE,aAAa,EACzB,OAAO,GAAE;IAAE,YAAY,CAAC,EAAE,MAAM,CAAA;CAAO,GACtC,OAAO,CAAC,UAAU,CAAC,CAmNrB;AAED;;GAEG;AACH,wBAAsB,YAAY,CAAC,MAAM,EAAE,gBAAgB,EAAE,MAAM,CAAC,EAAE,UAAU,GAAG,OAAO,CAAC,aAAa,CAAC,CAuExG"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Eval results store.
|
|
3
|
+
* Persists run results as JSON files in .adk/evals/runs/.
|
|
4
|
+
*/
|
|
5
|
+
import type { EvalRunReport } from './types';
|
|
6
|
+
export declare function saveRunResult(agentPath: string, report: EvalRunReport): string;
|
|
7
|
+
export declare function loadRunResult(agentPath: string, runId: string): EvalRunReport | null;
|
|
8
|
+
export declare function listRunResults(agentPath: string, limit?: number): EvalRunReport[];
|
|
9
|
+
export declare function getLatestRun(agentPath: string): EvalRunReport | null;
|
|
10
|
+
//# sourceMappingURL=store.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"store.d.ts","sourceRoot":"","sources":["../../src/eval/store.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAA;AAU5C,wBAAgB,aAAa,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,aAAa,GAAG,MAAM,CAM9E;AAED,wBAAgB,aAAa,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAYpF;AAED,wBAAgB,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,aAAa,EAAE,CAc7E;AAED,wBAAgB,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAGpE"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Trace data extraction for evals.
|
|
3
|
+
* Uses the ADK dev server's HTTP API to fetch traces by conversationId.
|
|
4
|
+
* This approach is used instead of direct TraceReader access because:
|
|
5
|
+
* 1. The eval engine runs in the ADK package, not the CLI
|
|
6
|
+
* 2. The dev server is always running when evals execute
|
|
7
|
+
* 3. HTTP is the established interface for trace queries
|
|
8
|
+
*/
|
|
9
|
+
import type { TraceData } from './types';
|
|
10
|
+
/**
|
|
11
|
+
* Fetch and extract structured trace data for a conversation.
|
|
12
|
+
* If previousToolCallCount is provided, only returns tool calls after that index.
|
|
13
|
+
* When expectNewCalls is true, retries fetching until new tool calls appear.
|
|
14
|
+
*/
|
|
15
|
+
export declare function getTraceData(conversationId: string, devServerUrl: string, options?: {
|
|
16
|
+
previousToolCallCount?: number;
|
|
17
|
+
expectNewCalls?: boolean;
|
|
18
|
+
}): Promise<TraceData>;
|
|
19
|
+
//# sourceMappingURL=traces.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"traces.d.ts","sourceRoot":"","sources":["../../src/eval/traces.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAY,SAAS,EAAa,MAAM,SAAS,CAAA;AAoD7D;;;;GAIG;AACH,wBAAsB,YAAY,CAChC,cAAc,EAAE,MAAM,EACtB,YAAY,EAAE,MAAM,EACpB,OAAO,GAAE;IAAE,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,OAAO,CAAA;CAAO,GACzE,OAAO,CAAC,SAAS,CAAC,CAsBpB"}
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Eval type definitions.
|
|
3
|
+
* Ported from adk-eval-standalone/src/parser/schema.ts with added report types.
|
|
4
|
+
*/
|
|
5
|
+
export type MatchOperator = string | {
|
|
6
|
+
equals: unknown;
|
|
7
|
+
} | {
|
|
8
|
+
contains: string;
|
|
9
|
+
} | {
|
|
10
|
+
not_contains: string;
|
|
11
|
+
} | {
|
|
12
|
+
matches: string;
|
|
13
|
+
} | {
|
|
14
|
+
in: unknown[];
|
|
15
|
+
} | {
|
|
16
|
+
exists: boolean;
|
|
17
|
+
} | {
|
|
18
|
+
gte: number;
|
|
19
|
+
} | {
|
|
20
|
+
lte: number;
|
|
21
|
+
};
|
|
22
|
+
export type ResponseAssertion = {
|
|
23
|
+
contains: string;
|
|
24
|
+
} | {
|
|
25
|
+
not_contains: string;
|
|
26
|
+
} | {
|
|
27
|
+
matches: string;
|
|
28
|
+
} | {
|
|
29
|
+
llm_judge: string;
|
|
30
|
+
} | {
|
|
31
|
+
similar_to: string;
|
|
32
|
+
};
|
|
33
|
+
/**
 * Assertion about tool calls (see `TurnAssertions.tools`).
 *
 * One of three shapes:
 * - `called` (a string) with optional `params`, a map from string keys to match operators;
 * - `not_called` (a string);
 * - `call_order` (a list of strings).
 *
 * NOTE(review): the strings are presumably tool names and `params` presumably
 * matches the tool input — confirm against the tools grader.
 */
export type ToolAssertion =
  | {
      called: string;
      params?: Record<string, MatchOperator>;
    }
  | { not_called: string }
  | { call_order: string[] };
|
|
41
|
+
/**
 * Assertion about a piece of state addressed by `path`.
 *
 * NOTE(review): the path syntax and how `equals` / `changed` are evaluated are
 * not visible in this declaration — confirm against the state grader.
 */
export interface StateAssertion {
  /** Location of the state value being asserted on. */
  path: string;
  /** Optional value to compare against. */
  equals?: unknown;
  /** Optional flag; presumably asserts whether the value changed — TODO confirm. */
  changed?: boolean;
}
|
|
46
|
+
/**
 * Assertion about a table, identified by `table`.
 *
 * One of two shapes:
 * - `row_exists`: a map from string keys to match operators;
 * - `row_count`: a match operator, with an optional `where` map of match operators.
 *
 * NOTE(review): map keys are presumably column names and `where` presumably
 * filters the rows being counted — confirm against the tables grader.
 */
export type TableAssertion =
  | {
      table: string;
      row_exists: Record<string, MatchOperator>;
    }
  | {
      table: string;
      row_count: MatchOperator;
      where?: Record<string, MatchOperator>;
    };
|
|
54
|
+
/**
 * Assertion about a workflow identified by `name`.
 *
 * NOTE(review): the exact meaning of `entered` and `completed` is not visible in
 * this declaration — confirm against the workflow grader.
 */
export interface WorkflowAssertion {
  /** Name of the workflow being asserted on. */
  name: string;
  /** Optional expectation on whether the workflow was entered. */
  entered?: boolean;
  /** Optional expectation on whether the workflow completed. */
  completed?: boolean;
}
|
|
59
|
+
/**
 * Assertions attached to a single conversation turn (see `ConversationTurn.assert`).
 * Every category is optional and holds a list of assertions of the matching kind.
 */
export interface TurnAssertions {
  /** Assertions on the response. */
  response?: ResponseAssertion[];
  /** Assertions on tool calls. */
  tools?: ToolAssertion[];
  /** Assertions on state. */
  state?: StateAssertion[];
  /** Assertions on tables. */
  tables?: TableAssertion[];
  /** Assertions on workflows. */
  workflow?: WorkflowAssertion[];
}
|
|
66
|
+
/** One turn of an eval conversation: a user message plus optional assertions. */
export interface ConversationTurn {
  /** The user's message for this turn. */
  user: string;
  /** Optional assertions associated with this turn. */
  assert?: TurnAssertions;
}
|
|
70
|
+
/**
 * Assertions attached to an eval's `outcome` (see `EvalDefinition.outcome`).
 * Covers state, tables and workflows; unlike `TurnAssertions` there are no
 * response or tool categories.
 */
export interface OutcomeAssertions {
  /** Assertions on state. */
  state?: StateAssertion[];
  /** Assertions on tables. */
  tables?: TableAssertion[];
  /** Assertions on workflows. */
  workflow?: WorkflowAssertion[];
}
|
|
75
|
+
/** Definition of a single eval: metadata, the conversation turns, and optional outcome assertions. */
export interface EvalDefinition {
  /** Name of the eval. */
  name: string;
  /** Optional human-readable description. */
  description?: string;
  /** Optional tags. */
  tags?: string[];
  /** Optional eval category. */
  type?: 'capability' | 'regression';
  /** Conversation turns that make up the eval. */
  conversation: ConversationTurn[];
  /** Optional outcome assertions. */
  outcome?: OutcomeAssertions;
}
|
|
83
|
+
/**
 * Type-safe eval definition helper. Provides intellisense for eval files.
 *
 * @param def - The eval definition.
 * @returns An eval definition. NOTE(review): presumably `def` unchanged — confirm in the implementation.
 */
export declare function defineEval(def: EvalDefinition): EvalDefinition;
|
|
85
|
+
/** Identifiers describing the context of an eval: bot, user and conversation. */
export interface EvalContext {
  /** Identifier of the bot. */
  botId: string;
  /** Identifier of the user. */
  userId: string;
  /** Identifier of the conversation. */
  conversationId: string;
}
|
|
90
|
+
/** Result of one graded assertion. */
export interface GraderResult {
  /** Textual identification of the assertion. */
  assertion: string;
  /** Whether the assertion passed. */
  pass: boolean;
  /** Textual rendering of the expected value. */
  expected: string;
  /** Textual rendering of the actual value. */
  actual: string;
}
|
|
96
|
+
/**
 * A tool call (listed in `TraceData.toolCalls`).
 *
 * NOTE(review): the set of possible `status` values and the encoding of `output`
 * are not visible in this declaration — confirm against the trace extraction code.
 */
export interface ToolCall {
  /** Name of the tool. */
  name: string;
  /** Input passed to the tool, keyed by string. */
  input: Record<string, unknown>;
  /** Output of the tool, as a string. */
  output: string;
  /** Status of the call, as a string. */
  status: string;
}
|
|
102
|
+
/** Structured trace data: a list of tool calls, a total count, and the raw spans. */
export interface TraceData {
  /** Tool calls included in this result. */
  toolCalls: ToolCall[];
  /**
   * Total number of tool calls.
   * NOTE(review): presumably counts all calls seen so far, which may exceed
   * `toolCalls.length` when only newer calls are returned — confirm.
   */
  totalToolCallCount: number;
  /** Raw trace spans. */
  raw: TraceSpan[];
}
|
|
107
|
+
/**
 * A raw trace span record.
 *
 * Besides the declared fields, any additional string-keyed property is allowed
 * (typed as `unknown`) via the index signature.
 *
 * NOTE(review): the `Ns` suffix presumably means nanoseconds and `t` presumably
 * distinguishes span start, end and heartbeat records — confirm against the producer.
 */
export interface TraceSpan {
  /** Record kind. */
  t: 'start' | 'end' | 'beat';
  /** Identifier of the trace. */
  traceId: string;
  /** Identifier of this span. */
  spanId: string;
  /** Identifier of the parent span; may be absent or null. */
  parentSpanId?: string | null;
  /** Optional span name. */
  name?: string;
  /** Optional start time. */
  startNs?: number;
  /** Optional end time. */
  endNs?: number;
  /** Optional duration. */
  durationNs?: number;
  /** Optional attributes, keyed by string. */
  attrs?: Record<string, unknown>;
  /** Any other property. */
  [key: string]: unknown;
}
|
|
119
|
+
/** Report for a single conversation turn. */
export interface TurnReport {
  /** Number of the turn. NOTE(review): whether numbering starts at 0 or 1 is not visible here. */
  turnNumber: number;
  /** The user's message for this turn. */
  userMessage: string;
  /** The bot's response for this turn. */
  botResponse: string;
  /** Results of the assertions graded for this turn. */
  assertions: GraderResult[];
  /** Whether the turn passed. */
  pass: boolean;
  /** Time spent waiting for the bot to respond (ms) */
  botDuration: number;
  /** Time spent fetching traces + running graders (ms) */
  evalDuration: number;
}
|
|
130
|
+
/** Report for a single eval. */
export interface EvalReport {
  /** Name of the eval. */
  name: string;
  /** Optional description. */
  description?: string;
  /** Optional eval category. */
  type?: 'capability' | 'regression';
  /** Optional tags. */
  tags?: string[];
  /** Per-turn reports. */
  turns: TurnReport[];
  /** Results of the outcome assertions. */
  outcomeAssertions: GraderResult[];
  /** Whether the eval passed. */
  pass: boolean;
  /** Duration of the eval. NOTE(review): unit presumably milliseconds — confirm. */
  duration: number;
  /** Optional error message. */
  error?: string;
}
|
|
141
|
+
/**
 * Progress event, discriminated by `type`
 * (passed to `EvalRunnerConfig.onProgress`).
 *
 * - `suite_start`: carries `totalEvals`.
 * - `eval_start`: carries `evalName` and `index`.
 * - `eval_complete`: carries `evalName`, `index` and the eval's `report`.
 * - `suite_complete`: carries the run `report`.
 */
export type EvalProgressEvent =
  | {
      type: 'suite_start';
      totalEvals: number;
    }
  | {
      type: 'eval_start';
      evalName: string;
      index: number;
    }
  | {
      type: 'eval_complete';
      evalName: string;
      index: number;
      report: EvalReport;
    }
  | {
      type: 'suite_complete';
      report: EvalRunReport;
    };
|
|
157
|
+
/** Report for a whole eval run. */
export interface EvalRunReport {
  /** Identifier of the run. */
  id: string;
  /** Timestamp of the run, as a string. NOTE(review): format not visible here — confirm. */
  timestamp: string;
  /** Reports of the individual evals. */
  evals: EvalReport[];
  /** Number passed. NOTE(review): presumably a count of evals — confirm. */
  passed: number;
  /** Number failed. */
  failed: number;
  /** Total number. */
  total: number;
  /** Duration of the run. NOTE(review): unit presumably milliseconds — confirm. */
  duration: number;
  /** Optional filter recorded with the run; same fields as `EvalFilter`. */
  filter?: {
    names?: string[];
    tags?: string[];
    type?: 'capability' | 'regression';
  };
}
|
|
171
|
+
/** Connection details for a bot: webhook id, bot id, token and API URL. */
export interface BotConnection {
  /** Identifier of the webhook. */
  webhookId: string;
  /** Identifier of the bot. */
  botId: string;
  /** Access token. */
  token: string;
  /** URL of the API. */
  apiUrl: string;
}
|
|
177
|
+
/** A bot response: its text plus the raw payload. */
export interface BotResponse {
  /** Text of the response. */
  text: string;
  /** Raw response payload; its shape is not constrained by this type. */
  raw: unknown;
}
|
|
181
|
+
/** Result of a turn: the conversation id and the list of bot responses. */
export interface TurnResult {
  /** Identifier of the conversation. */
  conversationId: string;
  /** Bot responses for the turn. */
  responses: BotResponse[];
}
|
|
185
|
+
/** Configuration for the eval runner. */
export interface EvalRunnerConfig {
  /** Credentials; `webhookId` is optional. */
  credentials: {
    token: string;
    apiUrl: string;
    botId: string;
    webhookId?: string;
  };
  /** Path of the agent. */
  agentPath: string;
  /** URL of the dev server providing traces and config. Defaults to http://localhost:3001 */
  devServerUrl?: string;
  /** Optional callback receiving progress events. */
  onProgress?: (event: EvalProgressEvent) => void;
}
|
|
197
|
+
/**
 * Filter over evals by names, tags and/or type. All fields are optional.
 *
 * NOTE(review): how multiple criteria combine (AND vs OR) is not visible in this
 * declaration — confirm against the loader/runner.
 */
export interface EvalFilter {
  /** Eval names. */
  names?: string[];
  /** Tags. */
  tags?: string[];
  /** Eval category. */
  type?: 'capability' | 'regression';
}
|
|
202
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/eval/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,MAAM,MAAM,aAAa,GACrB,MAAM,GACN;IAAE,MAAM,EAAE,OAAO,CAAA;CAAE,GACnB;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GACpB;IAAE,YAAY,EAAE,MAAM,CAAA;CAAE,GACxB;IAAE,OAAO,EAAE,MAAM,CAAA;CAAE,GACnB;IAAE,EAAE,EAAE,OAAO,EAAE,CAAA;CAAE,GACjB;IAAE,MAAM,EAAE,OAAO,CAAA;CAAE,GACnB;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GACf;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,CAAA;AAInB,MAAM,MAAM,iBAAiB,GACzB;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GACpB;IAAE,YAAY,EAAE,MAAM,CAAA;CAAE,GACxB;IAAE,OAAO,EAAE,MAAM,CAAA;CAAE,GACnB;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,GACrB;IAAE,UAAU,EAAE,MAAM,CAAA;CAAE,CAAA;AAI1B,MAAM,MAAM,aAAa,GACrB;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAA;CAAE,GAC1D;IAAE,UAAU,EAAE,MAAM,CAAA;CAAE,GACtB;IAAE,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,CAAA;AAI5B,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,MAAM,MAAM,cAAc,GACtB;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAA;CAAE,GAC5D;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,aAAa,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAA;CAAE,CAAA;AAEtF,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,SAAS,CAAC,EAAE,OAAO,CAAA;CACpB;AAID,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,EAAE,iBAAiB,EAAE,CAAA;IAC9B,KAAK,CAAC,EAAE,aAAa,EAAE,CAAA;IACvB,KAAK,CAAC,EAAE,cAAc,EAAE,CAAA;IACxB,MAAM,CAAC,EAAE,cAAc,EAAE,CAAA;IACzB,QAAQ,CAAC,EAAE,iBAAiB,EAAE,CAAA;CAC/B;AAID,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,CAAC,EAAE,cAAc,CAAA;CACxB;AAID,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,cAAc,EAAE,CAAA;IACxB,MAAM,CAAC,EAAE,cAAc,EAAE,CAAA;IACzB,QAAQ,CAAC,EAAE,iBAAiB,EAAE,CAAA;CAC/B;AAID,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;IACf,IAAI,CAAC,EAAE,YAAY,GAAG,YAAY,CAAA;IAClC,YAAY,EAAE,gBAAgB,EAAE,CAAA;IAChC,OAAO,CAAC,EAAE,iBAAiB,CAAA;CAC5B;AAED,8EAA8E;AAC9E,wBAAgB,UAAU,CAAC,G
AAG,EAAE,cAAc,GAAG,cAAc,CAE9D;AAID,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,cAAc,EAAE,MAAM,CAAA;CACvB;AAID,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE,MAAM,CAAA;IACjB,IAAI,EAAE,OAAO,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;CACf;AAID,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IAC9B,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;CACf;AAID,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,QAAQ,EAAE,CAAA;IACrB,kBAAkB,EAAE,MAAM,CAAA;IAC1B,GAAG,EAAE,SAAS,EAAE,CAAA;CACjB;AAID,MAAM,WAAW,SAAS;IACxB,CAAC,EAAE,OAAO,GAAG,KAAK,GAAG,MAAM,CAAA;IAC3B,OAAO,EAAE,MAAM,CAAA;IACf,MAAM,EAAE,MAAM,CAAA;IACd,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IAC5B,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IAC/B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CACvB;AAID,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,MAAM,CAAA;IAClB,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,YAAY,EAAE,CAAA;IAC1B,IAAI,EAAE,OAAO,CAAA;IACb,qDAAqD;IACrD,WAAW,EAAE,MAAM,CAAA;IACnB,wDAAwD;IACxD,YAAY,EAAE,MAAM,CAAA;CACrB;AAID,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAA;IACZ,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,IAAI,CAAC,EAAE,YAAY,GAAG,YAAY,CAAA;IAClC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;IACf,KAAK,EAAE,UAAU,EAAE,CAAA;IACnB,iBAAiB,EAAE,YAAY,EAAE,CAAA;IACjC,IAAI,EAAE,OAAO,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAID,MAAM,MAAM,iBAAiB,GACzB;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,GAC3C;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GACvD;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,UAAU,CAAA;CAAE,GAC9E;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,MAAM,EAAE,aAAa,CAAA;CAAE,CAAA;AAIrD,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAA;IACV,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,EAAE,UAAU,EAAE,CAAA;IACnB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,MAAM,CA
AA;IAChB,MAAM,CAAC,EAAE;QACP,KAAK,CAAC,EAAE,MAAM,EAAE,CAAA;QAChB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;QACf,IAAI,CAAC,EAAE,YAAY,GAAG,YAAY,CAAA;KACnC,CAAA;CACF;AAID,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;CACf;AAID,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,OAAO,CAAA;CACb;AAID,MAAM,WAAW,UAAU;IACzB,cAAc,EAAE,MAAM,CAAA;IACtB,SAAS,EAAE,WAAW,EAAE,CAAA;CACzB;AAID,MAAM,WAAW,gBAAgB;IAC/B,WAAW,EAAE;QACX,KAAK,EAAE,MAAM,CAAA;QACb,MAAM,EAAE,MAAM,CAAA;QACd,KAAK,EAAE,MAAM,CAAA;QACb,SAAS,CAAC,EAAE,MAAM,CAAA;KACnB,CAAA;IACD,SAAS,EAAE,MAAM,CAAA;IACjB,2FAA2F;IAC3F,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,iBAAiB,KAAK,IAAI,CAAA;CAChD;AAID,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,EAAE,CAAA;IAChB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;IACf,IAAI,CAAC,EAAE,YAAY,GAAG,YAAY,CAAA;CACnC"}
|