@wix/evalforge-types 0.45.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +145 -80
- package/build/index.js.map +4 -4
- package/build/index.mjs +137 -80
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +3 -0
- package/build/types/evaluation/conversation.d.ts +108 -0
- package/build/types/evaluation/eval-result.d.ts +25 -0
- package/build/types/evaluation/eval-run.d.ts +25 -0
- package/build/types/evaluation/index.d.ts +1 -0
- package/package.json +2 -2
|
@@ -2,6 +2,7 @@ import type { SkillWithLatestVersion } from '../target/skill.js';
|
|
|
2
2
|
import type { TestScenario } from '../scenario/test-scenario.js';
|
|
3
3
|
import type { ModelConfig } from '../common/models.js';
|
|
4
4
|
import type { LLMTrace } from '../evaluation/metrics.js';
|
|
5
|
+
import type { ConversationMessage } from '../evaluation/conversation.js';
|
|
5
6
|
import type { MCPEntity } from '../common/mcp.js';
|
|
6
7
|
import type { SubAgent } from '../target/sub-agent.js';
|
|
7
8
|
import type { Rule } from '../common/rule.js';
|
|
@@ -97,6 +98,8 @@ export interface AgentExecutionResult {
|
|
|
97
98
|
costUsd?: number;
|
|
98
99
|
/** LLM trace containing detailed step information */
|
|
99
100
|
llmTrace: LLMTrace;
|
|
101
|
+
/** Full conversation messages captured during execution */
|
|
102
|
+
conversation?: ConversationMessage[];
|
|
100
103
|
}
|
|
101
104
|
/**
|
|
102
105
|
* Agent adapter interface.
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/**
|
|
3
|
+
* Individual content blocks within a conversation message.
|
|
4
|
+
*/
|
|
5
|
+
export declare const TextBlockSchema: z.ZodObject<{
|
|
6
|
+
type: z.ZodLiteral<"text">;
|
|
7
|
+
text: z.ZodString;
|
|
8
|
+
}, z.core.$strip>;
|
|
9
|
+
export declare const ThinkingBlockSchema: z.ZodObject<{
|
|
10
|
+
type: z.ZodLiteral<"thinking">;
|
|
11
|
+
thinking: z.ZodString;
|
|
12
|
+
}, z.core.$strip>;
|
|
13
|
+
export declare const ToolUseBlockSchema: z.ZodObject<{
|
|
14
|
+
type: z.ZodLiteral<"tool_use">;
|
|
15
|
+
toolName: z.ZodString;
|
|
16
|
+
toolId: z.ZodString;
|
|
17
|
+
input: z.ZodUnknown;
|
|
18
|
+
}, z.core.$strip>;
|
|
19
|
+
export declare const ToolResultBlockSchema: z.ZodObject<{
|
|
20
|
+
type: z.ZodLiteral<"tool_result">;
|
|
21
|
+
toolUseId: z.ZodString;
|
|
22
|
+
content: z.ZodString;
|
|
23
|
+
isError: z.ZodOptional<z.ZodBoolean>;
|
|
24
|
+
}, z.core.$strip>;
|
|
25
|
+
export declare const ConversationBlockSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
26
|
+
type: z.ZodLiteral<"text">;
|
|
27
|
+
text: z.ZodString;
|
|
28
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
29
|
+
type: z.ZodLiteral<"thinking">;
|
|
30
|
+
thinking: z.ZodString;
|
|
31
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
32
|
+
type: z.ZodLiteral<"tool_use">;
|
|
33
|
+
toolName: z.ZodString;
|
|
34
|
+
toolId: z.ZodString;
|
|
35
|
+
input: z.ZodUnknown;
|
|
36
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
37
|
+
type: z.ZodLiteral<"tool_result">;
|
|
38
|
+
toolUseId: z.ZodString;
|
|
39
|
+
content: z.ZodString;
|
|
40
|
+
isError: z.ZodOptional<z.ZodBoolean>;
|
|
41
|
+
}, z.core.$strip>], "type">;
|
|
42
|
+
export type ConversationBlock = z.infer<typeof ConversationBlockSchema>;
|
|
43
|
+
/**
|
|
44
|
+
* A single message in the conversation (assistant, user, or system).
|
|
45
|
+
*/
|
|
46
|
+
export declare const ConversationMessageRoles: readonly ["assistant", "user", "system"];
|
|
47
|
+
export declare const ConversationMessageSchema: z.ZodObject<{
|
|
48
|
+
role: z.ZodEnum<{
|
|
49
|
+
system: "system";
|
|
50
|
+
user: "user";
|
|
51
|
+
assistant: "assistant";
|
|
52
|
+
}>;
|
|
53
|
+
content: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
54
|
+
type: z.ZodLiteral<"text">;
|
|
55
|
+
text: z.ZodString;
|
|
56
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
57
|
+
type: z.ZodLiteral<"thinking">;
|
|
58
|
+
thinking: z.ZodString;
|
|
59
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
60
|
+
type: z.ZodLiteral<"tool_use">;
|
|
61
|
+
toolName: z.ZodString;
|
|
62
|
+
toolId: z.ZodString;
|
|
63
|
+
input: z.ZodUnknown;
|
|
64
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
65
|
+
type: z.ZodLiteral<"tool_result">;
|
|
66
|
+
toolUseId: z.ZodString;
|
|
67
|
+
content: z.ZodString;
|
|
68
|
+
isError: z.ZodOptional<z.ZodBoolean>;
|
|
69
|
+
}, z.core.$strip>], "type">>;
|
|
70
|
+
timestamp: z.ZodString;
|
|
71
|
+
}, z.core.$strip>;
|
|
72
|
+
export type ConversationMessage = z.infer<typeof ConversationMessageSchema>;
|
|
73
|
+
/**
|
|
74
|
+
* Full conversation stored per scenario result.
|
|
75
|
+
*/
|
|
76
|
+
export declare const ScenarioConversationSchema: z.ZodObject<{
|
|
77
|
+
id: z.ZodString;
|
|
78
|
+
projectId: z.ZodString;
|
|
79
|
+
evalRunId: z.ZodString;
|
|
80
|
+
resultId: z.ZodString;
|
|
81
|
+
messages: z.ZodArray<z.ZodObject<{
|
|
82
|
+
role: z.ZodEnum<{
|
|
83
|
+
system: "system";
|
|
84
|
+
user: "user";
|
|
85
|
+
assistant: "assistant";
|
|
86
|
+
}>;
|
|
87
|
+
content: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
88
|
+
type: z.ZodLiteral<"text">;
|
|
89
|
+
text: z.ZodString;
|
|
90
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
91
|
+
type: z.ZodLiteral<"thinking">;
|
|
92
|
+
thinking: z.ZodString;
|
|
93
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
94
|
+
type: z.ZodLiteral<"tool_use">;
|
|
95
|
+
toolName: z.ZodString;
|
|
96
|
+
toolId: z.ZodString;
|
|
97
|
+
input: z.ZodUnknown;
|
|
98
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
99
|
+
type: z.ZodLiteral<"tool_result">;
|
|
100
|
+
toolUseId: z.ZodString;
|
|
101
|
+
content: z.ZodString;
|
|
102
|
+
isError: z.ZodOptional<z.ZodBoolean>;
|
|
103
|
+
}, z.core.$strip>], "type">>;
|
|
104
|
+
timestamp: z.ZodString;
|
|
105
|
+
}, z.core.$strip>>;
|
|
106
|
+
createdAt: z.ZodString;
|
|
107
|
+
}, z.core.$strip>;
|
|
108
|
+
export type ScenarioConversation = z.infer<typeof ScenarioConversationSchema>;
|
|
@@ -191,6 +191,31 @@ export declare const EvalRunResultSchema: z.ZodObject<{
|
|
|
191
191
|
modelsUsed: z.ZodArray<z.ZodString>;
|
|
192
192
|
}, z.core.$strip>;
|
|
193
193
|
}, z.core.$strip>>;
|
|
194
|
+
conversation: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
195
|
+
role: z.ZodEnum<{
|
|
196
|
+
system: "system";
|
|
197
|
+
user: "user";
|
|
198
|
+
assistant: "assistant";
|
|
199
|
+
}>;
|
|
200
|
+
content: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
201
|
+
type: z.ZodLiteral<"text">;
|
|
202
|
+
text: z.ZodString;
|
|
203
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
204
|
+
type: z.ZodLiteral<"thinking">;
|
|
205
|
+
thinking: z.ZodString;
|
|
206
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
207
|
+
type: z.ZodLiteral<"tool_use">;
|
|
208
|
+
toolName: z.ZodString;
|
|
209
|
+
toolId: z.ZodString;
|
|
210
|
+
input: z.ZodUnknown;
|
|
211
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
212
|
+
type: z.ZodLiteral<"tool_result">;
|
|
213
|
+
toolUseId: z.ZodString;
|
|
214
|
+
content: z.ZodString;
|
|
215
|
+
isError: z.ZodOptional<z.ZodBoolean>;
|
|
216
|
+
}, z.core.$strip>], "type">>;
|
|
217
|
+
timestamp: z.ZodString;
|
|
218
|
+
}, z.core.$strip>>>;
|
|
194
219
|
}, z.core.$strip>;
|
|
195
220
|
export type EvalRunResult = z.infer<typeof EvalRunResultSchema>;
|
|
196
221
|
/**
|
|
@@ -379,6 +379,31 @@ export declare const EvalRunSchema: z.ZodObject<{
|
|
|
379
379
|
modelsUsed: z.ZodArray<z.ZodString>;
|
|
380
380
|
}, z.core.$strip>;
|
|
381
381
|
}, z.core.$strip>>;
|
|
382
|
+
conversation: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
383
|
+
role: z.ZodEnum<{
|
|
384
|
+
system: "system";
|
|
385
|
+
user: "user";
|
|
386
|
+
assistant: "assistant";
|
|
387
|
+
}>;
|
|
388
|
+
content: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
389
|
+
type: z.ZodLiteral<"text">;
|
|
390
|
+
text: z.ZodString;
|
|
391
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
392
|
+
type: z.ZodLiteral<"thinking">;
|
|
393
|
+
thinking: z.ZodString;
|
|
394
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
395
|
+
type: z.ZodLiteral<"tool_use">;
|
|
396
|
+
toolName: z.ZodString;
|
|
397
|
+
toolId: z.ZodString;
|
|
398
|
+
input: z.ZodUnknown;
|
|
399
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
400
|
+
type: z.ZodLiteral<"tool_result">;
|
|
401
|
+
toolUseId: z.ZodString;
|
|
402
|
+
content: z.ZodString;
|
|
403
|
+
isError: z.ZodOptional<z.ZodBoolean>;
|
|
404
|
+
}, z.core.$strip>], "type">>;
|
|
405
|
+
timestamp: z.ZodString;
|
|
406
|
+
}, z.core.$strip>>>;
|
|
382
407
|
}, z.core.$strip>>>;
|
|
383
408
|
aggregateMetrics: z.ZodObject<{
|
|
384
409
|
totalAssertions: z.ZodNumber;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-types",
|
|
3
|
-
"version": "0.45.0",
|
|
3
|
+
"version": "0.46.0",
|
|
4
4
|
"description": "Unified types for EvalForge agent evaluation system",
|
|
5
5
|
"files": [
|
|
6
6
|
"build"
|
|
@@ -47,5 +47,5 @@
|
|
|
47
47
|
"artifactId": "evalforge-types"
|
|
48
48
|
}
|
|
49
49
|
},
|
|
50
|
-
"falconPackageHash": "
|
|
50
|
+
"falconPackageHash": "a70c8a3f86e611eaf56129ef3fea8e9e7b8334ac70c265e8835951a3"
|
|
51
51
|
}
|