agentv 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -6
- package/dist/{chunk-D44RV4HN.js → chunk-BRXRFISD.js} +6 -6
- package/dist/chunk-BRXRFISD.js.map +1 -0
- package/dist/{chunk-HWGALLUR.js → chunk-KL3Y2C6J.js} +600 -499
- package/dist/chunk-KL3Y2C6J.js.map +1 -0
- package/dist/{chunk-7XYYGJAC.js → chunk-UE4GLFVL.js} +3 -3
- package/dist/chunk-UE4GLFVL.js.map +1 -0
- package/dist/cli.js +2 -2
- package/dist/cli.js.map +1 -1
- package/dist/index.js +2 -2
- package/dist/templates/.claude/skills/agentv-eval-builder/references/config-schema.json +27 -27
- package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +217 -217
- package/dist/{token-O4PKPL7Y.js → token-S7Q7YUO6.js} +5 -5
- package/dist/token-S7Q7YUO6.js.map +1 -0
- package/dist/token-util-KCWYFARR.js +6 -0
- package/package.json +18 -18
- package/LICENSE +0 -21
- package/dist/chunk-7XYYGJAC.js.map +0 -1
- package/dist/chunk-D44RV4HN.js.map +0 -1
- package/dist/chunk-HWGALLUR.js.map +0 -1
- package/dist/token-O4PKPL7Y.js.map +0 -1
- package/dist/token-util-SOXXDYPK.js +0 -6
- /package/dist/{token-util-SOXXDYPK.js.map → token-util-KCWYFARR.js.map} +0 -0
|
@@ -34,9 +34,9 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
34
34
|
mod
|
|
35
35
|
));
|
|
36
36
|
|
|
37
|
-
// ../../node_modules/.
|
|
37
|
+
// ../../node_modules/.bun/@vercel+oidc@3.0.5/node_modules/@vercel/oidc/dist/token-error.js
|
|
38
38
|
var require_token_error = __commonJS({
|
|
39
|
-
"../../node_modules/.
|
|
39
|
+
"../../node_modules/.bun/@vercel+oidc@3.0.5/node_modules/@vercel/oidc/dist/token-error.js"(exports, module) {
|
|
40
40
|
"use strict";
|
|
41
41
|
var __defProp2 = Object.defineProperty;
|
|
42
42
|
var __getOwnPropDesc2 = Object.getOwnPropertyDescriptor;
|
|
@@ -83,4 +83,4 @@ export {
|
|
|
83
83
|
__toESM,
|
|
84
84
|
require_token_error
|
|
85
85
|
};
|
|
86
|
-
//# sourceMappingURL=chunk-
|
|
86
|
+
//# sourceMappingURL=chunk-UE4GLFVL.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../node_modules/.bun/@vercel+oidc@3.0.5/node_modules/@vercel/oidc/dist/token-error.js"],"sourcesContent":["\"use strict\";\nvar __defProp = Object.defineProperty;\nvar __getOwnPropDesc = Object.getOwnPropertyDescriptor;\nvar __getOwnPropNames = Object.getOwnPropertyNames;\nvar __hasOwnProp = Object.prototype.hasOwnProperty;\nvar __export = (target, all) => {\n for (var name in all)\n __defProp(target, name, { get: all[name], enumerable: true });\n};\nvar __copyProps = (to, from, except, desc) => {\n if (from && typeof from === \"object\" || typeof from === \"function\") {\n for (let key of __getOwnPropNames(from))\n if (!__hasOwnProp.call(to, key) && key !== except)\n __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });\n }\n return to;\n};\nvar __toCommonJS = (mod) => __copyProps(__defProp({}, \"__esModule\", { value: true }), mod);\nvar token_error_exports = {};\n__export(token_error_exports, {\n VercelOidcTokenError: () => VercelOidcTokenError\n});\nmodule.exports = __toCommonJS(token_error_exports);\nclass VercelOidcTokenError extends Error {\n constructor(message, cause) {\n super(message);\n this.name = \"VercelOidcTokenError\";\n this.cause = cause;\n }\n toString() {\n if (this.cause) {\n return `${this.name}: ${this.message}: ${this.cause}`;\n }\n return `${this.name}: ${this.message}`;\n }\n}\n// Annotate the CommonJS export names for ESM import in node:\n0 && (module.exports = {\n VercelOidcTokenError\n});\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AACA,QAAIA,aAAY,OAAO;AACvB,QAAIC,oBAAmB,OAAO;AAC9B,QAAIC,qBAAoB,OAAO;AAC/B,QAAIC,gBAAe,OAAO,UAAU;AACpC,QAAIC,YAAW,CAAC,QAAQ,QAAQ;AAC9B,eAAS,QAAQ;AACf,QAAAJ,WAAU,QAAQ,MAAM,EAAE,KAAK,IAAI,IAAI,GAAG,YAAY,KAAK,CAAC;AAAA,IAChE;AACA,QAAIK,eAAc,CAAC,IAAI,MAAM,QAAQ,SAAS;AAC5C,UAAI,QAAQ,OAAO,SAAS,YAAY,OAAO,SAAS,YAAY;AAClE,iBAAS,OAAOH,mBAAkB,IAAI;AACpC,cAAI,CAACC,cAAa,KAAK,IAAI,GAAG,KAAK,QAAQ;AACzC,YAAAH,WAAU,IAAI,KAAK,EAAE,KAAK,MAAM,KAAK,GAAG,GAAG,YAAY,EAAE,OAAOC,kBAAiB,MAAM,GAAG,MAAM,KAAK,WAAW,CAAC;AAAA,MACvH;AACA,aAAO;AAAA,IACT;AACA,QAAI,eAAe,CAAC,QAAQI,aAAYL,WAAU,CAAC,GAAG,cAAc,EAAE,OAAO,KAAK,CAAC,GAAG,GAAG;AACzF,QAAI,sBAAsB,CAAC;AAC3B,IAAAI,UAAS,qBAAqB;AAAA,MAC5B,sBAAsB,MAAM;AAAA,IAC9B,CAAC;AACD,WAAO,UAAU,aAAa,mBAAmB;AACjD,QAAM,uBAAN,cAAmC,MAAM;AAAA,MACvC,YAAY,SAAS,OAAO;AAC1B,cAAM,OAAO;AACb,aAAK,OAAO;AACZ,aAAK,QAAQ;AAAA,MACf;AAAA,MACA,WAAW;AACT,YAAI,KAAK,OAAO;AACd,iBAAO,GAAG,KAAK,IAAI,KAAK,KAAK,OAAO,KAAK,KAAK,KAAK;AAAA,QACrD;AACA,eAAO,GAAG,KAAK,IAAI,KAAK,KAAK,OAAO;AAAA,MACtC;AAAA,IACF;AAAA;AAAA;","names":["__defProp","__getOwnPropDesc","__getOwnPropNames","__hasOwnProp","__export","__copyProps"]}
|
package/dist/cli.js
CHANGED
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\
|
|
1
|
+
{"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { runCli } from \"./index.js\";\n\nvoid runCli();\n"],"mappings":";;;;;;;AAGA,KAAK,OAAO;","names":[]}
|
package/dist/index.js
CHANGED
|
@@ -1,27 +1,27 @@
|
|
|
1
|
-
{
|
|
2
|
-
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
-
"title": "AgentV Config Schema",
|
|
4
|
-
"description": "Schema for .agentv/config.yaml configuration files",
|
|
5
|
-
"type": "object",
|
|
6
|
-
"properties": {
|
|
7
|
-
"$schema": {
|
|
8
|
-
"type": "string",
|
|
9
|
-
"description": "Schema identifier",
|
|
10
|
-
"enum": ["agentv-config-v2"]
|
|
11
|
-
},
|
|
12
|
-
"guideline_patterns": {
|
|
13
|
-
"type": "array",
|
|
14
|
-
"description": "Glob patterns for identifying guideline files (instructions, prompts). Files matching these patterns are treated as guidelines, while non-matching files are treated as regular file content.",
|
|
15
|
-
"items": {
|
|
16
|
-
"type": "string",
|
|
17
|
-
"description": "Glob pattern (e.g., '**/*.instructions.md', '**/prompts/**')"
|
|
18
|
-
},
|
|
19
|
-
"examples": [
|
|
20
|
-
["**/*.instructions.md", "**/instructions/**", "**/*.prompt.md", "**/prompts/**"],
|
|
21
|
-
["**/*.guide.md", "**/guidelines/**", "docs/AGENTS.md"]
|
|
22
|
-
]
|
|
23
|
-
}
|
|
24
|
-
},
|
|
25
|
-
"required": ["$schema"],
|
|
26
|
-
"additionalProperties": false
|
|
27
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"title": "AgentV Config Schema",
|
|
4
|
+
"description": "Schema for .agentv/config.yaml configuration files",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"properties": {
|
|
7
|
+
"$schema": {
|
|
8
|
+
"type": "string",
|
|
9
|
+
"description": "Schema identifier",
|
|
10
|
+
"enum": ["agentv-config-v2"]
|
|
11
|
+
},
|
|
12
|
+
"guideline_patterns": {
|
|
13
|
+
"type": "array",
|
|
14
|
+
"description": "Glob patterns for identifying guideline files (instructions, prompts). Files matching these patterns are treated as guidelines, while non-matching files are treated as regular file content.",
|
|
15
|
+
"items": {
|
|
16
|
+
"type": "string",
|
|
17
|
+
"description": "Glob pattern (e.g., '**/*.instructions.md', '**/prompts/**')"
|
|
18
|
+
},
|
|
19
|
+
"examples": [
|
|
20
|
+
["**/*.instructions.md", "**/instructions/**", "**/*.prompt.md", "**/prompts/**"],
|
|
21
|
+
["**/*.guide.md", "**/guidelines/**", "docs/AGENTS.md"]
|
|
22
|
+
]
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"required": ["$schema"],
|
|
26
|
+
"additionalProperties": false
|
|
27
|
+
}
|
|
@@ -1,217 +1,217 @@
|
|
|
1
|
-
{
|
|
2
|
-
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
-
"title": "AgentV Eval Schema",
|
|
4
|
-
"description": "Schema for YAML evaluation files with conversation flows, multiple evaluators, and execution configuration",
|
|
5
|
-
"type": "object",
|
|
6
|
-
"properties": {
|
|
7
|
-
"$schema": {
|
|
8
|
-
"type": "string",
|
|
9
|
-
"description": "Schema identifier",
|
|
10
|
-
"enum": ["agentv-eval-v2"]
|
|
11
|
-
},
|
|
12
|
-
"description": {
|
|
13
|
-
"type": "string",
|
|
14
|
-
"description": "Description of what this eval suite covers"
|
|
15
|
-
},
|
|
16
|
-
"target": {
|
|
17
|
-
"type": "string",
|
|
18
|
-
"description": "(Deprecated: use execution.target instead) Default target configuration name. Can be overridden per eval case."
|
|
19
|
-
},
|
|
20
|
-
"execution": {
|
|
21
|
-
"type": "object",
|
|
22
|
-
"description": "Default execution configuration for all eval cases (can be overridden per case)",
|
|
23
|
-
"properties": {
|
|
24
|
-
"target": {
|
|
25
|
-
"type": "string",
|
|
26
|
-
"description": "Default target configuration name (e.g., default, azure_base, vscode_projectx). Can be overridden per eval case."
|
|
27
|
-
},
|
|
28
|
-
"evaluators": {
|
|
29
|
-
"type": "array",
|
|
30
|
-
"description": "Default evaluators for all eval cases (code-based and LLM judges)",
|
|
31
|
-
"items": {
|
|
32
|
-
"type": "object",
|
|
33
|
-
"properties": {
|
|
34
|
-
"name": {
|
|
35
|
-
"type": "string",
|
|
36
|
-
"description": "Evaluator name/identifier"
|
|
37
|
-
},
|
|
38
|
-
"type": {
|
|
39
|
-
"type": "string",
|
|
40
|
-
"enum": ["code", "llm_judge"],
|
|
41
|
-
"description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
|
|
42
|
-
},
|
|
43
|
-
"script": {
|
|
44
|
-
"type": "string",
|
|
45
|
-
"description": "Path to evaluator script (for type: code)"
|
|
46
|
-
},
|
|
47
|
-
"prompt": {
|
|
48
|
-
"type": "string",
|
|
49
|
-
"description": "Path to judge prompt file (for type: llm_judge)"
|
|
50
|
-
}
|
|
51
|
-
},
|
|
52
|
-
"required": ["name", "type"],
|
|
53
|
-
"additionalProperties": true
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
},
|
|
57
|
-
"additionalProperties": true
|
|
58
|
-
},
|
|
59
|
-
"evalcases": {
|
|
60
|
-
"type": "array",
|
|
61
|
-
"description": "Array of evaluation cases",
|
|
62
|
-
"minItems": 1,
|
|
63
|
-
"items": {
|
|
64
|
-
"type": "object",
|
|
65
|
-
"properties": {
|
|
66
|
-
"id": {
|
|
67
|
-
"type": "string",
|
|
68
|
-
"description": "Unique identifier for the eval case"
|
|
69
|
-
},
|
|
70
|
-
"conversation_id": {
|
|
71
|
-
"type": "string",
|
|
72
|
-
"description": "Optional conversation identifier for threading multiple eval cases together"
|
|
73
|
-
},
|
|
74
|
-
"outcome": {
|
|
75
|
-
"type": "string",
|
|
76
|
-
"description": "Description of what the AI should accomplish in this eval"
|
|
77
|
-
},
|
|
78
|
-
"note": {
|
|
79
|
-
"type": "string",
|
|
80
|
-
"description": "Optional note or additional context for the eval case. Use this to document test-specific considerations, known limitations, or rationale for expected behavior."
|
|
81
|
-
},
|
|
82
|
-
"input_messages": {
|
|
83
|
-
"type": "array",
|
|
84
|
-
"description": "Input messages for the conversation",
|
|
85
|
-
"minItems": 1,
|
|
86
|
-
"items": {
|
|
87
|
-
"type": "object",
|
|
88
|
-
"properties": {
|
|
89
|
-
"role": {
|
|
90
|
-
"type": "string",
|
|
91
|
-
"enum": ["system", "user", "assistant", "tool"],
|
|
92
|
-
"description": "Message role"
|
|
93
|
-
},
|
|
94
|
-
"content": {
|
|
95
|
-
"oneOf": [
|
|
96
|
-
{
|
|
97
|
-
"type": "string",
|
|
98
|
-
"description": "Simple text content"
|
|
99
|
-
},
|
|
100
|
-
{
|
|
101
|
-
"type": "array",
|
|
102
|
-
"description": "Mixed content items (text and file references)",
|
|
103
|
-
"items": {
|
|
104
|
-
"type": "object",
|
|
105
|
-
"properties": {
|
|
106
|
-
"type": {
|
|
107
|
-
"type": "string",
|
|
108
|
-
"enum": ["text", "file"],
|
|
109
|
-
"description": "Content type: 'text' for inline content, 'file' for file references"
|
|
110
|
-
},
|
|
111
|
-
"value": {
|
|
112
|
-
"type": "string",
|
|
113
|
-
"description": "Text content or file path. Relative paths (e.g., ../prompts/file.md) are resolved from eval file directory. Absolute paths (e.g., /docs/examples/prompts/file.md) are resolved from repo root."
|
|
114
|
-
}
|
|
115
|
-
},
|
|
116
|
-
"required": ["type", "value"],
|
|
117
|
-
"additionalProperties": false
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
]
|
|
121
|
-
}
|
|
122
|
-
},
|
|
123
|
-
"required": ["role", "content"],
|
|
124
|
-
"additionalProperties": false
|
|
125
|
-
}
|
|
126
|
-
},
|
|
127
|
-
"expected_messages": {
|
|
128
|
-
"type": "array",
|
|
129
|
-
"description": "Expected response messages",
|
|
130
|
-
"minItems": 1,
|
|
131
|
-
"items": {
|
|
132
|
-
"type": "object",
|
|
133
|
-
"properties": {
|
|
134
|
-
"role": {
|
|
135
|
-
"type": "string",
|
|
136
|
-
"enum": ["system", "user", "assistant", "tool"],
|
|
137
|
-
"description": "Message role"
|
|
138
|
-
},
|
|
139
|
-
"content": {
|
|
140
|
-
"oneOf": [
|
|
141
|
-
{
|
|
142
|
-
"type": "string",
|
|
143
|
-
"description": "Simple text content"
|
|
144
|
-
},
|
|
145
|
-
{
|
|
146
|
-
"type": "array",
|
|
147
|
-
"description": "Mixed content items",
|
|
148
|
-
"items": {
|
|
149
|
-
"type": "object",
|
|
150
|
-
"properties": {
|
|
151
|
-
"type": {
|
|
152
|
-
"type": "string",
|
|
153
|
-
"enum": ["text", "file"]
|
|
154
|
-
},
|
|
155
|
-
"value": {
|
|
156
|
-
"type": "string"
|
|
157
|
-
}
|
|
158
|
-
},
|
|
159
|
-
"required": ["type", "value"],
|
|
160
|
-
"additionalProperties": false
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
]
|
|
164
|
-
}
|
|
165
|
-
},
|
|
166
|
-
"required": ["role", "content"],
|
|
167
|
-
"additionalProperties": false
|
|
168
|
-
}
|
|
169
|
-
},
|
|
170
|
-
"execution": {
|
|
171
|
-
"type": "object",
|
|
172
|
-
"description": "Per-case execution configuration",
|
|
173
|
-
"properties": {
|
|
174
|
-
"target": {
|
|
175
|
-
"type": "string",
|
|
176
|
-
"description": "Override target for this specific eval case"
|
|
177
|
-
},
|
|
178
|
-
"evaluators": {
|
|
179
|
-
"type": "array",
|
|
180
|
-
"description": "Multiple evaluators (code-based and LLM judges)",
|
|
181
|
-
"items": {
|
|
182
|
-
"type": "object",
|
|
183
|
-
"properties": {
|
|
184
|
-
"name": {
|
|
185
|
-
"type": "string",
|
|
186
|
-
"description": "Evaluator name/identifier"
|
|
187
|
-
},
|
|
188
|
-
"type": {
|
|
189
|
-
"type": "string",
|
|
190
|
-
"enum": ["code", "llm_judge"],
|
|
191
|
-
"description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
|
|
192
|
-
},
|
|
193
|
-
"script": {
|
|
194
|
-
"type": "string",
|
|
195
|
-
"description": "Path to evaluator script (for type: code)"
|
|
196
|
-
},
|
|
197
|
-
"prompt": {
|
|
198
|
-
"type": "string",
|
|
199
|
-
"description": "Path to judge prompt file (for type: llm_judge)"
|
|
200
|
-
}
|
|
201
|
-
},
|
|
202
|
-
"required": ["name", "type"],
|
|
203
|
-
"additionalProperties": true
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
},
|
|
207
|
-
"additionalProperties": true
|
|
208
|
-
}
|
|
209
|
-
},
|
|
210
|
-
"required": ["id", "outcome", "input_messages", "expected_messages"],
|
|
211
|
-
"additionalProperties": false
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
},
|
|
215
|
-
"required": ["evalcases"],
|
|
216
|
-
"additionalProperties": false
|
|
217
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"title": "AgentV Eval Schema",
|
|
4
|
+
"description": "Schema for YAML evaluation files with conversation flows, multiple evaluators, and execution configuration",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"properties": {
|
|
7
|
+
"$schema": {
|
|
8
|
+
"type": "string",
|
|
9
|
+
"description": "Schema identifier",
|
|
10
|
+
"enum": ["agentv-eval-v2"]
|
|
11
|
+
},
|
|
12
|
+
"description": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "Description of what this eval suite covers"
|
|
15
|
+
},
|
|
16
|
+
"target": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "(Deprecated: use execution.target instead) Default target configuration name. Can be overridden per eval case."
|
|
19
|
+
},
|
|
20
|
+
"execution": {
|
|
21
|
+
"type": "object",
|
|
22
|
+
"description": "Default execution configuration for all eval cases (can be overridden per case)",
|
|
23
|
+
"properties": {
|
|
24
|
+
"target": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "Default target configuration name (e.g., default, azure_base, vscode_projectx). Can be overridden per eval case."
|
|
27
|
+
},
|
|
28
|
+
"evaluators": {
|
|
29
|
+
"type": "array",
|
|
30
|
+
"description": "Default evaluators for all eval cases (code-based and LLM judges)",
|
|
31
|
+
"items": {
|
|
32
|
+
"type": "object",
|
|
33
|
+
"properties": {
|
|
34
|
+
"name": {
|
|
35
|
+
"type": "string",
|
|
36
|
+
"description": "Evaluator name/identifier"
|
|
37
|
+
},
|
|
38
|
+
"type": {
|
|
39
|
+
"type": "string",
|
|
40
|
+
"enum": ["code", "llm_judge"],
|
|
41
|
+
"description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
|
|
42
|
+
},
|
|
43
|
+
"script": {
|
|
44
|
+
"type": "string",
|
|
45
|
+
"description": "Path to evaluator script (for type: code)"
|
|
46
|
+
},
|
|
47
|
+
"prompt": {
|
|
48
|
+
"type": "string",
|
|
49
|
+
"description": "Path to judge prompt file (for type: llm_judge)"
|
|
50
|
+
}
|
|
51
|
+
},
|
|
52
|
+
"required": ["name", "type"],
|
|
53
|
+
"additionalProperties": true
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
},
|
|
57
|
+
"additionalProperties": true
|
|
58
|
+
},
|
|
59
|
+
"evalcases": {
|
|
60
|
+
"type": "array",
|
|
61
|
+
"description": "Array of evaluation cases",
|
|
62
|
+
"minItems": 1,
|
|
63
|
+
"items": {
|
|
64
|
+
"type": "object",
|
|
65
|
+
"properties": {
|
|
66
|
+
"id": {
|
|
67
|
+
"type": "string",
|
|
68
|
+
"description": "Unique identifier for the eval case"
|
|
69
|
+
},
|
|
70
|
+
"conversation_id": {
|
|
71
|
+
"type": "string",
|
|
72
|
+
"description": "Optional conversation identifier for threading multiple eval cases together"
|
|
73
|
+
},
|
|
74
|
+
"outcome": {
|
|
75
|
+
"type": "string",
|
|
76
|
+
"description": "Description of what the AI should accomplish in this eval"
|
|
77
|
+
},
|
|
78
|
+
"note": {
|
|
79
|
+
"type": "string",
|
|
80
|
+
"description": "Optional note or additional context for the eval case. Use this to document test-specific considerations, known limitations, or rationale for expected behavior."
|
|
81
|
+
},
|
|
82
|
+
"input_messages": {
|
|
83
|
+
"type": "array",
|
|
84
|
+
"description": "Input messages for the conversation",
|
|
85
|
+
"minItems": 1,
|
|
86
|
+
"items": {
|
|
87
|
+
"type": "object",
|
|
88
|
+
"properties": {
|
|
89
|
+
"role": {
|
|
90
|
+
"type": "string",
|
|
91
|
+
"enum": ["system", "user", "assistant", "tool"],
|
|
92
|
+
"description": "Message role"
|
|
93
|
+
},
|
|
94
|
+
"content": {
|
|
95
|
+
"oneOf": [
|
|
96
|
+
{
|
|
97
|
+
"type": "string",
|
|
98
|
+
"description": "Simple text content"
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
"type": "array",
|
|
102
|
+
"description": "Mixed content items (text and file references)",
|
|
103
|
+
"items": {
|
|
104
|
+
"type": "object",
|
|
105
|
+
"properties": {
|
|
106
|
+
"type": {
|
|
107
|
+
"type": "string",
|
|
108
|
+
"enum": ["text", "file"],
|
|
109
|
+
"description": "Content type: 'text' for inline content, 'file' for file references"
|
|
110
|
+
},
|
|
111
|
+
"value": {
|
|
112
|
+
"type": "string",
|
|
113
|
+
"description": "Text content or file path. Relative paths (e.g., ../prompts/file.md) are resolved from eval file directory. Absolute paths (e.g., /docs/examples/prompts/file.md) are resolved from repo root."
|
|
114
|
+
}
|
|
115
|
+
},
|
|
116
|
+
"required": ["type", "value"],
|
|
117
|
+
"additionalProperties": false
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
]
|
|
121
|
+
}
|
|
122
|
+
},
|
|
123
|
+
"required": ["role", "content"],
|
|
124
|
+
"additionalProperties": false
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
"expected_messages": {
|
|
128
|
+
"type": "array",
|
|
129
|
+
"description": "Expected response messages",
|
|
130
|
+
"minItems": 1,
|
|
131
|
+
"items": {
|
|
132
|
+
"type": "object",
|
|
133
|
+
"properties": {
|
|
134
|
+
"role": {
|
|
135
|
+
"type": "string",
|
|
136
|
+
"enum": ["system", "user", "assistant", "tool"],
|
|
137
|
+
"description": "Message role"
|
|
138
|
+
},
|
|
139
|
+
"content": {
|
|
140
|
+
"oneOf": [
|
|
141
|
+
{
|
|
142
|
+
"type": "string",
|
|
143
|
+
"description": "Simple text content"
|
|
144
|
+
},
|
|
145
|
+
{
|
|
146
|
+
"type": "array",
|
|
147
|
+
"description": "Mixed content items",
|
|
148
|
+
"items": {
|
|
149
|
+
"type": "object",
|
|
150
|
+
"properties": {
|
|
151
|
+
"type": {
|
|
152
|
+
"type": "string",
|
|
153
|
+
"enum": ["text", "file"]
|
|
154
|
+
},
|
|
155
|
+
"value": {
|
|
156
|
+
"type": "string"
|
|
157
|
+
}
|
|
158
|
+
},
|
|
159
|
+
"required": ["type", "value"],
|
|
160
|
+
"additionalProperties": false
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
]
|
|
164
|
+
}
|
|
165
|
+
},
|
|
166
|
+
"required": ["role", "content"],
|
|
167
|
+
"additionalProperties": false
|
|
168
|
+
}
|
|
169
|
+
},
|
|
170
|
+
"execution": {
|
|
171
|
+
"type": "object",
|
|
172
|
+
"description": "Per-case execution configuration",
|
|
173
|
+
"properties": {
|
|
174
|
+
"target": {
|
|
175
|
+
"type": "string",
|
|
176
|
+
"description": "Override target for this specific eval case"
|
|
177
|
+
},
|
|
178
|
+
"evaluators": {
|
|
179
|
+
"type": "array",
|
|
180
|
+
"description": "Multiple evaluators (code-based and LLM judges)",
|
|
181
|
+
"items": {
|
|
182
|
+
"type": "object",
|
|
183
|
+
"properties": {
|
|
184
|
+
"name": {
|
|
185
|
+
"type": "string",
|
|
186
|
+
"description": "Evaluator name/identifier"
|
|
187
|
+
},
|
|
188
|
+
"type": {
|
|
189
|
+
"type": "string",
|
|
190
|
+
"enum": ["code", "llm_judge"],
|
|
191
|
+
"description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
|
|
192
|
+
},
|
|
193
|
+
"script": {
|
|
194
|
+
"type": "string",
|
|
195
|
+
"description": "Path to evaluator script (for type: code)"
|
|
196
|
+
},
|
|
197
|
+
"prompt": {
|
|
198
|
+
"type": "string",
|
|
199
|
+
"description": "Path to judge prompt file (for type: llm_judge)"
|
|
200
|
+
}
|
|
201
|
+
},
|
|
202
|
+
"required": ["name", "type"],
|
|
203
|
+
"additionalProperties": true
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
},
|
|
207
|
+
"additionalProperties": true
|
|
208
|
+
}
|
|
209
|
+
},
|
|
210
|
+
"required": ["id", "outcome", "input_messages", "expected_messages"],
|
|
211
|
+
"additionalProperties": false
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
},
|
|
215
|
+
"required": ["evalcases"],
|
|
216
|
+
"additionalProperties": false
|
|
217
|
+
}
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
import {
|
|
2
2
|
require_token_util
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-BRXRFISD.js";
|
|
4
4
|
import {
|
|
5
5
|
__commonJS,
|
|
6
6
|
require_token_error
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-UE4GLFVL.js";
|
|
8
8
|
|
|
9
|
-
// ../../node_modules/.
|
|
9
|
+
// ../../node_modules/.bun/@vercel+oidc@3.0.5/node_modules/@vercel/oidc/dist/token.js
|
|
10
10
|
var require_token = __commonJS({
|
|
11
|
-
"../../node_modules/.
|
|
11
|
+
"../../node_modules/.bun/@vercel+oidc@3.0.5/node_modules/@vercel/oidc/dist/token.js"(exports, module) {
|
|
12
12
|
var __defProp = Object.defineProperty;
|
|
13
13
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
14
14
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
@@ -60,4 +60,4 @@ var require_token = __commonJS({
|
|
|
60
60
|
}
|
|
61
61
|
});
|
|
62
62
|
export default require_token();
|
|
63
|
-
//# sourceMappingURL=token-
|
|
63
|
+
//# sourceMappingURL=token-S7Q7YUO6.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../node_modules/.bun/@vercel+oidc@3.0.5/node_modules/@vercel/oidc/dist/token.js"],"sourcesContent":["\"use strict\";\nvar __defProp = Object.defineProperty;\nvar __getOwnPropDesc = Object.getOwnPropertyDescriptor;\nvar __getOwnPropNames = Object.getOwnPropertyNames;\nvar __hasOwnProp = Object.prototype.hasOwnProperty;\nvar __export = (target, all) => {\n for (var name in all)\n __defProp(target, name, { get: all[name], enumerable: true });\n};\nvar __copyProps = (to, from, except, desc) => {\n if (from && typeof from === \"object\" || typeof from === \"function\") {\n for (let key of __getOwnPropNames(from))\n if (!__hasOwnProp.call(to, key) && key !== except)\n __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });\n }\n return to;\n};\nvar __toCommonJS = (mod) => __copyProps(__defProp({}, \"__esModule\", { value: true }), mod);\nvar token_exports = {};\n__export(token_exports, {\n refreshToken: () => refreshToken\n});\nmodule.exports = __toCommonJS(token_exports);\nvar import_token_error = require(\"./token-error\");\nvar import_token_util = require(\"./token-util\");\nasync function refreshToken() {\n const { projectId, teamId } = (0, import_token_util.findProjectInfo)();\n let maybeToken = (0, import_token_util.loadToken)(projectId);\n if (!maybeToken || (0, import_token_util.isExpired)((0, import_token_util.getTokenPayload)(maybeToken.token))) {\n const authToken = (0, import_token_util.getVercelCliToken)();\n if (!authToken) {\n throw new import_token_error.VercelOidcTokenError(\n \"Failed to refresh OIDC token: login to vercel cli\"\n );\n }\n if (!projectId) {\n throw new import_token_error.VercelOidcTokenError(\n \"Failed to refresh OIDC token: project id not found\"\n );\n }\n maybeToken = await (0, import_token_util.getVercelOidcToken)(authToken, projectId, teamId);\n if (!maybeToken) {\n throw new import_token_error.VercelOidcTokenError(\"Failed to refresh OIDC token\");\n }\n (0, import_token_util.saveToken)(maybeToken, projectId);\n }\n process.env.VERCEL_OIDC_TOKEN = maybeToken.token;\n return;\n}\n// Annotate the CommonJS export names for ESM import in node:\n0 && (module.exports = {\n refreshToken\n});\n"],"mappings":";;;;;;;;;AAAA;AAAA;AACA,QAAI,YAAY,OAAO;AACvB,QAAI,mBAAmB,OAAO;AAC9B,QAAI,oBAAoB,OAAO;AAC/B,QAAI,eAAe,OAAO,UAAU;AACpC,QAAI,WAAW,CAAC,QAAQ,QAAQ;AAC9B,eAAS,QAAQ;AACf,kBAAU,QAAQ,MAAM,EAAE,KAAK,IAAI,IAAI,GAAG,YAAY,KAAK,CAAC;AAAA,IAChE;AACA,QAAI,cAAc,CAAC,IAAI,MAAM,QAAQ,SAAS;AAC5C,UAAI,QAAQ,OAAO,SAAS,YAAY,OAAO,SAAS,YAAY;AAClE,iBAAS,OAAO,kBAAkB,IAAI;AACpC,cAAI,CAAC,aAAa,KAAK,IAAI,GAAG,KAAK,QAAQ;AACzC,sBAAU,IAAI,KAAK,EAAE,KAAK,MAAM,KAAK,GAAG,GAAG,YAAY,EAAE,OAAO,iBAAiB,MAAM,GAAG,MAAM,KAAK,WAAW,CAAC;AAAA,MACvH;AACA,aAAO;AAAA,IACT;AACA,QAAI,eAAe,CAAC,QAAQ,YAAY,UAAU,CAAC,GAAG,cAAc,EAAE,OAAO,KAAK,CAAC,GAAG,GAAG;AACzF,QAAI,gBAAgB,CAAC;AACrB,aAAS,eAAe;AAAA,MACtB,cAAc,MAAM;AAAA,IACtB,CAAC;AACD,WAAO,UAAU,aAAa,aAAa;AAC3C,QAAI,qBAAqB;AACzB,QAAI,oBAAoB;AACxB,mBAAe,eAAe;AAC5B,YAAM,EAAE,WAAW,OAAO,KAAK,GAAG,kBAAkB,iBAAiB;AACrE,UAAI,cAAc,GAAG,kBAAkB,WAAW,SAAS;AAC3D,UAAI,CAAC,eAAe,GAAG,kBAAkB,YAAY,GAAG,kBAAkB,iBAAiB,WAAW,KAAK,CAAC,GAAG;AAC7G,cAAM,aAAa,GAAG,kBAAkB,mBAAmB;AAC3D,YAAI,CAAC,WAAW;AACd,gBAAM,IAAI,mBAAmB;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AACA,YAAI,CAAC,WAAW;AACd,gBAAM,IAAI,mBAAmB;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AACA,qBAAa,OAAO,GAAG,kBAAkB,oBAAoB,WAAW,WAAW,MAAM;AACzF,YAAI,CAAC,YAAY;AACf,gBAAM,IAAI,mBAAmB,qBAAqB,8BAA8B;AAAA,QAClF;AACA,SAAC,GAAG,kBAAkB,WAAW,YAAY,SAAS;AAAA,MACxD;AACA,cAAQ,IAAI,oBAAoB,WAAW;AAC3C;AAAA,IACF;AAAA;AAAA;","names":[]}
|