agentv 2.5.4 → 2.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -6
- package/dist/{chunk-LTPZBEJU.js → chunk-BKMQNEUD.js} +9 -3
- package/dist/{chunk-LTPZBEJU.js.map → chunk-BKMQNEUD.js.map} +1 -1
- package/dist/{chunk-A7TQUSVG.js → chunk-LJVS3JAK.js} +2 -2
- package/dist/{chunk-I4EMT5Q2.js → chunk-LTEYARQG.js} +297 -112
- package/dist/chunk-LTEYARQG.js.map +1 -0
- package/dist/cli.js +2 -2
- package/dist/index.js +2 -2
- package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +123 -244
- package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +56 -271
- package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +55 -180
- package/dist/{token-DVVSDOYP.js → token-D3IYDJQZ.js} +3 -3
- package/dist/{token-util-YEKFTEJA.js → token-util-FWFPR2BV.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-I4EMT5Q2.js.map +0 -1
- /package/dist/{chunk-A7TQUSVG.js.map → chunk-LJVS3JAK.js.map} +0 -0
- /package/dist/{token-DVVSDOYP.js.map → token-D3IYDJQZ.js.map} +0 -0
- /package/dist/{token-util-YEKFTEJA.js.map → token-util-FWFPR2BV.js.map} +0 -0
package/README.md
CHANGED
|
@@ -184,7 +184,7 @@ execution:
|
|
|
184
184
|
script: ./validators/check_answer.py
|
|
185
185
|
```
|
|
186
186
|
|
|
187
|
-
For complete templates, examples, and evaluator patterns, see: [custom-evaluators
|
|
187
|
+
For complete templates, examples, and evaluator patterns, see: [custom-evaluators](https://agentv.dev/evaluators/custom-evaluators/)
|
|
188
188
|
|
|
189
189
|
### Compare Evaluation Results
|
|
190
190
|
|
|
@@ -238,7 +238,7 @@ Write validators in any language (Python, TypeScript, Node, etc.):
|
|
|
238
238
|
```
|
|
239
239
|
|
|
240
240
|
For complete examples and patterns, see:
|
|
241
|
-
- [custom-evaluators
|
|
241
|
+
- [custom-evaluators](https://agentv.dev/evaluators/custom-evaluators/)
|
|
242
242
|
- [code-judge-sdk example](examples/features/code-judge-sdk)
|
|
243
243
|
|
|
244
244
|
### LLM Judges
|
|
@@ -281,7 +281,7 @@ Auto-generate rubrics from expected outcomes:
|
|
|
281
281
|
agentv generate rubrics evals/my-eval.yaml
|
|
282
282
|
```
|
|
283
283
|
|
|
284
|
-
See [rubric
|
|
284
|
+
See [rubric evaluator](https://agentv.dev/evaluation/rubrics/) for detailed patterns.
|
|
285
285
|
|
|
286
286
|
## Advanced Configuration
|
|
287
287
|
|
|
@@ -310,9 +310,15 @@ Automatically retries on rate limits, transient 5xx errors, and network failures
|
|
|
310
310
|
- AI agents: Ask Claude Code to `/agentv-eval-builder` to create and iterate on evals
|
|
311
311
|
|
|
312
312
|
**Detailed Guides:**
|
|
313
|
-
- [Evaluation format and structure](.
|
|
314
|
-
- [Custom evaluators](.
|
|
315
|
-
- [
|
|
313
|
+
- [Evaluation format and structure](https://agentv.dev/evaluation/eval-files/)
|
|
314
|
+
- [Custom evaluators](https://agentv.dev/evaluators/custom-evaluators/)
|
|
315
|
+
- [Rubric evaluator](https://agentv.dev/evaluation/rubrics/)
|
|
316
|
+
- [Composite evaluator](https://agentv.dev/evaluators/composite/)
|
|
317
|
+
- [Tool trajectory evaluator](https://agentv.dev/evaluators/tool-trajectory/)
|
|
318
|
+
- [Structured data evaluators](https://agentv.dev/evaluators/structured-data/)
|
|
319
|
+
- [Batch CLI evaluation](https://agentv.dev/evaluation/batch-cli/)
|
|
320
|
+
- [Compare results](https://agentv.dev/tools/compare/)
|
|
321
|
+
- [Example evaluations](https://agentv.dev/evaluation/examples/)
|
|
316
322
|
|
|
317
323
|
**Reference:**
|
|
318
324
|
- Monorepo structure: `packages/core/` (engine), `packages/eval/` (evaluation logic), `apps/cli/` (commands)
|
|
@@ -11,6 +11,9 @@ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require
|
|
|
11
11
|
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
12
12
|
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
13
13
|
});
|
|
14
|
+
var __esm = (fn, res) => function __init() {
|
|
15
|
+
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
16
|
+
};
|
|
14
17
|
var __commonJS = (cb, mod) => function __require2() {
|
|
15
18
|
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
|
|
16
19
|
};
|
|
@@ -34,6 +37,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
34
37
|
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
35
38
|
mod
|
|
36
39
|
));
|
|
40
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
37
41
|
|
|
38
42
|
// ../../node_modules/.bun/@vercel+oidc@3.0.5/node_modules/@vercel/oidc/dist/token-error.js
|
|
39
43
|
var require_token_error = __commonJS({
|
|
@@ -55,12 +59,12 @@ var require_token_error = __commonJS({
|
|
|
55
59
|
}
|
|
56
60
|
return to;
|
|
57
61
|
};
|
|
58
|
-
var
|
|
62
|
+
var __toCommonJS2 = (mod) => __copyProps2(__defProp2({}, "__esModule", { value: true }), mod);
|
|
59
63
|
var token_error_exports = {};
|
|
60
64
|
__export2(token_error_exports, {
|
|
61
65
|
VercelOidcTokenError: () => VercelOidcTokenError
|
|
62
66
|
});
|
|
63
|
-
module.exports =
|
|
67
|
+
module.exports = __toCommonJS2(token_error_exports);
|
|
64
68
|
var VercelOidcTokenError = class extends Error {
|
|
65
69
|
constructor(message, cause) {
|
|
66
70
|
super(message);
|
|
@@ -79,9 +83,11 @@ var require_token_error = __commonJS({
|
|
|
79
83
|
|
|
80
84
|
export {
|
|
81
85
|
__require,
|
|
86
|
+
__esm,
|
|
82
87
|
__commonJS,
|
|
83
88
|
__export,
|
|
84
89
|
__toESM,
|
|
90
|
+
__toCommonJS,
|
|
85
91
|
require_token_error
|
|
86
92
|
};
|
|
87
|
-
//# sourceMappingURL=chunk-
|
|
93
|
+
//# sourceMappingURL=chunk-BKMQNEUD.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../node_modules/.bun/@vercel+oidc@3.0.5/node_modules/@vercel/oidc/dist/token-error.js"],"sourcesContent":["\"use strict\";\nvar __defProp = Object.defineProperty;\nvar __getOwnPropDesc = Object.getOwnPropertyDescriptor;\nvar __getOwnPropNames = Object.getOwnPropertyNames;\nvar __hasOwnProp = Object.prototype.hasOwnProperty;\nvar __export = (target, all) => {\n for (var name in all)\n __defProp(target, name, { get: all[name], enumerable: true });\n};\nvar __copyProps = (to, from, except, desc) => {\n if (from && typeof from === \"object\" || typeof from === \"function\") {\n for (let key of __getOwnPropNames(from))\n if (!__hasOwnProp.call(to, key) && key !== except)\n __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });\n }\n return to;\n};\nvar __toCommonJS = (mod) => __copyProps(__defProp({}, \"__esModule\", { value: true }), mod);\nvar token_error_exports = {};\n__export(token_error_exports, {\n VercelOidcTokenError: () => VercelOidcTokenError\n});\nmodule.exports = __toCommonJS(token_error_exports);\nclass VercelOidcTokenError extends Error {\n constructor(message, cause) {\n super(message);\n this.name = \"VercelOidcTokenError\";\n this.cause = cause;\n }\n toString() {\n if (this.cause) {\n return `${this.name}: ${this.message}: ${this.cause}`;\n }\n return `${this.name}: ${this.message}`;\n }\n}\n// Annotate the CommonJS export names for ESM import in node:\n0 && (module.exports = {\n VercelOidcTokenError\n});\n"],"mappings":"
|
|
1
|
+
{"version":3,"sources":["../../../node_modules/.bun/@vercel+oidc@3.0.5/node_modules/@vercel/oidc/dist/token-error.js"],"sourcesContent":["\"use strict\";\nvar __defProp = Object.defineProperty;\nvar __getOwnPropDesc = Object.getOwnPropertyDescriptor;\nvar __getOwnPropNames = Object.getOwnPropertyNames;\nvar __hasOwnProp = Object.prototype.hasOwnProperty;\nvar __export = (target, all) => {\n for (var name in all)\n __defProp(target, name, { get: all[name], enumerable: true });\n};\nvar __copyProps = (to, from, except, desc) => {\n if (from && typeof from === \"object\" || typeof from === \"function\") {\n for (let key of __getOwnPropNames(from))\n if (!__hasOwnProp.call(to, key) && key !== except)\n __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });\n }\n return to;\n};\nvar __toCommonJS = (mod) => __copyProps(__defProp({}, \"__esModule\", { value: true }), mod);\nvar token_error_exports = {};\n__export(token_error_exports, {\n VercelOidcTokenError: () => VercelOidcTokenError\n});\nmodule.exports = __toCommonJS(token_error_exports);\nclass VercelOidcTokenError extends Error {\n constructor(message, cause) {\n super(message);\n this.name = \"VercelOidcTokenError\";\n this.cause = cause;\n }\n toString() {\n if (this.cause) {\n return `${this.name}: ${this.message}: ${this.cause}`;\n }\n return `${this.name}: ${this.message}`;\n }\n}\n// Annotate the CommonJS export names for ESM import in node:\n0 && (module.exports = {\n VercelOidcTokenError\n});\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AACA,QAAIA,aAAY,OAAO;AACvB,QAAIC,oBAAmB,OAAO;AAC9B,QAAIC,qBAAoB,OAAO;AAC/B,QAAIC,gBAAe,OAAO,UAAU;AACpC,QAAIC,YAAW,CAAC,QAAQ,QAAQ;AAC9B,eAAS,QAAQ;AACf,QAAAJ,WAAU,QAAQ,MAAM,EAAE,KAAK,IAAI,IAAI,GAAG,YAAY,KAAK,CAAC;AAAA,IAChE;AACA,QAAIK,eAAc,CAAC,IAAI,MAAM,QAAQ,SAAS;AAC5C,UAAI,QAAQ,OAAO,SAAS,YAAY,OAAO,SAAS,YAAY;AAClE,iBAAS,OAAOH,mBAAkB,IAAI;AACpC,cAAI,CAACC,cAAa,KAAK,IAAI,GAAG,KAAK,QAAQ;AACzC,YAAAH,WAAU,IAAI,KAAK,EAAE,KAAK,MAAM,KAAK,GAAG,GAAG,YAAY,EAAE,OAAOC,kBAAiB,MAAM,GAAG,MAAM,KAAK,WAAW,CAAC;AAAA,MACvH;AACA,aAAO;AAAA,IACT;AACA,QAAIK,gBAAe,CAAC,QAAQD,aAAYL,WAAU,CAAC,GAAG,cAAc,EAAE,OAAO,KAAK,CAAC,GAAG,GAAG;AACzF,QAAI,sBAAsB,CAAC;AAC3B,IAAAI,UAAS,qBAAqB;AAAA,MAC5B,sBAAsB,MAAM;AAAA,IAC9B,CAAC;AACD,WAAO,UAAUE,cAAa,mBAAmB;AACjD,QAAM,uBAAN,cAAmC,MAAM;AAAA,MACvC,YAAY,SAAS,OAAO;AAC1B,cAAM,OAAO;AACb,aAAK,OAAO;AACZ,aAAK,QAAQ;AAAA,MACf;AAAA,MACA,WAAW;AACT,YAAI,KAAK,OAAO;AACd,iBAAO,GAAG,KAAK,IAAI,KAAK,KAAK,OAAO,KAAK,KAAK,KAAK;AAAA,QACrD;AACA,eAAO,GAAG,KAAK,IAAI,KAAK,KAAK,OAAO;AAAA,MACtC;AAAA,IACF;AAAA;AAAA;","names":["__defProp","__getOwnPropDesc","__getOwnPropNames","__hasOwnProp","__export","__copyProps","__toCommonJS"]}
|
|
@@ -3,7 +3,7 @@ import {
|
|
|
3
3
|
__commonJS,
|
|
4
4
|
__require,
|
|
5
5
|
require_token_error
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-BKMQNEUD.js";
|
|
7
7
|
|
|
8
8
|
// ../../node_modules/.bun/@vercel+oidc@3.0.5/node_modules/@vercel/oidc/dist/token-io.js
|
|
9
9
|
var require_token_io = __commonJS({
|
|
@@ -256,4 +256,4 @@ var require_token_util = __commonJS({
|
|
|
256
256
|
export {
|
|
257
257
|
require_token_util
|
|
258
258
|
};
|
|
259
|
-
//# sourceMappingURL=chunk-
|
|
259
|
+
//# sourceMappingURL=chunk-LJVS3JAK.js.map
|