agentv 2.13.0 → 2.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -14
- package/dist/{chunk-UWDI4UVN.js → chunk-5646K2XJ.js} +15 -14
- package/dist/{chunk-UWDI4UVN.js.map → chunk-5646K2XJ.js.map} +1 -1
- package/dist/{chunk-FSBZM3HT.js → chunk-OQN2GDEU.js} +188 -162
- package/dist/chunk-OQN2GDEU.js.map +1 -0
- package/dist/{chunk-M6JYP6A6.js → chunk-YVWP4Z3W.js} +26 -26
- package/dist/chunk-YVWP4Z3W.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-CCUHG3SN.js → dist-QR5OZ4DH.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-P3D5O673.js → interactive-Z6ZV5OGM.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-FSBZM3HT.js.map +0 -1
- package/dist/chunk-M6JYP6A6.js.map +0 -1
- package/dist/{dist-CCUHG3SN.js.map → dist-QR5OZ4DH.js.map} +0 -0
- package/dist/{interactive-P3D5O673.js.map → interactive-Z6ZV5OGM.js.map} +0 -0
package/README.md
CHANGED
|
@@ -60,7 +60,7 @@ tests:
|
|
|
60
60
|
|
|
61
61
|
assert:
|
|
62
62
|
- name: math_check
|
|
63
|
-
type:
|
|
63
|
+
type: code-judge
|
|
64
64
|
script: ./validators/check_math.py
|
|
65
65
|
```
|
|
66
66
|
|
|
@@ -154,10 +154,10 @@ Optional sidecar YAML metadata file (`dataset.eval.yaml` alongside `dataset.json
|
|
|
154
154
|
description: Math evaluation dataset
|
|
155
155
|
dataset: math-tests
|
|
156
156
|
execution:
|
|
157
|
-
target:
|
|
157
|
+
target: azure-base
|
|
158
158
|
assert:
|
|
159
159
|
- name: correctness
|
|
160
|
-
type:
|
|
160
|
+
type: llm-judge
|
|
161
161
|
prompt: ./judges/correctness.md
|
|
162
162
|
```
|
|
163
163
|
|
|
@@ -175,7 +175,7 @@ agentv validate evals/my-eval.yaml
|
|
|
175
175
|
agentv eval evals/my-eval.yaml
|
|
176
176
|
|
|
177
177
|
# Override target
|
|
178
|
-
agentv eval --target
|
|
178
|
+
agentv eval --target azure-base evals/**/*.yaml
|
|
179
179
|
|
|
180
180
|
# Run specific test
|
|
181
181
|
agentv eval --test-id case-123 evals/my-eval.yaml
|
|
@@ -219,7 +219,7 @@ Reference evaluators in your eval file:
|
|
|
219
219
|
```yaml
|
|
220
220
|
assert:
|
|
221
221
|
- name: my_validator
|
|
222
|
-
type:
|
|
222
|
+
type: code-judge
|
|
223
223
|
script: ./validators/check_answer.py
|
|
224
224
|
```
|
|
225
225
|
|
|
@@ -339,7 +339,7 @@ Define execution targets in `.agentv/targets.yaml` to decouple evals from provid
|
|
|
339
339
|
|
|
340
340
|
```yaml
|
|
341
341
|
targets:
|
|
342
|
-
- name:
|
|
342
|
+
- name: azure-base
|
|
343
343
|
provider: azure
|
|
344
344
|
endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
|
|
345
345
|
api_key: ${{ AZURE_OPENAI_API_KEY }}
|
|
@@ -348,12 +348,12 @@ targets:
|
|
|
348
348
|
- name: vscode_dev
|
|
349
349
|
provider: vscode
|
|
350
350
|
workspace_template: ${{ WORKSPACE_PATH }}
|
|
351
|
-
judge_target:
|
|
351
|
+
judge_target: azure-base
|
|
352
352
|
|
|
353
353
|
- name: local_agent
|
|
354
354
|
provider: cli
|
|
355
355
|
command: 'python agent.py --prompt-file {PROMPT_FILE} --output {OUTPUT_FILE}'
|
|
356
|
-
judge_target:
|
|
356
|
+
judge_target: azure-base
|
|
357
357
|
```
|
|
358
358
|
|
|
359
359
|
Supports: `azure`, `anthropic`, `gemini`, `codex`, `copilot`, `pi-coding-agent`, `claude`, `vscode`, `vscode-insiders`, `cli`, and `mock`.
|
|
@@ -398,12 +398,12 @@ All assertions support `weight`, `required`, and `negate` flags. Use `negate: tr
|
|
|
398
398
|
```yaml
|
|
399
399
|
assert:
|
|
400
400
|
# Case-insensitive matching for natural language variation
|
|
401
|
-
- type:
|
|
401
|
+
- type: icontains-any
|
|
402
402
|
value: ["missing rule code", "need rule code", "provide rule code"]
|
|
403
403
|
required: true
|
|
404
404
|
|
|
405
405
|
# Multiple required terms
|
|
406
|
-
- type:
|
|
406
|
+
- type: icontains-all
|
|
407
407
|
value: ["country code", "rule codes"]
|
|
408
408
|
|
|
409
409
|
# Case-insensitive regex
|
|
@@ -423,10 +423,10 @@ targets:
|
|
|
423
423
|
# Agent target — requires judge_target for LLM-based evaluation
|
|
424
424
|
- name: codex_local
|
|
425
425
|
provider: codex
|
|
426
|
-
judge_target:
|
|
426
|
+
judge_target: azure-base # Required: LLM provider for judging
|
|
427
427
|
|
|
428
428
|
# LLM target — no judge_target needed (judges itself)
|
|
429
|
-
- name:
|
|
429
|
+
- name: azure-base
|
|
430
430
|
provider: azure
|
|
431
431
|
```
|
|
432
432
|
|
|
@@ -445,7 +445,7 @@ Create markdown judge files with evaluation criteria and scoring guidelines:
|
|
|
445
445
|
```yaml
|
|
446
446
|
assert:
|
|
447
447
|
- name: semantic_check
|
|
448
|
-
type:
|
|
448
|
+
type: llm-judge
|
|
449
449
|
prompt: ./judges/correctness.md
|
|
450
450
|
```
|
|
451
451
|
|
|
@@ -487,7 +487,7 @@ Configure automatic retry with exponential backoff:
|
|
|
487
487
|
|
|
488
488
|
```yaml
|
|
489
489
|
targets:
|
|
490
|
-
- name:
|
|
490
|
+
- name: azure-base
|
|
491
491
|
provider: azure
|
|
492
492
|
max_retries: 5
|
|
493
493
|
retry_initial_delay_ms: 2000
|
|
@@ -25,12 +25,12 @@ import {
|
|
|
25
25
|
subscribeToCopilotCliLogEntries,
|
|
26
26
|
subscribeToCopilotSdkLogEntries,
|
|
27
27
|
subscribeToPiLogEntries
|
|
28
|
-
} from "./chunk-FSBZM3HT.js";
|
|
28
|
+
} from "./chunk-OQN2GDEU.js";
|
|
29
29
|
|
|
30
30
|
// package.json
|
|
31
31
|
var package_default = {
|
|
32
32
|
name: "agentv",
|
|
33
|
-
version: "2.13.0",
|
|
33
|
+
version: "2.14.1",
|
|
34
34
|
description: "CLI entry point for AgentV",
|
|
35
35
|
type: "module",
|
|
36
36
|
repository: {
|
|
@@ -1266,16 +1266,16 @@ function inferFileTypeFromPath(filePath) {
|
|
|
1266
1266
|
var ASSERTION_TYPES_WITH_STRING_VALUE = /* @__PURE__ */ new Set([
|
|
1267
1267
|
"contains",
|
|
1268
1268
|
"icontains",
|
|
1269
|
-
"
|
|
1270
|
-
"
|
|
1269
|
+
"starts-with",
|
|
1270
|
+
"ends-with",
|
|
1271
1271
|
"equals",
|
|
1272
1272
|
"regex"
|
|
1273
1273
|
]);
|
|
1274
1274
|
var ASSERTION_TYPES_WITH_ARRAY_VALUE = /* @__PURE__ */ new Set([
|
|
1275
|
-
"
|
|
1276
|
-
"
|
|
1277
|
-
"
|
|
1278
|
-
"
|
|
1275
|
+
"contains-any",
|
|
1276
|
+
"contains-all",
|
|
1277
|
+
"icontains-any",
|
|
1278
|
+
"icontains-all"
|
|
1279
1279
|
]);
|
|
1280
1280
|
var VALID_TEST_FILE_EXTENSIONS = /* @__PURE__ */ new Set([".yaml", ".yml", ".jsonl"]);
|
|
1281
1281
|
var NAME_PATTERN = /^[a-z0-9-]+$/;
|
|
@@ -1641,8 +1641,8 @@ function validateAssertArray(assertField, parentLocation, filePath, errors) {
|
|
|
1641
1641
|
});
|
|
1642
1642
|
continue;
|
|
1643
1643
|
}
|
|
1644
|
-
const
|
|
1645
|
-
if (
|
|
1644
|
+
const rawTypeValue = item.type;
|
|
1645
|
+
if (rawTypeValue === void 0 || typeof rawTypeValue !== "string") {
|
|
1646
1646
|
errors.push({
|
|
1647
1647
|
severity: "warning",
|
|
1648
1648
|
filePath,
|
|
@@ -1651,12 +1651,13 @@ function validateAssertArray(assertField, parentLocation, filePath, errors) {
|
|
|
1651
1651
|
});
|
|
1652
1652
|
continue;
|
|
1653
1653
|
}
|
|
1654
|
+
const typeValue = rawTypeValue.replace(/_/g, "-");
|
|
1654
1655
|
if (!isEvaluatorKind(typeValue)) {
|
|
1655
1656
|
errors.push({
|
|
1656
1657
|
severity: "warning",
|
|
1657
1658
|
filePath,
|
|
1658
1659
|
location: `${location}.type`,
|
|
1659
|
-
message: `Unknown assertion type '${
|
|
1660
|
+
message: `Unknown assertion type '${rawTypeValue}'.`
|
|
1660
1661
|
});
|
|
1661
1662
|
continue;
|
|
1662
1663
|
}
|
|
@@ -1881,7 +1882,7 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
1881
1882
|
"delayMinMs",
|
|
1882
1883
|
"delayMaxMs",
|
|
1883
1884
|
"trace"
|
|
1884
|
-
// For testing
|
|
1885
|
+
// For testing tool-trajectory evaluator
|
|
1885
1886
|
]);
|
|
1886
1887
|
var CLAUDE_SETTINGS = /* @__PURE__ */ new Set([
|
|
1887
1888
|
...COMMON_SETTINGS,
|
|
@@ -3011,7 +3012,7 @@ async function runEvalCommand(input) {
|
|
|
3011
3012
|
const useFileExport = !!(options.otelFile || options.traceFile);
|
|
3012
3013
|
if (options.exportOtel || useFileExport) {
|
|
3013
3014
|
try {
|
|
3014
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-CCUHG3SN.js");
|
|
3015
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-QR5OZ4DH.js");
|
|
3015
3016
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
3016
3017
|
let headers = {};
|
|
3017
3018
|
if (options.otelBackend) {
|
|
@@ -3293,4 +3294,4 @@ export {
|
|
|
3293
3294
|
selectTarget,
|
|
3294
3295
|
runEvalCommand
|
|
3295
3296
|
};
|
|
3296
|
-
//# sourceMappingURL=chunk-UWDI4UVN.js.map
|
|
3297
|
+
//# sourceMappingURL=chunk-5646K2XJ.js.map
|