agentv 2.13.0 → 2.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -60,7 +60,7 @@ tests:
60
60
 
61
61
  assert:
62
62
  - name: math_check
63
- type: code_judge
63
+ type: code-judge
64
64
  script: ./validators/check_math.py
65
65
  ```
66
66
 
@@ -154,10 +154,10 @@ Optional sidecar YAML metadata file (`dataset.eval.yaml` alongside `dataset.json
154
154
  description: Math evaluation dataset
155
155
  dataset: math-tests
156
156
  execution:
157
- target: azure_base
157
+ target: azure-base
158
158
  assert:
159
159
  - name: correctness
160
- type: llm_judge
160
+ type: llm-judge
161
161
  prompt: ./judges/correctness.md
162
162
  ```
163
163
 
@@ -175,7 +175,7 @@ agentv validate evals/my-eval.yaml
175
175
  agentv eval evals/my-eval.yaml
176
176
 
177
177
  # Override target
178
- agentv eval --target azure_base evals/**/*.yaml
178
+ agentv eval --target azure-base evals/**/*.yaml
179
179
 
180
180
  # Run specific test
181
181
  agentv eval --test-id case-123 evals/my-eval.yaml
@@ -219,7 +219,7 @@ Reference evaluators in your eval file:
219
219
  ```yaml
220
220
  assert:
221
221
  - name: my_validator
222
- type: code_judge
222
+ type: code-judge
223
223
  script: ./validators/check_answer.py
224
224
  ```
225
225
 
@@ -339,7 +339,7 @@ Define execution targets in `.agentv/targets.yaml` to decouple evals from provid
339
339
 
340
340
  ```yaml
341
341
  targets:
342
- - name: azure_base
342
+ - name: azure-base
343
343
  provider: azure
344
344
  endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
345
345
  api_key: ${{ AZURE_OPENAI_API_KEY }}
@@ -348,12 +348,12 @@ targets:
348
348
  - name: vscode_dev
349
349
  provider: vscode
350
350
  workspace_template: ${{ WORKSPACE_PATH }}
351
- judge_target: azure_base
351
+ judge_target: azure-base
352
352
 
353
353
  - name: local_agent
354
354
  provider: cli
355
355
  command: 'python agent.py --prompt-file {PROMPT_FILE} --output {OUTPUT_FILE}'
356
- judge_target: azure_base
356
+ judge_target: azure-base
357
357
  ```
358
358
 
359
359
  Supports: `azure`, `anthropic`, `gemini`, `codex`, `copilot`, `pi-coding-agent`, `claude`, `vscode`, `vscode-insiders`, `cli`, and `mock`.
@@ -398,12 +398,12 @@ All assertions support `weight`, `required`, and `negate` flags. Use `negate: tr
398
398
  ```yaml
399
399
  assert:
400
400
  # Case-insensitive matching for natural language variation
401
- - type: icontains_any
401
+ - type: icontains-any
402
402
  value: ["missing rule code", "need rule code", "provide rule code"]
403
403
  required: true
404
404
 
405
405
  # Multiple required terms
406
- - type: icontains_all
406
+ - type: icontains-all
407
407
  value: ["country code", "rule codes"]
408
408
 
409
409
  # Case-insensitive regex
@@ -423,10 +423,10 @@ targets:
423
423
  # Agent target — requires judge_target for LLM-based evaluation
424
424
  - name: codex_local
425
425
  provider: codex
426
- judge_target: azure_base # Required: LLM provider for judging
426
+ judge_target: azure-base # Required: LLM provider for judging
427
427
 
428
428
  # LLM target — no judge_target needed (judges itself)
429
- - name: azure_base
429
+ - name: azure-base
430
430
  provider: azure
431
431
  ```
432
432
 
@@ -445,7 +445,7 @@ Create markdown judge files with evaluation criteria and scoring guidelines:
445
445
  ```yaml
446
446
  assert:
447
447
  - name: semantic_check
448
- type: llm_judge
448
+ type: llm-judge
449
449
  prompt: ./judges/correctness.md
450
450
  ```
451
451
 
@@ -487,7 +487,7 @@ Configure automatic retry with exponential backoff:
487
487
 
488
488
  ```yaml
489
489
  targets:
490
- - name: azure_base
490
+ - name: azure-base
491
491
  provider: azure
492
492
  max_retries: 5
493
493
  retry_initial_delay_ms: 2000
@@ -25,12 +25,12 @@ import {
25
25
  subscribeToCopilotCliLogEntries,
26
26
  subscribeToCopilotSdkLogEntries,
27
27
  subscribeToPiLogEntries
28
- } from "./chunk-FSBZM3HT.js";
28
+ } from "./chunk-OQN2GDEU.js";
29
29
 
30
30
  // package.json
31
31
  var package_default = {
32
32
  name: "agentv",
33
- version: "2.13.0",
33
+ version: "2.14.1",
34
34
  description: "CLI entry point for AgentV",
35
35
  type: "module",
36
36
  repository: {
@@ -1266,16 +1266,16 @@ function inferFileTypeFromPath(filePath) {
1266
1266
  var ASSERTION_TYPES_WITH_STRING_VALUE = /* @__PURE__ */ new Set([
1267
1267
  "contains",
1268
1268
  "icontains",
1269
- "starts_with",
1270
- "ends_with",
1269
+ "starts-with",
1270
+ "ends-with",
1271
1271
  "equals",
1272
1272
  "regex"
1273
1273
  ]);
1274
1274
  var ASSERTION_TYPES_WITH_ARRAY_VALUE = /* @__PURE__ */ new Set([
1275
- "contains_any",
1276
- "contains_all",
1277
- "icontains_any",
1278
- "icontains_all"
1275
+ "contains-any",
1276
+ "contains-all",
1277
+ "icontains-any",
1278
+ "icontains-all"
1279
1279
  ]);
1280
1280
  var VALID_TEST_FILE_EXTENSIONS = /* @__PURE__ */ new Set([".yaml", ".yml", ".jsonl"]);
1281
1281
  var NAME_PATTERN = /^[a-z0-9-]+$/;
@@ -1641,8 +1641,8 @@ function validateAssertArray(assertField, parentLocation, filePath, errors) {
1641
1641
  });
1642
1642
  continue;
1643
1643
  }
1644
- const typeValue = item.type;
1645
- if (typeValue === void 0 || typeof typeValue !== "string") {
1644
+ const rawTypeValue = item.type;
1645
+ if (rawTypeValue === void 0 || typeof rawTypeValue !== "string") {
1646
1646
  errors.push({
1647
1647
  severity: "warning",
1648
1648
  filePath,
@@ -1651,12 +1651,13 @@ function validateAssertArray(assertField, parentLocation, filePath, errors) {
1651
1651
  });
1652
1652
  continue;
1653
1653
  }
1654
+ const typeValue = rawTypeValue.replace(/_/g, "-");
1654
1655
  if (!isEvaluatorKind(typeValue)) {
1655
1656
  errors.push({
1656
1657
  severity: "warning",
1657
1658
  filePath,
1658
1659
  location: `${location}.type`,
1659
- message: `Unknown assertion type '${typeValue}'.`
1660
+ message: `Unknown assertion type '${rawTypeValue}'.`
1660
1661
  });
1661
1662
  continue;
1662
1663
  }
@@ -1881,7 +1882,7 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
1881
1882
  "delayMinMs",
1882
1883
  "delayMaxMs",
1883
1884
  "trace"
1884
- // For testing tool_trajectory evaluator
1885
+ // For testing tool-trajectory evaluator
1885
1886
  ]);
1886
1887
  var CLAUDE_SETTINGS = /* @__PURE__ */ new Set([
1887
1888
  ...COMMON_SETTINGS,
@@ -3011,7 +3012,7 @@ async function runEvalCommand(input) {
3011
3012
  const useFileExport = !!(options.otelFile || options.traceFile);
3012
3013
  if (options.exportOtel || useFileExport) {
3013
3014
  try {
3014
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-CCUHG3SN.js");
3015
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-QR5OZ4DH.js");
3015
3016
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
3016
3017
  let headers = {};
3017
3018
  if (options.otelBackend) {
@@ -3293,4 +3294,4 @@ export {
3293
3294
  selectTarget,
3294
3295
  runEvalCommand
3295
3296
  };
3296
- //# sourceMappingURL=chunk-UWDI4UVN.js.map
3297
+ //# sourceMappingURL=chunk-5646K2XJ.js.map