agentv 3.10.2 → 3.10.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/dist/{chunk-6UE665XI.js → chunk-7LC3VNOC.js} +4 -4
  2. package/dist/{chunk-KGK5NUFG.js → chunk-JUQCB3ZW.js} +56 -15
  3. package/dist/chunk-JUQCB3ZW.js.map +1 -0
  4. package/dist/{chunk-F7LAJMTO.js → chunk-U556GRI3.js} +4 -4
  5. package/dist/{chunk-F7LAJMTO.js.map → chunk-U556GRI3.js.map} +1 -1
  6. package/dist/cli.js +3 -3
  7. package/dist/{dist-3QUJEJUT.js → dist-2X7A3TTC.js} +2 -2
  8. package/dist/index.js +3 -3
  9. package/dist/{interactive-EO6AR2R3.js → interactive-CSA4KIND.js} +3 -3
  10. package/dist/templates/.agentv/.env.example +9 -11
  11. package/dist/templates/.agentv/config.yaml +13 -4
  12. package/dist/templates/.agentv/targets.yaml +16 -0
  13. package/package.json +1 -1
  14. package/dist/chunk-KGK5NUFG.js.map +0 -1
  15. package/dist/templates/.agents/skills/agentv-chat-to-eval/README.md +0 -84
  16. package/dist/templates/.agents/skills/agentv-chat-to-eval/SKILL.md +0 -144
  17. package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-json.md +0 -67
  18. package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-markdown.md +0 -101
  19. package/dist/templates/.agents/skills/agentv-eval-builder/SKILL.md +0 -458
  20. package/dist/templates/.agents/skills/agentv-eval-builder/references/config-schema.json +0 -36
  21. package/dist/templates/.agents/skills/agentv-eval-builder/references/custom-evaluators.md +0 -118
  22. package/dist/templates/.agents/skills/agentv-eval-builder/references/eval-schema.json +0 -12753
  23. package/dist/templates/.agents/skills/agentv-eval-builder/references/rubric-evaluator.md +0 -77
  24. package/dist/templates/.agents/skills/agentv-eval-orchestrator/SKILL.md +0 -50
  25. package/dist/templates/.agents/skills/agentv-prompt-optimizer/SKILL.md +0 -78
  26. package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +0 -177
  27. package/dist/templates/.claude/skills/agentv-eval-builder/references/batch-cli-evaluator.md +0 -316
  28. package/dist/templates/.claude/skills/agentv-eval-builder/references/compare-command.md +0 -137
  29. package/dist/templates/.claude/skills/agentv-eval-builder/references/composite-evaluator.md +0 -215
  30. package/dist/templates/.claude/skills/agentv-eval-builder/references/config-schema.json +0 -27
  31. package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +0 -115
  32. package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +0 -278
  33. package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +0 -333
  34. package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +0 -79
  35. package/dist/templates/.claude/skills/agentv-eval-builder/references/structured-data-evaluators.md +0 -121
  36. package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +0 -298
  37. package/dist/templates/.claude/skills/agentv-prompt-optimizer/SKILL.md +0 -78
  38. package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +0 -5
  39. package/dist/templates/.github/prompts/agentv-optimize.prompt.md +0 -4
  40. /package/dist/{chunk-6UE665XI.js.map → chunk-7LC3VNOC.js.map} +0 -0
  41. /package/dist/{dist-3QUJEJUT.js.map → dist-2X7A3TTC.js.map} +0 -0
  42. /package/dist/{interactive-EO6AR2R3.js.map → interactive-CSA4KIND.js.map} +0 -0
package/dist/cli.js CHANGED
@@ -2,9 +2,9 @@
2
2
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
3
3
  import {
4
4
  runCli
5
- } from "./chunk-6UE665XI.js";
6
- import "./chunk-F7LAJMTO.js";
7
- import "./chunk-KGK5NUFG.js";
5
+ } from "./chunk-7LC3VNOC.js";
6
+ import "./chunk-U556GRI3.js";
7
+ import "./chunk-JUQCB3ZW.js";
8
8
  import "./chunk-C5GOHBQM.js";
9
9
  import "./chunk-JK6V4KVD.js";
10
10
  import "./chunk-HQDCIXVH.js";
@@ -141,7 +141,7 @@ import {
141
141
  transpileEvalYaml,
142
142
  transpileEvalYamlFile,
143
143
  trimBaselineResult
144
- } from "./chunk-KGK5NUFG.js";
144
+ } from "./chunk-JUQCB3ZW.js";
145
145
  import {
146
146
  OtlpJsonFileExporter
147
147
  } from "./chunk-C5GOHBQM.js";
@@ -300,4 +300,4 @@ export {
300
300
  transpileEvalYamlFile,
301
301
  trimBaselineResult
302
302
  };
303
- //# sourceMappingURL=dist-3QUJEJUT.js.map
303
+ //# sourceMappingURL=dist-2X7A3TTC.js.map
package/dist/index.js CHANGED
@@ -3,9 +3,9 @@ import {
3
3
  app,
4
4
  preprocessArgv,
5
5
  runCli
6
- } from "./chunk-6UE665XI.js";
7
- import "./chunk-F7LAJMTO.js";
8
- import "./chunk-KGK5NUFG.js";
6
+ } from "./chunk-7LC3VNOC.js";
7
+ import "./chunk-U556GRI3.js";
8
+ import "./chunk-JUQCB3ZW.js";
9
9
  import "./chunk-C5GOHBQM.js";
10
10
  import "./chunk-JK6V4KVD.js";
11
11
  import "./chunk-HQDCIXVH.js";
@@ -4,14 +4,14 @@ import {
4
4
  fileExists,
5
5
  findRepoRoot,
6
6
  runEvalCommand
7
- } from "./chunk-F7LAJMTO.js";
7
+ } from "./chunk-U556GRI3.js";
8
8
  import {
9
9
  DEFAULT_EVAL_PATTERNS,
10
10
  getAgentvHome,
11
11
  listTargetNames,
12
12
  loadConfig,
13
13
  readTargetDefinitions
14
- } from "./chunk-KGK5NUFG.js";
14
+ } from "./chunk-JUQCB3ZW.js";
15
15
  import "./chunk-C5GOHBQM.js";
16
16
  import "./chunk-JK6V4KVD.js";
17
17
  import "./chunk-HQDCIXVH.js";
@@ -371,4 +371,4 @@ ${ANSI_DIM}Retrying execution errors...${ANSI_RESET}
371
371
  export {
372
372
  launchInteractiveWizard
373
373
  };
374
- //# sourceMappingURL=interactive-EO6AR2R3.js.map
374
+ //# sourceMappingURL=interactive-CSA4KIND.js.map
@@ -1,25 +1,23 @@
1
1
  # Copy this file to .env and fill in your credentials
2
2
 
3
- # Eval run mode (used by agentv-bench skill)
4
- AGENT_EVAL_MODE=agent # agent | cli
5
-
6
3
  # Azure OpenAI Configuration
7
4
  AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
8
5
  AZURE_OPENAI_API_KEY=your-openai-api-key-here
9
- AZURE_DEPLOYMENT_NAME=gpt-5-mini
6
+ AZURE_DEPLOYMENT_NAME=gpt-5-chat
10
7
  AZURE_OPENAI_API_VERSION=2024-12-01-preview
11
8
 
12
- # OpenAI
13
- OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
14
- OPENAI_API_KEY=your-openai-api-key-here
15
- OPENAI_MODEL=gpt-5-mini
16
-
17
9
  # Google Gemini
18
10
  GOOGLE_GENERATIVE_AI_API_KEY=your-gemini-api-key-here
19
- GEMINI_MODEL_NAME=gemini-3-flash-preview
11
+ GEMINI_MODEL_NAME=gemini-2.5-flash
20
12
 
21
13
  # Anthropic
22
14
  ANTHROPIC_API_KEY=your-anthropic-api-key-here
23
15
 
16
+ # VS Code Workspace Paths for Execution Targets
17
+ # Note: Using forward slashes is recommended for paths in .env files
18
+ # to avoid issues with escape characters.
19
+ PROJECTX_WORKSPACE_PATH=C:/Users/your-username/OneDrive - Company Pty Ltd/sample.code-workspace
20
+
24
21
  # CLI provider sample (used by the local_cli target)
25
- CLI_EVALS_DIR=./docs/examples/simple/evals/local-cli
22
+ CLI_EVALS_DIR=./docs/examples/simple/evals/local-cli
23
+ LOCAL_AGENT_TOKEN=dummytoken
@@ -1,6 +1,15 @@
1
1
  $schema: agentv-config-v2
2
2
 
3
- # Execution defaults (overridden by CLI flags)
4
- # execution:
5
- # pool_workspaces: true # Reuse materialized workspaces across eval runs
6
- # pool_slots: 10 # Max pool slots on disk (1-50, default: 10)
3
+ # Customize which files are treated as guidelines vs regular file content
4
+
5
+ # Custom guideline patterns:
6
+ guideline_patterns:
7
+ - "**/*.instructions.md"
8
+ - "**/*.prompt.md"
9
+ - "**/SKILL.md"
10
+
11
+ # Notes:
12
+ # - Patterns use standard glob syntax (via micromatch library)
13
+ # - Paths are normalized to forward slashes for cross-platform compatibility
14
+ # - Only files matching these patterns are loaded as guidelines
15
+ # - All other files referenced in tests are treated as regular file content
@@ -10,6 +10,10 @@ targets:
10
10
  model: ${{ AZURE_DEPLOYMENT_NAME }}
11
11
  # version: ${{ AZURE_OPENAI_API_VERSION }} # Optional: uncomment to override default (2024-12-01-preview)
12
12
 
13
+ - name: vscode
14
+ provider: vscode
15
+ judge_target: azure-llm
16
+
13
17
  - name: codex
14
18
  provider: codex
15
19
  judge_target: azure-llm
@@ -39,6 +43,18 @@ targets:
39
43
  log_format: json # Optional: 'summary' (default) or 'json' for raw event logs
40
44
  # system_prompt: optional override (default instructs agent to include code in response)
41
45
 
46
+ - name: vscode_projectx
47
+ provider: vscode
48
+ workspace_template: ${{ PROJECTX_WORKSPACE_PATH }}
49
+ provider_batching: false
50
+ judge_target: azure-llm
51
+
52
+ - name: vscode_insiders_projectx
53
+ provider: vscode-insiders
54
+ workspace_template: ${{ PROJECTX_WORKSPACE_PATH }}
55
+ provider_batching: false
56
+ judge_target: azure-llm
57
+
42
58
  - name: azure-llm
43
59
  provider: azure
44
60
  endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentv",
3
- "version": "3.10.2",
3
+ "version": "3.10.3",
4
4
  "description": "CLI entry point for AgentV",
5
5
  "type": "module",
6
6
  "repository": {