agentv 1.3.1 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +460 -441
- package/dist/{chunk-6R2YRXCQ.js → chunk-HU4B6ODF.js} +1859 -641
- package/dist/chunk-HU4B6ODF.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/dist/templates/.agentv/.env.template +23 -23
- package/dist/templates/.agentv/config.yaml +15 -15
- package/dist/templates/.agentv/targets.yaml +71 -73
- package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +211 -211
- package/dist/templates/.claude/skills/agentv-eval-builder/references/batch-cli-evaluator.md +316 -288
- package/dist/templates/.claude/skills/agentv-eval-builder/references/compare-command.md +115 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/composite-evaluator.md +215 -215
- package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +241 -213
- package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +333 -333
- package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +137 -139
- package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +224 -179
- package/dist/templates/.claude/skills/agentv-prompt-optimizer/SKILL.md +77 -77
- package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +4 -4
- package/dist/templates/.github/prompts/agentv-optimize.prompt.md +3 -3
- package/package.json +1 -1
- package/dist/chunk-6R2YRXCQ.js.map +0 -1
package/dist/cli.js
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,23 +1,23 @@
|
|
|
1
|
-
# Copy this file to .env and fill in your credentials
|
|
2
|
-
|
|
3
|
-
# Azure OpenAI Configuration
|
|
4
|
-
AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
|
|
5
|
-
AZURE_OPENAI_API_KEY=your-openai-api-key-here
|
|
6
|
-
AZURE_DEPLOYMENT_NAME=gpt-5-chat
|
|
7
|
-
AZURE_OPENAI_API_VERSION=2024-12-01-preview
|
|
8
|
-
|
|
9
|
-
# Google Gemini
|
|
10
|
-
GOOGLE_GENERATIVE_AI_API_KEY=your-gemini-api-key-here
|
|
11
|
-
GEMINI_MODEL_NAME=gemini-2.5-flash
|
|
12
|
-
|
|
13
|
-
# Anthropic
|
|
14
|
-
ANTHROPIC_API_KEY=your-anthropic-api-key-here
|
|
15
|
-
|
|
16
|
-
# VS Code Workspace Paths for Execution Targets
|
|
17
|
-
# Note: Using forward slashes is recommended for paths in .env files
|
|
18
|
-
# to avoid issues with escape characters.
|
|
19
|
-
PROJECTX_WORKSPACE_PATH=C:/Users/your-username/OneDrive - Company Pty Ltd/sample.code-workspace
|
|
20
|
-
|
|
21
|
-
# CLI provider sample (used by the local_cli target)
|
|
22
|
-
CLI_EVALS_DIR=./docs/examples/simple/evals/local-cli
|
|
23
|
-
LOCAL_AGENT_TOKEN=dummytoken
|
|
1
|
+
# Copy this file to .env and fill in your credentials
|
|
2
|
+
|
|
3
|
+
# Azure OpenAI Configuration
|
|
4
|
+
AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
|
|
5
|
+
AZURE_OPENAI_API_KEY=your-openai-api-key-here
|
|
6
|
+
AZURE_DEPLOYMENT_NAME=gpt-5-chat
|
|
7
|
+
AZURE_OPENAI_API_VERSION=2024-12-01-preview
|
|
8
|
+
|
|
9
|
+
# Google Gemini
|
|
10
|
+
GOOGLE_GENERATIVE_AI_API_KEY=your-gemini-api-key-here
|
|
11
|
+
GEMINI_MODEL_NAME=gemini-2.5-flash
|
|
12
|
+
|
|
13
|
+
# Anthropic
|
|
14
|
+
ANTHROPIC_API_KEY=your-anthropic-api-key-here
|
|
15
|
+
|
|
16
|
+
# VS Code Workspace Paths for Execution Targets
|
|
17
|
+
# Note: Using forward slashes is recommended for paths in .env files
|
|
18
|
+
# to avoid issues with escape characters.
|
|
19
|
+
PROJECTX_WORKSPACE_PATH=C:/Users/your-username/OneDrive - Company Pty Ltd/sample.code-workspace
|
|
20
|
+
|
|
21
|
+
# CLI provider sample (used by the local_cli target)
|
|
22
|
+
CLI_EVALS_DIR=./docs/examples/simple/evals/local-cli
|
|
23
|
+
LOCAL_AGENT_TOKEN=dummytoken
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
$schema: agentv-config-v2
|
|
2
|
-
|
|
3
|
-
# Customize which files are treated as guidelines vs regular file content
|
|
4
|
-
|
|
5
|
-
# Custom guideline patterns:
|
|
6
|
-
guideline_patterns:
|
|
7
|
-
- "**/*.instructions.md"
|
|
8
|
-
- "**/*.prompt.md"
|
|
9
|
-
- "**/SKILL.md"
|
|
10
|
-
|
|
11
|
-
# Notes:
|
|
12
|
-
# - Patterns use standard glob syntax (via micromatch library)
|
|
13
|
-
# - Paths are normalized to forward slashes for cross-platform compatibility
|
|
14
|
-
# - Only files matching these patterns are loaded as guidelines
|
|
15
|
-
# - All other files referenced in eval cases are treated as regular file content
|
|
1
|
+
$schema: agentv-config-v2
|
|
2
|
+
|
|
3
|
+
# Customize which files are treated as guidelines vs regular file content
|
|
4
|
+
|
|
5
|
+
# Custom guideline patterns:
|
|
6
|
+
guideline_patterns:
|
|
7
|
+
- "**/*.instructions.md"
|
|
8
|
+
- "**/*.prompt.md"
|
|
9
|
+
- "**/SKILL.md"
|
|
10
|
+
|
|
11
|
+
# Notes:
|
|
12
|
+
# - Patterns use standard glob syntax (via micromatch library)
|
|
13
|
+
# - Paths are normalized to forward slashes for cross-platform compatibility
|
|
14
|
+
# - Only files matching these patterns are loaded as guidelines
|
|
15
|
+
# - All other files referenced in eval cases are treated as regular file content
|
|
@@ -1,73 +1,71 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
# - --
|
|
26
|
-
# - ${{
|
|
27
|
-
# - --
|
|
28
|
-
# - ${{
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
#
|
|
65
|
-
|
|
66
|
-
#
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
type: command
|
|
73
|
-
command_template: uv run ./mock_cli.py --healthcheck
|
|
1
|
+
# A list of all supported evaluation targets for the project.
|
|
2
|
+
# Each target defines a provider and its specific configuration.
|
|
3
|
+
# Actual values for paths/keys are stored in the local .env file.
|
|
4
|
+
|
|
5
|
+
targets:
|
|
6
|
+
- name: default
|
|
7
|
+
provider: azure
|
|
8
|
+
endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
|
|
9
|
+
api_key: ${{ AZURE_OPENAI_API_KEY }}
|
|
10
|
+
model: ${{ AZURE_DEPLOYMENT_NAME }}
|
|
11
|
+
# version: ${{ AZURE_OPENAI_API_VERSION }} # Optional: uncomment to override default (2024-12-01-preview)
|
|
12
|
+
|
|
13
|
+
- name: vscode
|
|
14
|
+
provider: vscode
|
|
15
|
+
judge_target: azure_base
|
|
16
|
+
|
|
17
|
+
- name: codex
|
|
18
|
+
provider: codex
|
|
19
|
+
judge_target: azure_base
|
|
20
|
+
# Uses the Codex CLI (defaults to `codex` on PATH)
|
|
21
|
+
# executable: ${{ CODEX_CLI_PATH }} # Optional: override executable path
|
|
22
|
+
# args: # Optional additional CLI arguments
|
|
23
|
+
# - --profile
|
|
24
|
+
# - ${{ CODEX_PROFILE }}
|
|
25
|
+
# - --model
|
|
26
|
+
# - ${{ CODEX_MODEL }}
|
|
27
|
+
# - --ask-for-approval
|
|
28
|
+
# - ${{ CODEX_APPROVAL_PRESET }}
|
|
29
|
+
timeout_seconds: 180
|
|
30
|
+
cwd: ${{ CODEX_WORKSPACE_DIR }} # Where scratch workspaces are created
|
|
31
|
+
log_dir: ${{ CODEX_LOG_DIR }} # Optional: where Codex CLI stream logs are stored (defaults to ./.agentv/logs/codex)
|
|
32
|
+
log_format: json # Optional: 'summary' (default) or 'json' for raw event logs
|
|
33
|
+
|
|
34
|
+
- name: vscode_projectx
|
|
35
|
+
provider: vscode
|
|
36
|
+
workspace_template: ${{ PROJECTX_WORKSPACE_PATH }}
|
|
37
|
+
provider_batching: false
|
|
38
|
+
judge_target: azure_base
|
|
39
|
+
|
|
40
|
+
- name: vscode_insiders_projectx
|
|
41
|
+
provider: vscode-insiders
|
|
42
|
+
workspace_template: ${{ PROJECTX_WORKSPACE_PATH }}
|
|
43
|
+
provider_batching: false
|
|
44
|
+
judge_target: azure_base
|
|
45
|
+
|
|
46
|
+
- name: azure_base
|
|
47
|
+
provider: azure
|
|
48
|
+
endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
|
|
49
|
+
api_key: ${{ AZURE_OPENAI_API_KEY }}
|
|
50
|
+
model: ${{ AZURE_DEPLOYMENT_NAME }}
|
|
51
|
+
version: ${{ AZURE_OPENAI_API_VERSION }}
|
|
52
|
+
|
|
53
|
+
- name: gemini_base
|
|
54
|
+
provider: gemini
|
|
55
|
+
api_key: ${{ GOOGLE_GENERATIVE_AI_API_KEY }}
|
|
56
|
+
model: ${{ GEMINI_MODEL_NAME }}
|
|
57
|
+
|
|
58
|
+
- name: local_cli
|
|
59
|
+
provider: cli
|
|
60
|
+
judge_target: azure_base
|
|
61
|
+
# Passes the fully rendered prompt and any attached files to a local Python script
|
|
62
|
+
# NOTE: Do not add quotes around {PROMPT} or {FILES} - they are already shell-escaped
|
|
63
|
+
command_template: uv run ./mock_cli.py --prompt {PROMPT} {FILES} --output {OUTPUT_FILE}
|
|
64
|
+
# Format for each file in {FILES}. {path} and {basename} are automatically shell-escaped, so no quotes needed
|
|
65
|
+
files_format: --file {path}
|
|
66
|
+
# Optional working directory resolved from .env
|
|
67
|
+
cwd: ${{ CLI_EVALS_DIR }}
|
|
68
|
+
timeout_seconds: 30
|
|
69
|
+
healthcheck:
|
|
70
|
+
type: command
|
|
71
|
+
command_template: uv run ./mock_cli.py --healthcheck
|