agentv 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ runCli
4
+ } from "./chunk-S3RN2GSO.js";
5
+
6
+ // src/cli.ts
7
+ void runCli();
8
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\r\nimport { runCli } from './index.js';\r\n\r\nvoid runCli();\r\n"],"mappings":";;;;;;AAGA,KAAK,OAAO;","names":[]}
package/dist/index.js ADDED
@@ -0,0 +1,9 @@
1
+ import {
2
+ createProgram,
3
+ runCli
4
+ } from "./chunk-S3RN2GSO.js";
5
+ export {
6
+ createProgram,
7
+ runCli
8
+ };
9
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
@@ -0,0 +1,100 @@
1
+ ---
2
+ description: 'Apply when writing evals in YAML format'
3
+ ---
4
+
5
+ ## Schema Reference
6
+ - Schema: #file:../contexts/eval-schema.json (JSON Schema for validation and tooling)
7
+ - Format: YAML with structured content arrays
8
+
9
+ ## Structure Requirements
10
+ - Root level: `version` (required; must be the string "2.0", so quote it in YAML), `description` (optional), `target` (optional), `evalcases` (required)
11
+ - Eval case fields: `id` (required), `outcome` (required), `input_messages` (required), `expected_messages` (required)
12
+ - Optional fields: `conversation_id`, `note`, `execution`
13
+ - Message fields: `role` (required), `content` (required)
14
+ - Message roles: `system`, `user`, `assistant`, `tool`
15
+ - Content types: `text` (inline), `file` (relative or absolute path)
16
+ - Absolute file paths start with "/" and are resolved from the repo root (e.g., "/prompts/file.md"); relative paths are resolved from the eval file's directory
17
+
18
+ ## Example
19
+ ```yaml
20
+ version: "2.0"
21
+ description: Example showing basic features and conversation threading
22
+ target: default
23
+
24
+ evalcases:
25
+ # Basic eval case with file references
26
+ - id: code-review-basic
27
+ outcome: Assistant provides helpful code analysis
28
+
29
+ input_messages:
30
+ - role: system
31
+ content: You are an expert code reviewer.
32
+ - role: user
33
+ content:
34
+ - type: text
35
+ value: |-
36
+ Review this function:
37
+
38
+ ```python
39
+ def add(a, b):
40
+ return a + b
41
+ ```
42
+ # File paths can be relative or absolute
43
+ - type: file
44
+ value: /prompts/python.instructions.md
45
+
46
+ expected_messages:
47
+ - role: assistant
48
+ content: |-
49
+ The function is simple and correct. Suggestions:
50
+ - Add type hints: `def add(a: int, b: int) -> int:`
51
+ - Add docstring
52
+ - Consider validation for edge cases
53
+
54
+ # Advanced: conversation threading, multiple evaluators
55
+ - id: python-coding-session
56
+ conversation_id: python-coding-session
57
+ outcome: Generates correct code with proper error handling
58
+
59
+ execution:
60
+ target: azure_base
61
+ evaluators:
62
+ - name: keyword_check
63
+ type: code
64
+ script: /evaluators/scripts/check_keywords.py
65
+ - name: semantic_judge
66
+ type: llm_judge
67
+ prompt: /evaluators/prompts/correctness.md
68
+ model: gpt-5-chat
69
+
70
+ input_messages:
71
+ - role: system
72
+ content: You are a code generator.
73
+ - role: user
74
+ content:
75
+ - type: text
76
+ value: Create a function to find the second largest number in a list.
77
+ - type: file
78
+ value: /prompts/python.instructions.md
79
+
80
+ expected_messages:
81
+ - role: assistant
82
+ content: |-
83
+ ```python
84
+ from typing import List, Union
85
+
86
+ def find_second_largest(numbers: List[int]) -> Union[int, None]:
87
+ """Find the second largest number."""
88
+ if not isinstance(numbers, list):
89
+ raise TypeError("Input must be a list")
90
+ if not numbers:
91
+ raise ValueError("List cannot be empty")
92
+
93
+ unique = list(set(numbers))
94
+ if len(unique) < 2:
95
+ return None
96
+
97
+ unique.sort(reverse=True)
98
+ return unique[1]
99
+ ```
100
+ ```
@@ -0,0 +1,182 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "AgentV Eval Schema",
4
+ "description": "Schema for YAML evaluation files with conversation flows, multiple evaluators, and execution configuration",
5
+ "type": "object",
6
+ "properties": {
7
+ "version": {
8
+ "type": "string",
9
+ "description": "Schema version",
10
+ "enum": ["2.0"]
11
+ },
12
+ "description": {
13
+ "type": "string",
14
+ "description": "Description of what this eval suite covers"
15
+ },
16
+ "target": {
17
+ "type": "string",
18
+ "description": "Default target configuration name (e.g., default, azure_base, vscode_projectx). Can be overridden per eval case."
19
+ },
20
+ "evalcases": {
21
+ "type": "array",
22
+ "description": "Array of evaluation cases",
23
+ "minItems": 1,
24
+ "items": {
25
+ "type": "object",
26
+ "properties": {
27
+ "id": {
28
+ "type": "string",
29
+ "description": "Unique identifier for the eval case"
30
+ },
31
+ "conversation_id": {
32
+ "type": "string",
33
+ "description": "Optional conversation identifier for threading multiple eval cases together"
34
+ },
35
+ "outcome": {
36
+ "type": "string",
37
+ "description": "Description of what the AI should accomplish in this eval"
38
+ },
39
+ "note": {
40
+ "type": "string",
41
+ "description": "Optional note or additional context for the eval case. Use this to document test-specific considerations, known limitations, or rationale for expected behavior."
42
+ },
43
+ "input_messages": {
44
+ "type": "array",
45
+ "description": "Input messages for the conversation",
46
+ "minItems": 1,
47
+ "items": {
48
+ "type": "object",
49
+ "properties": {
50
+ "role": {
51
+ "type": "string",
52
+ "enum": ["system", "user", "assistant", "tool"],
53
+ "description": "Message role"
54
+ },
55
+ "content": {
56
+ "oneOf": [
57
+ {
58
+ "type": "string",
59
+ "description": "Simple text content"
60
+ },
61
+ {
62
+ "type": "array",
63
+ "description": "Mixed content items (text and file references)",
64
+ "items": {
65
+ "type": "object",
66
+ "properties": {
67
+ "type": {
68
+ "type": "string",
69
+ "enum": ["text", "file"],
70
+ "description": "Content type: 'text' for inline content, 'file' for file references"
71
+ },
72
+ "value": {
73
+ "type": "string",
74
+ "description": "Text content or file path. Relative paths (e.g., ../prompts/file.md) are resolved from eval file directory. Absolute paths (e.g., /docs/examples/prompts/file.md) are resolved from repo root."
75
+ }
76
+ },
77
+ "required": ["type", "value"],
78
+ "additionalProperties": false
79
+ }
80
+ }
81
+ ]
82
+ }
83
+ },
84
+ "required": ["role", "content"],
85
+ "additionalProperties": false
86
+ }
87
+ },
88
+ "expected_messages": {
89
+ "type": "array",
90
+ "description": "Expected response messages",
91
+ "minItems": 1,
92
+ "items": {
93
+ "type": "object",
94
+ "properties": {
95
+ "role": {
96
+ "type": "string",
97
+ "enum": ["system", "user", "assistant", "tool"],
98
+ "description": "Message role"
99
+ },
100
+ "content": {
101
+ "oneOf": [
102
+ {
103
+ "type": "string",
104
+ "description": "Simple text content"
105
+ },
106
+ {
107
+ "type": "array",
108
+ "description": "Mixed content items",
109
+ "items": {
110
+ "type": "object",
111
+ "properties": {
112
+ "type": {
113
+ "type": "string",
114
+ "enum": ["text", "file"]
115
+ },
116
+ "value": {
117
+ "type": "string"
118
+ }
119
+ },
120
+ "required": ["type", "value"],
121
+ "additionalProperties": false
122
+ }
123
+ }
124
+ ]
125
+ }
126
+ },
127
+ "required": ["role", "content"],
128
+ "additionalProperties": false
129
+ }
130
+ },
131
+ "execution": {
132
+ "type": "object",
133
+ "description": "Per-case execution configuration",
134
+ "properties": {
135
+ "target": {
136
+ "type": "string",
137
+ "description": "Override target for this specific eval case"
138
+ },
139
+ "evaluators": {
140
+ "type": "array",
141
+ "description": "Multiple evaluators (code-based and LLM judges)",
142
+ "items": {
143
+ "type": "object",
144
+ "properties": {
145
+ "name": {
146
+ "type": "string",
147
+ "description": "Evaluator name/identifier"
148
+ },
149
+ "type": {
150
+ "type": "string",
151
+ "enum": ["code", "llm_judge"],
152
+ "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
153
+ },
154
+ "script": {
155
+ "type": "string",
156
+ "description": "Path to evaluator script (for type: code)"
157
+ },
158
+ "prompt": {
159
+ "type": "string",
160
+ "description": "Path to judge prompt file (for type: llm_judge)"
161
+ },
162
+ "model": {
163
+ "type": "string",
164
+ "description": "Model to use for LLM judge (for type: llm_judge)"
165
+ }
166
+ },
167
+ "required": ["name", "type"],
168
+ "additionalProperties": true
169
+ }
170
+ }
171
+ },
172
+ "additionalProperties": true
173
+ }
174
+ },
175
+ "required": ["id", "outcome", "input_messages", "expected_messages"],
176
+ "additionalProperties": false
177
+ }
178
+ }
179
+ },
180
+ "required": ["evalcases"],
181
+ "additionalProperties": false
182
+ }
package/package.json ADDED
@@ -0,0 +1,40 @@
1
+ {
2
+ "name": "agentv",
3
+ "version": "0.2.3",
4
+ "description": "CLI entry point for AgentV",
5
+ "type": "module",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "https://github.com/EntityProcess/agentv.git"
9
+ },
10
+ "homepage": "https://github.com/EntityProcess/agentv#readme",
11
+ "bugs": {
12
+ "url": "https://github.com/EntityProcess/agentv/issues"
13
+ },
14
+ "bin": {
15
+ "agentv": "./dist/cli.js"
16
+ },
17
+ "files": [
18
+ "dist",
19
+ "README.md"
20
+ ],
21
+ "dependencies": {
22
+ "commander": "^12.1.0",
23
+ "dotenv": "^16.4.5",
24
+ "log-update": "^7.0.1",
25
+ "yaml": "^2.6.1",
26
+ "@agentv/core": "0.2.3"
27
+ },
28
+ "devDependencies": {
29
+ "execa": "^9.3.0"
30
+ },
31
+ "scripts": {
32
+ "dev": "tsx watch src/index.ts",
33
+ "build": "tsup",
34
+ "typecheck": "tsc --noEmit",
35
+ "lint": "eslint . --ext .ts",
36
+ "test": "vitest run",
37
+ "test:watch": "vitest",
38
+ "test:coverage": "vitest run --coverage"
39
+ }
40
+ }