@agentv/sdk 4.41.3-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -0
- package/package.json +37 -0
package/README.md
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# @agentv/sdk
|
|
2
|
+
|
|
3
|
+
Evaluation SDK for AgentV - build custom graders and prompt templates around the canonical AgentV eval model.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @agentv/sdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
### defineAssertion (simplest way)
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
#!/usr/bin/env bun
|
|
17
|
+
import { defineAssertion } from '@agentv/sdk';
|
|
18
|
+
|
|
19
|
+
export default defineAssertion(({ output }) => ({
|
|
20
|
+
pass: (output ?? '').toLowerCase().includes('hello'),
|
|
21
|
+
reasoning: 'Checks for greeting',
|
|
22
|
+
}));
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Assertions support `pass: boolean` for simple checks and `score: number` (0-1) for granular scoring.
|
|
26
|
+
|
|
27
|
+
### defineCodeGrader (full control)
|
|
28
|
+
|
|
29
|
+
```typescript
|
|
30
|
+
#!/usr/bin/env bun
|
|
31
|
+
import { defineCodeGrader } from '@agentv/sdk';
|
|
32
|
+
|
|
33
|
+
export default defineCodeGrader(({ output, traceSummary }) => ({
|
|
34
|
+
score: (output ?? '').length > 0 ? 1.0 : 0.0,
|
|
35
|
+
assertions: [
|
|
36
|
+
{ text: 'Output received', passed: (output ?? '').length > 0 },
|
|
37
|
+
{ text: 'Trace summary available', passed: traceSummary !== null },
|
|
38
|
+
],
|
|
39
|
+
}));
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Both functions handle stdin/stdout parsing, snake_case conversion, Zod validation, and error handling automatically.
|
|
43
|
+
|
|
44
|
+
## Exports
|
|
45
|
+
|
|
46
|
+
- `defineAssertion(handler)` - Define a custom assertion (pass/fail + optional score)
|
|
47
|
+
- `defineCodeGrader(handler)` - Define a code grader (full score control)
|
|
48
|
+
- `definePromptTemplate(handler)` - Define a dynamic prompt template
|
|
49
|
+
- `AssertionContext`, `AssertionScore` - Assertion types
|
|
50
|
+
- `CodeGraderInput`, `CodeGraderResult` - Code grader types
|
|
51
|
+
- `TraceSummary`, `Message`, `ToolCall` - Trace data types
|
|
52
|
+
- `createTargetClient()` - LLM target proxy for graders
|
|
53
|
+
- `z` - Re-exported Zod for custom config schemas
|
|
54
|
+
|
|
55
|
+
## Documentation
|
|
56
|
+
|
|
57
|
+
The complete documentation covers:
|
|
58
|
+
- Full input/output schemas
|
|
59
|
+
- Typed config examples
|
|
60
|
+
- Execution metrics usage
|
|
61
|
+
- Best practices
|
|
62
|
+
|
|
63
|
+
See the docs site guides under `apps/web/src/content/docs/docs/graders/` in the AgentV repository, or run `agentv skills get agentv-eval-writer`.
|
|
64
|
+
|
|
65
|
+
## Repository
|
|
66
|
+
|
|
67
|
+
[https://github.com/EntityProcess/agentv](https://github.com/EntityProcess/agentv)
|
|
68
|
+
|
|
69
|
+
## License
|
|
70
|
+
|
|
71
|
+
MIT License - see [LICENSE](https://github.com/EntityProcess/agentv/blob/HEAD/LICENSE) for details.
|
package/package.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@agentv/sdk",
|
|
3
|
+
"version": "4.41.3-next.1",
|
|
4
|
+
"description": "Evaluation SDK for AgentV - build custom graders and prompt templates",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "https://github.com/EntityProcess/agentv.git"
|
|
9
|
+
},
|
|
10
|
+
"homepage": "https://agentv.dev",
|
|
11
|
+
"bugs": {
|
|
12
|
+
"url": "https://github.com/EntityProcess/agentv/issues"
|
|
13
|
+
},
|
|
14
|
+
"main": "./dist/index.js",
|
|
15
|
+
"types": "./dist/index.d.ts",
|
|
16
|
+
"exports": {
|
|
17
|
+
".": {
|
|
18
|
+
"types": "./dist/index.d.ts",
|
|
19
|
+
"import": "./dist/index.js",
|
|
20
|
+
"require": "./dist/index.cjs"
|
|
21
|
+
}
|
|
22
|
+
},
|
|
23
|
+
"scripts": {
|
|
24
|
+
"prepublishOnly": "node -e \"if(process.env.ALLOW_PUBLISH!=='1'){console.error('ERROR: Use bun run publish:next, then bun run promote:latest');process.exit(1)}\"",
|
|
25
|
+
"build": "tsup",
|
|
26
|
+
"dev": "tsup --watch",
|
|
27
|
+
"typecheck": "tsc --noEmit",
|
|
28
|
+
"lint": "biome check .",
|
|
29
|
+
"format": "biome format --write .",
|
|
30
|
+
"fix": "biome check --write .",
|
|
31
|
+
"test": "bun test"
|
|
32
|
+
},
|
|
33
|
+
"files": ["dist", "README.md"],
|
|
34
|
+
"dependencies": {
|
|
35
|
+
"zod": "^3.23.8"
|
|
36
|
+
}
|
|
37
|
+
}
|