@evalview/node 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -42
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,15 +1,30 @@
|
|
|
1
|
-
# @evalview/node
|
|
1
|
+
# @evalview/node — Proof that your agent still works.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
> You changed a prompt. Swapped a model. Updated a tool.
|
|
4
|
+
> Did anything break? **Run EvalView. Know for sure.**
|
|
4
5
|
|
|
5
|
-
|
|
6
|
+
Drop-in Node.js/Next.js middleware for [EvalView](https://github.com/hidai25/eval-view) — the regression testing framework for AI agents.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## 🔍 What EvalView Catches
|
|
11
|
+
|
|
12
|
+
| Status | What it means | What you do |
|
|
13
|
+
|--------|--------------|-------------|
|
|
14
|
+
| ✅ **PASSED** | Agent behavior matches baseline | Ship with confidence |
|
|
15
|
+
| ⚠️ **TOOLS_CHANGED** | Agent is calling different tools | Review the diff |
|
|
16
|
+
| ⚠️ **OUTPUT_CHANGED** | Same tools, output quality shifted | Review the diff |
|
|
17
|
+
| ❌ **REGRESSION** | Score dropped significantly | Fix before shipping |
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## 🚀 Quick Start
|
|
6
22
|
|
|
7
23
|
```bash
|
|
8
24
|
npm install @evalview/node
|
|
25
|
+
pip install evalview
|
|
9
26
|
```
|
|
10
27
|
|
|
11
|
-
## Quick Start
|
|
12
|
-
|
|
13
28
|
### Next.js App Router
|
|
14
29
|
|
|
15
30
|
```typescript
|
|
@@ -17,7 +32,7 @@ npm install @evalview/node
|
|
|
17
32
|
import { createEvalViewMiddleware } from '@evalview/node';
|
|
18
33
|
|
|
19
34
|
export const POST = createEvalViewMiddleware({
|
|
20
|
-
targetEndpoint: '/api/…',
|
|
35
|
+
targetEndpoint: '/api/your-agent',
|
|
21
36
|
});
|
|
22
37
|
```
|
|
23
38
|
|
|
@@ -31,33 +46,72 @@ app.post('/api/evalview', createEvalViewMiddleware({
|
|
|
31
46
|
}));
|
|
32
47
|
```
|
|
33
48
|
|
|
34
|
-
|
|
49
|
+
Then point EvalView at your endpoint:
|
|
50
|
+
|
|
51
|
+
```yaml
|
|
52
|
+
# .evalview/config.yaml
|
|
53
|
+
adapter: http
|
|
54
|
+
endpoint: http://localhost:3000/api/evalview
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Capture a baseline and check for regressions:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
evalview snapshot # save current behavior as baseline
|
|
61
|
+
evalview check # detect regressions on every change
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## 🤖 Claude Code Integration (MCP)
|
|
67
|
+
|
|
68
|
+
Test your agent without leaving the conversation:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
claude mcp add --transport stdio evalview -- evalview mcp serve
|
|
72
|
+
cp CLAUDE.md.example CLAUDE.md
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Ask Claude Code naturally:
|
|
76
|
+
|
|
77
|
+
```
|
|
78
|
+
You: Did my refactor break anything?
|
|
79
|
+
Claude: [run_check] ✨ All clean! No regressions detected.
|
|
80
|
+
|
|
81
|
+
You: Add a test for my weather agent
|
|
82
|
+
Claude: [create_test] ✅ Created tests/weather-lookup.yaml
|
|
83
|
+
[run_snapshot] 📸 Baseline captured.
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
No YAML. No terminal switching. No context loss.
|
|
87
|
+
|
|
88
|
+
[Full MCP docs →](https://github.com/hidai25/eval-view#-claude-code-integration-mcp)
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## ⚙️ Configuration
|
|
35
93
|
|
|
36
94
|
```typescript
|
|
37
95
|
createEvalViewMiddleware({
|
|
38
|
-
// Required:
|
|
39
|
-
targetEndpoint: '/api/…',
|
|
96
|
+
// Required: your agent's endpoint
|
|
97
|
+
targetEndpoint: '/api/your-agent',
|
|
40
98
|
|
|
41
|
-
// Optional:
|
|
42
|
-
// Use an existing user ID from your database
|
|
99
|
+
// Optional: default user ID for test requests
|
|
43
100
|
defaultUserId: 'your-dev-user-id',
|
|
44
101
|
|
|
45
|
-
// Optional:
|
|
46
|
-
// Useful for creating users on-the-fly or looking up existing ones
|
|
102
|
+
// Optional: dynamic user ID resolution
|
|
47
103
|
getUserId: async (req) => {
|
|
48
|
-
// Example: Look up or create user
|
|
49
104
|
const user = await findOrCreateUser('test@example.com');
|
|
50
105
|
return user.id;
|
|
51
106
|
},
|
|
52
107
|
|
|
53
|
-
// Optional:
|
|
108
|
+
// Optional: transform EvalView request to your API format
|
|
54
109
|
transformRequest: (req) => ({
|
|
55
110
|
message: req.query,
|
|
56
|
-
userId: req.context?.userId,
|
|
57
|
-
// ... your custom mapping
|
|
111
|
+
userId: req.context?.userId,
|
|
58
112
|
}),
|
|
59
113
|
|
|
60
|
-
// Optional:
|
|
114
|
+
// Optional: parse your API response to EvalView format
|
|
61
115
|
parseResponse: (responseText, startTime) => ({
|
|
62
116
|
session_id: `session-${startTime}`,
|
|
63
117
|
output: '...',
|
|
@@ -65,41 +119,28 @@ createEvalViewMiddleware({
|
|
|
65
119
|
cost: 0.05,
|
|
66
120
|
latency: Date.now() - startTime,
|
|
67
121
|
}),
|
|
68
|
-
|
|
69
|
-
// Optional: Base URL for requests
|
|
70
|
-
baseUrl: process.env.API_BASE_URL,
|
|
71
122
|
});
|
|
72
123
|
```
|
|
73
124
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
Works out-of-the-box with Tapescope-style APIs that:
|
|
77
|
-
- Accept: `{ message, userId, conversationId, route, history }`
|
|
78
|
-
- Return: NDJSON stream with `tool_call` and `message_complete` events
|
|
79
|
-
|
|
80
|
-
## Testing
|
|
125
|
+
---
|
|
81
126
|
|
|
82
|
-
|
|
127
|
+
## 🔧 Automate It
|
|
83
128
|
|
|
84
129
|
```yaml
|
|
85
|
-
# .evalview
|
|
86
|
-
|
|
87
|
-
|
|
130
|
+
# .github/workflows/evalview.yml
|
|
131
|
+
- run: evalview check --fail-on REGRESSION --json
|
|
132
|
+
env:
|
|
133
|
+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
|
88
134
|
```
|
|
89
135
|
|
|
90
|
-
|
|
91
|
-
```bash
|
|
92
|
-
evalview run
|
|
93
|
-
```
|
|
136
|
+
---
|
|
94
137
|
|
|
95
|
-
##
|
|
138
|
+
## 📚 Documentation
|
|
96
139
|
|
|
97
|
-
|
|
140
|
+
[Full docs →](https://github.com/hidai25/eval-view) • [Examples →](https://github.com/hidai25/eval-view/tree/main/examples) • [Issues →](https://github.com/hidai25/eval-view/issues)
|
|
98
141
|
|
|
99
142
|
---
|
|
100
143
|
|
|
101
|
-
|
|
102
|
-
- hallucinating tools that don't exist
|
|
103
|
-
- tool-calling itself into bankruptcy
|
|
144
|
+
## License
|
|
104
145
|
|
|
105
|
-
|
|
146
|
+
Apache-2.0
|