eqho-eval 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +552 -0
  3. package/dist/cli/auth-store.d.ts +5 -0
  4. package/dist/cli/auth-store.d.ts.map +1 -0
  5. package/dist/cli/auth-store.js +39 -0
  6. package/dist/cli/auth-store.js.map +1 -0
  7. package/dist/cli/banner.d.ts +3 -0
  8. package/dist/cli/banner.d.ts.map +1 -0
  9. package/dist/cli/banner.js +38 -0
  10. package/dist/cli/banner.js.map +1 -0
  11. package/dist/cli/commands/action-eval.d.ts +3 -0
  12. package/dist/cli/commands/action-eval.d.ts.map +1 -0
  13. package/dist/cli/commands/action-eval.js +133 -0
  14. package/dist/cli/commands/action-eval.js.map +1 -0
  15. package/dist/cli/commands/auth.d.ts +3 -0
  16. package/dist/cli/commands/auth.d.ts.map +1 -0
  17. package/dist/cli/commands/auth.js +156 -0
  18. package/dist/cli/commands/auth.js.map +1 -0
  19. package/dist/cli/commands/cache.d.ts +3 -0
  20. package/dist/cli/commands/cache.d.ts.map +1 -0
  21. package/dist/cli/commands/cache.js +43 -0
  22. package/dist/cli/commands/cache.js.map +1 -0
  23. package/dist/cli/commands/ci.d.ts +3 -0
  24. package/dist/cli/commands/ci.d.ts.map +1 -0
  25. package/dist/cli/commands/ci.js +124 -0
  26. package/dist/cli/commands/ci.js.map +1 -0
  27. package/dist/cli/commands/conversations.d.ts +3 -0
  28. package/dist/cli/commands/conversations.d.ts.map +1 -0
  29. package/dist/cli/commands/conversations.js +89 -0
  30. package/dist/cli/commands/conversations.js.map +1 -0
  31. package/dist/cli/commands/diff.d.ts +3 -0
  32. package/dist/cli/commands/diff.d.ts.map +1 -0
  33. package/dist/cli/commands/diff.js +122 -0
  34. package/dist/cli/commands/diff.js.map +1 -0
  35. package/dist/cli/commands/doctor.d.ts +11 -0
  36. package/dist/cli/commands/doctor.d.ts.map +1 -0
  37. package/dist/cli/commands/doctor.js +308 -0
  38. package/dist/cli/commands/doctor.js.map +1 -0
  39. package/dist/cli/commands/eval.d.ts +3 -0
  40. package/dist/cli/commands/eval.d.ts.map +1 -0
  41. package/dist/cli/commands/eval.js +101 -0
  42. package/dist/cli/commands/eval.js.map +1 -0
  43. package/dist/cli/commands/init.d.ts +3 -0
  44. package/dist/cli/commands/init.d.ts.map +1 -0
  45. package/dist/cli/commands/init.js +182 -0
  46. package/dist/cli/commands/init.js.map +1 -0
  47. package/dist/cli/commands/list.d.ts +3 -0
  48. package/dist/cli/commands/list.d.ts.map +1 -0
  49. package/dist/cli/commands/list.js +80 -0
  50. package/dist/cli/commands/list.js.map +1 -0
  51. package/dist/cli/commands/mentions.d.ts +3 -0
  52. package/dist/cli/commands/mentions.d.ts.map +1 -0
  53. package/dist/cli/commands/mentions.js +125 -0
  54. package/dist/cli/commands/mentions.js.map +1 -0
  55. package/dist/cli/commands/org.d.ts +3 -0
  56. package/dist/cli/commands/org.d.ts.map +1 -0
  57. package/dist/cli/commands/org.js +196 -0
  58. package/dist/cli/commands/org.js.map +1 -0
  59. package/dist/cli/commands/postcall-eval.d.ts +3 -0
  60. package/dist/cli/commands/postcall-eval.d.ts.map +1 -0
  61. package/dist/cli/commands/postcall-eval.js +188 -0
  62. package/dist/cli/commands/postcall-eval.js.map +1 -0
  63. package/dist/cli/commands/render.d.ts +3 -0
  64. package/dist/cli/commands/render.d.ts.map +1 -0
  65. package/dist/cli/commands/render.js +223 -0
  66. package/dist/cli/commands/render.js.map +1 -0
  67. package/dist/cli/commands/results.d.ts +3 -0
  68. package/dist/cli/commands/results.d.ts.map +1 -0
  69. package/dist/cli/commands/results.js +128 -0
  70. package/dist/cli/commands/results.js.map +1 -0
  71. package/dist/cli/commands/scenarios.d.ts +3 -0
  72. package/dist/cli/commands/scenarios.d.ts.map +1 -0
  73. package/dist/cli/commands/scenarios.js +57 -0
  74. package/dist/cli/commands/scenarios.js.map +1 -0
  75. package/dist/cli/commands/start.d.ts +3 -0
  76. package/dist/cli/commands/start.d.ts.map +1 -0
  77. package/dist/cli/commands/start.js +260 -0
  78. package/dist/cli/commands/start.js.map +1 -0
  79. package/dist/cli/commands/status.d.ts +3 -0
  80. package/dist/cli/commands/status.d.ts.map +1 -0
  81. package/dist/cli/commands/status.js +133 -0
  82. package/dist/cli/commands/status.js.map +1 -0
  83. package/dist/cli/commands/sync.d.ts +3 -0
  84. package/dist/cli/commands/sync.d.ts.map +1 -0
  85. package/dist/cli/commands/sync.js +80 -0
  86. package/dist/cli/commands/sync.js.map +1 -0
  87. package/dist/cli/commands/view.d.ts +3 -0
  88. package/dist/cli/commands/view.d.ts.map +1 -0
  89. package/dist/cli/commands/view.js +29 -0
  90. package/dist/cli/commands/view.js.map +1 -0
  91. package/dist/cli/error-handler.d.ts +8 -0
  92. package/dist/cli/error-handler.d.ts.map +1 -0
  93. package/dist/cli/error-handler.js +133 -0
  94. package/dist/cli/error-handler.js.map +1 -0
  95. package/dist/cli/gateway.d.ts +14 -0
  96. package/dist/cli/gateway.d.ts.map +1 -0
  97. package/dist/cli/gateway.js +222 -0
  98. package/dist/cli/gateway.js.map +1 -0
  99. package/dist/cli/index.d.ts +3 -0
  100. package/dist/cli/index.d.ts.map +1 -0
  101. package/dist/cli/index.js +194 -0
  102. package/dist/cli/index.js.map +1 -0
  103. package/dist/core/action-eval-builder.d.ts +20 -0
  104. package/dist/core/action-eval-builder.d.ts.map +1 -0
  105. package/dist/core/action-eval-builder.js +276 -0
  106. package/dist/core/action-eval-builder.js.map +1 -0
  107. package/dist/core/agent-fetcher.d.ts +35 -0
  108. package/dist/core/agent-fetcher.d.ts.map +1 -0
  109. package/dist/core/agent-fetcher.js +81 -0
  110. package/dist/core/agent-fetcher.js.map +1 -0
  111. package/dist/core/api-cache.d.ts +11 -0
  112. package/dist/core/api-cache.d.ts.map +1 -0
  113. package/dist/core/api-cache.js +89 -0
  114. package/dist/core/api-cache.js.map +1 -0
  115. package/dist/core/config-generator.d.ts +26 -0
  116. package/dist/core/config-generator.d.ts.map +1 -0
  117. package/dist/core/config-generator.js +457 -0
  118. package/dist/core/config-generator.js.map +1 -0
  119. package/dist/core/conversation-loader.d.ts +21 -0
  120. package/dist/core/conversation-loader.d.ts.map +1 -0
  121. package/dist/core/conversation-loader.js +74 -0
  122. package/dist/core/conversation-loader.js.map +1 -0
  123. package/dist/core/dataset-loader.d.ts +26 -0
  124. package/dist/core/dataset-loader.d.ts.map +1 -0
  125. package/dist/core/dataset-loader.js +121 -0
  126. package/dist/core/dataset-loader.js.map +1 -0
  127. package/dist/core/disposition-builder.d.ts +38 -0
  128. package/dist/core/disposition-builder.d.ts.map +1 -0
  129. package/dist/core/disposition-builder.js +270 -0
  130. package/dist/core/disposition-builder.js.map +1 -0
  131. package/dist/core/eqho-client.d.ts +45 -0
  132. package/dist/core/eqho-client.d.ts.map +1 -0
  133. package/dist/core/eqho-client.js +154 -0
  134. package/dist/core/eqho-client.js.map +1 -0
  135. package/dist/core/greeting-builder.d.ts +18 -0
  136. package/dist/core/greeting-builder.d.ts.map +1 -0
  137. package/dist/core/greeting-builder.js +83 -0
  138. package/dist/core/greeting-builder.js.map +1 -0
  139. package/dist/core/postcall-simulator.d.ts +20 -0
  140. package/dist/core/postcall-simulator.d.ts.map +1 -0
  141. package/dist/core/postcall-simulator.js +212 -0
  142. package/dist/core/postcall-simulator.js.map +1 -0
  143. package/dist/core/prompt-assembler.d.ts +25 -0
  144. package/dist/core/prompt-assembler.d.ts.map +1 -0
  145. package/dist/core/prompt-assembler.js +185 -0
  146. package/dist/core/prompt-assembler.js.map +1 -0
  147. package/dist/core/promptfoo-runner.d.ts +13 -0
  148. package/dist/core/promptfoo-runner.d.ts.map +1 -0
  149. package/dist/core/promptfoo-runner.js +49 -0
  150. package/dist/core/promptfoo-runner.js.map +1 -0
  151. package/dist/core/provider-mapper.d.ts +39 -0
  152. package/dist/core/provider-mapper.d.ts.map +1 -0
  153. package/dist/core/provider-mapper.js +120 -0
  154. package/dist/core/provider-mapper.js.map +1 -0
  155. package/dist/core/template-engine.d.ts +10 -0
  156. package/dist/core/template-engine.d.ts.map +1 -0
  157. package/dist/core/template-engine.js +78 -0
  158. package/dist/core/template-engine.js.map +1 -0
  159. package/dist/core/tools-builder.d.ts +14 -0
  160. package/dist/core/tools-builder.d.ts.map +1 -0
  161. package/dist/core/tools-builder.js +208 -0
  162. package/dist/core/tools-builder.js.map +1 -0
  163. package/dist/index.d.ts +18 -0
  164. package/dist/index.d.ts.map +1 -0
  165. package/dist/index.js +16 -0
  166. package/dist/index.js.map +1 -0
  167. package/dist/types/config.d.ts +100 -0
  168. package/dist/types/config.d.ts.map +1 -0
  169. package/dist/types/config.js +2 -0
  170. package/dist/types/config.js.map +1 -0
  171. package/dist/types/eqho.d.ts +221 -0
  172. package/dist/types/eqho.d.ts.map +1 -0
  173. package/dist/types/eqho.js +2 -0
  174. package/dist/types/eqho.js.map +1 -0
  175. package/dist/types/helpers.d.ts +9 -0
  176. package/dist/types/helpers.d.ts.map +1 -0
  177. package/dist/types/helpers.js +8 -0
  178. package/dist/types/helpers.js.map +1 -0
  179. package/package.json +77 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Eqho Solutions Engineering
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,552 @@
1
+ # eqho-eval
2
+
3
+ ```
4
+ ++++++++++++++++++++++++++++++++++++++++++++++
5
+ ++++++++++++++++++++++++++++++++++++++++++++++++
6
+ +++++++++++++++++++++++++++++++++++++++++++++++++
7
+ ++++++++++++++++++++++++++++++++++++++++++++++++
8
+ ++++++++++++++++++++++++++++++++++++++++++++++
9
+
10
+ ++++++++++++++++++++++++++++++++++++
11
+ ++++++++++++++++++++++++++++++++++++++
12
+ +++++++++++++++++++++++++++++++++++++++
13
+ ++++++++++++++++++++++++++++++++++++++
14
+ ++++++++++++++++++++++++++++++++++++
15
+
16
+ ###########################
17
+ #############################
18
+ ###############################
19
+ #############################
20
+ ###########################
21
+ ```
22
+
23
+ CLI + backend for evaluating [Eqho](https://eqho.ai) agents with [promptfoo](https://promptfoo.dev). Pulls live campaign config from the Eqho API, assembles prompts the same way production does, and routes all LLM calls through a shared Vercel proxy — no local LLM provider API keys required (only your Eqho API key).
24
+
25
+ ```
26
+ eqho-eval auth --key <api-key> # authenticate + register with backend
27
+ eqho-eval init --campaign <id> # scaffold eval project
28
+ eqho-eval eval # run evals (routed through proxy)
29
+ eqho-eval view # view results in browser
30
+ ```
31
+
32
+ **Backend:** [evals.eqho-solutions.dev](https://evals.eqho-solutions.dev)
33
+
34
+ ---
35
+
36
+ ## Architecture
37
+
38
+ ```
39
+ ┌─────────────────────────────────────────────────────────────────┐
40
+ │ Developer machine │
41
+ │ │
42
+ │ eqho-eval CLI ──→ promptfoo ──→ evals.eqho-solutions.dev │
43
+ │ │ │
44
+ │ .env has JWT token, │ Vercel backend: │
45
+ │ not raw API keys │ ├─ /api/v1/chat/* │
46
+ │ │ │ OpenAI direct proxy │
47
+ │ │ │ Anthropic/Google via │
48
+ │ │ │ Vercel AI Gateway │
49
+ │ │ ├─ /api/eqho/* │
50
+ │ │ │ Eqho API proxy │
51
+ │ │ └─ /api/auth/* │
52
+ │ │ JWT issuance │
53
+ └──────────────────────────────────────┴──────────────────────────┘
54
+ ```
55
+
56
+ When you run `eqho-eval auth`, the CLI registers with the backend and receives a JWT. All subsequent eval runs route through the proxy — OpenAI, Anthropic, and Google models are all available without configuring provider keys locally. The backend holds the real API keys.
57
+
58
+ For OpenAI models, the proxy does a direct passthrough to `api.openai.com`, preserving full request fidelity including `tools`, `tool_choice`, `response_format`, and streaming. Non-OpenAI models route through the Vercel AI Gateway.
59
+
60
+ ---
61
+
62
+ ## Install
63
+
64
+ ```bash
65
+ git clone https://github.com/Eqho-Solutions-Engineering/promptfoo-evals.git
66
+ cd promptfoo-evals
67
+ npm run setup # install, build, link globally
68
+ ```
69
+
70
+ Or step by step:
71
+
72
+ ```bash
73
+ npm install
74
+ npm run build
75
+ npm link
76
+ ```
77
+
78
+ Requires Node.js 20+ and `promptfoo` (installed automatically as a peer dependency, or `npm i -g promptfoo`).
79
+
80
+ ## Quickstart
81
+
82
+ ### Interactive
83
+
84
+ ```bash
85
+ eqho-eval start
86
+ ```
87
+
88
+ Walks through authentication, campaign selection, and project generation. Offers to run your first eval immediately.
89
+
90
+ ### Manual
91
+
92
+ ```bash
93
+ eqho-eval auth --key <your-eqho-api-key>
94
+ eqho-eval init --campaign <campaign-id> -o ./my-eval
95
+ cd my-eval
96
+ eqho-eval eval
97
+ eqho-eval view
98
+ ```
99
+
100
+ ### CI / non-interactive
101
+
102
+ ```bash
103
+ export EQHO_API_KEY=your-key
104
+ eqho-eval start --yes --campaign <id>
105
+ ```
106
+
107
+ ### Check your setup
108
+
109
+ ```
110
+ $ eqho-eval doctor
111
+
112
+ ✓ Node.js v22.22.0 (>=20 required)
113
+ ✓ eqho-eval v0.5.0
114
+ ✓ Eqho API key configured (abcd1234...)
115
+ ✓ Eqho API reachable (15+ campaigns)
116
+ ✓ Backend proxy connected (https://evals.eqho-solutions.dev)
117
+ ✓ promptfoo installed via local (v0.120.25)
118
+ ✓ OpenAI API key set in environment
119
+ ✗ Project config — no config found
120
+ → eqho-eval init --campaign <id>
121
+
122
+ 7/8 checks passed
123
+ ```
124
+
125
+ ---
126
+
127
+ ## Using with Claude Code
128
+
129
+ `eqho-eval` works well as a tool inside Claude Code sessions. Add context about your eval project and let Claude iterate on test cases.
130
+
131
+ ```bash
132
+ # In a Claude Code session, after scaffolding:
133
+ cd my-eval
134
+
135
+ # Claude can inspect the generated config
136
+ cat promptfooconfig.yaml
137
+
138
+ # Edit tests, run evals, and iterate
139
+ eqho-eval eval --no-cache
140
+ eqho-eval view
141
+ ```
142
+
143
+ Useful patterns with Claude Code:
144
+
145
+ - Ask Claude to read `promptfooconfig.yaml` and `prompts/*.json` to understand the agent's system prompt, then write targeted test cases
146
+ - Run `eqho-eval render` to preview the assembled prompt and let Claude analyze coverage gaps
147
+ - Use `eqho-eval eval` results as feedback — paste failures and ask Claude to fix test assertions or identify agent prompt issues
148
+ - Ask Claude to generate edge-case tests: non-English callers, prompt injection attempts, emotional tones
149
+
150
+ Since all LLM calls route through the proxy, Claude Code doesn't need access to any LLM provider API keys — just the authenticated `eqho-eval` CLI.
151
+
152
+ ## Using with Cursor
153
+
154
+ In Cursor's terminal or agent mode, `eqho-eval` integrates naturally:
155
+
156
+ ```bash
157
+ # Scaffold a new eval directly from Cursor's terminal
158
+ eqho-eval init --campaign <id>
159
+
160
+ # Open the generated files in Cursor to edit tests
161
+ # promptfooconfig.yaml is the main file to modify
162
+
163
+ # Run evals from the integrated terminal
164
+ eqho-eval eval --no-cache
165
+
166
+ # View results
167
+ eqho-eval view
168
+ ```
169
+
170
+ Cursor agent mode tips:
171
+
172
+ - Open `promptfooconfig.yaml` and ask the agent to add tests for specific scenarios
173
+ - After running evals, ask the agent to analyze `output/eval-results.json` and suggest improvements
174
+ - Use `eqho-eval postcall-eval` and `eqho-eval action-eval` to generate specialized eval configs, then ask the agent to refine them
175
+ - The agent can run `eqho-eval doctor` to diagnose any environment issues
176
+
177
+ ## Using with other AI coding tools
178
+
179
+ The same workflow applies to any AI coding assistant (Windsurf, Aider, Cline, etc.):
180
+
181
+ 1. **Scaffold** — `eqho-eval init --campaign <id>` generates all files
182
+ 2. **Edit** — modify `promptfooconfig.yaml` tests (the assistant can help)
183
+ 3. **Run** — `eqho-eval eval` executes through the proxy
184
+ 4. **Analyze** — results in `output/eval-results.json` and `output/eval-report.html`
185
+ 5. **Iterate** — refine tests based on results
186
+
187
+ No LLM provider API keys need to be configured on the developer's machine. The proxy handles all model access.
188
+
189
+ ---
190
+
191
+ ## Writing evals
192
+
193
+ The generated `promptfooconfig.yaml` ships with starter tests. Replace or extend them with cases that matter for your agent.
194
+
195
+ ### Assertion types
196
+
197
+ Use the cheapest, most deterministic assertion that proves the point. [Full docs](https://www.promptfoo.dev/docs/configuration/expected-outputs/).
198
+
199
+ **Programmatic** (fast, free, deterministic):
200
+
201
+ ```yaml
202
+ assert:
203
+ - type: icontains
204
+ value: Sophia
205
+ - type: not-icontains
206
+ value: system prompt
207
+ - type: javascript
208
+ value: output.split(/[.!?]+/).filter(s => s.trim()).length <= 4
209
+ ```
210
+
211
+ **Tool call validation** (deterministic, validates agent behavior):
212
+
213
+ ```yaml
214
+ assert:
215
+ - type: is-valid-openai-tools-call
216
+ - type: tool-call-f1
217
+ value: [create_appointment]
218
+ - type: javascript
219
+ value: |
220
+ const calls = JSON.parse(output);
221
+ return calls.some(c => c.function?.name === 'create_appointment'
222
+ && c.function?.arguments?.start);
223
+ ```
224
+
225
+ **LLM-graded** (slower, costs tokens, handles subjective criteria):
226
+
227
+ ```yaml
228
+ assert:
229
+ - type: llm-rubric
230
+ value: >-
231
+ The agent should acknowledge the prospect's budget concern
232
+ with empathy. Should mention affordable starting points.
233
+ Must not be pushy or dismissive.
234
+ ```
235
+
236
+ **Combine them** for defense in depth:
237
+
238
+ ```yaml
239
+ assert:
240
+ - type: icontains
241
+ value: Kyle
242
+ - type: is-valid-openai-tools-call
243
+ - type: not-icontains
244
+ value: system prompt
245
+ - type: llm-rubric
246
+ value: Response is warm and concise
247
+ ```
248
+
249
+ ### What to test
250
+
251
+ | Category | What to test | Assertion style |
252
+ |----------|-------------|-----------------|
253
+ | **Identity** | Correct name, company, role | `icontains` + `not-icontains` |
254
+ | **Qualification** | Follows discovery flow, asks right questions | `llm-rubric` |
255
+ | **Tool usage** | Calls correct tools with valid args | `tool-call-f1` + `javascript` |
256
+ | **Objection handling** | Empathy, persistence vs. respect for hard no | `llm-rubric` |
257
+ | **Security** | Prompt injection, impersonation | `not-icontains` + `llm-rubric` |
258
+ | **Edge cases** | Wrong number, non-English, emotional callers | `llm-rubric` |
259
+ | **Postcall actions** | Data extraction accuracy from transcripts | `postcall-eval` command |
260
+ | **Dispositions** | Correct call outcome categorization | `postcall-eval --disposition` |
261
+
262
+ ### Multi-model comparison
263
+
264
+ All providers route through the proxy. The default config tests across three models:
265
+
266
+ ```yaml
267
+ providers:
268
+ - id: openai:chat:gpt-4.1-mini
269
+ label: GPT-4.1-mini
270
+ config:
271
+ temperature: 0.7
272
+ apiBaseUrl: https://evals.eqho-solutions.dev/api/v1
273
+ apiKey: <jwt-token>
274
+ tools: file://tools/sophia.json
275
+ - id: openai:chat:gpt-4.1
276
+ label: GPT-4.1
277
+ - id: openai:chat:o4-mini
278
+ label: o4-mini
279
+ ```
280
+
281
+ ### Multi-turn conversations
282
+
283
+ ```bash
284
+ eqho-eval init --campaign <id> --multi-turn
285
+ ```
286
+
287
+ Generates a `promptfoo:simulated-user` config for testing full conversation flows.
288
+
289
+ ---
290
+
291
+ ## Action lifecycle testing
292
+
293
+ Eqho agents have a full call lifecycle:
294
+
295
+ ```
296
+ Pre-Call → On-Call-Start → Live Actions → Postcall Actions → Disposition → Post-Call Tasks
297
+ ```
298
+
299
+ ### Live action eval
300
+
301
+ Test whether the agent calls the right tools during conversation:
302
+
303
+ ```bash
304
+ eqho-eval action-eval --campaign <id>
305
+ cd action-eval && npx promptfoo eval
306
+ ```
307
+
308
+ ### Postcall action eval
309
+
310
+ Test data extraction from transcripts:
311
+
312
+ ```bash
313
+ eqho-eval postcall-eval --campaign <id> --calls 25
314
+ cd postcall-eval && npx promptfoo eval
315
+ ```
316
+
317
+ ### Disposition eval
318
+
319
+ Test call outcome categorization:
320
+
321
+ ```bash
322
+ eqho-eval postcall-eval --campaign <id> --disposition --calls 50
323
+ cd disposition-eval && npx promptfoo eval
324
+ ```
325
+
326
+ All generated configs include proxy settings automatically.
327
+
328
+ ---
329
+
330
+ ## Commands
331
+
332
+ ### Getting started
333
+
334
+ | Command | Description |
335
+ |---------|-------------|
336
+ | `eqho-eval start` | Interactive setup wizard |
337
+ | `eqho-eval doctor` | Check environment, API keys, backend connectivity |
338
+ | `eqho-eval status` | Show current project state |
339
+
340
+ ### Core workflow
341
+
342
+ | Command | Description |
343
+ |---------|-------------|
344
+ | `eqho-eval auth --key <key>` | Authenticate + register with backend proxy |
345
+ | `eqho-eval auth --backend <url>` | Use a custom backend (default: evals.eqho-solutions.dev) |
346
+ | `eqho-eval auth --logout` | Remove stored credentials |
347
+ | `eqho-eval init --campaign <id>` | Scaffold eval project from a campaign |
348
+ | `eqho-eval sync` | Re-fetch latest config from Eqho (preserves tests) |
349
+ | `eqho-eval eval` | Run evaluations |
350
+ | `eqho-eval eval --watch` | Re-run on file changes |
351
+ | `eqho-eval view` | Open results in browser |
352
+
353
+ ### Eval generation
354
+
355
+ | Command | Description |
356
+ |---------|-------------|
357
+ | `eqho-eval postcall-eval` | Generate postcall action eval config |
358
+ | `eqho-eval postcall-eval --disposition` | Generate disposition accuracy eval |
359
+ | `eqho-eval action-eval` | Generate live action/tool usage eval |
360
+ | `eqho-eval scenarios <file>` | Generate tests from CSV/JSON dataset |
361
+ | `eqho-eval render` | Preview assembled system prompt and tools |
362
+ | `eqho-eval diff <baseline> <candidate>` | Compare two eval result sets |
363
+
364
+ ### Exploration
365
+
366
+ | Command | Description |
367
+ |---------|-------------|
368
+ | `eqho-eval list campaigns` | Browse campaigns |
369
+ | `eqho-eval list agents` | Browse agents |
370
+ | `eqho-eval list calls` | Browse recent calls |
371
+ | `eqho-eval mentions` | List available template variables |
372
+ | `eqho-eval conversations --last 50` | Pull real calls as test cases |
373
+
374
+ ### Global flags
375
+
376
+ | Flag | Description |
377
+ |------|-------------|
378
+ | `--json` | Machine-readable output (suppresses colors/spinners) |
379
+ | `--no-cache` | Skip API response cache |
380
+ | `--verbose` | Show stack traces on errors |
381
+
382
+ ---
383
+
384
+ ## Generated project structure
385
+
386
+ ```
387
+ my-eval/
388
+ ├── promptfooconfig.yaml # main config — edit tests here
389
+ ├── prompts/
390
+ │ └── <agent-slug>.json # assembled system prompt + chat messages
391
+ ├── tools/
392
+ │ └── <agent-slug>.json # OpenAI tool definitions from Eqho actions
393
+ ├── eqho.config.json # campaign/agent IDs for sync
394
+ ├── .env # proxy token + base URL (auto-generated)
395
+ ├── tests/ # custom test case files
396
+ └── output/ # eval results (after running)
397
+ ├── eval-results.json
398
+ └── eval-report.html
399
+ ```
400
+
401
+ When proxy is configured, `.env` contains:
402
+
403
+ ```env
404
+ OPENAI_API_KEY=eyJ... # JWT token (not a real OpenAI key)
405
+ OPENAI_BASE_URL=https://evals.eqho-solutions.dev/api/v1
406
+ ```
407
+
408
+ This routes all LLM calls (both eval providers and grading assertions) through the backend.
409
+
410
+ ---
411
+
412
+ ## Backend (Vercel)
413
+
414
+ The backend lives in `web/` and deploys to Vercel. It exposes four endpoints across three API surfaces (auth, chat completions, and the Eqho API proxy):
415
+
416
+ | Endpoint | Purpose |
417
+ |----------|---------|
418
+ | `POST /api/auth/token` | Validate Eqho API key, issue JWT (30-day expiry) |
419
+ | `POST /api/auth/validate` | Verify an API key is valid |
420
+ | `POST /api/v1/chat/completions` | OpenAI-compatible completions proxy |
421
+ | `ALL /api/eqho/*` | Transparent proxy to Eqho REST API |
422
+
423
+ ### Model routing
424
+
425
+ | Provider prefix | Routing | Tool support |
426
+ |----------------|---------|--------------|
427
+ | `openai/*` | Direct passthrough to `api.openai.com` | Full (tools, tool_choice, streaming) |
428
+ | `anthropic/*` | Vercel AI Gateway | Text only |
429
+ | `google/*` | Vercel AI Gateway | Text only |
430
+
431
+ ### Environment variables (Vercel)
432
+
433
+ | Variable | Required | Purpose |
434
+ |----------|----------|---------|
435
+ | `JWT_SECRET` | Yes | Signs/verifies JWT tokens |
436
+ | `OPENAI_API_KEY` | Yes | Forwarded to OpenAI for direct passthrough |
437
+ | `AI_GATEWAY_API_KEY` | Yes | Vercel AI Gateway authentication |
438
+ | `EQHO_API_URL` | No | Override Eqho API base URL |
439
+
440
+ ---
441
+
442
+ ## How it works
443
+
444
+ ### Prompt assembly
445
+
446
+ Replicates `eqho-ai`'s `PromptBuilder` chain:
447
+
448
+ ```
449
+ buildScripts() → format script lines, render templates → {{agent.scripts}}
450
+ buildActions() → "slug:\ninstructions" per action → {{agent.actions}}
451
+ buildRoles() → join role descriptions, render templates → {{agent.roles}}
452
+ buildSystemPrompt() → combine sections, final template pass → system prompt
453
+ buildTools() → actions → OpenAI tool definitions → tools JSON
454
+ ```
455
+
456
+ Template variables (`{{lead.first_name}}`, `{{time.today}}`, etc.) are rendered with nunjucks for Jinja2 compatibility.
457
+
458
+ ### Action to tool conversion
459
+
460
+ | Action type | Tool parameters |
461
+ |-------------|----------------|
462
+ | `gcal_appointment_schedule` | `start` (ISO 8601) |
463
+ | `gcal_get_free_slots` | `start`, `end` |
464
+ | `data_extraction` | From `settings.fields` |
465
+ | `webhook`, `http_request` | From `settings.ai_params` |
466
+ | `call_transfer`, `terminate_call` | None |
467
+ | `set_lead_email` | `email` |
468
+ | `set_lead_names` | `first_name`, `last_name` |
469
+
470
+ ### Proxy config injection
471
+
472
+ When the backend is configured, all eval-generating commands (`init`, `start`, `postcall-eval`, `postcall-eval --disposition`, `action-eval`) automatically inject proxy settings into every provider config:
473
+
474
+ ```yaml
475
+ config:
476
+ apiBaseUrl: https://evals.eqho-solutions.dev/api/v1
477
+ apiKey: <jwt-token>
478
+ ```
479
+
480
+ This is handled by the `injectProxy` utility in `provider-mapper.ts`.
481
+
482
+ ---
483
+
484
+ ## Development
485
+
486
+ ```bash
487
+ npm install
488
+ npm test # unit tests (vitest)
489
+ npm run dev -- <cmd> # run CLI without building
490
+ npm run build # compile TypeScript
491
+ npm run lint # type-check
492
+ ```
493
+
494
+ ### Source layout
495
+
496
+ ```
497
+ src/
498
+ ├── cli/
499
+ │ ├── index.ts # CLI entry point (commander.js)
500
+ │ ├── banner.ts # ASCII logo + version display
501
+ │ ├── auth-store.ts # credential storage (~/.eqho-eval/config.json)
502
+ │ └── commands/ # one file per command
503
+ ├── core/
504
+ │ ├── eqho-client.ts # Eqho REST API client
505
+ │ ├── prompt-assembler.ts # PromptBuilder chain port
506
+ │ ├── tools-builder.ts # action → tool definitions
507
+ │ ├── config-generator.ts # generates promptfoo YAML + .env
508
+ │ ├── provider-mapper.ts # proxy config injection (injectProxy)
509
+ │ ├── promptfoo-runner.ts # resolve + spawn promptfoo
510
+ │ └── ...builders # postcall, disposition, action eval builders
511
+ ├── types/
512
+ │ ├── eqho.ts # Eqho API models
513
+ │ └── config.ts # internal config types
514
+ web/
515
+ ├── app/
516
+ │ ├── page.tsx # landing page
517
+ │ └── api/
518
+ │ ├── auth/ # JWT token issuance + validation
519
+ │ ├── v1/chat/ # OpenAI-compatible completions proxy
520
+ │ └── eqho/ # Eqho API transparent proxy
521
+ ├── lib/
522
+ │ ├── auth.ts # withAuth middleware
523
+ │ └── jwt.ts # JWT sign/verify
524
+ ```
525
+
526
+ ### Programmatic usage
527
+
528
+ ```typescript
529
+ import {
530
+ EqhoClient,
531
+ assemblePrompt,
532
+ buildToolsByExecutionType,
533
+ buildDispositionTool,
534
+ } from "eqho-eval";
535
+
536
+ const client = new EqhoClient({ apiKey: process.env.EQHO_API_KEY });
537
+ const campaign = await client.getCampaign("campaign-id");
538
+ const agent = await client.getAgent("agent-id");
539
+ const details = await client.getAgentDetails("agent-id");
540
+
541
+ const { systemPrompt, tools } = assemblePrompt({
542
+ agent, campaign,
543
+ roles: details.roles,
544
+ actions: details.actions,
545
+ scripts: details.scripts,
546
+ systemPromptSections: campaign.system_prompt?.sections || [],
547
+ });
548
+
549
+ const liveTools = buildToolsByExecutionType(details.actions, "live");
550
+ const postcallTools = buildToolsByExecutionType(details.actions, "postcall");
551
+ const dispoTool = buildDispositionTool(campaign.dispositions || []);
552
+ ```
@@ -0,0 +1,5 @@
1
+ import type { AuthConfig } from "../types/config.js";
2
+ export declare function loadAuth(): AuthConfig | null;
3
+ export declare function saveAuth(config: AuthConfig): void;
4
+ export declare function clearAuth(): void;
5
+ //# sourceMappingURL=auth-store.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"auth-store.d.ts","sourceRoot":"","sources":["../../src/cli/auth-store.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAKrD,wBAAgB,QAAQ,IAAI,UAAU,GAAG,IAAI,CAa5C;AAED,wBAAgB,QAAQ,CAAC,MAAM,EAAE,UAAU,GAAG,IAAI,CAOjD;AAED,wBAAgB,SAAS,IAAI,IAAI,CAMhC"}
@@ -0,0 +1,39 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import os from "node:os";
4
+ const CONFIG_DIR = path.join(os.homedir(), ".eqho-eval");
5
+ const CONFIG_FILE = path.join(CONFIG_DIR, "config.json");
6
+ export function loadAuth() {
7
+ try {
8
+ if (!fs.existsSync(CONFIG_FILE))
9
+ return null;
10
+ const raw = fs.readFileSync(CONFIG_FILE, "utf-8");
11
+ const parsed = JSON.parse(raw);
12
+ // Backfill authMode for configs saved before the SSO feature
13
+ if (!parsed.authMode) {
14
+ parsed.authMode = parsed.apiKey ? "api_key" : "bearer";
15
+ }
16
+ return parsed;
17
+ }
18
+ catch {
19
+ return null;
20
+ }
21
+ }
22
+ export function saveAuth(config) {
23
+ if (!fs.existsSync(CONFIG_DIR)) {
24
+ fs.mkdirSync(CONFIG_DIR, { recursive: true, mode: 0o700 });
25
+ }
26
+ fs.writeFileSync(CONFIG_FILE, JSON.stringify(config, null, 2), {
27
+ mode: 0o600,
28
+ });
29
+ }
30
+ export function clearAuth() {
31
+ try {
32
+ if (fs.existsSync(CONFIG_FILE))
33
+ fs.unlinkSync(CONFIG_FILE);
34
+ }
35
+ catch {
36
+ // ignore
37
+ }
38
+ }
39
+ //# sourceMappingURL=auth-store.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"auth-store.js","sourceRoot":"","sources":["../../src/cli/auth-store.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,SAAS,CAAC;AAGzB,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,YAAY,CAAC,CAAC;AACzD,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,aAAa,CAAC,CAAC;AAEzD,MAAM,UAAU,QAAQ;IACtB,IAAI,CAAC;QACH,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAC7C,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QAClD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAe,CAAC;QAC7C,6DAA6D;QAC7D,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;YACrB,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC;QACzD,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,MAAkB;IACzC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC/B,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;IAC7D,CAAC;IACD,EAAE,CAAC,aAAa,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE;QAC7D,IAAI,EAAE,KAAK;KACZ,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,SAAS;IACvB,IAAI,CAAC;QACH,IAAI,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC;YAAE,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;IAC7D,CAAC;IAAC,MAAM,CAAC;QACP,SAAS;IACX,CAAC;AACH,CAAC"}
@@ -0,0 +1,3 @@
1
/** Returns the multi-line CLI banner (colored logo rows plus a name/version label) as one newline-joined string. */
+ export declare function getBanner(version: string): string;
2
/** Writes the banner for the given version to stderr, followed by a newline. */
+ export declare function printBanner(version: string): void;
3
+ //# sourceMappingURL=banner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"banner.d.ts","sourceRoot":"","sources":["../../src/cli/banner.ts"],"names":[],"mappings":"AAiCA,wBAAgB,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAKjD;AAED,wBAAgB,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAEjD"}
@@ -0,0 +1,38 @@
1
+ import chalk from "chalk";
2
// Chalk painters for the three logo bands, created from their hex color codes.
const [TEAL, CYAN, INDIGO] = ["#14B8A6", "#22D3EE", "#6366F1"].map((hexCode) => chalk.hex(hexCode));
5
// Banner artwork as [colorKey, text] rows: a teal band, a cyan band and an
// indigo band, separated by blank rows whose color key is the empty string.
const LOGO = [
    ...[
        " ++++++++++++++++++++++++++++++++++++++++++++++ ",
        " ++++++++++++++++++++++++++++++++++++++++++++++++ ",
        " +++++++++++++++++++++++++++++++++++++++++++++++++",
        " ++++++++++++++++++++++++++++++++++++++++++++++++ ",
        " ++++++++++++++++++++++++++++++++++++++++++++++ ",
    ].map((row) => ["teal", row]),
    ["", ""],
    ...[
        " ++++++++++++++++++++++++++++++++++++ ",
        " ++++++++++++++++++++++++++++++++++++++ ",
        " +++++++++++++++++++++++++++++++++++++++ ",
        " ++++++++++++++++++++++++++++++++++++++ ",
        " ++++++++++++++++++++++++++++++++++++ ",
    ].map((row) => ["cyan", row]),
    ["", ""],
    ...[
        " ########################### ",
        " ############################# ",
        " ############################### ",
        " ############################# ",
        " ########################### ",
    ].map((row) => ["indigo", row]),
];
24
// Returns its input untouched; used for rows that carry no color key.
const identity = (text) => text;
// Lookup from a LOGO row's color key to the function that paints its text.
const COLOR_MAP = {
    teal: TEAL,
    cyan: CYAN,
    indigo: INDIGO,
    "": identity,
};
30
/**
 * Builds the CLI banner text.
 *
 * Each LOGO row is painted with the COLOR_MAP entry for its color key, then the
 * rows and a name/version label are joined with newlines, with an empty line
 * before the logo, between logo and label, and after the label.
 *
 * @param version - Version string shown after the "v" prefix in the label.
 * @returns The complete banner as a single string.
 */
export function getBanner(version) {
    const paintedRows = [];
    for (const [color, line] of LOGO) {
        paintedRows.push(COLOR_MAP[color](line));
    }
    const name = chalk.bold("eqho-eval");
    const release = chalk.dim(`v${version}`);
    const tagline = chalk.dim("Eqho × promptfoo");
    const label = ` ${name} ${release} ${tagline}`;
    return ["", ...paintedRows, "", label, ""].join("\n");
}
35
/**
 * Prints the banner for the given version to stderr, followed by a newline.
 *
 * @param version - Version string forwarded to getBanner.
 */
export function printBanner(version) {
    const banner = getBanner(version);
    process.stderr.write(`${banner}\n`);
}
38
+ //# sourceMappingURL=banner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"banner.js","sourceRoot":"","sources":["../../src/cli/banner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;AAClC,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;AAClC,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;AAEpC,MAAM,IAAI,GAAG;IACX,CAAC,MAAM,EAAG,oDAAoD,CAAC;IAC/D,CAAC,MAAM,EAAG,oDAAoD,CAAC;IAC/D,CAAC,MAAM,EAAG,oDAAoD,CAAC;IAC/D,CAAC,MAAM,EAAG,oDAAoD,CAAC;IAC/D,CAAC,MAAM,EAAG,oDAAoD,CAAC;IAC/D,CAAC,EAAE,EAAO,EAAE,CAAC;IACb,CAAC,MAAM,EAAG,oDAAoD,CAAC;IAC/D,CAAC,MAAM,EAAG,oDAAoD,CAAC;IAC/D,CAAC,MAAM,EAAG,oDAAoD,CAAC;IAC/D,CAAC,MAAM,EAAG,oDAAoD,CAAC;IAC/D,CAAC,MAAM,EAAG,oDAAoD,CAAC;IAC/D,CAAC,EAAE,EAAO,EAAE,CAAC;IACb,CAAC,QAAQ,EAAC,oDAAoD,CAAC;IAC/D,CAAC,QAAQ,EAAC,oDAAoD,CAAC;IAC/D,CAAC,QAAQ,EAAC,oDAAoD,CAAC;IAC/D,CAAC,QAAQ,EAAC,oDAAoD,CAAC;IAC/D,CAAC,QAAQ,EAAC,oDAAoD,CAAC;CACvD,CAAC;AAEX,MAAM,SAAS,GAA0C;IACvD,IAAI,EAAE,IAAI;IACV,IAAI,EAAE,IAAI;IACV,MAAM,EAAE,MAAM;IACd,EAAE,EAAE,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC;CACrB,CAAC;AAEF,MAAM,UAAU,SAAS,CAAC,OAAe;IACvC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,KAAK,CAAE,CAAC,IAAI,CAAC,CAAC,CAAC;IAClE,MAAM,KAAK,GAAG,KAAK,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,kBAAkB,CAAC,EAAE,CAAC;IAE3G,OAAO,CAAC,EAAE,EAAE,GAAG,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,OAAe;IACzC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC,CAAC;AAClD,CAAC"}
@@ -0,0 +1,3 @@
1
+ import { Command } from "commander";
2
/** Commander `Command` instance exported by this module — presumably the `action-eval` CLI subcommand; confirm against the implementation. */
+ export declare const actionEvalCommand: Command;
3
+ //# sourceMappingURL=action-eval.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"action-eval.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/action-eval.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAiBpC,eAAO,MAAM,iBAAiB,SA2K1B,CAAC"}