vent-hq 0.12.1 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +117 -0
- package/dist/index.mjs +16 -135
- package/dist/{package-QLGBTYZS.mjs → package-GODDS4TH.mjs} +1 -1
- package/package.json +10 -11
package/LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
1. Definitions.
|
|
8
|
+
|
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
11
|
+
|
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
13
|
+
the copyright owner that is granting the License.
|
|
14
|
+
|
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
16
|
+
other entities that control, are controlled by, or are under common
|
|
17
|
+
control with that entity. For the purposes of this definition,
|
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
19
|
+
direction or management of such entity, whether by contract or
|
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
22
|
+
|
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
24
|
+
exercising permissions granted by this License.
|
|
25
|
+
|
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
27
|
+
including but not limited to software source code, documentation
|
|
28
|
+
source, and configuration files.
|
|
29
|
+
|
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
|
31
|
+
transformation or translation of a Source form, including but
|
|
32
|
+
not limited to compiled object code, generated documentation,
|
|
33
|
+
and conversions to other media types.
|
|
34
|
+
|
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
|
36
|
+
Object form, made available under the License, as indicated by a
|
|
37
|
+
copyright notice that is included in or attached to the work
|
|
38
|
+
(an example is provided in the Appendix below).
|
|
39
|
+
|
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
46
|
+
the Work and Derivative Works thereof.
|
|
47
|
+
|
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
|
49
|
+
the original version of the Work and any modifications or additions
|
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
|
61
|
+
|
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
|
64
|
+
subsequently incorporated within the Work.
|
|
65
|
+
|
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
|
72
|
+
|
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
78
|
+
where such license applies only to those patent claims licensable
|
|
79
|
+
by such Contributor that are necessarily infringed by their
|
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
|
82
|
+
institute patent litigation against any entity (including a
|
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
|
85
|
+
or contributory patent infringement, then any patent licenses
|
|
86
|
+
granted to You under this License for that Work shall terminate
|
|
87
|
+
as of the date such litigation is filed.
|
|
88
|
+
|
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
91
|
+
modifications, and in Source or Object form, provided that You
|
|
92
|
+
meet the following conditions:
|
|
93
|
+
|
|
94
|
+
(a) You must give any other recipients of the Work or
|
|
95
|
+
Derivative Works a copy of this License; and
|
|
96
|
+
|
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
|
98
|
+
stating that You changed the files; and
|
|
99
|
+
|
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
|
102
|
+
attribution notices from the Source form of the Work,
|
|
103
|
+
excluding those notices that do not pertain to any part of
|
|
104
|
+
the Derivative Works; and
|
|
105
|
+
|
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
|
108
|
+
include a readable copy of the attribution notices contained
|
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
|
111
|
+
of the following places: within a NOTICE text file distributed
|
|
112
|
+
as part of the Derivative Works; within the Source form or
|
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
|
114
|
+
within a display generated by the Derivative Works, if and
|
|
115
|
+
wherever such third-party notices normally appear. The contents
|
|
116
|
+
of the NOTICE file are for informational purposes only and
|
|
117
|
+
do not modify the License. You may add Your own attribution
|
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
|
120
|
+
that such additional attribution notices cannot be construed
|
|
121
|
+
as modifying the License.
|
|
122
|
+
|
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
|
124
|
+
may provide additional or different license terms and conditions
|
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
|
128
|
+
the conditions stated in this License.
|
|
129
|
+
|
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
|
133
|
+
this License, without any additional terms or conditions.
|
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
|
135
|
+
the terms of any separate license agreement you may have executed
|
|
136
|
+
with Licensor regarding such Contributions.
|
|
137
|
+
|
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
|
140
|
+
except as required for reasonable and customary use in describing the
|
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
|
142
|
+
|
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
|
152
|
+
|
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
|
158
|
+
incidental, or consequential damages of any character arising as a
|
|
159
|
+
result of this License or out of the use or inability to use the
|
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
|
162
|
+
other commercial damages or losses), even if such Contributor
|
|
163
|
+
has been advised of the possibility of such damages.
|
|
164
|
+
|
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
168
|
+
or other liability obligations and/or rights consistent with this
|
|
169
|
+
License. However, in accepting such obligations, You may act only
|
|
170
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
|
174
|
+
of your accepting any such warranty or additional liability.
|
|
175
|
+
|
|
176
|
+
END OF TERMS AND CONDITIONS
|
|
177
|
+
|
|
178
|
+
APPENDIX: How to apply the Apache License to your work.
|
|
179
|
+
|
|
180
|
+
To apply the Apache License to your work, attach the following
|
|
181
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
182
|
+
replaced with your own identifying information. (Don't include
|
|
183
|
+
the brackets!) The text should be enclosed in the appropriate
|
|
184
|
+
comment syntax for the file format. We also recommend that a
|
|
185
|
+
file or class name and description of purpose be included on the
|
|
186
|
+
same "printed page" as the copyright notice for easier
|
|
187
|
+
identification within third-party archives.
|
|
188
|
+
|
|
189
|
+
Copyright [yyyy] [name of copyright owner]
|
|
190
|
+
|
|
191
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
|
+
you may not use this file except in compliance with the License.
|
|
193
|
+
You may obtain a copy of the License at
|
|
194
|
+
|
|
195
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
196
|
+
|
|
197
|
+
Unless required by applicable law or agreed to in writing, software
|
|
198
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
199
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
200
|
+
See the License for the specific language governing permissions and
|
|
201
|
+
limitations under the License.
|
package/README.md
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# vent-hq
|
|
2
|
+
|
|
3
|
+
**Agent CLI for voice AI development.** Lets coding agents (Claude Code, Cursor, Codex, Windsurf) place real calls against your voice agent and read back transcripts, latency, audio, tool calls, and 60+ computed metrics — so they can iterate on prompts, flows, and platform config based on what actually happened.
|
|
4
|
+
|
|
5
|
+
Works with **Vapi, Retell, LiveKit, ElevenLabs, Bland, and custom WebSocket endpoints**.
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npx vent-hq@latest init
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## How it works
|
|
12
|
+
|
|
13
|
+
1. Your coding agent writes a Vent caller config (`.vent/suite.json`) with a persona and a call goal.
|
|
14
|
+
2. It runs `vent-hq run -f .vent/suite.json`. Vent joins the call as a voice caller, drives the conversation, and records everything.
|
|
15
|
+
3. Vent returns a single JSON result: full transcript, recorded audio URL, per-turn latency, tool-call trace, and call metadata. Your agent reads it and decides what to change.
|
|
16
|
+
|
|
17
|
+
The agent is the brain. Vent is the instrument.
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
npx vent-hq@latest init
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
`init` will:
|
|
26
|
+
- Authenticate via GitHub (if you have `gh` installed) or open a browser for device-code auth
|
|
27
|
+
- Install skill files for Claude Code (`.claude/skills/vent/SKILL.md`), Cursor (`.cursor/rules/vent.mdc`), Codex (`AGENTS.md`), and Windsurf (`.windsurf/skills/vent/SKILL.md`)
|
|
28
|
+
- Scaffold a starter suite at `.vent/suite.json`
|
|
29
|
+
|
|
30
|
+
After `init`, your coding agent reads the skill file and takes over from there.
|
|
31
|
+
|
|
32
|
+
## Example suite
|
|
33
|
+
|
|
34
|
+
```json
|
|
35
|
+
{
|
|
36
|
+
"connection": {
|
|
37
|
+
"adapter": "vapi",
|
|
38
|
+
"vapi_assistant_id": "asst_..."
|
|
39
|
+
},
|
|
40
|
+
"calls": {
|
|
41
|
+
"happy-path": {
|
|
42
|
+
"caller_prompt": "You're a customer wanting to book a haircut for Friday at 3pm. Be friendly but a little vague about the date at first.",
|
|
43
|
+
"max_turns": 8
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Set `VAPI_API_KEY` in `.env` and run:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
npx vent-hq run -f .vent/suite.json
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Swap `adapter` for `retell`, `livekit`, `elevenlabs`, `bland`, or `websocket` to target a different platform. See [the docs](https://docs.vent.dev) for the per-platform connection block.
|
|
56
|
+
|
|
57
|
+
## Commands
|
|
58
|
+
|
|
59
|
+
| Command | Purpose |
|
|
60
|
+
|-------------------------------------|------------------------------------------------------------|
|
|
61
|
+
| `vent-hq init` | One-time setup: auth, skill files, starter suite |
|
|
62
|
+
| `vent-hq run -f <suite.json>` | Run a call (or all calls) from a suite, stream results |
|
|
63
|
+
| `vent-hq run -f <s> --call <name>` | Run a single named call |
|
|
64
|
+
| `vent-hq stop <run-id>` | Cancel a queued or running call |
|
|
65
|
+
| `vent-hq agent start -f <s>` | Keep a relay session open for a local WebSocket agent |
|
|
66
|
+
| `vent-hq login` / `logout` | Manage credentials |
|
|
67
|
+
|
|
68
|
+
Run `vent-hq <command> --help` for command-specific options.
|
|
69
|
+
|
|
70
|
+
## What you get back
|
|
71
|
+
|
|
72
|
+
Every `run` returns a single JSON object on stdout. Shape:
|
|
73
|
+
|
|
74
|
+
```json
|
|
75
|
+
{
|
|
76
|
+
"run_id": "01J...",
|
|
77
|
+
"status": "complete",
|
|
78
|
+
"calls": [
|
|
79
|
+
{
|
|
80
|
+
"name": "happy-path",
|
|
81
|
+
"status": "completed",
|
|
82
|
+
"duration_ms": 42180,
|
|
83
|
+
"latency": { "p50_ms": 612, "p95_ms": 1180, "time_to_first_audio_ms": 540 },
|
|
84
|
+
"transcript": [
|
|
85
|
+
{ "role": "agent", "text": "Hi, this is Acme Salon, how can I help?" },
|
|
86
|
+
{ "role": "caller", "text": "Hey, I'd like to book a haircut for Friday." }
|
|
87
|
+
],
|
|
88
|
+
"tool_calls": [{ "name": "check_availability", "args": {...}, "result": {...} }],
|
|
89
|
+
"recording_url": "https://...",
|
|
90
|
+
"call_metadata": { ... }
|
|
91
|
+
}
|
|
92
|
+
]
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Verbose fields are gated behind `--verbose` to keep agent context lean.
|
|
97
|
+
|
|
98
|
+
## Platform notes
|
|
99
|
+
|
|
100
|
+
- **Vapi, Retell, ElevenLabs** — hosted only. Set the API key + assistant/agent ID in `.env`.
|
|
101
|
+
- **LiveKit** — works against local dev agents and LiveKit Cloud with the same config (different `LIVEKIT_URL`). Install [`@vent-hq/livekit`](https://www.npmjs.com/package/@vent-hq/livekit) (Node) or [`vent-livekit`](https://pypi.org/project/vent-livekit/) (Python) for tool-call and component-latency observability.
|
|
102
|
+
- **Bland** — supports pathways (`bland_pathway_id`), personas (`persona_id`), or inline `task` prompts.
|
|
103
|
+
- **Custom (WebSocket)** — point at a hosted endpoint with `agent_url`, or run a local agent with `start_command` + `agent_port`. Vent tunnels audio through a relay so your machine doesn't need a public IP.
|
|
104
|
+
|
|
105
|
+
Platform credentials are encrypted at rest (AES-256-GCM) and never appear in chat logs or run payloads.
|
|
106
|
+
|
|
107
|
+
## Links
|
|
108
|
+
|
|
109
|
+
- [Website](https://venthq.dev)
|
|
110
|
+
- [Documentation](https://docs.vent.dev)
|
|
111
|
+
- [Source on GitHub](https://github.com/vent-hq/vent)
|
|
112
|
+
- [Changelog](https://github.com/vent-hq/vent/blob/main/packages/cli/CHANGELOG.md)
|
|
113
|
+
- [@vent_hq on X](https://x.com/vent_hq)
|
|
114
|
+
|
|
115
|
+
## License
|
|
116
|
+
|
|
117
|
+
Apache-2.0
|
package/dist/index.mjs
CHANGED
|
@@ -83,14 +83,6 @@ async function ensurePlatformConnection(apiKey, platform) {
|
|
|
83
83
|
// src/lib/output.ts
|
|
84
84
|
import { writeFileSync } from "node:fs";
|
|
85
85
|
|
|
86
|
-
// ../shared/src/types.ts
|
|
87
|
-
var AUDIO_ACTION_TYPES = [
|
|
88
|
-
"interrupt",
|
|
89
|
-
"inject_noise",
|
|
90
|
-
"split_sentence",
|
|
91
|
-
"noise_on_caller"
|
|
92
|
-
];
|
|
93
|
-
|
|
94
86
|
// ../shared/src/constants.ts
|
|
95
87
|
var RUNNER_CALLBACK_MAX_SKEW_MS = 5 * 6e4;
|
|
96
88
|
|
|
@@ -4136,32 +4128,12 @@ var coerce = {
|
|
|
4136
4128
|
var NEVER = INVALID;
|
|
4137
4129
|
|
|
4138
4130
|
// ../shared/src/schemas.ts
|
|
4139
|
-
var AudioActionSchema = external_exports.object({
|
|
4140
|
-
at_turn: external_exports.number().int().min(0),
|
|
4141
|
-
action: external_exports.enum(AUDIO_ACTION_TYPES),
|
|
4142
|
-
prompt: external_exports.string().optional(),
|
|
4143
|
-
duration_ms: external_exports.number().int().min(1e3).max(3e4).optional(),
|
|
4144
|
-
noise_type: external_exports.enum(["babble", "white", "pink"]).optional(),
|
|
4145
|
-
snr_db: external_exports.number().min(0).max(40).optional(),
|
|
4146
|
-
split: external_exports.object({
|
|
4147
|
-
part_a: external_exports.string().min(1),
|
|
4148
|
-
part_b: external_exports.string().min(1),
|
|
4149
|
-
pause_ms: external_exports.number().int().min(500).max(5e3)
|
|
4150
|
-
}).optional()
|
|
4151
|
-
});
|
|
4152
|
-
var AudioActionResultSchema = external_exports.object({
|
|
4153
|
-
at_turn: external_exports.number().int().min(0),
|
|
4154
|
-
action: external_exports.string(),
|
|
4155
|
-
metrics: external_exports.record(external_exports.union([external_exports.number(), external_exports.boolean()])),
|
|
4156
|
-
transcriptions: external_exports.record(external_exports.union([external_exports.string(), external_exports.null()])).optional()
|
|
4157
|
-
});
|
|
4158
4131
|
var CallerPersonaSchema = external_exports.object({
|
|
4159
4132
|
pace: external_exports.enum(["slow", "normal", "fast"]).optional(),
|
|
4160
4133
|
clarity: external_exports.enum(["clear", "vague", "rambling"]).optional(),
|
|
4161
4134
|
disfluencies: external_exports.boolean().optional(),
|
|
4162
4135
|
cooperation: external_exports.enum(["cooperative", "reluctant", "hostile"]).optional(),
|
|
4163
4136
|
emotion: external_exports.enum(["neutral", "cheerful", "confused", "frustrated", "skeptical", "rushed"]).optional(),
|
|
4164
|
-
interruption_style: external_exports.enum(["low", "high"]).optional(),
|
|
4165
4137
|
memory: external_exports.enum(["reliable", "unreliable"]).optional(),
|
|
4166
4138
|
intent_clarity: external_exports.enum(["clear", "indirect", "vague"]).optional(),
|
|
4167
4139
|
confirmation_style: external_exports.enum(["explicit", "vague"]).optional()
|
|
@@ -4185,8 +4157,6 @@ var ConversationCallSpecSchema = external_exports.object({
|
|
|
4185
4157
|
max_turns: external_exports.number().int().min(1).max(50).default(6),
|
|
4186
4158
|
silence_threshold_ms: external_exports.number().int().min(200).max(1e4).optional(),
|
|
4187
4159
|
persona: CallerPersonaSchema,
|
|
4188
|
-
audio_actions: external_exports.array(AudioActionSchema).optional(),
|
|
4189
|
-
prosody: external_exports.boolean().optional(),
|
|
4190
4160
|
caller_audio: CallerAudioEffectsSchema.optional(),
|
|
4191
4161
|
/** ISO 639-1 language code for multilingual calls (e.g., "es", "fr", "de"). Caller speaks this language, STT transcribes it, judge evaluates in it. */
|
|
4192
4162
|
language: external_exports.string().min(2).max(5).optional(),
|
|
@@ -4253,7 +4223,7 @@ var LiveKitPlatformSchema = BasePlatformSchema.extend({
|
|
|
4253
4223
|
livekit_api_secret: external_exports.string().optional(),
|
|
4254
4224
|
livekit_url: external_exports.string().optional(),
|
|
4255
4225
|
livekit_agent_name: external_exports.string().optional()
|
|
4256
|
-
});
|
|
4226
|
+
}).strict();
|
|
4257
4227
|
var VapiPlatformSchema = BasePlatformSchema.extend({
|
|
4258
4228
|
provider: external_exports.literal("vapi"),
|
|
4259
4229
|
vapi_api_key: external_exports.string().optional(),
|
|
@@ -4327,25 +4297,6 @@ var LatencyMetricsSchema = external_exports.object({
|
|
|
4327
4297
|
mouth_to_ear_est_ms: external_exports.number().optional(),
|
|
4328
4298
|
drift_slope_ms_per_turn: external_exports.number().optional()
|
|
4329
4299
|
});
|
|
4330
|
-
var TurnEmotionProfileSchema = external_exports.object({
|
|
4331
|
-
turn_index: external_exports.number().int().min(0),
|
|
4332
|
-
emotions: external_exports.record(external_exports.string(), external_exports.number()),
|
|
4333
|
-
calmness: external_exports.number(),
|
|
4334
|
-
confidence: external_exports.number(),
|
|
4335
|
-
frustration: external_exports.number(),
|
|
4336
|
-
warmth: external_exports.number(),
|
|
4337
|
-
uncertainty: external_exports.number()
|
|
4338
|
-
});
|
|
4339
|
-
var ProsodyMetricsSchema = external_exports.object({
|
|
4340
|
-
per_turn: external_exports.array(TurnEmotionProfileSchema),
|
|
4341
|
-
mean_calmness: external_exports.number(),
|
|
4342
|
-
mean_confidence: external_exports.number(),
|
|
4343
|
-
peak_frustration: external_exports.number(),
|
|
4344
|
-
emotion_consistency: external_exports.number(),
|
|
4345
|
-
naturalness: external_exports.number(),
|
|
4346
|
-
emotion_trajectory: external_exports.enum(["stable", "improving", "degrading", "volatile"]),
|
|
4347
|
-
hume_latency_ms: external_exports.number()
|
|
4348
|
-
});
|
|
4349
4300
|
var HarnessOverheadSchema = external_exports.object({
|
|
4350
4301
|
tts_per_turn_ms: external_exports.array(external_exports.number()),
|
|
4351
4302
|
stt_per_turn_ms: external_exports.array(external_exports.number()),
|
|
@@ -4427,7 +4378,6 @@ var ConversationMetricsSchema = external_exports.object({
|
|
|
4427
4378
|
latency: LatencyMetricsSchema.optional(),
|
|
4428
4379
|
tool_calls: ToolCallMetricsSchema.optional(),
|
|
4429
4380
|
signal_quality: SignalQualityMetricsSchema.optional(),
|
|
4430
|
-
prosody: ProsodyMetricsSchema.optional(),
|
|
4431
4381
|
harness_overhead: HarnessOverheadSchema.optional(),
|
|
4432
4382
|
component_latency: ComponentLatencyMetricsSchema.optional()
|
|
4433
4383
|
});
|
|
@@ -4437,7 +4387,6 @@ var ConversationCallResultSchema = external_exports.object({
|
|
|
4437
4387
|
status: external_exports.enum(["completed", "error"]),
|
|
4438
4388
|
transcript: external_exports.array(ConversationTurnSchema),
|
|
4439
4389
|
observed_tool_calls: external_exports.array(ObservedToolCallSchema).optional(),
|
|
4440
|
-
audio_action_results: external_exports.array(AudioActionResultSchema).optional(),
|
|
4441
4390
|
duration_ms: external_exports.number(),
|
|
4442
4391
|
metrics: ConversationMetricsSchema,
|
|
4443
4392
|
error: external_exports.string().optional(),
|
|
@@ -4487,8 +4436,6 @@ function formatConversationResult(raw, options = {}) {
|
|
|
4487
4436
|
const warnings = dedupeStrings([
|
|
4488
4437
|
...formatProviderWarningMessages(r.call_metadata?.provider_warnings)
|
|
4489
4438
|
]);
|
|
4490
|
-
const audioActions = r.audio_action_results ?? [];
|
|
4491
|
-
const emotion = r.metrics?.prosody ? formatEmotion(r.metrics.prosody) : null;
|
|
4492
4439
|
const result = {
|
|
4493
4440
|
name: r.name ?? null,
|
|
4494
4441
|
status: r.status,
|
|
@@ -4501,8 +4448,6 @@ function formatConversationResult(raw, options = {}) {
|
|
|
4501
4448
|
call_metadata: formatCallMetadata(r.call_metadata, verbose)
|
|
4502
4449
|
};
|
|
4503
4450
|
if (warnings.length > 0) result.warnings = warnings;
|
|
4504
|
-
if (audioActions.length > 0) result.audio_actions = audioActions;
|
|
4505
|
-
if (emotion) result.emotion = emotion;
|
|
4506
4451
|
if (verbose) result.caller_prompt = r.caller_prompt;
|
|
4507
4452
|
if (debug2) result.debug = debug2;
|
|
4508
4453
|
return result;
|
|
@@ -4609,15 +4554,6 @@ function stripExecutionMessage(args) {
|
|
|
4609
4554
|
const { execution_message: _drop, ...rest } = args;
|
|
4610
4555
|
return rest;
|
|
4611
4556
|
}
|
|
4612
|
-
function formatEmotion(prosody) {
|
|
4613
|
-
return {
|
|
4614
|
-
naturalness: prosody.naturalness,
|
|
4615
|
-
mean_calmness: prosody.mean_calmness,
|
|
4616
|
-
mean_confidence: prosody.mean_confidence,
|
|
4617
|
-
peak_frustration: prosody.peak_frustration,
|
|
4618
|
-
emotion_trajectory: prosody.emotion_trajectory
|
|
4619
|
-
};
|
|
4620
|
-
}
|
|
4621
4557
|
function formatComponentLatency(cl, verbose) {
|
|
4622
4558
|
if (!cl) return null;
|
|
4623
4559
|
const result = {
|
|
@@ -4680,7 +4616,6 @@ function formatDebug(result) {
|
|
|
4680
4616
|
const debug2 = compactUnknownRecord({
|
|
4681
4617
|
signal_quality: result.metrics?.signal_quality,
|
|
4682
4618
|
harness_overhead: result.metrics?.harness_overhead,
|
|
4683
|
-
prosody: result.metrics?.prosody,
|
|
4684
4619
|
provider_warnings: nonEmptyArray(result.call_metadata?.provider_warnings),
|
|
4685
4620
|
component_latency_per_turn: nonEmptyArray(result.metrics?.component_latency?.per_turn),
|
|
4686
4621
|
observed_tool_calls: formatDebugToolCalls(result.observed_tool_calls),
|
|
@@ -4742,9 +4677,6 @@ function stdoutSync(data) {
|
|
|
4742
4677
|
}
|
|
4743
4678
|
}
|
|
4744
4679
|
}
|
|
4745
|
-
function writeJsonStdout(value) {
|
|
4746
|
-
stdoutSync(JSON.stringify(value, null, 2) + "\n");
|
|
4747
|
-
}
|
|
4748
4680
|
var bold = (s) => isTTY ? `\x1B[1m${s}\x1B[0m` : s;
|
|
4749
4681
|
var dim = (s) => isTTY ? `\x1B[2m${s}\x1B[0m` : s;
|
|
4750
4682
|
var green = (s) => isTTY ? `\x1B[32m${s}\x1B[0m` : s;
|
|
@@ -4851,7 +4783,7 @@ function printSummary(callResults, runComplete, runId, options = {}) {
|
|
|
4851
4783
|
stdoutSync(" " + parts.join(" ") + "\n");
|
|
4852
4784
|
}
|
|
4853
4785
|
}
|
|
4854
|
-
stdoutSync(dim(`
|
|
4786
|
+
stdoutSync(dim(`Run ID: ${runId}`) + "\n");
|
|
4855
4787
|
}
|
|
4856
4788
|
function buildRunSummaryJson(options) {
|
|
4857
4789
|
const calls = options.rawCalls ? formatRawCalls(options.rawCalls, options.verbose ?? false) : options.formattedCalls ?? [];
|
|
@@ -5820,45 +5752,6 @@ function findFreePort() {
|
|
|
5820
5752
|
});
|
|
5821
5753
|
}
|
|
5822
5754
|
|
|
5823
|
-
// src/commands/status.ts
|
|
5824
|
-
async function statusCommand(args) {
|
|
5825
|
-
const accessToken = await loadAccessToken();
|
|
5826
|
-
if (!accessToken) {
|
|
5827
|
-
printError("No Vent access token found. Run `npx vent-hq init` first.");
|
|
5828
|
-
return 2;
|
|
5829
|
-
}
|
|
5830
|
-
try {
|
|
5831
|
-
const res = await apiFetch(`/runs/${args.runId}`, accessToken);
|
|
5832
|
-
const data = await res.json();
|
|
5833
|
-
const aggregate = data.aggregate;
|
|
5834
|
-
const counts = aggregate?.conversation_calls;
|
|
5835
|
-
const results = Array.isArray(data.results) ? data.results : [];
|
|
5836
|
-
const summary = buildRunSummaryJson({
|
|
5837
|
-
runId: typeof data.id === "string" ? data.id : args.runId,
|
|
5838
|
-
status: data.status,
|
|
5839
|
-
total: counts?.total,
|
|
5840
|
-
passed: counts?.passed,
|
|
5841
|
-
failed: counts?.failed,
|
|
5842
|
-
rawCalls: results,
|
|
5843
|
-
verbose: args.verbose,
|
|
5844
|
-
runDetails: {
|
|
5845
|
-
created_at: data.created_at,
|
|
5846
|
-
started_at: data.started_at,
|
|
5847
|
-
finished_at: data.finished_at,
|
|
5848
|
-
duration_ms: data.duration_ms,
|
|
5849
|
-
error_text: data.error_text,
|
|
5850
|
-
aggregate: data.aggregate
|
|
5851
|
-
}
|
|
5852
|
-
});
|
|
5853
|
-
writeJsonStdout(summary);
|
|
5854
|
-
const status = data.status;
|
|
5855
|
-
return status === "pass" ? 0 : status === "fail" ? 1 : 0;
|
|
5856
|
-
} catch (err) {
|
|
5857
|
-
printError(err.message);
|
|
5858
|
-
return 2;
|
|
5859
|
-
}
|
|
5860
|
-
}
|
|
5861
|
-
|
|
5862
5755
|
// src/lib/browser.ts
|
|
5863
5756
|
import { exec } from "node:child_process";
|
|
5864
5757
|
function openBrowser(url) {
|
|
@@ -5968,13 +5861,16 @@ import * as fs5 from "node:fs/promises";
|
|
|
5968
5861
|
import * as path3 from "node:path";
|
|
5969
5862
|
|
|
5970
5863
|
// src/skills/claude-code.md
|
|
5971
|
-
var claude_code_default = '---\nname: vent\ndescription: Place real calls against the user\'s voice agent to verify recent code changes worked end-to-end. Trigger on test requests or after any non-trivial voice-agent change.\nallowed-tools: Bash(npx vent-hq *)\n---\n\n# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, interruption handling, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. 
domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. 
"the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Claude Code Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. Use the JSON returned by `npx vent-hq run` directly; do not call `vent status` unless checking an older run.\n\nClaude Code serializes separate Bash tool calls for `npx vent-hq ...`, so run multiple calls from one suite by invoking each named call with `--call <name>` in one Bash command using `&` and a final `wait`:\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path & \\\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path & \\\nwait\n```\n\nFor long-running composite commands \u2014 e.g. LiveKit\'s required `kill workers \u2192 start fresh worker \u2192 wait 60s \u2192 npx vent-hq run` preflight \u2014 keep the entire sequence in **one foreground Bash call**. Do not use `Monitor` or `run_in_background: true` for the wait. Both end the turn until an event fires, which means Vent never sees an inflight call and the user reads it as "you backgrounded the test." The harness also blocks bare `sleep 60` (any long leading sleep), so fold the readiness check and the wait into a single `until`-loop \u2014 short sleeps inside a polling loop are allowed because each iteration is real work:\n\n```bash\nnohup npm run dev > /tmp/lk-agent.log 2>&1 &\nstart=$(date +%s); \\\nuntil grep -q "registered worker" /tmp/lk-agent.log 2>/dev/null \\\n && [ $(($(date +%s) - start)) -ge 60 ]; do sleep 2; done \\\n&& npx vent-hq run -f .vent/suite.livekit.json --call <name>\n```\n\nFor these composites, raise the Bash timeout to **10 minutes** (`600000` ms) \u2014 startup + 60s wait + call can easily exceed the 5-minute default.\n\n## Workflow\n\n1. Identify the behavior under test. 
Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally. (See `Claude Code Execution` for how to compose the kill\u2192start\u2192wait\u2192submit as a single foreground command.)\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. 
Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run or status; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq status <run-id> # Fetch results for a previous run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. `calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200,\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one." 
}\n ]\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `interruption_style`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `audio_actions` supports `interrupt`, `inject_noise`, `split_sentence`, and `noise_on_caller`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n- `prosody: true` enables emotion analysis and requires Hume access.\n- Prefer explicit `audio_actions.interrupt` over `persona.interruption_style` for deterministic barge-in tests. `persona.interruption_style` is only a preplanned caller tendency.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. 
For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. 
Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once with `&` and `wait`), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. 
Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit does not support multiple concurrent Vent calls against one agent process yet. 
Run LiveKit calls sequentially unless you intentionally start separate agent worker processes and route each call to its own process. For Node agents, that means separate Node.js processes. Do not treat parallel calls against a single LiveKit worker as a valid concurrency test until multi-call support is engineered.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. 
`"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\n`audio_actions` lists turns with injected interrupts; check the next turn to judge whether the agent acknowledged or restarted. Overtalk needs the recording and isn\'t evaluable from text alone.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, 
read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. 
The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
|
|
5864
|
+
// Vent skill document for Claude Code: markdown with YAML front matter
// (name, description, allowed-tools), embedded as a single string constant.
// The literal is written as concatenated pieces only to keep each source line
// a valid single-quoted string; the value is one uninterrupted string.
// Wording fix: the composite-command guidance used to call the 5-minute
// timeout a "default", while the same section names 2 minutes as the Bash
// default. It now refers to the 5-minute timeout used for ordinary runs.
var claude_code_default =
  '---\nname: vent\ndescription: Place real calls against the user\'s voice agent to verify recent code changes worked end-to-end. Trigger on test requests or after any non-trivial voice-agent change.\nallowed-tools: Bash(npx vent-hq *)\n---\n\n# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. ' +
  'domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. ' +
  '"the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Claude Code Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. Use the JSON returned by `npx vent-hq run` directly.\n\nClaude Code serializes separate Bash tool calls for `npx vent-hq ...`, so run multiple calls from one suite by invoking each named call with `--call <name>` in one Bash command using `&` and a final `wait`:\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path & \\\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path & \\\nwait\n```\n\nFor long-running composite commands \u2014 e.g. LiveKit\'s required `kill workers \u2192 start fresh worker \u2192 wait 60s \u2192 npx vent-hq run` preflight \u2014 keep the entire sequence in **one foreground Bash call**. Do not use `Monitor` or `run_in_background: true` for the wait. Both end the turn until an event fires, which means Vent never sees an inflight call and the user reads it as "you backgrounded the test." The harness also blocks bare `sleep 60` (any long leading sleep), so fold the readiness check and the wait into a single `until`-loop \u2014 short sleeps inside a polling loop are allowed because each iteration is real work:\n\n```bash\nnohup npm run dev > /tmp/lk-agent.log 2>&1 &\nstart=$(date +%s); \\\nuntil grep -q "registered worker" /tmp/lk-agent.log 2>/dev/null \\\n && [ $(($(date +%s) - start)) -ge 60 ]; do sleep 2; done \\\n&& npx vent-hq run -f .vent/suite.livekit.json --call <name>\n```\n\nFor these composites, raise the Bash timeout to **10 minutes** (`600000` ms) \u2014 startup + 60s wait + call can easily exceed the 5-minute timeout used for ordinary runs.\n\n## Workflow\n\n1. Identify the behavior under test. ' +
  'Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally. (See `Claude Code Execution` for how to compose the kill\u2192start\u2192wait\u2192submit as a single foreground command.)\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. ' +
  'Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq login --status # Print whether credentials are present\nnpx vent-hq logout # Remove saved credentials from ~/.vent/credentials\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. ' +
  '`calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. ' +
  'For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. ' +
  'Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once with `&` and `wait`), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. ' +
  'Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit parallel calls are supported, but capacity depends on where the user runs their agent. ' +
  'LiveKit Cloud only routes audio \u2014 the agent code runs on the user\'s own infrastructure (laptop, Fly.io, Railway, k8s, etc.), and that box\'s CPU/RAM is the real bottleneck. Before firing parallel calls, derive `platform.max_concurrency` from the minimum of: (1) the user\'s LiveKit plan limit (Build=5, Ship=20, Scale=50+ concurrent agent sessions), (2) their agent worker box capacity (rough rule: 1c/1GB\u21921\u20133 jobs, 2c/4GB\u21925\u201310, 4c/8GB\u219210\u201325, 8c/16GB\u219225\u201350; multiply by number of worker boxes), and (3) their STT/TTS provider concurrency (e.g. Deepgram TTS streaming = 2). If the user wants more parallel calls than that minimum allows, push back \u2014 explain which limit is the bottleneck (plan / box / provider) and either reduce the count or suggest scaling that limit. Architecture: register one `agent_name` and run N workers under it (one big worker, or several smaller ones \u2014 same name); LiveKit\'s dispatcher round-robins jobs across them with built-in failover. Don\'t use numbered names (`agent-0`, `agent-1`) \u2014 that reinvents what the dispatcher does and breaks failover.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. ' +
  'Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. ' +
  'Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. ' +
  'After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
|
|
5972
5865
|
|
|
5973
5866
|
// src/skills/cursor.md
|
|
5974
|
-
/**
 * Inlined text of the Cursor skill document; the bundler marker that precedes
 * this declaration names `src/skills/cursor.md` as its origin.
 *
 * The value is a single single-quoted string holding:
 *   - YAML front matter with `description` and `alwaysApply: true`;
 *   - the Markdown guide titled "Vent - Voice Agent Calls", with sections for
 *     When to Test, Autonomous Iteration, Cursor Execution, Workflow, Commands,
 *     Suite Config, Connections and Credentials, WebSocket, LiveKit, Output,
 *     and Reporting Results.
 *
 * Newlines, apostrophes, backslashes and non-ASCII dashes/arrows are written as
 * escapes inside the literal, so edits to the prose must keep that escaping.
 *
 * NOTE(review): where this string is consumed is not visible in this chunk —
 * presumably it is written out as a Cursor rule by the CLI; confirm at the use site.
 */
var cursor_default = '---\ndescription: Place real calls against the user\'s voice agent to verify recent code changes worked end-to-end. Trigger on test requests or after any non-trivial voice-agent change.\nalwaysApply: true\n---\n\n# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, interruption handling, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. 
domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. 
"the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Cursor Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. Use the JSON returned by `npx vent-hq run` directly; do not call `vent status` unless checking an older run.\n\nCursor cannot run separate shell tool calls concurrently \u2014 for multiple calls from one suite, invoke each named call with `--call <name>` in one shell command using `&` and a final `wait`:\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path & \\\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path & \\\nwait\n```\n\n## Workflow\n\n1. Identify the behavior under test. Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. 
Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally.\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run or status; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq status <run-id> # Fetch results for a previous run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. 
For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. `calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200,\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one." }\n ]\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `interruption_style`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `audio_actions` supports `interrupt`, `inject_noise`, `split_sentence`, and `noise_on_caller`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). 
Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n- `prosody: true` enables emotion analysis and requires Hume access.\n- Prefer explicit `audio_actions.interrupt` over `persona.interruption_style` for deterministic barge-in tests. `persona.interruption_style` is only a preplanned caller tendency.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. 
Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once with `&` and `wait`), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. 
Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. 
Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit does not support multiple concurrent Vent calls against one agent process yet. Run LiveKit calls sequentially unless you intentionally start separate agent worker processes and route each call to its own process. For Node agents, that means separate Node.js processes. Do not treat parallel calls against a single LiveKit worker as a valid concurrency test until multi-call support is engineered.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. 
Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\n`audio_actions` lists turns with injected interrupts; check the next turn to judge whether the agent acknowledged or restarted. Overtalk needs the recording and isn\'t evaluable from text alone.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. 
Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. 
After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
|
|
5867
|
+
var cursor_default = '---\ndescription: Place real calls against the user\'s voice agent to verify recent code changes worked end-to-end. Trigger on test requests or after any non-trivial voice-agent change.\nalwaysApply: true\n---\n\n# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. 
domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report as soon as any one of these holds:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. 
"the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Cursor Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. Use the JSON returned by `npx vent-hq run` directly.\n\nCursor cannot run separate shell tool calls concurrently \u2014 for multiple calls from one suite, invoke each named call with `--call <name>` in one shell command using `&` and a final `wait`:\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path & \\\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path & \\\nwait\n```\n\n## Workflow\n\n1. Identify the behavior under test. Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails roughly 1 of every N calls (N = registered workers). Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. 
Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally.\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq login --status # Print whether credentials are present\nnpx vent-hq logout # Remove saved credentials from ~/.vent/credentials\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. 
For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. `calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. 
Use `start_command` for local agents or `agent_url` for hosted custom endpoints. For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. 
Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once with `&` and `wait`), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. 
Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit parallel calls are supported, but capacity depends on where the user runs their agent. 
LiveKit Cloud only routes audio \u2014 the agent code runs on the user\'s own infrastructure (laptop, Fly.io, Railway, k8s, etc.), and that box\'s CPU/RAM is the real bottleneck. Before firing parallel calls, derive `platform.max_concurrency` from the minimum of: (1) the user\'s LiveKit plan limit (Build=5, Ship=20, Scale=50 managed inference sessions; agent-session caps are higher), (2) their agent worker box capacity (rough rule: 1c/1GB\u21921\u20133 jobs, 2c/4GB\u21925\u201310, 4c/8GB\u219210\u201325, 8c/16GB\u219225\u201350; multiply by number of worker boxes), and (3) their STT/TTS provider concurrency (e.g. Deepgram TTS streaming = 2). If the user wants more parallel calls than that minimum allows, push back \u2014 explain which limit is the bottleneck (plan / box / provider) and either reduce the count or suggest scaling that limit. Architecture: register one `agent_name` and run N workers under it (one big worker, or several smaller ones \u2014 same name); LiveKit\'s dispatcher round-robins jobs across them with built-in failover. Don\'t use numbered names (`agent-0`, `agent-1`) \u2014 that reinvents what the dispatcher does and breaks failover.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. 
Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. 
Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. 
After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
|
|
5975
5868
|
|
|
5976
5869
|
// src/skills/codex.md
|
|
5977
|
-
// Markdown text of the Vent skill guide for Codex, held as one string literal
// (the bundler marker above attributes it to src/skills/codex.md).
// The text is emitted verbatim at runtime, so every character inside the quotes
// is behavior: edit the Markdown source rather than this bundled copy.
// NOTE(review): the consumer of this constant is not visible in this chunk;
// presumably the skill installer writes it to disk — confirm.
var codex_default = '# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, interruption handling, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. 
If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. "the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Codex Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. 
Use the JSON returned by `npx vent-hq run` directly; do not call `vent status` unless checking an older run.\n\nCodex can run shell tool calls concurrently \u2014 for multiple calls from one suite, run each named call as its own parallel shell tool call (do not combine them with `&`):\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path\n```\n\n## Workflow\n\n1. Identify the behavior under test. Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. 
Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally.\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run or status; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq status <run-id> # Fetch results for a previous run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. 
`calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200,\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one." }\n ]\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `interruption_style`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `audio_actions` supports `interrupt`, `inject_noise`, `split_sentence`, and `noise_on_caller`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. 
Ignored if `caller_audio.accent` is set or `language` is non-English.\n- `prosody: true` enables emotion analysis and requires Hume access.\n- Prefer explicit `audio_actions.interrupt` over `persona.interruption_style` for deterministic barge-in tests. `persona.interruption_style` is only a preplanned caller tendency.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. 
Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. 
Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. 
Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit does not support multiple concurrent Vent calls against one agent process yet. Run LiveKit calls sequentially unless you intentionally start separate agent worker processes and route each call to its own process. For Node agents, that means separate Node.js processes. Do not treat parallel calls against a single LiveKit worker as a valid concurrency test until multi-call support is engineered.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. 
Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\n`audio_actions` lists turns with injected interrupts; check the next turn to judge whether the agent acknowledged or restarted. Overtalk needs the recording and isn\'t evaluable from text alone.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. 
Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. 
After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
|
|
5870
|
+
// Codex flavor of the Vent skill: Markdown instructions for an AI coding agent,
// held as one single-quoted string (line breaks are written as \n, dashes and
// arrows as \uXXXX escapes, apostrophes as \').
// Sections, in order: When to Test, Autonomous Iteration, Codex Execution,
// Workflow, Commands, Suite Config, Connections and Credentials, WebSocket,
// LiveKit, Output, Reporting Results.
// NOTE(review): presumably bundled from a Markdown file under src/skills/ like
// the sibling windsurf_default constant - if so, edit that source rather than
// this literal; confirm against the build setup.
var codex_default = '# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. 
If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. "the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Codex Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. 
Use the JSON returned by `npx vent-hq run` directly.\n\nCodex can run shell tool calls concurrently \u2014 for multiple calls from one suite, run each named call as its own parallel shell tool call (do not combine them with `&`):\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path\n```\n\n## Workflow\n\n1. Identify the behavior under test. Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. 
Hosted LiveKit Cloud agents don\'t need any of this; run normally.\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq login --status # Print whether credentials are present\nnpx vent-hq logout # Remove saved credentials from ~/.vent/credentials\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. 
`calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. 
For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. 
Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. 
Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit parallel calls are supported, but capacity depends on where the user runs their agent. 
LiveKit Cloud only routes audio \u2014 the agent code runs on the user\'s own infrastructure (laptop, Fly.io, Railway, k8s, etc.), and that box\'s CPU/RAM is the real bottleneck. Before firing parallel calls, derive `platform.max_concurrency` from the minimum of: (1) the user\'s LiveKit plan limit (Build=5, Ship=20, Scale=50+ concurrent agent sessions), (2) their agent worker box capacity (rough rule: 1c/1GB\u21921\u20133 jobs, 2c/4GB\u21925\u201310, 4c/8GB\u219210\u201325, 8c/16GB\u219225\u201350; multiply by number of worker boxes), and (3) their STT/TTS provider concurrency (e.g. Deepgram TTS streaming = 2). If the user wants more parallel calls than that minimum allows, push back \u2014 explain which limit is the bottleneck (plan / box / provider) and either reduce the count or suggest scaling that limit. Architecture: register one `agent_name` and run N workers under it (one big worker, or several smaller ones \u2014 same name); LiveKit\'s dispatcher round-robins jobs across them with built-in failover. Don\'t use numbered names (`agent-0`, `agent-1`) \u2014 that reinvents what the dispatcher does and breaks failover.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. 
Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. 
Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. 
After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
|
|
5871
|
+
|
|
5872
|
+
// src/skills/windsurf.md
|
|
5873
|
+
var windsurf_default = '---\nname: vent\ndescription: Place real calls against the user\'s voice agent to verify recent code changes worked end-to-end. Trigger on test requests or after any non-trivial voice-agent change.\n---\n\n# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. 
domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. "the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Windsurf Execution\n\nVent calls typically take 30 seconds to 2 minutes. 
Each `vent-hq run` is one shell tool call \u2014 wait for stdout (the JSON result) before responding. Don\'t background; use the JSON returned by `npx vent-hq run` directly. If Cascade\'s auto-execution level prompts for approval on `npx vent-hq` commands, ask the user to add `npx vent-hq *` to the workspace allow list once so subsequent calls flow without interruption.\n\nCascade runs shell tool calls in parallel within a turn \u2014 for multiple calls from one suite, issue each named call as its own separate shell tool call in the same turn (do not combine them with `&` and `wait`):\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path\n```\n\nStay within Cascade\'s per-turn parallel-tool-call budget \u2014 fan out at most ~6 calls in one turn and respect the provider concurrency caps below.\n\n## Workflow\n\n1. Identify the behavior under test. Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. 
Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally.\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq login --status # Print whether credentials are present\nnpx vent-hq logout # Remove saved credentials from ~/.vent/credentials\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... 
--verbose # Add to any run; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. `calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. 
Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. 
Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. 
Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. 
Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit parallel calls are supported, but capacity depends on where the user runs their agent. LiveKit Cloud only routes audio \u2014 the agent code runs on the user\'s own infrastructure (laptop, Fly.io, Railway, k8s, etc.), and that box\'s CPU/RAM is the real bottleneck. Before firing parallel calls, derive `platform.max_concurrency` from the minimum of: (1) the user\'s LiveKit plan limit (Build=5, Ship=20, Scale=50+ concurrent agent sessions), (2) their agent worker box capacity (rough rule: 1c/1GB\u21921\u20133 jobs, 2c/4GB\u21925\u201310, 4c/8GB\u219210\u201325, 8c/16GB\u219225\u201350; multiply by number of worker boxes), and (3) their STT/TTS provider concurrency (e.g. Deepgram TTS streaming = 2). If the user wants more parallel calls than that minimum allows, push back \u2014 explain which limit is the bottleneck (plan / box / provider) and either reduce the count or suggest scaling that limit. Architecture: register one `agent_name` and run N workers under it (one big worker, or several smaller ones \u2014 same name); LiveKit\'s dispatcher round-robins jobs across them with built-in failover. Don\'t use numbered names (`agent-0`, `agent-1`) \u2014 that reinvents what the dispatcher does and breaks failover.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. 
Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. 
`transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. 
"the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
|
|
5978
5874
|
|
|
5979
5875
|
// src/lib/setup.ts
|
|
5980
5876
|
var SUITE_SCAFFOLD = JSON.stringify(
|
|
@@ -6006,6 +5902,12 @@ async function installCursor(cwd) {
|
|
|
6006
5902
|
await fs5.writeFile(path3.join(dir, "vent.mdc"), cursor_default);
|
|
6007
5903
|
printSuccess("Cursor: .cursor/rules/vent.mdc", { force: true });
|
|
6008
5904
|
}
|
|
5905
|
+
async function installWindsurf(cwd) {
|
|
5906
|
+
const dir = path3.join(cwd, ".windsurf", "skills", "vent");
|
|
5907
|
+
await fs5.mkdir(dir, { recursive: true });
|
|
5908
|
+
await fs5.writeFile(path3.join(dir, "SKILL.md"), windsurf_default);
|
|
5909
|
+
printSuccess("Windsurf: .windsurf/skills/vent/SKILL.md", { force: true });
|
|
5910
|
+
}
|
|
6009
5911
|
var VENT_MARKERS = [
|
|
6010
5912
|
"# Vent - Voice Agent Calls",
|
|
6011
5913
|
"# Vent \u2014 Voice Agent Calls"
|
|
@@ -6031,6 +5933,7 @@ async function installSkillsAndScaffold(cwd) {
|
|
|
6031
5933
|
await installClaudeCode(cwd);
|
|
6032
5934
|
await installCursor(cwd);
|
|
6033
5935
|
await installCodex(cwd);
|
|
5936
|
+
await installWindsurf(cwd);
|
|
6034
5937
|
const suitePath = path3.join(cwd, ".vent", "suite.json");
|
|
6035
5938
|
let suiteExists = false;
|
|
6036
5939
|
try {
|
|
@@ -6151,7 +6054,6 @@ Commands:
|
|
|
6151
6054
|
agent Manage a shared local agent session
|
|
6152
6055
|
run Run a call from a suite file
|
|
6153
6056
|
stop Cancel a queued or running call
|
|
6154
|
-
status Check status of a previous run
|
|
6155
6057
|
login Authenticate via browser
|
|
6156
6058
|
logout Remove saved credentials
|
|
6157
6059
|
Options:
|
|
@@ -6178,7 +6080,6 @@ Start options:
|
|
|
6178
6080
|
|
|
6179
6081
|
Stop options:
|
|
6180
6082
|
vent-hq agent stop <session-id>`;
|
|
6181
|
-
var STATUS_USAGE = `Usage: vent-hq status <run-id> [--verbose]`;
|
|
6182
6083
|
async function main() {
|
|
6183
6084
|
loadDotenv();
|
|
6184
6085
|
const args = process.argv.slice(2);
|
|
@@ -6188,7 +6089,7 @@ async function main() {
|
|
|
6188
6089
|
return 0;
|
|
6189
6090
|
}
|
|
6190
6091
|
if (command === "--version" || command === "-v") {
|
|
6191
|
-
const pkg = await import("./package-QLGBTYZS.mjs");
|
|
6092
|
+
const pkg = await import("./package-GODDS4TH.mjs");
|
|
6192
6093
|
console.log(`vent-hq ${pkg.default.version}`);
|
|
6193
6094
|
return 0;
|
|
6194
6095
|
}
|
|
@@ -6256,26 +6157,6 @@ async function main() {
|
|
|
6256
6157
|
console.log(AGENT_USAGE);
|
|
6257
6158
|
return 2;
|
|
6258
6159
|
}
|
|
6259
|
-
case "status": {
|
|
6260
|
-
if (commandArgs.includes("--help") || commandArgs.length === 0) {
|
|
6261
|
-
console.log(STATUS_USAGE);
|
|
6262
|
-
return 0;
|
|
6263
|
-
}
|
|
6264
|
-
const { values, positionals } = parseArgs({
|
|
6265
|
-
args: commandArgs,
|
|
6266
|
-
options: {
|
|
6267
|
-
verbose: { type: "boolean", short: "v", default: false }
|
|
6268
|
-
},
|
|
6269
|
-
allowPositionals: true,
|
|
6270
|
-
strict: true
|
|
6271
|
-
});
|
|
6272
|
-
const runId = positionals[0];
|
|
6273
|
-
if (!runId) {
|
|
6274
|
-
console.log(STATUS_USAGE);
|
|
6275
|
-
return 2;
|
|
6276
|
-
}
|
|
6277
|
-
return statusCommand({ runId, verbose: values.verbose });
|
|
6278
|
-
}
|
|
6279
6160
|
case "stop": {
|
|
6280
6161
|
const runId = commandArgs[0];
|
|
6281
6162
|
if (!runId || commandArgs.includes("--help")) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vent-hq",
|
|
3
|
-
"version": "0.12.1",
|
|
3
|
+
"version": "0.13.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Vent CLI — CI/CD for voice AI agents",
|
|
6
6
|
"bin": {
|
|
@@ -9,11 +9,6 @@
|
|
|
9
9
|
"files": [
|
|
10
10
|
"dist"
|
|
11
11
|
],
|
|
12
|
-
"scripts": {
|
|
13
|
-
"build": "node scripts/bundle.mjs",
|
|
14
|
-
"clean": "rm -rf dist",
|
|
15
|
-
"prepack": "pnpm clean && pnpm build"
|
|
16
|
-
},
|
|
17
12
|
"keywords": [
|
|
18
13
|
"vent",
|
|
19
14
|
"cli",
|
|
@@ -36,9 +31,13 @@
|
|
|
36
31
|
"ws": "^8.18.0"
|
|
37
32
|
},
|
|
38
33
|
"devDependencies": {
|
|
39
|
-
"@types/ws": "
|
|
40
|
-
"
|
|
41
|
-
"@vent/
|
|
42
|
-
"
|
|
34
|
+
"@types/ws": "^8.5.0",
|
|
35
|
+
"esbuild": "^0.24.0",
|
|
36
|
+
"@vent/relay-client": "0.1.0",
|
|
37
|
+
"@vent/shared": "0.0.1"
|
|
38
|
+
},
|
|
39
|
+
"scripts": {
|
|
40
|
+
"build": "node scripts/bundle.mjs",
|
|
41
|
+
"clean": "rm -rf dist"
|
|
43
42
|
}
|
|
44
|
-
}
|
|
43
|
+
}
|