vent-hq 0.12.1 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
package/README.md ADDED
@@ -0,0 +1,117 @@
1
+ # vent-hq
2
+
3
+ **Agent CLI for voice AI development.** Lets coding agents (Claude Code, Cursor, Codex, Windsurf) place real calls against your voice agent and read back transcripts, latency, audio, tool calls, and 60+ computed metrics — so they can iterate on prompts, flows, and platform config based on what actually happened.
4
+
5
+ Works with **Vapi, Retell, LiveKit, ElevenLabs, Bland, and custom WebSocket endpoints**.
6
+
7
+ ```bash
8
+ npx vent-hq@latest init
9
+ ```
10
+
11
+ ## How it works
12
+
13
+ 1. Your coding agent writes a Vent caller config (`.vent/suite.json`) with a persona and a call goal.
14
+ 2. It runs `vent-hq run -f .vent/suite.json`. Vent joins the call as a voice caller, drives the conversation, and records everything.
15
+ 3. Vent returns a single JSON result: full transcript, recorded audio URL, per-turn latency, tool-call trace, and call metadata. Your agent reads it and decides what to change.
16
+
17
+ The agent is the brain. Vent is the instrument.
18
+
19
+ ## Install
20
+
21
+ ```bash
22
+ npx vent-hq@latest init
23
+ ```
24
+
25
+ `init` will:
26
+ - Authenticate via GitHub (if you have `gh` installed) or open a browser for device-code auth
27
+ - Install skill files for Claude Code (`.claude/skills/vent/SKILL.md`), Cursor (`.cursor/rules/vent.mdc`), Codex (`AGENTS.md`), and Windsurf (`.windsurf/skills/vent/SKILL.md`)
28
+ - Scaffold a starter suite at `.vent/suite.json`
29
+
30
+ After `init`, your coding agent reads the skill file and takes over from there.
31
+
32
+ ## Example suite
33
+
34
+ ```json
35
+ {
36
+ "connection": {
37
+ "adapter": "vapi",
38
+ "vapi_assistant_id": "asst_..."
39
+ },
40
+ "calls": {
41
+ "happy-path": {
42
+ "caller_prompt": "You're a customer wanting to book a haircut for Friday at 3pm. Be friendly but a little vague about the date at first.",
43
+ "max_turns": 8
44
+ }
45
+ }
46
+ }
47
+ ```
48
+
49
+ Set `VAPI_API_KEY` in `.env` and run:
50
+
51
+ ```bash
52
+ npx vent-hq run -f .vent/suite.json
53
+ ```
54
+
55
+ Swap `adapter` for `retell`, `livekit`, `elevenlabs`, `bland`, or `websocket` to target a different platform. See [the docs](https://docs.vent.dev) for the per-platform connection block.
56
+
57
+ ## Commands
58
+
59
+ | Command | Purpose |
60
+ |-------------------------------------|------------------------------------------------------------|
61
+ | `vent-hq init` | One-time setup: auth, skill files, starter suite |
62
+ | `vent-hq run -f <suite.json>` | Run a call (or all calls) from a suite, stream results |
63
+ | `vent-hq run -f <s> --call <name>` | Run a single named call |
64
+ | `vent-hq stop <run-id>` | Cancel a queued or running call |
65
+ | `vent-hq agent start -f <s>` | Keep a relay session open for a local WebSocket agent |
66
+ | `vent-hq login` / `logout` | Manage credentials |
67
+
68
+ Run `vent-hq <command> --help` for command-specific options.
69
+
70
+ ## What you get back
71
+
72
+ Every `run` returns a single JSON object on stdout. Shape:
73
+
74
+ ```json
75
+ {
76
+ "run_id": "01J...",
77
+ "status": "complete",
78
+ "calls": [
79
+ {
80
+ "name": "happy-path",
81
+ "status": "complete",
82
+ "duration_ms": 42180,
83
+ "latency": { "p50_ms": 612, "p95_ms": 1180, "time_to_first_audio_ms": 540 },
84
+ "transcript": [
85
+ { "role": "agent", "text": "Hi, this is Acme Salon, how can I help?" },
86
+ { "role": "caller", "text": "Hey, I'd like to book a haircut for Friday." }
87
+ ],
88
+ "tool_calls": [{ "name": "check_availability", "args": {...}, "result": {...} }],
89
+ "recording_url": "https://...",
90
+ "call_metadata": { ... }
91
+ }
92
+ ]
93
+ }
94
+ ```
95
+
96
+ Verbose fields are gated behind `--verbose` to keep agent context lean.
97
+
98
+ ## Platform notes
99
+
100
+ - **Vapi, Retell, ElevenLabs** — hosted only. Set the API key + assistant/agent ID in `.env`.
101
+ - **LiveKit** — works against local dev agents and LiveKit Cloud with the same config (different `LIVEKIT_URL`). Install [`@vent-hq/livekit`](https://www.npmjs.com/package/@vent-hq/livekit) (Node) or [`vent-livekit`](https://pypi.org/project/vent-livekit/) (Python) for tool-call and component-latency observability.
102
+ - **Bland** — supports pathways (`bland_pathway_id`), personas (`persona_id`), or inline `task` prompts.
103
+ - **Custom (WebSocket)** — point at a hosted endpoint with `agent_url`, or run a local agent with `start_command` + `agent_port`. Vent tunnels audio through a relay so your machine doesn't need a public IP.
104
+
105
+ Platform credentials are encrypted at rest (AES-256-GCM) and never appear in chat logs or run payloads.
106
+
107
+ ## Links
108
+
109
+ - [Website](https://venthq.dev)
110
+ - [Documentation](https://docs.vent.dev)
111
+ - [Source on GitHub](https://github.com/vent-hq/vent)
112
+ - [Changelog](https://github.com/vent-hq/vent/blob/main/packages/cli/CHANGELOG.md)
113
+ - [@vent_hq on X](https://x.com/vent_hq)
114
+
115
+ ## License
116
+
117
+ Apache-2.0 — see the bundled `LICENSE` file.
package/dist/index.mjs CHANGED
@@ -83,14 +83,6 @@ async function ensurePlatformConnection(apiKey, platform) {
83
83
  // src/lib/output.ts
84
84
  import { writeFileSync } from "node:fs";
85
85
 
86
- // ../shared/src/types.ts
87
- var AUDIO_ACTION_TYPES = [
88
- "interrupt",
89
- "inject_noise",
90
- "split_sentence",
91
- "noise_on_caller"
92
- ];
93
-
94
86
  // ../shared/src/constants.ts
95
87
  var RUNNER_CALLBACK_MAX_SKEW_MS = 5 * 6e4;
96
88
 
@@ -4136,32 +4128,12 @@ var coerce = {
4136
4128
  var NEVER = INVALID;
4137
4129
 
4138
4130
  // ../shared/src/schemas.ts
4139
- var AudioActionSchema = external_exports.object({
4140
- at_turn: external_exports.number().int().min(0),
4141
- action: external_exports.enum(AUDIO_ACTION_TYPES),
4142
- prompt: external_exports.string().optional(),
4143
- duration_ms: external_exports.number().int().min(1e3).max(3e4).optional(),
4144
- noise_type: external_exports.enum(["babble", "white", "pink"]).optional(),
4145
- snr_db: external_exports.number().min(0).max(40).optional(),
4146
- split: external_exports.object({
4147
- part_a: external_exports.string().min(1),
4148
- part_b: external_exports.string().min(1),
4149
- pause_ms: external_exports.number().int().min(500).max(5e3)
4150
- }).optional()
4151
- });
4152
- var AudioActionResultSchema = external_exports.object({
4153
- at_turn: external_exports.number().int().min(0),
4154
- action: external_exports.string(),
4155
- metrics: external_exports.record(external_exports.union([external_exports.number(), external_exports.boolean()])),
4156
- transcriptions: external_exports.record(external_exports.union([external_exports.string(), external_exports.null()])).optional()
4157
- });
4158
4131
  var CallerPersonaSchema = external_exports.object({
4159
4132
  pace: external_exports.enum(["slow", "normal", "fast"]).optional(),
4160
4133
  clarity: external_exports.enum(["clear", "vague", "rambling"]).optional(),
4161
4134
  disfluencies: external_exports.boolean().optional(),
4162
4135
  cooperation: external_exports.enum(["cooperative", "reluctant", "hostile"]).optional(),
4163
4136
  emotion: external_exports.enum(["neutral", "cheerful", "confused", "frustrated", "skeptical", "rushed"]).optional(),
4164
- interruption_style: external_exports.enum(["low", "high"]).optional(),
4165
4137
  memory: external_exports.enum(["reliable", "unreliable"]).optional(),
4166
4138
  intent_clarity: external_exports.enum(["clear", "indirect", "vague"]).optional(),
4167
4139
  confirmation_style: external_exports.enum(["explicit", "vague"]).optional()
@@ -4185,8 +4157,6 @@ var ConversationCallSpecSchema = external_exports.object({
4185
4157
  max_turns: external_exports.number().int().min(1).max(50).default(6),
4186
4158
  silence_threshold_ms: external_exports.number().int().min(200).max(1e4).optional(),
4187
4159
  persona: CallerPersonaSchema,
4188
- audio_actions: external_exports.array(AudioActionSchema).optional(),
4189
- prosody: external_exports.boolean().optional(),
4190
4160
  caller_audio: CallerAudioEffectsSchema.optional(),
4191
4161
  /** ISO 639-1 language code for multilingual calls (e.g., "es", "fr", "de"). Caller speaks this language, STT transcribes it, judge evaluates in it. */
4192
4162
  language: external_exports.string().min(2).max(5).optional(),
@@ -4253,7 +4223,7 @@ var LiveKitPlatformSchema = BasePlatformSchema.extend({
4253
4223
  livekit_api_secret: external_exports.string().optional(),
4254
4224
  livekit_url: external_exports.string().optional(),
4255
4225
  livekit_agent_name: external_exports.string().optional()
4256
- });
4226
+ }).strict();
4257
4227
  var VapiPlatformSchema = BasePlatformSchema.extend({
4258
4228
  provider: external_exports.literal("vapi"),
4259
4229
  vapi_api_key: external_exports.string().optional(),
@@ -4327,25 +4297,6 @@ var LatencyMetricsSchema = external_exports.object({
4327
4297
  mouth_to_ear_est_ms: external_exports.number().optional(),
4328
4298
  drift_slope_ms_per_turn: external_exports.number().optional()
4329
4299
  });
4330
- var TurnEmotionProfileSchema = external_exports.object({
4331
- turn_index: external_exports.number().int().min(0),
4332
- emotions: external_exports.record(external_exports.string(), external_exports.number()),
4333
- calmness: external_exports.number(),
4334
- confidence: external_exports.number(),
4335
- frustration: external_exports.number(),
4336
- warmth: external_exports.number(),
4337
- uncertainty: external_exports.number()
4338
- });
4339
- var ProsodyMetricsSchema = external_exports.object({
4340
- per_turn: external_exports.array(TurnEmotionProfileSchema),
4341
- mean_calmness: external_exports.number(),
4342
- mean_confidence: external_exports.number(),
4343
- peak_frustration: external_exports.number(),
4344
- emotion_consistency: external_exports.number(),
4345
- naturalness: external_exports.number(),
4346
- emotion_trajectory: external_exports.enum(["stable", "improving", "degrading", "volatile"]),
4347
- hume_latency_ms: external_exports.number()
4348
- });
4349
4300
  var HarnessOverheadSchema = external_exports.object({
4350
4301
  tts_per_turn_ms: external_exports.array(external_exports.number()),
4351
4302
  stt_per_turn_ms: external_exports.array(external_exports.number()),
@@ -4427,7 +4378,6 @@ var ConversationMetricsSchema = external_exports.object({
4427
4378
  latency: LatencyMetricsSchema.optional(),
4428
4379
  tool_calls: ToolCallMetricsSchema.optional(),
4429
4380
  signal_quality: SignalQualityMetricsSchema.optional(),
4430
- prosody: ProsodyMetricsSchema.optional(),
4431
4381
  harness_overhead: HarnessOverheadSchema.optional(),
4432
4382
  component_latency: ComponentLatencyMetricsSchema.optional()
4433
4383
  });
@@ -4437,7 +4387,6 @@ var ConversationCallResultSchema = external_exports.object({
4437
4387
  status: external_exports.enum(["completed", "error"]),
4438
4388
  transcript: external_exports.array(ConversationTurnSchema),
4439
4389
  observed_tool_calls: external_exports.array(ObservedToolCallSchema).optional(),
4440
- audio_action_results: external_exports.array(AudioActionResultSchema).optional(),
4441
4390
  duration_ms: external_exports.number(),
4442
4391
  metrics: ConversationMetricsSchema,
4443
4392
  error: external_exports.string().optional(),
@@ -4487,8 +4436,6 @@ function formatConversationResult(raw, options = {}) {
4487
4436
  const warnings = dedupeStrings([
4488
4437
  ...formatProviderWarningMessages(r.call_metadata?.provider_warnings)
4489
4438
  ]);
4490
- const audioActions = r.audio_action_results ?? [];
4491
- const emotion = r.metrics?.prosody ? formatEmotion(r.metrics.prosody) : null;
4492
4439
  const result = {
4493
4440
  name: r.name ?? null,
4494
4441
  status: r.status,
@@ -4501,8 +4448,6 @@ function formatConversationResult(raw, options = {}) {
4501
4448
  call_metadata: formatCallMetadata(r.call_metadata, verbose)
4502
4449
  };
4503
4450
  if (warnings.length > 0) result.warnings = warnings;
4504
- if (audioActions.length > 0) result.audio_actions = audioActions;
4505
- if (emotion) result.emotion = emotion;
4506
4451
  if (verbose) result.caller_prompt = r.caller_prompt;
4507
4452
  if (debug2) result.debug = debug2;
4508
4453
  return result;
@@ -4609,15 +4554,6 @@ function stripExecutionMessage(args) {
4609
4554
  const { execution_message: _drop, ...rest } = args;
4610
4555
  return rest;
4611
4556
  }
4612
- function formatEmotion(prosody) {
4613
- return {
4614
- naturalness: prosody.naturalness,
4615
- mean_calmness: prosody.mean_calmness,
4616
- mean_confidence: prosody.mean_confidence,
4617
- peak_frustration: prosody.peak_frustration,
4618
- emotion_trajectory: prosody.emotion_trajectory
4619
- };
4620
- }
4621
4557
  function formatComponentLatency(cl, verbose) {
4622
4558
  if (!cl) return null;
4623
4559
  const result = {
@@ -4680,7 +4616,6 @@ function formatDebug(result) {
4680
4616
  const debug2 = compactUnknownRecord({
4681
4617
  signal_quality: result.metrics?.signal_quality,
4682
4618
  harness_overhead: result.metrics?.harness_overhead,
4683
- prosody: result.metrics?.prosody,
4684
4619
  provider_warnings: nonEmptyArray(result.call_metadata?.provider_warnings),
4685
4620
  component_latency_per_turn: nonEmptyArray(result.metrics?.component_latency?.per_turn),
4686
4621
  observed_tool_calls: formatDebugToolCalls(result.observed_tool_calls),
@@ -4742,9 +4677,6 @@ function stdoutSync(data) {
4742
4677
  }
4743
4678
  }
4744
4679
  }
4745
- function writeJsonStdout(value) {
4746
- stdoutSync(JSON.stringify(value, null, 2) + "\n");
4747
- }
4748
4680
  var bold = (s) => isTTY ? `\x1B[1m${s}\x1B[0m` : s;
4749
4681
  var dim = (s) => isTTY ? `\x1B[2m${s}\x1B[0m` : s;
4750
4682
  var green = (s) => isTTY ? `\x1B[32m${s}\x1B[0m` : s;
@@ -4851,7 +4783,7 @@ function printSummary(callResults, runComplete, runId, options = {}) {
4851
4783
  stdoutSync(" " + parts.join(" ") + "\n");
4852
4784
  }
4853
4785
  }
4854
- stdoutSync(dim(`Full details: vent status ${runId}${options.verbose ? " --verbose" : ""}`) + "\n");
4786
+ stdoutSync(dim(`Run ID: ${runId}`) + "\n");
4855
4787
  }
4856
4788
  function buildRunSummaryJson(options) {
4857
4789
  const calls = options.rawCalls ? formatRawCalls(options.rawCalls, options.verbose ?? false) : options.formattedCalls ?? [];
@@ -5820,45 +5752,6 @@ function findFreePort() {
5820
5752
  });
5821
5753
  }
5822
5754
 
5823
- // src/commands/status.ts
5824
- async function statusCommand(args) {
5825
- const accessToken = await loadAccessToken();
5826
- if (!accessToken) {
5827
- printError("No Vent access token found. Run `npx vent-hq init` first.");
5828
- return 2;
5829
- }
5830
- try {
5831
- const res = await apiFetch(`/runs/${args.runId}`, accessToken);
5832
- const data = await res.json();
5833
- const aggregate = data.aggregate;
5834
- const counts = aggregate?.conversation_calls;
5835
- const results = Array.isArray(data.results) ? data.results : [];
5836
- const summary = buildRunSummaryJson({
5837
- runId: typeof data.id === "string" ? data.id : args.runId,
5838
- status: data.status,
5839
- total: counts?.total,
5840
- passed: counts?.passed,
5841
- failed: counts?.failed,
5842
- rawCalls: results,
5843
- verbose: args.verbose,
5844
- runDetails: {
5845
- created_at: data.created_at,
5846
- started_at: data.started_at,
5847
- finished_at: data.finished_at,
5848
- duration_ms: data.duration_ms,
5849
- error_text: data.error_text,
5850
- aggregate: data.aggregate
5851
- }
5852
- });
5853
- writeJsonStdout(summary);
5854
- const status = data.status;
5855
- return status === "pass" ? 0 : status === "fail" ? 1 : 0;
5856
- } catch (err) {
5857
- printError(err.message);
5858
- return 2;
5859
- }
5860
- }
5861
-
5862
5755
  // src/lib/browser.ts
5863
5756
  import { exec } from "node:child_process";
5864
5757
  function openBrowser(url) {
@@ -5968,13 +5861,16 @@ import * as fs5 from "node:fs/promises";
5968
5861
  import * as path3 from "node:path";
5969
5862
 
5970
5863
  // src/skills/claude-code.md
5971
- var claude_code_default = '---\nname: vent\ndescription: Place real calls against the user\'s voice agent to verify recent code changes worked end-to-end. Trigger on test requests or after any non-trivial voice-agent change.\nallowed-tools: Bash(npx vent-hq *)\n---\n\n# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, interruption handling, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. 
domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. 
"the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Claude Code Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. Use the JSON returned by `npx vent-hq run` directly; do not call `vent status` unless checking an older run.\n\nClaude Code serializes separate Bash tool calls for `npx vent-hq ...`, so run multiple calls from one suite by invoking each named call with `--call <name>` in one Bash command using `&` and a final `wait`:\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path & \\\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path & \\\nwait\n```\n\nFor long-running composite commands \u2014 e.g. LiveKit\'s required `kill workers \u2192 start fresh worker \u2192 wait 60s \u2192 npx vent-hq run` preflight \u2014 keep the entire sequence in **one foreground Bash call**. Do not use `Monitor` or `run_in_background: true` for the wait. Both end the turn until an event fires, which means Vent never sees an inflight call and the user reads it as "you backgrounded the test." The harness also blocks bare `sleep 60` (any long leading sleep), so fold the readiness check and the wait into a single `until`-loop \u2014 short sleeps inside a polling loop are allowed because each iteration is real work:\n\n```bash\nnohup npm run dev > /tmp/lk-agent.log 2>&1 &\nstart=$(date +%s); \\\nuntil grep -q "registered worker" /tmp/lk-agent.log 2>/dev/null \\\n && [ $(($(date +%s) - start)) -ge 60 ]; do sleep 2; done \\\n&& npx vent-hq run -f .vent/suite.livekit.json --call <name>\n```\n\nFor these composites, raise the Bash timeout to **10 minutes** (`600000` ms) \u2014 startup + 60s wait + call can easily exceed the 5-minute default.\n\n## Workflow\n\n1. Identify the behavior under test. 
Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally. (See `Claude Code Execution` for how to compose the kill\u2192start\u2192wait\u2192submit as a single foreground command.)\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. 
Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run or status; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq status <run-id> # Fetch results for a previous run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. `calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200,\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one." 
}\n ]\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `interruption_style`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `audio_actions` supports `interrupt`, `inject_noise`, `split_sentence`, and `noise_on_caller`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n- `prosody: true` enables emotion analysis and requires Hume access.\n- Prefer explicit `audio_actions.interrupt` over `persona.interruption_style` for deterministic barge-in tests. `persona.interruption_style` is only a preplanned caller tendency.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. 
For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. 
Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once with `&` and `wait`), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. 
Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit does not support multiple concurrent Vent calls against one agent process yet. 
Run LiveKit calls sequentially unless you intentionally start separate agent worker processes and route each call to its own process. For Node agents, that means separate Node.js processes. Do not treat parallel calls against a single LiveKit worker as a valid concurrency test until multi-call support is engineered.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. 
`"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\n`audio_actions` lists turns with injected interrupts; check the next turn to judge whether the agent acknowledged or restarted. Overtalk needs the recording and isn\'t evaluable from text alone.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, 
read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. 
The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
5864
+ var claude_code_default = '---\nname: vent\ndescription: Place real calls against the user\'s voice agent to verify recent code changes worked end-to-end. Trigger on test requests or after any non-trivial voice-agent change.\nallowed-tools: Bash(npx vent-hq *)\n---\n\n# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. 
domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. 
"the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Claude Code Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. Use the JSON returned by `npx vent-hq run` directly.\n\nClaude Code serializes separate Bash tool calls for `npx vent-hq ...`, so run multiple calls from one suite by invoking each named call with `--call <name>` in one Bash command using `&` and a final `wait`:\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path & \\\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path & \\\nwait\n```\n\nFor long-running composite commands \u2014 e.g. LiveKit\'s required `kill workers \u2192 start fresh worker \u2192 wait 60s \u2192 npx vent-hq run` preflight \u2014 keep the entire sequence in **one foreground Bash call**. Do not use `Monitor` or `run_in_background: true` for the wait. Both end the turn until an event fires, which means Vent never sees an inflight call and the user reads it as "you backgrounded the test." The harness also blocks bare `sleep 60` (any long leading sleep), so fold the readiness check and the wait into a single `until`-loop \u2014 short sleeps inside a polling loop are allowed because each iteration is real work:\n\n```bash\nnohup npm run dev > /tmp/lk-agent.log 2>&1 &\nstart=$(date +%s); \\\nuntil grep -q "registered worker" /tmp/lk-agent.log 2>/dev/null \\\n && [ $(($(date +%s) - start)) -ge 60 ]; do sleep 2; done \\\n&& npx vent-hq run -f .vent/suite.livekit.json --call <name>\n```\n\nFor these composites, raise the Bash timeout to **10 minutes** (`600000` ms) \u2014 startup + 60s wait + call can easily exceed the 5-minute default.\n\n## Workflow\n\n1. Identify the behavior under test. 
Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally. (See `Claude Code Execution` for how to compose the kill\u2192start\u2192wait\u2192submit as a single foreground command.)\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. 
Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq login --status # Print whether credentials are present\nnpx vent-hq logout # Remove saved credentials from ~/.vent/credentials\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. 
`calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. 
For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. 
Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once with `&` and `wait`), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. 
Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit parallel calls are supported, but capacity depends on where the user runs their agent. 
LiveKit Cloud only routes audio \u2014 the agent code runs on the user\'s own infrastructure (laptop, Fly.io, Railway, k8s, etc.), and that box\'s CPU/RAM is the real bottleneck. Before firing parallel calls, derive `platform.max_concurrency` from the minimum of: (1) the user\'s LiveKit plan limit (Build=5, Ship=20, Scale=50+ concurrent agent sessions), (2) their agent worker box capacity (rough rule: 1c/1GB\u21921\u20133 jobs, 2c/4GB\u21925\u201310, 4c/8GB\u219210\u201325, 8c/16GB\u219225\u201350; multiply by number of worker boxes), and (3) their STT/TTS provider concurrency (e.g. Deepgram TTS streaming = 2). If the user wants more parallel calls than that minimum allows, push back \u2014 explain which limit is the bottleneck (plan / box / provider) and either reduce the count or suggest scaling that limit. Architecture: register one `agent_name` and run N workers under it (one big worker, or several smaller ones \u2014 same name); LiveKit\'s dispatcher round-robins jobs across them with built-in failover. Don\'t use numbered names (`agent-0`, `agent-1`) \u2014 that reinvents what the dispatcher does and breaks failover.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. 
Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. 
Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. 
After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
5972
5865
 
5973
5866
  // src/skills/cursor.md
5974
- var cursor_default = '---\ndescription: Place real calls against the user\'s voice agent to verify recent code changes worked end-to-end. Trigger on test requests or after any non-trivial voice-agent change.\nalwaysApply: true\n---\n\n# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, interruption handling, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. 
domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. 
"the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Cursor Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. Use the JSON returned by `npx vent-hq run` directly; do not call `vent status` unless checking an older run.\n\nCursor cannot run separate shell tool calls concurrently \u2014 for multiple calls from one suite, invoke each named call with `--call <name>` in one shell command using `&` and a final `wait`:\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path & \\\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path & \\\nwait\n```\n\n## Workflow\n\n1. Identify the behavior under test. Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. 
Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally.\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run or status; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq status <run-id> # Fetch results for a previous run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. 
For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. `calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200,\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one." }\n ]\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `interruption_style`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `audio_actions` supports `interrupt`, `inject_noise`, `split_sentence`, and `noise_on_caller`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). 
Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n- `prosody: true` enables emotion analysis and requires Hume access.\n- Prefer explicit `audio_actions.interrupt` over `persona.interruption_style` for deterministic barge-in tests. `persona.interruption_style` is only a preplanned caller tendency.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. 
Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once with `&` and `wait`), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. 
Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. 
Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit does not support multiple concurrent Vent calls against one agent process yet. Run LiveKit calls sequentially unless you intentionally start separate agent worker processes and route each call to its own process. For Node agents, that means separate Node.js processes. Do not treat parallel calls against a single LiveKit worker as a valid concurrency test until multi-call support is engineered.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. 
Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\n`audio_actions` lists turns with injected interrupts; check the next turn to judge whether the agent acknowledged or restarted. Overtalk needs the recording and isn\'t evaluable from text alone.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. 
Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. 
After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
5867
+ var cursor_default = '---\ndescription: Place real calls against the user\'s voice agent to verify recent code changes worked end-to-end. Trigger on test requests or after any non-trivial voice-agent change.\nalwaysApply: true\n---\n\n# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. 
domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. 
"the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Cursor Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. Use the JSON returned by `npx vent-hq run` directly.\n\nCursor cannot run separate shell tool calls concurrently \u2014 for multiple calls from one suite, invoke each named call with `--call <name>` in one shell command using `&` and a final `wait`:\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path & \\\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path & \\\nwait\n```\n\n## Workflow\n\n1. Identify the behavior under test. Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. 
Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally.\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq login --status # Print whether credentials are present\nnpx vent-hq logout # Remove saved credentials from ~/.vent/credentials\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. 
For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. `calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. 
Use `start_command` for local agents or `agent_url` for hosted custom endpoints. For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. 
Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once with `&` and `wait`), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. 
Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit parallel calls are supported, but capacity depends on where the user runs their agent. 
LiveKit Cloud only routes audio \u2014 the agent code runs on the user\'s own infrastructure (laptop, Fly.io, Railway, k8s, etc.), and that box\'s CPU/RAM is the real bottleneck. Before firing parallel calls, derive `platform.max_concurrency` from the minimum of: (1) the user\'s LiveKit plan limit (Build=5, Ship=20, Scale=50+ concurrent agent sessions), (2) their agent worker box capacity (rough rule: 1c/1GB\u21921\u20133 jobs, 2c/4GB\u21925\u201310, 4c/8GB\u219210\u201325, 8c/16GB\u219225\u201350; multiply by number of worker boxes), and (3) their STT/TTS provider concurrency (e.g. Deepgram TTS streaming = 2). If the user wants more parallel calls than that minimum allows, push back \u2014 explain which limit is the bottleneck (plan / box / provider) and either reduce the count or suggest scaling that limit. Architecture: register one `agent_name` and run N workers under it (one big worker, or several smaller ones \u2014 same name); LiveKit\'s dispatcher round-robins jobs across them with built-in failover. Don\'t use numbered names (`agent-0`, `agent-1`) \u2014 that reinvents what the dispatcher does and breaks failover.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. 
Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. 
Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. 
After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
5975
5868
 
5976
5869
  // src/skills/codex.md
5977
- var codex_default = '# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, interruption handling, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. 
If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. "the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Codex Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. 
Use the JSON returned by `npx vent-hq run` directly; do not call `vent status` unless checking an older run.\n\nCodex can run shell tool calls concurrently \u2014 for multiple calls from one suite, run each named call as its own parallel shell tool call (do not combine them with `&`):\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path\n```\n\n## Workflow\n\n1. Identify the behavior under test. Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. 
Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally.\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run or status; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq status <run-id> # Fetch results for a previous run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. 
`calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200,\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one." }\n ]\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `interruption_style`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `audio_actions` supports `interrupt`, `inject_noise`, `split_sentence`, and `noise_on_caller`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. 
Ignored if `caller_audio.accent` is set or `language` is non-English.\n- `prosody: true` enables emotion analysis and requires Hume access.\n- Prefer explicit `audio_actions.interrupt` over `persona.interruption_style` for deterministic barge-in tests. `persona.interruption_style` is only a preplanned caller tendency.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. 
Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. 
Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. 
Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit does not support multiple concurrent Vent calls against one agent process yet. Run LiveKit calls sequentially unless you intentionally start separate agent worker processes and route each call to its own process. For Node agents, that means separate Node.js processes. Do not treat parallel calls against a single LiveKit worker as a valid concurrency test until multi-call support is engineered.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. 
Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\n`audio_actions` lists turns with injected interrupts; check the next turn to judge whether the agent acknowledged or restarted. Overtalk needs the recording and isn\'t evaluable from text alone.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. 
Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. 
After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
5870
// codex_default: Markdown text of the Vent skill ("# Vent - Voice Agent Calls") in the
// variant that carries a "## Codex Execution" section. The whole document is one
// single-quoted string literal; newlines are written as \n escapes and typographic
// characters (em dash, arrows) as \uXXXX escapes.
// Sections, in order: When to Test, Autonomous Iteration, Codex Execution, Workflow,
// Commands, Suite Config, Connections and Credentials, WebSocket, LiveKit, Output,
// Reporting Results.
// NOTE(review): presumably inlined by the bundler from a src/skills/*.md file, as the
// `// src/skills/windsurf.md` marker ahead of windsurf_default suggests - confirm the path.
// This text is emitted at runtime: edit the source Markdown, not this literal.
+ var codex_default = '# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. 
If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. "the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Codex Execution\n\nUse a 5-minute shell-tool timeout (`300000` ms) on Vent run commands so normal calls are not killed by the default 2-minute Bash timeout. This is not backgrounding; wait for stdout/results before ending your response. 
Use the JSON returned by `npx vent-hq run` directly.\n\nCodex can run shell tool calls concurrently \u2014 for multiple calls from one suite, run each named call as its own parallel shell tool call (do not combine them with `&`):\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path\n```\n\n## Workflow\n\n1. Identify the behavior under test. Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. 
Hosted LiveKit Cloud agents don\'t need any of this; run normally.\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq login --status # Print whether credentials are present\nnpx vent-hq logout # Remove saved credentials from ~/.vent/credentials\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... --verbose # Add to any run; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. 
`calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. 
For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. 
Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. 
Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit parallel calls are supported, but capacity depends on where the user runs their agent. 
LiveKit Cloud only routes audio \u2014 the agent code runs on the user\'s own infrastructure (laptop, Fly.io, Railway, k8s, etc.), and that box\'s CPU/RAM is the real bottleneck. Before firing parallel calls, derive `platform.max_concurrency` from the minimum of: (1) the user\'s LiveKit plan limit (Build=5, Ship=20, Scale=50+ concurrent agent sessions), (2) their agent worker box capacity (rough rule: 1c/1GB\u21921\u20133 jobs, 2c/4GB\u21925\u201310, 4c/8GB\u219210\u201325, 8c/16GB\u219225\u201350; multiply by number of worker boxes), and (3) their STT/TTS provider concurrency (e.g. Deepgram TTS streaming = 2). If the user wants more parallel calls than that minimum allows, push back \u2014 explain which limit is the bottleneck (plan / box / provider) and either reduce the count or suggest scaling that limit. Architecture: register one `agent_name` and run N workers under it (one big worker, or several smaller ones \u2014 same name); LiveKit\'s dispatcher round-robins jobs across them with built-in failover. Don\'t use numbered names (`agent-0`, `agent-1`) \u2014 that reinvents what the dispatcher does and breaks failover.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. 
Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. `transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. 
Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. "the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. 
After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
5871
+
5872
+ // src/skills/windsurf.md
5873
+ var windsurf_default = '---\nname: vent\ndescription: Place real calls against the user\'s voice agent to verify recent code changes worked end-to-end. Trigger on test requests or after any non-trivial voice-agent change.\n---\n\n# Vent - Voice Agent Calls\n\n## When to Test\n\nVent is the voice CI gate. Auto-trigger a call after any non-trivial voice-affecting change \u2014 don\'t wait to be asked. Non-trivial means anything that affects what the caller hears, how the agent responds, what tools are called, how the call routes, or audio timing: prompt rewrites or sections (not one-word tweaks), tool schema or handler changes, state/memory logic, transfer logic, provider/audio config, platform integration, and any voice-behavior bug fix. One-line copy or comment edits don\'t need a call.\n\nBefore launching, scan `.vent/runs/` for unresolved issues from prior runs (failing status, regressions, warnings, latency jumps); fix in-scope ones as part of this change and verify they\'re resolved by the new run, surface out-of-scope ones rather than silently leaving them. The task is not done until you\'ve reviewed the transcript, tool calls, and observed behavior and judged that the change works \u2014 Vent\'s `status` and exit code only reflect pipeline completion, not mission success.\n\n## Autonomous Iteration\n\n**Default: iterate.** Run a fix \u2192 Vent-call \u2192 fix \u2192 Vent-call loop whenever a call surfaces a fixable issue. Don\'t report a failure (or a degraded-quality success) back to the user before attempting to fix it \u2014 they want the agent working, not a diagnosis. After each call, if the result reveals a problem you can fix with a concrete plan, apply the fix and run again. Fixable issues include: tool schema or registry gaps, missing handlers, prompt phrasing, **STT vocabulary mismatches** (e.g. 
domain terms misheard \u2014 Deepgram `keyterms`, custom vocabulary, prompt-level term lists), **latency tunables** (endpointing thresholds, preemptive generation, model selection), and audio quality config. If you\'d be guessing at the next attempt, stop and ask the user instead of trying.\n\n**"Mission success but with quality problems" still requires iteration.** If the call ran end-to-end but had noticeably bad latency, STT mishearing the agent\'s domain vocabulary, audio artifacts, or any other caller-experience degradation that has a concrete fix, the call is *not done* \u2014 apply the fix and re-run. Don\'t downgrade fixable problems to "observations" or "things worth flagging." The pipeline running cleanly is the floor, not the ceiling.\n\n**Never ask permission to iterate.** "Want me to apply this and re-run?" / "Should I fix it?" / "Let me know if you want me to..." \u2014 these are skill violations. If you have a concrete fix, apply it and run the next call. The user opts out by saying so explicitly; silence is not opt-out.\n\nStop iterating and report when one of:\n\n- The next call confirms the fix worked AND no further fixable issues remain (success \u2014 done).\n- The same fix fails twice (your hypothesis is wrong; ask the user).\n- The failure mode shifts between attempts (you\'re chasing a moving target).\n- You can\'t justify the next call against its provider cost \u2014 each call spends real money and provider quota.\n\nIf the user explicitly opts out ("just report", "don\'t iterate", "stop after the first call", "I\'ll fix it myself"), respect that \u2014 run once, report, stop. Otherwise iterate by default. The first time you start an autonomous loop in a session, mention it once so the user knows it\'s happening (e.g. "the call surfaced X \u2014 fixing and re-running; I\'ll report back when it converges or stops making progress").\n\n## Windsurf Execution\n\nVent calls typically take 30 seconds to 2 minutes. 
Each `vent-hq run` is one shell tool call \u2014 wait for stdout (the JSON result) before responding. Don\'t background; use the JSON returned by `npx vent-hq run` directly. If Cascade\'s auto-execution level prompts for approval on `npx vent-hq` commands, ask the user to add `npx vent-hq *` to the workspace allow list once so subsequent calls flow without interruption.\n\nCascade runs shell tool calls in parallel within a turn \u2014 for multiple calls from one suite, issue each named call as its own separate shell tool call in the same turn (do not combine them with `&` and `wait`):\n\n```bash\nnpx vent-hq run -f .vent/suite.vapi.json --call happy-path\nnpx vent-hq run -f .vent/suite.vapi.json --call tool-path\n```\n\nStay within Cascade\'s per-turn parallel-tool-call budget \u2014 fan out at most ~6 calls in one turn and respect the provider concurrency caps below.\n\n## Workflow\n\n1. Identify the behavior under test. Read enough of the agent codebase to understand its system prompt, tools, handlers, routes, provider config, platform wiring, and expected handoffs.\n2. Reuse an existing `.vent/suite.<adapter>.json` when possible. If `.vent/` contains multiple suites, inspect `connection.adapter` and report which suite file produced the result.\n3. Create or update a suite only when the existing calls do not cover the changed behavior. Name calls after real flows, for example `reschedule-appointment`, not `call-1`.\n4. If the suite uses `start_command`, start one shared local session first with `npx vent-hq agent start -f .vent/suite.<adapter>.json`, then pass `--session <session-id>` to each run.\n\n **For locally-run LiveKit agents: every run requires killing *all* workers, starting one fresh worker, and waiting a full 60 seconds before submitting.** Unconditional \u2014 LiveKit Cloud round-robins across registered workers, so a single survivor with a dead inference subprocess fails ~N-1 of N calls. 
Don\'t rely on `pkill -f <path-pattern>`; bare command lines like `node --import tsx agent.ts dev` won\'t match a path filter. Use `ps aux | grep -E "node.*agent\\.ts|@livekit/agents.*ipc"`, `kill -9` by PID, re-run `ps` to confirm zero survivors, then start the fresh worker. Skipping the 60s wait fails with `did not publish audio track`; if that error appears alongside `Error [ERR_IPC_CHANNEL_CLOSED] from InferenceProcExecutor.doInference` in the agent log right after a "running EOU detection" line, that\'s a straggler \u2014 redo the kill sweep. Hosted LiveKit Cloud agents don\'t need any of this; run normally.\n5. Pick which call(s) to run based on the change. Fixed bug: replay the failing scenario. Changed tool: include a call that triggers that tool. Prompt or routing change: include the relevant happy path and any important edge path.\n6. Compare against the previous JSON in `.vent/runs/` when validating a fix or regression. Check status flips, latency jumps, tool-call success drops, cost jumps, and transcript divergence. Correlate with `git diff` between saved `git_sha` values when available; skip if no previous run exists.\n\n## Commands\n\n```bash\nnpx vent-hq init # First-time setup (auth + skill install + starter suite)\nnpx vent-hq login # Log in to existing account\nnpx vent-hq login --status # Print whether credentials are present\nnpx vent-hq logout # Remove saved credentials from ~/.vent/credentials\nnpx vent-hq run -f .vent/suite.X.json # Run a single-call suite\nnpx vent-hq run -f .vent/suite.X.json --call NAME # Run one named call from a multi-call suite\nnpx vent-hq run ... --session <session-id> # Add to any run; routes through an existing local relay session\nnpx vent-hq run ... 
--verbose # Add to any run; include verbose debug fields\nnpx vent-hq stop <run-id> # Cancel a queued or running run\nnpx vent-hq agent start -f .vent/suite.X.json # Start a shared local relay session\nnpx vent-hq agent stop <session-id> # Stop a shared local relay session\n```\n\nIf `~/.vent/credentials` is missing and `VENT_ACCESS_TOKEN` is not set, run `npx vent-hq init`. For an existing account, run `npx vent-hq login` or set `VENT_ACCESS_TOKEN`.\n\n## Suite Config\n\nSuites live in `.vent/suite.<adapter>.json`. `connection` is declared once per suite. `calls` is a named map, and each key becomes the call name used with `--call`.\n\nLocal websocket suite:\n\n```json\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8,\n "silence_threshold_ms": 1200\n }\n }\n}\n```\n\nPlatform-direct suite:\n\n```json\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "happy-path": {\n "caller_prompt": "You are Maria calling to reschedule her appointment to next Tuesday.",\n "max_turns": 8\n }\n }\n}\n```\n\nWrite `caller_prompt` as a realistic caller with a name, goal, mood, constraints, and conditional behavior. Set `max_turns` based on flow complexity: FAQ `4-6`, booking or tool use `8-12`, complex flows `12-20`.\n\nCall fields:\n\n- `caller_prompt` and `max_turns` are required.\n- `silence_threshold_ms` must be `200-10000`. 
Common ranges: FAQ `800-1200`, tool calls `2000-3000`, complex reasoning `3000-5000`.\n- `persona` supports `pace`, `clarity`, `disfluencies`, `cooperation`, `emotion`, `memory`, `intent_clarity`, and `confirmation_style`.\n- `caller_audio` supports noise, speed, speakerphone, mic distance, clarity, accent, packet loss, and jitter.\n- `language` is an ISO 639-1 code such as `en`, `es`, `fr`, `de`, `it`, `nl`, or `ja`.\n- `voice` is `"male"` or `"female"` (English only; default female). Use to flip the caller\'s perceived gender. Ignored if `caller_audio.accent` is set or `language` is non-English.\n\n## Connections and Credentials\n\n### Adapter choice\n\nUse `websocket` for your own local or hosted runtime. Use `start_command` for local agents or `agent_url` for hosted custom endpoints. For `start_command` and `agent_url`, do not put Deepgram, ElevenLabs, OpenAI, or other agent runtime keys into Vent config unless the Vent adapter itself needs them \u2014 the tested agent owns its own runtime credentials.\n\nUse `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` for platform-direct testing. In this mode Vent itself talks to the provider on the user\'s behalf.\n\nVent provides `DEEPGRAM_API_KEY` and `ANTHROPIC_API_KEY` for its hosted caller/evaluation stack \u2014 those are Vent\'s, not the tested agent\'s.\n\n### Credential resolution\n\nIn platform-direct mode the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell environment. Do not run `source .env && export` before Vent commands. If you include credential fields in JSON, use the actual value, not the env var name. 
Do not manually author `platform_connection_id`; the CLI creates or updates the saved platform connection automatically.\n\nAuto-resolved env vars and JSON fields:\n\n- Vapi: `VAPI_API_KEY` -> `vapi_api_key`; `VAPI_ASSISTANT_ID` or `VAPI_AGENT_ID` -> `vapi_assistant_id`\n- Bland: `BLAND_API_KEY` -> `bland_api_key`; `BLAND_PATHWAY_ID` -> `bland_pathway_id`; `BLAND_PERSONA_ID` -> `persona_id`\n- LiveKit: `LIVEKIT_API_KEY` -> `livekit_api_key`; `LIVEKIT_API_SECRET` -> `livekit_api_secret`; `LIVEKIT_URL` -> `livekit_url`\n- Retell: `RETELL_API_KEY` -> `retell_api_key`; `RETELL_AGENT_ID` -> `retell_agent_id`\n- ElevenLabs: `ELEVENLABS_API_KEY` -> `elevenlabs_api_key`; `ELEVENLABS_AGENT_ID` -> `elevenlabs_agent_id`\n\n### Provider config\n\nUse existing provider config when possible: Vapi assistant, Retell agent, ElevenLabs agent, Bland pathway, or LiveKit agent. Bland uniquely supports inline config \u2014 `platform` may use `bland_pathway_id`, `persona_id`, or an inline `task` (with optional voice, model, and turn-handling overrides; see Bland\'s API docs for the full field list).\n\n### Concurrency\n\nWhen you fan out multiple Vent calls in parallel against the same provider (for example, running several named calls from one suite at once), respect the provider\'s per-account concurrency limit. Exceeding it makes calls queue or fail at the provider \u2014 Vent does not enforce these caps for you.\n\nRecord the limit as `max_concurrency` in the suite\'s `platform` block so it\'s visible on future runs. Ask the user which plan they\'re on if sizing matters; otherwise use the conservative default in bold.\n\n- **Vapi**: **10** included per account; reserved lines can be purchased self-serve; Enterprise is unlimited.\n- **Retell**: Pay-as-you-go includes **20**; Enterprise has no cap.\n- **Bland**: Start=**10**, Build=50, Scale=100, Enterprise=unlimited.\n- **ElevenLabs**: Free=**4**, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. 
Burst pricing can temporarily allow up to 3x base.\n- **LiveKit Cloud**: Build=**5**, Ship=20, Scale=50 managed inference sessions (the usual gate for voice agents); agent-session concurrency can go higher (Scale up to 600).\n\n## WebSocket\n\nFor `adapter: "websocket"`, Vent sends binary 16-bit mono PCM audio over one websocket connection. Websocket text frames are optional JSON events. Audio-only websocket agents still work, but events improve turn detection and observability. Vent sends `{"type":"end-call"}` when the test is done.\n\nUseful websocket text frames:\n\n```jsonc\n{"type":"speech-update","status":"started"}\n{"type":"speech-update","status":"stopped"}\n{"type":"tool_call","name":"check_availability","arguments":{},"result":{},"successful":true,"duration_ms":150}\n{"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\n{"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n{"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12}}\n{"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n{"type":"vent:transfer","destination":"+15551234567","status":"attempted"}\n{"type":"vent:debug-url","label":"trace","url":"https://..."}\n{"type":"vent:warning","message":"provider warning","code":"provider_warning"}\n```\n\n`vent:session-report` is **not** handled by the websocket adapter \u2014 it\'s only consumed by the LiveKit helper. Do not emit it from a websocket agent.\n\nPlatform adapters capture tool calls automatically. Websocket agents must emit `tool_call` frames for tool observability. Platform adapters get component latency automatically. Websocket agents should emit `vent:timing` after each agent response when STT/LLM/TTS breakdown is available.\n\n## LiveKit\n\nBefore running LiveKit tests, install and add the Vent helper to the LiveKit agent entrypoint. 
Node: `npm install @vent-hq/livekit`, then call `instrumentLiveKitAgent({ ctx, session })`. Python: `pip install vent-livekit`, then call `instrument_livekit_agent(ctx=ctx, session=session)`.\n\nLiveKit direct mode requires the LiveKit Agents SDK. Custom LiveKit participants should use the websocket adapter with a relay. If the LiveKit agent registered with an explicit dispatch name, set `livekit_agent_name` in `platform`.\n\nLiveKit parallel calls are supported, but capacity depends on where the user runs their agent. LiveKit Cloud only routes audio \u2014 the agent code runs on the user\'s own infrastructure (laptop, Fly.io, Railway, k8s, etc.), and that box\'s CPU/RAM is the real bottleneck. Before firing parallel calls, derive `platform.max_concurrency` from the minimum of: (1) the user\'s LiveKit plan limit (Build=5, Ship=20, Scale=50+ concurrent agent sessions), (2) their agent worker box capacity (rough rule: 1c/1GB\u21921\u20133 jobs, 2c/4GB\u21925\u201310, 4c/8GB\u219210\u201325, 8c/16GB\u219225\u201350; multiply by number of worker boxes), and (3) their STT/TTS provider concurrency (e.g. Deepgram TTS streaming = 2). If the user wants more parallel calls than that minimum allows, push back \u2014 explain which limit is the bottleneck (plan / box / provider) and either reduce the count or suggest scaling that limit. Architecture: register one `agent_name` and run N workers under it (one big worker, or several smaller ones \u2014 same name); LiveKit\'s dispatcher round-robins jobs across them with built-in failover. Don\'t use numbered names (`agent-0`, `agent-1`) \u2014 that reinvents what the dispatcher does and breaks failover.\n\nUse the LiveKit helper for observability; do not publish `vent:*` topics manually. Do not hand-roll `vent:session-report` from `ctx.addShutdownCallback`; after `room.disconnect()` it can fail with `engine is closed`. The helper captures SDK metrics, tool events, conversation items, usage, and close events. 
Native LiveKit `lk.transcription` and `lk.agent.state` provide transcript and agent-state timing.\n\n## Output\n\n### Live result\n\n`npx vent-hq run` returns a single JSON result on stdout in non-TTY mode (not an SSE JSONL stream). Exit codes: `0` = call ran through the pipeline; `1` = pipeline-level failure; `2` = harness error.\n\nMost result fields are always present; `latency`, `component_latency`, `call_metadata`, and `emotion` may be `null` when the underlying analysis didn\'t run; `debug` is absent without `--verbose`. Branch on null before reading nested fields. Use `--verbose` only when the default doesn\'t explain a failure \u2014 when you need `platform_transcript` (to check Vent\'s STT), per-turn or component-level latency breakdowns, the raw tool-call timeline, or provider-native artifacts in `debug.provider_metadata`. Otherwise skip \u2014 it just adds noise.\n\nVent\'s transcript is ground truth. Judge on semantic intent: ignore homophones and minor mis-hears (`"check teach hat"` for `"check that"`, missing question marks on short tails) \u2014 those are streaming-STT noise on Vent\'s caller side, not agent bugs, and **don\'t surface them in the report** (they\'re Vent-side artifacts, not actionable for the user). But clear gibberish or word-soup (e.g. `"Cristoxin"` where the agent should have said `"Of course, talk soon"`) is **not** a Vent artifact \u2014 Vent\'s STT does not invent words like that. It means the platform\'s TTS produced corrupted audio or the agent\'s STT/LLM generated the wrong text, and the fix lives there (TTS voice config, agent prompt, model temperature, codec issue). Never dismiss the run as a "Vent harness STT" issue; iterate on the agent or flag the platform.\n\nFor transfers: `call_metadata.transfer_attempted` (provider claimed) and `transfer_completed` (Vent-verified) can disagree \u2014 report both. 
`transfers[]` carries destination, type, and per-attempt status.\n\n### Saved history\n\nAfter every run, Vent writes the full result JSON to `.vent/runs/`. Shape:\n\n```jsonc\n{\n "run_id": "...",\n "timestamp": "2026-04-21T...Z",\n "git_sha": "...",\n "summary": { "calls_total": 2, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "completed", "duration_ms": 6123, "transcript": [], "observed_tool_calls": [], "latency": { "response_time_ms": 420, "p95_response_time_ms": 980 }, "call_metadata": { "cost_usd": 0.004 } }\n ]\n}\n```\n\nWhen comparing against a prior run (Workflow step 6), inspect:\n\n- Run-completion status flips: `call_results[i].status` (pipeline-only \u2014 judge mission success from the transcript)\n- Latency: `call_results[i].latency.response_time_ms` (mean) or `latency.p95_response_time_ms` increased >20%\n- Tool calls: count of `call_results[i].observed_tool_calls[].successful` dropped\n- Cost: `summary.total_cost_usd` or `call_results[i].call_metadata.cost_usd` increased >30%\n- Transcript: `call_results[i].transcript` diverged in semantic content (ignore STT noise)\n\n## Reporting Results\n\nBefore reporting, read the agent\'s code to locate where the observed behavior originates. If the issue is small and you can fix it, fix it and explain what you did \u2014 don\'t ask permission first.\n\nAdapt the report shape to the call \u2014 a clean pass needs little, a regression with a multi-layer cause needs more. Use a transcript excerpt when it helps the user see what happened.\n\nHard rules:\n\n- Pair raw numbers with their plain-English meaning \u2014 don\'t drop the number, but don\'t leave it unexplained. E.g. "p95 latency was 850ms, which is snappy and well within natural conversational pacing" or "p95 hit 1.6 seconds with the LLM as the bottleneck \u2014 noticeably sluggish to a caller."\n- Name the user\'s voice agent by platform on first mention (e.g. 
"the Vapi agent responded snappily throughout") so the user knows immediately which agent the observation is about. After that, just say "the agent" \u2014 don\'t repeat the platform name on every line.\n- Always include the recording from `call_metadata.recording_url` as an inline `[Recording](url)` link, placed in **one block at the very end of the report** \u2014 never sprinkled through the prose. Single call: one link as the last line. Multi-call: one labeled link per call (e.g. `reschedule-appointment: [Recording](url)`). Never paste a bare URL.\n- Mission success is your judgment, not Vent\'s. The per-call `status` is only `"completed"` (pipeline ran) or `"error"` (pipeline failed); decide whether the agent actually accomplished the scenario from the transcript and tool calls.\n- Similar-sounding word substitutions (e.g. "ocean" for "OSHA") are STT ambiguity, not comprehension failure. The fix lives in STT keyword hints, custom vocabulary, or a prompt-level term list \u2014 not the agent\'s reasoning.\n- Surface only what the user can act on in their own agent\'s code or config \u2014 never `warnings[]` (infrastructure noise), Vent-side artifacts (caller wait modes, harness timing, internal pipeline quirks), or `cost_usd` unless asked.\n\nFor multi-call runs, lead with your own judgment of what happened across the calls (e.g. "3 of 4 did what they were supposed to; `cancel-appointment` never actually canceled"), not a parroted pass/fail count. Then cover each call with whatever depth it needs.\n';
5978
5874
 
5979
5875
  // src/lib/setup.ts
5980
5876
  var SUITE_SCAFFOLD = JSON.stringify(
@@ -6006,6 +5902,12 @@ async function installCursor(cwd) {
6006
5902
  await fs5.writeFile(path3.join(dir, "vent.mdc"), cursor_default);
6007
5903
  printSuccess("Cursor: .cursor/rules/vent.mdc", { force: true });
6008
5904
  }
5905
/**
 * Install the Vent skill file for Windsurf under the given project root.
 *
 * Ensures `<cwd>/.windsurf/skills/vent/` exists (creating intermediate
 * directories as needed), writes the bundled `windsurf_default` skill text to
 * `SKILL.md` inside it, and then reports the installed path via `printSuccess`.
 *
 * @param {string} cwd - Project root the `.windsurf` directory is created in.
 * @returns {Promise<void>} Resolves once the file is written and reported.
 */
async function installWindsurf(cwd) {
  const skillDir = path3.join(cwd, ".windsurf", "skills", "vent");
  const skillFile = path3.join(skillDir, "SKILL.md");

  // The directory has to exist before the write; `recursive` also creates
  // the intermediate `.windsurf/skills` levels.
  await fs5.mkdir(skillDir, { recursive: true });
  await fs5.writeFile(skillFile, windsurf_default);

  printSuccess("Windsurf: .windsurf/skills/vent/SKILL.md", { force: true });
}
6009
5911
  var VENT_MARKERS = [
6010
5912
  "# Vent - Voice Agent Calls",
6011
5913
  "# Vent \u2014 Voice Agent Calls"
@@ -6031,6 +5933,7 @@ async function installSkillsAndScaffold(cwd) {
6031
5933
  await installClaudeCode(cwd);
6032
5934
  await installCursor(cwd);
6033
5935
  await installCodex(cwd);
5936
+ await installWindsurf(cwd);
6034
5937
  const suitePath = path3.join(cwd, ".vent", "suite.json");
6035
5938
  let suiteExists = false;
6036
5939
  try {
@@ -6151,7 +6054,6 @@ Commands:
6151
6054
  agent Manage a shared local agent session
6152
6055
  run Run a call from a suite file
6153
6056
  stop Cancel a queued or running call
6154
- status Check status of a previous run
6155
6057
  login Authenticate via browser
6156
6058
  logout Remove saved credentials
6157
6059
  Options:
@@ -6178,7 +6080,6 @@ Start options:
6178
6080
 
6179
6081
  Stop options:
6180
6082
  vent-hq agent stop <session-id>`;
6181
- var STATUS_USAGE = `Usage: vent-hq status <run-id> [--verbose]`;
6182
6083
  async function main() {
6183
6084
  loadDotenv();
6184
6085
  const args = process.argv.slice(2);
@@ -6188,7 +6089,7 @@ async function main() {
6188
6089
  return 0;
6189
6090
  }
6190
6091
  if (command === "--version" || command === "-v") {
6191
- const pkg = await import("./package-QLGBTYZS.mjs");
6092
+ const pkg = await import("./package-GODDS4TH.mjs");
6192
6093
  console.log(`vent-hq ${pkg.default.version}`);
6193
6094
  return 0;
6194
6095
  }
@@ -6256,26 +6157,6 @@ async function main() {
6256
6157
  console.log(AGENT_USAGE);
6257
6158
  return 2;
6258
6159
  }
6259
- case "status": {
6260
- if (commandArgs.includes("--help") || commandArgs.length === 0) {
6261
- console.log(STATUS_USAGE);
6262
- return 0;
6263
- }
6264
- const { values, positionals } = parseArgs({
6265
- args: commandArgs,
6266
- options: {
6267
- verbose: { type: "boolean", short: "v", default: false }
6268
- },
6269
- allowPositionals: true,
6270
- strict: true
6271
- });
6272
- const runId = positionals[0];
6273
- if (!runId) {
6274
- console.log(STATUS_USAGE);
6275
- return 2;
6276
- }
6277
- return statusCommand({ runId, verbose: values.verbose });
6278
- }
6279
6160
  case "stop": {
6280
6161
  const runId = commandArgs[0];
6281
6162
  if (!runId || commandArgs.includes("--help")) {
@@ -4,7 +4,7 @@ import "./chunk-XYDL7GY6.mjs";
4
4
  // package.json
5
5
  var package_default = {
6
6
  name: "vent-hq",
7
- version: "0.12.1",
7
+ version: "0.13.1",
8
8
  type: "module",
9
9
  description: "Vent CLI \u2014 CI/CD for voice AI agents",
10
10
  bin: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vent-hq",
3
- "version": "0.12.1",
3
+ "version": "0.13.1",
4
4
  "type": "module",
5
5
  "description": "Vent CLI — CI/CD for voice AI agents",
6
6
  "bin": {
@@ -9,11 +9,6 @@
9
9
  "files": [
10
10
  "dist"
11
11
  ],
12
- "scripts": {
13
- "build": "node scripts/bundle.mjs",
14
- "clean": "rm -rf dist",
15
- "prepack": "pnpm clean && pnpm build"
16
- },
17
12
  "keywords": [
18
13
  "vent",
19
14
  "cli",
@@ -36,9 +31,13 @@
36
31
  "ws": "^8.18.0"
37
32
  },
38
33
  "devDependencies": {
39
- "@types/ws": "catalog:",
40
- "@vent/relay-client": "workspace:*",
41
- "@vent/shared": "workspace:*",
42
- "esbuild": "catalog:"
34
+ "@types/ws": "^8.5.0",
35
+ "esbuild": "^0.24.0",
36
+ "@vent/relay-client": "0.1.0",
37
+ "@vent/shared": "0.0.1"
38
+ },
39
+ "scripts": {
40
+ "build": "node scripts/bundle.mjs",
41
+ "clean": "rm -rf dist"
43
42
  }
44
- }
43
+ }