vent-hq 0.9.22 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
package/dist/index.mjs CHANGED
@@ -4297,22 +4297,6 @@ var RunPlatformSummarySchema = external_exports.object({
4297
4297
  platform_connection: PlatformConnectionSummarySchema.nullable(),
4298
4298
  platform: PlatformSummarySchema.nullable()
4299
4299
  });
4300
- var AudioAnalysisGradeThresholdsSchema = external_exports.object({
4301
- agent_speech_ratio_min: external_exports.number().min(0).max(1).optional(),
4302
- talk_ratio_vad_max: external_exports.number().min(0).max(1).optional(),
4303
- talk_ratio_vad_min: external_exports.number().min(0).max(1).optional(),
4304
- longest_monologue_max_ms: external_exports.number().min(1e3).optional(),
4305
- silence_gaps_over_2s_max: external_exports.number().int().min(0).optional(),
4306
- mean_segment_min_ms: external_exports.number().min(0).optional(),
4307
- mean_segment_max_ms: external_exports.number().min(0).optional()
4308
- }).optional();
4309
- var AudioAnalysisWarningSchema = external_exports.object({
4310
- metric: external_exports.string(),
4311
- value: external_exports.number(),
4312
- threshold: external_exports.number(),
4313
- severity: external_exports.enum(["warning", "critical"]),
4314
- message: external_exports.string()
4315
- });
4316
4300
  var ConversationTurnSchema = external_exports.object({
4317
4301
  role: external_exports.enum(["caller", "agent"]),
4318
4302
  text: external_exports.string(),
@@ -4331,25 +4315,7 @@ var ConversationTurnSchema = external_exports.object({
4331
4315
  tts_ms: external_exports.number().optional(),
4332
4316
  speech_duration_ms: external_exports.number().optional()
4333
4317
  }).optional(),
4334
- platform_transcript: external_exports.string().optional(),
4335
- interrupted: external_exports.boolean().optional(),
4336
- is_interruption: external_exports.boolean().optional()
4337
- });
4338
- var HallucinationEventSchema = external_exports.object({
4339
- error_count: external_exports.number().int().min(1),
4340
- reference_text: external_exports.string(),
4341
- hypothesis_text: external_exports.string()
4342
- });
4343
- var TranscriptMetricsSchema = external_exports.object({
4344
- wer: external_exports.number().min(0).optional(),
4345
- cer: external_exports.number().min(0).optional(),
4346
- hallucination_events: external_exports.array(HallucinationEventSchema).optional(),
4347
- repetition_score: external_exports.number().min(0).max(1).optional(),
4348
- reprompt_count: external_exports.number().int().min(0).optional(),
4349
- reprompt_rate: external_exports.number().min(0).max(1).optional(),
4350
- filler_word_rate: external_exports.number().min(0).optional(),
4351
- words_per_minute: external_exports.number().min(0).optional(),
4352
- vocabulary_diversity: external_exports.number().min(0).max(1).optional()
4318
+ platform_transcript: external_exports.string().optional()
4353
4319
  });
4354
4320
  var LatencyMetricsSchema = external_exports.object({
4355
4321
  ttfb_per_turn_ms: external_exports.array(external_exports.number()),
@@ -4358,8 +4324,6 @@ var LatencyMetricsSchema = external_exports.object({
4358
4324
  p95_ttfb_ms: external_exports.number(),
4359
4325
  p99_ttfb_ms: external_exports.number(),
4360
4326
  first_turn_ttfb_ms: external_exports.number(),
4361
- total_silence_ms: external_exports.number(),
4362
- mean_turn_gap_ms: external_exports.number(),
4363
4327
  ttfw_per_turn_ms: external_exports.array(external_exports.number()).optional(),
4364
4328
  p50_ttfw_ms: external_exports.number().optional(),
4365
4329
  p90_ttfw_ms: external_exports.number().optional(),
@@ -4370,24 +4334,6 @@ var LatencyMetricsSchema = external_exports.object({
4370
4334
  mouth_to_ear_est_ms: external_exports.number().optional(),
4371
4335
  drift_slope_ms_per_turn: external_exports.number().optional()
4372
4336
  });
4373
- var AudioAnalysisMetricsSchema = external_exports.object({
4374
- caller_talk_time_ms: external_exports.number().min(0),
4375
- agent_talk_time_ms: external_exports.number().min(0),
4376
- agent_speech_ratio: external_exports.number(),
4377
- talk_ratio_vad: external_exports.number(),
4378
- interruption_rate: external_exports.number().min(0).max(1),
4379
- interruption_count: external_exports.number().int().min(0),
4380
- agent_overtalk_after_barge_in_ms: external_exports.number().min(0).optional(),
4381
- agent_interrupting_user_rate: external_exports.number().min(0).max(1),
4382
- agent_interrupting_user_count: external_exports.number().int().min(0),
4383
- missed_response_windows: external_exports.number().int().min(0),
4384
- longest_monologue_ms: external_exports.number(),
4385
- silence_gaps_over_2s: external_exports.number().int().min(0),
4386
- total_internal_silence_ms: external_exports.number(),
4387
- per_turn_speech_segments: external_exports.array(external_exports.number().int().min(0)),
4388
- per_turn_internal_silence_ms: external_exports.array(external_exports.number().int().min(0)),
4389
- mean_agent_speech_segment_ms: external_exports.number()
4390
- });
4391
4337
  var TurnEmotionProfileSchema = external_exports.object({
4392
4338
  turn_index: external_exports.number().int().min(0),
4393
4339
  emotions: external_exports.record(external_exports.string(), external_exports.number()),
@@ -4407,13 +4353,6 @@ var ProsodyMetricsSchema = external_exports.object({
4407
4353
  emotion_trajectory: external_exports.enum(["stable", "improving", "degrading", "volatile"]),
4408
4354
  hume_latency_ms: external_exports.number()
4409
4355
  });
4410
- var ProsodyWarningSchema = external_exports.object({
4411
- metric: external_exports.string(),
4412
- value: external_exports.number(),
4413
- threshold: external_exports.number(),
4414
- severity: external_exports.enum(["warning", "critical"]),
4415
- message: external_exports.string()
4416
- });
4417
4356
  var HarnessOverheadSchema = external_exports.object({
4418
4357
  tts_per_turn_ms: external_exports.array(external_exports.number()),
4419
4358
  stt_per_turn_ms: external_exports.array(external_exports.number()),
@@ -4426,8 +4365,7 @@ var SignalQualityMetricsSchema = external_exports.object({
4426
4365
  energy_consistency: external_exports.number(),
4427
4366
  sudden_drops: external_exports.number().int().min(0),
4428
4367
  sudden_spikes: external_exports.number().int().min(0),
4429
- clean_edges: external_exports.boolean(),
4430
- f0_hz: external_exports.number()
4368
+ clean_edges: external_exports.boolean()
4431
4369
  });
4432
4370
  var ComponentLatencySchema = external_exports.object({
4433
4371
  stt_ms: external_exports.number().optional(),
@@ -4493,14 +4431,10 @@ var CallMetadataSchema = external_exports.object({
4493
4431
  var ConversationMetricsSchema = external_exports.object({
4494
4432
  mean_ttfb_ms: external_exports.number(),
4495
4433
  mean_ttfw_ms: external_exports.number().optional(),
4496
- transcript: TranscriptMetricsSchema.optional(),
4497
4434
  latency: LatencyMetricsSchema.optional(),
4498
4435
  tool_calls: ToolCallMetricsSchema.optional(),
4499
4436
  signal_quality: SignalQualityMetricsSchema.optional(),
4500
- audio_analysis: AudioAnalysisMetricsSchema.optional(),
4501
- audio_analysis_warnings: external_exports.array(AudioAnalysisWarningSchema).optional(),
4502
4437
  prosody: ProsodyMetricsSchema.optional(),
4503
- prosody_warnings: external_exports.array(ProsodyWarningSchema).optional(),
4504
4438
  harness_overhead: HarnessOverheadSchema.optional(),
4505
4439
  component_latency: ComponentLatencyMetricsSchema.optional()
4506
4440
  });
@@ -4547,14 +4481,10 @@ function formatConversationResult(raw, options = {}) {
4547
4481
  error: r.error ?? null,
4548
4482
  transcript: formatTranscript(r.transcript, options),
4549
4483
  latency: r.metrics?.latency ? formatLatency(r.metrics.latency, r.metrics) : null,
4550
- transcript_quality: r.metrics?.transcript && hasContent(r.metrics.transcript) ? r.metrics.transcript : null,
4551
- audio_analysis: r.metrics?.audio_analysis && hasContent(r.metrics.audio_analysis) ? formatAudioAnalysis(r.metrics.audio_analysis) : null,
4552
4484
  tool_calls: formatToolCalls(r.metrics?.tool_calls, r.observed_tool_calls),
4553
4485
  component_latency: formatComponentLatency(r.metrics?.component_latency),
4554
4486
  call_metadata: formatCallMetadata(r.call_metadata),
4555
4487
  warnings: dedupeStrings([
4556
- ...(r.metrics?.audio_analysis_warnings ?? []).map((w) => w.message),
4557
- ...(r.metrics?.prosody_warnings ?? []).map((w) => w.message),
4558
4488
  ...formatProviderWarningMessages(r.call_metadata?.provider_warnings)
4559
4489
  ]),
4560
4490
  audio_actions: r.audio_action_results ?? [],
@@ -4572,8 +4502,6 @@ function formatTranscript(turns, options) {
4572
4502
  if (t.ttfb_ms != null) turn.ttfb_ms = t.ttfb_ms;
4573
4503
  if (t.ttfw_ms != null) turn.ttfw_ms = t.ttfw_ms;
4574
4504
  if (t.audio_duration_ms != null) turn.audio_duration_ms = t.audio_duration_ms;
4575
- if (t.interrupted != null) turn.interrupted = t.interrupted;
4576
- if (t.is_interruption != null) turn.is_interruption = t.is_interruption;
4577
4505
  if (options.verbose) {
4578
4506
  const debug2 = compactUnknownRecord({
4579
4507
  timestamp_ms: t.timestamp_ms,
@@ -4602,9 +4530,7 @@ function formatLatency(latency, metrics) {
4602
4530
  p90_response_time_ms: hasTtfw ? latency.p90_ttfw_ms ?? latency.p90_ttfb_ms : latency.p90_ttfb_ms,
4603
4531
  p95_response_time_ms: hasTtfw ? latency.p95_ttfw_ms : latency.p95_ttfb_ms,
4604
4532
  p99_response_time_ms: hasTtfw ? latency.p99_ttfw_ms ?? latency.p99_ttfb_ms : latency.p99_ttfb_ms,
4605
- first_response_time_ms: hasTtfw ? latency.first_turn_ttfw_ms ?? latency.first_turn_ttfb_ms : latency.first_turn_ttfb_ms,
4606
- total_silence_ms: latency.total_silence_ms,
4607
- mean_turn_gap_ms: latency.mean_turn_gap_ms
4533
+ first_response_time_ms: hasTtfw ? latency.first_turn_ttfw_ms ?? latency.first_turn_ttfb_ms : latency.first_turn_ttfb_ms
4608
4534
  };
4609
4535
  if (hasTtfw) {
4610
4536
  result.mean_ttfw_ms = metrics.mean_ttfw_ms;
@@ -4619,24 +4545,6 @@ function formatLatency(latency, metrics) {
4619
4545
  if (latency.mouth_to_ear_est_ms != null) result.mouth_to_ear_est_ms = latency.mouth_to_ear_est_ms;
4620
4546
  return result;
4621
4547
  }
4622
- function formatAudioAnalysis(audio) {
4623
- return {
4624
- caller_talk_time_ms: audio.caller_talk_time_ms,
4625
- agent_talk_time_ms: audio.agent_talk_time_ms,
4626
- agent_speech_ratio: audio.agent_speech_ratio,
4627
- talk_ratio_vad: audio.talk_ratio_vad,
4628
- interruption_rate: audio.interruption_rate,
4629
- interruption_count: audio.interruption_count,
4630
- agent_overtalk_after_barge_in_ms: audio.agent_overtalk_after_barge_in_ms,
4631
- agent_interrupting_user_rate: audio.agent_interrupting_user_rate,
4632
- agent_interrupting_user_count: audio.agent_interrupting_user_count,
4633
- missed_response_windows: audio.missed_response_windows,
4634
- longest_monologue_ms: audio.longest_monologue_ms,
4635
- silence_gaps_over_2s: audio.silence_gaps_over_2s,
4636
- total_internal_silence_ms: audio.total_internal_silence_ms,
4637
- mean_agent_speech_segment_ms: audio.mean_agent_speech_segment_ms
4638
- };
4639
- }
4640
4548
  function formatToolCalls(summary, observed) {
4641
4549
  return {
4642
4550
  total: summary?.total ?? observed?.length ?? 0,
@@ -4723,8 +4631,6 @@ function formatDebug(result) {
4723
4631
  signal_quality: result.metrics?.signal_quality,
4724
4632
  harness_overhead: result.metrics?.harness_overhead,
4725
4633
  prosody: result.metrics?.prosody,
4726
- audio_analysis_warnings: nonEmptyArray(result.metrics?.audio_analysis_warnings),
4727
- prosody_warnings: nonEmptyArray(result.metrics?.prosody_warnings),
4728
4634
  provider_warnings: nonEmptyArray(result.call_metadata?.provider_warnings),
4729
4635
  component_latency_per_turn: nonEmptyArray(result.metrics?.component_latency?.per_turn),
4730
4636
  observed_tool_calls: formatDebugToolCalls(result.observed_tool_calls),
@@ -4759,9 +4665,6 @@ function compactUnknownRecord(record) {
4759
4665
  const entries = Object.entries(record).filter(([, value]) => value != null);
4760
4666
  return entries.length > 0 ? Object.fromEntries(entries) : void 0;
4761
4667
  }
4762
- function hasContent(obj) {
4763
- return Object.values(obj).some((v) => v != null);
4764
- }
4765
4668
 
4766
4669
  // src/lib/output.ts
4767
4670
  var isTTY = process.stdout.isTTY;
@@ -5133,21 +5036,17 @@ async function* streamRunEvents(runId, apiKey, signal) {
5133
5036
  import * as fs2 from "node:fs/promises";
5134
5037
  import * as path2 from "node:path";
5135
5038
  import { execSync } from "node:child_process";
5136
- function gitInfo() {
5039
+ function gitSha() {
5137
5040
  try {
5138
- const sha = execSync("git rev-parse HEAD", { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
5139
- const branch = execSync("git branch --show-current", { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim() || null;
5140
- const status = execSync("git status --porcelain", { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
5141
- return { sha, branch, dirty: status.length > 0 };
5041
+ return execSync("git rev-parse HEAD", { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
5142
5042
  } catch {
5143
- return { sha: null, branch: null, dirty: false };
5043
+ return null;
5144
5044
  }
5145
5045
  }
5146
5046
  async function saveRunHistory(runId, callResults, runCompleteData) {
5147
5047
  try {
5148
5048
  const dir = path2.join(process.cwd(), ".vent", "runs");
5149
5049
  await fs2.mkdir(dir, { recursive: true });
5150
- const git = gitInfo();
5151
5050
  const now = /* @__PURE__ */ new Date();
5152
5051
  const timestamp = now.toISOString().replace(/[:.]/g, "-").slice(0, 19);
5153
5052
  const shortId = runId.slice(0, 8);
@@ -5159,9 +5058,7 @@ async function saveRunHistory(runId, callResults, runCompleteData) {
5159
5058
  const entry = {
5160
5059
  run_id: runId,
5161
5060
  timestamp: now.toISOString(),
5162
- git_sha: git.sha,
5163
- git_branch: git.branch,
5164
- git_dirty: git.dirty,
5061
+ git_sha: gitSha(),
5165
5062
  summary: {
5166
5063
  status: runCompleteData.status ?? "unknown",
5167
5064
  calls_total: total,
@@ -5170,7 +5067,11 @@ async function saveRunHistory(runId, callResults, runCompleteData) {
5170
5067
  total_duration_ms: aggregate?.total_duration_ms,
5171
5068
  total_cost_usd: aggregate?.total_cost_usd
5172
5069
  },
5173
- call_results: callResults.map((e) => e.metadata_json ?? {})
5070
+ call_results: callResults.map((e) => {
5071
+ const meta = e.metadata_json ?? {};
5072
+ const result = meta.result ?? {};
5073
+ return { ...result, status: meta.status };
5074
+ })
5174
5075
  };
5175
5076
  const filename = `${timestamp}_${shortId}.json`;
5176
5077
  const filepath = path2.join(dir, filename);
@@ -6000,13 +5901,13 @@ import * as fs5 from "node:fs/promises";
6000
5901
  import * as path3 from "node:path";
6001
5902
 
6002
5903
  // src/skills/claude-code.md
6003
- var claude_code_default = '---\nname: vent\ndescription: Voice agent calls \u2014 run calls against your voice agent, get pass/fail results with latency and latency metrics\nallowed-tools: Bash(npx vent-hq *)\n---\n\n# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud \u2014 results stream back.\n\n## When to Call (read this first)\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n- Do NOT skip call verification \u2014 voice agents are non-deterministic, small changes can break flows\n\n## LiveKit Agent Setup (REQUIRED before running LiveKit tests)\n\nWhen the user asks to test, configure, or run anything with a LiveKit agent, you MUST install the Vent LiveKit helper in the agent project and add the instrumentation call to the agent entrypoint BEFORE running any Vent calls.\n- Node.js: `npm install @vent-hq/livekit`, then add `instrumentLiveKitAgent({ ctx, session })` to the agent entrypoint.\n- Python: `pip install vent-livekit`, then add `instrument_livekit_agent(ctx=ctx, session=session)` to the agent entrypoint.\nWithout this, Vent cannot capture metrics, tool calls, usage, or session reports from the LiveKit agent.\n\nBefore running a LiveKit test, verify the agent process is running (`ps aux | grep <agent_process>`). If it\'s not running, start it before attempting any calls.\n\nLiveKit worker restart caveat: After restarting a LiveKit agent process, wait at least 60 seconds before running a Vent call. If calls were attempted against stale workers (failed with "agent did not speak" or "no speech detected"), kill the agent, wait 60 seconds for ALL stale workers to deregister, then restart. Do not restart the agent multiple times in quick succession \u2014 each restart creates another stale worker registration that compounds the problem.\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --verbose` | Include debug fields in the result JSON |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Check results of a previous run |\n| `npx vent-hq status <run-id> --verbose` | Re-print a run with debug fields included |\n\n## When To Use `--verbose`\n\nDefault output is enough for most work. It already includes:\n- transcript\n- latency\n- transcript quality (`wer` / `cer`)\n- audio analysis\n- tool calls\n- summary cost / recording / transfers\n\nUse `--verbose` only when you need debugging detail that is not in the default result:\n- per-turn debug fields: timestamps, caller decision mode, silence pad, STT confidence, platform transcript\n- raw signal analysis: `debug.signal_quality`\n- harness timings: `debug.harness_overhead`\n- raw prosody payload and warnings\n- raw provider warnings\n- per-turn component latency arrays\n- raw observed tool-call timeline\n- provider-specific metadata in `debug.provider_metadata`\n\nTrigger `--verbose` when:\n- transcript accuracy looks wrong and you need to inspect `platform_transcript`\n- latency is bad and you need per-turn/component breakdowns\n- interruptions/barge-in behavior looks wrong\n- tool-call execution looks inconsistent or missing\n- the provider returned warnings/errors or you need provider-native artifacts\n\nSkip `--verbose` when:\n- you only need pass/fail, transcript, latency, tool calls, recording, or summary\n- you are doing quick iteration on prompt wording and the normal result already explains the failure\n\n## Normalization Contract\n\nVent always returns one normalized result shape on `stdout` across adapters. Treat these as the stable categories:\n- `transcript`\n- `latency`\n- `transcript_quality`\n- `audio_analysis`\n- `tool_calls`\n- `component_latency`\n- `call_metadata`\n- `warnings`\n- `audio_actions`\n- `emotion`\n\nSource-of-truth policy:\n- Vent computes transcript, latency, and audio-quality metrics itself.\n- Hosted adapters choose the best source per category, usually provider post-call data for tool calls, call metadata, transfers, provider transcripts, and recordings.\n- Realtime provider events are fallback or enrichment only when post-call data is missing, delayed, weaker for that category, or provider-specific.\n- `LiveKit` helper events are the provider-native path for rich in-agent observability.\n- `websocket`/custom agents are realtime-native but still map into the same normalized categories.\n- Keep adapter-specific details in `call_metadata.provider_metadata` or `debug.provider_metadata`, not in new top-level fields.\n\n\n## Critical Rules\n\n1. **Run all calls in parallel in ONE Bash command** \u2014 Claude Code cannot run multiple Bash tool calls concurrently (`npx` is not on the read-only allowlist). Instead, launch all calls in a **single** Bash tool call using `&` and `wait`:\n ```bash\n npx vent-hq run -f .vent/suite.bland.json --call book-fire-inspection & npx vent-hq run -f .vent/suite.bland.json --call cancel-inspection & wait\n ```\n Set `timeout: 300000` (5 min) on the Bash call. NEVER run calls as separate Bash tool calls \u2014 they will serialize.\n2. **If a call gets backgrounded** \u2014 Wait for it to complete before proceeding. Never end your response without the result.\n3. **This skill is self-contained** \u2014 The full config schema is below. Do NOT re-read this file.\n4. **Always analyze results** \u2014 The run command outputs complete JSON with full transcript, latency, and tool calls. Use `--verbose` only when the default result is not enough to explain the failure. Analyze this output directly.\n\n## Workflow\n\n### First time: create the call suite\n\n1. Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the **Full Config Schema** section below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). This prevents confusion when multiple adapters are tested in the same project.\n - Name calls after specific flows (e.g., `"reschedule-appointment"`, not `"call-1"`)\n - Write `caller_prompt` as a realistic persona with a specific goal, based on the agent\'s domain\n - Set `max_turns` based on the flow complexity (simple FAQ: 4-6, booking: 8-12, complex: 12-20)\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n### Run calls\n\n1. If the suite uses `start_command`, start the shared local session first:\n ```bash\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n ```\n\n2. Run calls:\n ```bash\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 add --session\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n\n3. To run multiple calls from the same suite, **run them in parallel in one Bash command**:\n ```bash\n npx vent-hq run -f .vent/suite.vapi.json --call happy-path & npx vent-hq run -f .vent/suite.vapi.json --call edge-case & wait\n ```\n\n4. Analyze each result, identify failures, correlate with the codebase, and fix.\n\n5. **Compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. Read the second-most-recent JSON in `.vent/runs/` and compare it against the current run:\n - Status flips: pass\u2192fail (obvious regression)\n - Latency: TTFW p50/p95 increased >20%\n - Tool calls: success count dropped\n - Cost: cost_usd increased >30%\n - Transcripts: agent responses diverged significantly\n Report what regressed and correlate with the code diff (`git diff` between the two runs\' git SHAs). If no previous run exists, skip \u2014 this is the baseline.\n\n### After modifying voice agent code\n\nRe-run the existing suite \u2014 no need to recreate it.\n\n## Connection\n\n- **BYO agent runtime**: your agent owns its own provider credentials. Use `start_command` for a local agent or `agent_url` for a hosted custom endpoint.\n- **Platform-direct runtime**: use adapter `vapi | retell | elevenlabs | bland | livekit`. This is the only mode where Vent itself needs provider credentials and saved platform connections apply.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n| `vent:session` | `{"type":"vent:session","platform":"custom","provider_call_id":"...","provider_session_id":"..."}` | Reports stable provider/session identifiers |\n| `vent:call-metadata` | `{"type":"vent:call-metadata","call_metadata":{...}}` | Reports post-call metadata such as cost, recordings, variables, and provider-specific artifacts |\n| `vent:transcript` | `{"type":"vent:transcript","role":"caller"\\|"agent","text":"...","turn_index":0}` | Reports platform/native transcript text for caller or agent |\n| `vent:transfer` | `{"type":"vent:transfer","destination":"...","status":"attempted"\\|"completed"}` | Reports transfer attempts and outcomes |\n| `vent:debug-url` | `{"type":"vent:debug-url","label":"log","url":"https://..."}` | Reports provider debug/deep-link URLs |\n| `vent:warning` | `{"type":"vent:warning","message":"...","code":"..."}` | Reports provider/runtime warnings worth preserving in run metadata |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n6. **IMPORTANT: `npx vent-hq` commands auto-load `.env` files \u2014 never use `source .env && export` before running them.** Only your own custom scripts (e.g. `npx tsx my-script.ts`) need manual env loading. To add a new credential, just append it to `.env` and the CLI picks it up automatically on the next run.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID or VAPI_AGENT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| Bland | persona_id | BLAND_PERSONA_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for Retell: Pay-as-you-go includes 20 concurrent calls, with more available on demand; Enterprise has no cap. Ask the user which plan they\'re on. If unknown, default to 20.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID, BLAND_PERSONA_ID. Only add bland_api_key/bland_pathway_id/persona_id to the JSON if those env vars are not already available.\nmax_concurrency for Bland: Start=10, Build=50, Scale=100, Enterprise=unlimited. Ask the user which plan they\'re on. If unknown, default to 10.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID (or VAPI_AGENT_ID). Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: every account includes 10 concurrent call slots by default; self-serve accounts can buy extra reserved lines, and Enterprise includes unlimited concurrency. Set this to the user\'s purchased limit. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for ElevenLabs: Free=4, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x the base limit. Ask the user which plan they\'re on and whether burst is enabled. If unknown, default to 4.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if the agent registers with an explicit agent_name in WorkerOptions. Omit for automatic dispatch.\nThe livekit adapter requires the LiveKit Agents SDK. It depends on Agents SDK signals (lk.agent.state, lk.transcription) for readiness detection, turn timing, and component latency. Custom LiveKit participants not using the Agents SDK should use the websocket adapter with a relay instead.\nmax_concurrency for LiveKit Cloud: Build=5, Ship=20, Scale=50 managed inference sessions. Agent session concurrency can be higher (Build=5, Ship=20, Scale up to 600), but managed inference is the usual gating limit for voice agents. Ask the user which tier they\'re on. If unknown, default to 5.\nKnow the provider/account concurrency limits and use them in planning, but Vent does not enforce provider caps at runtime. Hosted worker throughput is an infra setting: `WORKER_TOTAL_CONCURRENCY` caps one worker Machine.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket/WebRTC: user\'s agent must emit tool calls:\n WebSocket \u2014 JSON text frame: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\n WebRTC/LiveKit \u2014 publishData() or sendText() on topic "vent:tool-calls". Same JSON.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<metadata_capture>\nWebSocket and LiveKit/WebRTC agents can also emit richer observability metadata:\n {"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n {"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12,"provider_debug_urls":{"log":"https://..."}}}\n {"type":"vent:debug-url","label":"trace","url":"https://..."}\n {"type":"vent:session-report","report":{"room_name":"room-123","events":[...],"metrics":[...]}}\n {"type":"vent:metrics","event":"metrics_collected","metric_type":"eou","metrics":{"speechId":"speech_123","endOfUtteranceDelayMs":420}}\n {"type":"vent:function-tools-executed","event":"function_tools_executed","hasAgentHandoff":true,"tool_calls":[{"name":"lookup_customer","arguments":{"id":"123"}}]}\n {"type":"vent:conversation-item","event":"conversation_item_added","item":{"type":"agent_handoff","newAgentId":"billing-agent"}}\n {"type":"vent:session-usage","usage":{"llm":{"promptTokens":123,"completionTokens":45}}}\nTransport:\n WebSocket \u2014 send JSON text frames with these payloads. WebSocket agents may also emit {"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0} when they have native transcript text.\n WebRTC/LiveKit \u2014 publishData() or sendText() on the matching "vent:*" topic, e.g. topic "vent:call-metadata" with the JSON body above.\nFor LiveKit, transcript and timing stay authoritative from native room signals (`lk.transcription`, `lk.agent.state`). Do not emit `vent:transcript` from LiveKit agents.\nFor LiveKit agents, prefer the first-party helper instead of manual forwarding:\nNode.js \u2014 `npm install @vent-hq/livekit`:\n```ts\nimport { instrumentLiveKitAgent } from "@vent-hq/livekit";\n\nconst vent = instrumentLiveKitAgent({\n ctx,\n session,\n});\n```\nPython \u2014 `pip install vent-livekit`:\n```python\nfrom vent_livekit import instrument_livekit_agent\n\nvent = instrument_livekit_agent(ctx=ctx, session=session)\n```\nThis helper must run inside the LiveKit agent runtime with the existing Agents SDK `session` and `ctx` objects. It is the Vent integration layer on top of the Agents SDK, not a replacement for it.\nThis automatically publishes only the in-agent-only LiveKit signals: `metrics_collected`, `function_tools_executed`, `conversation_item_added`, and a session report on close/shutdown.\nDo not use it to mirror room-visible signals like transcript, agent state timing, or room/session ID \u2014 Vent already gets those from LiveKit itself.\nFor LiveKit inside-agent forwarding, prefer sending the raw LiveKit event payloads on:\n `vent:metrics`\n `vent:function-tools-executed`\n `vent:conversation-item`\n `vent:session-usage`\nUse these metadata events when the agent runtime already knows native IDs, recordings, warnings, debug links, session reports, metrics events, or handoff artifacts. This gives custom and LiveKit agents parity with hosted adapters without needing a LiveKit Cloud connector.\n</metadata_capture>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "optional preplanned interrupt tendency: low | high. If set, Vent may pre-plan a caller cut-in before the agent turn starts. It does NOT make a mid-turn interrupt LLM call.",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\nInterruption rules:\n- `audio_actions: [{ "action": "interrupt", ... }]` is the deterministic per-turn interrupt test. Prefer this for evaluation.\n- `persona.interruption_style` is only a preplanned caller tendency. If used, Vent decides before the agent response starts whether this turn may cut in.\n- Vent no longer pauses mid-turn to ask a second LLM whether to interrupt.\n- For production-faithful testing, prefer explicit `audio_actions.interrupt` over persona interruption.\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" },\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check avail\u2014", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400, "interrupted": true },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900, "is_interruption": true },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "response_time_ms": 890, "response_time_source": "ttfw",\n "p50_response_time_ms": 850, "p90_response_time_ms": 1100, "p95_response_time_ms": 1400, "p99_response_time_ms": 1550,\n "first_response_time_ms": 1950,\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950, "total_silence_ms": 4200, "mean_turn_gap_ms": 380,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "transcript_quality": {\n "wer": 0.04,\n "hallucination_events": [\n { "error_count": 5, "reference_text": "triple five one two", "hypothesis_text": "five five five nine two" }\n ],\n "repetition_score": 0.05,\n "reprompt_count": 0,\n "filler_word_rate": 0.8,\n "words_per_minute": 148\n },\n "audio_analysis": {\n "caller_talk_time_ms": 12400,\n "agent_talk_time_ms": 28500,\n "agent_speech_ratio": 0.72,\n "talk_ratio_vad": 0.69,\n "interruption_rate": 0.25,\n "interruption_count": 1,\n "agent_overtalk_after_barge_in_ms": 280,\n "agent_interrupting_user_rate": 0.0,\n "agent_interrupting_user_count": 0,\n "missed_response_windows": 0,\n "longest_monologue_ms": 5800,\n "silence_gaps_over_2s": 1,\n "total_internal_silence_ms": 2400,\n "mean_agent_speech_segment_ms": 3450\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", "10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "component_latency": {\n "mean_stt_ms": 120, "mean_llm_ms": 450, "mean_tts_ms": 80,\n "p95_stt_ms": 180, "p95_llm_ms": 620, "p95_tts_ms": 110,\n "mean_speech_duration_ms": 2100,\n "bottleneck": "llm"\n },\n "call_metadata": {\n "platform": "vapi",\n "cost_usd": 0.08,\n "recording_url": "https://example.com/recording",\n "ended_reason": "customer_ended_call",\n "transfers": []\n },\n "warnings": [],\n "audio_actions": [],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAlways present: name, status, caller_prompt, duration_ms, error, transcript, tool_calls, warnings, audio_actions. Nullable when analysis didn\'t run: latency, transcript_quality, audio_analysis, component_latency, call_metadata, emotion (requires prosody: true), debug (requires --verbose).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. **Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed (`call_metadata.transfer_completed`). Use `call_metadata.transfers[]` to report transfer type, destination, status, and sources.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. Treat `transcript_quality.hallucination_events` only as a speech-recognition warning signal, not proof of agent hallucination. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. |\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. |\n\n### Interruption evaluation\n\nWhen the transcript contains `interrupted: true` / `is_interruption: true` turns, evaluate these metrics by reading the transcript:\n\n| Metric | How to evaluate | Target |\n|--------|----------------|--------|\n| **Recovery rate** | For each interrupted turn: does the post-interrupt agent response acknowledge or address the interruption? (e.g., "Sure, the earliest is 9 AM" after being cut off mid-availability-list) | >90% |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state? (e.g., still knows the caller\'s name, booking details, etc.) | >95% |\n| **Agent overtalk after barge-in** | Use `audio_analysis.agent_overtalk_after_barge_in_ms` when available. Lower is better because it measures how long the agent kept speaking after the caller cut in. | <500ms acceptable |\n| **Agent interrupting user rate** | Use `audio_analysis.agent_interrupting_user_rate` and the transcript to see whether the agent starts speaking before the caller finished. | 0 ideal |\n\nReport these alongside standard metrics when interruption calls run. Flag any turn where the agent ignores the interruption, repeats itself from scratch, or loses context.\n</output_conversation_test>\n</call_config>\n\n\n## Output\n\n- **Exit codes**: 0=pass, 1=fail, 2=error\n- The `run` command outputs **complete results as pretty-printed JSON** \u2014 including full transcript, latency metrics, tool calls, and audio analysis for every call. Do NOT run a separate `vent status` command \u2014 all data is already in the output.\n\n## Vent Access Token\n\nRun `npx vent-hq login` or set `VENT_ACCESS_TOKEN` env var.\nVent provides DEEPGRAM_API_KEY and ANTHROPIC_API_KEY automatically.\n';
5904
+ var claude_code_default = '---\nname: vent\ndescription: Voice agent calls \u2014 run calls against your voice agent, get pass/fail results with latency and latency metrics\nallowed-tools: Bash(npx vent-hq *)\n---\n\n# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud \u2014 results stream back.\n\n## When to Call (read this first)\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n- Do NOT skip call verification \u2014 voice agents are non-deterministic, small changes can break flows\n\n## LiveKit Agent Setup (REQUIRED before running LiveKit tests)\n\nWhen the user asks to test, configure, or run anything with a LiveKit agent, you MUST install the Vent LiveKit helper in the agent project and add the instrumentation call to the agent entrypoint BEFORE running any Vent calls.\n- Node.js: `npm install @vent-hq/livekit`, then add `instrumentLiveKitAgent({ ctx, session })` to the agent entrypoint.\n- Python: `pip install vent-livekit`, then add `instrument_livekit_agent(ctx=ctx, session=session)` to the agent entrypoint.\nWithout this, Vent cannot capture metrics, tool calls, usage, or session reports from the LiveKit agent.\n\nBefore running a LiveKit test, verify the agent process is running (`ps aux | grep <agent_process>`). If it\'s not running, start it before attempting any calls.\n\nLiveKit worker restart caveat: After restarting a LiveKit agent process, wait at least 60 seconds before running a Vent call. If calls were attempted against stale workers (failed with "agent did not speak" or "no speech detected"), kill the agent, wait 60 seconds for ALL stale workers to deregister, then restart. Do not restart the agent multiple times in quick succession \u2014 each restart creates another stale worker registration that compounds the problem.\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --verbose` | Include debug fields in the result JSON |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Check results of a previous run |\n| `npx vent-hq status <run-id> --verbose` | Re-print a run with debug fields included |\n\n## When To Use `--verbose`\n\nDefault output is enough for most work. It already includes:\n- transcript\n- latency\n- audio analysis\n- tool calls\n- summary cost / recording / transfers\n\nUse `--verbose` only when you need debugging detail that is not in the default result:\n- per-turn debug fields: timestamps, caller decision mode, silence pad, STT confidence, platform transcript\n- raw signal analysis: `debug.signal_quality`\n- harness timings: `debug.harness_overhead`\n- raw prosody payload and warnings\n- raw provider warnings\n- per-turn component latency arrays\n- raw observed tool-call timeline\n- provider-specific metadata in `debug.provider_metadata`\n\nTrigger `--verbose` when:\n- transcript accuracy looks wrong and you need to inspect `platform_transcript`\n- latency is bad and you need per-turn/component breakdowns\n- interruptions/barge-in behavior looks wrong\n- tool-call execution looks inconsistent or missing\n- the provider returned warnings/errors or you need provider-native artifacts\n\nSkip `--verbose` when:\n- you only need pass/fail, transcript, latency, tool calls, recording, or summary\n- you are doing quick iteration on prompt wording and the normal result already explains the failure\n\n## Normalization Contract\n\nVent always returns one normalized result shape on `stdout` across adapters. Treat these as the stable categories:\n- `transcript`\n- `latency`\n- `tool_calls`\n- `component_latency`\n- `call_metadata`\n- `warnings`\n- `audio_actions`\n- `emotion`\n\nSource-of-truth policy:\n- Vent computes transcript, latency, and audio-quality metrics itself.\n- Hosted adapters choose the best source per category, usually provider post-call data for tool calls, call metadata, transfers, provider transcripts, and recordings.\n- Realtime provider events are fallback or enrichment only when post-call data is missing, delayed, weaker for that category, or provider-specific.\n- `LiveKit` helper events are the provider-native path for rich in-agent observability.\n- `websocket`/custom agents are realtime-native but still map into the same normalized categories.\n- Keep adapter-specific details in `call_metadata.provider_metadata` or `debug.provider_metadata`, not in new top-level fields.\n\n\n## Critical Rules\n\n1. **Run all calls in parallel in ONE Bash command** \u2014 Claude Code cannot run multiple Bash tool calls concurrently (`npx` is not on the read-only allowlist). Instead, launch all calls in a **single** Bash tool call using `&` and `wait`:\n ```bash\n npx vent-hq run -f .vent/suite.bland.json --call book-fire-inspection & npx vent-hq run -f .vent/suite.bland.json --call cancel-inspection & wait\n ```\n Set `timeout: 300000` (5 min) on the Bash call. NEVER run calls as separate Bash tool calls \u2014 they will serialize.\n2. **If a call gets backgrounded** \u2014 Wait for it to complete before proceeding. Never end your response without the result.\n3. **This skill is self-contained** \u2014 The full config schema is below. Do NOT re-read this file.\n4. **Always analyze results** \u2014 The run command outputs complete JSON with full transcript, latency, and tool calls. Use `--verbose` only when the default result is not enough to explain the failure. Analyze this output directly.\n\n## Workflow\n\n### First time: create the call suite\n\n1. Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the **Full Config Schema** section below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). This prevents confusion when multiple adapters are tested in the same project.\n - Name calls after specific flows (e.g., `"reschedule-appointment"`, not `"call-1"`)\n - Write `caller_prompt` as a realistic persona with a specific goal, based on the agent\'s domain\n - Set `max_turns` based on the flow complexity (simple FAQ: 4-6, booking: 8-12, complex: 12-20)\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n### Run calls\n\n1. If the suite uses `start_command`, start the shared local session first:\n ```bash\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n ```\n\n2. Run calls:\n ```bash\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 add --session\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n\n3. To run multiple calls from the same suite, **run them in parallel in one Bash command**:\n ```bash\n npx vent-hq run -f .vent/suite.vapi.json --call happy-path & npx vent-hq run -f .vent/suite.vapi.json --call edge-case & wait\n ```\n\n4. Analyze each result, identify failures, correlate with the codebase, and fix.\n\n5. **Compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. Read the second-most-recent JSON in `.vent/runs/` and compare it against the current run. The saved file shape:\n\n ```jsonc\n {\n "run_id": "\u2026",\n "timestamp": "2026-04-21T\u2026Z",\n "git_sha": "\u2026",\n "summary": { "status": "completed", "calls_total": 2, "calls_passed": 2, "calls_failed": 0, "total_duration_ms": 12345, "total_cost_usd": 0.01 },\n "call_results": [\n { "name": "happy-path", "status": "pass", "duration_ms": 6123, "transcript": [...], "observed_tool_calls": [...], "metrics": { "latency_p50_ms": 420, "latency_p95_ms": 980 }, "cost_usd": 0.004 }\n ]\n }\n ```\n\n Compare at these paths:\n - Status flips: `call_results[i].status` pass\u2192fail\n - Latency: `call_results[i].metrics.latency_p50_ms` / `latency_p95_ms` increased >20%\n - Tool calls: `call_results[i].observed_tool_calls[].successful` count dropped\n - Cost: `summary.total_cost_usd` or `call_results[i].cost_usd` increased >30%\n - Transcripts: `call_results[i].transcript` diverged significantly\n\n Correlate with the code diff (`git diff` between the two runs\' `git_sha` values). If no previous run exists, skip \u2014 this is the baseline.\n\n### After modifying voice agent code\n\nRe-run the existing suite \u2014 no need to recreate it.\n\n## Connection\n\n- **BYO agent runtime**: your agent owns its own provider credentials. Use `start_command` for a local agent or `agent_url` for a hosted custom endpoint.\n- **Platform-direct runtime**: use adapter `vapi | retell | elevenlabs | bland | livekit`. This is the only mode where Vent itself needs provider credentials and saved platform connections apply.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n| `vent:session` | `{"type":"vent:session","platform":"custom","provider_call_id":"...","provider_session_id":"..."}` | Reports stable provider/session identifiers |\n| `vent:call-metadata` | `{"type":"vent:call-metadata","call_metadata":{...}}` | Reports post-call metadata such as cost, recordings, variables, and provider-specific artifacts |\n| `vent:transcript` | `{"type":"vent:transcript","role":"caller"\\|"agent","text":"...","turn_index":0}` | Reports platform/native transcript text for caller or agent |\n| `vent:transfer` | `{"type":"vent:transfer","destination":"...","status":"attempted"\\|"completed"}` | Reports transfer attempts and outcomes |\n| `vent:debug-url` | `{"type":"vent:debug-url","label":"log","url":"https://..."}` | Reports provider debug/deep-link URLs |\n| `vent:warning` | `{"type":"vent:warning","message":"...","code":"..."}` | Reports provider/runtime warnings worth preserving in run metadata |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n6. **IMPORTANT: `npx vent-hq` commands auto-load `.env` files \u2014 never use `source .env && export` before running them.** Only your own custom scripts (e.g. `npx tsx my-script.ts`) need manual env loading. To add a new credential, just append it to `.env` and the CLI picks it up automatically on the next run.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID or VAPI_AGENT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| Bland | persona_id | BLAND_PERSONA_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for Retell: Pay-as-you-go includes 20 concurrent calls, with more available on demand; Enterprise has no cap. Ask the user which plan they\'re on. If unknown, default to 20.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID, BLAND_PERSONA_ID. Only add bland_api_key/bland_pathway_id/persona_id to the JSON if those env vars are not already available.\nmax_concurrency for Bland: Start=10, Build=50, Scale=100, Enterprise=unlimited. Ask the user which plan they\'re on. If unknown, default to 10.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID (or VAPI_AGENT_ID). Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: every account includes 10 concurrent call slots by default; self-serve accounts can buy extra reserved lines, and Enterprise includes unlimited concurrency. Set this to the user\'s purchased limit. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for ElevenLabs: Free=4, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x the base limit. Ask the user which plan they\'re on and whether burst is enabled. If unknown, default to 4.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if your LiveKit agent registered with an explicit dispatch name in the SDK, e.g. Python `@server.rtc_session(agent_name="\u2026")` or `WorkerOptions(agent_name="\u2026")`, Node.js `new ServerOptions({ agentName: "\u2026" })`. Omit for automatic dispatch.\nThe livekit adapter requires the LiveKit Agents SDK. It depends on Agents SDK signals (lk.agent.state, lk.transcription) for readiness detection, turn timing, and component latency. Custom LiveKit participants not using the Agents SDK should use the websocket adapter with a relay instead.\nmax_concurrency for LiveKit Cloud: Build=5, Ship=20, Scale=50 managed inference sessions. Agent session concurrency can be higher (Build=5, Ship=20, Scale up to 600), but managed inference is the usual gating limit for voice agents. Ask the user which tier they\'re on. If unknown, default to 5.\nKnow the provider/account concurrency limits and use them in planning, but Vent does not enforce provider caps at runtime. Hosted worker throughput is an infra setting: `WORKER_TOTAL_CONCURRENCY` caps one worker Machine.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket: user\'s agent must emit a JSON text frame per tool call: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\nLiveKit: use the `@vent-hq/livekit` (Node) or `vent-livekit` (Python) helper. See the "LiveKit Agent Setup" section. The helper captures tool calls automatically from Agents SDK session events \u2014 do not publish on Vent topics manually.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<metadata_capture>\nWebSocket agents can emit richer observability metadata as JSON text frames:\n {"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n {"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12,"provider_debug_urls":{"log":"https://..."}}}\n {"type":"vent:debug-url","label":"trace","url":"https://..."}\n {"type":"vent:session-report","report":{"room_name":"room-123","events":[...],"metrics":[...]}}\n {"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n\nLiveKit agents get all metadata through the `@vent-hq/livekit` (Node) / `vent-livekit` (Python) helper \u2014 it subscribes to Agents SDK session events (`metrics_collected`, `function_tools_executed`, `conversation_item_added`, `session_usage_updated`, close) and publishes on Vent topics automatically. Transcript and agent-state timing come from native LiveKit room signals (`lk.transcription`, `lk.agent.state`) \u2014 the helper does not duplicate them.\n\nNode.js \u2014 `npm install @vent-hq/livekit`:\n```ts\nimport { instrumentLiveKitAgent } from "@vent-hq/livekit";\n\nconst vent = instrumentLiveKitAgent({ ctx, session });\n```\nPython \u2014 `pip install vent-livekit`:\n```python\nfrom vent_livekit import instrument_livekit_agent\n\nvent = instrument_livekit_agent(ctx=ctx, session=session)\n```\n\nThe helper is the only supported integration path for LiveKit Agents SDK agents. Do not publish on `vent:*` topics manually \u2014 let the helper forward SDK events.\n</metadata_capture>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "optional preplanned interrupt tendency: low | high. If set, Vent may pre-plan a caller cut-in before the agent turn starts. It does NOT make a mid-turn interrupt LLM call.",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\nInterruption rules:\n- `audio_actions: [{ "action": "interrupt", ... }]` is the deterministic per-turn interrupt test. Prefer this for evaluation.\n- `persona.interruption_style` is only a preplanned caller tendency. If used, Vent decides before the agent response starts whether this turn may cut in.\n- Vent no longer pauses mid-turn to ask a second LLM whether to interrupt.\n- For production-faithful testing, prefer explicit `audio_actions.interrupt` over persona interruption.\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" },\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check availability.", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400 },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900 },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "response_time_ms": 890, "response_time_source": "ttfw",\n "p50_response_time_ms": 850, "p90_response_time_ms": 1100, "p95_response_time_ms": 1400, "p99_response_time_ms": 1550,\n "first_response_time_ms": 1950,\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", "10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "component_latency": {\n "mean_stt_ms": 120, "mean_llm_ms": 450, "mean_tts_ms": 80,\n "p95_stt_ms": 180, "p95_llm_ms": 620, "p95_tts_ms": 110,\n "mean_speech_duration_ms": 2100,\n "bottleneck": "llm"\n },\n "call_metadata": {\n "platform": "vapi",\n "cost_usd": 0.08,\n "recording_url": "https://example.com/recording",\n "ended_reason": "customer_ended_call",\n "transfers": []\n },\n "warnings": [],\n "audio_actions": [],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAlways present: name, status, caller_prompt, duration_ms, error, transcript, tool_calls, warnings, audio_actions. Nullable when analysis didn\'t run: latency, component_latency, call_metadata, emotion (requires prosody: true), debug (requires --verbose).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. **Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed (`call_metadata.transfer_completed`). Use `call_metadata.transfers[]` to report transfer type, destination, status, and sources.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. |\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. |\n\nIgnore minor STT mis-transcriptions in `transcript` text (e.g. `"check teach hat"` for `"check that"`, swapped homophones, missing question marks on short tails). These are streaming-STT artifacts, not agent bugs. Judge on semantic intent, not exact spelling. Only flag transcript quality when it prevents understanding what the agent actually said.\n\n### Interruption evaluation\n\nEvaluate interruption handling by reading the transcript and listening to the recording. Flag any turn where the agent ignores a barge-in, repeats itself from scratch, or loses context after being cut off.\n\n| Dimension | How to evaluate |\n|--------|----------------|\n| **Recovery** | After a caller cuts in, does the agent\'s next reply acknowledge or address the barge-in rather than restarting from scratch? |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state (caller name, booking details, earlier answers)? |\n| **Overtalk** | Does the agent keep speaking for long after the caller starts, or does it yield promptly? Use the recording to judge. |\n</output_conversation_test>\n</call_config>\n\n\n## Output\n\n- **Exit codes**: 0=pass, 1=fail, 2=error\n- The `run` command outputs **complete results as pretty-printed JSON** \u2014 including full transcript, latency metrics, tool calls, and audio analysis for every call. Do NOT run a separate `vent status` command \u2014 all data is already in the output.\n\n## Vent Access Token\n\nRun `npx vent-hq login` or set `VENT_ACCESS_TOKEN` env var.\nVent provides DEEPGRAM_API_KEY and ANTHROPIC_API_KEY automatically.\n';
6004
5905
 
6005
5906
  // src/skills/cursor.md
6006
- var cursor_default = '---\ndescription: Vent \u2014 Voice agent calls. Run calls against your voice agent, get pass/fail results. Use when the user asks to run voice-agent calls or when you need to verify voice agent behavior after code changes.\nalwaysApply: true\n---\n\n# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud (30-120s).\n\n## When to Call (read this first)\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n- Do NOT skip call verification \u2014 voice agents are non-deterministic, small changes can break flows\n\n## LiveKit Agent Setup (REQUIRED before running LiveKit tests)\n\nWhen the user asks to test, configure, or run anything with a LiveKit agent, you MUST install the Vent LiveKit helper in the agent project and add the instrumentation call to the agent entrypoint BEFORE running any Vent calls.\n- Node.js: `npm install @vent-hq/livekit`, then add `instrumentLiveKitAgent({ ctx, session })` to the agent entrypoint.\n- Python: `pip install vent-livekit`, then add `instrument_livekit_agent(ctx=ctx, session=session)` to the agent entrypoint.\nWithout this, Vent cannot capture metrics, tool calls, usage, or session reports from the LiveKit agent.\n\nBefore running a LiveKit test, verify the agent process is running (`ps aux | grep <agent_process>`). If it\'s not running, start it before attempting any calls.\n\nLiveKit worker restart caveat: After restarting a LiveKit agent process, wait at least 60 seconds before running a Vent call. If calls were attempted against stale workers (failed with "agent did not speak" or "no speech detected"), kill the agent, wait 60 seconds for ALL stale workers to deregister, then restart. Do not restart the agent multiple times in quick succession \u2014 each restart creates another stale worker registration that compounds the problem.\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --verbose` | Include debug fields in the result JSON |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Check results of a previous run |\n| `npx vent-hq status <run-id> --verbose` | Re-print a run with debug fields included |\n\n## When To Use `--verbose`\n\nDefault output is enough for most work. It already includes:\n- transcript\n- latency\n- transcript quality (`wer` / `cer`)\n- audio analysis\n- tool calls\n- summary cost / recording / transfers\n\nUse `--verbose` only when you need debugging detail that is not in the default result:\n- per-turn debug fields: timestamps, caller decision mode, silence pad, STT confidence, platform transcript\n- raw signal analysis: `debug.signal_quality`\n- harness timings: `debug.harness_overhead`\n- raw prosody payload and warnings\n- raw provider warnings\n- per-turn component latency arrays\n- raw observed tool-call timeline\n- provider-specific metadata in `debug.provider_metadata`\n\nTrigger `--verbose` when:\n- transcript accuracy looks wrong and you need to inspect `platform_transcript`\n- latency is bad and you need per-turn/component breakdowns\n- interruptions/barge-in behavior looks wrong\n- tool-call execution looks inconsistent or missing\n- the provider returned warnings/errors or you need provider-native artifacts\n\nSkip `--verbose` when:\n- you only need pass/fail, transcript, latency, tool calls, recording, or summary\n- you are doing quick iteration on prompt wording and the normal result already explains the failure\n\n## Normalization Contract\n\nVent always returns one normalized result shape on `stdout` across adapters. Treat these as the stable categories:\n- `transcript`\n- `latency`\n- `transcript_quality`\n- `audio_analysis`\n- `tool_calls`\n- `component_latency`\n- `call_metadata`\n- `warnings`\n- `audio_actions`\n- `emotion`\n\nSource-of-truth policy:\n- Vent computes transcript, latency, and audio-quality metrics itself.\n- Hosted adapters choose the best source per category, usually provider post-call data for tool calls, call metadata, transfers, provider transcripts, and recordings.\n- Realtime provider events are fallback or enrichment only when post-call data is missing, delayed, weaker for that category, or provider-specific.\n- `LiveKit` helper events are the provider-native path for rich in-agent observability.\n- `websocket`/custom agents are realtime-native but still map into the same normalized categories.\n- Keep adapter-specific details in `call_metadata.provider_metadata` or `debug.provider_metadata`, not in new top-level fields.\n\n\n## Critical Rules\n\n1. **Run all calls in parallel in ONE shell command** \u2014 Cursor cannot run multiple shell tool calls concurrently. Instead, launch all calls in a **single** shell command using `&` and `wait`. Example: `npx vent-hq run -f .vent/suite.bland.json --call call-1 & npx vent-hq run -f .vent/suite.bland.json --call call-2 & wait`. Set a 300-second (5 min) timeout. NEVER run calls as separate commands \u2014 they will serialize.\n2. **Handle backgrounded commands** \u2014 If a call command gets moved to background by the system, wait for it to complete before proceeding. Never end your response without delivering call results.\n3. **Output format** \u2014 In non-TTY mode (when run by an agent), every SSE event is written to stdout as a JSON line. Results are always in stdout.\n4. **This skill is self-contained** \u2014 The full config schema is below. Do NOT re-read this file.\n5. **Always analyze results** \u2014 The run command outputs complete JSON with full transcript, latency, and tool calls. Use `--verbose` only when the default result is not enough to explain the failure. Analyze this output directly \u2014 do NOT run `vent status` afterwards unless you are re-checking a past run.\n\n## Workflow\n\n### First time: create the call suite\n\n1. Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the **Full Config Schema** section below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). This prevents confusion when multiple adapters are tested in the same project.\n - Name calls after specific flows (e.g., `"reschedule-appointment"`, not `"call-1"`)\n - Write `caller_prompt` as a realistic persona with a specific goal, based on the agent\'s domain\n - Set `max_turns` based on the flow complexity (simple FAQ: 4-6, booking: 8-12, complex: 12-20)\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n### Subsequent runs \u2014 reuse the existing suite\n\nA matching `.vent/suite.<adapter>.json` already exists? Just re-run it. No need to recreate.\n\n### Run calls\n\n1. If the suite uses `start_command`, start the shared local session first:\n ```\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n ```\n\n2. Run calls:\n ```\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 add --session\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n\n3. To run multiple calls from the same suite, **run them in parallel in one shell command**:\n ```\n npx vent-hq run -f .vent/suite.vapi.json --call happy-path & npx vent-hq run -f .vent/suite.vapi.json --call edge-case & wait\n ```\n\n4. Analyze each result, identify failures, correlate with the codebase, and fix.\n5. **Compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. Read the second-most-recent JSON in `.vent/runs/` and compare against the current run: status flips, TTFW p50/p95 changes >20%, tool call count drops, cost increases >30%, transcript divergence. Correlate with `git diff` between the two runs\' git SHAs. Skip if no previous run exists.\n\n## Connection\n\n- **BYO agent runtime**: your agent owns its own provider credentials. Use `start_command` for a local agent or `agent_url` for a hosted custom endpoint.\n- **Platform-direct runtime**: use adapter `vapi | retell | elevenlabs | bland | livekit`. This is the only mode where Vent itself needs provider credentials and saved platform connections apply.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n| `vent:session` | `{"type":"vent:session","platform":"custom","provider_call_id":"...","provider_session_id":"..."}` | Reports stable provider/session identifiers |\n| `vent:call-metadata` | `{"type":"vent:call-metadata","call_metadata":{...}}` | Reports post-call metadata such as cost, recordings, variables, and provider-specific artifacts |\n| `vent:transcript` | `{"type":"vent:transcript","role":"caller"\\|"agent","text":"...","turn_index":0}` | Reports platform/native transcript text for caller or agent |\n| `vent:transfer` | `{"type":"vent:transfer","destination":"...","status":"attempted"\\|"completed"}` | Reports transfer attempts and outcomes |\n| `vent:debug-url` | `{"type":"vent:debug-url","label":"log","url":"https://..."}` | Reports provider debug/deep-link URLs |\n| `vent:warning` | `{"type":"vent:warning","message":"...","code":"..."}` | Reports provider/runtime warnings worth preserving in run metadata |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n6. **IMPORTANT: `npx vent-hq` commands auto-load `.env` files \u2014 never use `source .env && export` before running them.** Only your own custom scripts (e.g. `npx tsx my-script.ts`) need manual env loading. To add a new credential, just append it to `.env` and the CLI picks it up automatically on the next run.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID or VAPI_AGENT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| Bland | persona_id | BLAND_PERSONA_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for Retell: Pay-as-you-go includes 20 concurrent calls, with more available on demand; Enterprise has no cap. Ask the user which plan they\'re on. If unknown, default to 20.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID, BLAND_PERSONA_ID. Only add bland_api_key/bland_pathway_id/persona_id to the JSON if those env vars are not already available.\nmax_concurrency for Bland: Start=10, Build=50, Scale=100, Enterprise=unlimited. Ask the user which plan they\'re on. If unknown, default to 10.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID (or VAPI_AGENT_ID). Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: every account includes 10 concurrent call slots by default; self-serve accounts can buy extra reserved lines, and Enterprise includes unlimited concurrency. Set this to the user\'s purchased limit. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for ElevenLabs: Free=4, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x the base limit. Ask the user which plan they\'re on and whether burst is enabled. If unknown, default to 4.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if the agent registers with an explicit agent_name in WorkerOptions. Omit for automatic dispatch.\nThe livekit adapter requires the LiveKit Agents SDK. It depends on Agents SDK signals (lk.agent.state, lk.transcription) for readiness detection, turn timing, and component latency. Custom LiveKit participants not using the Agents SDK should use the websocket adapter with a relay instead.\nmax_concurrency for LiveKit Cloud: Build=5, Ship=20, Scale=50 managed inference sessions. Agent session concurrency can be higher (Build=5, Ship=20, Scale up to 600), but managed inference is the usual gating limit for voice agents. Ask the user which tier they\'re on. If unknown, default to 5.\nKnow the provider/account concurrency limits and use them in planning, but Vent does not enforce provider caps at runtime. Hosted worker throughput is an infra setting: `WORKER_TOTAL_CONCURRENCY` caps one worker Machine.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket/WebRTC: user\'s agent must emit tool calls:\n WebSocket \u2014 JSON text frame: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\n WebRTC/LiveKit \u2014 publishData() or sendText() on topic "vent:tool-calls". Same JSON.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<metadata_capture>\nWebSocket and LiveKit/WebRTC agents can also emit richer observability metadata:\n {"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n {"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12,"provider_debug_urls":{"log":"https://..."}}}\n {"type":"vent:debug-url","label":"trace","url":"https://..."}\n {"type":"vent:session-report","report":{"room_name":"room-123","events":[...],"metrics":[...]}}\n {"type":"vent:metrics","event":"metrics_collected","metric_type":"eou","metrics":{"speechId":"speech_123","endOfUtteranceDelayMs":420}}\n {"type":"vent:function-tools-executed","event":"function_tools_executed","hasAgentHandoff":true,"tool_calls":[{"name":"lookup_customer","arguments":{"id":"123"}}]}\n {"type":"vent:conversation-item","event":"conversation_item_added","item":{"type":"agent_handoff","newAgentId":"billing-agent"}}\n {"type":"vent:session-usage","usage":{"llm":{"promptTokens":123,"completionTokens":45}}}\nTransport:\n WebSocket \u2014 send JSON text frames with these payloads. WebSocket agents may also emit {"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0} when they have native transcript text.\n WebRTC/LiveKit \u2014 publishData() or sendText() on the matching "vent:*" topic, e.g. topic "vent:call-metadata" with the JSON body above.\nFor LiveKit, transcript and timing stay authoritative from native room signals (`lk.transcription`, `lk.agent.state`). Do not emit `vent:transcript` from LiveKit agents.\nFor LiveKit agents, prefer the first-party helper instead of manual forwarding:\nNode.js \u2014 `npm install @vent-hq/livekit`:\n```ts\nimport { instrumentLiveKitAgent } from "@vent-hq/livekit";\n\nconst vent = instrumentLiveKitAgent({\n ctx,\n session,\n});\n```\nPython \u2014 `pip install vent-livekit`:\n```python\nfrom vent_livekit import instrument_livekit_agent\n\nvent = instrument_livekit_agent(ctx=ctx, session=session)\n```\nThis helper must run inside the LiveKit agent runtime with the existing Agents SDK `session` and `ctx` objects. It is the Vent integration layer on top of the Agents SDK, not a replacement for it.\nThis automatically publishes only the in-agent-only LiveKit signals: `metrics_collected`, `function_tools_executed`, `conversation_item_added`, and a session report on close/shutdown.\nDo not use it to mirror room-visible signals like transcript, agent state timing, or room/session ID \u2014 Vent already gets those from LiveKit itself.\nFor LiveKit inside-agent forwarding, prefer sending the raw LiveKit event payloads on:\n `vent:metrics`\n `vent:function-tools-executed`\n `vent:conversation-item`\n `vent:session-usage`\nUse these metadata events when the agent runtime already knows native IDs, recordings, warnings, debug links, session reports, metrics events, or handoff artifacts. This gives custom and LiveKit agents parity with hosted adapters without needing a LiveKit Cloud connector.\n</metadata_capture>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "optional preplanned interrupt tendency: low | high. If set, Vent may pre-plan a caller cut-in before the agent turn starts. It does NOT make a mid-turn interrupt LLM call.",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\nInterruption rules:\n- `audio_actions: [{ "action": "interrupt", ... }]` is the deterministic per-turn interrupt test. Prefer this for evaluation.\n- `persona.interruption_style` is only a preplanned caller tendency. If used, Vent decides before the agent response starts whether this turn may cut in.\n- Vent no longer pauses mid-turn to ask a second LLM whether to interrupt.\n- For production-faithful testing, prefer explicit `audio_actions.interrupt` over persona interruption.\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" },\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check avail\u2014", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400, "interrupted": true },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900, "is_interruption": true },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "response_time_ms": 890, "response_time_source": "ttfw",\n "p50_response_time_ms": 850, "p90_response_time_ms": 1100, "p95_response_time_ms": 1400, "p99_response_time_ms": 1550,\n "first_response_time_ms": 1950,\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950, "total_silence_ms": 4200, "mean_turn_gap_ms": 380,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "transcript_quality": {\n "wer": 0.04,\n "hallucination_events": [\n { "error_count": 5, "reference_text": "triple five one two", "hypothesis_text": "five five five nine two" }\n ],\n "repetition_score": 0.05,\n "reprompt_count": 0,\n "filler_word_rate": 0.8,\n "words_per_minute": 148\n },\n "audio_analysis": {\n "caller_talk_time_ms": 12400,\n "agent_talk_time_ms": 28500,\n "agent_speech_ratio": 0.72,\n "talk_ratio_vad": 0.69,\n "interruption_rate": 0.25,\n "interruption_count": 1,\n "agent_overtalk_after_barge_in_ms": 280,\n "agent_interrupting_user_rate": 0.0,\n "agent_interrupting_user_count": 0,\n "missed_response_windows": 0,\n "longest_monologue_ms": 5800,\n "silence_gaps_over_2s": 1,\n "total_internal_silence_ms": 2400,\n "mean_agent_speech_segment_ms": 3450\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", "10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "component_latency": {\n "mean_stt_ms": 120, "mean_llm_ms": 450, "mean_tts_ms": 80,\n "p95_stt_ms": 180, "p95_llm_ms": 620, "p95_tts_ms": 110,\n "mean_speech_duration_ms": 2100,\n "bottleneck": "llm"\n },\n "call_metadata": {\n "platform": "vapi",\n "cost_usd": 0.08,\n "recording_url": "https://example.com/recording",\n "ended_reason": "customer_ended_call",\n "transfers": []\n },\n "warnings": [],\n "audio_actions": [],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAlways present: name, status, caller_prompt, duration_ms, error, transcript, tool_calls, warnings, audio_actions. Nullable when analysis didn\'t run: latency, transcript_quality, audio_analysis, component_latency, call_metadata, emotion (requires prosody: true), debug (requires --verbose).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. **Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed (`call_metadata.transfer_completed`). Use `call_metadata.transfers[]` to report transfer type, destination, status, and sources.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. Treat `transcript_quality.hallucination_events` only as a speech-recognition warning signal, not proof of agent hallucination. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. |\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. |\n\n### Interruption evaluation\n\nWhen the transcript contains `interrupted: true` / `is_interruption: true` turns, evaluate these metrics by reading the transcript:\n\n| Metric | How to evaluate | Target |\n|--------|----------------|--------|\n| **Recovery rate** | For each interrupted turn: does the post-interrupt agent response acknowledge or address the interruption? | >90% |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state? | >95% |\n| **Agent overtalk after barge-in** | Use `audio_analysis.agent_overtalk_after_barge_in_ms` when available. Lower is better because it measures how long the agent kept speaking after the caller cut in. | <500ms acceptable |\n| **Agent interrupting user rate** | Use `audio_analysis.agent_interrupting_user_rate` and the transcript to see whether the agent starts speaking before the caller finished. | 0 ideal |\n\nReport these alongside standard metrics when interruption calls run.\n</output_conversation_test>\n</call_config>\n\n\n## Exit Codes\n\n0=pass, 1=fail, 2=error\n\n## Vent Access Token\n\nSet `VENT_ACCESS_TOKEN` env var or run `npx vent-hq login`.\n';
5907
+ var cursor_default = '---\ndescription: Vent \u2014 Voice agent calls. Run calls against your voice agent, get pass/fail results. Use when the user asks to run voice-agent calls or when you need to verify voice agent behavior after code changes.\nalwaysApply: true\n---\n\n# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud (30-120s).\n\n## When to Call (read this first)\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n- Do NOT skip call verification \u2014 voice agents are non-deterministic, small changes can break flows\n\n## LiveKit Agent Setup (REQUIRED before running LiveKit tests)\n\nWhen the user asks to test, configure, or run anything with a LiveKit agent, you MUST install the Vent LiveKit helper in the agent project and add the instrumentation call to the agent entrypoint BEFORE running any Vent calls.\n- Node.js: `npm install @vent-hq/livekit`, then add `instrumentLiveKitAgent({ ctx, session })` to the agent entrypoint.\n- Python: `pip install vent-livekit`, then add `instrument_livekit_agent(ctx=ctx, session=session)` to the agent entrypoint.\nWithout this, Vent cannot capture metrics, tool calls, usage, or session reports from the LiveKit agent.\n\nBefore running a LiveKit test, verify the agent process is running (`ps aux | grep <agent_process>`). If it\'s not running, start it before attempting any calls.\n\nLiveKit worker restart caveat: After restarting a LiveKit agent process, wait at least 60 seconds before running a Vent call. If calls were attempted against stale workers (failed with "agent did not speak" or "no speech detected"), kill the agent, wait 60 seconds for ALL stale workers to deregister, then restart. Do not restart the agent multiple times in quick succession \u2014 each restart creates another stale worker registration that compounds the problem.\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --verbose` | Include debug fields in the result JSON |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Check results of a previous run |\n| `npx vent-hq status <run-id> --verbose` | Re-print a run with debug fields included |\n\n## When To Use `--verbose`\n\nDefault output is enough for most work. It already includes:\n- transcript\n- latency\n- audio analysis\n- tool calls\n- summary cost / recording / transfers\n\nUse `--verbose` only when you need debugging detail that is not in the default result:\n- per-turn debug fields: timestamps, caller decision mode, silence pad, STT confidence, platform transcript\n- raw signal analysis: `debug.signal_quality`\n- harness timings: `debug.harness_overhead`\n- raw prosody payload and warnings\n- raw provider warnings\n- per-turn component latency arrays\n- raw observed tool-call timeline\n- provider-specific metadata in `debug.provider_metadata`\n\nTrigger `--verbose` when:\n- transcript accuracy looks wrong and you need to inspect `platform_transcript`\n- latency is bad and you need per-turn/component breakdowns\n- interruptions/barge-in behavior looks wrong\n- tool-call execution looks inconsistent or missing\n- the provider returned warnings/errors or you need provider-native artifacts\n\nSkip `--verbose` when:\n- you only need pass/fail, transcript, latency, tool calls, recording, or summary\n- you are doing quick iteration on prompt wording and the normal result already explains the failure\n\n## Normalization Contract\n\nVent always returns one normalized result shape on `stdout` across adapters. Treat these as the stable categories:\n- `transcript`\n- `latency`\n- `tool_calls`\n- `component_latency`\n- `call_metadata`\n- `warnings`\n- `audio_actions`\n- `emotion`\n\nSource-of-truth policy:\n- Vent computes transcript, latency, and audio-quality metrics itself.\n- Hosted adapters choose the best source per category, usually provider post-call data for tool calls, call metadata, transfers, provider transcripts, and recordings.\n- Realtime provider events are fallback or enrichment only when post-call data is missing, delayed, weaker for that category, or provider-specific.\n- `LiveKit` helper events are the provider-native path for rich in-agent observability.\n- `websocket`/custom agents are realtime-native but still map into the same normalized categories.\n- Keep adapter-specific details in `call_metadata.provider_metadata` or `debug.provider_metadata`, not in new top-level fields.\n\n\n## Critical Rules\n\n1. **Run all calls in parallel in ONE shell command** \u2014 Cursor cannot run multiple shell tool calls concurrently. Instead, launch all calls in a **single** shell command using `&` and `wait`. Example: `npx vent-hq run -f .vent/suite.bland.json --call call-1 & npx vent-hq run -f .vent/suite.bland.json --call call-2 & wait`. Set a 300-second (5 min) timeout. NEVER run calls as separate commands \u2014 they will serialize.\n2. **Handle backgrounded commands** \u2014 If a call command gets moved to background by the system, wait for it to complete before proceeding. Never end your response without delivering call results.\n3. **Output format** \u2014 In non-TTY mode (when run by an agent), every SSE event is written to stdout as a JSON line. Results are always in stdout.\n4. **This skill is self-contained** \u2014 The full config schema is below. Do NOT re-read this file.\n5. **Always analyze results** \u2014 The run command outputs complete JSON with full transcript, latency, and tool calls. Use `--verbose` only when the default result is not enough to explain the failure. Analyze this output directly \u2014 do NOT run `vent status` afterwards unless you are re-checking a past run.\n\n## Workflow\n\n### First time: create the call suite\n\n1. Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the **Full Config Schema** section below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). This prevents confusion when multiple adapters are tested in the same project.\n - Name calls after specific flows (e.g., `"reschedule-appointment"`, not `"call-1"`)\n - Write `caller_prompt` as a realistic persona with a specific goal, based on the agent\'s domain\n - Set `max_turns` based on the flow complexity (simple FAQ: 4-6, booking: 8-12, complex: 12-20)\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n### Subsequent runs \u2014 reuse the existing suite\n\nA matching `.vent/suite.<adapter>.json` already exists? Just re-run it. No need to recreate.\n\n### Run calls\n\n1. If the suite uses `start_command`, start the shared local session first:\n ```\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n ```\n\n2. Run calls:\n ```\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 add --session\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n\n3. To run multiple calls from the same suite, **run them in parallel in one shell command**:\n ```\n npx vent-hq run -f .vent/suite.vapi.json --call happy-path & npx vent-hq run -f .vent/suite.vapi.json --call edge-case & wait\n ```\n\n4. Analyze each result, identify failures, correlate with the codebase, and fix.\n5. **Compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. Read the second-most-recent JSON in `.vent/runs/` and compare against the current run. Shape: `{ run_id, timestamp, git_sha, summary, call_results: [...] }`. Each entry in `call_results` is a flat normalized per-call result: `{ name, status, duration_ms, transcript, observed_tool_calls, metrics, cost_usd, ... }`. Compare: `call_results[i].status` flips, `call_results[i].metrics.latency_p50_ms` / `latency_p95_ms` changes >20%, `call_results[i].observed_tool_calls[].successful` count drops, `summary.total_cost_usd` increases >30%, `call_results[i].transcript` divergence. Correlate with `git diff` between the two runs\' `git_sha` values. Skip if no previous run exists.\n\n## Connection\n\n- **BYO agent runtime**: your agent owns its own provider credentials. Use `start_command` for a local agent or `agent_url` for a hosted custom endpoint.\n- **Platform-direct runtime**: use adapter `vapi | retell | elevenlabs | bland | livekit`. This is the only mode where Vent itself needs provider credentials and saved platform connections apply.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n| `vent:session` | `{"type":"vent:session","platform":"custom","provider_call_id":"...","provider_session_id":"..."}` | Reports stable provider/session identifiers |\n| `vent:call-metadata` | `{"type":"vent:call-metadata","call_metadata":{...}}` | Reports post-call metadata such as cost, recordings, variables, and provider-specific artifacts |\n| `vent:transcript` | `{"type":"vent:transcript","role":"caller"\\|"agent","text":"...","turn_index":0}` | Reports platform/native transcript text for caller or agent |\n| `vent:transfer` | `{"type":"vent:transfer","destination":"...","status":"attempted"\\|"completed"}` | Reports transfer attempts and outcomes |\n| `vent:debug-url` | `{"type":"vent:debug-url","label":"log","url":"https://..."}` | Reports provider debug/deep-link URLs |\n| `vent:warning` | `{"type":"vent:warning","message":"...","code":"..."}` | Reports provider/runtime warnings worth preserving in run metadata |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n6. **IMPORTANT: `npx vent-hq` commands auto-load `.env` files \u2014 never use `source .env && export` before running them.** Only your own custom scripts (e.g. `npx tsx my-script.ts`) need manual env loading. To add a new credential, just append it to `.env` and the CLI picks it up automatically on the next run.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID or VAPI_AGENT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| Bland | persona_id | BLAND_PERSONA_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for Retell: Pay-as-you-go includes 20 concurrent calls, with more available on demand; Enterprise has no cap. Ask the user which plan they\'re on. If unknown, default to 20.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID, BLAND_PERSONA_ID. Only add bland_api_key/bland_pathway_id/persona_id to the JSON if those env vars are not already available.\nmax_concurrency for Bland: Start=10, Build=50, Scale=100, Enterprise=unlimited. Ask the user which plan they\'re on. If unknown, default to 10.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID (or VAPI_AGENT_ID). Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: every account includes 10 concurrent call slots by default; self-serve accounts can buy extra reserved lines, and Enterprise includes unlimited concurrency. Set this to the user\'s purchased limit. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for ElevenLabs: Free=4, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x the base limit. Ask the user which plan they\'re on and whether burst is enabled. If unknown, default to 4.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if your LiveKit agent registered with an explicit dispatch name in the SDK, e.g. Python `@server.rtc_session(agent_name="\u2026")` or `WorkerOptions(agent_name="\u2026")`, Node.js `new ServerOptions({ agentName: "\u2026" })`. Omit for automatic dispatch.\nThe livekit adapter requires the LiveKit Agents SDK. It depends on Agents SDK signals (lk.agent.state, lk.transcription) for readiness detection, turn timing, and component latency. Custom LiveKit participants not using the Agents SDK should use the websocket adapter with a relay instead.\nmax_concurrency for LiveKit Cloud: Build=5, Ship=20, Scale=50 managed inference sessions. Agent session concurrency can be higher (Build=5, Ship=20, Scale up to 600), but managed inference is the usual gating limit for voice agents. Ask the user which tier they\'re on. If unknown, default to 5.\nKnow the provider/account concurrency limits and use them in planning, but Vent does not enforce provider caps at runtime. Hosted worker throughput is an infra setting: `WORKER_TOTAL_CONCURRENCY` caps one worker Machine.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket: user\'s agent must emit a JSON text frame per tool call: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\nLiveKit: use the `@vent-hq/livekit` (Node) or `vent-livekit` (Python) helper. See the "LiveKit Agent Setup" section. The helper captures tool calls automatically from Agents SDK session events \u2014 do not publish on Vent topics manually.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<metadata_capture>\nWebSocket agents can emit richer observability metadata as JSON text frames:\n {"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n {"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12,"provider_debug_urls":{"log":"https://..."}}}\n {"type":"vent:debug-url","label":"trace","url":"https://..."}\n {"type":"vent:session-report","report":{"room_name":"room-123","events":[...],"metrics":[...]}}\n {"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n\nLiveKit agents get all metadata through the `@vent-hq/livekit` (Node) / `vent-livekit` (Python) helper \u2014 it subscribes to Agents SDK session events (`metrics_collected`, `function_tools_executed`, `conversation_item_added`, `session_usage_updated`, close) and publishes on Vent topics automatically. Transcript and agent-state timing come from native LiveKit room signals (`lk.transcription`, `lk.agent.state`) \u2014 the helper does not duplicate them.\n\nNode.js \u2014 `npm install @vent-hq/livekit`:\n```ts\nimport { instrumentLiveKitAgent } from "@vent-hq/livekit";\n\nconst vent = instrumentLiveKitAgent({ ctx, session });\n```\nPython \u2014 `pip install vent-livekit`:\n```python\nfrom vent_livekit import instrument_livekit_agent\n\nvent = instrument_livekit_agent(ctx=ctx, session=session)\n```\n\nThe helper is the only supported integration path for LiveKit Agents SDK agents. Do not publish on `vent:*` topics manually \u2014 let the helper forward SDK events.\n</metadata_capture>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "optional preplanned interrupt tendency: low | high. If set, Vent may pre-plan a caller cut-in before the agent turn starts. It does NOT make a mid-turn interrupt LLM call.",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\nInterruption rules:\n- `audio_actions: [{ "action": "interrupt", ... }]` is the deterministic per-turn interrupt test. Prefer this for evaluation.\n- `persona.interruption_style` is only a preplanned caller tendency. If used, Vent decides before the agent response starts whether this turn may cut in.\n- Vent no longer pauses mid-turn to ask a second LLM whether to interrupt.\n- For production-faithful testing, prefer explicit `audio_actions.interrupt` over persona interruption.\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" },\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check availability.", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400 },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900 },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "response_time_ms": 890, "response_time_source": "ttfw",\n "p50_response_time_ms": 850, "p90_response_time_ms": 1100, "p95_response_time_ms": 1400, "p99_response_time_ms": 1550,\n "first_response_time_ms": 1950,\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", "10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "component_latency": {\n "mean_stt_ms": 120, "mean_llm_ms": 450, "mean_tts_ms": 80,\n "p95_stt_ms": 180, "p95_llm_ms": 620, "p95_tts_ms": 110,\n "mean_speech_duration_ms": 2100,\n "bottleneck": "llm"\n },\n "call_metadata": {\n "platform": "vapi",\n "cost_usd": 0.08,\n "recording_url": "https://example.com/recording",\n "ended_reason": "customer_ended_call",\n "transfers": []\n },\n "warnings": [],\n "audio_actions": [],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAlways present: name, status, caller_prompt, duration_ms, error, transcript, tool_calls, warnings, audio_actions. Nullable when analysis didn\'t run: latency, component_latency, call_metadata, emotion (requires prosody: true), debug (requires --verbose).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. **Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed (`call_metadata.transfer_completed`). Use `call_metadata.transfers[]` to report transfer type, destination, status, and sources.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. |\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. |\n\nIgnore minor STT mis-transcriptions in `transcript` text (e.g. `"check teach hat"` for `"check that"`, swapped homophones, missing question marks on short tails). These are streaming-STT artifacts, not agent bugs. Judge on semantic intent, not exact spelling. Only flag transcript quality when it prevents understanding what the agent actually said.\n\n### Interruption evaluation\n\nEvaluate interruption handling by reading the transcript and listening to the recording. Flag any turn where the agent ignores a barge-in, repeats itself from scratch, or loses context after being cut off.\n\n| Dimension | How to evaluate |\n|--------|----------------|\n| **Recovery** | After a caller cuts in, does the agent\'s next reply acknowledge or address the barge-in rather than restarting from scratch? |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state? |\n| **Overtalk** | Does the agent keep speaking for long after the caller starts, or does it yield promptly? Use the recording to judge. |\n</output_conversation_test>\n</call_config>\n\n\n## Exit Codes\n\n0=pass, 1=fail, 2=error\n\n## Vent Access Token\n\nSet `VENT_ACCESS_TOKEN` env var or run `npx vent-hq login`.\n';
6007
5908
 
6008
5909
  // src/skills/codex.md
6009
- var codex_default = '# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud (30-120s).\n\n## When to Call\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n\n## LiveKit Agent Setup (REQUIRED before running LiveKit tests)\n\nWhen the user asks to test, configure, or run anything with a LiveKit agent, you MUST install the Vent LiveKit helper in the agent project and add the instrumentation call to the agent entrypoint BEFORE running any Vent calls.\n- Node.js: `npm install @vent-hq/livekit`, then add `instrumentLiveKitAgent({ ctx, session })` to the agent entrypoint.\n- Python: `pip install vent-livekit`, then add `instrument_livekit_agent(ctx=ctx, session=session)` to the agent entrypoint.\nWithout this, Vent cannot capture metrics, tool calls, usage, or session reports from the LiveKit agent.\n\nBefore running a LiveKit test, verify the agent process is running (`ps aux | grep <agent_process>`). If it\'s not running, start it before attempting any calls.\n\nLiveKit worker restart caveat: After restarting a LiveKit agent process, wait at least 60 seconds before running a Vent call. If calls were attempted against stale workers (failed with "agent did not speak" or "no speech detected"), kill the agent, wait 60 seconds for ALL stale workers to deregister, then restart. Do not restart the agent multiple times in quick succession \u2014 each restart creates another stale worker registration that compounds the problem.\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --verbose` | Include debug fields in the result JSON |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Get full results for a completed run |\n| `npx vent-hq status <run-id> --verbose` | Re-print a run with debug fields included |\n\n## When To Use `--verbose`\n\nDefault output is enough for most iterations. It already includes:\n- transcript\n- latency\n- transcript quality (`wer` / `cer`)\n- audio analysis\n- tool calls\n- summary cost / recording / transfers\n\nUse `--verbose` only when you need debugging detail that is not in the default result:\n- per-turn debug fields: timestamps, caller decision mode, silence pad, STT confidence, platform transcript\n- raw signal analysis: `debug.signal_quality`\n- harness timings: `debug.harness_overhead`\n- raw prosody payload and warnings\n- raw provider warnings\n- per-turn component latency arrays\n- raw observed tool-call timeline\n- provider-specific metadata in `debug.provider_metadata`\n\nTrigger `--verbose` when:\n- transcript accuracy looks wrong and you need to inspect `platform_transcript`\n- latency is bad and you need per-turn/component breakdowns\n- interruptions/barge-in behavior looks wrong\n- tool-call execution looks inconsistent or missing\n- the provider returned warnings/errors or you need provider-native artifacts\n\nSkip `--verbose` when:\n- you only need pass/fail, transcript, latency, tool calls, recording, or summary\n- you are doing quick iteration on prompt wording and the normal result already explains the failure\n\n## Normalization Contract\n\nVent always returns one normalized result shape on `stdout` across adapters. Treat these as the stable categories:\n- `transcript`\n- `latency`\n- `transcript_quality`\n- `audio_analysis`\n- `tool_calls`\n- `component_latency`\n- `call_metadata`\n- `warnings`\n- `audio_actions`\n- `emotion`\n\nSource-of-truth policy:\n- Vent computes transcript, latency, and audio-quality metrics itself.\n- Hosted adapters choose the best source per category, usually provider post-call data for tool calls, call metadata, transfers, provider transcripts, and recordings.\n- Realtime provider events are fallback or enrichment only when post-call data is missing, delayed, weaker for that category, or provider-specific.\n- `LiveKit` helper events are the provider-native path for rich in-agent observability.\n- `websocket`/custom agents are realtime-native but still map into the same normalized categories.\n- Keep adapter-specific details in `call_metadata.provider_metadata` or `debug.provider_metadata`, not in new top-level fields.\n\n## Workflow\n\n1. Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the config schema below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). This prevents confusion when multiple adapters are tested in the same project.\n4. Run calls:\n ```\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 first start relay, then add --session\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n5. To run multiple calls, **run each as a separate shell command** \u2014 Codex executes parallel shell calls concurrently, so each call runs simultaneously. Example: emit one `npx vent-hq run --call happy-path` and one `npx vent-hq run --call edge-case` as separate tool calls.\n6. After results return, **compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. Use `--verbose` only when the default result is not enough to explain the failure. Compare status flips, TTFW p50/p95 changes >20%, tool call count drops, cost increases >30%. Skip if no previous run exists.\n7. After code changes, re-run the same way.\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n## Critical Rules\n\n1. **Run all calls in parallel as separate shell commands** \u2014 When a suite has multiple calls, emit each `npx vent-hq run` as its own shell tool call in the same response. Codex runs shell calls concurrently \u2014 they execute simultaneously. Set a 300-second (5 min) timeout on each. Do NOT combine calls into one command with `&`.\n2. **Wait for all results** \u2014 Do not end your response until every call has returned results.\n3. **Output format** \u2014 In non-TTY mode (when run by an agent), every SSE event is written to stdout as a JSON line. Results are always in stdout.\n4. **This skill is self-contained** \u2014 The full config schema is below.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n| `vent:session` | `{"type":"vent:session","platform":"custom","provider_call_id":"...","provider_session_id":"..."}` | Reports stable provider/session identifiers |\n| `vent:call-metadata` | `{"type":"vent:call-metadata","call_metadata":{...}}` | Reports post-call metadata such as cost, recordings, variables, and provider-specific artifacts |\n| `vent:transcript` | `{"type":"vent:transcript","role":"caller"\\|"agent","text":"...","turn_index":0}` | Reports platform/native transcript text for caller or agent |\n| `vent:transfer` | `{"type":"vent:transfer","destination":"...","status":"attempted"\\|"completed"}` | Reports transfer attempts and outcomes |\n| `vent:debug-url` | `{"type":"vent:debug-url","label":"log","url":"https://..."}` | Reports provider debug/deep-link URLs |\n| `vent:warning` | `{"type":"vent:warning","message":"...","code":"..."}` | Reports provider/runtime warnings worth preserving in run metadata |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n6. **IMPORTANT: `npx vent-hq` commands auto-load `.env` files \u2014 never use `source .env && export` before running them.** Only your own custom scripts (e.g. `npx tsx my-script.ts`) need manual env loading. To add a new credential, just append it to `.env` and the CLI picks it up automatically on the next run.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID or VAPI_AGENT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| Bland | persona_id | BLAND_PERSONA_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for Retell: Pay-as-you-go includes 20 concurrent calls, with more available on demand; Enterprise has no cap. Ask the user which plan they\'re on. If unknown, default to 20.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID, BLAND_PERSONA_ID. Only add bland_api_key/bland_pathway_id/persona_id to the JSON if those env vars are not already available.\nmax_concurrency for Bland: Start=10, Build=50, Scale=100, Enterprise=unlimited. Ask the user which plan they\'re on. If unknown, default to 10.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID (or VAPI_AGENT_ID). Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: every account includes 10 concurrent call slots by default; self-serve accounts can buy extra reserved lines, and Enterprise includes unlimited concurrency. Set this to the user\'s purchased limit. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for ElevenLabs: Free=4, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x the base limit. Ask the user which plan they\'re on and whether burst is enabled. If unknown, default to 4.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if the agent registers with an explicit agent_name in WorkerOptions. Omit for automatic dispatch.\nThe livekit adapter requires the LiveKit Agents SDK. It depends on Agents SDK signals (lk.agent.state, lk.transcription) for readiness detection, turn timing, and component latency. Custom LiveKit participants not using the Agents SDK should use the websocket adapter with a relay instead.\nmax_concurrency for LiveKit Cloud: Build=5, Ship=20, Scale=50 managed inference sessions. Agent session concurrency can be higher (Build=5, Ship=20, Scale up to 600), but managed inference is the usual gating limit for voice agents. Ask the user which tier they\'re on. If unknown, default to 5.\nKnow the provider/account concurrency limits and use them in planning, but Vent does not enforce provider caps at runtime. Hosted worker throughput is an infra setting: `WORKER_TOTAL_CONCURRENCY` caps one worker Machine.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket/WebRTC: user\'s agent must emit tool calls:\n WebSocket \u2014 JSON text frame: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\n WebRTC/LiveKit \u2014 publishData() or sendText() on topic "vent:tool-calls". Same JSON.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<metadata_capture>\nWebSocket and LiveKit/WebRTC agents can also emit richer observability metadata:\n {"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n {"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12,"provider_debug_urls":{"log":"https://..."}}}\n {"type":"vent:debug-url","label":"trace","url":"https://..."}\n {"type":"vent:session-report","report":{"room_name":"room-123","events":[...],"metrics":[...]}}\n {"type":"vent:metrics","event":"metrics_collected","metric_type":"eou","metrics":{"speechId":"speech_123","endOfUtteranceDelayMs":420}}\n {"type":"vent:function-tools-executed","event":"function_tools_executed","hasAgentHandoff":true,"tool_calls":[{"name":"lookup_customer","arguments":{"id":"123"}}]}\n {"type":"vent:conversation-item","event":"conversation_item_added","item":{"type":"agent_handoff","newAgentId":"billing-agent"}}\n {"type":"vent:session-usage","usage":{"llm":{"promptTokens":123,"completionTokens":45}}}\nTransport:\n WebSocket \u2014 send JSON text frames with these payloads. WebSocket agents may also emit {"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0} when they have native transcript text.\n WebRTC/LiveKit \u2014 publishData() or sendText() on the matching "vent:*" topic, e.g. topic "vent:call-metadata" with the JSON body above.\nFor LiveKit, transcript and timing stay authoritative from native room signals (`lk.transcription`, `lk.agent.state`). Do not emit `vent:transcript` from LiveKit agents.\nFor LiveKit agents, prefer the first-party helper instead of manual forwarding:\nNode.js \u2014 `npm install @vent-hq/livekit`:\n```ts\nimport { instrumentLiveKitAgent } from "@vent-hq/livekit";\n\nconst vent = instrumentLiveKitAgent({\n ctx,\n session,\n});\n```\nPython \u2014 `pip install vent-livekit`:\n```python\nfrom vent_livekit import instrument_livekit_agent\n\nvent = instrument_livekit_agent(ctx=ctx, session=session)\n```\nThis helper must run inside the LiveKit agent runtime with the existing Agents SDK `session` and `ctx` objects. It is the Vent integration layer on top of the Agents SDK, not a replacement for it.\nThis automatically publishes only the in-agent-only LiveKit signals: `metrics_collected`, `function_tools_executed`, `conversation_item_added`, and a session report on close/shutdown.\nDo not use it to mirror room-visible signals like transcript, agent state timing, or room/session ID \u2014 Vent already gets those from LiveKit itself.\nFor LiveKit inside-agent forwarding, prefer sending the raw LiveKit event payloads on:\n `vent:metrics`\n `vent:function-tools-executed`\n `vent:conversation-item`\n `vent:session-usage`\nUse these metadata events when the agent runtime already knows native IDs, recordings, warnings, debug links, session reports, metrics events, or handoff artifacts. This gives custom and LiveKit agents parity with hosted adapters without needing a LiveKit Cloud connector.\n</metadata_capture>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "optional preplanned interrupt tendency: low | high. If set, Vent may pre-plan a caller cut-in before the agent turn starts. It does NOT make a mid-turn interrupt LLM call.",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\nInterruption rules:\n- `audio_actions: [{ "action": "interrupt", ... }]` is the deterministic per-turn interrupt test. Prefer this for evaluation.\n- `persona.interruption_style` is only a preplanned caller tendency. If used, Vent decides before the agent response starts whether this turn may cut in.\n- Vent no longer pauses mid-turn to ask a second LLM whether to interrupt.\n- For production-faithful testing, prefer explicit `audio_actions.interrupt` over persona interruption.\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" },\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check avail\u2014", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400, "interrupted": true },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900, "is_interruption": true },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "response_time_ms": 890, "response_time_source": "ttfw",\n "p50_response_time_ms": 850, "p90_response_time_ms": 1100, "p95_response_time_ms": 1400, "p99_response_time_ms": 1550,\n "first_response_time_ms": 1950,\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950, "total_silence_ms": 4200, "mean_turn_gap_ms": 380,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "transcript_quality": {\n "wer": 0.04,\n "hallucination_events": [\n { "error_count": 5, "reference_text": "triple five one two", "hypothesis_text": "five five five nine two" }\n ],\n "repetition_score": 0.05,\n "reprompt_count": 0,\n "filler_word_rate": 0.8,\n "words_per_minute": 148\n },\n "audio_analysis": {\n "caller_talk_time_ms": 12400,\n "agent_talk_time_ms": 28500,\n "agent_speech_ratio": 0.72,\n "talk_ratio_vad": 0.69,\n "interruption_rate": 0.25,\n "interruption_count": 1,\n "agent_overtalk_after_barge_in_ms": 280,\n "agent_interrupting_user_rate": 0.0,\n "agent_interrupting_user_count": 0,\n "missed_response_windows": 0,\n "longest_monologue_ms": 5800,\n "silence_gaps_over_2s": 1,\n "total_internal_silence_ms": 2400,\n "mean_agent_speech_segment_ms": 3450\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", "10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "component_latency": {\n "mean_stt_ms": 120, "mean_llm_ms": 450, "mean_tts_ms": 80,\n "p95_stt_ms": 180, "p95_llm_ms": 620, "p95_tts_ms": 110,\n "mean_speech_duration_ms": 2100,\n "bottleneck": "llm"\n },\n "call_metadata": {\n "platform": "vapi",\n "cost_usd": 0.08,\n "recording_url": "https://example.com/recording",\n "ended_reason": "customer_ended_call",\n "transfers": []\n },\n "warnings": [],\n "audio_actions": [],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAlways present: name, status, caller_prompt, duration_ms, error, transcript, tool_calls, warnings, audio_actions. Nullable when analysis didn\'t run: latency, transcript_quality, audio_analysis, component_latency, call_metadata, emotion (requires prosody: true), debug (requires --verbose).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. **Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed (`call_metadata.transfer_completed`). Use `call_metadata.transfers[]` to report transfer type, destination, status, and sources.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. Treat `transcript_quality.hallucination_events` only as a speech-recognition warning signal, not proof of agent hallucination. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. |\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. |\n\n### Interruption evaluation\n\nWhen the transcript contains `interrupted: true` / `is_interruption: true` turns, evaluate these metrics by reading the transcript:\n\n| Metric | How to evaluate | Target |\n|--------|----------------|--------|\n| **Recovery rate** | For each interrupted turn: does the post-interrupt agent response acknowledge or address the interruption? | >90% |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state? | >95% |\n| **Agent overtalk after barge-in** | Use `audio_analysis.agent_overtalk_after_barge_in_ms` when available. Lower is better because it measures how long the agent kept speaking after the caller cut in. | <500ms acceptable |\n| **Agent interrupting user rate** | Use `audio_analysis.agent_interrupting_user_rate` and the transcript to see whether the agent starts speaking before the caller finished. | 0 ideal |\n\nReport these alongside standard metrics when interruption calls run.\n</output_conversation_test>\n</call_config>\n\n\n## Exit Codes\n\n0=pass, 1=fail, 2=error\n';
5910
+ var codex_default = '# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud (30-120s).\n\n## When to Call\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n\n## LiveKit Agent Setup (REQUIRED before running LiveKit tests)\n\nWhen the user asks to test, configure, or run anything with a LiveKit agent, you MUST install the Vent LiveKit helper in the agent project and add the instrumentation call to the agent entrypoint BEFORE running any Vent calls.\n- Node.js: `npm install @vent-hq/livekit`, then add `instrumentLiveKitAgent({ ctx, session })` to the agent entrypoint.\n- Python: `pip install vent-livekit`, then add `instrument_livekit_agent(ctx=ctx, session=session)` to the agent entrypoint.\nWithout this, Vent cannot capture metrics, tool calls, usage, or session reports from the LiveKit agent.\n\nBefore running a LiveKit test, verify the agent process is running (`ps aux | grep <agent_process>`). If it\'s not running, start it before attempting any calls.\n\nLiveKit worker restart caveat: After restarting a LiveKit agent process, wait at least 60 seconds before running a Vent call. If calls were attempted against stale workers (failed with "agent did not speak" or "no speech detected"), kill the agent, wait 60 seconds for ALL stale workers to deregister, then restart. Do not restart the agent multiple times in quick succession \u2014 each restart creates another stale worker registration that compounds the problem.\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --verbose` | Include debug fields in the result JSON |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Get full results for a completed run |\n| `npx vent-hq status <run-id> --verbose` | Re-print a run with debug fields included |\n\n## When To Use `--verbose`\n\nDefault output is enough for most iterations. It already includes:\n- transcript\n- latency\n- audio analysis\n- tool calls\n- summary cost / recording / transfers\n\nUse `--verbose` only when you need debugging detail that is not in the default result:\n- per-turn debug fields: timestamps, caller decision mode, silence pad, STT confidence, platform transcript\n- raw signal analysis: `debug.signal_quality`\n- harness timings: `debug.harness_overhead`\n- raw prosody payload and warnings\n- raw provider warnings\n- per-turn component latency arrays\n- raw observed tool-call timeline\n- provider-specific metadata in `debug.provider_metadata`\n\nTrigger `--verbose` when:\n- transcript accuracy looks wrong and you need to inspect `platform_transcript`\n- latency is bad and you need per-turn/component breakdowns\n- interruptions/barge-in behavior looks wrong\n- tool-call execution looks inconsistent or missing\n- the provider returned warnings/errors or you need provider-native artifacts\n\nSkip `--verbose` when:\n- you only need pass/fail, transcript, latency, tool calls, recording, or summary\n- you are doing quick iteration on prompt wording and the normal result already explains the failure\n\n## Normalization Contract\n\nVent always returns one normalized result shape on `stdout` across adapters. Treat these as the stable categories:\n- `transcript`\n- `latency`\n- `tool_calls`\n- `component_latency`\n- `call_metadata`\n- `warnings`\n- `audio_actions`\n- `emotion`\n\nSource-of-truth policy:\n- Vent computes transcript, latency, and audio-quality metrics itself.\n- Hosted adapters choose the best source per category, usually provider post-call data for tool calls, call metadata, transfers, provider transcripts, and recordings.\n- Realtime provider events are fallback or enrichment only when post-call data is missing, delayed, weaker for that category, or provider-specific.\n- `LiveKit` helper events are the provider-native path for rich in-agent observability.\n- `websocket`/custom agents are realtime-native but still map into the same normalized categories.\n- Keep adapter-specific details in `call_metadata.provider_metadata` or `debug.provider_metadata`, not in new top-level fields.\n\n## Workflow\n\n1. Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the config schema below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). This prevents confusion when multiple adapters are tested in the same project.\n4. Run calls:\n ```\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 first start relay, then add --session\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n5. To run multiple calls, **run each as a separate shell command** \u2014 Codex executes parallel shell calls concurrently, so each call runs simultaneously. Example: emit one `npx vent-hq run --call happy-path` and one `npx vent-hq run --call edge-case` as separate tool calls.\n6. After results return, **compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. Shape: `{ run_id, timestamp, git_sha, summary, call_results: [...] }`. Each entry in `call_results` is a flat normalized per-call result: `{ name, status, duration_ms, transcript, observed_tool_calls, metrics, cost_usd, ... }`. Compare: `call_results[i].status` flips, `call_results[i].metrics.latency_p50_ms` / `latency_p95_ms` changes >20%, `call_results[i].observed_tool_calls[].successful` count drops, `summary.total_cost_usd` increases >30%. Correlate with `git diff` between the two runs\' `git_sha` values. Use `--verbose` only when the default result is not enough to explain the failure. Skip if no previous run exists.\n7. After code changes, re-run the same way.\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n## Critical Rules\n\n1. **Run all calls in parallel as separate shell commands** \u2014 When a suite has multiple calls, emit each `npx vent-hq run` as its own shell tool call in the same response. Codex runs shell calls concurrently \u2014 they execute simultaneously. Set a 300-second (5 min) timeout on each. Do NOT combine calls into one command with `&`.\n2. **Wait for all results** \u2014 Do not end your response until every call has returned results.\n3. **Output format** \u2014 In non-TTY mode (when run by an agent), every SSE event is written to stdout as a JSON line. Results are always in stdout.\n4. **This skill is self-contained** \u2014 The full config schema is below.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n| `vent:session` | `{"type":"vent:session","platform":"custom","provider_call_id":"...","provider_session_id":"..."}` | Reports stable provider/session identifiers |\n| `vent:call-metadata` | `{"type":"vent:call-metadata","call_metadata":{...}}` | Reports post-call metadata such as cost, recordings, variables, and provider-specific artifacts |\n| `vent:transcript` | `{"type":"vent:transcript","role":"caller"\\|"agent","text":"...","turn_index":0}` | Reports platform/native transcript text for caller or agent |\n| `vent:transfer` | `{"type":"vent:transfer","destination":"...","status":"attempted"\\|"completed"}` | Reports transfer attempts and outcomes |\n| `vent:debug-url` | `{"type":"vent:debug-url","label":"log","url":"https://..."}` | Reports provider debug/deep-link URLs |\n| `vent:warning` | `{"type":"vent:warning","message":"...","code":"..."}` | Reports provider/runtime warnings worth preserving in run metadata |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n6. **IMPORTANT: `npx vent-hq` commands auto-load `.env` files \u2014 never use `source .env && export` before running them.** Only your own custom scripts (e.g. `npx tsx my-script.ts`) need manual env loading. To add a new credential, just append it to `.env` and the CLI picks it up automatically on the next run.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID or VAPI_AGENT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| Bland | persona_id | BLAND_PERSONA_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for Retell: Pay-as-you-go includes 20 concurrent calls, with more available on demand; Enterprise has no cap. Ask the user which plan they\'re on. If unknown, default to 20.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID, BLAND_PERSONA_ID. Only add bland_api_key/bland_pathway_id/persona_id to the JSON if those env vars are not already available.\nmax_concurrency for Bland: Start=10, Build=50, Scale=100, Enterprise=unlimited. Ask the user which plan they\'re on. If unknown, default to 10.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID (or VAPI_AGENT_ID). Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: every account includes 10 concurrent call slots by default; self-serve accounts can buy extra reserved lines, and Enterprise includes unlimited concurrency. Set this to the user\'s purchased limit. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\nmax_concurrency for ElevenLabs: Free=4, Starter=6, Creator=10, Pro=20, Scale=30, Business=30. Burst pricing can temporarily allow up to 3x the base limit. Ask the user which plan they\'re on and whether burst is enabled. If unknown, default to 4.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if your LiveKit agent registered with an explicit dispatch name in the SDK, e.g. Python `@server.rtc_session(agent_name="\u2026")` or `WorkerOptions(agent_name="\u2026")`, Node.js `new ServerOptions({ agentName: "\u2026" })`. Omit for automatic dispatch.\nThe livekit adapter requires the LiveKit Agents SDK. It depends on Agents SDK signals (lk.agent.state, lk.transcription) for readiness detection, turn timing, and component latency. Custom LiveKit participants not using the Agents SDK should use the websocket adapter with a relay instead.\nmax_concurrency for LiveKit Cloud: Build=5, Ship=20, Scale=50 managed inference sessions. Agent session concurrency can be higher (Build=5, Ship=20, Scale up to 600), but managed inference is the usual gating limit for voice agents. Ask the user which tier they\'re on. If unknown, default to 5.\nKnow the provider/account concurrency limits and use them in planning, but Vent does not enforce provider caps at runtime. Hosted worker throughput is an infra setting: `WORKER_TOTAL_CONCURRENCY` caps one worker Machine.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket: user\'s agent must emit a JSON text frame per tool call: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\nLiveKit: use the `@vent-hq/livekit` (Node) or `vent-livekit` (Python) helper. See the "LiveKit Agent Setup" section. The helper captures tool calls automatically from Agents SDK session events \u2014 do not publish on Vent topics manually.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<metadata_capture>\nWebSocket agents can emit richer observability metadata as JSON text frames:\n {"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n {"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12,"provider_debug_urls":{"log":"https://..."}}}\n {"type":"vent:debug-url","label":"trace","url":"https://..."}\n {"type":"vent:session-report","report":{"room_name":"room-123","events":[...],"metrics":[...]}}\n {"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0}\n\nLiveKit agents get all metadata through the `@vent-hq/livekit` (Node) / `vent-livekit` (Python) helper \u2014 it subscribes to Agents SDK session events (`metrics_collected`, `function_tools_executed`, `conversation_item_added`, `session_usage_updated`, close) and publishes on Vent topics automatically. Transcript and agent-state timing come from native LiveKit room signals (`lk.transcription`, `lk.agent.state`) \u2014 the helper does not duplicate them.\n\nNode.js \u2014 `npm install @vent-hq/livekit`:\n```ts\nimport { instrumentLiveKitAgent } from "@vent-hq/livekit";\n\nconst vent = instrumentLiveKitAgent({ ctx, session });\n```\nPython \u2014 `pip install vent-livekit`:\n```python\nfrom vent_livekit import instrument_livekit_agent\n\nvent = instrument_livekit_agent(ctx=ctx, session=session)\n```\n\nThe helper is the only supported integration path for LiveKit Agents SDK agents. Do not publish on `vent:*` topics manually \u2014 let the helper forward SDK events.\n</metadata_capture>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "optional preplanned interrupt tendency: low | high. If set, Vent may pre-plan a caller cut-in before the agent turn starts. It does NOT make a mid-turn interrupt LLM call.",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\nInterruption rules:\n- `audio_actions: [{ "action": "interrupt", ... }]` is the deterministic per-turn interrupt test. Prefer this for evaluation.\n- `persona.interruption_style` is only a preplanned caller tendency. If used, Vent decides before the agent response starts whether this turn may cut in.\n- Vent no longer pauses mid-turn to ask a second LLM whether to interrupt.\n- For production-faithful testing, prefer explicit `audio_actions.interrupt` over persona interruption.\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" },\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check availability.", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400 },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900 },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "response_time_ms": 890, "response_time_source": "ttfw",\n "p50_response_time_ms": 850, "p90_response_time_ms": 1100, "p95_response_time_ms": 1400, "p99_response_time_ms": 1550,\n "first_response_time_ms": 1950,\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", "10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "component_latency": {\n "mean_stt_ms": 120, "mean_llm_ms": 450, "mean_tts_ms": 80,\n "p95_stt_ms": 180, "p95_llm_ms": 620, "p95_tts_ms": 110,\n "mean_speech_duration_ms": 2100,\n "bottleneck": "llm"\n },\n "call_metadata": {\n "platform": "vapi",\n "cost_usd": 0.08,\n "recording_url": "https://example.com/recording",\n "ended_reason": "customer_ended_call",\n "transfers": []\n },\n "warnings": [],\n "audio_actions": [],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAlways present: name, status, caller_prompt, duration_ms, error, transcript, tool_calls, warnings, audio_actions. Nullable when analysis didn\'t run: latency, component_latency, call_metadata, emotion (requires prosody: true), debug (requires --verbose).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. **Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed (`call_metadata.transfer_completed`). Use `call_metadata.transfers[]` to report transfer type, destination, status, and sources.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. |\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. |\n\nIgnore minor STT mis-transcriptions in `transcript` text (e.g. `"check teach hat"` for `"check that"`, swapped homophones, missing question marks on short tails). These are streaming-STT artifacts, not agent bugs. Judge on semantic intent, not exact spelling. Only flag transcript quality when it prevents understanding what the agent actually said.\n\n### Interruption evaluation\n\nEvaluate interruption handling by reading the transcript and listening to the recording. Flag any turn where the agent ignores a barge-in, repeats itself from scratch, or loses context after being cut off.\n\n| Dimension | How to evaluate |\n|--------|----------------|\n| **Recovery** | After a caller cuts in, does the agent\'s next reply acknowledge or address the barge-in rather than restarting from scratch? |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state? |\n| **Overtalk** | Does the agent keep speaking for long after the caller starts, or does it yield promptly? Use the recording to judge. |\n</output_conversation_test>\n</call_config>\n\n\n## Exit Codes\n\n0=pass, 1=fail, 2=error\n';
6010
5911
 
6011
5912
  // src/lib/setup.ts
6012
5913
  var SUITE_SCAFFOLD = JSON.stringify(
@@ -6245,7 +6146,7 @@ async function main() {
6245
6146
  return 0;
6246
6147
  }
6247
6148
  if (command === "--version" || command === "-v") {
6248
- const pkg = await import("./package-YEV3QM6F.mjs");
6149
+ const pkg = await import("./package-GZOOKNXW.mjs");
6249
6150
  console.log(`vent-hq ${pkg.default.version}`);
6250
6151
  return 0;
6251
6152
  }
@@ -4,7 +4,7 @@ import "./chunk-XYDL7GY6.mjs";
4
4
  // package.json
5
5
  var package_default = {
6
6
  name: "vent-hq",
7
- version: "0.9.22",
7
+ version: "0.10.0",
8
8
  type: "module",
9
9
  description: "Vent CLI \u2014 CI/CD for voice AI agents",
10
10
  bin: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vent-hq",
3
- "version": "0.9.22",
3
+ "version": "0.10.0",
4
4
  "type": "module",
5
5
  "description": "Vent CLI — CI/CD for voice AI agents",
6
6
  "bin": {
@@ -9,11 +9,6 @@
9
9
  "files": [
10
10
  "dist"
11
11
  ],
12
- "scripts": {
13
- "build": "node scripts/bundle.mjs",
14
- "clean": "rm -rf dist",
15
- "prepack": "pnpm clean && pnpm build"
16
- },
17
12
  "keywords": [
18
13
  "vent",
19
14
  "cli",
@@ -36,9 +31,13 @@
36
31
  "ws": "^8.18.0"
37
32
  },
38
33
  "devDependencies": {
39
- "@types/ws": "catalog:",
40
- "@vent/relay-client": "workspace:*",
41
- "@vent/shared": "workspace:*",
42
- "esbuild": "catalog:"
34
+ "@types/ws": "^8.5.0",
35
+ "esbuild": "^0.24.0",
36
+ "@vent/relay-client": "0.1.0",
37
+ "@vent/shared": "0.0.1"
38
+ },
39
+ "scripts": {
40
+ "build": "node scripts/bundle.mjs",
41
+ "clean": "rm -rf dist"
43
42
  }
44
- }
43
+ }