vent-hq 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -73,8 +73,8 @@ import * as path from "node:path";
73
73
  import { homedir } from "node:os";
74
74
  var CONFIG_DIR = path.join(homedir(), ".vent");
75
75
  var CREDENTIALS_FILE = path.join(CONFIG_DIR, "credentials");
76
- var API_BASE = process.env.VENT_API_URL ?? "https://vent-api.fly.dev";
77
- var DASHBOARD_URL = process.env.VENT_DASHBOARD_URL ?? "https://ventmcp.dev";
76
+ var API_BASE = process.env.VENT_API_URL ?? "https://api.venthq.dev";
77
+ var DASHBOARD_URL = process.env.VENT_DASHBOARD_URL ?? "https://venthq.dev";
78
78
  async function loadAccessToken() {
79
79
  if (process.env.VENT_ACCESS_TOKEN) return process.env.VENT_ACCESS_TOKEN;
80
80
  try {
@@ -152,372 +152,10 @@ function openBrowser(url) {
152
152
 
153
153
  // src/lib/output.ts
154
154
  import { writeFileSync } from "node:fs";
155
- var isTTY = process.stdout.isTTY;
156
- var _verbose = false;
157
- function debug(msg) {
158
- if (!_verbose) return;
159
- const ts = (/* @__PURE__ */ new Date()).toISOString().slice(11, 23);
160
- process.stderr.write(`[vent ${ts}] ${msg}
161
- `);
162
- }
163
- function isVerbose() {
164
- return _verbose;
165
- }
166
- function stdoutSync(data) {
167
- if (isTTY) {
168
- process.stdout.write(data);
169
- } else {
170
- try {
171
- writeFileSync(1, data);
172
- } catch {
173
- process.stdout.write(data);
174
- }
175
- }
176
- }
177
- var bold = (s) => isTTY ? `\x1B[1m${s}\x1B[0m` : s;
178
- var dim = (s) => isTTY ? `\x1B[2m${s}\x1B[0m` : s;
179
- var green = (s) => isTTY ? `\x1B[32m${s}\x1B[0m` : s;
180
- var red = (s) => isTTY ? `\x1B[31m${s}\x1B[0m` : s;
181
- var blue = (s) => isTTY ? `\x1B[34m${s}\x1B[0m` : s;
182
- function printEvent(event) {
183
- if (!isTTY) return;
184
- const meta = event.metadata_json ?? {};
185
- switch (event.event_type) {
186
- case "call_completed":
187
- printCallResult(meta);
188
- break;
189
- case "run_complete":
190
- printRunComplete(meta);
191
- break;
192
- case "call_started": {
193
- const name = meta.call_name ?? "call";
194
- process.stderr.write(dim(` \u25B8 ${name}\u2026`) + "\n");
195
- break;
196
- }
197
- default:
198
- process.stderr.write(dim(` [${event.event_type}]`) + "\n");
199
- }
200
- }
201
- function printCallResult(meta) {
202
- const result = meta.result;
203
- const callName = result?.name ?? meta.call_name ?? "call";
204
- const callStatus = result?.status ?? meta.status;
205
- const durationMs = result?.duration_ms ?? meta.duration_ms;
206
- const statusIcon = callStatus === "completed" || callStatus === "pass" ? green("\u2714") : red("\u2718");
207
- const duration = durationMs != null ? (durationMs / 1e3).toFixed(1) + "s" : "\u2014";
208
- const parts = [statusIcon, bold(callName), dim(duration)];
209
- if (result?.latency?.p50_response_time_ms != null) {
210
- parts.push(`p50: ${result.latency.p50_response_time_ms}ms`);
211
- }
212
- if (result?.call_metadata?.transfer_attempted) {
213
- const transferLabel = result.call_metadata.transfer_completed ? "transfer: completed" : "transfer: attempted";
214
- parts.push(transferLabel);
215
- }
216
- stdoutSync(parts.join(" ") + "\n");
217
- }
218
- function printRunComplete(meta) {
219
- const status = meta.status;
220
- const agg = meta.aggregate;
221
- const counts = agg?.conversation_calls;
222
- const total = meta.total_calls ?? counts?.total;
223
- const passed = meta.passed_calls ?? counts?.passed;
224
- const failed = meta.failed_calls ?? counts?.failed;
225
- stdoutSync("\n");
226
- if (status === "pass") {
227
- stdoutSync(green(bold("Run passed")) + "\n");
228
- } else {
229
- stdoutSync(red(bold("Run failed")) + "\n");
230
- }
231
- if (total != null) {
232
- const parts = [];
233
- if (passed) parts.push(green(`${passed} passed`));
234
- if (failed) parts.push(red(`${failed} failed`));
235
- parts.push(`${total} total`);
236
- stdoutSync(parts.join(dim(" \xB7 ")) + "\n");
237
- }
238
- }
239
- function printSummary(callResults, runComplete, runId) {
240
- const allCalls = callResults.map((e) => {
241
- const meta = e.metadata_json ?? {};
242
- const r = meta.result;
243
- if (r) return r;
244
- return {
245
- name: meta.call_name ?? "call",
246
- status: meta.status ?? "unknown",
247
- duration_ms: meta.duration_ms,
248
- error: null
249
- };
250
- });
251
- const agg = runComplete.aggregate;
252
- const counts = agg?.conversation_calls;
253
- const summaryData = {
254
- run_id: runId,
255
- status: runComplete.status,
256
- total: runComplete.total_calls ?? counts?.total,
257
- passed: runComplete.passed_calls ?? counts?.passed,
258
- failed: runComplete.failed_calls ?? counts?.failed,
259
- calls: allCalls
260
- };
261
- if (!isTTY) {
262
- stdoutSync(JSON.stringify(summaryData, null, 2) + "\n");
263
- return;
264
- }
265
- const failures = allCalls.filter((t2) => t2.status && t2.status !== "completed" && t2.status !== "pass");
266
- if (failures.length > 0) {
267
- stdoutSync("\n" + bold("Failed calls:") + "\n");
268
- for (const t2 of failures) {
269
- const duration = t2.duration_ms != null ? (t2.duration_ms / 1e3).toFixed(1) + "s" : "\u2014";
270
- const parts = [red("\u2718"), bold(t2.name ?? "call"), dim(duration)];
271
- stdoutSync(" " + parts.join(" ") + "\n");
272
- }
273
- }
274
- process.stderr.write(dim(`Full details: vent status ${runId} --json`) + "\n");
275
- }
276
- function printError(message) {
277
- const line = red(bold("error")) + ` ${message}
278
- `;
279
- process.stderr.write(line);
280
- if (!isTTY) {
281
- stdoutSync(line);
282
- }
283
- }
284
- function printInfo(message, { force } = {}) {
285
- if (!force && !isTTY && !_verbose) return;
286
- const line = blue("\u25B8") + ` ${message}
287
- `;
288
- process.stderr.write(line);
289
- if (!isTTY && force) stdoutSync(line);
290
- }
291
- function printSuccess(message, { force } = {}) {
292
- if (!force && !isTTY && !_verbose) return;
293
- const line = green("\u2714") + ` ${message}
294
- `;
295
- process.stderr.write(line);
296
- if (!isTTY && force) stdoutSync(line);
297
- }
298
-
299
- // src/lib/auth.ts
300
- var POLL_INTERVAL_MS = 2e3;
301
- function sleep(ms) {
302
- return new Promise((r) => setTimeout(r, ms));
303
- }
304
- async function deviceAuthFlow() {
305
- let startData;
306
- try {
307
- const res = await fetch(`${API_BASE}/device/start`, { method: "POST" });
308
- if (!res.ok) {
309
- return { ok: false, error: `Failed to start device auth: ${res.status}` };
310
- }
311
- startData = await res.json();
312
- } catch {
313
- return { ok: false, error: "Could not reach Vent API. Check your connection." };
314
- }
315
- printInfo(`Your authorization code: ${startData.user_code}`, { force: true });
316
- printInfo(`Opening browser to log in...`, { force: true });
317
- printInfo(`If the browser doesn't open, visit: ${startData.verification_url}`, { force: true });
318
- openBrowser(startData.verification_url);
319
- const deadline = new Date(startData.expires_at).getTime();
320
- while (Date.now() < deadline) {
321
- await sleep(POLL_INTERVAL_MS);
322
- try {
323
- const res = await fetch(`${API_BASE}/device/exchange`, {
324
- method: "POST",
325
- headers: { "Content-Type": "application/json" },
326
- body: JSON.stringify({ session_id: startData.session_id })
327
- });
328
- if (!res.ok) continue;
329
- const data = await res.json();
330
- const accessToken = data.access_token;
331
- if (data.status === "approved" && accessToken) {
332
- await saveAccessToken(accessToken);
333
- return { ok: true, accessToken };
334
- }
335
- if (data.status === "expired") {
336
- return { ok: false, error: "Session expired. Run `npx vent-hq login` again." };
337
- }
338
- if (data.status === "consumed" || data.status === "invalid") {
339
- return { ok: false, error: "Session invalid. Run `npx vent-hq login` again." };
340
- }
341
- } catch {
342
- }
343
- }
344
- return { ok: false, error: "Login timed out. Run `npx vent-hq login` again." };
345
- }
346
-
347
- // src/lib/sse.ts
348
- function log(msg) {
349
- if (!isVerbose()) return;
350
- const ts = (/* @__PURE__ */ new Date()).toISOString().slice(11, 23);
351
- const line = `[vent:sse ${ts}] ${msg}
352
- `;
353
- process.stderr.write(line);
354
- }
355
- var MAX_RETRIES = 5;
356
- var RETRY_DELAY_MS = 2e3;
357
- async function* streamRunEvents(runId, apiKey, signal) {
358
- const url = `${API_BASE}/runs/${runId}/stream`;
359
- const seenIds = /* @__PURE__ */ new Set();
360
- let retries = 0;
361
- while (retries <= MAX_RETRIES) {
362
- if (retries > 0) {
363
- log(`reconnecting (attempt ${retries}/${MAX_RETRIES}) after ${RETRY_DELAY_MS}ms\u2026`);
364
- await new Promise((r) => setTimeout(r, RETRY_DELAY_MS));
365
- }
366
- log(`connecting to ${url}`);
367
- let res;
368
- try {
369
- res = await fetch(url, {
370
- headers: { Authorization: `Bearer ${apiKey}` },
371
- signal
372
- });
373
- } catch (err) {
374
- if (err.name === "AbortError") throw err;
375
- log(`fetch error: ${err.message}`);
376
- retries++;
377
- continue;
378
- }
379
- log(`response: status=${res.status} content-type=${res.headers.get("content-type")}`);
380
- if (!res.ok) {
381
- const body = await res.text();
382
- log(`error body: ${body}`);
383
- throw new Error(`SSE stream failed (${res.status}): ${body}`);
384
- }
385
- if (!res.body) {
386
- throw new Error("SSE stream returned no body");
387
- }
388
- const reader = res.body.getReader();
389
- const decoder = new TextDecoder();
390
- let buffer = "";
391
- let chunkCount = 0;
392
- let eventCount = 0;
393
- let gotRunComplete = false;
394
- let streamError = null;
395
- try {
396
- while (true) {
397
- let readResult;
398
- try {
399
- readResult = await reader.read();
400
- } catch (err) {
401
- if (err.name === "AbortError") throw err;
402
- streamError = err;
403
- log(`read error: ${streamError.message}`);
404
- break;
405
- }
406
- const { done, value } = readResult;
407
- if (done) {
408
- log(`stream done after ${chunkCount} chunks, ${eventCount} events`);
409
- break;
410
- }
411
- chunkCount++;
412
- const chunk = decoder.decode(value, { stream: true });
413
- buffer += chunk;
414
- if (chunkCount <= 3 || chunkCount % 10 === 0) {
415
- log(`chunk #${chunkCount} (${chunk.length} bytes) buffer=${buffer.length} bytes`);
416
- }
417
- const lines = buffer.split("\n");
418
- buffer = lines.pop();
419
- for (const line of lines) {
420
- if (line.startsWith("data: ")) {
421
- const raw = line.slice(6);
422
- try {
423
- const event = JSON.parse(raw);
424
- eventCount++;
425
- if (event.id && seenIds.has(event.id)) {
426
- log(`skipping duplicate event ${event.id}`);
427
- continue;
428
- }
429
- if (event.id) seenIds.add(event.id);
430
- log(`parsed event #${eventCount}: type=${event.event_type}`);
431
- yield event;
432
- if (event.event_type === "run_complete") {
433
- log("run_complete received \u2014 closing stream");
434
- gotRunComplete = true;
435
- return;
436
- }
437
- } catch {
438
- log(`malformed JSON: ${raw.slice(0, 200)}`);
439
- }
440
- } else if (line.startsWith(": ")) {
441
- if (chunkCount <= 3) {
442
- log(`heartbeat: "${line}"`);
443
- }
444
- }
445
- }
446
- }
447
- } finally {
448
- reader.releaseLock();
449
- log("reader released");
450
- }
451
- if (gotRunComplete) return;
452
- retries++;
453
- if (retries <= MAX_RETRIES) {
454
- log(`stream ended without run_complete \u2014 will retry (${retries}/${MAX_RETRIES})`);
455
- }
456
- }
457
- log(`exhausted ${MAX_RETRIES} retries without run_complete`);
458
- }
459
-
460
- // src/lib/run-history.ts
461
- import * as fs2 from "node:fs/promises";
462
- import * as path2 from "node:path";
463
- import { execSync } from "node:child_process";
464
- function gitInfo() {
465
- try {
466
- const sha = execSync("git rev-parse HEAD", { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
467
- const branch = execSync("git branch --show-current", { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim() || null;
468
- const status = execSync("git status --porcelain", { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
469
- return { sha, branch, dirty: status.length > 0 };
470
- } catch {
471
- return { sha: null, branch: null, dirty: false };
472
- }
473
- }
474
- async function saveRunHistory(runId, callResults, runCompleteData) {
475
- try {
476
- const dir = path2.join(process.cwd(), ".vent", "runs");
477
- await fs2.mkdir(dir, { recursive: true });
478
- const git = gitInfo();
479
- const now = /* @__PURE__ */ new Date();
480
- const timestamp = now.toISOString().replace(/[:.]/g, "-").slice(0, 19);
481
- const shortId = runId.slice(0, 8);
482
- const aggregate = runCompleteData.aggregate;
483
- const convCalls = aggregate?.conversation_calls;
484
- const total = convCalls?.total ?? 0;
485
- const passed = convCalls?.passed ?? 0;
486
- const failed = convCalls?.failed ?? 0;
487
- const entry = {
488
- run_id: runId,
489
- timestamp: now.toISOString(),
490
- git_sha: git.sha,
491
- git_branch: git.branch,
492
- git_dirty: git.dirty,
493
- summary: {
494
- status: runCompleteData.status ?? "unknown",
495
- calls_total: total,
496
- calls_passed: passed,
497
- calls_failed: failed,
498
- total_duration_ms: aggregate?.total_duration_ms,
499
- total_cost_usd: aggregate?.total_cost_usd
500
- },
501
- call_results: callResults.map((e) => e.metadata_json ?? {})
502
- };
503
- const filename = `${timestamp}_${shortId}.json`;
504
- const filepath = path2.join(dir, filename);
505
- await fs2.writeFile(filepath, JSON.stringify(entry, null, 2) + "\n");
506
- return filepath;
507
- } catch {
508
- return null;
509
- }
510
- }
511
155
 
512
156
  // ../shared/src/types.ts
513
- var AUDIO_CALL_NAMES = [
514
- "audio_quality",
515
- "latency",
516
- "echo"
517
- ];
518
157
  var AUDIO_ACTION_TYPES = [
519
158
  "interrupt",
520
- "silence",
521
159
  "inject_noise",
522
160
  "split_sentence",
523
161
  "noise_on_caller"
@@ -4565,7 +4203,6 @@ var coerce = {
4565
4203
  var NEVER = INVALID;
4566
4204
 
4567
4205
  // ../shared/src/schemas.ts
4568
- var AudioCallNameSchema = external_exports.enum(AUDIO_CALL_NAMES);
4569
4206
  var AudioActionSchema = external_exports.object({
4570
4207
  at_turn: external_exports.number().int().min(0),
4571
4208
  action: external_exports.enum(AUDIO_ACTION_TYPES),
@@ -4631,6 +4268,7 @@ var ObservedToolCallSchema = external_exports.object({
4631
4268
  arguments: external_exports.record(external_exports.unknown()),
4632
4269
  result: external_exports.unknown().optional(),
4633
4270
  successful: external_exports.boolean().optional(),
4271
+ provider_tool_type: external_exports.string().optional(),
4634
4272
  timestamp_ms: external_exports.number().optional(),
4635
4273
  latency_ms: external_exports.number().optional(),
4636
4274
  turn_index: external_exports.number().int().min(0).optional()
@@ -4732,19 +4370,6 @@ var AudioAnalysisWarningSchema = external_exports.object({
4732
4370
  severity: external_exports.enum(["warning", "critical"]),
4733
4371
  message: external_exports.string()
4734
4372
  });
4735
- var CallDiagnosticsSchema = external_exports.object({
4736
- error_origin: external_exports.enum(["platform", "agent"]).nullable(),
4737
- error_detail: external_exports.string().nullable(),
4738
- timing: external_exports.object({
4739
- channel_connect_ms: external_exports.number()
4740
- }),
4741
- channel: external_exports.object({
4742
- connected: external_exports.boolean(),
4743
- error_events: external_exports.array(external_exports.string()),
4744
- audio_bytes_sent: external_exports.number(),
4745
- audio_bytes_received: external_exports.number()
4746
- })
4747
- });
4748
4373
  var ConversationTurnSchema = external_exports.object({
4749
4374
  role: external_exports.enum(["caller", "agent"]),
4750
4375
  text: external_exports.string(),
@@ -4760,7 +4385,8 @@ var ConversationTurnSchema = external_exports.object({
4760
4385
  component_latency: external_exports.object({
4761
4386
  stt_ms: external_exports.number().optional(),
4762
4387
  llm_ms: external_exports.number().optional(),
4763
- tts_ms: external_exports.number().optional()
4388
+ tts_ms: external_exports.number().optional(),
4389
+ speech_duration_ms: external_exports.number().optional()
4764
4390
  }).optional(),
4765
4391
  platform_transcript: external_exports.string().optional(),
4766
4392
  interrupted: external_exports.boolean().optional(),
@@ -4772,7 +4398,8 @@ var HallucinationEventSchema = external_exports.object({
4772
4398
  hypothesis_text: external_exports.string()
4773
4399
  });
4774
4400
  var TranscriptMetricsSchema = external_exports.object({
4775
- wer: external_exports.number().min(0).max(1).optional(),
4401
+ wer: external_exports.number().min(0).optional(),
4402
+ cer: external_exports.number().min(0).optional(),
4776
4403
  hallucination_events: external_exports.array(HallucinationEventSchema).optional(),
4777
4404
  repetition_score: external_exports.number().min(0).max(1).optional(),
4778
4405
  reprompt_count: external_exports.number().int().min(0).optional(),
@@ -4807,7 +4434,7 @@ var AudioAnalysisMetricsSchema = external_exports.object({
4807
4434
  talk_ratio_vad: external_exports.number(),
4808
4435
  interruption_rate: external_exports.number().min(0).max(1),
4809
4436
  interruption_count: external_exports.number().int().min(0),
4810
- barge_in_recovery_time_ms: external_exports.number().min(0).optional(),
4437
+ agent_overtalk_after_barge_in_ms: external_exports.number().min(0).optional(),
4811
4438
  agent_interrupting_user_rate: external_exports.number().min(0).max(1),
4812
4439
  agent_interrupting_user_count: external_exports.number().int().min(0),
4813
4440
  missed_response_windows: external_exports.number().int().min(0),
@@ -4885,6 +4512,11 @@ var CostBreakdownSchema = external_exports.object({
4885
4512
  llm_prompt_tokens: external_exports.number().int().optional(),
4886
4513
  llm_completion_tokens: external_exports.number().int().optional()
4887
4514
  });
4515
+ var ProviderWarningSchema = external_exports.object({
4516
+ message: external_exports.string().optional(),
4517
+ code: external_exports.string().optional(),
4518
+ detail: external_exports.unknown().optional()
4519
+ });
4888
4520
  var CallTransferSchema = external_exports.object({
4889
4521
  type: external_exports.string(),
4890
4522
  destination: external_exports.string().optional(),
@@ -4894,16 +4526,17 @@ var CallTransferSchema = external_exports.object({
4894
4526
  });
4895
4527
  var CallMetadataSchema = external_exports.object({
4896
4528
  platform: external_exports.string(),
4529
+ provider_call_id: external_exports.string().optional(),
4530
+ provider_session_id: external_exports.string().optional(),
4897
4531
  ended_reason: external_exports.string().optional(),
4898
- duration_s: external_exports.number().optional(),
4899
4532
  cost_usd: external_exports.number().optional(),
4900
4533
  cost_breakdown: CostBreakdownSchema.optional(),
4901
- recording_url: external_exports.string().nullable().optional(),
4902
- summary: external_exports.string().nullable().optional(),
4903
- success_evaluation: external_exports.string().nullable().optional(),
4904
- user_sentiment: external_exports.string().nullable().optional(),
4905
- call_successful: external_exports.boolean().optional(),
4534
+ recording_url: external_exports.string().optional(),
4535
+ recording_variants: external_exports.record(external_exports.string()).optional(),
4536
+ provider_debug_urls: external_exports.record(external_exports.string()).optional(),
4906
4537
  variables: external_exports.record(external_exports.unknown()).optional(),
4538
+ provider_warnings: external_exports.array(ProviderWarningSchema).optional(),
4539
+ provider_metadata: external_exports.record(external_exports.unknown()).optional(),
4907
4540
  transfers: external_exports.array(CallTransferSchema).optional()
4908
4541
  });
4909
4542
  var ConversationMetricsSchema = external_exports.object({
@@ -4920,15 +4553,6 @@ var ConversationMetricsSchema = external_exports.object({
4920
4553
  harness_overhead: HarnessOverheadSchema.optional(),
4921
4554
  component_latency: ComponentLatencyMetricsSchema.optional()
4922
4555
  });
4923
- var AudioCallResultSchema = external_exports.object({
4924
- call_name: AudioCallNameSchema,
4925
- status: external_exports.enum(["completed", "error"]),
4926
- metrics: external_exports.record(external_exports.union([external_exports.number(), external_exports.boolean(), external_exports.array(external_exports.number())])),
4927
- transcriptions: external_exports.record(external_exports.union([external_exports.string(), external_exports.array(external_exports.string()), external_exports.null()])),
4928
- duration_ms: external_exports.number(),
4929
- error: external_exports.string().optional(),
4930
- diagnostics: CallDiagnosticsSchema.optional()
4931
- });
4932
4556
  var ConversationCallResultSchema = external_exports.object({
4933
4557
  name: external_exports.string().optional(),
4934
4558
  caller_prompt: external_exports.string(),
@@ -4958,6 +4582,653 @@ var RunnerCallbackV2Schema = external_exports.object({
4958
4582
  error_text: external_exports.string().optional()
4959
4583
  });
4960
4584
 
4585
+ // ../shared/src/format-result.ts
4586
/**
 * Convert one raw conversation call result into the formatted output shape.
 *
 * @param {unknown} raw - Candidate result; rejected unless it is an object
 *   whose `caller_prompt` is a string.
 * @param {{ verbose?: boolean }} [options] - When `verbose` is truthy a
 *   top-level `debug` section is attached (if `formatDebug` yields one); the
 *   options are also forwarded to `formatTranscript`.
 * @returns {object | null} The formatted result, or `null` when `raw` is rejected.
 */
function formatConversationResult(raw, options = {}) {
  const looksLikeResult =
    Boolean(raw) && typeof raw === "object" && typeof raw.caller_prompt === "string";
  if (!looksLikeResult) return null;

  const verboseDetails = options.verbose ? formatDebug(raw) : void 0;
  const metrics = raw.metrics;
  const callMeta = raw.call_metadata;

  const formatted = {
    name: raw.name ?? null,
    status: raw.status,
    caller_prompt: raw.caller_prompt,
    duration_ms: raw.duration_ms,
    error: raw.error ?? null,
    transcript: formatTranscript(raw.transcript, options),
    latency: metrics?.latency ? formatLatency(metrics.latency, metrics) : null,
    // Metric groups whose every value is null/undefined are reported as null.
    transcript_quality:
      metrics?.transcript && hasContent(metrics.transcript) ? metrics.transcript : null,
    audio_analysis:
      metrics?.audio_analysis && hasContent(metrics.audio_analysis)
        ? formatAudioAnalysis(metrics.audio_analysis)
        : null,
    tool_calls: formatToolCalls(metrics?.tool_calls, raw.observed_tool_calls),
    component_latency: formatComponentLatency(metrics?.component_latency),
    call_metadata: formatCallMetadata(callMeta),
    // Audio-analysis, prosody and provider warnings merged into one de-duplicated list.
    warnings: dedupeStrings([
      ...(metrics?.audio_analysis_warnings ?? []).map((w) => w.message),
      ...(metrics?.prosody_warnings ?? []).map((w) => w.message),
      ...formatProviderWarningMessages(callMeta?.provider_warnings)
    ]),
    audio_actions: raw.audio_action_results ?? [],
    emotion: metrics?.prosody ? formatEmotion(metrics.prosody) : null
  };
  if (verboseDetails) {
    formatted.debug = verboseDetails;
  }
  return formatted;
}
4614
/**
 * Reduce raw transcript turns to the fields exposed in formatted output.
 *
 * Every turn keeps `role` and `text`. The timing/interruption fields are copied
 * only when they are not null/undefined. In verbose mode the remaining
 * diagnostic fields are gathered under `turn.debug` (omitted when all are empty).
 *
 * @param {Array<object> | null | undefined} turns - Raw turns; a falsy value yields `[]`.
 * @param {{ verbose?: boolean }} [options] - Defaults to `{}` so the function
 *   can be called with the turns alone.
 * @returns {Array<object>} One formatted turn per input turn.
 */
function formatTranscript(turns, options = {}) {
  if (!turns) return [];
  // `options?.` also tolerates an explicit null, which a default parameter does not cover.
  const verbose = Boolean(options?.verbose);
  const optionalFields = ["ttfb_ms", "ttfw_ms", "audio_duration_ms", "interrupted", "is_interruption"];
  return turns.map((t2) => {
    const turn = {
      role: t2.role,
      text: t2.text
    };
    for (const field of optionalFields) {
      if (t2[field] != null) turn[field] = t2[field];
    }
    if (verbose) {
      const debug2 = compactUnknownRecord({
        timestamp_ms: t2.timestamp_ms,
        caller_decision_mode: t2.caller_decision_mode,
        silence_pad_ms: t2.silence_pad_ms,
        stt_confidence: t2.stt_confidence,
        // The per-turn tts_ms/stt_ms values are exposed under harness_* names.
        harness_tts_ms: t2.tts_ms,
        harness_stt_ms: t2.stt_ms,
        component_latency: t2.component_latency,
        platform_transcript: t2.platform_transcript
      });
      if (debug2 && Object.keys(debug2).length > 0) {
        turn.debug = debug2;
      }
    }
    return turn;
  });
}
4644
/**
 * Build the latency summary, preferring TTFW figures over TTFB when available.
 *
 * TTFW is used only when `metrics.mean_ttfw_ms`, `latency.p50_ttfw_ms` and
 * `latency.p95_ttfw_ms` are all present; its p90, p99 and first-turn values
 * fall back to the TTFB counterparts when missing. `response_time_source`
 * records which family was used.
 *
 * @param {object} latency - Latency metrics (percentiles, silence, turn gaps).
 * @param {object} metrics - Parent metrics object holding the mean values.
 * @returns {object} Latency summary.
 */
function formatLatency(latency, metrics) {
  const ttfwAvailable = !(
    metrics.mean_ttfw_ms == null ||
    latency.p50_ttfw_ms == null ||
    latency.p95_ttfw_ms == null
  );

  const ttfw = ttfwAvailable
    ? {
        mean: metrics.mean_ttfw_ms,
        p50: latency.p50_ttfw_ms,
        p90: latency.p90_ttfw_ms ?? latency.p90_ttfb_ms,
        p95: latency.p95_ttfw_ms,
        p99: latency.p99_ttfw_ms ?? latency.p99_ttfb_ms,
        first: latency.first_turn_ttfw_ms ?? latency.first_turn_ttfb_ms
      }
    : null;
  const chosen = ttfw ?? {
    mean: metrics.mean_ttfb_ms,
    p50: latency.p50_ttfb_ms,
    p90: latency.p90_ttfb_ms,
    p95: latency.p95_ttfb_ms,
    p99: latency.p99_ttfb_ms,
    first: latency.first_turn_ttfb_ms
  };

  const summary = {
    response_time_ms: chosen.mean,
    response_time_source: ttfwAvailable ? "ttfw" : "ttfb",
    p50_response_time_ms: chosen.p50,
    p90_response_time_ms: chosen.p90,
    p95_response_time_ms: chosen.p95,
    p99_response_time_ms: chosen.p99,
    first_response_time_ms: chosen.first,
    total_silence_ms: latency.total_silence_ms,
    mean_turn_gap_ms: latency.mean_turn_gap_ms
  };

  if (ttfw) {
    // The explicit *_ttfw_ms keys are emitted only when TTFW was the source.
    summary.mean_ttfw_ms = ttfw.mean;
    summary.p50_ttfw_ms = ttfw.p50;
    summary.p90_ttfw_ms = ttfw.p90;
    summary.p95_ttfw_ms = ttfw.p95;
    summary.p99_ttfw_ms = ttfw.p99;
    summary.first_turn_ttfw_ms = ttfw.first;
  }

  for (const key of ["drift_slope_ms_per_turn", "mean_silence_pad_ms", "mouth_to_ear_est_ms"]) {
    if (latency[key] != null) summary[key] = latency[key];
  }
  return summary;
}
4671
/**
 * Project audio-analysis metrics onto the fixed set of fields shown in output.
 *
 * Every listed key is always present in the result (its value is `undefined`
 * when the input lacks it); any other input property is dropped.
 *
 * @param {object} audio - Audio analysis metrics.
 * @returns {object} Object containing exactly the listed fields.
 */
function formatAudioAnalysis(audio) {
  const exposedFields = [
    "caller_talk_time_ms",
    "agent_talk_time_ms",
    "agent_speech_ratio",
    "talk_ratio_vad",
    "interruption_rate",
    "interruption_count",
    "agent_overtalk_after_barge_in_ms",
    "agent_interrupting_user_rate",
    "agent_interrupting_user_count",
    "missed_response_windows",
    "longest_monologue_ms",
    "silence_gaps_over_2s",
    "total_internal_silence_ms",
    "mean_agent_speech_segment_ms"
  ];
  return Object.fromEntries(exposedFields.map((field) => [field, audio[field]]));
}
4689
/**
 * Summarise tool-call activity for a call.
 *
 * Counts and names come from `summary` when it provides them; otherwise they
 * are derived from the `observed` calls (an entry counts as failed only when
 * `successful === false`). `mean_latency_ms` is taken from `summary` alone.
 *
 * @param {object | null | undefined} summary - Pre-computed tool-call metrics.
 * @param {Array<object> | null | undefined} observed - Observed tool calls.
 * @returns {object} Totals, distinct names and the trimmed observed calls.
 */
function formatToolCalls(summary, observed) {
  const calls = observed ?? [];
  const countWhere = (predicate) => calls.filter(predicate).length;
  const distinctNames = () => Array.from(new Set(calls.map((call) => call.name)));
  const trimCall = (call) => ({
    name: call.name,
    arguments: call.arguments,
    result: call.result,
    successful: call.successful,
    provider_tool_type: call.provider_tool_type,
    latency_ms: call.latency_ms,
    turn_index: call.turn_index
  });

  return {
    total: summary?.total ?? calls.length,
    successful: summary?.successful ?? countWhere((call) => call.successful),
    failed: summary?.failed ?? countWhere((call) => call.successful === false),
    mean_latency_ms: summary?.mean_latency_ms,
    names: summary?.names ?? distinctNames(),
    observed: calls.map(trimCall)
  };
}
4707
/**
 * Pick the prosody fields reported under `emotion`.
 *
 * @param {object} prosody - Prosody metrics.
 * @returns {object} The five exposed fields; any other property is dropped.
 */
function formatEmotion(prosody) {
  const {
    naturalness,
    mean_calmness,
    mean_confidence,
    peak_frustration,
    emotion_trajectory
  } = prosody;
  return { naturalness, mean_calmness, mean_confidence, peak_frustration, emotion_trajectory };
}
4716
/**
 * Summarise STT/LLM/TTS component latency and derive the mean speech duration.
 *
 * `mean_speech_duration_ms` is the rounded average of the per-turn
 * `speech_duration_ms` values that are present, or `undefined` when none are.
 *
 * @param {object | null | undefined} cl - Component latency metrics; may carry
 *   a `per_turn` array.
 * @returns {object | null} The summary, or `null` when `cl` is falsy.
 */
function formatComponentLatency(cl) {
  if (!cl) return null;
  // Tolerate a missing or malformed per_turn list: this formatter receives
  // unvalidated data, and an absent list should not discard the other means.
  const perTurn = Array.isArray(cl.per_turn) ? cl.per_turn : [];
  const speechDurations = perTurn.map((t2) => t2?.speech_duration_ms).filter((v) => v != null);
  const meanSpeech = speechDurations.length > 0
    ? Math.round(speechDurations.reduce((a, b) => a + b, 0) / speechDurations.length)
    : void 0;
  return {
    mean_stt_ms: cl.mean_stt_ms,
    mean_llm_ms: cl.mean_llm_ms,
    mean_tts_ms: cl.mean_tts_ms,
    p95_stt_ms: cl.p95_stt_ms,
    p95_llm_ms: cl.p95_llm_ms,
    p95_tts_ms: cl.p95_tts_ms,
    mean_speech_duration_ms: meanSpeech,
    bottleneck: cl.bottleneck
  };
}
4731
/**
 * Format call metadata and, when transfers exist, add transfer roll-up fields.
 *
 * Transfer fields (`transfer_attempted`, `transfer_completed`, `escalated`,
 * `transfer_count`, `completed_transfer_count`, `transfers`) are added only
 * when at least one transfer is present. A transfer counts as completed when
 * its `status` is `"completed"`.
 *
 * @param {object | null | undefined} meta - Raw call metadata.
 * @returns {object | null} Formatted metadata, or `null` when `meta` is falsy.
 */
function formatCallMetadata(meta) {
  if (!meta) return null;

  const transfers = meta.transfers?.map(({ type, destination, status, sources, timestamp_ms }) => ({
    type,
    destination,
    status,
    sources,
    // timestamp_ms is only carried over when the transfer actually has one.
    ...(timestamp_ms != null ? { timestamp_ms } : {})
  }));

  const formatted = {
    platform: meta.platform,
    provider_call_id: meta.provider_call_id,
    provider_session_id: meta.provider_session_id,
    ended_reason: meta.ended_reason,
    cost_usd: meta.cost_usd,
    cost_breakdown: meta.cost_breakdown,
    recording_url: meta.recording_url,
    recording_variants: meta.recording_variants,
    provider_debug_urls: meta.provider_debug_urls,
    variables: meta.variables
  };

  if (!transfers || transfers.length === 0) return formatted;

  const completedCount = transfers.reduce(
    (count, transfer) => (transfer.status === "completed" ? count + 1 : count),
    0
  );
  const anyCompleted = completedCount > 0;
  return Object.assign(formatted, {
    transfer_attempted: true,
    transfer_completed: anyCompleted,
    escalated: anyCompleted,
    transfer_count: transfers.length,
    completed_transfer_count: completedCount,
    transfers
  });
}
4769
/**
 * Collect the verbose-only diagnostic sections of a call result.
 *
 * Null/undefined sections and empty arrays are left out.
 *
 * @param {object} result - Raw call result.
 * @returns {object | undefined} The debug sections, or `undefined` when
 *   nothing is left after compaction.
 */
function formatDebug(result) {
  const metrics = result.metrics;
  const callMeta = result.call_metadata;

  const sections = compactUnknownRecord({
    signal_quality: metrics?.signal_quality,
    harness_overhead: metrics?.harness_overhead,
    prosody: metrics?.prosody,
    audio_analysis_warnings: nonEmptyArray(metrics?.audio_analysis_warnings),
    prosody_warnings: nonEmptyArray(metrics?.prosody_warnings),
    provider_warnings: nonEmptyArray(callMeta?.provider_warnings),
    component_latency_per_turn: nonEmptyArray(metrics?.component_latency?.per_turn),
    observed_tool_calls: formatDebugToolCalls(result.observed_tool_calls),
    provider_metadata: callMeta?.provider_metadata
  });

  if (!sections) return void 0;
  return Object.keys(sections).length > 0 ? sections : void 0;
}
4783
/**
 * Trim observed tool calls to the fields shown in verbose debug output.
 *
 * @param {Array<object> | null | undefined} observed - Observed tool calls.
 * @returns {Array<object> | undefined} Trimmed calls, or `undefined` when
 *   there are none.
 */
function formatDebugToolCalls(observed) {
  if (!observed) return void 0;
  if (observed.length === 0) return void 0;

  const toDebugEntry = ({
    name,
    arguments: args,
    result,
    successful,
    provider_tool_type,
    timestamp_ms,
    latency_ms,
    turn_index
  }) => ({
    name,
    arguments: args,
    result,
    successful,
    provider_tool_type,
    timestamp_ms,
    latency_ms,
    turn_index
  });
  return observed.map(toDebugEntry);
}
4796
/**
 * Return `value` unchanged when it has at least one element.
 *
 * @param {Array | null | undefined} value - Candidate array.
 * @returns {Array | undefined} The same array, or `undefined` when it is
 *   falsy or empty.
 */
function nonEmptyArray(value) {
  if (!value) return void 0;
  return value.length > 0 ? value : void 0;
}
4799
/**
 * Turn provider warnings into display strings.
 *
 * For each warning the first non-empty string among `message` and `code` is
 * used, so an empty or non-string `message` no longer hides a usable `code`.
 * Warnings with neither, and null entries, are skipped.
 *
 * @param {Array<{ message?: string, code?: string, detail?: unknown }> | null | undefined} warnings
 *   Provider warnings.
 * @returns {string[]} One string per usable warning, in input order.
 */
function formatProviderWarningMessages(warnings) {
  if (!warnings || warnings.length === 0) return [];
  const isText = (candidate) => typeof candidate === "string" && candidate.length > 0;
  const messages = [];
  for (const warning of warnings) {
    const text = [warning?.message, warning?.code].find(isText);
    if (text !== undefined) messages.push(text);
  }
  return messages;
}
4803
/**
 * Remove duplicates, keeping the first occurrence of each value in order.
 *
 * @param {Iterable<string>} values - Values to de-duplicate.
 * @returns {string[]} Distinct values in first-seen order.
 */
function dedupeStrings(values) {
  const distinct = new Set(values);
  return Array.from(distinct);
}
4806
/**
 * Drop null/undefined properties from a record.
 *
 * Falsy but defined values (`0`, `""`, `false`) are kept.
 *
 * @param {object} record - Source record.
 * @returns {object | undefined} A new object with the remaining properties,
 *   or `undefined` when none remain.
 */
function compactUnknownRecord(record) {
  const kept = [];
  for (const pair of Object.entries(record)) {
    if (pair[1] != null) kept.push(pair);
  }
  if (kept.length === 0) return void 0;
  return Object.fromEntries(kept);
}
4810
/**
 * Reports whether an object has at least one value that is neither null nor undefined.
 *
 * @param {object} obj - Object whose own enumerable values are inspected.
 * @returns {boolean} `true` when any value is non-nullish.
 */
function hasContent(obj) {
  for (const value of Object.values(obj)) {
    if (value !== null && value !== void 0) return true;
  }
  return false;
}
4813
+
4814
+ // src/lib/output.ts
4815
// True when stdout is attached to a terminal; the printing helpers in this
// module branch on it.
var { isTTY } = process.stdout;
// Verbose flag consulted by debug() and isVerbose(). Nothing in this section
// assigns it — presumably it is toggled elsewhere in the file (TODO confirm).
var _verbose = false;

/**
 * Emits a timestamped diagnostic line, but only while verbose mode is on.
 *
 * @param {*} msg - Text appended after the `[vent HH:MM:SS.mmm]` prefix.
 * @returns {void}
 */
function debug(msg) {
  if (_verbose) {
    // Characters 11..22 of the ISO timestamp are the time of day with milliseconds.
    const clock = new Date().toISOString().substring(11, 23);
    stdoutSync(`[vent ${clock}] ${msg}\n`);
  }
}

/**
 * @returns {boolean} The current value of the verbose flag.
 */
function isVerbose() {
  return _verbose;
}
4826
/**
 * Writes data to standard output.
 *
 * On a terminal it uses `process.stdout.write`. Otherwise it first attempts a
 * synchronous write to file descriptor 1 and falls back to
 * `process.stdout.write` only when that write throws.
 *
 * @param {string|Uint8Array} data - Content to write.
 * @returns {void}
 */
function stdoutSync(data) {
  if (!isTTY) {
    try {
      writeFileSync(1, data);
      return;
    } catch {
      // Synchronous write failed; use the stream write below instead.
    }
  }
  process.stdout.write(data);
}
4837
/**
 * Prints a value to stdout as two-space-indented JSON followed by a newline.
 *
 * @param {*} value - Value handed to `JSON.stringify`.
 * @returns {void}
 */
function writeJsonStdout(value) {
  const serialized = JSON.stringify(value, null, 2);
  stdoutSync(`${serialized}\n`);
}
4840
// ANSI styling helpers. Each wraps its argument in an SGR escape sequence when
// stdout is a terminal and returns the argument untouched otherwise.
var bold = (s) => {
  if (!isTTY) return s;
  return `\x1B[1m${s}\x1B[0m`;
};
var dim = (s) => {
  if (!isTTY) return s;
  return `\x1B[2m${s}\x1B[0m`;
};
var green = (s) => {
  if (!isTTY) return s;
  return `\x1B[32m${s}\x1B[0m`;
};
var red = (s) => {
  if (!isTTY) return s;
  return `\x1B[31m${s}\x1B[0m`;
};
var yellow = (s) => {
  if (!isTTY) return s;
  return `\x1B[33m${s}\x1B[0m`;
};
var blue = (s) => {
  if (!isTTY) return s;
  return `\x1B[34m${s}\x1B[0m`;
};
4846
/**
 * Renders one streamed run event for interactive use; does nothing when
 * stdout is not a terminal.
 *
 * `call_completed` and `run_complete` are delegated to their own printers,
 * `call_started` prints the call name, and any other type prints the raw
 * event type in brackets.
 *
 * @param {{event_type: string, metadata_json?: object}} event - Streamed event.
 * @returns {void}
 */
function printEvent(event) {
  if (!isTTY) return;
  const meta = event.metadata_json ?? {};
  const kind = event.event_type;
  if (kind === "call_completed") {
    printCallResult(meta);
    return;
  }
  if (kind === "run_complete") {
    printRunComplete(meta);
    return;
  }
  if (kind === "call_started") {
    const name = meta.call_name ?? "call";
    stdoutSync(dim(` \u25B8 ${name}\u2026`) + "\n");
    return;
  }
  stdoutSync(dim(` [${kind}]`) + "\n");
}
4865
/**
 * Prints a one-line summary for a finished call, followed by optional
 * provider reference lines.
 *
 * Name, status and duration come from `meta.result` when present and fall
 * back to the same fields on `meta`. The extra lines show the provider call
 * id (or, failing that, the provider session id), the recording URL, and
 * every entry of `provider_debug_urls`.
 *
 * @param {object} meta - The event's `metadata_json` payload.
 * @returns {void}
 */
function printCallResult(meta) {
  const result = meta.result;
  const callMetadata = result?.call_metadata;

  const label = result?.name ?? meta.call_name ?? "call";
  const outcome = result?.status ?? meta.status;
  const elapsedMs = result?.duration_ms ?? meta.duration_ms;

  const succeeded = outcome === "completed" || outcome === "pass";
  const icon = succeeded ? green("\u2714") : red("\u2718");
  const elapsed = elapsedMs != null ? (elapsedMs / 1e3).toFixed(1) + "s" : "\u2014";

  const segments = [icon, bold(label), dim(elapsed)];
  const p50 = result?.latency?.p50_response_time_ms;
  if (p50 != null) {
    segments.push(`p50: ${p50}ms`);
  }
  if (callMetadata?.transfer_attempted) {
    segments.push(callMetadata.transfer_completed ? "transfer: completed" : "transfer: attempted");
  }
  stdoutSync(segments.join(" ") + "\n");

  // The session id is only shown when no call id is available.
  const providerCallId = callMetadata?.provider_call_id;
  const providerSessionId = callMetadata?.provider_session_id;
  if (providerCallId) {
    stdoutSync(dim(` provider id: ${providerCallId}`) + "\n");
  } else if (providerSessionId) {
    stdoutSync(dim(` provider session: ${providerSessionId}`) + "\n");
  }

  const recordingUrl = callMetadata?.recording_url;
  if (recordingUrl) {
    stdoutSync(dim(` recording: ${recordingUrl}`) + "\n");
  }

  const debugUrls = callMetadata?.provider_debug_urls;
  if (debugUrls) {
    for (const [name, link] of Object.entries(debugUrls)) {
      stdoutSync(dim(` ${name}: ${link}`) + "\n");
    }
  }
}
4899
/**
 * Prints the pass/fail banner for a finished run plus a counts line.
 *
 * Counts come from `total_calls` / `passed_calls` / `failed_calls` on `meta`,
 * falling back to `meta.aggregate.conversation_calls`. The counts line is
 * omitted when no total is known; zero passed or failed counts are left out
 * of it.
 *
 * @param {object} meta - The `run_complete` event's metadata payload.
 * @returns {void}
 */
function printRunComplete(meta) {
  const tallies = meta.aggregate?.conversation_calls;
  const total = meta.total_calls ?? tallies?.total;
  const passed = meta.passed_calls ?? tallies?.passed;
  const failed = meta.failed_calls ?? tallies?.failed;

  stdoutSync("\n");
  const banner = meta.status === "pass" ? green(bold("Run passed")) : red(bold("Run failed"));
  stdoutSync(banner + "\n");

  if (total == null) return;
  const segments = [];
  if (passed) segments.push(green(`${passed} passed`));
  if (failed) segments.push(red(`${failed} failed`));
  segments.push(`${total} total`);
  stdoutSync(segments.join(dim(" \xB7 ")) + "\n");
}
4920
/**
 * Prints the end-of-run summary.
 *
 * When stdout is not a terminal, the summary built by `buildRunSummaryJson`
 * is written as indented JSON and nothing else is printed. On a terminal, the
 * calls whose status is set and is neither "completed" nor "pass" are listed,
 * followed by a hint for fetching full details.
 *
 * @param {Array<{metadata_json?: object}>} callResults - Streamed call events;
 *   used only when `options.rawCalls` is absent.
 * @param {object} runComplete - Metadata of the `run_complete` event.
 * @param {string} runId - Run identifier shown in the hint and the JSON.
 * @param {{rawCalls?: Array, verbose?: boolean, runDetails?: object}} [options]
 * @returns {void}
 */
function printSummary(callResults, runComplete, runId, options = {}) {
  // Fallback shape for events that carry no full `result` object.
  const fromEvent = (event) => {
    const meta = event.metadata_json ?? {};
    if (meta.result) return meta.result;
    return {
      name: meta.call_name ?? "call",
      status: meta.status ?? "unknown",
      duration_ms: meta.duration_ms,
      error: null
    };
  };

  let allCalls;
  if (options.rawCalls) {
    allCalls = formatRawCalls(options.rawCalls, options.verbose ?? false);
  } else {
    allCalls = callResults.map((event) => fromEvent(event));
  }

  const tallies = runComplete.aggregate?.conversation_calls;
  const summaryData = buildRunSummaryJson({
    runId,
    status: runComplete.status,
    total: runComplete.total_calls ?? tallies?.total,
    passed: runComplete.passed_calls ?? tallies?.passed,
    failed: runComplete.failed_calls ?? tallies?.failed,
    formattedCalls: allCalls,
    verbose: options.verbose,
    runDetails: options.runDetails ?? { aggregate: runComplete.aggregate }
  });

  if (!isTTY) {
    stdoutSync(JSON.stringify(summaryData, null, 2) + "\n");
    return;
  }

  const isFailure = (call) =>
    Boolean(call.status) && call.status !== "completed" && call.status !== "pass";
  const failures = allCalls.filter(isFailure);
  if (failures.length > 0) {
    stdoutSync("\n" + bold("Failed calls:") + "\n");
    for (const failedCall of failures) {
      const elapsed =
        failedCall.duration_ms != null ? (failedCall.duration_ms / 1e3).toFixed(1) + "s" : "\u2014";
      const segments = [red("\u2718"), bold(failedCall.name ?? "call"), dim(elapsed)];
      stdoutSync(" " + segments.join(" ") + "\n");
    }
  }

  const verboseFlag = options.verbose ? " --verbose" : "";
  stdoutSync(dim(`Full details: vent status ${runId}${verboseFlag}`) + "\n");
}
4959
/**
 * Builds the JSON-serialisable summary object for a run.
 *
 * Calls come from `options.rawCalls` (formatted via `formatRawCalls`) when
 * given, otherwise from `options.formattedCalls`, otherwise an empty list.
 * Timing, error and aggregate fields of `options.runDetails` are copied only
 * when they are neither null nor undefined.
 *
 * @param {object} options
 * @param {string} options.runId
 * @param {string} [options.status]
 * @param {number} [options.total]
 * @param {number} [options.passed]
 * @param {number} [options.failed]
 * @param {Array} [options.rawCalls]
 * @param {Array} [options.formattedCalls]
 * @param {boolean} [options.verbose]
 * @param {object} [options.runDetails]
 * @returns {object} Summary with `run_id`, `status`, `total`, `passed`,
 *   `failed`, `calls` and any available detail fields.
 */
function buildRunSummaryJson(options) {
  let calls;
  if (options.rawCalls) {
    calls = formatRawCalls(options.rawCalls, options.verbose ?? false);
  } else {
    calls = options.formattedCalls ?? [];
  }

  const summaryData = {
    run_id: options.runId,
    status: options.status,
    total: options.total,
    passed: options.passed,
    failed: options.failed,
    calls
  };

  const details = options.runDetails;
  const optionalFields = [
    "created_at",
    "started_at",
    "finished_at",
    "duration_ms",
    "error_text",
    "aggregate"
  ];
  for (const field of optionalFields) {
    const fieldValue = details?.[field];
    if (fieldValue != null) summaryData[field] = fieldValue;
  }
  return summaryData;
}
4978
/**
 * Formats raw call records for the run summary.
 *
 * Each record is passed to `formatConversationResult`. When that yields a
 * falsy value, a minimal record is built from the raw `name`, `status`,
 * `duration_ms` and `error` fields, each used only if it has the expected
 * primitive type.
 *
 * @param {Array<object>} rawCalls - Call records to format.
 * @param {boolean} verbose - Forwarded to `formatConversationResult`.
 * @returns {Array<object>} One formatted entry per raw call.
 */
function formatRawCalls(rawCalls, verbose) {
  // Returns record[key] when it has the wanted typeof, else the given default.
  const pick = (record, key, wantedType, otherwise) =>
    typeof record[key] === wantedType ? record[key] : otherwise;

  return rawCalls.map((raw) => {
    const formatted = formatConversationResult(raw, { verbose });
    if (formatted) return formatted;
    return {
      name: pick(raw, "name", "string", "call"),
      status: pick(raw, "status", "string", "unknown"),
      duration_ms: pick(raw, "duration_ms", "number", void 0),
      error: pick(raw, "error", "string", null)
    };
  });
}
4991
/**
 * Prints an error line: a red, bold "error" label followed by the message.
 * Unlike the info/success/warn printers it is never suppressed.
 *
 * @param {*} message - Text shown after the label.
 * @returns {void}
 */
function printError(message) {
  const label = red(bold("error"));
  stdoutSync(`${label} ${message}\n`);
}
4996
/**
 * Prints an informational line prefixed with a blue arrow marker.
 *
 * The line is printed when `force` is set, stdout is a terminal, or verbose
 * mode is on; otherwise the call is a no-op.
 *
 * @param {*} message - Text to print.
 * @param {{force?: boolean}} [options] - `force` prints unconditionally.
 * @returns {void}
 */
function printInfo(message, { force } = {}) {
  const shouldPrint = force || isTTY || _verbose;
  if (shouldPrint) {
    stdoutSync(`${blue("\u25B8")} ${message}\n`);
  }
}
5002
/**
 * Prints a success line prefixed with a green check mark.
 *
 * The line is printed when `force` is set, stdout is a terminal, or verbose
 * mode is on; otherwise the call is a no-op.
 *
 * @param {*} message - Text to print.
 * @param {{force?: boolean}} [options] - `force` prints unconditionally.
 * @returns {void}
 */
function printSuccess(message, { force } = {}) {
  const shouldPrint = force || isTTY || _verbose;
  if (shouldPrint) {
    stdoutSync(`${green("\u2714")} ${message}\n`);
  }
}
5008
/**
 * Prints a warning line prefixed with a yellow warning sign.
 *
 * The line is printed when `force` is set, stdout is a terminal, or verbose
 * mode is on; otherwise the call is a no-op.
 *
 * @param {*} message - Text to print.
 * @param {{force?: boolean}} [options] - `force` prints unconditionally.
 * @returns {void}
 */
function printWarn(message, { force } = {}) {
  const shouldPrint = force || isTTY || _verbose;
  if (shouldPrint) {
    stdoutSync(`${yellow("\u26A0")} ${message}\n`);
  }
}
5014
+
5015
+ // src/lib/auth.ts
5016
// Delay between two device-authorization polls, in milliseconds.
var POLL_INTERVAL_MS = 2000;

/**
 * Resolves after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Settles once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
5020
/**
 * Runs the browser-based device login flow.
 *
 * Starts a device session via `POST /device/start`, shows the user code and
 * verification URL, opens the browser, then polls `POST /device/exchange`
 * every `POLL_INTERVAL_MS` until the session is approved, reaches a terminal
 * state, or passes its `expires_at` deadline. On approval the access token is
 * stored with `saveAccessToken`.
 *
 * @returns {Promise<{ok: true, accessToken: string} | {ok: false, error: string}>}
 */
async function deviceAuthFlow() {
  let session;
  try {
    const startResponse = await fetch(`${API_BASE}/device/start`, { method: "POST" });
    if (!startResponse.ok) {
      return { ok: false, error: `Failed to start device auth: ${startResponse.status}` };
    }
    session = await startResponse.json();
  } catch {
    return { ok: false, error: "Could not reach Vent API. Check your connection." };
  }

  printInfo(`Your authorization code: ${session.user_code}`, { force: true });
  printInfo(`Opening browser to log in...`, { force: true });
  printInfo(`If the browser doesn't open, visit: ${session.verification_url}`, { force: true });
  openBrowser(session.verification_url);

  const expiresAtMs = new Date(session.expires_at).getTime();
  while (Date.now() < expiresAtMs) {
    await sleep(POLL_INTERVAL_MS);
    try {
      const pollResponse = await fetch(`${API_BASE}/device/exchange`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ session_id: session.session_id })
      });
      if (!pollResponse.ok) continue;
      const payload = await pollResponse.json();
      const accessToken = payload.access_token;
      switch (payload.status) {
        case "approved":
          // Approval without a token is treated as "not ready yet".
          if (accessToken) {
            await saveAccessToken(accessToken);
            return { ok: true, accessToken };
          }
          break;
        case "expired":
          return { ok: false, error: "Session expired. Run `npx vent-hq login` again." };
        case "consumed":
        case "invalid":
          return { ok: false, error: "Session invalid. Run `npx vent-hq login` again." };
        default:
          break;
      }
    } catch {
      // Any failure inside a poll attempt is ignored; the loop retries until the deadline.
    }
  }
  return { ok: false, error: "Login timed out. Run `npx vent-hq login` again." };
}
5062
+
5063
+ // src/lib/sse.ts
5064
/**
 * Emits a timestamped SSE diagnostic line while verbose mode is on.
 *
 * The line goes through `stdoutSync`, the same writer `debug()` and the other
 * printers in this file use, so SSE diagnostics take the same output path as
 * the rest of the CLI's output instead of calling `process.stdout.write`
 * directly.
 *
 * @param {*} msg - Text appended after the `[vent:sse HH:MM:SS.mmm]` prefix.
 * @returns {void}
 */
function log(msg) {
  if (!isVerbose()) return;
  // Characters 11..22 of the ISO timestamp are the time of day with milliseconds.
  const ts = new Date().toISOString().slice(11, 23);
  stdoutSync(`[vent:sse ${ts}] ${msg}\n`);
}
5071
// Number of reconnect attempts after the first connection, and the pause before each one.
var MAX_RETRIES = 5;
var RETRY_DELAY_MS = 2e3;

/**
 * Streams the events of a run from the `/runs/:id/stream` SSE endpoint.
 *
 * Every `data: ` line is parsed as JSON and yielded. Events whose `id` was
 * already seen are skipped, so reconnects do not replay events to the caller.
 * The generator finishes right after yielding a `run_complete` event. If the
 * connection fails or ends before that, it reconnects up to `MAX_RETRIES`
 * times, waiting `RETRY_DELAY_MS` before each attempt, and finally yields a
 * synthetic `{ event_type: "error" }` event.
 *
 * The response body is cancelled whenever a connection is abandoned — after
 * `run_complete`, on an early consumer exit, or on an error — so the
 * underlying HTTP stream is closed rather than left open.
 *
 * @param {string} runId - Run whose events are streamed.
 * @param {string} apiKey - Bearer token sent in the Authorization header.
 * @param {AbortSignal} [signal] - Aborts the request; AbortErrors are rethrown.
 * @yields {object} Parsed event objects.
 * @throws {Error} When the server answers with a non-OK status or no body.
 */
async function* streamRunEvents(runId, apiKey, signal) {
  const url = `${API_BASE}/runs/${runId}/stream`;
  const seenIds = /* @__PURE__ */ new Set();
  let retries = 0;
  while (retries <= MAX_RETRIES) {
    if (retries > 0) {
      log(`reconnecting (attempt ${retries}/${MAX_RETRIES}) after ${RETRY_DELAY_MS}ms\u2026`);
      await new Promise((r) => setTimeout(r, RETRY_DELAY_MS));
    }
    log(`connecting to ${url}`);
    let res;
    try {
      res = await fetch(url, {
        headers: { Authorization: `Bearer ${apiKey}` },
        signal
      });
    } catch (err) {
      if (err?.name === "AbortError") throw err;
      log(`fetch error: ${err?.message}`);
      retries++;
      continue;
    }
    log(`response: status=${res.status} content-type=${res.headers.get("content-type")}`);
    if (!res.ok) {
      const body = await res.text();
      log(`error body: ${body}`);
      throw new Error(`SSE stream failed (${res.status}): ${body}`);
    }
    if (!res.body) {
      throw new Error("SSE stream returned no body");
    }
    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";
    let chunkCount = 0;
    let eventCount = 0;
    try {
      while (true) {
        let readResult;
        try {
          readResult = await reader.read();
        } catch (err) {
          if (err?.name === "AbortError") throw err;
          log(`read error: ${err?.message}`);
          break;
        }
        const { done, value } = readResult;
        if (done) {
          log(`stream done after ${chunkCount} chunks, ${eventCount} events`);
          break;
        }
        chunkCount++;
        const chunk = decoder.decode(value, { stream: true });
        buffer += chunk;
        if (chunkCount <= 3 || chunkCount % 10 === 0) {
          log(`chunk #${chunkCount} (${chunk.length} bytes) buffer=${buffer.length} bytes`);
        }
        // Keep the trailing partial line in the buffer for the next chunk.
        const lines = buffer.split("\n");
        buffer = lines.pop();
        for (const line of lines) {
          if (line.startsWith("data: ")) {
            const raw = line.slice(6);
            try {
              const event = JSON.parse(raw);
              eventCount++;
              if (event.id && seenIds.has(event.id)) {
                log(`skipping duplicate event ${event.id}`);
                continue;
              }
              if (event.id) seenIds.add(event.id);
              log(`parsed event #${eventCount}: type=${event.event_type}`);
              yield event;
              if (event.event_type === "run_complete") {
                log("run_complete received \u2014 closing stream");
                return;
              }
            } catch {
              log(`malformed JSON: ${raw.slice(0, 200)}`);
            }
          } else if (line.startsWith(": ")) {
            if (chunkCount <= 3) {
              log(`heartbeat: "${line}"`);
            }
          }
        }
      }
    } finally {
      try {
        // Close the HTTP stream; releasing the lock alone leaves it open.
        await reader.cancel();
      } catch {
        // The stream was already errored or aborted; nothing left to cancel.
      }
      reader.releaseLock();
      log("reader released");
    }
    retries++;
    if (retries <= MAX_RETRIES) {
      log(`stream ended without run_complete \u2014 will retry (${retries}/${MAX_RETRIES})`);
    }
  }
  log(`exhausted ${MAX_RETRIES} retries without run_complete`);
  yield {
    event_type: "error",
    message: `Stream lost after ${MAX_RETRIES} reconnect attempts without receiving run_complete`
  };
}
5179
+
5180
+ // src/lib/run-history.ts
5181
+ import * as fs2 from "node:fs/promises";
5182
+ import * as path2 from "node:path";
5183
+ import { execSync } from "node:child_process";
5184
/**
 * Reads the current git commit, branch and dirty state by shelling out to `git`.
 *
 * @returns {{sha: string|null, branch: string|null, dirty: boolean}} `branch`
 *   is null when git prints no branch name; all fields fall back to
 *   null/false when any git command fails.
 */
function gitInfo() {
  // Output is captured through pipes and returned trimmed.
  const run = (command) =>
    execSync(command, { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
  try {
    const sha = run("git rev-parse HEAD");
    const branchName = run("git branch --show-current");
    const porcelain = run("git status --porcelain");
    return { sha, branch: branchName || null, dirty: porcelain.length > 0 };
  } catch {
    return { sha: null, branch: null, dirty: false };
  }
}
5194
/**
 * Writes a JSON record of a finished run to `.vent/runs/` under the current
 * working directory.
 *
 * The record holds the run id, a timestamp, git metadata from `gitInfo()`, a
 * summary, and the `metadata_json` payload of every call event. Call counts
 * come from `aggregate.conversation_calls`; when that is missing they fall
 * back to the top-level `total_calls` / `passed_calls` / `failed_calls`
 * fields that the summary printers also read, and only then to 0.
 *
 * Saving history is best-effort: any failure is swallowed and reported as
 * `null` so it cannot fail the run itself.
 *
 * @param {string} runId - Run identifier; its first 8 characters go into the file name.
 * @param {Array<{metadata_json?: object}>} callResults - Streamed call events.
 * @param {object} runCompleteData - Metadata of the `run_complete` event.
 * @returns {Promise<string|null>} Path of the written file, or `null` on failure.
 */
async function saveRunHistory(runId, callResults, runCompleteData) {
  try {
    const dir = path2.join(process.cwd(), ".vent", "runs");
    await fs2.mkdir(dir, { recursive: true });
    const git = gitInfo();
    const now = /* @__PURE__ */ new Date();
    // "2024-01-02T03:04:05.678Z" -> "2024-01-02T03-04-05", which is safe in file names.
    const timestamp = now.toISOString().replace(/[:.]/g, "-").slice(0, 19);
    const shortId = runId.slice(0, 8);
    const aggregate = runCompleteData.aggregate;
    const convCalls = aggregate?.conversation_calls;
    const total = convCalls?.total ?? runCompleteData.total_calls ?? 0;
    const passed = convCalls?.passed ?? runCompleteData.passed_calls ?? 0;
    const failed = convCalls?.failed ?? runCompleteData.failed_calls ?? 0;
    const entry = {
      run_id: runId,
      timestamp: now.toISOString(),
      git_sha: git.sha,
      git_branch: git.branch,
      git_dirty: git.dirty,
      summary: {
        status: runCompleteData.status ?? "unknown",
        calls_total: total,
        calls_passed: passed,
        calls_failed: failed,
        total_duration_ms: aggregate?.total_duration_ms,
        total_cost_usd: aggregate?.total_cost_usd
      },
      call_results: callResults.map((e) => e.metadata_json ?? {})
    };
    const filename = `${timestamp}_${shortId}.json`;
    const filepath = path2.join(dir, filename);
    await fs2.writeFile(filepath, JSON.stringify(entry, null, 2) + "\n");
    return filepath;
  } catch {
    // Deliberately best-effort: history is a convenience, never a reason to fail.
    return null;
  }
}
5231
+
4961
5232
  // src/lib/platform-connections.ts
4962
5233
  var PLATFORM_ENV_MAP = {
4963
5234
  vapi: { vapi_api_key: "VAPI_API_KEY", vapi_assistant_id: "VAPI_ASSISTANT_ID" },
@@ -5220,6 +5491,10 @@ async function runCommand(args) {
5220
5491
  exitCode = status === "pass" ? 0 : 1;
5221
5492
  debug(`run_complete: status=${status} exitCode=${exitCode}`);
5222
5493
  }
5494
+ if (event.event_type === "error") {
5495
+ printError(event.message ?? "Stream connection lost");
5496
+ exitCode = 2;
5497
+ }
5223
5498
  }
5224
5499
  debug(`SSE stream ended \u2014 received ${eventCount} events total`);
5225
5500
  } catch (err) {
@@ -5236,7 +5511,28 @@ async function runCommand(args) {
5236
5511
  }
5237
5512
  debug(`summary: callResults=${callResults.length} runComplete=${!!runCompleteData} exitCode=${exitCode}`);
5238
5513
  if (runCompleteData) {
5239
- printSummary(callResults, runCompleteData, run_id);
5514
+ let rawRunDetails = null;
5515
+ if (args.verbose && !isTTY2) {
5516
+ try {
5517
+ const res = await apiFetch(`/runs/${run_id}`, activeAccessToken);
5518
+ rawRunDetails = await res.json();
5519
+ } catch (err) {
5520
+ debug(`verbose status fetch failed: ${err.message}`);
5521
+ printWarn("Verbose result fetch failed; falling back to streamed summary.");
5522
+ }
5523
+ }
5524
+ printSummary(callResults, runCompleteData, run_id, {
5525
+ verbose: args.verbose,
5526
+ rawCalls: Array.isArray(rawRunDetails?.["results"]) ? rawRunDetails["results"] : void 0,
5527
+ runDetails: rawRunDetails ? {
5528
+ created_at: rawRunDetails["created_at"],
5529
+ started_at: rawRunDetails["started_at"],
5530
+ finished_at: rawRunDetails["finished_at"],
5531
+ duration_ms: rawRunDetails["duration_ms"],
5532
+ error_text: rawRunDetails["error_text"],
5533
+ aggregate: rawRunDetails["aggregate"]
5534
+ } : void 0
5535
+ });
5240
5536
  }
5241
5537
  if (runCompleteData) {
5242
5538
  const savedPath = await saveRunHistory(run_id, callResults, runCompleteData);
@@ -5357,10 +5653,10 @@ var RelayClient = class {
5357
5653
  this.controlWs.send(JSON.stringify(msg));
5358
5654
  }
5359
5655
  }
5360
- sendBinaryFrame(connId, payload) {
5656
+ sendDataFrame(connId, payload, frameType) {
5361
5657
  if (!this.controlWs || this.controlWs.readyState !== WebSocket.OPEN) return;
5362
5658
  const header = new Uint8Array(37);
5363
- header[0] = 1;
5659
+ header[0] = frameType;
5364
5660
  const connIdBytes = new TextEncoder().encode(connId);
5365
5661
  header.set(connIdBytes, 1);
5366
5662
  const frame = new Uint8Array(37 + payload.byteLength);
@@ -5372,12 +5668,18 @@ var RelayClient = class {
5372
5668
  ws.addEventListener("message", (event) => {
5373
5669
  if (event.data instanceof ArrayBuffer) {
5374
5670
  const data = new Uint8Array(event.data);
5375
- if (data.length < 37 || data[0] !== 1) return;
5671
+ if (data.length < 37) return;
5672
+ const frameType = data[0];
5673
+ if (frameType !== 1 && frameType !== 2) return;
5376
5674
  const connId = new TextDecoder().decode(data.subarray(1, 37));
5377
5675
  const payload = data.subarray(37);
5378
5676
  const conn = this.localConnections.get(connId);
5379
5677
  if (conn?.local.readyState === WebSocket.OPEN) {
5380
- conn.local.send(payload);
5678
+ if (frameType === 2) {
5679
+ conn.local.send(new TextDecoder().decode(payload));
5680
+ } else {
5681
+ conn.local.send(payload);
5682
+ }
5381
5683
  }
5382
5684
  return;
5383
5685
  }
@@ -5425,8 +5727,11 @@ var RelayClient = class {
5425
5727
  this.localConnections.set(connId, { local: localWs, connId });
5426
5728
  });
5427
5729
  localWs.addEventListener("message", (event) => {
5428
- const payload = event.data instanceof ArrayBuffer ? new Uint8Array(event.data) : new TextEncoder().encode(event.data);
5429
- this.sendBinaryFrame(connId, payload);
5730
+ if (event.data instanceof ArrayBuffer) {
5731
+ this.sendDataFrame(connId, new Uint8Array(event.data), 1);
5732
+ } else {
5733
+ this.sendDataFrame(connId, new TextEncoder().encode(event.data), 2);
5734
+ }
5430
5735
  });
5431
5736
  let cleaned = false;
5432
5737
  const cleanup = (reason) => {
@@ -5460,7 +5765,7 @@ async function startAgentSession(relayConfig) {
5460
5765
  };
5461
5766
  const client = new RelayClient(clientConfig);
5462
5767
  client.on("log", (msg) => {
5463
- if (isVerbose()) process.stderr.write(`${msg}
5768
+ if (isVerbose()) process.stdout.write(`${msg}
5464
5769
  `);
5465
5770
  });
5466
5771
  await client.connect();
@@ -5473,13 +5778,13 @@ async function startAgentSession(relayConfig) {
5473
5778
  env
5474
5779
  });
5475
5780
  agentProcess.stdout?.on("data", (data) => {
5476
- if (isVerbose()) process.stderr.write(`[agent] ${data}`);
5781
+ if (isVerbose()) process.stdout.write(`[agent] ${data}`);
5477
5782
  });
5478
5783
  agentProcess.stderr?.on("data", (data) => {
5479
- if (isVerbose()) process.stderr.write(`[agent] ${data}`);
5784
+ if (isVerbose()) process.stdout.write(`[agent] ${data}`);
5480
5785
  });
5481
5786
  agentProcess.on("error", (err) => {
5482
- process.stderr.write(`Agent process error: ${err.message}
5787
+ process.stdout.write(`Agent process error: ${err.message}
5483
5788
  `);
5484
5789
  });
5485
5790
  }
@@ -5661,7 +5966,27 @@ async function statusCommand(args) {
5661
5966
  try {
5662
5967
  const res = await apiFetch(`/runs/${args.runId}`, accessToken);
5663
5968
  const data = await res.json();
5664
- process.stdout.write(JSON.stringify(data, null, 2) + "\n");
5969
+ const aggregate = data.aggregate;
5970
+ const counts = aggregate?.conversation_calls;
5971
+ const results = Array.isArray(data.results) ? data.results : [];
5972
+ const summary = buildRunSummaryJson({
5973
+ runId: typeof data.id === "string" ? data.id : args.runId,
5974
+ status: data.status,
5975
+ total: counts?.total,
5976
+ passed: counts?.passed,
5977
+ failed: counts?.failed,
5978
+ rawCalls: results,
5979
+ verbose: args.verbose,
5980
+ runDetails: {
5981
+ created_at: data.created_at,
5982
+ started_at: data.started_at,
5983
+ finished_at: data.finished_at,
5984
+ duration_ms: data.duration_ms,
5985
+ error_text: data.error_text,
5986
+ aggregate: data.aggregate
5987
+ }
5988
+ });
5989
+ writeJsonStdout(summary);
5665
5990
  const status = data.status;
5666
5991
  return status === "pass" ? 0 : status === "fail" ? 1 : 0;
5667
5992
  } catch (err) {
@@ -6417,13 +6742,13 @@ var ze = { light: I2("\u2500", "-"), heavy: I2("\u2501", "="), block: I2("\u2588
6417
6742
  var Qe = `${t("gray", h)} `;
6418
6743
 
6419
6744
  // src/skills/claude-code.md
6420
- var claude_code_default = '---\nname: vent\ndescription: Voice agent calls \u2014 run calls against your voice agent, get pass/fail results with latency and latency metrics\nallowed-tools: Bash(npx vent-hq *)\n---\n\n# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud \u2014 results stream back.\n\n## When to Call (read this first)\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n- Do NOT skip call verification \u2014 voice agents are non-deterministic, small changes can break flows\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Check results of a previous run |\n\n\n## Critical Rules\n\n1. **5-minute timeout** \u2014 Set `timeout: 300000` on each Bash call. Individual calls can still take up to 5 minutes.\n2. **If a call gets backgrounded** \u2014 Wait for it to complete before proceeding. Never end your response without the result.\n3. **This skill is self-contained** \u2014 The full config schema is below. 
Do NOT re-read this file.\n4. **Always analyze results** \u2014 The run command outputs complete JSON with full transcript, latency, and tool calls. Analyze this output directly.\n\n## Workflow\n\n### First time: create the call suite\n\n1. Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the **Full Config Schema** section below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). This prevents confusion when multiple adapters are tested in the same project.\n - Name calls after specific flows (e.g., `"reschedule-appointment"`, not `"call-1"`)\n - Write `caller_prompt` as a realistic persona with a specific goal, based on the agent\'s domain\n - Set `max_turns` based on the flow complexity (simple FAQ: 4-6, booking: 8-12, complex: 12-20)\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n### Run calls\n\n1. If the suite uses `start_command`, start the shared local session first:\n ```bash\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n ```\n\n2. Run calls:\n ```bash\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 add --session\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n\n3. 
To run multiple calls from the same suite, run each as a separate command:\n ```bash\n npx vent-hq run -f .vent/suite.vapi.json --call happy-path\n npx vent-hq run -f .vent/suite.vapi.json --call edge-case\n ```\n\n4. Analyze each result, identify failures, correlate with the codebase, and fix.\n\n5. **Compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. Read the second-most-recent JSON in `.vent/runs/` and compare it against the current run:\n - Status flips: pass\u2192fail (obvious regression)\n - Latency: TTFW p50/p95 increased >20%\n - Tool calls: success count dropped\n - Cost: cost_usd increased >30%\n - Transcripts: agent responses diverged significantly\n Report what regressed and correlate with the code diff (`git diff` between the two runs\' git SHAs). If no previous run exists, skip \u2014 this is the baseline.\n\n### After modifying voice agent code\n\nRe-run the existing suite \u2014 no need to recreate it.\n\n## Connection\n\n- **BYO agent runtime**: your agent owns its own provider credentials. Use `start_command` for a local agent or `agent_url` for a hosted custom endpoint.\n- **Platform-direct runtime**: use adapter `vapi | retell | elevenlabs | bland | livekit`. 
This is the only mode where Vent itself needs provider credentials and saved platform connections apply.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). 
Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. 
To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID. 
Only add bland_api_key/bland_pathway_id to the JSON if those env vars are not already available.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID. Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: Starter=10, Growth=50, Enterprise=100+. Ask the user which tier they\'re on. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if the agent registers with an explicit agent_name in WorkerOptions. Omit for automatic dispatch.\nmax_concurrency: Free/Build=5, Ship=20, Scale=50+. Ask the user which tier they\'re on. 
If unknown, default to 5.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket/WebRTC: user\'s agent must emit tool calls:\n WebSocket \u2014 JSON text frame: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\n WebRTC/LiveKit \u2014 publishData() or sendText() on topic "vent:tool-calls". Same JSON.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 
800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "low (~3/10 turns) | high (~7/10 turns)",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "silence", "at_turn": "N", "duration_ms": "1000-30000" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to 
next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" },\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! 
What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check avail\u2014", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400, "interrupted": true },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900, "is_interruption": true },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950, "total_silence_ms": 4200, "mean_turn_gap_ms": 380,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "transcript_quality": {\n "wer": 0.04,\n "hallucination_events": [\n { "error_count": 5, "reference_text": "triple five one two", "hypothesis_text": "five five five nine two" }\n ],\n "repetition_score": 0.05,\n "reprompt_count": 0,\n "filler_word_rate": 0.8,\n "words_per_minute": 148\n },\n "audio_analysis": {\n "agent_speech_ratio": 0.72,\n "interruption_rate": 0.25,\n "interruption_count": 1,\n "barge_in_recovery_time_ms": 280,\n "agent_interrupting_user_rate": 0.0,\n "agent_interrupting_user_count": 0,\n "missed_response_windows": 0,\n "longest_monologue_ms": 5800,\n "silence_gaps_over_2s": 1,\n "total_internal_silence_ms": 2400,\n "mean_agent_speech_segment_ms": 3450\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", "10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "call_metadata": {\n "platform": "vapi",\n "recording_url": "https://example.com/recording"\n },\n "warnings": [],\n "audio_actions": [\n { "at_turn": 5, "action": "silence", "metrics": { "agent_prompted": false, 
"unprompted_utterance_count": 0, "silence_duration_ms": 8000 } }\n ],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAll fields optional except name, status, caller_prompt, duration_ms, transcript. Fields appear only when relevant analysis ran (e.g., emotion requires prosody: true).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. **Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed. If `call_metadata.transfers[*].verification` is present, use it to mention second-leg observation, connect latency, transcript/context summary, and whether context passing was verified.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. Treat `transcript_quality.hallucination_events` only as a speech-recognition warning signal, not proof of agent hallucination. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. 
|\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. |\n\n### Interruption evaluation\n\nWhen the transcript contains `interrupted: true` / `is_interruption: true` turns, evaluate these metrics by reading the transcript:\n\n| Metric | How to evaluate | Target |\n|--------|----------------|--------|\n| **Recovery rate** | For each interrupted turn: does the post-interrupt agent response acknowledge or address the interruption? (e.g., "Sure, the earliest is 9 AM" after being cut off mid-availability-list) | >90% |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state? (e.g., still knows the caller\'s name, booking details, etc.) | >95% |\n| **Barge-in recovery time** | Use `audio_analysis.barge_in_recovery_time_ms` when available. Lower is better because it measures how long the agent kept speaking after the caller cut in. | <500ms acceptable |\n| **Agent interrupting user rate** | Use `audio_analysis.agent_interrupting_user_rate` and the transcript to see whether the agent starts speaking before the caller finished. | 0 ideal |\n\nReport these alongside standard metrics when interruption calls run. 
Flag any turn where the agent ignores the interruption, repeats itself from scratch, or loses context.\n</output_conversation_test>\n</call_config>\n\n\n## Output\n\n- **Exit codes**: 0=pass, 1=fail, 2=error\n- The `run` command outputs **complete results as pretty-printed JSON** \u2014 including full transcript, latency metrics, tool calls, and audio analysis for every call. Do NOT run a separate `vent status` command \u2014 all data is already in the output.\n\n## Vent Access Token\n\nRun `npx vent-hq login` or set `VENT_ACCESS_TOKEN` env var.\nVent provides DEEPGRAM_API_KEY and ANTHROPIC_API_KEY automatically.\n';
6745
+ var claude_code_default = '---\nname: vent\ndescription: Voice agent calls \u2014 run calls against your voice agent, get pass/fail results with latency and latency metrics\nallowed-tools: Bash(npx vent-hq *)\n---\n\n# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud \u2014 results stream back.\n\n## When to Call (read this first)\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n- Do NOT skip call verification \u2014 voice agents are non-deterministic, small changes can break flows\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --verbose` | Include debug fields in the result JSON |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Check results of a previous run |\n| `npx vent-hq status <run-id> --verbose` | Re-print a run with debug fields included |\n\n## When To Use `--verbose`\n\nDefault output is enough for most work. 
It already includes:\n- transcript\n- latency\n- transcript quality (`wer` / `cer`)\n- audio analysis\n- tool calls\n- summary cost / recording / transfers\n\nUse `--verbose` only when you need debugging detail that is not in the default result:\n- per-turn debug fields: timestamps, caller decision mode, silence pad, STT confidence, platform transcript\n- raw signal analysis: `debug.signal_quality`\n- harness timings: `debug.harness_overhead`\n- raw prosody payload and warnings\n- raw provider warnings\n- per-turn component latency arrays\n- raw observed tool-call timeline\n- provider-specific metadata in `debug.provider_metadata`\n\nTrigger `--verbose` when:\n- transcript accuracy looks wrong and you need to inspect `platform_transcript`\n- latency is bad and you need per-turn/component breakdowns\n- interruptions/barge-in behavior looks wrong\n- tool-call execution looks inconsistent or missing\n- the provider returned warnings/errors or you need provider-native artifacts\n\nSkip `--verbose` when:\n- you only need pass/fail, transcript, latency, tool calls, recording, or summary\n- you are doing quick iteration on prompt wording and the normal result already explains the failure\n\n## Normalization Contract\n\nVent always returns one normalized result shape on `stdout` across adapters. 
Treat these as the stable categories:\n- `transcript`\n- `latency`\n- `transcript_quality`\n- `audio_analysis`\n- `tool_calls`\n- `component_latency`\n- `call_metadata`\n- `warnings`\n- `audio_actions`\n- `emotion`\n\nSource-of-truth policy:\n- Vent computes transcript, latency, and audio-quality metrics itself.\n- Hosted adapters choose the best source per category, usually provider post-call data for tool calls, call metadata, transfers, provider transcripts, and recordings.\n- Realtime provider events are fallback or enrichment only when post-call data is missing, delayed, weaker for that category, or provider-specific.\n- `LiveKit` helper events are the provider-native path for rich in-agent observability.\n- `websocket`/custom agents are realtime-native but still map into the same normalized categories.\n- Keep adapter-specific details in `call_metadata.provider_metadata` or `debug.provider_metadata`, not in new top-level fields.\n\n\n## Critical Rules\n\n1. **5-minute timeout** \u2014 Set `timeout: 300000` on each Bash call. Individual calls can still take up to 5 minutes.\n2. **If a call gets backgrounded** \u2014 Wait for it to complete before proceeding. Never end your response without the result.\n3. **This skill is self-contained** \u2014 The full config schema is below. Do NOT re-read this file.\n4. **Always analyze results** \u2014 The run command outputs complete JSON with full transcript, latency, and tool calls. Use `--verbose` only when the default result is not enough to explain the failure. Analyze this output directly.\n\n## Workflow\n\n### First time: create the call suite\n\n1. Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the **Full Config Schema** section below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). 
This prevents confusion when multiple adapters are tested in the same project.\n - Name calls after specific flows (e.g., `"reschedule-appointment"`, not `"call-1"`)\n - Write `caller_prompt` as a realistic persona with a specific goal, based on the agent\'s domain\n - Set `max_turns` based on the flow complexity (simple FAQ: 4-6, booking: 8-12, complex: 12-20)\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n### Run calls\n\n1. If the suite uses `start_command`, start the shared local session first:\n ```bash\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n ```\n\n2. Run calls:\n ```bash\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 add --session\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n\n3. To run multiple calls from the same suite, run each as a separate command:\n ```bash\n npx vent-hq run -f .vent/suite.vapi.json --call happy-path\n npx vent-hq run -f .vent/suite.vapi.json --call edge-case\n ```\n\n4. Analyze each result, identify failures, correlate with the codebase, and fix.\n\n5. **Compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. 
Read the second-most-recent JSON in `.vent/runs/` and compare it against the current run:\n - Status flips: pass\u2192fail (obvious regression)\n - Latency: TTFW p50/p95 increased >20%\n - Tool calls: success count dropped\n - Cost: cost_usd increased >30%\n - Transcripts: agent responses diverged significantly\n Report what regressed and correlate with the code diff (`git diff` between the two runs\' git SHAs). If no previous run exists, skip \u2014 this is the baseline.\n\n### After modifying voice agent code\n\nRe-run the existing suite \u2014 no need to recreate it.\n\n## Connection\n\n- **BYO agent runtime**: your agent owns its own provider credentials. Use `start_command` for a local agent or `agent_url` for a hosted custom endpoint.\n- **Platform-direct runtime**: use adapter `vapi | retell | elevenlabs | bland | livekit`. This is the only mode where Vent itself needs provider credentials and saved platform connections apply.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n| `vent:session` | `{"type":"vent:session","platform":"custom","provider_call_id":"...","provider_session_id":"..."}` | Reports stable provider/session identifiers |\n| `vent:call-metadata` | 
`{"type":"vent:call-metadata","call_metadata":{...}}` | Reports post-call metadata such as cost, recordings, variables, and provider-specific artifacts |\n| `vent:transcript` | `{"type":"vent:transcript","role":"caller"\\|"agent","text":"...","turn_index":0}` | Reports platform/native transcript text for caller or agent |\n| `vent:transfer` | `{"type":"vent:transfer","destination":"...","status":"attempted"\\|"completed"}` | Reports transfer attempts and outcomes |\n| `vent:debug-url` | `{"type":"vent:debug-url","label":"log","url":"https://..."}` | Reports provider debug/deep-link URLs |\n| `vent:warning` | `{"type":"vent:warning","message":"...","code":"..."}` | Reports provider/runtime warnings worth preserving in run metadata |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. 
The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. 
To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID. 
Only add bland_api_key/bland_pathway_id to the JSON if those env vars are not already available.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID. Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: Starter=10, Growth=50, Enterprise=100+. Ask the user which tier they\'re on. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if the agent registers with an explicit agent_name in WorkerOptions. Omit for automatic dispatch.\nThe livekit adapter requires the LiveKit Agents SDK. It depends on Agents SDK signals (lk.agent.state, lk.transcription) for readiness detection, turn timing, and component latency. Custom LiveKit participants not using the Agents SDK should use the websocket adapter with a relay instead.\nmax_concurrency: Free/Build=5, Ship=20, Scale=50+. Ask the user which tier they\'re on. 
If unknown, default to 5.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket/WebRTC: user\'s agent must emit tool calls:\n WebSocket \u2014 JSON text frame: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\n WebRTC/LiveKit \u2014 publishData() or sendText() on topic "vent:tool-calls". Same JSON.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<metadata_capture>\nWebSocket and LiveKit/WebRTC agents can also emit richer observability metadata:\n {"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n {"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12,"provider_debug_urls":{"log":"https://..."}}}\n {"type":"vent:debug-url","label":"trace","url":"https://..."}\n {"type":"vent:session-report","report":{"room_name":"room-123","events":[...],"metrics":[...]}}\n {"type":"vent:metrics","event":"metrics_collected","metric_type":"eou","metrics":{"speechId":"speech_123","endOfUtteranceDelayMs":420}}\n {"type":"vent:function-tools-executed","event":"function_tools_executed","hasAgentHandoff":true,"tool_calls":[{"name":"lookup_customer","arguments":{"id":"123"}}]}\n {"type":"vent:conversation-item","event":"conversation_item_added","item":{"type":"agent_handoff","newAgentId":"billing-agent"}}\n 
{"type":"vent:session-usage","usage":{"llm":{"promptTokens":123,"completionTokens":45}}}\nTransport:\n WebSocket \u2014 send JSON text frames with these payloads. WebSocket agents may also emit {"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0} when they have native transcript text.\n WebRTC/LiveKit \u2014 publishData() or sendText() on the matching "vent:*" topic, e.g. topic "vent:call-metadata" with the JSON body above.\nFor LiveKit, transcript and timing stay authoritative from native room signals (`lk.transcription`, `lk.agent.state`). Do not emit `vent:transcript` from LiveKit agents.\nFor LiveKit Node agents, prefer the first-party helper instead of manual forwarding:\n```ts\nimport { instrumentLiveKitAgent } from "@vent-hq/livekit";\n\nconst vent = instrumentLiveKitAgent({\n ctx,\n session,\n});\n```\nThis helper must run inside the LiveKit agent runtime with the existing Agents SDK `session` and `ctx` objects. It is the Vent integration layer on top of the Agents SDK, not a replacement for it.\nInstall it with `npm install @vent-hq/livekit` after the package is published to the `vent-hq` npm org. Until then, use the workspace package from this repo.\nThis automatically publishes only the in-agent-only LiveKit signals: `metrics_collected`, `function_tools_executed`, `conversation_item_added`, and a session report on close/shutdown.\nDo not use it to mirror room-visible signals like transcript, agent state timing, or room/session ID \u2014 Vent already gets those from LiveKit itself.\nFor LiveKit inside-agent forwarding, prefer sending the raw LiveKit event payloads on:\n `vent:metrics`\n `vent:function-tools-executed`\n `vent:conversation-item`\n `vent:session-usage`\nUse these metadata events when the agent runtime already knows native IDs, recordings, warnings, debug links, session reports, metrics events, or handoff artifacts. 
This gives custom and LiveKit agents parity with hosted adapters without needing a LiveKit Cloud connector.\n</metadata_capture>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "optional preplanned interrupt tendency: low | high. If set, Vent may pre-plan a caller cut-in before the agent turn starts. 
It does NOT make a mid-turn interrupt LLM call.",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\nInterruption rules:\n- `audio_actions: [{ "action": "interrupt", ... }]` is the deterministic per-turn interrupt test. Prefer this for evaluation.\n- `persona.interruption_style` is only a preplanned caller tendency. 
If used, Vent decides before the agent response starts whether this turn may cut in.\n- Vent no longer pauses mid-turn to ask a second LLM whether to interrupt.\n- For production-faithful testing, prefer explicit `audio_actions.interrupt` over persona interruption.\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" },\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! 
What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check avail\u2014", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400, "interrupted": true },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900, "is_interruption": true },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "response_time_ms": 890, "response_time_source": "ttfw",\n "p50_response_time_ms": 850, "p90_response_time_ms": 1100, "p95_response_time_ms": 1400, "p99_response_time_ms": 1550,\n "first_response_time_ms": 1950,\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950, "total_silence_ms": 4200, "mean_turn_gap_ms": 380,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "transcript_quality": {\n "wer": 0.04,\n "hallucination_events": [\n { "error_count": 5, "reference_text": "triple five one two", "hypothesis_text": "five five five nine two" }\n ],\n "repetition_score": 0.05,\n "reprompt_count": 0,\n "filler_word_rate": 0.8,\n "words_per_minute": 148\n },\n "audio_analysis": {\n "caller_talk_time_ms": 12400,\n "agent_talk_time_ms": 28500,\n "agent_speech_ratio": 0.72,\n "talk_ratio_vad": 0.69,\n "interruption_rate": 0.25,\n "interruption_count": 1,\n "agent_overtalk_after_barge_in_ms": 280,\n "agent_interrupting_user_rate": 0.0,\n "agent_interrupting_user_count": 0,\n "missed_response_windows": 0,\n "longest_monologue_ms": 5800,\n "silence_gaps_over_2s": 1,\n "total_internal_silence_ms": 2400,\n "mean_agent_speech_segment_ms": 3450\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", 
"10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "component_latency": {\n "mean_stt_ms": 120, "mean_llm_ms": 450, "mean_tts_ms": 80,\n "p95_stt_ms": 180, "p95_llm_ms": 620, "p95_tts_ms": 110,\n "mean_speech_duration_ms": 2100,\n "bottleneck": "llm"\n },\n "call_metadata": {\n "platform": "vapi",\n "cost_usd": 0.08,\n "recording_url": "https://example.com/recording",\n "ended_reason": "customer_ended_call",\n "transfers": []\n },\n "warnings": [],\n "audio_actions": [],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAlways present: name, status, caller_prompt, duration_ms, error, transcript, tool_calls, warnings, audio_actions. Nullable when analysis didn\'t run: latency, transcript_quality, audio_analysis, component_latency, call_metadata, emotion (requires prosody: true), debug (requires --verbose).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. **Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed (`call_metadata.transfer_completed`). 
Use `call_metadata.transfers[]` to report transfer type, destination, status, and sources.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. Treat `transcript_quality.hallucination_events` only as a speech-recognition warning signal, not proof of agent hallucination. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. |\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. |\n\n### Interruption evaluation\n\nWhen the transcript contains `interrupted: true` / `is_interruption: true` turns, evaluate these metrics by reading the transcript:\n\n| Metric | How to evaluate | Target |\n|--------|----------------|--------|\n| **Recovery rate** | For each interrupted turn: does the post-interrupt agent response acknowledge or address the interruption? (e.g., "Sure, the earliest is 9 AM" after being cut off mid-availability-list) | >90% |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state? (e.g., still knows the caller\'s name, booking details, etc.) 
| >95% |\n| **Agent overtalk after barge-in** | Use `audio_analysis.agent_overtalk_after_barge_in_ms` when available. Lower is better because it measures how long the agent kept speaking after the caller cut in. | <500ms acceptable |\n| **Agent interrupting user rate** | Use `audio_analysis.agent_interrupting_user_rate` and the transcript to see whether the agent starts speaking before the caller finished. | 0 ideal |\n\nReport these alongside standard metrics when interruption calls run. Flag any turn where the agent ignores the interruption, repeats itself from scratch, or loses context.\n</output_conversation_test>\n</call_config>\n\n\n## Output\n\n- **Exit codes**: 0=pass, 1=fail, 2=error\n- The `run` command outputs **complete results as pretty-printed JSON** \u2014 including full transcript, latency metrics, tool calls, and audio analysis for every call. Do NOT run a separate `vent status` command \u2014 all data is already in the output.\n\n## Vent Access Token\n\nRun `npx vent-hq login` or set `VENT_ACCESS_TOKEN` env var.\nVent provides DEEPGRAM_API_KEY and ANTHROPIC_API_KEY automatically.\n';
6421
6746
 
6422
6747
  // src/skills/cursor.md
6423
- var cursor_default = '---\ndescription: Vent \u2014 Voice agent calls. Run calls against your voice agent, get pass/fail results. Use when the user asks to run voice-agent calls or when you need to verify voice agent behavior after code changes.\nalwaysApply: true\n---\n\n# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud (30-120s).\n\n## When to Call (read this first)\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n- Do NOT skip call verification \u2014 voice agents are non-deterministic, small changes can break flows\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Check results of a previous run |\n\n\n## Critical Rules\n\n1. **Set timeout on shell calls** \u2014 Calls take 30-120s but can reach 5 minutes. Always set a 300-second (5 min) timeout on shell commands that run calls.\n2. 
**Handle backgrounded commands** \u2014 If a call command gets moved to background by the system, wait for it to complete before proceeding. Never end your response without delivering call results.\n3. **Output format** \u2014 In non-TTY mode (when run by an agent), every SSE event is written to stdout as a JSON line. Results are always in stdout.\n4. **This skill is self-contained** \u2014 The full config schema is below. Do NOT re-read this file.\n5. **Always analyze results** \u2014 The run command outputs complete JSON with full transcript, latency, and tool calls. Analyze this output directly \u2014 do NOT run `vent status` afterwards, the data is already there.\n\n## Workflow\n\n### First time: create the call suite\n\n1. Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the **Full Config Schema** section below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). This prevents confusion when multiple adapters are tested in the same project.\n - Name calls after specific flows (e.g., `"reschedule-appointment"`, not `"call-1"`)\n - Write `caller_prompt` as a realistic persona with a specific goal, based on the agent\'s domain\n - Set `max_turns` based on the flow complexity (simple FAQ: 4-6, booking: 8-12, complex: 12-20)\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n### Subsequent runs \u2014 reuse the existing suite\n\nA matching `.vent/suite.<adapter>.json` already exists? Just re-run it. 
No need to recreate.\n\n### Run calls\n\n1. If the suite uses `start_command`, start the shared local session first:\n ```\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n ```\n\n2. Run calls:\n ```\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 add --session\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n\n3. To run multiple calls from the same suite, run each as a separate command:\n ```\n npx vent-hq run -f .vent/suite.vapi.json --call happy-path\n npx vent-hq run -f .vent/suite.vapi.json --call edge-case\n ```\n\n4. Analyze each result, identify failures, correlate with the codebase, and fix.\n5. **Compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. Read the second-most-recent JSON in `.vent/runs/` and compare against the current run: status flips, TTFW p50/p95 changes >20%, tool call count drops, cost increases >30%, transcript divergence. Correlate with `git diff` between the two runs\' git SHAs. Skip if no previous run exists.\n\n## Connection\n\n- **BYO agent runtime**: your agent owns its own provider credentials. Use `start_command` for a local agent or `agent_url` for a hosted custom endpoint.\n- **Platform-direct runtime**: use adapter `vapi | retell | elevenlabs | bland | livekit`. 
This is the only mode where Vent itself needs provider credentials and saved platform connections apply.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). 
Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. 
To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID. 
Only add bland_api_key/bland_pathway_id to the JSON if those env vars are not already available.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID. Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: Starter=10, Growth=50, Enterprise=100+. Ask the user which tier they\'re on. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if the agent registers with an explicit agent_name in WorkerOptions. Omit for automatic dispatch.\nmax_concurrency: Free/Build=5, Ship=20, Scale=50+. Ask the user which tier they\'re on. 
If unknown, default to 5.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket/WebRTC: user\'s agent must emit tool calls:\n WebSocket \u2014 JSON text frame: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\n WebRTC/LiveKit \u2014 publishData() or sendText() on topic "vent:tool-calls". Same JSON.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 
800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "low (~3/10 turns) | high (~7/10 turns)",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "silence", "at_turn": "N", "duration_ms": "1000-30000" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to 
next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" },\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! 
What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check avail\u2014", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400, "interrupted": true },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900, "is_interruption": true },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950, "total_silence_ms": 4200, "mean_turn_gap_ms": 380,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "transcript_quality": {\n "wer": 0.04,\n "hallucination_events": [\n { "error_count": 5, "reference_text": "triple five one two", "hypothesis_text": "five five five nine two" }\n ],\n "repetition_score": 0.05,\n "reprompt_count": 0,\n "filler_word_rate": 0.8,\n "words_per_minute": 148\n },\n "audio_analysis": {\n "agent_speech_ratio": 0.72,\n "interruption_rate": 0.25,\n "interruption_count": 1,\n "barge_in_recovery_time_ms": 280,\n "agent_interrupting_user_rate": 0.0,\n "agent_interrupting_user_count": 0,\n "missed_response_windows": 0,\n "longest_monologue_ms": 5800,\n "silence_gaps_over_2s": 1,\n "total_internal_silence_ms": 2400,\n "mean_agent_speech_segment_ms": 3450\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", "10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "call_metadata": {\n "platform": "vapi",\n "recording_url": "https://example.com/recording"\n },\n "warnings": [],\n "audio_actions": [\n { "at_turn": 5, "action": "silence", "metrics": { "agent_prompted": false, 
"unprompted_utterance_count": 0, "silence_duration_ms": 8000 } }\n ],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAll fields optional except name, status, caller_prompt, duration_ms, transcript. Fields appear only when relevant analysis ran (e.g., emotion requires prosody: true).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. **Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed. If `call_metadata.transfers[*].verification` is present, use it to mention second-leg observation, connect latency, transcript/context summary, and whether context passing was verified.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. Treat `transcript_quality.hallucination_events` only as a speech-recognition warning signal, not proof of agent hallucination. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. 
|\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. |\n\n### Interruption evaluation\n\nWhen the transcript contains `interrupted: true` / `is_interruption: true` turns, evaluate these metrics by reading the transcript:\n\n| Metric | How to evaluate | Target |\n|--------|----------------|--------|\n| **Recovery rate** | For each interrupted turn: does the post-interrupt agent response acknowledge or address the interruption? | >90% |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state? | >95% |\n| **Barge-in recovery time** | Use `audio_analysis.barge_in_recovery_time_ms` when available. Lower is better because it measures how long the agent kept speaking after the caller cut in. | <500ms acceptable |\n| **Agent interrupting user rate** | Use `audio_analysis.agent_interrupting_user_rate` and the transcript to see whether the agent starts speaking before the caller finished. | 0 ideal |\n\nReport these alongside standard metrics when interruption calls run.\n</output_conversation_test>\n</call_config>\n\n\n## Exit Codes\n\n0=pass, 1=fail, 2=error\n\n## Vent Access Token\n\nSet `VENT_ACCESS_TOKEN` env var or run `npx vent-hq login`.\n';
6748
+ var cursor_default = '---\ndescription: Vent \u2014 Voice agent calls. Run calls against your voice agent, get pass/fail results. Use when the user asks to run voice-agent calls or when you need to verify voice agent behavior after code changes.\nalwaysApply: true\n---\n\n# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud (30-120s).\n\n## When to Call (read this first)\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n- Do NOT skip call verification \u2014 voice agents are non-deterministic, small changes can break flows\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --verbose` | Include debug fields in the result JSON |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Check results of a previous run |\n| `npx vent-hq status <run-id> --verbose` | Re-print a run with debug fields included |\n\n## When To Use `--verbose`\n\nDefault output is enough for most work. 
It already includes:\n- transcript\n- latency\n- transcript quality (`wer` / `cer`)\n- audio analysis\n- tool calls\n- summary cost / recording / transfers\n\nUse `--verbose` only when you need debugging detail that is not in the default result:\n- per-turn debug fields: timestamps, caller decision mode, silence pad, STT confidence, platform transcript\n- raw signal analysis: `debug.signal_quality`\n- harness timings: `debug.harness_overhead`\n- raw prosody payload and warnings\n- raw provider warnings\n- per-turn component latency arrays\n- raw observed tool-call timeline\n- provider-specific metadata in `debug.provider_metadata`\n\nTrigger `--verbose` when:\n- transcript accuracy looks wrong and you need to inspect `platform_transcript`\n- latency is bad and you need per-turn/component breakdowns\n- interruptions/barge-in behavior looks wrong\n- tool-call execution looks inconsistent or missing\n- the provider returned warnings/errors or you need provider-native artifacts\n\nSkip `--verbose` when:\n- you only need pass/fail, transcript, latency, tool calls, recording, or summary\n- you are doing quick iteration on prompt wording and the normal result already explains the failure\n\n## Normalization Contract\n\nVent always returns one normalized result shape on `stdout` across adapters. 
Treat these as the stable categories:\n- `transcript`\n- `latency`\n- `transcript_quality`\n- `audio_analysis`\n- `tool_calls`\n- `component_latency`\n- `call_metadata`\n- `warnings`\n- `audio_actions`\n- `emotion`\n\nSource-of-truth policy:\n- Vent computes transcript, latency, and audio-quality metrics itself.\n- Hosted adapters choose the best source per category, usually provider post-call data for tool calls, call metadata, transfers, provider transcripts, and recordings.\n- Realtime provider events are fallback or enrichment only when post-call data is missing, delayed, weaker for that category, or provider-specific.\n- `LiveKit` helper events are the provider-native path for rich in-agent observability.\n- `websocket`/custom agents are realtime-native but still map into the same normalized categories.\n- Keep adapter-specific details in `call_metadata.provider_metadata` or `debug.provider_metadata`, not in new top-level fields.\n\n\n## Critical Rules\n\n1. **Set timeout on shell calls** \u2014 Calls take 30-120s but can reach 5 minutes. Always set a 300-second (5 min) timeout on shell commands that run calls.\n2. **Handle backgrounded commands** \u2014 If a call command gets moved to background by the system, wait for it to complete before proceeding. Never end your response without delivering call results.\n3. **Output format** \u2014 In non-TTY mode (when run by an agent), every SSE event is written to stdout as a JSON line. Results are always in stdout.\n4. **This skill is self-contained** \u2014 The full config schema is below. Do NOT re-read this file.\n5. **Always analyze results** \u2014 The run command outputs complete JSON with full transcript, latency, and tool calls. Use `--verbose` only when the default result is not enough to explain the failure. Analyze this output directly \u2014 do NOT run `vent status` afterwards unless you are re-checking a past run.\n\n## Workflow\n\n### First time: create the call suite\n\n1. 
Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the **Full Config Schema** section below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). This prevents confusion when multiple adapters are tested in the same project.\n - Name calls after specific flows (e.g., `"reschedule-appointment"`, not `"call-1"`)\n - Write `caller_prompt` as a realistic persona with a specific goal, based on the agent\'s domain\n - Set `max_turns` based on the flow complexity (simple FAQ: 4-6, booking: 8-12, complex: 12-20)\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n### Subsequent runs \u2014 reuse the existing suite\n\nA matching `.vent/suite.<adapter>.json` already exists? Just re-run it. No need to recreate.\n\n### Run calls\n\n1. If the suite uses `start_command`, start the shared local session first:\n ```\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n ```\n\n2. Run calls:\n ```\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 add --session\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n\n3. 
To run multiple calls from the same suite, run each as a separate command:\n ```\n npx vent-hq run -f .vent/suite.vapi.json --call happy-path\n npx vent-hq run -f .vent/suite.vapi.json --call edge-case\n ```\n\n4. Analyze each result, identify failures, correlate with the codebase, and fix.\n5. **Compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. Read the second-most-recent JSON in `.vent/runs/` and compare against the current run: status flips, TTFW p50/p95 changes >20%, tool call count drops, cost increases >30%, transcript divergence. Correlate with `git diff` between the two runs\' git SHAs. Skip if no previous run exists.\n\n## Connection\n\n- **BYO agent runtime**: your agent owns its own provider credentials. Use `start_command` for a local agent or `agent_url` for a hosted custom endpoint.\n- **Platform-direct runtime**: use adapter `vapi | retell | elevenlabs | bland | livekit`. This is the only mode where Vent itself needs provider credentials and saved platform connections apply.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n| `vent:session` | 
`{"type":"vent:session","platform":"custom","provider_call_id":"...","provider_session_id":"..."}` | Reports stable provider/session identifiers |\n| `vent:call-metadata` | `{"type":"vent:call-metadata","call_metadata":{...}}` | Reports post-call metadata such as cost, recordings, variables, and provider-specific artifacts |\n| `vent:transcript` | `{"type":"vent:transcript","role":"caller"\\|"agent","text":"...","turn_index":0}` | Reports platform/native transcript text for caller or agent |\n| `vent:transfer` | `{"type":"vent:transfer","destination":"...","status":"attempted"\\|"completed"}` | Reports transfer attempts and outcomes |\n| `vent:debug-url` | `{"type":"vent:debug-url","label":"log","url":"https://..."}` | Reports provider debug/deep-link URLs |\n| `vent:warning` | `{"type":"vent:warning","message":"...","code":"..."}` | Reports provider/runtime warnings worth preserving in run metadata |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). 
Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. 
To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID. 
Only add bland_api_key/bland_pathway_id to the JSON if those env vars are not already available.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID. Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: Starter=10, Growth=50, Enterprise=100+. Ask the user which tier they\'re on. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if the agent registers with an explicit agent_name in WorkerOptions. Omit for automatic dispatch.\nThe livekit adapter requires the LiveKit Agents SDK. It depends on Agents SDK signals (lk.agent.state, lk.transcription) for readiness detection, turn timing, and component latency. Custom LiveKit participants not using the Agents SDK should use the websocket adapter with a relay instead.\nmax_concurrency: Free/Build=5, Ship=20, Scale=50+. Ask the user which tier they\'re on. 
If unknown, default to 5.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket/WebRTC: user\'s agent must emit tool calls:\n WebSocket \u2014 JSON text frame: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\n WebRTC/LiveKit \u2014 publishData() or sendText() on topic "vent:tool-calls". Same JSON.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<metadata_capture>\nWebSocket and LiveKit/WebRTC agents can also emit richer observability metadata:\n {"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n {"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12,"provider_debug_urls":{"log":"https://..."}}}\n {"type":"vent:debug-url","label":"trace","url":"https://..."}\n {"type":"vent:session-report","report":{"room_name":"room-123","events":[...],"metrics":[...]}}\n {"type":"vent:metrics","event":"metrics_collected","metric_type":"eou","metrics":{"speechId":"speech_123","endOfUtteranceDelayMs":420}}\n {"type":"vent:function-tools-executed","event":"function_tools_executed","hasAgentHandoff":true,"tool_calls":[{"name":"lookup_customer","arguments":{"id":"123"}}]}\n {"type":"vent:conversation-item","event":"conversation_item_added","item":{"type":"agent_handoff","newAgentId":"billing-agent"}}\n 
{"type":"vent:session-usage","usage":{"llm":{"promptTokens":123,"completionTokens":45}}}\nTransport:\n WebSocket \u2014 send JSON text frames with these payloads. WebSocket agents may also emit {"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0} when they have native transcript text.\n WebRTC/LiveKit \u2014 publishData() or sendText() on the matching "vent:*" topic, e.g. topic "vent:call-metadata" with the JSON body above.\nFor LiveKit, transcript and timing stay authoritative from native room signals (`lk.transcription`, `lk.agent.state`). Do not emit `vent:transcript` from LiveKit agents.\nFor LiveKit Node agents, prefer the first-party helper instead of manual forwarding:\n```ts\nimport { instrumentLiveKitAgent } from "@vent-hq/livekit";\n\nconst vent = instrumentLiveKitAgent({\n ctx,\n session,\n});\n```\nThis helper must run inside the LiveKit agent runtime with the existing Agents SDK `session` and `ctx` objects. It is the Vent integration layer on top of the Agents SDK, not a replacement for it.\nInstall it with `npm install @vent-hq/livekit` after the package is published to the `vent-hq` npm org. Until then, use the workspace package from this repo.\nThis automatically publishes only the in-agent-only LiveKit signals: `metrics_collected`, `function_tools_executed`, `conversation_item_added`, and a session report on close/shutdown.\nDo not use it to mirror room-visible signals like transcript, agent state timing, or room/session ID \u2014 Vent already gets those from LiveKit itself.\nFor LiveKit inside-agent forwarding, prefer sending the raw LiveKit event payloads on:\n `vent:metrics`\n `vent:function-tools-executed`\n `vent:conversation-item`\n `vent:session-usage`\nUse these metadata events when the agent runtime already knows native IDs, recordings, warnings, debug links, session reports, metrics events, or handoff artifacts. 
This gives custom and LiveKit agents parity with hosted adapters without needing a LiveKit Cloud connector.\n</metadata_capture>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "optional preplanned interrupt tendency: low | high. If set, Vent may pre-plan a caller cut-in before the agent turn starts. 
It does NOT make a mid-turn interrupt LLM call.",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\nInterruption rules:\n- `audio_actions: [{ "action": "interrupt", ... }]` is the deterministic per-turn interrupt test. Prefer this for evaluation.\n- `persona.interruption_style` is only a preplanned caller tendency. 
If used, Vent decides before the agent response starts whether this turn may cut in.\n- Vent no longer pauses mid-turn to ask a second LLM whether to interrupt.\n- For production-faithful testing, prefer explicit `audio_actions.interrupt` over persona interruption.\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" },\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! 
What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check avail\u2014", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400, "interrupted": true },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900, "is_interruption": true },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "response_time_ms": 890, "response_time_source": "ttfw",\n "p50_response_time_ms": 850, "p90_response_time_ms": 1100, "p95_response_time_ms": 1400, "p99_response_time_ms": 1550,\n "first_response_time_ms": 1950,\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950, "total_silence_ms": 4200, "mean_turn_gap_ms": 380,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "transcript_quality": {\n "wer": 0.04,\n "hallucination_events": [\n { "error_count": 5, "reference_text": "triple five one two", "hypothesis_text": "five five five nine two" }\n ],\n "repetition_score": 0.05,\n "reprompt_count": 0,\n "filler_word_rate": 0.8,\n "words_per_minute": 148\n },\n "audio_analysis": {\n "caller_talk_time_ms": 12400,\n "agent_talk_time_ms": 28500,\n "agent_speech_ratio": 0.72,\n "talk_ratio_vad": 0.69,\n "interruption_rate": 0.25,\n "interruption_count": 1,\n "agent_overtalk_after_barge_in_ms": 280,\n "agent_interrupting_user_rate": 0.0,\n "agent_interrupting_user_count": 0,\n "missed_response_windows": 0,\n "longest_monologue_ms": 5800,\n "silence_gaps_over_2s": 1,\n "total_internal_silence_ms": 2400,\n "mean_agent_speech_segment_ms": 3450\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", 
"10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "component_latency": {\n "mean_stt_ms": 120, "mean_llm_ms": 450, "mean_tts_ms": 80,\n "p95_stt_ms": 180, "p95_llm_ms": 620, "p95_tts_ms": 110,\n "mean_speech_duration_ms": 2100,\n "bottleneck": "llm"\n },\n "call_metadata": {\n "platform": "vapi",\n "cost_usd": 0.08,\n "recording_url": "https://example.com/recording",\n "ended_reason": "customer_ended_call",\n "transfers": []\n },\n "warnings": [],\n "audio_actions": [],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAlways present: name, status, caller_prompt, duration_ms, error, transcript, tool_calls, warnings, audio_actions. Nullable when analysis didn\'t run: latency, transcript_quality, audio_analysis, component_latency, call_metadata, emotion (requires prosody: true), debug (requires --verbose).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. **Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed (`call_metadata.transfer_completed`). 
Use `call_metadata.transfers[]` to report transfer type, destination, status, and sources.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. Treat `transcript_quality.hallucination_events` only as a speech-recognition warning signal, not proof of agent hallucination. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. |\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. |\n\n### Interruption evaluation\n\nWhen the transcript contains `interrupted: true` / `is_interruption: true` turns, evaluate these metrics by reading the transcript:\n\n| Metric | How to evaluate | Target |\n|--------|----------------|--------|\n| **Recovery rate** | For each interrupted turn: does the post-interrupt agent response acknowledge or address the interruption? | >90% |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state? | >95% |\n| **Agent overtalk after barge-in** | Use `audio_analysis.agent_overtalk_after_barge_in_ms` when available. Lower is better because it measures how long the agent kept speaking after the caller cut in. 
| <500ms acceptable |\n| **Agent interrupting user rate** | Use `audio_analysis.agent_interrupting_user_rate` and the transcript to see whether the agent starts speaking before the caller has finished. | 0 ideal |\n\nReport these alongside standard metrics when interruption calls run.\n</output_conversation_test>\n</call_config>\n\n\n## Exit Codes\n\n0=pass, 1=fail, 2=error\n\n## Vent Access Token\n\nSet the `VENT_ACCESS_TOKEN` env var or run `npx vent-hq login`.\n';
6424
6749
 
6425
6750
  // src/skills/codex.md
6426
- var codex_default = '# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud (30-120s).\n\n## When to Call\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Get full results for a completed run |\n\n\n## Workflow\n\n1. Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the config schema below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). This prevents confusion when multiple adapters are tested in the same project.\n4. 
Run calls:\n ```\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 first start relay, then add --session\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n5. To run multiple calls, run each as a separate command.\n6. After results return, **compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. Compare status flips, TTFW p50/p95 changes >20%, tool call count drops, cost increases >30%. Skip if no previous run exists.\n7. After code changes, re-run the same way.\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n## Critical Rules\n\n1. **Run calls in parallel with 5min timeout** \u2014 Each call is a separate shell command, run them all at once. Set a 300-second (5 min) timeout on each \u2014 calls can take up to 5 minutes.\n2. **Handle backgrounded commands** \u2014 If a call command gets moved to background by the system, wait for it to complete before proceeding. Never end your response without delivering call results.\n3. **Output format** \u2014 In non-TTY mode (when run by an agent), every SSE event is written to stdout as a JSON line. Results are always in stdout.\n4. 
**This skill is self-contained** \u2014 The full config schema is below.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). 
Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. 
To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID. 
Only add bland_api_key/bland_pathway_id to the JSON if those env vars are not already available.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID. Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: Starter=10, Growth=50, Enterprise=100+. Ask the user which tier they\'re on. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if the agent registers with an explicit agent_name in WorkerOptions. Omit for automatic dispatch.\nmax_concurrency: Free/Build=5, Ship=20, Scale=50+. Ask the user which tier they\'re on. 
If unknown, default to 5.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket/WebRTC: user\'s agent must emit tool calls:\n WebSocket \u2014 JSON text frame: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\n WebRTC/LiveKit \u2014 publishData() or sendText() on topic "vent:tool-calls". Same JSON.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 
800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "low (~3/10 turns) | high (~7/10 turns)",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "silence", "at_turn": "N", "duration_ms": "1000-30000" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to 
next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" },\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! 
What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check avail\u2014", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400, "interrupted": true },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900, "is_interruption": true },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950, "total_silence_ms": 4200, "mean_turn_gap_ms": 380,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "transcript_quality": {\n "wer": 0.04,\n "hallucination_events": [\n { "error_count": 5, "reference_text": "triple five one two", "hypothesis_text": "five five five nine two" }\n ],\n "repetition_score": 0.05,\n "reprompt_count": 0,\n "filler_word_rate": 0.8,\n "words_per_minute": 148\n },\n "audio_analysis": {\n "agent_speech_ratio": 0.72,\n "interruption_rate": 0.25,\n "interruption_count": 1,\n "barge_in_recovery_time_ms": 280,\n "agent_interrupting_user_rate": 0.0,\n "agent_interrupting_user_count": 0,\n "missed_response_windows": 0,\n "longest_monologue_ms": 5800,\n "silence_gaps_over_2s": 1,\n "total_internal_silence_ms": 2400,\n "mean_agent_speech_segment_ms": 3450\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", "10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "call_metadata": {\n "platform": "vapi",\n "recording_url": "https://example.com/recording"\n },\n "warnings": [],\n "audio_actions": [\n { "at_turn": 5, "action": "silence", "metrics": { "agent_prompted": false, 
"unprompted_utterance_count": 0, "silence_duration_ms": 8000 } }\n ],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAll fields optional except name, status, caller_prompt, duration_ms, transcript. Fields appear only when relevant analysis ran (e.g., emotion requires prosody: true).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. **Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed. If `call_metadata.transfers[*].verification` is present, use it to mention second-leg observation, connect latency, transcript/context summary, and whether context passing was verified.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. Treat `transcript_quality.hallucination_events` only as a speech-recognition warning signal, not proof of agent hallucination. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. 
|\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. |\n\n### Interruption evaluation\n\nWhen the transcript contains `interrupted: true` / `is_interruption: true` turns, evaluate these metrics by reading the transcript:\n\n| Metric | How to evaluate | Target |\n|--------|----------------|--------|\n| **Recovery rate** | For each interrupted turn: does the post-interrupt agent response acknowledge or address the interruption? | >90% |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state? | >95% |\n| **Barge-in recovery time** | Use `audio_analysis.barge_in_recovery_time_ms` when available. Lower is better because it measures how long the agent kept speaking after the caller cut in. | <500ms acceptable |\n| **Agent interrupting user rate** | Use `audio_analysis.agent_interrupting_user_rate` and the transcript to see whether the agent starts speaking before the caller finished. | 0 ideal |\n\nReport these alongside standard metrics when interruption calls run.\n</output_conversation_test>\n</call_config>\n\n\n## Exit Codes\n\n0=pass, 1=fail, 2=error\n';
6751
+ var codex_default = '# Vent \u2014 Voice Agent Calls\n\nCall voice agents from the terminal. Calls run in the cloud (30-120s).\n\n## When to Call\n\n- After modifying voice agent code (system prompt, tools, handlers): ALWAYS run calls\n- After changing audio config: run calls\n- Before marking a task complete that touches agent behavior: run calls\n\n## Setup (if no Vent access token)\n\nIf `~/.vent/credentials` does not exist and `VENT_ACCESS_TOKEN` is not set:\n\n```bash\nnpx vent-hq init\n```\n\nOne command. No email, no browser, no human interaction needed. You get 10 free runs instantly.\n\n## Commands\n\n| Command | Purpose |\n|---------|---------|\n| `npx vent-hq init` | First-time setup (creates account + installs skills) |\n| `npx vent-hq agent start -f .vent/suite.<adapter>.json` | Start one shared local agent session (required for `start_command`) |\n| `npx vent-hq agent stop <session-id>` | Close a shared local agent session |\n| `npx vent-hq run -f .vent/suite.<adapter>.json` | Run a call from suite file (auto-selects if only one call) |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --verbose` | Include debug fields in the result JSON |\n| `npx vent-hq run -f .vent/suite.<adapter>.json --call <name>` | Run a specific named call |\n| `npx vent-hq stop <run-id>` | Cancel a queued or running call |\n| `npx vent-hq status <run-id>` | Get full results for a completed run |\n| `npx vent-hq status <run-id> --verbose` | Re-print a run with debug fields included |\n\n## When To Use `--verbose`\n\nDefault output is enough for most iterations. 
It already includes:\n- transcript\n- latency\n- transcript quality (`wer` / `cer`)\n- audio analysis\n- tool calls\n- summary cost / recording / transfers\n\nUse `--verbose` only when you need debugging detail that is not in the default result:\n- per-turn debug fields: timestamps, caller decision mode, silence pad, STT confidence, platform transcript\n- raw signal analysis: `debug.signal_quality`\n- harness timings: `debug.harness_overhead`\n- raw prosody payload and warnings\n- raw provider warnings\n- per-turn component latency arrays\n- raw observed tool-call timeline\n- provider-specific metadata in `debug.provider_metadata`\n\nTrigger `--verbose` when:\n- transcript accuracy looks wrong and you need to inspect `platform_transcript`\n- latency is bad and you need per-turn/component breakdowns\n- interruptions/barge-in behavior looks wrong\n- tool-call execution looks inconsistent or missing\n- the provider returned warnings/errors or you need provider-native artifacts\n\nSkip `--verbose` when:\n- you only need pass/fail, transcript, latency, tool calls, recording, or summary\n- you are doing quick iteration on prompt wording and the normal result already explains the failure\n\n## Normalization Contract\n\nVent always returns one normalized result shape on `stdout` across adapters. 
Treat these as the stable categories:\n- `transcript`\n- `latency`\n- `transcript_quality`\n- `audio_analysis`\n- `tool_calls`\n- `component_latency`\n- `call_metadata`\n- `warnings`\n- `audio_actions`\n- `emotion`\n\nSource-of-truth policy:\n- Vent computes transcript, latency, and audio-quality metrics itself.\n- Hosted adapters choose the best source per category, usually provider post-call data for tool calls, call metadata, transfers, provider transcripts, and recordings.\n- Realtime provider events are fallback or enrichment only when post-call data is missing, delayed, weaker for that category, or provider-specific.\n- `LiveKit` helper events are the provider-native path for rich in-agent observability.\n- `websocket`/custom agents are realtime-native but still map into the same normalized categories.\n- Keep adapter-specific details in `call_metadata.provider_metadata` or `debug.provider_metadata`, not in new top-level fields.\n\n## Workflow\n\n1. Read the voice agent\'s codebase \u2014 understand its system prompt, tools, intents, and domain.\n2. Read the config schema below for all available fields.\n3. Create the suite file in `.vent/` using the naming convention: `.vent/suite.<adapter>.json` (e.g., `.vent/suite.vapi.json`, `.vent/suite.websocket.json`, `.vent/suite.retell.json`). This prevents confusion when multiple adapters are tested in the same project.\n4. Run calls:\n ```\n # suite with one call (auto-selects)\n npx vent-hq run -f .vent/suite.<adapter>.json\n\n # suite with multiple calls \u2014 pick one by name\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path\n\n # local start_command \u2014 first start relay, then add --session\n npx vent-hq agent start -f .vent/suite.<adapter>.json\n npx vent-hq run -f .vent/suite.<adapter>.json --call happy-path --session <session-id>\n ```\n5. To run multiple calls, run each as a separate command.\n6. 
After results return, **compare with previous run** \u2014 Vent saves full result JSON to `.vent/runs/` after every run. Use `--verbose` only when the default result is not enough to explain the failure. Compare status flips, TTFW p50/p95 changes >20%, tool call count drops, cost increases >30%. Skip if no previous run exists.\n7. After code changes, re-run the same way.\n\n### Multiple suite files\n\nIf `.vent/` contains more than one suite file, **always check which adapter each suite uses before running**. Read the `connection.adapter` field in each file. Never run a suite intended for a different adapter \u2014 results will be meaningless or fail. When reporting results, always state which suite file produced them (e.g., "Results from `.vent/suite.vapi.json`:").\n\n## Critical Rules\n\n1. **Run calls in parallel with 5min timeout** \u2014 Each call is a separate shell command, run them all at once. Set a 300-second (5 min) timeout on each \u2014 calls can take up to 5 minutes.\n2. **Handle backgrounded commands** \u2014 If a call command gets moved to background by the system, wait for it to complete before proceeding. Never end your response without delivering call results.\n3. **Output format** \u2014 In non-TTY mode (when run by an agent), every SSE event is written to stdout as a JSON line. Results are always in stdout.\n4. 
**This skill is self-contained** \u2014 The full config schema is below.\n\n## WebSocket Protocol (BYO agents)\n\nWhen using `adapter: "websocket"`, Vent communicates with the agent over a single WebSocket connection:\n\n- **Binary frames** \u2192 PCM audio (16-bit mono, configurable sample rate)\n- **Text frames** \u2192 optional JSON events the agent can send for better test accuracy:\n\n| Event | Format | Purpose |\n|-------|--------|---------|\n| `speech-update` | `{"type":"speech-update","status":"started"\\|"stopped"}` | Enables platform-assisted turn detection (more accurate than VAD alone) |\n| `tool_call` | `{"type":"tool_call","name":"...","arguments":{...},"result":...,"successful":bool,"duration_ms":number}` | Reports tool calls for observability |\n| `vent:timing` | `{"type":"vent:timing","stt_ms":number,"llm_ms":number,"tts_ms":number}` | Reports component latency breakdown per turn |\n| `vent:session` | `{"type":"vent:session","platform":"custom","provider_call_id":"...","provider_session_id":"..."}` | Reports stable provider/session identifiers |\n| `vent:call-metadata` | `{"type":"vent:call-metadata","call_metadata":{...}}` | Reports post-call metadata such as cost, recordings, variables, and provider-specific artifacts |\n| `vent:transcript` | `{"type":"vent:transcript","role":"caller"\\|"agent","text":"...","turn_index":0}` | Reports platform/native transcript text for caller or agent |\n| `vent:transfer` | `{"type":"vent:transfer","destination":"...","status":"attempted"\\|"completed"}` | Reports transfer attempts and outcomes |\n| `vent:debug-url` | `{"type":"vent:debug-url","label":"log","url":"https://..."}` | Reports provider debug/deep-link URLs |\n| `vent:warning` | `{"type":"vent:warning","message":"...","code":"..."}` | Reports provider/runtime warnings worth preserving in run metadata |\n\nVent sends `{"type":"end-call"}` to the agent when the test is done.\n\nAll text frames are optional \u2014 audio-only agents work fine with 
VAD-based turn detection.\n\n## Full Config Schema\n\n- ALL calls MUST reference the agent\'s real context (system prompt, tools, knowledge base) from the codebase.\n\n<vent_run>\n{\n "connection": { ... },\n "calls": {\n "happy-path": { ... },\n "edge-case": { ... }\n }\n}\n</vent_run>\n\nOne suite file per platform/adapter. `connection` is declared once, `calls` is a named map of call specs. Each key becomes the call name. Run one call at a time with `--call <name>`.\n\n<config_connection>\n{\n "connection": {\n "adapter": "required -- websocket | livekit | vapi | retell | elevenlabs | bland",\n "start_command": "shell command to start agent (relay only, required for local)",\n "health_endpoint": "health check path after start_command (default: /health, relay only, required for local)",\n "agent_url": "hosted custom agent URL (wss:// or https://). Use for BYO hosted agents.",\n "agent_port": "local agent port (default: 3001, required for local)",\n "platform": "optional authoring convenience for platform-direct adapters only. The CLI resolves this locally, creates/updates a saved platform connection, and strips raw provider secrets before submit. Do not use for websocket start_command or agent_url runs."\n }\n}\n\n<credential_resolution>\nIMPORTANT: How to handle platform credentials (API keys, secrets, agent IDs):\n\nThere are two product modes:\n- `BYO agent runtime`: your agent owns its own provider credentials. This covers both `start_command` (local) and `agent_url` (hosted custom endpoint).\n- `Platform-direct runtime`: Vent talks to `vapi`, `retell`, `elevenlabs`, `bland`, or `livekit` directly. This is the only mode that uses saved platform connections.\n\n1. For `start_command` and `agent_url` runs, do NOT put Deepgram / ElevenLabs / OpenAI / other provider keys into Vent config unless the Vent adapter itself needs them. Those credentials belong to the user\'s local or hosted agent runtime.\n2. 
For platform-direct adapters (`vapi`, `retell`, `elevenlabs`, `bland`, `livekit`), the CLI auto-resolves credentials from `.env.local`, `.env`, and the current shell env. If those env vars already exist, you can omit credential fields from the config JSON entirely.\n3. If you include credential fields in the config, put the ACTUAL VALUE, NOT the env var name. WRONG: `"vapi_api_key": "VAPI_API_KEY"`. RIGHT: `"vapi_api_key": "sk-abc123..."` or omit the field.\n4. The CLI uses the resolved provider config to create or update a saved platform connection server-side, then submits only `platform_connection_id`. Users should not manually author `platform_connection_id`.\n5. To check whether credentials are already available, inspect `.env.local`, `.env`, and any relevant shell env visible to the CLI process.\n\nAuto-resolved env vars per platform:\n| Platform | Config field | Env var (auto-resolved from `.env.local`, `.env`, or shell env) |\n|----------|-------------|-----------------------------------|\n| Vapi | vapi_api_key | VAPI_API_KEY |\n| Vapi | vapi_assistant_id | VAPI_ASSISTANT_ID |\n| Bland | bland_api_key | BLAND_API_KEY |\n| Bland | bland_pathway_id | BLAND_PATHWAY_ID |\n| LiveKit | livekit_api_key | LIVEKIT_API_KEY |\n| LiveKit | livekit_api_secret | LIVEKIT_API_SECRET |\n| LiveKit | livekit_url | LIVEKIT_URL |\n| Retell | retell_api_key | RETELL_API_KEY |\n| Retell | retell_agent_id | RETELL_AGENT_ID |\n| ElevenLabs | elevenlabs_api_key | ELEVENLABS_API_KEY |\n| ElevenLabs | elevenlabs_agent_id | ELEVENLABS_AGENT_ID |\n\nThe CLI strips raw platform secrets before `/runs/submit`. Platform-direct runs go through a saved `platform_connection_id` automatically. 
BYO agent runs (`start_command` and `agent_url`) do not.\n</credential_resolution>\n\n<config_adapter_rules>\nWebSocket (local agent via relay):\n{\n "connection": {\n "adapter": "websocket",\n "start_command": "npm run start",\n "health_endpoint": "/health",\n "agent_port": 3001\n }\n}\n\nWebSocket (hosted custom agent):\n{\n "connection": {\n "adapter": "websocket",\n "agent_url": "https://my-agent.fly.dev"\n }\n}\n\nRetell:\n{\n "connection": {\n "adapter": "retell",\n "platform": { "provider": "retell" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: RETELL_API_KEY, RETELL_AGENT_ID. Only add retell_api_key/retell_agent_id to the JSON if those env vars are not already available.\n\nBland:\n{\n "connection": {\n "adapter": "bland",\n "platform": { "provider": "bland" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: BLAND_API_KEY, BLAND_PATHWAY_ID. Only add bland_api_key/bland_pathway_id to the JSON if those env vars are not already available.\nNote: All agent config (voice, model, tools, etc.) is set on the pathway itself, not in Vent config.\n\nVapi:\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: VAPI_API_KEY, VAPI_ASSISTANT_ID. Only add vapi_api_key/vapi_assistant_id to the JSON if those env vars are not already available.\nmax_concurrency for Vapi: Starter=10, Growth=50, Enterprise=100+. Ask the user which tier they\'re on. If unknown, default to 10.\nAll assistant config (voice, model, transcriber, interruption settings, etc.) is set on the Vapi assistant itself, not in Vent config.\n\nElevenLabs:\n{\n "connection": {\n "adapter": "elevenlabs",\n "platform": { "provider": "elevenlabs" }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: ELEVENLABS_API_KEY, ELEVENLABS_AGENT_ID. 
Only add elevenlabs_api_key/elevenlabs_agent_id to the JSON if those env vars are not already available.\n\nLiveKit:\n{\n "connection": {\n "adapter": "livekit",\n "platform": {\n "provider": "livekit",\n "livekit_agent_name": "my-agent",\n "max_concurrency": 5\n }\n }\n}\nCredentials auto-resolve from `.env.local`, `.env`, or shell env: LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL. Only add these to the JSON if those env vars are not already available.\nlivekit_agent_name is optional -- only needed if the agent registers with an explicit agent_name in WorkerOptions. Omit for automatic dispatch.\nThe livekit adapter requires the LiveKit Agents SDK. It depends on Agents SDK signals (lk.agent.state, lk.transcription) for readiness detection, turn timing, and component latency. Custom LiveKit participants not using the Agents SDK should use the websocket adapter with a relay instead.\nmax_concurrency: Free/Build=5, Ship=20, Scale=50+. Ask the user which tier they\'re on. If unknown, default to 5.\n</config_adapter_rules>\n</config_connection>\n\n\n<call_config>\n<tool_call_capture>\nvapi/retell/elevenlabs/bland: automatic via platform API (no user code needed).\nWebSocket/WebRTC: user\'s agent must emit tool calls:\n WebSocket \u2014 JSON text frame: {"type":"tool_call","name":"...","arguments":{},"result":{},"successful":true,"duration_ms":150}\n WebRTC/LiveKit \u2014 publishData() or sendText() on topic "vent:tool-calls". Same JSON.\n</tool_call_capture>\n\n<component_timing>\nPlatform adapters (vapi/retell/elevenlabs/bland/livekit) get STT/LLM/TTS breakdown automatically.\nWebSocket agents can opt in by sending a JSON text frame after each agent turn:\n {"type":"vent:timing","stt_ms":120,"llm_ms":450,"tts_ms":80}\nAll fields optional. Send one per agent response. 
Without this, component_latency is omitted from results.\nWhen modifying a WebSocket agent\'s code, add this text frame after TTS completes to enable component latency reporting.\n</component_timing>\n\n<metadata_capture>\nWebSocket and LiveKit/WebRTC agents can also emit richer observability metadata:\n {"type":"vent:session","platform":"custom","provider_call_id":"call_123","provider_session_id":"session_abc"}\n {"type":"vent:call-metadata","call_metadata":{"recording_url":"https://...","cost_usd":0.12,"provider_debug_urls":{"log":"https://..."}}}\n {"type":"vent:debug-url","label":"trace","url":"https://..."}\n {"type":"vent:session-report","report":{"room_name":"room-123","events":[...],"metrics":[...]}}\n {"type":"vent:metrics","event":"metrics_collected","metric_type":"eou","metrics":{"speechId":"speech_123","endOfUtteranceDelayMs":420}}\n {"type":"vent:function-tools-executed","event":"function_tools_executed","hasAgentHandoff":true,"tool_calls":[{"name":"lookup_customer","arguments":{"id":"123"}}]}\n {"type":"vent:conversation-item","event":"conversation_item_added","item":{"type":"agent_handoff","newAgentId":"billing-agent"}}\n {"type":"vent:session-usage","usage":{"llm":{"promptTokens":123,"completionTokens":45}}}\nTransport:\n WebSocket \u2014 send JSON text frames with these payloads. WebSocket agents may also emit {"type":"vent:transcript","role":"caller","text":"I need to reschedule","turn_index":0} when they have native transcript text.\n WebRTC/LiveKit \u2014 publishData() or sendText() on the matching "vent:*" topic, e.g. topic "vent:call-metadata" with the JSON body above.\nFor LiveKit, transcript and timing stay authoritative from native room signals (`lk.transcription`, `lk.agent.state`). 
Do not emit `vent:transcript` from LiveKit agents.\nFor LiveKit Node agents, prefer the first-party helper instead of manual forwarding:\n```ts\nimport { instrumentLiveKitAgent } from "@vent-hq/livekit";\n\nconst vent = instrumentLiveKitAgent({\n ctx,\n session,\n});\n```\nThis helper must run inside the LiveKit agent runtime with the existing Agents SDK `session` and `ctx` objects. It is the Vent integration layer on top of the Agents SDK, not a replacement for it.\nInstall it with `npm install @vent-hq/livekit` after the package is published to the `vent-hq` npm org. Until then, use the workspace package from this repo.\nThis automatically publishes only the in-agent-only LiveKit signals: `metrics_collected`, `function_tools_executed`, `conversation_item_added`, and a session report on close/shutdown.\nDo not use it to mirror room-visible signals like transcript, agent state timing, or room/session ID \u2014 Vent already gets those from LiveKit itself.\nFor LiveKit inside-agent forwarding, prefer sending the raw LiveKit event payloads on:\n `vent:metrics`\n `vent:function-tools-executed`\n `vent:conversation-item`\n `vent:session-usage`\nUse these metadata events when the agent runtime already knows native IDs, recordings, warnings, debug links, session reports, metrics events, or handoff artifacts. This gives custom and LiveKit agents parity with hosted adapters without needing a LiveKit Cloud connector.\n</metadata_capture>\n\n<config_call>\nEach call in the `calls` map. The key is the call name (e.g. `"reschedule-appointment"`, not `"call-1"`).\n{\n "caller_prompt": "required \u2014 caller persona and behavior (name -> goal -> emotion -> conditional behavior)",\n "max_turns": "required \u2014 default 6",\n "silence_threshold_ms": "optional \u2014 end-of-turn threshold ms (default 800, 200-10000). 
800-1200 FAQ, 2000-3000 tool calls, 3000-5000 complex reasoning.",\n "persona": "optional \u2014 caller behavior controls",\n {\n "pace": "slow | normal | fast",\n "clarity": "clear | vague | rambling",\n "disfluencies": "true | false",\n "cooperation": "cooperative | reluctant | hostile",\n "emotion": "neutral | cheerful | confused | frustrated | skeptical | rushed",\n "interruption_style": "optional preplanned interrupt tendency: low | high. If set, Vent may pre-plan a caller cut-in before the agent turn starts. It does NOT make a mid-turn interrupt LLM call.",\n "memory": "reliable | unreliable",\n "intent_clarity": "clear | indirect | vague",\n "confirmation_style": "explicit | vague"\n },\n "audio_actions": "optional \u2014 per-turn audio stress calls",\n [\n { "action": "interrupt", "at_turn": "N", "prompt": "what caller says" },\n { "action": "inject_noise", "at_turn": "N", "noise_type": "babble | white | pink", "snr_db": "0-40" },\n { "action": "split_sentence", "at_turn": "N", "split": { "part_a": "...", "part_b": "...", "pause_ms": "500-5000" } },\n { "action": "noise_on_caller", "at_turn": "N" }\n ],\n "prosody": "optional \u2014 Hume emotion analysis (default false)",\n "caller_audio": "optional \u2014 omit for clean audio",\n {\n "noise": { "type": "babble | white | pink", "snr_db": "0-40" },\n "speed": "0.5-2.0 (1.0 = normal)",\n "speakerphone": "true | false",\n "mic_distance": "close | normal | far",\n "clarity": "0.0-1.0 (1.0 = perfect)",\n "accent": "american | british | australian | filipino | spanish_mexican | spanish_peninsular | spanish_colombian | spanish_argentine | german | french | italian | dutch | japanese",\n "packet_loss": "0.0-0.3",\n "jitter_ms": "0-100"\n },\n "language": "optional \u2014 ISO 639-1: en, es, fr, de, it, nl, ja"\n}\n\nInterruption rules:\n- `audio_actions: [{ "action": "interrupt", ... }]` is the deterministic per-turn interrupt test. 
Prefer this for evaluation.\n- `persona.interruption_style` is only a preplanned caller tendency. If used, Vent decides before the agent response starts whether this turn may cut in.\n- Vent no longer pauses mid-turn to ask a second LLM whether to interrupt.\n- For production-faithful testing, prefer explicit `audio_actions.interrupt` over persona interruption.\n\n<examples_call>\n<simple_suite_example>\n{\n "connection": {\n "adapter": "vapi",\n "platform": { "provider": "vapi" }\n },\n "calls": {\n "reschedule-appointment": {\n "caller_prompt": "You are Maria, calling to reschedule her dentist appointment from Thursday to next Tuesday. She\'s in a hurry and wants this done quickly.",\n "max_turns": 8\n },\n "cancel-appointment": {\n "caller_prompt": "You are Tom, calling to cancel his appointment for Friday. He\'s calm and just wants confirmation.",\n "max_turns": 6\n }\n }\n}\n</simple_suite_example>\n\n<advanced_call_example>\nA call entry with advanced options (persona, audio actions, prosody):\n{\n "noisy-interruption-booking": {\n "caller_prompt": "You are James, an impatient customer calling from a loud coffee shop to book a plumber for tomorrow morning. You interrupt the agent mid-sentence when they start listing availability \u2014 you just want the earliest slot.",\n "max_turns": 12,\n "persona": { "pace": "fast", "cooperation": "reluctant", "emotion": "rushed", "interruption_style": "high" },\n "audio_actions": [\n { "action": "interrupt", "at_turn": 3, "prompt": "Just give me the earliest one!" 
},\n { "action": "inject_noise", "at_turn": 1, "noise_type": "babble", "snr_db": 15 }\n ],\n "caller_audio": { "noise": { "type": "babble", "snr_db": 20 }, "speed": 1.3 },\n "prosody": true\n }\n}\n</advanced_call_example>\n\n</examples_call>\n</config_call>\n\n<output_conversation_test>\n{\n "name": "sarah-hotel-booking",\n "status": "completed",\n "caller_prompt": "You are Sarah, calling to book...",\n "duration_ms": 45200,\n "error": null,\n "transcript": [\n { "role": "caller", "text": "Hi, I\'d like to book..." },\n { "role": "agent", "text": "Sure! What date?", "ttfb_ms": 650, "ttfw_ms": 780, "audio_duration_ms": 2400 },\n { "role": "agent", "text": "Let me check avail\u2014", "ttfb_ms": 540, "ttfw_ms": 620, "audio_duration_ms": 1400, "interrupted": true },\n { "role": "caller", "text": "Just the earliest slot please", "audio_duration_ms": 900, "is_interruption": true },\n { "role": "agent", "text": "Sure, the earliest is 9 AM tomorrow.", "ttfb_ms": 220, "ttfw_ms": 260, "audio_duration_ms": 2100 }\n ],\n "latency": {\n "response_time_ms": 890, "response_time_source": "ttfw",\n "p50_response_time_ms": 850, "p90_response_time_ms": 1100, "p95_response_time_ms": 1400, "p99_response_time_ms": 1550,\n "first_response_time_ms": 1950,\n "mean_ttfw_ms": 890, "p50_ttfw_ms": 850, "p95_ttfw_ms": 1400, "p99_ttfw_ms": 1550,\n "first_turn_ttfw_ms": 1950, "total_silence_ms": 4200, "mean_turn_gap_ms": 380,\n "drift_slope_ms_per_turn": -45.2, "mean_silence_pad_ms": 128, "mouth_to_ear_est_ms": 1020\n },\n "transcript_quality": {\n "wer": 0.04,\n "hallucination_events": [\n { "error_count": 5, "reference_text": "triple five one two", "hypothesis_text": "five five five nine two" }\n ],\n "repetition_score": 0.05,\n "reprompt_count": 0,\n "filler_word_rate": 0.8,\n "words_per_minute": 148\n },\n "audio_analysis": {\n "caller_talk_time_ms": 12400,\n "agent_talk_time_ms": 28500,\n "agent_speech_ratio": 0.72,\n "talk_ratio_vad": 0.69,\n "interruption_rate": 0.25,\n 
"interruption_count": 1,\n "agent_overtalk_after_barge_in_ms": 280,\n "agent_interrupting_user_rate": 0.0,\n "agent_interrupting_user_count": 0,\n "missed_response_windows": 0,\n "longest_monologue_ms": 5800,\n "silence_gaps_over_2s": 1,\n "total_internal_silence_ms": 2400,\n "mean_agent_speech_segment_ms": 3450\n },\n "tool_calls": {\n "total": 2, "successful": 2, "failed": 0, "mean_latency_ms": 340,\n "names": ["check_availability", "book_appointment"],\n "observed": [{ "name": "check_availability", "arguments": { "date": "2026-03-12" }, "result": { "slots": ["09:00", "10:00"] }, "successful": true, "latency_ms": 280, "turn_index": 3 }]\n },\n "component_latency": {\n "mean_stt_ms": 120, "mean_llm_ms": 450, "mean_tts_ms": 80,\n "p95_stt_ms": 180, "p95_llm_ms": 620, "p95_tts_ms": 110,\n "mean_speech_duration_ms": 2100,\n "bottleneck": "llm"\n },\n "call_metadata": {\n "platform": "vapi",\n "cost_usd": 0.08,\n "recording_url": "https://example.com/recording",\n "ended_reason": "customer_ended_call",\n "transfers": []\n },\n "warnings": [],\n "audio_actions": [],\n "emotion": {\n "naturalness": 0.72, "mean_calmness": 0.65, "mean_confidence": 0.58, "peak_frustration": 0.08, "emotion_trajectory": "stable"\n }\n}\n\nAlways present: name, status, caller_prompt, duration_ms, error, transcript, tool_calls, warnings, audio_actions. Nullable when analysis didn\'t run: latency, transcript_quality, audio_analysis, component_latency, call_metadata, emotion (requires prosody: true), debug (requires --verbose).\n\n### Result presentation\n\nWhen you report a conversation result to the user, always include:\n\n1. **Summary** \u2014 the overall verdict and the 1-3 most important findings.\n2. **Transcript summary** \u2014 a short narrative of what happened in the call.\n3. **Recording URL** \u2014 include `call_metadata.recording_url` when present; explicitly say when it is unavailable.\n4. 
**Next steps** \u2014 concrete fixes, follow-up tests, or why no change is needed.\n\nUse metrics to support the summary, not as the whole answer. Do not dump raw numbers without interpretation.\n\nWhen `call_metadata.transfer_attempted` is present, explicitly say whether the transfer only appeared attempted or was mechanically verified as completed (`call_metadata.transfer_completed`). Use `call_metadata.transfers[]` to report transfer type, destination, status, and sources.\n\n### Judging guidance\n\nUse the transcript, metrics, test scenario, and relevant agent instructions/system prompt to judge:\n\n| Dimension | What to check |\n|--------|----------------|\n| **Hallucination detection** | Check whether the agent stated anything not grounded in its instructions, tools, or the conversation itself. Treat `transcript_quality.hallucination_events` only as a speech-recognition warning signal, not proof of agent hallucination. |\n| **Instruction following** | Compare the agent\'s behavior against its system prompt and the test\'s expected constraints. |\n| **Context retention** | Check whether the agent forgot or contradicted information established earlier in the call. |\n| **Semantic accuracy** | Check whether the agent correctly understood the caller\'s intent and responded to the real request. |\n| **Goal completion** | Decide whether the agent achieved what the test scenario was designed to verify. |\n| **Transfer correctness** | For transfer scenarios, judge whether transfer was appropriate, whether it completed, whether it went to the expected destination, and whether enough context was passed during the handoff. 
|\n\n### Interruption evaluation\n\nWhen the transcript contains `interrupted: true` / `is_interruption: true` turns, evaluate these metrics by reading the transcript:\n\n| Metric | How to evaluate | Target |\n|--------|----------------|--------|\n| **Recovery rate** | For each interrupted turn: does the post-interrupt agent response acknowledge or address the interruption? | >90% |\n| **Context retention** | After the interruption, does the agent remember pre-interrupt conversation state? | >95% |\n| **Agent overtalk after barge-in** | Use `audio_analysis.agent_overtalk_after_barge_in_ms` when available. Lower is better because it measures how long the agent kept speaking after the caller cut in. | <500ms acceptable |\n| **Agent interrupting user rate** | Use `audio_analysis.agent_interrupting_user_rate` and the transcript to see whether the agent starts speaking before the caller finished. | 0 ideal |\n\nReport these alongside standard metrics when interruption calls run.\n</output_conversation_test>\n</call_config>\n\n\n## Exit Codes\n\n0=pass, 1=fail, 2=error\n';
6427
6752
 
6428
6753
  // src/lib/setup.ts
6429
6754
  var SUITE_SCAFFOLD = JSON.stringify(
@@ -6675,7 +7000,8 @@ var RUN_USAGE = `Usage: vent-hq run -f <suite.json> [options]
6675
7000
  Options:
6676
7001
  --file, -f Path to suite JSON file (required)
6677
7002
  --call Name of the call to run (required if suite has multiple calls)
6678
- --session, -s Reuse an existing local agent session`;
7003
+ --session, -s Reuse an existing local agent session
7004
+ --verbose, -v Include verbose fields in the result JSON`;
6679
7005
  var AGENT_USAGE = `Usage: vent-hq agent <command> [options]
6680
7006
 
6681
7007
  Commands:
@@ -6688,7 +7014,7 @@ Start options:
6688
7014
 
6689
7015
  Stop options:
6690
7016
  vent-hq agent stop <session-id>`;
6691
- var STATUS_USAGE = `Usage: vent-hq status <run-id>`;
7017
+ var STATUS_USAGE = `Usage: vent-hq status <run-id> [--verbose]`;
6692
7018
  async function main() {
6693
7019
  loadDotenv();
6694
7020
  const args = process.argv.slice(2);
@@ -6698,7 +7024,7 @@ async function main() {
6698
7024
  return 0;
6699
7025
  }
6700
7026
  if (command === "--version" || command === "-v") {
6701
- const pkg = await import("./package-YOCP6D2K.mjs");
7027
+ const pkg = await import("./package-767KASWC.mjs");
6702
7028
  console.log(`vent-hq ${pkg.default.version}`);
6703
7029
  return 0;
6704
7030
  }
@@ -6717,7 +7043,8 @@ async function main() {
6717
7043
  options: {
6718
7044
  file: { type: "string", short: "f" },
6719
7045
  call: { type: "string" },
6720
- session: { type: "string", short: "s" }
7046
+ session: { type: "string", short: "s" },
7047
+ verbose: { type: "boolean", short: "v", default: false }
6721
7048
  },
6722
7049
  strict: true
6723
7050
  });
@@ -6729,7 +7056,8 @@ async function main() {
6729
7056
  return runCommand({
6730
7057
  file: values.file,
6731
7058
  call: values.call,
6732
- session: values.session
7059
+ session: values.session,
7060
+ verbose: values.verbose
6733
7061
  });
6734
7062
  }
6735
7063
  case "agent": {
@@ -6769,8 +7097,20 @@ async function main() {
6769
7097
  console.log(STATUS_USAGE);
6770
7098
  return 0;
6771
7099
  }
6772
- const runId = commandArgs[0];
6773
- return statusCommand({ runId });
7100
+ const { values, positionals } = parseArgs({
7101
+ args: commandArgs,
7102
+ options: {
7103
+ verbose: { type: "boolean", short: "v", default: false }
7104
+ },
7105
+ allowPositionals: true,
7106
+ strict: true
7107
+ });
7108
+ const runId = positionals[0];
7109
+ if (!runId) {
7110
+ console.log(STATUS_USAGE);
7111
+ return 2;
7112
+ }
7113
+ return statusCommand({ runId, verbose: values.verbose });
6774
7114
  }
6775
7115
  case "stop": {
6776
7116
  const runId = commandArgs[0];