@telnyx/voice-agent-tester 0.4.1 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,35 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.4](https://github.com/team-telnyx/voice-agent-tester/compare/v0.4.3...v0.4.4) (2026-03-11)
4
+
5
+ ### Features
6
+
7
+ * fix speechend race condition, add --retries flag ([#21](https://github.com/team-telnyx/voice-agent-tester/issues/21)) ([09e3b65](https://github.com/team-telnyx/voice-agent-tester/commit/09e3b6578face6c407d058991ab5495d9463e544))
8
+
9
+ ### Chores
10
+
11
+ * release v0.4.3 ([#20](https://github.com/team-telnyx/voice-agent-tester/issues/20)) ([bdeb87b](https://github.com/team-telnyx/voice-agent-tester/commit/bdeb87bed502919a9fed9950e69242b1c2aefcfc))
12
+
13
+ ## [0.4.3](https://github.com/team-telnyx/voice-agent-tester/compare/v0.4.2...v0.4.3) (2026-03-11)
14
+
15
+ ### Features
16
+
17
+ * add click_with_retry action and fix audio event race conditions ([#19](https://github.com/team-telnyx/voice-agent-tester/issues/19)) ([13e2009](https://github.com/team-telnyx/voice-agent-tester/commit/13e2009a94b4e2f7e05972f01a47c9b31758bf58))
18
+
19
+ ### Chores
20
+
21
+ * release v0.4.2 ([#18](https://github.com/team-telnyx/voice-agent-tester/issues/18)) ([1cf64ef](https://github.com/team-telnyx/voice-agent-tester/commit/1cf64ef563e813c2f06b2b655bfcc414637594cb))
22
+
23
+ ## [0.4.2](https://github.com/team-telnyx/voice-agent-tester/compare/v0.4.1...v0.4.2) (2026-02-23)
24
+
25
+ ### Features
26
+
27
+ * add dashboard hints for Vapi and ElevenLabs comparison mode params ([#16](https://github.com/team-telnyx/voice-agent-tester/issues/16)) ([7fda40b](https://github.com/team-telnyx/voice-agent-tester/commit/7fda40b6971a968dde1fc1c3466662227a3bc77e))
28
+
29
+ ### Chores
30
+
31
+ * improve event logs and comparison mode docs ([#17](https://github.com/team-telnyx/voice-agent-tester/issues/17)) ([24a9683](https://github.com/team-telnyx/voice-agent-tester/commit/24a968337a0b4a6c2d6baddd0aa507d5a87c9488))
32
+
3
33
  ## [0.4.1](https://github.com/team-telnyx/voice-agent-tester/compare/v0.4.0...v0.4.1) (2026-02-18)
4
34
 
5
35
  ### Features
package/README.md CHANGED
@@ -31,6 +31,8 @@ voice-agent-tester -a applications/telnyx.yaml -s scenarios/appointment.yaml --a
31
31
  | `--provider` | | Import from provider (`vapi`, `elevenlabs`, `retell`) |
32
32
  | `--provider-api-key` | | External provider API key (required with `--provider`) |
33
33
  | `--provider-import-id` | | Provider assistant ID to import (required with `--provider`) |
34
+ | `--share-key` | | Vapi share key for comparison mode (prompted if missing) |
35
+ | `--branch-id` | | ElevenLabs branch ID for comparison mode (prompted if missing) |
34
36
  | `--compare` | `true` | Run both provider direct and Telnyx import benchmarks |
35
37
  | `--no-compare` | | Disable comparison (run only Telnyx import) |
36
38
  | `-d, --debug` | `false` | Enable detailed timeout diagnostics |
@@ -190,20 +192,45 @@ When importing from an external provider, the tool automatically runs both bench
190
192
  1. **Provider Direct** - Benchmarks the assistant on the original provider's widget
191
193
  2. **Telnyx Import** - Benchmarks the same assistant after importing to Telnyx
192
194
 
195
+ ### Provider-Specific Keys
196
+
197
+ Comparison mode requires a provider-specific key to load the provider's direct widget. If not passed via CLI, the tool will prompt you with instructions on how to find it.
198
+
199
+ | Provider | Flag | How to find it |
200
+ |----------|------|----------------|
201
+ | Vapi | `--share-key` | In the Vapi Dashboard, select your assistant, then click the link icon (🔗) next to the assistant ID at the top. This copies the demo link containing your share key. |
202
+ | ElevenLabs | `--branch-id` | In the ElevenLabs Dashboard, go to Agents, select your target agent, then click the dropdown next to Publish and select "Copy shareable link". This copies the demo link containing your branch ID. |
203
+
193
204
  ### Import and Compare (Default)
194
205
 
206
+ **Vapi:**
207
+
195
208
  ```bash
196
209
  npx @telnyx/voice-agent-tester@latest \
197
210
  -a applications/telnyx.yaml \
198
211
  -s scenarios/appointment.yaml \
199
212
  --provider vapi \
213
+ --share-key <VAPI_SHARE_KEY> \
200
214
  --api-key <TELNYX_KEY> \
201
215
  --provider-api-key <VAPI_KEY> \
202
216
  --provider-import-id <VAPI_ASSISTANT_ID>
203
217
  ```
204
218
 
219
+ **ElevenLabs:**
220
+
221
+ ```bash
222
+ npx @telnyx/voice-agent-tester@latest \
223
+ -a applications/telnyx.yaml \
224
+ -s scenarios/appointment.yaml \
225
+ --provider elevenlabs \
226
+ --branch-id <ELEVENLABS_BRANCH_ID> \
227
+ --api-key <TELNYX_KEY> \
228
+ --provider-api-key <ELEVENLABS_KEY> \
229
+ --provider-import-id <ELEVENLABS_AGENT_ID>
230
+ ```
231
+
205
232
  This will:
206
- - Run Phase 1: VAPI direct benchmark
233
+ - Run Phase 1: Provider direct benchmark
207
234
  - Run Phase 2: Telnyx import benchmark
208
235
  - Generate a side-by-side latency comparison report
209
236
 
@@ -4,7 +4,8 @@ steps:
4
4
  selector: "telnyx-ai-agent"
5
5
  - action: sleep
6
6
  time: 3000
7
- - action: click
7
+ - action: click_with_retry
8
8
  selector: "telnyx-ai-agent >>> button"
9
- - action: sleep
10
- time: 4000
9
+ retries: 5
10
+ checkDelay: 4000
11
+ retryDelay: 5000
@@ -13,7 +13,3 @@ steps:
13
13
  time: 2000
14
14
  - action: speak
15
15
  text: "Hello, what can you do?"
16
- - action: wait_for_voice
17
- metrics: elapsed_time
18
- - action: wait_for_silence
19
- metrics: elapsed_time
@@ -62,20 +62,24 @@ function createControlledMediaStream() {
62
62
  }
63
63
 
64
64
  // Replace getUserMedia to return our controlled stream
65
- const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
66
- navigator.mediaDevices.getUserMedia = function (constraints) {
67
- console.log("🎤 Intercepted getUserMedia call with constraints:", constraints);
68
-
69
- // If audio is requested, return our controlled stream
70
- if (constraints && constraints.audio) {
71
- console.log("🎤 Returning controlled MediaStream instead of real microphone");
72
- const controlledStream = createControlledMediaStream();
73
- return Promise.resolve(controlledStream);
74
- }
65
+ if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
66
+ const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
67
+ navigator.mediaDevices.getUserMedia = function (constraints) {
68
+ console.log("🎤 Intercepted getUserMedia call with constraints:", constraints);
69
+
70
+ // If audio is requested, return our controlled stream
71
+ if (constraints && constraints.audio) {
72
+ console.log("🎤 Returning controlled MediaStream instead of real microphone");
73
+ const controlledStream = createControlledMediaStream();
74
+ return Promise.resolve(controlledStream);
75
+ }
75
76
 
76
- // For video-only or other requests, use original implementation
77
- return originalGetUserMedia(constraints);
78
- };
77
+ // For video-only or other requests, use original implementation
78
+ return originalGetUserMedia(constraints);
79
+ };
80
+ } else {
81
+ console.warn("🎤 navigator.mediaDevices.getUserMedia not available, skipping microphone intercept");
82
+ }
79
83
 
80
84
  // Expose __speak method to be called from voice-agent-tester.js
81
85
  window.__speak = function (textOrUrl) {
@@ -152,6 +156,24 @@ function playAudioInMediaStream(url) {
152
156
  const audio = new Audio(url);
153
157
  audio.crossOrigin = 'anonymous'; // Enable CORS if needed
154
158
 
159
+ // Keep a strong reference so the element is not garbage collected
160
+ currentSpeakAudio = audio;
161
+
162
+ let speechEndFired = false;
163
+ let safetyTimeoutId = null;
164
+
165
+ function fireSpeechEnd(reason) {
166
+ if (speechEndFired) return;
167
+ speechEndFired = true;
168
+ if (safetyTimeoutId) clearTimeout(safetyTimeoutId);
169
+ console.log(`🎤 Audio playback ended (${reason})`);
170
+ if (typeof __publishEvent === 'function') {
171
+ __publishEvent('speechend', { url: url, reason: reason });
172
+ }
173
+ // Release reference
174
+ if (currentSpeakAudio === audio) currentSpeakAudio = null;
175
+ }
176
+
155
177
  // Set up audio routing through all MediaStreams
156
178
  audio.addEventListener('canplaythrough', function () {
157
179
  console.log(`🎤 Audio ready to play, routing to ${mediaStreams.length} MediaStreams`);
@@ -181,7 +203,33 @@ function playAudioInMediaStream(url) {
181
203
  }
182
204
 
183
205
  // Play the audio
184
- audio.play();
206
+ audio.play().then(() => {
207
+ // Set up safety timeout based on audio duration
208
+ // audio.duration should be available after canplaythrough
209
+ const duration = audio.duration;
210
+ if (duration && isFinite(duration)) {
211
+ const safetyMs = Math.max((duration * 1000) + 5000, 15000);
212
+ console.log(`🎤 Audio duration: ${duration.toFixed(1)}s, safety timeout: ${(safetyMs / 1000).toFixed(1)}s`);
213
+ safetyTimeoutId = setTimeout(() => {
214
+ if (!speechEndFired) {
215
+ console.warn(`🎤 Safety timeout: speechend not fired after ${(safetyMs / 1000).toFixed(1)}s (audio paused=${audio.paused}, ended=${audio.ended}, currentTime=${audio.currentTime.toFixed(1)})`);
216
+ fireSpeechEnd('safety_timeout');
217
+ }
218
+ }, safetyMs);
219
+ } else {
220
+ // Unknown duration — use 20s fallback
221
+ console.warn('🎤 Audio duration unknown, using 20s safety timeout');
222
+ safetyTimeoutId = setTimeout(() => {
223
+ if (!speechEndFired) {
224
+ console.warn('🎤 Safety timeout: speechend not fired after 20s');
225
+ fireSpeechEnd('safety_timeout');
226
+ }
227
+ }, 20000);
228
+ }
229
+ }).catch(error => {
230
+ console.error('Error playing audio:', error);
231
+ fireSpeechEnd('play_error');
232
+ });
185
233
  } catch (error) {
186
234
  console.error('Error setting up audio source:', error);
187
235
  if (typeof __publishEvent === 'function') {
@@ -190,11 +238,19 @@ function playAudioInMediaStream(url) {
190
238
  }
191
239
  });
192
240
 
193
- // Handle audio end
241
+ // Handle audio end — primary path
194
242
  audio.addEventListener('ended', function () {
195
- console.log('🎤 Audio playback ended');
196
- if (typeof __publishEvent === 'function') {
197
- __publishEvent('speechend', { url: url });
243
+ fireSpeechEnd('ended');
244
+ });
245
+
246
+ // Handle pause — if something pauses the audio externally
247
+ audio.addEventListener('pause', function () {
248
+ // Only treat as speechend if the audio is past 90% of its duration (near end)
249
+ // or has already ended; any earlier pause is only logged, not treated as speechend
250
+ if (audio.ended || (audio.duration && audio.currentTime >= audio.duration * 0.9)) {
251
+ fireSpeechEnd('pause_near_end');
252
+ } else {
253
+ console.warn(`🎤 Audio paused at ${audio.currentTime.toFixed(1)}s / ${(audio.duration || 0).toFixed(1)}s`);
198
254
  }
199
255
  });
200
256
 
@@ -204,17 +260,31 @@ function playAudioInMediaStream(url) {
204
260
  if (typeof __publishEvent === 'function') {
205
261
  __publishEvent('speecherror', { error: 'Audio playback failed', url: url });
206
262
  }
263
+ fireSpeechEnd('error');
207
264
  });
208
265
 
209
266
  // Start loading the audio
210
267
  audio.load();
211
268
  }
212
269
 
270
+ // Keep a reference to the current speak Audio element so it doesn't get GC'd
271
+ let currentSpeakAudio = null;
272
+
213
273
  // Helper function to stop current audio and reset to silence
214
274
  function stopCurrentAudio() {
275
+ // Stop the speak audio element if playing
276
+ if (currentSpeakAudio) {
277
+ try {
278
+ currentSpeakAudio.pause();
279
+ currentSpeakAudio.currentTime = 0;
280
+ } catch (e) {
281
+ console.warn('Error stopping speak audio:', e);
282
+ }
283
+ currentSpeakAudio = null;
284
+ }
285
+
215
286
  currentPlaybackNodes.forEach((sourceNode, index) => {
216
287
  try {
217
- sourceNode.stop();
218
288
  sourceNode.disconnect();
219
289
  console.log(`🎤 Stopped audio source ${index}`);
220
290
  } catch (e) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@telnyx/voice-agent-tester",
3
- "version": "0.4.1",
3
+ "version": "0.4.4",
4
4
  "description": "A command-line tool to test voice agents using Puppeteer",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
package/src/index.js CHANGED
@@ -100,12 +100,22 @@ function getCompareRequiredParams(argv) {
100
100
  switch (argv.provider) {
101
101
  case 'vapi':
102
102
  if (!argv.shareKey) {
103
- missing.push({ key: 'shareKey', flag: '--share-key', description: 'Vapi share key' });
103
+ missing.push({
104
+ key: 'shareKey',
105
+ flag: '--share-key',
106
+ description: 'Vapi share key',
107
+ hint: 'In the Vapi Dashboard, select your assistant, then click the link icon (🔗) next to the assistant ID at the top. This copies the demo link containing your share key.'
108
+ });
104
109
  }
105
110
  break;
106
111
  case 'elevenlabs':
107
112
  if (!argv.branchId) {
108
- missing.push({ key: 'branchId', flag: '--branch-id', description: 'ElevenLabs branch ID' });
113
+ missing.push({
114
+ key: 'branchId',
115
+ flag: '--branch-id',
116
+ description: 'ElevenLabs branch ID',
117
+ hint: 'In the ElevenLabs Dashboard, go to Agents, select your target agent, then click the dropdown next to Publish and select "Copy shareable link". This copies the demo link containing your branch ID.'
118
+ });
109
119
  }
110
120
  break;
111
121
  // retell and others: no extra params needed yet
@@ -317,6 +327,11 @@ const argv = yargs(hideBin(process.argv))
317
327
  description: 'Volume level for audio input (0.0 to 1.0)',
318
328
  default: 1.0
319
329
  })
330
+ .option('retries', {
331
+ type: 'number',
332
+ description: 'Number of retries for failed test runs (0 = no retries)',
333
+ default: 0
334
+ })
320
335
  .help()
321
336
  .argv;
322
337
 
@@ -399,22 +414,49 @@ async function runBenchmark({ applications, scenarios, repeat, concurrency, argv
399
414
  audioVolume: argv.audioVolume
400
415
  });
401
416
 
402
- try {
403
- await tester.runScenario(targetUrl, app.steps, scenario.steps, app.name, scenario.name, repetition);
404
- console.log(`✅ Completed successfully (Run ${runNumber}/${totalRuns})`);
405
- return { success: true };
406
- } catch (error) {
407
- // Store only the first line for summary, but print full message here (with diagnostics)
408
- const shortMessage = error.message.split('\n')[0];
409
- const errorInfo = {
410
- app: app.name,
411
- scenario: scenario.name,
412
- repetition,
413
- error: shortMessage
414
- };
415
- // Print full diagnostics here (only place they appear)
416
- console.error(`❌ Error (Run ${runNumber}/${totalRuns}):\n${error.message}`);
417
- return { success: false, error: errorInfo };
417
+ const maxAttempts = (argv.retries || 0) + 1;
418
+
419
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
420
+ // Create a fresh tester for each attempt (after first, original tester is closed)
421
+ const currentTester = attempt === 1 ? tester : new VoiceAgentTester({
422
+ verbose: argv.verbose,
423
+ headless: argv.headless,
424
+ assetsServerUrl: argv.assetsServer,
425
+ reportGenerator: reportGenerator,
426
+ record: argv.record,
427
+ debug: argv.debug,
428
+ audioUrl: argv.audioUrl,
429
+ audioVolume: argv.audioVolume
430
+ });
431
+
432
+ try {
433
+ await currentTester.runScenario(targetUrl, app.steps, scenario.steps, app.name, scenario.name, repetition);
434
+ console.log(`✅ Completed successfully (Run ${runNumber}/${totalRuns})`);
435
+ return { success: true };
436
+ } catch (error) {
437
+ const shortMessage = error.message.split('\n')[0];
438
+
439
+ if (attempt < maxAttempts) {
440
+ console.warn(`\n⚠️ Attempt ${attempt}/${maxAttempts} failed: ${shortMessage}`);
441
+ console.warn(`🔄 Retrying in 3s... (${maxAttempts - attempt} retries left)\n`);
442
+ await new Promise(r => setTimeout(r, 3000));
443
+ continue;
444
+ }
445
+
446
+ // Final attempt failed
447
+ const errorInfo = {
448
+ app: app.name,
449
+ scenario: scenario.name,
450
+ repetition,
451
+ error: shortMessage
452
+ };
453
+ // Print full diagnostics here (only place they appear)
454
+ console.error(`❌ Error (Run ${runNumber}/${totalRuns}):\n${error.message}`);
455
+ if (maxAttempts > 1) {
456
+ console.error(` Failed after ${maxAttempts} attempts`);
457
+ }
458
+ return { success: false, error: errorInfo };
459
+ }
418
460
  }
419
461
  }
420
462
 
@@ -550,6 +592,9 @@ async function main() {
550
592
  if (missingParams.length > 0) {
551
593
  for (const param of missingParams) {
552
594
  console.log(`\n🔑 ${param.description} is required for comparison mode`);
595
+ if (param.hint) {
596
+ console.log(` ${param.hint}`);
597
+ }
553
598
  const inputVal = await promptUserInput(`Enter ${param.description} (or press Enter to skip comparison): `);
554
599
  if (inputVal) {
555
600
  argv[param.key] = inputVal;
@@ -238,6 +238,7 @@ export class VoiceAgentTester {
238
238
  } else {
239
239
  errorMessage += '\n (Could not collect browser diagnostics)';
240
240
  }
241
+
241
242
  }
242
243
 
243
244
  reject(new Error(errorMessage));
@@ -330,7 +331,8 @@ export class VoiceAgentTester {
330
331
  await this.page.exposeFunction('__publishEvent', (eventType, data) => {
331
332
  const event = { eventType, data, timestamp: Date.now() };
332
333
 
333
- console.log(`\t📢 Event received: ${eventType}`);
334
+ const elementSuffix = data && data.elementId ? ` (audio element: ${data.elementId})` : '';
335
+ console.log(`\t📢 ${eventType}${elementSuffix}`);
334
336
 
335
337
  // Check if there are any pending promises waiting for this event type
336
338
  const pendingPromises = this.pendingPromises.get(eventType);
@@ -362,6 +364,7 @@ export class VoiceAgentTester {
362
364
  console.error(error.stack);
363
365
  }
364
366
  });
367
+
365
368
  }
366
369
 
367
370
  async close() {
@@ -534,6 +537,9 @@ export class VoiceAgentTester {
534
537
  case 'screenshot':
535
538
  handlerResult = await this.handleScreenshot(step);
536
539
  break;
540
+ case 'click_with_retry':
541
+ handlerResult = await this.handleClickWithRetry(step);
542
+ break;
537
543
 
538
544
  default:
539
545
  console.log(`Unknown action: ${action}`);
@@ -576,10 +582,173 @@ export class VoiceAgentTester {
576
582
  await this.page.click(selector);
577
583
  }
578
584
 
585
+ async handleClickWithRetry(step) {
586
+ const selector = step.selector;
587
+ if (!selector) {
588
+ throw new Error('No selector specified for click_with_retry action');
589
+ }
590
+
591
+ const maxRetries = step.retries || 2;
592
+ const retryDelay = step.retryDelay || 3000;
593
+ const checkDelay = step.checkDelay || 4000;
594
+
595
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
596
+ let clicked = false;
597
+ try {
598
+ await this.page.waitForSelector(selector, { timeout: attempt === 1 ? 30000 : 5000 });
599
+ await this.page.click(selector);
600
+ clicked = true;
601
+ } catch {
602
+ // Selector not found — will check for widget config errors below
603
+ }
604
+
605
+ if (!clicked) {
606
+ // Check if the widget is showing a configuration error
607
+ const widgetState = await this._getWidgetErrorState(selector);
608
+
609
+ if (widgetState.isConfigError) {
610
+ // Widget is showing "unauthenticated web calls" or similar config error.
611
+ // This means the API config hasn't propagated to the widget yet.
612
+ if (attempt < maxRetries) {
613
+ console.log(`\t⚠️ Click attempt ${attempt}/${maxRetries}: widget not ready — "${widgetState.errorText}"`);
614
+ console.log(`\t⏳ Waiting for configuration to propagate (reloading in ${retryDelay}ms)...`);
615
+ await this.sleep(retryDelay);
616
+ await this.page.reload({ waitUntil: 'networkidle0', timeout: 30000 });
617
+ await this.sleep(2000); // extra time after reload
618
+ continue;
619
+ }
620
+ throw new Error(
621
+ `Widget configuration not ready after ${maxRetries} attempts: "${widgetState.errorText}"\n` +
622
+ `The "Supports Unauthenticated Web Calls" setting may not have propagated yet.\n` +
623
+ `Try running again in a few seconds, or verify the setting in the Telnyx portal.`
624
+ );
625
+ }
626
+
627
+ // Not a config error — genuinely missing selector
628
+ if (attempt < maxRetries) {
629
+ console.log(`\t⚠️ Click attempt ${attempt}/${maxRetries}: selector not found, retrying in ${retryDelay}ms...`);
630
+ await this.sleep(retryDelay);
631
+ continue;
632
+ }
633
+ throw new Error(`Selector "${selector}" not found after ${maxRetries} attempts`);
634
+ }
635
+
636
+ console.log(`\t🖱️ Click attempt ${attempt}/${maxRetries}`);
637
+
638
+ // Wait for connection to establish
639
+ await this.sleep(checkDelay);
640
+
641
+ // Check if audio elements are monitored or WebRTC connections exist
642
+ const status = await this._checkConnectionStatus();
643
+
644
+ if (status.isConnected) {
645
+ console.log(`\t✅ Connection established (monitored: ${status.monitoredElements}, rtc: ${status.rtcConnections})`);
646
+ return;
647
+ }
648
+
649
+ if (attempt < maxRetries) {
650
+ console.log(`\t⚠️ No connection detected (monitored: ${status.monitoredElements}, rtc: ${status.rtcConnections}), retrying in ${retryDelay}ms...`);
651
+ await this.sleep(retryDelay);
652
+ } else {
653
+ console.log(`\t⚠️ No connection detected after ${maxRetries} attempts, proceeding anyway`);
654
+ }
655
+ }
656
+ }
657
+
658
+ /**
659
+ * Check if a widget is showing a configuration error (e.g., "unauthenticated web calls" not enabled).
660
+ * Inspects the shadow DOM for error indicators.
661
+ */
662
+ async _getWidgetErrorState(selector) {
663
+ const parts = selector.split('>>>').map(s => s.trim());
664
+ const hostSelector = parts[0];
665
+
666
+ return await this.page.evaluate((host) => {
667
+ const el = document.querySelector(host);
668
+ if (!el || !el.shadowRoot) return { isConfigError: false };
669
+
670
+ const text = el.shadowRoot.textContent || '';
671
+
672
+ // Check for known configuration error messages
673
+ const configErrors = [
674
+ 'unauthenticated web calls',
675
+ 'support unauthenticated',
676
+ 'not configured',
677
+ 'configuration required'
678
+ ];
679
+
680
+ const lowerText = text.toLowerCase();
681
+ for (const pattern of configErrors) {
682
+ if (lowerText.includes(pattern)) {
683
+ // Extract a readable error message
684
+ const errorText = text.trim().replace(/\s+/g, ' ').substring(0, 200);
685
+ return { isConfigError: true, errorText };
686
+ }
687
+ }
688
+
689
+ return { isConfigError: false };
690
+ }, hostSelector);
691
+ }
692
+
693
+ async _checkConnectionStatus() {
694
+ const status = await this.page.evaluate(() => {
695
+ const info = { monitoredElements: 0, hasActiveConnection: false };
696
+
697
+ if (window.audioMonitor && window.audioMonitor.monitoredElements) {
698
+ info.monitoredElements = window.audioMonitor.monitoredElements.size;
699
+ }
700
+
701
+ document.querySelectorAll('audio').forEach(el => {
702
+ if (el.srcObject) info.hasActiveConnection = true;
703
+ });
704
+
705
+ return info;
706
+ });
707
+
708
+ let rtcConnections = 0;
709
+ try {
710
+ const rtpStats = await this.page.evaluate(async () => {
711
+ if (typeof window.__getRtpStats === 'function') {
712
+ return await window.__getRtpStats();
713
+ }
714
+ return null;
715
+ });
716
+ if (rtpStats) rtcConnections = rtpStats.connectionCount || 0;
717
+ } catch {
718
+ // Ignore RTP stats errors
719
+ }
720
+
721
+ return {
722
+ monitoredElements: status.monitoredElements,
723
+ rtcConnections,
724
+ isConnected: status.monitoredElements > 0 || status.hasActiveConnection || rtcConnections > 0
725
+ };
726
+ }
727
+
579
728
  async handleWaitForVoice() {
580
729
  if (this.debug) {
581
730
  console.log('\t⏳ Waiting for audio to start (AI agent response)...');
582
731
  }
732
+
733
+ // Check if audio is already playing before waiting for a new event.
734
+ // This handles the case where audiostart fired before we started listening
735
+ // (e.g., during click_with_retry or between steps).
736
+ const alreadyPlaying = await this.page.evaluate(() => {
737
+ if (window.audioMonitor && window.audioMonitor.monitoredElements) {
738
+ for (const [, data] of window.audioMonitor.monitoredElements) {
739
+ if (data.isPlaying) return true;
740
+ }
741
+ }
742
+ return false;
743
+ });
744
+
745
+ if (alreadyPlaying) {
746
+ if (this.debug) {
747
+ console.log('\t✅ Audio already playing');
748
+ }
749
+ return;
750
+ }
751
+
583
752
  await this.waitForAudioEvent('audiostart');
584
753
  if (this.debug) {
585
754
  console.log('\t✅ Audio detected');
@@ -590,6 +759,27 @@ export class VoiceAgentTester {
590
759
  if (this.debug) {
591
760
  console.log('\t⏳ Waiting for audio to stop (silence)...');
592
761
  }
762
+
763
+ // Check if all monitored elements are already silent.
764
+ // This handles the case where audiostop fired before we started listening.
765
+ const allSilent = await this.page.evaluate(() => {
766
+ if (window.audioMonitor && window.audioMonitor.monitoredElements) {
767
+ if (window.audioMonitor.monitoredElements.size === 0) return false; // no elements yet
768
+ for (const [, data] of window.audioMonitor.monitoredElements) {
769
+ if (data.isPlaying) return false;
770
+ }
771
+ return true; // all elements exist and are silent
772
+ }
773
+ return false;
774
+ });
775
+
776
+ if (allSilent) {
777
+ if (this.debug) {
778
+ console.log('\t✅ Already silent');
779
+ }
780
+ return;
781
+ }
782
+
593
783
  await this.waitForAudioEvent('audiostop');
594
784
  if (this.debug) {
595
785
  console.log('\t✅ Silence detected');
@@ -678,10 +868,40 @@ export class VoiceAgentTester {
678
868
 
679
869
  // Wait for speech to complete by listening for speechend event
680
870
  try {
681
- await this.waitForAudioEvent('speechend');
871
+ // Use a shorter timeout for speechend (15s) since we have safety fallback in browser
872
+ await this.waitForAudioEvent('speechend', 15000);
682
873
  } catch (error) {
683
- console.error('Timeout waiting for speech to complete:', error.message);
684
- throw error;
874
+ // speechend timeout is recoverable — the audio likely finished but the event was lost
875
+ // (e.g., agent started responding and disrupted the audio element)
876
+ if (this.debug) {
877
+ // Check the state of the speak audio in the browser
878
+ const speakState = await this.page.evaluate(() => {
879
+ const info = {
880
+ currentSpeakAudio: null,
881
+ audioContextState: null,
882
+ };
883
+ try {
884
+ if (window.currentSpeakAudio) {
885
+ info.currentSpeakAudio = {
886
+ paused: window.currentSpeakAudio.paused,
887
+ ended: window.currentSpeakAudio.ended,
888
+ currentTime: window.currentSpeakAudio.currentTime,
889
+ duration: window.currentSpeakAudio.duration,
890
+ readyState: window.currentSpeakAudio.readyState,
891
+ };
892
+ }
893
+ if (window.globalAudioContext) {
894
+ info.audioContextState = window.globalAudioContext.state;
895
+ }
896
+ } catch (e) { /* ignore */ }
897
+ return info;
898
+ }).catch(() => null);
899
+
900
+ console.warn(`\t⚠️ speechend timeout (recovered) — speak audio state:`, JSON.stringify(speakState));
901
+ } else {
902
+ console.warn(`\t⚠️ speechend timeout — continuing (audio likely finished)`);
903
+ }
904
+ // Don't throw — treat speechend timeout as recoverable
685
905
  }
686
906
  }
687
907
 
@@ -44,8 +44,9 @@ describe('Integration Tests', () => {
44
44
  this.text = text;
45
45
  };
46
46
 
47
- // Mock __speak function that will be called by the tester
48
- // This needs to be in the page itself since evaluateOnNewDocument runs before navigation
47
+ // Mock __speak and __waitForMediaStream functions
48
+ // These override the injected audio hooks since inline scripts run after evaluateOnNewDocument
49
+ window.__waitForMediaStream = () => Promise.resolve();
49
50
  window.__speak = (text) => {
50
51
  document.getElementById('speech-output').textContent = text;
51
52
  // Signal speech end after a small delay to allow waitForAudioEvent to be set up
@@ -75,7 +76,7 @@ describe('Integration Tests', () => {
75
76
 
76
77
  // The scenario should complete without throwing errors
77
78
  expect(true).toBe(true);
78
- });
79
+ }, 15000);
79
80
 
80
81
  test('should handle scenario with wait step', async () => {
81
82
  const testPageContent = `