@telnyx/voice-agent-tester 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1033 @@
+ import fs from 'fs';
+ import path from 'path';
+ import { fileURLToPath } from 'url';
+ import { glob } from 'glob';
+ import puppeteer from 'puppeteer';
+ import { launch as launchWithStream, getStream, wss } from 'puppeteer-stream';
+ import { getInstalledBrowsers } from '@puppeteer/browsers';
+ import { transcribeAudio, evaluateTranscription, pcmToWav } from './transcription.js';
+
+ const __filename = fileURLToPath(import.meta.url);
+ const __dirname = path.dirname(__filename);
+
+ export class VoiceAgentTester {
+   constructor(options = {}) {
+     this.verbose = options.verbose || false;
+     this.headless = options.headless || false;
+     this.debug = options.debug || false;
+     this.browser = null;
+     this.page = null;
+     this.pendingPromises = new Map(); // Map of eventType -> Array of {resolve, reject, timeoutId}
+     const defaultPort = process.env.HTTP_PORT || process.env.PORT || 3333;
+     this.assetsServerUrl = options.assetsServerUrl || `http://localhost:${defaultPort}`;
+     this.reportGenerator = options.reportGenerator || null;
+     this.record = options.record || false;
+     this.recordingStream = null;
+     this.recordingFile = null;
+   }
+
+   sleep(time) {
+     return new Promise(r => setTimeout(r, time));
+   }
+
+   waitForAudioEvent(eventType, timeout = 30000) {
+     return new Promise((resolve, reject) => {
+       let diagnosticIntervalId = null;
+
+       // Helper function to collect diagnostics (only used when debug is enabled)
+       const collectDiagnostics = async () => {
+         if (!this.debug) return null;
+
+         try {
+           if (this.page) {
+             // Collect audio diagnostics
+             const audioInfo = await this.page.evaluate(() => {
+               const info = {
+                 audioMonitorAvailable: typeof window.audioMonitor !== 'undefined',
+                 audioDiagnosticsAvailable: typeof window.__getAudioDiagnostics === 'function',
+                 rtpStatsAvailable: typeof window.__getRtpStats === 'function',
+                 monitoredElementsCount: 0,
+                 monitoredElements: [],
+                 mediaStreamsInfo: null,
+                 audioContextState: null,
+                 timestamp: Date.now()
+               };
+
+               // Use detailed diagnostics function if available
+               if (typeof window.__getAudioDiagnostics === 'function') {
+                 const detailed = window.__getAudioDiagnostics();
+                 info.monitoredElementsCount = detailed.monitoredElementsCount;
+                 info.audioContextState = detailed.audioContextState;
+                 info.monitoredElements = detailed.elements;
+               } else if (window.audioMonitor && window.audioMonitor.monitoredElements) {
+                 // Fallback to basic info
+                 info.monitoredElementsCount = window.audioMonitor.monitoredElements.size;
+                 window.audioMonitor.monitoredElements.forEach((data, elementId) => {
+                   info.monitoredElements.push({
+                     elementId,
+                     isPlaying: data.isPlaying,
+                     lastAudioTime: data.lastAudioTime,
+                     silenceThreshold: data.silenceThreshold,
+                     isProgrammatic: data.isProgrammatic || false,
+                     timeSinceLastAudio: data.lastAudioTime ? Date.now() - data.lastAudioTime : null
+                   });
+                 });
+               }
+
+               if (typeof window.__getMediaStreamInfo === 'function') {
+                 info.mediaStreamsInfo = window.__getMediaStreamInfo();
+               }
+
+               return info;
+             });
+
+             // Collect RTP stats separately (async function in browser)
+             let rtpStats = null;
+             try {
+               rtpStats = await this.page.evaluate(async () => {
+                 if (typeof window.__getRtpStats === 'function') {
+                   return await window.__getRtpStats();
+                 }
+                 return null;
+               });
+             } catch (rtpError) {
+               // RTP stats collection failed, continue without them
+             }
+
+             return {
+               ...audioInfo,
+               rtpStats
+             };
+           }
+         } catch (diagError) {
+           console.error('Failed to collect diagnostics:', diagError.message);
+         }
+         return null;
+       };
+
+       // Start periodic diagnostic logging (every 10 seconds) - only when debug is enabled
+       const startTime = Date.now();
+       if (this.debug) {
+         diagnosticIntervalId = setInterval(async () => {
+           const elapsed = Math.round((Date.now() - startTime) / 1000);
+           const diagnostics = await collectDiagnostics();
+
+           if (diagnostics) {
+             const elementsInfo = diagnostics.monitoredElements.length > 0
+               ? diagnostics.monitoredElements.map(e =>
+                   `${e.elementId}(playing=${e.isPlaying})`
+                 ).join(', ')
+               : 'none';
+             console.log(`\t⏱️ Still waiting for '${eventType}'... [${elapsed}s elapsed, monitored: ${diagnostics.monitoredElementsCount}, elements: ${elementsInfo}]`);
+           } else {
+             console.log(`\t⏱️ Still waiting for '${eventType}'... [${elapsed}s elapsed]`);
+           }
+         }, 10000);
+       }
+
+       // Set up timeout
+       const timeoutId = setTimeout(async () => {
+         // Clear diagnostic interval
+         if (diagnosticIntervalId) {
+           clearInterval(diagnosticIntervalId);
+         }
+
+         // Remove this promise from the pending list (match by timeoutId: the stored resolve is the wrapped one)
+         const promises = this.pendingPromises.get(eventType) || [];
+         const index = promises.findIndex(p => p.timeoutId === timeoutId);
+         if (index !== -1) {
+           promises.splice(index, 1);
+           if (promises.length === 0) {
+             this.pendingPromises.delete(eventType);
+           }
+         }
+
+         // Build error message - detailed only when debug is enabled
+         let errorMessage = `Timeout waiting for '${eventType}' event after ${timeout}ms`;
+
+         if (this.debug) {
+           // Collect browser-side diagnostics before rejecting
+           const diagnostics = await collectDiagnostics();
+
+           if (diagnostics) {
+             errorMessage += '\n\n📊 Audio Monitor Diagnostics:';
+             errorMessage += `\n - Audio monitor available: ${diagnostics.audioMonitorAvailable}`;
+             if (diagnostics.audioContextState) {
+               errorMessage += `\n - AudioContext state: ${diagnostics.audioContextState}`;
+             }
+             errorMessage += `\n - Monitored elements count: ${diagnostics.monitoredElementsCount}`;
+
+             if (diagnostics.monitoredElements.length > 0) {
+               errorMessage += '\n - Monitored elements:';
+               for (const elem of diagnostics.monitoredElements) {
+                 errorMessage += `\n • ${elem.elementId}:`;
+                 errorMessage += `\n isPlaying=${elem.isPlaying}, isProgrammatic=${elem.isProgrammatic}`;
+
+                 // Include audio level info if available
+                 if (elem.currentAudioLevel !== undefined) {
+                   errorMessage += `\n audioLevel=${elem.currentAudioLevel} (threshold=${elem.silenceThreshold})`;
+                   if (elem.wouldTriggerAudioStart !== undefined) {
+                     errorMessage += `, wouldTrigger=${elem.wouldTriggerAudioStart}`;
+                   }
+                 }
+
+                 if (elem.timeSinceLastAudio !== null) {
+                   errorMessage += `\n lastAudioAge=${elem.timeSinceLastAudio}ms`;
+                 }
+               }
+             } else {
+               errorMessage += '\n ⚠️ No audio elements are being monitored. This may indicate:';
+               errorMessage += '\n • The page has not created an audio element yet';
+               errorMessage += '\n • The audio element does not have a valid srcObject/src';
+               errorMessage += '\n • The audio hooks were not properly injected';
+             }
+
+             if (diagnostics.mediaStreamsInfo) {
+               errorMessage += `\n - Media streams (input): ${diagnostics.mediaStreamsInfo.totalStreams} stream(s)`;
+             }
+
+             // Add RTP stats if available
+             if (diagnostics.rtpStats) {
+               errorMessage += '\n\n📡 WebRTC/RTP Stats:';
+               errorMessage += `\n - Active connections: ${diagnostics.rtpStats.connectionCount}`;
+
+               if (diagnostics.rtpStats.connections && diagnostics.rtpStats.connections.length > 0) {
+                 diagnostics.rtpStats.connections.forEach((conn, idx) => {
+                   errorMessage += `\n - Connection ${idx + 1}:`;
+                   errorMessage += `\n state=${conn.connectionState}, ice=${conn.iceConnectionState}`;
+
+                   if (conn.inboundAudio && conn.inboundAudio.length > 0) {
+                     conn.inboundAudio.forEach((audio, audioIdx) => {
+                       errorMessage += `\n Inbound Audio ${audioIdx + 1}:`;
+                       errorMessage += `\n packets: received=${audio.packetsReceived}, lost=${audio.packetsLost}`;
+                       errorMessage += `\n bytes: ${audio.bytesReceived}`;
+                       if (audio.jitter !== undefined) {
+                         errorMessage += `, jitter=${audio.jitter.toFixed(4)}s`;
+                       }
+                       if (audio.audioLevel !== undefined) {
+                         errorMessage += `\n audioLevel=${audio.audioLevel.toFixed(4)}`;
+                       }
+                       if (audio.concealedSamples !== undefined) {
+                         errorMessage += `\n concealed=${audio.concealedSamples}, silentConcealed=${audio.silentConcealedSamples}`;
+                       }
+                     });
+                   } else {
+                     errorMessage += '\n ⚠️ No inbound audio streams';
+                   }
+
+                   if (conn.outboundAudio && conn.outboundAudio.length > 0) {
+                     conn.outboundAudio.forEach((audio, audioIdx) => {
+                       errorMessage += `\n Outbound Audio ${audioIdx + 1}:`;
+                       errorMessage += `\n packets: sent=${audio.packetsSent}, bytes=${audio.bytesSent}`;
+                     });
+                   }
+
+                   if (conn.candidatePairs && conn.candidatePairs.length > 0) {
+                     const pair = conn.candidatePairs[0];
+                     if (pair.currentRoundTripTime !== undefined) {
+                       errorMessage += `\n RTT: ${(pair.currentRoundTripTime * 1000).toFixed(1)}ms`;
+                     }
+                   }
+                 });
+               } else if (diagnostics.rtpStats.connectionCount === 0) {
+                 errorMessage += '\n ⚠️ No WebRTC connections established';
+               }
+             }
+           } else {
+             errorMessage += '\n (Could not collect browser diagnostics)';
+           }
+         }
+
+         reject(new Error(errorMessage));
+       }, timeout);
+
+       // Store reference to clear interval on resolve
+       const originalResolve = resolve;
+       const wrappedResolve = (value) => {
+         if (diagnosticIntervalId) {
+           clearInterval(diagnosticIntervalId);
+         }
+         originalResolve(value);
+       };
+
+       // Register this promise to be resolved when event arrives
+       if (!this.pendingPromises.has(eventType)) {
+         this.pendingPromises.set(eventType, []);
+       }
+       this.pendingPromises.get(eventType).push({ resolve: wrappedResolve, reject, timeoutId });
+     });
+   }
+
+   clearAudioEventQueue() {
+     // Reject and clear any promises still waiting for events
+     for (const [eventType, promises] of this.pendingPromises.entries()) {
+       for (const { reject, timeoutId } of promises) {
+         clearTimeout(timeoutId);
+         reject(new Error(`Event queue cleared while waiting for ${eventType}`));
+       }
+     }
+     this.pendingPromises.clear();
+   }
+
+   async launch(url) {
+     if (this.browser) {
+       return;
+     }
+
+     // Log installed browsers
+     const homeDir = process.env.HOME || process.env.USERPROFILE;
+     const cacheDir = path.join(homeDir, '.cache', 'puppeteer');
+     const browsers = await getInstalledBrowsers({ cacheDir });
+     console.log(`Installed browsers: ${browsers.map(b => b.browser + ' ' + b.buildId).join(', ')}`);
+
+     const launchOptions = {
+       headless: this.headless,
+       args: [
+         '--no-sandbox',
+         '--disable-setuid-sandbox',
+         // This is not compatible with puppeteer-stream
+         // Use context.overridePermissions instead
+         // '--use-fake-ui-for-media-stream',
+         '--autoplay-policy=no-user-gesture-required',
+         '--disable-web-security',
+         '--allow-running-insecure-content',
+         '--no-first-run',
+         '--no-default-browser-check',
+         '--allowlisted-extension-id=jjndjgheafjngoipoacpjgeicjeomjli' // puppeteer-stream extension id
+       ]
+     };
+
+     // Use puppeteer-stream launch when recording is enabled
+     if (this.record) {
+       this.browser = await launchWithStream({
+         ...launchOptions,
+         headless: launchOptions.headless ? 'new' : launchOptions.headless,
+         executablePath: browsers
+           .filter(b => b.browser === 'chrome')
+           .sort((a, b) => (a.buildId < b.buildId ? 1 : a.buildId > b.buildId ? -1 : 0))
+           .at(0).executablePath
+       });
+     } else {
+       this.browser = await puppeteer.launch(launchOptions);
+     }
+
+     // Log browser info
+     const browserVersion = await this.browser.version();
+     console.log(`Browser launched: ${browserVersion}`);
+
+     // Override permissions for media stream (only for http/https URLs, not data: or file: URLs)
+     if (url && (url.startsWith('http://') || url.startsWith('https://'))) {
+       const context = this.browser.defaultBrowserContext();
+       await context.clearPermissionOverrides();
+       await context.overridePermissions(url, ['camera', 'microphone']);
+     }
+
+     this.page = await this.browser.newPage();
+
+     // Register __publishEvent function for browser to call back to Node.js
+     await this.page.exposeFunction('__publishEvent', (eventType, data) => {
+       const event = { eventType, data, timestamp: Date.now() };
+
+       console.log(`\t📢 Event received: ${eventType}`);
+
+       // Check if there are any pending promises waiting for this event type
+       const pendingPromises = this.pendingPromises.get(eventType);
+       if (pendingPromises && pendingPromises.length > 0) {
+         // Resolve the first pending promise immediately
+         const { resolve, timeoutId } = pendingPromises.shift();
+         clearTimeout(timeoutId);
+
+         // Clean up empty arrays
+         if (pendingPromises.length === 0) {
+           this.pendingPromises.delete(eventType);
+         }
+
+         resolve(event);
+       }
+     });
+
+     // Enable console logging if verbose mode is enabled
+     if (this.verbose) {
+       this.page.on('console', (msg) => {
+         console.log(`[BROWSER] ${msg.text()}`);
+       });
+     }
+
+     // Always listen for page errors
+     this.page.on('pageerror', (error) => {
+       console.error(`[PAGE ERROR] ${error.message}`);
+       if (this.verbose) {
+         console.error(error.stack);
+       }
+     });
+   }
+
+   async close() {
+     if (this.browser) {
+       // Stop recording if active
+       if (this.recordingStream) {
+         await this.stopRecording();
+       }
+
+       // Clear any pending promises before closing
+       for (const [eventType, promises] of this.pendingPromises.entries()) {
+         for (const { reject, timeoutId } of promises) {
+           clearTimeout(timeoutId);
+           reject(new Error(`Browser closed while waiting for ${eventType}`));
+         }
+       }
+       this.pendingPromises.clear();
+
+       await this.browser.close();
+       this.browser = null;
+       this.page = null;
+
+       // Close the websocket server if recording was used
+       if (this.record) {
+         try {
+           (await wss).close();
+         } catch (error) {
+           // Ignore errors when closing wss
+         }
+       }
+     }
+   }
+
+   async startRecording(appName, scenarioName, repetition) {
+     if (!this.record || !this.page) {
+       return;
+     }
+
+     // Ensure output directory exists
+     const outputDir = path.join(__dirname, '..', 'output');
+     if (!fs.existsSync(outputDir)) {
+       fs.mkdirSync(outputDir, { recursive: true });
+     }
+
+     // Create filename with timestamp and test info
+     const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
+     const sanitizedAppName = appName.replace(/[^a-zA-Z0-9]/g, '_');
+     const sanitizedScenarioName = scenarioName.replace(/[^a-zA-Z0-9]/g, '_');
+     const filename = `recording_${sanitizedAppName}_${sanitizedScenarioName}_${repetition}_${timestamp}.webm`;
+     const filePath = path.join(outputDir, filename);
+
+     // Create write stream for the recording
+     this.recordingFile = fs.createWriteStream(filePath);
+
+     console.log('Starting stream');
+     // Start the stream with audio and video
+     this.recordingStream = await getStream(this.page, {
+       audio: true,
+       video: true,
+       mimeType: 'video/webm;codecs=vp8,opus'
+     });
+
+     console.log('Stream started');
+
+     // Pipe the stream to the file
+     this.recordingStream.pipe(this.recordingFile);
+
+     console.log(`🎥 Recording started: ${filename}`);
+     this.recordingFilePath = filePath;
+   }
+
+   async stopRecording() {
+     if (!this.recordingStream) {
+       return;
+     }
+
+     return new Promise((resolve) => {
+       // Destroy the stream to stop recording
+       this.recordingStream.destroy();
+
+       // Close the file stream
+       this.recordingFile.on('close', () => {
+         console.log(`🎥 Recording saved: ${this.recordingFilePath}`);
+         this.recordingStream = null;
+         this.recordingFile = null;
+         this.recordingFilePath = null;
+         resolve();
+       });
+
+       this.recordingFile.close();
+     });
+   }
+
+   async injectJavaScriptFiles() {
+     if (!this.page) {
+       throw new Error('Browser not launched. Call launch() first.');
+     }
+
+     // Set the assets server URL in the page context for every navigation
+     await this.page.evaluateOnNewDocument((url) => {
+       window.__assetsServerUrl = url;
+     }, this.assetsServerUrl);
+
+     const jsFolder = path.join(__dirname, '..', 'javascript');
+
+     if (!fs.existsSync(jsFolder)) {
+       console.log('JavaScript folder not found, skipping injection');
+       return;
+     }
+
+     const jsFiles = await glob(path.join(jsFolder, '*.js'));
+
+     for (const jsFile of jsFiles) {
+       try {
+         const content = fs.readFileSync(jsFile, 'utf8');
+         await this.page.evaluateOnNewDocument(content);
+         if (this.verbose) {
+           console.log(`Configured injection on navigation: ${path.basename(jsFile)}`);
+         }
+       } catch (error) {
+         console.error(`Error configuring injection for ${jsFile}:`, error.message);
+       }
+     }
+   }
+
+   async executeStep(step, stepIndex, appName = '', scenarioName = '', repetition = 1) {
+     if (!this.page) {
+       throw new Error('Browser not launched. Call launch() first.');
+     }
+
+     const action = step.action;
+     const beginTime = Date.now();
+
+     try {
+       let handlerResult;
+       switch (action) {
+         case 'click':
+           handlerResult = await this.handleClick(step);
+           break;
+         case 'wait_for_voice':
+           handlerResult = await this.handleWaitForVoice();
+           break;
+         case 'wait_for_silence':
+           handlerResult = await this.handleWaitForSilence();
+           break;
+         case 'wait':
+           handlerResult = await this.handleWait(step);
+           break;
+         case 'speak':
+           handlerResult = await this.handleSpeak(step);
+           break;
+         case 'listen':
+           handlerResult = await this.handleListen(step);
+           break;
+         case 'sleep':
+           handlerResult = await this.handleSleep(step);
+           break;
+         case 'wait_for_element':
+           handlerResult = await this.handleWaitForElement(step);
+           break;
+         case 'type':
+           handlerResult = await this.handleType(step);
+           break;
+         case 'fill':
+           handlerResult = await this.handleFill(step);
+           break;
+         case 'select':
+           handlerResult = await this.handleSelect(step);
+           break;
+         case 'screenshot':
+           handlerResult = await this.handleScreenshot(step);
+           break;
+
+         default:
+           console.log(`Unknown action: ${action}`);
+       }
+
+       // Record elapsed time for all steps
+       const elapsedTimeMs = Date.now() - beginTime;
+       const elapsedTimeSec = elapsedTimeMs / 1000;
+       console.log(`\tElapsed time: ${elapsedTimeSec.toFixed(3)} seconds`);
+
+       // Record metrics for report if enabled and step has metrics attribute
+       if (this.reportGenerator && step.metrics) {
+         if (step.metrics.includes('elapsed_time')) {
+           this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, 'elapsed_time', elapsedTimeMs);
+         }
+         // Record any additional metrics returned by the handler
+         if (handlerResult && typeof handlerResult === 'object') {
+           for (const [metricName, metricValue] of Object.entries(handlerResult)) {
+             if (step.metrics.includes(metricName)) {
+               this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, metricName, metricValue);
+             }
+           }
+         }
+       }
+     } catch (error) {
+       // Only print the first line of error (before diagnostics) to avoid duplication
+       const shortMessage = error.message.split('\n')[0];
+       console.error(`Error executing step ${stepIndex + 1} (${action}): ${shortMessage}`);
+       throw error;
+     }
+   }
+
+   async handleClick(step) {
+     const selector = step.selector;
+     if (!selector) {
+       throw new Error('No selector specified for click action');
+     }
+
+     await this.page.waitForSelector(selector);
+     await this.page.click(selector);
+   }
+
+   async handleWaitForVoice() {
+     if (this.debug) {
+       console.log('\t⏳ Waiting for audio to start (AI agent response)...');
+     }
+     await this.waitForAudioEvent('audiostart');
+     if (this.debug) {
+       console.log('\t✅ Audio detected');
+     }
+   }
+
+   async handleWaitForSilence() {
+     if (this.debug) {
+       console.log('\t⏳ Waiting for audio to stop (silence)...');
+     }
+     await this.waitForAudioEvent('audiostop');
+     if (this.debug) {
+       console.log('\t✅ Silence detected');
+     }
+   }
+
+   async handleWait(step) {
+     const selector = step.selector;
+     if (!selector) {
+       throw new Error('No selector specified for wait action');
+     }
+
+     console.log(`Waiting for selector: ${selector}`);
+     await this.page.waitForSelector(selector);
+   }
+
+   async handleSpeak(step) {
+     const text = step.text;
+     const file = step.file;
+
+     if (!text && !file) {
+       throw new Error('No text or file specified for speak action');
+     }
+
+     if (text && file) {
+       throw new Error('Cannot specify both text and file for speak action');
+     }
+
+     if (file) {
+       const assetsPath = path.join(__dirname, '..', 'assets');
+       const filePath = path.join(assetsPath, file);
+
+       if (!fs.existsSync(filePath)) {
+         throw new Error(`Audio file not found: ${file}`);
+       }
+
+       const fileBuffer = fs.readFileSync(filePath);
+       const base64 = fileBuffer.toString('base64');
+       const mimeType = file.endsWith('.wav') ? 'audio/wav' : 'audio/mpeg';
+       const fileUrl = `data:${mimeType};base64,${base64}`;
+
+       await this.page.evaluate(async (url) => {
+         if (typeof window.__waitForMediaStream === 'function') {
+           try {
+             await window.__waitForMediaStream();
+           } catch (e) {
+             console.error(e.message);
+             throw e;
+           }
+         }
+
+         console.log('Checking for __speakFromUrl function...');
+         console.log('typeof window.__speakFromUrl:', typeof window.__speakFromUrl);
+         console.log('typeof window.__speak:', typeof window.__speak);
+
+         if (typeof window.__speakFromUrl === 'function') {
+           console.log('Calling __speakFromUrl with:', url);
+           window.__speakFromUrl(url);
+         } else if (typeof window.__speak === 'function') {
+           console.log('__speakFromUrl not available, but __speak is available. Calling __speak with URL:', url);
+           window.__speak(url);
+         } else {
+           console.error('Neither __speakFromUrl nor __speak is available');
+           console.log('Available window properties:', Object.keys(window).filter(k => k.startsWith('__')));
+           throw new Error('Neither __speakFromUrl nor __speak is available');
+         }
+       }, fileUrl);
+     } else {
+       await this.page.evaluate(async (textToSpeak) => {
+         if (typeof window.__waitForMediaStream === 'function') {
+           try {
+             await window.__waitForMediaStream();
+           } catch (e) {
+             console.error(e.message);
+             throw e;
+           }
+         }
+
+         if (typeof window.__speak === 'function') {
+           window.__speak(textToSpeak);
+         } else {
+           throw new Error('__speak method not available');
+         }
+       }, text);
+     }
+
+     // Wait for speech to complete by listening for speechend event
+     try {
+       await this.waitForAudioEvent('speechend');
+     } catch (error) {
+       console.error('Timeout waiting for speech to complete:', error.message);
+       throw error;
+     }
+   }
+
+   async handleListen(step) {
+     const evaluation = step.evaluation;
+     if (!evaluation) {
+       throw new Error('No evaluation prompt specified for listen action');
+     }
+
+     try {
+       // Start recording
+       await this.page.evaluate(() => {
+         if (typeof window.__startRecording === 'function') {
+           window.__startRecording();
+         } else {
+           throw new Error('__startRecording method not available');
+         }
+       });
+
+       await this.waitForAudioEvent('recordingstart');
+       await this.waitForAudioEvent('audiostart');
+       await this.waitForAudioEvent('audiostop');
+
+       // Stop recording
+       await this.page.evaluate(() => {
+         if (typeof window.__stopRecording === 'function') {
+           window.__stopRecording();
+         } else {
+           throw new Error('__stopRecording method not available');
+         }
+       });
+
+       // Wait for recording to complete and get the audio data
+       const recordingEvent = await this.waitForAudioEvent('recordingcomplete');
+
+       const audioMetadata = {
+         mimeType: recordingEvent.data.mimeType,
+         sampleRate: recordingEvent.data.sampleRate,
+         channels: recordingEvent.data.channels,
+         bitsPerSample: recordingEvent.data.bitsPerSample
+       };
+
+       const audioFilePath = await this.saveAudioAsWAV(recordingEvent.data.audioData, audioMetadata);
+       console.log(`\tAudio saved as: ${audioFilePath}`);
+
+       // Process the audio with OpenAI
+       const transcription = await transcribeAudio(audioFilePath);
+       console.log(`\tTranscription: ${transcription}`);
+
+       // Evaluate the transcription against the evaluation prompt
+       const evaluationResult = await evaluateTranscription(transcription, evaluation);
+       console.log(`\tEvaluation result: ${evaluationResult.score} "${evaluationResult.explanation}"`);
+
+       return {
+         score: evaluationResult.score
+       };
+     } catch (error) {
+       console.error('Error in listen command:', error.message);
+       throw error;
+     }
+   }
+
+   async handleSleep(step) {
+     const time = step.time;
+     if (!time) {
+       throw new Error('No time specified for sleep action');
+     }
+
+     await this.sleep(time);
+   }
+
+   async handleWaitForElement(step) {
+     const selector = step.selector;
+     if (!selector) {
+       throw new Error('No selector specified for wait_for_element action');
+     }
+
+     await this.page.waitForSelector(selector);
+   }
+
+   async handleType(step) {
+     const selector = step.selector;
+     const text = step.text;
+
+     if (!selector) {
+       throw new Error('No selector specified for type action');
+     }
+
+     if (!text) {
+       throw new Error('No text specified for type action');
+     }
+
+     // Wait for the element to be available
+     await this.page.waitForSelector(selector);
+
+     // Focus the element and type the text
+     await this.page.focus(selector);
+     await this.page.type(selector, text);
+   }
+
+   async handleFill(step) {
+     const selector = step.selector;
+     const text = step.text;
+
+     if (!selector) {
+       throw new Error('No selector specified for fill action');
+     }
+
+     if (text === undefined) {
+       throw new Error('No text specified for fill action');
+     }
+
+     // Wait for the element to be available
+     await this.page.waitForSelector(selector);
+
+     // Use $eval for cleaner element manipulation
+     await this.page.$eval(selector, (el, value) => {
+       // Check if it's an input or textarea element
+       if (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA') {
+         el.value = value;
+         // Trigger input event to notify any listeners
+         el.dispatchEvent(new Event('input', { bubbles: true }));
+         el.dispatchEvent(new Event('change', { bubbles: true }));
+       } else {
+         throw new Error(`Fill action can only be used on input or textarea elements, found: ${el.tagName}`);
+       }
+     }, text);
+   }
+
+   async handleSelect(step) {
+     const selector = step.selector;
+     const value = step.value;
+     const values = step.values;
+     const text = step.text;
+     const checked = step.checked;
+
+     if (!selector) {
+       throw new Error('No selector specified for select action');
+     }
+
+     // Wait for the element to be available
+     await this.page.waitForSelector(selector);
+
+     // Determine the element type and handle accordingly
+     const elementInfo = await this.page.$eval(selector, (el) => {
+       return {
+         tagName: el.tagName,
+         type: el.type || null,
+         multiple: el.multiple || false
+       };
+     });
+
+     switch (elementInfo.tagName) {
+       case 'SELECT':
+         await this.handleSelectDropdown(selector, value, values, text, elementInfo.multiple);
+         break;
+       case 'INPUT':
+         if (elementInfo.type === 'checkbox') {
+           await this.handleSelectCheckbox(selector, checked);
+         } else if (elementInfo.type === 'radio') {
+           await this.handleSelectRadio(selector);
+         } else {
+           throw new Error(`Select action not supported for input type: ${elementInfo.type}`);
+         }
+         break;
+       default:
+         // For custom dropdowns or clickable elements, try clicking
+         await this.handleSelectCustom(selector, text);
+     }
+   }
+
+   async handleSelectDropdown(selector, value, values, text, isMultiple) {
+     if (values && Array.isArray(values)) {
+       // Multiple values for multi-select
+       if (!isMultiple) {
+         throw new Error('Cannot select multiple values on a single-select dropdown');
+       }
+       await this.page.select(selector, ...values);
+     } else if (value !== undefined) {
+       // Single value selection
+       await this.page.select(selector, value);
+     } else if (text !== undefined) {
+       // Select by visible text when no value attribute
+       await this.page.$eval(selector, (selectEl, optionText) => {
+         const option = Array.from(selectEl.options).find(opt =>
+           opt.textContent.trim() === optionText.trim()
+         );
+         if (!option) {
+           throw new Error(`Option with text "${optionText}" not found`);
+         }
+         selectEl.value = option.value;
+         selectEl.dispatchEvent(new Event('change', { bubbles: true }));
+         selectEl.dispatchEvent(new Event('input', { bubbles: true }));
+       }, text);
+     } else {
+       throw new Error('No value, values, or text specified for select dropdown');
+     }
+   }
+
+   async handleSelectCheckbox(selector, checked) {
+     const currentState = await this.page.$eval(selector, el => el.checked);
+     const targetState = checked !== undefined ? checked : !currentState;
+
+     if (currentState !== targetState) {
+       await this.page.click(selector);
+     }
+   }
+
+   async handleSelectRadio(selector) {
+     // For radio buttons, always click to select
+     await this.page.click(selector);
+   }
+
+   async handleSelectCustom(selector, text) {
+     if (text !== undefined) {
+       // For custom dropdowns, try to find and click an option with matching text
+       await this.page.evaluate((parentSelector, optionText) => {
+         const parent = document.querySelector(parentSelector);
+         if (!parent) {
+           throw new Error(`Custom dropdown not found: ${parentSelector}`);
+         }
+
+         // Try different selectors for options
+         const possibleSelectors = ['[role="option"]', 'li', 'a', '.option', 'div'];
+         let option = null;
+
+         for (const sel of possibleSelectors) {
+           const options = parent.querySelectorAll(sel);
+           option = Array.from(options).find(opt =>
+             opt.textContent.trim() === optionText.trim()
+           );
+           if (option) break;
+         }
+
+         if (!option) {
+           throw new Error(`Option with text "${optionText}" not found in custom dropdown`);
+         }
+
+         option.click();
+       }, selector, text);
+     } else {
+       // If no text specified, just click the element itself
+       await this.page.click(selector);
+     }
+   }
+
+   async handleScreenshot(step) {
+     if (!this.page) {
+       throw new Error('Browser not launched. Call launch() first.');
+     }
+
+     const filename = step.filename || `screenshot_${Date.now()}.png`;
+     const outputDir = step.outputDir || path.join(__dirname, '..', 'output');
+
+     // Ensure output directory exists
+     if (!fs.existsSync(outputDir)) {
+       fs.mkdirSync(outputDir, { recursive: true });
+     }
+
+     const screenshotPath = path.join(outputDir, filename);
+
+     // Take screenshot with optional parameters
+     const screenshotOptions = {
+       path: screenshotPath,
+     };
+
+     await this.page.screenshot(screenshotOptions);
+
+     return screenshotPath;
+   }
+
+
+
+   async saveAudioAsWAV(base64Audio, audioMetadata) {
+     try {
+       // Convert base64 to buffer
+       const pcmBuffer = Buffer.from(base64Audio, 'base64');
+
+       // Convert PCM to WAV format
+       const wavBuffer = pcmToWav(pcmBuffer, audioMetadata.sampleRate, audioMetadata.channels, audioMetadata.bitsPerSample);
+
+       // Save to file
+       const outputDir = path.join(__dirname, '..', 'output');
+       if (!fs.existsSync(outputDir)) {
+         fs.mkdirSync(outputDir, { recursive: true });
+       }
+
+       const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
+       const wavFilePath = path.join(outputDir, `recording_${timestamp}.wav`);
+       fs.writeFileSync(wavFilePath, wavBuffer);
+
+       return wavFilePath;
+     } catch (error) {
+       console.error('Error saving audio as WAV:', error);
+       throw new Error(`Failed to save WAV: ${error.message}`);
+     }
+   }
+
+   async runScenario(url, appSteps, scenarioSteps, appName = '', scenarioName = '', repetition = 1, backgroundFile = null) {
+     let success = true;
+     try {
+       // Start tracking this run with app and scenario names
+       if (this.reportGenerator) {
+         this.reportGenerator.beginRun(appName, scenarioName, repetition);
+       }
+
+       // Combine app steps and scenario steps
+       const steps = [...appSteps, ...scenarioSteps];
+
+       await this.launch(url);
+
+       // Inject JavaScript files before loading the page
+       await this.injectJavaScriptFiles();
+
+       await this.page.goto(url, { waitUntil: 'load' });
+
+       await this.page.waitForNetworkIdle({ timeout: 5000, concurrency: 2 });
+
+       // Small wait to ensure injected scripts are fully loaded
+       await this.sleep(500);
+
+       // Start recording if enabled
+       await this.startRecording(appName, scenarioName, repetition);
+
+       // Execute all configured steps
+       for (let i = 0; i < steps.length; i++) {
+         const step = steps[i];
+         console.log(`Executing step ${i + 1}: ${JSON.stringify(step)}`);
+         await this.executeStep(step, i, appName, scenarioName, repetition);
+       }
+
+       // Keep the browser open for a bit after all steps
+       await this.sleep(500);
+
+     } catch (error) {
+       // Log the error but still finish the run for report generation
+       success = false;
+       // Only print the first line to avoid duplicating diagnostics
+       const shortMessage = error.message.split('\n')[0];
+       console.error(`Error during scenario execution: ${shortMessage}`);
+       throw error;
+     } finally {
+       // Always finish the run for report generation, even if there was an error
+       if (this.reportGenerator) {
+         this.reportGenerator.endRun(appName, scenarioName, repetition, success);
+       }
+
+       await this.close();
+     }
+   }
+ }
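
For orientation, a minimal usage sketch of the VoiceAgentTester class added in this version (not part of the diff). The constructor options and the runScenario(url, appSteps, scenarioSteps, appName, scenarioName) signature come from the source above; the package entry-point export, the URL, selectors, and step contents are illustrative assumptions.

// Hypothetical example — the import path assumes the package entry point
// re-exports the class; the URL and step definitions are made up.
import { VoiceAgentTester } from '@telnyx/voice-agent-tester';

const tester = new VoiceAgentTester({ headless: true, verbose: false });

// App steps get the page into a call; scenario steps exercise the agent.
const appSteps = [
  { action: 'click', selector: '#start-call' },
  { action: 'wait_for_voice' }
];
const scenarioSteps = [
  { action: 'speak', text: 'What are your opening hours?' },
  { action: 'listen', evaluation: 'The agent states its opening hours.' }
];

// runScenario launches the browser, injects the audio hooks, runs the steps,
// and always closes the browser in its finally block.
await tester.runScenario('https://example.com/agent', appSteps, scenarioSteps, 'demo-app', 'opening-hours');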