@telnyx/voice-agent-tester 0.4.3 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.5](https://github.com/team-telnyx/voice-agent-tester/compare/v0.4.4...v0.4.5) (2026-03-16)
4
+
5
+ ### Bug Fixes
6
+
7
+ * add event-based fallback for audio monitoring (ElevenLabs support) ([#27](https://github.com/team-telnyx/voice-agent-tester/issues/27)) ([6051b5e](https://github.com/team-telnyx/voice-agent-tester/commit/6051b5e949376951f0fb046cffcc5a2a5c250e19))
8
+ * align comparison metrics by scenario step index, not absolute step number ([#23](https://github.com/team-telnyx/voice-agent-tester/issues/23)) ([e4c485b](https://github.com/team-telnyx/voice-agent-tester/commit/e4c485b6eae5e9a6d60f11745b46997a183fc180)), closes [#1](https://github.com/team-telnyx/voice-agent-tester/issues/1) [#2](https://github.com/team-telnyx/voice-agent-tester/issues/2)
9
+ * make ElevenLabs branch-id optional for comparison mode ([#24](https://github.com/team-telnyx/voice-agent-tester/issues/24)) ([3f1735a](https://github.com/team-telnyx/voice-agent-tester/commit/3f1735a6a02e6c1edc4b6e17a6be4087127bded8))
10
+ * single headline number in comparison, per-response in --debug ([#26](https://github.com/team-telnyx/voice-agent-tester/issues/26)) ([a482129](https://github.com/team-telnyx/voice-agent-tester/commit/a482129c1bfe49d28aca7dec8230d30e5b6d8f8a)), closes [#1](https://github.com/team-telnyx/voice-agent-tester/issues/1) [#2](https://github.com/team-telnyx/voice-agent-tester/issues/2)
11
+
12
+ ### Documentation
13
+
14
+ * restructure README with comparison mode front and center ([#25](https://github.com/team-telnyx/voice-agent-tester/issues/25)) ([f15cbcd](https://github.com/team-telnyx/voice-agent-tester/commit/f15cbcd8707cded8081d00b90accf09fd77be169))
15
+
16
+ ## [0.4.4](https://github.com/team-telnyx/voice-agent-tester/compare/v0.4.3...v0.4.4) (2026-03-11)
17
+
18
+ ### Features
19
+
20
+ * fix speechend race condition, add --retries flag ([#21](https://github.com/team-telnyx/voice-agent-tester/issues/21)) ([09e3b65](https://github.com/team-telnyx/voice-agent-tester/commit/09e3b6578face6c407d058991ab5495d9463e544))
21
+
22
+ ### Chores
23
+
24
+ * release v0.4.3 ([#20](https://github.com/team-telnyx/voice-agent-tester/issues/20)) ([bdeb87b](https://github.com/team-telnyx/voice-agent-tester/commit/bdeb87bed502919a9fed9950e69242b1c2aefcfc))
25
+
3
26
  ## [0.4.3](https://github.com/team-telnyx/voice-agent-tester/compare/v0.4.2...v0.4.3) (2026-03-11)
4
27
 
5
28
  ### Features
package/README.md CHANGED
@@ -3,160 +3,119 @@
3
3
  [![CI](https://github.com/team-telnyx/voice-agent-tester/actions/workflows/ci.yml/badge.svg)](https://github.com/team-telnyx/voice-agent-tester/actions/workflows/ci.yml)
4
4
  [![npm version](https://img.shields.io/npm/v/@telnyx/voice-agent-tester.svg)](https://www.npmjs.com/package/@telnyx/voice-agent-tester)
5
5
 
6
- A CLI tool for automated benchmarking and testing of voice AI agents. Supports Telnyx, ElevenLabs, Vapi, and Retell.
6
+ Automated benchmarking CLI for voice AI agents. Import your assistant from any provider, run identical test scenarios on both platforms, and get a side-by-side latency comparison.
7
7
 
8
- ## Quick Start
8
+ Supports **Telnyx**, **ElevenLabs**, **Vapi**, and **Retell**.
9
9
 
10
- Run directly with npx (no installation required):
10
+ ## Compare Your Voice Agent Against Telnyx
11
11
 
12
- ```bash
13
- npx @telnyx/voice-agent-tester@latest -a applications/telnyx.yaml -s scenarios/appointment.yaml --assistant-id <YOUR_ASSISTANT_ID>
14
- ```
12
+ The tool imports your assistant from an external provider into Telnyx, then runs the **same scenario** on both platforms and produces a head-to-head latency report:
15
13
 
16
- Or install globally:
17
-
18
- ```bash
19
- npm install -g @telnyx/voice-agent-tester
20
- voice-agent-tester -a applications/telnyx.yaml -s scenarios/appointment.yaml --assistant-id <YOUR_ASSISTANT_ID>
14
+ ```
15
+ 📈 Latency Comparison (elapsed_time):
16
+ --------------------------------------------------------------------------------
17
+ Metric vapi Telnyx Delta Winner
18
+ --------------------------------------------------------------------------------
19
+ Response #1 (wait_for_voice_elapsed_time) 2849ms 1552ms -1297ms (-45.5%) 🏆 Telnyx
20
+ Response #2 (wait_for_voice_elapsed_time) 3307ms 704ms -2603ms (-78.7%) 🏆 Telnyx
21
+ --------------------------------------------------------------------------------
22
+
23
+ 📊 Overall Summary:
24
+ Compared 2 matched response latencies
25
+ vapi total latency: 6156ms
26
+ Telnyx total latency: 2256ms
27
+ Difference: -3900ms (-63.3%)
28
+
29
+ 🏆 Result: Telnyx is faster overall
21
30
  ```
22
31
 
23
- ## CLI Options
24
-
25
- | Option | Default | Description |
26
- |--------|---------|-------------|
27
- | `-a, --applications` | required | Application config path(s) or folder |
28
- | `-s, --scenarios` | required | Scenario config path(s) or folder |
29
- | `--assistant-id` | | Telnyx or provider assistant ID |
30
- | `--api-key` | | Telnyx API key for authentication |
31
- | `--provider` | | Import from provider (`vapi`, `elevenlabs`, `retell`) |
32
- | `--provider-api-key` | | External provider API key (required with `--provider`) |
33
- | `--provider-import-id` | | Provider assistant ID to import (required with `--provider`) |
34
- | `--share-key` | | Vapi share key for comparison mode (prompted if missing) |
35
- | `--branch-id` | | ElevenLabs branch ID for comparison mode (prompted if missing) |
36
- | `--compare` | `true` | Run both provider direct and Telnyx import benchmarks |
37
- | `--no-compare` | | Disable comparison (run only Telnyx import) |
38
- | `-d, --debug` | `false` | Enable detailed timeout diagnostics |
39
- | `-v, --verbose` | `false` | Show browser console logs |
40
- | `--headless` | `true` | Run browser in headless mode |
41
- | `--repeat` | `1` | Number of repetitions per combination |
42
- | `-c, --concurrency` | `1` | Number of parallel tests |
43
- | `-r, --report` | | Generate CSV report to specified file |
44
- | `-p, --params` | | URL template params (e.g., `key=value,key2=value2`) |
45
- | `--application-tags` | | Filter applications by comma-separated tags |
46
- | `--scenario-tags` | | Filter scenarios by comma-separated tags |
47
- | `--assets-server` | `http://localhost:3333` | Assets server URL |
48
- | `--audio-url` | | URL to audio file to play as input during entire benchmark |
49
- | `--audio-volume` | `1.0` | Volume level for audio input (0.0 to 1.0) |
50
-
51
- ## Bundled Configs
52
-
53
- | Application Config | Provider |
54
- |-------------------|----------|
55
- | `applications/telnyx.yaml` | Telnyx AI Widget |
56
- | `applications/elevenlabs.yaml` | ElevenLabs |
57
- | `applications/vapi.yaml` | Vapi |
58
- | `applications/retell.yaml` | Retell |
59
- | `applications/livetok.yaml` | Livetok |
60
-
61
- Scenarios:
62
- - `scenarios/appointment.yaml` - Basic appointment booking test
63
- - `scenarios/appointment_with_noise.yaml` - Appointment with background noise (pre-mixed audio)
64
-
65
- ## Background Noise Testing
66
-
67
- Test voice agents' performance with ambient noise (e.g., crowd chatter, cafe environment). Background noise is pre-mixed into audio files to simulate real-world conditions where users speak to voice agents in noisy environments.
68
-
69
- ### Running with Background Noise
32
+ ### Vapi vs Telnyx
70
33
 
71
34
  ```bash
72
- # Telnyx with background noise
73
- npx @telnyx/voice-agent-tester@latest \
74
- -a applications/telnyx.yaml \
75
- -s scenarios/appointment_with_noise.yaml \
76
- --assistant-id <YOUR_ASSISTANT_ID>
77
-
78
- # Compare with no noise (same assistant)
79
35
  npx @telnyx/voice-agent-tester@latest \
80
36
  -a applications/telnyx.yaml \
81
37
  -s scenarios/appointment.yaml \
82
- --assistant-id <YOUR_ASSISTANT_ID>
38
+ --provider vapi \
39
+ --share-key <VAPI_SHARE_KEY> \
40
+ --api-key <TELNYX_API_KEY> \
41
+ --provider-api-key <VAPI_API_KEY> \
42
+ --provider-import-id <VAPI_ASSISTANT_ID>
43
+ ```
44
+
45
+ ### ElevenLabs vs Telnyx
83
46
 
84
- # Generate CSV report with metrics
47
+ ```bash
85
48
  npx @telnyx/voice-agent-tester@latest \
86
49
  -a applications/telnyx.yaml \
87
- -s scenarios/appointment_with_noise.yaml \
88
- --assistant-id <YOUR_ASSISTANT_ID> \
89
- -r output/noise_benchmark.csv
50
+ -s scenarios/appointment.yaml \
51
+ --provider elevenlabs \
52
+ --api-key <TELNYX_API_KEY> \
53
+ --provider-api-key <ELEVENLABS_API_KEY> \
54
+ --provider-import-id <ELEVENLABS_AGENT_ID>
90
55
  ```
91
56
 
92
- ### Custom Audio Input from URL
93
-
94
- Play any audio file from a URL as input throughout the entire benchmark run. The audio is sent to the voice agent as microphone input.
57
+ ### Retell vs Telnyx
95
58
 
96
59
  ```bash
97
- # Use custom audio input from URL
98
60
  npx @telnyx/voice-agent-tester@latest \
99
61
  -a applications/telnyx.yaml \
100
62
  -s scenarios/appointment.yaml \
101
- --assistant-id <YOUR_ASSISTANT_ID> \
102
- --audio-url "https://example.com/test-audio.mp3" \
103
- --audio-volume 0.8
63
+ --provider retell \
64
+ --api-key <TELNYX_API_KEY> \
65
+ --provider-api-key <RETELL_API_KEY> \
66
+ --provider-import-id <RETELL_AGENT_ID>
104
67
  ```
105
68
 
106
- This is useful for:
107
- - Testing with custom audio inputs
108
- - Using longer audio tracks that play throughout the benchmark
109
- - A/B testing different audio sources
69
+ ### How Comparison Works
110
70
 
111
- ### Bundled Audio Files
71
+ 1. **Import** — The assistant is imported from the external provider into Telnyx
72
+ 2. **Phase 1: Provider Direct** — Runs the scenario on the provider's native widget
73
+ 3. **Phase 2: Telnyx Import** — Runs the same scenario on the Telnyx-imported assistant
74
+ 4. **Report** — Produces a side-by-side comparison with latency delta and winner per response
112
75
 
113
- | File | Description |
114
- |------|-------------|
115
- | `hello_make_an_appointment.mp3` | Clean appointment request |
116
- | `hello_make_an_appointment_with_noise.mp3` | Appointment request with crowd noise |
117
- | `appointment_data.mp3` | Clean appointment details |
118
- | `appointment_data_with_noise.mp3` | Appointment details with crowd noise |
76
+ ### Provider-Specific Keys
119
77
 
120
- ### Scenario Configuration
78
+ Some providers need an extra key to load their demo widget. If not passed via CLI, the tool prompts with instructions.
121
79
 
122
- The noise scenario uses pre-mixed audio files:
80
+ | Provider | Flag | Required? | How to find it |
81
+ |----------|------|-----------|----------------|
82
+ | Vapi | `--share-key` | Yes | Dashboard → select assistant → click 🔗 link icon next to the assistant ID |
83
+ | ElevenLabs | `--branch-id` | No | Dashboard → Agents → select agent → Publish dropdown → "Copy shareable link" |
123
84
 
124
- ```yaml
125
- # scenarios/appointment_with_noise.yaml
126
- tags:
127
- - default
128
- - noise
129
- steps:
130
- - action: wait_for_voice
131
- - action: wait_for_silence
132
- - action: sleep
133
- time: 1000
134
- - action: speak
135
- file: hello_make_an_appointment_with_noise.mp3
136
- - action: wait_for_voice
137
- metrics: elapsed_time
138
- - action: wait_for_silence
139
- - action: speak
140
- file: appointment_data_with_noise.mp3
141
- - action: wait_for_voice
142
- metrics: elapsed_time
85
+ ### Import Only (Skip Comparison)
86
+
87
+ To import without running the provider benchmark:
88
+
89
+ ```bash
90
+ npx @telnyx/voice-agent-tester@latest \
91
+ -a applications/telnyx.yaml \
92
+ -s scenarios/appointment.yaml \
93
+ --provider vapi \
94
+ --no-compare \
95
+ --api-key <TELNYX_API_KEY> \
96
+ --provider-api-key <VAPI_API_KEY> \
97
+ --provider-import-id <VAPI_ASSISTANT_ID>
143
98
  ```
144
99
 
145
- ### Metrics and Reports
100
+ ## Quick Start
146
101
 
147
- The benchmark collects response latency metrics at each `wait_for_voice` step with `metrics: elapsed_time`. Generated CSV reports include:
102
+ Run directly with npx (no installation required):
148
103
 
149
- ```csv
150
- app, scenario, repetition, success, duration, step_9_wait_for_voice_elapsed_time, step_12_wait_for_voice_elapsed_time
151
- telnyx, appointment_with_noise, 0, 1, 29654, 1631, 1225
104
+ ```bash
105
+ npx @telnyx/voice-agent-tester@latest \
106
+ -a applications/telnyx.yaml \
107
+ -s scenarios/appointment.yaml \
108
+ --assistant-id <YOUR_ASSISTANT_ID>
152
109
  ```
153
110
 
154
- Compare results with and without noise to measure how background noise affects your voice agent's:
155
- - Response latency
156
- - Speech recognition accuracy
157
- - Overall conversation flow
111
+ Or install globally:
158
112
 
159
- ## Examples
113
+ ```bash
114
+ npm install -g @telnyx/voice-agent-tester
115
+ voice-agent-tester -a applications/telnyx.yaml -s scenarios/appointment.yaml --assistant-id <YOUR_ASSISTANT_ID>
116
+ ```
117
+
118
+ ## Provider Examples
160
119
 
161
120
  ### Telnyx
162
121
 
@@ -185,78 +144,143 @@ npx @telnyx/voice-agent-tester@latest \
185
144
  --assistant-id <ASSISTANT_ID>
186
145
  ```
187
146
 
188
- ## Comparison Mode
147
+ ## CLI Reference
189
148
 
190
- When importing from an external provider, the tool automatically runs both benchmarks in sequence and generates a comparison report:
149
+ | Option | Default | Description |
150
+ |--------|---------|-------------|
151
+ | `-a, --applications` | required | Application config path(s) or folder |
152
+ | `-s, --scenarios` | required | Scenario config path(s) or folder |
153
+ | `--assistant-id` | | Telnyx or provider assistant ID |
154
+ | `--api-key` | | Telnyx API key |
155
+ | `--provider` | | Import from provider (`vapi`, `elevenlabs`, `retell`) |
156
+ | `--provider-api-key` | | External provider API key |
157
+ | `--provider-import-id` | | Provider assistant/agent ID to import |
158
+ | `--share-key` | | Vapi share key for comparison mode |
159
+ | `--branch-id` | | ElevenLabs branch ID (optional) |
160
+ | `--compare` | `true` | Run provider direct + Telnyx import benchmarks |
161
+ | `--no-compare` | | Skip provider direct benchmark |
162
+ | `-d, --debug` | `false` | Detailed timeout diagnostics |
163
+ | `-v, --verbose` | `false` | Show browser console logs |
164
+ | `--headless` | `true` | Run browser in headless mode |
165
+ | `--repeat` | `1` | Repetitions per app+scenario combination |
166
+ | `-c, --concurrency` | `1` | Parallel test runs |
167
+ | `-r, --report` | | CSV report output path |
168
+ | `-p, --params` | | URL template params (`key=value,key2=value2`) |
169
+ | `--retries` | `0` | Retry failed runs |
170
+ | `--application-tags` | | Filter applications by tags |
171
+ | `--scenario-tags` | | Filter scenarios by tags |
172
+ | `--record` | `false` | Record video+audio (webm) |
173
+ | `--audio-url` | | URL to audio file played as input during run |
174
+ | `--audio-volume` | `1.0` | Audio input volume (0.0–1.0) |
175
+ | `--assets-server` | `http://localhost:3333` | Assets server URL |
191
176
 
192
- 1. **Provider Direct** - Benchmarks the assistant on the original provider's widget
193
- 2. **Telnyx Import** - Benchmarks the same assistant after importing to Telnyx
177
+ ## Bundled Configs
194
178
 
195
- ### Provider-Specific Keys
179
+ **Applications:**
196
180
 
197
- Comparison mode requires a provider-specific key to load the provider's direct widget. If not passed via CLI, the tool will prompt you with instructions on how to find it.
181
+ | Config | Provider |
182
+ |--------|----------|
183
+ | `applications/telnyx.yaml` | Telnyx AI Widget |
184
+ | `applications/elevenlabs.yaml` | ElevenLabs |
185
+ | `applications/vapi.yaml` | Vapi |
186
+ | `applications/retell.yaml` | Retell |
187
+
188
+ **Scenarios:**
198
189
 
199
- | Provider | Flag | How to find it |
200
- |----------|------|----------------|
201
- | Vapi | `--share-key` | In the Vapi Dashboard, select your assistant, then click the link icon (🔗) next to the assistant ID at the top. This copies the demo link containing your share key. |
202
- | ElevenLabs | `--branch-id` | In the ElevenLabs Dashboard, go to Agents, select your target agent, then click the dropdown next to Publish and select "Copy shareable link". This copies the demo link containing your branch ID. |
190
+ | Config | Description |
191
+ |--------|-------------|
192
+ | `scenarios/appointment.yaml` | Appointment booking test |
193
+ | `scenarios/appointment_with_noise.yaml` | Appointment with background crowd noise |
203
194
 
204
- ### Import and Compare (Default)
195
+ ## Background Noise Testing
205
196
 
206
- **Vapi:**
197
+ Test how voice agents perform with ambient noise by using pre-mixed audio files:
207
198
 
208
199
  ```bash
200
+ # With background noise
201
+ npx @telnyx/voice-agent-tester@latest \
202
+ -a applications/telnyx.yaml \
203
+ -s scenarios/appointment_with_noise.yaml \
204
+ --assistant-id <ASSISTANT_ID>
205
+
206
+ # Without noise (same assistant, compare results)
209
207
  npx @telnyx/voice-agent-tester@latest \
210
208
  -a applications/telnyx.yaml \
211
209
  -s scenarios/appointment.yaml \
212
- --provider vapi \
213
- --share-key <VAPI_SHARE_KEY> \
214
- --api-key <TELNYX_KEY> \
215
- --provider-api-key <VAPI_KEY> \
216
- --provider-import-id <VAPI_ASSISTANT_ID>
210
+ --assistant-id <ASSISTANT_ID>
217
211
  ```
218
212
 
219
- **ElevenLabs:**
213
+ ### Custom Audio Input
214
+
215
+ Play any audio file from a URL as microphone input throughout the benchmark:
220
216
 
221
217
  ```bash
222
218
  npx @telnyx/voice-agent-tester@latest \
223
219
  -a applications/telnyx.yaml \
224
220
  -s scenarios/appointment.yaml \
225
- --provider elevenlabs \
226
- --branch-id <ELEVENLABS_BRANCH_ID> \
227
- --api-key <TELNYX_KEY> \
228
- --provider-api-key <ELEVENLABS_KEY> \
229
- --provider-import-id <ELEVENLABS_AGENT_ID>
221
+ --assistant-id <ASSISTANT_ID> \
222
+ --audio-url "https://example.com/test-audio.mp3" \
223
+ --audio-volume 0.8
230
224
  ```
231
225
 
232
- This will:
233
- - Run Phase 1: Provider direct benchmark
234
- - Run Phase 2: Telnyx import benchmark
235
- - Generate a side-by-side latency comparison report
226
+ ### Audio Assets
236
227
 
237
- ### Import Only (No Comparison)
228
+ | File | Description |
229
+ |------|-------------|
230
+ | `hello_make_an_appointment.mp3` | Clean appointment request |
231
+ | `hello_make_an_appointment_with_noise.mp3` | Appointment request + crowd noise |
232
+ | `appointment_data.mp3` | Clean appointment details |
233
+ | `appointment_data_with_noise.mp3` | Appointment details + crowd noise |
238
234
 
239
- To skip the provider direct benchmark and only run the Telnyx import:
235
+ ## Scenario Configuration
240
236
 
241
- ```bash
242
- npx @telnyx/voice-agent-tester@latest \
243
- -a applications/telnyx.yaml \
244
- -s scenarios/appointment.yaml \
245
- --provider vapi \
246
- --no-compare \
247
- --api-key <TELNYX_KEY> \
248
- --provider-api-key <VAPI_KEY> \
249
- --provider-import-id <VAPI_ASSISTANT_ID>
237
+ Scenarios are YAML files with a sequence of steps. Steps with `metrics: elapsed_time` are included in the latency report.
238
+
239
+ ```yaml
240
+ # scenarios/appointment.yaml
241
+ steps:
242
+ - action: wait_for_voice # Wait for agent greeting
243
+ - action: wait_for_silence # Wait for greeting to finish
244
+ - action: speak
245
+ file: hello_make_an_appointment.mp3
246
+ - action: wait_for_voice # ← Measured: time to first response
247
+ metrics: elapsed_time
248
+ - action: wait_for_silence
249
+ - action: speak
250
+ file: appointment_data.mp3
251
+ - action: wait_for_voice # ← Measured: time to second response
252
+ metrics: elapsed_time
250
253
  ```
251
254
 
252
- ### Debugging Failures
255
+ ### Available Actions
256
+
257
+ | Action | Description |
258
+ |--------|-------------|
259
+ | `speak` | Play audio (`file`) or synthesize text (`text`) as microphone input |
260
+ | `wait_for_voice` | Wait for the AI agent to start speaking |
261
+ | `wait_for_silence` | Wait for the AI agent to stop speaking |
262
+ | `sleep` | Pause for a fixed duration (`time` in ms) |
263
+ | `click` | Click an element (`selector`) |
264
+ | `click_with_retry` | Click with retries and connection verification |
265
+ | `wait_for_element` | Wait for a DOM element to appear |
266
+ | `type` | Type text into an input field |
267
+ | `fill` | Set an input field value directly |
268
+ | `select` | Select dropdown/checkbox/radio option |
269
+ | `screenshot` | Capture a screenshot |
270
+ | `listen` | Record agent audio, transcribe, and evaluate |
253
271
 
254
- If benchmarks fail, rerun with `--debug` for detailed diagnostics:
272
+ ## Debugging
273
+
274
+ If benchmarks fail or time out, use `--debug` for detailed diagnostics including audio monitor state, WebRTC connection info, and RTP stats:
255
275
 
256
276
  ```bash
257
- voice-agent-tester --provider vapi --debug [other options...]
277
+ npx @telnyx/voice-agent-tester@latest \
278
+ -a applications/telnyx.yaml \
279
+ -s scenarios/appointment.yaml \
280
+ --assistant-id <ASSISTANT_ID> \
281
+ --debug
258
282
  ```
259
283
 
260
284
  ## License
261
285
 
262
- MIT
286
+ MIT
@@ -1,4 +1,4 @@
1
- url: "https://elevenlabs.io/app/talk-to?agent_id={{assistantId}}&branch_id={{branchId}}"
1
+ url: "https://elevenlabs.io/app/talk-to?agent_id={{assistantId}}"
2
2
  tags:
3
3
  - provider
4
4
  - elevenlabs
@@ -62,20 +62,24 @@ function createControlledMediaStream() {
62
62
  }
63
63
 
64
64
  // Replace getUserMedia to return our controlled stream
65
- const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
66
- navigator.mediaDevices.getUserMedia = function (constraints) {
67
- console.log("🎤 Intercepted getUserMedia call with constraints:", constraints);
68
-
69
- // If audio is requested, return our controlled stream
70
- if (constraints && constraints.audio) {
71
- console.log("🎤 Returning controlled MediaStream instead of real microphone");
72
- const controlledStream = createControlledMediaStream();
73
- return Promise.resolve(controlledStream);
74
- }
65
+ if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
66
+ const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
67
+ navigator.mediaDevices.getUserMedia = function (constraints) {
68
+ console.log("🎤 Intercepted getUserMedia call with constraints:", constraints);
69
+
70
+ // If audio is requested, return our controlled stream
71
+ if (constraints && constraints.audio) {
72
+ console.log("🎤 Returning controlled MediaStream instead of real microphone");
73
+ const controlledStream = createControlledMediaStream();
74
+ return Promise.resolve(controlledStream);
75
+ }
75
76
 
76
- // For video-only or other requests, use original implementation
77
- return originalGetUserMedia(constraints);
78
- };
77
+ // For video-only or other requests, use original implementation
78
+ return originalGetUserMedia(constraints);
79
+ };
80
+ } else {
81
+ console.warn("🎤 navigator.mediaDevices.getUserMedia not available, skipping microphone intercept");
82
+ }
79
83
 
80
84
  // Expose __speak method to be called from voice-agent-tester.js
81
85
  window.__speak = function (textOrUrl) {
@@ -152,6 +156,24 @@ function playAudioInMediaStream(url) {
152
156
  const audio = new Audio(url);
153
157
  audio.crossOrigin = 'anonymous'; // Enable CORS if needed
154
158
 
159
+ // Keep a strong reference so the element is not garbage collected
160
+ currentSpeakAudio = audio;
161
+
162
+ let speechEndFired = false;
163
+ let safetyTimeoutId = null;
164
+
165
+ function fireSpeechEnd(reason) {
166
+ if (speechEndFired) return;
167
+ speechEndFired = true;
168
+ if (safetyTimeoutId) clearTimeout(safetyTimeoutId);
169
+ console.log(`🎤 Audio playback ended (${reason})`);
170
+ if (typeof __publishEvent === 'function') {
171
+ __publishEvent('speechend', { url: url, reason: reason });
172
+ }
173
+ // Release reference
174
+ if (currentSpeakAudio === audio) currentSpeakAudio = null;
175
+ }
176
+
155
177
  // Set up audio routing through all MediaStreams
156
178
  audio.addEventListener('canplaythrough', function () {
157
179
  console.log(`🎤 Audio ready to play, routing to ${mediaStreams.length} MediaStreams`);
@@ -181,7 +203,33 @@ function playAudioInMediaStream(url) {
181
203
  }
182
204
 
183
205
  // Play the audio
184
- audio.play();
206
+ audio.play().then(() => {
207
+ // Set up safety timeout based on audio duration
208
+ // audio.duration should be available after canplaythrough
209
+ const duration = audio.duration;
210
+ if (duration && isFinite(duration)) {
211
+ const safetyMs = Math.max((duration * 1000) + 5000, 15000);
212
+ console.log(`🎤 Audio duration: ${duration.toFixed(1)}s, safety timeout: ${(safetyMs / 1000).toFixed(1)}s`);
213
+ safetyTimeoutId = setTimeout(() => {
214
+ if (!speechEndFired) {
215
+ console.warn(`🎤 Safety timeout: speechend not fired after ${(safetyMs / 1000).toFixed(1)}s (audio paused=${audio.paused}, ended=${audio.ended}, currentTime=${audio.currentTime.toFixed(1)})`);
216
+ fireSpeechEnd('safety_timeout');
217
+ }
218
+ }, safetyMs);
219
+ } else {
220
+ // Unknown duration — use 20s fallback
221
+ console.warn('🎤 Audio duration unknown, using 20s safety timeout');
222
+ safetyTimeoutId = setTimeout(() => {
223
+ if (!speechEndFired) {
224
+ console.warn('🎤 Safety timeout: speechend not fired after 20s');
225
+ fireSpeechEnd('safety_timeout');
226
+ }
227
+ }, 20000);
228
+ }
229
+ }).catch(error => {
230
+ console.error('Error playing audio:', error);
231
+ fireSpeechEnd('play_error');
232
+ });
185
233
  } catch (error) {
186
234
  console.error('Error setting up audio source:', error);
187
235
  if (typeof __publishEvent === 'function') {
@@ -190,11 +238,19 @@ function playAudioInMediaStream(url) {
190
238
  }
191
239
  });
192
240
 
193
- // Handle audio end
241
+ // Handle audio end — primary path
194
242
  audio.addEventListener('ended', function () {
195
- console.log('🎤 Audio playback ended');
196
- if (typeof __publishEvent === 'function') {
197
- __publishEvent('speechend', { url: url });
243
+ fireSpeechEnd('ended');
244
+ });
245
+
246
+ // Handle pause — if something pauses the audio externally
247
+ audio.addEventListener('pause', function () {
248
+ // Only treat as speechend if the audio is past 90% of its duration (near end)
249
+ // or has already ended; a pause any earlier than that only logs a warning
250
+ if (audio.ended || (audio.duration && audio.currentTime >= audio.duration * 0.9)) {
251
+ fireSpeechEnd('pause_near_end');
252
+ } else {
253
+ console.warn(`🎤 Audio paused at ${audio.currentTime.toFixed(1)}s / ${(audio.duration || 0).toFixed(1)}s`);
198
254
  }
199
255
  });
200
256
 
@@ -204,17 +260,31 @@ function playAudioInMediaStream(url) {
204
260
  if (typeof __publishEvent === 'function') {
205
261
  __publishEvent('speecherror', { error: 'Audio playback failed', url: url });
206
262
  }
263
+ fireSpeechEnd('error');
207
264
  });
208
265
 
209
266
  // Start loading the audio
210
267
  audio.load();
211
268
  }
212
269
 
270
+ // Keep a reference to the current speak Audio element so it doesn't get GC'd
271
+ let currentSpeakAudio = null;
272
+
213
273
  // Helper function to stop current audio and reset to silence
214
274
  function stopCurrentAudio() {
275
+ // Stop the speak audio element if playing
276
+ if (currentSpeakAudio) {
277
+ try {
278
+ currentSpeakAudio.pause();
279
+ currentSpeakAudio.currentTime = 0;
280
+ } catch (e) {
281
+ console.warn('Error stopping speak audio:', e);
282
+ }
283
+ currentSpeakAudio = null;
284
+ }
285
+
215
286
  currentPlaybackNodes.forEach((sourceNode, index) => {
216
287
  try {
217
- sourceNode.stop();
218
288
  sourceNode.disconnect();
219
289
  console.log(`🎤 Stopped audio source ${index}`);
220
290
  } catch (e) {