@telnyx/voice-agent-tester 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ ---
2
+ description: Ralph Loop - Iterative AI development with persistent iteration until task completion
3
+ ---
4
+
5
+ # Ralph Loop Workflow
6
+
7
+ This workflow implements the Ralph Loop (Ralph Wiggum) technique for iterative, autonomous coding.
8
+
9
+ ## Usage
10
+
11
+ Invoke with: `/ralph-loop <task description>`
12
+
13
+ Or provide detailed options:
14
+ ```
15
+ /ralph-loop "Build feature X" --max-iterations 30 --completion-promise "COMPLETE"
16
+ ```
17
+
18
+ ## Workflow Steps
19
+
20
+ 1. **Read the Ralph Loop skill instructions**
21
+ - View the skill file at `.gemini/skills/ralph-loop/SKILL.md`
22
+ - Understand the iteration pattern and best practices
23
+
24
+ 2. **Parse the user's task**
25
+ - Identify the main objective
26
+ - Extract success criteria
27
+ - Set max iterations (default: 30)
28
+ - Set completion promise (default: "COMPLETE")
29
+
30
+ 3. **Enter the loop**
31
+ - Execute the task iteratively
32
+ - Self-correct on failures
33
+ - Track progress
34
+ - Continue until success criteria met or max iterations reached
35
+
36
+ 4. **Report completion**
37
+ - Summarize accomplishments
38
+ - Output the completion promise
39
+ - List any remaining issues
40
+
41
+ ## Quick Commands
42
+
43
+ - **Start a loop**: `/ralph-loop "Your task here"`
44
+ - **Cancel loop**: Say "stop", "cancel", or "abort"
45
+ - **Check skill docs**: View `.gemini/skills/ralph-loop/SKILL.md`
46
+
47
+ ## Examples
48
+
49
+ ### Feature Implementation
50
+ ```
51
+ /ralph-loop "Implement user authentication with JWT tokens. Requirements: login/logout endpoints, password hashing, token refresh. Tests must pass."
52
+ ```
53
+
54
+ ### Bug Fix
55
+ ```
56
+ /ralph-loop "Fix the 404 error when importing VAPI assistants. Add retry logic with exponential backoff."
57
+ ```
58
+
59
+ ### Refactoring
60
+ ```
61
+ /ralph-loop "Refactor the CLI options to be more provider-agnostic. All existing tests must pass."
62
+ ```
@@ -0,0 +1,240 @@
1
+ ---
2
+ name: ralph-loop
3
+ description: Ralph Loop - AI Loop Technique for iterative, autonomous coding. Implements persistent iteration until task completion with self-correction patterns.
4
+ ---
5
+
6
+ # Ralph Loop - AI Loop Technique
7
+
8
+ The Ralph Loop (also known as "Ralph Wiggum") is an iterative AI development methodology. It embodies the philosophy of **persistent iteration despite setbacks**.
9
+
10
+ ## Core Philosophy
11
+
12
+ 1. **Iteration > Perfection**: Don't aim for perfect on first try. Let the loop refine the work.
13
+ 2. **Failures Are Data**: Being "deterministically bad" means failures are predictable and informative.
14
+ 3. **Operator Skill Matters**: Success depends on writing good prompts, not just having a good model.
15
+ 4. **Persistence Wins**: Keep trying until success. Handle retry logic automatically.
16
+
17
+ ---
18
+
19
+ ## How to Use This Skill
20
+
21
+ When the user invokes this skill (e.g., `/ralph-loop` or asks for iterative development), follow these instructions:
22
+
23
+ ### Step 1: Understand the Task
24
+
25
+ Parse the user's request and identify:
26
+ - **The main objective** - What needs to be built/fixed/refactored
27
+ - **Success criteria** - How to know when it's complete
28
+ - **Max iterations** - Safety limit (default: 30)
29
+ - **Completion promise** - The signal word (default: "COMPLETE")
30
+
31
+ ### Step 2: Enter the Ralph Loop
32
+
33
+ Execute the following loop pattern:
34
+
35
+ ```
36
+ ITERATION = 1
37
+ MAX_ITERATIONS = [specified or 30]
38
+ COMPLETION_PROMISE = [specified or "COMPLETE"]
39
+
40
+ WHILE (ITERATION <= MAX_ITERATIONS) AND (NOT COMPLETED):
41
+ 1. Assess current state
42
+ 2. Identify next step toward goal
43
+ 3. Execute the step (write code, run tests, fix bugs, etc.)
44
+ 4. Evaluate results
45
+ 5. If success criteria met → output COMPLETION_PROMISE → EXIT LOOP
46
+ 6. If not complete → increment ITERATION → CONTINUE
47
+ 7. If blocked → document issue → try alternative approach
48
+ END WHILE
49
+
50
+ IF MAX_ITERATIONS reached without completion:
51
+ - Document what was accomplished
52
+ - List blocking issues
53
+ - Suggest next steps
54
+ ```
55
+
56
+ ### Step 3: Self-Correction Pattern
57
+
58
+ During each iteration, follow this TDD-inspired pattern:
59
+
60
+ 1. **Plan** - Identify what needs to happen next
61
+ 2. **Execute** - Make the change (code, config, etc.)
62
+ 3. **Verify** - Run tests, check results, validate
63
+ 4. **If failing** - Debug and fix in the same iteration if possible
64
+ 5. **If passing** - Move to next requirement
65
+ 6. **Refactor** - Clean up if needed before proceeding
66
+
67
+ ### Step 4: Report Progress
68
+
69
+ After each significant iteration, briefly report:
70
+ - Current iteration number
71
+ - What was attempted
72
+ - Result (success/failure/partial)
73
+ - Next step
74
+
75
+ ### Step 5: Completion
76
+
77
+ When all success criteria are met:
78
+ 1. Summarize what was accomplished
79
+ 2. List any tests/validations that passed
80
+ 3. Output the completion promise: `<promise>COMPLETE</promise>`
81
+
82
+ ---
83
+
84
+ ## Prompt Templates
85
+
86
+ ### Feature Implementation
87
+
88
+ ```
89
+ Implement [FEATURE_NAME].
90
+
91
+ Requirements:
92
+ - [Requirement 1]
93
+ - [Requirement 2]
94
+ - [Requirement 3]
95
+
96
+ Success criteria:
97
+ - All requirements implemented
98
+ - Tests passing with >80% coverage
99
+ - No linter errors
100
+ - Documentation updated
101
+
102
+ Output <promise>COMPLETE</promise> when done.
103
+ ```
104
+
105
+ ### TDD Development
106
+
107
+ ```
108
+ Implement [FEATURE] using TDD.
109
+
110
+ Process:
111
+ 1. Write failing test for next requirement
112
+ 2. Implement minimal code to pass
113
+ 3. Run tests
114
+ 4. If failing, fix and retry
115
+ 5. Refactor if needed
116
+ 6. Repeat for all requirements
117
+
118
+ Requirements: [LIST]
119
+
120
+ Output <promise>DONE</promise> when all tests are green.
121
+ ```
122
+
123
+ ### Bug Fixing
124
+
125
+ ```
126
+ Fix bug: [DESCRIPTION]
127
+
128
+ Steps:
129
+ 1. Reproduce the bug
130
+ 2. Identify root cause
131
+ 3. Implement fix
132
+ 4. Write regression test
133
+ 5. Verify fix works
134
+ 6. Check no new issues introduced
135
+
136
+ After 15 iterations if not fixed:
137
+ - Document blocking issues
138
+ - List attempted approaches
139
+ - Suggest alternatives
140
+
141
+ Output <promise>FIXED</promise> when resolved.
142
+ ```
143
+
144
+ ### Refactoring
145
+
146
+ ```
147
+ Refactor [COMPONENT] for [GOAL].
148
+
149
+ Constraints:
150
+ - All existing tests must pass
151
+ - No behavior changes
152
+ - Incremental commits
153
+
154
+ Checklist:
155
+ - [ ] Tests passing before start
156
+ - [ ] Apply refactoring step
157
+ - [ ] Tests still passing
158
+ - [ ] Repeat until done
159
+
160
+ Output <promise>REFACTORED</promise> when complete.
161
+ ```
162
+
163
+ ---
164
+
165
+ ## Advanced Patterns
166
+
167
+ ### Multi-Phase Development
168
+
169
+ For complex projects, chain multiple loops:
170
+
171
+ ```
172
+ Phase 1: Core implementation → <promise>PHASE1_DONE</promise>
173
+ Phase 2: API layer → <promise>PHASE2_DONE</promise>
174
+ Phase 3: Frontend → <promise>PHASE3_DONE</promise>
175
+ ```
176
+
177
+ ### Incremental Goals
178
+
179
+ Break large tasks into phases:
180
+
181
+ ```
182
+ Phase 1: User authentication (JWT, tests)
183
+ Phase 2: Product catalog (list/search, tests)
184
+ Phase 3: Shopping cart (add/remove, tests)
185
+
186
+ Output <promise>COMPLETE</promise> when all phases done.
187
+ ```
188
+
189
+ ---
190
+
191
+ ## Best Practices for Writing Prompts
192
+
193
+ ### ❌ Bad Prompt
194
+ ```
195
+ Build a todo API and make it good.
196
+ ```
197
+
198
+ ### ✅ Good Prompt
199
+ ```
200
+ Build a REST API for todos.
201
+
202
+ When complete:
203
+ - All CRUD endpoints working
204
+ - Input validation in place
205
+ - Tests passing (coverage > 80%)
206
+ - README with API docs
207
+
208
+ Output: <promise>COMPLETE</promise>
209
+ ```
210
+
211
+ ---
212
+
213
+ ## When to Use Ralph Loop
214
+
215
+ ### ✅ Good For:
216
+ - Feature implementation with clear requirements
217
+ - Bug fixing with reproducible issues
218
+ - Refactoring with existing test coverage
219
+ - TDD-style development
220
+ - Tasks that benefit from iteration
221
+
222
+ ### ❌ Not Good For:
223
+ - Exploratory research without clear goals
224
+ - Tasks requiring human judgment at each step
225
+ - Real-time interactive sessions
226
+ - Tasks with no verifiable success criteria
227
+
228
+ ---
229
+
230
+ ## Cancellation
231
+
232
+ The user can cancel the loop at any time by:
233
+ - Saying "stop", "cancel", or "abort"
234
+ - Providing new instructions that supersede the current task
235
+
236
+ ---
237
+
238
+ ## Attribution
239
+
240
+ Based on the Ralph Wiggum technique from [Awesome Claude](https://awesomeclaude.ai/ralph-wiggum) and the official Claude plugins marketplace (`ralph-loop@claude-plugins-official`).
@@ -46,8 +46,8 @@ jobs:
46
46
 
47
47
  - name: Setup Git user
48
48
  run: |
49
- git config user.name TelnyxIntegrations
50
- git config user.email integrations@telnyx.com
49
+ git config user.name "github-actions[bot]"
50
+ git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
51
51
 
52
52
  - name: Use Node.js 20.x
53
53
  uses: actions/setup-node@v4
@@ -64,9 +64,44 @@ jobs:
64
64
  env:
65
65
  CI: true
66
66
 
67
- - name: Create draft release
67
+ - name: Determine next version
68
+ id: version
68
69
  run: |
69
- npx release-it --ci --github.draft --no-npm.publish${{ env.INCREMENT_ARG }}${{ env.PRERELEASE_ARGS }}
70
+ NEXT=$(npx release-it --ci --release-version${{ env.INCREMENT_ARG }}${{ env.PRERELEASE_ARGS }} 2>/dev/null)
71
+ echo "next=$NEXT" >> "$GITHUB_OUTPUT"
72
+ echo "branch=release/v$NEXT" >> "$GITHUB_OUTPUT"
73
+ env:
74
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
75
+
76
+ - name: Create release branch
77
+ run: |
78
+ git checkout -b "${{ steps.version.outputs.branch }}"
79
+
80
+ - name: Create draft release on branch
81
+ run: |
82
+ npx release-it --ci --github.draft --no-npm.publish --no-git.push --no-git.requireUpstream${{ env.INCREMENT_ARG }}${{ env.PRERELEASE_ARGS }}
70
83
  env:
71
84
  NPM_TOKEN: ${{ secrets.NPM_CI_TOKEN }}
72
85
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
86
+
87
+ - name: Push release branch
88
+ run: |
89
+ git push origin "${{ steps.version.outputs.branch }}"
90
+
91
+ - name: Create pull request
92
+ run: |
93
+ gh pr create \
94
+ --title "chore: release v${{ steps.version.outputs.next }}" \
95
+ --body "## Release v${{ steps.version.outputs.next }}
96
+
97
+ Automated release PR created by the draft-release workflow.
98
+
99
+ - Version bump in \`package.json\`
100
+ - Updated \`CHANGELOG.md\`
101
+ - Draft GitHub release created
102
+
103
+ **After merging**, publish the release from the [releases page](https://github.com/${{ github.repository }}/releases)." \
104
+ --base "${{ env.TARGET_REF }}" \
105
+ --head "${{ steps.version.outputs.branch }}"
106
+ env:
107
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -19,8 +19,8 @@ jobs:
19
19
 
20
20
  - name: Setup Git user
21
21
  run: |
22
- git config user.name TelnyxIntegrations
23
- git config user.email integrations@telnyx.com
22
+ git config user.name "github-actions[bot]"
23
+ git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
24
24
 
25
25
  - name: Use Node.js 20.x
26
26
  uses: actions/setup-node@v4
package/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.1](https://github.com/team-telnyx/voice-agent-tester/compare/v0.4.0...v0.4.1) (2026-02-18)
4
+
5
+ ### Features
6
+
7
+ * require provider-specific params for comparison mode ([#10](https://github.com/team-telnyx/voice-agent-tester/issues/10)) ([db9eb27](https://github.com/team-telnyx/voice-agent-tester/commit/db9eb273c139374a9f6358126113cab92f8f5b32))
8
+ * use Qwen/Qwen3-235B-A22B as model for imported assistants ([#11](https://github.com/team-telnyx/voice-agent-tester/issues/11)) ([3c4ed0a](https://github.com/team-telnyx/voice-agent-tester/commit/3c4ed0a14498833544f1797426b234585adcb49b))
9
+
10
+ ### Bug Fixes
11
+
12
+ * add --no-git.requireUpstream to release-it in draft workflow ([#14](https://github.com/team-telnyx/voice-agent-tester/issues/14)) ([9553e65](https://github.com/team-telnyx/voice-agent-tester/commit/9553e65bdc6f0094853895da6b806befc5a898f6))
13
+ * use triggering user as git author and create PR for releases ([#13](https://github.com/team-telnyx/voice-agent-tester/issues/13)) ([8ebecba](https://github.com/team-telnyx/voice-agent-tester/commit/8ebecba1839985949e46bec457f327711f89138d))
14
+
15
+ ## [0.4.0](https://github.com/team-telnyx/voice-agent-tester/compare/v0.3.0...v0.4.0) (2026-01-26)
16
+
17
+ ### Features
18
+
19
+ * add audio input from URL for benchmark runs ([c347de8](https://github.com/team-telnyx/voice-agent-tester/commit/c347de83b8318827bac098bff4328502908ee981))
20
+ * add background noise benchmark with pre-mixed audio files ([9f64179](https://github.com/team-telnyx/voice-agent-tester/commit/9f6417936514451270c4d1bc929771446c366b08))
21
+
3
22
  ## [0.3.0](https://github.com/team-telnyx/voice-agent-tester/compare/v0.2.3...v0.3.0) (2026-01-23)
4
23
 
5
24
  ### Features
package/README.md CHANGED
@@ -40,10 +40,11 @@ voice-agent-tester -a applications/telnyx.yaml -s scenarios/appointment.yaml --a
40
40
  | `-c, --concurrency` | `1` | Number of parallel tests |
41
41
  | `-r, --report` | | Generate CSV report to specified file |
42
42
  | `-p, --params` | | URL template params (e.g., `key=value,key2=value2`) |
43
- | `--record` | `false` | Record video and audio in webm format |
44
43
  | `--application-tags` | | Filter applications by comma-separated tags |
45
44
  | `--scenario-tags` | | Filter scenarios by comma-separated tags |
46
45
  | `--assets-server` | `http://localhost:3333` | Assets server URL |
46
+ | `--audio-url` | | URL to audio file to play as input during entire benchmark |
47
+ | `--audio-volume` | `1.0` | Volume level for audio input (0.0 to 1.0) |
47
48
 
48
49
  ## Bundled Configs
49
50
 
@@ -55,7 +56,103 @@ voice-agent-tester -a applications/telnyx.yaml -s scenarios/appointment.yaml --a
55
56
  | `applications/retell.yaml` | Retell |
56
57
  | `applications/livetok.yaml` | Livetok |
57
58
 
58
- Scenario: `scenarios/appointment.yaml`
59
+ Scenarios:
60
+ - `scenarios/appointment.yaml` - Basic appointment booking test
61
+ - `scenarios/appointment_with_noise.yaml` - Appointment with background noise (pre-mixed audio)
62
+
63
+ ## Background Noise Testing
64
+
65
+ Test voice agents' performance with ambient noise (e.g., crowd chatter, cafe environment). Background noise is pre-mixed into audio files to simulate real-world conditions where users speak to voice agents in noisy environments.
66
+
67
+ ### Running with Background Noise
68
+
69
+ ```bash
70
+ # Telnyx with background noise
71
+ npx @telnyx/voice-agent-tester@latest \
72
+ -a applications/telnyx.yaml \
73
+ -s scenarios/appointment_with_noise.yaml \
74
+ --assistant-id <YOUR_ASSISTANT_ID>
75
+
76
+ # Compare with no noise (same assistant)
77
+ npx @telnyx/voice-agent-tester@latest \
78
+ -a applications/telnyx.yaml \
79
+ -s scenarios/appointment.yaml \
80
+ --assistant-id <YOUR_ASSISTANT_ID>
81
+
82
+ # Generate CSV report with metrics
83
+ npx @telnyx/voice-agent-tester@latest \
84
+ -a applications/telnyx.yaml \
85
+ -s scenarios/appointment_with_noise.yaml \
86
+ --assistant-id <YOUR_ASSISTANT_ID> \
87
+ -r output/noise_benchmark.csv
88
+ ```
89
+
90
+ ### Custom Audio Input from URL
91
+
92
+ Play any audio file from a URL as input throughout the entire benchmark run. The audio is sent to the voice agent as microphone input.
93
+
94
+ ```bash
95
+ # Use custom audio input from URL
96
+ npx @telnyx/voice-agent-tester@latest \
97
+ -a applications/telnyx.yaml \
98
+ -s scenarios/appointment.yaml \
99
+ --assistant-id <YOUR_ASSISTANT_ID> \
100
+ --audio-url "https://example.com/test-audio.mp3" \
101
+ --audio-volume 0.8
102
+ ```
103
+
104
+ This is useful for:
105
+ - Testing with custom audio inputs
106
+ - Using longer audio tracks that play throughout the benchmark
107
+ - A/B testing different audio sources
108
+
109
+ ### Bundled Audio Files
110
+
111
+ | File | Description |
112
+ |------|-------------|
113
+ | `hello_make_an_appointment.mp3` | Clean appointment request |
114
+ | `hello_make_an_appointment_with_noise.mp3` | Appointment request with crowd noise |
115
+ | `appointment_data.mp3` | Clean appointment details |
116
+ | `appointment_data_with_noise.mp3` | Appointment details with crowd noise |
117
+
118
+ ### Scenario Configuration
119
+
120
+ The noise scenario uses pre-mixed audio files:
121
+
122
+ ```yaml
123
+ # scenarios/appointment_with_noise.yaml
124
+ tags:
125
+ - default
126
+ - noise
127
+ steps:
128
+ - action: wait_for_voice
129
+ - action: wait_for_silence
130
+ - action: sleep
131
+ time: 1000
132
+ - action: speak
133
+ file: hello_make_an_appointment_with_noise.mp3
134
+ - action: wait_for_voice
135
+ metrics: elapsed_time
136
+ - action: wait_for_silence
137
+ - action: speak
138
+ file: appointment_data_with_noise.mp3
139
+ - action: wait_for_voice
140
+ metrics: elapsed_time
141
+ ```
142
+
143
+ ### Metrics and Reports
144
+
145
+ The benchmark collects response latency metrics at each `wait_for_voice` step with `metrics: elapsed_time`. Generated CSV reports include:
146
+
147
+ ```csv
148
+ app, scenario, repetition, success, duration, step_9_wait_for_voice_elapsed_time, step_12_wait_for_voice_elapsed_time
149
+ telnyx, appointment_with_noise, 0, 1, 29654, 1631, 1225
150
+ ```
151
+
152
+ Compare results with and without noise to measure how background noise affects your voice agent's:
153
+ - Response latency
154
+ - Speech recognition accuracy
155
+ - Overall conversation flow
59
156
 
60
157
  ## Examples
61
158
 
@@ -1,10 +1,13 @@
1
1
  url: "https://elevenlabs.io/app/talk-to?agent_id={{assistantId}}&branch_id={{branchId}}"
2
+ tags:
3
+ - provider
4
+ - elevenlabs
2
5
  steps:
3
6
  - action: wait_for_element
4
- selector: "button[data-agent-id]"
7
+ selector: "text=Call AI agent"
5
8
  - action: sleep
6
9
  time: 3000
7
10
  - action: click
8
- selector: "button[data-agent-id]"
11
+ selector: "text=Call AI agent"
9
12
  - action: sleep
10
13
  time: 2000
@@ -0,0 +1,16 @@
1
+ url: "https://rti.livetok.io/demo/index.html"
2
+ tags:
3
+ - default
4
+ - basic
5
+ steps:
6
+ - action: fill
7
+ selector: "input[type='password']"
8
+ text: "GOOGLE_API_KEY HERE"
9
+ # - action: select
10
+ # selector: "#model"
11
+ # value: "gemini-2.5-flash-preview-native-audio-dialog"
12
+ # - action: fill
13
+ # selector: "#tools"
14
+ # text: "[]"
15
+ - action: click
16
+ selector: "#start"
@@ -5,6 +5,6 @@ steps:
5
5
  - action: sleep
6
6
  time: 3000
7
7
  - action: click
8
- selector: "telnyx-ai-agent"
8
+ selector: "telnyx-ai-agent >>> button"
9
9
  - action: sleep
10
10
  time: 4000
@@ -0,0 +1,19 @@
1
+ url: "https://vapi.ai?demo=true&shareKey={{shareKey}}&assistantId={{assistantId}}"
2
+ tags:
3
+ - provider
4
+ - vapi
5
+ steps:
6
+ - action: wait_for_element
7
+ selector: "button[aria-label=\"Talk to Vapi\"]"
8
+ - action: sleep
9
+ time: 5000
10
+ - action: click
11
+ selector: "button[aria-label=\"Talk to Vapi\"]"
12
+ - action: sleep
13
+ time: 2000
14
+ - action: speak
15
+ text: "Hello, what can you do?"
16
+ - action: wait_for_voice
17
+ metrics: elapsed_time
18
+ - action: wait_for_silence
19
+ metrics: elapsed_time
@@ -289,3 +289,107 @@ window.__waitForMediaStream = function (timeout = 10000) {
289
289
  });
290
290
  };
291
291
 
292
+ // ============= AUDIO INPUT FROM URL =============
293
+ // For playing audio from a URL as input during the entire benchmark
294
+
295
+ let urlAudioElement = null;
296
+ let urlAudioSourceNode = null;
297
+ let urlAudioGainNode = null;
298
+
299
+ // Start playing audio from URL (sent as microphone input)
300
+ window.__startAudioFromUrl = function (url, volume = 1.0) {
301
+ console.log(`🔊 Starting audio from URL: ${url} (volume: ${volume})`);
302
+
303
+ if (!globalAudioContext) {
304
+ console.error('AudioContext not initialized');
305
+ return Promise.reject(new Error('AudioContext not initialized'));
306
+ }
307
+
308
+ // Stop any existing URL audio
309
+ window.__stopAudioFromUrl();
310
+
311
+ return new Promise((resolve, reject) => {
312
+ urlAudioElement = new Audio(url);
313
+ urlAudioElement.crossOrigin = 'anonymous';
314
+ urlAudioElement.loop = true;
315
+
316
+ urlAudioElement.addEventListener('canplaythrough', function onCanPlay() {
317
+ urlAudioElement.removeEventListener('canplaythrough', onCanPlay);
318
+
319
+ try {
320
+ // Create media element source
321
+ urlAudioSourceNode = globalAudioContext.createMediaElementSource(urlAudioElement);
322
+
323
+ // Create gain node for volume control
324
+ urlAudioGainNode = globalAudioContext.createGain();
325
+ urlAudioGainNode.gain.setValueAtTime(volume, globalAudioContext.currentTime);
326
+
327
+ // Connect: source -> gain -> all MediaStreams
328
+ urlAudioSourceNode.connect(urlAudioGainNode);
329
+
330
+ // Connect to all MediaStream gain nodes (sent as microphone input)
331
+ mediaStreams.forEach((streamData) => {
332
+ urlAudioGainNode.connect(streamData.gainNode);
333
+ console.log(`🔊 Connected URL audio to stream ${streamData.id}`);
334
+ });
335
+
336
+ // Also make audible through speakers if speak audio is audible
337
+ if (MAKE_SPEAK_AUDIO_AUDIBLE) {
338
+ urlAudioGainNode.connect(globalAudioContext.destination);
339
+ }
340
+
341
+ // Start playing
342
+ urlAudioElement.play().then(() => {
343
+ console.log('🔊 Audio from URL started playing');
344
+ if (typeof __publishEvent === 'function') {
345
+ __publishEvent('urlaudiostart', { url: url, volume: volume });
346
+ }
347
+ resolve();
348
+ }).catch(reject);
349
+
350
+ } catch (error) {
351
+ console.error('Error setting up audio from URL:', error);
352
+ reject(error);
353
+ }
354
+ });
355
+
356
+ urlAudioElement.addEventListener('error', function (event) {
357
+ console.error('URL audio error:', event);
358
+ reject(new Error('Failed to load audio from URL'));
359
+ });
360
+
361
+ urlAudioElement.load();
362
+ });
363
+ };
364
+
365
+ // Stop audio from URL
366
+ window.__stopAudioFromUrl = function () {
367
+ if (urlAudioElement) {
368
+ console.log('🔊 Stopping audio from URL');
369
+ urlAudioElement.pause();
370
+ urlAudioElement.currentTime = 0;
371
+ urlAudioElement = null;
372
+ }
373
+
374
+ if (urlAudioSourceNode) {
375
+ try {
376
+ urlAudioSourceNode.disconnect();
377
+ } catch (e) {
378
+ // Already disconnected
379
+ }
380
+ urlAudioSourceNode = null;
381
+ }
382
+
383
+ if (urlAudioGainNode) {
384
+ try {
385
+ urlAudioGainNode.disconnect();
386
+ } catch (e) {
387
+ // Already disconnected
388
+ }
389
+ urlAudioGainNode = null;
390
+ }
391
+
392
+ if (typeof __publishEvent === 'function') {
393
+ __publishEvent('urlaudiostop', {});
394
+ }
395
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@telnyx/voice-agent-tester",
3
- "version": "0.3.0",
3
+ "version": "0.4.1",
4
4
  "description": "A command-line tool to test voice agents using Puppeteer",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -3,8 +3,6 @@ tags:
3
3
  steps:
4
4
  - action: wait_for_voice
5
5
  - action: wait_for_silence
6
- - action: sleep
7
- time: 1000
8
6
  - action: speak
9
7
  file: hello_make_an_appointment.mp3
10
8
  - action: wait_for_voice
@@ -0,0 +1,17 @@
1
+ tags:
2
+ - default
3
+ - noise
4
+ steps:
5
+ - action: wait_for_voice
6
+ - action: wait_for_silence
7
+ - action: sleep
8
+ time: 1000
9
+ - action: speak
10
+ file: hello_make_an_appointment_with_noise.mp3
11
+ - action: wait_for_voice
12
+ metrics: elapsed_time
13
+ - action: wait_for_silence
14
+ - action: speak
15
+ file: appointment_data_with_noise.mp3
16
+ - action: wait_for_voice
17
+ metrics: elapsed_time
package/src/index.js CHANGED
@@ -87,6 +87,50 @@ function substituteUrlParams(url, params) {
87
87
  return result;
88
88
  }
89
89
 
90
+ /**
91
+ * Get the list of missing provider-specific parameters required for comparison mode.
92
+ * Each provider has its own set of required params for the direct widget benchmark.
93
+ *
94
+ * @param {Object} argv - Parsed CLI arguments
95
+ * @returns {Array<{key: string, flag: string, description: string}>} Missing params
96
+ */
97
+ function getCompareRequiredParams(argv) {
98
+ const missing = [];
99
+
100
+ switch (argv.provider) {
101
+ case 'vapi':
102
+ if (!argv.shareKey) {
103
+ missing.push({ key: 'shareKey', flag: '--share-key', description: 'Vapi share key' });
104
+ }
105
+ break;
106
+ case 'elevenlabs':
107
+ if (!argv.branchId) {
108
+ missing.push({ key: 'branchId', flag: '--branch-id', description: 'ElevenLabs branch ID' });
109
+ }
110
+ break;
111
+ // retell and others: no extra params needed yet
112
+ }
113
+
114
+ return missing;
115
+ }
116
+
117
+ /**
118
+ * Get provider-specific template parameters for comparison mode URL/HTML substitution.
119
+ *
120
+ * @param {Object} argv - Parsed CLI arguments
121
+ * @returns {Object} Template params to merge into provider params
122
+ */
123
+ function getCompareTemplateParams(argv) {
124
+ switch (argv.provider) {
125
+ case 'vapi':
126
+ return { shareKey: argv.shareKey };
127
+ case 'elevenlabs':
128
+ return { branchId: argv.branchId };
129
+ default:
130
+ return {};
131
+ }
132
+ }
133
+
90
134
  // Helper function to load and validate application config
91
135
  function loadApplicationConfig(configPath, params = {}) {
92
136
  const configFile = fs.readFileSync(configPath, 'utf8');
@@ -118,7 +162,6 @@ function loadScenarioConfig(configPath) {
118
162
  name: path.basename(configPath, path.extname(configPath)),
119
163
  path: configPath,
120
164
  steps: config.steps || [],
121
- background: config.background || null,
122
165
  tags: config.tags || []
123
166
  };
124
167
  }
@@ -236,6 +279,14 @@ const argv = yargs(hideBin(process.argv))
236
279
  type: 'string',
237
280
  description: 'Provider assistant/agent ID to import (required with --provider)'
238
281
  })
282
+ .option('share-key', {
283
+ type: 'string',
284
+ description: 'Vapi share key for direct widget testing (required for comparison mode with --provider vapi)'
285
+ })
286
+ .option('branch-id', {
287
+ type: 'string',
288
+ description: 'ElevenLabs branch ID for direct widget testing (required for comparison mode with --provider elevenlabs)'
289
+ })
239
290
  .option('assistant-id', {
240
291
  type: 'string',
241
292
  description: 'Assistant/agent ID for direct benchmarking (works with all providers)'
@@ -256,6 +307,16 @@ const argv = yargs(hideBin(process.argv))
256
307
  description: 'Disable comparison benchmarks (run only Telnyx import)',
257
308
  default: false
258
309
  })
310
+ .option('audio-url', {
311
+ type: 'string',
312
+ description: 'URL to audio file to play as input during entire benchmark run',
313
+ default: null
314
+ })
315
+ .option('audio-volume', {
316
+ type: 'number',
317
+ description: 'Volume level for audio input (0.0 to 1.0)',
318
+ default: 1.0
319
+ })
259
320
  .help()
260
321
  .argv;
261
322
 
@@ -333,11 +394,13 @@ async function runBenchmark({ applications, scenarios, repeat, concurrency, argv
333
394
  assetsServerUrl: argv.assetsServer,
334
395
  reportGenerator: reportGenerator,
335
396
  record: argv.record,
336
- debug: argv.debug
397
+ debug: argv.debug,
398
+ audioUrl: argv.audioUrl,
399
+ audioVolume: argv.audioVolume
337
400
  });
338
401
 
339
402
  try {
340
- await tester.runScenario(targetUrl, app.steps, scenario.steps, app.name, scenario.name, repetition, scenario.background);
403
+ await tester.runScenario(targetUrl, app.steps, scenario.steps, app.name, scenario.name, repetition);
341
404
  console.log(`✅ Completed successfully (Run ${runNumber}/${totalRuns})`);
342
405
  return { success: true };
343
406
  } catch (error) {
@@ -445,8 +508,8 @@ async function main() {
445
508
  // Parse URL parameters for template substitution
446
509
  const params = parseParams(argv.params);
447
510
 
448
- // Determine if we should run comparison benchmark
449
- const shouldCompare = argv.provider && argv.compare && !argv.noCompare;
511
+ // Determine if we should run comparison benchmark (may be updated later if a required provider-specific param is missing)
512
+ let shouldCompare = argv.provider && argv.compare && !argv.noCompare;
450
513
 
451
514
  // Store credentials for potential comparison run
452
515
  let telnyxApiKey = argv.apiKey;
@@ -481,6 +544,29 @@ async function main() {
481
544
  }
482
545
  }
483
546
 
547
+ // Require provider-specific params when comparison mode is enabled
548
+ if (shouldCompare) {
549
+ const missingParams = getCompareRequiredParams(argv);
550
+ if (missingParams.length > 0) {
551
+ for (const param of missingParams) {
552
+ console.log(`\n🔑 ${param.description} is required for comparison mode`);
553
+ const inputVal = await promptUserInput(`Enter ${param.description} (or press Enter to skip comparison): `);
554
+ if (inputVal) {
555
+ argv[param.key] = inputVal;
556
+ } else {
557
+ console.warn(`⚠️ Missing ${param.flag}. Disabling comparison mode (--no-compare).`);
558
+ console.warn(` To run comparison benchmarks, pass ${param.flag} <value>\n`);
559
+ argv.compare = false;
560
+ argv.noCompare = true;
561
+ break;
562
+ }
563
+ }
564
+ }
565
+ }
566
+
567
+ // Re-evaluate shouldCompare after the provider-specific param prompt (it may have disabled comparison)
568
+ shouldCompare = argv.provider && argv.compare && !argv.noCompare;
569
+
484
570
  const importResult = await importAssistantsFromProvider({
485
571
  provider: argv.provider,
486
572
  providerApiKey: providerApiKey,
@@ -572,7 +658,7 @@ async function main() {
572
658
 
573
659
  // Phase 1: Provider Direct Benchmark
574
660
  // Load provider-specific application config with provider assistant ID
575
- const providerParams = { ...params, assistantId: providerImportId };
661
+ const providerParams = { ...params, assistantId: providerImportId, ...getCompareTemplateParams(argv) };
576
662
  const providerAppPath = path.resolve(__packageDir, 'applications', `${argv.provider}.yaml`);
577
663
 
578
664
  if (!fs.existsSync(providerAppPath)) {
@@ -200,7 +200,7 @@ async function configureImportedAssistant({ assistantId, assistantName, telnyxAp
200
200
  },
201
201
  body: JSON.stringify({
202
202
  name: newName,
203
- model: 'Qwen/Qwen3-235B-A22',
203
+ model: 'Qwen/Qwen3-235B-A22B',
204
204
  telephony_settings: {
205
205
  supports_unauthenticated_web_calls: true
206
206
  },
@@ -24,6 +24,8 @@ export class VoiceAgentTester {
24
24
  this.record = options.record || false;
25
25
  this.recordingStream = null;
26
26
  this.recordingFile = null;
27
+ this.audioUrl = options.audioUrl || null;
28
+ this.audioVolume = options.audioVolume || 1.0;
27
29
  }
28
30
 
29
31
  sleep(time) {
@@ -951,8 +953,6 @@ export class VoiceAgentTester {
951
953
  return screenshotPath;
952
954
  }
953
955
 
954
-
955
-
956
956
  async saveAudioAsWAV(base64Audio, audioMetadata) {
957
957
  try {
958
958
  // Convert base64 to buffer
@@ -978,7 +978,43 @@ export class VoiceAgentTester {
978
978
  }
979
979
  }
980
980
 
981
- async runScenario(url, appSteps, scenarioSteps, appName = '', scenarioName = '', repetition = 1, backgroundFile = null) {
981
+ async startAudioFromUrl(audioUrl, volume = 1.0) {
982
+ console.log(`🔊 Starting audio from URL: ${audioUrl} (volume: ${volume})`);
983
+
984
+ try {
985
+ await this.page.evaluate(async (url, vol) => {
986
+ // Wait for media stream to be ready
987
+ if (typeof window.__waitForMediaStream === 'function') {
988
+ await window.__waitForMediaStream();
989
+ }
990
+
991
+ if (typeof window.__startAudioFromUrl === 'function') {
992
+ await window.__startAudioFromUrl(url, vol);
993
+ } else {
994
+ throw new Error('__startAudioFromUrl not available in browser context');
995
+ }
996
+ }, audioUrl, volume);
997
+
998
+ console.log(`🔊 Audio from URL started successfully`);
999
+ } catch (error) {
1000
+ console.warn(`⚠️ Failed to start audio from URL: ${error.message}`);
1001
+ }
1002
+ }
1003
+
1004
+ async stopAudioFromUrl() {
1005
+ try {
1006
+ await this.page.evaluate(() => {
1007
+ if (typeof window.__stopAudioFromUrl === 'function') {
1008
+ window.__stopAudioFromUrl();
1009
+ }
1010
+ });
1011
+ console.log(`🔊 Audio from URL stopped`);
1012
+ } catch (error) {
1013
+ // Ignore errors when stopping (page might be closed)
1014
+ }
1015
+ }
1016
+
1017
+ async runScenario(url, appSteps, scenarioSteps, appName = '', scenarioName = '', repetition = 1) {
982
1018
  let success = true;
983
1019
  try {
984
1020
  // Start tracking this run with app and scenario names
@@ -1004,6 +1040,11 @@ export class VoiceAgentTester {
1004
1040
  // Start recording if enabled
1005
1041
  await this.startRecording(appName, scenarioName, repetition);
1006
1042
 
1043
+ // Start audio from URL if specified via CLI
1044
+ if (this.audioUrl) {
1045
+ await this.startAudioFromUrl(this.audioUrl, this.audioVolume);
1046
+ }
1047
+
1007
1048
  // Execute all configured steps
1008
1049
  for (let i = 0; i < steps.length; i++) {
1009
1050
  const step = steps[i];
@@ -1022,6 +1063,15 @@ export class VoiceAgentTester {
1022
1063
  console.error(`Error during scenario execution: ${shortMessage}`);
1023
1064
  throw error;
1024
1065
  } finally {
1066
+ // Stop audio from URL if it was started
1067
+ if (this.audioUrl && this.page) {
1068
+ try {
1069
+ await this.stopAudioFromUrl();
1070
+ } catch (e) {
1071
+ // Page might already be closed
1072
+ }
1073
+ }
1074
+
1025
1075
  // Always finish the run for report generation, even if there was an error
1026
1076
  if (this.reportGenerator) {
1027
1077
  this.reportGenerator.endRun(appName, scenarioName, repetition, success);
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -1,10 +0,0 @@
1
- url: "https://vapi.ai/?demo=true&shareKey={{shareKey}}&assistantId={{assistantId}}"
2
- steps:
3
- - action: wait_for_element
4
- selector: "button[aria-label='Talk to Vapi']"
5
- - action: sleep
6
- time: 3000
7
- - action: click
8
- selector: "button[aria-label='Talk to Vapi']"
9
- - action: sleep
10
- time: 2000