@telnyx/ai-agent-lib 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -5
- package/dist/audio-stream-monitor.d.ts +12 -1
- package/dist/client.d.ts +5 -1
- package/dist/index.js +1058 -1047
- package/dist/react/client-context.d.ts +1 -1
- package/dist/types.d.ts +31 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -149,6 +149,7 @@ function VoiceChat() {
|
|
|
149
149
|
| `versionId` | `string` | ❌ | `"main"` | Agent version to use |
|
|
150
150
|
| `environment` | `"production" \| "development"` | ❌ | `"production"` | Telnyx environment |
|
|
151
151
|
| `debug` | `boolean` | ❌ | `false` | Enable debug logging |
|
|
152
|
+
| `vad` | `VADOptions` | ❌ | See below | Voice Activity Detection configuration |
|
|
152
153
|
|
|
153
154
|
### Debug Logging
|
|
154
155
|
|
|
@@ -339,14 +340,70 @@ agent.on('conversation.update', (notification) => {
|
|
|
339
340
|
The library automatically measures round-trip latency using client-side Voice Activity Detection (VAD). This provides accurate timing from when the user stops speaking until the agent's response audio begins.
|
|
340
341
|
|
|
341
342
|
**How it works:**
|
|
342
|
-
1. **Local VAD (User's microphone)**: Monitors the user's audio stream. After detecting
|
|
343
|
+
1. **Local VAD (User's microphone)**: Monitors the user's audio stream. After detecting silence following speech, the library records `thinkingStartedAt` timestamp and transitions to "thinking" state.
|
|
343
344
|
2. **Remote VAD (Agent's audio)**: Monitors the agent's audio stream. When audio volume crosses the threshold, the library calculates `userPerceivedLatencyMs` as the time elapsed since user went silent and transitions to "speaking" state.
|
|
344
345
|
3. **Greeting Latency**: For the first agent speech (greeting), the library calculates `greetingLatencyMs` from when the audio stream monitoring started.
|
|
345
346
|
|
|
346
|
-
**Configuration
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
347
|
+
**VAD Configuration:**
|
|
348
|
+
|
|
349
|
+
The VAD behavior can be customized using the `vad` option:
|
|
350
|
+
|
|
351
|
+
```typescript
|
|
352
|
+
// React
|
|
353
|
+
<TelnyxAIAgentProvider
|
|
354
|
+
agentId="your-agent-id"
|
|
355
|
+
vad={{
|
|
356
|
+
volumeThreshold: 10, // 0-255, audio level to detect speech
|
|
357
|
+
silenceDurationMs: 500, // ms of silence before "thinking" state
|
|
358
|
+
minSpeechDurationMs: 100, // min ms of speech to count as real (filters noise)
|
|
359
|
+
maxLatencyMs: 15000, // ignore latency above this (optional, filters stale)
|
|
360
|
+
}}
|
|
361
|
+
>
|
|
362
|
+
|
|
363
|
+
// Direct usage
|
|
364
|
+
const agent = new TelnyxAIAgent({
|
|
365
|
+
agentId: 'your-agent-id',
|
|
366
|
+
vad: {
|
|
367
|
+
volumeThreshold: 10,
|
|
368
|
+
silenceDurationMs: 500,
|
|
369
|
+
minSpeechDurationMs: 100,
|
|
370
|
+
maxLatencyMs: 15000,
|
|
371
|
+
},
|
|
372
|
+
});
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
**VAD Options:**
|
|
376
|
+
|
|
377
|
+
| Option | Type | Default | Description |
|
|
378
|
+
|--------|------|---------|-------------|
|
|
379
|
+
| `volumeThreshold` | `number` | `10` | Audio level (0-255) to detect speech |
|
|
380
|
+
| `silenceDurationMs` | `number` | `500` | Silence duration before triggering "thinking" state |
|
|
381
|
+
| `minSpeechDurationMs` | `number` | `100` | Minimum speech duration to count as real user speech (filters brief noise) |
|
|
382
|
+
| `maxLatencyMs` | `number` | `undefined` | Maximum latency to report (values above are ignored as stale) |
|
|
383
|
+
|
|
384
|
+
**Tuning for different scenarios:**
|
|
385
|
+
|
|
386
|
+
```typescript
|
|
387
|
+
// Fast-paced conversation (aggressive turn detection)
|
|
388
|
+
vad: {
|
|
389
|
+
silenceDurationMs: 500,
|
|
390
|
+
minSpeechDurationMs: 80,
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
// Thoughtful conversation (tolerant of pauses)
|
|
394
|
+
vad: {
|
|
395
|
+
silenceDurationMs: 1500,
|
|
396
|
+
minSpeechDurationMs: 150,
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
// Noisy environment
|
|
400
|
+
vad: {
|
|
401
|
+
volumeThreshold: 20,
|
|
402
|
+
minSpeechDurationMs: 200,
|
|
403
|
+
}
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
**Note:** Silence-based VAD has inherent tradeoffs. Lower `silenceDurationMs` values detect turn-endings faster but may cut off natural pauses ("I need to... think about that"). Higher values are more tolerant but add latency. For production use cases requiring precise turn detection, consider integrating server-side semantic endpointing.
|
|
350
407
|
|
|
351
408
|
```tsx
|
|
352
409
|
const agentState = useAgentState();
|
|
package/dist/audio-stream-monitor.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { VADOptions } from "./types";
|
|
1
2
|
export declare class AudioStreamMonitor {
|
|
2
3
|
private remoteIntervalId;
|
|
3
4
|
private localIntervalId;
|
|
@@ -13,10 +14,15 @@ export declare class AudioStreamMonitor {
|
|
|
13
14
|
private lastState;
|
|
14
15
|
private userIsSpeaking;
|
|
15
16
|
private lastUserAudioTime;
|
|
17
|
+
private userSpeechStartTime;
|
|
16
18
|
private userSilenceStartTime;
|
|
17
19
|
private isFirstAgentSpeech;
|
|
18
20
|
private monitorStartTime;
|
|
19
|
-
|
|
21
|
+
private volumeThreshold;
|
|
22
|
+
private silenceDurationMs;
|
|
23
|
+
private minSpeechDurationMs;
|
|
24
|
+
private maxLatencyMs;
|
|
25
|
+
constructor(options?: VADOptions);
|
|
20
26
|
private updateAgentState;
|
|
21
27
|
/**
|
|
22
28
|
* Set the remote audio stream (agent's voice) to monitor for speech detection
|
|
@@ -29,6 +35,11 @@ export declare class AudioStreamMonitor {
|
|
|
29
35
|
setMonitoredAudioStream(stream: MediaStream): void;
|
|
30
36
|
private stopRemoteMonitor;
|
|
31
37
|
private stopLocalMonitor;
|
|
38
|
+
/**
|
|
39
|
+
* Reset all latency tracking state for a fresh call.
|
|
40
|
+
* Should be called when a call ends to prevent stale data affecting the next call.
|
|
41
|
+
*/
|
|
42
|
+
private resetLatencyState;
|
|
32
43
|
stopAudioStreamMonitor(): void;
|
|
33
44
|
/**
|
|
34
45
|
* Monitor remote stream (agent's audio) for speech detection
|
package/dist/client.d.ts
CHANGED
|
@@ -1,12 +1,16 @@
|
|
|
1
1
|
import { Call } from "@telnyx/webrtc";
|
|
2
2
|
import EventEmitter from "eventemitter3";
|
|
3
|
-
import type { AIAgentEvents, TranscriptItem } from "./types";
|
|
3
|
+
import type { AIAgentEvents, TranscriptItem, VADOptions } from "./types";
|
|
4
4
|
export type TelnyxAIAgentConstructorParams = {
|
|
5
5
|
agentId: string;
|
|
6
6
|
versionId?: string;
|
|
7
7
|
environment?: "production" | "development";
|
|
8
8
|
debug?: boolean;
|
|
9
9
|
trickleIce?: boolean;
|
|
10
|
+
/**
|
|
11
|
+
* Voice Activity Detection options for controlling speech detection and latency measurement.
|
|
12
|
+
*/
|
|
13
|
+
vad?: VADOptions;
|
|
10
14
|
};
|
|
11
15
|
export declare class TelnyxAIAgent extends EventEmitter<AIAgentEvents> {
|
|
12
16
|
private telnyxRTC;
|