@telnyx/ai-agent-lib 0.1.10 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +307 -1
- package/dist/audio-stream-monitor.d.ts +36 -6
- package/dist/client.d.ts +8 -0
- package/dist/index.js +1945 -1689
- package/dist/logger.d.ts +3 -0
- package/dist/react/use-agent-state.d.ts +3 -3
- package/dist/types.d.ts +8 -1
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -11,6 +11,7 @@ A TypeScript/React library for building AI-powered voice conversation applicatio
|
|
|
11
11
|
- 🔄 **State Management** - Automatic state synchronization using Jotai
|
|
12
12
|
- 📱 **Connection Management** - Robust connection handling with error recovery
|
|
13
13
|
- 🎚️ **Agent State Tracking** - Monitor agent states (listening, speaking, thinking)
|
|
14
|
+
- ⏱️ **Latency Measurement** - Automatic round-trip latency tracking using client-side Voice Activity Detection (VAD)
|
|
14
15
|
|
|
15
16
|
## Installation
|
|
16
17
|
|
|
@@ -149,6 +150,22 @@ function VoiceChat() {
|
|
|
149
150
|
| `environment` | `"production" \| "development"` | ❌ | `"production"` | Telnyx environment |
|
|
150
151
|
| `debug` | `boolean` | ❌ | `false` | Enable debug logging |
|
|
151
152
|
|
|
153
|
+
### Debug Logging
|
|
154
|
+
|
|
155
|
+
When `debug: true` is set, the library outputs detailed logs to the console using a timestamped format. This is useful for troubleshooting connection issues, audio stream problems, or understanding the internal state transitions.
|
|
156
|
+
|
|
157
|
+
```tsx
|
|
158
|
+
<TelnyxAIAgentProvider agentId="your-agent-id" debug={true}>
|
|
159
|
+
<App />
|
|
160
|
+
</TelnyxAIAgentProvider>
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Debug logs include:
|
|
164
|
+
- Audio stream monitoring events (local and remote)
|
|
165
|
+
- Agent state transitions with timing information
|
|
166
|
+
- AudioContext state changes
|
|
167
|
+
- Volume threshold detection
|
|
168
|
+
|
|
152
169
|
### Hooks
|
|
153
170
|
|
|
154
171
|
#### `useClient()`
|
|
@@ -160,6 +177,7 @@ Returns the `TelnyxAIAgent` instance for direct API access.
|
|
|
160
177
|
- `startConversation(options?)` - Start a new conversation with optional caller metadata and headers
|
|
161
178
|
- `endConversation()` - End the current conversation
|
|
162
179
|
- `sendConversationMessage(message: string)` - Send a text message during an active conversation
|
|
180
|
+
- `setRemoteStream(stream: MediaStream)` - Manually set the remote audio stream for monitoring (useful when `call.remoteStream` is not available)
|
|
163
181
|
- `transcript` - Get current transcript array
|
|
164
182
|
|
|
165
183
|
**`startConversation` Options:**
|
|
@@ -316,6 +334,33 @@ agent.on('conversation.update', (notification) => {
|
|
|
316
334
|
- The agent will receive and process text messages just like spoken input
|
|
317
335
|
- Text messages may appear in the transcript depending on the agent configuration
|
|
318
336
|
|
|
337
|
+
### Latency Measurement
|
|
338
|
+
|
|
339
|
+
The library automatically measures round-trip latency using client-side Voice Activity Detection (VAD). This provides accurate timing from when the user stops speaking until the agent's response audio begins.
|
|
340
|
+
|
|
341
|
+
**How it works:**
|
|
342
|
+
1. **Local VAD (User's microphone)**: Monitors the user's audio stream. After detecting 1 second of silence following speech, the library records `thinkingStartedAt` timestamp and transitions to "thinking" state.
|
|
343
|
+
2. **Remote VAD (Agent's audio)**: Monitors the agent's audio stream. When audio volume crosses the threshold, the library calculates `latencyMs` as the time elapsed since `thinkingStartedAt` and transitions to "speaking" state.
|
|
344
|
+
|
|
345
|
+
**Configuration constants:**
|
|
346
|
+
- Volume threshold: 10 (frequency data average)
|
|
347
|
+
- Silence duration: 1000ms (time of silence before triggering "thinking" state)
|
|
348
|
+
- Check interval: 20ms (polling frequency for local audio)
|
|
349
|
+
|
|
350
|
+
```tsx
|
|
351
|
+
const agentState = useAgentState();
|
|
352
|
+
|
|
353
|
+
// Access latency when agent starts speaking
|
|
354
|
+
useEffect(() => {
|
|
355
|
+
if (agentState.latencyMs !== undefined) {
|
|
356
|
+
console.log(`Response latency: ${agentState.latencyMs}ms`);
|
|
357
|
+
}
|
|
358
|
+
if (agentState.thinkingStartedAt) {
|
|
359
|
+
console.log(`Started thinking at: ${agentState.thinkingStartedAt}`);
|
|
360
|
+
}
|
|
361
|
+
}, [agentState]);
|
|
362
|
+
```
|
|
363
|
+
|
|
319
364
|
### Custom Audio Handling
|
|
320
365
|
|
|
321
366
|
The library automatically handles audio stream monitoring and agent state detection based on audio levels. The audio stream is available through the conversation object:
|
|
@@ -325,6 +370,28 @@ const conversation = useConversation();
|
|
|
325
370
|
const audioStream = conversation?.call?.remoteStream;
|
|
326
371
|
```
|
|
327
372
|
|
|
373
|
+
If `call.remoteStream` is not available, you can manually provide the stream using `setRemoteStream`:
|
|
374
|
+
|
|
375
|
+
```tsx
|
|
376
|
+
const client = useClient();
|
|
377
|
+
const conversation = useConversation();
|
|
378
|
+
|
|
379
|
+
useEffect(() => {
|
|
380
|
+
const call = conversation?.call;
|
|
381
|
+
if (call?.state === 'active') {
|
|
382
|
+
// Get stream from peer connection if remoteStream is not available
|
|
383
|
+
const peerConnection = call.peer?.instance;
|
|
384
|
+
const receivers = peerConnection?.getReceivers?.();
|
|
385
|
+
const audioReceiver = receivers?.find(r => r.track?.kind === 'audio');
|
|
386
|
+
|
|
387
|
+
if (audioReceiver?.track) {
|
|
388
|
+
const stream = new MediaStream([audioReceiver.track]);
|
|
389
|
+
client.setRemoteStream(stream);
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
}, [conversation, client]);
|
|
393
|
+
```
|
|
394
|
+
|
|
328
395
|
### Error Handling
|
|
329
396
|
|
|
330
397
|
```tsx
|
|
@@ -348,6 +415,244 @@ useEffect(() => {
|
|
|
348
415
|
|
|
349
416
|
The library uses Jotai for state management, which automatically handles state updates across components. All state is ephemeral and resets when the provider unmounts.
|
|
350
417
|
|
|
418
|
+
## Events Reference
|
|
419
|
+
|
|
420
|
+
The `TelnyxAIAgent` class extends `EventEmitter` and provides a comprehensive set of events for monitoring connection status, conversation state, and agent behavior. Events can be subscribed to using `on()`, `once()`, or `addListener()` and unsubscribed using `off()` or `removeListener()`.
|
|
421
|
+
|
|
422
|
+
### Event Types
|
|
423
|
+
|
|
424
|
+
| Event | Payload | Description |
|
|
425
|
+
|-------|---------|-------------|
|
|
426
|
+
| `agent.connected` | - | Emitted when successfully connected to the Telnyx platform |
|
|
427
|
+
| `agent.disconnected` | - | Emitted when disconnected from the Telnyx platform |
|
|
428
|
+
| `agent.error` | `Error` | Emitted when any operational error occurs |
|
|
429
|
+
| `transcript.item` | `TranscriptItem` | Emitted when a new transcript item is received |
|
|
430
|
+
| `conversation.update` | `INotification` | Emitted when conversation state changes |
|
|
431
|
+
| `conversation.agent.state` | `AgentStateData` | Emitted when agent state changes (listening/speaking/thinking) |
|
|
432
|
+
| `agent.audio.mute` | `boolean` | Emitted when agent audio is muted or unmuted |
|
|
433
|
+
|
|
434
|
+
### Data Types
|
|
435
|
+
|
|
436
|
+
```typescript
|
|
437
|
+
// Transcript item representing a message in the conversation
|
|
438
|
+
type TranscriptItem = {
|
|
439
|
+
id: string;
|
|
440
|
+
role: "user" | "assistant";
|
|
441
|
+
content: string;
|
|
442
|
+
timestamp: Date;
|
|
443
|
+
attachments?: Array<{ type: "image"; url: string }>;
|
|
444
|
+
};
|
|
445
|
+
|
|
446
|
+
// Agent state with optional latency information
|
|
447
|
+
type AgentStateData = {
|
|
448
|
+
state: "speaking" | "listening" | "thinking";
|
|
449
|
+
// Round-trip latency in ms from when user stopped speaking until agent response began.
|
|
450
|
+
// Only present when state is "speaking"
|
|
451
|
+
latencyMs?: number;
|
|
452
|
+
// UTC timestamp (ISO 8601) when user stopped speaking and thinking state began.
|
|
453
|
+
// Only present when state is "thinking"
|
|
454
|
+
thinkingStartedAt?: string;
|
|
455
|
+
};
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
### Usage Examples
|
|
459
|
+
|
|
460
|
+
#### Connection Events
|
|
461
|
+
|
|
462
|
+
Monitor connection lifecycle to handle reconnection logic or update UI:
|
|
463
|
+
|
|
464
|
+
```typescript
|
|
465
|
+
const agent = new TelnyxAIAgent({ agentId: 'your-agent-id' });
|
|
466
|
+
|
|
467
|
+
agent.on('agent.connected', () => {
|
|
468
|
+
console.log('Connected to Telnyx platform');
|
|
469
|
+
// Enable UI controls, start conversation, etc.
|
|
470
|
+
});
|
|
471
|
+
|
|
472
|
+
agent.on('agent.disconnected', () => {
|
|
473
|
+
console.log('Disconnected from Telnyx platform');
|
|
474
|
+
// Disable UI controls, show reconnection prompt, etc.
|
|
475
|
+
});
|
|
476
|
+
|
|
477
|
+
agent.on('agent.error', (error) => {
|
|
478
|
+
console.error('Agent error:', error.message);
|
|
479
|
+
// Handle error: show notification, attempt reconnection, etc.
|
|
480
|
+
});
|
|
481
|
+
```
|
|
482
|
+
|
|
483
|
+
#### Agent State Events
|
|
484
|
+
|
|
485
|
+
Track whether the agent is listening, thinking, or speaking. This is useful for visual feedback like animated indicators. The library uses client-side Voice Activity Detection (VAD) to detect when the user stops speaking (after 1 second of silence) and when the agent starts responding, providing accurate round-trip latency measurements.
|
|
486
|
+
|
|
487
|
+
```typescript
|
|
488
|
+
agent.on('conversation.agent.state', (data) => {
|
|
489
|
+
console.log(`Agent state: ${data.state}`);
|
|
490
|
+
|
|
491
|
+
switch (data.state) {
|
|
492
|
+
case 'listening':
|
|
493
|
+
// Show listening indicator (e.g., pulsing microphone)
|
|
494
|
+
break;
|
|
495
|
+
case 'thinking':
|
|
496
|
+
// Show thinking indicator (e.g., loading spinner)
|
|
497
|
+
// thinkingStartedAt contains the UTC timestamp when user stopped speaking
|
|
498
|
+
if (data.thinkingStartedAt) {
|
|
499
|
+
console.log(`Thinking started at: ${data.thinkingStartedAt}`);
|
|
500
|
+
}
|
|
501
|
+
break;
|
|
502
|
+
case 'speaking':
|
|
503
|
+
// Show speaking indicator (e.g., animated waveform)
|
|
504
|
+
if (data.latencyMs !== undefined) {
|
|
505
|
+
console.log(`Response latency: ${data.latencyMs}ms`);
|
|
506
|
+
// Track latency for analytics or display to user
|
|
507
|
+
}
|
|
508
|
+
break;
|
|
509
|
+
}
|
|
510
|
+
});
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
#### Transcript Events
|
|
514
|
+
|
|
515
|
+
Build a real-time chat interface by listening to transcript updates:
|
|
516
|
+
|
|
517
|
+
```typescript
|
|
518
|
+
const conversationHistory: TranscriptItem[] = [];
|
|
519
|
+
|
|
520
|
+
agent.on('transcript.item', (item) => {
|
|
521
|
+
conversationHistory.push(item);
|
|
522
|
+
|
|
523
|
+
// Display the new message
|
|
524
|
+
console.log(`[${item.timestamp.toLocaleTimeString()}] ${item.role}: ${item.content}`);
|
|
525
|
+
|
|
526
|
+
// Handle attachments if present
|
|
527
|
+
if (item.attachments?.length) {
|
|
528
|
+
item.attachments.forEach((attachment) => {
|
|
529
|
+
if (attachment.type === 'image') {
|
|
530
|
+
console.log(`Image attachment: ${attachment.url}`);
|
|
531
|
+
}
|
|
532
|
+
});
|
|
533
|
+
}
|
|
534
|
+
});
|
|
535
|
+
```
|
|
536
|
+
|
|
537
|
+
#### Conversation Update Events
|
|
538
|
+
|
|
539
|
+
Monitor call state changes to know when to enable/disable features:
|
|
540
|
+
|
|
541
|
+
```typescript
|
|
542
|
+
agent.on('conversation.update', (notification) => {
|
|
543
|
+
const call = notification.call;
|
|
544
|
+
|
|
545
|
+
if (!call) return;
|
|
546
|
+
|
|
547
|
+
console.log(`Call state: ${call.state}`);
|
|
548
|
+
|
|
549
|
+
switch (call.state) {
|
|
550
|
+
case 'new':
|
|
551
|
+
console.log('Call initiated');
|
|
552
|
+
break;
|
|
553
|
+
case 'trying':
|
|
554
|
+
console.log('Connecting...');
|
|
555
|
+
break;
|
|
556
|
+
case 'ringing':
|
|
557
|
+
console.log('Ringing...');
|
|
558
|
+
break;
|
|
559
|
+
case 'active':
|
|
560
|
+
console.log('Call is active - voice and text messaging enabled');
|
|
561
|
+
// Enable send message button, show active call UI
|
|
562
|
+
break;
|
|
563
|
+
case 'hangup':
|
|
564
|
+
case 'destroy':
|
|
565
|
+
console.log('Call ended');
|
|
566
|
+
// Clean up UI, show call summary
|
|
567
|
+
break;
|
|
568
|
+
}
|
|
569
|
+
});
|
|
570
|
+
```
|
|
571
|
+
|
|
572
|
+
#### React Hook Pattern
|
|
573
|
+
|
|
574
|
+
In React applications, use the hooks with `useEffect` to manage event subscriptions:
|
|
575
|
+
|
|
576
|
+
```tsx
|
|
577
|
+
import { useState, useEffect } from 'react';
import { useClient, useAgentState, useConnectionState } from '@telnyx/ai-agent-lib';
|
|
578
|
+
|
|
579
|
+
function ConversationMonitor() {
|
|
580
|
+
const client = useClient();
|
|
581
|
+
const agentState = useAgentState();
|
|
582
|
+
const connectionState = useConnectionState();
|
|
583
|
+
const [latencyHistory, setLatencyHistory] = useState<number[]>([]);
|
|
584
|
+
|
|
585
|
+
// Subscribe to events for additional handling beyond the built-in hooks
|
|
586
|
+
useEffect(() => {
|
|
587
|
+
const handleAgentState = (data: AgentStateData) => {
|
|
588
|
+
if (data.latencyMs !== undefined) {
|
|
589
|
+
setLatencyHistory(prev => [...prev, data.latencyMs!]);
|
|
590
|
+
}
|
|
591
|
+
};
|
|
592
|
+
|
|
593
|
+
const handleError = (error: Error) => {
|
|
594
|
+
// Custom error handling (e.g., send to error tracking service)
|
|
595
|
+
console.error('Agent error:', error);
|
|
596
|
+
};
|
|
597
|
+
|
|
598
|
+
client.on('conversation.agent.state', handleAgentState);
|
|
599
|
+
client.on('agent.error', handleError);
|
|
600
|
+
|
|
601
|
+
return () => {
|
|
602
|
+
client.off('conversation.agent.state', handleAgentState);
|
|
603
|
+
client.off('agent.error', handleError);
|
|
604
|
+
};
|
|
605
|
+
}, [client]);
|
|
606
|
+
|
|
607
|
+
const averageLatency = latencyHistory.length > 0
|
|
608
|
+
? Math.round(latencyHistory.reduce((a, b) => a + b, 0) / latencyHistory.length)
|
|
609
|
+
: null;
|
|
610
|
+
|
|
611
|
+
return (
|
|
612
|
+
<div>
|
|
613
|
+
<p>Connection: {connectionState}</p>
|
|
614
|
+
<p>Agent: {agentState.state}</p>
|
|
615
|
+
{averageLatency !== null && <p>Avg Response Time: {averageLatency}ms</p>}
|
|
616
|
+
</div>
|
|
617
|
+
);
|
|
618
|
+
}
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
#### One-Time Event Listeners
|
|
622
|
+
|
|
623
|
+
Use `once()` for events you only need to handle one time:
|
|
624
|
+
|
|
625
|
+
```typescript
|
|
626
|
+
// Wait for connection before starting conversation
|
|
627
|
+
agent.once('agent.connected', () => {
|
|
628
|
+
agent.startConversation({ callerName: 'User' });
|
|
629
|
+
});
|
|
630
|
+
|
|
631
|
+
await agent.connect();
|
|
632
|
+
```
|
|
633
|
+
|
|
634
|
+
#### Removing Event Listeners
|
|
635
|
+
|
|
636
|
+
Clean up listeners when no longer needed:
|
|
637
|
+
|
|
638
|
+
```typescript
|
|
639
|
+
const handleTranscript = (item: TranscriptItem) => {
|
|
640
|
+
console.log(`${item.role}: ${item.content}`);
|
|
641
|
+
};
|
|
642
|
+
|
|
643
|
+
// Add listener
|
|
644
|
+
agent.on('transcript.item', handleTranscript);
|
|
645
|
+
|
|
646
|
+
// Remove specific listener
|
|
647
|
+
agent.off('transcript.item', handleTranscript);
|
|
648
|
+
|
|
649
|
+
// Remove all listeners for an event
|
|
650
|
+
agent.removeAllListeners('transcript.item');
|
|
651
|
+
|
|
652
|
+
// Remove all listeners for all events
|
|
653
|
+
agent.removeAllListeners();
|
|
654
|
+
```
|
|
655
|
+
|
|
351
656
|
## TypeScript Support
|
|
352
657
|
|
|
353
658
|
This library is built with TypeScript and provides full type definitions. All hooks and components are fully typed for the best development experience.
|
|
@@ -361,6 +666,7 @@ Check out the [example](https://github.com/team-telnyx/telnyx-ai-agent-lib-examp
|
|
|
361
666
|
- `@telnyx/webrtc` - Telnyx WebRTC SDK
|
|
362
667
|
- `eventemitter3` - Event handling
|
|
363
668
|
- `jotai` - State management
|
|
669
|
+
- `loglevel` - Lightweight logging with level control
|
|
364
670
|
|
|
365
671
|
## License
|
|
366
672
|
|
|
@@ -372,4 +678,4 @@ For support, please contact Telnyx support or check the [Telnyx documentation](h
|
|
|
372
678
|
|
|
373
679
|
## Contributing
|
|
374
680
|
|
|
375
|
-
This library is maintained by Telnyx. For bug reports or feature requests, please contact Telnyx support.
|
|
681
|
+
This library is maintained by Telnyx. For bug reports or feature requests, please contact Telnyx support.
|
|
@@ -1,11 +1,41 @@
|
|
|
1
1
|
export declare class AudioStreamMonitor {
|
|
2
|
-
private
|
|
3
|
-
private
|
|
4
|
-
private
|
|
5
|
-
private
|
|
6
|
-
private
|
|
2
|
+
private remoteIntervalId;
|
|
3
|
+
private localIntervalId;
|
|
4
|
+
private remoteStream;
|
|
5
|
+
private localStream;
|
|
6
|
+
private remoteAudioContext;
|
|
7
|
+
private remoteSource;
|
|
8
|
+
private remoteAnalyser;
|
|
9
|
+
private localAudioContext;
|
|
10
|
+
private localSource;
|
|
11
|
+
private localAnalyser;
|
|
12
|
+
private thinkingStartTime;
|
|
13
|
+
private lastState;
|
|
14
|
+
private userIsSpeaking;
|
|
15
|
+
private lastUserAudioTime;
|
|
16
|
+
constructor();
|
|
7
17
|
private updateAgentState;
|
|
18
|
+
/**
|
|
19
|
+
* Set the remote audio stream (agent's voice) to monitor for speech detection
|
|
20
|
+
*/
|
|
21
|
+
setRemoteStream(stream: MediaStream): void;
|
|
22
|
+
/**
|
|
23
|
+
* Set the local audio stream (user's microphone) to monitor for VAD
|
|
24
|
+
*/
|
|
25
|
+
setLocalStream(stream: MediaStream): void;
|
|
8
26
|
setMonitoredAudioStream(stream: MediaStream): void;
|
|
27
|
+
private stopRemoteMonitor;
|
|
28
|
+
private stopLocalMonitor;
|
|
9
29
|
stopAudioStreamMonitor(): void;
|
|
10
|
-
|
|
30
|
+
/**
|
|
31
|
+
* Monitor remote stream (agent's audio) for speech detection
|
|
32
|
+
* Detects when agent starts speaking to calculate latency
|
|
33
|
+
*/
|
|
34
|
+
private startRemoteMonitor;
|
|
35
|
+
/**
|
|
36
|
+
* Monitor local stream (user's microphone) for VAD
|
|
37
|
+
* Detects when user stops speaking (1000ms of silence) to start latency measurement
|
|
38
|
+
*/
|
|
39
|
+
private startLocalMonitor;
|
|
40
|
+
destroy(): void;
|
|
11
41
|
}
|
package/dist/client.d.ts
CHANGED
|
@@ -89,6 +89,14 @@ export declare class TelnyxAIAgent extends EventEmitter<AIAgentEvents> {
|
|
|
89
89
|
* @returns Promise that resolves when the call is hung up, or undefined if there is no active call
|
|
90
90
|
*/
|
|
91
91
|
endConversation(): void | undefined;
|
|
92
|
+
/**
|
|
93
|
+
* Sets the remote audio stream for monitoring agent speech.
|
|
94
|
+
* Use this when call.remoteStream is not available and you need to provide
|
|
95
|
+
* the stream from the peer connection receiver.
|
|
96
|
+
*
|
|
97
|
+
* @param stream - The MediaStream containing the remote (agent) audio
|
|
98
|
+
*/
|
|
99
|
+
setRemoteStream(stream: MediaStream): void;
|
|
92
100
|
private onClientReady;
|
|
93
101
|
private onClientOrSocketError;
|
|
94
102
|
private onNotification;
|