@azure/ai-voicelive 1.0.0-alpha.20251117.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +358 -0
- package/dist/browser/auth/credentialHandler.d.ts +43 -0
- package/dist/browser/auth/credentialHandler.js +147 -0
- package/dist/browser/auth/credentialHandler.js.map +1 -0
- package/dist/browser/errors/connectionErrors.d.ts +68 -0
- package/dist/browser/errors/connectionErrors.js +136 -0
- package/dist/browser/errors/connectionErrors.js.map +1 -0
- package/dist/browser/errors/index.d.ts +2 -0
- package/dist/browser/errors/index.js +4 -0
- package/dist/browser/errors/index.js.map +1 -0
- package/dist/browser/handlers/sessionHandlers.d.ts +250 -0
- package/dist/browser/handlers/sessionHandlers.js +4 -0
- package/dist/browser/handlers/sessionHandlers.js.map +1 -0
- package/dist/browser/handlers/subscriptionManager.d.ts +54 -0
- package/dist/browser/handlers/subscriptionManager.js +250 -0
- package/dist/browser/handlers/subscriptionManager.js.map +1 -0
- package/dist/browser/index.d.ts +7 -0
- package/dist/browser/index.js +12 -0
- package/dist/browser/index.js.map +1 -0
- package/dist/browser/logger.d.ts +2 -0
- package/dist/browser/logger.js +5 -0
- package/dist/browser/logger.js.map +1 -0
- package/dist/browser/models/index.d.ts +2 -0
- package/dist/browser/models/index.js +4 -0
- package/dist/browser/models/index.js.map +1 -0
- package/dist/browser/models/models.d.ts +2154 -0
- package/dist/browser/models/models.js +2251 -0
- package/dist/browser/models/models.js.map +1 -0
- package/dist/browser/package.json +3 -0
- package/dist/browser/protocol/messageParser.d.ts +42 -0
- package/dist/browser/protocol/messageParser.js +150 -0
- package/dist/browser/protocol/messageParser.js.map +1 -0
- package/dist/browser/voiceLiveClient.d.ts +65 -0
- package/dist/browser/voiceLiveClient.js +81 -0
- package/dist/browser/voiceLiveClient.js.map +1 -0
- package/dist/browser/voiceLiveSession.d.ts +138 -0
- package/dist/browser/voiceLiveSession.js +429 -0
- package/dist/browser/voiceLiveSession.js.map +1 -0
- package/dist/browser/websocket/connectionManager.d.ts +88 -0
- package/dist/browser/websocket/connectionManager.js +183 -0
- package/dist/browser/websocket/connectionManager.js.map +1 -0
- package/dist/browser/websocket/websocketBrowser.d.ts +26 -0
- package/dist/browser/websocket/websocketBrowser.js +175 -0
- package/dist/browser/websocket/websocketBrowser.js.map +1 -0
- package/dist/browser/websocket/websocketFactory.d.ts +23 -0
- package/dist/browser/websocket/websocketFactory.js +80 -0
- package/dist/browser/websocket/websocketFactory.js.map +1 -0
- package/dist/browser/websocket/websocketLike.d.ts +78 -0
- package/dist/browser/websocket/websocketLike.js +13 -0
- package/dist/browser/websocket/websocketLike.js.map +1 -0
- package/dist/browser/websocket/websocketNode.d.ts +26 -0
- package/dist/browser/websocket/websocketNode.js +180 -0
- package/dist/browser/websocket/websocketNode.js.map +1 -0
- package/dist/commonjs/auth/credentialHandler.d.ts +43 -0
- package/dist/commonjs/auth/credentialHandler.js +151 -0
- package/dist/commonjs/auth/credentialHandler.js.map +1 -0
- package/dist/commonjs/errors/connectionErrors.d.ts +68 -0
- package/dist/commonjs/errors/connectionErrors.js +146 -0
- package/dist/commonjs/errors/connectionErrors.js.map +1 -0
- package/dist/commonjs/errors/index.d.ts +2 -0
- package/dist/commonjs/errors/index.js +7 -0
- package/dist/commonjs/errors/index.js.map +1 -0
- package/dist/commonjs/handlers/sessionHandlers.d.ts +250 -0
- package/dist/commonjs/handlers/sessionHandlers.js +5 -0
- package/dist/commonjs/handlers/sessionHandlers.js.map +1 -0
- package/dist/commonjs/handlers/subscriptionManager.d.ts +54 -0
- package/dist/commonjs/handlers/subscriptionManager.js +255 -0
- package/dist/commonjs/handlers/subscriptionManager.js.map +1 -0
- package/dist/commonjs/index.d.ts +7 -0
- package/dist/commonjs/index.js +45 -0
- package/dist/commonjs/index.js.map +1 -0
- package/dist/commonjs/logger.d.ts +2 -0
- package/dist/commonjs/logger.js +8 -0
- package/dist/commonjs/logger.js.map +1 -0
- package/dist/commonjs/models/index.d.ts +2 -0
- package/dist/commonjs/models/index.js +27 -0
- package/dist/commonjs/models/index.js.map +1 -0
- package/dist/commonjs/models/models.d.ts +2154 -0
- package/dist/commonjs/models/models.js +2463 -0
- package/dist/commonjs/models/models.js.map +1 -0
- package/dist/commonjs/package.json +3 -0
- package/dist/commonjs/protocol/messageParser.d.ts +42 -0
- package/dist/commonjs/protocol/messageParser.js +154 -0
- package/dist/commonjs/protocol/messageParser.js.map +1 -0
- package/dist/commonjs/tsdoc-metadata.json +11 -0
- package/dist/commonjs/voiceLiveClient.d.ts +65 -0
- package/dist/commonjs/voiceLiveClient.js +85 -0
- package/dist/commonjs/voiceLiveClient.js.map +1 -0
- package/dist/commonjs/voiceLiveSession.d.ts +138 -0
- package/dist/commonjs/voiceLiveSession.js +433 -0
- package/dist/commonjs/voiceLiveSession.js.map +1 -0
- package/dist/commonjs/websocket/connectionManager.d.ts +88 -0
- package/dist/commonjs/websocket/connectionManager.js +187 -0
- package/dist/commonjs/websocket/connectionManager.js.map +1 -0
- package/dist/commonjs/websocket/websocketBrowser.d.ts +26 -0
- package/dist/commonjs/websocket/websocketBrowser.js +179 -0
- package/dist/commonjs/websocket/websocketBrowser.js.map +1 -0
- package/dist/commonjs/websocket/websocketFactory.d.ts +23 -0
- package/dist/commonjs/websocket/websocketFactory.js +86 -0
- package/dist/commonjs/websocket/websocketFactory.js.map +1 -0
- package/dist/commonjs/websocket/websocketLike.d.ts +78 -0
- package/dist/commonjs/websocket/websocketLike.js +16 -0
- package/dist/commonjs/websocket/websocketLike.js.map +1 -0
- package/dist/commonjs/websocket/websocketNode.d.ts +26 -0
- package/dist/commonjs/websocket/websocketNode.js +185 -0
- package/dist/commonjs/websocket/websocketNode.js.map +1 -0
- package/dist/esm/auth/credentialHandler.d.ts +43 -0
- package/dist/esm/auth/credentialHandler.js +147 -0
- package/dist/esm/auth/credentialHandler.js.map +1 -0
- package/dist/esm/errors/connectionErrors.d.ts +68 -0
- package/dist/esm/errors/connectionErrors.js +136 -0
- package/dist/esm/errors/connectionErrors.js.map +1 -0
- package/dist/esm/errors/index.d.ts +2 -0
- package/dist/esm/errors/index.js +4 -0
- package/dist/esm/errors/index.js.map +1 -0
- package/dist/esm/handlers/sessionHandlers.d.ts +250 -0
- package/dist/esm/handlers/sessionHandlers.js +4 -0
- package/dist/esm/handlers/sessionHandlers.js.map +1 -0
- package/dist/esm/handlers/subscriptionManager.d.ts +54 -0
- package/dist/esm/handlers/subscriptionManager.js +250 -0
- package/dist/esm/handlers/subscriptionManager.js.map +1 -0
- package/dist/esm/index.d.ts +7 -0
- package/dist/esm/index.js +12 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/logger.d.ts +2 -0
- package/dist/esm/logger.js +5 -0
- package/dist/esm/logger.js.map +1 -0
- package/dist/esm/models/index.d.ts +2 -0
- package/dist/esm/models/index.js +4 -0
- package/dist/esm/models/index.js.map +1 -0
- package/dist/esm/models/models.d.ts +2154 -0
- package/dist/esm/models/models.js +2251 -0
- package/dist/esm/models/models.js.map +1 -0
- package/dist/esm/package.json +3 -0
- package/dist/esm/protocol/messageParser.d.ts +42 -0
- package/dist/esm/protocol/messageParser.js +150 -0
- package/dist/esm/protocol/messageParser.js.map +1 -0
- package/dist/esm/voiceLiveClient.d.ts +65 -0
- package/dist/esm/voiceLiveClient.js +81 -0
- package/dist/esm/voiceLiveClient.js.map +1 -0
- package/dist/esm/voiceLiveSession.d.ts +138 -0
- package/dist/esm/voiceLiveSession.js +429 -0
- package/dist/esm/voiceLiveSession.js.map +1 -0
- package/dist/esm/websocket/connectionManager.d.ts +88 -0
- package/dist/esm/websocket/connectionManager.js +183 -0
- package/dist/esm/websocket/connectionManager.js.map +1 -0
- package/dist/esm/websocket/websocketBrowser.d.ts +26 -0
- package/dist/esm/websocket/websocketBrowser.js +175 -0
- package/dist/esm/websocket/websocketBrowser.js.map +1 -0
- package/dist/esm/websocket/websocketFactory.d.ts +23 -0
- package/dist/esm/websocket/websocketFactory.js +80 -0
- package/dist/esm/websocket/websocketFactory.js.map +1 -0
- package/dist/esm/websocket/websocketLike.d.ts +78 -0
- package/dist/esm/websocket/websocketLike.js +13 -0
- package/dist/esm/websocket/websocketLike.js.map +1 -0
- package/dist/esm/websocket/websocketNode.d.ts +26 -0
- package/dist/esm/websocket/websocketNode.js +180 -0
- package/dist/esm/websocket/websocketNode.js.map +1 -0
- package/dist/react-native/auth/credentialHandler.d.ts +43 -0
- package/dist/react-native/auth/credentialHandler.js +147 -0
- package/dist/react-native/auth/credentialHandler.js.map +1 -0
- package/dist/react-native/errors/connectionErrors.d.ts +68 -0
- package/dist/react-native/errors/connectionErrors.js +136 -0
- package/dist/react-native/errors/connectionErrors.js.map +1 -0
- package/dist/react-native/errors/index.d.ts +2 -0
- package/dist/react-native/errors/index.js +4 -0
- package/dist/react-native/errors/index.js.map +1 -0
- package/dist/react-native/handlers/sessionHandlers.d.ts +250 -0
- package/dist/react-native/handlers/sessionHandlers.js +4 -0
- package/dist/react-native/handlers/sessionHandlers.js.map +1 -0
- package/dist/react-native/handlers/subscriptionManager.d.ts +54 -0
- package/dist/react-native/handlers/subscriptionManager.js +250 -0
- package/dist/react-native/handlers/subscriptionManager.js.map +1 -0
- package/dist/react-native/index.d.ts +7 -0
- package/dist/react-native/index.js +12 -0
- package/dist/react-native/index.js.map +1 -0
- package/dist/react-native/logger.d.ts +2 -0
- package/dist/react-native/logger.js +5 -0
- package/dist/react-native/logger.js.map +1 -0
- package/dist/react-native/models/index.d.ts +2 -0
- package/dist/react-native/models/index.js +4 -0
- package/dist/react-native/models/index.js.map +1 -0
- package/dist/react-native/models/models.d.ts +2154 -0
- package/dist/react-native/models/models.js +2251 -0
- package/dist/react-native/models/models.js.map +1 -0
- package/dist/react-native/package.json +3 -0
- package/dist/react-native/protocol/messageParser.d.ts +42 -0
- package/dist/react-native/protocol/messageParser.js +150 -0
- package/dist/react-native/protocol/messageParser.js.map +1 -0
- package/dist/react-native/voiceLiveClient.d.ts +65 -0
- package/dist/react-native/voiceLiveClient.js +81 -0
- package/dist/react-native/voiceLiveClient.js.map +1 -0
- package/dist/react-native/voiceLiveSession.d.ts +138 -0
- package/dist/react-native/voiceLiveSession.js +429 -0
- package/dist/react-native/voiceLiveSession.js.map +1 -0
- package/dist/react-native/websocket/connectionManager.d.ts +88 -0
- package/dist/react-native/websocket/connectionManager.js +183 -0
- package/dist/react-native/websocket/connectionManager.js.map +1 -0
- package/dist/react-native/websocket/websocketBrowser.d.ts +26 -0
- package/dist/react-native/websocket/websocketBrowser.js +175 -0
- package/dist/react-native/websocket/websocketBrowser.js.map +1 -0
- package/dist/react-native/websocket/websocketFactory.d.ts +23 -0
- package/dist/react-native/websocket/websocketFactory.js +80 -0
- package/dist/react-native/websocket/websocketFactory.js.map +1 -0
- package/dist/react-native/websocket/websocketLike.d.ts +78 -0
- package/dist/react-native/websocket/websocketLike.js +13 -0
- package/dist/react-native/websocket/websocketLike.js.map +1 -0
- package/dist/react-native/websocket/websocketNode.d.ts +26 -0
- package/dist/react-native/websocket/websocketNode.js +180 -0
- package/dist/react-native/websocket/websocketNode.js.map +1 -0
- package/package.json +150 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Copyright (c) Microsoft Corporation.
|
|
2
|
+
|
|
3
|
+
MIT License
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
# Azure VoiceLive client library for JavaScript
|
|
2
|
+
|
|
3
|
+
Azure VoiceLive is a managed service that enables low-latency, high-quality speech-to-speech interactions for voice agents. The service consolidates speech recognition, generative AI, and text-to-speech functionalities into a single, unified interface, providing an end-to-end solution for creating seamless voice-driven experiences.
|
|
4
|
+
|
|
5
|
+
Use the client library to:
|
|
6
|
+
|
|
7
|
+
- Create real-time voice assistants and conversational agents
|
|
8
|
+
- Build speech-to-speech applications with minimal latency
|
|
9
|
+
- Integrate advanced conversational features like noise suppression and echo cancellation
|
|
10
|
+
- Leverage multiple AI models (GPT-4o, GPT-4o-mini, Phi) for different use cases
|
|
11
|
+
- Implement function calling and tool integration for dynamic responses
|
|
12
|
+
- Create avatar-enabled voice interactions with visual components
|
|
13
|
+
|
|
14
|
+
> Note: This package supports both browser and Node.js environments. WebSocket connections are used for real-time communication.
|
|
15
|
+
|
|
16
|
+
## Getting started
|
|
17
|
+
|
|
18
|
+
### Currently supported environments
|
|
19
|
+
|
|
20
|
+
- [LTS versions of Node.js](https://github.com/nodejs/release#release-schedule)
|
|
21
|
+
- Latest versions of Safari, Chrome, Edge and Firefox
|
|
22
|
+
|
|
23
|
+
### Prerequisites
|
|
24
|
+
|
|
25
|
+
- An [Azure subscription](https://azure.microsoft.com/free/)
|
|
26
|
+
- An [Azure AI Foundry resource](https://learn.microsoft.com/azure/ai-services/openai/how-to/create-resource) with Voice Live API access
|
|
27
|
+
|
|
28
|
+
### Install the package
|
|
29
|
+
|
|
30
|
+
Install the Azure VoiceLive client library using npm:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
npm install @azure/ai-voicelive
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Install the identity library
|
|
37
|
+
|
|
38
|
+
VoiceLive clients authenticate using the Azure Identity Library. Install it as well:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
npm install @azure/identity
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Configure TypeScript
|
|
45
|
+
|
|
46
|
+
TypeScript users need to have Node type definitions installed:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
npm install @types/node
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
You also need to enable `compilerOptions.allowSyntheticDefaultImports` in your tsconfig.json. Note that if you have enabled `compilerOptions.esModuleInterop`, `allowSyntheticDefaultImports` is enabled by default.
|
|
53
|
+
|
|
54
|
+
### JavaScript Bundle
|
|
55
|
+
|
|
56
|
+
To use this client library in the browser, first you need to use a bundler. For details on how to do this, please refer to our [bundling documentation](https://aka.ms/AzureSDKBundling).
|
|
57
|
+
|
|
58
|
+
## Key concepts
|
|
59
|
+
|
|
60
|
+
### VoiceLiveClient
|
|
61
|
+
|
|
62
|
+
The primary interface for establishing connections to the Azure VoiceLive service. Use this client to authenticate and create sessions for real-time voice interactions.
|
|
63
|
+
|
|
64
|
+
### VoiceLiveSession
|
|
65
|
+
|
|
66
|
+
Represents an active WebSocket connection for real-time voice communication. This class handles bidirectional communication, allowing you to send audio input and receive audio output, text transcriptions, and other events in real-time.
|
|
67
|
+
|
|
68
|
+
### Session Configuration
|
|
69
|
+
|
|
70
|
+
The service uses session configuration to control various aspects of voice interaction:
|
|
71
|
+
|
|
72
|
+
- **Turn Detection**: Configure how the service detects when users start and stop speaking
|
|
73
|
+
- **Audio Processing**: Enable noise suppression and echo cancellation
|
|
74
|
+
- **Voice Selection**: Choose from standard Azure voices, high-definition voices, or custom voices
|
|
75
|
+
- **Model Selection**: Select the AI model (GPT-4o, GPT-4o-mini, Phi variants) that best fits your needs
|
|
76
|
+
|
|
77
|
+
### Models and Capabilities
|
|
78
|
+
|
|
79
|
+
The VoiceLive API supports multiple AI models with different capabilities:
|
|
80
|
+
|
|
81
|
+
| Model | Description | Use Case |
|
|
82
|
+
|-------|-------------|----------|
|
|
83
|
+
| `gpt-4o-realtime-preview` | GPT-4o with real-time audio processing | High-quality conversational AI |
|
|
84
|
+
| `gpt-4o-mini-realtime-preview` | Lightweight GPT-4o variant | Fast, efficient interactions |
|
|
85
|
+
| `phi4-mm-realtime` | Phi model with multimodal support | Cost-effective voice applications |
|
|
86
|
+
|
|
87
|
+
### Conversational Enhancements
|
|
88
|
+
|
|
89
|
+
The VoiceLive API provides Azure-specific enhancements:
|
|
90
|
+
|
|
91
|
+
- **Azure Semantic VAD**: Advanced voice activity detection that removes filler words
|
|
92
|
+
- **Noise Suppression**: Reduces environmental background noise
|
|
93
|
+
- **Echo Cancellation**: Removes echo from the model's own voice
|
|
94
|
+
- **End-of-Turn Detection**: Allows natural pauses without premature interruption
|
|
95
|
+
|
|
96
|
+
## Authenticating with Azure Active Directory
|
|
97
|
+
|
|
98
|
+
The VoiceLive service relies on Azure Active Directory to authenticate requests to its APIs. The [`@azure/identity`](https://www.npmjs.com/package/@azure/identity) package provides a variety of credential types that your application can use to do this. The [README for `@azure/identity`](https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/identity/identity/README.md) provides more details and samples to get you started.
|
|
99
|
+
|
|
100
|
+
To interact with the Azure VoiceLive service, you need to create an instance of the `VoiceLiveClient` class, which requires a **service endpoint** and a credential object. The examples shown in this document use a credential object named [`DefaultAzureCredential`][defaultazurecredential], which is appropriate for most scenarios, including local development and production environments. We recommend using a [managed identity][managed_identity] for authentication in production environments.
|
|
101
|
+
|
|
102
|
+
You can find more information on different ways of authenticating and their corresponding credential types in the [Azure Identity documentation][azure_identity].
|
|
103
|
+
|
|
104
|
+
Here's a quick example. First, import `DefaultAzureCredential` and `VoiceLiveClient`:
|
|
105
|
+
|
|
106
|
+
```ts snippet:ReadmeSampleCreateClient
|
|
107
|
+
import { DefaultAzureCredential } from "@azure/identity";
|
|
108
|
+
import { VoiceLiveClient } from "@azure/ai-voicelive";
|
|
109
|
+
|
|
110
|
+
const credential = new DefaultAzureCredential();
|
|
111
|
+
|
|
112
|
+
// Build the URL to reach your AI Foundry resource
|
|
113
|
+
const endpoint = "https://your-resource.cognitiveservices.azure.com";
|
|
114
|
+
|
|
115
|
+
// Create the VoiceLive client
|
|
116
|
+
const client = new VoiceLiveClient(endpoint, credential);
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Authentication with API Key
|
|
120
|
+
|
|
121
|
+
For development scenarios, you can also authenticate using an API key:
|
|
122
|
+
|
|
123
|
+
```ts snippet:ReadmeSampleCreateClientWithApiKey
|
|
124
|
+
import { AzureKeyCredential } from "@azure/core-auth";
|
|
125
|
+
import { VoiceLiveClient } from "@azure/ai-voicelive";
|
|
126
|
+
|
|
127
|
+
const endpoint = "https://your-resource.cognitiveservices.azure.com";
|
|
128
|
+
const credential = new AzureKeyCredential("your-api-key");
|
|
129
|
+
|
|
130
|
+
const client = new VoiceLiveClient(endpoint, credential);
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Examples
|
|
134
|
+
|
|
135
|
+
The following sections provide code snippets that cover some of the common tasks using Azure VoiceLive. The scenarios covered here consist of:
|
|
136
|
+
|
|
137
|
+
- [Creating a basic voice assistant](#creating-a-basic-voice-assistant)
|
|
138
|
+
- [Configuring session options](#configuring-session-options)
|
|
139
|
+
- [Handling real-time events](#handling-real-time-events)
|
|
140
|
+
- [Implementing function calling](#implementing-function-calling)
|
|
141
|
+
|
|
142
|
+
### Creating a basic voice assistant
|
|
143
|
+
|
|
144
|
+
This example shows how to create a simple voice assistant that can handle speech-to-speech interactions:
|
|
145
|
+
|
|
146
|
+
```ts snippet:ReadmeSampleBasicVoiceAssistant
|
|
147
|
+
import { DefaultAzureCredential } from "@azure/identity";
|
|
148
|
+
import { VoiceLiveClient } from "@azure/ai-voicelive";
|
|
149
|
+
|
|
150
|
+
const credential = new DefaultAzureCredential();
|
|
151
|
+
const endpoint = "https://your-resource.cognitiveservices.azure.com";
|
|
152
|
+
|
|
153
|
+
// Create the client
|
|
154
|
+
const client = new VoiceLiveClient(endpoint, credential);
|
|
155
|
+
|
|
156
|
+
// Create and connect a session
|
|
157
|
+
const session = await client.startSession("gpt-4o-mini-realtime-preview");
|
|
158
|
+
|
|
159
|
+
// Configure session for voice conversation
|
|
160
|
+
await session.updateSession({
|
|
161
|
+
modalities: ["text", "audio"],
|
|
162
|
+
instructions: "You are a helpful AI assistant. Respond naturally and conversationally.",
|
|
163
|
+
voice: {
|
|
164
|
+
type: "azure-standard",
|
|
165
|
+
name: "en-US-AvaNeural",
|
|
166
|
+
},
|
|
167
|
+
turnDetection: {
|
|
168
|
+
type: "server_vad",
|
|
169
|
+
threshold: 0.5,
|
|
170
|
+
prefixPaddingMs: 300,
|
|
171
|
+
silenceDurationMs: 500,
|
|
172
|
+
},
|
|
173
|
+
inputAudioFormat: "pcm16",
|
|
174
|
+
outputAudioFormat: "pcm16",
|
|
175
|
+
});
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Configuring session options
|
|
179
|
+
|
|
180
|
+
You can customize various aspects of the voice interaction:
|
|
181
|
+
|
|
182
|
+
```ts snippet:ReadmeSampleAdvancedConfiguration
|
|
183
|
+
import { DefaultAzureCredential } from "@azure/identity";
|
|
184
|
+
import { VoiceLiveClient } from "@azure/ai-voicelive";
|
|
185
|
+
|
|
186
|
+
const credential = new DefaultAzureCredential();
|
|
187
|
+
const endpoint = "https://your-resource.cognitiveservices.azure.com";
|
|
188
|
+
const client = new VoiceLiveClient(endpoint, credential);
|
|
189
|
+
const session = await client.startSession("gpt-4o-realtime-preview");
|
|
190
|
+
|
|
191
|
+
// Advanced session configuration
|
|
192
|
+
await session.updateSession({
|
|
193
|
+
modalities: ["audio", "text"],
|
|
194
|
+
instructions: "You are a customer service representative. Be helpful and professional.",
|
|
195
|
+
voice: {
|
|
196
|
+
type: "azure-custom",
|
|
197
|
+
name: "your-custom-voice-name",
|
|
198
|
+
endpointId: "your-custom-voice-endpoint",
|
|
199
|
+
},
|
|
200
|
+
turnDetection: {
|
|
201
|
+
type: "server_vad",
|
|
202
|
+
threshold: 0.6,
|
|
203
|
+
prefixPaddingMs: 200,
|
|
204
|
+
silenceDurationMs: 300,
|
|
205
|
+
},
|
|
206
|
+
inputAudioFormat: "pcm16",
|
|
207
|
+
outputAudioFormat: "pcm16",
|
|
208
|
+
});
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### Handling real-time events
|
|
212
|
+
|
|
213
|
+
The VoiceLive client provides event-driven communication for real-time interactions:
|
|
214
|
+
|
|
215
|
+
```ts snippet:ReadmeSampleEventHandling
|
|
216
|
+
import { DefaultAzureCredential } from "@azure/identity";
|
|
217
|
+
import { VoiceLiveClient } from "@azure/ai-voicelive";
|
|
218
|
+
|
|
219
|
+
const credential = new DefaultAzureCredential();
|
|
220
|
+
const endpoint = "https://your-resource.cognitiveservices.azure.com";
|
|
221
|
+
const client = new VoiceLiveClient(endpoint, credential);
|
|
222
|
+
const session = await client.startSession("gpt-4o-mini-realtime-preview");
|
|
223
|
+
|
|
224
|
+
// Set up event handlers using subscription pattern
|
|
225
|
+
const subscription = session.subscribe({
|
|
226
|
+
onResponseAudioDelta: async (event, context) => {
|
|
227
|
+
// Handle incoming audio chunks
|
|
228
|
+
const audioData = event.delta;
|
|
229
|
+
// Play audio using Web Audio API or other audio system
|
|
230
|
+
playAudioChunk(audioData);
|
|
231
|
+
},
|
|
232
|
+
|
|
233
|
+
onResponseTextDelta: async (event, context) => {
|
|
234
|
+
// Handle incoming text deltas
|
|
235
|
+
console.log("Assistant:", event.delta);
|
|
236
|
+
},
|
|
237
|
+
|
|
238
|
+
onInputAudioTranscriptionCompleted: async (event, context) => {
|
|
239
|
+
// Handle user speech transcription
|
|
240
|
+
console.log("User said:", event.transcript);
|
|
241
|
+
},
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
// Send audio data from microphone
|
|
245
|
+
function sendAudioChunk(audioBuffer: ArrayBuffer) {
|
|
246
|
+
session.sendAudio(audioBuffer);
|
|
247
|
+
}
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### Implementing function calling
|
|
251
|
+
|
|
252
|
+
Enable your voice assistant to call external functions and tools:
|
|
253
|
+
|
|
254
|
+
```ts snippet:ReadmeSampleFunctionCalling
|
|
255
|
+
import { DefaultAzureCredential } from "@azure/identity";
|
|
256
|
+
import { VoiceLiveClient } from "@azure/ai-voicelive";
|
|
257
|
+
|
|
258
|
+
const credential = new DefaultAzureCredential();
|
|
259
|
+
const endpoint = "https://your-resource.cognitiveservices.azure.com";
|
|
260
|
+
const client = new VoiceLiveClient(endpoint, credential);
|
|
261
|
+
const session = await client.startSession("gpt-4o-mini-realtime-preview");
|
|
262
|
+
|
|
263
|
+
// Define available functions
|
|
264
|
+
const tools = [
|
|
265
|
+
{
|
|
266
|
+
type: "function",
|
|
267
|
+
name: "get_weather",
|
|
268
|
+
description: "Get current weather for a location",
|
|
269
|
+
parameters: {
|
|
270
|
+
type: "object",
|
|
271
|
+
properties: {
|
|
272
|
+
location: {
|
|
273
|
+
type: "string",
|
|
274
|
+
description: "The city and state or country",
|
|
275
|
+
},
|
|
276
|
+
},
|
|
277
|
+
required: ["location"],
|
|
278
|
+
},
|
|
279
|
+
},
|
|
280
|
+
];
|
|
281
|
+
|
|
282
|
+
// Configure session with tools
|
|
283
|
+
await session.updateSession({
|
|
284
|
+
modalities: ["audio", "text"],
|
|
285
|
+
instructions:
|
|
286
|
+
"You can help users with weather information. Use the get_weather function when needed.",
|
|
287
|
+
tools: tools,
|
|
288
|
+
toolChoice: "auto",
|
|
289
|
+
});
|
|
290
|
+
|
|
291
|
+
// Handle function calls
|
|
292
|
+
const subscription = session.subscribe({
|
|
293
|
+
onResponseFunctionCallArgumentsDone: async (event, context) => {
|
|
294
|
+
if (event.name === "get_weather") {
|
|
295
|
+
const args = JSON.parse(event.arguments);
|
|
296
|
+
const weatherData = await getWeatherData(args.location);
|
|
297
|
+
|
|
298
|
+
// Send function result back
|
|
299
|
+
await session.addConversationItem({
|
|
300
|
+
type: "function_call_output",
|
|
301
|
+
callId: event.callId,
|
|
302
|
+
output: JSON.stringify(weatherData),
|
|
303
|
+
});
|
|
304
|
+
|
|
305
|
+
// Request response generation
|
|
306
|
+
await session.sendEvent({
|
|
307
|
+
type: "response.create",
|
|
308
|
+
});
|
|
309
|
+
}
|
|
310
|
+
},
|
|
311
|
+
});
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
## Troubleshooting
|
|
315
|
+
|
|
316
|
+
### Common errors and exceptions
|
|
317
|
+
|
|
318
|
+
**Authentication Errors**: If you receive authentication errors, verify that:
|
|
319
|
+
- Your Azure AI Foundry resource is correctly configured
|
|
320
|
+
- Your API key or credential has the necessary permissions
|
|
321
|
+
- The endpoint URL is correct and accessible
|
|
322
|
+
|
|
323
|
+
**WebSocket Connection Issues**: VoiceLive uses WebSocket connections. Ensure that:
|
|
324
|
+
- Your network allows WebSocket connections
|
|
325
|
+
- Firewall rules permit connections to `*.cognitiveservices.azure.com`
|
|
326
|
+
- Browser policies allow WebSocket and microphone access (for browser usage)
|
|
327
|
+
|
|
328
|
+
**Audio Issues**: For audio-related problems:
|
|
329
|
+
- Verify microphone permissions in the browser
|
|
330
|
+
- Check that audio formats (PCM16, PCM24) are supported
|
|
331
|
+
- Ensure proper audio context setup for playback
|
|
332
|
+
|
|
333
|
+
### Logging
|
|
334
|
+
|
|
335
|
+
Enabling logging may help uncover useful information about failures. In order to see a log of WebSocket messages and responses, set the `AZURE_LOG_LEVEL` environment variable to `info`. Alternatively, logging can be enabled at runtime by calling `setLogLevel` from the `@azure/logger` package:
|
|
336
|
+
|
|
337
|
+
```ts snippet:SetLogLevel
|
|
338
|
+
import { setLogLevel } from "@azure/logger";
|
|
339
|
+
|
|
340
|
+
setLogLevel("info");
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
For more detailed instructions on how to enable logs, you can look at the [@azure/logger package docs](https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/core/logger).
|
|
344
|
+
|
|
345
|
+
## Next steps
|
|
346
|
+
|
|
347
|
+
You can find more code samples through the following links:
|
|
348
|
+
|
|
349
|
+
- [VoiceLive Samples (JavaScript/TypeScript)](https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/ai/ai-voicelive/samples)
|
|
350
|
+
- [VoiceLive Test Cases](https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/ai/ai-voicelive/test)
|
|
351
|
+
|
|
352
|
+
## Contributing
|
|
353
|
+
|
|
354
|
+
If you'd like to contribute to this library, please read the [contributing guide](https://github.com/Azure/azure-sdk-for-js/blob/main/CONTRIBUTING.md) to learn more about how to build and test the code.
|
|
355
|
+
|
|
356
|
+
[defaultazurecredential]: https://learn.microsoft.com/javascript/api/@azure/identity/defaultazurecredential?view=azure-node-latest
|
|
357
|
+
[managed_identity]: https://learn.microsoft.com/azure/active-directory/managed-identities-azure-resources/overview
|
|
358
|
+
[azure_identity]: https://learn.microsoft.com/javascript/api/overview/azure/identity-readme?view=azure-node-latest
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { TokenCredential, KeyCredential } from "@azure/core-auth";
|
|
2
|
+
/**
|
|
3
|
+
* Union type for supported credential types
|
|
4
|
+
*/
|
|
5
|
+
export type VoiceLiveCredential = TokenCredential | KeyCredential;
|
|
6
|
+
/**
|
|
7
|
+
* Handles both Azure TokenCredential and KeyCredential authentication for Voice Live service.
* The credential kind is detected once in the constructor. Key credentials yield the raw API key;
* token credentials yield a cached token that is re-acquired when it is within the refresh buffer of expiry.
|
|
8
|
+
*/
|
|
9
|
+
export declare class CredentialHandler {
|
|
10
|
+
/** Most recently acquired access token; only populated for token credentials. */
private _accessToken?;
|
|
11
|
+
/** Scope passed to TokenCredential.getToken(). */
private readonly _scope;
|
|
12
|
+
/** Milliseconds before expiry at which a cached token is no longer considered valid. */
private readonly _tokenRefreshBuffer;
|
|
13
|
+
/** The credential supplied to the constructor. */
private readonly _credential;
|
|
14
|
+
/** True when the supplied credential exposes a string `key` property. */
private readonly _isApiKey;
|
|
15
|
+
/**
* @param credential - KeyCredential or TokenCredential used for authentication.
* @param scope - Token scope; when omitted or empty, "https://cognitiveservices.azure.com/.default" is used.
*/
constructor(credential: VoiceLiveCredential, scope?: string);
|
|
16
|
+
/**
|
|
17
|
+
* Gets a valid access token or API key, refreshing if necessary.
* For key credentials the current key is returned as-is. For token credentials the cached
* token is reused while still valid, otherwise a new one is requested for the configured scope.
* @returns The API key or the token string.
* @throws VoiceLiveAuthenticationError when the token credential fails or returns no token.
|
|
18
|
+
*/
|
|
19
|
+
getAccessToken(): Promise<string>;
|
|
20
|
+
/**
|
|
21
|
+
* Builds the WebSocket URL with authentication.
* The path is replaced with "/voice-live/realtime", "api-version" is always set, and "model" is set
* when provided. Key credentials add the key as the "api-key" query parameter; tokens are not
* placed in the URL (they are delivered through getAuthHeaders()).
* @param baseEndpoint - Absolute endpoint URL the WebSocket URL is derived from.
* @param apiVersion - Value for the "api-version" query parameter.
* @param model - Optional value for the "model" query parameter.
* @returns The serialized WebSocket URL.
|
|
22
|
+
*/
|
|
23
|
+
getWebSocketUrl(baseEndpoint: string, apiVersion: string, model?: string): Promise<string>;
|
|
24
|
+
/**
|
|
25
|
+
* Gets authentication headers for the WebSocket connection.
* Always includes "X-MS-Client-Request-ID" and "User-Agent"; adds "api-key" for key credentials,
* otherwise "Authorization: Bearer <token>".
|
|
26
|
+
*/
|
|
27
|
+
getAuthHeaders(): Promise<Record<string, string>>;
|
|
28
|
+
/**
|
|
29
|
+
* Returns the type of credential being used: "key" for a KeyCredential, "token" otherwise.
|
|
30
|
+
*/
|
|
31
|
+
get credentialType(): "key" | "token";
|
|
32
|
+
/**
|
|
33
|
+
* Returns whether this is using an API key credential
|
|
34
|
+
*/
|
|
35
|
+
get isApiKey(): boolean;
|
|
36
|
+
/**
|
|
37
|
+
* For API key credentials, allows updating the key by delegating to the credential's update() function.
* @param newKey - Replacement API key.
* @throws VoiceLiveAuthenticationError if the handler wraps a TokenCredential, or if the
* KeyCredential does not expose an update() function.
|
|
38
|
+
*/
|
|
39
|
+
updateApiKey(newKey: string): void;
|
|
40
|
+
/** Checks that a token's expiry lies beyond now plus the refresh buffer. */
private _isTokenValid;
|
|
41
|
+
/** Produces the value sent in the "X-MS-Client-Request-ID" header. */
private _generateRequestId;
|
|
42
|
+
}
|
|
43
|
+
//# sourceMappingURL=credentialHandler.d.ts.map
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
// Copyright (c) Microsoft Corporation.
|
|
2
|
+
// Licensed under the MIT License.
|
|
3
|
+
import { VoiceLiveAuthenticationError, VoiceLiveErrorCodes } from "../errors/index.js";
|
|
4
|
+
import { logger } from "../logger.js";
|
|
5
|
+
/**
 * Type guard to check if credential is a KeyCredential.
 *
 * A value qualifies when it is a non-null object (or function) exposing a
 * string `key` property. `null`, `undefined` and primitives are reported as
 * "not a KeyCredential" instead of triggering the TypeError that the `in`
 * operator raises for non-object operands.
 *
 * @param credential - The credential value to inspect.
 * @returns `true` when `credential.key` is a string, otherwise `false`.
 */
function isKeyCredential(credential) {
    const kind = typeof credential;
    // The `in` operator is only defined for objects and functions.
    if (credential === null || (kind !== "object" && kind !== "function")) {
        return false;
    }
    return "key" in credential && typeof credential.key === "string";
}
|
|
11
|
+
/**
 * Handles both Azure TokenCredential and KeyCredential authentication for Voice Live service.
 *
 * The credential kind is determined once at construction time. Key credentials
 * hand back their current key on every call; token credentials are cached and
 * re-acquired once the cached token is within the refresh buffer of expiry.
 */
export class CredentialHandler {
    /** Most recently acquired access token; only populated for token credentials. */
    _accessToken;
    /** Scope passed to `getToken()` on the token credential. */
    _scope;
    /** A cached token is refreshed once it is within this many milliseconds of expiry. */
    _tokenRefreshBuffer = 5 * 60 * 1000; // 5 minutes
    /** The credential supplied by the caller. */
    _credential;
    /** True when `_credential` is a KeyCredential. */
    _isApiKey;
    /**
     * @param credential - KeyCredential or TokenCredential used for authentication.
     * @param scope - Token scope; when omitted or empty the Cognitive Services default scope is used.
     */
    constructor(credential, scope) {
        this._credential = credential;
        this._isApiKey = isKeyCredential(credential);
        // Voice Live specific scope - may need adjustment based on actual service
        this._scope = scope || "https://cognitiveservices.azure.com/.default";
        logger.info("CredentialHandler initialized", {
            credentialType: this._isApiKey ? "KeyCredential" : "TokenCredential",
            scope: this._scope,
        });
    }
    /**
     * Gets a valid access token or API key, refreshing if necessary.
     *
     * @returns The API key (key credentials) or the token string (token credentials).
     * @throws VoiceLiveAuthenticationError when the token credential rejects or returns no token.
     */
    async getAccessToken() {
        // For API Key credentials, return the key directly. It is read on every
        // call so that a key rotated through updateApiKey() is picked up.
        if (this._isApiKey) {
            const keyCredential = this._credential;
            logger.info("Using API key for authentication");
            return keyCredential.key;
        }
        // For Token credentials, handle token lifecycle
        const tokenCredential = this._credential;
        // Reuse the cached token while it is still outside the refresh buffer.
        if (this._accessToken && this._isTokenValid(this._accessToken)) {
            return this._accessToken.token;
        }
        try {
            logger.info("Acquiring new access token", { scope: this._scope });
            const tokenResponse = await tokenCredential.getToken(this._scope);
            if (!tokenResponse) {
                throw new VoiceLiveAuthenticationError("Failed to acquire access token - credential returned null", VoiceLiveErrorCodes.AuthenticationFailed);
            }
            this._accessToken = tokenResponse;
            logger.info("Successfully acquired access token", {
                expiresAt: new Date(this._accessToken.expiresOnTimestamp),
            });
            return this._accessToken.token;
        }
        catch (error) {
            logger.error("Failed to obtain access token", { error, scope: this._scope });
            // Errors raised above are already of the right type; pass them through untouched.
            if (error instanceof VoiceLiveAuthenticationError) {
                throw error;
            }
            // Wrap anything else, keeping the original failure as the cause.
            throw new VoiceLiveAuthenticationError(`Failed to obtain access token: ${error instanceof Error ? error.message : "Unknown error"}`, VoiceLiveErrorCodes.AuthenticationFailed, error instanceof Error ? error : new Error(String(error)));
        }
    }
    /**
     * Builds the WebSocket URL with authentication.
     *
     * The path of `baseEndpoint` is replaced with the Voice Live realtime path,
     * `api-version` is always set and `model` is set when provided. For key
     * credentials the key is appended as the `api-key` query parameter; bearer
     * tokens are never placed in the URL and are delivered via getAuthHeaders().
     *
     * @param baseEndpoint - Absolute endpoint URL (`https:`/`wss:` yield `wss:`, anything else `ws:`).
     * @param apiVersion - Value for the `api-version` query parameter.
     * @param model - Optional value for the `model` query parameter.
     * @returns The serialized WebSocket URL.
     * @throws VoiceLiveAuthenticationError when credentials cannot be obtained.
     */
    async getWebSocketUrl(baseEndpoint, apiVersion, model) {
        // Resolved up front so authentication failures surface before a URL is
        // returned, even when the value itself is only needed for key credentials.
        const authValue = await this.getAccessToken();
        const url = new URL(baseEndpoint);
        // Both secure schemes map to wss:. Previously only "https:" did, which
        // silently downgraded an endpoint already given as "wss://..." to
        // unencrypted "ws://" (and, for key credentials, exposed the key in transit).
        const isSecure = url.protocol === "https:" || url.protocol === "wss:";
        url.protocol = isSecure ? "wss:" : "ws:";
        url.pathname = "/voice-live/realtime"; // Voice Live WebSocket endpoint path
        url.searchParams.set("api-version", apiVersion);
        // Add model parameter if provided
        if (model) {
            url.searchParams.set("model", model);
        }
        // API keys travel as a query parameter; tokens are sent through the
        // headers produced by getAuthHeaders() instead.
        if (this._isApiKey) {
            url.searchParams.set("api-key", authValue);
        }
        return url.toString();
    }
    /**
     * Gets authentication headers for the WebSocket connection.
     *
     * @returns A header map that always contains `X-MS-Client-Request-ID` and
     * `User-Agent`, plus `api-key` for key credentials or
     * `Authorization: Bearer <token>` for token credentials.
     * @throws VoiceLiveAuthenticationError when credentials cannot be obtained.
     */
    async getAuthHeaders() {
        const authValue = await this.getAccessToken();
        const headers = {
            "X-MS-Client-Request-ID": this._generateRequestId(),
            "User-Agent": "Azure-Voice-Live-SDK-JS/1.0.0",
        };
        // Add appropriate authentication header based on credential type
        if (this._isApiKey) {
            // API keys are sent in the "api-key" header.
            headers["api-key"] = authValue;
        }
        else {
            // For tokens, use standard Bearer authorization
            headers["Authorization"] = `Bearer ${authValue}`;
        }
        return headers;
    }
    /**
     * Returns the type of credential being used: "key" or "token".
     */
    get credentialType() {
        return this._isApiKey ? "key" : "token";
    }
    /**
     * Returns whether this is using an API key credential
     */
    get isApiKey() {
        return this._isApiKey;
    }
    /**
     * For API key credentials, allows updating the key.
     *
     * @param newKey - Replacement API key, forwarded to the credential's `update()` function.
     * @throws VoiceLiveAuthenticationError when the handler wraps a token
     * credential, or when the key credential has no `update()` function.
     */
    updateApiKey(newKey) {
        if (!this._isApiKey) {
            throw new VoiceLiveAuthenticationError("Cannot update API key on TokenCredential", VoiceLiveErrorCodes.InvalidCredentials);
        }
        const keyCredential = this._credential;
        // Only credentials that expose update() can be rotated in place.
        if ("update" in keyCredential && typeof keyCredential.update === "function") {
            keyCredential.update(newKey);
            logger.info("API key updated");
        }
        else {
            throw new VoiceLiveAuthenticationError("KeyCredential does not support key updates", VoiceLiveErrorCodes.InvalidCredentials);
        }
    }
    /**
     * Reports whether a token's expiry lies further away than the refresh buffer.
     *
     * @param token - Access token carrying an `expiresOnTimestamp` in milliseconds.
     * @returns `true` when the token can still be reused.
     */
    _isTokenValid(token) {
        const expiresAt = token.expiresOnTimestamp;
        const now = Date.now();
        return expiresAt > now + this._tokenRefreshBuffer;
    }
    /**
     * Builds the value sent as `X-MS-Client-Request-ID`: the current timestamp
     * followed by a short base-36 random suffix.
     */
    _generateRequestId() {
        return `${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
    }
}
|
|
147
|
+
//# sourceMappingURL=credentialHandler.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"credentialHandler.js","sourceRoot":"","sources":["../../../src/auth/credentialHandler.ts"],"names":[],"mappings":"AAAA,uCAAuC;AACvC,kCAAkC;AAGlC,OAAO,EAAE,4BAA4B,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACvF,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAOtC;;GAEG;AACH,SAAS,eAAe,CAAC,UAA+B;IACtD,OAAO,KAAK,IAAI,UAAU,IAAI,OAAQ,UAA4B,CAAC,GAAG,KAAK,QAAQ,CAAC;AACtF,CAAC;AAED;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,YAAY,CAAe;IAClB,MAAM,CAAS;IACf,mBAAmB,GAAG,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,YAAY;IACjD,WAAW,CAAsB;IACjC,SAAS,CAAU;IAEpC,YAAY,UAA+B,EAAE,KAAc;QACzD,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;QAC9B,IAAI,CAAC,SAAS,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;QAE7C,0EAA0E;QAC1E,IAAI,CAAC,MAAM,GAAG,KAAK,IAAI,8CAA8C,CAAC;QAEtE,MAAM,CAAC,IAAI,CAAC,+BAA+B,EAAE;YAC3C,cAAc,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,iBAAiB;YACpE,KAAK,EAAE,IAAI,CAAC,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,cAAc;QAClB,mDAAmD;QACnD,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,aAAa,GAAG,IAAI,CAAC,WAA4B,CAAC;YACxD,MAAM,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;YAChD,OAAO,aAAa,CAAC,GAAG,CAAC;QAC3B,CAAC;QAED,gDAAgD;QAChD,MAAM,eAAe,GAAG,IAAI,CAAC,WAA8B,CAAC;QAE5D,wCAAwC;QACxC,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,CAAC;YAC/D,OAAO,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC;QACjC,CAAC;QAED,IAAI,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,4BAA4B,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;YAElE,gCAAgC;YAChC,MAAM,aAAa,GAAG,MAAM,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAElE,IAAI,CAAC,aAAa,EAAE,CAAC;gBACnB,MAAM,IAAI,4BAA4B,CACpC,2DAA2D,EAC3D,mBAAmB,CAAC,oBAAoB,CACzC,CAAC;YACJ,CAAC;YAED,IAAI,CAAC,YAAY,GAAG,aAAa,CAAC;YAElC,MAAM,CAAC,IAAI,CAAC,oCAAoC,EAAE;gBAChD,SAAS,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,kBAAkB,CAAC;aAC1D,CAAC,CAAC;YAEH,OAAO,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC;QACjC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,+BAA+B,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;YAE7E,IAAI,KAAK,YAAY,4BAA4B,EAAE,CAAC;gBAClD,MAAM,KAAK,CAAC;YACd,CAAC;YAED,MAAM,IAAI,4BAA4B,CACpC,kCAAkC,
KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,EAC5F,mBAAmB,CAAC,oBAAoB,EACxC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAC1D,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,eAAe,CAAC,YAAoB,EAAE,UAAkB,EAAE,KAAc;QAC5E,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAE9C,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;QAClC,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC;QAC1D,GAAG,CAAC,QAAQ,GAAG,sBAAsB,CAAC,CAAC,qCAAqC;QAC5E,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,aAAa,EAAE,UAAU,CAAC,CAAC;QAEhD,kCAAkC;QAClC,IAAI,KAAK,EAAE,CAAC;YACV,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QACvC,CAAC;QAED,uCAAuC;QACvC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,wDAAwD;YACxD,8CAA8C;QAChD,CAAC;QAED,OAAO,GAAG,CAAC,QAAQ,EAAE,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,cAAc;QAClB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAE9C,MAAM,OAAO,GAA2B;YACtC,wBAAwB,EAAE,IAAI,CAAC,kBAAkB,EAAE;YACnD,YAAY,EAAE,+BAA+B;SAC9C,CAAC;QAEF,iEAAiE;QACjE,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,oDAAoD;YACpD,OAAO,CAAC,SAAS,CAAC,GAAG,SAAS,CAAC;QACjC,CAAC;aAAM,CAAC;YACN,gDAAgD;YAChD,OAAO,CAAC,eAAe,CAAC,GAAG,UAAU,SAAS,EAAE,CAAC;QACnD,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,IAAI,cAAc;QAChB,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC;IAC1C,CAAC;IAED;;OAEG;IACH,IAAI,QAAQ;QACV,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,MAAc;QACzB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,4BAA4B,CACpC,0CAA0C,EAC1C,mBAAmB,CAAC,kBAAkB,CACvC,CAAC;QACJ,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,CAAC,WAA4B,CAAC;QACxD,IAAI,QAAQ,IAAI,aAAa,IAAI,OAAO,aAAa,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YAC5E,aAAa,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QACjC,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,4BAA4B,CACpC,4CAA4C,EAC5C,mBAAmB,CAAC,kBAAkB,CACvC,CAAC;QACJ,CAAC;IACH,CAAC;IAEO,aAAa,CAAC,KAAkB;QACtC,MAAM,SAAS,GAAG
,KAAK,CAAC,kBAAkB,CAAC;QAC3C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,OAAO,SAAS,GAAG,GAAG,GAAG,IAAI,CAAC,mBAAmB,CAAC;IACpD,CAAC;IAEO,kBAAkB;QACxB,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;IACxE,CAAC;CACF","sourcesContent":["// Copyright (c) Microsoft Corporation.\n// Licensed under the MIT License.\n\nimport type { TokenCredential, KeyCredential, AccessToken } from \"@azure/core-auth\";\nimport { VoiceLiveAuthenticationError, VoiceLiveErrorCodes } from \"../errors/index.js\";\nimport { logger } from \"../logger.js\";\n\n/**\n * Union type for supported credential types\n */\nexport type VoiceLiveCredential = TokenCredential | KeyCredential;\n\n/**\n * Type guard to check if credential is a KeyCredential\n */\nfunction isKeyCredential(credential: VoiceLiveCredential): credential is KeyCredential {\n return \"key\" in credential && typeof (credential as KeyCredential).key === \"string\";\n}\n\n/**\n * Handles both Azure TokenCredential and KeyCredential authentication for Voice Live service\n */\nexport class CredentialHandler {\n private _accessToken?: AccessToken;\n private readonly _scope: string;\n private readonly _tokenRefreshBuffer = 5 * 60 * 1000; // 5 minutes\n private readonly _credential: VoiceLiveCredential;\n private readonly _isApiKey: boolean;\n\n constructor(credential: VoiceLiveCredential, scope?: string) {\n this._credential = credential;\n this._isApiKey = isKeyCredential(credential);\n\n // Voice Live specific scope - may need adjustment based on actual service\n this._scope = scope || \"https://cognitiveservices.azure.com/.default\";\n\n logger.info(\"CredentialHandler initialized\", {\n credentialType: this._isApiKey ? 
\"KeyCredential\" : \"TokenCredential\",\n scope: this._scope,\n });\n }\n\n /**\n * Gets a valid access token or API key, refreshing if necessary\n */\n async getAccessToken(): Promise<string> {\n // For API Key credentials, return the key directly\n if (this._isApiKey) {\n const keyCredential = this._credential as KeyCredential;\n logger.info(\"Using API key for authentication\");\n return keyCredential.key;\n }\n\n // For Token credentials, handle token lifecycle\n const tokenCredential = this._credential as TokenCredential;\n\n // Check if current token is still valid\n if (this._accessToken && this._isTokenValid(this._accessToken)) {\n return this._accessToken.token;\n }\n\n try {\n logger.info(\"Acquiring new access token\", { scope: this._scope });\n\n // Get new token from credential\n const tokenResponse = await tokenCredential.getToken(this._scope);\n\n if (!tokenResponse) {\n throw new VoiceLiveAuthenticationError(\n \"Failed to acquire access token - credential returned null\",\n VoiceLiveErrorCodes.AuthenticationFailed,\n );\n }\n\n this._accessToken = tokenResponse;\n\n logger.info(\"Successfully acquired access token\", {\n expiresAt: new Date(this._accessToken.expiresOnTimestamp),\n });\n\n return this._accessToken.token;\n } catch (error) {\n logger.error(\"Failed to obtain access token\", { error, scope: this._scope });\n\n if (error instanceof VoiceLiveAuthenticationError) {\n throw error;\n }\n\n throw new VoiceLiveAuthenticationError(\n `Failed to obtain access token: ${error instanceof Error ? error.message : \"Unknown error\"}`,\n VoiceLiveErrorCodes.AuthenticationFailed,\n error instanceof Error ? error : new Error(String(error)),\n );\n }\n }\n\n /**\n * Builds the WebSocket URL with authentication\n */\n async getWebSocketUrl(baseEndpoint: string, apiVersion: string, model?: string): Promise<string> {\n const authValue = await this.getAccessToken();\n\n const url = new URL(baseEndpoint);\n url.protocol = url.protocol === \"https:\" ? 
\"wss:\" : \"ws:\";\n url.pathname = \"/voice-live/realtime\"; // Voice Live WebSocket endpoint path\n url.searchParams.set(\"api-version\", apiVersion);\n\n // Add model parameter if provided\n if (model) {\n url.searchParams.set(\"model\", model);\n }\n\n // For API keys, add as query parameter\n if (this._isApiKey) {\n url.searchParams.set(\"api-key\", authValue);\n } else {\n // For tokens, we'll use headers instead of query params\n // The token will be added in getAuthHeaders()\n }\n\n return url.toString();\n }\n\n /**\n * Gets authentication headers for the WebSocket connection\n */\n async getAuthHeaders(): Promise<Record<string, string>> {\n const authValue = await this.getAccessToken();\n\n const headers: Record<string, string> = {\n \"X-MS-Client-Request-ID\": this._generateRequestId(),\n \"User-Agent\": \"Azure-Voice-Live-SDK-JS/1.0.0\",\n };\n\n // Add appropriate authentication header based on credential type\n if (this._isApiKey) {\n // For API keys, use the X-API-Key header or similar\n headers[\"api-key\"] = authValue;\n } else {\n // For tokens, use standard Bearer authorization\n headers[\"Authorization\"] = `Bearer ${authValue}`;\n }\n\n return headers;\n }\n\n /**\n * Returns the type of credential being used\n */\n get credentialType(): \"key\" | \"token\" {\n return this._isApiKey ? 
\"key\" : \"token\";\n }\n\n /**\n * Returns whether this is using an API key credential\n */\n get isApiKey(): boolean {\n return this._isApiKey;\n }\n\n /**\n * For API key credentials, allows updating the key\n */\n updateApiKey(newKey: string): void {\n if (!this._isApiKey) {\n throw new VoiceLiveAuthenticationError(\n \"Cannot update API key on TokenCredential\",\n VoiceLiveErrorCodes.InvalidCredentials,\n );\n }\n\n const keyCredential = this._credential as KeyCredential;\n if (\"update\" in keyCredential && typeof keyCredential.update === \"function\") {\n keyCredential.update(newKey);\n logger.info(\"API key updated\");\n } else {\n throw new VoiceLiveAuthenticationError(\n \"KeyCredential does not support key updates\",\n VoiceLiveErrorCodes.InvalidCredentials,\n );\n }\n }\n\n private _isTokenValid(token: AccessToken): boolean {\n const expiresAt = token.expiresOnTimestamp;\n const now = Date.now();\n return expiresAt > now + this._tokenRefreshBuffer;\n }\n\n private _generateRequestId(): string {\n return `${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;\n }\n}\n"]}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Error codes for Voice Live WebSocket operations.
* String enum: every member maps to an upper-case string identifier, which is the value
* carried in the `code` field of the Voice Live error classes.
|
|
3
|
+
*/
|
|
4
|
+
export declare enum VoiceLiveErrorCodes {
|
|
5
|
+
ConnectionFailed = "CONNECTION_FAILED",
|
|
6
|
+
ConnectionTimeout = "CONNECTION_TIMEOUT",
|
|
7
|
+
ConnectionLost = "CONNECTION_LOST",
|
|
8
|
+
AlreadyConnected = "ALREADY_CONNECTED",
|
|
9
|
+
NotConnected = "NOT_CONNECTED",
|
|
10
|
+
WebSocketError = "WEBSOCKET_ERROR",
|
|
11
|
+
AuthenticationFailed = "AUTHENTICATION_FAILED",
|
|
12
|
+
InvalidCredentials = "INVALID_CREDENTIALS",
|
|
13
|
+
Unauthorized = "UNAUTHORIZED",
|
|
14
|
+
Forbidden = "FORBIDDEN",
|
|
15
|
+
InvalidMessage = "INVALID_MESSAGE",
|
|
16
|
+
MessageTooLarge = "MESSAGE_TOO_LARGE",
|
|
17
|
+
ProtocolError = "PROTOCOL_ERROR",
|
|
18
|
+
BufferOverflow = "BUFFER_OVERFLOW",
|
|
19
|
+
OperationCancelled = "OPERATION_CANCELLED",
|
|
20
|
+
InvalidState = "INVALID_STATE"
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Base error class for Voice Live WebSocket operations.
* Extends the built-in Error; the authentication, protocol and general Voice Live error classes derive from it.
|
|
24
|
+
*/
|
|
25
|
+
export declare class VoiceLiveConnectionError extends Error {
|
|
26
|
+
/** Error code identifying the specific error type */
|
|
27
|
+
readonly code: string;
|
|
28
|
+
/** Context information about where the error occurred */
|
|
29
|
+
readonly context: string;
|
|
30
|
+
/** Indicates whether this error is potentially recoverable */
|
|
31
|
+
readonly recoverable: boolean;
|
|
32
|
+
/** The original error that caused this error, if any */
|
|
33
|
+
readonly cause?: Error;
|
|
34
|
+
/** Timestamp when the error occurred */
|
|
35
|
+
readonly timestamp: Date;
|
|
36
|
+
/**
* @param message - Human-readable error message.
* @param code - Error code identifying the specific error type.
* @param context - Optional context information (default not visible in this declaration).
* @param recoverable - Optional recoverability flag (default not visible in this declaration).
* @param cause - Optional original error.
*/
constructor(message: string, code: string, context?: string, recoverable?: boolean, cause?: Error);
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Authentication error class for Voice Live operations.
* Specialization of VoiceLiveConnectionError whose constructor takes only a message, a code and an optional cause.
|
|
40
|
+
*/
|
|
41
|
+
export declare class VoiceLiveAuthenticationError extends VoiceLiveConnectionError {
|
|
42
|
+
/**
* @param message - Human-readable error message.
* @param code - Error code identifying the specific error type.
* @param cause - Optional original error.
*/
constructor(message: string, code: string, cause?: Error);
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Protocol error class for Voice Live message operations.
* Specialization of VoiceLiveConnectionError whose constructor takes only a message, a code and an optional cause.
|
|
46
|
+
*/
|
|
47
|
+
export declare class VoiceLiveProtocolError extends VoiceLiveConnectionError {
|
|
48
|
+
/**
* @param message - Human-readable error message.
* @param code - Error code identifying the specific error type.
* @param cause - Optional original error.
*/
constructor(message: string, code: string, cause?: Error);
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* General Voice Live error class.
* Specialization of VoiceLiveConnectionError that exposes the same constructor parameters as its base class.
|
|
52
|
+
*/
|
|
53
|
+
export declare class VoiceLiveError extends VoiceLiveConnectionError {
|
|
54
|
+
/**
* @param message - Human-readable error message.
* @param code - Error code identifying the specific error type.
* @param context - Optional context information.
* @param recoverable - Optional recoverability flag.
* @param cause - Optional original error.
*/
constructor(message: string, code: string, context?: string, recoverable?: boolean, cause?: Error);
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Classifies a WebSocket close event and returns appropriate error.
* @param code - Numeric close code (presumably the WebSocket CloseEvent code; confirm against the implementation).
* @param reason - Close reason text.
* @returns A VoiceLiveConnectionError describing the close.
|
|
58
|
+
*/
|
|
59
|
+
export declare function classifyWebSocketClose(code: number, reason: string): VoiceLiveConnectionError;
|
|
60
|
+
/**
|
|
61
|
+
* Classifies connection errors.
* @param error - Any value: an existing VoiceLiveConnectionError, a plain Error, or an unknown value.
* @returns A VoiceLiveConnectionError representing the input.
|
|
62
|
+
*/
|
|
63
|
+
export declare function classifyConnectionError(error: VoiceLiveConnectionError | Error | unknown): VoiceLiveConnectionError;
|
|
64
|
+
/**
|
|
65
|
+
* Classifies protocol errors.
* @param error - The error to classify.
* @param messageType - Type of the message associated with the error.
* @returns A VoiceLiveProtocolError representing the input.
|
|
66
|
+
*/
|
|
67
|
+
export declare function classifyProtocolError(error: Error, messageType: string): VoiceLiveProtocolError;
|
|
68
|
+
//# sourceMappingURL=connectionErrors.d.ts.map
|