@myscheme/voice-navigation-sdk 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +587 -134
- package/dist/actions.d.ts +3 -0
- package/dist/actions.d.ts.map +1 -1
- package/dist/actions.js +37 -97
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -1
- package/dist/navigation-controller.d.ts +3 -0
- package/dist/navigation-controller.d.ts.map +1 -1
- package/dist/navigation-controller.js +57 -8
- package/dist/services/bedrock.d.ts +4 -0
- package/dist/services/bedrock.d.ts.map +1 -1
- package/dist/services/bedrock.js +52 -15
- package/dist/services/page-registry.d.ts +25 -0
- package/dist/services/page-registry.d.ts.map +1 -0
- package/dist/services/page-registry.js +82 -0
- package/dist/services/xml-parser.d.ts +9 -0
- package/dist/services/xml-parser.d.ts.map +1 -0
- package/dist/services/xml-parser.js +60 -0
- package/dist/types.d.ts +14 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -2,32 +2,35 @@
|
|
|
2
2
|
|
|
3
3
|
A TypeScript SDK for voice-controlled navigation using Azure Speech-to-Text and AWS Bedrock for intent understanding.
|
|
4
4
|
|
|
5
|
-
## Features
|
|
5
|
+
## 🚀 Features
|
|
6
6
|
|
|
7
|
-
- 🎤 Real-time speech recognition using Azure Speech SDK
|
|
8
|
-
- 🤖
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
-
|
|
7
|
+
- 🎤 **Real-time speech recognition** using Azure Speech SDK
|
|
8
|
+
- 🤖 **AI-powered intent extraction** using AWS Bedrock (Claude)
|
|
9
|
+
- 🌐 **Dynamic page navigation** via XML configuration
|
|
10
|
+
- 🧭 **Voice-controlled navigation** actions
|
|
11
|
+
- 🖱️ **Rich browser and media controls**
|
|
12
|
+
- ♿ **Accessibility-first design**
|
|
13
|
+
- 🎨 **Customizable floating UI** control
|
|
14
|
+
- 📦 **Full TypeScript support**
|
|
15
|
+
- 🔄 **Reusable across multiple websites**
|
|
16
|
+
- 🔍 **Vector search integration** with OpenSearch
|
|
14
17
|
|
|
15
|
-
## Installation
|
|
18
|
+
## 📦 Installation
|
|
16
19
|
|
|
17
20
|
```bash
|
|
18
21
|
npm install @myscheme/voice-navigation-sdk
|
|
19
22
|
```
|
|
20
23
|
|
|
21
|
-
|
|
24
|
+
### Dependencies
|
|
22
25
|
|
|
23
|
-
|
|
26
|
+
The following packages are automatically installed:
|
|
24
27
|
|
|
25
|
-
- `@aws-sdk/client-bedrock-runtime` -
|
|
26
|
-
- `microsoft-cognitiveservices-speech-sdk` -
|
|
28
|
+
- `@aws-sdk/client-bedrock-runtime` - AWS Bedrock integration
|
|
29
|
+
- `microsoft-cognitiveservices-speech-sdk` - Azure Speech-to-Text
|
|
27
30
|
|
|
28
|
-
|
|
31
|
+
## 🎯 Quick Start
|
|
29
32
|
|
|
30
|
-
|
|
33
|
+
### Basic Setup
|
|
31
34
|
|
|
32
35
|
```typescript
|
|
33
36
|
import { initNavigationOnMicrophone } from "@myscheme/voice-navigation-sdk";
|
|
@@ -36,7 +39,7 @@ const controller = initNavigationOnMicrophone({
|
|
|
36
39
|
// Azure Speech configuration
|
|
37
40
|
azure: {
|
|
38
41
|
subscriptionKey: "your-azure-subscription-key",
|
|
39
|
-
region: "
|
|
42
|
+
region: "centralindia", // e.g., 'eastus', 'westus'
|
|
40
43
|
},
|
|
41
44
|
|
|
42
45
|
// AWS Bedrock configuration
|
|
@@ -44,7 +47,7 @@ const controller = initNavigationOnMicrophone({
|
|
|
44
47
|
accessKeyId: "your-aws-access-key-id",
|
|
45
48
|
secretAccessKey: "your-aws-secret-access-key",
|
|
46
49
|
modelId: "anthropic.claude-3-sonnet-20240229-v1:0",
|
|
47
|
-
region: "ap-south-1",
|
|
50
|
+
region: "ap-south-1",
|
|
48
51
|
},
|
|
49
52
|
|
|
50
53
|
// Optional: Default language
|
|
@@ -55,9 +58,28 @@ const controller = initNavigationOnMicrophone({
|
|
|
55
58
|
});
|
|
56
59
|
```
|
|
57
60
|
|
|
58
|
-
|
|
61
|
+
### With Dynamic Pages
|
|
59
62
|
|
|
60
|
-
|
|
63
|
+
```typescript
|
|
64
|
+
const controller = initNavigationOnMicrophone({
|
|
65
|
+
azure: {
|
|
66
|
+
/* ... */
|
|
67
|
+
},
|
|
68
|
+
aws: {
|
|
69
|
+
/* ... */
|
|
70
|
+
},
|
|
71
|
+
|
|
72
|
+
// Dynamic page navigation
|
|
73
|
+
pages: {
|
|
74
|
+
xml: "/navigation-pages.xml",
|
|
75
|
+
xmlType: "url", // or "string" for inline XML
|
|
76
|
+
},
|
|
77
|
+
});
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## ⚙️ Configuration
|
|
81
|
+
|
|
82
|
+
### NavigationConfig Options
|
|
61
83
|
|
|
62
84
|
| Property | Type | Required | Description |
|
|
63
85
|
| ----------------------- | --------- | -------- | ---------------------------------------------- |
|
|
@@ -69,64 +91,193 @@ const controller = initNavigationOnMicrophone({
|
|
|
69
91
|
| `aws.region` | `string` | ❌ | AWS region (default: 'ap-south-1') |
|
|
70
92
|
| `language` | `string` | ❌ | Speech recognition language (default: 'en-IN') |
|
|
71
93
|
| `autoStart` | `boolean` | ❌ | Auto-start voice control (default: false) |
|
|
72
|
-
| `actionHandlers` | `object` | ❌ |
|
|
94
|
+
| `actionHandlers` | `object` | ❌ | Custom action callbacks |
|
|
95
|
+
| `pages` | `object` | ❌ | Dynamic page navigation configuration |
|
|
73
96
|
| `opensearch` | `object` | ❌ | OpenSearch vector search configuration |
|
|
74
97
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
|
80
|
-
|
|
|
81
|
-
| `
|
|
82
|
-
| `
|
|
83
|
-
| `
|
|
84
|
-
| `
|
|
85
|
-
| `
|
|
86
|
-
| `
|
|
87
|
-
| `
|
|
88
|
-
| `
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
98
|
+
### OpenSearch Configuration
|
|
99
|
+
|
|
100
|
+
When providing the optional `opensearch` block:
|
|
101
|
+
|
|
102
|
+
| Property | Type | Required | Description |
|
|
103
|
+
| --------------- | ---------- | -------- | --------------------------------------------------------------- |
|
|
104
|
+
| `node` | `string` | ✅ | OpenSearch cluster URL |
|
|
105
|
+
| `username` | `string` | ✅ | OpenSearch username |
|
|
106
|
+
| `password` | `string` | ✅ | OpenSearch password |
|
|
107
|
+
| `index` | `string` | ✅ | Index name containing embeddings |
|
|
108
|
+
| `vectorField` | `string` | ❌ | Embedding field (default: `embedding`) |
|
|
109
|
+
| `size` | `number` | ❌ | Result count (default: `5`) |
|
|
110
|
+
| `numCandidates` | `number` | ❌ | k-NN candidates (default: `Math.max(size*4,20)`) |
|
|
111
|
+
| `minScore` | `number` | ❌ | Minimum match score (default: `0`) |
|
|
112
|
+
| `sourceFields` | `string[]` | ❌ | Source fields to retrieve |
|
|
113
|
+
| `apiPath` | `string` | ❌ | Proxy endpoint (default: `/api/voice-navigation/vector-search`) |
|
|
114
|
+
|
|
115
|
+
## 🌐 Dynamic Page Navigation
|
|
116
|
+
|
|
117
|
+
Instead of hardcoding page navigation, configure pages dynamically using XML or direct configuration.
|
|
118
|
+
|
|
119
|
+
### Method 1: XML File (Recommended)
|
|
120
|
+
|
|
121
|
+
**Create XML file** (`public/navigation-pages.xml`):
|
|
122
|
+
|
|
123
|
+
```xml
|
|
124
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
125
|
+
<navigation>
|
|
126
|
+
<pages>
|
|
127
|
+
<page
|
|
128
|
+
id="home"
|
|
129
|
+
name="Home"
|
|
130
|
+
path="/"
|
|
131
|
+
keywords="main,homepage,start"
|
|
132
|
+
description="Main homepage"
|
|
133
|
+
/>
|
|
134
|
+
|
|
135
|
+
<page
|
|
136
|
+
id="about"
|
|
137
|
+
name="About"
|
|
138
|
+
path="/about"
|
|
139
|
+
keywords="information,company"
|
|
140
|
+
description="About us page"
|
|
141
|
+
/>
|
|
142
|
+
|
|
143
|
+
<page
|
|
144
|
+
id="contact"
|
|
145
|
+
name="Contact"
|
|
146
|
+
path="/contact"
|
|
147
|
+
keywords="reach,support,help"
|
|
148
|
+
description="Contact information"
|
|
149
|
+
/>
|
|
150
|
+
</pages>
|
|
151
|
+
</navigation>
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**Configure SDK:**
|
|
155
|
+
|
|
156
|
+
```typescript
|
|
157
|
+
const controller = initNavigationOnMicrophone({
|
|
158
|
+
// ... azure & aws config ...
|
|
159
|
+
pages: {
|
|
160
|
+
xml: "/navigation-pages.xml",
|
|
161
|
+
xmlType: "url",
|
|
162
|
+
},
|
|
163
|
+
});
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Method 2: Remote XML URL
|
|
167
|
+
|
|
168
|
+
```typescript
|
|
169
|
+
pages: {
|
|
170
|
+
xml: "https://yoursite.com/api/navigation-pages.xml",
|
|
171
|
+
xmlType: "url",
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Method 3: Inline XML String
|
|
176
|
+
|
|
177
|
+
```typescript
|
|
178
|
+
const xmlConfig = `<?xml version="1.0" encoding="UTF-8"?>
|
|
179
|
+
<navigation>
|
|
180
|
+
<pages>
|
|
181
|
+
<page id="home" name="Home" path="/" />
|
|
182
|
+
<page id="about" name="About" path="/about" />
|
|
183
|
+
</pages>
|
|
184
|
+
</navigation>`;
|
|
185
|
+
|
|
186
|
+
pages: {
|
|
187
|
+
xml: xmlConfig,
|
|
188
|
+
xmlType: "string",
|
|
189
|
+
}
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Method 4: Direct Configuration
|
|
193
|
+
|
|
194
|
+
```typescript
|
|
195
|
+
pages: {
|
|
196
|
+
pages: [
|
|
197
|
+
{
|
|
198
|
+
id: "home",
|
|
199
|
+
name: "Home",
|
|
200
|
+
path: "/",
|
|
201
|
+
keywords: ["main", "start"]
|
|
202
|
+
},
|
|
203
|
+
{
|
|
204
|
+
id: "about",
|
|
205
|
+
name: "About",
|
|
206
|
+
path: "/about",
|
|
207
|
+
keywords: ["information", "company"]
|
|
208
|
+
},
|
|
209
|
+
],
|
|
210
|
+
}
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### XML Schema Reference
|
|
214
|
+
|
|
215
|
+
**Required attributes:**
|
|
216
|
+
|
|
217
|
+
- `id` - Unique identifier (creates `navigate_<id>` action)
|
|
218
|
+
- `name` - Display name for the page
|
|
219
|
+
- `path` - URL path to navigate to
|
|
220
|
+
|
|
221
|
+
**Optional attributes:**
|
|
222
|
+
|
|
223
|
+
- `keywords` - Comma-separated keywords for voice matching
|
|
224
|
+
- `description` - Page description
|
|
225
|
+
|
|
226
|
+
### Voice Command Examples
|
|
227
|
+
|
|
228
|
+
With the configuration above (plus a `team` page registered the same way for the last example), users can say:
|
|
229
|
+
|
|
230
|
+
- "Go to home" → navigates to `/`
|
|
231
|
+
- "Open about page" → navigates to `/about`
|
|
232
|
+
- "Show me contact" → navigates to `/contact`
|
|
233
|
+
- "Take me to the team page" → navigates to `/team`
|
|
234
|
+
|
|
235
|
+
## 🎬 Supported Actions
|
|
236
|
+
|
|
237
|
+
### Dynamic Page Navigation
|
|
238
|
+
|
|
239
|
+
Configure your own pages (see above). Each page automatically gets a `navigate_<id>` action.
|
|
240
|
+
|
|
241
|
+
### Core Navigation Actions
|
|
242
|
+
|
|
243
|
+
**Scrolling:**
|
|
244
|
+
|
|
245
|
+
- `scroll_up` / `scroll_down`
|
|
246
|
+
- `scroll_left` / `scroll_right`
|
|
247
|
+
- `scroll_top` / `scroll_bottom`
|
|
248
|
+
- `page_up` / `page_down`
|
|
249
|
+
|
|
250
|
+
**Zoom:**
|
|
251
|
+
|
|
252
|
+
- `zoom_in` / `zoom_out`
|
|
253
|
+
|
|
254
|
+
**Browser:**
|
|
255
|
+
|
|
256
|
+
- `go_back` / `go_forward`
|
|
257
|
+
- `reload_page`
|
|
258
|
+
- `print_page`
|
|
259
|
+
- `copy_url`
|
|
260
|
+
|
|
261
|
+
**UI Controls:**
|
|
262
|
+
|
|
263
|
+
- `open_menu` / `close_menu`
|
|
264
|
+
- `focus_search`
|
|
265
|
+
- `toggle_fullscreen` / `exit_fullscreen`
|
|
266
|
+
|
|
267
|
+
**Media:**
|
|
268
|
+
|
|
269
|
+
- `play_media` / `pause_media`
|
|
270
|
+
- `mute_media` / `unmute_media`
|
|
271
|
+
|
|
272
|
+
**Other:**
|
|
273
|
+
|
|
274
|
+
- `search_content` - Vector search (requires OpenSearch)
|
|
122
275
|
- `stop` - Stop voice control
|
|
123
276
|
|
|
124
|
-
## API Reference
|
|
277
|
+
## 📚 API Reference
|
|
125
278
|
|
|
126
279
|
### VoiceNavigationController
|
|
127
280
|
|
|
128
|
-
The main controller class for voice navigation.
|
|
129
|
-
|
|
130
281
|
#### Methods
|
|
131
282
|
|
|
132
283
|
##### `start(): Promise<void>`
|
|
@@ -139,7 +290,7 @@ await controller.start();
|
|
|
139
290
|
|
|
140
291
|
##### `stop(): Promise<void>`
|
|
141
292
|
|
|
142
|
-
Stop voice control and process
|
|
293
|
+
Stop voice control and process pending speech.
|
|
143
294
|
|
|
144
295
|
```typescript
|
|
145
296
|
await controller.stop();
|
|
@@ -171,64 +322,199 @@ controller.destroy();
|
|
|
171
322
|
|
|
172
323
|
### Events
|
|
173
324
|
|
|
174
|
-
|
|
325
|
+
Listen for SDK events:
|
|
175
326
|
|
|
176
327
|
```typescript
|
|
177
|
-
//
|
|
328
|
+
// State changes
|
|
178
329
|
window.addEventListener("navigate:state-change", (event) => {
|
|
179
330
|
console.log("State:", event.detail.state);
|
|
180
331
|
});
|
|
181
332
|
|
|
182
|
-
//
|
|
333
|
+
// Action detection
|
|
183
334
|
window.addEventListener("navigate:action-detected", (event) => {
|
|
184
335
|
console.log("Action:", event.detail.action);
|
|
185
336
|
});
|
|
186
337
|
|
|
187
|
-
//
|
|
338
|
+
// Action performed
|
|
188
339
|
window.addEventListener("navigate:action-performed", (event) => {
|
|
189
340
|
console.log("Performed:", event.detail.performed);
|
|
190
341
|
});
|
|
191
342
|
|
|
192
|
-
//
|
|
343
|
+
// Errors
|
|
193
344
|
window.addEventListener("navigate:error", (event) => {
|
|
194
345
|
console.error("Error:", event.detail.error);
|
|
195
346
|
});
|
|
196
347
|
```
|
|
197
348
|
|
|
198
|
-
##
|
|
349
|
+
## 🔧 Framework Integration Examples
|
|
199
350
|
|
|
200
|
-
###
|
|
351
|
+
### React
|
|
352
|
+
|
|
353
|
+
```typescript
|
|
354
|
+
import { useEffect, useRef } from "react";
|
|
355
|
+
import { VoiceNavigationController } from "@myscheme/voice-navigation-sdk";
|
|
356
|
+
|
|
357
|
+
export function VoiceNavigation() {
|
|
358
|
+
const controllerRef = useRef<VoiceNavigationController | null>(null);
|
|
359
|
+
|
|
360
|
+
useEffect(() => {
|
|
361
|
+
controllerRef.current = new VoiceNavigationController({
|
|
362
|
+
azure: {
|
|
363
|
+
subscriptionKey: import.meta.env.VITE_AZURE_SPEECH_KEY,
|
|
364
|
+
region: import.meta.env.VITE_AZURE_SPEECH_REGION,
|
|
365
|
+
},
|
|
366
|
+
aws: {
|
|
367
|
+
accessKeyId: import.meta.env.VITE_AWS_ACCESS_KEY_ID,
|
|
368
|
+
secretAccessKey: import.meta.env.VITE_AWS_SECRET_ACCESS_KEY,
|
|
369
|
+
modelId: "anthropic.claude-3-sonnet-20240229-v1:0",
|
|
370
|
+
region: "ap-south-1",
|
|
371
|
+
},
|
|
372
|
+
pages: {
|
|
373
|
+
xml: "/navigation-pages.xml",
|
|
374
|
+
xmlType: "url",
|
|
375
|
+
},
|
|
376
|
+
language: "en-US",
|
|
377
|
+
autoStart: false,
|
|
378
|
+
});
|
|
379
|
+
|
|
380
|
+
return () => {
|
|
381
|
+
controllerRef.current?.destroy();
|
|
382
|
+
};
|
|
383
|
+
}, []);
|
|
384
|
+
|
|
385
|
+
return null;
|
|
386
|
+
}
|
|
387
|
+
```
|
|
201
388
|
|
|
202
|
-
|
|
389
|
+
### Next.js (App Router)
|
|
203
390
|
|
|
204
391
|
```typescript
|
|
392
|
+
// app/providers/voice-navigation-provider.tsx
|
|
393
|
+
"use client";
|
|
394
|
+
|
|
395
|
+
import { useEffect } from "react";
|
|
396
|
+
import { VoiceNavigationController } from "@myscheme/voice-navigation-sdk";
|
|
397
|
+
|
|
398
|
+
export function VoiceNavigationProvider() {
|
|
399
|
+
useEffect(() => {
|
|
400
|
+
const controller = new VoiceNavigationController({
|
|
401
|
+
azure: {
|
|
402
|
+
subscriptionKey: process.env.NEXT_PUBLIC_AZURE_SPEECH_KEY!,
|
|
403
|
+
region: process.env.NEXT_PUBLIC_AZURE_SPEECH_REGION!,
|
|
404
|
+
},
|
|
405
|
+
aws: {
|
|
406
|
+
accessKeyId: process.env.NEXT_PUBLIC_AWS_ACCESS_KEY_ID!,
|
|
407
|
+
secretAccessKey: process.env.NEXT_PUBLIC_AWS_SECRET_ACCESS_KEY!,
|
|
408
|
+
modelId: "anthropic.claude-3-sonnet-20240229-v1:0",
|
|
409
|
+
region: "ap-south-1",
|
|
410
|
+
},
|
|
411
|
+
pages: {
|
|
412
|
+
xml: "/navigation-pages.xml",
|
|
413
|
+
xmlType: "url",
|
|
414
|
+
},
|
|
415
|
+
});
|
|
416
|
+
|
|
417
|
+
return () => controller.destroy();
|
|
418
|
+
}, []);
|
|
419
|
+
|
|
420
|
+
return null;
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
// app/layout.tsx
|
|
424
|
+
import { VoiceNavigationProvider } from "./providers/voice-navigation-provider";
|
|
425
|
+
|
|
426
|
+
export default function RootLayout({ children }) {
|
|
427
|
+
return (
|
|
428
|
+
<html>
|
|
429
|
+
<body>
|
|
430
|
+
<VoiceNavigationProvider />
|
|
431
|
+
{children}
|
|
432
|
+
</body>
|
|
433
|
+
</html>
|
|
434
|
+
);
|
|
435
|
+
}
|
|
436
|
+
```
|
|
437
|
+
|
|
438
|
+
### Next.js (Pages Router)
|
|
439
|
+
|
|
440
|
+
```javascript
|
|
441
|
+
// pages/_app.js
|
|
442
|
+
import { useEffect, useRef } from "react";
|
|
205
443
|
import { initNavigationOnMicrophone } from "@myscheme/voice-navigation-sdk";
|
|
206
444
|
|
|
445
|
+
export default function App({ Component, pageProps }) {
|
|
446
|
+
const voiceControllerRef = useRef(null);
|
|
447
|
+
|
|
448
|
+
useEffect(() => {
|
|
449
|
+
if (typeof window === "undefined") return;
|
|
450
|
+
|
|
451
|
+
try {
|
|
452
|
+
voiceControllerRef.current = initNavigationOnMicrophone({
|
|
453
|
+
azure: {
|
|
454
|
+
subscriptionKey: process.env.NEXT_PUBLIC_AZURE_SPEECH_KEY,
|
|
455
|
+
region: process.env.NEXT_PUBLIC_AZURE_SPEECH_REGION,
|
|
456
|
+
},
|
|
457
|
+
aws: {
|
|
458
|
+
accessKeyId: process.env.NEXT_PUBLIC_AWS_ACCESS_KEY_ID,
|
|
459
|
+
secretAccessKey: process.env.NEXT_PUBLIC_AWS_SECRET_ACCESS_KEY,
|
|
460
|
+
modelId: "anthropic.claude-3-sonnet-20240229-v1:0",
|
|
461
|
+
region: "ap-south-1",
|
|
462
|
+
},
|
|
463
|
+
pages: {
|
|
464
|
+
xml: "/navigation-pages.xml",
|
|
465
|
+
xmlType: "url",
|
|
466
|
+
},
|
|
467
|
+
language: "en-IN",
|
|
468
|
+
autoStart: false,
|
|
469
|
+
});
|
|
470
|
+
} catch (error) {
|
|
471
|
+
console.error("Failed to initialize voice navigation:", error);
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
return () => {
|
|
475
|
+
voiceControllerRef.current?.destroy?.();
|
|
476
|
+
voiceControllerRef.current = null;
|
|
477
|
+
};
|
|
478
|
+
}, []);
|
|
479
|
+
|
|
480
|
+
return <Component {...pageProps} />;
|
|
481
|
+
}
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
## 🚀 Advanced Usage
|
|
485
|
+
|
|
486
|
+
### Vector Search with OpenSearch
|
|
487
|
+
|
|
488
|
+
**1. Configure SDK with OpenSearch:**
|
|
489
|
+
|
|
490
|
+
```typescript
|
|
207
491
|
initNavigationOnMicrophone({
|
|
208
|
-
azure: {
|
|
209
|
-
|
|
492
|
+
azure: {
|
|
493
|
+
/* ... */
|
|
494
|
+
},
|
|
495
|
+
aws: {
|
|
496
|
+
/* ... */
|
|
497
|
+
},
|
|
210
498
|
opensearch: {
|
|
211
499
|
node: process.env.NEXT_PUBLIC_OPENSEARCH_NODE!,
|
|
212
500
|
username: process.env.NEXT_PUBLIC_OPENSEARCH_USERNAME!,
|
|
213
501
|
password: process.env.NEXT_PUBLIC_OPENSEARCH_PASSWORD!,
|
|
214
502
|
index: "my-embeddings-index",
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
// apiPath: "/api/voice-navigation/vector-search",
|
|
503
|
+
vectorField: "embedding",
|
|
504
|
+
size: 5,
|
|
505
|
+
apiPath: "/api/voice-navigation/vector-search",
|
|
219
506
|
},
|
|
220
507
|
});
|
|
221
508
|
```
|
|
222
509
|
|
|
223
|
-
2.
|
|
510
|
+
**2. Create proxy endpoint** (`pages/api/voice-navigation/vector-search.ts`):
|
|
224
511
|
|
|
225
512
|
```typescript
|
|
226
513
|
import type { NextApiRequest, NextApiResponse } from "next";
|
|
227
514
|
import { createOpenSearchProxyHandler } from "@myscheme/voice-navigation-sdk/server";
|
|
228
515
|
|
|
229
516
|
const handler = createOpenSearchProxyHandler({
|
|
230
|
-
|
|
231
|
-
allowedOrigins: process.env.OPENSEARCH_ALLOWED_ORIGINS?.split(",") ?? undefined,
|
|
517
|
+
allowedOrigins: process.env.OPENSEARCH_ALLOWED_ORIGINS?.split(","),
|
|
232
518
|
});
|
|
233
519
|
|
|
234
520
|
export default async function vectorSearchProxy(
|
|
@@ -238,7 +524,6 @@ export default async function vectorSearchProxy(
|
|
|
238
524
|
await handler(req, res);
|
|
239
525
|
}
|
|
240
526
|
|
|
241
|
-
// Disable Next.js body parsing so the proxy can stream the incoming payload.
|
|
242
527
|
export const config = {
|
|
243
528
|
api: {
|
|
244
529
|
bodyParser: false,
|
|
@@ -246,11 +531,9 @@ export const config = {
|
|
|
246
531
|
};
|
|
247
532
|
```
|
|
248
533
|
|
|
249
|
-
Mount the route at the same `apiPath` you configured on the client (defaults to `/api/voice-navigation/vector-search`). The handler lives in the `@myscheme/voice-navigation-sdk/server` export so it only runs on the server where the official OpenSearch client is available.
|
|
250
|
-
|
|
251
534
|
### Custom Service Initialization
|
|
252
535
|
|
|
253
|
-
|
|
536
|
+
Use individual services separately:
|
|
254
537
|
|
|
255
538
|
```typescript
|
|
256
539
|
import {
|
|
@@ -277,9 +560,30 @@ const bedrockService = new BedrockService({
|
|
|
277
560
|
const action = await bedrockService.extractAction("zoom in");
|
|
278
561
|
```
|
|
279
562
|
|
|
280
|
-
###
|
|
563
|
+
### Programmatic Page Management
|
|
564
|
+
|
|
565
|
+
```typescript
|
|
566
|
+
import { PageRegistry, setPageRegistry } from "@myscheme/voice-navigation-sdk";
|
|
567
|
+
|
|
568
|
+
// Create custom registry
|
|
569
|
+
const registry = new PageRegistry([
|
|
570
|
+
{ id: "home", name: "Home", path: "/" },
|
|
571
|
+
{ id: "about", name: "About", path: "/about" },
|
|
572
|
+
]);
|
|
573
|
+
|
|
574
|
+
// Add pages dynamically
|
|
575
|
+
registry.addPage({
|
|
576
|
+
id: "blog",
|
|
577
|
+
name: "Blog",
|
|
578
|
+
path: "/blog",
|
|
579
|
+
keywords: ["articles", "posts"],
|
|
580
|
+
});
|
|
581
|
+
|
|
582
|
+
// Set as global registry
|
|
583
|
+
setPageRegistry(registry);
|
|
584
|
+
```
|
|
281
585
|
|
|
282
|
-
|
|
586
|
+
### Custom Action Handlers
|
|
283
587
|
|
|
284
588
|
```typescript
|
|
285
589
|
import { performAgentAction } from "@myscheme/voice-navigation-sdk";
|
|
@@ -292,68 +596,217 @@ console.log("Action performed:", result.performed);
|
|
|
292
596
|
console.log("New zoom:", result.info.newZoom);
|
|
293
597
|
```
|
|
294
598
|
|
|
295
|
-
##
|
|
599
|
+
## 🐛 Troubleshooting
|
|
600
|
+
|
|
601
|
+
### Pages Not Loading
|
|
602
|
+
|
|
603
|
+
**Symptoms:**
|
|
604
|
+
|
|
605
|
+
- Voice commands navigate to wrong page
|
|
606
|
+
- `navigate_search` is triggered instead of page-specific actions
|
|
607
|
+
- Console shows "Unknown action"
|
|
608
|
+
|
|
609
|
+
**Solutions:**
|
|
610
|
+
|
|
611
|
+
1. **Check XML file exists:**
|
|
612
|
+
|
|
613
|
+
- File should be in `public/` folder
|
|
614
|
+
- Verify URL: `http://localhost:3000/navigation-pages.xml`
|
|
615
|
+
|
|
616
|
+
2. **Validate XML syntax:**
|
|
617
|
+
|
|
618
|
+
```xml
|
|
619
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
620
|
+
<navigation>
|
|
621
|
+
<pages>
|
|
622
|
+
<page id="unique_id" name="Display Name" path="/path" />
|
|
623
|
+
</pages>
|
|
624
|
+
</navigation>
|
|
625
|
+
```
|
|
626
|
+
|
|
627
|
+
3. **Check browser console:**
|
|
628
|
+
|
|
629
|
+
- Look for `[VoiceNavigation]` messages
|
|
630
|
+
- Should see: "✓ Page registry initialized with X pages"
|
|
631
|
+
|
|
632
|
+
4. **Debug in console:**
|
|
633
|
+
|
|
634
|
+
```javascript
|
|
635
|
+
console.log("Registry size:", window.__navigatePageRegistry?.size);
|
|
636
|
+
console.log("Pages:", window.__navigatePageRegistry?.getAllPages());
|
|
637
|
+
```
|
|
638
|
+
|
|
639
|
+
5. **Clear cache and hard refresh:**
|
|
640
|
+
- Clear Next.js cache: `rm -rf .next`
|
|
641
|
+
- Hard refresh browser: `Ctrl+Shift+R` / `Cmd+Shift+R`
|
|
642
|
+
|
|
643
|
+
### Voice Not Recognized
|
|
644
|
+
|
|
645
|
+
**Solutions:**
|
|
646
|
+
|
|
647
|
+
- Check microphone permissions
|
|
648
|
+
- Reduce background noise
|
|
649
|
+
- Speak clearly at normal pace
|
|
650
|
+
- Ensure microphone is working in other apps
|
|
296
651
|
|
|
297
|
-
|
|
652
|
+
### Wrong Page Navigation
|
|
298
653
|
|
|
299
|
-
|
|
300
|
-
- Proxy Bedrock requests through a backend service when possible.
|
|
301
|
-
- Keep Azure Speech keys in secured storage and rotate them regularly.
|
|
302
|
-
- Provide configuration values via runtime injection, not hard-coded literals.
|
|
654
|
+
**Solutions:**
|
|
303
655
|
|
|
304
|
-
|
|
656
|
+
- Add more specific keywords to pages
|
|
657
|
+
- Make page names more distinct
|
|
658
|
+
- Remove overlapping keywords
|
|
659
|
+
- Use exact page names in commands
|
|
305
660
|
|
|
306
|
-
|
|
661
|
+
### SDK Initialization Fails
|
|
307
662
|
|
|
308
|
-
|
|
663
|
+
**Solutions:**
|
|
664
|
+
|
|
665
|
+
- Check all required config values are provided
|
|
666
|
+
- Verify AWS credentials are valid
|
|
667
|
+
- Confirm Azure subscription key is active
|
|
668
|
+
- Check browser console for error messages
|
|
669
|
+
|
|
670
|
+
### Testing Checklist
|
|
671
|
+
|
|
672
|
+
- [ ] XML file accessible in browser
|
|
673
|
+
- [ ] Pages config in SDK initialization
|
|
674
|
+
- [ ] Browser console shows successful load
|
|
675
|
+
- [ ] `window.__navigatePageRegistry.size > 0`
|
|
676
|
+
- [ ] No 404 errors in Network tab
|
|
677
|
+
- [ ] Hard refresh after changes
|
|
678
|
+
|
|
679
|
+
## 💡 Best Practices
|
|
680
|
+
|
|
681
|
+
### Security
|
|
682
|
+
|
|
683
|
+
1. **Never expose credentials in client code:**
|
|
684
|
+
|
|
685
|
+
```typescript
|
|
686
|
+
// ❌ BAD
|
|
687
|
+
aws: {
|
|
688
|
+
accessKeyId: "AKIAXXXXXXXX",
|
|
689
|
+
secretAccessKey: "xxxxxxxxxxxxx",
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
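// ✅ GOOD (no hardcoded literals) — note: NEXT_PUBLIC_* values are still inlined into the browser bundle, so pair this with temporary credentials or a backend proxy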
|
|
693
|
+
aws: {
|
|
694
|
+
accessKeyId: process.env.NEXT_PUBLIC_AWS_ACCESS_KEY_ID!,
|
|
695
|
+
secretAccessKey: process.env.NEXT_PUBLIC_AWS_SECRET_ACCESS_KEY!,
|
|
696
|
+
}
|
|
697
|
+
```
|
|
698
|
+
|
|
699
|
+
2. **Use temporary credentials:**
|
|
700
|
+
|
|
701
|
+
- AWS Cognito for user-specific tokens
|
|
702
|
+
- STS for temporary access keys
|
|
703
|
+
- API Gateway with authorization
|
|
704
|
+
|
|
705
|
+
3. **Implement backend proxy:**
|
|
706
|
+
- Proxy Bedrock requests through your server
|
|
707
|
+
- Never expose AWS keys to the browser
|
|
708
|
+
- Use environment-specific configurations
|
|
709
|
+
|
|
710
|
+
### Performance
|
|
711
|
+
|
|
712
|
+
1. **Use XML files over direct config** for better caching
|
|
713
|
+
2. **Add specific keywords** to reduce AI processing time
|
|
714
|
+
3. **Limit the number of pages** to essential navigation
|
|
715
|
+
4. **Enable auto-start carefully** (consider user experience)
|
|
716
|
+
|
|
717
|
+
### User Experience
|
|
718
|
+
|
|
719
|
+
1. **Clear page names:** Use descriptive, unique names
|
|
720
|
+
2. **Relevant keywords:** Include synonyms and common phrases
|
|
721
|
+
3. **Test voice commands:** Try different phrasings
|
|
722
|
+
4. **Provide UI feedback:** Listen for SDK events
|
|
723
|
+
5. **Handle errors gracefully:** Show helpful error messages
|
|
724
|
+
|
|
725
|
+
### Page Configuration
|
|
726
|
+
|
|
727
|
+
1. **Use descriptive IDs:** `privacy_policy` not `pp`
|
|
728
|
+
2. **Add multiple keywords:** Cover variations and synonyms
|
|
729
|
+
3. **Keep paths accurate:** Match your actual routes
|
|
730
|
+
4. **Start small:** Begin with core pages, expand gradually
|
|
731
|
+
5. **Test variations:** Try different voice commands
|
|
732
|
+
|
|
733
|
+
## 🌐 Browser Support
|
|
734
|
+
|
|
735
|
+
**Requirements:**
|
|
736
|
+
|
|
737
|
+
- Modern browser with ES2020+ support
|
|
309
738
|
- Microphone access
|
|
310
|
-
- `MediaDevices` API
|
|
311
|
-
- `fetch` API
|
|
739
|
+
- `MediaDevices` API
|
|
740
|
+
- `fetch` API
|
|
741
|
+
- `DOMParser` (for XML)
|
|
312
742
|
|
|
313
|
-
|
|
743
|
+
**Tested on:**
|
|
314
744
|
|
|
315
|
-
|
|
745
|
+
- Chrome 90+
|
|
746
|
+
- Firefox 88+
|
|
747
|
+
- Safari 14+
|
|
748
|
+
- Edge 90+
|
|
316
749
|
|
|
317
|
-
|
|
318
|
-
- API Gateway with authorization
|
|
319
|
-
- Backend proxy for sensitive operations
|
|
750
|
+
## 🔒 Security Considerations
|
|
320
751
|
|
|
321
|
-
|
|
752
|
+
⚠️ **Important Security Notes:**
|
|
322
753
|
|
|
323
|
-
|
|
754
|
+
1. **Credential Protection:**
|
|
324
755
|
|
|
325
|
-
|
|
756
|
+
- Never hardcode AWS/Azure credentials
|
|
757
|
+
- Use environment variables
|
|
758
|
+
- Rotate keys regularly
|
|
759
|
+
- Use least-privilege IAM policies
|
|
326
760
|
|
|
327
|
-
|
|
761
|
+
2. **Production Setup:**
|
|
328
762
|
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
763
|
+
- Implement AWS Cognito for temporary credentials
|
|
764
|
+
- Use API Gateway for request authorization
|
|
765
|
+
- Proxy sensitive operations through backend
|
|
766
|
+
- Enable CloudWatch logging for monitoring
|
|
333
767
|
|
|
334
|
-
|
|
768
|
+
3. **Best Practices:**
|
|
769
|
+
- Validate all user inputs
|
|
770
|
+
- Sanitize XML content
|
|
771
|
+
- Implement rate limiting
|
|
772
|
+
- Monitor API usage
|
|
773
|
+
- Use HTTPS only
|
|
335
774
|
|
|
336
|
-
|
|
337
|
-
import { initNavigationOnMicrophone } from "@myscheme/voice-navigation-sdk";
|
|
775
|
+
## 📄 Environment Variables
|
|
338
776
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
777
|
+
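Create a `.env.local` (Next.js) or `.env` file. Variables prefixed with `NEXT_PUBLIC_` are inlined into the browser bundle, so use only temporary, least-privilege credentials here: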
|
|
778
|
+
|
|
779
|
+
```bash
|
|
780
|
+
# AWS Bedrock
|
|
781
|
+
NEXT_PUBLIC_AWS_REGION=ap-south-1
|
|
782
|
+
NEXT_PUBLIC_AWS_ACCESS_KEY_ID=your_access_key_here
|
|
783
|
+
NEXT_PUBLIC_AWS_SECRET_ACCESS_KEY=your_secret_key_here
|
|
784
|
+
|
|
785
|
+
# Azure Speech
|
|
786
|
+
NEXT_PUBLIC_AZURE_SPEECH_KEY=your_subscription_key_here
|
|
787
|
+
NEXT_PUBLIC_AZURE_SPEECH_REGION=centralindia
|
|
788
|
+
|
|
789
|
+
# OpenSearch (Optional)
|
|
790
|
+
NEXT_PUBLIC_OPENSEARCH_NODE=https://your-cluster.example.com
|
|
791
|
+
NEXT_PUBLIC_OPENSEARCH_USERNAME=your_username
|
|
792
|
+
NEXT_PUBLIC_OPENSEARCH_PASSWORD=your_password
|
|
793
|
+
NEXT_PUBLIC_OPENSEARCH_INDEX=your_index
|
|
347
794
|
```
|
|
348
795
|
|
|
349
|
-
## License
|
|
796
|
+
## 📝 License
|
|
350
797
|
|
|
351
798
|
MIT
|
|
352
799
|
|
|
353
|
-
## Contributing
|
|
800
|
+
## 🤝 Contributing
|
|
801
|
+
|
|
802
|
+
Contributions are welcome! This library is currently in beta.
|
|
803
|
+
|
|
804
|
+
## 📧 Support
|
|
805
|
+
|
|
806
|
+
For issues and questions, please open an issue on the repository.
|
|
354
807
|
|
|
355
|
-
|
|
808
|
+
## 🎯 Version
|
|
356
809
|
|
|
357
|
-
|
|
810
|
+
Current version: **0.1.2 (Beta)**
|
|
358
811
|
|
|
359
|
-
|
|
812
|
+
This library is under active development. APIs may change between releases.
|