@myscheme/voice-navigation-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +359 -0
- package/dist/actions.d.ts +8 -0
- package/dist/actions.d.ts.map +1 -0
- package/dist/actions.js +478 -0
- package/dist/constants.d.ts +2 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +1 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +156 -0
- package/dist/microphone-handler.d.ts +47 -0
- package/dist/microphone-handler.d.ts.map +1 -0
- package/dist/microphone-handler.js +341 -0
- package/dist/navigation-controller.d.ts +50 -0
- package/dist/navigation-controller.d.ts.map +1 -0
- package/dist/navigation-controller.js +782 -0
- package/dist/server/index.d.ts +3 -0
- package/dist/server/index.d.ts.map +1 -0
- package/dist/server/index.js +1 -0
- package/dist/server/opensearch-handler.d.ts +52 -0
- package/dist/server/opensearch-handler.d.ts.map +1 -0
- package/dist/server/opensearch-handler.js +279 -0
- package/dist/services/azure-speech.d.ts +13 -0
- package/dist/services/azure-speech.d.ts.map +1 -0
- package/dist/services/azure-speech.js +33 -0
- package/dist/services/bedrock.d.ts +18 -0
- package/dist/services/bedrock.d.ts.map +1 -0
- package/dist/services/bedrock.js +132 -0
- package/dist/services/schemes.d.ts +2 -0
- package/dist/services/schemes.d.ts.map +1 -0
- package/dist/services/schemes.js +1 -0
- package/dist/services/vector-search.d.ts +21 -0
- package/dist/services/vector-search.d.ts.map +1 -0
- package/dist/services/vector-search.js +181 -0
- package/dist/types.d.ts +107 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +1 -0
- package/dist/ui.d.ts +10 -0
- package/dist/ui.d.ts.map +1 -0
- package/dist/ui.js +225 -0
- package/package.json +55 -0
package/README.md
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
# Voice Navigation SDK
|
|
2
|
+
|
|
3
|
+
A TypeScript SDK for voice-controlled navigation that uses Azure Speech-to-Text for speech recognition and AWS Bedrock for intent understanding.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🎤 Real-time speech recognition using Azure Speech SDK
|
|
8
|
+
- 🤖 Intent extraction using AWS Bedrock (Claude)
|
|
9
|
+
- 🧭 Voice-controlled navigation actions
|
|
10
|
+
- 🖱️ Rich browser and media controls
|
|
11
|
+
- ♿ Accessibility-first design
|
|
12
|
+
- 🎨 Customizable floating UI control
|
|
13
|
+
- 📦 Full TypeScript support
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
npm install @myscheme/voice-navigation-sdk
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Dependencies
|
|
22
|
+
|
|
23
|
+
This SDK requires:
|
|
24
|
+
|
|
25
|
+
- `@aws-sdk/client-bedrock-runtime` - For AWS Bedrock integration
|
|
26
|
+
- `microsoft-cognitiveservices-speech-sdk` - For Azure Speech-to-Text
|
|
27
|
+
|
|
28
|
+
These are automatically installed as dependencies.
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
import { initNavigationOnMicrophone } from "@myscheme/voice-navigation-sdk";
|
|
34
|
+
|
|
35
|
+
const controller = initNavigationOnMicrophone({
|
|
36
|
+
// Azure Speech configuration
|
|
37
|
+
azure: {
|
|
38
|
+
subscriptionKey: "your-azure-subscription-key",
|
|
39
|
+
region: "your-azure-region", // e.g., 'eastus'
|
|
40
|
+
},
|
|
41
|
+
|
|
42
|
+
// AWS Bedrock configuration
|
|
43
|
+
aws: {
|
|
44
|
+
accessKeyId: "your-aws-access-key-id",
|
|
45
|
+
secretAccessKey: "your-aws-secret-access-key",
|
|
46
|
+
modelId: "anthropic.claude-3-sonnet-20240229-v1:0",
|
|
47
|
+
region: "ap-south-1", // or your preferred region
|
|
48
|
+
},
|
|
49
|
+
|
|
50
|
+
// Optional: Default language
|
|
51
|
+
language: "en-IN",
|
|
52
|
+
|
|
53
|
+
// Optional: Auto-start voice control
|
|
54
|
+
autoStart: false,
|
|
55
|
+
});
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Configuration
|
|
59
|
+
|
|
60
|
+
### NavigationConfig
|
|
61
|
+
|
|
62
|
+
| Property | Type | Required | Description |
|
|
63
|
+
| ----------------------- | --------- | -------- | ---------------------------------------------- |
|
|
64
|
+
| `azure.subscriptionKey` | `string` | ✅ | Azure Speech subscription key |
|
|
65
|
+
| `azure.region` | `string` | ✅ | Azure Speech region |
|
|
66
|
+
| `aws.accessKeyId` | `string` | ✅ | AWS access key ID |
|
|
67
|
+
| `aws.secretAccessKey` | `string` | ✅ | AWS secret access key |
|
|
68
|
+
| `aws.modelId` | `string` | ✅ | AWS Bedrock model ID |
|
|
69
|
+
| `aws.region` | `string` | ❌ | AWS region (default: 'ap-south-1') |
|
|
70
|
+
| `language` | `string` | ❌ | Speech recognition language (default: 'en-IN') |
|
|
71
|
+
| `autoStart` | `boolean` | ❌ | Auto-start voice control (default: false) |
|
|
72
|
+
| `actionHandlers` | `object` | ❌ | Map of custom callbacks keyed by action |
|
|
73
|
+
| `opensearch` | `object` | ❌ | OpenSearch vector search configuration |
|
|
74
|
+
|
|
75
|
+
When you provide the optional `opensearch` block, include the following fields:
|
|
76
|
+
|
|
77
|
+
| Property | Type | Required | Description |
|
|
78
|
+
| --------------- | -------- | -------- | ------------------------------------------------------------------ |
|
|
79
|
+
| `node` | `string` | ✅ | OpenSearch cluster URL (e.g. `https://cluster.example.com`) |
|
|
80
|
+
| `username` | `string` | ✅ | OpenSearch user name |
|
|
81
|
+
| `password` | `string` | ✅ | OpenSearch password |
|
|
82
|
+
| `index` | `string` | ✅ | Index name containing your embeddings |
|
|
83
|
+
| `vectorField` | `string` | ❌ | Embedding field (default: `embedding`) |
|
|
84
|
+
| `size` | `number` | ❌ | Result count returned to the browser (default: `5`) |
|
|
85
|
+
| `numCandidates` | `number` | ❌ | Candidate pool size for k-NN search (default: `Math.max(size * 4, 20)`) |
|
|
86
|
+
| `minScore` | `number` | ❌ | Minimum score to accept a hit (default: `0`) |
|
|
87
|
+
| `sourceFields` | `string[]` | ❌ | Source fields to request from OpenSearch |
|
|
88
|
+
| `apiPath` | `string` | ❌ | Browser endpoint that proxies vector search (default: `/api/voice-navigation/vector-search`) |
|
|
89
|
+
|
|
90
|
+
## Supported Actions
|
|
91
|
+
|
|
92
|
+
The SDK supports the following voice commands:
|
|
93
|
+
|
|
94
|
+
### Navigation Actions
|
|
95
|
+
|
|
96
|
+
- `navigate_home` - Navigate to home page
|
|
97
|
+
- `navigate_search` - Navigate to search page
|
|
98
|
+
- `navigate_faqs` - Navigate to FAQs
|
|
99
|
+
- `navigate_help` - Navigate to help
|
|
100
|
+
- `navigate_contact` - Navigate to contact
|
|
101
|
+
- `navigate_about` - Navigate to about
|
|
102
|
+
- `navigate_screen_reader` - Navigate to screen reader access
|
|
103
|
+
- `navigate_accessibility` - Navigate to accessibility page
|
|
104
|
+
- `navigate_disclaimer` - Navigate to disclaimer
|
|
105
|
+
- `navigate_terms_conditions` - Navigate to terms and conditions
|
|
106
|
+
|
|
107
|
+
### Page & UI Actions
|
|
108
|
+
|
|
109
|
+
- `zoom_in` / `zoom_out` - Adjust page zoom
|
|
110
|
+
- `scroll_up` / `scroll_down` / `scroll_left` / `scroll_right` - Scroll in any direction
|
|
111
|
+
- `page_up` / `page_down` - Jump by one viewport height
|
|
112
|
+
- `scroll_top` / `scroll_bottom` - Jump to page edges
|
|
113
|
+
- `go_back` / `go_forward` - Navigate browser history
|
|
114
|
+
- `reload_page` - Reload current page
|
|
115
|
+
- `print_page` - Open print dialog
|
|
116
|
+
- `copy_url` - Copy current page URL to clipboard
|
|
117
|
+
- `open_menu` / `close_menu` - Toggle navigation menus
|
|
118
|
+
- `focus_search` - Focus the primary search input
|
|
119
|
+
- `toggle_fullscreen` / `exit_fullscreen` - Control fullscreen mode
|
|
120
|
+
- `play_media` / `pause_media` - Control media playback
|
|
121
|
+
- `mute_media` / `unmute_media` - Control media volume
|
|
122
|
+
- `stop` - Stop voice control
|
|
123
|
+
|
|
124
|
+
## API Reference
|
|
125
|
+
|
|
126
|
+
### VoiceNavigationController
|
|
127
|
+
|
|
128
|
+
The main controller class for voice navigation.
|
|
129
|
+
|
|
130
|
+
#### Methods
|
|
131
|
+
|
|
132
|
+
##### `start(): Promise<void>`
|
|
133
|
+
|
|
134
|
+
Start voice control and begin listening.
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
await controller.start();
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
##### `stop(): Promise<void>`
|
|
141
|
+
|
|
142
|
+
Stop voice control and process any pending speech.
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
await controller.stop();
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
##### `setLanguage(language: string): void`
|
|
149
|
+
|
|
150
|
+
Change the speech recognition language.
|
|
151
|
+
|
|
152
|
+
```typescript
|
|
153
|
+
controller.setLanguage("hi-IN"); // Switch to Hindi
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
##### `setAutoStart(enabled: boolean): void`
|
|
157
|
+
|
|
158
|
+
Enable or disable auto-start on future page loads.
|
|
159
|
+
|
|
160
|
+
```typescript
|
|
161
|
+
controller.setAutoStart(true);
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
##### `destroy(): void`
|
|
165
|
+
|
|
166
|
+
Clean up and remove the controller.
|
|
167
|
+
|
|
168
|
+
```typescript
|
|
169
|
+
controller.destroy();
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Events
|
|
173
|
+
|
|
174
|
+
The SDK emits custom events that you can listen for:
|
|
175
|
+
|
|
176
|
+
```typescript
|
|
177
|
+
// Listen for state changes
|
|
178
|
+
window.addEventListener("navigate:state-change", (event) => {
|
|
179
|
+
console.log("State:", event.detail.state);
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
// Listen for action detection
|
|
183
|
+
window.addEventListener("navigate:action-detected", (event) => {
|
|
184
|
+
console.log("Action:", event.detail.action);
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
// Listen for action performance
|
|
188
|
+
window.addEventListener("navigate:action-performed", (event) => {
|
|
189
|
+
console.log("Performed:", event.detail.performed);
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
// Listen for errors
|
|
193
|
+
window.addEventListener("navigate:error", (event) => {
|
|
194
|
+
console.error("Error:", event.detail.error);
|
|
195
|
+
});
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Advanced Usage
|
|
199
|
+
|
|
200
|
+
### Vector Search with OpenSearch
|
|
201
|
+
|
|
202
|
+
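1. Supply OpenSearch credentials to the client when you initialise the library. Note that Next.js inlines `NEXT_PUBLIC_*` variables into the browser bundle, so the values below are visible to end users; use a least-privilege, read-only OpenSearch account: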
|
|
203
|
+
|
|
204
|
+
```typescript
|
|
205
|
+
import { initNavigationOnMicrophone } from "@myscheme/voice-navigation-sdk";
|
|
206
|
+
|
|
207
|
+
initNavigationOnMicrophone({
|
|
208
|
+
azure: { /* ... */ },
|
|
209
|
+
aws: { /* ... */ },
|
|
210
|
+
opensearch: {
|
|
211
|
+
node: process.env.NEXT_PUBLIC_OPENSEARCH_NODE!,
|
|
212
|
+
username: process.env.NEXT_PUBLIC_OPENSEARCH_USERNAME!,
|
|
213
|
+
password: process.env.NEXT_PUBLIC_OPENSEARCH_PASSWORD!,
|
|
214
|
+
index: "my-embeddings-index",
|
|
215
|
+
// optional overrides:
|
|
216
|
+
// vectorField: "embedding",
|
|
217
|
+
// size: 5,
|
|
218
|
+
// apiPath: "/api/voice-navigation/vector-search",
|
|
219
|
+
},
|
|
220
|
+
});
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
2. Expose a server-side proxy so the browser never talks to OpenSearch directly. In a Next.js project, create a route such as `pages/api/voice-navigation/vector-search.ts`:
|
|
224
|
+
|
|
225
|
+
```typescript
|
|
226
|
+
import type { NextApiRequest, NextApiResponse } from "next";
|
|
227
|
+
import { createOpenSearchProxyHandler } from "@myscheme/voice-navigation-sdk/server";
|
|
228
|
+
|
|
229
|
+
const handler = createOpenSearchProxyHandler({
|
|
230
|
+
// Optionally lock down allowed origins for CORS-style checks.
|
|
231
|
+
allowedOrigins: process.env.OPENSEARCH_ALLOWED_ORIGINS?.split(",") ?? undefined,
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
export default async function vectorSearchProxy(
|
|
235
|
+
req: NextApiRequest,
|
|
236
|
+
res: NextApiResponse,
|
|
237
|
+
) {
|
|
238
|
+
await handler(req, res);
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
// Disable Next.js body parsing so the proxy can stream the incoming payload.
|
|
242
|
+
export const config = {
|
|
243
|
+
api: {
|
|
244
|
+
bodyParser: false,
|
|
245
|
+
},
|
|
246
|
+
};
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
Mount the route at the same `apiPath` you configured on the client (defaults to `/api/voice-navigation/vector-search`). The handler lives in the `@myscheme/voice-navigation-sdk/server` export so that it only runs on the server, where the official OpenSearch client is available.
|
|
250
|
+
|
|
251
|
+
### Custom Service Initialization
|
|
252
|
+
|
|
253
|
+
You can use the individual services separately:
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
import {
|
|
257
|
+
AzureSpeechService,
|
|
258
|
+
BedrockService,
|
|
259
|
+
} from "@myscheme/voice-navigation-sdk";
|
|
260
|
+
|
|
261
|
+
// Azure Speech Service
|
|
262
|
+
const azureService = new AzureSpeechService({
|
|
263
|
+
subscriptionKey: "your-key",
|
|
264
|
+
region: "your-region",
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
const tokenResponse = await azureService.fetchToken();
|
|
268
|
+
|
|
269
|
+
// Bedrock Service
|
|
270
|
+
const bedrockService = new BedrockService({
|
|
271
|
+
region: "ap-south-1",
|
|
272
|
+
accessKeyId: "your-key",
|
|
273
|
+
secretAccessKey: "your-secret",
|
|
274
|
+
modelId: "anthropic.claude-3-sonnet-20240229-v1:0",
|
|
275
|
+
});
|
|
276
|
+
|
|
277
|
+
const action = await bedrockService.extractAction("zoom in");
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
### Custom Action Handlers
|
|
281
|
+
|
|
282
|
+
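You can also perform actions programmatically with `performAgentAction`, optionally passing a context object of callbacks (such as `onStop`):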
|
|
283
|
+
|
|
284
|
+
```typescript
|
|
285
|
+
import { performAgentAction } from "@myscheme/voice-navigation-sdk";
|
|
286
|
+
|
|
287
|
+
const result = performAgentAction("zoom_in", {
|
|
288
|
+
onStop: () => console.log("Stopped"),
|
|
289
|
+
});
|
|
290
|
+
|
|
291
|
+
console.log("Action performed:", result.performed);
|
|
292
|
+
console.log("New zoom:", result.info.newZoom);
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
## Configuration Tips
|
|
296
|
+
|
|
297
|
+
Because this SDK executes in the browser, protect your credentials:
|
|
298
|
+
|
|
299
|
+
- Use short-lived tokens (AWS Cognito, STS) instead of long-term keys.
|
|
300
|
+
- Proxy Bedrock requests through a backend service when possible.
|
|
301
|
+
- Keep Azure Speech keys in secured storage and rotate them regularly.
|
|
302
|
+
- Provide configuration values via runtime injection, not hard-coded literals.
|
|
303
|
+
|
|
304
|
+
## Browser Support
|
|
305
|
+
|
|
306
|
+
This SDK requires:
|
|
307
|
+
|
|
308
|
+
- Modern browser with ES2020 support
|
|
309
|
+
- Microphone access
|
|
310
|
+
- `MediaDevices` API support
|
|
311
|
+
- `fetch` API support
|
|
312
|
+
|
|
313
|
+
## Security Notes
|
|
314
|
+
|
|
315
|
+
⚠️ **Important**: Never expose your AWS credentials or API keys in client-side code in production. Consider using:
|
|
316
|
+
|
|
317
|
+
- AWS Cognito for temporary credentials
|
|
318
|
+
- API Gateway with authorization
|
|
319
|
+
- Backend proxy for sensitive operations
|
|
320
|
+
|
|
321
|
+
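For development, you can use environment variables and a build tool to inject them, but remember that any value injected into a browser bundle is visible to end users, so never use production credentials this way.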
|
|
322
|
+
|
|
323
|
+
## Migration from Script
|
|
324
|
+
|
|
325
|
+
If you're migrating from the previous script-based approach:
|
|
326
|
+
|
|
327
|
+
### Before (Script)
|
|
328
|
+
|
|
329
|
+
```javascript
|
|
330
|
+
// navigation_myscheme.js included in HTML
|
|
331
|
+
// API routes in Next.js pages/api/
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
### After (Library)
|
|
335
|
+
|
|
336
|
+
```typescript
|
|
337
|
+
import { initNavigationOnMicrophone } from "@myscheme/voice-navigation-sdk";
|
|
338
|
+
|
|
339
|
+
const controller = initNavigationOnMicrophone({
|
|
340
|
+
azure: {
|
|
341
|
+
/* config */
|
|
342
|
+
},
|
|
343
|
+
aws: {
|
|
344
|
+
/* config */
|
|
345
|
+
},
|
|
346
|
+
});
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
## License
|
|
350
|
+
|
|
351
|
+
MIT
|
|
352
|
+
|
|
353
|
+
## Contributing
|
|
354
|
+
|
|
355
|
+
Contributions are welcome! Please open an issue or submit a pull request.
|
|
356
|
+
|
|
357
|
+
## Support
|
|
358
|
+
|
|
359
|
+
For issues and questions, please open an issue on GitHub.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { NavigationAction, ActionContext, ActionResult, AgentActionResponse } from "./types.js";
|
|
2
|
+
/**
 * Takes a numeric `scale` and returns a number.
 *
 * NOTE(review): presumably applies `scale` as the page zoom and returns the
 * value actually in effect — the implementation is not visible here; confirm
 * against src/actions.ts.
 */
export declare const setZoomScale: (scale: number) => number;
|
|
3
|
+
/**
 * Takes a numeric `delta` and returns a number.
 *
 * NOTE(review): presumably changes the current zoom by `delta` and returns the
 * resulting zoom — the implementation is not visible here; confirm against
 * src/actions.ts.
 */
export declare const adjustZoom: (delta: number) => number;
|
|
4
|
+
/**
 * Maps an action string to another string.
 *
 * NOTE(review): presumably produces a human-readable label for display — the
 * implementation is not visible here; confirm against src/actions.ts.
 */
export declare const formatActionLabel: (action: string) => string;
|
|
5
|
+
/**
 * Performs a navigation action programmatically and returns an `ActionResult`.
 *
 * The package README calls this as `performAgentAction("zoom_in", { onStop })`
 * and then reads `performed` from the returned result.
 *
 * @param action - The navigation action to perform.
 * @param context - Optional action context; the README example supplies an
 *   `onStop` callback here.
 * @returns The result of the action. NOTE(review): the full shape of
 *   `ActionResult` is declared in ./types.js and is not visible here.
 */
export declare const performAgentAction: (action: NavigationAction, context?: ActionContext) => ActionResult;
|
|
6
|
+
/**
 * Takes an untyped `result` and returns an `AgentActionResponse`, or `null`.
 *
 * NOTE(review): presumably returns `null` when no action can be extracted from
 * `result`; the accepted shape of `result` is not visible here — confirm
 * against src/actions.ts.
 */
export declare const extractAgentAction: (result: any) => AgentActionResponse | null;
|
|
7
|
+
/**
 * Type guard for action strings: when this returns `true`, TypeScript narrows
 * `action` from `string` to `NavigationAction`.
 *
 * NOTE(review): the set of actions treated as allowed is defined in the
 * implementation, which is not visible here.
 */
export declare const isAllowedAction: (action: string) => action is NavigationAction;
|
|
8
|
+
//# sourceMappingURL=actions.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,gBAAgB,EAChB,aAAa,EACb,YAAY,EACZ,mBAAmB,EACpB,MAAM,YAAY,CAAC;AA0EpB,eAAO,MAAM,YAAY,GAAI,OAAO,MAAM,KAAG,MAS5C,CAAC;AAKF,eAAO,MAAM,UAAU,GAAI,OAAO,MAAM,KAAG,MAM1C,CAAC;AAKF,eAAO,MAAM,iBAAiB,GAAI,QAAQ,MAAM,KAAG,MAElD,CAAC;AAKF,eAAO,MAAM,kBAAkB,GAC7B,QAAQ,gBAAgB,EACxB,UAAS,aAAkB,KAC1B,YAoaF,CAAC;AAKF,eAAO,MAAM,kBAAkB,GAAI,QAAQ,GAAG,KAAG,mBAAmB,GAAG,IAgCtE,CAAC;AAKF,eAAO,MAAM,eAAe,GAAI,QAAQ,MAAM,KAAG,MAAM,IAAI,gBAE1D,CAAC"}
|