@myscheme/voice-navigation-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +359 -0
  2. package/dist/actions.d.ts +8 -0
  3. package/dist/actions.d.ts.map +1 -0
  4. package/dist/actions.js +478 -0
  5. package/dist/constants.d.ts +2 -0
  6. package/dist/constants.d.ts.map +1 -0
  7. package/dist/constants.js +1 -0
  8. package/dist/index.d.ts +11 -0
  9. package/dist/index.d.ts.map +1 -0
  10. package/dist/index.js +156 -0
  11. package/dist/microphone-handler.d.ts +47 -0
  12. package/dist/microphone-handler.d.ts.map +1 -0
  13. package/dist/microphone-handler.js +341 -0
  14. package/dist/navigation-controller.d.ts +50 -0
  15. package/dist/navigation-controller.d.ts.map +1 -0
  16. package/dist/navigation-controller.js +782 -0
  17. package/dist/server/index.d.ts +3 -0
  18. package/dist/server/index.d.ts.map +1 -0
  19. package/dist/server/index.js +1 -0
  20. package/dist/server/opensearch-handler.d.ts +52 -0
  21. package/dist/server/opensearch-handler.d.ts.map +1 -0
  22. package/dist/server/opensearch-handler.js +279 -0
  23. package/dist/services/azure-speech.d.ts +13 -0
  24. package/dist/services/azure-speech.d.ts.map +1 -0
  25. package/dist/services/azure-speech.js +33 -0
  26. package/dist/services/bedrock.d.ts +18 -0
  27. package/dist/services/bedrock.d.ts.map +1 -0
  28. package/dist/services/bedrock.js +132 -0
  29. package/dist/services/schemes.d.ts +2 -0
  30. package/dist/services/schemes.d.ts.map +1 -0
  31. package/dist/services/schemes.js +1 -0
  32. package/dist/services/vector-search.d.ts +21 -0
  33. package/dist/services/vector-search.d.ts.map +1 -0
  34. package/dist/services/vector-search.js +181 -0
  35. package/dist/types.d.ts +107 -0
  36. package/dist/types.d.ts.map +1 -0
  37. package/dist/types.js +1 -0
  38. package/dist/ui.d.ts +10 -0
  39. package/dist/ui.d.ts.map +1 -0
  40. package/dist/ui.js +225 -0
  41. package/package.json +55 -0
package/README.md ADDED
@@ -0,0 +1,359 @@
1
+ # Voice Navigation SDK
2
+
3
+ A TypeScript SDK for voice-controlled navigation that uses Azure Speech-to-Text for speech recognition and AWS Bedrock for intent understanding.
4
+
5
+ ## Features
6
+
7
+ - 🎤 Real-time speech recognition using Azure Speech SDK
8
+ - 🤖 Intent extraction using AWS Bedrock (Claude)
9
+ - 🧭 Voice-controlled navigation actions
10
+ - 🖱️ Rich browser and media controls
11
+ - ♿ Accessibility-first design
12
+ - 🎨 Customizable floating UI control
13
+ - 📦 Full TypeScript support
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ npm install @myscheme/voice-navigation-sdk
19
+ ```
20
+
21
+ ## Dependencies
22
+
23
+ This SDK requires:
24
+
25
+ - `@aws-sdk/client-bedrock-runtime` - For AWS Bedrock integration
26
+ - `microsoft-cognitiveservices-speech-sdk` - For Azure Speech-to-Text
27
+
28
+ These are automatically installed as dependencies.
29
+
30
+ ## Quick Start
31
+
32
+ ```typescript
33
+ import { initNavigationOnMicrophone } from "@myscheme/voice-navigation-sdk";
34
+
35
+ const controller = initNavigationOnMicrophone({
36
+ // Azure Speech configuration
37
+ azure: {
38
+ subscriptionKey: "your-azure-subscription-key",
39
+ region: "your-azure-region", // e.g., 'eastus'
40
+ },
41
+
42
+ // AWS Bedrock configuration
43
+ aws: {
44
+ accessKeyId: "your-aws-access-key-id",
45
+ secretAccessKey: "your-aws-secret-access-key",
46
+ modelId: "anthropic.claude-3-sonnet-20240229-v1:0",
47
+ region: "ap-south-1", // or your preferred region
48
+ },
49
+
50
+ // Optional: Default language
51
+ language: "en-IN",
52
+
53
+ // Optional: Auto-start voice control
54
+ autoStart: false,
55
+ });
56
+ ```
57
+
58
+ ## Configuration
59
+
60
+ ### NavigationConfig
61
+
62
+ | Property | Type | Required | Description |
63
+ | ----------------------- | --------- | -------- | ---------------------------------------------- |
64
+ | `azure.subscriptionKey` | `string` | ✅ | Azure Speech subscription key |
65
+ | `azure.region` | `string` | ✅ | Azure Speech region |
66
+ | `aws.accessKeyId` | `string` | ✅ | AWS access key ID |
67
+ | `aws.secretAccessKey` | `string` | ✅ | AWS secret access key |
68
+ | `aws.modelId` | `string` | ✅ | AWS Bedrock model ID |
69
+ | `aws.region` | `string` | ❌ | AWS region (default: 'ap-south-1') |
70
+ | `language` | `string` | ❌ | Speech recognition language (default: 'en-IN') |
71
+ | `autoStart` | `boolean` | ❌ | Auto-start voice control (default: false) |
72
+ | `actionHandlers` | `object` | ❌ | Map of custom callbacks keyed by action |
73
+ | `opensearch` | `object` | ❌ | OpenSearch vector search configuration |
74
+
75
+ When you provide the optional `opensearch` block, include the following fields:
76
+
77
+ | Property | Type | Required | Description |
78
+ | --------------- | -------- | -------- | ------------------------------------------------------------------ |
79
+ | `node` | `string` | ✅ | OpenSearch cluster URL (e.g. `https://cluster.example.com`) |
80
+ | `username` | `string` | ✅ | OpenSearch user name |
81
+ | `password` | `string` | ✅ | OpenSearch password |
82
+ | `index` | `string` | ✅ | Index name containing your embeddings |
83
+ | `vectorField` | `string` | ❌ | Embedding field (default: `embedding`) |
84
+ | `size` | `number` | ❌ | Result count returned to the browser (default: `5`) |
85
+ | `numCandidates` | `number` | ❌ | Candidate pool size for k-NN search (default: `Math.max(size * 4, 20)`) |
86
+ | `minScore` | `number` | ❌ | Minimum score to accept a hit (default: `0`) |
87
+ | `sourceFields` | `string[]` | ❌ | Source fields to request from OpenSearch |
88
+ | `apiPath` | `string` | ❌ | Browser endpoint that proxies vector search (default: `/api/voice-navigation/vector-search`) |
89
+
90
+ ## Supported Actions
91
+
92
+ The SDK supports the following voice commands:
93
+
94
+ ### Navigation Actions
95
+
96
+ - `navigate_home` - Navigate to home page
97
+ - `navigate_search` - Navigate to search page
98
+ - `navigate_faqs` - Navigate to FAQs
99
+ - `navigate_help` - Navigate to help
100
+ - `navigate_contact` - Navigate to contact
101
+ - `navigate_about` - Navigate to about
102
+ - `navigate_screen_reader` - Navigate to screen reader access
103
+ - `navigate_accessibility` - Navigate to accessibility page
104
+ - `navigate_disclaimer` - Navigate to disclaimer
105
+ - `navigate_terms_conditions` - Navigate to terms and conditions
106
+
107
+ ### Page & UI Actions
108
+
109
+ - `zoom_in` / `zoom_out` - Adjust page zoom
110
+ - `scroll_up` / `scroll_down` / `scroll_left` / `scroll_right` - Scroll in any direction
111
+ - `page_up` / `page_down` - Jump by one viewport height
112
+ - `scroll_top` / `scroll_bottom` - Jump to page edges
113
+ - `go_back` / `go_forward` - Navigate browser history
114
+ - `reload_page` - Reload current page
115
+ - `print_page` - Open print dialog
116
+ - `copy_url` - Copy current page URL to clipboard
117
+ - `open_menu` / `close_menu` - Toggle navigation menus
118
+ - `focus_search` - Focus the primary search input
119
+ - `toggle_fullscreen` / `exit_fullscreen` - Control fullscreen mode
120
+ - `play_media` / `pause_media` - Control media playback
121
+ - `mute_media` / `unmute_media` - Control media volume
122
+ - `stop` - Stop voice control
123
+
124
+ ## API Reference
125
+
126
+ ### VoiceNavigationController
127
+
128
+ The main controller class for voice navigation.
129
+
130
+ #### Methods
131
+
132
+ ##### `start(): Promise<void>`
133
+
134
+ Start voice control and begin listening.
135
+
136
+ ```typescript
137
+ await controller.start();
138
+ ```
139
+
140
+ ##### `stop(): Promise<void>`
141
+
142
+ Stop voice control and process any pending speech.
143
+
144
+ ```typescript
145
+ await controller.stop();
146
+ ```
147
+
148
+ ##### `setLanguage(language: string): void`
149
+
150
+ Change the speech recognition language.
151
+
152
+ ```typescript
153
+ controller.setLanguage("hi-IN"); // Switch to Hindi
154
+ ```
155
+
156
+ ##### `setAutoStart(enabled: boolean): void`
157
+
158
+ Enable or disable auto-start on future page loads.
159
+
160
+ ```typescript
161
+ controller.setAutoStart(true);
162
+ ```
163
+
164
+ ##### `destroy(): void`
165
+
166
+ Clean up and remove the controller.
167
+
168
+ ```typescript
169
+ controller.destroy();
170
+ ```
171
+
172
+ ### Events
173
+
174
+ The SDK emits custom events that you can listen for:
175
+
176
+ ```typescript
177
+ // Listen for state changes
178
+ window.addEventListener("navigate:state-change", (event) => {
179
+ console.log("State:", event.detail.state);
180
+ });
181
+
182
+ // Listen for action detection
183
+ window.addEventListener("navigate:action-detected", (event) => {
184
+ console.log("Action:", event.detail.action);
185
+ });
186
+
187
+ // Listen for action performance
188
+ window.addEventListener("navigate:action-performed", (event) => {
189
+ console.log("Performed:", event.detail.performed);
190
+ });
191
+
192
+ // Listen for errors
193
+ window.addEventListener("navigate:error", (event) => {
194
+ console.error("Error:", event.detail.error);
195
+ });
196
+ ```
197
+
198
+ ## Advanced Usage
199
+
200
+ ### Vector Search with OpenSearch
201
+
202
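+ 1. Supply the OpenSearch settings to the client when you initialise the library. Note that in Next.js, `NEXT_PUBLIC_*` variables are inlined into the browser bundle, so any value passed this way is visible to end users: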
203
+
204
+ ```typescript
205
+ import { initNavigationOnMicrophone } from "@myscheme/voice-navigation-sdk";
206
+
207
+ initNavigationOnMicrophone({
208
+ azure: { /* ... */ },
209
+ aws: { /* ... */ },
210
+ opensearch: {
211
+ node: process.env.NEXT_PUBLIC_OPENSEARCH_NODE!,
212
+ username: process.env.NEXT_PUBLIC_OPENSEARCH_USERNAME!,
213
+ password: process.env.NEXT_PUBLIC_OPENSEARCH_PASSWORD!,
214
+ index: "my-embeddings-index",
215
+ // optional overrides:
216
+ // vectorField: "embedding",
217
+ // size: 5,
218
+ // apiPath: "/api/voice-navigation/vector-search",
219
+ },
220
+ });
221
+ ```
222
+
223
+ 2. Expose a server-side proxy so the browser never talks to OpenSearch directly. In a Next.js project, create a route such as `pages/api/voice-navigation/vector-search.ts`:
224
+
225
+ ```typescript
226
+ import type { NextApiRequest, NextApiResponse } from "next";
227
+ import { createOpenSearchProxyHandler } from "@myscheme/voice-navigation-sdk/server";
228
+
229
+ const handler = createOpenSearchProxyHandler({
230
+ // Optionally lock down allowed origins for CORS style checks.
231
+ allowedOrigins: process.env.OPENSEARCH_ALLOWED_ORIGINS?.split(",") ?? undefined,
232
+ });
233
+
234
+ export default async function vectorSearchProxy(
235
+ req: NextApiRequest,
236
+ res: NextApiResponse,
237
+ ) {
238
+ await handler(req, res);
239
+ }
240
+
241
+ // Disable Next.js body parsing so the proxy can stream the incoming payload.
242
+ export const config = {
243
+ api: {
244
+ bodyParser: false,
245
+ },
246
+ };
247
+ ```
248
+
249
+ Mount the route at the same `apiPath` you configured on the client (defaults to `/api/voice-navigation/vector-search`). The handler lives in the `@myscheme/voice-navigation-sdk/server` export so it only runs on the server where the official OpenSearch client is available.
250
+
251
+ ### Custom Service Initialization
252
+
253
+ You can use the individual services separately:
254
+
255
+ ```typescript
256
+ import {
257
+ AzureSpeechService,
258
+ BedrockService,
259
+ } from "@myscheme/voice-navigation-sdk";
260
+
261
+ // Azure Speech Service
262
+ const azureService = new AzureSpeechService({
263
+ subscriptionKey: "your-key",
264
+ region: "your-region",
265
+ });
266
+
267
+ const tokenResponse = await azureService.fetchToken();
268
+
269
+ // Bedrock Service
270
+ const bedrockService = new BedrockService({
271
+ region: "ap-south-1",
272
+ accessKeyId: "your-key",
273
+ secretAccessKey: "your-secret",
274
+ modelId: "anthropic.claude-3-sonnet-20240229-v1:0",
275
+ });
276
+
277
+ const action = await bedrockService.extractAction("zoom in");
278
+ ```
279
+
280
+ ### Custom Action Handlers
281
+
282
+ You can also perform actions programmatically with `performAgentAction`; the optional second argument supplies callbacks such as `onStop`:
283
+
284
+ ```typescript
285
+ import { performAgentAction } from "@myscheme/voice-navigation-sdk";
286
+
287
+ const result = performAgentAction("zoom_in", {
288
+ onStop: () => console.log("Stopped"),
289
+ });
290
+
291
+ console.log("Action performed:", result.performed);
292
+ console.log("New zoom:", result.info.newZoom);
293
+ ```
294
+
295
+ ## Configuration Tips
296
+
297
+ Because this SDK executes in the browser, protect your credentials:
298
+
299
+ - Use short-lived tokens (AWS Cognito, STS) instead of long-term keys.
300
+ - Proxy Bedrock requests through a backend service when possible.
301
+ - Keep Azure Speech keys in secured storage and rotate them regularly.
302
+ - Provide configuration values via runtime injection, not hard-coded literals.
303
+
304
+ ## Browser Support
305
+
306
+ This SDK requires:
307
+
308
+ - Modern browser with ES2020 support
309
+ - Microphone access
310
+ - `MediaDevices` API support
311
+ - `fetch` API support
312
+
313
+ ## Security Notes
314
+
315
+ ⚠️ **Important**: Never expose your AWS credentials or API keys in client-side code in production. Consider using:
316
+
317
+ - AWS Cognito for temporary credentials
318
+ - API Gateway with authorization
319
+ - Backend proxy for sensitive operations
320
+
321
+ For development, you can inject them with environment variables and a build tool, but remember that any value bundled into client-side code can still be read by users.
322
+
323
+ ## Migration from Script
324
+
325
+ If you're migrating from the previous script-based approach:
326
+
327
+ ### Before (Script)
328
+
329
+ ```javascript
330
+ // navigation_myscheme.js included in HTML
331
+ // API routes in Next.js pages/api/
332
+ ```
333
+
334
+ ### After (Library)
335
+
336
+ ```typescript
337
+ import { initNavigationOnMicrophone } from "@myscheme/voice-navigation-sdk";
338
+
339
+ const controller = initNavigationOnMicrophone({
340
+ azure: {
341
+ /* config */
342
+ },
343
+ aws: {
344
+ /* config */
345
+ },
346
+ });
347
+ ```
348
+
349
+ ## License
350
+
351
+ MIT
352
+
353
+ ## Contributing
354
+
355
+ Contributions are welcome! Please open an issue or submit a pull request.
356
+
357
+ ## Support
358
+
359
+ For issues and questions, please open an issue on GitHub.
@@ -0,0 +1,8 @@
1
+ import type { NavigationAction, ActionContext, ActionResult, AgentActionResponse } from "./types.js";
2
+ export declare const setZoomScale: (scale: number) => number;
3
+ export declare const adjustZoom: (delta: number) => number;
4
+ export declare const formatActionLabel: (action: string) => string;
5
+ export declare const performAgentAction: (action: NavigationAction, context?: ActionContext) => ActionResult;
6
+ export declare const extractAgentAction: (result: any) => AgentActionResponse | null;
7
+ export declare const isAllowedAction: (action: string) => action is NavigationAction;
8
+ //# sourceMappingURL=actions.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,gBAAgB,EAChB,aAAa,EACb,YAAY,EACZ,mBAAmB,EACpB,MAAM,YAAY,CAAC;AA0EpB,eAAO,MAAM,YAAY,GAAI,OAAO,MAAM,KAAG,MAS5C,CAAC;AAKF,eAAO,MAAM,UAAU,GAAI,OAAO,MAAM,KAAG,MAM1C,CAAC;AAKF,eAAO,MAAM,iBAAiB,GAAI,QAAQ,MAAM,KAAG,MAElD,CAAC;AAKF,eAAO,MAAM,kBAAkB,GAC7B,QAAQ,gBAAgB,EACxB,UAAS,aAAkB,KAC1B,YAoaF,CAAC;AAKF,eAAO,MAAM,kBAAkB,GAAI,QAAQ,GAAG,KAAG,mBAAmB,GAAG,IAgCtE,CAAC;AAKF,eAAO,MAAM,eAAe,GAAI,QAAQ,MAAM,KAAG,MAAM,IAAI,gBAE1D,CAAC"}