@lokascript/domain-voice 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,644 @@
1
+ /**
2
+ * Voice/Accessibility Tokenizers
3
+ *
4
+ * Language-specific tokenizers for voice command input (8 languages).
5
+ * All tokenizers include CSS selector support (#id, .class) since
6
+ * voice commands may reference DOM elements by selector.
7
+ */
8
+
9
+ import { createSimpleTokenizer } from '@lokascript/framework';
10
+ import type { LanguageTokenizer, ValueExtractor, ExtractionResult } from '@lokascript/framework';
11
+
12
+ // =============================================================================
13
+ // CSS Selector Extractor
14
+ // Handles #id and .class references in voice commands
15
+ // =============================================================================
16
+
17
/**
 * Extracts CSS id (`#name`) and class (`.name`) selectors from voice-command input.
 *
 * A selector is a `#` or `.` prefix followed by at least one identifier
 * character (Unicode letter, Unicode digit, underscore or hyphen).
 */
class CSSSelectorExtractor implements ValueExtractor {
  readonly name = 'css-selector';

  /** One identifier character. Hoisted so it is not rebuilt per scanned character. */
  private static readonly IDENT_CHAR = /[\p{L}\p{N}_-]/u;

  /**
   * Returns the index just past the run of identifier characters starting at `start`.
   * Scans by code point so letters encoded as surrogate pairs are matched as a unit.
   */
  private static scanIdentifier(input: string, start: number): number {
    let end = start;
    while (end < input.length) {
      const codePoint = input.codePointAt(end);
      if (codePoint === undefined) break;
      const ch = String.fromCodePoint(codePoint);
      if (!CSSSelectorExtractor.IDENT_CHAR.test(ch)) break;
      end += ch.length; // 1 or 2 UTF-16 code units
    }
    return end;
  }

  /**
   * @param input - Full text being tokenized.
   * @param position - UTF-16 index to inspect.
   * @returns `true` when the character at `position` is `#` or `.`.
   */
  canExtract(input: string, position: number): boolean {
    const ch = input[position];
    return ch === '#' || ch === '.';
  }

  /**
   * @param input - Full text being tokenized.
   * @param position - UTF-16 index of the `#` / `.` prefix.
   * @returns The selector text and its length in UTF-16 code units, or `null`
   *          when there is no prefix at `position` or the prefix stands alone.
   */
  extract(input: string, position: number): ExtractionResult | null {
    if (!this.canExtract(input, position)) return null;

    const bodyStart = position + 1;
    const end = CSSSelectorExtractor.scanIdentifier(input, bodyStart);
    if (end === bodyStart) return null; // just # or . alone

    return { value: input.slice(position, end), length: end - position };
  }
}
39
+
40
+ // =============================================================================
41
+ // Latin Extended Identifier Extractor
42
+ // Handles Latin-script languages with diacritics (Spanish á, ñ; French à, é; Turkish ş, ç, ü)
43
+ // =============================================================================
44
+
45
/**
 * Extracts identifiers written in Latin script with diacritics (or any other
 * Unicode letters): a run of Unicode letters, Unicode digits, underscores and
 * hyphens.
 */
class LatinExtendedIdentifierExtractor implements ValueExtractor {
  readonly name = 'latin-extended-identifier';

  /** A single Unicode letter (the only character that may trigger extraction). */
  private static readonly LETTER = /\p{L}/u;

  /** One identifier character. Hoisted so it is not rebuilt per scanned character. */
  private static readonly IDENT_CHAR = /[\p{L}\p{N}_-]/u;

  /**
   * @param input - Full text being tokenized.
   * @param position - UTF-16 index to inspect.
   * @returns `true` when a Unicode letter starts at `position`; `false` for
   *          any position outside the string.
   */
  canExtract(input: string, position: number): boolean {
    // codePointAt yields undefined out of range. Testing `input[position]`
    // directly would coerce undefined to the string "undefined" and match.
    const codePoint = input.codePointAt(position);
    if (codePoint === undefined) return false;
    return LatinExtendedIdentifierExtractor.LETTER.test(String.fromCodePoint(codePoint));
  }

  /**
   * @param input - Full text being tokenized.
   * @param position - UTF-16 index at which the identifier should start.
   * @returns The identifier text and its length in UTF-16 code units, or
   *          `null` when no identifier character is found at `position`.
   */
  extract(input: string, position: number): ExtractionResult | null {
    let end = position;
    while (end < input.length) {
      const codePoint = input.codePointAt(end);
      if (codePoint === undefined) break;
      const ch = String.fromCodePoint(codePoint);
      if (!LatinExtendedIdentifierExtractor.IDENT_CHAR.test(ch)) break;
      end += ch.length; // 1 or 2 UTF-16 code units
    }

    if (end === position) return null;
    return { value: input.slice(position, end), length: end - position };
  }
}
61
+
62
+ // Shared CSS selector extractor instance: one object, passed in the customExtractors list of every tokenizer in this file
63
+ const cssSelectorExtractor = new CSSSelectorExtractor();
64
+
65
+ // =============================================================================
66
+ // English
67
+ // =============================================================================
68
+
69
/**
 * Tokenizer configuration for English voice commands.
 *
 * Left-to-right, case-insensitive, with the shared CSS selector extractor.
 * Each keyword is listed exactly once; 'in' serves both as a marker and as a
 * zoom word.
 */
export const EnglishVoiceTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'en',
  direction: 'ltr',
  customExtractors: [cssSelectorExtractor],
  keywords: [
    // commands
    'navigate', 'go', 'click', 'press', 'tap', 'type', 'enter', 'scroll', 'read', 'say',
    'zoom', 'select', 'back', 'forward', 'focus', 'close', 'open', 'search', 'find', 'help',
    // markers
    'to', 'into', 'by', 'in', 'on', 'the', 'a',
    // direction words
    'up', 'down', 'left', 'right', 'top', 'bottom',
    // zoom words ('in' is already listed under markers)
    'out', 'reset',
    // targets
    'tab', 'dialog', 'modal', 'menu', 'page', 'all',
  ],
  caseInsensitive: true,
});
124
+
125
+ // =============================================================================
126
+ // Spanish
127
+ // =============================================================================
128
+
129
/**
 * Tokenizer configuration for Spanish voice commands.
 *
 * Left-to-right, case-insensitive. Uses the shared CSS selector extractor plus
 * a Latin-extended identifier extractor for accented words.
 *
 * NOTE(review): unlike the other non-English tokenizers in this file, no
 * keywordExtras / keywordProfile are supplied here — confirm that is intended.
 */
export const SpanishVoiceTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'es',
  direction: 'ltr',
  customExtractors: [cssSelectorExtractor, new LatinExtendedIdentifierExtractor()],
  keywords: [
    // commands
    'navegar', 'ir', 'clic', 'pulsar', 'escribir', 'desplazar', 'leer', 'zoom', 'seleccionar',
    'atrás', 'volver', 'adelante', 'enfocar', 'cerrar', 'abrir', 'buscar', 'ayuda',
    // markers
    'a', 'en', 'por', 'el', 'la', 'de',
    // NOTE(review): 'sur' also appears in the French marker list; in Spanish
    // it is not a preposition — verify this entry is not a copy-over.
    'sur',
    // direction words
    'arriba', 'abajo', 'izquierda', 'derecha',
    // quantity / target words
    'más', 'menos', 'todo', 'página', 'diálogo',
  ],
  caseInsensitive: true,
});
170
+
171
+ // =============================================================================
172
+ // Japanese
173
+ // =============================================================================
174
+
175
/**
 * Tokenizer configuration for Japanese voice commands.
 *
 * Left-to-right, case-sensitive, with the shared CSS selector extractor.
 * `keywordExtras` pairs native words with their normalized names;
 * `keywordProfile` names the primary native word for each command.
 */
export const JapaneseVoiceTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'ja',
  direction: 'ltr',
  customExtractors: [cssSelectorExtractor],
  keywords: [
    // commands (the same words appear in keywordProfile below)
    '移動', 'クリック', '入力', 'スクロール', '読む', 'ズーム', '選択',
    '戻る', '進む', 'フォーカス', '閉じる', '開く', '検索', 'ヘルプ',
    // particles / markers
    'を', 'に', 'で', 'の', 'だけ',
    // direction words
    '上', '下', '左', '右',
    // zoom words
    'イン', 'アウト', 'リセット',
    // targets
    'タブ', 'ダイアログ', 'ページ', '全て',
  ],
  keywordExtras: [
    // commands
    { native: '移動', normalized: 'navigate' },
    { native: 'クリック', normalized: 'click' },
    { native: '入力', normalized: 'type' },
    { native: 'スクロール', normalized: 'scroll' },
    { native: '読む', normalized: 'read' },
    { native: 'ズーム', normalized: 'zoom' },
    { native: '選択', normalized: 'select' },
    { native: '戻る', normalized: 'back' },
    { native: '進む', normalized: 'forward' },
    { native: 'フォーカス', normalized: 'focus' },
    { native: '閉じる', normalized: 'close' },
    { native: '開く', normalized: 'open' },
    { native: '検索', normalized: 'search' },
    { native: 'ヘルプ', normalized: 'help' },
    // particles, normalized to their romanized form
    { native: 'を', normalized: 'wo' },
    { native: 'に', normalized: 'ni' },
    { native: 'で', normalized: 'de' },
  ],
  keywordProfile: {
    keywords: {
      navigate: { primary: '移動' },
      click: { primary: 'クリック' },
      type: { primary: '入力' },
      scroll: { primary: 'スクロール' },
      read: { primary: '読む' },
      zoom: { primary: 'ズーム' },
      select: { primary: '選択' },
      back: { primary: '戻る' },
      forward: { primary: '進む' },
      focus: { primary: 'フォーカス' },
      close: { primary: '閉じる' },
      open: { primary: '開く' },
      search: { primary: '検索' },
      help: { primary: 'ヘルプ' },
    },
  },
  caseInsensitive: false,
});
250
+
251
+ // =============================================================================
252
+ // Arabic (VSO)
253
+ // =============================================================================
254
+
255
/**
 * Tokenizer configuration for Arabic voice commands.
 *
 * Right-to-left, case-sensitive, with the shared CSS selector extractor.
 * `keywordExtras` pairs native words with their normalized names;
 * `keywordProfile` names the primary native word for each command.
 * Keywords are kept one per line so the RTL literals are not visually
 * reordered by bidirectional text rendering.
 */
export const ArabicVoiceTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'ar',
  direction: 'rtl',
  customExtractors: [cssSelectorExtractor],
  keywords: [
    // commands (the same words appear in keywordProfile below)
    'انتقل',
    'انقر',
    'اكتب',
    'تمرير',
    'اقرأ',
    'تكبير',
    'اختر',
    'رجوع',
    'تقدم',
    'ركز',
    'أغلق',
    'افتح',
    'ابحث',
    'مساعدة',
    // markers
    'إلى',
    'على',
    'في',
    'عن',
    'ب',
    // direction words
    'أعلى',
    'أسفل',
    'يسار',
    'يمين',
    // targets
    'الكل',
    'الصفحة',
    'الحوار',
  ],
  keywordExtras: [
    // commands
    { native: 'انتقل', normalized: 'navigate' },
    { native: 'انقر', normalized: 'click' },
    { native: 'اكتب', normalized: 'type' },
    { native: 'تمرير', normalized: 'scroll' },
    { native: 'اقرأ', normalized: 'read' },
    { native: 'تكبير', normalized: 'zoom' },
    { native: 'اختر', normalized: 'select' },
    { native: 'رجوع', normalized: 'back' },
    { native: 'تقدم', normalized: 'forward' },
    { native: 'ركز', normalized: 'focus' },
    { native: 'أغلق', normalized: 'close' },
    { native: 'افتح', normalized: 'open' },
    { native: 'ابحث', normalized: 'search' },
    { native: 'مساعدة', normalized: 'help' },
    // markers
    { native: 'إلى', normalized: 'to' },
    { native: 'على', normalized: 'on' },
    { native: 'في', normalized: 'in' },
    { native: 'عن', normalized: 'about' },
  ],
  keywordProfile: {
    keywords: {
      navigate: { primary: 'انتقل' },
      click: { primary: 'انقر' },
      type: { primary: 'اكتب' },
      scroll: { primary: 'تمرير' },
      read: { primary: 'اقرأ' },
      zoom: { primary: 'تكبير' },
      select: { primary: 'اختر' },
      back: { primary: 'رجوع' },
      forward: { primary: 'تقدم' },
      focus: { primary: 'ركز' },
      close: { primary: 'أغلق' },
      open: { primary: 'افتح' },
      search: { primary: 'ابحث' },
      help: { primary: 'مساعدة' },
    },
  },
  caseInsensitive: false,
});
327
+
328
+ // =============================================================================
329
+ // Korean (SOV)
330
+ // =============================================================================
331
+
332
/**
 * Tokenizer configuration for Korean voice commands.
 *
 * Left-to-right, case-sensitive, with the shared CSS selector extractor.
 * `keywordExtras` pairs native words with their normalized names;
 * `keywordProfile` names the primary native word for each command.
 */
export const KoreanVoiceTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'ko',
  direction: 'ltr',
  customExtractors: [cssSelectorExtractor],
  keywords: [
    // commands (the same words appear in keywordProfile below)
    '이동', '클릭', '입력', '스크롤', '읽기', '확대', '선택',
    '뒤로', '앞으로', '포커스', '닫기', '열기', '검색', '도움말',
    // particles / markers
    '을', '를', '에', '에서', '로', '만큼',
    // direction words
    '위', '아래', '왼쪽', '오른쪽',
    // targets
    '전체', '탭', '대화상자', '페이지',
  ],
  keywordExtras: [
    // commands
    { native: '이동', normalized: 'navigate' },
    { native: '클릭', normalized: 'click' },
    { native: '입력', normalized: 'type' },
    { native: '스크롤', normalized: 'scroll' },
    { native: '읽기', normalized: 'read' },
    { native: '확대', normalized: 'zoom' },
    { native: '선택', normalized: 'select' },
    { native: '뒤로', normalized: 'back' },
    { native: '앞으로', normalized: 'forward' },
    { native: '포커스', normalized: 'focus' },
    { native: '닫기', normalized: 'close' },
    { native: '열기', normalized: 'open' },
    { native: '검색', normalized: 'search' },
    { native: '도움말', normalized: 'help' },
    // particles, normalized to their romanized form
    { native: '을', normalized: 'eul' },
    { native: '를', normalized: 'reul' },
    { native: '에', normalized: 'e' },
    { native: '에서', normalized: 'eseo' },
    { native: '로', normalized: 'ro' },
  ],
  keywordProfile: {
    keywords: {
      navigate: { primary: '이동' },
      click: { primary: '클릭' },
      type: { primary: '입력' },
      scroll: { primary: '스크롤' },
      read: { primary: '읽기' },
      zoom: { primary: '확대' },
      select: { primary: '선택' },
      back: { primary: '뒤로' },
      forward: { primary: '앞으로' },
      focus: { primary: '포커스' },
      close: { primary: '닫기' },
      open: { primary: '열기' },
      search: { primary: '검색' },
      help: { primary: '도움말' },
    },
  },
  caseInsensitive: false,
});
407
+
408
+ // =============================================================================
409
+ // Chinese (SVO)
410
+ // =============================================================================
411
+
412
/**
 * Tokenizer configuration for Chinese voice commands.
 *
 * Left-to-right, case-sensitive, with the shared CSS selector extractor.
 * `keywordExtras` pairs native words with their normalized names;
 * `keywordProfile` names the primary native word for each command.
 */
export const ChineseVoiceTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'zh',
  direction: 'ltr',
  customExtractors: [cssSelectorExtractor],
  keywords: [
    // commands (the same words appear in keywordProfile below)
    '导航', '点击', '输入', '滚动', '朗读', '缩放', '选择',
    '返回', '前进', '聚焦', '关闭', '打开', '搜索', '帮助',
    // markers
    '到', '在', '幅',
    // direction words
    '上', '下', '左', '右',
    // targets
    '全部', '标签', '对话框', '页面',
    // zoom words
    '放大', '缩小', '重置',
  ],
  keywordExtras: [
    // commands
    { native: '导航', normalized: 'navigate' },
    { native: '点击', normalized: 'click' },
    { native: '输入', normalized: 'type' },
    { native: '滚动', normalized: 'scroll' },
    { native: '朗读', normalized: 'read' },
    { native: '缩放', normalized: 'zoom' },
    { native: '选择', normalized: 'select' },
    { native: '返回', normalized: 'back' },
    { native: '前进', normalized: 'forward' },
    { native: '聚焦', normalized: 'focus' },
    { native: '关闭', normalized: 'close' },
    { native: '打开', normalized: 'open' },
    { native: '搜索', normalized: 'search' },
    { native: '帮助', normalized: 'help' },
    // markers
    { native: '到', normalized: 'to' },
    { native: '在', normalized: 'in' },
  ],
  keywordProfile: {
    keywords: {
      navigate: { primary: '导航' },
      click: { primary: '点击' },
      type: { primary: '输入' },
      scroll: { primary: '滚动' },
      read: { primary: '朗读' },
      zoom: { primary: '缩放' },
      select: { primary: '选择' },
      back: { primary: '返回' },
      forward: { primary: '前进' },
      focus: { primary: '聚焦' },
      close: { primary: '关闭' },
      open: { primary: '打开' },
      search: { primary: '搜索' },
      help: { primary: '帮助' },
    },
  },
  caseInsensitive: false,
});
484
+
485
+ // =============================================================================
486
+ // Turkish (SOV)
487
+ // =============================================================================
488
+
489
/**
 * Tokenizer configuration for Turkish voice commands.
 *
 * Left-to-right, case-insensitive. Uses the shared CSS selector extractor plus
 * a Latin-extended identifier extractor for words with ş, ç, ü, ı, ğ, etc.
 * `keywordExtras` pairs native words with their normalized names;
 * `keywordProfile` names the primary native word for each command.
 *
 * NOTE(review): caseInsensitive is enabled, and locale-independent lowercasing
 * maps 'I' to 'i' rather than 'ı' — confirm how the framework folds case for
 * keywords such as 'tıkla'.
 */
export const TurkishVoiceTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'tr',
  direction: 'ltr',
  customExtractors: [cssSelectorExtractor, new LatinExtendedIdentifierExtractor()],
  keywords: [
    // commands (the same words appear in keywordProfile below)
    'git', 'tıkla', 'yaz', 'kaydır', 'oku', 'yakınlaş', 'seç',
    'geri', 'ileri', 'odakla', 'kapat', 'aç', 'ara', 'yardım',
    // markers
    'ya', 'da', 'kadar',
    // direction words
    'yukarı', 'aşağı', 'sol', 'sağ',
    // targets
    'sekme', 'diyalog', 'sayfa', 'hepsi',
  ],
  keywordExtras: [
    // commands
    { native: 'git', normalized: 'navigate' },
    { native: 'tıkla', normalized: 'click' },
    { native: 'yaz', normalized: 'type' },
    { native: 'kaydır', normalized: 'scroll' },
    { native: 'oku', normalized: 'read' },
    { native: 'yakınlaş', normalized: 'zoom' },
    { native: 'seç', normalized: 'select' },
    { native: 'geri', normalized: 'back' },
    { native: 'ileri', normalized: 'forward' },
    { native: 'odakla', normalized: 'focus' },
    { native: 'kapat', normalized: 'close' },
    { native: 'aç', normalized: 'open' },
    { native: 'ara', normalized: 'search' },
    { native: 'yardım', normalized: 'help' },
    // markers
    { native: 'ya', normalized: 'to' },
    { native: 'da', normalized: 'in' },
  ],
  keywordProfile: {
    keywords: {
      navigate: { primary: 'git' },
      click: { primary: 'tıkla' },
      type: { primary: 'yaz' },
      scroll: { primary: 'kaydır' },
      read: { primary: 'oku' },
      zoom: { primary: 'yakınlaş' },
      select: { primary: 'seç' },
      back: { primary: 'geri' },
      forward: { primary: 'ileri' },
      focus: { primary: 'odakla' },
      close: { primary: 'kapat' },
      open: { primary: 'aç' },
      search: { primary: 'ara' },
      help: { primary: 'yardım' },
    },
  },
  caseInsensitive: true,
});
558
+
559
+ // =============================================================================
560
+ // French (SVO)
561
+ // =============================================================================
562
+
563
/**
 * Tokenizer configuration for French voice commands.
 *
 * Left-to-right, case-insensitive. Uses the shared CSS selector extractor plus
 * a Latin-extended identifier extractor for accented words.
 * `keywordExtras` pairs native words with their normalized names;
 * `keywordProfile` names the primary native word (and alternatives) for each
 * command. The two tables agree on 'écrire': it is an alternative of `type`
 * and is normalized to 'type'.
 */
export const FrenchVoiceTokenizer: LanguageTokenizer = createSimpleTokenizer({
  language: 'fr',
  direction: 'ltr',
  customExtractors: [cssSelectorExtractor, new LatinExtendedIdentifierExtractor()],
  keywords: [
    // commands
    'naviguer', 'aller', 'cliquer', 'taper', 'écrire', 'défiler', 'lire', 'zoomer',
    'sélectionner', 'retour', 'avancer', 'focaliser', 'fermer', 'ouvrir', 'chercher',
    'rechercher', 'aide',
    // markers
    'vers', 'dans', 'de', 'sur', 'le', 'la', 'les', 'un', 'une',
    // direction words
    'haut', 'bas', 'gauche', 'droite',
    // targets
    'onglet', 'dialogue', 'page', 'tout',
  ],
  keywordExtras: [
    // commands
    { native: 'naviguer', normalized: 'navigate' },
    { native: 'aller', normalized: 'go' },
    { native: 'cliquer', normalized: 'click' },
    { native: 'taper', normalized: 'type' },
    // Was 'write', which contradicted keywordProfile.type.alternatives below
    // and matches no command name used by any tokenizer in this file.
    { native: 'écrire', normalized: 'type' },
    { native: 'défiler', normalized: 'scroll' },
    { native: 'lire', normalized: 'read' },
    { native: 'zoomer', normalized: 'zoom' },
    { native: 'sélectionner', normalized: 'select' },
    { native: 'retour', normalized: 'back' },
    { native: 'avancer', normalized: 'forward' },
    { native: 'focaliser', normalized: 'focus' },
    { native: 'fermer', normalized: 'close' },
    { native: 'ouvrir', normalized: 'open' },
    { native: 'chercher', normalized: 'search' },
    { native: 'rechercher', normalized: 'search' },
    { native: 'aide', normalized: 'help' },
    // markers
    { native: 'vers', normalized: 'to' },
    { native: 'dans', normalized: 'in' },
    { native: 'sur', normalized: 'on' },
  ],
  keywordProfile: {
    keywords: {
      navigate: { primary: 'naviguer', alternatives: ['aller'] },
      click: { primary: 'cliquer' },
      type: { primary: 'taper', alternatives: ['écrire'] },
      scroll: { primary: 'défiler' },
      read: { primary: 'lire' },
      zoom: { primary: 'zoomer' },
      select: { primary: 'sélectionner' },
      back: { primary: 'retour' },
      forward: { primary: 'avancer' },
      focus: { primary: 'focaliser' },
      close: { primary: 'fermer' },
      open: { primary: 'ouvrir' },
      search: { primary: 'chercher', alternatives: ['rechercher'] },
      help: { primary: 'aide' },
    },
  },
  caseInsensitive: true,
});
package/src/types.ts ADDED
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Voice/Accessibility Domain Types
3
+ */
4
+
5
+ import type { SemanticNode } from '@lokascript/framework';
6
+ import { extractRoleValue } from '@lokascript/framework';
7
+
8
/**
 * Flat, typed summary of a parsed voice command.
 *
 * Lets dispatchers read command details directly instead of walking the
 * SemanticNode again. Instances are produced by `toVoiceActionSpec`.
 */
export interface VoiceActionSpec {
  /** Action name, copied from the node's `action`. */
  action: string;
  /** Value of the 'patient' role, falling back to the 'destination' role. */
  target?: string;
  /** Value of the 'patient' role. */
  value?: string;
  /** Value of the 'manner' role. */
  direction?: string;
  /** Value of the 'quantity' role. */
  amount?: string;
  /** Information about how the command was produced. */
  metadata: { sourceLanguage: string };
}
23
+
24
/**
 * Flatten a parsed SemanticNode into a VoiceActionSpec.
 *
 * Role values are read with `extractRoleValue`; any falsy result (including
 * an empty string) is reported as `undefined`.
 *
 * NOTE(review): `value` and the first choice for `target` both read the
 * 'patient' role, so the two fields are identical whenever a patient is
 * present — confirm this is the intended mapping.
 *
 * @param node - Parsed command to summarize.
 * @param language - Language code recorded as `metadata.sourceLanguage`.
 * @returns The spec built from the node's action and role values.
 */
export function toVoiceActionSpec(node: SemanticNode, language: string): VoiceActionSpec {
  const action = node.action;

  // 'patient' wins; 'destination' is consulted only when there is no patient.
  const target =
    extractRoleValue(node, 'patient') || extractRoleValue(node, 'destination') || undefined;
  const value = extractRoleValue(node, 'patient') || undefined;
  const direction = extractRoleValue(node, 'manner') || undefined;
  const amount = extractRoleValue(node, 'quantity') || undefined;

  return {
    action,
    target,
    value,
    direction,
    amount,
    metadata: { sourceLanguage: language },
  };
}