@lokascript/semantic 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-ar.ar.global.js +2 -2
- package/dist/browser-core.core.global.js +2 -2
- package/dist/browser-de.de.global.js +2 -2
- package/dist/browser-east-asian.east-asian.global.js +2 -2
- package/dist/browser-en-tr.en-tr.global.js +2 -2
- package/dist/browser-en.en.global.js +2 -2
- package/dist/browser-es-en.es-en.global.js +2 -2
- package/dist/browser-es.es.global.js +2 -2
- package/dist/browser-fr.fr.global.js +2 -2
- package/dist/browser-id.id.global.js +2 -2
- package/dist/browser-ja.ja.global.js +2 -2
- package/dist/browser-ko.ko.global.js +2 -2
- package/dist/browser-lazy.lazy.global.js +2 -2
- package/dist/browser-priority.priority.global.js +2 -2
- package/dist/browser-pt.pt.global.js +2 -2
- package/dist/browser-qu.qu.global.js +2 -2
- package/dist/browser-sw.sw.global.js +2 -2
- package/dist/browser-tr.tr.global.js +2 -2
- package/dist/browser-western.western.global.js +2 -2
- package/dist/browser-zh.zh.global.js +2 -2
- package/dist/browser.global.js +2 -2
- package/dist/browser.global.js.map +1 -1
- package/dist/index.cjs +13042 -17462
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +49 -5
- package/dist/index.d.ts +49 -5
- package/dist/index.js +14044 -18464
- package/dist/index.js.map +1 -1
- package/dist/languages/ar.d.ts +1 -1
- package/dist/languages/ar.js +31 -44
- package/dist/languages/ar.js.map +1 -1
- package/dist/languages/de.d.ts +1 -1
- package/dist/languages/de.js +14 -2
- package/dist/languages/de.js.map +1 -1
- package/dist/languages/en.d.ts +1 -1
- package/dist/languages/en.js +558 -12
- package/dist/languages/en.js.map +1 -1
- package/dist/languages/es.d.ts +1 -1
- package/dist/languages/es.js +16 -0
- package/dist/languages/es.js.map +1 -1
- package/dist/languages/fr.d.ts +1 -1
- package/dist/languages/fr.js +14 -2
- package/dist/languages/fr.js.map +1 -1
- package/dist/languages/id.d.ts +1 -1
- package/dist/languages/id.js +14 -2
- package/dist/languages/id.js.map +1 -1
- package/dist/languages/ja.d.ts +1 -1
- package/dist/languages/ja.js +18 -3
- package/dist/languages/ja.js.map +1 -1
- package/dist/languages/ko.d.ts +8 -1
- package/dist/languages/ko.js +75 -43
- package/dist/languages/ko.js.map +1 -1
- package/dist/languages/pt.d.ts +1 -1
- package/dist/languages/pt.js +17 -0
- package/dist/languages/pt.js.map +1 -1
- package/dist/languages/qu.d.ts +12 -1
- package/dist/languages/qu.js +77 -2
- package/dist/languages/qu.js.map +1 -1
- package/dist/languages/sw.d.ts +1 -1
- package/dist/languages/sw.js.map +1 -1
- package/dist/languages/tr.d.ts +9 -1
- package/dist/languages/tr.js +96 -72
- package/dist/languages/tr.js.map +1 -1
- package/dist/languages/zh.d.ts +1 -1
- package/dist/languages/zh.js +16 -0
- package/dist/languages/zh.js.map +1 -1
- package/dist/{types-C4dcj53L.d.ts → types-BY3Id07j.d.ts} +20 -5
- package/package.json +20 -29
- package/src/generators/command-schemas.ts +21 -10
- package/src/generators/event-handler-generator.ts +50 -44
- package/src/generators/language-profiles.ts +6 -0
- package/src/generators/pattern-generator.ts +883 -1
- package/src/generators/profiles/arabic.ts +19 -3
- package/src/generators/profiles/bengali.ts +12 -1
- package/src/generators/profiles/chinese.ts +15 -0
- package/src/generators/profiles/french.ts +12 -1
- package/src/generators/profiles/german.ts +12 -1
- package/src/generators/profiles/hebrew.ts +148 -0
- package/src/generators/profiles/hindi.ts +12 -1
- package/src/generators/profiles/index.ts +2 -0
- package/src/generators/profiles/indonesian.ts +12 -1
- package/src/generators/profiles/italian.ts +16 -0
- package/src/generators/profiles/japanese.ts +11 -2
- package/src/generators/profiles/korean.ts +15 -1
- package/src/generators/profiles/polish.ts +12 -0
- package/src/generators/profiles/portuguese.ts +16 -0
- package/src/generators/profiles/russian.ts +11 -0
- package/src/generators/profiles/spanish.ts +15 -0
- package/src/generators/profiles/spanishMexico.ts +176 -0
- package/src/generators/profiles/thai.ts +11 -0
- package/src/generators/profiles/turkish.ts +49 -7
- package/src/generators/profiles/types.ts +21 -5
- package/src/generators/profiles/ukrainian.ts +11 -0
- package/src/generators/profiles/vietnamese.ts +11 -0
- package/src/language-building-schema.ts +111 -0
- package/src/languages/_all.ts +5 -1
- package/src/languages/es-MX.ts +32 -0
- package/src/languages/he.ts +15 -0
- package/src/parser/pattern-matcher.ts +10 -1
- package/src/parser/semantic-parser.ts +3 -0
- package/src/patterns/add/ar.ts +3 -59
- package/src/patterns/add/index.ts +5 -1
- package/src/patterns/add/ja.ts +3 -81
- package/src/patterns/add/ko.ts +3 -62
- package/src/patterns/add/qu.ts +69 -0
- package/src/patterns/add/tr.ts +3 -59
- package/src/patterns/builders.ts +1 -0
- package/src/patterns/decrement/tr.ts +3 -36
- package/src/patterns/event-handler/ar.ts +3 -139
- package/src/patterns/event-handler/he.ts +15 -0
- package/src/patterns/event-handler/index.ts +5 -1
- package/src/patterns/event-handler/ja.ts +3 -106
- package/src/patterns/event-handler/ko.ts +3 -121
- package/src/patterns/event-handler/ms.ts +45 -20
- package/src/patterns/event-handler/tr.ts +3 -158
- package/src/patterns/get/ar.ts +3 -37
- package/src/patterns/get/ja.ts +3 -41
- package/src/patterns/get/ko.ts +3 -41
- package/src/patterns/grammar-transformed/ja.ts +3 -1701
- package/src/patterns/grammar-transformed/ko.ts +3 -1299
- package/src/patterns/grammar-transformed/tr.ts +3 -1055
- package/src/patterns/hide/ar.ts +3 -55
- package/src/patterns/hide/ja.ts +3 -57
- package/src/patterns/hide/ko.ts +3 -57
- package/src/patterns/hide/tr.ts +3 -53
- package/src/patterns/increment/tr.ts +3 -40
- package/src/patterns/put/ar.ts +3 -62
- package/src/patterns/put/ja.ts +3 -63
- package/src/patterns/put/ko.ts +3 -55
- package/src/patterns/put/tr.ts +3 -55
- package/src/patterns/remove/ar.ts +3 -59
- package/src/patterns/remove/index.ts +5 -1
- package/src/patterns/remove/ja.ts +3 -62
- package/src/patterns/remove/ko.ts +3 -66
- package/src/patterns/remove/qu.ts +69 -0
- package/src/patterns/remove/tr.ts +3 -66
- package/src/patterns/set/ar.ts +3 -72
- package/src/patterns/set/ja.ts +3 -74
- package/src/patterns/set/ko.ts +3 -73
- package/src/patterns/set/tr.ts +3 -95
- package/src/patterns/show/ar.ts +3 -55
- package/src/patterns/show/ja.ts +3 -57
- package/src/patterns/show/ko.ts +3 -61
- package/src/patterns/show/tr.ts +3 -53
- package/src/patterns/take/ar.ts +3 -39
- package/src/patterns/toggle/ar.ts +3 -49
- package/src/patterns/toggle/index.ts +5 -1
- package/src/patterns/toggle/ja.ts +3 -144
- package/src/patterns/toggle/ko.ts +3 -101
- package/src/patterns/toggle/qu.ts +90 -0
- package/src/patterns/toggle/tr.ts +3 -76
- package/src/registry.ts +179 -15
- package/src/tokenizers/arabic.ts +13 -46
- package/src/tokenizers/bengali.ts +2 -16
- package/src/tokenizers/he.ts +542 -0
- package/src/tokenizers/index.ts +1 -0
- package/src/tokenizers/japanese.ts +3 -1
- package/src/tokenizers/korean.ts +104 -48
- package/src/tokenizers/ms.ts +3 -0
- package/src/tokenizers/quechua.ts +101 -2
- package/src/tokenizers/turkish.ts +64 -69
- package/src/types.ts +13 -0
package/src/registry.ts
CHANGED
|
@@ -86,6 +86,94 @@ const externalSources = new Map<string, ExternalPatternsSource>();
|
|
|
86
86
|
// Pattern generator function - set by patterns module to avoid circular deps
|
|
87
87
|
let patternGenerator: ((profile: LanguageProfile) => LanguagePattern[]) | null = null;
|
|
88
88
|
|
|
89
|
+
// =============================================================================
|
|
90
|
+
// Profile Inheritance
|
|
91
|
+
// =============================================================================
|
|
92
|
+
|
|
93
|
+
/**
 * Report whether a value is a plain object, i.e. its prototype is
 * `Object.prototype` or `null`. Arrays, `RegExp`, `Date`, `Map`, `Set` and
 * class instances are not plain objects.
 */
function isPlainObject(value: unknown): boolean {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const proto = Object.getPrototypeOf(value);
  return proto === Object.prototype || proto === null;
}

/**
 * Deep merge two objects, with variant values overriding base values.
 *
 * - Keys whose variant value is `undefined` keep the base value.
 * - When both the base and variant values are plain objects they are merged
 *   recursively.
 * - Every other value (primitives, arrays, `RegExp`, `Date`, `Map`, class
 *   instances, ...) is replaced by the variant value, not merged.
 * - An own `__proto__` key on the variant is ignored so that merging can never
 *   replace the prototype of the result.
 *
 * Neither input is mutated; a new top-level object is returned. Values that
 * are replaced rather than merged are shared by reference with `variant`.
 *
 * @param base - Object providing the default values.
 * @param variant - Partial object whose defined values take precedence.
 * @returns A new object combining `base` and `variant`.
 */
function deepMerge<T extends object>(base: T, variant: Partial<T>): T {
  const result = { ...base } as T;

  for (const key of Object.keys(variant) as (keyof T)[]) {
    // Assigning to `__proto__` would swap the result's prototype instead of
    // creating a property, so such keys are never copied.
    if (key === '__proto__') {
      continue;
    }

    const variantValue = variant[key];
    if (variantValue === undefined) {
      continue;
    }

    const baseValue = base[key];

    if (isPlainObject(variantValue) && isPlainObject(baseValue)) {
      // Both sides are plain objects: merge them key by key.
      result[key] = deepMerge(
        baseValue as object,
        variantValue as Partial<typeof baseValue>
      ) as T[keyof T];
    } else {
      // Replace the value outright (arrays and non-plain objects included).
      result[key] = variantValue as T[keyof T];
    }
  }

  return result;
}
|
|
129
|
+
|
|
130
|
+
/**
 * Layer a variant profile on top of a base profile.
 *
 * Delegates to `deepMerge`, so fields defined on the variant win over the
 * base, and nested objects are combined rather than replaced wholesale.
 *
 * @example
 * ```typescript
 * const esMX = mergeProfiles(spanishProfile, {
 *   code: 'es-MX',
 *   name: 'Spanish (Mexico)',
 *   keywords: {
 *     toggle: { primary: 'alternar', alternatives: ['dale', 'cambiar'] },
 *   },
 * });
 * ```
 *
 * @param base - Profile supplying the inherited values.
 * @param variant - Partial profile whose fields override the base.
 * @returns The merged profile.
 */
export function mergeProfiles(
  base: LanguageProfile,
  variant: Partial<LanguageProfile>
): LanguageProfile {
  const merged: LanguageProfile = deepMerge(base, variant);
  return merged;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Resolve a profile, applying inheritance if the profile has an `extends` field.
 *
 * The `extends` chain is followed through the registered profiles up to the
 * first profile that has no `extends`; the chain is then merged from that root
 * back down so that each more-derived profile overrides its base.
 *
 * The walk stops early, with a console warning, in two cases:
 * - a referenced base is not registered: the profile that referenced it is
 *   used as the root of the merge;
 * - the chain loops back onto a profile already visited (circular `extends`):
 *   the profile that closed the loop is used as the root, instead of recursing
 *   until the stack overflows.
 *
 * @param profile - Profile to resolve.
 * @returns `profile` itself when it does not extend anything (or its base is
 *   missing); otherwise a new merged profile.
 */
export function resolveProfile(profile: LanguageProfile): LanguageProfile {
  // Profiles that still need to be layered on top of the root, most-derived first.
  const lineage: LanguageProfile[] = [];
  // Every profile visited so far, used to detect circular `extends` chains.
  const seen = new Set<LanguageProfile>([profile]);

  let root = profile;
  while (root.extends) {
    const baseProfile = profiles.get(root.extends);
    if (!baseProfile) {
      console.warn(
        `[Registry] Profile '${root.code}' extends '${root.extends}' but base is not registered. ` +
          `Make sure to import the base language before the variant.`
      );
      break;
    }

    if (seen.has(baseProfile)) {
      console.warn(
        `[Registry] Profile '${root.code}' extends '${root.extends}', which creates a circular ` +
          `inheritance chain. Ignoring the circular reference.`
      );
      break;
    }

    seen.add(baseProfile);
    lineage.push(root);
    root = baseProfile;
  }

  // Merge from the root downwards, with each variant overriding its base.
  let resolved = root;
  for (const derived of lineage.reverse()) {
    resolved = mergeProfiles(resolved, derived);
  }
  return resolved;
}
|
|
176
|
+
|
|
89
177
|
// =============================================================================
|
|
90
178
|
// Registration Functions
|
|
91
179
|
// =============================================================================
|
|
@@ -93,6 +181,7 @@ let patternGenerator: ((profile: LanguageProfile) => LanguagePattern[]) | null =
|
|
|
93
181
|
/**
|
|
94
182
|
* Register a language with its tokenizer and profile.
|
|
95
183
|
* Called automatically by language modules when imported.
|
|
184
|
+
* If the profile has an `extends` field, it will inherit from the base profile.
|
|
96
185
|
*/
|
|
97
186
|
export function registerLanguage(
|
|
98
187
|
code: string,
|
|
@@ -100,6 +189,7 @@ export function registerLanguage(
|
|
|
100
189
|
profile: LanguageProfile
|
|
101
190
|
): void {
|
|
102
191
|
tokenizers.set(code, tokenizer);
|
|
192
|
+
// Store the original profile (inheritance is resolved at query time)
|
|
103
193
|
profiles.set(code, profile);
|
|
104
194
|
// Clear pattern cache for this language if it was previously cached
|
|
105
195
|
patternCache.delete(code);
|
|
@@ -263,16 +353,45 @@ export async function queryExternalPatternsForCommand(
|
|
|
263
353
|
return allPatterns.sort((a, b) => b.confidence - a.confidence);
|
|
264
354
|
}
|
|
265
355
|
|
|
356
|
+
// =============================================================================
|
|
357
|
+
// Language Code Utilities
|
|
358
|
+
// =============================================================================
|
|
359
|
+
|
|
360
|
+
/**
 * Return the part of a language code that precedes its first `-`.
 * A code without a `-` is returned unchanged.
 *
 * Examples: 'es-MX' → 'es', 'pt-BR' → 'pt', 'en' → 'en'
 *
 * @param code - Language code, e.g. a BCP 47 tag.
 * @returns The leading subtag of `code`.
 */
export function getBaseLanguageCode(code: string): string {
  const separatorIndex = code.indexOf('-');
  return separatorIndex === -1 ? code : code.slice(0, separatorIndex);
}
|
|
367
|
+
|
|
368
|
+
/**
 * Report whether a language code contains a `-` separator, i.e. carries at
 * least one subtag after the base language.
 *
 * Examples: 'es-MX' → true, 'pt' → false
 *
 * @param code - Language code to inspect.
 * @returns `true` when `code` contains a `-`.
 */
export function isLanguageVariant(code: string): boolean {
  return code.indexOf('-') !== -1;
}
|
|
375
|
+
|
|
266
376
|
// =============================================================================
|
|
267
377
|
// Query Functions
|
|
268
378
|
// =============================================================================
|
|
269
379
|
|
|
270
380
|
/**
|
|
271
381
|
* Get a tokenizer for the specified language.
|
|
272
|
-
*
|
|
382
|
+
* Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
|
|
383
|
+
* @throws Error if neither the variant nor base language is registered
|
|
273
384
|
*/
|
|
274
385
|
export function getTokenizer(code: string): LanguageTokenizer {
|
|
275
|
-
|
|
386
|
+
// Try exact match first
|
|
387
|
+
let tokenizer = tokenizers.get(code);
|
|
388
|
+
|
|
389
|
+
// Fallback: es-MX → es
|
|
390
|
+
if (!tokenizer && isLanguageVariant(code)) {
|
|
391
|
+
const baseCode = getBaseLanguageCode(code);
|
|
392
|
+
tokenizer = tokenizers.get(baseCode);
|
|
393
|
+
}
|
|
394
|
+
|
|
276
395
|
if (!tokenizer) {
|
|
277
396
|
const registered = Array.from(tokenizers.keys()).join(', ');
|
|
278
397
|
throw new Error(
|
|
@@ -286,10 +405,19 @@ export function getTokenizer(code: string): LanguageTokenizer {
|
|
|
286
405
|
|
|
287
406
|
/**
|
|
288
407
|
* Get a profile for the specified language.
|
|
289
|
-
*
|
|
408
|
+
* Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
|
|
409
|
+
* @throws Error if neither the variant nor base language is registered
|
|
290
410
|
*/
|
|
291
411
|
export function getProfile(code: string): LanguageProfile {
|
|
292
|
-
|
|
412
|
+
// Try exact match first
|
|
413
|
+
let profile = profiles.get(code);
|
|
414
|
+
|
|
415
|
+
// Fallback: es-MX → es
|
|
416
|
+
if (!profile && isLanguageVariant(code)) {
|
|
417
|
+
const baseCode = getBaseLanguageCode(code);
|
|
418
|
+
profile = profiles.get(baseCode);
|
|
419
|
+
}
|
|
420
|
+
|
|
293
421
|
if (!profile) {
|
|
294
422
|
const registered = Array.from(profiles.keys()).join(', ');
|
|
295
423
|
throw new Error(
|
|
@@ -298,21 +426,34 @@ export function getProfile(code: string): LanguageProfile {
|
|
|
298
426
|
`Import the language module first: import '@lokascript/semantic/languages/${code}';`
|
|
299
427
|
);
|
|
300
428
|
}
|
|
301
|
-
|
|
429
|
+
|
|
430
|
+
// Resolve inheritance if profile extends another
|
|
431
|
+
return resolveProfile(profile);
|
|
302
432
|
}
|
|
303
433
|
|
|
304
434
|
/**
 * Look up a tokenizer without throwing.
 *
 * The exact code is tried first; when nothing is registered for it and the
 * code is a variant (e.g. 'es-MX'), the base language ('es') is tried.
 *
 * @param code - Language code to look up.
 * @returns The matching tokenizer, or `undefined` if none is registered.
 */
export function tryGetTokenizer(code: string): LanguageTokenizer | undefined {
  const exactMatch = tokenizers.get(code);
  if (exactMatch || !isLanguageVariant(code)) {
    return exactMatch;
  }
  // Variant without its own tokenizer: fall back to the base language.
  return tokenizers.get(getBaseLanguageCode(code));
}
|
|
310
445
|
|
|
311
446
|
/**
 * Look up a profile without throwing.
 *
 * The exact code is tried first; when nothing is registered for it and the
 * code is a variant (e.g. 'es-MX'), the base language ('es') is tried. The
 * profile that is found is passed through `resolveProfile` before being
 * returned.
 *
 * @param code - Language code to look up.
 * @returns The resolved profile, or `undefined` if none is registered.
 */
export function tryGetProfile(code: string): LanguageProfile | undefined {
  let candidate = profiles.get(code);

  if (!candidate) {
    if (!isLanguageVariant(code)) {
      return undefined;
    }
    // Variant without its own profile: fall back to the base language.
    candidate = profiles.get(getBaseLanguageCode(code));
    if (!candidate) {
      return undefined;
    }
  }

  return resolveProfile(candidate);
}
|
|
317
458
|
|
|
318
459
|
/**
|
|
@@ -323,18 +464,33 @@ export function getRegisteredLanguages(): string[] {
|
|
|
323
464
|
}
|
|
324
465
|
|
|
325
466
|
/**
 * Check if a language is registered, meaning it has both a tokenizer and a
 * profile. A variant code (e.g. 'es-MX') also counts as registered when its
 * base language ('es') has both.
 *
 * @param code - Language code to check.
 * @returns `true` on an exact match or a base-language fallback match.
 */
export function isLanguageRegistered(code: string): boolean {
  const hasTokenizerAndProfile = (lang: string): boolean =>
    tokenizers.has(lang) && profiles.has(lang);

  return (
    hasTokenizerAndProfile(code) ||
    (isLanguageVariant(code) && hasTokenizerAndProfile(getBaseLanguageCode(code)))
  );
}
|
|
331
480
|
|
|
332
481
|
/**
 * Check if a language is supported, meaning a tokenizer is registered for the
 * exact code or, for a variant code (e.g. 'es-MX'), for its base language.
 * For backwards compatibility with tokenizers API.
 *
 * @param code - Language code to check.
 * @returns `true` when a tokenizer is available for `code` or its base language.
 */
export function isLanguageSupported(code: string): boolean {
  return (
    tokenizers.has(code) ||
    (isLanguageVariant(code) && tokenizers.has(getBaseLanguageCode(code)))
  );
}
|
|
339
495
|
|
|
340
496
|
// =============================================================================
|
|
@@ -358,17 +514,25 @@ export function tokenize(input: string, language: string): TokenStream {
|
|
|
358
514
|
* Get patterns for a specific language.
|
|
359
515
|
* First checks for directly registered patterns (for tree-shaking),
|
|
360
516
|
* then falls back to pattern generator.
|
|
517
|
+
* Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
|
|
361
518
|
* @throws Error if language is not registered
|
|
362
519
|
*/
|
|
363
520
|
export function getPatternsForLanguage(code: string): LanguagePattern[] {
|
|
364
|
-
// Check cache first
|
|
365
|
-
|
|
521
|
+
// Check cache first (try exact, then base language)
|
|
522
|
+
let cached = patternCache.get(code);
|
|
523
|
+
if (!cached && isLanguageVariant(code)) {
|
|
524
|
+
cached = patternCache.get(getBaseLanguageCode(code));
|
|
525
|
+
}
|
|
366
526
|
if (cached) {
|
|
367
527
|
return cached;
|
|
368
528
|
}
|
|
369
529
|
|
|
370
530
|
// Check for directly registered patterns (tree-shakeable path)
|
|
371
|
-
|
|
531
|
+
// Try exact match, then base language fallback
|
|
532
|
+
let registered = registeredPatterns.get(code);
|
|
533
|
+
if (!registered && isLanguageVariant(code)) {
|
|
534
|
+
registered = registeredPatterns.get(getBaseLanguageCode(code));
|
|
535
|
+
}
|
|
372
536
|
if (registered) {
|
|
373
537
|
patternCache.set(code, registered);
|
|
374
538
|
return registered;
|
|
@@ -382,7 +546,7 @@ export function getPatternsForLanguage(code: string): LanguagePattern[] {
|
|
|
382
546
|
);
|
|
383
547
|
}
|
|
384
548
|
|
|
385
|
-
// Get profile (throws if not registered)
|
|
549
|
+
// Get profile (throws if not registered) - has built-in fallback
|
|
386
550
|
const profile = getProfile(code);
|
|
387
551
|
const patterns = patternGenerator(profile);
|
|
388
552
|
patternCache.set(code, patterns);
|
package/src/tokenizers/arabic.ts
CHANGED
|
@@ -195,13 +195,16 @@ const PREPOSITIONS = new Set([
|
|
|
195
195
|
// =============================================================================
|
|
196
196
|
|
|
197
197
|
/**
|
|
198
|
-
* Extra keywords not covered by the profile
|
|
198
|
+
* Extra keywords not covered by the profile.
|
|
199
|
+
*
|
|
200
|
+
* SIMPLIFIED: Following the Tagalog/Hindi model of minimal EXTRAS.
|
|
201
|
+
* Command synonyms and spelling variants should be in profile alternatives,
|
|
202
|
+
* not duplicated here. Only includes:
|
|
199
203
|
* - Literals (true, false, null, undefined)
|
|
200
204
|
* - Positional words
|
|
201
205
|
* - Event names
|
|
202
206
|
* - Time units
|
|
203
|
-
* -
|
|
204
|
-
* - Additional synonyms and spelling variants
|
|
207
|
+
* - References not in profile
|
|
205
208
|
*/
|
|
206
209
|
const ARABIC_EXTRAS: KeywordEntry[] = [
|
|
207
210
|
// Values/Literals
|
|
@@ -239,13 +242,8 @@ const ARABIC_EXTRAS: KeywordEntry[] = [
|
|
|
239
242
|
{ native: 'تحميل', normalized: 'load' },
|
|
240
243
|
{ native: 'تمرير', normalized: 'scroll' },
|
|
241
244
|
|
|
242
|
-
// References
|
|
243
|
-
{ native: 'أنا', normalized: 'me' },
|
|
244
|
-
{ native: 'هو', normalized: 'it' },
|
|
245
|
+
// References (feminine "it" not in profile)
|
|
245
246
|
{ native: 'هي', normalized: 'it' },
|
|
246
|
-
{ native: 'النتيجة', normalized: 'result' },
|
|
247
|
-
{ native: 'الحدث', normalized: 'event' },
|
|
248
|
-
{ native: 'الهدف', normalized: 'target' },
|
|
249
247
|
|
|
250
248
|
// Time units
|
|
251
249
|
{ native: 'ثانية', normalized: 's' },
|
|
@@ -258,43 +256,12 @@ const ARABIC_EXTRAS: KeywordEntry[] = [
|
|
|
258
256
|
|
|
259
257
|
// Note: Temporal markers (عندما, حينما, etc.) are in TEMPORAL_MARKERS map
|
|
260
258
|
// with formality metadata, not in ARABIC_EXTRAS
|
|
261
|
-
|
|
262
|
-
//
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
{ native: 'اضع', normalized: 'put' },
|
|
268
|
-
{ native: 'يضع', normalized: 'put' },
|
|
269
|
-
{ native: 'اجعل', normalized: 'put' },
|
|
270
|
-
{ native: 'عين', normalized: 'set' },
|
|
271
|
-
{ native: 'زد', normalized: 'increment' },
|
|
272
|
-
{ native: 'ارفع', normalized: 'increment' },
|
|
273
|
-
{ native: 'انقص', normalized: 'decrement' },
|
|
274
|
-
{ native: 'قلل', normalized: 'decrement' },
|
|
275
|
-
{ native: 'سجل', normalized: 'log' },
|
|
276
|
-
{ native: 'اظهر', normalized: 'show' },
|
|
277
|
-
{ native: 'اعرض', normalized: 'show' },
|
|
278
|
-
{ native: 'اخف', normalized: 'hide' },
|
|
279
|
-
{ native: 'اخفي', normalized: 'hide' },
|
|
280
|
-
{ native: 'شغل', normalized: 'trigger' },
|
|
281
|
-
{ native: 'ارسل', normalized: 'send' },
|
|
282
|
-
{ native: 'ركز', normalized: 'focus' },
|
|
283
|
-
{ native: 'شوش', normalized: 'blur' },
|
|
284
|
-
{ native: 'اذا', normalized: 'if' },
|
|
285
|
-
{ native: 'لو', normalized: 'if' },
|
|
286
|
-
{ native: 'والا', normalized: 'else' },
|
|
287
|
-
{ native: 'توقف', normalized: 'halt' },
|
|
288
|
-
{ native: 'انسخ', normalized: 'clone' },
|
|
289
|
-
|
|
290
|
-
// Control flow helpers
|
|
291
|
-
{ native: 'إذن', normalized: 'then' },
|
|
292
|
-
{ native: 'فإن', normalized: 'then' },
|
|
293
|
-
{ native: 'نهاية', normalized: 'end' },
|
|
294
|
-
|
|
295
|
-
// Modifiers
|
|
296
|
-
{ native: 'قبل', normalized: 'before' },
|
|
297
|
-
{ native: 'بعد', normalized: 'after' },
|
|
259
|
+
//
|
|
260
|
+
// Command spelling variants are now in the profile alternatives:
|
|
261
|
+
// - toggle: بدل, غيّر, غير (in profile)
|
|
262
|
+
// - add: اضف, زِد (in profile)
|
|
263
|
+
// - remove: أزل, امسح (in profile)
|
|
264
|
+
// - etc.
|
|
298
265
|
];
|
|
299
266
|
|
|
300
267
|
// =============================================================================
|
|
package/src/tokenizers/bengali.ts
CHANGED
|
@@ -176,22 +176,8 @@ export class BengaliTokenizer extends BaseTokenizer {
|
|
|
176
176
|
const startPos = pos;
|
|
177
177
|
let word = '';
|
|
178
178
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
// Check if next char is Bengali (compound word)
|
|
182
|
-
if (pos + 1 < input.length && isBengali(input[pos + 1])) {
|
|
183
|
-
const rest = input.slice(pos);
|
|
184
|
-
const compound = [' করুন', ' ফেলুন', ' দিন', ' না হলে', ' যে যান'].find(c =>
|
|
185
|
-
rest.startsWith(c)
|
|
186
|
-
);
|
|
187
|
-
if (compound) {
|
|
188
|
-
word += compound;
|
|
189
|
-
pos += compound.length;
|
|
190
|
-
continue;
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
break;
|
|
194
|
-
}
|
|
179
|
+
// Extract word without including spaces (let parser handle multi-word patterns)
|
|
180
|
+
while (pos < input.length && isBengali(input[pos])) {
|
|
195
181
|
word += input[pos];
|
|
196
182
|
pos++;
|
|
197
183
|
}
|