@lokascript/semantic 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. package/dist/browser-ar.ar.global.js +2 -2
  2. package/dist/browser-core.core.global.js +2 -2
  3. package/dist/browser-de.de.global.js +2 -2
  4. package/dist/browser-east-asian.east-asian.global.js +2 -2
  5. package/dist/browser-en-tr.en-tr.global.js +2 -2
  6. package/dist/browser-en.en.global.js +2 -2
  7. package/dist/browser-es-en.es-en.global.js +2 -2
  8. package/dist/browser-es.es.global.js +2 -2
  9. package/dist/browser-fr.fr.global.js +2 -2
  10. package/dist/browser-id.id.global.js +2 -2
  11. package/dist/browser-ja.ja.global.js +2 -2
  12. package/dist/browser-ko.ko.global.js +2 -2
  13. package/dist/browser-lazy.lazy.global.js +2 -2
  14. package/dist/browser-priority.priority.global.js +2 -2
  15. package/dist/browser-pt.pt.global.js +2 -2
  16. package/dist/browser-qu.qu.global.js +2 -2
  17. package/dist/browser-sw.sw.global.js +2 -2
  18. package/dist/browser-tr.tr.global.js +2 -2
  19. package/dist/browser-western.western.global.js +2 -2
  20. package/dist/browser-zh.zh.global.js +2 -2
  21. package/dist/browser.global.js +2 -2
  22. package/dist/browser.global.js.map +1 -1
  23. package/dist/index.cjs +13042 -17462
  24. package/dist/index.cjs.map +1 -1
  25. package/dist/index.d.cts +49 -5
  26. package/dist/index.d.ts +49 -5
  27. package/dist/index.js +14044 -18464
  28. package/dist/index.js.map +1 -1
  29. package/dist/languages/ar.d.ts +1 -1
  30. package/dist/languages/ar.js +31 -44
  31. package/dist/languages/ar.js.map +1 -1
  32. package/dist/languages/de.d.ts +1 -1
  33. package/dist/languages/de.js +14 -2
  34. package/dist/languages/de.js.map +1 -1
  35. package/dist/languages/en.d.ts +1 -1
  36. package/dist/languages/en.js +558 -12
  37. package/dist/languages/en.js.map +1 -1
  38. package/dist/languages/es.d.ts +1 -1
  39. package/dist/languages/es.js +16 -0
  40. package/dist/languages/es.js.map +1 -1
  41. package/dist/languages/fr.d.ts +1 -1
  42. package/dist/languages/fr.js +14 -2
  43. package/dist/languages/fr.js.map +1 -1
  44. package/dist/languages/id.d.ts +1 -1
  45. package/dist/languages/id.js +14 -2
  46. package/dist/languages/id.js.map +1 -1
  47. package/dist/languages/ja.d.ts +1 -1
  48. package/dist/languages/ja.js +18 -3
  49. package/dist/languages/ja.js.map +1 -1
  50. package/dist/languages/ko.d.ts +8 -1
  51. package/dist/languages/ko.js +75 -43
  52. package/dist/languages/ko.js.map +1 -1
  53. package/dist/languages/pt.d.ts +1 -1
  54. package/dist/languages/pt.js +17 -0
  55. package/dist/languages/pt.js.map +1 -1
  56. package/dist/languages/qu.d.ts +12 -1
  57. package/dist/languages/qu.js +77 -2
  58. package/dist/languages/qu.js.map +1 -1
  59. package/dist/languages/sw.d.ts +1 -1
  60. package/dist/languages/sw.js.map +1 -1
  61. package/dist/languages/tr.d.ts +9 -1
  62. package/dist/languages/tr.js +96 -72
  63. package/dist/languages/tr.js.map +1 -1
  64. package/dist/languages/zh.d.ts +1 -1
  65. package/dist/languages/zh.js +16 -0
  66. package/dist/languages/zh.js.map +1 -1
  67. package/dist/{types-C4dcj53L.d.ts → types-BY3Id07j.d.ts} +20 -5
  68. package/package.json +20 -29
  69. package/src/generators/command-schemas.ts +21 -10
  70. package/src/generators/event-handler-generator.ts +50 -44
  71. package/src/generators/language-profiles.ts +6 -0
  72. package/src/generators/pattern-generator.ts +883 -1
  73. package/src/generators/profiles/arabic.ts +19 -3
  74. package/src/generators/profiles/bengali.ts +12 -1
  75. package/src/generators/profiles/chinese.ts +15 -0
  76. package/src/generators/profiles/french.ts +12 -1
  77. package/src/generators/profiles/german.ts +12 -1
  78. package/src/generators/profiles/hebrew.ts +148 -0
  79. package/src/generators/profiles/hindi.ts +12 -1
  80. package/src/generators/profiles/index.ts +2 -0
  81. package/src/generators/profiles/indonesian.ts +12 -1
  82. package/src/generators/profiles/italian.ts +16 -0
  83. package/src/generators/profiles/japanese.ts +11 -2
  84. package/src/generators/profiles/korean.ts +15 -1
  85. package/src/generators/profiles/polish.ts +12 -0
  86. package/src/generators/profiles/portuguese.ts +16 -0
  87. package/src/generators/profiles/russian.ts +11 -0
  88. package/src/generators/profiles/spanish.ts +15 -0
  89. package/src/generators/profiles/spanishMexico.ts +176 -0
  90. package/src/generators/profiles/thai.ts +11 -0
  91. package/src/generators/profiles/turkish.ts +49 -7
  92. package/src/generators/profiles/types.ts +21 -5
  93. package/src/generators/profiles/ukrainian.ts +11 -0
  94. package/src/generators/profiles/vietnamese.ts +11 -0
  95. package/src/language-building-schema.ts +111 -0
  96. package/src/languages/_all.ts +5 -1
  97. package/src/languages/es-MX.ts +32 -0
  98. package/src/languages/he.ts +15 -0
  99. package/src/parser/pattern-matcher.ts +10 -1
  100. package/src/parser/semantic-parser.ts +3 -0
  101. package/src/patterns/add/ar.ts +3 -59
  102. package/src/patterns/add/index.ts +5 -1
  103. package/src/patterns/add/ja.ts +3 -81
  104. package/src/patterns/add/ko.ts +3 -62
  105. package/src/patterns/add/qu.ts +69 -0
  106. package/src/patterns/add/tr.ts +3 -59
  107. package/src/patterns/builders.ts +1 -0
  108. package/src/patterns/decrement/tr.ts +3 -36
  109. package/src/patterns/event-handler/ar.ts +3 -139
  110. package/src/patterns/event-handler/he.ts +15 -0
  111. package/src/patterns/event-handler/index.ts +5 -1
  112. package/src/patterns/event-handler/ja.ts +3 -106
  113. package/src/patterns/event-handler/ko.ts +3 -121
  114. package/src/patterns/event-handler/ms.ts +45 -20
  115. package/src/patterns/event-handler/tr.ts +3 -158
  116. package/src/patterns/get/ar.ts +3 -37
  117. package/src/patterns/get/ja.ts +3 -41
  118. package/src/patterns/get/ko.ts +3 -41
  119. package/src/patterns/grammar-transformed/ja.ts +3 -1701
  120. package/src/patterns/grammar-transformed/ko.ts +3 -1299
  121. package/src/patterns/grammar-transformed/tr.ts +3 -1055
  122. package/src/patterns/hide/ar.ts +3 -55
  123. package/src/patterns/hide/ja.ts +3 -57
  124. package/src/patterns/hide/ko.ts +3 -57
  125. package/src/patterns/hide/tr.ts +3 -53
  126. package/src/patterns/increment/tr.ts +3 -40
  127. package/src/patterns/put/ar.ts +3 -62
  128. package/src/patterns/put/ja.ts +3 -63
  129. package/src/patterns/put/ko.ts +3 -55
  130. package/src/patterns/put/tr.ts +3 -55
  131. package/src/patterns/remove/ar.ts +3 -59
  132. package/src/patterns/remove/index.ts +5 -1
  133. package/src/patterns/remove/ja.ts +3 -62
  134. package/src/patterns/remove/ko.ts +3 -66
  135. package/src/patterns/remove/qu.ts +69 -0
  136. package/src/patterns/remove/tr.ts +3 -66
  137. package/src/patterns/set/ar.ts +3 -72
  138. package/src/patterns/set/ja.ts +3 -74
  139. package/src/patterns/set/ko.ts +3 -73
  140. package/src/patterns/set/tr.ts +3 -95
  141. package/src/patterns/show/ar.ts +3 -55
  142. package/src/patterns/show/ja.ts +3 -57
  143. package/src/patterns/show/ko.ts +3 -61
  144. package/src/patterns/show/tr.ts +3 -53
  145. package/src/patterns/take/ar.ts +3 -39
  146. package/src/patterns/toggle/ar.ts +3 -49
  147. package/src/patterns/toggle/index.ts +5 -1
  148. package/src/patterns/toggle/ja.ts +3 -144
  149. package/src/patterns/toggle/ko.ts +3 -101
  150. package/src/patterns/toggle/qu.ts +90 -0
  151. package/src/patterns/toggle/tr.ts +3 -76
  152. package/src/registry.ts +179 -15
  153. package/src/tokenizers/arabic.ts +13 -46
  154. package/src/tokenizers/bengali.ts +2 -16
  155. package/src/tokenizers/he.ts +542 -0
  156. package/src/tokenizers/index.ts +1 -0
  157. package/src/tokenizers/japanese.ts +3 -1
  158. package/src/tokenizers/korean.ts +104 -48
  159. package/src/tokenizers/ms.ts +3 -0
  160. package/src/tokenizers/quechua.ts +101 -2
  161. package/src/tokenizers/turkish.ts +64 -69
  162. package/src/types.ts +13 -0
package/src/registry.ts CHANGED
@@ -86,6 +86,94 @@ const externalSources = new Map<string, ExternalPatternsSource>();
86
86
  // Pattern generator function - set by patterns module to avoid circular deps
87
87
  let patternGenerator: ((profile: LanguageProfile) => LanguagePattern[]) | null = null;
88
88
 
89
+ // =============================================================================
90
+ // Profile Inheritance
91
+ // =============================================================================
92
+
93
+ /**
94
+ * Deep merge two objects, with variant values overriding base values.
95
+ * Arrays are replaced, not merged.
96
+ */
97
+ function deepMerge<T extends object>(base: T, variant: Partial<T>): T {
98
+ const result = { ...base } as T;
99
+
100
+ for (const key of Object.keys(variant) as (keyof T)[]) {
101
+ const variantValue = variant[key];
102
+ const baseValue = base[key];
103
+
104
+ if (variantValue === undefined) {
105
+ continue;
106
+ }
107
+
108
+ // If both are objects (but not arrays), merge recursively
109
+ if (
110
+ typeof variantValue === 'object' &&
111
+ variantValue !== null &&
112
+ !Array.isArray(variantValue) &&
113
+ typeof baseValue === 'object' &&
114
+ baseValue !== null &&
115
+ !Array.isArray(baseValue)
116
+ ) {
117
+ result[key] = deepMerge(
118
+ baseValue as object,
119
+ variantValue as Partial<typeof baseValue>
120
+ ) as T[keyof T];
121
+ } else {
122
+ // Replace value (including arrays)
123
+ result[key] = variantValue as T[keyof T];
124
+ }
125
+ }
126
+
127
+ return result;
128
+ }
129
+
130
+ /**
131
+ * Merge a variant profile with its base profile.
132
+ * The variant's fields override the base, with deep merging for nested objects.
133
+ *
134
+ * @example
135
+ * ```typescript
136
+ * const esMX = mergeProfiles(spanishProfile, {
137
+ * code: 'es-MX',
138
+ * name: 'Spanish (Mexico)',
139
+ * keywords: {
140
+ * toggle: { primary: 'alternar', alternatives: ['dale', 'cambiar'] },
141
+ * },
142
+ * });
143
+ * ```
144
+ */
145
+ export function mergeProfiles(
146
+ base: LanguageProfile,
147
+ variant: Partial<LanguageProfile>
148
+ ): LanguageProfile {
149
+ return deepMerge(base, variant);
150
+ }
151
+
152
+ /**
153
+ * Resolve a profile, applying inheritance if the profile has an `extends` field.
154
+ * Returns the merged profile with base language properties inherited.
155
+ */
156
+ export function resolveProfile(profile: LanguageProfile): LanguageProfile {
157
+ if (!profile.extends) {
158
+ return profile;
159
+ }
160
+
161
+ const baseProfile = profiles.get(profile.extends);
162
+ if (!baseProfile) {
163
+ console.warn(
164
+ `[Registry] Profile '${profile.code}' extends '${profile.extends}' but base is not registered. ` +
165
+ `Make sure to import the base language before the variant.`
166
+ );
167
+ return profile;
168
+ }
169
+
170
+ // Recursively resolve base profile (in case it also extends something)
171
+ const resolvedBase = resolveProfile(baseProfile);
172
+
173
+ // Merge, with variant overriding base
174
+ return mergeProfiles(resolvedBase, profile);
175
+ }
176
+
89
177
  // =============================================================================
90
178
  // Registration Functions
91
179
  // =============================================================================
@@ -93,6 +181,7 @@ let patternGenerator: ((profile: LanguageProfile) => LanguagePattern[]) | null =
93
181
  /**
94
182
  * Register a language with its tokenizer and profile.
95
183
  * Called automatically by language modules when imported.
184
+ * If the profile has an `extends` field, it will inherit from the base profile.
96
185
  */
97
186
  export function registerLanguage(
98
187
  code: string,
@@ -100,6 +189,7 @@ export function registerLanguage(
100
189
  profile: LanguageProfile
101
190
  ): void {
102
191
  tokenizers.set(code, tokenizer);
192
+ // Store the original profile (inheritance is resolved at query time)
103
193
  profiles.set(code, profile);
104
194
  // Clear pattern cache for this language if it was previously cached
105
195
  patternCache.delete(code);
@@ -263,16 +353,45 @@ export async function queryExternalPatternsForCommand(
263
353
  return allPatterns.sort((a, b) => b.confidence - a.confidence);
264
354
  }
265
355
 
356
+ // =============================================================================
357
+ // Language Code Utilities
358
+ // =============================================================================
359
+
360
+ /**
361
+ * Extract the base language code from a BCP 47 tag.
362
+ * Examples: 'es-MX' → 'es', 'pt-BR' → 'pt', 'en' → 'en'
363
+ */
364
+ export function getBaseLanguageCode(code: string): string {
365
+ return code.split('-')[0];
366
+ }
367
+
368
+ /**
369
+ * Check if a code is a language variant (has region subtag).
370
+ * Examples: 'es-MX' → true, 'pt' → false
371
+ */
372
+ export function isLanguageVariant(code: string): boolean {
373
+ return code.includes('-');
374
+ }
375
+
266
376
  // =============================================================================
267
377
  // Query Functions
268
378
  // =============================================================================
269
379
 
270
380
  /**
271
381
  * Get a tokenizer for the specified language.
272
- * @throws Error if language is not registered
382
+ * Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
383
+ * @throws Error if neither the variant nor base language is registered
273
384
  */
274
385
  export function getTokenizer(code: string): LanguageTokenizer {
275
- const tokenizer = tokenizers.get(code);
386
+ // Try exact match first
387
+ let tokenizer = tokenizers.get(code);
388
+
389
+ // Fallback: es-MX → es
390
+ if (!tokenizer && isLanguageVariant(code)) {
391
+ const baseCode = getBaseLanguageCode(code);
392
+ tokenizer = tokenizers.get(baseCode);
393
+ }
394
+
276
395
  if (!tokenizer) {
277
396
  const registered = Array.from(tokenizers.keys()).join(', ');
278
397
  throw new Error(
@@ -286,10 +405,19 @@ export function getTokenizer(code: string): LanguageTokenizer {
286
405
 
287
406
  /**
288
407
  * Get a profile for the specified language.
289
- * @throws Error if language is not registered
408
+ * Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
409
+ * @throws Error if neither the variant nor base language is registered
290
410
  */
291
411
  export function getProfile(code: string): LanguageProfile {
292
- const profile = profiles.get(code);
412
+ // Try exact match first
413
+ let profile = profiles.get(code);
414
+
415
+ // Fallback: es-MX → es
416
+ if (!profile && isLanguageVariant(code)) {
417
+ const baseCode = getBaseLanguageCode(code);
418
+ profile = profiles.get(baseCode);
419
+ }
420
+
293
421
  if (!profile) {
294
422
  const registered = Array.from(profiles.keys()).join(', ');
295
423
  throw new Error(
@@ -298,21 +426,34 @@ export function getProfile(code: string): LanguageProfile {
298
426
  `Import the language module first: import '@lokascript/semantic/languages/${code}';`
299
427
  );
300
428
  }
301
- return profile;
429
+
430
+ // Resolve inheritance if profile extends another
431
+ return resolveProfile(profile);
302
432
  }
303
433
 
304
434
  /**
305
435
  * Try to get a tokenizer, returning undefined if not registered.
436
+ * Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
306
437
  */
307
438
  export function tryGetTokenizer(code: string): LanguageTokenizer | undefined {
308
- return tokenizers.get(code);
439
+ let tokenizer = tokenizers.get(code);
440
+ if (!tokenizer && isLanguageVariant(code)) {
441
+ tokenizer = tokenizers.get(getBaseLanguageCode(code));
442
+ }
443
+ return tokenizer;
309
444
  }
310
445
 
311
446
  /**
312
447
  * Try to get a profile, returning undefined if not registered.
448
+ * Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
313
449
  */
314
450
  export function tryGetProfile(code: string): LanguageProfile | undefined {
315
- return profiles.get(code);
451
+ let profile = profiles.get(code);
452
+ if (!profile && isLanguageVariant(code)) {
453
+ profile = profiles.get(getBaseLanguageCode(code));
454
+ }
455
+ // Resolve inheritance if profile extends another
456
+ return profile ? resolveProfile(profile) : undefined;
316
457
  }
317
458
 
318
459
  /**
@@ -323,18 +464,33 @@ export function getRegisteredLanguages(): string[] {
323
464
  }
324
465
 
325
466
  /**
326
- * Check if a language is registered.
467
+ * Check if a language is registered (exact match or base language fallback).
327
468
  */
328
469
  export function isLanguageRegistered(code: string): boolean {
329
- return tokenizers.has(code) && profiles.has(code);
470
+ if (tokenizers.has(code) && profiles.has(code)) {
471
+ return true;
472
+ }
473
+ // Check fallback for variants
474
+ if (isLanguageVariant(code)) {
475
+ const baseCode = getBaseLanguageCode(code);
476
+ return tokenizers.has(baseCode) && profiles.has(baseCode);
477
+ }
478
+ return false;
330
479
  }
331
480
 
332
481
  /**
333
- * Check if a language is supported (alias for isLanguageRegistered).
482
+ * Check if a language is supported (exact match or base language fallback).
334
483
  * For backwards compatibility with tokenizers API.
335
484
  */
336
485
  export function isLanguageSupported(code: string): boolean {
337
- return tokenizers.has(code);
486
+ if (tokenizers.has(code)) {
487
+ return true;
488
+ }
489
+ // Check fallback for variants
490
+ if (isLanguageVariant(code)) {
491
+ return tokenizers.has(getBaseLanguageCode(code));
492
+ }
493
+ return false;
338
494
  }
339
495
 
340
496
  // =============================================================================
@@ -358,17 +514,25 @@ export function tokenize(input: string, language: string): TokenStream {
358
514
  * Get patterns for a specific language.
359
515
  * First checks for directly registered patterns (for tree-shaking),
360
516
  * then falls back to pattern generator.
517
+ * Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
361
518
  * @throws Error if language is not registered
362
519
  */
363
520
  export function getPatternsForLanguage(code: string): LanguagePattern[] {
364
- // Check cache first
365
- const cached = patternCache.get(code);
521
+ // Check cache first (try exact, then base language)
522
+ let cached = patternCache.get(code);
523
+ if (!cached && isLanguageVariant(code)) {
524
+ cached = patternCache.get(getBaseLanguageCode(code));
525
+ }
366
526
  if (cached) {
367
527
  return cached;
368
528
  }
369
529
 
370
530
  // Check for directly registered patterns (tree-shakeable path)
371
- const registered = registeredPatterns.get(code);
531
+ // Try exact match, then base language fallback
532
+ let registered = registeredPatterns.get(code);
533
+ if (!registered && isLanguageVariant(code)) {
534
+ registered = registeredPatterns.get(getBaseLanguageCode(code));
535
+ }
372
536
  if (registered) {
373
537
  patternCache.set(code, registered);
374
538
  return registered;
@@ -382,7 +546,7 @@ export function getPatternsForLanguage(code: string): LanguagePattern[] {
382
546
  );
383
547
  }
384
548
 
385
- // Get profile (throws if not registered)
549
+ // Get profile (throws if not registered) - has built-in fallback
386
550
  const profile = getProfile(code);
387
551
  const patterns = patternGenerator(profile);
388
552
  patternCache.set(code, patterns);
package/src/tokenizers/arabic.ts CHANGED
@@ -195,13 +195,16 @@ const PREPOSITIONS = new Set([
195
195
  // =============================================================================
196
196
 
197
197
  /**
198
- * Extra keywords not covered by the profile:
198
+ * Extra keywords not covered by the profile.
199
+ *
200
+ * SIMPLIFIED: Following the Tagalog/Hindi model of minimal EXTRAS.
201
+ * Command synonyms and spelling variants should be in profile alternatives,
202
+ * not duplicated here. Only includes:
199
203
  * - Literals (true, false, null, undefined)
200
204
  * - Positional words
201
205
  * - Event names
202
206
  * - Time units
203
- * - Temporal conjunctions
204
- * - Additional synonyms and spelling variants
207
+ * - References not in profile
205
208
  */
206
209
  const ARABIC_EXTRAS: KeywordEntry[] = [
207
210
  // Values/Literals
@@ -239,13 +242,8 @@ const ARABIC_EXTRAS: KeywordEntry[] = [
239
242
  { native: 'تحميل', normalized: 'load' },
240
243
  { native: 'تمرير', normalized: 'scroll' },
241
244
 
242
- // References
243
- { native: 'أنا', normalized: 'me' },
244
- { native: 'هو', normalized: 'it' },
245
+ // References (feminine "it" not in profile)
245
246
  { native: 'هي', normalized: 'it' },
246
- { native: 'النتيجة', normalized: 'result' },
247
- { native: 'الحدث', normalized: 'event' },
248
- { native: 'الهدف', normalized: 'target' },
249
247
 
250
248
  // Time units
251
249
  { native: 'ثانية', normalized: 's' },
@@ -258,43 +256,12 @@ const ARABIC_EXTRAS: KeywordEntry[] = [
258
256
 
259
257
  // Note: Temporal markers (عندما, حينما, etc.) are in TEMPORAL_MARKERS map
260
258
  // with formality metadata, not in ARABIC_EXTRAS
261
-
262
- // Additional spelling variants (without diacritics)
263
- { native: 'بدل', normalized: 'toggle' },
264
- { native: 'غير', normalized: 'toggle' },
265
- { native: 'اضف', normalized: 'add' },
266
- { native: 'ازل', normalized: 'remove' },
267
- { native: 'اضع', normalized: 'put' },
268
- { native: 'يضع', normalized: 'put' },
269
- { native: 'اجعل', normalized: 'put' },
270
- { native: 'عين', normalized: 'set' },
271
- { native: 'زد', normalized: 'increment' },
272
- { native: 'ارفع', normalized: 'increment' },
273
- { native: 'انقص', normalized: 'decrement' },
274
- { native: 'قلل', normalized: 'decrement' },
275
- { native: 'سجل', normalized: 'log' },
276
- { native: 'اظهر', normalized: 'show' },
277
- { native: 'اعرض', normalized: 'show' },
278
- { native: 'اخف', normalized: 'hide' },
279
- { native: 'اخفي', normalized: 'hide' },
280
- { native: 'شغل', normalized: 'trigger' },
281
- { native: 'ارسل', normalized: 'send' },
282
- { native: 'ركز', normalized: 'focus' },
283
- { native: 'شوش', normalized: 'blur' },
284
- { native: 'اذا', normalized: 'if' },
285
- { native: 'لو', normalized: 'if' },
286
- { native: 'والا', normalized: 'else' },
287
- { native: 'توقف', normalized: 'halt' },
288
- { native: 'انسخ', normalized: 'clone' },
289
-
290
- // Control flow helpers
291
- { native: 'إذن', normalized: 'then' },
292
- { native: 'فإن', normalized: 'then' },
293
- { native: 'نهاية', normalized: 'end' },
294
-
295
- // Modifiers
296
- { native: 'قبل', normalized: 'before' },
297
- { native: 'بعد', normalized: 'after' },
259
+ //
260
+ // Command spelling variants are now in the profile alternatives:
261
+ // - toggle: بدل, غيّر, غير (in profile)
262
+ // - add: اضف, زِد (in profile)
263
+ // - remove: أزل, امسح (in profile)
264
+ // - etc.
298
265
  ];
299
266
 
300
267
  // =============================================================================
package/src/tokenizers/bengali.ts CHANGED
@@ -176,22 +176,8 @@ export class BengaliTokenizer extends BaseTokenizer {
176
176
  const startPos = pos;
177
177
  let word = '';
178
178
 
179
- while (pos < input.length && (isBengali(input[pos]) || input[pos] === ' ')) {
180
- if (input[pos] === ' ') {
181
- // Check if next char is Bengali (compound word)
182
- if (pos + 1 < input.length && isBengali(input[pos + 1])) {
183
- const rest = input.slice(pos);
184
- const compound = [' করুন', ' ফেলুন', ' দিন', ' না হলে', ' যে যান'].find(c =>
185
- rest.startsWith(c)
186
- );
187
- if (compound) {
188
- word += compound;
189
- pos += compound.length;
190
- continue;
191
- }
192
- }
193
- break;
194
- }
179
+ // Extract word without including spaces (let parser handle multi-word patterns)
180
+ while (pos < input.length && isBengali(input[pos])) {
195
181
  word += input[pos];
196
182
  pos++;
197
183
  }