@lokascript/semantic 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/dist/core.d.ts +1246 -0
  2. package/dist/core.js +3073 -0
  3. package/dist/core.js.map +1 -0
  4. package/dist/languages/bn.d.ts +33 -0
  5. package/dist/languages/bn.js +1101 -0
  6. package/dist/languages/bn.js.map +1 -0
  7. package/dist/languages/es-MX.d.ts +23 -0
  8. package/dist/languages/es-MX.js +1676 -0
  9. package/dist/languages/es-MX.js.map +1 -0
  10. package/dist/languages/es.d.ts +3 -42
  11. package/dist/languages/he.d.ts +70 -0
  12. package/dist/languages/he.js +1331 -0
  13. package/dist/languages/he.js.map +1 -0
  14. package/dist/languages/hi.d.ts +36 -0
  15. package/dist/languages/hi.js +1162 -0
  16. package/dist/languages/hi.js.map +1 -0
  17. package/dist/languages/it.d.ts +53 -0
  18. package/dist/languages/it.js +1600 -0
  19. package/dist/languages/it.js.map +1 -0
  20. package/dist/languages/ms.d.ts +32 -0
  21. package/dist/languages/ms.js +1043 -0
  22. package/dist/languages/ms.js.map +1 -0
  23. package/dist/languages/pl.d.ts +37 -0
  24. package/dist/languages/pl.js +1331 -0
  25. package/dist/languages/pl.js.map +1 -0
  26. package/dist/languages/ru.d.ts +37 -0
  27. package/dist/languages/ru.js +1356 -0
  28. package/dist/languages/ru.js.map +1 -0
  29. package/dist/languages/th.d.ts +35 -0
  30. package/dist/languages/th.js +1076 -0
  31. package/dist/languages/th.js.map +1 -0
  32. package/dist/languages/tl.d.ts +32 -0
  33. package/dist/languages/tl.js +1034 -0
  34. package/dist/languages/tl.js.map +1 -0
  35. package/dist/languages/uk.d.ts +37 -0
  36. package/dist/languages/uk.js +1356 -0
  37. package/dist/languages/uk.js.map +1 -0
  38. package/dist/languages/vi.d.ts +59 -0
  39. package/dist/languages/vi.js +1220 -0
  40. package/dist/languages/vi.js.map +1 -0
  41. package/dist/spanish-BedpM-NU.d.ts +43 -0
  42. package/package.json +53 -1
  43. package/src/core.ts +155 -0
@@ -0,0 +1,1676 @@
1
+ // src/registry.ts
2
/** Registered tokenizers, keyed by language code. */
var tokenizers = /* @__PURE__ */ new Map();
/** Registered language profiles, keyed by language code. */
var profiles = /* @__PURE__ */ new Map();
/** Per-language cache; the entry for a code is dropped whenever that code is (re-)registered. */
var patternCache = /* @__PURE__ */ new Map();

/**
 * Register (or replace) the tokenizer and profile for a language.
 *
 * @param code - Language code used as the key in all three maps
 * @param tokenizer - Tokenizer stored for this language
 * @param profile - Profile stored for this language
 */
function registerLanguage(code, tokenizer, profile) {
  // Drop anything cached for this code so it cannot outlive the old registration.
  patternCache.delete(code);
  profiles.set(code, profile);
  tokenizers.set(code, tokenizer);
}
10
+
11
+ // src/tokenizers/base.ts
12
/**
 * Cursor over an array of tokens with lookahead and backtracking support.
 */
var TokenStreamImpl = class {
  /**
   * @param tokens - Token array to iterate over
   * @param language - Language code stored alongside the tokens
   */
  constructor(tokens, language) {
    this.pos = 0;
    this.tokens = tokens;
    this.language = language;
  }
  /**
   * Look at the token `offset` places from the cursor without consuming it.
   * Returns null when that index falls outside the token array.
   */
  peek(offset = 0) {
    const target = this.pos + offset;
    const outOfRange = target < 0 || target >= this.tokens.length;
    return outOfRange ? null : this.tokens[target];
  }
  /**
   * Consume and return the current token.
   * @throws Error when the stream is already exhausted
   */
  advance() {
    if (this.isAtEnd()) {
      throw new Error("Unexpected end of token stream");
    }
    const current = this.tokens[this.pos];
    this.pos += 1;
    return current;
  }
  /** True once the cursor has moved past the last token. */
  isAtEnd() {
    return !(this.pos < this.tokens.length);
  }
  /** Capture the current cursor so it can be restored with reset(). */
  mark() {
    const position = this.pos;
    return { position };
  }
  /** Move the cursor back (or forward) to a previously captured mark. */
  reset(mark) {
    const { position } = mark;
    this.pos = position;
  }
  /** Current cursor index. */
  position() {
    return this.pos;
  }
  /**
   * Get the not-yet-consumed tokens as a new array.
   */
  remaining() {
    const { tokens, pos } = this;
    return tokens.slice(pos);
  }
  /**
   * Consume tokens for as long as the predicate accepts the current one,
   * returning the consumed tokens in order.
   */
  takeWhile(predicate) {
    const taken = [];
    for (;;) {
      if (this.isAtEnd() || !predicate(this.peek())) {
        return taken;
      }
      taken.push(this.advance());
    }
  }
  /**
   * Discard tokens for as long as the predicate accepts the current one.
   */
  skipWhile(predicate) {
    for (;;) {
      if (this.isAtEnd() || !predicate(this.peek())) {
        return;
      }
      this.advance();
    }
  }
};
68
/**
 * Build a source span object from its start and end offsets.
 *
 * @param start - Offset where the span begins
 * @param end - Offset where the span ends
 * @returns `{ start, end }`
 */
function createPosition(start, end) {
  const span = { start, end };
  return span;
}
71
/**
 * Build a token object.
 *
 * @param value - Token text
 * @param kind - Token kind
 * @param position - Source span of the token
 * @param normalizedOrOptions - Either the normalized form as a string, or an
 *   options object with optional `normalized`, `stem` and `stemConfidence`.
 *   `stemConfidence` is only copied when `stem` is present.
 * @returns Token containing `value`, `kind`, `position` plus whichever
 *   optional fields were supplied
 */
function createToken(value, kind, position, normalizedOrOptions) {
  const token = { value, kind, position };
  if (typeof normalizedOrOptions === "string") {
    token.normalized = normalizedOrOptions;
    return token;
  }
  if (!normalizedOrOptions) {
    return token;
  }
  const { normalized: normalizedForm, stem, stemConfidence } = normalizedOrOptions;
  if (normalizedForm !== void 0) {
    token.normalized = normalizedForm;
  }
  if (stem !== void 0) {
    token.stem = stem;
    // A confidence value is meaningless without the stem it refers to.
    if (stemConfidence !== void 0) {
      token.stemConfidence = stemConfidence;
    }
  }
  return token;
}
91
/**
 * Report whether the given character is whitespace (regex `\s`).
 */
function isWhitespace(char) {
  const whitespacePattern = /\s/;
  return whitespacePattern.test(char);
}
94
/**
 * Report whether a character can open a selector token:
 * `#`, `.`, `[`, `@`, `*` or `<`.
 */
function isSelectorStart(char) {
  switch (char) {
    case "#":
    case ".":
    case "[":
    case "@":
    case "*":
    case "<":
      return true;
    default:
      return false;
  }
}
97
/**
 * Report whether a character is one of the recognized quote characters:
 * double quote, single quote, backtick, or the corner brackets U+300C / U+300D.
 */
function isQuote(char) {
  const quoteChars = ['"', "'", "`", "\u300C", "\u300D"];
  return quoteChars.includes(char);
}
100
/**
 * Report whether the given character is a decimal digit (regex `\d`).
 */
function isDigit(char) {
  const digitPattern = /\d/;
  return digitPattern.test(char);
}
103
/**
 * Report whether the given character is an ASCII letter (a-z or A-Z).
 *
 * Non-string input yields false. Without the guard, `RegExp.test` would
 * coerce `undefined`/`null` to the text "undefined"/"null", and an
 * out-of-range `input[pos]` would be reported as a letter.
 */
function isAsciiLetter(char) {
  if (typeof char !== "string") {
    return false;
  }
  return /[a-zA-Z]/.test(char);
}
106
/**
 * Report whether the given character may appear in an ASCII identifier:
 * a letter, a digit, `_` or `-`.
 *
 * Non-string input yields false. Without the guard, `RegExp.test` would
 * coerce `undefined`/`null` to the text "undefined"/"null", and an
 * out-of-range `input[pos]` would be reported as an identifier character.
 */
function isAsciiIdentifierChar(char) {
  if (typeof char !== "string") {
    return false;
  }
  return /[a-zA-Z0-9_-]/.test(char);
}
109
/**
 * Build character classifiers for a Latin-script language.
 *
 * @param letterPattern - Pattern matching a single letter of the language
 * @returns `isLetter` (matches `letterPattern`) and `isIdentifierChar`
 *   (a letter, an ASCII digit, `_` or `-`)
 *
 * Both classifiers return false for non-string input, so `undefined` is not
 * coerced to the text "undefined" and matched against the pattern.
 */
function createLatinCharClassifiers(letterPattern) {
  // A global/sticky RegExp keeps `lastIndex` between `.test()` calls, which
  // would make results alternate from call to call. Work on a stateless copy
  // and leave the caller's pattern untouched.
  const isStateful = letterPattern instanceof RegExp && (letterPattern.global || letterPattern.sticky);
  const pattern = isStateful
    ? new RegExp(letterPattern.source, letterPattern.flags.replace(/[gy]/g, ""))
    : letterPattern;
  const isLetter = (char) => typeof char === "string" && pattern.test(char);
  const isIdentifierChar = (char) => isLetter(char) || (typeof char === "string" && /[0-9_-]/.test(char));
  return { isLetter, isIdentifierChar };
}
114
/**
 * Advance past a run of ASCII identifier characters.
 *
 * @param input - Text being scanned
 * @param pos - Index to start from
 * @returns Index of the first character that is not an identifier character
 *   (equal to `pos` when the run is empty)
 */
function scanSelectorIdentifier(input, pos) {
  let end = pos;
  while (end < input.length && isAsciiIdentifierChar(input[end])) {
    end++;
  }
  return end;
}

/**
 * Advance past a balanced `[...]` group.
 *
 * Nested brackets are counted; brackets inside a quoted section (", ' or `)
 * are ignored, and a backslash escapes the next character.
 *
 * @param input - Text being scanned
 * @param pos - Index of the opening `[`
 * @returns Index just past the matching `]`, or -1 when the group never closes
 */
function scanSelectorBracketGroup(input, pos) {
  let depth = 1;
  let quoteChar = null;
  let escaped = false;
  let end = pos + 1;
  while (end < input.length && depth > 0) {
    const c = input[end];
    if (escaped) {
      escaped = false;
    } else if (c === "\\") {
      escaped = true;
    } else if (quoteChar !== null) {
      if (c === quoteChar) {
        quoteChar = null;
      }
    } else if (c === '"' || c === "'" || c === "`") {
      quoteChar = c;
    } else if (c === "[") {
      depth++;
    } else if (c === "]") {
      depth--;
    }
    end++;
  }
  return depth === 0 ? end : -1;
}

/**
 * Extract a selector-like token starting at `startPos`.
 *
 * Recognized forms:
 * - `#name`, `.name`, `@name`, `*name`: prefix plus at least one identifier
 *   character. Scanning stops at the first non-identifier character, so
 *   `#el.focus()` yields `#el`.
 * - `[...]`: a balanced bracket group.
 * - `<tag ... />` / `<tag>`: a tag name starting with an ASCII letter, then any
 *   number of `.class`, `#id` or `[attr]` modifiers, optional whitespace, an
 *   optional `/` (followed by optional whitespace), and a closing `>`.
 *
 * @param input - Text being scanned
 * @param startPos - Index at which the selector must start
 * @returns The selector text, or null when no well-formed selector starts here
 */
function extractCssSelector(input, startPos) {
  if (startPos >= input.length) return null;
  const first = input[startPos];
  let end;
  if (first === "#" || first === "." || first === "@" || first === "*") {
    end = scanSelectorIdentifier(input, startPos + 1);
    // A bare prefix with no name is not a selector.
    if (end === startPos + 1) return null;
  } else if (first === "[") {
    end = scanSelectorBracketGroup(input, startPos);
    if (end === -1) return null;
  } else if (first === "<") {
    end = startPos + 1;
    if (end >= input.length || !isAsciiLetter(input[end])) return null;
    end = scanSelectorIdentifier(input, end);
    // Zero or more .class / #id / [attr] modifiers directly after the tag name.
    while (end < input.length) {
      const modChar = input[end];
      if (modChar === "." || modChar === "#") {
        const nameEnd = scanSelectorIdentifier(input, end + 1);
        if (nameEnd === end + 1) return null;
        end = nameEnd;
      } else if (modChar === "[") {
        end = scanSelectorBracketGroup(input, end);
        if (end === -1) return null;
      } else {
        break;
      }
    }
    while (end < input.length && isWhitespace(input[end])) {
      end++;
    }
    if (end < input.length && input[end] === "/") {
      end++;
      while (end < input.length && isWhitespace(input[end])) {
        end++;
      }
    }
    if (end >= input.length || input[end] !== ">") return null;
    end++;
  } else {
    return null;
  }
  return input.slice(startPos, end);
}
252
/**
 * Report whether the apostrophe at `pos` begins a possessive `'s`
 * (case-insensitive) rather than a quoted string.
 *
 * The `'s` counts as possessive when it ends the input or is followed by a
 * character that is not an ASCII identifier character (whitespace and `*`
 * are covered by that rule).
 *
 * @param input - Text being scanned
 * @param pos - Index of the candidate apostrophe
 */
function isPossessiveMarker(input, pos) {
  const size = input.length;
  if (pos >= size || input[pos] !== "'") return false;
  const sIndex = pos + 1;
  if (sIndex >= size) return false;
  if (input[sIndex].toLowerCase() !== "s") return false;
  const afterIndex = pos + 2;
  if (afterIndex >= size) return true;
  return !isAsciiIdentifierChar(input[afterIndex]);
}
261
/**
 * Extract a quoted string literal starting at `startPos`.
 *
 * Supported openers are ", ', ` and the corner bracket U+300C (closed by
 * U+300D). A backslash escapes the following character. An apostrophe that
 * `isPossessiveMarker` identifies as a possessive `'s` is not a string.
 *
 * @param input - Text being scanned
 * @param startPos - Index of the opening quote
 * @returns The literal including its quotes; if the closing quote is missing,
 *   everything from the opening quote to the end of input; or null when no
 *   string literal starts here
 */
function extractStringLiteral(input, startPos) {
  if (startPos >= input.length) return null;
  const opener = input[startPos];
  if (!isQuote(opener)) return null;
  if (opener === "'" && isPossessiveMarker(input, startPos)) return null;

  let closer;
  switch (opener) {
    case '"':
    case "'":
    case "`":
      closer = opener;
      break;
    case "\u300C":
      closer = "\u300D";
      break;
    default:
      // A quote character that cannot open a literal (e.g. a closing bracket).
      return null;
  }

  let skipNext = false;
  for (let i = startPos + 1; i < input.length; i++) {
    const ch = input[i];
    if (skipNext) {
      skipNext = false;
    } else if (ch === "\\") {
      skipNext = true;
    } else if (ch === closer) {
      return input.slice(startPos, i + 1);
    }
  }
  // Unterminated literal: hand back everything that was scanned.
  return input.slice(startPos);
}
293
/**
 * Report whether a URL-like token starts at `pos`.
 *
 * Accepted starts:
 * - `/x` where x is a letter, digit, `.`, `_` or `-` (absolute path)
 * - `//x` where x is a letter (protocol-relative)
 * - `./` or `../` (relative path)
 * - `http://` or `https://`, case-insensitive
 *
 * @param input - Text being scanned
 * @param pos - Index to test
 */
function isUrlStart(input, pos) {
  if (pos >= input.length) return false;
  const first = input[pos];
  const second = input[pos + 1] || "";
  const third = input[pos + 2] || "";

  if (first === "/") {
    if (second === "/") {
      if (/[a-zA-Z]/.test(third)) return true;
    } else if (/[a-zA-Z0-9._-]/.test(second)) {
      return true;
    }
  } else if (first === ".") {
    const relativeHere = second === "/";
    const relativeParent = second === "." && third === "/";
    if (relativeHere || relativeParent) return true;
  }

  // "https://" is eight characters long, so eight is enough to see either scheme.
  const head = input.slice(pos, pos + 8).toLowerCase();
  return head.startsWith("http://") || head.startsWith("https://");
}
313
/**
 * Extract a URL-like token starting at `startPos`.
 *
 * Scanning continues over the allowed URL character set. A `#` ends the scan;
 * it is kept as a fragment (together with the letters, digits, `_` and `-`
 * that follow it) only when the text collected so far ends in a letter, a
 * digit, `/` or `.`.
 *
 * @param input - Text being scanned
 * @param startPos - Index at which the URL must start (checked by isUrlStart)
 * @returns The URL text, or null when none starts here or the result is
 *   shorter than two characters
 */
function extractUrl(input, startPos) {
  if (!isUrlStart(input, startPos)) return null;
  const urlChars = /[a-zA-Z0-9/:._\-?&=%@+~!$'()*,;[\]]/;
  let end = startPos;
  while (end < input.length) {
    const ch = input[end];
    if (ch === "#") {
      const collected = input.slice(startPos, end);
      if (collected.length > 0 && /[a-zA-Z0-9/.]$/.test(collected)) {
        end++;
        while (end < input.length && /[a-zA-Z0-9_-]/.test(input[end])) {
          end++;
        }
      }
      break;
    }
    if (!urlChars.test(ch)) break;
    end++;
  }
  const url = input.slice(startPos, end);
  return url.length < 2 ? null : url;
}
340
/**
 * Extract a numeric literal starting at `startPos`.
 *
 * Grammar: optional `+`/`-` sign, one or more digits, an optional `.` followed
 * by zero or more digits, then an optional unit suffix: `ms`, or a single
 * `s`, `m` or `h`.
 *
 * @param input - Text being scanned
 * @param startPos - Index at which the number must start
 * @returns The matched text, or null when no number starts here
 */
function extractNumber(input, startPos) {
  if (startPos >= input.length) return null;
  let cursor = startPos;
  if (input[cursor] === "-" || input[cursor] === "+") {
    cursor++;
  }
  // At least one digit is mandatory; this also rejects any other first character.
  if (cursor >= input.length || !isDigit(input[cursor])) return null;
  while (cursor < input.length && isDigit(input[cursor])) {
    cursor++;
  }
  if (input[cursor] === ".") {
    cursor++;
    while (cursor < input.length && isDigit(input[cursor])) {
      cursor++;
    }
  }
  let end = cursor;
  if (input.startsWith("ms", cursor)) {
    end += 2;
  } else if (["s", "m", "h"].includes(input[cursor])) {
    end += 1;
  }
  return input.slice(startPos, end);
}
371
/**
 * Shared tokenizer base class: profile-driven keyword matching, optional
 * morphological normalization, and `try*` extractors that each return a token
 * or null for the text at a given position.
 */
var _BaseTokenizer = class _BaseTokenizer {
  constructor() {
    /** Keywords derived from the profile, sorted longest-first for greedy matching. */
    this.profileKeywords = [];
    /** Lowercased native word -> keyword entry, for O(1) lookups. */
    this.profileKeywordMap = /* @__PURE__ */ new Map();
  }
  /**
   * Initialize keyword mappings from a language profile.
   * Builds native→normalized entries from:
   * - profile.keywords (primary + alternatives)
   * - profile.references
   * - profile.roleMarkers (primary + alternatives)
   *
   * Entries are keyed by native word, so a later source overwrites an earlier
   * one for the same word; extras are applied last and therefore win.
   * The resulting list is sorted longest-first for greedy matching.
   *
   * @param profile - Language profile containing keyword translations
   * @param extras - Additional keyword entries, stored as given
   */
  initializeKeywordsFromProfile(profile, extras = []) {
    const byNative = /* @__PURE__ */ new Map();
    const register = (native, normalizedForm) => {
      byNative.set(native, { native, normalized: normalizedForm });
    };
    if (profile.keywords) {
      for (const [key, translation] of Object.entries(profile.keywords)) {
        // An explicit `normalized` on the translation overrides the profile key.
        const target = translation.normalized || key;
        register(translation.primary, target);
        if (translation.alternatives) {
          for (const alt of translation.alternatives) {
            register(alt, target);
          }
        }
      }
    }
    if (profile.references) {
      for (const [key, native] of Object.entries(profile.references)) {
        register(native, key);
      }
    }
    if (profile.roleMarkers) {
      for (const [role, marker] of Object.entries(profile.roleMarkers)) {
        if (marker.primary) {
          register(marker.primary, role);
        }
        if (marker.alternatives) {
          for (const alt of marker.alternatives) {
            register(alt, role);
          }
        }
      }
    }
    for (const extra of extras) {
      byNative.set(extra.native, extra);
    }
    this.profileKeywords = Array.from(byNative.values()).sort(
      (a, b) => b.native.length - a.native.length
    );
    this.profileKeywordMap = /* @__PURE__ */ new Map();
    for (const keyword of this.profileKeywords) {
      this.profileKeywordMap.set(keyword.native.toLowerCase(), keyword);
      // Also index the diacritic-free spelling, without displacing an entry
      // that is already registered under that spelling.
      const stripped = this.removeDiacritics(keyword.native);
      if (stripped !== keyword.native && !this.profileKeywordMap.has(stripped.toLowerCase())) {
        this.profileKeywordMap.set(stripped.toLowerCase(), keyword);
      }
    }
  }
  /**
   * Remove diacritical marks from a word for normalization.
   * Strips the code points U+064B–U+0652 and U+0670 (Arabic marks).
   *
   * @param word - Word to normalize
   * @returns Word without those marks
   */
  removeDiacritics(word) {
    return word.replace(/[\u064B-\u0652\u0670]/g, "");
  }
  /**
   * Try to match a keyword from the profile at the current position.
   * Uses longest-first greedy matching (important for non-space languages).
   *
   * @param input - Input string
   * @param pos - Current position
   * @returns Keyword token if matched, null otherwise
   */
  tryProfileKeyword(input, pos) {
    for (const entry of this.profileKeywords) {
      // Compare in place: slicing the rest of the input for every keyword
      // would allocate a new string per entry.
      if (input.startsWith(entry.native, pos)) {
        return createToken(
          entry.native,
          "keyword",
          createPosition(pos, pos + entry.native.length),
          entry.normalized
        );
      }
    }
    return null;
  }
  /**
   * Check if the remaining input starts with any known keyword.
   * Useful for non-space languages to detect word boundaries.
   *
   * @param input - Input string
   * @param pos - Current position
   * @returns true if a keyword starts at this position
   */
  isKeywordStart(input, pos) {
    return this.profileKeywords.some((entry) => input.startsWith(entry.native, pos));
  }
  /**
   * Look up a keyword by native word (case-insensitive).
   *
   * @param native - Native word to look up
   * @returns Keyword entry if found, undefined otherwise
   */
  lookupKeyword(native) {
    return this.profileKeywordMap.get(native.toLowerCase());
  }
  /**
   * Check if a word is a known keyword (case-insensitive).
   *
   * @param native - Native word to check
   * @returns true if the word is a keyword
   */
  isKeyword(native) {
    return this.profileKeywordMap.has(native.toLowerCase());
  }
  /**
   * Set the morphological normalizer for this tokenizer.
   */
  setNormalizer(normalizer) {
    this.normalizer = normalizer;
  }
  /**
   * Try to normalize a word using the morphological normalizer.
   *
   * Returns the normalizer's result only when it produced a different stem
   * with confidence of at least 0.7; returns null otherwise, including when
   * no normalizer is set.
   */
  tryNormalize(word) {
    if (!this.normalizer) return null;
    const result = this.normalizer.normalize(word);
    const changed = result.stem !== word;
    return changed && result.confidence >= 0.7 ? result : null;
  }
  /**
   * Try morphological normalization followed by keyword lookup.
   *
   * If the word normalizes to a stem that is a known keyword, returns a
   * keyword token for the original word carrying the keyword's normalized
   * form plus `stem` and `stemConfidence`.
   *
   * @param word - The word to normalize and look up
   * @param startPos - Start position for the token
   * @param endPos - End position for the token
   * @returns Token if the stem matches a keyword, null otherwise
   */
  tryMorphKeywordMatch(word, startPos, endPos) {
    const result = this.tryNormalize(word);
    if (!result) return null;
    const stemEntry = this.lookupKeyword(result.stem);
    if (!stemEntry) return null;
    return createToken(word, "keyword", createPosition(startPos, endPos), {
      normalized: stemEntry.normalized,
      stem: result.stem,
      stemConfidence: result.confidence
    });
  }
  /**
   * Try to extract a CSS selector at the current position.
   */
  trySelector(input, pos) {
    const selector = extractCssSelector(input, pos);
    if (!selector) return null;
    return createToken(selector, "selector", createPosition(pos, pos + selector.length));
  }
  /**
   * Try to extract an event modifier at the current position.
   * Event modifiers are .once, .debounce(N), .throttle(N), .queue(strategy).
   * The modifier must be followed by whitespace, another `.`, or end of input.
   *
   * @returns Token with `metadata.modifierName` and `metadata.value`
   *   (the raw argument for `queue`, a base-10 integer for the others,
   *   undefined when there is no argument), or null
   */
  tryEventModifier(input, pos) {
    if (input[pos] !== ".") {
      return null;
    }
    const match = input.slice(pos).match(/^\.(?:once|debounce|throttle|queue)(?:\(([^)]+)\))?(?:\s|$|\.)/);
    if (!match) {
      return null;
    }
    // The regex consumes the trailing delimiter; it is not part of the token.
    const fullMatch = match[0].replace(/(\s|\.)$/, "");
    const modifierName = fullMatch.slice(1).split("(")[0];
    const value = match[1];
    const token = createToken(
      fullMatch,
      "event-modifier",
      createPosition(pos, pos + fullMatch.length)
    );
    return {
      ...token,
      metadata: {
        modifierName,
        value: value ? modifierName === "queue" ? value : parseInt(value, 10) : void 0
      }
    };
  }
  /**
   * Try to extract a string literal at the current position.
   */
  tryString(input, pos) {
    const literal = extractStringLiteral(input, pos);
    if (!literal) return null;
    return createToken(literal, "literal", createPosition(pos, pos + literal.length));
  }
  /**
   * Try to extract a number at the current position.
   */
  tryNumber(input, pos) {
    const number = extractNumber(input, pos);
    if (!number) return null;
    return createToken(number, "literal", createPosition(pos, pos + number.length));
  }
  /**
   * Try to match a time unit from a list of patterns. The first matching
   * unit in list order wins.
   *
   * Each unit provides `pattern`, `suffix` and `length`, and may set
   * `caseInsensitive`, `notFollowedBy` (a character that vetoes the match)
   * and `checkBoundary` (reject when an ASCII identifier character follows).
   *
   * @param input - Input string
   * @param pos - Position after the number
   * @param timeUnits - Array of time unit mappings (pattern → standard suffix)
   * @param skipWhitespace - Whether to skip whitespace before the unit (default: false)
   * @returns Object with matched suffix and new position, or null if no match
   */
  tryMatchTimeUnit(input, pos, timeUnits, skipWhitespace = false) {
    let unitPos = pos;
    if (skipWhitespace) {
      while (unitPos < input.length && isWhitespace(input[unitPos])) {
        unitPos++;
      }
    }
    const remaining = input.slice(unitPos);
    for (const unit of timeUnits) {
      const candidate = remaining.slice(0, unit.length);
      const matches = unit.caseInsensitive
        ? candidate.toLowerCase() === unit.pattern.toLowerCase()
        : candidate === unit.pattern;
      if (!matches) continue;
      const nextChar = remaining[unit.length] || "";
      if (unit.notFollowedBy && nextChar === unit.notFollowedBy) continue;
      if (unit.checkBoundary && isAsciiIdentifierChar(nextChar)) continue;
      return { suffix: unit.suffix, endPos: unitPos + unit.length };
    }
    return null;
  }
  /**
   * Parse a base number (sign, integer, decimal) without time units.
   *
   * @param input - Input string
   * @param startPos - Start position
   * @param allowSign - Whether to allow a +/- sign (default: true)
   * @returns Object with number string and end position, or null
   */
  parseBaseNumber(input, startPos, allowSign = true) {
    let pos = startPos;
    let number = "";
    if (allowSign && (input[pos] === "-" || input[pos] === "+")) {
      number += input[pos++];
    }
    if (pos >= input.length || !isDigit(input[pos])) {
      return null;
    }
    while (pos < input.length && isDigit(input[pos])) {
      number += input[pos++];
    }
    if (pos < input.length && input[pos] === ".") {
      number += input[pos++];
      while (pos < input.length && isDigit(input[pos])) {
        number += input[pos++];
      }
    }
    if (!number || number === "-" || number === "+") return null;
    return { number, endPos: pos };
  }
  /**
   * Try to extract a number with native language time units.
   *
   * Steps:
   * 1. Parse the base number (sign, integer, decimal)
   * 2. Try the native time units, then the standard ones (ms, s, m, h)
   * 3. Append the matched unit's standard suffix to the token value
   *
   * @param input - Input string
   * @param pos - Start position
   * @param nativeTimeUnits - Language-specific time unit mappings
   * @param options - `allowSign` (default true) and `skipWhitespace` (default false)
   * @returns Literal token if a number was found, null otherwise
   */
  tryNumberWithTimeUnits(input, pos, nativeTimeUnits, options = {}) {
    const { allowSign = true, skipWhitespace = false } = options;
    const baseResult = this.parseBaseNumber(input, pos, allowSign);
    if (!baseResult) return null;
    let { number, endPos } = baseResult;
    // Native units are listed first so they take priority over the standard ones.
    const allUnits = [...nativeTimeUnits, ..._BaseTokenizer.STANDARD_TIME_UNITS];
    const timeMatch = this.tryMatchTimeUnit(input, endPos, allUnits, skipWhitespace);
    if (timeMatch) {
      number += timeMatch.suffix;
      endPos = timeMatch.endPos;
    }
    return createToken(number, "literal", createPosition(pos, endPos));
  }
  /**
   * Try to extract a URL at the current position.
   * Handles /path, ./path, ../path, //domain.com, http://, https://
   */
  tryUrl(input, pos) {
    const url = extractUrl(input, pos);
    if (!url) return null;
    return createToken(url, "url", createPosition(pos, pos + url.length));
  }
  /**
   * Try to extract a variable reference (:varname) at the current position.
   * The colon must be followed by at least one ASCII identifier character;
   * the token value includes the colon.
   */
  tryVariableRef(input, pos) {
    if (input[pos] !== ":") return null;
    if (pos + 1 >= input.length) return null;
    if (!isAsciiIdentifierChar(input[pos + 1])) return null;
    let endPos = pos + 1;
    while (endPos < input.length && isAsciiIdentifierChar(input[endPos])) {
      endPos++;
    }
    return createToken(input.slice(pos, endPos), "identifier", createPosition(pos, endPos));
  }
  /**
   * Try to extract an operator or punctuation token at the current position.
   * Two-character operators (==, !=, etc.) are tried before single characters.
   */
  tryOperator(input, pos) {
    const twoChar = input.slice(pos, pos + 2);
    if (["==", "!=", "<=", ">=", "&&", "||", "->"].includes(twoChar)) {
      return createToken(twoChar, "operator", createPosition(pos, pos + 2));
    }
    const oneChar = input[pos];
    if (["<", ">", "!", "+", "-", "*", "/", "="].includes(oneChar)) {
      return createToken(oneChar, "operator", createPosition(pos, pos + 1));
    }
    if (["(", ")", "{", "}", ",", ";", ":"].includes(oneChar)) {
      return createToken(oneChar, "punctuation", createPosition(pos, pos + 1));
    }
    return null;
  }
  /**
   * Try to match a multi-character particle from a list.
   * The first particle in list order that matches at `pos` wins.
   *
   * @param input - Input string
   * @param pos - Current position
   * @param particles - Array of particles to match
   * @returns Particle token if matched, null otherwise
   */
  tryMultiCharParticle(input, pos, particles) {
    for (const particle of particles) {
      if (input.startsWith(particle, pos)) {
        return createToken(particle, "particle", createPosition(pos, pos + particle.length));
      }
    }
    return null;
  }
};
/**
 * Standard time units tried after any language-specific units.
 * Each entry maps a pattern to its suffix (ms, s, m, h). Single-letter units
 * require a word boundary, and "m" is additionally rejected before "s".
 */
_BaseTokenizer.STANDARD_TIME_UNITS = [
  { pattern: "ms", suffix: "ms", length: 2 },
  { pattern: "s", suffix: "s", length: 1, checkBoundary: true },
  { pattern: "m", suffix: "m", length: 1, checkBoundary: true, notFollowedBy: "s" },
  { pattern: "h", suffix: "h", length: 1, checkBoundary: true }
];
var BaseTokenizer = _BaseTokenizer;
779
+
780
+ // src/tokenizers/morphology/types.ts
781
/**
 * Build a normalization result meaning "the word is already its own stem":
 * the stem is the word itself, with confidence 1.
 */
function noChange(word) {
  const unchanged = { stem: word, confidence: 1 };
  return unchanged;
}
784
/**
 * Build a normalization result.
 *
 * @param stem - Stem the word was reduced to
 * @param confidence - Confidence of the normalization
 * @param metadata - Optional extra information; the `metadata` key is
 *   omitted from the result when this is falsy
 */
function normalized(stem, confidence, metadata) {
  const result = { stem, confidence };
  if (metadata) {
    result.metadata = metadata;
  }
  return result;
}
790
+
791
+ // src/tokenizers/morphology/spanish-normalizer.ts
792
/**
 * Report whether a character is one of the accented or special letters used
 * in Spanish: acute-accented vowels, u with diaeresis, or n with tilde,
 * in either case.
 */
function isSpanishSpecificLetter(char) {
  const spanishLetterPattern = /[áéíóúüñÁÉÍÓÚÜÑ]/;
  return spanishLetterPattern.test(char);
}
795
/**
 * Heuristic check for whether a word could be a Spanish verb form.
 *
 * A word qualifies when, ignoring case, it ends in an infinitive (-ar/-er/-ir),
 * gerund (-ando/-iendo), participle (-ado/-ido) or reflexive infinitive
 * (-arse/-erse/-irse) ending, or when it contains any Spanish-specific letter.
 */
function looksLikeSpanishVerb(word) {
  const verbSuffixes = ["ar", "er", "ir", "ando", "iendo", "ado", "ido", "arse", "erse", "irse"];
  const lowered = word.toLowerCase();
  if (verbSuffixes.some((suffix) => lowered.endsWith(suffix))) {
    return true;
  }
  return Array.from(word).some((char) => isSpanishSpecificLetter(char));
}
806
/** Reflexive pronoun suffixes: se, me, te, nos, os. */
var REFLEXIVE_SUFFIXES = ["se", "me", "te", "nos", "os"];

/**
 * Conjugation endings for -ar verbs.
 *
 * Each row below is [ending, confidence, type] and is expanded into
 * `{ ending, stem: "ar", confidence, type }`, in the order listed.
 * Some endings appear twice because the same spelling serves two forms.
 */
var AR_ENDINGS = [
  // Gerund (-ando)
  ["ando", 0.88, "gerund"],
  // Past participle (-ado), with gender and number variants
  ["ado", 0.88, "participle"],
  ["ada", 0.88, "participle"],
  ["ados", 0.88, "participle"],
  ["adas", 0.88, "participle"],
  // Present indicative
  ["o", 0.75, "present"], // 1st person singular (yo)
  ["as", 0.82, "present"], // 2nd person singular (tú)
  ["a", 0.75, "present"], // 3rd person singular (él/ella)
  ["amos", 0.85, "present"], // 1st person plural (nosotros)
  ["\xE1is", 0.85, "present"], // 2nd person plural (vosotros)
  ["ais", 0.82, "present"], // 2nd person plural, written without the accent
  ["an", 0.8, "present"], // 3rd person plural (ellos)
  // Preterite
  ["\xE9", 0.85, "past"], // 1st person singular (yo)
  ["aste", 0.88, "past"], // 2nd person singular (tú)
  ["\xF3", 0.82, "past"], // 3rd person singular (él/ella)
  ["amos", 0.85, "past"], // 1st person plural (same spelling as present)
  ["asteis", 0.88, "past"], // 2nd person plural (vosotros)
  ["aron", 0.88, "past"], // 3rd person plural (ellos)
  // Imperfect
  ["aba", 0.88, "past"], // 1st/3rd person singular (yo/él)
  ["abas", 0.88, "past"], // 2nd person singular (tú)
  ["\xE1bamos", 0.88, "past"], // 1st person plural (nosotros)
  ["abamos", 0.85, "past"], // 1st person plural, written without the accent
  ["abais", 0.88, "past"], // 2nd person plural (vosotros)
  ["aban", 0.88, "past"], // 3rd person plural (ellos)
  // Subjunctive
  ["e", 0.72, "subjunctive"], // 1st/3rd person singular (ambiguous)
  ["es", 0.78, "subjunctive"], // 2nd person singular (tú)
  ["emos", 0.82, "subjunctive"], // 1st person plural (nosotros)
  ["\xE9is", 0.85, "subjunctive"], // 2nd person plural (vosotros)
  ["eis", 0.82, "subjunctive"], // 2nd person plural, written without the accent
  ["en", 0.78, "subjunctive"], // 3rd person plural (ellos)
  // Imperative
  ["a", 0.75, "imperative"], // 2nd person singular (same spelling as 3rd person present)
  ["ad", 0.85, "imperative"], // 2nd person plural (vosotros)
  // Infinitive
  ["ar", 0.92, "dictionary"]
].map(([ending, confidence, type]) => ({ ending, stem: "ar", confidence, type }));
877
// Suffix rules for regular -er verbs. Each rule maps a conjugated ending back
// to the "er" infinitive ending, with a confidence score and a conjugation type.
// Array order is preserved from the hand-written table.
var ER_ENDINGS = (() => {
  // Build one rule; every rule in this table restores the "er" infinitive.
  const rule = (ending, confidence, type) => ({ ending, stem: "er", confidence, type });

  // Imperfect endings come in pairs: accented (0.88) then unaccented (0.85).
  const imperfect = ["a", "as", "amos", "ais", "an"].flatMap((tail) => [
    rule("\xED" + tail, 0.88, "past"),
    rule("i" + tail, 0.85, "past")
  ]);

  return [
    // Gerund (-iendo)
    rule("iendo", 0.88, "gerund"),
    // Past participle (-ido) in its gender/number variants
    rule("ido", 0.85, "participle"),
    rule("ida", 0.85, "participle"),
    rule("idos", 0.85, "participle"),
    rule("idas", 0.85, "participle"),
    // Present indicative: yo, tú, él/ella, nosotros, vosotros (with/without accent), ellos
    rule("o", 0.72, "present"),
    rule("es", 0.78, "present"),
    rule("e", 0.72, "present"),
    rule("emos", 0.85, "present"),
    rule("\xE9is", 0.85, "present"),
    rule("eis", 0.82, "present"),
    rule("en", 0.78, "present"),
    // Preterite: yo, tú, él/ella (with/without accent), nosotros, vosotros, ellos
    rule("\xED", 0.85, "past"),
    rule("iste", 0.88, "past"),
    rule("i\xF3", 0.85, "past"),
    rule("io", 0.82, "past"),
    rule("imos", 0.85, "past"),
    rule("isteis", 0.88, "past"),
    rule("ieron", 0.88, "past"),
    // Imperfect: yo/él, tú, nosotros, vosotros, ellos
    ...imperfect,
    // Infinitive (dictionary form)
    rule("er", 0.92, "dictionary")
  ];
})();
939
// Suffix rules for regular -ir verbs. Each rule maps a conjugated ending back
// to the "ir" infinitive ending, with a confidence score and a conjugation type.
// Array order is preserved from the hand-written table.
var IR_ENDINGS = (() => {
  // Build one rule; every rule in this table restores the "ir" infinitive.
  const rule = (ending, confidence, type) => ({ ending, stem: "ir", confidence, type });

  // Imperfect endings come in pairs: accented (0.88) then unaccented (0.85).
  const imperfect = ["a", "as", "amos", "ais", "an"].flatMap((tail) => [
    rule("\xED" + tail, 0.88, "past"),
    rule("i" + tail, 0.85, "past")
  ]);

  return [
    // Gerund (-iendo)
    rule("iendo", 0.88, "gerund"),
    // Past participle (-ido) in its gender/number variants
    rule("ido", 0.85, "participle"),
    rule("ida", 0.85, "participle"),
    rule("idos", 0.85, "participle"),
    rule("idas", 0.85, "participle"),
    // Present indicative: yo, tú, él/ella, nosotros, vosotros (with/without accent), ellos
    rule("o", 0.72, "present"),
    rule("es", 0.78, "present"),
    rule("e", 0.72, "present"),
    rule("imos", 0.85, "present"),
    rule("\xEDs", 0.85, "present"),
    rule("is", 0.82, "present"),
    rule("en", 0.78, "present"),
    // Preterite: yo, tú, él/ella (with/without accent), nosotros, vosotros, ellos
    rule("\xED", 0.85, "past"),
    rule("iste", 0.88, "past"),
    rule("i\xF3", 0.85, "past"),
    rule("io", 0.82, "past"),
    rule("imos", 0.85, "past"),
    rule("isteis", 0.88, "past"),
    rule("ieron", 0.88, "past"),
    // Imperfect: yo/él, tú, nosotros, vosotros, ellos
    ...imperfect,
    // Infinitive (dictionary form)
    rule("ir", 0.92, "dictionary")
  ];
})();
991
// Every -ar/-er/-ir rule in one list, ordered from longest ending to shortest
// so that the most specific suffix is tried first. Array.prototype.sort is
// stable, so rules with equally long endings keep their -ar, -er, -ir order.
var ALL_ENDINGS = AR_ENDINGS.concat(ER_ENDINGS, IR_ENDINGS).sort(
  (left, right) => right.ending.length - left.ending.length
);
994
/**
 * Reduces Spanish verb forms (conjugated and/or carrying an attached
 * reflexive pronoun) to an infinitive candidate, using the suffix rules in
 * ALL_ENDINGS and the pronoun list in REFLEXIVE_SUFFIXES.
 */
var SpanishMorphologicalNormalizer = class {
  constructor() {
    this.language = "es";
  }

  /**
   * Whether `word` is worth passing to normalize(): words shorter than three
   * characters are rejected, everything else is decided by looksLikeSpanishVerb.
   */
  isNormalizable(word) {
    return word.length < 3 ? false : looksLikeSpanishVerb(word);
  }

  /**
   * Normalize a Spanish word towards its infinitive.
   *
   * Order of attempts: infinitive short-circuit, reflexive stripping,
   * conjugation stripping, and finally "no change".
   */
  normalize(word) {
    const lowered = word.toLowerCase();

    if (/(?:ar|er|ir)$/.test(lowered)) {
      // The word already ends like an infinitive. It is returned untouched
      // unless the text right before that two-letter ending is one of the
      // reflexive suffixes (i.e. the word ends in suffix + "ar"/"er"/"ir").
      // NOTE(review): this lets words such as "...sear"/"...tear" fall through
      // to the rule table below; confirm that is the intended exception.
      const beforeEnding = lowered.slice(0, -2);
      const pronounPrecedesEnding = REFLEXIVE_SUFFIXES.some((s) => beforeEnding.endsWith(s));
      if (!pronounPrecedesEnding) {
        return noChange(word);
      }
    }

    return (
      this.tryReflexiveNormalization(lowered) ||
      this.tryConjugationNormalization(lowered) ||
      noChange(word)
    );
  }

  /**
   * Strip a trailing reflexive pronoun (see REFLEXIVE_SUFFIXES).
   *
   * Examples:
   *   mostrarse  → mostrar
   *   ocultarse  → ocultar
   *   esconderse → esconder
   *
   * If the remainder is not already an infinitive it is run through
   * tryConjugationNormalization; that result is used (with its confidence
   * scaled by 0.95) only when it actually changed the remainder.
   * Returns null when no suffix applies.
   */
  tryReflexiveNormalization(word) {
    for (const pronoun of REFLEXIVE_SUFFIXES) {
      if (!word.endsWith(pronoun)) continue;

      const remainder = word.slice(0, -pronoun.length);

      if (/(?:ar|er|ir)$/.test(remainder)) {
        return normalized(remainder, 0.88, {
          removedSuffixes: [pronoun],
          conjugationType: "reflexive"
        });
      }

      const inner = this.tryConjugationNormalization(remainder);
      if (inner && inner.stem !== remainder) {
        const innerSuffixes = inner.metadata?.removedSuffixes || [];
        return normalized(inner.stem, inner.confidence * 0.95, {
          removedSuffixes: [pronoun, ...innerSuffixes],
          conjugationType: "reflexive"
        });
      }
    }
    return null;
  }

  /**
   * Replace the first matching ending from ALL_ENDINGS with that rule's
   * infinitive ending. A rule is skipped when fewer than two characters
   * would remain in front of the ending. Returns null when nothing matches.
   */
  tryConjugationNormalization(word) {
    for (const { ending, stem, confidence, type } of ALL_ENDINGS) {
      if (!word.endsWith(ending)) continue;

      const root = word.slice(0, -ending.length);
      if (root.length < 2) continue;

      return normalized(root + stem, confidence, {
        removedSuffixes: [ending],
        conjugationType: type
      });
    }
    return null;
  }
};
// Shared module-level instance.
var spanishMorphologicalNormalizer = new SpanishMorphologicalNormalizer();
1072
+
1073
+ // src/generators/profiles/spanish.ts
1074
// Language profile for Spanish ("es"): grammar settings, reference words,
// possessives, role markers, the keyword table and event-handler markers.
var spanishProfile = (() => {
  // Marker placed before its argument; `alternatives` is omitted when empty.
  const before = (primary, ...alternatives) =>
    alternatives.length > 0
      ? { primary, alternatives, position: "before" }
      : { primary, position: "before" };

  // One row per keyword: [normalized name, primary word, ...alternative words].
  // Row order is significant because it becomes the key order of `keywords`.
  const keywordRows = [
    // Class/Attribute operations
    ["toggle", "alternar", "cambiar", "conmutar"],
    ["add", "agregar", "a\xF1adir"],
    ["remove", "quitar", "eliminar", "remover", "sacar"],
    // Content operations
    ["put", "poner", "colocar"],
    ["append", "a\xF1adir"],
    ["prepend", "anteponer"],
    ["take", "tomar"],
    ["make", "hacer", "crear"],
    ["clone", "clonar", "copiar"],
    ["swap", "intercambiar", "cambiar"],
    ["morph", "transformar", "convertir"],
    // Variable operations
    ["set", "establecer", "fijar", "definir"],
    ["get", "obtener", "conseguir"],
    ["increment", "incrementar", "aumentar"],
    ["decrement", "decrementar", "disminuir"],
    ["log", "registrar", "imprimir"],
    // Visibility
    ["show", "mostrar", "ense\xF1ar"],
    ["hide", "ocultar", "esconder"],
    ["transition", "transici\xF3n", "animar"],
    // Events
    ["on", "en", "cuando", "al"],
    ["trigger", "disparar", "activar"],
    ["send", "enviar"],
    // DOM focus
    ["focus", "enfocar"],
    ["blur", "desenfocar"],
    // Common event names (for event handler patterns)
    ["click", "clic", "hacer clic"],
    ["hover", "sobrevolar", "pasar por encima"],
    ["submit", "env\xEDo", "enviar"],
    ["input", "entrada", "introducir"],
    ["change", "cambio", "cambiar"],
    // Navigation
    ["go", "ir", "navegar"],
    // Async
    ["wait", "esperar"],
    ["fetch", "buscar", "obtener"],
    ["settle", "estabilizar"],
    // Control flow
    ["if", "si"],
    ["when", "cuando"],
    ["where", "donde"],
    ["else", "sino", "de lo contrario"],
    ["repeat", "repetir"],
    ["for", "para"],
    ["while", "mientras"],
    ["continue", "continuar"],
    ["halt", "detener", "parar"],
    ["throw", "lanzar", "arrojar"],
    ["call", "llamar"],
    ["return", "retornar", "devolver"],
    ["then", "entonces", "luego", "despu\xE9s"],
    ["and", "y", "adem\xE1s", "tambi\xE9n"],
    ["end", "fin", "final", "terminar"],
    // Advanced
    ["js", "js"],
    ["async", "as\xEDncrono"],
    ["tell", "decir"],
    ["default", "predeterminar", "por defecto"],
    ["init", "iniciar", "inicializar"],
    ["behavior", "comportamiento"],
    ["install", "instalar"],
    ["measure", "medir"],
    // Modifiers
    ["into", "en", "dentro de"],
    ["before", "antes"],
    ["after", "despu\xE9s"],
    // Event modifiers (for repeat until event)
    ["until", "hasta"],
    ["event", "evento"],
    ["from", "de", "desde"]
  ];

  const keywords = Object.fromEntries(
    keywordRows.map(([name, primary, ...alternatives]) => [
      name,
      alternatives.length > 0
        ? { primary, alternatives, normalized: name }
        : { primary, normalized: name }
    ])
  );

  return {
    code: "es",
    name: "Spanish",
    nativeName: "Espa\xF1ol",
    direction: "ltr",
    wordOrder: "SVO",
    markingStrategy: "preposition",
    usesSpaces: true,
    // The infinitive is the usual verb form in Spanish software UI
    // (Guardar, Cancelar, Abrir), as seen on macOS, Windows and web apps.
    defaultVerbForm: "infinitive",
    verb: {
      position: "start",
      subjectDrop: true
    },
    references: {
      me: "yo", // "I/me"
      it: "ello", // "it"
      you: "t\xFA", // "you"
      result: "resultado",
      event: "evento",
      target: "objetivo",
      body: "cuerpo"
    },
    possessive: {
      marker: "de", // general possession is expressed with "de"
      markerPosition: "before-property",
      usePossessiveAdjectives: true,
      specialForms: {
        me: "mi", // "my" (possessive adjective)
        it: "su", // "its"
        you: "tu" // "your"
      },
      keywords: {
        mi: "me",
        tu: "you",
        su: "it"
      }
    },
    roleMarkers: {
      destination: before("en", "sobre", "a"),
      source: before("de", "desde"),
      patient: before(""),
      style: before("con")
    },
    keywords,
    eventHandler: {
      keyword: { primary: "al", alternatives: ["cuando", "en"], normalized: "on" },
      sourceMarker: before("de", "desde"),
      // Event marker "al" (when), used in the SVO pattern:
      //   al [event] [verb] [patient] en [destination?]
      //   e.g. al clic alternar .active en #button
      eventMarker: before("al", "cuando"),
      // Temporal conjunctions (when)
      temporalMarkers: ["cuando", "al"]
    }
  };
})();
1218
+
1219
+ // src/tokenizers/spanish.ts
1220
// Character predicates for Spanish text, obtained from the shared Latin
// classifier factory. The character class covers ASCII letters plus the
// accented vowels, ü and ñ in both lower and upper case.
var {
  isLetter: isSpanishLetter,
  isIdentifierChar: isSpanishIdentifierChar
} = createLatinCharClassifiers(/[a-zA-ZáéíóúüñÁÉÍÓÚÜÑ]/);
1221
// Spanish time-unit words that may follow a number, each mapped to its short
// suffix. Plural forms are listed before their singular so the longer word is
// seen first; `length` is the character count of `pattern`.
var SPANISH_TIME_UNITS = [
  ["milisegundos", "ms"],
  ["milisegundo", "ms"],
  ["segundos", "s"],
  ["segundo", "s"],
  ["minutos", "m"],
  ["minuto", "m"],
  ["horas", "h"],
  ["hora", "h"]
].map(([pattern, suffix]) => ({
  pattern,
  suffix,
  length: pattern.length,
  caseInsensitive: true
}));
1231
// Words the Spanish tokenizer treats as particles rather than keywords or
// identifiers: prepositions, a few adverbs used prepositionally, and the
// contractions "al" / "del".
var PREPOSITIONS = /* @__PURE__ */ new Set([
  // Place and direction: in/on, to, of/from, from, until/to
  "en", "a", "de", "desde", "hasta",
  // Accompaniment and purpose: with, without, by/for, for
  "con", "sin", "por", "para",
  // Relative position: on/about, between
  "sobre", "entre",
  // Time: before, after (with and without accent)
  "antes", "despu\xE9s", "despues",
  // Containment: inside, outside
  "dentro", "fuera",
  // Contractions: a + el, de + el
  "al", "del"
]);
1269
+ var SPANISH_EXTRAS = [
1270
+ // Values/Literals
1271
+ { native: "verdadero", normalized: "true" },
1272
+ { native: "falso", normalized: "false" },
1273
+ { native: "nulo", normalized: "null" },
1274
+ { native: "indefinido", normalized: "undefined" },
1275
+ // Positional
1276
+ { native: "primero", normalized: "first" },
1277
+ { native: "primera", normalized: "first" },
1278
+ { native: "\xFAltimo", normalized: "last" },
1279
+ { native: "ultima", normalized: "last" },
1280
+ { native: "siguiente", normalized: "next" },
1281
+ { native: "anterior", normalized: "previous" },
1282
+ { native: "cercano", normalized: "closest" },
1283
+ { native: "padre", normalized: "parent" },
1284
+ // Events
1285
+ { native: "clic", normalized: "click" },
1286
+ { native: "click", normalized: "click" },
1287
+ { native: "hacer clic", normalized: "click" },
1288
+ { native: "entrada", normalized: "input" },
1289
+ { native: "cambio", normalized: "change" },
1290
+ { native: "env\xEDo", normalized: "submit" },
1291
+ { native: "envio", normalized: "submit" },
1292
+ { native: "tecla abajo", normalized: "keydown" },
1293
+ { native: "tecla arriba", normalized: "keyup" },
1294
+ { native: "rat\xF3n encima", normalized: "mouseover" },
1295
+ { native: "raton encima", normalized: "mouseover" },
1296
+ { native: "rat\xF3n fuera", normalized: "mouseout" },
1297
+ { native: "raton fuera", normalized: "mouseout" },
1298
+ { native: "enfoque", normalized: "focus" },
1299
+ { native: "desenfoque", normalized: "blur" },
1300
+ { native: "carga", normalized: "load" },
1301
+ { native: "desplazamiento", normalized: "scroll" },
1302
+ // References
1303
+ { native: "yo", normalized: "me" },
1304
+ { native: "m\xED", normalized: "me" },
1305
+ { native: "mi", normalized: "me" },
1306
+ { native: "ello", normalized: "it" },
1307
+ { native: "resultado", normalized: "result" },
1308
+ { native: "objetivo", normalized: "target" },
1309
+ { native: "destino", normalized: "target" },
1310
+ // Time units
1311
+ { native: "segundo", normalized: "s" },
1312
+ { native: "segundos", normalized: "s" },
1313
+ { native: "milisegundo", normalized: "ms" },
1314
+ { native: "milisegundos", normalized: "ms" },
1315
+ { native: "minuto", normalized: "m" },
1316
+ { native: "minutos", normalized: "m" },
1317
+ { native: "hora", normalized: "h" },
1318
+ { native: "horas", normalized: "h" },
1319
+ // Multi-word phrases
1320
+ { native: "de lo contrario", normalized: "else" },
1321
+ { native: "hasta que", normalized: "until" },
1322
+ { native: "antes de", normalized: "before" },
1323
+ { native: "despu\xE9s de", normalized: "after" },
1324
+ { native: "despues de", normalized: "after" },
1325
+ { native: "dentro de", normalized: "into" },
1326
+ { native: "fuera de", normalized: "out" },
1327
+ // Accent variations not in profile
1328
+ { native: "asincrono", normalized: "async" },
1329
+ { native: "despues", normalized: "after" },
1330
+ // Command overrides (ensure correct mapping when profile has multiple meanings)
1331
+ { native: "a\xF1adir", normalized: "add" },
1332
+ // Profile may have this as 'append'
1333
+ // Synonyms not in profile
1334
+ { native: "toggle", normalized: "toggle" },
1335
+ { native: "borrar", normalized: "remove" },
1336
+ { native: "pon", normalized: "put" },
1337
+ { native: "crear", normalized: "make" },
1338
+ // Logical/conditional
1339
+ { native: "y", normalized: "and" },
1340
+ { native: "o", normalized: "or" },
1341
+ { native: "no", normalized: "not" },
1342
+ { native: "es", normalized: "is" },
1343
+ { native: "existe", normalized: "exists" },
1344
+ { native: "vac\xEDo", normalized: "empty" },
1345
+ { native: "vacio", normalized: "empty" }
1346
+ ];
1347
/**
 * Tokenizer for Spanish input. Keywords come from spanishProfile plus
 * SPANISH_EXTRAS; unknown words are additionally run through the Spanish
 * morphological normalizer via the base class' tryMorphKeywordMatch.
 */
var SpanishTokenizer = class extends BaseTokenizer {
  constructor() {
    super();
    this.language = "es";
    this.direction = "ltr";
    this.initializeKeywordsFromProfile(spanishProfile, SPANISH_EXTRAS);
    this.normalizer = new SpanishMorphologicalNormalizer();
  }

  /**
   * Split `input` into a token stream.
   *
   * At every non-whitespace position the recognizers below are tried in a
   * fixed priority order and the first token produced wins. A character no
   * recognizer accepts is skipped.
   */
  tokenize(input) {
    const tokens = [];

    // Returns the token starting at `at`, or a falsy value if none matches.
    const matchAt = (at) => {
      const ch = input[at];

      if (isSelectorStart(ch)) {
        // Event modifiers share their start character with selectors,
        // so they are tried first.
        const hit = this.tryEventModifier(input, at) || this.trySelector(input, at);
        if (hit) return hit;
      }
      if (isQuote(ch)) {
        const hit = this.tryString(input, at);
        if (hit) return hit;
      }
      if (isUrlStart(input, at)) {
        const hit = this.tryUrl(input, at);
        if (hit) return hit;
      }
      // A number starts with a digit, or with "-" immediately followed by one.
      const startsNumber =
        isDigit(ch) || (ch === "-" && at + 1 < input.length && isDigit(input[at + 1]));
      if (startsNumber) {
        const hit = this.extractSpanishNumber(input, at);
        if (hit) return hit;
      }
      // Multi-word phrases are tried before single words.
      const hit = this.tryVariableRef(input, at) || this.tryMultiWordPhrase(input, at);
      if (hit) return hit;
      if (isSpanishLetter(ch)) {
        const wordHit = this.extractSpanishWord(input, at);
        if (wordHit) return wordHit;
      }
      return this.tryOperator(input, at);
    };

    let pos = 0;
    while (pos < input.length) {
      if (isWhitespace(input[pos])) {
        pos++;
        continue;
      }
      const token = matchAt(pos);
      if (token) {
        tokens.push(token);
        pos = token.position.end;
      } else {
        pos++;
      }
    }
    return new TokenStreamImpl(tokens, "es");
  }

  /**
   * Classify a raw token string. Checks run in this order: preposition,
   * keyword, selector prefix, quoted or numeric literal, operator; anything
   * else is an identifier.
   */
  classifyToken(token) {
    const lowered = token.toLowerCase();
    if (PREPOSITIONS.has(lowered)) return "particle";
    if (this.isKeyword(lowered)) return "keyword";

    const startsWithAny = (...prefixes) => prefixes.some((p) => token.startsWith(p));
    if (startsWithAny("#", ".", "[")) return "selector";
    if (startsWithAny('"', "'")) return "literal";
    if (/^\d/.test(token)) return "literal";

    const operators = ["==", "!=", "<=", ">=", "<", ">", "&&", "||", "!"];
    return operators.includes(token) ? "operator" : "identifier";
  }

  /**
   * Try to match a multi-word keyword phrase (any profile keyword whose
   * native form contains a space) at `pos`, case-insensitively. The phrase
   * must be followed by end of input or a non-letter character.
   * Entries are visited in this.profileKeywords order; the first match wins.
   * Returns a keyword token or null.
   */
  tryMultiWordPhrase(input, pos) {
    for (const entry of this.profileKeywords) {
      const phrase = entry.native;
      if (!phrase.includes(" ")) continue;

      const end = pos + phrase.length;
      const text = input.slice(pos, end);
      if (text.toLowerCase() !== phrase.toLowerCase()) continue;

      const atBoundary =
        end >= input.length || isWhitespace(input[end]) || !isSpanishLetter(input[end]);
      if (atBoundary) {
        return createToken(text, "keyword", createPosition(pos, end), entry.normalized);
      }
    }
    return null;
  }

  /**
   * Read one word starting at `startPos` and classify it.
   *
   * Order: preposition → particle; known keyword → keyword; morphological
   * match (e.g. mostrarse → mostrar, alternando → alternar) → whatever
   * tryMorphKeywordMatch returns; otherwise identifier.
   * Returns null if no identifier character is found at `startPos`.
   */
  extractSpanishWord(input, startPos) {
    let end = startPos;
    while (end < input.length && isSpanishIdentifierChar(input[end])) {
      end++;
    }
    const word = input.slice(startPos, end);
    if (!word) return null;

    const lowered = word.toLowerCase();
    const span = () => createPosition(startPos, end);

    if (PREPOSITIONS.has(lowered)) {
      return createToken(word, "particle", span());
    }
    const known = this.lookupKeyword(lowered);
    if (known) {
      return createToken(word, "keyword", span(), known.normalized);
    }
    const viaMorphology = this.tryMorphKeywordMatch(lowered, startPos, end);
    if (viaMorphology) return viaMorphology;

    return createToken(word, "identifier", span());
  }

  /**
   * Read a number at `startPos`, accepting a leading sign and an optional
   * Spanish time-unit word (possibly separated by whitespace).
   */
  extractSpanishNumber(input, startPos) {
    const options = { allowSign: true, skipWhitespace: true };
    return this.tryNumberWithTimeUnits(input, startPos, SPANISH_TIME_UNITS, options);
  }
};
// Shared module-level instance.
var spanishTokenizer = new SpanishTokenizer();
1502
+
1503
+ // src/generators/profiles/spanishMexico.ts
1504
+ var spanishMexicoProfile = {
1505
+ code: "es-MX",
1506
+ name: "Spanish (Mexico)",
1507
+ nativeName: "Espa\xF1ol (M\xE9xico)",
1508
+ direction: "ltr",
1509
+ wordOrder: "SVO",
1510
+ markingStrategy: "preposition",
1511
+ usesSpaces: true,
1512
+ defaultVerbForm: "infinitive",
1513
+ extends: "es",
1514
+ // Inherit from base Spanish
1515
+ verb: {
1516
+ position: "start",
1517
+ subjectDrop: true
1518
+ },
1519
+ references: {
1520
+ me: "yo",
1521
+ it: "ello",
1522
+ you: "t\xFA",
1523
+ // Mexico uses tú (not vos like Argentina)
1524
+ result: "resultado",
1525
+ event: "evento",
1526
+ target: "objetivo",
1527
+ body: "cuerpo"
1528
+ },
1529
+ possessive: {
1530
+ marker: "de",
1531
+ markerPosition: "before-property",
1532
+ usePossessiveAdjectives: true,
1533
+ specialForms: {
1534
+ me: "mi",
1535
+ it: "su",
1536
+ you: "tu"
1537
+ },
1538
+ keywords: {
1539
+ mi: "me",
1540
+ tu: "you",
1541
+ su: "it"
1542
+ }
1543
+ },
1544
+ roleMarkers: {
1545
+ destination: { primary: "en", alternatives: ["sobre", "a"], position: "before" },
1546
+ source: { primary: "de", alternatives: ["desde"], position: "before" },
1547
+ patient: { primary: "", position: "before" },
1548
+ style: { primary: "con", position: "before" }
1549
+ },
1550
+ keywords: {
1551
+ // Class/Attribute operations - Mexican alternatives
1552
+ toggle: {
1553
+ primary: "alternar",
1554
+ alternatives: ["cambiar", "conmutar", "switchear"],
1555
+ // "switchear" is Spanglish common in MX tech
1556
+ normalized: "toggle"
1557
+ },
1558
+ add: { primary: "agregar", alternatives: ["a\xF1adir", "meter"], normalized: "add" },
1559
+ remove: {
1560
+ primary: "quitar",
1561
+ alternatives: ["eliminar", "borrar", "sacar"],
1562
+ // "borrar" more common in MX
1563
+ normalized: "remove"
1564
+ },
1565
+ // Content operations
1566
+ put: { primary: "poner", alternatives: ["colocar", "meter"], normalized: "put" },
1567
+ append: { primary: "a\xF1adir", normalized: "append" },
1568
+ prepend: { primary: "anteponer", normalized: "prepend" },
1569
+ take: { primary: "tomar", alternatives: ["agarrar"], normalized: "take" },
1570
+ // "agarrar" more MX
1571
+ make: { primary: "hacer", alternatives: ["crear"], normalized: "make" },
1572
+ clone: { primary: "clonar", alternatives: ["copiar"], normalized: "clone" },
1573
+ swap: { primary: "intercambiar", alternatives: ["cambiar"], normalized: "swap" },
1574
+ morph: { primary: "transformar", alternatives: ["convertir"], normalized: "morph" },
1575
+ // Variable operations
1576
+ set: { primary: "establecer", alternatives: ["fijar", "definir", "setear"], normalized: "set" },
1577
+ get: { primary: "obtener", alternatives: ["conseguir", "jalar"], normalized: "get" },
1578
+ increment: {
1579
+ primary: "incrementar",
1580
+ alternatives: ["aumentar", "subir"],
1581
+ normalized: "increment"
1582
+ },
1583
+ decrement: {
1584
+ primary: "decrementar",
1585
+ alternatives: ["disminuir", "bajar"],
1586
+ normalized: "decrement"
1587
+ },
1588
+ log: { primary: "registrar", alternatives: ["imprimir", "loguear"], normalized: "log" },
1589
+ // Visibility
1590
+ show: { primary: "mostrar", alternatives: ["ense\xF1ar"], normalized: "show" },
1591
+ hide: { primary: "ocultar", alternatives: ["esconder"], normalized: "hide" },
1592
+ transition: { primary: "transici\xF3n", alternatives: ["animar"], normalized: "transition" },
1593
+ // Events
1594
+ on: { primary: "en", alternatives: ["cuando", "al"], normalized: "on" },
1595
+ trigger: { primary: "disparar", alternatives: ["activar"], normalized: "trigger" },
1596
+ send: { primary: "enviar", alternatives: ["mandar"], normalized: "send" },
1597
+ // "mandar" more MX
1598
+ // DOM focus
1599
+ focus: { primary: "enfocar", normalized: "focus" },
1600
+ blur: { primary: "desenfocar", normalized: "blur" },
1601
+ // Common event names
1602
+ click: { primary: "clic", alternatives: ["hacer clic", "dar clic"], normalized: "click" },
1603
+ hover: { primary: "sobrevolar", alternatives: ["pasar encima"], normalized: "hover" },
1604
+ submit: { primary: "env\xEDo", alternatives: ["enviar"], normalized: "submit" },
1605
+ input: { primary: "entrada", alternatives: ["introducir"], normalized: "input" },
1606
+ change: { primary: "cambio", alternatives: ["cambiar"], normalized: "change" },
1607
+ // Navigation
1608
+ go: { primary: "ir", alternatives: ["navegar"], normalized: "go" },
1609
+ // Async - Mexican variants
1610
+ wait: {
1611
+ primary: "esperar",
1612
+ alternatives: ["ahorita", "aguantar"],
1613
+ // "ahorita" is distinctly Mexican
1614
+ normalized: "wait"
1615
+ },
1616
+ fetch: {
1617
+ primary: "buscar",
1618
+ alternatives: ["obtener", "jalar", "traer"],
1619
+ // "jalar" (pull) common in MX tech
1620
+ normalized: "fetch"
1621
+ },
1622
+ settle: { primary: "estabilizar", normalized: "settle" },
1623
+ // Control flow
1624
+ if: { primary: "si", normalized: "if" },
1625
+ when: { primary: "cuando", normalized: "when" },
1626
+ where: { primary: "donde", normalized: "where" },
1627
+ else: { primary: "sino", alternatives: ["de lo contrario", "si no"], normalized: "else" },
1628
+ repeat: { primary: "repetir", normalized: "repeat" },
1629
+ for: { primary: "para", normalized: "for" },
1630
+ while: { primary: "mientras", normalized: "while" },
1631
+ continue: { primary: "continuar", alternatives: ["seguir"], normalized: "continue" },
1632
+ halt: { primary: "detener", alternatives: ["parar"], normalized: "halt" },
1633
+ throw: {
1634
+ primary: "lanzar",
1635
+ alternatives: ["aventar", "arrojar"],
1636
+ // "aventar" is Mexican
1637
+ normalized: "throw"
1638
+ },
1639
+ call: { primary: "llamar", normalized: "call" },
1640
+ return: { primary: "retornar", alternatives: ["devolver", "regresar"], normalized: "return" },
1641
+ then: { primary: "entonces", alternatives: ["luego", "despu\xE9s"], normalized: "then" },
1642
+ and: { primary: "y", alternatives: ["adem\xE1s", "tambi\xE9n"], normalized: "and" },
1643
+ end: { primary: "fin", alternatives: ["final", "terminar"], normalized: "end" },
1644
+ // Advanced
1645
+ js: { primary: "js", normalized: "js" },
1646
+ async: { primary: "as\xEDncrono", normalized: "async" },
1647
+ tell: { primary: "decir", normalized: "tell" },
1648
+ default: { primary: "predeterminar", alternatives: ["por defecto"], normalized: "default" },
1649
+ init: { primary: "iniciar", alternatives: ["inicializar", "arrancar"], normalized: "init" },
1650
+ behavior: { primary: "comportamiento", normalized: "behavior" },
1651
+ install: { primary: "instalar", normalized: "install" },
1652
+ measure: { primary: "medir", normalized: "measure" },
1653
+ // Modifiers
1654
+ into: { primary: "en", alternatives: ["dentro de"], normalized: "into" },
1655
+ before: { primary: "antes", normalized: "before" },
1656
+ after: { primary: "despu\xE9s", normalized: "after" },
1657
+ // Event modifiers
1658
+ until: { primary: "hasta", normalized: "until" },
1659
+ event: { primary: "evento", normalized: "event" },
1660
+ from: { primary: "de", alternatives: ["desde"], normalized: "from" }
1661
+ },
1662
+ eventHandler: {
1663
+ keyword: { primary: "al", alternatives: ["cuando", "en"], normalized: "on" },
1664
+ sourceMarker: { primary: "de", alternatives: ["desde"], position: "before" },
1665
+ eventMarker: { primary: "al", alternatives: ["cuando"], position: "before" },
1666
+ temporalMarkers: ["cuando", "al"]
1667
+ }
1668
+ };
1669
+
1670
+ // src/languages/es-MX.ts — module tail: registers the "es-MX" language, then exports its tokenizer and profile.
1671
+ registerLanguage("es-MX", spanishTokenizer, spanishMexicoProfile); // Top-level call, so it runs when this module is evaluated. NOTE(review): registerLanguage is defined outside this chunk — presumably it stores the tokenizer/profile pair under the "es-MX" code; confirm.
1672
+ export {
1673
+ spanishMexicoProfile, // same binding passed as the third argument to registerLanguage above
1674
+ spanishTokenizer // same binding passed as the second argument to registerLanguage above
1675
+ };
1676
+ //# sourceMappingURL=es-MX.js.map