@lokascript/i18n 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96):
  1. package/README.md +286 -0
  2. package/dist/browser.cjs +7669 -0
  3. package/dist/browser.cjs.map +1 -0
  4. package/dist/browser.d.cts +50 -0
  5. package/dist/browser.d.ts +50 -0
  6. package/dist/browser.js +7592 -0
  7. package/dist/browser.js.map +1 -0
  8. package/dist/hyperfixi-i18n.min.js +2 -0
  9. package/dist/hyperfixi-i18n.min.js.map +1 -0
  10. package/dist/hyperfixi-i18n.mjs +8558 -0
  11. package/dist/hyperfixi-i18n.mjs.map +1 -0
  12. package/dist/index.cjs +14205 -0
  13. package/dist/index.cjs.map +1 -0
  14. package/dist/index.d.cts +947 -0
  15. package/dist/index.d.ts +947 -0
  16. package/dist/index.js +14095 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/transformer-Ckask-yw.d.cts +1041 -0
  19. package/dist/transformer-Ckask-yw.d.ts +1041 -0
  20. package/package.json +84 -0
  21. package/src/browser.ts +122 -0
  22. package/src/compatibility/browser-tests/grammar-demo.spec.ts +169 -0
  23. package/src/constants.ts +366 -0
  24. package/src/dictionaries/ar.ts +233 -0
  25. package/src/dictionaries/bn.ts +156 -0
  26. package/src/dictionaries/de.ts +233 -0
  27. package/src/dictionaries/derive.ts +515 -0
  28. package/src/dictionaries/en.ts +237 -0
  29. package/src/dictionaries/es.ts +233 -0
  30. package/src/dictionaries/fr.ts +233 -0
  31. package/src/dictionaries/hi.ts +270 -0
  32. package/src/dictionaries/id.ts +233 -0
  33. package/src/dictionaries/index.ts +238 -0
  34. package/src/dictionaries/it.ts +233 -0
  35. package/src/dictionaries/ja.ts +233 -0
  36. package/src/dictionaries/ko.ts +233 -0
  37. package/src/dictionaries/ms.ts +276 -0
  38. package/src/dictionaries/pl.ts +239 -0
  39. package/src/dictionaries/pt.ts +237 -0
  40. package/src/dictionaries/qu.ts +233 -0
  41. package/src/dictionaries/ru.ts +270 -0
  42. package/src/dictionaries/sw.ts +233 -0
  43. package/src/dictionaries/th.ts +156 -0
  44. package/src/dictionaries/tl.ts +276 -0
  45. package/src/dictionaries/tr.ts +233 -0
  46. package/src/dictionaries/uk.ts +270 -0
  47. package/src/dictionaries/vi.ts +210 -0
  48. package/src/dictionaries/zh.ts +233 -0
  49. package/src/enhanced-i18n.test.ts +454 -0
  50. package/src/enhanced-i18n.ts +713 -0
  51. package/src/examples/new-languages.ts +326 -0
  52. package/src/formatting.test.ts +213 -0
  53. package/src/formatting.ts +416 -0
  54. package/src/grammar/direct-mappings.ts +353 -0
  55. package/src/grammar/grammar.test.ts +1053 -0
  56. package/src/grammar/index.ts +59 -0
  57. package/src/grammar/profiles/index.ts +860 -0
  58. package/src/grammar/transformer.ts +1318 -0
  59. package/src/grammar/types.ts +630 -0
  60. package/src/index.ts +202 -0
  61. package/src/new-languages.test.ts +389 -0
  62. package/src/parser/analyze-conflicts.test.ts +229 -0
  63. package/src/parser/ar.ts +40 -0
  64. package/src/parser/create-provider.ts +309 -0
  65. package/src/parser/de.ts +36 -0
  66. package/src/parser/es.ts +31 -0
  67. package/src/parser/fr.ts +31 -0
  68. package/src/parser/id.ts +34 -0
  69. package/src/parser/index.ts +50 -0
  70. package/src/parser/ja.ts +36 -0
  71. package/src/parser/ko.ts +37 -0
  72. package/src/parser/locale-manager.test.ts +198 -0
  73. package/src/parser/locale-manager.ts +197 -0
  74. package/src/parser/parser-integration.test.ts +439 -0
  75. package/src/parser/pt.ts +37 -0
  76. package/src/parser/qu.ts +37 -0
  77. package/src/parser/sw.ts +37 -0
  78. package/src/parser/tr.ts +38 -0
  79. package/src/parser/types.ts +113 -0
  80. package/src/parser/zh.ts +38 -0
  81. package/src/plugins/vite.ts +224 -0
  82. package/src/plugins/webpack.ts +124 -0
  83. package/src/pluralization.test.ts +197 -0
  84. package/src/pluralization.ts +393 -0
  85. package/src/runtime.ts +441 -0
  86. package/src/ssr-integration.ts +225 -0
  87. package/src/test-setup.ts +195 -0
  88. package/src/translation-validation.test.ts +171 -0
  89. package/src/translator.test.ts +252 -0
  90. package/src/translator.ts +297 -0
  91. package/src/types.ts +209 -0
  92. package/src/utils/locale.ts +190 -0
  93. package/src/utils/tokenizer-adapter.ts +469 -0
  94. package/src/utils/tokenizer.ts +19 -0
  95. package/src/validators/index.ts +174 -0
  96. package/src/validators/schema.ts +129 -0
@@ -0,0 +1,1318 @@
1
+ /**
2
+ * Grammar-Aware Transformer
3
+ *
4
+ * Transforms hyperscript statements between languages using the
5
+ * generalized grammar system. The key insight is that semantic
6
+ * roles are universal - only their surface realization differs.
7
+ */
8
+
9
+ import type {
10
+ LanguageProfile,
11
+ ParsedStatement,
12
+ ParsedElement,
13
+ SemanticRole,
14
+ GrammarRule,
15
+ LineMetadata,
16
+ } from './types';
17
+ import { reorderRoles, insertMarkers, joinTokens } from './types';
18
+ import { getProfile, profiles } from './profiles';
19
+ import { hasDirectMapping, getDirectMapping } from './direct-mappings';
20
+ import { dictionaries } from '../dictionaries';
21
+ import { findInDictionary, translateFromEnglish } from '../types';
22
+ import { ENGLISH_MODIFIER_ROLES, CONDITIONAL_KEYWORDS, THEN_KEYWORDS } from '../constants';
23
+
24
+ // =============================================================================
25
+ // Compound Statement Handling
26
+ // =============================================================================
27
+
28
/**
 * Canonical English command names.
 *
 * A token that matches one of these may open a new statement; this is how
 * command boundaries are located in statements chained only by whitespace.
 */
const COMMAND_KEYWORDS = new Set([
  // a - d
  'add', 'append', 'async', 'beep', 'break', 'call', 'continue', 'decrement', 'default',
  // e - j
  'exit', 'fetch', 'for', 'get', 'go', 'halt', 'hide', 'if', 'increment', 'install', 'js',
  // l - p
  'log', 'make', 'measure', 'morph', 'pick', 'process', 'push', 'put',
  // r - s
  'remove', 'render', 'repeat', 'replace', 'return', 'send', 'set', 'settle', 'show', 'swap',
  // t - w
  'take', 'tell', 'throw', 'toggle', 'transition', 'trigger', 'unless', 'wait',
]);
80
+
81
/**
 * Build the command-keyword set for a locale.
 *
 * The result always contains the English command names. When the locale has a
 * dictionary with a `commands` section, every string-valued entry of that
 * section is added as well, lowercased.
 *
 * @param locale - Locale code used to look up the dictionary.
 * @returns A fresh set; the shared English set is never mutated.
 */
function getCommandKeywordsForLocale(locale: string): Set<string> {
  const result = new Set(COMMAND_KEYWORDS);
  const localizedCommands = dictionaries[locale]?.commands;

  if (!localizedCommands) {
    return result;
  }

  for (const translated of Object.values(localizedCommands)) {
    if (typeof translated === 'string') {
      result.add(translated.toLowerCase());
    }
  }

  return result;
}
99
+
100
/**
 * Break a compound statement into its individual statements.
 *
 * Three kinds of boundary are honoured, applied in this order:
 *   1. newlines (blank lines are discarded, every line is trimmed),
 *   2. "then" keywords inside a line,
 *   3. command keywords inside each "then" segment.
 *
 * Examples:
 *   "on click wait 1s then increment #count then toggle .active"
 *     -> ["on click wait 1s", "increment #count", "toggle .active"]
 *   "on click\n increment #count\n toggle .highlight"
 *     -> ["on click", "increment #count", "toggle .highlight"]
 *   "wait 2s toggle .highlight"
 *     -> ["wait 2s", "toggle .highlight"]
 *
 * @param input - Raw statement text, possibly spanning several lines.
 * @param sourceLocale - Locale whose keywords drive the splitting.
 * @returns The statement fragments in source order.
 */
function splitCompoundStatement(input: string, sourceLocale: string): string[] {
  const nonEmptyLines = input
    .split(/\n/)
    .map(rawLine => rawLine.trim())
    .filter(line => line.length > 0);

  return nonEmptyLines.reduce<string[]>((collected, line) => {
    for (const thenSegment of splitOnThen(line, sourceLocale)) {
      collected.push(...splitOnCommandBoundaries(thenSegment, sourceLocale));
    }
    return collected;
  }, []);
}
135
+
136
+ // =============================================================================
137
+ // Line Structure Preservation
138
+ // =============================================================================
139
+
140
/**
 * What a structure-preserving split hands back: the statement fragments plus
 * the bookkeeping needed to lay them out again on their original lines.
 */
interface SplitWithMetadataResult {
  /** Individual command/statement fragments. */
  parts: Array<string>;
  /** One entry per original input line, kept for reconstruction. */
  lineMetadata: Array<LineMetadata>;
  /** For each entry of `parts`, the index of the input line it came from. */
  partToLineIndex: Array<number>;
}
151
+
152
/**
 * Split a compound statement and remember where every fragment came from.
 *
 * Every input line (blank ones included) produces one `lineMetadata` entry
 * holding its trimmed content, its leading whitespace and a blank flag.
 * Non-blank lines are further split on "then" keywords and command
 * boundaries; each resulting fragment is recorded together with the index of
 * the line it originated from.
 *
 * @param input - Raw statement text, possibly spanning several lines.
 * @param sourceLocale - Locale whose keywords drive the splitting.
 */
function splitCompoundStatementWithMetadata(
  input: string,
  sourceLocale: string
): SplitWithMetadataResult {
  const parts: string[] = [];
  const lineMetadata: LineMetadata[] = [];
  const partToLineIndex: number[] = [];

  input.split('\n').forEach((rawLine, lineIndex) => {
    const content = rawLine.trim();
    // The pattern can match an empty prefix, so a capture is always available.
    const originalIndent = rawLine.match(/^(\s*)/)?.[1] ?? '';
    const isBlank = content.length === 0;

    lineMetadata.push({ content, originalIndent, isBlank });

    if (isBlank) {
      return;
    }

    for (const thenSegment of splitOnThen(content, sourceLocale)) {
      for (const fragment of splitOnCommandBoundaries(thenSegment, sourceLocale)) {
        parts.push(fragment);
        partToLineIndex.push(lineIndex);
      }
    }
  });

  return { parts, lineMetadata, partToLineIndex };
}
194
+
195
/**
 * Normalize indentation to consistent 4-space levels.
 *
 * The smallest non-zero indent among the non-blank lines is taken as one
 * level; every other indent is expressed as a rounded multiple of it and
 * re-emitted with four spaces per level. Tabs count as four columns.
 *
 * @param lineMetadata - One entry per input line.
 * @returns One indent string per entry of `lineMetadata`; blank lines and
 *          unindented lines get the empty string.
 */
function normalizeIndentation(lineMetadata: LineMetadata[]): string[] {
  const INDENT_UNIT = '    ';

  // Measured width of each line's indent; 0 marks "emit no indent".
  const widths = lineMetadata.map(meta =>
    meta.isBlank ? 0 : meta.originalIndent.replace(/\t/g, INDENT_UNIT).length
  );

  // Smallest non-zero width is the base unit. A reduce is used rather than
  // Math.min(...spread) so very long inputs cannot exceed the argument limit.
  const baseUnit = widths.reduce(
    (smallest, width) => (width > 0 && width < smallest ? width : smallest),
    Infinity
  );

  if (baseUnit === Infinity) {
    // Nothing is indented.
    return lineMetadata.map(() => '');
  }

  return widths.map(width =>
    width === 0 ? '' : INDENT_UNIT.repeat(Math.round(width / baseUnit))
  );
}
232
+
233
/**
 * Rebuild multi-line output from transformed fragments.
 *
 * Each fragment is placed back on the line it came from, prefixed with that
 * line's normalized indentation. Blank lines are kept as empty lines. When
 * several fragments share a line they are joined with the target language's
 * "then" keyword. A non-blank line that ended up with no fragment is omitted.
 *
 * @param transformedParts - Translated fragments, parallel to `partToLineIndex`.
 * @param lineMetadata - Metadata of the original input lines.
 * @param partToLineIndex - Original line index of each fragment.
 * @param targetThen - "then" keyword used to join fragments on one line.
 * @returns The reconstructed text, lines separated by "\n".
 */
function reconstructWithLineStructure(
  transformedParts: string[],
  lineMetadata: LineMetadata[],
  partToLineIndex: number[],
  targetThen: string
): string {
  const indents = normalizeIndentation(lineMetadata);
  const nonBlankLines = lineMetadata.reduce((count, meta) => (meta.isBlank ? count : count + 1), 0);

  // Simple case: at most one real line and at most one fragment, so the
  // fragment (if any) goes on every non-blank line without any grouping.
  if (nonBlankLines <= 1 && transformedParts.length <= 1) {
    const simple: string[] = [];
    lineMetadata.forEach((meta, idx) => {
      if (meta.isBlank) {
        simple.push('');
      } else if (transformedParts.length > 0) {
        simple.push(indents[idx] + transformedParts[0]);
      }
    });
    return simple.join('\n');
  }

  // Bucket the fragments by the line they originated from.
  const fragmentsByLine = new Map<number, string[]>();
  transformedParts.forEach((fragment, partIdx) => {
    const lineIdx = partToLineIndex[partIdx];
    const bucket = fragmentsByLine.get(lineIdx);
    if (bucket) {
      bucket.push(fragment);
    } else {
      fragmentsByLine.set(lineIdx, [fragment]);
    }
  });

  const output: string[] = [];
  lineMetadata.forEach((meta, idx) => {
    if (meta.isBlank) {
      output.push('');
      return;
    }
    const fragments = fragmentsByLine.get(idx);
    if (fragments && fragments.length > 0) {
      output.push(indents[idx] + fragments.join(` ${targetThen} `));
    }
  });

  return output.join('\n');
}
292
+
293
/**
 * Split a statement on command keyword boundaries.
 * E.g., "wait 2s toggle .highlight" -> ["wait 2s", "toggle .highlight"]
 *
 * Rules:
 * - "on <event> <command>" stays together: when the statement opens with an
 *   event keyword, the first command keyword after it never starts a new part.
 * - A command keyword directly after a modifier ("to", "from", ...) or after
 *   another command keyword does not start a new part.
 *
 * Surrounding whitespace is ignored, so a leading space can no longer hide
 * the event keyword. Empty or whitespace-only input yields an empty array.
 *
 * @param input - A single statement (no "then", no newlines expected).
 * @param sourceLocale - Locale whose translated command names are recognised.
 * @returns The parts in source order, tokens re-joined with single spaces.
 */
function splitOnCommandBoundaries(input: string, sourceLocale: string): string[] {
  const trimmed = input.trim();
  if (trimmed.length === 0) return [];

  const commandKeywords = getCommandKeywordsForLocale(sourceLocale);
  const tokens = trimmed.split(/\s+/);

  // Words after which a command keyword is an argument, not a new command.
  // Built once per call rather than once per token.
  const modifiers = new Set([
    'to',
    'into',
    'from',
    'with',
    'by',
    'as',
    'at',
    'in',
    'on',
    'of',
    'over',
  ]);

  // For event handlers (on/em/en/bei/... + event) the first command belongs
  // to the handler itself, so splitting only starts after it has been seen.
  const isEventHandler = EVENT_KEYWORDS.has(tokens[0].toLowerCase());
  let seenFirstCommand = !isEventHandler;

  const parts: string[] = [];
  let currentPart: string[] = [];

  for (const token of tokens) {
    const startsCommand = commandKeywords.has(token.toLowerCase());

    if (startsCommand && currentPart.length > 0) {
      if (!seenFirstCommand) {
        // E.g. "on click wait 1s": keep "wait" attached to the handler.
        seenFirstCommand = true;
      } else {
        const prevLower = currentPart[currentPart.length - 1].toLowerCase();
        if (!modifiers.has(prevLower) && !commandKeywords.has(prevLower)) {
          // Command boundary: close the current part and start a new one.
          parts.push(currentPart.join(' '));
          currentPart = [];
        }
      }
    }

    currentPart.push(token);
  }

  // tokens is non-empty, so the last part always holds at least one token.
  parts.push(currentPart.join(' '));

  return parts;
}
370
+
371
/**
 * Split one line at every "then" keyword.
 *
 * The keyword list is the shared THEN_KEYWORDS plus, for non-English source
 * locales, the `then` entry of the dictionary's `modifiers` and `logical`
 * sections when present. A keyword only counts as a separator when it has
 * whitespace on both sides; matching is case-insensitive.
 *
 * @param input - A single line of hyperscript.
 * @param sourceLocale - Locale of the input.
 * @returns The trimmed, non-empty segments between "then" keywords.
 */
function splitOnThen(input: string, sourceLocale: string): string[] {
  const thenKeywords = Array.from(THEN_KEYWORDS);

  if (sourceLocale !== 'en') {
    const sourceDict = dictionaries[sourceLocale];

    const fromModifiers = sourceDict?.modifiers?.then;
    if (fromModifiers) {
      thenKeywords.push(fromModifiers);
    }

    // Some dictionaries file "then" under `logical` instead.
    const fromLogical = (sourceDict?.logical as Record<string, string>)?.then;
    if (fromLogical) {
      thenKeywords.push(fromLogical);
    }
  }

  // Escape regex metacharacters so keywords are matched literally.
  const alternation = thenKeywords
    .map(keyword => keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');
  const separator = new RegExp(`\\s+(${alternation})\\s+`, 'gi');
  const loweredKeywords = new Set(thenKeywords.map(keyword => keyword.toLowerCase()));

  const segments: string[] = [];
  for (const piece of input.split(separator)) {
    const cleaned = piece.trim();
    if (cleaned.length === 0) continue;
    // The capture group makes split() return the keywords too; drop them.
    if (loweredKeywords.has(cleaned.toLowerCase())) continue;
    segments.push(cleaned);
  }

  return segments;
}
402
+
403
/**
 * Look up the "then" keyword for the target language.
 *
 * English, unknown locales and dictionaries without a "then" entry all fall
 * back to the literal 'then'. The `modifiers` section is consulted before the
 * `logical` section because dictionaries differ in where they keep it.
 *
 * @param targetLocale - Locale code of the output language.
 */
function getTargetThenKeyword(targetLocale: string): string {
  const FALLBACK = 'then';

  const targetDict = targetLocale === 'en' ? undefined : dictionaries[targetLocale];
  if (!targetDict) {
    return FALLBACK;
  }

  const fromModifiers = targetDict.modifiers?.then;
  if (fromModifiers) {
    return fromModifiers;
  }

  const fromLogical = (targetDict.logical as Record<string, string>)?.then;
  return fromLogical || FALLBACK;
}
418
+
419
+ // =============================================================================
420
+ // Derived Constants from Profiles
421
+ // =============================================================================
422
+
423
/**
 * Collect the event-introducing keywords of every language profile.
 *
 * English 'on' is always included. For every profile marker whose role is
 * 'event', the marker form and each of its alternatives are added, lowercased
 * and with a single leading/trailing hyphen (affix notation) removed.
 */
function deriveEventKeywordsFromProfiles(): Set<string> {
  const stripAffixNotation = (form: string): string => form.replace(/^-|-$/g, '').toLowerCase();

  const keywords = new Set<string>(['on']);

  Object.values(profiles).forEach(profile => {
    profile.markers
      .filter(marker => marker.role === 'event')
      .forEach(marker => {
        const candidates = [marker.form, ...(marker.alternatives ?? [])];
        candidates.forEach(candidate => {
          const keyword = stripAffixNotation(candidate);
          if (keyword) {
            keywords.add(keyword);
          }
        });
      });
  });

  return keywords;
}

/** Event keywords derived from language profiles */
const EVENT_KEYWORDS = deriveEventKeywordsFromProfiles();
455
+
456
+ // =============================================================================
457
+ // Helper: Dynamic Modifier Map
458
+ // =============================================================================
459
+
460
/**
 * Build a marker -> semantic-role lookup for a language profile.
 *
 * Every profile marker (and each of its alternatives) is registered under its
 * lowercased form with a single leading/trailing hyphen stripped. English
 * modifiers from ENGLISH_MODIFIER_ROLES are then added as a fallback for keys
 * the profile did not define, so profile-specific markers always win.
 *
 * The returned object has no prototype. Callers look tokens up with plain
 * indexing (`map[token]`), and on an ordinary `{}` a token such as
 * "constructor" or "__proto__" would resolve to an inherited member and be
 * mistaken for a role.
 *
 * @param profile - Language profile supplying the markers.
 * @returns Lookup from lowercased marker text to its semantic role.
 */
function generateModifierMap(profile: LanguageProfile): Record<string, SemanticRole> {
  const map: Record<string, SemanticRole> = Object.create(null);

  // Strip hyphen notation used for suffix/prefix markers.
  const normalize = (form: string): string => form.replace(/^-|-$/g, '').toLowerCase();

  for (const marker of profile.markers) {
    // Main form first, then alternatives (e.g. vowel-harmony variants).
    for (const rawForm of [marker.form, ...(marker.alternatives ?? [])]) {
      const form = normalize(rawForm);
      if (form) {
        map[form] = marker.role;
      }
    }
  }

  // English fallback: never override a marker the profile already defined.
  for (const [key, role] of Object.entries(ENGLISH_MODIFIER_ROLES)) {
    if (!(key in map)) {
      map[key] = role;
    }
  }

  return map;
}
494
+
495
+ // =============================================================================
496
+ // Statement Parser
497
+ // =============================================================================
498
+
499
/**
 * Analyse a hyperscript statement and label its parts with semantic roles.
 *
 * The input is tokenized with the source locale's profile, classified as an
 * event handler, a command or a conditional, and handed to the matching
 * parser.
 *
 * @param input - Statement text.
 * @param sourceLocale - Locale of the input; defaults to 'en'.
 * @returns The parsed statement, or null when no profile exists for the
 *          locale or the statement type cannot be determined.
 */
export function parseStatement(input: string, sourceLocale: string = 'en'): ParsedStatement | null {
  const profile = getProfile(sourceLocale);
  if (!profile) {
    return null;
  }

  const tokens = tokenize(input, profile);
  const kind = identifyStatementType(tokens, profile);

  if (kind === 'event-handler') {
    return parseEventHandler(tokens, profile);
  }
  if (kind === 'command') {
    return parseCommand(tokens, profile);
  }
  if (kind === 'conditional') {
    return parseConditional(tokens, profile);
  }
  return null;
}
523
+
524
/**
 * Per-locale suffixes that can be written attached to the preceding word.
 * Tokenization splits them off so they can be parsed as separate tokens.
 */
const ATTACHED_SUFFIXES: Record<string, string[]> = {
  // Chinese: 时 (time/when) often attaches to events like 点击时 (when clicking)
  zh: ['时', '的', '地', '得'],
  // Japanese: no attached suffixes are configured
  ja: [],
  // Korean: no attached suffixes are configured
  ko: [],
};

/**
 * Per-locale prefixes that can be written attached to the following word.
 */
const ATTACHED_PREFIXES: Record<string, string[]> = {
  // Chinese: 当 (when) sometimes written attached
  zh: ['当'],
  // Arabic: prepositions that attach
  ar: ['بـ', 'كـ', 'و'],
};

/**
 * Detach language-specific prefixes and suffixes from tokens.
 * E.g., "点击时" -> ["点击", "时"]
 *
 * At most one prefix and one suffix (the first configured match of each) are
 * removed per token, and only when something remains of the word afterwards.
 * Tokens that start like a CSS selector (# . < @) or with a digit are passed
 * through untouched. When the locale has no affixes configured the input
 * array itself is returned.
 *
 * @param tokens - Tokens produced by whitespace splitting.
 * @param locale - Locale code selecting the affix lists.
 * @returns Tokens in order, each split into prefix / stem / suffix pieces.
 */
function splitAttachedAffixes(tokens: string[], locale: string): string[] {
  const suffixes = ATTACHED_SUFFIXES[locale] ?? [];
  const prefixes = ATTACHED_PREFIXES[locale] ?? [];

  if (suffixes.length === 0 && prefixes.length === 0) {
    return tokens;
  }

  const isSelectorOrNumber = (token: string): boolean =>
    /^[#.<@]/.test(token) || /^\d+/.test(token);

  return tokens.reduce<string[]>((pieces, token) => {
    if (isSelectorOrNumber(token)) {
      pieces.push(token);
      return pieces;
    }

    // First configured prefix that leaves a non-empty remainder.
    const prefix = prefixes.find(p => token.startsWith(p) && token.length > p.length) ?? '';
    const afterPrefix = token.slice(prefix.length);

    // First configured suffix that leaves a non-empty stem.
    const suffix =
      suffixes.find(s => afterPrefix.endsWith(s) && afterPrefix.length > s.length) ?? '';
    const stem = suffix ? afterPrefix.slice(0, -suffix.length) : afterPrefix;

    if (prefix) pieces.push(prefix);
    if (stem) pieces.push(stem);
    if (suffix) pieces.push(suffix);
    return pieces;
  }, []);
}
598
+
599
/**
 * Simple whitespace tokenizer that keeps the following together:
 * - Query-literal selectors in angle brackets (`<button.primary />`), even
 *   when they contain whitespace.
 * - String literals: a token that starts with ", ' or ` and has a matching
 *   closing quote is kept whole, inner whitespace included, so words inside a
 *   string are never mistaken for keywords or modifiers later on.
 *
 * A '<' directly followed by whitespace, '=' or the end of input is treated
 * as a comparison operator rather than the start of a selector; otherwise it
 * would open a selector that never closes and swallow the rest of the input.
 *
 * Finally, attached suffixes/prefixes are split off for locales that
 * configure them.
 *
 * @param input - Statement text.
 * @param profile - Language profile; its `code` selects the affix rules.
 * @returns Tokens in source order.
 */
function tokenize(input: string, profile: LanguageProfile): string[] {
  const tokens: string[] = [];
  let current = '';
  let selectorDepth = 0; // > 0 while inside <...>

  // Index of the quote closing the literal opened at `from`, or -1 if the
  // literal is never closed. Backslash escapes the following character.
  const findClosingQuote = (from: number): number => {
    const quote = input[from];
    for (let j = from + 1; j < input.length; j++) {
      if (input[j] === '\\') {
        j++;
        continue;
      }
      if (input[j] === quote) return j;
    }
    return -1;
  };

  let i = 0;
  while (i < input.length) {
    const char = input[i];

    // A quote only opens a string at the start of a token, so possessives
    // and contractions such as "me's" are left alone. Unterminated quotes
    // fall through and are tokenized like any other character.
    const opensString =
      selectorDepth === 0 && current === '' && (char === '"' || char === "'" || char === '`');
    if (opensString) {
      const closeIndex = findClosingQuote(i);
      if (closeIndex !== -1) {
        current = input.slice(i, closeIndex + 1);
        i = closeIndex + 1;
        continue;
      }
    }

    // Track CSS selector context
    if (char === '<') {
      const next = input[i + 1];
      const isComparison = selectorDepth === 0 && (next === undefined || /[\s=]/.test(next));
      if (!isComparison) {
        selectorDepth++;
      }
    } else if (char === '>' && selectorDepth > 0) {
      selectorDepth--;
    }

    // Split on whitespace unless in selector
    if (/\s/.test(char) && selectorDepth === 0) {
      if (current) {
        tokens.push(current);
        current = '';
      }
    } else {
      current += char;
    }

    i++;
  }

  if (current) {
    tokens.push(current);
  }

  // Post-process to split attached affixes for languages that need it
  return splitAttachedAffixes(tokens, profile.code);
}
644
+
645
/**
 * Classify a token list by looking at its first token only.
 *
 * - 'unknown'       : no tokens at all.
 * - 'event-handler' : first token equals the profile's first prepositional
 *                     event marker, or is any known event keyword.
 * - 'conditional'   : first token is a conditional keyword.
 * - 'command'       : everything else.
 *
 * Comparison is case-insensitive.
 */
function identifyStatementType(
  tokens: string[],
  profile: LanguageProfile
): 'event-handler' | 'command' | 'conditional' | 'unknown' {
  if (tokens.length === 0) {
    return 'unknown';
  }

  const head = tokens[0].toLowerCase();

  const prepositionalEvent = profile.markers.find(
    marker => marker.role === 'event' && marker.position === 'preposition'
  );
  const matchesProfileMarker =
    prepositionalEvent !== undefined && head === prepositionalEvent.form.toLowerCase();

  // Event keywords of every language are accepted, not just this profile's.
  if (matchesProfileMarker || EVENT_KEYWORDS.has(head)) {
    return 'event-handler';
  }

  return CONDITIONAL_KEYWORDS.has(head) ? 'conditional' : 'command';
}
674
+
675
/**
 * Parse an event handler statement.
 * Pattern: on {event} {command} {target?} {modifiers?}
 *
 * A leading event keyword is skipped; the next token becomes the 'event' and
 * the one after it the 'action'. Remaining tokens are grouped into roles:
 * they start out as 'patient', and every token found in the profile's
 * modifier map switches the role for the tokens that follow it (so "by 3" in
 * "on click increment #count by 3" is captured under the role mapped to
 * "by"). A role seen twice keeps the value collected last.
 *
 * @param tokens - Tokens of the statement.
 * @param profile - Profile of the source language.
 */
function parseEventHandler(tokens: string[], profile: LanguageProfile): ParsedStatement {
  const roles = new Map<SemanticRole, ParsedElement>();

  // Skip the event keyword (e.g., 'on', 'で', '当', etc.) when present.
  let cursor = EVENT_KEYWORDS.has(tokens[0]?.toLowerCase()) ? 1 : 0;

  const eventToken = tokens[cursor];
  if (eventToken) {
    roles.set('event', { role: 'event', value: eventToken });
    cursor += 1;
  }

  const actionToken = tokens[cursor];
  if (actionToken) {
    roles.set('action', { role: 'action', value: actionToken });
    cursor += 1;
  }

  if (tokens[cursor]) {
    const modifierMap = generateModifierMap(profile);
    let activeRole: SemanticRole = 'patient';
    let pending: string[] = [];

    // Store whatever has been collected for the active role, if anything.
    const flush = (): void => {
      if (pending.length === 0) return;
      const value = pending.join(' ');
      roles.set(activeRole, {
        role: activeRole,
        value,
        isSelector: /^[#.<@]/.test(value),
      });
    };

    tokens.slice(cursor).forEach(token => {
      const nextRole = modifierMap[token.toLowerCase()];
      if (nextRole) {
        flush();
        activeRole = nextRole;
        pending = [];
      } else {
        pending.push(token);
      }
    });

    flush();
  }

  return {
    type: 'event-handler',
    roles,
    original: tokens.join(' '),
  };
}
750
+
751
/**
 * Parse a command statement.
 * Pattern: {command} {args...}
 *
 * The first token is the 'action'. The remaining tokens are grouped into
 * roles: they start out as 'patient', and every token found in the profile's
 * modifier map (which also covers non-English markers) switches the role for
 * the tokens that follow it. A role seen twice keeps the value collected last.
 *
 * @param tokens - Tokens of the statement.
 * @param profile - Profile of the source language.
 */
function parseCommand(tokens: string[], profile: LanguageProfile): ParsedStatement {
  const roles = new Map<SemanticRole, ParsedElement>();

  if (tokens.length === 0) {
    return { type: 'command', roles, original: '' };
  }

  const [commandToken, ...argumentTokens] = tokens;
  roles.set('action', { role: 'action', value: commandToken });

  const modifierMap = generateModifierMap(profile);

  // Record the words gathered for a role; nothing is stored for an empty run.
  const commit = (role: SemanticRole, words: string[]): void => {
    if (words.length === 0) return;
    const value = words.join(' ');
    roles.set(role, { role, value, isSelector: /^[#.<@]/.test(value) });
  };

  let activeRole: SemanticRole = 'patient';
  let words: string[] = [];

  for (const token of argumentTokens) {
    const switchTo = modifierMap[token.toLowerCase()];
    if (!switchTo) {
      words.push(token);
      continue;
    }
    commit(activeRole, words);
    activeRole = switchTo;
    words = [];
  }
  commit(activeRole, words);

  return {
    type: 'command',
    roles,
    original: tokens.join(' '),
  };
}
812
+
813
/**
 * Parse a conditional statement.
 *
 * The first token (the 'if'-style keyword) becomes the 'action'. The
 * 'condition' is everything between that keyword and the first "then"
 * keyword. When the statement contains no "then" at all — the usual case
 * once compound statements have been split on "then" or on newlines — the
 * condition runs to the end of the statement instead of being dropped.
 *
 * @param tokens - Tokens of the statement.
 * @param _profile - Unused; kept for a signature parallel to the other parsers.
 * @returns The parsed conditional; `roles` has no 'condition' entry when
 *          nothing follows the keyword or "then" follows it immediately.
 */
function parseConditional(tokens: string[], _profile: LanguageProfile): ParsedStatement {
  const roles = new Map<SemanticRole, ParsedElement>();

  // Same empty-input contract as parseCommand.
  if (tokens.length === 0) {
    return { type: 'conditional', roles, original: '' };
  }

  // First token is the 'if' keyword
  roles.set('action', {
    role: 'action',
    value: tokens[0],
  });

  // Find 'then' to split condition from body - using shared constants
  const thenIndex = tokens.findIndex(t => THEN_KEYWORDS.has(t.toLowerCase()));
  const conditionEnd = thenIndex === -1 ? tokens.length : thenIndex;

  if (conditionEnd > 1) {
    roles.set('condition', {
      role: 'condition',
      value: tokens.slice(1, conditionEnd).join(' '),
    });
  }

  return {
    type: 'conditional',
    roles,
    original: tokens.join(' '),
  };
}
842
+
843
+ // =============================================================================
844
+ // Translation
845
+ // =============================================================================
846
+
847
/**
 * Translate a single word from the source locale to the target locale.
 *
 * Translation pivots through English: a non-English word is first mapped to
 * its English key via the source dictionary, then the English key is mapped
 * into the target dictionary. The word is returned unchanged when it starts
 * like a CSS selector (# . < @), starts with a digit, the target locale has
 * no dictionary, or no translation is found.
 *
 * @param word - Word to translate.
 * @param sourceLocale - Locale of `word`.
 * @param targetLocale - Locale to translate into.
 */
function translateWord(word: string, sourceLocale: string, targetLocale: string): string {
  const looksLikeSelector = /^[#.<@]/.test(word);
  const startsWithDigit = /^\d+/.test(word);
  if (looksLikeSelector || startsWithDigit) {
    return word;
  }

  const targetDict = dictionaries[targetLocale];
  if (!targetDict) {
    return word;
  }

  // English input needs no reverse lookup.
  const sourceDict = sourceLocale === 'en' ? null : dictionaries[sourceLocale];
  const sourceMatch = sourceDict ? findInDictionary(sourceDict, word) : undefined;
  const englishWord = sourceMatch ? sourceMatch.englishKey : word;

  return translateFromEnglish(targetDict, englishWord) ?? word;
}
879
+
880
/**
 * How a locale expresses possession: the kind of construction and the marker
 * text used for it.
 */
type PossessiveMarker = {
  type: 'prefix' | 'suffix' | 'preposition';
  marker: string;
};

/**
 * Possessive marker per locale code.
 * Used to rewrite "X's Y" patterns into the target language's structure.
 */
const POSSESSIVE_MARKERS: Record<string, PossessiveMarker> = {
  en: { type: 'suffix', marker: "'s" },
  es: { type: 'preposition', marker: 'de' },
  pt: { type: 'preposition', marker: 'de' },
  fr: { type: 'preposition', marker: 'de' },
  de: { type: 'preposition', marker: 'von' },
  ja: { type: 'suffix', marker: 'の' },
  ko: { type: 'suffix', marker: '의' },
  zh: { type: 'suffix', marker: '的' },
  ar: { type: 'preposition', marker: 'لـ' },
  tr: { type: 'suffix', marker: "'ın" },
  id: { type: 'preposition', marker: 'dari' },
  qu: { type: 'suffix', marker: '-pa' },
  sw: { type: 'preposition', marker: 'ya' },
};
902
+
903
/**
 * Transform a single `owner's` token into the target language's possessive form.
 *
 * Behaviour by case:
 *  - Token does not end in `'s`: returned unchanged.
 *  - Owner is the pronoun `me`, `it` or `you`: the English possessive
 *    (`my`, `its`, `your`) is translated from English into the target locale.
 *  - Target uses a suffix marker: returns `<translated owner><marker>`.
 *  - Target uses a preposition: returns the sentinel
 *    `__POSS__<marker>__<translated owner>__POSS__`, which the caller is
 *    expected to unpack and reorder around the following property word.
 *  - Any other marker type: returns `<translated owner>'s`.
 *
 * Locales without an entry in POSSESSIVE_MARKERS use the English marker.
 *
 * @param token - One token, e.g. `me's` or `#button's`.
 * @param sourceLocale - Locale the owner word is written in.
 * @param targetLocale - Locale to translate into.
 * @returns The transformed token (possibly a sentinel, see above).
 */
function translatePossessive(token: string, sourceLocale: string, targetLocale: string): string {
  // Check for 's possessive pattern
  const possessiveMatch = token.match(/^(.+)'s$/i);
  if (!possessiveMatch) {
    return token;
  }

  const owner = possessiveMatch[1];

  // Own-property check so a locale string such as "constructor" cannot pick up
  // an inherited Object.prototype member instead of a marker entry.
  const targetMarker = Object.prototype.hasOwnProperty.call(POSSESSIVE_MARKERS, targetLocale)
    ? POSSESSIVE_MARKERS[targetLocale]
    : POSSESSIVE_MARKERS.en;

  // Pronouns that have a dedicated possessive form. A Map is used because the
  // key comes from user input: a plain object would also answer for inherited
  // keys like "constructor" or "__proto__".
  const pronounPossessives = new Map<string, string>([
    ['me', 'my'],
    ['it', 'its'],
    ['you', 'your'],
  ]);

  const possessiveForm = pronounPossessives.get(owner.toLowerCase());
  if (possessiveForm) {
    // Convert "me's" to "my" then translate
    return translateWord(possessiveForm, 'en', targetLocale);
  }

  // For selectors and other owners, translate owner and apply target possessive marker
  const translatedOwner = translateWord(owner, sourceLocale, targetLocale);

  switch (targetMarker.type) {
    case 'suffix':
      // Owner followed directly by the marker (e.g., #buttonの, #button의)
      return `${translatedOwner}${targetMarker.marker}`;
    case 'preposition':
      // Reordering needs the following word, which only the caller can see,
      // so hand back marker and owner in a sentinel wrapper.
      return `__POSS__${targetMarker.marker}__${translatedOwner}__POSS__`;
    default:
      return `${translatedOwner}'s`;
  }
}
950
+
951
/**
 * Translate a value that may contain several whitespace-separated words,
 * translating each word individually.
 *
 * Handles possessives like "my value" → "mi valor" in Spanish, and the `'s`
 * possessive syntax like "me's value" → "mi valor". For targets that express
 * possession with a preposition, "X's Y" is reordered to "Y marker X"
 * (e.g. "textContent de #button").
 *
 * Fixes over the previous version:
 *  - A lone possessive token (no following property) no longer leaks the
 *    internal `__POSS__…__POSS__` sentinel; it is emitted as "marker owner",
 *    the same as a trailing possessive inside a longer value.
 *  - Owners that contain `__` (e.g. BEM selectors like `.card__title`) are
 *    unpacked correctly from the sentinel.
 *
 * @param value - The role value to translate.
 * @param sourceLocale - Locale `value` is written in.
 * @param targetLocale - Locale to translate into.
 * @returns The translated words joined by single spaces.
 */
function translateMultiWordValue(
  value: string,
  sourceLocale: string,
  targetLocale: string
): string {
  // Plain single word: nothing to reorder, translate directly.
  // Single-word possessives go through the loop below so that the
  // prepositional sentinel is always unpacked.
  if (!value.includes(' ') && !value.includes("'s")) {
    return translateWord(value, sourceLocale, targetLocale);
  }

  // Split into words and translate each
  const words = value.split(/\s+/);
  const translated: string[] = [];
  let i = 0;

  while (i < words.length) {
    const word = words[i];

    // Check for possessive 's pattern FIRST (e.g., "me's value", "#button's textContent")
    // This must come before selector check because "#button's" starts with #
    if (word.includes("'s")) {
      const possessiveResult = translatePossessive(word, sourceLocale, targetLocale);

      // Lazy first group: the marker is the text up to the FIRST "__", so an
      // owner that itself contains "__" stays in one piece.
      const prepMatch = possessiveResult.match(/^__POSS__(.+?)__(.+)__POSS__$/);
      if (prepMatch) {
        const marker = prepMatch[1];
        const owner = prepMatch[2];

        if (i + 1 < words.length) {
          // Prepositional: "X's Y" → "Y marker X" (e.g., "textContent de #button")
          const translatedProperty = translateWord(words[i + 1], sourceLocale, targetLocale);
          translated.push(`${translatedProperty} ${marker} ${owner}`);
          i += 2; // Skip property since we consumed it
        } else {
          // No property following - just output owner with marker prefix
          translated.push(`${marker} ${owner}`);
          i++;
        }
        continue;
      }

      // Suffix-style possessive, pronoun, or a token that merely contains 's
      translated.push(possessiveResult);
      i++;
      continue;
    }

    // Skip pure CSS selectors and numbers (but NOT possessives which were handled above)
    if (/^[#.<@]/.test(word) || /^\d+/.test(word)) {
      translated.push(word);
      i++;
      continue;
    }

    // Skip quoted strings
    if (/^["'].*["']$/.test(word)) {
      translated.push(word);
      i++;
      continue;
    }

    translated.push(translateWord(word, sourceLocale, targetLocale));
    i++;
  }

  return translated.join(' ');
}
1029
+
1030
/**
 * Fill in `translated` on every element of a parsed statement (in place).
 *
 * An element is run through translateMultiWordValue when its value contains
 * the `'s` possessive syntax (even if it is flagged as a selector, e.g.
 * "#button's textContent") or when it is neither a selector nor a literal.
 * All other elements keep their original value as the translation.
 *
 * @param parsed - Statement whose role elements are updated.
 * @param sourceLocale - Locale the statement is written in.
 * @param targetLocale - Locale to translate into.
 */
function translateElements(
  parsed: ParsedStatement,
  sourceLocale: string,
  targetLocale: string
): void {
  for (const element of parsed.roles.values()) {
    const needsTranslation =
      element.value.includes("'s") || (!element.isSelector && !element.isLiteral);

    element.translated = needsTranslation
      ? translateMultiWordValue(element.value, sourceLocale, targetLocale)
      : element.value;
  }
}
1050
+
1051
+ // =============================================================================
1052
+ // Main Transformer
1053
+ // =============================================================================
1054
+
1055
/**
 * Rewrites hyperscript statements from a source language's grammar into a
 * target language's grammar: parse into roles, translate the words, reorder
 * the roles and insert the target's grammatical markers.
 */
export class GrammarTransformer {
  private sourceProfile: LanguageProfile;
  private targetProfile: LanguageProfile;

  /**
   * @param sourceLocale - Locale code of the input (defaults to `'en'`).
   * @param targetLocale - Locale code of the output.
   * @throws Error when either locale has no language profile.
   */
  constructor(sourceLocale: string = 'en', targetLocale: string) {
    const source = getProfile(sourceLocale);
    const target = getProfile(targetLocale);

    if (!source) {
      throw new Error(`Unknown source locale: ${sourceLocale}`);
    }
    if (!target) {
      throw new Error(`Unknown target locale: ${targetLocale}`);
    }

    this.sourceProfile = source;
    this.targetProfile = target;
  }

  /**
   * Transform a hyperscript statement from the source to the target language.
   *
   * Compound statements are split, each part is transformed on its own, and
   * the parts are rejoined with the target language's "then" keyword. Input
   * containing a newline takes the metadata-preserving path so that line
   * structure can be reconstructed.
   *
   * @param input - Statement(s) in the source language.
   * @returns The transformed text.
   */
  transform(input: string): string {
    const targetThen = getTargetThenKeyword(this.targetProfile.code);
    const transformPart = (part: string): string => this.transformSingle(part);

    // Multi-line input: keep indentation / blank lines via the line metadata.
    if (input.includes('\n')) {
      const split = splitCompoundStatementWithMetadata(input, this.sourceProfile.code);

      return reconstructWithLineStructure(
        split.parts.map(transformPart),
        split.lineMetadata,
        split.partToLineIndex,
        targetThen
      );
    }

    // Single-line input: rejoin the parts only when a split actually happened.
    const parts = splitCompoundStatement(input, this.sourceProfile.code);
    return parts.length > 1
      ? parts.map(transformPart).join(` ${targetThen} `)
      : this.transformSingle(input);
  }

  /**
   * Transform a single hyperscript statement (no compound "then" chains).
   * Returns the input unchanged when it cannot be parsed.
   */
  private transformSingle(input: string): string {
    // Parse into semantic roles; unparseable input passes through untouched.
    const parsed = parseStatement(input, this.sourceProfile.code);
    if (!parsed) {
      return input;
    }

    // Translate the individual words of every role.
    translateElements(parsed, this.sourceProfile.code, this.targetProfile.code);

    // A matching rule may take over the whole transformation.
    const rule = this.findRule(parsed);
    if (rule?.transform.custom) {
      return rule.transform.custom(parsed, this.targetProfile);
    }

    // Otherwise reorder by the rule's order, or the target's canonical order.
    const reordered = reorderRoles(
      parsed.roles,
      rule?.transform.roleOrder || this.targetProfile.canonicalOrder
    );

    // Markers are inserted unless the rule explicitly opts out.
    const skipMarkers = !(rule?.transform.insertMarkers ?? true);
    if (skipMarkers) {
      // Still goes through joinTokens so both paths join the same way.
      return joinTokens(reordered.map(e => e.translated || e.value));
    }

    // joinTokens handles suffix/prefix attachment (Turkish -i, Quechua -ta, etc.)
    return joinTokens(
      insertMarkers(reordered, this.targetProfile.markers, this.targetProfile.adpositionType)
    );
  }

  /**
   * Pick the highest-priority target-language rule that matches the statement,
   * or `undefined` when the target has no rules or none match.
   */
  private findRule(parsed: ParsedStatement): GrammarRule | undefined {
    const candidates = this.targetProfile.rules;
    if (!candidates) {
      return undefined;
    }

    const ranked = candidates
      .filter(candidate => this.matchesRule(parsed, candidate))
      .sort((left, right) => right.priority - left.priority);

    return ranked[0];
  }

  /**
   * Check a parsed statement against a rule's match criteria: required roles,
   * optional command list (case-insensitive) and optional custom predicate.
   */
  private matchesRule(parsed: ParsedStatement, rule: GrammarRule): boolean {
    const criteria = rule.match;

    // Every required role must be present.
    for (const required of criteria.requiredRoles) {
      if (!parsed.roles.has(required)) {
        return false;
      }
    }

    // When commands are listed, the action must be one of them.
    if (criteria.commands && criteria.commands.length > 0) {
      const action = parsed.roles.get('action');
      if (!action) {
        return false;
      }

      const wanted = action.value.toLowerCase();
      const isListed = criteria.commands.some(cmd => cmd.toLowerCase() === wanted);
      if (!isListed) {
        return false;
      }
    }

    // A custom predicate, when given, has the final say.
    return !criteria.predicate || Boolean(criteria.predicate(parsed));
  }
}
1198
+
1199
+ // =============================================================================
1200
+ // Convenience Functions
1201
+ // =============================================================================
1202
+
1203
/**
 * Transform hyperscript written in English into `targetLocale`.
 *
 * @param input - English hyperscript.
 * @param targetLocale - Locale code to transform into.
 * @returns The transformed text.
 */
export function toLocale(input: string, targetLocale: string): string {
  return new GrammarTransformer('en', targetLocale).transform(input);
}
1210
+
1211
/**
 * Transform hyperscript written in `sourceLocale` into English.
 *
 * @param input - Hyperscript in the source language.
 * @param sourceLocale - Locale code of `input`.
 * @returns The transformed text.
 */
export function toEnglish(input: string, sourceLocale: string): string {
  return new GrammarTransformer(sourceLocale, 'en').transform(input);
}
1218
+
1219
/**
 * Transform hyperscript between any two languages.
 *
 * Resolution order:
 *  1. Same locale on both sides: input is returned untouched.
 *  2. English on either side: a single toLocale / toEnglish pass.
 *  3. A direct mapping exists for the pair (ja↔zh, es↔pt, ko↔ja): translateDirect.
 *  4. Otherwise: pivot through English.
 *
 * @param input - Hyperscript in the source language.
 * @param sourceLocale - Locale code of `input`.
 * @param targetLocale - Locale code to translate into.
 * @returns The translated text.
 */
export function translate(input: string, sourceLocale: string, targetLocale: string): string {
  if (sourceLocale === targetLocale) {
    return input;
  }
  if (sourceLocale === 'en') {
    return toLocale(input, targetLocale);
  }
  if (targetLocale === 'en') {
    return toEnglish(input, sourceLocale);
  }

  return hasDirectMapping(sourceLocale, targetLocale)
    ? translateDirect(input, sourceLocale, targetLocale)
    : toLocale(toEnglish(input, sourceLocale), targetLocale);
}
1239
+
1240
/**
 * Direct translation between a language pair without the English pivot.
 * Intended for closely related languages (ja↔zh, es↔pt).
 *
 * The input is split on whitespace and each token is handled independently:
 *  - tokens starting with `#`, `.`, `<` or `@` (selectors) and tokens starting
 *    with a quote are kept as-is;
 *  - tokens found in the pair's word mapping are replaced;
 *  - tokens of the form `base-suffix` have their base translated and the
 *    suffix re-attached;
 *  - everything else is kept as-is.
 *
 * Word-mapping lookups only consider the mapping's own keys, so a token such
 * as `constructor` is never resolved to an inherited Object.prototype member.
 *
 * Falls back to pivot translation when no mapping exists for the pair.
 *
 * @param input - Hyperscript in the source language.
 * @param sourceLocale - Locale code of `input`.
 * @param targetLocale - Locale code to translate into.
 * @returns The translated tokens joined by single spaces.
 */
function translateDirect(input: string, sourceLocale: string, targetLocale: string): string {
  const mapping = getDirectMapping(sourceLocale, targetLocale);
  if (!mapping) {
    // Fallback to pivot translation
    return toLocale(toEnglish(input, sourceLocale), targetLocale);
  }

  // Own-key lookup; an empty or missing entry counts as "no translation".
  const lookup = (key: string) =>
    Object.prototype.hasOwnProperty.call(mapping.words, key)
      ? mapping.words[key] || undefined
      : undefined;

  // Tokenize input
  const tokens = input.split(/\s+/);

  // Translate each token using direct mapping
  const translated = tokens.map(token => {
    // Preserve CSS selectors (same prefixes the other translators treat as selectors)
    if (/^[#.<@]/.test(token)) {
      return token;
    }
    // Preserve quoted literals
    if (token.startsWith('"') || token.startsWith("'")) {
      return token;
    }

    // Look up in direct mapping
    const directTranslation = lookup(token);
    if (directTranslation) {
      return directTranslation;
    }

    // Suffix-attached words (e.g. "word-ta"): translate the part before the
    // first hyphen and keep the suffix. Selector tokens never get here.
    const suffixMatch = token.match(/^(.+?)(-.+)$/);
    if (suffixMatch) {
      const [, base, suffix] = suffixMatch;
      const translatedBase = lookup(base) || base;
      return translatedBase + suffix;
    }

    // Return unchanged if no mapping found
    return token;
  });

  return translated.join(' ');
}
1284
+
1285
+ // =============================================================================
1286
+ // Examples (for testing)
1287
+ // =============================================================================
1288
+
1289
/** English source statements used as translation inputs. */
const englishExamples = {
  eventHandler: 'on click increment #count',
  putInto: 'put my value into #output',
  toggle: 'toggle .active',
  wait: 'wait 2 seconds',
};

// The tables below are expected outputs (approximate, for reference).

/** Approximate Japanese renderings of the English examples. */
const japaneseExamples = {
  eventHandler: '#count を クリック で 増加',
  putInto: '私の 値 を #output に 置く',
  toggle: '.active を 切り替え',
  wait: '2秒 待つ',
};

/** Approximate Chinese renderings of the English examples. */
const chineseExamples = {
  eventHandler: '当 点击 时 增加 #count',
  putInto: '把 我的值 放 到 #output',
  toggle: '切换 .active',
  wait: '等待 2秒',
};

/** Approximate Arabic renderings of the English examples. */
const arabicExamples = {
  eventHandler: 'زِد #count عند النقر',
  putInto: 'ضع قيمتي في #output',
  toggle: 'بدّل .active',
  wait: 'انتظر ثانيتين',
};

/**
 * Sample statements for testing: English inputs plus approximate expected
 * outputs per language, keyed by the same example names.
 */
export const examples = {
  english: englishExamples,
  japanese: japaneseExamples,
  chinese: chineseExamples,
  arabic: arabicExamples,
};