@lokascript/i18n 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +286 -0
- package/dist/browser.cjs +7669 -0
- package/dist/browser.cjs.map +1 -0
- package/dist/browser.d.cts +50 -0
- package/dist/browser.d.ts +50 -0
- package/dist/browser.js +7592 -0
- package/dist/browser.js.map +1 -0
- package/dist/hyperfixi-i18n.min.js +2 -0
- package/dist/hyperfixi-i18n.min.js.map +1 -0
- package/dist/hyperfixi-i18n.mjs +8558 -0
- package/dist/hyperfixi-i18n.mjs.map +1 -0
- package/dist/index.cjs +14205 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +947 -0
- package/dist/index.d.ts +947 -0
- package/dist/index.js +14095 -0
- package/dist/index.js.map +1 -0
- package/dist/transformer-Ckask-yw.d.cts +1041 -0
- package/dist/transformer-Ckask-yw.d.ts +1041 -0
- package/package.json +84 -0
- package/src/browser.ts +122 -0
- package/src/compatibility/browser-tests/grammar-demo.spec.ts +169 -0
- package/src/constants.ts +366 -0
- package/src/dictionaries/ar.ts +233 -0
- package/src/dictionaries/bn.ts +156 -0
- package/src/dictionaries/de.ts +233 -0
- package/src/dictionaries/derive.ts +515 -0
- package/src/dictionaries/en.ts +237 -0
- package/src/dictionaries/es.ts +233 -0
- package/src/dictionaries/fr.ts +233 -0
- package/src/dictionaries/hi.ts +270 -0
- package/src/dictionaries/id.ts +233 -0
- package/src/dictionaries/index.ts +238 -0
- package/src/dictionaries/it.ts +233 -0
- package/src/dictionaries/ja.ts +233 -0
- package/src/dictionaries/ko.ts +233 -0
- package/src/dictionaries/ms.ts +276 -0
- package/src/dictionaries/pl.ts +239 -0
- package/src/dictionaries/pt.ts +237 -0
- package/src/dictionaries/qu.ts +233 -0
- package/src/dictionaries/ru.ts +270 -0
- package/src/dictionaries/sw.ts +233 -0
- package/src/dictionaries/th.ts +156 -0
- package/src/dictionaries/tl.ts +276 -0
- package/src/dictionaries/tr.ts +233 -0
- package/src/dictionaries/uk.ts +270 -0
- package/src/dictionaries/vi.ts +210 -0
- package/src/dictionaries/zh.ts +233 -0
- package/src/enhanced-i18n.test.ts +454 -0
- package/src/enhanced-i18n.ts +713 -0
- package/src/examples/new-languages.ts +326 -0
- package/src/formatting.test.ts +213 -0
- package/src/formatting.ts +416 -0
- package/src/grammar/direct-mappings.ts +353 -0
- package/src/grammar/grammar.test.ts +1053 -0
- package/src/grammar/index.ts +59 -0
- package/src/grammar/profiles/index.ts +860 -0
- package/src/grammar/transformer.ts +1318 -0
- package/src/grammar/types.ts +630 -0
- package/src/index.ts +202 -0
- package/src/new-languages.test.ts +389 -0
- package/src/parser/analyze-conflicts.test.ts +229 -0
- package/src/parser/ar.ts +40 -0
- package/src/parser/create-provider.ts +309 -0
- package/src/parser/de.ts +36 -0
- package/src/parser/es.ts +31 -0
- package/src/parser/fr.ts +31 -0
- package/src/parser/id.ts +34 -0
- package/src/parser/index.ts +50 -0
- package/src/parser/ja.ts +36 -0
- package/src/parser/ko.ts +37 -0
- package/src/parser/locale-manager.test.ts +198 -0
- package/src/parser/locale-manager.ts +197 -0
- package/src/parser/parser-integration.test.ts +439 -0
- package/src/parser/pt.ts +37 -0
- package/src/parser/qu.ts +37 -0
- package/src/parser/sw.ts +37 -0
- package/src/parser/tr.ts +38 -0
- package/src/parser/types.ts +113 -0
- package/src/parser/zh.ts +38 -0
- package/src/plugins/vite.ts +224 -0
- package/src/plugins/webpack.ts +124 -0
- package/src/pluralization.test.ts +197 -0
- package/src/pluralization.ts +393 -0
- package/src/runtime.ts +441 -0
- package/src/ssr-integration.ts +225 -0
- package/src/test-setup.ts +195 -0
- package/src/translation-validation.test.ts +171 -0
- package/src/translator.test.ts +252 -0
- package/src/translator.ts +297 -0
- package/src/types.ts +209 -0
- package/src/utils/locale.ts +190 -0
- package/src/utils/tokenizer-adapter.ts +469 -0
- package/src/utils/tokenizer.ts +19 -0
- package/src/validators/index.ts +174 -0
- package/src/validators/schema.ts +129 -0
|
@@ -0,0 +1,1318 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Grammar-Aware Transformer
|
|
3
|
+
*
|
|
4
|
+
* Transforms hyperscript statements between languages using the
|
|
5
|
+
* generalized grammar system. The key insight is that semantic
|
|
6
|
+
* roles are universal - only their surface realization differs.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type {
|
|
10
|
+
LanguageProfile,
|
|
11
|
+
ParsedStatement,
|
|
12
|
+
ParsedElement,
|
|
13
|
+
SemanticRole,
|
|
14
|
+
GrammarRule,
|
|
15
|
+
LineMetadata,
|
|
16
|
+
} from './types';
|
|
17
|
+
import { reorderRoles, insertMarkers, joinTokens } from './types';
|
|
18
|
+
import { getProfile, profiles } from './profiles';
|
|
19
|
+
import { hasDirectMapping, getDirectMapping } from './direct-mappings';
|
|
20
|
+
import { dictionaries } from '../dictionaries';
|
|
21
|
+
import { findInDictionary, translateFromEnglish } from '../types';
|
|
22
|
+
import { ENGLISH_MODIFIER_ROLES, CONDITIONAL_KEYWORDS, THEN_KEYWORDS } from '../constants';
|
|
23
|
+
|
|
24
|
+
// =============================================================================
|
|
25
|
+
// Compound Statement Handling
|
|
26
|
+
// =============================================================================
|
|
27
|
+
|
|
28
|
+
/**
 * English command words that are allowed to open a new statement.
 * Consulted when looking for command boundaries inside statements whose
 * commands are chained with plain spaces.
 */
const COMMAND_KEYWORDS = new Set([
  // a - d
  'add', 'append', 'async', 'beep', 'break', 'call', 'continue', 'decrement', 'default',
  // e - i
  'exit', 'fetch', 'for', 'get', 'go', 'halt', 'hide', 'if', 'increment', 'install',
  // j - p
  'js', 'log', 'make', 'measure', 'morph', 'pick', 'process', 'push', 'put',
  // r - s
  'remove', 'render', 'repeat', 'replace', 'return', 'send', 'set', 'settle', 'show', 'swap',
  // t - w
  'take', 'tell', 'throw', 'toggle', 'transition', 'trigger', 'unless', 'wait',
]);
|
|
80
|
+
|
|
81
|
+
/**
 * Build the command keyword set for a locale: every English command keyword
 * plus the lower-cased string values of that locale's `commands` dictionary.
 *
 * @param locale - Key used to look the dictionary up in `dictionaries`.
 * @returns A fresh Set; the shared COMMAND_KEYWORDS set is never modified.
 */
function getCommandKeywordsForLocale(locale: string): Set<string> {
  const merged = new Set(COMMAND_KEYWORDS);
  const localizedCommands = dictionaries[locale]?.commands;

  if (!localizedCommands) {
    return merged;
  }

  for (const entry of Object.values(localizedCommands)) {
    // Non-string dictionary values are ignored.
    if (typeof entry === 'string') {
      merged.add(entry.toLowerCase());
    }
  }

  return merged;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Break a compound statement into individual parts. Splitting happens, in
 * order, on newlines, on "then" keywords, and on command keyword boundaries.
 *
 * Example: "on click wait 1s then increment #count then toggle .active"
 * Returns: ["on click wait 1s", "increment #count", "toggle .active"]
 *
 * Example: "on click\n increment #count\n toggle .highlight"
 * Returns: ["on click", "increment #count", "toggle .highlight"]
 *
 * Example: "wait 2s toggle .highlight"
 * Returns: ["wait 2s", "toggle .highlight"]
 *
 * @param input - Statement text, possibly spanning several lines.
 * @param sourceLocale - Locale of the input; forwarded to the keyword splitters.
 * @returns The parts in their original order; blank lines contribute nothing.
 */
function splitCompoundStatement(input: string, sourceLocale: string): string[] {
  const statements: string[] = [];

  for (const rawLine of input.split(/\n/)) {
    const line = rawLine.trim();
    if (line.length === 0) {
      continue;
    }

    // Each line is split on "then" first, then every piece on command boundaries.
    for (const thenPart of splitOnThen(line, sourceLocale)) {
      statements.push(...splitOnCommandBoundaries(thenPart, sourceLocale));
    }
  }

  return statements;
}
|
|
135
|
+
|
|
136
|
+
// =============================================================================
|
|
137
|
+
// Line Structure Preservation
|
|
138
|
+
// =============================================================================
|
|
139
|
+
|
|
140
|
+
/**
 * Result of splitting with preserved line metadata.
 * Returned by splitCompoundStatementWithMetadata.
 */
interface SplitWithMetadataResult {
  /** The processed parts (commands/statements), in input order */
  parts: string[];
  /** Metadata for each original line, blank lines included (for reconstruction) */
  lineMetadata: LineMetadata[];
  /** For each entry of `parts`, the index into `lineMetadata` of the line it came from */
  partToLineIndex: number[];
}
|
|
151
|
+
|
|
152
|
+
/**
 * Split a compound statement like splitCompoundStatement does, while also
 * recording, for every raw input line, its trimmed content, its leading
 * whitespace and whether it is blank, so the layout can be rebuilt later.
 *
 * @param input - Statement text; lines are separated by '\n'.
 * @param sourceLocale - Locale of the input; forwarded to the keyword splitters.
 * @returns The parts, one metadata record per raw line, and for each part the
 *          index of the line it was found on.
 */
function splitCompoundStatementWithMetadata(
  input: string,
  sourceLocale: string
): SplitWithMetadataResult {
  const lineMetadata: LineMetadata[] = [];
  const parts: string[] = [];
  const partToLineIndex: number[] = [];

  input.split('\n').forEach((rawLine, lineIndex) => {
    const content = rawLine.trim();
    // The pattern accepts an empty prefix, so a match is always produced.
    const originalIndent = rawLine.match(/^(\s*)/)?.[1] ?? '';
    const isBlank = content.length === 0;

    lineMetadata.push({ content, originalIndent, isBlank });

    if (isBlank) {
      return;
    }

    // Non-blank lines are split on "then" first, then on command boundaries.
    for (const thenPart of splitOnThen(content, sourceLocale)) {
      for (const commandPart of splitOnCommandBoundaries(thenPart, sourceLocale)) {
        parts.push(commandPart);
        partToLineIndex.push(lineIndex);
      }
    }
  });

  return { parts, lineMetadata, partToLineIndex };
}
|
|
194
|
+
|
|
195
|
+
/**
 * Normalize indentation to consistent 4-space levels.
 * Preserves relative indentation structure while standardizing spacing.
 *
 * The smallest indent found on any non-blank line is taken as one level.
 * Every other indent is expressed as a rounded multiple of that unit.
 * A tab is measured as four columns.
 *
 * @param lineMetadata - One record per input line.
 * @returns One indent string per record, in the same order. Blank lines and
 *          lines without indentation get ''.
 */
function normalizeIndentation(lineMetadata: LineMetadata[]): string[] {
  // Spelled out as a number so the width cannot silently degrade to a single space.
  const INDENT_WIDTH = 4;
  const indentUnit = ' '.repeat(INDENT_WIDTH);

  // Width of an indent in columns, counting each tab as INDENT_WIDTH columns.
  const measure = (indent: string): number => indent.replace(/\t/g, indentUnit).length;

  const indentWidths = lineMetadata
    .filter(meta => !meta.isBlank && meta.originalIndent.length > 0)
    .map(meta => measure(meta.originalIndent));

  if (indentWidths.length === 0) {
    // Nothing is indented: every line gets an empty indent.
    return lineMetadata.map(() => '');
  }

  // Every measured indent has at least one character, so the base unit is >= 1.
  const baseUnit = indentWidths.reduce((smallest, width) => Math.min(smallest, width), Infinity);

  return lineMetadata.map(meta => {
    if (meta.isBlank || meta.originalIndent.length === 0) {
      return '';
    }
    const level = Math.round(measure(meta.originalIndent) / baseUnit);
    return indentUnit.repeat(level);
  });
}
|
|
232
|
+
|
|
233
|
+
/**
 * Rebuild multi-line output from transformed parts, restoring blank lines and
 * applying normalized indentation to each original line.
 *
 * @param transformedParts - Transformed statements, in the order they were split.
 * @param lineMetadata - One record per original input line.
 * @param partToLineIndex - For each transformed part, the index of its source line.
 * @param targetThen - Keyword placed between parts that share a line.
 * @returns The lines joined with '\n'. A non-blank line that has no parts
 *          mapped to it is left out of the output.
 */
function reconstructWithLineStructure(
  transformedParts: string[],
  lineMetadata: LineMetadata[],
  partToLineIndex: number[],
  targetThen: string
): string {
  const indents = normalizeIndentation(lineMetadata);
  const nonBlankLines = lineMetadata.reduce((count, meta) => (meta.isBlank ? count : count + 1), 0);
  const output: string[] = [];

  // Simple case: at most one non-blank line and at most one part.
  if (nonBlankLines <= 1 && transformedParts.length <= 1) {
    lineMetadata.forEach((meta, lineIdx) => {
      if (meta.isBlank) {
        output.push('');
      } else if (transformedParts.length > 0) {
        output.push(indents[lineIdx] + transformedParts[0]);
      }
    });
    return output.join('\n');
  }

  // Bucket the transformed parts by the line they came from.
  const partsByLine = new Map<number, string[]>();
  transformedParts.forEach((part, partIdx) => {
    const lineIdx = partToLineIndex[partIdx];
    const bucket = partsByLine.get(lineIdx);
    if (bucket) {
      bucket.push(part);
    } else {
      partsByLine.set(lineIdx, [part]);
    }
  });

  const separator = ` ${targetThen} `;
  lineMetadata.forEach((meta, lineIdx) => {
    if (meta.isBlank) {
      output.push('');
      return;
    }
    const lineParts = partsByLine.get(lineIdx);
    if (lineParts && lineParts.length > 0) {
      // Several parts on one line are chained with the target "then" keyword.
      output.push(indents[lineIdx] + lineParts.join(separator));
    }
  });

  return output.join('\n');
}
|
|
292
|
+
|
|
293
|
+
/**
 * Cut a statement wherever a new command keyword starts.
 * E.g., "wait 2s toggle .highlight" → ["wait 2s", "toggle .highlight"]
 *
 * Rules:
 * - When the statement opens with an event keyword, the first command keyword
 *   after it stays in the same part ("on click wait 1s" is one part).
 * - No cut is made when the preceding word is a modifier such as "to" or
 *   "from", or is itself a command keyword.
 *
 * Words are separated on whitespace only; quoted text is not treated specially.
 *
 * @param input - A single statement, already split on "then".
 * @param sourceLocale - Locale whose translated command keywords are also recognised.
 * @returns The non-empty parts, each re-joined with single spaces.
 */
function splitOnCommandBoundaries(input: string, sourceLocale: string): string[] {
  const commandWords = getCommandKeywordsForLocale(sourceLocale);
  // String.prototype.split always yields at least one element.
  const words = input.split(/\s+/);

  // A command keyword right after one of these words does not start a new part.
  const nonSplittingPredecessors = new Set([
    'to',
    'into',
    'from',
    'with',
    'by',
    'as',
    'at',
    'in',
    'on',
    'of',
    'over',
  ]);

  // True while an event handler's first command has not been seen yet.
  let awaitingHandlerCommand = EVENT_KEYWORDS.has(words[0]?.toLowerCase());

  const segments: string[] = [];
  let pending: string[] = [];

  for (const word of words) {
    const isCandidate = pending.length > 0 && commandWords.has(word.toLowerCase());

    if (isCandidate && awaitingHandlerCommand) {
      // First command of an event handler: keep it attached to the handler.
      awaitingHandlerCommand = false;
      pending.push(word);
      continue;
    }

    if (isCandidate) {
      const previous = pending[pending.length - 1].toLowerCase();
      const previousBlocksSplit =
        nonSplittingPredecessors.has(previous) || commandWords.has(previous);
      if (!previousBlocksSplit) {
        segments.push(pending.join(' '));
        pending = [word];
        continue;
      }
    }

    pending.push(word);
  }

  if (pending.length > 0) {
    segments.push(pending.join(' '));
  }

  return segments.filter(segment => segment.length > 0);
}
|
|
370
|
+
|
|
371
|
+
/**
 * Split one line at every "then" keyword.
 *
 * The keyword list is THEN_KEYWORDS plus, for non-English source locales, the
 * `then` entry of the dictionary's `modifiers` and `logical` sections when
 * present. A keyword only counts as a separator when it has whitespace on
 * both sides; matching is case-insensitive.
 *
 * @param input - A single line of source text.
 * @param sourceLocale - Locale of the input.
 * @returns The trimmed, non-empty pieces between separators. A piece that
 *          consists solely of a "then" keyword is dropped.
 */
function splitOnThen(input: string, sourceLocale: string): string[] {
  const separators = Array.from(THEN_KEYWORDS);

  if (sourceLocale !== 'en') {
    const sourceDict = dictionaries[sourceLocale];

    const fromModifiers = sourceDict?.modifiers?.then;
    if (fromModifiers) {
      separators.push(fromModifiers);
    }

    // Some dictionaries keep "then" in the logical section instead.
    const fromLogical = (sourceDict?.logical as Record<string, string>)?.then;
    if (fromLogical) {
      separators.push(fromLogical);
    }
  }

  // Escape regex metacharacters so every keyword is matched literally.
  const alternation = separators
    .map(keyword => keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');
  const boundary = new RegExp(`\\s+(${alternation})\\s+`, 'gi');

  const loweredSeparators = separators.map(keyword => keyword.toLowerCase());

  // The capture group makes split() return the matched keywords too; drop them.
  const isStatementText = (piece: string): boolean => {
    const probe = piece.toLowerCase().trim();
    return probe.length > 0 && !loweredSeparators.includes(probe);
  };

  return input
    .split(boundary)
    .filter(isStatementText)
    .map(piece => piece.trim())
    .filter(piece => piece.length > 0);
}
|
|
402
|
+
|
|
403
|
+
/**
 * Look up the word for "then" in the target language.
 *
 * @param targetLocale - Locale to translate into.
 * @returns The `then` entry of the dictionary's `modifiers` section, else of
 *          its `logical` section, else the English 'then'. English and
 *          locales without a dictionary always yield 'then'.
 */
function getTargetThenKeyword(targetLocale: string): string {
  const fallback = 'then';

  const targetDict = targetLocale === 'en' ? undefined : dictionaries[targetLocale];
  if (!targetDict) {
    return fallback;
  }

  const fromModifiers = targetDict.modifiers?.then;
  if (fromModifiers) {
    return fromModifiers;
  }

  // Dictionaries differ on where "then" lives, so try the logical section next.
  const fromLogical = (targetDict.logical as Record<string, string>)?.then;
  return fromLogical || fallback;
}
|
|
418
|
+
|
|
419
|
+
// =============================================================================
|
|
420
|
+
// Derived Constants from Profiles
|
|
421
|
+
// =============================================================================
|
|
422
|
+
|
|
423
|
+
/**
 * Collect the event keywords of every language profile.
 *
 * For each marker whose role is 'event', the marker's form and all of its
 * alternatives are added, lower-cased and with one leading and/or trailing
 * hyphen removed. The English 'on' is always part of the result.
 *
 * @returns The set of event keywords.
 */
function deriveEventKeywordsFromProfiles(): Set<string> {
  // Drops the hyphen notation used for affix markers and lower-cases the form.
  const normalizeForm = (form: string): string => form.replace(/^-|-$/g, '').toLowerCase();

  const keywords = new Set<string>(['on']);

  Object.values(profiles).forEach(profile => {
    profile.markers
      .filter(marker => marker.role === 'event')
      .forEach(marker => {
        for (const rawForm of [marker.form, ...(marker.alternatives ?? [])]) {
          const cleaned = normalizeForm(rawForm);
          if (cleaned) {
            keywords.add(cleaned);
          }
        }
      });
  });

  return keywords;
}
|
|
452
|
+
|
|
453
|
+
/**
 * Event keywords derived from language profiles.
 * Computed once, when this module is evaluated; always contains 'on'.
 */
const EVENT_KEYWORDS = deriveEventKeywordsFromProfiles();
|
|
455
|
+
|
|
456
|
+
// =============================================================================
|
|
457
|
+
// Helper: Dynamic Modifier Map
|
|
458
|
+
// =============================================================================
|
|
459
|
+
|
|
460
|
+
/**
 * Generates a lookup map for semantic roles based on the language profile.
 * Maps markers (e.g., 'to', 'に', 'into', 'إلى') to their semantic roles.
 * This enables parsing non-English input by using the profile's markers.
 *
 * Keys are lower-cased marker forms with one leading and/or trailing hyphen
 * removed. When two markers normalize to the same key, the later one wins.
 * English modifiers are added afterwards, only for keys the profile did not
 * define.
 *
 * The returned object has no prototype. Looking up an arbitrary token
 * (for example 'constructor' or '__proto__') therefore yields `undefined`
 * instead of an inherited Object.prototype member that callers would
 * mistake for a role.
 *
 * @param profile - Language profile whose markers are mapped.
 * @returns Marker form → semantic role.
 */
function generateModifierMap(profile: LanguageProfile): Record<string, SemanticRole> {
  const map: Record<string, SemanticRole> = Object.create(null);

  // Strip hyphen notation for suffix/prefix markers and lower-case the form.
  const normalizeForm = (form: string): string => form.replace(/^-|-$/g, '').toLowerCase();

  for (const marker of profile.markers) {
    // The primary form first, then any alternatives (e.g. vowel-harmony variants).
    for (const rawForm of [marker.form, ...(marker.alternatives ?? [])]) {
      const key = normalizeForm(rawForm);
      if (key) {
        map[key] = marker.role;
      }
    }
  }

  // Add English modifiers as fallback (don't override profile-specific markers).
  for (const [key, role] of Object.entries(ENGLISH_MODIFIER_ROLES)) {
    if (!(key in map)) {
      map[key] = role;
    }
  }

  return map;
}
|
|
494
|
+
|
|
495
|
+
// =============================================================================
|
|
496
|
+
// Statement Parser
|
|
497
|
+
// =============================================================================
|
|
498
|
+
|
|
499
|
+
/**
 * Parse a hyperscript statement into its semantic roles.
 * This is the analysis step: it decides which kind of statement the input is
 * and hands the tokens to the matching parser.
 *
 * @param input - Statement text.
 * @param sourceLocale - Locale of the input; defaults to 'en'.
 * @returns The parsed statement, or null when no profile exists for the
 *          locale or the statement type is not recognised.
 */
export function parseStatement(input: string, sourceLocale: string = 'en'): ParsedStatement | null {
  const profile = getProfile(sourceLocale);
  if (!profile) {
    return null;
  }

  const tokens = tokenize(input, profile);
  const kind = identifyStatementType(tokens, profile);

  if (kind === 'event-handler') {
    return parseEventHandler(tokens, profile);
  }
  if (kind === 'command') {
    return parseCommand(tokens, profile);
  }
  if (kind === 'conditional') {
    return parseConditional(tokens, profile);
  }
  return null;
}
|
|
523
|
+
|
|
524
|
+
/**
 * Known suffixes that may attach to words without spaces.
 * These are split off during tokenization for proper parsing.
 * Keyed by locale code; a locale that is missing or has an empty list gets no
 * suffix splitting.
 */
const ATTACHED_SUFFIXES: Record<string, string[]> = {
  // Chinese: 时 (time/when) often attaches to events like 点击时 (when clicking)
  zh: ['时', '的', '地', '得'],
  // Japanese: Some particles may attach in casual writing (none listed yet)
  ja: [],
  // Korean: Particles sometimes written without spaces (none listed yet)
  ko: [],
};
|
|
536
|
+
|
|
537
|
+
/**
 * Known prefixes that may attach to words without spaces.
 * Keyed by locale code; a locale that is missing gets no prefix splitting.
 */
const ATTACHED_PREFIXES: Record<string, string[]> = {
  // Chinese: 当 (when) sometimes written attached
  zh: ['当'],
  // Arabic: Prepositions that attach
  // NOTE(review): the first two entries appear to carry a tatweel (U+0640) after
  // the letter; splitAttachedAffixes matches with startsWith, so they would only
  // match tokens that literally contain that character — confirm this is intended.
  ar: ['بـ', 'كـ', 'و'],
};
|
|
546
|
+
|
|
547
|
+
/**
 * Separate language-specific affixes that are written attached to a word.
 * E.g., "点击时" → ["点击", "时"]
 *
 * At most one prefix and one suffix are detached per token (the first match in
 * each list), and only when some text remains after removing it. Tokens that
 * start with '#', '.', '<', '@' or a digit are passed through untouched.
 *
 * @param tokens - Whitespace-separated tokens.
 * @param locale - Locale code used to pick the affix lists.
 * @returns The same array instance when the locale has no affixes configured;
 *          otherwise a new array in the order prefix, word, suffix per token.
 */
function splitAttachedAffixes(tokens: string[], locale: string): string[] {
  const suffixes = ATTACHED_SUFFIXES[locale] || [];
  const prefixes = ATTACHED_PREFIXES[locale] || [];

  if (suffixes.length === 0 && prefixes.length === 0) {
    return tokens;
  }

  const expanded: string[] = [];

  for (const token of tokens) {
    // CSS selectors and numbers are never taken apart.
    const isOpaque = /^[#.<@]/.test(token) || /^\d+/.test(token);
    if (isOpaque) {
      expanded.push(token);
      continue;
    }

    const leadHit = prefixes.find(p => token.startsWith(p) && token.length > p.length);
    const afterLead = leadHit === undefined ? token : token.slice(leadHit.length);

    // The suffix is looked for on what is left after removing the prefix.
    const tailHit = suffixes.find(s => afterLead.endsWith(s) && afterLead.length > s.length);
    const core = tailHit === undefined ? afterLead : afterLead.slice(0, -tailHit.length);

    for (const piece of [leadHit, core, tailHit]) {
      if (piece) {
        expanded.push(piece);
      }
    }
  }

  return expanded;
}
|
|
598
|
+
|
|
599
|
+
/**
 * Simple whitespace tokenizer.
 *
 * - Text between '<' and its matching '>' is kept in one token even when it
 *   contains whitespace, so query selectors such as <button.primary /> survive.
 * - Everything else is cut at whitespace. Quoted strings get no special
 *   treatment, so a string literal containing spaces yields several tokens.
 * - The tokens are finally passed through splitAttachedAffixes for the
 *   profile's language code.
 *
 * @param input - Statement text.
 * @param profile - Language profile; only its `code` is used here.
 * @returns The tokens in input order.
 */
function tokenize(input: string, profile: LanguageProfile): string[] {
  const words: string[] = [];
  let buffer = '';
  // Number of '<' that have not been closed yet; above zero means "inside a selector".
  let angleDepth = 0;

  for (let pos = 0; pos < input.length; pos += 1) {
    const char = input.charAt(pos);

    if (char === '<') {
      angleDepth += 1;
    } else if (char === '>' && angleDepth > 0) {
      angleDepth -= 1;
    }

    const isSeparator = angleDepth === 0 && /\s/.test(char);
    if (!isSeparator) {
      buffer += char;
      continue;
    }

    if (buffer) {
      words.push(buffer);
      buffer = '';
    }
  }

  if (buffer) {
    words.push(buffer);
  }

  // Languages that write affixes attached need a second pass.
  return splitAttachedAffixes(words, profile.code);
}
|
|
644
|
+
|
|
645
|
+
/**
 * Classify a tokenized statement by looking at its first token only.
 *
 * @param tokens - Tokens of the statement.
 * @param profile - Language profile of the source text.
 * @returns 'unknown' for an empty token list; 'event-handler' when the first
 *          token is the profile's prepositional event marker or any known
 *          event keyword; 'conditional' when it is a conditional keyword;
 *          otherwise 'command'.
 */
function identifyStatementType(
  tokens: string[],
  profile: LanguageProfile
): 'event-handler' | 'command' | 'conditional' | 'unknown' {
  if (tokens.length === 0) {
    return 'unknown';
  }

  const head = tokens[0].toLowerCase();

  // The profile's own event preposition is checked first, then the keywords
  // derived from all profiles.
  const prepositionalEvent = profile.markers.find(
    marker => marker.role === 'event' && marker.position === 'preposition'
  );
  const matchesProfileMarker =
    prepositionalEvent !== undefined && head === prepositionalEvent.form.toLowerCase();

  if (matchesProfileMarker || EVENT_KEYWORDS.has(head)) {
    return 'event-handler';
  }

  return CONDITIONAL_KEYWORDS.has(head) ? 'conditional' : 'command';
}
|
|
674
|
+
|
|
675
|
+
/**
 * Parse an event handler statement.
 * Pattern: on {event} {command} {target?} {modifiers?}
 *
 * A leading event keyword is skipped. The next token becomes the 'event' role
 * and the one after it the 'action' role. The remaining tokens are grouped
 * into roles: they start out as 'patient', and every token found in the
 * profile's modifier map switches the role for the tokens that follow it
 * (e.g. "by 3" in "on click increment #count by 3"). When a role occurs more
 * than once, the last value wins.
 *
 * @param tokens - Tokens of the statement.
 * @param profile - Language profile used to build the modifier map.
 * @returns A statement of type 'event-handler'; `original` is the tokens
 *          joined with single spaces.
 */
function parseEventHandler(tokens: string[], profile: LanguageProfile): ParsedStatement {
  const roles = new Map<SemanticRole, ParsedElement>();

  // Skip the event keyword (e.g., 'on', 'で', '当', etc.) when present.
  let cursor = EVENT_KEYWORDS.has(tokens[0]?.toLowerCase()) ? 1 : 0;

  // Consumes the token under the cursor, if there is one, as the given role.
  const takePositional = (role: SemanticRole): void => {
    const value = tokens[cursor];
    if (value) {
      roles.set(role, { role, value });
      cursor += 1;
    }
  };

  takePositional('event');
  takePositional('action');

  if (tokens[cursor]) {
    const modifierMap = generateModifierMap(profile);
    let activeRole: SemanticRole = 'patient';
    let collected: string[] = [];

    // Stores the tokens gathered so far under the active role.
    const commit = (): void => {
      if (collected.length === 0) {
        return;
      }
      const value = collected.join(' ');
      roles.set(activeRole, {
        role: activeRole,
        value,
        isSelector: /^[#.<@]/.test(value),
      });
    };

    for (const token of tokens.slice(cursor)) {
      const nextRole = modifierMap[token.toLowerCase()];
      if (nextRole) {
        // A modifier closes the current role and opens the next one.
        commit();
        activeRole = nextRole;
        collected = [];
      } else {
        collected.push(token);
      }
    }

    commit();
  }

  return {
    type: 'event-handler',
    roles,
    original: tokens.join(' '),
  };
}
|
|
750
|
+
|
|
751
|
+
/**
 * Parse a command statement.
 * Pattern: {command} {args...}
 *
 * The first token is recorded as the 'action'. Every following token is
 * either a modifier keyword (looked up in the profile-derived modifier map),
 * which switches the role being filled, or a plain word appended to the
 * current role (initially 'patient').
 *
 * @param tokens - Tokens of a single statement.
 * @param profile - Language profile used to build the modifier-keyword map.
 * @returns A parsed statement of type 'command'; empty input yields no roles.
 */
function parseCommand(tokens: string[], profile: LanguageProfile): ParsedStatement {
  const roles = new Map<SemanticRole, ParsedElement>();

  if (tokens.length === 0) {
    return { type: 'command', roles, original: '' };
  }

  const [command, ...rest] = tokens;
  roles.set('action', { role: 'action', value: command });

  // The modifier map is generated per profile so non-English markers
  // (e.g., Japanese に, Korean 에, Arabic إلى) are recognised as well.
  const modifierMap = generateModifierMap(profile);

  let activeRole: SemanticRole = 'patient';
  let pending: string[] = [];

  // Store the words collected so far under the role currently being filled.
  const flush = (): void => {
    if (pending.length === 0) return;
    const value = pending.join(' ');
    roles.set(activeRole, {
      role: activeRole,
      value,
      isSelector: /^[#.<@]/.test(value),
    });
  };

  for (const token of rest) {
    const nextRole = modifierMap[token.toLowerCase()];
    if (!nextRole) {
      pending.push(token);
      continue;
    }
    flush();
    activeRole = nextRole;
    pending = [];
  }

  flush();

  return {
    type: 'command',
    roles,
    original: tokens.join(' '),
  };
}
|
|
812
|
+
|
|
813
|
+
/**
 * Parse a conditional statement.
 *
 * The first token (the 'if' keyword) is stored as the 'action'. The tokens
 * between it and the first 'then' keyword are stored as the 'condition'.
 * No condition is recorded when 'then' is absent or directly follows the
 * keyword, and tokens after 'then' are not captured in any role.
 *
 * @param tokens - Tokens of a single statement.
 * @param _profile - Unused.
 * @returns A parsed statement of type 'conditional'.
 */
function parseConditional(tokens: string[], _profile: LanguageProfile): ParsedStatement {
  const roles = new Map<SemanticRole, ParsedElement>();

  const [keyword] = tokens;
  roles.set('action', { role: 'action', value: keyword });

  // Locate the first 'then' keyword (shared constants, case-insensitive).
  const isThen = (t: string): boolean => THEN_KEYWORDS.has(t.toLowerCase());
  const thenIndex = tokens.findIndex(isThen);

  if (thenIndex > 1) {
    roles.set('condition', {
      role: 'condition',
      value: tokens.slice(1, thenIndex).join(' '),
    });
  }

  return {
    type: 'conditional',
    roles,
    original: tokens.join(' '),
  };
}
|
|
842
|
+
|
|
843
|
+
// =============================================================================
|
|
844
|
+
// Translation
|
|
845
|
+
// =============================================================================
|
|
846
|
+
|
|
847
|
+
/**
 * Translate a single word through the locale dictionaries.
 *
 * Words that start like a CSS selector (#, ., <, @) or with a digit are
 * returned untouched. Otherwise a non-English source word is first mapped to
 * its English key, and that key is then mapped into the target dictionary.
 * The original word is returned when the target dictionary is missing or no
 * translation is found.
 *
 * @param word - Word to translate.
 * @param sourceLocale - Locale the word is written in.
 * @param targetLocale - Locale to translate into.
 * @returns The translated word, or `word` itself as a fallback.
 */
function translateWord(word: string, sourceLocale: string, targetLocale: string): string {
  const looksLikeSelector = /^[#.<@]/.test(word);
  const startsWithDigit = /^\d+/.test(word);
  if (looksLikeSelector || startsWithDigit) {
    return word;
  }

  // English needs no source dictionary: the word already is the pivot key.
  const sourceDict = sourceLocale === 'en' ? null : dictionaries[sourceLocale];
  const targetDict = dictionaries[targetLocale];
  if (!targetDict) {
    return word;
  }

  // Step 1: source word -> English key (when a source dictionary applies).
  const sourceEntry = sourceDict ? findInDictionary(sourceDict, word) : undefined;
  const englishWord = sourceEntry ? sourceEntry.englishKey : word;

  // Step 2: English key -> target word.
  return translateFromEnglish(targetDict, englishWord) ?? word;
}
|
|
879
|
+
|
|
880
|
+
/**
 * Possessive markers per locale code.
 * Used to rewrite "X's Y" patterns into the target language's structure:
 * 'suffix' markers are attached after the owner, 'preposition' markers are
 * placed between the property and the owner.
 */
const POSSESSIVE_MARKERS: Record<
  string,
  { type: 'prefix' | 'suffix' | 'preposition'; marker: string }
> = (() => {
  const suffix = (marker: string) => ({ type: 'suffix' as const, marker });
  const preposition = (marker: string) => ({ type: 'preposition' as const, marker });

  return {
    en: suffix("'s"),
    es: preposition('de'),
    pt: preposition('de'),
    fr: preposition('de'),
    de: preposition('von'),
    ja: suffix('の'),
    ko: suffix('의'),
    zh: suffix('的'),
    ar: preposition('لـ'),
    tr: suffix("'ın"),
    id: preposition('dari'),
    qu: suffix('-pa'),
    sw: preposition('ya'),
  };
})();
|
|
902
|
+
|
|
903
|
+
/**
 * Transform possessive 's syntax to the target language.
 *
 * Examples:
 *   me's value → mi valor (Spanish - pronoun becomes possessive adjective)
 *   #button's textContent → textContent de #button (Spanish - prepositional)
 *   me's value → 私の値 (Japanese - の particle)
 *
 * @param token - A single token; returned unchanged unless it ends in "'s".
 * @param sourceLocale - Locale of the owner word.
 * @param targetLocale - Locale to translate into; locales without a marker
 *   entry fall back to the English "'s" suffix.
 * @returns For pronoun owners (me/it/you) the translated possessive adjective;
 *   for suffix-type targets `owner + marker`; for preposition-type targets the
 *   sentinel `__POSS__<marker>__<owner>__POSS__`, which the caller must
 *   resolve because the property word has to be reordered around the marker.
 */
function translatePossessive(token: string, sourceLocale: string, targetLocale: string): string {
  const possessiveMatch = token.match(/^(.+)'s$/i);
  if (!possessiveMatch) {
    return token;
  }

  const owner = possessiveMatch[1];
  const targetMarker = POSSESSIVE_MARKERS[targetLocale] || POSSESSIVE_MARKERS.en;

  // A Map (not an object literal) so that owners such as "constructor" or
  // "toString" cannot resolve to inherited Object.prototype members.
  const pronounPossessives = new Map<string, string>([
    ['me', 'my'],
    ['it', 'its'],
    ['you', 'your'],
  ]);

  const possessiveForm = pronounPossessives.get(owner.toLowerCase());
  if (possessiveForm) {
    // "me's" → "my"; the possessive adjective is English, hence the 'en' source.
    return translateWord(possessiveForm, 'en', targetLocale);
  }

  // For selectors and other owners, translate the owner and apply the marker.
  const translatedOwner = translateWord(owner, sourceLocale, targetLocale);

  switch (targetMarker.type) {
    case 'suffix':
      // Japanese/Korean/Chinese: owner + marker (e.g., #buttonの, #button의)
      return `${translatedOwner}${targetMarker.marker}`;
    case 'preposition':
      // Needs the following property word, so hand a sentinel to the caller.
      return `__POSS__${targetMarker.marker}__${translatedOwner}__POSS__`;
    default:
      return `${translatedOwner}'s`;
  }
}
|
|
950
|
+
|
|
951
|
+
/**
 * Translate a multi-word value, translating each word individually.
 * Handles possessives like "my value" → "mi valor" in Spanish.
 * Also handles 's possessive syntax like "me's value" → "mi valor".
 *
 * Words that look like CSS selectors, start with a digit, or are quoted are
 * passed through unchanged. For prepositional target languages "X's Y" is
 * reordered to "Y marker X"; when no property word follows, "marker X" is
 * emitted (this also applies to a single-word value such as "#button's").
 *
 * @param value - One or more whitespace-separated words.
 * @param sourceLocale - Locale the value is written in.
 * @param targetLocale - Locale to translate into.
 * @returns The translated words joined by single spaces.
 */
function translateMultiWordValue(
  value: string,
  sourceLocale: string,
  targetLocale: string
): string {
  // Sentinel produced by translatePossessive for prepositional languages.
  // The marker group is lazy so owners that themselves contain "__"
  // (e.g. BEM selectors like ".card__title") are kept in one piece.
  const prepositionalPattern = /^__POSS__(.+?)__(.+)__POSS__$/;

  // Single word: translate directly, resolving a possessive if present.
  if (!value.includes(' ')) {
    if (!value.includes("'s")) {
      return translateWord(value, sourceLocale, targetLocale);
    }
    const possessiveResult = translatePossessive(value, sourceLocale, targetLocale);
    const prepMatch = possessiveResult.match(prepositionalPattern);
    // Never leak the internal sentinel: with no property to reorder around,
    // emit "marker owner" exactly like the multi-word path does.
    return prepMatch ? `${prepMatch[1]} ${prepMatch[2]}` : possessiveResult;
  }

  const words = value.split(/\s+/);
  const translated: string[] = [];
  let i = 0;

  while (i < words.length) {
    const word = words[i];

    // Possessives are checked FIRST: "#button's" also starts with '#', so the
    // selector pass-through below would otherwise swallow it.
    if (word.includes("'s")) {
      const possessiveResult = translatePossessive(word, sourceLocale, targetLocale);
      const prepMatch = possessiveResult.match(prepositionalPattern);

      if (!prepMatch) {
        // Suffix-style possessive (Japanese, Korean, etc.) or pronoun
        translated.push(possessiveResult);
        i++;
        continue;
      }

      const [, marker, owner] = prepMatch;
      if (i + 1 < words.length) {
        // Prepositional: "X's Y" → "Y marker X" (e.g., "textContent de #button")
        const translatedProperty = translateWord(words[i + 1], sourceLocale, targetLocale);
        translated.push(`${translatedProperty} ${marker} ${owner}`);
        i += 2; // The property word was consumed as well
      } else {
        // No property following - just output owner with marker prefix
        translated.push(`${marker} ${owner}`);
        i++;
      }
      continue;
    }

    // Pure CSS selectors, numbers and quoted strings pass through untouched.
    if (/^[#.<@]/.test(word) || /^\d+/.test(word) || /^["'].*["']$/.test(word)) {
      translated.push(word);
      i++;
      continue;
    }

    translated.push(translateWord(word, sourceLocale, targetLocale));
    i++;
  }

  return translated.join(' ');
}
|
|
1029
|
+
|
|
1030
|
+
/**
 * Fill in `translated` on every element of a parsed statement (in place).
 *
 * Values containing possessive 's syntax are always run through the
 * translator, even when flagged as selectors (e.g. "#button's textContent").
 * Other selectors and literals are copied through verbatim; everything else
 * is translated word by word.
 *
 * @param parsed - Statement whose role elements are updated.
 * @param sourceLocale - Locale the statement is written in.
 * @param targetLocale - Locale to translate into.
 */
function translateElements(
  parsed: ParsedStatement,
  sourceLocale: string,
  targetLocale: string
): void {
  for (const element of parsed.roles.values()) {
    const hasPossessive = element.value.includes("'s");
    const isVerbatim = element.isSelector || element.isLiteral;

    element.translated =
      hasPossessive || !isVerbatim
        ? translateMultiWordValue(element.value, sourceLocale, targetLocale)
        : element.value;
  }
}
|
|
1050
|
+
|
|
1051
|
+
// =============================================================================
|
|
1052
|
+
// Main Transformer
|
|
1053
|
+
// =============================================================================
|
|
1054
|
+
|
|
1055
|
+
/**
 * Transforms hyperscript statements from a source language into a target
 * language: parse into semantic roles, translate the words, reorder the roles
 * and insert the target language's grammatical markers.
 */
export class GrammarTransformer {
  private sourceProfile: LanguageProfile;
  private targetProfile: LanguageProfile;

  /**
   * @param sourceLocale - Locale code of the input (defaults to 'en').
   * @param targetLocale - Locale code of the output.
   * @throws Error when either locale has no registered profile.
   */
  constructor(sourceLocale: string = 'en', targetLocale: string) {
    const source = getProfile(sourceLocale);
    const target = getProfile(targetLocale);

    if (!source) {
      throw new Error(`Unknown source locale: ${sourceLocale}`);
    }
    if (!target) {
      throw new Error(`Unknown target locale: ${targetLocale}`);
    }

    this.sourceProfile = source;
    this.targetProfile = target;
  }

  /**
   * Transform a hyperscript statement from source to target language.
   *
   * Compound statements chained with "then" are split, each part is
   * transformed on its own, and the parts are rejoined with the target
   * language's "then" keyword. Input containing newlines goes through the
   * metadata-aware splitter so the line structure can be reconstructed.
   */
  transform(input: string): string {
    const targetThen = getTargetThenKeyword(this.targetProfile.code);
    const sourceCode = this.sourceProfile.code;
    const transformPart = (part: string): string => this.transformSingle(part);

    if (input.includes('\n')) {
      // Multi-line input: keep indentation and blank lines.
      const { parts, lineMetadata, partToLineIndex } = splitCompoundStatementWithMetadata(
        input,
        sourceCode
      );
      return reconstructWithLineStructure(
        parts.map(transformPart),
        lineMetadata,
        partToLineIndex,
        targetThen
      );
    }

    // Single-line input: only split when there really is a "then" chain.
    const parts = splitCompoundStatement(input, sourceCode);
    return parts.length > 1
      ? parts.map(transformPart).join(` ${targetThen} `)
      : this.transformSingle(input);
  }

  /**
   * Transform a single hyperscript statement (no compound "then" chains).
   * Returns the input unchanged when it cannot be parsed.
   */
  private transformSingle(input: string): string {
    // 1. Parse into semantic roles.
    const parsed = parseStatement(input, this.sourceProfile.code);
    if (!parsed) {
      return input;
    }

    // 2. Translate the individual words of every role.
    translateElements(parsed, this.sourceProfile.code, this.targetProfile.code);

    // 3. Pick the applicable grammar rule; a custom transform takes over entirely.
    const rule = this.findRule(parsed);
    if (rule?.transform.custom) {
      return rule.transform.custom(parsed, this.targetProfile);
    }

    // 4. Reorder roles: the rule's order wins over the profile's canonical order.
    const roleOrder = rule?.transform.roleOrder || this.targetProfile.canonicalOrder;
    const reordered = reorderRoles(parsed.roles, roleOrder);

    // 5. Insert grammatical markers unless the rule opts out, then join.
    //    joinTokens handles suffix/prefix attachment (Turkish -i, Quechua -ta, etc.)
    const shouldInsertMarkers = rule?.transform.insertMarkers ?? true;
    const outputTokens = shouldInsertMarkers
      ? insertMarkers(reordered, this.targetProfile.markers, this.targetProfile.adpositionType)
      : reordered.map(e => e.translated || e.value);

    return joinTokens(outputTokens);
  }

  /**
   * Find the matching rule with the highest priority for this statement,
   * or undefined when the target profile has no rules or none match.
   */
  private findRule(parsed: ParsedStatement): GrammarRule | undefined {
    const { rules } = this.targetProfile;
    if (!rules) {
      return undefined;
    }

    const candidates = rules.filter(rule => this.matchesRule(parsed, rule));
    candidates.sort((a, b) => b.priority - a.priority);
    return candidates[0];
  }

  /**
   * Check if a parsed statement matches a rule: every required role must be
   * present, the action must be one of the rule's commands (when listed,
   * compared case-insensitively), and the custom predicate must accept it.
   */
  private matchesRule(parsed: ParsedStatement, rule: GrammarRule): boolean {
    const { match } = rule;

    for (const role of match.requiredRoles) {
      if (!parsed.roles.has(role)) {
        return false;
      }
    }

    if (match.commands && match.commands.length > 0) {
      const action = parsed.roles.get('action');
      if (!action) {
        return false;
      }

      const wanted = action.value.toLowerCase();
      const isListed = match.commands.some(cmd => cmd.toLowerCase() === wanted);
      if (!isListed) {
        return false;
      }
    }

    if (match.predicate && !match.predicate(parsed)) {
      return false;
    }

    return true;
  }
}
|
|
1198
|
+
|
|
1199
|
+
// =============================================================================
|
|
1200
|
+
// Convenience Functions
|
|
1201
|
+
// =============================================================================
|
|
1202
|
+
|
|
1203
|
+
/**
 * Transform hyperscript from English to the target language.
 *
 * @param input - English hyperscript source.
 * @param targetLocale - Locale code to translate into.
 * @returns The transformed statement(s).
 */
export function toLocale(input: string, targetLocale: string): string {
  return new GrammarTransformer('en', targetLocale).transform(input);
}
|
|
1210
|
+
|
|
1211
|
+
/**
 * Transform hyperscript from the source language to English.
 *
 * @param input - Hyperscript source written in `sourceLocale`.
 * @param sourceLocale - Locale code of the input.
 * @returns The transformed statement(s) in English.
 */
export function toEnglish(input: string, sourceLocale: string): string {
  return new GrammarTransformer(sourceLocale, 'en').transform(input);
}
|
|
1218
|
+
|
|
1219
|
+
/**
 * Transform between any two languages.
 *
 * Identical locales return the input untouched. Pairs involving English use
 * a single transformer pass. Other pairs use direct translation when a direct
 * mapping exists (ja↔zh, es↔pt, ko↔ja) and otherwise pivot through English.
 *
 * @param input - Hyperscript source written in `sourceLocale`.
 * @param sourceLocale - Locale code of the input.
 * @param targetLocale - Locale code of the output.
 * @returns The translated hyperscript.
 */
export function translate(input: string, sourceLocale: string, targetLocale: string): string {
  if (sourceLocale === targetLocale) {
    return input;
  }
  if (sourceLocale === 'en') {
    return toLocale(input, targetLocale);
  }
  if (targetLocale === 'en') {
    return toEnglish(input, sourceLocale);
  }

  // Prefer a direct mapping; fall back to source → English → target.
  return hasDirectMapping(sourceLocale, targetLocale)
    ? translateDirect(input, sourceLocale, targetLocale)
    : toLocale(toEnglish(input, sourceLocale), targetLocale);
}
|
|
1239
|
+
|
|
1240
|
+
/**
 * Direct translation between language pairs without English pivot.
 * More accurate for closely related languages (ja↔zh, es↔pt).
 *
 * The input is split on whitespace and each token is translated on its own,
 * so the result is always joined with single spaces. Tokens that start like
 * a selector (#, ., <, @) or a quoted literal are passed through unchanged.
 * Tokens with a hyphenated suffix (e.g., "#count-ta" in Quechua) have only
 * their base translated. Falls back to pivot translation through English
 * when no direct mapping is available.
 *
 * @param input - Hyperscript source written in `sourceLocale`.
 * @param sourceLocale - Locale code of the input.
 * @param targetLocale - Locale code of the output.
 * @returns The translated hyperscript.
 */
function translateDirect(input: string, sourceLocale: string, targetLocale: string): string {
  const mapping = getDirectMapping(sourceLocale, targetLocale);
  if (!mapping) {
    // Fallback to pivot translation
    return toLocale(toEnglish(input, sourceLocale), targetLocale);
  }

  // Only non-empty string entries count as translations. This keeps tokens
  // such as "constructor" from resolving to inherited Object.prototype members.
  const lookup = (key: string): string | undefined => {
    const entry: unknown = mapping.words[key];
    return typeof entry === 'string' && entry !== '' ? entry : undefined;
  };

  const translated = input.split(/\s+/).map(token => {
    // Preserve selectors (same prefixes as the rest of the translator: # . < @)
    // and quoted literals.
    if (/^[#.<@"']/.test(token)) {
      return token;
    }

    const direct = lookup(token);
    if (direct !== undefined) {
      return direct;
    }

    // Suffix-attached tokens: translate the base, keep the suffix as written.
    const suffixMatch = token.match(/^(.+?)(-.+)$/);
    if (suffixMatch) {
      const [, base, suffix] = suffixMatch;
      return (lookup(base) ?? base) + suffix;
    }

    // Return unchanged if no mapping found
    return token;
  });

  return translated.join(' ');
}
|
|
1284
|
+
|
|
1285
|
+
// =============================================================================
|
|
1286
|
+
// Examples (for testing)
|
|
1287
|
+
// =============================================================================
|
|
1288
|
+
|
|
1289
|
+
/**
 * Sample statements for testing: English inputs plus the outputs expected
 * for a few target languages. The non-English entries are approximate and
 * kept for reference only.
 */
export const examples = {
  // Source statements
  english: {
    eventHandler: 'on click increment #count',
    putInto: 'put my value into #output',
    toggle: 'toggle .active',
    wait: 'wait 2 seconds',
  },

  // Expected outputs (approximate, for reference)
  japanese: {
    eventHandler: '#count を クリック で 増加',
    putInto: '私の 値 を #output に 置く',
    toggle: '.active を 切り替え',
    wait: '2秒 待つ',
  },

  chinese: {
    eventHandler: '当 点击 时 增加 #count',
    putInto: '把 我的值 放 到 #output',
    toggle: '切换 .active',
    wait: '等待 2秒',
  },

  arabic: {
    eventHandler: 'زِد #count عند النقر',
    putInto: 'ضع قيمتي في #output',
    toggle: 'بدّل .active',
    wait: 'انتظر ثانيتين',
  },
};
|