@lokascript/i18n 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/README.md +286 -0
  2. package/dist/browser.cjs +7669 -0
  3. package/dist/browser.cjs.map +1 -0
  4. package/dist/browser.d.cts +50 -0
  5. package/dist/browser.d.ts +50 -0
  6. package/dist/browser.js +7592 -0
  7. package/dist/browser.js.map +1 -0
  8. package/dist/hyperfixi-i18n.min.js +2 -0
  9. package/dist/hyperfixi-i18n.min.js.map +1 -0
  10. package/dist/hyperfixi-i18n.mjs +8558 -0
  11. package/dist/hyperfixi-i18n.mjs.map +1 -0
  12. package/dist/index.cjs +14205 -0
  13. package/dist/index.cjs.map +1 -0
  14. package/dist/index.d.cts +947 -0
  15. package/dist/index.d.ts +947 -0
  16. package/dist/index.js +14095 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/transformer-Ckask-yw.d.cts +1041 -0
  19. package/dist/transformer-Ckask-yw.d.ts +1041 -0
  20. package/package.json +84 -0
  21. package/src/browser.ts +122 -0
  22. package/src/compatibility/browser-tests/grammar-demo.spec.ts +169 -0
  23. package/src/constants.ts +366 -0
  24. package/src/dictionaries/ar.ts +233 -0
  25. package/src/dictionaries/bn.ts +156 -0
  26. package/src/dictionaries/de.ts +233 -0
  27. package/src/dictionaries/derive.ts +515 -0
  28. package/src/dictionaries/en.ts +237 -0
  29. package/src/dictionaries/es.ts +233 -0
  30. package/src/dictionaries/fr.ts +233 -0
  31. package/src/dictionaries/hi.ts +270 -0
  32. package/src/dictionaries/id.ts +233 -0
  33. package/src/dictionaries/index.ts +238 -0
  34. package/src/dictionaries/it.ts +233 -0
  35. package/src/dictionaries/ja.ts +233 -0
  36. package/src/dictionaries/ko.ts +233 -0
  37. package/src/dictionaries/ms.ts +276 -0
  38. package/src/dictionaries/pl.ts +239 -0
  39. package/src/dictionaries/pt.ts +237 -0
  40. package/src/dictionaries/qu.ts +233 -0
  41. package/src/dictionaries/ru.ts +270 -0
  42. package/src/dictionaries/sw.ts +233 -0
  43. package/src/dictionaries/th.ts +156 -0
  44. package/src/dictionaries/tl.ts +276 -0
  45. package/src/dictionaries/tr.ts +233 -0
  46. package/src/dictionaries/uk.ts +270 -0
  47. package/src/dictionaries/vi.ts +210 -0
  48. package/src/dictionaries/zh.ts +233 -0
  49. package/src/enhanced-i18n.test.ts +454 -0
  50. package/src/enhanced-i18n.ts +713 -0
  51. package/src/examples/new-languages.ts +326 -0
  52. package/src/formatting.test.ts +213 -0
  53. package/src/formatting.ts +416 -0
  54. package/src/grammar/direct-mappings.ts +353 -0
  55. package/src/grammar/grammar.test.ts +1053 -0
  56. package/src/grammar/index.ts +59 -0
  57. package/src/grammar/profiles/index.ts +860 -0
  58. package/src/grammar/transformer.ts +1318 -0
  59. package/src/grammar/types.ts +630 -0
  60. package/src/index.ts +202 -0
  61. package/src/new-languages.test.ts +389 -0
  62. package/src/parser/analyze-conflicts.test.ts +229 -0
  63. package/src/parser/ar.ts +40 -0
  64. package/src/parser/create-provider.ts +309 -0
  65. package/src/parser/de.ts +36 -0
  66. package/src/parser/es.ts +31 -0
  67. package/src/parser/fr.ts +31 -0
  68. package/src/parser/id.ts +34 -0
  69. package/src/parser/index.ts +50 -0
  70. package/src/parser/ja.ts +36 -0
  71. package/src/parser/ko.ts +37 -0
  72. package/src/parser/locale-manager.test.ts +198 -0
  73. package/src/parser/locale-manager.ts +197 -0
  74. package/src/parser/parser-integration.test.ts +439 -0
  75. package/src/parser/pt.ts +37 -0
  76. package/src/parser/qu.ts +37 -0
  77. package/src/parser/sw.ts +37 -0
  78. package/src/parser/tr.ts +38 -0
  79. package/src/parser/types.ts +113 -0
  80. package/src/parser/zh.ts +38 -0
  81. package/src/plugins/vite.ts +224 -0
  82. package/src/plugins/webpack.ts +124 -0
  83. package/src/pluralization.test.ts +197 -0
  84. package/src/pluralization.ts +393 -0
  85. package/src/runtime.ts +441 -0
  86. package/src/ssr-integration.ts +225 -0
  87. package/src/test-setup.ts +195 -0
  88. package/src/translation-validation.test.ts +171 -0
  89. package/src/translator.test.ts +252 -0
  90. package/src/translator.ts +297 -0
  91. package/src/types.ts +209 -0
  92. package/src/utils/locale.ts +190 -0
  93. package/src/utils/tokenizer-adapter.ts +469 -0
  94. package/src/utils/tokenizer.ts +19 -0
  95. package/src/validators/index.ts +174 -0
  96. package/src/validators/schema.ts +129 -0
@@ -0,0 +1,630 @@
1
+ /**
2
+ * Generalized Grammar System for Multilingual Hyperscript
3
+ *
4
+ * This system abstracts grammatical patterns across language families,
5
+ * enabling deep multilingual support without per-language hardcoding.
6
+ *
7
+ * Key Linguistic Concepts:
8
+ * - Word Order: SVO, SOV, VSO (and variations)
9
+ * - Adposition Type: Preposition (English) vs Postposition (Japanese/Korean)
10
+ * - Morphology: Isolating (Chinese) vs Agglutinative (Turkish) vs Fusional (Arabic)
11
+ * - Text Direction: LTR vs RTL
12
+ */
13
+
14
+ // =============================================================================
15
+ // Core Types
16
+ // =============================================================================
17
+
18
/**
 * Semantic roles in hyperscript commands.
 * These are universal across all 13 supported languages - only the surface form changes.
 *
 * ## Core Thematic Roles (from linguistic theory)
 * | Role        | Usage | Purpose                     | Example                   |
 * |-------------|-------|-----------------------------|---------------------------|
 * | action      | 100%  | Command verb                | toggle, put, fetch        |
 * | patient     | 90%   | What is acted upon          | .active, #count           |
 * | destination | 40%   | Where something goes        | into #output, to .class   |
 * | source      | 13%   | Where something comes from  | from #input, from URL     |
 * | event       | 106%  | Trigger events              | click, keydown, submit    |
 * | condition   | 8%    | Boolean expressions         | if x > 5, when visible    |
 * | agent       | 0%    | Who performs action         | Reserved for future use   |
 * | goal        | 1%    | Target value/state          | to 'red' (in transition)  |
 *
 * ## Quantitative Roles (answer "how much/long")
 * | Role     | Usage | Purpose        | Example              |
 * |----------|-------|----------------|----------------------|
 * | quantity | 7%    | Numeric amount | by 5, 3 times        |
 * | duration | 1%    | Time span      | for 5 seconds, 500ms |
 *
 * ## Adverbial/Modifier Roles (answer "how/by what means")
 * | Role         | Usage | Purpose                   | Example           |
 * |--------------|-------|---------------------------|-------------------|
 * | style        | 2%    | Animation/behavior        | with fade         |
 * | manner       | 2%    | Insertion position        | before, after     |
 * | method       | 1%    | HTTP method/technique     | via POST, as GET  |
 * | responseType | 1%    | Response format           | as json, as html  |
 *
 * ## Control Flow Roles
 * | Role     | Usage | Purpose      | Example               |
 * |----------|-------|--------------|-----------------------|
 * | loopType | 6%    | Loop variant | forever, until, times |
 *
 * ## Design Notes
 * - Low-usage roles (agent, goal, method, responseType) are intentionally kept for:
 *   - Linguistic completeness across all 13 languages
 *   - Future extensibility (AI agents, server-side execution)
 *   - Command-specific semantics (fetch, transition)
 * - Each role has distinct grammatical markers per language (see profiles/index.ts)
 * - Usage percentages based on pattern database analysis
 * - NOTE(review): the `event` usage figure (106%) exceeds 100%. Presumably a
 *   single pattern may carry more than one event, or the figure is a typo;
 *   confirm against the pattern database.
 * - `continues` is a structural (parser-control) member of the union and has
 *   no row in the usage tables above.
 */
export type SemanticRole =
  // Core thematic roles
  | 'action' // The command/verb (increment, put, toggle)
  | 'agent' // Who/what performs action (reserved for future: AI agents, server-side)
  | 'patient' // What is acted upon (the counter, .active)
  | 'source' // Origin (from #input, from URL)
  | 'destination' // Target location (into #output, to .class)
  | 'goal' // Target value/state (to 'red', to 100)
  | 'event' // Trigger (click, input, keydown)
  | 'condition' // Boolean expression (if x > 5)
  // Quantitative roles
  | 'quantity' // Numeric amount (by 5, 3 times)
  | 'duration' // Time span (for 5 seconds, over 500ms)
  // Adverbial roles
  | 'responseType' // Response format (as json, as text, as html)
  | 'method' // HTTP method/technique (via POST, using GET)
  | 'style' // Visual/behavioral manner (with fade, smoothly)
  | 'manner' // Insertion position (before, after)
  // Control flow roles
  | 'loopType' // Loop variant: forever, times, for, while, until, until-event
  // Structural roles (for parser control)
  | 'continues'; // Continuation marker (then-chains)
83
+
84
/**
 * Word order patterns.
 * These represent the major typological categories: the six permutations of
 * Subject (S), Verb (V) and Object (O), plus `'free'` for languages without a
 * fixed order.
 */
export type WordOrder = 'SVO' | 'SOV' | 'VSO' | 'VOS' | 'OVS' | 'OSV' | 'free';
89
+
90
/**
 * Where grammatical markers appear relative to their noun/verb.
 *
 * `insertMarkers` in this file emits a marker before the element for
 * `'preposition'`, after it for `'postposition'`, and emits no marker at all
 * for `'circumposition'` and `'none'`.
 */
export type AdpositionType = 'preposition' | 'postposition' | 'circumposition' | 'none';
94
+
95
/**
 * Morphological typology - how words are constructed.
 *
 * The language named beside each member is an illustrative example only.
 */
export type MorphologyType =
  | 'isolating' // Chinese - no inflection, word order matters
  | 'agglutinative' // Turkish, Japanese - morphemes stack predictably
  | 'fusional' // Arabic, Spanish - morphemes blend together
  | 'polysynthetic'; // Quechua - complex words encode full sentences
103
+
104
/**
 * A grammatical marker (particle, case ending, preposition).
 *
 * Of these fields, the `insertMarkers` utility in this file reads only `role`
 * (to pick the marker for an element) and `form` (the text to emit); an empty
 * `form` results in no marker token being emitted.
 */
export interface GrammaticalMarker {
  form: string; // The actual text (を, に, to, 的)
  role: SemanticRole; // What semantic role it marks
  position: AdpositionType; // Where it appears
  required: boolean; // Is it mandatory?
  alternatives?: string[]; // Alternative forms (e.g., 을/를 in Korean)
}
114
+
115
/**
 * Metadata for preserving line structure during translation.
 * Tracks indentation and blank lines so output maintains the same format.
 * One record describes one line of the original input.
 */
export interface LineMetadata {
  /** Trimmed line content (empty string for blank lines) */
  content: string;
  /** Leading whitespace (tabs/spaces) from original line */
  originalIndent: string;
  /** True if the line was empty or whitespace-only */
  isBlank: boolean;
}
127
+
128
+ // =============================================================================
129
+ // Language Profile
130
+ // =============================================================================
131
+
132
/**
 * Complete grammatical profile for a language.
 * This captures the essential typological features needed for transformation.
 *
 * `transformStatement` in this file consumes `canonicalOrder`, `markers` and
 * `adpositionType` from the target profile.
 */
export interface LanguageProfile {
  code: string; // ISO 639-1 code
  name: string; // Native name

  // Typological features
  wordOrder: WordOrder;
  adpositionType: AdpositionType;
  morphology: MorphologyType;
  direction: 'ltr' | 'rtl';

  // Grammatical markers for each semantic role
  markers: GrammaticalMarker[];

  // Role ordering - which semantic roles come in what order
  // E.g., Japanese: ['patient', 'source', 'destination', 'action']
  // E.g., English: ['action', 'patient', 'source', 'destination']
  // Roles omitted from this list are not dropped: reorderRoles appends them
  // after the listed ones.
  canonicalOrder: SemanticRole[];

  // Special rules
  rules?: GrammarRule[];
}
157
+
158
/**
 * Pattern for transforming hyperscript structures.
 * Pairs a matcher (which statements the rule applies to) with a transform
 * (how a matching statement is rewritten).
 */
export interface GrammarRule {
  name: string;
  description: string;

  // Pattern matching (in canonical English form)
  match: PatternMatcher;

  // How to transform for this language
  transform: PatternTransform;

  // Priority (higher = checked first)
  priority: number;
}
174
+
175
/**
 * Matches a hyperscript pattern.
 * Only `requiredRoles` is mandatory; the remaining criteria are optional.
 */
export interface PatternMatcher {
  // Command type(s) this matches
  commands?: string[];

  // Required semantic roles
  requiredRoles: SemanticRole[];

  // Optional roles
  optionalRoles?: SemanticRole[];

  // Custom predicate for complex matching; receives the parsed statement and
  // returns whether it matches
  predicate?: (parsed: ParsedStatement) => boolean;
}
191
+
192
/**
 * Defines how to transform a matched pattern.
 */
export interface PatternTransform {
  // Reorder roles for target language
  roleOrder: SemanticRole[];

  // Insert markers between roles
  insertMarkers?: boolean;

  // Custom transformation function; receives the parsed statement and the
  // language profile and returns the transformed text
  custom?: (parsed: ParsedStatement, profile: LanguageProfile) => string;
}
205
+
206
+ // =============================================================================
207
+ // Parsed Structures
208
+ // =============================================================================
209
+
210
/**
 * A parsed hyperscript statement broken into semantic components.
 */
export interface ParsedStatement {
  // Structural category of the statement
  type: 'event-handler' | 'command' | 'conditional' | 'loop';
  // Elements keyed by semantic role (at most one element per role, since this is a Map)
  roles: Map<SemanticRole, ParsedElement>;
  // The source text the statement was parsed from
  original: string;
}
218
+
219
/**
 * A single element with its semantic role.
 *
 * When emitting text, `insertMarkers` in this file uses `translated` if it is
 * a non-empty string and falls back to `value` otherwise.
 */
export interface ParsedElement {
  role: SemanticRole;
  value: string; // Original English value
  translated?: string; // Translated value
  isSelector?: boolean; // CSS selector (don't translate)
  isLiteral?: boolean; // Literal value (don't translate)
}
229
+
230
+ // =============================================================================
231
+ // Universal Pattern Templates
232
+ // =============================================================================
233
+
234
/**
 * Universal templates for common hyperscript patterns.
 * These define the semantic structure independent of surface form.
 *
 * Each entry carries a kebab-case `name`, the ordered `roles` it involves, and
 * an `english` template in which `{role}` placeholders stand for the role
 * values and the remaining words are the literal English markers.
 */
export const UNIVERSAL_PATTERNS = {
  // on click increment #count
  eventIncrement: {
    name: 'event-increment',
    roles: ['event', 'action', 'patient'] as SemanticRole[],
    english: 'on {event} {action} {patient}',
  },

  // put X into Y
  putInto: {
    name: 'put-into',
    roles: ['action', 'patient', 'destination'] as SemanticRole[],
    english: '{action} {patient} into {destination}',
  },

  // add .class to element
  addTo: {
    name: 'add-to',
    roles: ['action', 'patient', 'destination'] as SemanticRole[],
    english: '{action} {patient} to {destination}',
  },

  // toggle .class on element
  toggleOn: {
    name: 'toggle-on',
    roles: ['action', 'patient', 'destination'] as SemanticRole[],
    english: '{action} {patient} on {destination}',
  },

  // wait 2 seconds
  // NOTE(review): the time span is bound to the `quantity` role even though
  // SemanticRole also has a `duration` role - confirm this is intended.
  waitDuration: {
    name: 'wait-duration',
    roles: ['action', 'quantity'] as SemanticRole[],
    english: '{action} {quantity}',
  },

  // if condition then ... end
  conditional: {
    name: 'conditional',
    roles: ['action', 'condition'] as SemanticRole[],
    english: '{action} {condition} then ... end',
  },

  // fetch URL as type
  // NOTE(review): the `as` slot is bound to `method`, whereas the SemanticRole
  // comments describe `as json` / `as html` as `responseType` - confirm.
  fetchAs: {
    name: 'fetch-as',
    roles: ['action', 'source', 'method'] as SemanticRole[],
    english: '{action} {source} as {method}',
  },

  // show element with animation
  showWith: {
    name: 'show-with',
    roles: ['action', 'patient', 'style'] as SemanticRole[],
    english: '{action} {patient} with {style}',
  },

  // transition property over duration
  transitionOver: {
    name: 'transition-over',
    roles: ['action', 'patient', 'duration'] as SemanticRole[],
    english: '{action} {patient} over {duration}',
  },
} as const;
302
+
303
+ // =============================================================================
304
+ // Language Family Defaults
305
+ // =============================================================================
306
+
307
/**
 * Builds the defaults shared by the verb-first, prepositional, left-to-right
 * families. Only the morphology differs between them. A new object and a new
 * `canonicalOrder` array are created on every call so that no two families
 * share mutable state.
 */
function svoPrepositionalDefaults(morphology: MorphologyType): Partial<LanguageProfile> {
  return {
    wordOrder: 'SVO',
    adpositionType: 'preposition',
    morphology,
    direction: 'ltr',
    canonicalOrder: [
      'action',
      'patient',
      'source',
      'destination',
      'quantity',
      'duration',
      'method',
      'style',
    ],
  };
}

/**
 * Builds the defaults shared by the verb-final, postpositional, agglutinative,
 * left-to-right families. Returns fresh objects/arrays on every call.
 */
function sovPostpositionalDefaults(): Partial<LanguageProfile> {
  return {
    wordOrder: 'SOV',
    adpositionType: 'postposition',
    morphology: 'agglutinative',
    direction: 'ltr',
    canonicalOrder: [
      'patient',
      'source',
      'destination',
      'quantity',
      'duration',
      'method',
      'style',
      'action',
    ],
  };
}

/**
 * Default profiles for major language families.
 * Individual languages inherit and override these.
 *
 * Every entry is partial: it supplies typological features and a canonical
 * role order, but no `code`, `name` or `markers`.
 */
export const LANGUAGE_FAMILY_DEFAULTS: Record<string, Partial<LanguageProfile>> = {
  // Germanic (English, German, Dutch)
  germanic: svoPrepositionalDefaults('fusional'),

  // Romance (Spanish, French, Italian, Portuguese)
  romance: svoPrepositionalDefaults('fusional'),

  // Japonic (Japanese)
  japonic: sovPostpositionalDefaults(),

  // Koreanic (Korean)
  koreanic: sovPostpositionalDefaults(),

  // Turkic (Turkish, Azerbaijani)
  turkic: sovPostpositionalDefaults(),

  // Sinitic (Chinese, Cantonese) - SVO here, though topic-prominent and flexible
  sinitic: svoPrepositionalDefaults('isolating'),

  // Semitic (Arabic, Hebrew) - the only family with its own role order:
  // verb first, then agent, with destination ahead of source
  semitic: {
    wordOrder: 'VSO',
    adpositionType: 'preposition',
    morphology: 'fusional', // Root-pattern system
    direction: 'rtl',
    canonicalOrder: [
      'action',
      'agent',
      'patient',
      'destination',
      'source',
      'quantity',
      'duration',
      'method',
      'style',
    ],
  },

  // Austronesian (Indonesian, Tagalog)
  austronesian: svoPrepositionalDefaults('agglutinative'),

  // Quechuan (Quechua) - actually polysynthetic, simplified to agglutinative
  quechuan: sovPostpositionalDefaults(),

  // Bantu (Swahili)
  bantu: svoPrepositionalDefaults('agglutinative'),
};
493
+
494
+ // =============================================================================
495
+ // Transformation Utilities
496
+ // =============================================================================
497
+
498
/**
 * Arrange a statement's elements in the order the target language expects.
 *
 * @param roles - Parsed elements keyed by semantic role.
 * @param targetOrder - Desired role sequence (typically a profile's `canonicalOrder`).
 * @returns The elements whose roles appear in `targetOrder`, in that order,
 *   followed by every remaining element of `roles` in the map's own iteration
 *   order. The trailing part is a safety net: a role the target order does not
 *   mention (for example `manner`) is appended rather than lost.
 */
export function reorderRoles(
  roles: Map<SemanticRole, ParsedElement>,
  targetOrder: SemanticRole[]
): ParsedElement[] {
  const ordered: ParsedElement[] = [];
  const placed = new Set<SemanticRole>();

  // Pass 1: roles named by the target order, in that order.
  targetOrder.forEach(role => {
    const found = roles.get(role);
    if (!found) return;
    ordered.push(found);
    placed.add(role);
  });

  // Pass 2: anything the target order did not mention keeps its input order.
  roles.forEach((leftover, role) => {
    if (!placed.has(role)) ordered.push(leftover);
  });

  return ordered;
}
529
+
530
/**
 * Turn ordered elements into output tokens, attaching each role's grammatical
 * marker where the adposition type calls for one.
 *
 * @param elements - Elements in their final order.
 * @param markers - Candidate markers; the first one whose `role` equals the
 *   element's role is used.
 * @param adpositionType - `'preposition'` emits the marker before the element,
 *   `'postposition'` after it; any other value emits the element alone.
 * @returns Flat token list. Each element contributes its `translated` text
 *   (or `value` when `translated` is missing or empty), plus its marker form
 *   when one applies and is non-empty.
 */
export function insertMarkers(
  elements: ParsedElement[],
  markers: GrammaticalMarker[],
  adpositionType: AdpositionType
): string[] {
  const tokens: string[] = [];

  for (const { role, translated, value } of elements) {
    const text = translated || value;
    // Undefined when no marker matches; '' when the marker has no surface form.
    const form = markers.find(candidate => candidate.role === role)?.form;

    if (form && adpositionType === 'preposition') {
      tokens.push(form, text); // "to element"
    } else if (form && adpositionType === 'postposition') {
      tokens.push(text, form); // "element を"
    } else {
      tokens.push(text);
    }
  }

  return tokens;
}
562
+
563
/**
 * Intelligently joins tokens, handling agglutinative suffixes and prefixes.
 *
 * Rules:
 * 1. If a token ends with '-' (prefix marker), no space after it
 * 2. If a token starts with '-' (suffix marker), no space before it
 * 3. Removes the hyphen indicators from the final output - on both ends when
 *    a token carries both (e.g. '-x-' attaches to both neighbours as 'x')
 *
 * Examples:
 * - ['#count', '-ta'] → '#countta' (Quechua accusative suffix)
 * - ['بـ-', 'الماوس'] → 'بـالماوس' (Arabic prefix attachment)
 * - ['value', 'を'] → 'value を' (Japanese particle, normal spacing)
 * - ['a', '-b-', 'c'] → 'abc' (marker attached on both sides)
 *
 * Notes:
 * - Hyphen detection is purely textual, so a value token that itself begins
 *   or ends with '-' (such as '-5') is treated as a marker as well.
 * - An empty-string token is never followed by a lookahead space decision of
 *   its predecessor: when the next token is '', no space is emitted before it.
 *
 * @param tokens - Tokens in output order, markers included.
 * @returns The joined string; '' for an empty token list.
 */
export function joinTokens(tokens: string[]): string {
  if (tokens.length === 0) return '';

  let result = '';

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    const nextToken = tokens[i + 1];

    // A trailing '-' marks a prefix, a leading '-' marks a suffix.
    const isPrefix = token.endsWith('-');
    const isSuffix = token.startsWith('-');

    // Strip the hyphen indicators. Each strip works on the running display
    // form, so a token marked on both ends loses both hyphens instead of the
    // second strip undoing the first.
    let displayToken = token;
    if (isPrefix) displayToken = displayToken.slice(0, -1);
    if (isSuffix) displayToken = displayToken.slice(1);

    result += displayToken;

    // Separate from the following token unless either side asks to attach.
    if (nextToken) {
      const nextIsSuffix = nextToken.startsWith('-');
      if (!isPrefix && !nextIsSuffix) {
        result += ' ';
      }
    }
  }

  return result;
}
612
+
613
/**
 * Render a parsed statement in the target language.
 *
 * Pipeline: reorder the statement's roles by the target profile's canonical
 * order, attach the target profile's grammatical markers, then join the
 * tokens with affix-aware spacing (suffixes like -ta, prefixes like بـ-).
 *
 * @param parsed - The statement to render.
 * @param _sourceProfile - Accepted for signature compatibility; not read.
 * @param targetProfile - Supplies `canonicalOrder`, `markers` and `adpositionType`.
 * @returns The statement as a single string in the target language's form.
 */
export function transformStatement(
  parsed: ParsedStatement,
  _sourceProfile: LanguageProfile,
  targetProfile: LanguageProfile
): string {
  const { canonicalOrder, markers, adpositionType } = targetProfile;

  return joinTokens(
    insertMarkers(reorderRoles(parsed.roles, canonicalOrder), markers, adpositionType)
  );
}