@lokascript/semantic 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. package/dist/browser-ar.ar.global.js +2 -2
  2. package/dist/browser-core.core.global.js +2 -2
  3. package/dist/browser-de.de.global.js +2 -2
  4. package/dist/browser-east-asian.east-asian.global.js +2 -2
  5. package/dist/browser-en-tr.en-tr.global.js +2 -2
  6. package/dist/browser-en.en.global.js +2 -2
  7. package/dist/browser-es-en.es-en.global.js +2 -2
  8. package/dist/browser-es.es.global.js +2 -2
  9. package/dist/browser-fr.fr.global.js +2 -2
  10. package/dist/browser-id.id.global.js +2 -2
  11. package/dist/browser-ja.ja.global.js +2 -2
  12. package/dist/browser-ko.ko.global.js +2 -2
  13. package/dist/browser-lazy.lazy.global.js +2 -2
  14. package/dist/browser-priority.priority.global.js +2 -2
  15. package/dist/browser-pt.pt.global.js +2 -2
  16. package/dist/browser-qu.qu.global.js +2 -2
  17. package/dist/browser-sw.sw.global.js +2 -2
  18. package/dist/browser-tr.tr.global.js +2 -2
  19. package/dist/browser-western.western.global.js +2 -2
  20. package/dist/browser-zh.zh.global.js +2 -2
  21. package/dist/browser.global.js +2 -2
  22. package/dist/browser.global.js.map +1 -1
  23. package/dist/index.cjs +13042 -17462
  24. package/dist/index.cjs.map +1 -1
  25. package/dist/index.d.cts +49 -5
  26. package/dist/index.d.ts +49 -5
  27. package/dist/index.js +14044 -18464
  28. package/dist/index.js.map +1 -1
  29. package/dist/languages/ar.d.ts +1 -1
  30. package/dist/languages/ar.js +31 -44
  31. package/dist/languages/ar.js.map +1 -1
  32. package/dist/languages/de.d.ts +1 -1
  33. package/dist/languages/de.js +14 -2
  34. package/dist/languages/de.js.map +1 -1
  35. package/dist/languages/en.d.ts +1 -1
  36. package/dist/languages/en.js +558 -12
  37. package/dist/languages/en.js.map +1 -1
  38. package/dist/languages/es.d.ts +1 -1
  39. package/dist/languages/es.js +16 -0
  40. package/dist/languages/es.js.map +1 -1
  41. package/dist/languages/fr.d.ts +1 -1
  42. package/dist/languages/fr.js +14 -2
  43. package/dist/languages/fr.js.map +1 -1
  44. package/dist/languages/id.d.ts +1 -1
  45. package/dist/languages/id.js +14 -2
  46. package/dist/languages/id.js.map +1 -1
  47. package/dist/languages/ja.d.ts +1 -1
  48. package/dist/languages/ja.js +18 -3
  49. package/dist/languages/ja.js.map +1 -1
  50. package/dist/languages/ko.d.ts +8 -1
  51. package/dist/languages/ko.js +75 -43
  52. package/dist/languages/ko.js.map +1 -1
  53. package/dist/languages/pt.d.ts +1 -1
  54. package/dist/languages/pt.js +17 -0
  55. package/dist/languages/pt.js.map +1 -1
  56. package/dist/languages/qu.d.ts +12 -1
  57. package/dist/languages/qu.js +77 -2
  58. package/dist/languages/qu.js.map +1 -1
  59. package/dist/languages/sw.d.ts +1 -1
  60. package/dist/languages/sw.js.map +1 -1
  61. package/dist/languages/tr.d.ts +9 -1
  62. package/dist/languages/tr.js +96 -72
  63. package/dist/languages/tr.js.map +1 -1
  64. package/dist/languages/zh.d.ts +1 -1
  65. package/dist/languages/zh.js +16 -0
  66. package/dist/languages/zh.js.map +1 -1
  67. package/dist/{types-C4dcj53L.d.ts → types-BY3Id07j.d.ts} +20 -5
  68. package/package.json +20 -29
  69. package/src/generators/command-schemas.ts +21 -10
  70. package/src/generators/event-handler-generator.ts +50 -44
  71. package/src/generators/language-profiles.ts +6 -0
  72. package/src/generators/pattern-generator.ts +883 -1
  73. package/src/generators/profiles/arabic.ts +19 -3
  74. package/src/generators/profiles/bengali.ts +12 -1
  75. package/src/generators/profiles/chinese.ts +15 -0
  76. package/src/generators/profiles/french.ts +12 -1
  77. package/src/generators/profiles/german.ts +12 -1
  78. package/src/generators/profiles/hebrew.ts +148 -0
  79. package/src/generators/profiles/hindi.ts +12 -1
  80. package/src/generators/profiles/index.ts +2 -0
  81. package/src/generators/profiles/indonesian.ts +12 -1
  82. package/src/generators/profiles/italian.ts +16 -0
  83. package/src/generators/profiles/japanese.ts +11 -2
  84. package/src/generators/profiles/korean.ts +15 -1
  85. package/src/generators/profiles/polish.ts +12 -0
  86. package/src/generators/profiles/portuguese.ts +16 -0
  87. package/src/generators/profiles/russian.ts +11 -0
  88. package/src/generators/profiles/spanish.ts +15 -0
  89. package/src/generators/profiles/spanishMexico.ts +176 -0
  90. package/src/generators/profiles/thai.ts +11 -0
  91. package/src/generators/profiles/turkish.ts +49 -7
  92. package/src/generators/profiles/types.ts +21 -5
  93. package/src/generators/profiles/ukrainian.ts +11 -0
  94. package/src/generators/profiles/vietnamese.ts +11 -0
  95. package/src/language-building-schema.ts +111 -0
  96. package/src/languages/_all.ts +5 -1
  97. package/src/languages/es-MX.ts +32 -0
  98. package/src/languages/he.ts +15 -0
  99. package/src/parser/pattern-matcher.ts +10 -1
  100. package/src/parser/semantic-parser.ts +3 -0
  101. package/src/patterns/add/ar.ts +3 -59
  102. package/src/patterns/add/index.ts +5 -1
  103. package/src/patterns/add/ja.ts +3 -81
  104. package/src/patterns/add/ko.ts +3 -62
  105. package/src/patterns/add/qu.ts +69 -0
  106. package/src/patterns/add/tr.ts +3 -59
  107. package/src/patterns/builders.ts +1 -0
  108. package/src/patterns/decrement/tr.ts +3 -36
  109. package/src/patterns/event-handler/ar.ts +3 -139
  110. package/src/patterns/event-handler/he.ts +15 -0
  111. package/src/patterns/event-handler/index.ts +5 -1
  112. package/src/patterns/event-handler/ja.ts +3 -106
  113. package/src/patterns/event-handler/ko.ts +3 -121
  114. package/src/patterns/event-handler/ms.ts +45 -20
  115. package/src/patterns/event-handler/tr.ts +3 -158
  116. package/src/patterns/get/ar.ts +3 -37
  117. package/src/patterns/get/ja.ts +3 -41
  118. package/src/patterns/get/ko.ts +3 -41
  119. package/src/patterns/grammar-transformed/ja.ts +3 -1701
  120. package/src/patterns/grammar-transformed/ko.ts +3 -1299
  121. package/src/patterns/grammar-transformed/tr.ts +3 -1055
  122. package/src/patterns/hide/ar.ts +3 -55
  123. package/src/patterns/hide/ja.ts +3 -57
  124. package/src/patterns/hide/ko.ts +3 -57
  125. package/src/patterns/hide/tr.ts +3 -53
  126. package/src/patterns/increment/tr.ts +3 -40
  127. package/src/patterns/put/ar.ts +3 -62
  128. package/src/patterns/put/ja.ts +3 -63
  129. package/src/patterns/put/ko.ts +3 -55
  130. package/src/patterns/put/tr.ts +3 -55
  131. package/src/patterns/remove/ar.ts +3 -59
  132. package/src/patterns/remove/index.ts +5 -1
  133. package/src/patterns/remove/ja.ts +3 -62
  134. package/src/patterns/remove/ko.ts +3 -66
  135. package/src/patterns/remove/qu.ts +69 -0
  136. package/src/patterns/remove/tr.ts +3 -66
  137. package/src/patterns/set/ar.ts +3 -72
  138. package/src/patterns/set/ja.ts +3 -74
  139. package/src/patterns/set/ko.ts +3 -73
  140. package/src/patterns/set/tr.ts +3 -95
  141. package/src/patterns/show/ar.ts +3 -55
  142. package/src/patterns/show/ja.ts +3 -57
  143. package/src/patterns/show/ko.ts +3 -61
  144. package/src/patterns/show/tr.ts +3 -53
  145. package/src/patterns/take/ar.ts +3 -39
  146. package/src/patterns/toggle/ar.ts +3 -49
  147. package/src/patterns/toggle/index.ts +5 -1
  148. package/src/patterns/toggle/ja.ts +3 -144
  149. package/src/patterns/toggle/ko.ts +3 -101
  150. package/src/patterns/toggle/qu.ts +90 -0
  151. package/src/patterns/toggle/tr.ts +3 -76
  152. package/src/registry.ts +179 -15
  153. package/src/tokenizers/arabic.ts +13 -46
  154. package/src/tokenizers/bengali.ts +2 -16
  155. package/src/tokenizers/he.ts +542 -0
  156. package/src/tokenizers/index.ts +1 -0
  157. package/src/tokenizers/japanese.ts +3 -1
  158. package/src/tokenizers/korean.ts +104 -48
  159. package/src/tokenizers/ms.ts +3 -0
  160. package/src/tokenizers/quechua.ts +101 -2
  161. package/src/tokenizers/turkish.ts +64 -69
  162. package/src/types.ts +13 -0
@@ -0,0 +1,176 @@
1
+ /**
2
+ * Mexican Spanish Language Profile
3
+ *
4
+ * ⚠️ EXAMPLE/TEST ONLY - NOT FOR PRODUCTION USE
5
+ *
6
+ * This profile demonstrates the language variant feature:
7
+ * - Extends base Spanish using `extends: 'es'`
8
+ * - Overrides specific keywords with regional alternatives
9
+ * - Reuses the Spanish tokenizer
10
+ *
11
+ * The Mexican-specific vocabulary (ahorita, jalar, aventar, etc.) is
12
+ * illustrative and has NOT been validated by native speakers.
13
+ *
14
+ * To create a production-ready regional variant, consult native speakers
15
+ * to verify vocabulary choices and completeness.
16
+ */
17
+
18
+ import type { LanguageProfile } from './types';
19
+
20
+ export const spanishMexicoProfile: LanguageProfile = {
21
+ code: 'es-MX',
22
+ name: 'Spanish (Mexico)',
23
+ nativeName: 'Español (México)',
24
+ direction: 'ltr',
25
+ wordOrder: 'SVO',
26
+ markingStrategy: 'preposition',
27
+ usesSpaces: true,
28
+ defaultVerbForm: 'infinitive',
29
+ extends: 'es', // Inherit from base Spanish
30
+
31
+ verb: {
32
+ position: 'start',
33
+ subjectDrop: true,
34
+ },
35
+ references: {
36
+ me: 'yo',
37
+ it: 'ello',
38
+ you: 'tú', // Mexico uses tú (not vos like Argentina)
39
+ result: 'resultado',
40
+ event: 'evento',
41
+ target: 'objetivo',
42
+ body: 'cuerpo',
43
+ },
44
+ possessive: {
45
+ marker: 'de',
46
+ markerPosition: 'before-property',
47
+ usePossessiveAdjectives: true,
48
+ specialForms: {
49
+ me: 'mi',
50
+ it: 'su',
51
+ you: 'tu',
52
+ },
53
+ keywords: {
54
+ mi: 'me',
55
+ tu: 'you',
56
+ su: 'it',
57
+ },
58
+ },
59
+ roleMarkers: {
60
+ destination: { primary: 'en', alternatives: ['sobre', 'a'], position: 'before' },
61
+ source: { primary: 'de', alternatives: ['desde'], position: 'before' },
62
+ patient: { primary: '', position: 'before' },
63
+ style: { primary: 'con', position: 'before' },
64
+ },
65
+ keywords: {
66
+ // Class/Attribute operations - Mexican alternatives
67
+ toggle: {
68
+ primary: 'alternar',
69
+ alternatives: ['cambiar', 'conmutar', 'switchear'], // "switchear" is Spanglish common in MX tech
70
+ normalized: 'toggle',
71
+ },
72
+ add: { primary: 'agregar', alternatives: ['añadir', 'meter'], normalized: 'add' },
73
+ remove: {
74
+ primary: 'quitar',
75
+ alternatives: ['eliminar', 'borrar', 'sacar'], // "borrar" more common in MX
76
+ normalized: 'remove',
77
+ },
78
+ // Content operations
79
+ put: { primary: 'poner', alternatives: ['colocar', 'meter'], normalized: 'put' },
80
+ append: { primary: 'añadir', normalized: 'append' },
81
+ prepend: { primary: 'anteponer', normalized: 'prepend' },
82
+ take: { primary: 'tomar', alternatives: ['agarrar'], normalized: 'take' }, // "agarrar" more MX
83
+ make: { primary: 'hacer', alternatives: ['crear'], normalized: 'make' },
84
+ clone: { primary: 'clonar', alternatives: ['copiar'], normalized: 'clone' },
85
+ swap: { primary: 'intercambiar', alternatives: ['cambiar'], normalized: 'swap' },
86
+ morph: { primary: 'transformar', alternatives: ['convertir'], normalized: 'morph' },
87
+ // Variable operations
88
+ set: { primary: 'establecer', alternatives: ['fijar', 'definir', 'setear'], normalized: 'set' },
89
+ get: { primary: 'obtener', alternatives: ['conseguir', 'jalar'], normalized: 'get' },
90
+ increment: {
91
+ primary: 'incrementar',
92
+ alternatives: ['aumentar', 'subir'],
93
+ normalized: 'increment',
94
+ },
95
+ decrement: {
96
+ primary: 'decrementar',
97
+ alternatives: ['disminuir', 'bajar'],
98
+ normalized: 'decrement',
99
+ },
100
+ log: { primary: 'registrar', alternatives: ['imprimir', 'loguear'], normalized: 'log' },
101
+ // Visibility
102
+ show: { primary: 'mostrar', alternatives: ['enseñar'], normalized: 'show' },
103
+ hide: { primary: 'ocultar', alternatives: ['esconder'], normalized: 'hide' },
104
+ transition: { primary: 'transición', alternatives: ['animar'], normalized: 'transition' },
105
+ // Events
106
+ on: { primary: 'en', alternatives: ['cuando', 'al'], normalized: 'on' },
107
+ trigger: { primary: 'disparar', alternatives: ['activar'], normalized: 'trigger' },
108
+ send: { primary: 'enviar', alternatives: ['mandar'], normalized: 'send' }, // "mandar" more MX
109
+ // DOM focus
110
+ focus: { primary: 'enfocar', normalized: 'focus' },
111
+ blur: { primary: 'desenfocar', normalized: 'blur' },
112
+ // Common event names
113
+ click: { primary: 'clic', alternatives: ['hacer clic', 'dar clic'], normalized: 'click' },
114
+ hover: { primary: 'sobrevolar', alternatives: ['pasar encima'], normalized: 'hover' },
115
+ submit: { primary: 'envío', alternatives: ['enviar'], normalized: 'submit' },
116
+ input: { primary: 'entrada', alternatives: ['introducir'], normalized: 'input' },
117
+ change: { primary: 'cambio', alternatives: ['cambiar'], normalized: 'change' },
118
+ // Navigation
119
+ go: { primary: 'ir', alternatives: ['navegar'], normalized: 'go' },
120
+ // Async - Mexican variants
121
+ wait: {
122
+ primary: 'esperar',
123
+ alternatives: ['ahorita', 'aguantar'], // "ahorita" is distinctly Mexican
124
+ normalized: 'wait',
125
+ },
126
+ fetch: {
127
+ primary: 'buscar',
128
+ alternatives: ['obtener', 'jalar', 'traer'], // "jalar" (pull) common in MX tech
129
+ normalized: 'fetch',
130
+ },
131
+ settle: { primary: 'estabilizar', normalized: 'settle' },
132
+ // Control flow
133
+ if: { primary: 'si', normalized: 'if' },
134
+ when: { primary: 'cuando', normalized: 'when' },
135
+ where: { primary: 'donde', normalized: 'where' },
136
+ else: { primary: 'sino', alternatives: ['de lo contrario', 'si no'], normalized: 'else' },
137
+ repeat: { primary: 'repetir', normalized: 'repeat' },
138
+ for: { primary: 'para', normalized: 'for' },
139
+ while: { primary: 'mientras', normalized: 'while' },
140
+ continue: { primary: 'continuar', alternatives: ['seguir'], normalized: 'continue' },
141
+ halt: { primary: 'detener', alternatives: ['parar'], normalized: 'halt' },
142
+ throw: {
143
+ primary: 'lanzar',
144
+ alternatives: ['aventar', 'arrojar'], // "aventar" is Mexican
145
+ normalized: 'throw',
146
+ },
147
+ call: { primary: 'llamar', normalized: 'call' },
148
+ return: { primary: 'retornar', alternatives: ['devolver', 'regresar'], normalized: 'return' },
149
+ then: { primary: 'entonces', alternatives: ['luego', 'después'], normalized: 'then' },
150
+ and: { primary: 'y', alternatives: ['además', 'también'], normalized: 'and' },
151
+ end: { primary: 'fin', alternatives: ['final', 'terminar'], normalized: 'end' },
152
+ // Advanced
153
+ js: { primary: 'js', normalized: 'js' },
154
+ async: { primary: 'asíncrono', normalized: 'async' },
155
+ tell: { primary: 'decir', normalized: 'tell' },
156
+ default: { primary: 'predeterminar', alternatives: ['por defecto'], normalized: 'default' },
157
+ init: { primary: 'iniciar', alternatives: ['inicializar', 'arrancar'], normalized: 'init' },
158
+ behavior: { primary: 'comportamiento', normalized: 'behavior' },
159
+ install: { primary: 'instalar', normalized: 'install' },
160
+ measure: { primary: 'medir', normalized: 'measure' },
161
+ // Modifiers
162
+ into: { primary: 'en', alternatives: ['dentro de'], normalized: 'into' },
163
+ before: { primary: 'antes', normalized: 'before' },
164
+ after: { primary: 'después', normalized: 'after' },
165
+ // Event modifiers
166
+ until: { primary: 'hasta', normalized: 'until' },
167
+ event: { primary: 'evento', normalized: 'event' },
168
+ from: { primary: 'de', alternatives: ['desde'], normalized: 'from' },
169
+ },
170
+ eventHandler: {
171
+ keyword: { primary: 'al', alternatives: ['cuando', 'en'], normalized: 'on' },
172
+ sourceMarker: { primary: 'de', alternatives: ['desde'], position: 'before' },
173
+ eventMarker: { primary: 'al', alternatives: ['cuando'], position: 'before' },
174
+ temporalMarkers: ['cuando', 'al'],
175
+ },
176
+ };
@@ -83,6 +83,12 @@ export const thaiProfile: LanguageProfile = {
83
83
  // DOM focus
84
84
  focus: { primary: 'โฟกัส', alternatives: [], normalized: 'focus' },
85
85
  blur: { primary: 'เบลอ', alternatives: [], normalized: 'blur' },
86
+ // Common event names (for event handler patterns)
87
+ click: { primary: 'คลิก', normalized: 'click' },
88
+ hover: { primary: 'โฮเวอร์', alternatives: ['วางเมาส์'], normalized: 'hover' },
89
+ submit: { primary: 'ส่ง', alternatives: ['ส่งข้อมูล'], normalized: 'submit' },
90
+ input: { primary: 'ป้อน', alternatives: ['กรอก'], normalized: 'input' },
91
+ change: { primary: 'เปลี่ยน', alternatives: ['เปลี่ยนแปลง'], normalized: 'change' },
86
92
  // Navigation
87
93
  go: { primary: 'ไป', alternatives: ['ไปที่'], normalized: 'go' },
88
94
  // Async
@@ -128,5 +134,10 @@ export const thaiProfile: LanguageProfile = {
128
134
  eventHandler: {
129
135
  keyword: { primary: 'เมื่อ', alternatives: ['ตอน'], normalized: 'on' },
130
136
  sourceMarker: { primary: 'จาก', position: 'before' },
137
+ // Event marker: เมื่อ (when), used in SVO pattern
138
+ // Pattern: เมื่อ [event] [verb] [patient] ใน [destination?]
139
+ // Example: เมื่อ คลิก สลับ .active ใน #button
140
+ eventMarker: { primary: 'เมื่อ', alternatives: ['ตอน'], position: 'before' },
141
+ temporalMarkers: ['เมื่อ', 'ตอน'], // temporal conjunctions (when)
131
142
  },
132
143
  };
@@ -45,8 +45,36 @@ export const turkishProfile: LanguageProfile = {
45
45
  },
46
46
  },
47
47
  roleMarkers: {
48
- patient: { primary: 'i', alternatives: ['ı', 'u', 'ü'], position: 'after' }, // Accusative
49
- destination: { primary: 'e', alternatives: ['a', 'de', 'da', 'te', 'ta'], position: 'after' }, // Dative/Locative
48
+ patient: {
49
+ primary: 'i',
50
+ alternatives: ['ı', 'u', 'ü', 'yi', 'yı', 'yu', 'yü', 'ni', 'nı', 'nu', 'nü'],
51
+ position: 'after',
52
+ }, // Accusative (with buffer consonants y/n)
53
+ destination: {
54
+ primary: 'e',
55
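+ // Include dative (e/a), locative (de/da/te/ta) and genitive (ın/in/un/ün) forms, plus their buffer-consonant variants (ye/ya/ne/na, nın/nin/nun/nün); the genitive forms support possessive patterns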
56
+ // Genitive is used in "X's Y" patterns: #button ın .active = "#button's .active"
57
+ alternatives: [
58
+ 'a',
59
+ 'ye',
60
+ 'ya',
61
+ 'ne',
62
+ 'na',
63
+ 'de',
64
+ 'da',
65
+ 'te',
66
+ 'ta',
67
+ 'ın',
68
+ 'in',
69
+ 'un',
70
+ 'ün',
71
+ 'nın',
72
+ 'nin',
73
+ 'nun',
74
+ 'nün',
75
+ ],
76
+ position: 'after',
77
+ }, // Dative/Locative + Genitive (with buffer consonants)
50
78
  source: { primary: 'den', alternatives: ['dan', 'ten', 'tan'], position: 'after' }, // Ablative
51
79
  style: { primary: 'le', alternatives: ['la', 'yle', 'yla'], position: 'after' }, // Instrumental
52
80
  event: { primary: 'i', alternatives: ['ı', 'u', 'ü'], position: 'after' }, // Event as accusative
@@ -59,10 +87,10 @@ export const turkishProfile: LanguageProfile = {
59
87
  // Content operations
60
88
  put: { primary: 'koy', normalized: 'put' },
61
89
  append: { primary: 'ekle', normalized: 'append' },
62
- take: { primary: 'al', normalized: 'take' },
90
+ take: { primary: 'tut', normalized: 'take' }, // al removed to avoid collision with get
63
91
  make: { primary: 'yap', normalized: 'make' },
64
92
  clone: { primary: 'kopyala', normalized: 'clone' },
65
- swap: { primary: 'değiştir', alternatives: ['takas'], normalized: 'swap' },
93
+ swap: { primary: 'takas', normalized: 'swap' }, // değiştir (the former primary) removed to avoid collision with toggle; takas promoted from alternative to primary
66
94
  morph: { primary: 'dönüştür', alternatives: ['şekil değiştir'], normalized: 'morph' },
67
95
  // Variable operations
68
96
  set: { primary: 'ayarla', alternatives: ['yap', 'belirle'], normalized: 'set' },
@@ -79,8 +107,14 @@ export const turkishProfile: LanguageProfile = {
79
107
  trigger: { primary: 'tetikle', normalized: 'trigger' },
80
108
  send: { primary: 'gönder', normalized: 'send' },
81
109
  // DOM focus
82
- focus: { primary: 'odak', normalized: 'focus' },
83
- blur: { primary: 'bulanık', normalized: 'blur' },
110
+ focus: { primary: 'odak', alternatives: ['odaklanma'], normalized: 'focus' },
111
+ blur: { primary: 'bulanık', alternatives: ['bulanıklık'], normalized: 'blur' },
112
+ // Common event names (for event handler patterns)
113
+ click: { primary: 'tıklama', alternatives: ['tıkla'], normalized: 'click' },
114
+ hover: { primary: 'üzerine gelme', alternatives: ['üzerinde gezinme'], normalized: 'hover' },
115
+ submit: { primary: 'gönderme', alternatives: ['gönder'], normalized: 'submit' },
116
+ input: { primary: 'giriş', alternatives: ['girdi'], normalized: 'input' },
117
+ change: { primary: 'değişiklik', alternatives: ['değişim'], normalized: 'change' },
84
118
  // Navigation
85
119
  go: { primary: 'git', normalized: 'go' },
86
120
  // Async
@@ -119,6 +153,14 @@ export const turkishProfile: LanguageProfile = {
119
153
  // Event modifiers (for repeat until event)
120
154
  until: { primary: 'kadar', normalized: 'until' },
121
155
  event: { primary: 'olay', normalized: 'event' },
122
- from: { primary: '-den', alternatives: ['-dan'], normalized: 'from' },
156
+ from: { primary: 'den', alternatives: ['dan'], normalized: 'from' },
157
+ },
158
+ eventHandler: {
159
+ // Event marker: da/de/ta/te (locative case suffix with vowel harmony), used in SOV pattern
160
+ // Pattern: [event] da [patient] i [action]
161
+ // Example: tıklama da .active i değiştir
162
+ // Note: Vowel harmony variants (da/de/ta/te) should be handled by vowel harmony expansion; they are also listed explicitly as alternatives below
163
+ eventMarker: { primary: 'da', alternatives: ['de', 'ta', 'te'], position: 'after' },
164
+ temporalMarkers: ['dığında', 'diğinde'], // temporal converb suffixes (when)
123
165
  },
124
166
  };
@@ -65,7 +65,7 @@ export interface PossessiveConfig {
65
65
  * Complete language profile for pattern generation.
66
66
  */
67
67
  export interface LanguageProfile {
68
- /** ISO 639-1 language code */
68
+ /** ISO 639-1 or BCP 47 language code (e.g., 'es' or 'es-MX') */
69
69
  readonly code: string;
70
70
  /** Human-readable language name */
71
71
  readonly name: string;
@@ -104,19 +104,35 @@ export interface LanguageProfile {
104
104
  * Individual keywords can override this via KeywordTranslation.form
105
105
  */
106
106
  readonly defaultVerbForm?: VerbForm;
107
+ /**
108
+ * Base language code to extend (for regional variants).
109
+ * When set, this profile inherits from the base and overrides specific fields.
110
+ * Example: 'es-MX' profile with extends: 'es' inherits from Spanish base.
111
+ */
112
+ readonly extends?: string;
107
113
  }
108
114
 
109
115
  /**
110
116
  * Configuration for event handler pattern generation.
111
- * Used by simple SVO languages that don't need hand-crafted patterns.
117
+ * Supports both SVO and SOV/VSO languages.
112
118
  */
113
119
  export interface EventHandlerConfig {
114
- /** Primary event keyword (e.g., 'on', 'bei', 'sur') */
115
- readonly keyword: KeywordTranslation;
120
+ /** Primary event keyword (e.g., 'on', 'bei', 'sur') for SVO */
121
+ readonly keyword?: KeywordTranslation;
116
122
  /** Source filter marker (e.g., 'from', 'von', 'de') */
117
- readonly sourceMarker: RoleMarker;
123
+ readonly sourceMarker?: RoleMarker;
118
124
  /** Conditional keyword (e.g., 'when', 'wenn', 'quand') */
119
125
  readonly conditionalKeyword?: KeywordTranslation;
126
+
127
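+ /** Event marker placed before or after the event name (e.g., で (Japanese), 할 때 (Korean), da (Turkish), عند (Arabic)); introduced for SOV/VSO languages, but SVO profiles may set it too (e.g., при (Ukrainian), khi (Vietnamese)) */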
128
+ readonly eventMarker?: RoleMarker;
129
+ /** Temporal/conditional markers that can optionally appear with events */
130
+ readonly temporalMarkers?: string[];
131
+ /**
132
+ * Negation marker for expressing negated events (e.g., Arabic عدم = "not/lack of").
133
+ * Used in patterns like: عند عدم التركيز = "when not focusing" = "on blur"
134
+ */
135
+ readonly negationMarker?: RoleMarker;
120
136
  }
121
137
 
122
138
  /**
@@ -173,6 +173,12 @@ export const ukrainianProfile: LanguageProfile = {
173
173
  normalized: 'blur',
174
174
  form: 'infinitive',
175
175
  },
176
+ // Common event names (for event handler patterns)
177
+ click: { primary: 'кліку', alternatives: ['клік', 'натисканні'], normalized: 'click' },
178
+ hover: { primary: 'наведенні', alternatives: ['наведення'], normalized: 'hover' },
179
+ submit: { primary: 'відправці', alternatives: ['відправка'], normalized: 'submit' },
180
+ input: { primary: 'введенні', alternatives: ['введення'], normalized: 'input' },
181
+ change: { primary: 'зміні', alternatives: ['зміна'], normalized: 'change' },
176
182
  // Navigation
177
183
  go: {
178
184
  primary: 'перейти',
@@ -266,5 +272,10 @@ export const ukrainianProfile: LanguageProfile = {
266
272
  eventHandler: {
267
273
  keyword: { primary: 'при', alternatives: ['коли'], normalized: 'on' },
268
274
  sourceMarker: { primary: 'на', alternatives: ['в', 'при'], position: 'before' },
275
+ // Event marker: при (at/on/upon), used in SVO pattern
276
+ // Pattern: при [event] [verb] [patient] на [destination?]
277
+ // Example: при кліку перемкнути .active на #button
278
+ eventMarker: { primary: 'при', alternatives: ['коли'], position: 'before' },
279
+ temporalMarkers: ['коли', 'якщо'], // temporal conjunctions (when, if)
269
280
  },
270
281
  };
@@ -86,6 +86,12 @@ export const vietnameseProfile: LanguageProfile = {
86
86
  // DOM focus
87
87
  focus: { primary: 'tập trung', normalized: 'focus' },
88
88
  blur: { primary: 'mất tập trung', normalized: 'blur' },
89
+ // Common event names (for event handler patterns)
90
+ click: { primary: 'nhấp', alternatives: ['bấm'], normalized: 'click' },
91
+ hover: { primary: 'di chuột', alternatives: ['rê chuột'], normalized: 'hover' },
92
+ submit: { primary: 'gửi', alternatives: ['nộp'], normalized: 'submit' },
93
+ input: { primary: 'nhập', alternatives: ['nhập liệu'], normalized: 'input' },
94
+ change: { primary: 'thay đổi', alternatives: ['đổi'], normalized: 'change' },
89
95
  // Navigation
90
96
  go: { primary: 'đi đến', alternatives: ['đi'], normalized: 'go' },
91
97
  // Async
@@ -129,5 +135,10 @@ export const vietnameseProfile: LanguageProfile = {
129
135
  eventHandler: {
130
136
  keyword: { primary: 'khi', alternatives: ['lúc', 'trên'], normalized: 'on' },
131
137
  sourceMarker: { primary: 'trên', alternatives: ['tại'], position: 'before' },
138
+ // Event marker: khi (when), used in SVO pattern
139
+ // Pattern: khi [event] [verb] [patient] vào [destination?]
140
+ // Example: khi nhấp chuyển đổi .active vào #button
141
+ eventMarker: { primary: 'khi', alternatives: ['lúc'], position: 'before' },
142
+ temporalMarkers: ['khi', 'lúc'], // temporal conjunctions (when)
132
143
  },
133
144
  };
@@ -1160,6 +1160,75 @@ export const SUPPORTED_LANGUAGES: LanguageChecklist[] = [
1160
1160
  missingFromTokenizer: [], // Now synced
1161
1161
  potentialConflicts: [],
1162
1162
  },
1163
+ {
1164
+ // ⚠️ TEST VARIANT - demonstrates language variant feature, not production-ready
1165
+ code: 'es-MX',
1166
+ name: 'Spanish (Mexico) [TEST]',
1167
+ wordOrder: 'SVO',
1168
+ direction: 'ltr',
1169
+ files: {
1170
+ languageProfile: true,
1171
+ tokenizer: true, // Uses Spanish tokenizer via inheritance
1172
+ tokenizerRegistered: true,
1173
+ morphologicalNormalizer: true, // Uses Spanish normalizer
1174
+ eventHandlerPatterns: true, // Uses Spanish patterns
1175
+ tests: true,
1176
+ morphologyTests: false,
1177
+ },
1178
+ morphology: {
1179
+ needed: true,
1180
+ reason: 'Example variant - uses Spanish morphology with illustrative Mexican vocabulary',
1181
+ inflectionTypes: ['inherits from Spanish'],
1182
+ integratedWithTokenizer: true,
1183
+ confidenceThreshold: 0.7,
1184
+ },
1185
+ profileKeywords: [
1186
+ 'alternar',
1187
+ 'añadir',
1188
+ 'quitar',
1189
+ 'poner',
1190
+ 'establecer',
1191
+ 'mostrar',
1192
+ 'ocultar',
1193
+ 'añadir',
1194
+ 'anteponer',
1195
+ 'incrementar',
1196
+ 'decrementar',
1197
+ 'esperar',
1198
+ 'ahorita', // Mexican alternative for wait
1199
+ 'obtener',
1200
+ 'jalar', // Mexican alternative for fetch
1201
+ 'ir',
1202
+ 'disparar',
1203
+ 'enviar',
1204
+ 'registrar',
1205
+ 'tomar',
1206
+ 'hacer',
1207
+ 'clonar',
1208
+ 'enfocar',
1209
+ 'desenfocar',
1210
+ 'transición',
1211
+ 'estabilizar',
1212
+ 'llamar',
1213
+ 'devolver',
1214
+ 'aventar', // Mexican alternative for throw
1215
+ 'js',
1216
+ 'asíncrono',
1217
+ 'decir',
1218
+ 'predeterminar',
1219
+ 'iniciar',
1220
+ 'comportamiento',
1221
+ 'dentro de',
1222
+ 'antes',
1223
+ 'después',
1224
+ ],
1225
+ tokenizerKeywords: [
1226
+ // Inherits Spanish tokenizer keywords
1227
+ // Mexican-specific alternatives are in profile
1228
+ ],
1229
+ missingFromTokenizer: [],
1230
+ potentialConflicts: [],
1231
+ },
1163
1232
  {
1164
1233
  code: 'tr',
1165
1234
  name: 'Turkish',
@@ -2893,6 +2962,48 @@ export const SUPPORTED_LANGUAGES: LanguageChecklist[] = [
2893
2962
  missingFromTokenizer: [],
2894
2963
  potentialConflicts: [],
2895
2964
  },
2965
+ {
2966
+ code: 'he',
2967
+ name: 'Hebrew',
2968
+ wordOrder: 'SVO',
2969
+ direction: 'rtl',
2970
+ files: {
2971
+ languageProfile: true,
2972
+ tokenizer: true,
2973
+ tokenizerRegistered: true,
2974
+ morphologicalNormalizer: false,
2975
+ eventHandlerPatterns: true,
2976
+ tests: true,
2977
+ morphologyTests: false,
2978
+ },
2979
+ morphology: {
2980
+ needed: true,
2981
+ reason: 'Hebrew has verb conjugation (binyanim) but commands use imperative/infinitive forms',
2982
+ inflectionTypes: ['imperative form', 'infinitive form', 'binyanim patterns'],
2983
+ integratedWithTokenizer: false,
2984
+ confidenceThreshold: 0.7,
2985
+ },
2986
+ profileKeywords: ['החלף', 'הוסף', 'הסר', 'שים', 'קבע', 'הראה', 'הסתר', 'הגדל', 'הקטן'],
2987
+ tokenizerKeywords: [
2988
+ 'החלף',
2989
+ 'שנה',
2990
+ 'הוסף',
2991
+ 'הסר',
2992
+ 'מחק',
2993
+ 'שים',
2994
+ 'הכנס',
2995
+ 'קבע',
2996
+ 'הגדר',
2997
+ 'קבל',
2998
+ 'הראה',
2999
+ 'הצג',
3000
+ 'הסתר',
3001
+ 'הגדל',
3002
+ 'הקטן',
3003
+ ],
3004
+ missingFromTokenizer: [],
3005
+ potentialConflicts: [],
3006
+ },
2896
3007
  ];
2897
3008
 
2898
3009
  /**
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * All Languages Module
3
3
  *
4
- * Imports and registers all 23 supported languages.
4
+ * Imports and registers all 25 supported languages.
5
5
  * Use this for the full bundle with all language support.
6
6
  *
7
7
  * @example
@@ -20,7 +20,9 @@ import './bn';
20
20
  import './de';
21
21
  import './en';
22
22
  import './es';
23
+ import './es-MX';
23
24
  import './fr';
25
+ import './he';
24
26
  import './hi';
25
27
  import './id';
26
28
  import './it';
@@ -45,7 +47,9 @@ export * from './bn';
45
47
  export * from './de';
46
48
  export * from './en';
47
49
  export * from './es';
50
+ export * from './es-MX';
48
51
  export * from './fr';
52
+ export * from './he';
49
53
  export * from './hi';
50
54
  export * from './id';
51
55
  export * from './it';
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Mexican Spanish Language Module
3
+ *
4
+ * Self-registering module for Mexican Spanish (es-MX) language support.
5
+ * Registers the Mexican Spanish profile with the base Spanish tokenizer.
6
+ *
7
+ * This demonstrates the language variant feature:
8
+ * - Reuses the Spanish tokenizer (Mexican Spanish has same tokenization rules)
9
+ * - Has its own profile with Mexican-specific keywords and alternatives
10
+ * - Profile extends 'es' for inheritance of common properties
11
+ *
12
+ * @example
13
+ * ```typescript
14
+ * // Import to register
15
+ * import '@lokascript/semantic/languages/es-MX';
16
+ *
17
+ * // Parse with Mexican Spanish
18
+ * parse('ahorita mostrar .loading', 'es-MX'); // "ahorita" is an illustrative es-MX alternative for "wait" (not validated by native speakers)
19
+ * parse('jalar datos de /api', 'es-MX'); // "jalar" (pull) is listed as an alternative for both "get" and "fetch" in the es-MX profile
20
+ * ```
21
+ */
22
+
23
+ import { registerLanguage } from '../registry';
24
+ import { spanishTokenizer } from '../tokenizers/spanish';
25
+ import { spanishMexicoProfile } from '../generators/profiles/spanishMexico';
26
+
27
+ export { spanishMexicoProfile } from '../generators/profiles/spanishMexico';
28
+ // Re-export Spanish tokenizer since we use it for es-MX
29
+ export { spanishTokenizer } from '../tokenizers/spanish';
30
+
31
+ // Register es-MX with the Mexican Spanish profile and Spanish tokenizer
32
+ registerLanguage('es-MX', spanishTokenizer, spanishMexicoProfile);
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Hebrew Language Module
3
+ *
4
+ * Self-registering module for Hebrew language support.
5
+ * Importing this module registers Hebrew tokenizer and profile.
6
+ */
7
+
8
+ import { registerLanguage } from '../registry';
9
+ import { hebrewTokenizer } from '../tokenizers/he';
10
+ import { hebrewProfile } from '../generators/profiles/hebrew';
11
+
12
+ export { hebrewTokenizer } from '../tokenizers/he';
13
+ export { hebrewProfile } from '../generators/profiles/hebrew';
14
+
15
+ registerLanguage('he', hebrewTokenizer, hebrewProfile);
@@ -112,7 +112,16 @@ export class PatternMatcher {
112
112
  captured: Map<SemanticRole, SemanticValue>
113
113
  ): boolean {
114
114
  // Skip leading conjunctions for Arabic (proclitics: و, ف, ول, وب, etc.)
115
- if (this.currentProfile?.code === 'ar') {
115
+ // BUT NOT if the pattern explicitly expects a conjunction (proclitic patterns)
116
+ const firstPatternToken = patternTokens[0];
117
+ const patternExpectsConjunction =
118
+ firstPatternToken?.type === 'literal' &&
119
+ (firstPatternToken.value === 'and' ||
120
+ firstPatternToken.value === 'then' ||
121
+ firstPatternToken.alternatives?.includes('and') ||
122
+ firstPatternToken.alternatives?.includes('then'));
123
+
124
+ if (this.currentProfile?.code === 'ar' && !patternExpectsConjunction) {
116
125
  while (tokens.peek()?.kind === 'conjunction') {
117
126
  tokens.advance();
118
127
  }
@@ -106,6 +106,7 @@ export class SemanticParserImpl implements ISemanticParser {
106
106
  return createCommandNode(match.pattern.command, roles, {
107
107
  sourceLanguage: language,
108
108
  patternId: match.pattern.id,
109
+ confidence: match.confidence,
109
110
  });
110
111
  }
111
112
 
@@ -150,6 +151,7 @@ export class SemanticParserImpl implements ISemanticParser {
150
151
  const commandNode = createCommandNode(actionName as ActionType, roles, {
151
152
  sourceLanguage: language,
152
153
  patternId: match.pattern.id,
154
+ confidence: match.confidence,
153
155
  });
154
156
 
155
157
  // Check if pattern has continuation marker (then-chains)
@@ -195,6 +197,7 @@ export class SemanticParserImpl implements ISemanticParser {
195
197
  return createEventHandler(eventValue, body, eventModifiers, {
196
198
  sourceLanguage: language,
197
199
  patternId: match.pattern.id,
200
+ confidence: match.confidence,
198
201
  });
199
202
  }
200
203