@lokascript/semantic 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-ar.ar.global.js +2 -2
- package/dist/browser-core.core.global.js +2 -2
- package/dist/browser-de.de.global.js +2 -2
- package/dist/browser-east-asian.east-asian.global.js +2 -2
- package/dist/browser-en-tr.en-tr.global.js +2 -2
- package/dist/browser-en.en.global.js +2 -2
- package/dist/browser-es-en.es-en.global.js +2 -2
- package/dist/browser-es.es.global.js +2 -2
- package/dist/browser-fr.fr.global.js +2 -2
- package/dist/browser-id.id.global.js +2 -2
- package/dist/browser-ja.ja.global.js +2 -2
- package/dist/browser-ko.ko.global.js +2 -2
- package/dist/browser-lazy.lazy.global.js +2 -2
- package/dist/browser-priority.priority.global.js +2 -2
- package/dist/browser-pt.pt.global.js +2 -2
- package/dist/browser-qu.qu.global.js +2 -2
- package/dist/browser-sw.sw.global.js +2 -2
- package/dist/browser-tr.tr.global.js +2 -2
- package/dist/browser-western.western.global.js +2 -2
- package/dist/browser-zh.zh.global.js +2 -2
- package/dist/browser.global.js +2 -2
- package/dist/browser.global.js.map +1 -1
- package/dist/index.cjs +13042 -17462
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +49 -5
- package/dist/index.d.ts +49 -5
- package/dist/index.js +14044 -18464
- package/dist/index.js.map +1 -1
- package/dist/languages/ar.d.ts +1 -1
- package/dist/languages/ar.js +31 -44
- package/dist/languages/ar.js.map +1 -1
- package/dist/languages/de.d.ts +1 -1
- package/dist/languages/de.js +14 -2
- package/dist/languages/de.js.map +1 -1
- package/dist/languages/en.d.ts +1 -1
- package/dist/languages/en.js +558 -12
- package/dist/languages/en.js.map +1 -1
- package/dist/languages/es.d.ts +1 -1
- package/dist/languages/es.js +16 -0
- package/dist/languages/es.js.map +1 -1
- package/dist/languages/fr.d.ts +1 -1
- package/dist/languages/fr.js +14 -2
- package/dist/languages/fr.js.map +1 -1
- package/dist/languages/id.d.ts +1 -1
- package/dist/languages/id.js +14 -2
- package/dist/languages/id.js.map +1 -1
- package/dist/languages/ja.d.ts +1 -1
- package/dist/languages/ja.js +18 -3
- package/dist/languages/ja.js.map +1 -1
- package/dist/languages/ko.d.ts +8 -1
- package/dist/languages/ko.js +75 -43
- package/dist/languages/ko.js.map +1 -1
- package/dist/languages/pt.d.ts +1 -1
- package/dist/languages/pt.js +17 -0
- package/dist/languages/pt.js.map +1 -1
- package/dist/languages/qu.d.ts +12 -1
- package/dist/languages/qu.js +77 -2
- package/dist/languages/qu.js.map +1 -1
- package/dist/languages/sw.d.ts +1 -1
- package/dist/languages/sw.js.map +1 -1
- package/dist/languages/tr.d.ts +9 -1
- package/dist/languages/tr.js +96 -72
- package/dist/languages/tr.js.map +1 -1
- package/dist/languages/zh.d.ts +1 -1
- package/dist/languages/zh.js +16 -0
- package/dist/languages/zh.js.map +1 -1
- package/dist/{types-C4dcj53L.d.ts → types-BY3Id07j.d.ts} +20 -5
- package/package.json +20 -29
- package/src/generators/command-schemas.ts +21 -10
- package/src/generators/event-handler-generator.ts +50 -44
- package/src/generators/language-profiles.ts +6 -0
- package/src/generators/pattern-generator.ts +883 -1
- package/src/generators/profiles/arabic.ts +19 -3
- package/src/generators/profiles/bengali.ts +12 -1
- package/src/generators/profiles/chinese.ts +15 -0
- package/src/generators/profiles/french.ts +12 -1
- package/src/generators/profiles/german.ts +12 -1
- package/src/generators/profiles/hebrew.ts +148 -0
- package/src/generators/profiles/hindi.ts +12 -1
- package/src/generators/profiles/index.ts +2 -0
- package/src/generators/profiles/indonesian.ts +12 -1
- package/src/generators/profiles/italian.ts +16 -0
- package/src/generators/profiles/japanese.ts +11 -2
- package/src/generators/profiles/korean.ts +15 -1
- package/src/generators/profiles/polish.ts +12 -0
- package/src/generators/profiles/portuguese.ts +16 -0
- package/src/generators/profiles/russian.ts +11 -0
- package/src/generators/profiles/spanish.ts +15 -0
- package/src/generators/profiles/spanishMexico.ts +176 -0
- package/src/generators/profiles/thai.ts +11 -0
- package/src/generators/profiles/turkish.ts +49 -7
- package/src/generators/profiles/types.ts +21 -5
- package/src/generators/profiles/ukrainian.ts +11 -0
- package/src/generators/profiles/vietnamese.ts +11 -0
- package/src/language-building-schema.ts +111 -0
- package/src/languages/_all.ts +5 -1
- package/src/languages/es-MX.ts +32 -0
- package/src/languages/he.ts +15 -0
- package/src/parser/pattern-matcher.ts +10 -1
- package/src/parser/semantic-parser.ts +3 -0
- package/src/patterns/add/ar.ts +3 -59
- package/src/patterns/add/index.ts +5 -1
- package/src/patterns/add/ja.ts +3 -81
- package/src/patterns/add/ko.ts +3 -62
- package/src/patterns/add/qu.ts +69 -0
- package/src/patterns/add/tr.ts +3 -59
- package/src/patterns/builders.ts +1 -0
- package/src/patterns/decrement/tr.ts +3 -36
- package/src/patterns/event-handler/ar.ts +3 -139
- package/src/patterns/event-handler/he.ts +15 -0
- package/src/patterns/event-handler/index.ts +5 -1
- package/src/patterns/event-handler/ja.ts +3 -106
- package/src/patterns/event-handler/ko.ts +3 -121
- package/src/patterns/event-handler/ms.ts +45 -20
- package/src/patterns/event-handler/tr.ts +3 -158
- package/src/patterns/get/ar.ts +3 -37
- package/src/patterns/get/ja.ts +3 -41
- package/src/patterns/get/ko.ts +3 -41
- package/src/patterns/grammar-transformed/ja.ts +3 -1701
- package/src/patterns/grammar-transformed/ko.ts +3 -1299
- package/src/patterns/grammar-transformed/tr.ts +3 -1055
- package/src/patterns/hide/ar.ts +3 -55
- package/src/patterns/hide/ja.ts +3 -57
- package/src/patterns/hide/ko.ts +3 -57
- package/src/patterns/hide/tr.ts +3 -53
- package/src/patterns/increment/tr.ts +3 -40
- package/src/patterns/put/ar.ts +3 -62
- package/src/patterns/put/ja.ts +3 -63
- package/src/patterns/put/ko.ts +3 -55
- package/src/patterns/put/tr.ts +3 -55
- package/src/patterns/remove/ar.ts +3 -59
- package/src/patterns/remove/index.ts +5 -1
- package/src/patterns/remove/ja.ts +3 -62
- package/src/patterns/remove/ko.ts +3 -66
- package/src/patterns/remove/qu.ts +69 -0
- package/src/patterns/remove/tr.ts +3 -66
- package/src/patterns/set/ar.ts +3 -72
- package/src/patterns/set/ja.ts +3 -74
- package/src/patterns/set/ko.ts +3 -73
- package/src/patterns/set/tr.ts +3 -95
- package/src/patterns/show/ar.ts +3 -55
- package/src/patterns/show/ja.ts +3 -57
- package/src/patterns/show/ko.ts +3 -61
- package/src/patterns/show/tr.ts +3 -53
- package/src/patterns/take/ar.ts +3 -39
- package/src/patterns/toggle/ar.ts +3 -49
- package/src/patterns/toggle/index.ts +5 -1
- package/src/patterns/toggle/ja.ts +3 -144
- package/src/patterns/toggle/ko.ts +3 -101
- package/src/patterns/toggle/qu.ts +90 -0
- package/src/patterns/toggle/tr.ts +3 -76
- package/src/registry.ts +179 -15
- package/src/tokenizers/arabic.ts +13 -46
- package/src/tokenizers/bengali.ts +2 -16
- package/src/tokenizers/he.ts +542 -0
- package/src/tokenizers/index.ts +1 -0
- package/src/tokenizers/japanese.ts +3 -1
- package/src/tokenizers/korean.ts +104 -48
- package/src/tokenizers/ms.ts +3 -0
- package/src/tokenizers/quechua.ts +101 -2
- package/src/tokenizers/turkish.ts +64 -69
- package/src/types.ts +13 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mexican Spanish Language Profile
|
|
3
|
+
*
|
|
4
|
+
* ⚠️ EXAMPLE/TEST ONLY - NOT FOR PRODUCTION USE
|
|
5
|
+
*
|
|
6
|
+
* This profile demonstrates the language variant feature:
|
|
7
|
+
* - Extends base Spanish using `extends: 'es'`
|
|
8
|
+
* - Overrides specific keywords with regional alternatives
|
|
9
|
+
* - Reuses the Spanish tokenizer
|
|
10
|
+
*
|
|
11
|
+
* The Mexican-specific vocabulary (ahorita, jalar, aventar, etc.) is
|
|
12
|
+
* illustrative and has NOT been validated by native speakers.
|
|
13
|
+
*
|
|
14
|
+
* To create a production-ready regional variant, consult native speakers
|
|
15
|
+
* to verify vocabulary choices and completeness.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import type { LanguageProfile } from './types';
|
|
19
|
+
|
|
20
|
+
export const spanishMexicoProfile: LanguageProfile = {
|
|
21
|
+
code: 'es-MX',
|
|
22
|
+
name: 'Spanish (Mexico)',
|
|
23
|
+
nativeName: 'Español (México)',
|
|
24
|
+
direction: 'ltr',
|
|
25
|
+
wordOrder: 'SVO',
|
|
26
|
+
markingStrategy: 'preposition',
|
|
27
|
+
usesSpaces: true,
|
|
28
|
+
defaultVerbForm: 'infinitive',
|
|
29
|
+
extends: 'es', // Inherit from base Spanish
|
|
30
|
+
|
|
31
|
+
verb: {
|
|
32
|
+
position: 'start',
|
|
33
|
+
subjectDrop: true,
|
|
34
|
+
},
|
|
35
|
+
references: {
|
|
36
|
+
me: 'yo',
|
|
37
|
+
it: 'ello',
|
|
38
|
+
you: 'tú', // Mexico uses tú (not vos like Argentina)
|
|
39
|
+
result: 'resultado',
|
|
40
|
+
event: 'evento',
|
|
41
|
+
target: 'objetivo',
|
|
42
|
+
body: 'cuerpo',
|
|
43
|
+
},
|
|
44
|
+
possessive: {
|
|
45
|
+
marker: 'de',
|
|
46
|
+
markerPosition: 'before-property',
|
|
47
|
+
usePossessiveAdjectives: true,
|
|
48
|
+
specialForms: {
|
|
49
|
+
me: 'mi',
|
|
50
|
+
it: 'su',
|
|
51
|
+
you: 'tu',
|
|
52
|
+
},
|
|
53
|
+
keywords: {
|
|
54
|
+
mi: 'me',
|
|
55
|
+
tu: 'you',
|
|
56
|
+
su: 'it',
|
|
57
|
+
},
|
|
58
|
+
},
|
|
59
|
+
roleMarkers: {
|
|
60
|
+
destination: { primary: 'en', alternatives: ['sobre', 'a'], position: 'before' },
|
|
61
|
+
source: { primary: 'de', alternatives: ['desde'], position: 'before' },
|
|
62
|
+
patient: { primary: '', position: 'before' },
|
|
63
|
+
style: { primary: 'con', position: 'before' },
|
|
64
|
+
},
|
|
65
|
+
keywords: {
|
|
66
|
+
// Class/Attribute operations - Mexican alternatives
|
|
67
|
+
toggle: {
|
|
68
|
+
primary: 'alternar',
|
|
69
|
+
alternatives: ['cambiar', 'conmutar', 'switchear'], // "switchear" is Spanglish common in MX tech
|
|
70
|
+
normalized: 'toggle',
|
|
71
|
+
},
|
|
72
|
+
add: { primary: 'agregar', alternatives: ['añadir', 'meter'], normalized: 'add' },
|
|
73
|
+
remove: {
|
|
74
|
+
primary: 'quitar',
|
|
75
|
+
alternatives: ['eliminar', 'borrar', 'sacar'], // "borrar" more common in MX
|
|
76
|
+
normalized: 'remove',
|
|
77
|
+
},
|
|
78
|
+
// Content operations
|
|
79
|
+
put: { primary: 'poner', alternatives: ['colocar', 'meter'], normalized: 'put' },
|
|
80
|
+
append: { primary: 'añadir', normalized: 'append' },
|
|
81
|
+
prepend: { primary: 'anteponer', normalized: 'prepend' },
|
|
82
|
+
take: { primary: 'tomar', alternatives: ['agarrar'], normalized: 'take' }, // "agarrar" more MX
|
|
83
|
+
make: { primary: 'hacer', alternatives: ['crear'], normalized: 'make' },
|
|
84
|
+
clone: { primary: 'clonar', alternatives: ['copiar'], normalized: 'clone' },
|
|
85
|
+
swap: { primary: 'intercambiar', alternatives: ['cambiar'], normalized: 'swap' },
|
|
86
|
+
morph: { primary: 'transformar', alternatives: ['convertir'], normalized: 'morph' },
|
|
87
|
+
// Variable operations
|
|
88
|
+
set: { primary: 'establecer', alternatives: ['fijar', 'definir', 'setear'], normalized: 'set' },
|
|
89
|
+
get: { primary: 'obtener', alternatives: ['conseguir', 'jalar'], normalized: 'get' },
|
|
90
|
+
increment: {
|
|
91
|
+
primary: 'incrementar',
|
|
92
|
+
alternatives: ['aumentar', 'subir'],
|
|
93
|
+
normalized: 'increment',
|
|
94
|
+
},
|
|
95
|
+
decrement: {
|
|
96
|
+
primary: 'decrementar',
|
|
97
|
+
alternatives: ['disminuir', 'bajar'],
|
|
98
|
+
normalized: 'decrement',
|
|
99
|
+
},
|
|
100
|
+
log: { primary: 'registrar', alternatives: ['imprimir', 'loguear'], normalized: 'log' },
|
|
101
|
+
// Visibility
|
|
102
|
+
show: { primary: 'mostrar', alternatives: ['enseñar'], normalized: 'show' },
|
|
103
|
+
hide: { primary: 'ocultar', alternatives: ['esconder'], normalized: 'hide' },
|
|
104
|
+
transition: { primary: 'transición', alternatives: ['animar'], normalized: 'transition' },
|
|
105
|
+
// Events
|
|
106
|
+
on: { primary: 'en', alternatives: ['cuando', 'al'], normalized: 'on' },
|
|
107
|
+
trigger: { primary: 'disparar', alternatives: ['activar'], normalized: 'trigger' },
|
|
108
|
+
send: { primary: 'enviar', alternatives: ['mandar'], normalized: 'send' }, // "mandar" more MX
|
|
109
|
+
// DOM focus
|
|
110
|
+
focus: { primary: 'enfocar', normalized: 'focus' },
|
|
111
|
+
blur: { primary: 'desenfocar', normalized: 'blur' },
|
|
112
|
+
// Common event names
|
|
113
|
+
click: { primary: 'clic', alternatives: ['hacer clic', 'dar clic'], normalized: 'click' },
|
|
114
|
+
hover: { primary: 'sobrevolar', alternatives: ['pasar encima'], normalized: 'hover' },
|
|
115
|
+
submit: { primary: 'envío', alternatives: ['enviar'], normalized: 'submit' },
|
|
116
|
+
input: { primary: 'entrada', alternatives: ['introducir'], normalized: 'input' },
|
|
117
|
+
change: { primary: 'cambio', alternatives: ['cambiar'], normalized: 'change' },
|
|
118
|
+
// Navigation
|
|
119
|
+
go: { primary: 'ir', alternatives: ['navegar'], normalized: 'go' },
|
|
120
|
+
// Async - Mexican variants
|
|
121
|
+
wait: {
|
|
122
|
+
primary: 'esperar',
|
|
123
|
+
alternatives: ['ahorita', 'aguantar'], // "ahorita" is distinctly Mexican
|
|
124
|
+
normalized: 'wait',
|
|
125
|
+
},
|
|
126
|
+
fetch: {
|
|
127
|
+
primary: 'buscar',
|
|
128
|
+
alternatives: ['obtener', 'jalar', 'traer'], // "jalar" (pull) common in MX tech
|
|
129
|
+
normalized: 'fetch',
|
|
130
|
+
},
|
|
131
|
+
settle: { primary: 'estabilizar', normalized: 'settle' },
|
|
132
|
+
// Control flow
|
|
133
|
+
if: { primary: 'si', normalized: 'if' },
|
|
134
|
+
when: { primary: 'cuando', normalized: 'when' },
|
|
135
|
+
where: { primary: 'donde', normalized: 'where' },
|
|
136
|
+
else: { primary: 'sino', alternatives: ['de lo contrario', 'si no'], normalized: 'else' },
|
|
137
|
+
repeat: { primary: 'repetir', normalized: 'repeat' },
|
|
138
|
+
for: { primary: 'para', normalized: 'for' },
|
|
139
|
+
while: { primary: 'mientras', normalized: 'while' },
|
|
140
|
+
continue: { primary: 'continuar', alternatives: ['seguir'], normalized: 'continue' },
|
|
141
|
+
halt: { primary: 'detener', alternatives: ['parar'], normalized: 'halt' },
|
|
142
|
+
throw: {
|
|
143
|
+
primary: 'lanzar',
|
|
144
|
+
alternatives: ['aventar', 'arrojar'], // "aventar" is Mexican
|
|
145
|
+
normalized: 'throw',
|
|
146
|
+
},
|
|
147
|
+
call: { primary: 'llamar', normalized: 'call' },
|
|
148
|
+
return: { primary: 'retornar', alternatives: ['devolver', 'regresar'], normalized: 'return' },
|
|
149
|
+
then: { primary: 'entonces', alternatives: ['luego', 'después'], normalized: 'then' },
|
|
150
|
+
and: { primary: 'y', alternatives: ['además', 'también'], normalized: 'and' },
|
|
151
|
+
end: { primary: 'fin', alternatives: ['final', 'terminar'], normalized: 'end' },
|
|
152
|
+
// Advanced
|
|
153
|
+
js: { primary: 'js', normalized: 'js' },
|
|
154
|
+
async: { primary: 'asíncrono', normalized: 'async' },
|
|
155
|
+
tell: { primary: 'decir', normalized: 'tell' },
|
|
156
|
+
default: { primary: 'predeterminar', alternatives: ['por defecto'], normalized: 'default' },
|
|
157
|
+
init: { primary: 'iniciar', alternatives: ['inicializar', 'arrancar'], normalized: 'init' },
|
|
158
|
+
behavior: { primary: 'comportamiento', normalized: 'behavior' },
|
|
159
|
+
install: { primary: 'instalar', normalized: 'install' },
|
|
160
|
+
measure: { primary: 'medir', normalized: 'measure' },
|
|
161
|
+
// Modifiers
|
|
162
|
+
into: { primary: 'en', alternatives: ['dentro de'], normalized: 'into' },
|
|
163
|
+
before: { primary: 'antes', normalized: 'before' },
|
|
164
|
+
after: { primary: 'después', normalized: 'after' },
|
|
165
|
+
// Event modifiers
|
|
166
|
+
until: { primary: 'hasta', normalized: 'until' },
|
|
167
|
+
event: { primary: 'evento', normalized: 'event' },
|
|
168
|
+
from: { primary: 'de', alternatives: ['desde'], normalized: 'from' },
|
|
169
|
+
},
|
|
170
|
+
eventHandler: {
|
|
171
|
+
keyword: { primary: 'al', alternatives: ['cuando', 'en'], normalized: 'on' },
|
|
172
|
+
sourceMarker: { primary: 'de', alternatives: ['desde'], position: 'before' },
|
|
173
|
+
eventMarker: { primary: 'al', alternatives: ['cuando'], position: 'before' },
|
|
174
|
+
temporalMarkers: ['cuando', 'al'],
|
|
175
|
+
},
|
|
176
|
+
};
|
|
@@ -83,6 +83,12 @@ export const thaiProfile: LanguageProfile = {
|
|
|
83
83
|
// DOM focus
|
|
84
84
|
focus: { primary: 'โฟกัส', alternatives: [], normalized: 'focus' },
|
|
85
85
|
blur: { primary: 'เบลอ', alternatives: [], normalized: 'blur' },
|
|
86
|
+
// Common event names (for event handler patterns)
|
|
87
|
+
click: { primary: 'คลิก', normalized: 'click' },
|
|
88
|
+
hover: { primary: 'โฮเวอร์', alternatives: ['วางเมาส์'], normalized: 'hover' },
|
|
89
|
+
submit: { primary: 'ส่ง', alternatives: ['ส่งข้อมูล'], normalized: 'submit' },
|
|
90
|
+
input: { primary: 'ป้อน', alternatives: ['กรอก'], normalized: 'input' },
|
|
91
|
+
change: { primary: 'เปลี่ยน', alternatives: ['เปลี่ยนแปลง'], normalized: 'change' },
|
|
86
92
|
// Navigation
|
|
87
93
|
go: { primary: 'ไป', alternatives: ['ไปที่'], normalized: 'go' },
|
|
88
94
|
// Async
|
|
@@ -128,5 +134,10 @@ export const thaiProfile: LanguageProfile = {
|
|
|
128
134
|
eventHandler: {
|
|
129
135
|
keyword: { primary: 'เมื่อ', alternatives: ['ตอน'], normalized: 'on' },
|
|
130
136
|
sourceMarker: { primary: 'จาก', position: 'before' },
|
|
137
|
+
// Event marker: เมื่อ (when), used in SVO pattern
|
|
138
|
+
// Pattern: เมื่อ [event] [verb] [patient] ใน [destination?]
|
|
139
|
+
// Example: เมื่อ คลิก สลับ .active ใน #button
|
|
140
|
+
eventMarker: { primary: 'เมื่อ', alternatives: ['ตอน'], position: 'before' },
|
|
141
|
+
temporalMarkers: ['เมื่อ', 'ตอน'], // temporal conjunctions (when)
|
|
131
142
|
},
|
|
132
143
|
};
|
|
@@ -45,8 +45,36 @@ export const turkishProfile: LanguageProfile = {
|
|
|
45
45
|
},
|
|
46
46
|
},
|
|
47
47
|
roleMarkers: {
|
|
48
|
-
patient: {
|
|
49
|
-
|
|
48
|
+
patient: {
|
|
49
|
+
primary: 'i',
|
|
50
|
+
alternatives: ['ı', 'u', 'ü', 'yi', 'yı', 'yu', 'yü', 'ni', 'nı', 'nu', 'nü'],
|
|
51
|
+
position: 'after',
|
|
52
|
+
}, // Accusative (with buffer consonants y/n)
|
|
53
|
+
destination: {
|
|
54
|
+
primary: 'e',
|
|
55
|
+
// Include both dative (e/a) and genitive (ın/in/un/ün) for possessive patterns
|
|
56
|
+
// Genitive is used in "X's Y" patterns: #button ın .active = "#button's .active"
|
|
57
|
+
alternatives: [
|
|
58
|
+
'a',
|
|
59
|
+
'ye',
|
|
60
|
+
'ya',
|
|
61
|
+
'ne',
|
|
62
|
+
'na',
|
|
63
|
+
'de',
|
|
64
|
+
'da',
|
|
65
|
+
'te',
|
|
66
|
+
'ta',
|
|
67
|
+
'ın',
|
|
68
|
+
'in',
|
|
69
|
+
'un',
|
|
70
|
+
'ün',
|
|
71
|
+
'nın',
|
|
72
|
+
'nin',
|
|
73
|
+
'nun',
|
|
74
|
+
'nün',
|
|
75
|
+
],
|
|
76
|
+
position: 'after',
|
|
77
|
+
}, // Dative/Locative + Genitive (with buffer consonants)
|
|
50
78
|
source: { primary: 'den', alternatives: ['dan', 'ten', 'tan'], position: 'after' }, // Ablative
|
|
51
79
|
style: { primary: 'le', alternatives: ['la', 'yle', 'yla'], position: 'after' }, // Instrumental
|
|
52
80
|
event: { primary: 'i', alternatives: ['ı', 'u', 'ü'], position: 'after' }, // Event as accusative
|
|
@@ -59,10 +87,10 @@ export const turkishProfile: LanguageProfile = {
|
|
|
59
87
|
// Content operations
|
|
60
88
|
put: { primary: 'koy', normalized: 'put' },
|
|
61
89
|
append: { primary: 'ekle', normalized: 'append' },
|
|
62
|
-
take: { primary: '
|
|
90
|
+
take: { primary: 'tut', normalized: 'take' }, // al removed to avoid collision with get
|
|
63
91
|
make: { primary: 'yap', normalized: 'make' },
|
|
64
92
|
clone: { primary: 'kopyala', normalized: 'clone' },
|
|
65
|
-
swap: { primary: '
|
|
93
|
+
swap: { primary: 'takas', normalized: 'swap' }, // Removed değiştir alternative to avoid collision with toggle
|
|
66
94
|
morph: { primary: 'dönüştür', alternatives: ['şekil değiştir'], normalized: 'morph' },
|
|
67
95
|
// Variable operations
|
|
68
96
|
set: { primary: 'ayarla', alternatives: ['yap', 'belirle'], normalized: 'set' },
|
|
@@ -79,8 +107,14 @@ export const turkishProfile: LanguageProfile = {
|
|
|
79
107
|
trigger: { primary: 'tetikle', normalized: 'trigger' },
|
|
80
108
|
send: { primary: 'gönder', normalized: 'send' },
|
|
81
109
|
// DOM focus
|
|
82
|
-
focus: { primary: 'odak', normalized: 'focus' },
|
|
83
|
-
blur: { primary: 'bulanık', normalized: 'blur' },
|
|
110
|
+
focus: { primary: 'odak', alternatives: ['odaklanma'], normalized: 'focus' },
|
|
111
|
+
blur: { primary: 'bulanık', alternatives: ['bulanıklık'], normalized: 'blur' },
|
|
112
|
+
// Common event names (for event handler patterns)
|
|
113
|
+
click: { primary: 'tıklama', alternatives: ['tıkla'], normalized: 'click' },
|
|
114
|
+
hover: { primary: 'üzerine gelme', alternatives: ['üzerinde gezinme'], normalized: 'hover' },
|
|
115
|
+
submit: { primary: 'gönderme', alternatives: ['gönder'], normalized: 'submit' },
|
|
116
|
+
input: { primary: 'giriş', alternatives: ['girdi'], normalized: 'input' },
|
|
117
|
+
change: { primary: 'değişiklik', alternatives: ['değişim'], normalized: 'change' },
|
|
84
118
|
// Navigation
|
|
85
119
|
go: { primary: 'git', normalized: 'go' },
|
|
86
120
|
// Async
|
|
@@ -119,6 +153,14 @@ export const turkishProfile: LanguageProfile = {
|
|
|
119
153
|
// Event modifiers (for repeat until event)
|
|
120
154
|
until: { primary: 'kadar', normalized: 'until' },
|
|
121
155
|
event: { primary: 'olay', normalized: 'event' },
|
|
122
|
-
from: { primary: '
|
|
156
|
+
from: { primary: 'den', alternatives: ['dan'], normalized: 'from' },
|
|
157
|
+
},
|
|
158
|
+
eventHandler: {
|
|
159
|
+
// Event marker: da/de/ta/te (locative case suffix with vowel harmony), used in SOV pattern
|
|
160
|
+
// Pattern: [event] da [patient] i [action]
|
|
161
|
+
// Example: tıklama da .active i değiştir
|
|
162
|
+
// Note: Vowel harmony variants (da/de/ta/te) should be handled by vowel harmony expansion
|
|
163
|
+
eventMarker: { primary: 'da', alternatives: ['de', 'ta', 'te'], position: 'after' },
|
|
164
|
+
temporalMarkers: ['dığında', 'diğinde'], // temporal converb suffixes (when)
|
|
123
165
|
},
|
|
124
166
|
};
|
|
@@ -65,7 +65,7 @@ export interface PossessiveConfig {
|
|
|
65
65
|
* Complete language profile for pattern generation.
|
|
66
66
|
*/
|
|
67
67
|
export interface LanguageProfile {
|
|
68
|
-
/** ISO 639-1 language code */
|
|
68
|
+
/** ISO 639-1 or BCP 47 language code (e.g., 'es' or 'es-MX') */
|
|
69
69
|
readonly code: string;
|
|
70
70
|
/** Human-readable language name */
|
|
71
71
|
readonly name: string;
|
|
@@ -104,19 +104,35 @@ export interface LanguageProfile {
|
|
|
104
104
|
* Individual keywords can override this via KeywordTranslation.form
|
|
105
105
|
*/
|
|
106
106
|
readonly defaultVerbForm?: VerbForm;
|
|
107
|
+
/**
|
|
108
|
+
* Base language code to extend (for regional variants).
|
|
109
|
+
* When set, this profile inherits from the base and overrides specific fields.
|
|
110
|
+
* Example: 'es-MX' profile with extends: 'es' inherits from Spanish base.
|
|
111
|
+
*/
|
|
112
|
+
readonly extends?: string;
|
|
107
113
|
}
|
|
108
114
|
|
|
109
115
|
/**
|
|
110
116
|
* Configuration for event handler pattern generation.
|
|
111
|
-
*
|
|
117
|
+
* Supports both SVO and SOV/VSO languages.
|
|
112
118
|
*/
|
|
113
119
|
export interface EventHandlerConfig {
|
|
114
|
-
/** Primary event keyword (e.g., 'on', 'bei', 'sur') */
|
|
115
|
-
readonly keyword
|
|
120
|
+
/** Primary event keyword (e.g., 'on', 'bei', 'sur') for SVO */
|
|
121
|
+
readonly keyword?: KeywordTranslation;
|
|
116
122
|
/** Source filter marker (e.g., 'from', 'von', 'de') */
|
|
117
|
-
readonly sourceMarker
|
|
123
|
+
readonly sourceMarker?: RoleMarker;
|
|
118
124
|
/** Conditional keyword (e.g., 'when', 'wenn', 'quand') */
|
|
119
125
|
readonly conditionalKeyword?: KeywordTranslation;
|
|
126
|
+
|
|
127
|
+
/** Event marker for SOV/VSO languages (e.g., で (Japanese), 할 때 (Korean), da (Turkish), عند (Arabic)) */
|
|
128
|
+
readonly eventMarker?: RoleMarker;
|
|
129
|
+
/** Temporal/conditional markers that can optionally appear with events */
|
|
130
|
+
readonly temporalMarkers?: string[];
|
|
131
|
+
/**
|
|
132
|
+
* Negation marker for expressing negated events (e.g., Arabic عدم = "not/lack of").
|
|
133
|
+
* Used in patterns like: عند عدم التركيز = "when not focusing" = "on blur"
|
|
134
|
+
*/
|
|
135
|
+
readonly negationMarker?: RoleMarker;
|
|
120
136
|
}
|
|
121
137
|
|
|
122
138
|
/**
|
|
@@ -173,6 +173,12 @@ export const ukrainianProfile: LanguageProfile = {
|
|
|
173
173
|
normalized: 'blur',
|
|
174
174
|
form: 'infinitive',
|
|
175
175
|
},
|
|
176
|
+
// Common event names (for event handler patterns)
|
|
177
|
+
click: { primary: 'кліку', alternatives: ['клік', 'натисканні'], normalized: 'click' },
|
|
178
|
+
hover: { primary: 'наведенні', alternatives: ['наведення'], normalized: 'hover' },
|
|
179
|
+
submit: { primary: 'відправці', alternatives: ['відправка'], normalized: 'submit' },
|
|
180
|
+
input: { primary: 'введенні', alternatives: ['введення'], normalized: 'input' },
|
|
181
|
+
change: { primary: 'зміні', alternatives: ['зміна'], normalized: 'change' },
|
|
176
182
|
// Navigation
|
|
177
183
|
go: {
|
|
178
184
|
primary: 'перейти',
|
|
@@ -266,5 +272,10 @@ export const ukrainianProfile: LanguageProfile = {
|
|
|
266
272
|
eventHandler: {
|
|
267
273
|
keyword: { primary: 'при', alternatives: ['коли'], normalized: 'on' },
|
|
268
274
|
sourceMarker: { primary: 'на', alternatives: ['в', 'при'], position: 'before' },
|
|
275
|
+
// Event marker: при (at/on/upon), used in SVO pattern
|
|
276
|
+
// Pattern: при [event] [verb] [patient] на [destination?]
|
|
277
|
+
// Example: при кліку перемкнути .active на #button
|
|
278
|
+
eventMarker: { primary: 'при', alternatives: ['коли'], position: 'before' },
|
|
279
|
+
temporalMarkers: ['коли', 'якщо'], // temporal conjunctions (when, if)
|
|
269
280
|
},
|
|
270
281
|
};
|
|
@@ -86,6 +86,12 @@ export const vietnameseProfile: LanguageProfile = {
|
|
|
86
86
|
// DOM focus
|
|
87
87
|
focus: { primary: 'tập trung', normalized: 'focus' },
|
|
88
88
|
blur: { primary: 'mất tập trung', normalized: 'blur' },
|
|
89
|
+
// Common event names (for event handler patterns)
|
|
90
|
+
click: { primary: 'nhấp', alternatives: ['bấm'], normalized: 'click' },
|
|
91
|
+
hover: { primary: 'di chuột', alternatives: ['rê chuột'], normalized: 'hover' },
|
|
92
|
+
submit: { primary: 'gửi', alternatives: ['nộp'], normalized: 'submit' },
|
|
93
|
+
input: { primary: 'nhập', alternatives: ['nhập liệu'], normalized: 'input' },
|
|
94
|
+
change: { primary: 'thay đổi', alternatives: ['đổi'], normalized: 'change' },
|
|
89
95
|
// Navigation
|
|
90
96
|
go: { primary: 'đi đến', alternatives: ['đi'], normalized: 'go' },
|
|
91
97
|
// Async
|
|
@@ -129,5 +135,10 @@ export const vietnameseProfile: LanguageProfile = {
|
|
|
129
135
|
eventHandler: {
|
|
130
136
|
keyword: { primary: 'khi', alternatives: ['lúc', 'trên'], normalized: 'on' },
|
|
131
137
|
sourceMarker: { primary: 'trên', alternatives: ['tại'], position: 'before' },
|
|
138
|
+
// Event marker: khi (when), used in SVO pattern
|
|
139
|
+
// Pattern: khi [event] [verb] [patient] vào [destination?]
|
|
140
|
+
// Example: khi nhấp chuyển đổi .active vào #button
|
|
141
|
+
eventMarker: { primary: 'khi', alternatives: ['lúc'], position: 'before' },
|
|
142
|
+
temporalMarkers: ['khi', 'lúc'], // temporal conjunctions (when)
|
|
132
143
|
},
|
|
133
144
|
};
|
|
@@ -1160,6 +1160,75 @@ export const SUPPORTED_LANGUAGES: LanguageChecklist[] = [
|
|
|
1160
1160
|
missingFromTokenizer: [], // Now synced
|
|
1161
1161
|
potentialConflicts: [],
|
|
1162
1162
|
},
|
|
1163
|
+
{
|
|
1164
|
+
// ⚠️ TEST VARIANT - demonstrates language variant feature, not production-ready
|
|
1165
|
+
code: 'es-MX',
|
|
1166
|
+
name: 'Spanish (Mexico) [TEST]',
|
|
1167
|
+
wordOrder: 'SVO',
|
|
1168
|
+
direction: 'ltr',
|
|
1169
|
+
files: {
|
|
1170
|
+
languageProfile: true,
|
|
1171
|
+
tokenizer: true, // Uses Spanish tokenizer via inheritance
|
|
1172
|
+
tokenizerRegistered: true,
|
|
1173
|
+
morphologicalNormalizer: true, // Uses Spanish normalizer
|
|
1174
|
+
eventHandlerPatterns: true, // Uses Spanish patterns
|
|
1175
|
+
tests: true,
|
|
1176
|
+
morphologyTests: false,
|
|
1177
|
+
},
|
|
1178
|
+
morphology: {
|
|
1179
|
+
needed: true,
|
|
1180
|
+
reason: 'Example variant - uses Spanish morphology with illustrative Mexican vocabulary',
|
|
1181
|
+
inflectionTypes: ['inherits from Spanish'],
|
|
1182
|
+
integratedWithTokenizer: true,
|
|
1183
|
+
confidenceThreshold: 0.7,
|
|
1184
|
+
},
|
|
1185
|
+
profileKeywords: [
|
|
1186
|
+
'alternar',
|
|
1187
|
+
'añadir',
|
|
1188
|
+
'quitar',
|
|
1189
|
+
'poner',
|
|
1190
|
+
'establecer',
|
|
1191
|
+
'mostrar',
|
|
1192
|
+
'ocultar',
|
|
1193
|
+
'añadir',
|
|
1194
|
+
'anteponer',
|
|
1195
|
+
'incrementar',
|
|
1196
|
+
'decrementar',
|
|
1197
|
+
'esperar',
|
|
1198
|
+
'ahorita', // Mexican alternative for wait
|
|
1199
|
+
'obtener',
|
|
1200
|
+
'jalar', // Mexican alternative for fetch
|
|
1201
|
+
'ir',
|
|
1202
|
+
'disparar',
|
|
1203
|
+
'enviar',
|
|
1204
|
+
'registrar',
|
|
1205
|
+
'tomar',
|
|
1206
|
+
'hacer',
|
|
1207
|
+
'clonar',
|
|
1208
|
+
'enfocar',
|
|
1209
|
+
'desenfocar',
|
|
1210
|
+
'transición',
|
|
1211
|
+
'estabilizar',
|
|
1212
|
+
'llamar',
|
|
1213
|
+
'devolver',
|
|
1214
|
+
'aventar', // Mexican alternative for throw
|
|
1215
|
+
'js',
|
|
1216
|
+
'asíncrono',
|
|
1217
|
+
'decir',
|
|
1218
|
+
'predeterminar',
|
|
1219
|
+
'iniciar',
|
|
1220
|
+
'comportamiento',
|
|
1221
|
+
'dentro de',
|
|
1222
|
+
'antes',
|
|
1223
|
+
'después',
|
|
1224
|
+
],
|
|
1225
|
+
tokenizerKeywords: [
|
|
1226
|
+
// Inherits Spanish tokenizer keywords
|
|
1227
|
+
// Mexican-specific alternatives are in profile
|
|
1228
|
+
],
|
|
1229
|
+
missingFromTokenizer: [],
|
|
1230
|
+
potentialConflicts: [],
|
|
1231
|
+
},
|
|
1163
1232
|
{
|
|
1164
1233
|
code: 'tr',
|
|
1165
1234
|
name: 'Turkish',
|
|
@@ -2893,6 +2962,48 @@ export const SUPPORTED_LANGUAGES: LanguageChecklist[] = [
|
|
|
2893
2962
|
missingFromTokenizer: [],
|
|
2894
2963
|
potentialConflicts: [],
|
|
2895
2964
|
},
|
|
2965
|
+
{
|
|
2966
|
+
code: 'he',
|
|
2967
|
+
name: 'Hebrew',
|
|
2968
|
+
wordOrder: 'SVO',
|
|
2969
|
+
direction: 'rtl',
|
|
2970
|
+
files: {
|
|
2971
|
+
languageProfile: true,
|
|
2972
|
+
tokenizer: true,
|
|
2973
|
+
tokenizerRegistered: true,
|
|
2974
|
+
morphologicalNormalizer: false,
|
|
2975
|
+
eventHandlerPatterns: true,
|
|
2976
|
+
tests: true,
|
|
2977
|
+
morphologyTests: false,
|
|
2978
|
+
},
|
|
2979
|
+
morphology: {
|
|
2980
|
+
needed: true,
|
|
2981
|
+
reason: 'Hebrew has verb conjugation (binyanim) but commands use imperative/infinitive forms',
|
|
2982
|
+
inflectionTypes: ['imperative form', 'infinitive form', 'binyanim patterns'],
|
|
2983
|
+
integratedWithTokenizer: false,
|
|
2984
|
+
confidenceThreshold: 0.7,
|
|
2985
|
+
},
|
|
2986
|
+
profileKeywords: ['החלף', 'הוסף', 'הסר', 'שים', 'קבע', 'הראה', 'הסתר', 'הגדל', 'הקטן'],
|
|
2987
|
+
tokenizerKeywords: [
|
|
2988
|
+
'החלף',
|
|
2989
|
+
'שנה',
|
|
2990
|
+
'הוסף',
|
|
2991
|
+
'הסר',
|
|
2992
|
+
'מחק',
|
|
2993
|
+
'שים',
|
|
2994
|
+
'הכנס',
|
|
2995
|
+
'קבע',
|
|
2996
|
+
'הגדר',
|
|
2997
|
+
'קבל',
|
|
2998
|
+
'הראה',
|
|
2999
|
+
'הצג',
|
|
3000
|
+
'הסתר',
|
|
3001
|
+
'הגדל',
|
|
3002
|
+
'הקטן',
|
|
3003
|
+
],
|
|
3004
|
+
missingFromTokenizer: [],
|
|
3005
|
+
potentialConflicts: [],
|
|
3006
|
+
},
|
|
2896
3007
|
];
|
|
2897
3008
|
|
|
2898
3009
|
/**
|
package/src/languages/_all.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* All Languages Module
|
|
3
3
|
*
|
|
4
|
-
* Imports and registers all
|
|
4
|
+
* Imports and registers all 25 supported languages.
|
|
5
5
|
* Use this for the full bundle with all language support.
|
|
6
6
|
*
|
|
7
7
|
* @example
|
|
@@ -20,7 +20,9 @@ import './bn';
|
|
|
20
20
|
import './de';
|
|
21
21
|
import './en';
|
|
22
22
|
import './es';
|
|
23
|
+
import './es-MX';
|
|
23
24
|
import './fr';
|
|
25
|
+
import './he';
|
|
24
26
|
import './hi';
|
|
25
27
|
import './id';
|
|
26
28
|
import './it';
|
|
@@ -45,7 +47,9 @@ export * from './bn';
|
|
|
45
47
|
export * from './de';
|
|
46
48
|
export * from './en';
|
|
47
49
|
export * from './es';
|
|
50
|
+
export * from './es-MX';
|
|
48
51
|
export * from './fr';
|
|
52
|
+
export * from './he';
|
|
49
53
|
export * from './hi';
|
|
50
54
|
export * from './id';
|
|
51
55
|
export * from './it';
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mexican Spanish Language Module
|
|
3
|
+
*
|
|
4
|
+
* Self-registering module for Mexican Spanish (es-MX) language support.
|
|
5
|
+
* Registers the Mexican Spanish profile with the base Spanish tokenizer.
|
|
6
|
+
*
|
|
7
|
+
* This demonstrates the language variant feature:
|
|
8
|
+
* - Reuses the Spanish tokenizer (Mexican Spanish has same tokenization rules)
|
|
9
|
+
* - Has its own profile with Mexican-specific keywords and alternatives
|
|
10
|
+
* - Profile extends 'es' for inheritance of common properties
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ```typescript
|
|
14
|
+
* // Import to register
|
|
15
|
+
* import '@lokascript/semantic/languages/es-MX';
|
|
16
|
+
*
|
|
17
|
+
* // Parse with Mexican Spanish
|
|
18
|
+
* parse('ahorita mostrar .loading', 'es-MX'); // "ahorita" is Mexican for "wait"
|
|
19
|
+
* parse('jalar datos de /api', 'es-MX'); // "jalar" is Mexican for "fetch/pull"
|
|
20
|
+
* ```
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { registerLanguage } from '../registry';
|
|
24
|
+
import { spanishTokenizer } from '../tokenizers/spanish';
|
|
25
|
+
import { spanishMexicoProfile } from '../generators/profiles/spanishMexico';
|
|
26
|
+
|
|
27
|
+
export { spanishMexicoProfile } from '../generators/profiles/spanishMexico';
|
|
28
|
+
// Re-export Spanish tokenizer since we use it for es-MX
|
|
29
|
+
export { spanishTokenizer } from '../tokenizers/spanish';
|
|
30
|
+
|
|
31
|
+
// Register es-MX with the Mexican Spanish profile and Spanish tokenizer
|
|
32
|
+
registerLanguage('es-MX', spanishTokenizer, spanishMexicoProfile);
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hebrew Language Module
|
|
3
|
+
*
|
|
4
|
+
* Self-registering module for Hebrew language support.
|
|
5
|
+
* Importing this module registers the Hebrew tokenizer and profile.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { registerLanguage } from '../registry';
|
|
9
|
+
import { hebrewTokenizer } from '../tokenizers/he';
|
|
10
|
+
import { hebrewProfile } from '../generators/profiles/hebrew';
|
|
11
|
+
|
|
12
|
+
export { hebrewTokenizer } from '../tokenizers/he';
|
|
13
|
+
export { hebrewProfile } from '../generators/profiles/hebrew';
|
|
14
|
+
|
|
15
|
+
registerLanguage('he', hebrewTokenizer, hebrewProfile);
|
|
@@ -112,7 +112,16 @@ export class PatternMatcher {
|
|
|
112
112
|
captured: Map<SemanticRole, SemanticValue>
|
|
113
113
|
): boolean {
|
|
114
114
|
// Skip leading conjunctions for Arabic (proclitics: و, ف, ول, وب, etc.)
|
|
115
|
-
if (
|
|
115
|
+
// Exception: do not skip when the pattern's first token is a literal matching 'and' or 'then' (proclitic patterns)
|
|
116
|
+
const firstPatternToken = patternTokens[0];
|
|
117
|
+
const patternExpectsConjunction =
|
|
118
|
+
firstPatternToken?.type === 'literal' &&
|
|
119
|
+
(firstPatternToken.value === 'and' ||
|
|
120
|
+
firstPatternToken.value === 'then' ||
|
|
121
|
+
firstPatternToken.alternatives?.includes('and') ||
|
|
122
|
+
firstPatternToken.alternatives?.includes('then'));
|
|
123
|
+
|
|
124
|
+
if (this.currentProfile?.code === 'ar' && !patternExpectsConjunction) {
|
|
116
125
|
while (tokens.peek()?.kind === 'conjunction') {
|
|
117
126
|
tokens.advance();
|
|
118
127
|
}
|
|
@@ -106,6 +106,7 @@ export class SemanticParserImpl implements ISemanticParser {
|
|
|
106
106
|
return createCommandNode(match.pattern.command, roles, {
|
|
107
107
|
sourceLanguage: language,
|
|
108
108
|
patternId: match.pattern.id,
|
|
109
|
+
confidence: match.confidence,
|
|
109
110
|
});
|
|
110
111
|
}
|
|
111
112
|
|
|
@@ -150,6 +151,7 @@ export class SemanticParserImpl implements ISemanticParser {
|
|
|
150
151
|
const commandNode = createCommandNode(actionName as ActionType, roles, {
|
|
151
152
|
sourceLanguage: language,
|
|
152
153
|
patternId: match.pattern.id,
|
|
154
|
+
confidence: match.confidence,
|
|
153
155
|
});
|
|
154
156
|
|
|
155
157
|
// Check if pattern has continuation marker (then-chains)
|
|
@@ -195,6 +197,7 @@ export class SemanticParserImpl implements ISemanticParser {
|
|
|
195
197
|
return createEventHandler(eventValue, body, eventModifiers, {
|
|
196
198
|
sourceLanguage: language,
|
|
197
199
|
patternId: match.pattern.id,
|
|
200
|
+
confidence: match.confidence,
|
|
198
201
|
});
|
|
199
202
|
}
|
|
200
203
|
|