@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,3170 @@
1
+ /**
2
+ * Language Building Schema
3
+ *
4
+ * This file defines the schema for adding languages and commands to the semantic parser.
5
+ * It serves as both documentation and validation for ensuring all required pieces are in place.
6
+ *
7
+ * ## Adding a New Language
8
+ *
9
+ * Required steps:
10
+ * 1. Create language profile in generators/language-profiles.ts
11
+ * 2. Create tokenizer in tokenizers/{language}.ts
12
+ * 3. Register tokenizer in tokenizers/index.ts
13
+ * 4. Create morphological normalizer in tokenizers/morphology/{language}-normalizer.ts (for agglutinative/inflected languages)
14
+ * 5. Add event handler patterns in patterns/event-handler/{language}.ts (if different from generated)
15
+ * 6. Add tests in test/official-examples.test.ts
16
+ * 7. Add morphology tests in test/morphology.test.ts (if normalizer created)
17
+ *
18
+ * ## Adding a New Command
19
+ *
20
+ * Required steps:
21
+ * 1. Define command schema in generators/command-schemas.ts
22
+ * 2. Wire schema in patterns/index.ts (generatePatternsForCommand)
23
+ * 3. Add keywords to ALL language profiles in generators/language-profiles.ts
24
+ * 4. Add keywords to ALL tokenizers' keyword maps
25
+ * 5. Add tests for all languages in test/official-examples.test.ts
26
+ *
27
+ * ## Morphological Infrastructure
28
+ *
29
+ * Languages with verb conjugations or inflections need morphological normalizers:
30
+ * - Japanese: て-form, た-form, ます-form, ている, etc.
31
+ * - Korean: 다 ending, 요/니다 forms, 았/었 past tense
32
+ * - Spanish: Conjugations (-ar, -er, -ir), reflexive verbs (mostrarse)
33
+ * - Arabic: Prefix/suffix stripping (ال, ي, ت, ن, أ, ون, ين)
34
+ * - Turkish: Vowel harmony, tense suffixes (-iyor, -di, -miş)
35
+ *
36
+ * The normalizer extracts the stem and provides a confidence score.
37
+ * Pattern matching uses stemmed forms when exact/normalized matches fail.
38
+ *
39
+ * ## Common Pitfalls
40
+ *
41
+ * - Keywords in language profiles but NOT in tokenizer keyword maps
42
+ * → Tokens classified as 'identifier' instead of 'keyword'
43
+ * → Pattern matching fails
44
+ *
45
+ * - Particle conflicts (e.g., Japanese 'に' used for both events and destinations)
46
+ * → Event handler patterns match command patterns
47
+ * → Solution: Use distinct particles or adjust pattern priorities
48
+ *
49
+ * - Missing keyword alternatives
50
+ * → Native speakers may use different words for same concept
51
+ * → Include common alternatives in both profile and tokenizer
52
+ *
53
+ * - Missing morphological normalizer for agglutinative languages
54
+ * → Conjugated verb forms won't be recognized
55
+ * → Solution: Create normalizer and integrate with tokenizer
56
+ */
57
+
58
+ // =============================================================================
59
+ // Language Checklist Schema
60
+ // =============================================================================
61
+
62
/**
 * Checklist describing how completely one language is supported by the
 * semantic parser: which files exist, how morphology is handled, and whether
 * the profile and tokenizer keyword sets agree.
 */
export interface LanguageChecklist {
  /** ISO 639-1 code of the language. */
  code: string;

  /** Display name of the language. */
  name: string;

  /** Basic word order of the language: SVO, SOV or VSO. */
  wordOrder: 'SVO' | 'SOV' | 'VSO';

  /** Writing direction: left-to-right or right-to-left. */
  direction: 'ltr' | 'rtl';

  /** Presence flags for the files a language is expected to provide. */
  files: {
    /** A language profile exists in generators/language-profiles.ts. */
    languageProfile: boolean;

    /** A tokenizer exists in tokenizers/{code}.ts. */
    tokenizer: boolean;

    /** The tokenizer is registered in tokenizers/index.ts. */
    tokenizerRegistered: boolean;

    /** A normalizer exists in tokenizers/morphology/{code}-normalizer.ts. */
    morphologicalNormalizer: boolean;

    /** Event handler patterns exist (optional when generated patterns are used). */
    eventHandlerPatterns: boolean;

    /** Tests exist in test/official-examples.test.ts. */
    tests: boolean;

    /** Morphology tests exist in test/morphology.test.ts. */
    morphologyTests: boolean;
  };

  /** Details about the morphological infrastructure of the language. */
  morphology: {
    /** True when the language requires morphological normalization. */
    needed: boolean;

    /** Explanation of why normalization is, or is not, required. */
    reason: string;

    /** The kinds of inflection that are handled. */
    inflectionTypes: Array<string>;

    /** True when the normalizer is hooked into the tokenizer. */
    integratedWithTokenizer: boolean;

    /** Confidence threshold applied to stemmed matches. */
    confidenceThreshold: number;
  };

  /** Keywords declared by the language profile. */
  profileKeywords: Array<string>;

  /** Keywords declared by the tokenizer keyword map. */
  tokenizerKeywords: Array<string>;

  /** Keywords the profile declares but the tokenizer lacks. */
  missingFromTokenizer: Array<string>;

  /** Particles or markers that might lead to pattern conflicts. */
  potentialConflicts: Array<ParticleConflict>;
}
129
+
130
/**
 * A particle or marker that serves more than one purpose in a language and
 * may therefore make patterns ambiguous.
 */
export interface ParticleConflict {
  /** The particle or marker itself. */
  particle: string;

  /** Purposes the particle serves (e.g., 'destination', 'event', 'object'). */
  usedFor: Array<string>;

  /**
   * True once the conflict has been dealt with; validateLanguage reports
   * entries where this is false as unresolved conflicts.
   */
  isResolved: boolean;

  /** Description of how the conflict was resolved, when it has been. */
  resolution?: string;
}
143
+
144
+ // =============================================================================
145
+ // Command Checklist Schema
146
+ // =============================================================================
147
+
148
/**
 * Checklist describing how completely one command is supported: schema,
 * pattern wiring, and per-language keyword and test coverage.
 */
export interface CommandChecklist {
  /** Name of the command action. */
  action: string;

  /** True when a schema exists in generators/command-schemas.ts. */
  schemaExists: boolean;

  /** True when wired in patterns/index.ts (false for hand-crafted patterns). */
  wiredInPatterns: boolean;

  /** True when the command relies on hand-crafted rather than generated patterns. */
  usesHandCraftedPatterns: boolean;

  /** Language codes whose profile defines keywords for this command. */
  profileKeywordsIn: Array<string>;

  /** Language codes whose tokenizer defines keywords for this command. */
  tokenizerKeywordsIn: Array<string>;

  /** Language codes whose tokenizer lacks keywords for this command. */
  missingTokenizerKeywordsIn: Array<string>;

  /** Language codes for which tests of this command exist. */
  testsFor: Array<string>;
}
173
+
174
+ // =============================================================================
175
+ // Validation Functions
176
+ // =============================================================================
177
+
178
/**
 * Checks a language checklist for missing pieces.
 *
 * Errors are reported for a missing language profile, a missing tokenizer, an
 * unregistered tokenizer, and profile keywords absent from the tokenizer.
 * Warnings are reported for missing tests and for each particle conflict that
 * is not marked as resolved.
 *
 * @param checklist - The language checklist to inspect.
 * @returns The collected errors and warnings; `valid` is true only when no
 *   errors were found (warnings do not affect it).
 */
export function validateLanguage(checklist: LanguageChecklist): ValidationResult {
  const { code, files } = checklist;

  // Each required file flag is paired with the error emitted when it is unset.
  const requiredFiles: Array<[boolean, string]> = [
    [files.languageProfile, `Missing language profile for ${code}`],
    [files.tokenizer, `Missing tokenizer for ${code}`],
    [files.tokenizerRegistered, `Tokenizer not registered for ${code}`],
  ];
  const errors: string[] = requiredFiles
    .filter(([present]) => !present)
    .map(([, message]) => message);

  const warnings: string[] = [];
  if (!files.tests) {
    warnings.push(`No tests found for ${code}`);
  }

  // Profile and tokenizer keyword sets must agree.
  const unsynced = checklist.missingFromTokenizer;
  if (unsynced.length > 0) {
    errors.push(`Keywords in profile but not tokenizer for ${code}: ` + unsynced.join(', '));
  }

  // Only conflicts that are still open are worth a warning.
  for (const conflict of checklist.potentialConflicts) {
    if (conflict.isResolved) {
      continue;
    }
    const purposes = conflict.usedFor.join(' and ');
    warnings.push(
      `Unresolved particle conflict in ${code}: ` + `'${conflict.particle}' used for ${purposes}`
    );
  }

  return { valid: errors.length === 0, errors, warnings };
}
224
+
225
/**
 * Checks a command checklist for missing pieces.
 *
 * Errors are reported for a missing schema and for a command that is neither
 * wired in patterns/index.ts nor backed by hand-crafted patterns. Warnings are
 * reported for languages lacking tokenizer keywords and for languages from a
 * fixed list (en, ja, ar, es, ko, tr, zh) that have no tests for the command.
 *
 * @param checklist - The command checklist to inspect.
 * @returns The collected errors and warnings; `valid` is true only when no
 *   errors were found (warnings do not affect it).
 */
export function validateCommand(checklist: CommandChecklist): ValidationResult {
  const { action } = checklist;
  const errors: string[] = [];
  const warnings: string[] = [];

  if (!checklist.schemaExists) {
    errors.push(`Missing schema for command: ${action}`);
  }

  // Wiring is only required for commands that rely on generated patterns.
  const wiredOrHandCrafted = checklist.wiredInPatterns || checklist.usesHandCraftedPatterns;
  if (!wiredOrHandCrafted) {
    errors.push(`Command not wired in patterns/index.ts: ${action}`);
  }

  const withoutKeywords = checklist.missingTokenizerKeywordsIn;
  if (withoutKeywords.length > 0) {
    warnings.push(
      `Command ${action} missing tokenizer keywords in: ` + withoutKeywords.join(', ')
    );
  }

  // Every language in this list is expected to have tests for the command.
  const untested: string[] = [];
  for (const lang of ['en', 'ja', 'ar', 'es', 'ko', 'tr', 'zh']) {
    if (!checklist.testsFor.includes(lang)) {
      untested.push(lang);
    }
  }
  if (untested.length > 0) {
    warnings.push(`Command ${action} missing tests for: ${untested.join(', ')}`);
  }

  return { valid: errors.length === 0, errors, warnings };
}
261
+
262
/**
 * Outcome of validating a language or command checklist.
 */
export interface ValidationResult {
  /** True when no errors were recorded; warnings do not affect this flag. */
  valid: boolean;

  /** Messages for problems that make the checklist invalid. */
  errors: Array<string>;

  /** Messages for problems that are reported but do not invalidate the checklist. */
  warnings: Array<string>;
}
267
+
268
+ // =============================================================================
269
+ // Current State Documentation
270
+ // =============================================================================
271
+
272
+ /**
273
+ * Documents the current state of language support.
274
+ * Update this when adding new languages or commands.
275
+ */
276
+ export const SUPPORTED_LANGUAGES: LanguageChecklist[] = [
277
+ {
278
+ code: 'en',
279
+ name: 'English',
280
+ wordOrder: 'SVO',
281
+ direction: 'ltr',
282
+ files: {
283
+ languageProfile: true,
284
+ tokenizer: true,
285
+ tokenizerRegistered: true,
286
+ morphologicalNormalizer: false,
287
+ eventHandlerPatterns: true,
288
+ tests: true,
289
+ morphologyTests: false,
290
+ },
291
+ morphology: {
292
+ needed: false,
293
+ reason: 'English has minimal inflection; keywords match base forms',
294
+ inflectionTypes: [],
295
+ integratedWithTokenizer: false,
296
+ confidenceThreshold: 1.0,
297
+ },
298
+ profileKeywords: [
299
+ 'toggle',
300
+ 'add',
301
+ 'remove',
302
+ 'put',
303
+ 'set',
304
+ 'show',
305
+ 'hide',
306
+ 'append',
307
+ 'prepend',
308
+ 'increment',
309
+ 'decrement',
310
+ 'wait',
311
+ 'fetch',
312
+ 'go',
313
+ 'trigger',
314
+ 'send',
315
+ 'log',
316
+ 'make',
317
+ 'clone',
318
+ 'take',
319
+ 'get',
320
+ 'focus',
321
+ 'blur',
322
+ 'transition',
323
+ 'settle',
324
+ 'if',
325
+ 'else',
326
+ 'repeat',
327
+ 'for',
328
+ 'while',
329
+ 'continue',
330
+ 'halt',
331
+ 'throw',
332
+ 'call',
333
+ 'return',
334
+ 'js',
335
+ 'async',
336
+ 'tell',
337
+ 'default',
338
+ 'init',
339
+ 'behavior',
340
+ 'into',
341
+ 'before',
342
+ 'after',
343
+ 'on',
344
+ ],
345
+ tokenizerKeywords: [
346
+ // Commands - Class/Attribute operations
347
+ 'toggle',
348
+ 'add',
349
+ 'remove',
350
+ // Commands - Content operations
351
+ 'put',
352
+ 'append',
353
+ 'prepend',
354
+ 'take',
355
+ 'make',
356
+ 'clone',
357
+ 'swap',
358
+ 'morph', // swap/morph
359
+ // Commands - Variable operations
360
+ 'set',
361
+ 'get',
362
+ 'increment',
363
+ 'decrement',
364
+ 'log',
365
+ // Commands - Visibility
366
+ 'show',
367
+ 'hide',
368
+ 'transition',
369
+ // Commands - Events
370
+ 'on',
371
+ 'trigger',
372
+ 'send',
373
+ // Commands - DOM focus
374
+ 'focus',
375
+ 'blur',
376
+ // Commands - Navigation
377
+ 'go',
378
+ // Commands - Async
379
+ 'wait',
380
+ 'fetch',
381
+ 'settle',
382
+ // Commands - Control flow
383
+ 'if',
384
+ 'else',
385
+ 'repeat',
386
+ 'for',
387
+ 'while',
388
+ 'continue',
389
+ 'halt',
390
+ 'throw',
391
+ 'call',
392
+ 'return',
393
+ // Commands - Advanced
394
+ 'js',
395
+ 'async',
396
+ 'tell',
397
+ 'default',
398
+ 'init',
399
+ 'behavior',
400
+ // Control flow helpers
401
+ 'then', // then
402
+ 'end', // end
403
+ // Modifiers
404
+ 'into',
405
+ 'before',
406
+ 'after',
407
+ ],
408
+ missingFromTokenizer: [],
409
+ potentialConflicts: [],
410
+ },
411
+ {
412
+ code: 'ja',
413
+ name: 'Japanese',
414
+ wordOrder: 'SOV',
415
+ direction: 'ltr',
416
+ files: {
417
+ languageProfile: true,
418
+ tokenizer: true,
419
+ tokenizerRegistered: true,
420
+ morphologicalNormalizer: true,
421
+ eventHandlerPatterns: true,
422
+ tests: true,
423
+ morphologyTests: true,
424
+ },
425
+ morphology: {
426
+ needed: true,
427
+ reason: 'Japanese is agglutinative with rich verb conjugation',
428
+ inflectionTypes: [
429
+ 'て-form (切り替えて)',
430
+ 'た-form/past (切り替えた)',
431
+ 'ます-form/polite (切り替えます)',
432
+ 'ている/progressive (切り替えている)',
433
+ 'ない/negative (切り替えない)',
434
+ 'する verbs (トグルする → トグル)',
435
+ ],
436
+ integratedWithTokenizer: true,
437
+ confidenceThreshold: 0.7,
438
+ },
439
+ profileKeywords: [
440
+ '切り替え',
441
+ '追加',
442
+ '削除',
443
+ '置く',
444
+ '設定',
445
+ '表示',
446
+ '隠す',
447
+ '末尾追加',
448
+ '先頭追加',
449
+ '増加',
450
+ '減少',
451
+ '待つ',
452
+ '取得',
453
+ '移動',
454
+ '引き金',
455
+ '送信',
456
+ '記録',
457
+ '作る',
458
+ '複製',
459
+ '取る',
460
+ 'フォーカス',
461
+ 'ぼかし',
462
+ '遷移',
463
+ '安定',
464
+ 'もし',
465
+ 'そうでなければ',
466
+ '繰り返し',
467
+ 'ために',
468
+ 'の間',
469
+ '続ける',
470
+ '停止',
471
+ '投げる',
472
+ '呼び出し',
473
+ '戻る',
474
+ 'JS実行',
475
+ '非同期',
476
+ '伝える',
477
+ '既定',
478
+ '初期化',
479
+ '振る舞い',
480
+ 'へ',
481
+ '前に',
482
+ '後に',
483
+ 'で',
484
+ ],
485
+ tokenizerKeywords: [
486
+ // Commands - Class/Attribute operations
487
+ '切り替え',
488
+ '切り替える',
489
+ 'トグル',
490
+ 'トグルする',
491
+ '追加',
492
+ '追加する',
493
+ '加える',
494
+ '削除',
495
+ '削除する',
496
+ '取り除く',
497
+ // Commands - Content operations
498
+ '置く',
499
+ '入れる',
500
+ '末尾追加',
501
+ 'アペンド',
502
+ '先頭追加',
503
+ 'プリペンド',
504
+ '取る',
505
+ '作る',
506
+ '作成',
507
+ '複製',
508
+ 'クローン',
509
+ '交換',
510
+ 'スワップ', // swap
511
+ '変形',
512
+ 'モーフ', // morph
513
+ // Commands - Variable operations
514
+ 'セット',
515
+ 'セットする',
516
+ '設定',
517
+ '設定する',
518
+ '取得',
519
+ '取得する',
520
+ 'ゲット',
521
+ '増加',
522
+ '増加する',
523
+ '増やす',
524
+ 'インクリメント',
525
+ '減少',
526
+ '減少する',
527
+ '減らす',
528
+ 'デクリメント',
529
+ '記録',
530
+ 'ログ',
531
+ '出力',
532
+ // Commands - Visibility
533
+ '表示',
534
+ '表示する',
535
+ '見せる',
536
+ '隠す',
537
+ '非表示',
538
+ '非表示にする',
539
+ '遷移',
540
+ 'トランジション',
541
+ 'アニメーション',
542
+ // Commands - Events
543
+ 'で',
544
+ '時',
545
+ 'とき',
546
+ 'トリガー',
547
+ '発火',
548
+ '引き金',
549
+ '送る',
550
+ '送信',
551
+ '送信する',
552
+ // Commands - DOM focus
553
+ 'フォーカス',
554
+ '集中',
555
+ 'ぼかし',
556
+ 'フォーカス解除',
557
+ // Commands - Navigation
558
+ '移動',
559
+ '行く',
560
+ 'ナビゲート',
561
+ // Commands - Async
562
+ '待つ',
563
+ '待機',
564
+ 'フェッチ',
565
+ '安定',
566
+ '落ち着く',
567
+ // Commands - Control flow
568
+ 'もし',
569
+ '条件',
570
+ 'そうでなければ',
571
+ 'それ以外',
572
+ '繰り返し',
573
+ '繰り返す',
574
+ 'リピート',
575
+ 'ために',
576
+ '各',
577
+ 'の間',
578
+ '間',
579
+ '続ける',
580
+ '継続',
581
+ '停止',
582
+ '止める',
583
+ 'ハルト',
584
+ '投げる',
585
+ 'スロー',
586
+ '呼び出し',
587
+ 'コール',
588
+ '呼ぶ',
589
+ '呼び出す',
590
+ '戻る',
591
+ '返す',
592
+ 'リターン',
593
+ // Commands - Advanced
594
+ 'JS実行',
595
+ 'js',
596
+ '非同期',
597
+ 'アシンク',
598
+ '伝える',
599
+ 'テル',
600
+ '既定',
601
+ 'デフォルト',
602
+ '初期化',
603
+ 'イニット',
604
+ '振る舞い',
605
+ 'ビヘイビア',
606
+ // Control flow helpers
607
+ 'それから',
608
+ 'そして',
609
+ '次に', // then
610
+ '終わり',
611
+ '終了', // end
612
+ // Modifiers
613
+ 'へ',
614
+ '前に',
615
+ '後に',
616
+ '後',
617
+ ],
618
+ missingFromTokenizer: [], // Now synced
619
+ potentialConflicts: [
620
+ {
621
+ particle: 'に',
622
+ usedFor: ['destination', 'time'],
623
+ isResolved: true,
624
+ resolution: 'Removed に from event handler alternatives; use で for events',
625
+ },
626
+ ],
627
+ },
628
+ {
629
+ code: 'ko',
630
+ name: 'Korean',
631
+ wordOrder: 'SOV',
632
+ direction: 'ltr',
633
+ files: {
634
+ languageProfile: true,
635
+ tokenizer: true,
636
+ tokenizerRegistered: true,
637
+ morphologicalNormalizer: true,
638
+ eventHandlerPatterns: false, // Uses generated patterns
639
+ tests: true,
640
+ morphologyTests: true,
641
+ },
642
+ morphology: {
643
+ needed: true,
644
+ reason: 'Korean is agglutinative with verb conjugation and honorific forms',
645
+ inflectionTypes: [
646
+ '다 ending/dictionary (바꾸다 → 바꾸)',
647
+ '요 ending/polite (바꿔요 → 바꾸)',
648
+ '니다 ending/formal (바꿉니다 → 바꾸)',
649
+ '세요 ending/honorific (바꾸세요 → 바꾸)',
650
+ '았/었 past tense (바꿨어 → 바꾸)',
651
+ 'Vowel harmony for particles',
652
+ ],
653
+ integratedWithTokenizer: true,
654
+ confidenceThreshold: 0.7,
655
+ },
656
+ profileKeywords: [
657
+ '토글',
658
+ '추가',
659
+ '제거',
660
+ '놓다',
661
+ '설정',
662
+ '보이다',
663
+ '숨기다',
664
+ '추가',
665
+ '앞에추가',
666
+ '증가',
667
+ '감소',
668
+ '기다리다',
669
+ '가져오다',
670
+ '가다',
671
+ '트리거',
672
+ '보내다',
673
+ '로그',
674
+ '만들다',
675
+ '복사',
676
+ '가져오기',
677
+ '포커스',
678
+ '블러',
679
+ '전환',
680
+ '안정',
681
+ '던지다',
682
+ '비동기',
683
+ '말하다',
684
+ '기본값',
685
+ '초기화',
686
+ '동작',
687
+ '으로',
688
+ '전에',
689
+ '후에',
690
+ ],
691
+ tokenizerKeywords: [
692
+ // Commands - Class/Attribute operations
693
+ '토글',
694
+ '토글하다',
695
+ '바꾸다',
696
+ '전환',
697
+ '스위치',
698
+ '추가',
699
+ '추가하다',
700
+ '더하다',
701
+ '제거',
702
+ '제거하다',
703
+ '삭제',
704
+ '없애다',
705
+ // Commands - Content operations
706
+ '놓다',
707
+ '넣다',
708
+ '두다',
709
+ '뒤에추가',
710
+ '앞에추가',
711
+ '가져가다',
712
+ '취하다',
713
+ '만들다',
714
+ '생성',
715
+ '복사',
716
+ '클론',
717
+ '교환',
718
+ '스왑', // swap
719
+ '변형',
720
+ '모프', // morph
721
+ // Commands - Variable operations
722
+ '세트',
723
+ '설정',
724
+ '설정하다',
725
+ '정하다',
726
+ '가져오다',
727
+ '얻다',
728
+ '취득',
729
+ '증가',
730
+ '증가하다',
731
+ '늘리다',
732
+ '감소',
733
+ '감소하다',
734
+ '줄이다',
735
+ '로그',
736
+ '기록',
737
+ // Commands - Visibility
738
+ '보이다',
739
+ '표시',
740
+ '보여주다',
741
+ '숨기다',
742
+ '숨김',
743
+ '감추다',
744
+ '전환',
745
+ '애니메이션',
746
+ // Commands - Events
747
+ '에',
748
+ '시',
749
+ '때',
750
+ '트리거',
751
+ '발동',
752
+ '발사',
753
+ '보내다',
754
+ '전송',
755
+ // Commands - DOM focus
756
+ '포커스',
757
+ '집중',
758
+ '블러',
759
+ '흐리게',
760
+ // Commands - Navigation
761
+ '가다',
762
+ '이동',
763
+ '네비게이트',
764
+ // Commands - Async
765
+ '기다리다',
766
+ '대기',
767
+ '페치',
768
+ '가져오기',
769
+ '안정',
770
+ '정착',
771
+ // Commands - Control flow
772
+ '만약',
773
+ '조건',
774
+ '아니면',
775
+ '그렇지않으면',
776
+ '반복',
777
+ '되풀이',
778
+ '각각',
779
+ '동안',
780
+ '하는동안',
781
+ '계속',
782
+ '계속하다',
783
+ '정지',
784
+ '중지',
785
+ '멈추다',
786
+ '던지다',
787
+ '던지기',
788
+ '호출',
789
+ '부르다',
790
+ '반환',
791
+ '돌려주다',
792
+ // Commands - Advanced
793
+ 'js',
794
+ 'JS실행',
795
+ '비동기',
796
+ '어싱크',
797
+ '말하다',
798
+ '알리다',
799
+ '기본값',
800
+ '디폴트',
801
+ '초기화',
802
+ '시작',
803
+ '동작',
804
+ '행동',
805
+ // Control flow helpers
806
+ '그다음',
807
+ '그리고',
808
+ '그런다음', // then
809
+ '끝',
810
+ '종료', // end
811
+ // Modifiers
812
+ '으로',
813
+ '로',
814
+ '전에',
815
+ '이전',
816
+ '후에',
817
+ '이후',
818
+ ],
819
+ missingFromTokenizer: [], // Now synced
820
+ potentialConflicts: [],
821
+ },
822
+ {
823
+ code: 'ar',
824
+ name: 'Arabic',
825
+ wordOrder: 'VSO',
826
+ direction: 'rtl',
827
+ files: {
828
+ languageProfile: true,
829
+ tokenizer: true,
830
+ tokenizerRegistered: true,
831
+ morphologicalNormalizer: true,
832
+ eventHandlerPatterns: true,
833
+ tests: true,
834
+ morphologyTests: true,
835
+ },
836
+ morphology: {
837
+ needed: true,
838
+ reason: 'Arabic has complex root-pattern morphology with prefixes/suffixes',
839
+ inflectionTypes: [
840
+ 'Definite article ال (البدل → بدل)',
841
+ 'Conjunction prefixes و/ف (والبدل → بدل)',
842
+ 'Preposition prefixes ب/ل (ببدل → بدل)',
843
+ 'Present tense markers ي/ت/ن/أ (يبدل → بدل)',
844
+ 'Plural suffixes ون/ين (مستخدمون → مستخدم)',
845
+ 'Diacritics normalization (بدّل = بدل)',
846
+ ],
847
+ integratedWithTokenizer: true,
848
+ confidenceThreshold: 0.7,
849
+ },
850
+ profileKeywords: [
851
+ 'بدل',
852
+ 'أضف',
853
+ 'أزل',
854
+ 'ضع',
855
+ 'اضبط',
856
+ 'أظهر',
857
+ 'أخف',
858
+ 'ألحق',
859
+ 'سبق',
860
+ 'زد',
861
+ 'أنقص',
862
+ 'انتظر',
863
+ 'جلب',
864
+ 'اذهب',
865
+ 'تشغيل',
866
+ 'أرسل',
867
+ 'سجل',
868
+ 'خذ',
869
+ 'اصنع',
870
+ 'استنسخ',
871
+ 'تركيز',
872
+ 'ضبابية',
873
+ 'انتقال',
874
+ 'استقر',
875
+ 'استدع',
876
+ 'ارجع',
877
+ 'جافاسكربت',
878
+ 'متزامن',
879
+ 'أخبر',
880
+ 'افتراضي',
881
+ 'تهيئة',
882
+ 'سلوك',
883
+ 'في',
884
+ 'قبل',
885
+ 'بعد',
886
+ ],
887
+ tokenizerKeywords: [
888
+ // Commands - Class/Attribute operations
889
+ 'بدّل',
890
+ 'بدل',
891
+ 'غيّر',
892
+ 'غير',
893
+ 'أضف',
894
+ 'اضف',
895
+ 'زِد',
896
+ 'أزل',
897
+ 'ازل',
898
+ 'احذف',
899
+ 'امسح',
900
+ // Commands - Content operations
901
+ 'ضع',
902
+ 'اضع',
903
+ 'يضع',
904
+ 'اجعل',
905
+ 'ألحق',
906
+ 'سبق',
907
+ 'خذ',
908
+ 'اصنع',
909
+ 'أنشئ',
910
+ 'استنسخ',
911
+ 'انسخ',
912
+ // Commands - Variable operations
913
+ 'اضبط',
914
+ 'عيّن',
915
+ 'عين',
916
+ 'حدد',
917
+ 'احصل',
918
+ 'زِد',
919
+ 'زد',
920
+ 'ارفع',
921
+ 'أنقص',
922
+ 'انقص',
923
+ 'قلل',
924
+ 'سجّل',
925
+ 'سجل',
926
+ // Commands - Visibility
927
+ 'أظهر',
928
+ 'اظهر',
929
+ 'اعرض',
930
+ 'أخفِ',
931
+ 'اخفِ',
932
+ 'اخف',
933
+ 'اخفي',
934
+ 'انتقال',
935
+ 'انتقل',
936
+ // Commands - Events
937
+ 'على',
938
+ 'عند',
939
+ 'لدى',
940
+ 'حين',
941
+ 'تشغيل',
942
+ 'شغّل',
943
+ 'شغل',
944
+ 'أطلق',
945
+ 'فعّل',
946
+ 'أرسل',
947
+ 'ارسل',
948
+ // Commands - DOM focus
949
+ 'تركيز',
950
+ 'ركز',
951
+ 'ضبابية',
952
+ 'شوش',
953
+ // Commands - Navigation
954
+ 'اذهب',
955
+ // Commands - Async
956
+ 'انتظر',
957
+ 'احضر',
958
+ 'جلب',
959
+ 'استقر',
960
+ // Commands - Control flow
961
+ 'إذا',
962
+ 'اذا',
963
+ 'لو',
964
+ 'وإلا',
965
+ 'والا',
966
+ 'كرر',
967
+ 'لكل',
968
+ 'بينما',
969
+ 'واصل',
970
+ 'أوقف',
971
+ 'توقف',
972
+ 'ارم',
973
+ 'ارمِ',
974
+ 'استدع',
975
+ 'اتصل',
976
+ 'نادِ',
977
+ 'ارجع',
978
+ 'عُد',
979
+ // Commands - Advanced
980
+ 'جافاسكربت',
981
+ 'js',
982
+ 'متزامن',
983
+ 'أخبر',
984
+ 'افتراضي',
985
+ 'تهيئة',
986
+ 'بدء',
987
+ 'سلوك',
988
+ // Control flow helpers
989
+ 'ثم',
990
+ 'بعدها', // then
991
+ 'نهاية',
992
+ 'انتهى', // end
993
+ // Modifiers
994
+ 'في',
995
+ 'إلى',
996
+ 'قبل',
997
+ 'بعد',
998
+ ],
999
+ missingFromTokenizer: [], // Now synced
1000
+ potentialConflicts: [],
1001
+ },
1002
+ {
1003
+ code: 'es',
1004
+ name: 'Spanish',
1005
+ wordOrder: 'SVO',
1006
+ direction: 'ltr',
1007
+ files: {
1008
+ languageProfile: true,
1009
+ tokenizer: true,
1010
+ tokenizerRegistered: true,
1011
+ morphologicalNormalizer: true,
1012
+ eventHandlerPatterns: true,
1013
+ tests: true,
1014
+ morphologyTests: true,
1015
+ },
1016
+ morphology: {
1017
+ needed: true,
1018
+ reason: 'Spanish has verb conjugations and reflexive verb forms',
1019
+ inflectionTypes: [
1020
+ '-ar conjugations (alternando, alternado → alternar)',
1021
+ '-er conjugations (escondiendo → esconder)',
1022
+ '-ir conjugations (similar pattern)',
1023
+ 'Reflexive verbs (mostrarse → mostrar)',
1024
+ 'Reflexive pronouns (se muestra → mostrar)',
1025
+ 'Imperative reflexive (muéstrate → mostrar)',
1026
+ ],
1027
+ integratedWithTokenizer: true,
1028
+ confidenceThreshold: 0.7,
1029
+ },
1030
+ profileKeywords: [
1031
+ 'alternar',
1032
+ 'añadir',
1033
+ 'quitar',
1034
+ 'poner',
1035
+ 'establecer',
1036
+ 'mostrar',
1037
+ 'ocultar',
1038
+ 'añadir',
1039
+ 'anteponer',
1040
+ 'incrementar',
1041
+ 'decrementar',
1042
+ 'esperar',
1043
+ 'obtener',
1044
+ 'ir',
1045
+ 'disparar',
1046
+ 'enviar',
1047
+ 'registrar',
1048
+ 'tomar',
1049
+ 'hacer',
1050
+ 'clonar',
1051
+ 'enfocar',
1052
+ 'desenfocar',
1053
+ 'transición',
1054
+ 'estabilizar',
1055
+ 'llamar',
1056
+ 'devolver',
1057
+ 'js',
1058
+ 'asíncrono',
1059
+ 'decir',
1060
+ 'predeterminar',
1061
+ 'iniciar',
1062
+ 'comportamiento',
1063
+ 'dentro de',
1064
+ 'antes',
1065
+ 'después',
1066
+ ],
1067
+ tokenizerKeywords: [
1068
+ // Commands - Class/Attribute operations
1069
+ 'alternar',
1070
+ 'cambiar',
1071
+ 'toggle',
1072
+ 'conmutar',
1073
+ 'añadir',
1074
+ 'agregar',
1075
+ 'quitar',
1076
+ 'eliminar',
1077
+ 'borrar',
1078
+ 'remover',
1079
+ // Commands - Content operations
1080
+ 'poner',
1081
+ 'pon',
1082
+ 'colocar',
1083
+ 'anteponer',
1084
+ 'tomar',
1085
+ 'hacer',
1086
+ 'crear',
1087
+ 'clonar',
1088
+ 'copiar',
1089
+ 'intercambiar', // swap
1090
+ 'transformar',
1091
+ 'mutar', // morph
1092
+ // Commands - Variable operations
1093
+ 'establecer',
1094
+ 'fijar',
1095
+ 'obtener',
1096
+ 'incrementar',
1097
+ 'aumentar',
1098
+ 'decrementar',
1099
+ 'disminuir',
1100
+ 'registrar',
1101
+ 'imprimir',
1102
+ // Commands - Visibility
1103
+ 'mostrar',
1104
+ 'enseñar',
1105
+ 'ocultar',
1106
+ 'esconder',
1107
+ 'transición',
1108
+ 'animar',
1109
+ // Commands - Events
1110
+ 'en',
1111
+ 'cuando',
1112
+ 'al',
1113
+ 'disparar',
1114
+ 'activar',
1115
+ 'enviar',
1116
+ // Commands - DOM focus
1117
+ 'enfocar',
1118
+ 'desenfocar',
1119
+ // Commands - Navigation
1120
+ 'ir',
1121
+ 'navegar',
1122
+ // Commands - Async
1123
+ 'esperar',
1124
+ 'buscar',
1125
+ 'estabilizar',
1126
+ // Commands - Control flow
1127
+ 'si',
1128
+ 'sino',
1129
+ 'repetir',
1130
+ 'para',
1131
+ 'mientras',
1132
+ 'continuar',
1133
+ 'detener',
1134
+ 'parar',
1135
+ 'lanzar',
1136
+ 'arrojar',
1137
+ 'llamar',
1138
+ 'devolver',
1139
+ 'retornar',
1140
+ // Commands - Advanced
1141
+ 'js',
1142
+ 'asíncrono',
1143
+ 'asincrono',
1144
+ 'decir',
1145
+ 'predeterminar',
1146
+ 'iniciar',
1147
+ 'inicializar',
1148
+ 'comportamiento',
1149
+ // Control flow helpers
1150
+ 'entonces',
1151
+ 'luego', // then
1152
+ 'fin',
1153
+ 'terminar', // end
1154
+ // Modifiers
1155
+ 'dentro de',
1156
+ 'antes',
1157
+ 'después',
1158
+ 'despues',
1159
+ ],
1160
+ missingFromTokenizer: [], // Now synced
1161
+ potentialConflicts: [],
1162
+ },
1163
+ {
1164
+ code: 'tr',
1165
+ name: 'Turkish',
1166
+ wordOrder: 'SOV',
1167
+ direction: 'ltr',
1168
+ files: {
1169
+ languageProfile: true,
1170
+ tokenizer: true,
1171
+ tokenizerRegistered: true,
1172
+ morphologicalNormalizer: true,
1173
+ eventHandlerPatterns: false, // Uses generated patterns
1174
+ tests: true,
1175
+ morphologyTests: true,
1176
+ },
1177
+ morphology: {
1178
+ needed: true,
1179
+ reason: 'Turkish is highly agglutinative with vowel harmony',
1180
+ inflectionTypes: [
1181
+ 'Vowel harmony (değiştir + iyor → değiştiriyor)',
1182
+ 'Present continuous -iyor/-ıyor/-uyor/-üyor',
1183
+ 'Past tense -di/-dı/-du/-dü',
1184
+ 'Reported past -miş/-mış/-muş/-müş',
1185
+ 'Person suffixes -im/-sin/-iz',
1186
+ 'Negation -me/-ma',
1187
+ 'Infinitive -mek/-mak',
1188
+ ],
1189
+ integratedWithTokenizer: true,
1190
+ confidenceThreshold: 0.7,
1191
+ },
1192
+ profileKeywords: [
1193
+ 'değiştir',
1194
+ 'ekle',
1195
+ 'kaldır',
1196
+ 'koy',
1197
+ 'ayarla',
1198
+ 'göster',
1199
+ 'gizle',
1200
+ 'ekle',
1201
+ 'öneekle',
1202
+ 'artır',
1203
+ 'azalt',
1204
+ 'bekle',
1205
+ 'getir',
1206
+ 'git',
1207
+ 'tetikle',
1208
+ 'gönder',
1209
+ 'kaydet',
1210
+ 'al',
1211
+ 'yap',
1212
+ 'kopyala',
1213
+ 'odakla',
1214
+ 'bulanıklaştır',
1215
+ 'geçiş',
1216
+ 'yerleş',
1217
+ 'fırlat',
1218
+ 'asenkron',
1219
+ 'söyle',
1220
+ 'varsayılan',
1221
+ 'başlat',
1222
+ 'davranış',
1223
+ 'içine',
1224
+ 'önce',
1225
+ 'sonra',
1226
+ ],
1227
+ tokenizerKeywords: [
1228
+ // Commands - Class/Attribute operations
1229
+ 'değiştir',
1230
+ 'değistir',
1231
+ 'ekle',
1232
+ 'kaldır',
1233
+ 'kaldir',
1234
+ 'sil',
1235
+ // Commands - Content operations
1236
+ 'koy',
1237
+ 'yerleştir',
1238
+ 'yerlestir',
1239
+ 'sonunaekle',
1240
+ 'sona_ekle',
1241
+ 'basaekle',
1242
+ 'başa_ekle',
1243
+ 'basa_ekle',
1244
+ 'al',
1245
+ 'yap',
1246
+ 'oluştur',
1247
+ 'olustur',
1248
+ 'kopyala',
1249
+ 'klonla',
1250
+ 'dönüştür',
1251
+ 'donustur', // morph
1252
+ // Commands - Variable operations
1253
+ 'ayarla',
1254
+ 'belirle',
1255
+ 'getir',
1256
+ 'elde_et',
1257
+ 'artır',
1258
+ 'artir',
1259
+ 'azalt',
1260
+ 'kaydet',
1261
+ 'yazdır',
1262
+ 'yazdir',
1263
+ // Commands - Visibility
1264
+ 'göster',
1265
+ 'goster',
1266
+ 'gizle',
1267
+ 'sakla',
1268
+ 'geçiş',
1269
+ 'gecis',
1270
+ 'animasyon',
1271
+ // Commands - Events
1272
+ 'üzerinde',
1273
+ 'uzerinde',
1274
+ 'olduğunda',
1275
+ 'oldugunda',
1276
+ 'tetikle',
1277
+ 'ateşle',
1278
+ 'atesle',
1279
+ 'gönder',
1280
+ 'gonder',
1281
+ // Commands - DOM focus
1282
+ 'odakla',
1283
+ 'odaklan',
1284
+ 'bulanıklaştır',
1285
+ 'bulaniklastir',
1286
+ 'odak_kaldır',
1287
+ 'odak_kaldir',
1288
+ // Commands - Navigation
1289
+ 'git',
1290
+ 'yönlendir',
1291
+ 'yonlendir',
1292
+ // Commands - Async
1293
+ 'bekle',
1294
+ 'çek',
1295
+ 'cek',
1296
+ 'yerleş',
1297
+ 'yerles',
1298
+ 'istikrar',
1299
+ 'sabitlen',
1300
+ // Commands - Control flow
1301
+ 'eğer',
1302
+ 'eger',
1303
+ 'yoksa',
1304
+ 'değilse',
1305
+ 'degilse',
1306
+ 'tekrarla',
1307
+ 'herbir',
1308
+ 'her',
1309
+ 'iken',
1310
+ 'devam',
1311
+ 'devam_et',
1312
+ 'dur',
1313
+ 'durdur',
1314
+ 'fırlat',
1315
+ 'firlat',
1316
+ 'at',
1317
+ 'çağır',
1318
+ 'cagir',
1319
+ 'dön',
1320
+ 'don',
1321
+ 'döndür',
1322
+ 'dondur',
1323
+ // Commands - Advanced
1324
+ 'js',
1325
+ 'javascript',
1326
+ 'asenkron',
1327
+ 'eşzamansız',
1328
+ 'eszamansiz',
1329
+ 'söyle',
1330
+ 'soyle',
1331
+ 'varsayılan',
1332
+ 'varsayilan',
1333
+ 'başlat',
1334
+ 'baslat',
1335
+ 'başla',
1336
+ 'basla',
1337
+ 'davranış',
1338
+ 'davranis',
1339
+ // Control flow helpers
1340
+ 'sonra',
1341
+ 'ardından',
1342
+ 'ardindan', // then
1343
+ 'son',
1344
+ 'bitti', // end
1345
+ // Modifiers
1346
+ 'içine',
1347
+ 'icine',
1348
+ 'önce',
1349
+ 'once',
1350
+ ],
1351
+ missingFromTokenizer: [], // Now synced
1352
+ potentialConflicts: [],
1353
+ },
1354
+ {
1355
+ code: 'zh',
1356
+ name: 'Chinese',
1357
+ wordOrder: 'SVO',
1358
+ direction: 'ltr',
1359
+ files: {
1360
+ languageProfile: true,
1361
+ tokenizer: true,
1362
+ tokenizerRegistered: true,
1363
+ morphologicalNormalizer: false,
1364
+ eventHandlerPatterns: false, // Uses generated patterns
1365
+ tests: true,
1366
+ morphologyTests: false,
1367
+ },
1368
+ morphology: {
1369
+ needed: false,
1370
+ reason: 'Chinese is isolating with no verb conjugation; keywords match base forms',
1371
+ inflectionTypes: [],
1372
+ integratedWithTokenizer: false,
1373
+ confidenceThreshold: 1.0,
1374
+ },
1375
+ profileKeywords: [
1376
+ '切换',
1377
+ '添加',
1378
+ '移除',
1379
+ '放置',
1380
+ '设置',
1381
+ '显示',
1382
+ '隐藏',
1383
+ '追加',
1384
+ '前置',
1385
+ '增加',
1386
+ '减少',
1387
+ '等待',
1388
+ '获取',
1389
+ '前往',
1390
+ '触发',
1391
+ '发送',
1392
+ '日志',
1393
+ '拿取',
1394
+ '制作',
1395
+ '复制',
1396
+ '聚焦',
1397
+ '失焦',
1398
+ '过渡',
1399
+ '稳定',
1400
+ '抛出',
1401
+ '调用',
1402
+ '返回',
1403
+ '脚本',
1404
+ '异步',
1405
+ '告诉',
1406
+ '默认',
1407
+ '初始化',
1408
+ '行为',
1409
+ '里',
1410
+ '之前',
1411
+ '之后',
1412
+ ],
1413
+ tokenizerKeywords: [
1414
+ // Commands - Class/Attribute operations
1415
+ '切换',
1416
+ '添加',
1417
+ '加',
1418
+ '移除',
1419
+ '删除',
1420
+ '去掉',
1421
+ // Commands - Content operations
1422
+ '放置',
1423
+ '放',
1424
+ '放入',
1425
+ '追加',
1426
+ '附加',
1427
+ '前置',
1428
+ '预置',
1429
+ '拿取',
1430
+ '取',
1431
+ '制作',
1432
+ '创建',
1433
+ '复制',
1434
+ '克隆',
1435
+ '交换',
1436
+ '互换', // swap
1437
+ '变形',
1438
+ '变换', // morph
1439
+ // Commands - Variable operations
1440
+ '设置',
1441
+ '设定',
1442
+ '获取',
1443
+ '取得',
1444
+ '获得',
1445
+ '增加',
1446
+ '减少',
1447
+ '日志',
1448
+ '记录',
1449
+ '打印',
1450
+ // Commands - Visibility
1451
+ '显示',
1452
+ '展示',
1453
+ '隐藏',
1454
+ '过渡',
1455
+ '动画',
1456
+ // Commands - Events
1457
+ '当',
1458
+ '在',
1459
+ '触发',
1460
+ '激发',
1461
+ '发送',
1462
+ // Commands - DOM focus
1463
+ '聚焦',
1464
+ '对焦',
1465
+ '失焦',
1466
+ '模糊',
1467
+ // Commands - Navigation
1468
+ '前往',
1469
+ '跳转',
1470
+ '导航',
1471
+ // Commands - Async
1472
+ '等待',
1473
+ '抓取',
1474
+ '获取数据',
1475
+ '稳定',
1476
+ '安定',
1477
+ // Commands - Control flow
1478
+ '如果',
1479
+ '若',
1480
+ '否则',
1481
+ '不然',
1482
+ '重复',
1483
+ '循环',
1484
+ '遍历',
1485
+ '每个',
1486
+ '为每',
1487
+ '当',
1488
+ '继续',
1489
+ '停止',
1490
+ '中止',
1491
+ '抛出',
1492
+ '抛',
1493
+ '调用',
1494
+ '呼叫',
1495
+ '返回',
1496
+ '回',
1497
+ // Commands - Advanced
1498
+ 'js',
1499
+ 'javascript',
1500
+ '脚本',
1501
+ '异步',
1502
+ '告诉',
1503
+ '通知',
1504
+ '默认',
1505
+ '缺省',
1506
+ '初始化',
1507
+ '初始',
1508
+ '行为',
1509
+ '动作',
1510
+ // Control flow helpers
1511
+ '然后',
1512
+ '接着', // then
1513
+ '结束',
1514
+ '完成', // end
1515
+ // Modifiers
1516
+ '到里面',
1517
+ '进入',
1518
+ '里',
1519
+ '之前',
1520
+ '前',
1521
+ '之后',
1522
+ '后',
1523
+ ],
1524
+ missingFromTokenizer: [], // Now synced
1525
+ potentialConflicts: [],
1526
+ },
1527
+ {
1528
+ code: 'pt',
1529
+ name: 'Portuguese',
1530
+ wordOrder: 'SVO',
1531
+ direction: 'ltr',
1532
+ files: {
1533
+ languageProfile: true,
1534
+ tokenizer: true,
1535
+ tokenizerRegistered: true,
1536
+ morphologicalNormalizer: false,
1537
+ eventHandlerPatterns: false,
1538
+ tests: false,
1539
+ morphologyTests: false,
1540
+ },
1541
+ morphology: {
1542
+ needed: false,
1543
+ reason: 'Portuguese has verb conjugation but keywords match infinitive forms',
1544
+ inflectionTypes: [],
1545
+ integratedWithTokenizer: false,
1546
+ confidenceThreshold: 1.0,
1547
+ },
1548
+ profileKeywords: [
1549
+ 'alternar',
1550
+ 'adicionar',
1551
+ 'remover',
1552
+ 'colocar',
1553
+ 'definir',
1554
+ 'mostrar',
1555
+ 'ocultar',
1556
+ 'anexar',
1557
+ 'preceder',
1558
+ 'incrementar',
1559
+ 'decrementar',
1560
+ 'esperar',
1561
+ 'buscar',
1562
+ 'ir',
1563
+ 'disparar',
1564
+ 'enviar',
1565
+ 'registrar',
1566
+ 'fazer',
1567
+ 'clonar',
1568
+ 'pegar',
1569
+ 'obter',
1570
+ 'focar',
1571
+ 'desfocar',
1572
+ ],
1573
+ tokenizerKeywords: [
1574
+ // Commands - Class/Attribute operations
1575
+ 'alternar',
1576
+ 'trocar',
1577
+ 'adicionar',
1578
+ 'acrescentar',
1579
+ 'remover',
1580
+ 'eliminar',
1581
+ 'apagar',
1582
+ // Commands - Content operations
1583
+ 'colocar',
1584
+ 'pôr',
1585
+ 'por',
1586
+ 'anexar',
1587
+ 'preceder',
1588
+ 'pegar',
1589
+ 'fazer',
1590
+ 'criar',
1591
+ 'clonar',
1592
+ 'copiar',
1593
+ 'transformar',
1594
+ 'mutar', // morph
1595
+ // Commands - Variable operations
1596
+ 'definir',
1597
+ 'configurar',
1598
+ 'obter',
1599
+ 'incrementar',
1600
+ 'aumentar',
1601
+ 'decrementar',
1602
+ 'diminuir',
1603
+ 'registrar',
1604
+ 'imprimir',
1605
+ // Commands - Visibility
1606
+ 'mostrar',
1607
+ 'exibir',
1608
+ 'ocultar',
1609
+ 'esconder',
1610
+ 'transição',
1611
+ 'transicao',
1612
+ 'animar',
1613
+ // Commands - Events
1614
+ 'em',
1615
+ 'quando',
1616
+ 'ao',
1617
+ 'disparar',
1618
+ 'ativar',
1619
+ 'enviar',
1620
+ // Commands - DOM focus
1621
+ 'focar',
1622
+ 'foco',
1623
+ 'desfocar',
1624
+ // Commands - Navigation
1625
+ 'ir',
1626
+ 'navegar',
1627
+ // Commands - Async
1628
+ 'esperar',
1629
+ 'aguardar',
1630
+ 'buscar',
1631
+ 'estabilizar',
1632
+ // Commands - Control flow
1633
+ 'se',
1634
+ 'senão',
1635
+ 'senao',
1636
+ 'repetir',
1637
+ 'para',
1638
+ 'enquanto',
1639
+ 'continuar',
1640
+ 'parar',
1641
+ 'lançar',
1642
+ 'lancar',
1643
+ 'chamar',
1644
+ 'retornar',
1645
+ 'devolver',
1646
+ // Commands - Advanced
1647
+ 'js',
1648
+ 'assíncrono',
1649
+ 'assincrono',
1650
+ 'dizer',
1651
+ 'padrão',
1652
+ 'padrao',
1653
+ 'iniciar',
1654
+ 'inicializar',
1655
+ 'comportamento',
1656
+ // Control flow helpers
1657
+ 'então',
1658
+ 'entao',
1659
+ 'depois', // then
1660
+ 'fim',
1661
+ 'terminar', // end
1662
+ // Modifiers
1663
+ 'dentro de',
1664
+ 'antes',
1665
+ ],
1666
+ missingFromTokenizer: [],
1667
+ potentialConflicts: [],
1668
+ },
1669
+ {
1670
+ code: 'fr',
1671
+ name: 'French',
1672
+ wordOrder: 'SVO',
1673
+ direction: 'ltr',
1674
+ files: {
1675
+ languageProfile: true,
1676
+ tokenizer: true,
1677
+ tokenizerRegistered: true,
1678
+ morphologicalNormalizer: false,
1679
+ eventHandlerPatterns: false,
1680
+ tests: false,
1681
+ morphologyTests: false,
1682
+ },
1683
+ morphology: {
1684
+ needed: false,
1685
+ reason: 'French has verb conjugation but keywords match infinitive forms',
1686
+ inflectionTypes: [],
1687
+ integratedWithTokenizer: false,
1688
+ confidenceThreshold: 1.0,
1689
+ },
1690
+ profileKeywords: [
1691
+ 'basculer',
1692
+ 'ajouter',
1693
+ 'supprimer',
1694
+ 'mettre',
1695
+ 'définir',
1696
+ 'montrer',
1697
+ 'cacher',
1698
+ 'annexer',
1699
+ 'préfixer',
1700
+ 'incrémenter',
1701
+ 'décrémenter',
1702
+ 'attendre',
1703
+ 'chercher',
1704
+ 'aller',
1705
+ 'déclencher',
1706
+ 'envoyer',
1707
+ 'enregistrer',
1708
+ 'faire',
1709
+ 'cloner',
1710
+ 'prendre',
1711
+ 'obtenir',
1712
+ 'focaliser',
1713
+ 'défocaliser',
1714
+ ],
1715
+ tokenizerKeywords: [
1716
+ // Commands - Class/Attribute operations
1717
+ 'basculer',
1718
+ 'permuter',
1719
+ 'alterner',
1720
+ 'ajouter',
1721
+ 'supprimer',
1722
+ 'enlever',
1723
+ 'retirer',
1724
+ // Commands - Content operations
1725
+ 'mettre',
1726
+ 'placer',
1727
+ 'annexer',
1728
+ 'préfixer',
1729
+ 'prefixer',
1730
+ 'prendre',
1731
+ 'faire',
1732
+ 'créer',
1733
+ 'creer',
1734
+ 'cloner',
1735
+ 'copier',
1736
+ 'transformer',
1737
+ 'transmuter', // morph
1738
+ // Commands - Variable operations
1739
+ 'définir',
1740
+ 'definir',
1741
+ 'établir',
1742
+ 'etablir',
1743
+ 'obtenir',
1744
+ 'incrémenter',
1745
+ 'incrementer',
1746
+ 'décrémenter',
1747
+ 'decrementer',
1748
+ 'enregistrer',
1749
+ 'journaliser',
1750
+ 'afficher',
1751
+ // Commands - Visibility
1752
+ 'montrer',
1753
+ 'cacher',
1754
+ 'masquer',
1755
+ 'transition',
1756
+ 'animer',
1757
+ // Commands - Events
1758
+ 'sur',
1759
+ 'quand',
1760
+ 'lors',
1761
+ 'déclencher',
1762
+ 'declencher',
1763
+ 'envoyer',
1764
+ // Commands - DOM focus
1765
+ 'focaliser',
1766
+ 'concentrer',
1767
+ 'défocaliser',
1768
+ 'defocaliser',
1769
+ // Commands - Navigation
1770
+ 'aller',
1771
+ 'naviguer',
1772
+ // Commands - Async
1773
+ 'attendre',
1774
+ 'chercher',
1775
+ 'récupérer',
1776
+ 'recuperer',
1777
+ 'stabiliser',
1778
+ // Commands - Control flow
1779
+ 'si',
1780
+ 'sinon',
1781
+ 'répéter',
1782
+ 'repeter',
1783
+ 'pour',
1784
+ 'tant que',
1785
+ 'pendant',
1786
+ 'continuer',
1787
+ 'arrêter',
1788
+ 'arreter',
1789
+ 'stopper',
1790
+ 'lancer',
1791
+ 'appeler',
1792
+ 'retourner',
1793
+ 'renvoyer',
1794
+ // Commands - Advanced
1795
+ 'js',
1796
+ 'asynchrone',
1797
+ 'dire',
1798
+ 'défaut',
1799
+ 'defaut',
1800
+ 'initialiser',
1801
+ 'comportement',
1802
+ // Control flow helpers
1803
+ 'alors',
1804
+ 'puis',
1805
+ 'ensuite', // then
1806
+ 'fin',
1807
+ 'terminer',
1808
+ 'finir', // end
1809
+ // Modifiers
1810
+ 'dans',
1811
+ 'avant',
1812
+ 'après',
1813
+ 'apres',
1814
+ ],
1815
+ missingFromTokenizer: [],
1816
+ potentialConflicts: [],
1817
+ },
1818
+ {
1819
+ code: 'de',
1820
+ name: 'German',
1821
+ wordOrder: 'SVO',
1822
+ direction: 'ltr',
1823
+ files: {
1824
+ languageProfile: true,
1825
+ tokenizer: true,
1826
+ tokenizerRegistered: true,
1827
+ morphologicalNormalizer: false,
1828
+ eventHandlerPatterns: false,
1829
+ tests: false,
1830
+ morphologyTests: false,
1831
+ },
1832
+ morphology: {
1833
+ needed: false,
1834
+ reason: 'German has verb conjugation but keywords match infinitive forms',
1835
+ inflectionTypes: [],
1836
+ integratedWithTokenizer: false,
1837
+ confidenceThreshold: 1.0,
1838
+ },
1839
+ profileKeywords: [
1840
+ 'umschalten',
1841
+ 'hinzufügen',
1842
+ 'entfernen',
1843
+ 'setzen',
1844
+ 'festlegen',
1845
+ 'zeigen',
1846
+ 'verbergen',
1847
+ 'anhängen',
1848
+ 'voranstellen',
1849
+ 'erhöhen',
1850
+ 'verringern',
1851
+ 'warten',
1852
+ 'abrufen',
1853
+ 'gehen',
1854
+ 'auslösen',
1855
+ 'senden',
1856
+ 'protokollieren',
1857
+ 'machen',
1858
+ 'klonen',
1859
+ 'nehmen',
1860
+ 'holen',
1861
+ 'fokussieren',
1862
+ 'defokussieren',
1863
+ ],
1864
+ tokenizerKeywords: [
1865
+ // Commands - Class/Attribute operations
1866
+ 'umschalten',
1867
+ 'wechseln',
1868
+ 'hinzufügen',
1869
+ 'hinzufugen',
1870
+ 'hinzufgen',
1871
+ 'entfernen',
1872
+ 'löschen',
1873
+ 'loschen',
1874
+ // Commands - Content operations
1875
+ 'setzen',
1876
+ 'stellen',
1877
+ 'platzieren',
1878
+ 'anhängen',
1879
+ 'anhangen',
1880
+ 'voranstellen',
1881
+ 'nehmen',
1882
+ 'machen',
1883
+ 'erstellen',
1884
+ 'erzeugen',
1885
+ 'klonen',
1886
+ 'kopieren',
1887
+ 'verwandeln',
1888
+ 'transformieren', // morph
1889
+ // Commands - Variable operations
1890
+ 'festlegen',
1891
+ 'definieren',
1892
+ 'holen',
1893
+ 'bekommen',
1894
+ 'erhöhen',
1895
+ 'erhohen',
1896
+ 'verringern',
1897
+ 'vermindern',
1898
+ 'protokollieren',
1899
+ 'ausgeben',
1900
+ // Commands - Visibility
1901
+ 'zeigen',
1902
+ 'anzeigen',
1903
+ 'verbergen',
1904
+ 'verstecken',
1905
+ 'übergang',
1906
+ 'ubergang',
1907
+ 'animieren',
1908
+ // Commands - Events
1909
+ 'bei',
1910
+ 'wenn',
1911
+ 'auf',
1912
+ 'auslösen',
1913
+ 'auslosen',
1914
+ 'senden',
1915
+ 'schicken',
1916
+ // Commands - DOM focus
1917
+ 'fokussieren',
1918
+ 'defokussieren',
1919
+ 'entfokussieren',
1920
+ // Commands - Navigation
1921
+ 'gehen',
1922
+ 'navigieren',
1923
+ // Commands - Async
1924
+ 'warten',
1925
+ 'abrufen',
1926
+ 'laden',
1927
+ 'stabilisieren',
1928
+ // Commands - Control flow
1929
+ 'falls',
1930
+ 'sonst',
1931
+ 'ansonsten',
1932
+ 'wiederholen',
1933
+ 'für',
1934
+ 'solange',
1935
+ 'während',
1936
+ 'fortfahren',
1937
+ 'weiter',
1938
+ 'anhalten',
1939
+ 'stoppen',
1940
+ 'werfen',
1941
+ 'aufrufen',
1942
+ 'zurückgeben',
1943
+ 'zuruckgeben',
1944
+ // Commands - Advanced
1945
+ 'js',
1946
+ 'javascript',
1947
+ 'asynchron',
1948
+ 'sagen',
1949
+ 'standard',
1950
+ 'initialisieren',
1951
+ 'verhalten',
1952
+ // Control flow helpers
1953
+ 'dann',
1954
+ 'danach',
1955
+ 'anschließend',
1956
+ 'anschliessend', // then
1957
+ 'ende',
1958
+ 'beenden',
1959
+ 'fertig', // end
1960
+ // Modifiers
1961
+ 'hinein',
1962
+ 'vor',
1963
+ 'nach',
1964
+ ],
1965
+ missingFromTokenizer: [],
1966
+ potentialConflicts: [],
1967
+ },
1968
+ {
1969
+ code: 'id',
1970
+ name: 'Indonesian',
1971
+ wordOrder: 'SVO',
1972
+ direction: 'ltr',
1973
+ files: {
1974
+ languageProfile: true,
1975
+ tokenizer: true,
1976
+ tokenizerRegistered: true,
1977
+ morphologicalNormalizer: false,
1978
+ eventHandlerPatterns: false,
1979
+ tests: false,
1980
+ morphologyTests: false,
1981
+ },
1982
+ morphology: {
1983
+ needed: false,
1984
+ reason: 'Indonesian is agglutinative but prefixes/suffixes are consistent',
1985
+ inflectionTypes: [],
1986
+ integratedWithTokenizer: false,
1987
+ confidenceThreshold: 1.0,
1988
+ },
1989
+ profileKeywords: [
1990
+ 'alihkan',
1991
+ 'tambah',
1992
+ 'hapus',
1993
+ 'taruh',
1994
+ 'atur',
1995
+ 'tampilkan',
1996
+ 'sembunyikan',
1997
+ 'sisipkan',
1998
+ 'awali',
1999
+ 'tingkatkan',
2000
+ 'turunkan',
2001
+ 'tunggu',
2002
+ 'ambil',
2003
+ 'pergi',
2004
+ 'picu',
2005
+ 'kirim',
2006
+ 'catat',
2007
+ 'buat',
2008
+ 'klon',
2009
+ 'ambil',
2010
+ 'dapatkan',
2011
+ 'fokus',
2012
+ 'blur',
2013
+ ],
2014
+ tokenizerKeywords: [
2015
+ // Commands - Class/Attribute operations
2016
+ 'alihkan',
2017
+ 'ganti',
2018
+ 'tukar',
2019
+ 'tambah',
2020
+ 'tambahkan',
2021
+ 'hapus',
2022
+ 'buang',
2023
+ 'hilangkan',
2024
+ // Commands - Content operations
2025
+ 'taruh',
2026
+ 'letakkan',
2027
+ 'masukkan',
2028
+ 'sisipkan',
2029
+ 'awali',
2030
+ 'ambil',
2031
+ 'buat',
2032
+ 'bikin',
2033
+ 'ciptakan',
2034
+ 'klon',
2035
+ 'salin',
2036
+ 'tiru',
2037
+ 'ubah',
2038
+ 'transformasi', // morph
2039
+ // Commands - Variable operations
2040
+ 'atur',
2041
+ 'tetapkan',
2042
+ 'dapatkan',
2043
+ 'peroleh',
2044
+ 'tingkatkan',
2045
+ 'naikkan',
2046
+ 'turunkan',
2047
+ 'kurangi',
2048
+ 'catat',
2049
+ 'rekam',
2050
+ 'cetak',
2051
+ // Commands - Visibility
2052
+ 'tampilkan',
2053
+ 'perlihatkan',
2054
+ 'sembunyikan',
2055
+ 'tutup',
2056
+ 'transisi',
2057
+ 'animasikan',
2058
+ // Commands - Events
2059
+ 'pada',
2060
+ 'saat',
2061
+ 'ketika',
2062
+ 'picu',
2063
+ 'jalankan',
2064
+ 'kirim',
2065
+ 'kirimkan',
2066
+ // Commands - DOM focus
2067
+ 'fokus',
2068
+ 'fokuskan',
2069
+ 'hilangkan fokus',
2070
+ 'blur',
2071
+ // Commands - Navigation
2072
+ 'pergi',
2073
+ 'pindah',
2074
+ 'navigasi',
2075
+ // Commands - Async
2076
+ 'tunggu',
2077
+ 'muat',
2078
+ 'stabilkan',
2079
+ // Commands - Control flow
2080
+ 'jika',
2081
+ 'kalau',
2082
+ 'bila',
2083
+ 'selainnya',
2084
+ 'jika tidak',
2085
+ 'ulangi',
2086
+ 'untuk',
2087
+ 'selama',
2088
+ 'lanjutkan',
2089
+ 'terus',
2090
+ 'hentikan',
2091
+ 'berhenti',
2092
+ 'lempar',
2093
+ 'panggil',
2094
+ 'kembalikan',
2095
+ 'kembali',
2096
+ // Commands - Advanced
2097
+ 'js',
2098
+ 'javascript',
2099
+ 'asinkron',
2100
+ 'katakan',
2101
+ 'beritahu',
2102
+ 'bawaan',
2103
+ 'inisialisasi',
2104
+ 'mulai',
2105
+ 'perilaku',
2106
+ // Control flow helpers
2107
+ 'maka',
2108
+ 'lalu',
2109
+ 'kemudian', // then
2110
+ 'akhir',
2111
+ 'selesai',
2112
+ 'tamat', // end
2113
+ // Modifiers
2114
+ 'ke dalam',
2115
+ 'sebelum',
2116
+ 'sesudah',
2117
+ 'setelah',
2118
+ ],
2119
+ missingFromTokenizer: [],
2120
+ potentialConflicts: [],
2121
+ },
2122
+ {
2123
+ code: 'qu',
2124
+ name: 'Quechua',
2125
+ wordOrder: 'SOV',
2126
+ direction: 'ltr',
2127
+ files: {
2128
+ languageProfile: true,
2129
+ tokenizer: true,
2130
+ tokenizerRegistered: true,
2131
+ morphologicalNormalizer: false,
2132
+ eventHandlerPatterns: false,
2133
+ tests: false,
2134
+ morphologyTests: false,
2135
+ },
2136
+ morphology: {
2137
+ needed: true,
2138
+ reason: 'Quechua is polysynthetic with complex suffixation',
2139
+ inflectionTypes: ['agglutinative suffixes', 'evidential markers'],
2140
+ integratedWithTokenizer: false,
2141
+ confidenceThreshold: 0.8,
2142
+ },
2143
+ profileKeywords: [
2144
+ "t'ikray",
2145
+ 'yapay',
2146
+ 'qichuy',
2147
+ 'churay',
2148
+ 'rikuchiy',
2149
+ 'pakay',
2150
+ 'qatichiy',
2151
+ 'ñawpachiy',
2152
+ 'yapachiy',
2153
+ 'pisiyachiy',
2154
+ 'suyay',
2155
+ 'apamuy',
2156
+ 'riy',
2157
+ 'qallarichiy',
2158
+ 'kachay',
2159
+ 'qillqakuy',
2160
+ 'ruray',
2161
+ 'kikinchay',
2162
+ 'hapiy',
2163
+ 'taripay',
2164
+ 'qhawachiy',
2165
+ ],
2166
+ tokenizerKeywords: [
2167
+ // Commands - Class/Attribute operations
2168
+ "t'ikray",
2169
+ 'tikray',
2170
+ 'kutichiy',
2171
+ 'yapay',
2172
+ 'yapaykuy',
2173
+ 'qichuy',
2174
+ 'hurquy',
2175
+ 'anchuchiy',
2176
+ // Commands - Content operations
2177
+ 'churay',
2178
+ 'tiyachiy',
2179
+ 'qatichiy',
2180
+ 'ñawpachiy',
2181
+ 'nawpachiy',
2182
+ 'hapiy',
2183
+ 'ruray',
2184
+ 'kamay',
2185
+ 'kikinchay',
2186
+ 'qillqay',
2187
+ "t'inkuy",
2188
+ 'tinkuy', // swap
2189
+ // Commands - Variable operations
2190
+ 'kamaykuy',
2191
+ 'taripay',
2192
+ 'yapachiy',
2193
+ 'pisiyachiy',
2194
+ 'qillqakuy',
2195
+ 'willakuy',
2196
+ // Commands - Visibility
2197
+ 'rikuchiy',
2198
+ 'qawachiy',
2199
+ 'pakay',
2200
+ 'pakakuy',
2201
+ 'kuyuchiy',
2202
+ // Commands - Events
2203
+ 'chaypim',
2204
+ 'kaypi',
2205
+ 'qallarichiy',
2206
+ 'kachay',
2207
+ 'apachiy',
2208
+ // Commands - DOM focus
2209
+ 'qhawachiy',
2210
+ 'mana qhawachiy',
2211
+ // Commands - Navigation
2212
+ 'riy',
2213
+ 'puriy',
2214
+ // Commands - Async
2215
+ 'suyay',
2216
+ 'apamuy',
2217
+ 'taripakaramuy',
2218
+ 'tiyakuy',
2219
+ // Commands - Control flow
2220
+ 'sichus',
2221
+ 'manachus',
2222
+ 'hukniraq',
2223
+ 'kutipay',
2224
+ 'muyu',
2225
+ 'sapankaq',
2226
+ 'kaykamaqa',
2227
+ 'qatipay',
2228
+ 'sayay',
2229
+ 'tukuy',
2230
+ 'chanqay',
2231
+ 'waqyay',
2232
+ 'kutimuy',
2233
+ // Commands - Advanced
2234
+ 'js',
2235
+ 'mana waqtalla',
2236
+ 'niy',
2237
+ 'qallariy',
2238
+ 'ruwana',
2239
+ // Control flow helpers
2240
+ 'chayqa',
2241
+ 'chaymanta',
2242
+ 'chaymantataq',
2243
+ 'hinaspa', // then
2244
+ 'tukukuy',
2245
+ 'puchukay', // end
2246
+ // Modifiers
2247
+ 'ukuman',
2248
+ 'ñawpaq',
2249
+ 'nawpaq',
2250
+ 'qhipa',
2251
+ ],
2252
+ missingFromTokenizer: [],
2253
+ potentialConflicts: [],
2254
+ },
2255
+ {
2256
+ code: 'sw',
2257
+ name: 'Swahili',
2258
+ wordOrder: 'SVO',
2259
+ direction: 'ltr',
2260
+ files: {
2261
+ languageProfile: true,
2262
+ tokenizer: true,
2263
+ tokenizerRegistered: true,
2264
+ morphologicalNormalizer: false,
2265
+ eventHandlerPatterns: false,
2266
+ tests: false,
2267
+ morphologyTests: false,
2268
+ },
2269
+ morphology: {
2270
+ needed: true,
2271
+ reason: 'Swahili is agglutinative with noun class prefixes and verb agreement',
2272
+ inflectionTypes: ['noun class prefixes', 'verb prefixes'],
2273
+ integratedWithTokenizer: false,
2274
+ confidenceThreshold: 0.8,
2275
+ },
2276
+ profileKeywords: [
2277
+ 'badilisha',
2278
+ 'ongeza',
2279
+ 'ondoa',
2280
+ 'weka',
2281
+ 'onyesha',
2282
+ 'ficha',
2283
+ 'ambatanisha',
2284
+ 'tanguliza',
2285
+ 'ongeza',
2286
+ 'punguza',
2287
+ 'subiri',
2288
+ 'leta',
2289
+ 'nenda',
2290
+ 'chochea',
2291
+ 'tuma',
2292
+ 'andika',
2293
+ 'tengeneza',
2294
+ 'nakili',
2295
+ 'chukua',
2296
+ 'pata',
2297
+ 'lenga',
2298
+ 'blur',
2299
+ ],
2300
+ tokenizerKeywords: [
2301
+ // Commands - Class/Attribute operations
2302
+ 'badilisha',
2303
+ 'geuza',
2304
+ 'ongeza',
2305
+ 'weka',
2306
+ 'ondoa',
2307
+ 'futa',
2308
+ 'toa',
2309
+ // Commands - Content operations
2310
+ 'tia',
2311
+ 'ambatanisha',
2312
+ 'tanguliza',
2313
+ 'chukua',
2314
+ 'tengeneza',
2315
+ 'unda',
2316
+ 'nakili',
2317
+ 'rudufu',
2318
+ // Commands - Variable operations
2319
+ 'seti',
2320
+ 'pata',
2321
+ 'pokea',
2322
+ 'punguza',
2323
+ 'andika',
2324
+ 'rekodi',
2325
+ // Commands - Visibility
2326
+ 'onyesha',
2327
+ 'ficha',
2328
+ 'mficho',
2329
+ 'hamisha',
2330
+ 'animisha',
2331
+ // Commands - Events
2332
+ 'wakati',
2333
+ 'kwenye',
2334
+ 'unapo',
2335
+ 'chochea',
2336
+ 'anzisha',
2337
+ 'tuma',
2338
+ 'peleka',
2339
+ // Commands - DOM focus
2340
+ 'lenga',
2341
+ 'angazia',
2342
+ 'ondoa lenga',
2343
+ 'blur',
2344
+ // Commands - Navigation
2345
+ 'nenda',
2346
+ 'enda',
2347
+ 'elekea',
2348
+ // Commands - Async
2349
+ 'subiri',
2350
+ 'ngoja',
2351
+ 'leta',
2352
+ 'pakia',
2353
+ 'tulia',
2354
+ 'imarika',
2355
+ // Commands - Control flow
2356
+ 'kama',
2357
+ 'ikiwa',
2358
+ 'vinginevyo',
2359
+ 'sivyo',
2360
+ 'rudia',
2361
+ 'kwa',
2362
+ 'endelea',
2363
+ 'simama',
2364
+ 'acha',
2365
+ 'tupa',
2366
+ 'ita',
2367
+ 'piga simu',
2368
+ 'rudisha',
2369
+ 'rejea',
2370
+ // Commands - Advanced
2371
+ 'js',
2372
+ 'javascript',
2373
+ 'isiyo sawia',
2374
+ 'sema',
2375
+ 'ambia',
2376
+ 'chaguo-msingi',
2377
+ 'anza',
2378
+ 'tabia',
2379
+ // Control flow helpers
2380
+ 'basi',
2381
+ 'kisha',
2382
+ 'halafu',
2383
+ 'baadaye', // then
2384
+ 'mwisho',
2385
+ 'maliza',
2386
+ 'tamati', // end
2387
+ // Modifiers
2388
+ 'ndani',
2389
+ 'kabla',
2390
+ 'baada',
2391
+ ],
2392
+ missingFromTokenizer: [],
2393
+ potentialConflicts: [],
2394
+ },
2395
+ {
2396
+ code: 'it',
2397
+ name: 'Italian',
2398
+ wordOrder: 'SVO',
2399
+ direction: 'ltr',
2400
+ files: {
2401
+ languageProfile: true,
2402
+ tokenizer: true,
2403
+ tokenizerRegistered: true,
2404
+ morphologicalNormalizer: true,
2405
+ eventHandlerPatterns: false,
2406
+ tests: true,
2407
+ morphologyTests: true,
2408
+ },
2409
+ morphology: {
2410
+ needed: true,
2411
+ reason: 'Italian has verb conjugations like Spanish',
2412
+ inflectionTypes: [
2413
+ '-are conjugations',
2414
+ '-ere conjugations',
2415
+ '-ire conjugations',
2416
+ 'reflexive verbs',
2417
+ ],
2418
+ integratedWithTokenizer: true,
2419
+ confidenceThreshold: 0.7,
2420
+ },
2421
+ profileKeywords: [
2422
+ 'commutare',
2423
+ 'aggiungere',
2424
+ 'rimuovere',
2425
+ 'mettere',
2426
+ 'impostare',
2427
+ 'mostrare',
2428
+ 'nascondere',
2429
+ 'incrementare',
2430
+ 'decrementare',
2431
+ ],
2432
+ tokenizerKeywords: [
2433
+ 'commutare',
2434
+ 'alternare',
2435
+ 'toggle',
2436
+ 'aggiungere',
2437
+ 'rimuovere',
2438
+ 'eliminare',
2439
+ 'mettere',
2440
+ 'impostare',
2441
+ 'ottenere',
2442
+ 'mostrare',
2443
+ 'nascondere',
2444
+ 'incrementare',
2445
+ 'decrementare',
2446
+ ],
2447
+ missingFromTokenizer: [],
2448
+ potentialConflicts: [],
2449
+ },
2450
+ {
2451
+ code: 'vi',
2452
+ name: 'Vietnamese',
2453
+ wordOrder: 'SVO',
2454
+ direction: 'ltr',
2455
+ files: {
2456
+ languageProfile: true,
2457
+ tokenizer: true,
2458
+ tokenizerRegistered: true,
2459
+ morphologicalNormalizer: false,
2460
+ eventHandlerPatterns: false,
2461
+ tests: true,
2462
+ morphologyTests: false,
2463
+ },
2464
+ morphology: {
2465
+ needed: false,
2466
+ reason: 'Vietnamese is isolating with no verb conjugation',
2467
+ inflectionTypes: [],
2468
+ integratedWithTokenizer: false,
2469
+ confidenceThreshold: 1.0,
2470
+ },
2471
+ profileKeywords: ['chuyển đổi', 'thêm', 'xóa', 'đặt', 'gán', 'hiển thị', 'ẩn', 'tăng', 'giảm'],
2472
+ tokenizerKeywords: [
2473
+ 'chuyển đổi',
2474
+ 'bật tắt',
2475
+ 'thêm',
2476
+ 'xóa',
2477
+ 'gỡ bỏ',
2478
+ 'đặt',
2479
+ 'gán',
2480
+ 'lấy giá trị',
2481
+ 'hiển thị',
2482
+ 'hiện',
2483
+ 'ẩn',
2484
+ 'tăng',
2485
+ 'giảm',
2486
+ ],
2487
+ missingFromTokenizer: [],
2488
+ potentialConflicts: [],
2489
+ },
2490
+ {
2491
+ code: 'pl',
2492
+ name: 'Polish',
2493
+ wordOrder: 'SVO',
2494
+ direction: 'ltr',
2495
+ files: {
2496
+ languageProfile: true,
2497
+ tokenizer: true,
2498
+ tokenizerRegistered: true,
2499
+ morphologicalNormalizer: true,
2500
+ eventHandlerPatterns: false,
2501
+ tests: true,
2502
+ morphologyTests: false,
2503
+ },
2504
+ morphology: {
2505
+ needed: true,
2506
+ reason: 'Polish is fusional with verb conjugations; uses imperative form for commands',
2507
+ inflectionTypes: ['imperative form', 'present tense', 'past tense'],
2508
+ integratedWithTokenizer: false,
2509
+ confidenceThreshold: 0.7,
2510
+ },
2511
+ profileKeywords: [
2512
+ 'przełącz',
2513
+ 'dodaj',
2514
+ 'usuń',
2515
+ 'umieść',
2516
+ 'ustaw',
2517
+ 'pokaż',
2518
+ 'ukryj',
2519
+ 'zwiększ',
2520
+ 'zmniejsz',
2521
+ ],
2522
+ tokenizerKeywords: [
2523
+ 'przełącz',
2524
+ 'przelacz',
2525
+ 'dodaj',
2526
+ 'usuń',
2527
+ 'usun',
2528
+ 'umieść',
2529
+ 'umiesc',
2530
+ 'ustaw',
2531
+ 'pokaż',
2532
+ 'pokaz',
2533
+ 'ukryj',
2534
+ 'zwiększ',
2535
+ 'zwieksz',
2536
+ 'zmniejsz',
2537
+ ],
2538
+ missingFromTokenizer: [],
2539
+ potentialConflicts: [],
2540
+ },
2541
+ {
2542
+ code: 'ru',
2543
+ name: 'Russian',
2544
+ wordOrder: 'SVO',
2545
+ direction: 'ltr',
2546
+ files: {
2547
+ languageProfile: true,
2548
+ tokenizer: true,
2549
+ tokenizerRegistered: true,
2550
+ morphologicalNormalizer: false,
2551
+ eventHandlerPatterns: false,
2552
+ tests: true,
2553
+ morphologyTests: false,
2554
+ },
2555
+ morphology: {
2556
+ needed: true,
2557
+ reason:
2558
+ 'Russian is fusional with verb conjugations; uses infinitive or imperative form for commands',
2559
+ inflectionTypes: ['infinitive form', 'imperative form', 'present tense', 'past tense'],
2560
+ integratedWithTokenizer: false,
2561
+ confidenceThreshold: 0.7,
2562
+ },
2563
+ profileKeywords: [
2564
+ 'переключить',
2565
+ 'добавить',
2566
+ 'удалить',
2567
+ 'положить',
2568
+ 'установить',
2569
+ 'показать',
2570
+ 'скрыть',
2571
+ 'увеличить',
2572
+ 'уменьшить',
2573
+ ],
2574
+ tokenizerKeywords: [
2575
+ 'переключить',
2576
+ 'переключи',
2577
+ 'добавить',
2578
+ 'добавь',
2579
+ 'удалить',
2580
+ 'удали',
2581
+ 'положить',
2582
+ 'положи',
2583
+ 'установить',
2584
+ 'установи',
2585
+ 'показать',
2586
+ 'покажи',
2587
+ 'скрыть',
2588
+ 'скрой',
2589
+ 'увеличить',
2590
+ 'увеличь',
2591
+ 'уменьшить',
2592
+ 'уменьши',
2593
+ ],
2594
+ missingFromTokenizer: [],
2595
+ potentialConflicts: [],
2596
+ },
2597
+ {
2598
+ code: 'uk',
2599
+ name: 'Ukrainian',
2600
+ wordOrder: 'SVO',
2601
+ direction: 'ltr',
2602
+ files: {
2603
+ languageProfile: true,
2604
+ tokenizer: true,
2605
+ tokenizerRegistered: true,
2606
+ morphologicalNormalizer: false,
2607
+ eventHandlerPatterns: false,
2608
+ tests: true,
2609
+ morphologyTests: false,
2610
+ },
2611
+ morphology: {
2612
+ needed: true,
2613
+ reason:
2614
+ 'Ukrainian is fusional with verb conjugations; uses infinitive or imperative form for commands',
2615
+ inflectionTypes: ['infinitive form', 'imperative form', 'present tense', 'past tense'],
2616
+ integratedWithTokenizer: false,
2617
+ confidenceThreshold: 0.7,
2618
+ },
2619
+ profileKeywords: [
2620
+ 'перемкнути',
2621
+ 'додати',
2622
+ 'видалити',
2623
+ 'покласти',
2624
+ 'встановити',
2625
+ 'показати',
2626
+ 'сховати',
2627
+ 'збільшити',
2628
+ 'зменшити',
2629
+ ],
2630
+ tokenizerKeywords: [
2631
+ 'перемкнути',
2632
+ 'перемкни',
2633
+ 'додати',
2634
+ 'додай',
2635
+ 'видалити',
2636
+ 'видали',
2637
+ 'покласти',
2638
+ 'поклади',
2639
+ 'встановити',
2640
+ 'встанови',
2641
+ 'показати',
2642
+ 'покажи',
2643
+ 'сховати',
2644
+ 'сховай',
2645
+ 'збільшити',
2646
+ 'збільш',
2647
+ 'зменшити',
2648
+ 'зменш',
2649
+ ],
2650
+ missingFromTokenizer: [],
2651
+ potentialConflicts: [],
2652
+ },
2653
+ {
2654
+ code: 'hi',
2655
+ name: 'Hindi',
2656
+ wordOrder: 'SOV',
2657
+ direction: 'ltr',
2658
+ files: {
2659
+ languageProfile: true,
2660
+ tokenizer: true,
2661
+ tokenizerRegistered: true,
2662
+ morphologicalNormalizer: false,
2663
+ eventHandlerPatterns: true,
2664
+ tests: true,
2665
+ morphologyTests: false,
2666
+ },
2667
+ morphology: {
2668
+ needed: true,
2669
+ reason: 'Hindi has verb conjugations but commands use imperative/infinitive forms',
2670
+ inflectionTypes: ['imperative form', 'infinitive form'],
2671
+ integratedWithTokenizer: false,
2672
+ confidenceThreshold: 0.7,
2673
+ },
2674
+ profileKeywords: [
2675
+ 'टॉगल',
2676
+ 'जोड़ें',
2677
+ 'हटाएं',
2678
+ 'रखें',
2679
+ 'सेट',
2680
+ 'दिखाएं',
2681
+ 'छिपाएं',
2682
+ 'बढ़ाएं',
2683
+ 'घटाएं',
2684
+ ],
2685
+ tokenizerKeywords: [
2686
+ 'टॉगल',
2687
+ 'बदलें',
2688
+ 'जोड़ें',
2689
+ 'जोड़',
2690
+ 'हटाएं',
2691
+ 'हटा',
2692
+ 'रखें',
2693
+ 'रख',
2694
+ 'सेट',
2695
+ 'निर्धारित',
2696
+ 'दिखाएं',
2697
+ 'दिखा',
2698
+ 'छिपाएं',
2699
+ 'छिपा',
2700
+ 'बढ़ाएं',
2701
+ 'बढ़ा',
2702
+ 'घटाएं',
2703
+ 'घटा',
2704
+ ],
2705
+ missingFromTokenizer: [],
2706
+ potentialConflicts: [],
2707
+ },
2708
+ {
2709
+ code: 'bn',
2710
+ name: 'Bengali',
2711
+ wordOrder: 'SOV',
2712
+ direction: 'ltr',
2713
+ files: {
2714
+ languageProfile: true,
2715
+ tokenizer: true,
2716
+ tokenizerRegistered: true,
2717
+ morphologicalNormalizer: false,
2718
+ eventHandlerPatterns: true,
2719
+ tests: true,
2720
+ morphologyTests: false,
2721
+ },
2722
+ morphology: {
2723
+ needed: true,
2724
+ reason: 'Bengali has verb conjugations but commands use imperative forms',
2725
+ inflectionTypes: ['imperative form', 'infinitive form'],
2726
+ integratedWithTokenizer: false,
2727
+ confidenceThreshold: 0.7,
2728
+ },
2729
+ profileKeywords: ['টগল', 'যোগ', 'সরান', 'রাখুন', 'সেট', 'দেখান', 'লুকান', 'বৃদ্ধি', 'হ্রাস'],
2730
+ tokenizerKeywords: [
2731
+ 'টগল',
2732
+ 'পরিবর্তন',
2733
+ 'যোগ',
2734
+ 'সরান',
2735
+ 'মুছুন',
2736
+ 'রাখুন',
2737
+ 'রাখ',
2738
+ 'সেট',
2739
+ 'নির্ধারণ',
2740
+ 'দেখান',
2741
+ 'দেখাও',
2742
+ 'লুকান',
2743
+ 'লুকাও',
2744
+ 'বৃদ্ধি',
2745
+ 'বাড়ান',
2746
+ 'হ্রাস',
2747
+ 'কমান',
2748
+ ],
2749
+ missingFromTokenizer: [],
2750
+ potentialConflicts: [],
2751
+ },
2752
+ {
2753
+ code: 'th',
2754
+ name: 'Thai',
2755
+ wordOrder: 'SVO',
2756
+ direction: 'ltr',
2757
+ files: {
2758
+ languageProfile: true,
2759
+ tokenizer: true,
2760
+ tokenizerRegistered: true,
2761
+ morphologicalNormalizer: false,
2762
+ eventHandlerPatterns: true,
2763
+ tests: true,
2764
+ morphologyTests: false,
2765
+ },
2766
+ morphology: {
2767
+ needed: false,
2768
+ reason: 'Thai is isolating with no verb conjugation',
2769
+ inflectionTypes: [],
2770
+ integratedWithTokenizer: false,
2771
+ confidenceThreshold: 1.0,
2772
+ },
2773
+ profileKeywords: ['สลับ', 'เพิ่ม', 'ลบ', 'ใส่', 'ตั้ง', 'แสดง', 'ซ่อน', 'เพิ่มค่า', 'ลดค่า'],
2774
+ tokenizerKeywords: [
2775
+ 'สลับ',
2776
+ 'เปลี่ยน',
2777
+ 'เพิ่ม',
2778
+ 'ลบ',
2779
+ 'ลบออก',
2780
+ 'ใส่',
2781
+ 'วาง',
2782
+ 'ตั้ง',
2783
+ 'กำหนด',
2784
+ 'แสดง',
2785
+ 'โชว์',
2786
+ 'ซ่อน',
2787
+ 'เพิ่มค่า',
2788
+ 'ลดค่า',
2789
+ ],
2790
+ missingFromTokenizer: [],
2791
+ potentialConflicts: [],
2792
+ },
2793
+ {
2794
+ code: 'ms',
2795
+ name: 'Malay',
2796
+ wordOrder: 'SVO',
2797
+ direction: 'ltr',
2798
+ files: {
2799
+ languageProfile: true,
2800
+ tokenizer: true,
2801
+ tokenizerRegistered: true,
2802
+ morphologicalNormalizer: false,
2803
+ eventHandlerPatterns: true,
2804
+ tests: true,
2805
+ morphologyTests: false,
2806
+ },
2807
+ morphology: {
2808
+ needed: false,
2809
+ reason: 'Malay is largely isolating with limited affixation',
2810
+ inflectionTypes: [],
2811
+ integratedWithTokenizer: false,
2812
+ confidenceThreshold: 1.0,
2813
+ },
2814
+ profileKeywords: [
2815
+ 'togol',
2816
+ 'tambah',
2817
+ 'buang',
2818
+ 'letak',
2819
+ 'tetap',
2820
+ 'tunjuk',
2821
+ 'sembunyi',
2822
+ 'tambah nilai',
2823
+ 'kurang nilai',
2824
+ ],
2825
+ tokenizerKeywords: [
2826
+ 'togol',
2827
+ 'tukar',
2828
+ 'tambah',
2829
+ 'buang',
2830
+ 'alih',
2831
+ 'letak',
2832
+ 'tetap',
2833
+ 'tunjuk',
2834
+ 'sembunyi',
2835
+ 'tambah nilai',
2836
+ 'kurang nilai',
2837
+ ],
2838
+ missingFromTokenizer: [],
2839
+ potentialConflicts: [],
2840
+ },
2841
+ {
2842
+ code: 'tl',
2843
+ name: 'Tagalog',
2844
+ wordOrder: 'VSO',
2845
+ direction: 'ltr',
2846
+ files: {
2847
+ languageProfile: true,
2848
+ tokenizer: true,
2849
+ tokenizerRegistered: true,
2850
+ morphologicalNormalizer: false,
2851
+ eventHandlerPatterns: true,
2852
+ tests: true,
2853
+ morphologyTests: false,
2854
+ },
2855
+ morphology: {
2856
+ needed: true,
2857
+ reason: 'Tagalog has verb focus/voice system and affix-based morphology',
2858
+ inflectionTypes: ['focus marking', 'aspect affixes'],
2859
+ integratedWithTokenizer: false,
2860
+ confidenceThreshold: 0.8,
2861
+ },
2862
+ profileKeywords: [
2863
+ 'palitan',
2864
+ 'idagdag',
2865
+ 'alisin',
2866
+ 'ilagay',
2867
+ 'itakda',
2868
+ 'ipakita',
2869
+ 'itago',
2870
+ 'dagdagan',
2871
+ 'bawasan',
2872
+ ],
2873
+ tokenizerKeywords: [
2874
+ 'palitan',
2875
+ 'itoggle',
2876
+ 'idagdag',
2877
+ 'magdagdag',
2878
+ 'alisin',
2879
+ 'tanggalin',
2880
+ 'ilagay',
2881
+ 'maglagay',
2882
+ 'itakda',
2883
+ 'magtakda',
2884
+ 'ipakita',
2885
+ 'magpakita',
2886
+ 'itago',
2887
+ 'magtago',
2888
+ 'dagdagan',
2889
+ 'taasan',
2890
+ 'bawasan',
2891
+ 'ibaba',
2892
+ ],
2893
+ missingFromTokenizer: [],
2894
+ potentialConflicts: [],
2895
+ },
2896
+ ];
2897
+
2898
/**
 * Documents the current state of command support.
 *
 * One entry per command action, recording:
 * - whether a schema exists for the command,
 * - whether its patterns are wired from that schema or hand-crafted,
 * - which language codes have a keyword for it in the profiles and tokenizers,
 * - which language codes are still missing a tokenizer keyword,
 * - which language codes have tests.
 *
 * Every entry currently shares the same keyword coverage and the same
 * "schema exists, wired in patterns" flags; only `toggle` deviates, so the
 * entries are produced by a small local factory instead of being spelled out
 * fourteen times.
 */
export const SUPPORTED_COMMANDS: CommandChecklist[] = (() => {
  /**
   * Builds one checklist entry.
   *
   * @param action - Command name.
   * @param testsFor - Language codes that have tests for this command.
   * @param overrides - Fields that differ from the common defaults below.
   * @returns A new entry. Array literals are evaluated per call, so no two
   *   entries share an array instance. Overridden keys keep their original
   *   position, so property order matches a hand-written literal.
   */
  const entry = (
    action: CommandChecklist['action'],
    testsFor: CommandChecklist['testsFor'],
    overrides: Partial<CommandChecklist> = {}
  ): CommandChecklist => ({
    action,
    schemaExists: true,
    wiredInPatterns: true,
    usesHandCraftedPatterns: false,
    profileKeywordsIn: ['en', 'ja', 'ar', 'es', 'ko', 'tr', 'zh'],
    tokenizerKeywordsIn: ['en', 'ja', 'ar', 'es', 'ko', 'tr', 'zh'],
    missingTokenizerKeywordsIn: [],
    testsFor,
    ...overrides,
  });

  return [
    // Hand-crafted in patterns/toggle.ts rather than wired from the schema.
    entry('toggle', ['en', 'ja', 'ar', 'es', 'ko', 'tr'], {
      wiredInPatterns: false,
      usesHandCraftedPatterns: true,
    }),
    entry('add', ['en', 'ja', 'ko', 'ar', 'es', 'tr', 'zh']),
    entry('append', ['en', 'ja', 'es', 'ar']),
    entry('prepend', ['en', 'ja', 'es']),
    entry('trigger', ['en', 'ja', 'es', 'ar']),
    entry('set', ['en', 'ja', 'es', 'ar', 'ko', 'tr']),
    // Tier 2: Content & variable operations (newly wired)
    entry('take', ['en']),
    entry('make', ['en']),
    entry('clone', ['en']),
    entry('get', ['en']),
    // Tier 3: Control flow & DOM (newly wired)
    entry('focus', ['en']),
    entry('blur', ['en']),
    entry('call', ['en']),
    entry('return', ['en']),
  ];
})();
3045
+
3046
+ // =============================================================================
3047
+ // Process Documentation
3048
+ // =============================================================================
3049
+
3050
/**
 * Step-by-step process for adding a new language.
 *
 * Markdown text kept as a string constant. It walks through six steps:
 * language profile, tokenizer, tokenizer registration, optional event handler
 * patterns, tests, and the SUPPORTED_LANGUAGES entry in this file.
 *
 * The literal opens and closes with a newline, so consumers that need a tight
 * string should trim it themselves. `{code}` is a plain placeholder, not a
 * template interpolation.
 */
export const ADD_LANGUAGE_PROCESS = `
# Adding a New Language

## Step 1: Create Language Profile
File: packages/semantic/src/generators/language-profiles.ts

Add a new profile with:
- code: ISO 639-1 code (e.g., 'de' for German)
- name: Human-readable name
- wordOrder: 'SVO', 'SOV', or 'VSO'
- direction: 'ltr' or 'rtl'
- keywords: Map of command → { primary, alternatives, normalized }
- particles: Object marking case/role particles
- prepositions: Destination/source markers

## Step 2: Create Tokenizer
File: packages/semantic/src/tokenizers/{code}.ts

Copy structure from similar language tokenizer:
- Character classification functions (if non-Latin script)
- KEYWORDS map: native words → English normalized
- Particle detection
- Word extraction logic

## Step 3: Register Tokenizer
File: packages/semantic/src/tokenizers/index.ts

- Import tokenizer
- Add to tokenizers map
- Add to exports

## Step 4: Add Event Handler Patterns (if needed)
File: packages/semantic/src/patterns/event-handler.ts

If the language needs custom event handler syntax:
- Add pattern for standard event form
- Add pattern for source-filtered events
- Add event name translations

## Step 5: Add Tests
File: packages/semantic/test/official-examples.test.ts

Add tests for:
- Basic commands (toggle, add, put)
- Commands with targets
- Multilingual equivalents section
- AST equivalence tests

## Step 6: Update Documentation
File: packages/semantic/src/language-building-schema.ts

Add entry to SUPPORTED_LANGUAGES array.
`;
3106
+
3107
/**
 * Step-by-step process for adding a new command.
 *
 * Markdown text kept as a string constant. It walks through six steps:
 * command schema, language profile keywords, tokenizer keywords, wiring the
 * schema into the pattern registry, tests, and the SUPPORTED_COMMANDS entry in
 * this file.
 *
 * The literal opens and closes with a newline. Backticks belonging to the
 * Markdown (code fences, inline code) are escaped so they stay inside the
 * template literal. `{command}` and `{language}` are plain placeholders, not
 * template interpolations.
 */
export const ADD_COMMAND_PROCESS = `
# Adding a New Command

## Step 1: Define Command Schema
File: packages/semantic/src/generators/command-schemas.ts

Add schema with:
- action: command name
- description: what it does
- category: 'dom-class', 'dom-content', 'variable', etc.
- primaryRole: main semantic role
- roles: array of RoleSpec with:
  - role: semantic role name
  - description
  - required: boolean
  - expectedTypes: ['selector', 'literal', 'reference', 'expression']
  - default: optional default value
  - svoPosition/sovPosition: word order hints

## Step 2: Add Keywords to Language Profiles
File: packages/semantic/src/generators/language-profiles.ts

For EACH language profile, add:
\`\`\`typescript
{command}: {
  primary: 'native_word',
  alternatives: ['alt1', 'alt2'],
  normalized: 'command',
}
\`\`\`

## Step 3: Add Keywords to Tokenizers
Files: packages/semantic/src/tokenizers/{language}.ts

For EACH tokenizer's KEYWORDS map, add:
\`\`\`typescript
['native_word', 'command'],
['alternative1', 'command'],
['alternative2', 'command'],
\`\`\`

## Step 4: Wire Schema in Pattern Registry
File: packages/semantic/src/patterns/index.ts

- Import schema from generators
- Add to generatedPatterns array:
\`...generatePatternsForCommand({command}Schema),\`

## Step 5: Add Tests
File: packages/semantic/test/official-examples.test.ts

Add tests for:
- English syntax
- Each supported language
- Edge cases (implicit targets, etc.)

## Step 6: Update Documentation
File: packages/semantic/src/language-building-schema.ts

Add entry to SUPPORTED_COMMANDS array.
`;