@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,410 @@
1
+ /**
2
+ * Vietnamese Tokenizer
3
+ *
4
+ * Tokenizes Vietnamese hyperscript input.
5
+ * Vietnamese is an isolating (analytic) language with:
6
+ * - SVO word order (like English)
7
+ * - Latin script with extensive diacritics (tone marks)
8
+ * - No verb conjugation or noun declension
9
+ * - Space-separated syllables (can be multi-syllable words)
10
+ * - Prepositions for grammatical roles
11
+ *
12
+ * Vietnamese diacritics:
13
+ * - Tone marks: à á ả ã ạ (and similar for other vowels)
14
+ * - Vowel modifications: ă â ê ô ơ ư đ
15
+ *
16
+ * Examples:
17
+ * chuyển đổi .active → toggle .active
18
+ * thêm .highlight → add .highlight
19
+ * hiển thị #modal → show #modal
20
+ */
21
+
22
+ import type { LanguageToken, TokenKind, TokenStream } from '../types';
23
+ import {
24
+ BaseTokenizer,
25
+ TokenStreamImpl,
26
+ createToken,
27
+ createPosition,
28
+ createLatinCharClassifiers,
29
+ isWhitespace,
30
+ isSelectorStart,
31
+ isQuote,
32
+ isDigit,
33
+ isUrlStart,
34
+ type KeywordEntry,
35
+ } from './base';
36
+ import { vietnameseProfile } from '../generators/profiles/vietnamese';
37
+
38
+ // =============================================================================
39
+ // Vietnamese Character Classification
40
+ // =============================================================================
41
+
42
// Character class for a single Vietnamese letter: the basic Latin alphabet plus
// every tone-marked / modified vowel and đ/Đ, in lower and upper case.
const VIETNAMESE_LETTER_PATTERN =
  /[a-zA-ZàáảãạăằắẳẵặâầấẩẫậèéẻẽẹêềếểễệìíỉĩịòóỏõọôồốổỗộơờớởỡợùúủũụưừứửữựỳýỷỹỵđÀÁẢÃẠĂẰẮẲẴẶÂẦẤẨẪẬÈÉẺẼẸÊỀẾỂỄỆÌÍỈĨỊÒÓỎÕỌÔỒỐỔỖỘƠỜỚỞỠỢÙÚỦŨỤƯỪỨỬỮỰỲÝỶỸỴĐ]/;

// Classifier pair produced by the shared Latin-script factory from the pattern above.
const vietnameseClassifiers = createLatinCharClassifiers(VIETNAMESE_LETTER_PATTERN);

// True for a single character that is a Vietnamese letter.
const isVietnameseLetter = vietnameseClassifiers.isLetter;

// True for a single character that may appear inside a Vietnamese word/identifier
// (exact extra characters are decided by the factory in ./base).
const isVietnameseIdentifierChar = vietnameseClassifiers.isIdentifierChar;
47
+
48
+ // =============================================================================
49
+ // Vietnamese Prepositions
50
+ // =============================================================================
51
+
52
/**
 * Lower-case Vietnamese prepositions, in lookup-table order.
 * The tokenizer treats any word found here as a grammatical-role marker.
 */
const PREPOSITION_WORDS: readonly string[] = [
  // Location
  'trong', // in, inside
  'ngoài', // outside
  'trên', // on, above
  'dưới', // under, below
  // Direction / source / accompaniment
  'vào', // into
  'ra', // out
  'đến', // to
  'từ', // from
  'với', // with
  'cho', // for, to
  'bởi', // by
  'qua', // through
  // Relative order and position
  'trước', // before
  'sau', // after
  'giữa', // between
  'bên', // beside
  // Orientation
  'theo', // according to, along
  'về', // about, towards
  'tới', // to, towards
  'lên', // up
  'xuống', // down
];

/**
 * Set of Vietnamese prepositions that mark grammatical roles.
 * Membership is tested against the lower-cased word.
 */
const PREPOSITIONS: Set<string> = new Set<string>(PREPOSITION_WORDS);
78
+
79
+ // =============================================================================
80
+ // Vietnamese Extras (keywords not in profile)
81
+ // =============================================================================
82
+
83
/**
 * Supplementary keyword table as `[native, normalized]` pairs.
 *
 * These cover vocabulary the language profile does not provide:
 * literals, positional words, event names, possessives, time units,
 * extra multi-word phrases, logical words and an English synonym.
 */
const VIETNAMESE_EXTRA_PAIRS: ReadonlyArray<readonly [native: string, normalized: string]> = [
  // Values/Literals
  ['đúng', 'true'],
  ['sai', 'false'],
  ['null', 'null'],
  ['không xác định', 'undefined'],

  // Positional
  ['đầu tiên', 'first'],
  ['cuối cùng', 'last'],
  ['tiếp theo', 'next'],
  ['trước đó', 'previous'],
  ['gần nhất', 'closest'],
  ['cha', 'parent'],

  // Events
  ['nhấp', 'click'],
  ['nhấp chuột', 'click'],
  ['click', 'click'],
  ['nhấp đúp', 'dblclick'],
  ['nhập', 'input'],
  ['thay đổi', 'change'],
  ['gửi biểu mẫu', 'submit'],
  ['phím xuống', 'keydown'],
  ['phím lên', 'keyup'],
  ['chuột vào', 'mouseover'],
  ['chuột ra', 'mouseout'],
  ['tải trang', 'load'],
  ['cuộn', 'scroll'],

  // References - possessive forms
  ['của tôi', 'my'],
  ['của nó', 'its'],

  // Time units
  ['giây', 's'],
  ['mili giây', 'ms'],
  ['phút', 'm'],
  ['giờ', 'h'],

  // Additional multi-word phrases not in profile
  ['thêm vào cuối', 'append'],
  ['nhân bản', 'clone'],
  ['tạo ra', 'make'],
  ['đặt giá trị', 'set'],
  ['ghi nhật ký', 'log'],
  ['chuyển tới', 'go'],
  ['ngược lại', 'else'],
  ['lặp', 'repeat'],

  // Logical/conditional
  ['hoặc', 'or'],
  ['không', 'not'],
  ['là', 'is'],
  ['tồn tại', 'exists'],
  ['rỗng', 'empty'],

  // English synonyms
  ['javascript', 'js'],
];

/**
 * Extra keyword entries handed to the tokenizer alongside the profile keywords.
 * Each entry maps a native Vietnamese spelling to its normalized keyword.
 */
const VIETNAMESE_EXTRAS: KeywordEntry[] = VIETNAMESE_EXTRA_PAIRS.map(
  ([native, normalized]) => ({ native, normalized })
);
152
+
153
+ // =============================================================================
154
+ // Vietnamese Tokenizer Implementation
155
+ // =============================================================================
156
+
157
/**
 * Tokenizer for Vietnamese hyperscript input.
 *
 * Splits the input into selectors, string literals, URLs, numbers (with an
 * optional time-unit suffix), variable references, operators, multi-word
 * keyword phrases and single words. Single words are classified as
 * prepositions (`particle`), keywords or identifiers.
 *
 * Keyword data comes from `vietnameseProfile` plus `VIETNAMESE_EXTRAS`; the
 * generic `try*` matchers and keyword lookups are inherited from `BaseTokenizer`.
 */
export class VietnameseTokenizer extends BaseTokenizer {
  readonly language = 'vi';
  readonly direction = 'ltr' as const;

  /**
   * Time-unit spellings accepted directly after a number, paired with the
   * suffix appended to the numeric literal. Vietnamese spellings include the
   * single leading space that separates them from the number; the standard
   * abbreviations attach without a space. Entries are tried in order.
   */
  private static readonly TIME_UNIT_SUFFIXES: ReadonlyArray<
    readonly [spelling: string, suffix: string]
  > = [
    [' mili giây', 'ms'],
    [' miligiây', 'ms'],
    [' giây', 's'],
    [' phút', 'm'],
    [' giờ', 'h'],
    ['ms', 'ms'],
    ['s', 's'],
    ['m', 'm'],
    ['h', 'h'],
  ];

  constructor() {
    super();
    // Initialize keywords from profile + extras (single source of truth)
    this.initializeKeywordsFromProfile(vietnameseProfile, VIETNAMESE_EXTRAS);
  }

  /**
   * Convert raw input into a token stream.
   *
   * Whitespace is skipped. A character that no matcher recognizes is skipped
   * silently so that tokenization always makes progress.
   *
   * @param input - Vietnamese hyperscript source text.
   * @returns A token stream tagged with this tokenizer's language code.
   */
  tokenize(input: string): TokenStream {
    const tokens: LanguageToken[] = [];
    let pos = 0;

    while (pos < input.length) {
      if (isWhitespace(input[pos])) {
        pos++;
        continue;
      }

      const token = this.nextToken(input, pos);
      if (token) {
        tokens.push(token);
        pos = token.position.end;
      } else {
        // Skip unknown character
        pos++;
      }
    }

    // Use the declared language rather than repeating the literal code.
    return new TokenStreamImpl(tokens, this.language);
  }

  /**
   * Classify a standalone token string.
   *
   * Prepositions win over keywords; anything that is neither a preposition,
   * keyword, selector nor literal is reported as an identifier.
   *
   * @param token - Token text to classify.
   * @returns The token kind.
   */
  classifyToken(token: string): TokenKind {
    const lower = token.toLowerCase();
    if (PREPOSITIONS.has(lower)) return 'particle';
    if (this.isKeyword(lower)) return 'keyword';

    const first = token.charAt(0);
    if (first === '#' || first === '.' || first === '[' || first === '<') return 'selector';
    if (first === '"' || first === "'") return 'literal';
    if (/^\d/.test(token)) return 'literal';

    return 'identifier';
  }

  /**
   * Match a single token starting at `pos`, trying matchers in priority order:
   * event modifier / CSS selector, string, URL, number, variable reference,
   * operator, multi-word phrase, then a single Vietnamese word.
   *
   * @returns The matched token, or `null` when nothing matches at `pos`.
   */
  private nextToken(input: string, pos: number): LanguageToken | null {
    const ch = input[pos];

    // Selector-like input: event modifiers (.once, .debounce(), ...) are tried
    // before plain CSS selectors.
    if (isSelectorStart(ch)) {
      const selectorLike = this.tryEventModifier(input, pos) || this.trySelector(input, pos);
      if (selectorLike) return selectorLike;
    }

    if (isQuote(ch)) {
      const stringToken = this.tryString(input, pos);
      if (stringToken) return stringToken;
    }

    // URL (/path, ./path, http://, etc.)
    if (isUrlStart(input, pos)) {
      const urlToken = this.tryUrl(input, pos);
      if (urlToken) return urlToken;
    }

    if (isDigit(ch)) {
      const numberToken = this.extractVietnameseNumber(input, pos);
      if (numberToken) return numberToken;
    }

    // Variable reference (:varname), then operator, then multi-word phrase.
    // Phrases must be tried before single words so that e.g. a two-word
    // keyword is not split into two separate tokens.
    const matched =
      this.tryVariableRef(input, pos) ||
      this.tryOperator(input, pos) ||
      this.tryMultiWordPhrase(input, pos);
    if (matched) return matched;

    if (isVietnameseLetter(ch)) {
      return this.extractVietnameseWord(input, pos);
    }

    return null;
  }

  /**
   * Try to match a multi-word keyword phrase at `pos`.
   *
   * Only `profileKeywords` entries whose native form contains a space are
   * considered. Matching is case-insensitive and requires that the phrase is
   * not immediately followed by another Vietnamese letter. The first matching
   * entry wins, so the result depends on the order of `profileKeywords`
   * (expected to be longest-first — maintained by the base class; confirm there).
   *
   * @returns A keyword token carrying the entry's normalized form, or `null`.
   */
  private tryMultiWordPhrase(input: string, pos: number): LanguageToken | null {
    for (const entry of this.profileKeywords) {
      const phrase = entry.native;
      if (!phrase.includes(' ')) continue;

      const end = pos + phrase.length;
      if (input.slice(pos, end).toLowerCase() !== phrase.toLowerCase()) continue;

      // Reject a match that stops in the middle of a longer word.
      const following = input[end];
      if (following && isVietnameseLetter(following)) continue;

      return createToken(
        input.slice(pos, end),
        'keyword',
        createPosition(pos, end),
        entry.normalized
      );
    }

    return null;
  }

  /**
   * Extract one Vietnamese word (a run of identifier characters) at `startPos`.
   *
   * The word is emitted as `particle` when it is a known preposition, as
   * `keyword` (with its normalized form) when the keyword lookup finds it,
   * and as `identifier` otherwise.
   *
   * @returns The word token, or `null` when no identifier character is present.
   */
  private extractVietnameseWord(input: string, startPos: number): LanguageToken | null {
    let end = startPos;
    while (end < input.length && isVietnameseIdentifierChar(input[end])) {
      end++;
    }
    if (end === startPos) return null;

    const word = input.slice(startPos, end);
    const lower = word.toLowerCase();
    const position = createPosition(startPos, end);

    // Prepositions take precedence over keyword lookup.
    if (PREPOSITIONS.has(lower)) {
      return createToken(word, 'particle', position);
    }

    const keywordEntry = this.lookupKeyword(lower);
    if (keywordEntry) {
      return createToken(word, 'keyword', position, keywordEntry.normalized);
    }

    return createToken(word, 'identifier', position);
  }

  /**
   * Extract a numeric literal at `startPos`, folding a directly following
   * time unit into the literal (e.g. `5 giây` and `5s` both yield `5s`).
   *
   * A `.` is treated as a decimal point only when a digit follows it, so a
   * class selector written right after a number keeps its leading dot.
   * A time unit is accepted only when it ends at a word boundary, i.e. it is
   * not immediately followed by another Vietnamese letter.
   *
   * @returns A `literal` token spanning the number and any consumed unit, or
   *   `null` when no numeric text starts at `startPos`.
   */
  private extractVietnameseNumber(input: string, startPos: number): LanguageToken | null {
    let pos = startPos;

    // Integer part
    while (pos < input.length && isDigit(input[pos])) {
      pos++;
    }

    // Optional fraction: require at least one digit after the dot.
    if (input[pos] === '.' && pos + 1 < input.length && isDigit(input[pos + 1])) {
      pos++;
      while (pos < input.length && isDigit(input[pos])) {
        pos++;
      }
    }

    let number = input.slice(startPos, pos);
    if (!number) return null;

    for (const [spelling, suffix] of VietnameseTokenizer.TIME_UNIT_SUFFIXES) {
      const unitEnd = pos + spelling.length;
      if (input.slice(pos, unitEnd).toLowerCase() !== spelling) continue;

      // Word boundary: "5msx" or "5 giờabc" must not be read as a unit.
      const following = input[unitEnd];
      if (following && isVietnameseLetter(following)) continue;

      number += suffix;
      pos = unitEnd;
      break;
    }

    return createToken(number, 'literal', createPosition(startPos, pos));
  }
}
406
+
407
/**
 * Shared, eagerly constructed Vietnamese tokenizer.
 * Created once when this module is first loaded.
 */
export const vietnameseTokenizer: VietnameseTokenizer = new VietnameseTokenizer();