@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,481 @@
1
+ /**
2
+ * Chinese Tokenizer
3
+ *
4
+ * Tokenizes Chinese hyperscript input.
5
+ * Chinese is challenging because:
6
+ * - No spaces between words (like Japanese)
7
+ * - Uses CJK characters (shared with Japanese Kanji)
8
+ * - SVO word order (like English)
9
+ * - Uses prepositions (把, 在, 从, etc.) for grammatical roles
10
+ * - No conjugation (unlike Japanese/Korean)
11
+ * - CSS selectors are embedded ASCII
12
+ */
13
+
14
+ import type { LanguageToken, TokenKind, TokenStream } from '../types';
15
+ import {
16
+ BaseTokenizer,
17
+ TokenStreamImpl,
18
+ createToken,
19
+ createPosition,
20
+ createUnicodeRangeClassifier,
21
+ isWhitespace,
22
+ isSelectorStart,
23
+ isQuote,
24
+ isDigit,
25
+ isAsciiIdentifierChar,
26
+ isUrlStart,
27
+ type KeywordEntry,
28
+ type TimeUnitMapping,
29
+ } from './base';
30
+ import { chineseProfile } from '../generators/profiles/chinese';
31
+
32
+ // =============================================================================
33
+ // Chinese Character Classification
34
+ // =============================================================================
35
+
36
/**
 * Code point ranges, as [start, end] pairs, that count as Chinese characters.
 * The end values are the last code point of each Unicode block, so the pairs
 * are presumably treated as inclusive by the classifier — TODO confirm.
 */
const CJK_IDEOGRAPH_RANGES: [number, number][] = [
  [0x4e00, 0x9fff], // CJK Unified Ideographs
  [0x3400, 0x4dbf], // CJK Unified Ideographs Extension A
  [0x20000, 0x2a6df], // CJK Unified Ideographs Extension B
  [0xf900, 0xfaff], // CJK Compatibility Ideographs
  [0x2f800, 0x2fa1f], // CJK Compatibility Ideographs Supplement
];

/**
 * Predicate telling whether a character is a CJK (Chinese) ideograph.
 *
 * NOTE(review): two of the ranges lie above U+FFFF. The tokenizer calls this
 * with `input[pos]`, which is a single UTF-16 code unit, so those ranges can
 * only match if the classifier and its callers handle surrogate pairs —
 * confirm against createUnicodeRangeClassifier in ./base.
 */
const isChinese = createUnicodeRangeClassifier(CJK_IDEOGRAPH_RANGES);
44
+
45
+ // =============================================================================
46
+ // Chinese Particles/Prepositions
47
+ // =============================================================================
48
+
49
/**
 * Single-character Chinese function words (particles and prepositions).
 *
 * These mark grammatical relationships in a sentence. `classifyToken`
 * reports any token found in this set as a 'particle'.
 */
const PARTICLES = new Set<string>([
  // Prepositions / coverbs:
  //   把 ba    - marks direct object (BA construction)
  //   在 zai   - at, in, on (location)
  //   从 cong  - from
  //   到 dao   - to, until
  //   向 xiang - towards
  //   给 gei   - to, for (recipient)
  //   对 dui   - to, towards
  //   用 yong  - with, using
  //   被 bei   - by (passive)
  //   让 rang  - let, allow
  '把', '在', '从', '到', '向', '给', '对', '用', '被', '让',

  // Structural "de" markers:
  //   的 - possessive/attributive, 地 - adverbial, 得 - complement
  '的', '地', '得',

  // Aspect markers:
  //   了 le - completion, 着 zhe - progressive, 过 guo - experiential
  '了', '着', '过',

  // Sentence-final particles:
  //   吗 ma - question, 呢 ne - question/emphasis, 吧 ba - suggestion
  '吗', '呢', '吧',
]);
74
+
75
/**
 * Multi-character particles/phrases.
 *
 * The tokenizer passes this list to `tryMultiCharParticle` before it attempts
 * Chinese word extraction, so these sequences are tried first.
 *
 * NOTE(review): '或者' and '那么' are also listed in CHINESE_EXTRAS (as 'or'
 * and 'then'). Which classification wins depends on `tryMultiCharParticle`
 * in ./base — confirm the overlap is intended.
 */
const MULTI_CHAR_PARTICLES = [
  '然后', // then, afterwards
  '接着', // next, following on
  '并且', // and, moreover
  '或者', // or
  '如果', // if
  '那么', // then, in that case
  '否则', // otherwise
];
79
+
80
+ // =============================================================================
81
+ // Chinese Extras (keywords not in profile)
82
+ // =============================================================================
83
+
84
/**
 * Keywords supplied in addition to those of the Chinese language profile.
 *
 * The constructor hands this list to `initializeKeywordsFromProfile` together
 * with `chineseProfile`. Each entry pairs a Chinese surface form (`native`)
 * with the token it normalizes to (`normalized`). Covered groups: literals,
 * positional words, event names, possessive references, time units, logical
 * operators, command synonyms and modifiers.
 *
 * The table below is written as [native, normalized] pairs and expanded into
 * entry objects in the same order.
 *
 * NOTE(review): '或者' and '那么' also appear in MULTI_CHAR_PARTICLES, which
 * the tokenizer tries before word extraction — confirm which one is meant to
 * take effect.
 */
const CHINESE_EXTRAS: KeywordEntry[] = (() => {
  const pairs: ReadonlyArray<readonly [string, string]> = [
    // Values/Literals
    ['真', 'true'],
    ['假', 'false'],
    ['空', 'null'],
    ['未定义', 'undefined'],

    // Positional
    ['第一个', 'first'],
    ['首个', 'first'],
    ['最后一个', 'last'],
    ['末个', 'last'],
    ['下一个', 'next'],
    ['上一个', 'previous'],
    ['最近的', 'closest'],
    ['父级', 'parent'],

    // Events
    ['点击', 'click'],
    ['双击', 'dblclick'],
    ['输入', 'input'],
    ['变更', 'change'],
    ['改变', 'change'],
    ['提交', 'submit'],
    ['按键', 'keydown'],
    ['释放键', 'keyup'],
    ['鼠标移入', 'mouseover'],
    ['鼠标移出', 'mouseout'],
    ['获得焦点', 'focus'],
    ['失去焦点', 'blur'],
    ['加载', 'load'],
    ['滚动', 'scroll'],

    // Additional references
    ['我的', 'my'],
    ['它的', 'its'],

    // Time units
    ['秒', 's'],
    ['毫秒', 'ms'],
    ['分钟', 'm'],
    ['小时', 'h'],

    // Logical operators
    ['和', 'and'],
    ['或者', 'or'],
    ['或', 'or'],
    ['不', 'not'],
    ['非', 'not'],
    ['是', 'is'],

    // Additional synonyms not in profile
    ['若', 'if'],
    ['不然', 'else'],
    ['循环', 'repeat'],
    ['遍历', 'for'],
    ['每个', 'for'],
    ['为每', 'for'],
    ['中止', 'halt'],
    ['抛', 'throw'],
    ['呼叫', 'call'],
    ['回', 'return'],
    ['脚本', 'js'],
    ['通知', 'tell'],
    ['缺省', 'default'],
    ['初始', 'init'],
    ['动作', 'behavior'],
    ['激发', 'trigger'],
    ['对焦', 'focus'],
    ['模糊', 'blur'],
    ['跳转', 'go'],
    ['导航', 'go'],
    ['抓取', 'fetch'],
    ['获取数据', 'fetch'],
    ['安定', 'settle'],
    ['拿取', 'take'],
    ['取', 'take'],
    ['创建', 'make'],
    ['克隆', 'clone'],
    ['记录', 'log'],
    ['打印', 'log'],
    ['动画', 'transition'],

    // Modifiers
    ['到里面', 'into'],
    ['里', 'into'],
    ['前', 'before'],
    ['后', 'after'],
    ['那么', 'then'],
    ['完', 'end'],
  ];

  return pairs.map(([native, normalized]) => ({ native, normalized }));
})();
183
+
184
+ // =============================================================================
185
+ // Chinese Time Units
186
+ // =============================================================================
187
+
188
/**
 * Chinese time-unit words and the suffix each one maps to, for number parsing.
 *
 * Entries are listed longest pattern first so that a longer unit (for example
 * '毫秒') is matched before a shorter one it contains ('秒'). Chinese time
 * units attach directly to the number without whitespace.
 *
 * `length` is the pattern's length as reported by `String.prototype.length`.
 */
const CHINESE_TIME_UNITS: readonly TimeUnitMapping[] = (() => {
  const unitSuffixes: ReadonlyArray<readonly [string, string]> = [
    ['毫秒', 'ms'],
    ['分钟', 'm'],
    ['小时', 'h'],
    ['秒', 's'],
    ['分', 'm'],
  ];

  return unitSuffixes.map(([pattern, suffix]) => ({
    pattern,
    suffix,
    length: pattern.length,
  }));
})();
200
+
201
+ // =============================================================================
202
+ // Chinese Tokenizer Implementation
203
+ // =============================================================================
204
+
205
+ export class ChineseTokenizer extends BaseTokenizer {
206
+ readonly language = 'zh';
207
+ readonly direction = 'ltr' as const;
208
+
209
/**
 * Create a Chinese tokenizer.
 *
 * After base-class construction, registers keywords by passing the Chinese
 * language profile and the CHINESE_EXTRAS list to
 * `initializeKeywordsFromProfile`.
 */
constructor() {
  super();
  this.initializeKeywordsFromProfile(chineseProfile, CHINESE_EXTRAS);
}
213
+
214
/**
 * Convert Chinese hyperscript source text into a token stream.
 *
 * Scans left to right. Whitespace is skipped (Chinese text may contain spaces
 * for readability). At every other position the scanners below are tried in
 * order; the first one that yields a token wins, the token is recorded, and
 * scanning resumes at that token's `position.end`:
 *   1. event modifier, then CSS selector (only at a selector-start character)
 *   2. string literal (at an ASCII quote or U+201C / U+201D / U+2018 / U+2019)
 *   3. URL (/path, ./path, http://, etc.)
 *   4. number (at a digit; includes Chinese time units)
 *   5. variable reference (:varname)
 *   6. multi-character particle
 *   7. Chinese word (at a CJK character)
 *   8. ASCII word (for mixed content)
 * A character that no scanner accepts is skipped without producing a token.
 *
 * @param input - Source text to tokenize.
 * @returns A token stream built from the collected tokens, tagged 'zh'.
 */
tokenize(input: string): TokenStream {
  const collected: LanguageToken[] = [];
  let cursor = 0;

  // Record a scanned token and move the cursor past it. Returns false, and
  // leaves all state untouched, when the scanner produced no token.
  const accept = (token: LanguageToken | null | undefined): boolean => {
    if (!token) {
      return false;
    }
    collected.push(token);
    cursor = token.position.end;
    return true;
  };

  while (cursor < input.length) {
    const ch = input[cursor];

    if (isWhitespace(ch)) {
      cursor += 1;
      continue;
    }

    // Selector-start characters: an event modifier (.once, .debounce(), ...)
    // takes precedence over a plain CSS selector.
    if (isSelectorStart(ch)) {
      if (
        accept(this.tryEventModifier(input, cursor)) ||
        accept(this.trySelector(input, cursor))
      ) {
        continue;
      }
    }

    // String literal, opened by an ASCII quote or a curly quote.
    const atQuote =
      isQuote(ch) ||
      ch === '\u201C' ||
      ch === '\u201D' ||
      ch === '\u2018' ||
      ch === '\u2019';
    if (atQuote && accept(this.tryChineseString(input, cursor))) {
      continue;
    }

    // URL
    if (isUrlStart(input, cursor) && accept(this.tryUrl(input, cursor))) {
      continue;
    }

    // Number, possibly followed by a Chinese time unit
    if (isDigit(ch) && accept(this.extractChineseNumber(input, cursor))) {
      continue;
    }

    // Variable reference
    if (accept(this.tryVariableRef(input, cursor))) {
      continue;
    }

    // Multi-character particles are tried before single-character matching
    if (accept(this.tryMultiCharParticle(input, cursor, MULTI_CHAR_PARTICLES))) {
      continue;
    }

    // Chinese word (CJK sequence)
    if (isChinese(ch) && accept(this.extractChineseWord(input, cursor))) {
      continue;
    }

    // ASCII word
    if (isAsciiIdentifierChar(ch) && accept(this.extractAsciiWord(input, cursor))) {
      continue;
    }

    // Nothing recognized this character: drop it and move on.
    cursor += 1;
  }

  return new TokenStreamImpl(collected, 'zh');
}
322
+
323
/**
 * Classify a raw token string into a token kind.
 *
 * Checks are applied in this order, and the first match wins:
 * particle, keyword, selector prefix, opening-quote prefix, leading ASCII
 * digit. Anything else is reported as an identifier.
 *
 * @param token - The token text to classify.
 * @returns The kind assigned to `token`.
 */
classifyToken(token: string): TokenKind {
  if (PARTICLES.has(token)) {
    return 'particle';
  }
  if (this.isKeyword(token)) {
    return 'keyword';
  }

  // CSS-like selectors: #id, .class, [attr], <tag/>
  const selectorPrefixes = ['#', '.', '[', '<'];
  if (selectorPrefixes.some(prefix => token.startsWith(prefix))) {
    return 'selector';
  }

  // ASCII quotes plus the Chinese opening quotes (U+201C, U+2018)
  const openingQuotes = ['"', "'", '\u201C', '\u2018'];
  if (openingQuotes.some(quote => token.startsWith(quote))) {
    return 'literal';
  }

  // A leading ASCII digit marks a numeric literal; charAt(0) is '' for an
  // empty token, which fails the range check and falls through to identifier.
  const first = token.charAt(0);
  return first >= '0' && first <= '9' ? 'literal' : 'identifier';
}
345
+
346
/**
 * Extract a Chinese word starting at `startPos`.
 *
 * Phase 1 (keyword match): walks `profileKeywords` in order and returns the
 * first entry whose native form occurs at `startPos` and consists solely of
 * Chinese characters. The list is expected to be sorted longest-first, which
 * makes this a greedy longest match. Chinese has no inflection, so no
 * morphological normalization is applied.
 *
 * Phase 2 (fallback): consumes a run of Chinese characters, stopping before a
 * single-character particle once at least one character has been consumed.
 * A particle sitting at `startPos` itself is consumed as the first character
 * of the run; the result is tagged `particle` only when the whole run is a
 * member of `PARTICLES`, otherwise it is an `identifier`.
 *
 * @param input - Full source text being tokenized.
 * @param startPos - Index at which to start extracting.
 * @returns A keyword, particle or identifier token, or `null` when no Chinese
 *   character could be consumed at `startPos`.
 */
private extractChineseWord(input: string, startPos: number): LanguageToken | null {
  for (const entry of this.profileKeywords) {
    const keyword = entry.native;

    // An empty keyword would "match" everywhere and yield a zero-width token;
    // the caller advances by token.position.end, so it would never progress.
    if (keyword.length === 0) continue;

    // startsWith compares in place, avoiding a substring allocation per keyword.
    if (!input.startsWith(keyword, startPos)) continue;

    // Only accept keywords made entirely of Chinese characters
    // (to avoid matching partial ASCII).
    let allChinese = true;
    for (let i = 0; i < keyword.length; i++) {
      if (!isChinese(keyword[i])) {
        allChinese = false;
        break;
      }
    }

    if (allChinese) {
      return createToken(
        keyword,
        'keyword',
        createPosition(startPos, startPos + keyword.length),
        entry.normalized
      );
    }
  }

  // No keyword match: scan a plain run of Chinese characters.
  let end = startPos;
  while (end < input.length) {
    const char = input[end];

    // Stop before a single-char particle, but only once we have content.
    if (end > startPos && PARTICLES.has(char)) break;

    // Anything non-Chinese (ASCII, whitespace, punctuation) ends the word.
    if (!isChinese(char)) break;

    end++;
  }

  if (end === startPos) return null;

  const word = input.slice(startPos, end);
  const kind = PARTICLES.has(word) ? 'particle' : 'identifier';
  return createToken(word, kind, createPosition(startPos, end));
}
410
+
411
/**
 * Extract a run of ASCII identifier characters (for mixed Chinese/English
 * content).
 *
 * @param input - Full source text being tokenized.
 * @param startPos - Index at which to start extracting.
 * @returns An `identifier` token covering the run, or `null` when the
 *   character at `startPos` is not an ASCII identifier character.
 */
private extractAsciiWord(input: string, startPos: number): LanguageToken | null {
  // Advance an end index over the run, then cut the word out in one slice.
  let end = startPos;
  while (end < input.length && isAsciiIdentifierChar(input[end])) {
    end += 1;
  }

  const word = input.slice(startPos, end);
  if (word.length === 0) {
    return null;
  }

  return createToken(word, 'identifier', createPosition(startPos, end));
}
426
+
427
/**
 * Try to extract a string literal at `pos`, supporting both ASCII quotes and
 * Chinese curly quotes.
 *
 * - ASCII `"`, `'` and backtick are delegated to `tryString`.
 * - U+201C opens a string closed by the next U+201D.
 * - U+2018 opens a string closed by the next U+2019.
 *
 * For Chinese quotes the token value includes both quote characters. No
 * escape handling is performed: the first closing quote ends the string.
 *
 * @param input - Full source text being tokenized.
 * @param pos - Index of the candidate opening quote.
 * @returns A `literal` token, or `null` when `pos` does not hold a supported
 *   opening quote or a Chinese-quoted string is unterminated.
 */
private tryChineseString(input: string, pos: number): LanguageToken | null {
  const opener = input[pos];

  let closer: string;
  switch (opener) {
    case '"':
    case "'":
    case '`':
      return this.tryString(input, pos);
    case '\u201C':
      closer = '\u201D';
      break;
    case '\u2018':
      closer = '\u2019';
      break;
    default:
      return null;
  }

  // Find the first matching closing quote after the opener.
  const closeIndex = input.indexOf(closer, pos + 1);
  if (closeIndex === -1) {
    return null;
  }

  const end = closeIndex + 1;
  return createToken(input.slice(pos, end), 'literal', createPosition(pos, end));
}
465
+
466
/**
 * Extract a number at `startPos`, including an optional Chinese time-unit
 * suffix.
 *
 * Delegates to `tryNumberWithTimeUnits` with `CHINESE_TIME_UNITS`, with sign
 * parsing and whitespace skipping both disabled (Chinese time units attach
 * directly to the number without whitespace).
 *
 * @param input - Full source text being tokenized.
 * @param startPos - Index of the first digit.
 * @returns Whatever `tryNumberWithTimeUnits` returns for these arguments.
 */
private extractChineseNumber(input: string, startPos: number): LanguageToken | null {
  const options = { allowSign: false, skipWhitespace: false };
  return this.tryNumberWithTimeUnits(input, startPos, CHINESE_TIME_UNITS, options);
}
476
+ }
477
+
478
/**
 * Shared, module-level `ChineseTokenizer` instance, created once when this
 * module is loaded.
 */
export const chineseTokenizer = new ChineseTokenizer();