@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,642 @@
1
+ /**
2
+ * Korean Tokenizer
3
+ *
4
+ * Tokenizes Korean hyperscript input.
5
+ * Korean is an agglutinative language with:
6
+ * - Hangul syllable blocks (가-힣)
7
+ * - Particles (조사) mark grammatical roles
8
+ * - 하다 verbs (noun + 하다)
9
+ * - CSS selectors are embedded ASCII
10
+ */
11
+
12
+ import type { LanguageToken, TokenKind, TokenStream } from '../types';
13
+ import {
14
+ BaseTokenizer,
15
+ TokenStreamImpl,
16
+ createToken,
17
+ createPosition,
18
+ createUnicodeRangeClassifier,
19
+ combineClassifiers,
20
+ isWhitespace,
21
+ isSelectorStart,
22
+ isQuote,
23
+ isDigit,
24
+ isAsciiIdentifierChar,
25
+ isUrlStart,
26
+ type KeywordEntry,
27
+ type TimeUnitMapping,
28
+ } from './base';
29
+ import { KoreanMorphologicalNormalizer } from './morphology/korean-normalizer';
30
+ import { koreanProfile } from '../generators/profiles/korean';
31
+
32
+ // =============================================================================
33
+ // Korean Character Classification
34
+ // =============================================================================
35
+
36
+ /** Check if character is a Korean syllable block (U+AC00-U+D7A3). */
37
+ const isHangul = createUnicodeRangeClassifier([[0xac00, 0xd7a3]]);
38
+
39
+ /** Check if character is a Hangul Jamo (U+1100-U+11FF, U+3130-U+318F). */
40
+ const isJamo = createUnicodeRangeClassifier([
41
+ [0x1100, 0x11ff], // Hangul Jamo
42
+ [0x3130, 0x318f], // Hangul Compatibility Jamo
43
+ ]);
44
+
45
+ /** Check if character is Korean (Hangul syllable or Jamo). */
46
+ const isKorean = combineClassifiers(isHangul, isJamo);
47
+
48
+ // =============================================================================
49
+ // Korean Particles (조사)
50
+ // =============================================================================
51
+
52
+ /**
53
+ * Korean particles that mark grammatical roles.
54
+ * These appear after nouns; several alternate depending on whether the preceding syllable ends in a consonant or a vowel.
55
+ */
56
+ const PARTICLES = new Set([
57
+ // Subject markers
58
+ '이', // i - after consonant
59
+ '가', // ga - after vowel
60
+ // Object markers
61
+ '을', // eul - after consonant
62
+ '를', // reul - after vowel
63
+ // Topic markers
64
+ '은', // eun - after consonant
65
+ '는', // neun - after vowel
66
+ // Location/time markers
67
+ '에', // e - at, to
68
+ '에서', // eseo - at (action location), from
69
+ '로', // ro - to, by means (after vowel or ㄹ)
70
+ '으로', // euro - to, by means (after consonant)
71
+ // Others
72
+ '와', // wa - and, with (after vowel)
73
+ '과', // gwa - and, with (after consonant)
74
+ '의', // ui - possessive ('s)
75
+ '도', // do - also
76
+ '만', // man - only
77
+ '부터', // buteo - from
78
+ '까지', // kkaji - until
79
+ '처럼', // cheoreom - like
80
+ '보다', // boda - than
81
+ ]);
82
+
83
+ /**
84
+ * Single-character particles.
85
+ */
86
+ const SINGLE_CHAR_PARTICLES = new Set([
87
+ '이',
88
+ '가',
89
+ '을',
90
+ '를',
91
+ '은',
92
+ '는',
93
+ '에',
94
+ '로',
95
+ '와',
96
+ '과',
97
+ '의',
98
+ '도',
99
+ '만',
100
+ ]);
101
+
102
+ /**
103
+ * Multi-character particles.
104
+ */
105
+ const MULTI_CHAR_PARTICLES = ['에서', '으로', '부터', '까지', '처럼', '보다'];
106
+
107
+ /**
108
+ * Particle metadata mapping particles to semantic roles, confidence scores,
109
+ * and consonant/vowel variants. Korean particles change based on whether the
110
+ * preceding syllable ends in a consonant or vowel.
111
+ */
112
+ interface ParticleMetadata {
113
+ readonly role: string; // SemanticRole
114
+ readonly confidence: number;
115
+ readonly variant?: 'consonant' | 'vowel'; // For vowel harmony pairs
116
+ readonly description?: string;
117
+ }
118
+
119
+ const PARTICLE_ROLES = new Map<string, ParticleMetadata>([
120
+ // Subject markers (consonant/vowel pair)
121
+ [
122
+ '이',
123
+ {
124
+ role: 'agent',
125
+ confidence: 0.85,
126
+ variant: 'consonant',
127
+ description: 'subject marker (after consonant)',
128
+ },
129
+ ],
130
+ [
131
+ '가',
132
+ {
133
+ role: 'agent',
134
+ confidence: 0.85,
135
+ variant: 'vowel',
136
+ description: 'subject marker (after vowel)',
137
+ },
138
+ ],
139
+
140
+ // Object markers (consonant/vowel pair)
141
+ [
142
+ '을',
143
+ {
144
+ role: 'patient',
145
+ confidence: 0.95,
146
+ variant: 'consonant',
147
+ description: 'object marker (after consonant)',
148
+ },
149
+ ],
150
+ [
151
+ '를',
152
+ {
153
+ role: 'patient',
154
+ confidence: 0.95,
155
+ variant: 'vowel',
156
+ description: 'object marker (after vowel)',
157
+ },
158
+ ],
159
+
160
+ // Topic markers (consonant/vowel pair)
161
+ [
162
+ '은',
163
+ {
164
+ role: 'agent',
165
+ confidence: 0.75,
166
+ variant: 'consonant',
167
+ description: 'topic marker (after consonant)',
168
+ },
169
+ ],
170
+ [
171
+ '는',
172
+ {
173
+ role: 'agent',
174
+ confidence: 0.75,
175
+ variant: 'vowel',
176
+ description: 'topic marker (after vowel)',
177
+ },
178
+ ],
179
+
180
+ // Location/time markers
181
+ ['에', { role: 'destination', confidence: 0.85, description: 'at/to marker' }],
182
+ ['에서', { role: 'source', confidence: 0.8, description: 'at/from marker (action location)' }],
183
+
184
+ // Direction/means markers (consonant/vowel pair)
185
+ [
186
+ '로',
187
+ {
188
+ role: 'destination',
189
+ confidence: 0.85,
190
+ variant: 'vowel',
191
+ description: 'to/by means (after vowel or ㄹ)',
192
+ },
193
+ ],
194
+ [
195
+ '으로',
196
+ {
197
+ role: 'destination',
198
+ confidence: 0.85,
199
+ variant: 'consonant',
200
+ description: 'to/by means (after consonant)',
201
+ },
202
+ ],
203
+
204
+ // And/with markers (consonant/vowel pair)
205
+ [
206
+ '와',
207
+ { role: 'style', confidence: 0.7, variant: 'vowel', description: 'and/with (after vowel)' },
208
+ ],
209
+ [
210
+ '과',
211
+ {
212
+ role: 'style',
213
+ confidence: 0.7,
214
+ variant: 'consonant',
215
+ description: 'and/with (after consonant)',
216
+ },
217
+ ],
218
+
219
+ // Other markers
220
+ ['의', { role: 'patient', confidence: 0.6, description: 'possessive marker' }],
221
+ ['도', { role: 'patient', confidence: 0.65, description: 'also/too marker' }],
222
+ ['만', { role: 'patient', confidence: 0.65, description: 'only marker' }],
223
+ ['부터', { role: 'source', confidence: 0.9, description: 'from/since marker' }],
224
+ ['까지', { role: 'destination', confidence: 0.75, description: 'until/to marker' }],
225
+ ['처럼', { role: 'manner', confidence: 0.8, description: 'like/as marker' }],
226
+ ['보다', { role: 'source', confidence: 0.75, description: 'than marker' }],
227
+ ]);
228
+
229
+ // =============================================================================
230
+ // Korean Extras (keywords not in profile)
231
+ // =============================================================================
232
+
233
+ /**
234
+ * Extra keywords that supplement (or override) the profile, including:
235
+ * - Literals (true, false, null, undefined)
236
+ * - Positional words
237
+ * - Event names
238
+ * - Attached particle forms (native idioms)
239
+ * - Conditional event forms
240
+ * - Time units
241
+ */
242
+ const KOREAN_EXTRAS: KeywordEntry[] = [
243
+ // Values/Literals
244
+ { native: '참', normalized: 'true' },
245
+ { native: '거짓', normalized: 'false' },
246
+ { native: '널', normalized: 'null' },
247
+ { native: '미정의', normalized: 'undefined' },
248
+
249
+ // Positional
250
+ { native: '첫번째', normalized: 'first' },
251
+ { native: '마지막', normalized: 'last' },
252
+ { native: '다음', normalized: 'next' },
253
+ { native: '이전', normalized: 'previous' },
254
+ { native: '가장가까운', normalized: 'closest' },
255
+ { native: '부모', normalized: 'parent' },
256
+
257
+ // Events
258
+ { native: '클릭', normalized: 'click' },
259
+ { native: '더블클릭', normalized: 'dblclick' },
260
+ { native: '변경', normalized: 'change' },
261
+ { native: '제출', normalized: 'submit' },
262
+ { native: '입력', normalized: 'input' },
263
+ { native: '로드', normalized: 'load' },
264
+ { native: '스크롤', normalized: 'scroll' },
265
+ { native: '키다운', normalized: 'keydown' },
266
+ { native: '키업', normalized: 'keyup' },
267
+ { native: '마우스오버', normalized: 'mouseover' },
268
+ { native: '마우스아웃', normalized: 'mouseout' },
269
+
270
+ // References (additional forms)
271
+ { native: '내', normalized: 'my' },
272
+ { native: '그것의', normalized: 'its' },
273
+
274
+ // Conditional event forms (native idioms)
275
+ { native: '하면', normalized: 'on' },
276
+ { native: '으면', normalized: 'on' },
277
+ { native: '면', normalized: 'on' },
278
+ { native: '할때', normalized: 'on' },
279
+ { native: '할 때', normalized: 'on' },
280
+ { native: '을때', normalized: 'on' },
281
+ { native: '을 때', normalized: 'on' },
282
+ { native: '하니까', normalized: 'on' },
283
+ { native: '니까', normalized: 'on' },
284
+
285
+ // Control flow helpers
286
+ { native: '그러면', normalized: 'then' },
287
+ { native: '그렇지않으면', normalized: 'otherwise' },
288
+ { native: '중단', normalized: 'break' },
289
+
290
+ // Logical
291
+ { native: '그리고', normalized: 'and' },
292
+ { native: '또는', normalized: 'or' },
293
+ { native: '아니', normalized: 'not' },
294
+ { native: '이다', normalized: 'is' },
295
+
296
+ // Command overrides (ensure correct mapping when profile has multiple meanings)
297
+ { native: '추가', normalized: 'add' }, // Profile may have this as 'append'
298
+
299
+ // Attached particle forms (native idioms - particle + verb without space)
300
+ // Object particle 를 (after vowel)
301
+ { native: '를토글', normalized: 'toggle' },
302
+ { native: '를전환', normalized: 'toggle' },
303
+ { native: '를추가', normalized: 'add' },
304
+ { native: '를제거', normalized: 'remove' },
305
+ { native: '를삭제', normalized: 'remove' },
306
+ { native: '를증가', normalized: 'increment' },
307
+ { native: '를감소', normalized: 'decrement' },
308
+ { native: '를표시', normalized: 'show' },
309
+ { native: '를숨기다', normalized: 'hide' },
310
+ { native: '를설정', normalized: 'set' },
311
+ // Object particle 을 (after consonant)
312
+ { native: '을토글', normalized: 'toggle' },
313
+ { native: '을전환', normalized: 'toggle' },
314
+ { native: '을추가', normalized: 'add' },
315
+ { native: '을제거', normalized: 'remove' },
316
+ { native: '을삭제', normalized: 'remove' },
317
+ { native: '을증가', normalized: 'increment' },
318
+ { native: '을감소', normalized: 'decrement' },
319
+ { native: '을표시', normalized: 'show' },
320
+ { native: '을숨기다', normalized: 'hide' },
321
+ { native: '을설정', normalized: 'set' },
322
+
323
+ // Time units
324
+ { native: '초', normalized: 's' },
325
+ { native: '밀리초', normalized: 'ms' },
326
+ { native: '분', normalized: 'm' },
327
+ { native: '시간', normalized: 'h' },
328
+ ];
329
+
330
+ // =============================================================================
331
+ // Korean Time Units
332
+ // =============================================================================
333
+
334
/**
 * Korean time unit patterns for number parsing.
 *
 * Entries are ordered longest pattern first so that `밀리초` (milliseconds)
 * is tried before `초` (seconds), which is its own final character.
 * Korean time units attach directly to the number without whitespace.
 *
 * `length` is derived from each pattern instead of being written by hand, so
 * the two cannot drift apart when an entry is edited.
 */
const KOREAN_TIME_UNITS: readonly TimeUnitMapping[] = (
  [
    ['밀리초', 'ms'],
    ['시간', 'h'],
    ['초', 's'],
    ['분', 'm'],
  ] as const
).map(([pattern, suffix]) => ({ pattern, suffix, length: pattern.length }));
345
+
346
+ // =============================================================================
347
+ // Korean Tokenizer Implementation
348
+ // =============================================================================
349
+
350
/**
 * Tokenizer for Korean input.
 *
 * Keywords come from `koreanProfile` plus `KOREAN_EXTRAS`; conjugated verb
 * forms are resolved through a `KoreanMorphologicalNormalizer`. Recognisers
 * for selectors, strings, URLs, numbers, variable references, event
 * modifiers and multi-character particles are inherited helpers that this
 * class only sequences.
 */
export class KoreanTokenizer extends BaseTokenizer {
  readonly language = 'ko';
  readonly direction = 'ltr' as const;

  constructor() {
    super();
    // Initialize keywords from profile + extras (single source of truth)
    this.initializeKeywordsFromProfile(koreanProfile, KOREAN_EXTRAS);
    // Set morphological normalizer for verb conjugations
    this.normalizer = new KoreanMorphologicalNormalizer();
  }

  /**
   * Split `input` into a token stream.
   *
   * At each position the recognisers are tried in a fixed order and the first
   * one that yields a token wins: event modifier / CSS selector, string
   * literal, URL, number (with Korean time units), variable reference, Korean
   * word, multi-character particle, single-character particle, ASCII word.
   * Whitespace and characters no recogniser accepts are skipped silently.
   *
   * @param input - Raw source text.
   * @returns A `TokenStreamImpl` over the collected tokens, tagged `'ko'`.
   */
  tokenize(input: string): TokenStream {
    const tokens: LanguageToken[] = [];
    let pos = 0;

    while (pos < input.length) {
      const char = input[pos];

      // Skip whitespace
      if (isWhitespace(char)) {
        pos++;
        continue;
      }

      // Try CSS selector first (ASCII-based, highest priority)
      if (isSelectorStart(char)) {
        // Check for event modifier first (.once, .debounce(), etc.)
        const modifierToken = this.tryEventModifier(input, pos);
        if (modifierToken) {
          tokens.push(modifierToken);
          pos = modifierToken.position.end;
          continue;
        }

        const selectorToken = this.trySelector(input, pos);
        if (selectorToken) {
          tokens.push(selectorToken);
          pos = selectorToken.position.end;
          continue;
        }
      }

      // Try string literal
      if (isQuote(char)) {
        const stringToken = this.tryString(input, pos);
        if (stringToken) {
          tokens.push(stringToken);
          pos = stringToken.position.end;
          continue;
        }
      }

      // Try URL (/path, ./path, http://, etc.)
      if (isUrlStart(input, pos)) {
        const urlToken = this.tryUrl(input, pos);
        if (urlToken) {
          tokens.push(urlToken);
          pos = urlToken.position.end;
          continue;
        }
      }

      // Try number (including Korean time units)
      if (isDigit(char)) {
        const numberToken = this.extractKoreanNumber(input, pos);
        if (numberToken) {
          tokens.push(numberToken);
          pos = numberToken.position.end;
          continue;
        }
      }

      // Try variable reference (:varname)
      const varToken = this.tryVariableRef(input, pos);
      if (varToken) {
        tokens.push(varToken);
        pos = varToken.position.end;
        continue;
      }

      // Try Korean word FIRST (before particles)
      // This ensures keywords like 로그 aren't split on particle characters
      if (isKorean(char)) {
        const wordToken = this.extractKoreanWord(input, pos);
        if (wordToken) {
          tokens.push(wordToken);
          pos = wordToken.position.end;
          continue;
        }
      }

      // NOTE(review): extractKoreanWord always yields a token when its first
      // character satisfies isKorean, so the two particle branches below are
      // only reached for characters isKorean rejects. If every particle is
      // Hangul they look unreachable - confirm whether a stand-alone particle
      // is meant to surface as kind 'particle' rather than keyword/identifier.

      // Try multi-character particle (before single-character)
      const multiParticle = this.tryMultiCharParticle(input, pos, MULTI_CHAR_PARTICLES);
      if (multiParticle) {
        tokens.push(this.withParticleRole(multiParticle, multiParticle.value));
        pos = multiParticle.position.end;
        continue;
      }

      // Try single-character particle
      if (SINGLE_CHAR_PARTICLES.has(char)) {
        const particleToken = createToken(char, 'particle', createPosition(pos, pos + 1));
        tokens.push(this.withParticleRole(particleToken, char));
        pos++;
        continue;
      }

      // Try ASCII word (for mixed content)
      if (isAsciiIdentifierChar(char)) {
        const asciiToken = this.extractAsciiWord(input, pos);
        if (asciiToken) {
          tokens.push(asciiToken);
          pos = asciiToken.position.end;
          continue;
        }
      }

      // Skip unknown character
      pos++;
    }

    return new TokenStreamImpl(tokens, 'ko');
  }

  /**
   * Classify an already-isolated token string.
   *
   * Checks run in priority order: particle, keyword, selector (leading `#`,
   * `.` or `[`), literal (leading quote or digit); anything else is an
   * identifier.
   *
   * @param token - Token text to classify.
   * @returns The token kind.
   */
  classifyToken(token: string): TokenKind {
    if (PARTICLES.has(token)) return 'particle';
    if (this.isKeyword(token)) return 'keyword';
    if (token.startsWith('#') || token.startsWith('.') || token.startsWith('[')) return 'selector';
    if (token.startsWith('"') || token.startsWith("'")) return 'literal';
    if (/^\d/.test(token)) return 'literal';

    return 'identifier';
  }

  /**
   * Return `token` with particle role metadata attached when `PARTICLE_ROLES`
   * has an entry for `particle`; otherwise return `token` unchanged.
   */
  private withParticleRole(token: LanguageToken, particle: string): LanguageToken {
    const metadata = PARTICLE_ROLES.get(particle);
    if (!metadata) return token;

    return {
      ...token,
      metadata: {
        particleRole: metadata.role,
        particleConfidence: metadata.confidence,
        particleVariant: metadata.variant,
      },
    };
  }

  /**
   * Report whether a multi-character particle starts at `pos` and is followed
   * by a word boundary (end of input, whitespace, or a non-Korean character).
   */
  private isMultiCharParticleAtBoundary(input: string, pos: number): boolean {
    for (const particle of MULTI_CHAR_PARTICLES) {
      if (!input.startsWith(particle, pos)) continue;

      const afterParticle = pos + particle.length;
      const charAfter = afterParticle < input.length ? input[afterParticle] : '';
      if (charAfter === '' || isWhitespace(charAfter) || !isKorean(charAfter)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Extract a Korean word (sequence of Hangul) starting at `startPos`.
   *
   * Phase 1 looks for the longest prefix (6 down to 2 characters) that is a
   * known keyword or normalizes to one, so compound words like 추가, 증가 and
   * 숨기다 are recognised whole. Phase 2 otherwise scans forward until a
   * particle that sits on a word boundary, or a non-Korean character, and
   * classifies the resulting word as keyword, conjugated keyword, or
   * identifier.
   *
   * @param input - Full source text.
   * @param startPos - Index at which to start reading.
   * @returns The extracted token, or `null` when no Korean character could be
   *   consumed at `startPos`.
   */
  private extractKoreanWord(input: string, startPos: number): LanguageToken | null {
    const maxKeywordLen = 6; // Longest Korean keyword

    // Measure the leading Hangul run once (capped at the longest keyword).
    // A prefix of length `len` is all-Korean exactly when `len` fits in it,
    // so no candidate needs to be rescanned character by character.
    const prefixLimit = Math.min(maxKeywordLen, input.length - startPos);
    let hangulRun = 0;
    while (hangulRun < prefixLimit && isKorean(input[startPos + hangulRun])) {
      hangulRun++;
    }

    // Single characters are left to phase 2, hence the lower bound of 2.
    for (let len = hangulRun; len >= 2; len--) {
      const end = startPos + len;
      const candidate = input.slice(startPos, end);

      const keywordEntry = this.lookupKeyword(candidate);
      if (keywordEntry) {
        return createToken(
          candidate,
          'keyword',
          createPosition(startPos, end),
          keywordEntry.normalized
        );
      }

      // Try morphological normalization for conjugated forms
      const morphToken = this.tryMorphKeywordMatch(candidate, startPos, end);
      if (morphToken) return morphToken;
    }

    // No keyword match - extract as regular word using particle boundaries
    let pos = startPos;
    while (pos < input.length) {
      const char = input[pos];

      // Particles only end a word that already has content; a particle
      // character in first position is consumed as part of the word.
      if (pos > startPos) {
        if (SINGLE_CHAR_PARTICLES.has(char)) {
          const nextChar = pos + 1 < input.length ? input[pos + 1] : '';
          // Boundary = followed by end of input, whitespace, a non-Korean
          // character, or another particle. Otherwise the character is
          // treated as an ordinary part of the word.
          const isWordBoundary =
            nextChar === '' ||
            isWhitespace(nextChar) ||
            !isKorean(nextChar) ||
            SINGLE_CHAR_PARTICLES.has(nextChar);
          if (isWordBoundary) break;
        }

        if (this.isMultiCharParticleAtBoundary(input, pos)) break;
      }

      if (!isKorean(char)) break;
      pos++;
    }

    if (pos === startPos) return null;
    const word = input.slice(startPos, pos);

    const keywordEntry = this.lookupKeyword(word);
    if (keywordEntry) {
      return createToken(word, 'keyword', createPosition(startPos, pos), keywordEntry.normalized);
    }

    // Try morphological normalization for conjugated forms
    const morphToken = this.tryMorphKeywordMatch(word, startPos, pos);
    if (morphToken) return morphToken;

    // Not a keyword, return as identifier
    return createToken(word, 'identifier', createPosition(startPos, pos));
  }

  /**
   * Extract an ASCII word (for mixed Korean/English content).
   *
   * @param input - Full source text.
   * @param startPos - Index at which to start reading.
   * @returns An identifier token covering the run of ASCII identifier
   *   characters, or `null` when the run is empty.
   */
  private extractAsciiWord(input: string, startPos: number): LanguageToken | null {
    let end = startPos;
    while (end < input.length && isAsciiIdentifierChar(input[end])) {
      end++;
    }

    if (end === startPos) return null;

    return createToken(input.slice(startPos, end), 'identifier', createPosition(startPos, end));
  }

  /**
   * Extract a number, including Korean time unit suffixes.
   * Korean time units attach directly without whitespace, so whitespace
   * skipping is disabled; a leading sign is not accepted either.
   */
  private extractKoreanNumber(input: string, startPos: number): LanguageToken | null {
    return this.tryNumberWithTimeUnits(input, startPos, KOREAN_TIME_UNITS, {
      allowSign: false,
      skipWhitespace: false,
    });
  }
}
638
+
639
/**
 * Shared `KoreanTokenizer` instance, constructed once when this module is
 * evaluated. The class itself remains exported, so callers may still create
 * their own instances.
 */
export const koreanTokenizer = new KoreanTokenizer();