@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,447 @@
1
+ /**
2
+ * Japanese Tokenizer
3
+ *
4
+ * Tokenizes Japanese hyperscript input.
5
+ * Japanese is challenging because:
6
+ * - No spaces between words
7
+ * - Particles (助詞) mark grammatical roles
8
+ * - Mixed scripts (hiragana, katakana, kanji, romaji)
9
+ * - CSS selectors are embedded ASCII
10
+ */
11
+
12
+ import type { LanguageToken, TokenKind, TokenStream } from '../types';
13
+ import {
14
+ BaseTokenizer,
15
+ TokenStreamImpl,
16
+ createToken,
17
+ createPosition,
18
+ createUnicodeRangeClassifier,
19
+ combineClassifiers,
20
+ isWhitespace,
21
+ isSelectorStart,
22
+ isQuote,
23
+ isDigit,
24
+ isAsciiIdentifierChar,
25
+ isUrlStart,
26
+ type KeywordEntry,
27
+ type TimeUnitMapping,
28
+ } from './base';
29
+ import { JapaneseMorphologicalNormalizer } from './morphology/japanese-normalizer';
30
+ import { japaneseProfile } from '../generators/profiles/japanese';
31
+
32
+ // =============================================================================
33
+ // Japanese Character Classification
34
+ // =============================================================================
35
+
36
+ /** Check if character is hiragana (U+3040-U+309F). */
37
+ const isHiragana = createUnicodeRangeClassifier([[0x3040, 0x309f]]);
38
+
39
+ /** Check if character is full-width katakana (U+30A0-U+30FF); half-width katakana (U+FF65-U+FF9F) is outside this range. */
40
+ const isKatakana = createUnicodeRangeClassifier([[0x30a0, 0x30ff]]);
41
+
42
+ /** Check if character is kanji (CJK Unified Ideographs + Extension A only; later extensions are outside the listed ranges). */
43
+ const isKanji = createUnicodeRangeClassifier([
44
+ [0x4e00, 0x9fff], // CJK Unified Ideographs
45
+ [0x3400, 0x4dbf], // CJK Unified Ideographs Extension A
46
+ ]);
47
+
48
+ /** Check if character is Japanese (hiragana, katakana, or kanji); ASCII/romaji is not covered by any of the three classifiers. */
49
+ const isJapanese = combineClassifiers(isHiragana, isKatakana, isKanji);
50
+
51
+ // =============================================================================
52
+ // Japanese Particles
53
+ // =============================================================================
54
+
55
+ /**
56
+ * Japanese particles that mark grammatical roles.
57
+ * They appear after nouns/verbs; most are one hiragana character, but から, まで and より are two.
58
+ */
59
+ const PARTICLES = new Set([
60
+ 'を', // wo - object marker
61
+ 'に', // ni - destination, time
62
+ 'で', // de - location of action, means
63
+ 'から', // kara - from
64
+ 'まで', // made - until
65
+ 'へ', // e - direction
66
+ 'と', // to - and, with
67
+ 'の', // no - possessive
68
+ 'が', // ga - subject marker
69
+ 'は', // wa - topic marker
70
+ 'も', // mo - also
71
+ 'より', // yori - than, from
72
+ ]);
73
+
74
+ /**
75
+ * Single-character particles (most common); `tokenize` tests `input[pos]` against this set.
76
+ */
77
+ const SINGLE_CHAR_PARTICLES = new Set(['を', 'に', 'で', 'へ', 'と', 'の', 'が', 'は', 'も']);
78
+
79
+ /**
80
+ * Multi-character particles; `tokenize` tries these before the single-character set.
81
+ */
82
+ const MULTI_CHAR_PARTICLES = ['から', 'まで', 'より'];
83
+
84
+ /**
85
+ * Particle metadata mapping particles to semantic roles and confidence scores.
86
+ * `tokenize` copies `role`/`confidence` onto particle tokens as `particleRole`/`particleConfidence` metadata for the pattern matcher.
87
+ */
88
+ interface ParticleMetadata {
89
+ readonly role: string; // expected to be a SemanticRole name — TODO confirm against '../types'
90
+ readonly confidence: number; // score attached to the role; entries in PARTICLE_ROLES range from 0.6 to 0.95
91
+ readonly description?: string; // human-readable gloss of the particle's function
92
+ }
93
+
94
+ const PARTICLE_ROLES = new Map<string, ParticleMetadata>([
95
+ ['を', { role: 'patient', confidence: 0.95, description: 'object marker' }],
96
+ ['に', { role: 'destination', confidence: 0.85, description: 'destination/time marker' }],
97
+ ['で', { role: 'manner', confidence: 0.88, description: 'means/location marker' }],
98
+ ['から', { role: 'source', confidence: 0.9, description: 'from/source marker' }],
99
+ ['まで', { role: 'destination', confidence: 0.75, description: 'until/boundary marker' }],
100
+ ['へ', { role: 'destination', confidence: 0.9, description: 'direction marker' }],
101
+ ['と', { role: 'style', confidence: 0.7, description: 'with/and marker' }],
102
+ ['の', { role: 'patient', confidence: 0.6, description: 'possessive marker' }],
103
+ ['が', { role: 'agent', confidence: 0.85, description: 'subject marker' }],
104
+ ['は', { role: 'agent', confidence: 0.75, description: 'topic marker' }],
105
+ ['も', { role: 'patient', confidence: 0.65, description: 'also/too marker' }],
106
+ ['より', { role: 'source', confidence: 0.85, description: 'from/than marker' }],
107
+ ]);
108
+
109
+ // =============================================================================
110
+ // Japanese Extras (keywords not in profile)
111
+ // =============================================================================
112
+
113
+ /**
114
+ * Extra keywords not covered by the profile:
115
+ * - Literals (true, false, null, undefined)
116
+ * - Positional words
117
+ * - Event names
118
+ * - Additional reference forms (私, 私の, その); attached particle forms are deliberately left out
119
+ * - Conditional event forms and control-flow helpers (もし, ならば, なら)
120
+ * - Time units
121
+ */
122
+ const JAPANESE_EXTRAS: KeywordEntry[] = [
123
+ // Values/Literals
124
+ { native: '真', normalized: 'true' },
125
+ { native: '偽', normalized: 'false' },
126
+ { native: 'ヌル', normalized: 'null' },
127
+ { native: '未定義', normalized: 'undefined' },
128
+
129
+ // Positional
130
+ { native: '最初', normalized: 'first' },
131
+ { native: '最後', normalized: 'last' },
132
+ { native: '次', normalized: 'next' },
133
+ { native: '前', normalized: 'previous' },
134
+ { native: '最も近い', normalized: 'closest' },
135
+ { native: '親', normalized: 'parent' },
136
+
137
+ // Events
138
+ { native: 'クリック', normalized: 'click' },
139
+ { native: '変更', normalized: 'change' },
140
+ { native: '送信', normalized: 'submit' },
141
+ { native: '入力', normalized: 'input' },
142
+ { native: 'ロード', normalized: 'load' },
143
+ { native: 'スクロール', normalized: 'scroll' },
144
+ { native: 'キーダウン', normalized: 'keydown' },
145
+ { native: 'キーアップ', normalized: 'keyup' },
146
+ { native: 'マウスオーバー', normalized: 'mouseover' },
147
+ { native: 'マウスアウト', normalized: 'mouseout' },
148
+ { native: 'ブラー', normalized: 'blur' },
149
+
150
+ // References (additional forms)
151
+ { native: '私', normalized: 'me' },
152
+ { native: '私の', normalized: 'my' },
153
+ { native: 'その', normalized: 'its' },
154
+
155
+ // Note: Attached particle forms (を切り替え, を追加, etc.) are intentionally NOT included
156
+ // because they would cause ambiguous parsing. The separate particle + verb pattern
157
+ // (を + 切り替え) is preferred for consistent semantic analysis.
158
+
159
+ // Conditional event forms
160
+ { native: 'したら', normalized: 'on' },
161
+ { native: 'すると', normalized: 'on' },
162
+ { native: '時に', normalized: 'on' },
163
+
164
+ // Control flow helpers
165
+ { native: 'もし', normalized: 'if' }, // Starts with particle も, needs explicit entry
166
+ { native: 'ならば', normalized: 'then' },
167
+ { native: 'なら', normalized: 'then' },
168
+
169
+ // Time units
170
+ { native: '秒', normalized: 's' },
171
+ { native: 'ミリ秒', normalized: 'ms' },
172
+ { native: '分', normalized: 'm' },
173
+ { native: '時間', normalized: 'h' },
174
+ ];
175
+
176
+ // =============================================================================
177
+ // Japanese Time Units
178
+ // =============================================================================
179
+
180
+ /**
181
+ * Japanese time unit patterns for number parsing.
182
+ * Sorted by length (longest first) to ensure correct matching; `length` is the pattern's character count.
183
+ * Japanese time units attach directly without whitespace.
184
+ */
185
+ const JAPANESE_TIME_UNITS: readonly TimeUnitMapping[] = [
186
+ { pattern: 'ミリ秒', suffix: 'ms', length: 3 },
187
+ { pattern: '時間', suffix: 'h', length: 2 },
188
+ { pattern: '秒', suffix: 's', length: 1 },
189
+ { pattern: '分', suffix: 'm', length: 1 },
190
+ ];
191
+
192
+ // =============================================================================
193
+ // Japanese Tokenizer Implementation
194
+ // =============================================================================
195
+
196
+ export class JapaneseTokenizer extends BaseTokenizer {
197
+ readonly language = 'ja';
198
+ readonly direction = 'ltr' as const;
199
+
200
+ constructor() {
201
+ super();
202
+ // Initialize keywords from profile + extras (single source of truth)
203
+ this.initializeKeywordsFromProfile(japaneseProfile, JAPANESE_EXTRAS);
204
+ // Set morphological normalizer for verb conjugations
205
+ this.normalizer = new JapaneseMorphologicalNormalizer();
206
+ }
207
+
208
+ tokenize(input: string): TokenStream {
209
+ const tokens: LanguageToken[] = [];
210
+ let pos = 0;
211
+
212
+ while (pos < input.length) {
213
+ // Skip whitespace (Japanese can have spaces for readability)
214
+ if (isWhitespace(input[pos])) {
215
+ pos++;
216
+ continue;
217
+ }
218
+
219
+ // Try CSS selector first (ASCII-based, highest priority)
220
+ if (isSelectorStart(input[pos])) {
221
+ // Check for event modifier first (.once, .debounce(), etc.)
222
+ const modifierToken = this.tryEventModifier(input, pos);
223
+ if (modifierToken) {
224
+ tokens.push(modifierToken);
225
+ pos = modifierToken.position.end;
226
+ continue;
227
+ }
228
+
229
+ const selectorToken = this.trySelector(input, pos);
230
+ if (selectorToken) {
231
+ tokens.push(selectorToken);
232
+ pos = selectorToken.position.end;
233
+ continue;
234
+ }
235
+ }
236
+
237
+ // Try string literal (both ASCII and Japanese quotes)
238
+ if (isQuote(input[pos])) {
239
+ const stringToken = this.tryString(input, pos);
240
+ if (stringToken) {
241
+ tokens.push(stringToken);
242
+ pos = stringToken.position.end;
243
+ continue;
244
+ }
245
+ }
246
+
247
+ // Try URL (/path, ./path, http://, etc.)
248
+ if (isUrlStart(input, pos)) {
249
+ const urlToken = this.tryUrl(input, pos);
250
+ if (urlToken) {
251
+ tokens.push(urlToken);
252
+ pos = urlToken.position.end;
253
+ continue;
254
+ }
255
+ }
256
+
257
+ // Try number (including Japanese time units)
258
+ if (isDigit(input[pos])) {
259
+ const numberToken = this.extractJapaneseNumber(input, pos);
260
+ if (numberToken) {
261
+ tokens.push(numberToken);
262
+ pos = numberToken.position.end;
263
+ continue;
264
+ }
265
+ }
266
+
267
+ // Try variable reference (:varname)
268
+ const varToken = this.tryVariableRef(input, pos);
269
+ if (varToken) {
270
+ tokens.push(varToken);
271
+ pos = varToken.position.end;
272
+ continue;
273
+ }
274
+
275
+ // Try multi-character particle (before single-character)
276
+ const multiParticle = this.tryMultiCharParticle(input, pos, MULTI_CHAR_PARTICLES);
277
+ if (multiParticle) {
278
+ // Add role metadata to particle token
279
+ const metadata = PARTICLE_ROLES.get(multiParticle.value);
280
+ if (metadata) {
281
+ tokens.push({
282
+ ...multiParticle,
283
+ metadata: {
284
+ particleRole: metadata.role,
285
+ particleConfidence: metadata.confidence,
286
+ },
287
+ });
288
+ } else {
289
+ tokens.push(multiParticle);
290
+ }
291
+ pos = multiParticle.position.end;
292
+ continue;
293
+ }
294
+
295
+ // Check if this starts a multi-character keyword (before single-char particle check)
296
+ // This prevents splitting keywords like もし (if) into も (particle) + し (identifier)
297
+ if (SINGLE_CHAR_PARTICLES.has(input[pos])) {
298
+ const keywordToken = this.tryProfileKeyword(input, pos);
299
+ // Only accept keywords longer than 1 char (e.g., もし but not を/で/に which are role markers)
300
+ if (keywordToken && keywordToken.value.length > 1) {
301
+ tokens.push(keywordToken);
302
+ pos = keywordToken.position.end;
303
+ continue;
304
+ }
305
+ // Not a multi-char keyword, treat as particle
306
+ const particle = input[pos];
307
+ const metadata = PARTICLE_ROLES.get(particle);
308
+ if (metadata) {
309
+ tokens.push({
310
+ ...createToken(particle, 'particle', createPosition(pos, pos + 1)),
311
+ metadata: {
312
+ particleRole: metadata.role,
313
+ particleConfidence: metadata.confidence,
314
+ },
315
+ });
316
+ } else {
317
+ tokens.push(createToken(particle, 'particle', createPosition(pos, pos + 1)));
318
+ }
319
+ pos++;
320
+ continue;
321
+ }
322
+
323
+ // Try Japanese word (kanji/kana sequence)
324
+ if (isJapanese(input[pos])) {
325
+ const wordToken = this.extractJapaneseWord(input, pos);
326
+ if (wordToken) {
327
+ tokens.push(wordToken);
328
+ pos = wordToken.position.end;
329
+ continue;
330
+ }
331
+ }
332
+
333
+ // Try ASCII word (for mixed content)
334
+ if (isAsciiIdentifierChar(input[pos])) {
335
+ const asciiToken = this.extractAsciiWord(input, pos);
336
+ if (asciiToken) {
337
+ tokens.push(asciiToken);
338
+ pos = asciiToken.position.end;
339
+ continue;
340
+ }
341
+ }
342
+
343
+ // Skip unknown character
344
+ pos++;
345
+ }
346
+
347
+ return new TokenStreamImpl(tokens, 'ja');
348
+ }
349
+
350
/**
 * Classify a raw token string into a token kind.
 *
 * Checks run in order and the first match wins:
 *  1. member of PARTICLES            -> 'particle'
 *  2. recognized by `this.isKeyword` -> 'keyword'
 *  3. begins with `#`, `.` or `[`    -> 'selector'
 *  4. begins with `"`, `'` or `「`    -> 'literal'
 *  5. begins with a digit            -> 'literal'
 *  6. anything else                  -> 'identifier'
 */
classifyToken(token: string): TokenKind {
  if (PARTICLES.has(token)) {
    return 'particle';
  }
  if (this.isKeyword(token)) {
    return 'keyword';
  }

  // Every prefix tested below is a single UTF-16 code unit, so inspecting the
  // first character is equivalent to a startsWith() test ('' for empty input).
  switch (token.charAt(0)) {
    case '#':
    case '.':
    case '[':
      return 'selector';
    case '"':
    case "'":
    case '「':
      return 'literal';
  }

  return /^\d/.test(token) ? 'literal' : 'identifier';
}
360
+
361
/**
 * Read a run of Japanese characters starting at `startPos` and build a token from it.
 *
 * Scanning stops at the end of input, at the first character for which
 * `isJapanese` is false, or (once at least one character has been consumed)
 * at the start of a single- or multi-character particle.
 *
 * The collected word is then classified in this order:
 *  1. exact keyword lookup -> 'keyword' token carrying the normalized form
 *  2. `tryMorphKeywordMatch` (conjugated forms) -> whatever token it returns
 *  3. otherwise -> 'identifier' token
 *
 * @returns the token, or null when no character could be consumed.
 */
private extractJapaneseWord(input: string, startPos: number): LanguageToken | null {
  let end = startPos;

  for (; end < input.length; end++) {
    const current = input[end];

    // Particle boundaries only terminate a word that already has content;
    // the very first character is never treated as a boundary here.
    if (end > startPos) {
      if (SINGLE_CHAR_PARTICLES.has(current)) {
        break;
      }

      let atMultiCharParticle = false;
      for (const particle of MULTI_CHAR_PARTICLES) {
        if (input.startsWith(particle, end)) {
          atMultiCharParticle = true;
          break;
        }
      }
      if (atMultiCharParticle) {
        break;
      }
    }

    if (!isJapanese(current)) {
      break;
    }
  }

  const word = input.slice(startPos, end);
  if (word.length === 0) {
    return null;
  }

  // Exact keyword hit: emit the keyword together with its normalized form.
  const exactMatch = this.lookupKeyword(word);
  if (exactMatch) {
    return createToken(word, 'keyword', createPosition(startPos, end), exactMatch.normalized);
  }

  // Fall back to morphological normalization for conjugated forms.
  const conjugatedMatch = this.tryMorphKeywordMatch(word, startPos, end);
  if (conjugatedMatch) {
    return conjugatedMatch;
  }

  // Neither lookup matched: plain identifier.
  return createToken(word, 'identifier', createPosition(startPos, end));
}
415
+
416
/**
 * Read a run of ASCII identifier characters starting at `startPos`
 * (used for mixed Japanese/English content).
 *
 * @returns an 'identifier' token covering the run, or null when the
 *          character at `startPos` is not an ASCII identifier character.
 */
private extractAsciiWord(input: string, startPos: number): LanguageToken | null {
  // Advance an end index over the run, then slice the word out in one step.
  let end = startPos;
  while (end < input.length && isAsciiIdentifierChar(input[end])) {
    end += 1;
  }

  if (end === startPos) {
    return null;
  }

  return createToken(input.slice(startPos, end), 'identifier', createPosition(startPos, end));
}
431
+
432
/**
 * Read a number at `startPos`, together with any Japanese time-unit suffix.
 *
 * Delegates to `tryNumberWithTimeUnits` with JAPANESE_TIME_UNITS, with sign
 * handling and whitespace skipping both turned off (Japanese time units
 * attach directly to the number without whitespace).
 */
private extractJapaneseNumber(input: string, startPos: number): LanguageToken | null {
  const numberOptions = {
    allowSign: false,
    skipWhitespace: false,
  } as const;

  return this.tryNumberWithTimeUnits(input, startPos, JAPANESE_TIME_UNITS, numberOptions);
}
442
+ }
443
+
444
+ /**
445
+ * Shared module-level JapaneseTokenizer instance, constructed once when this module is evaluated.
446
+ */
447
+ export const japaneseTokenizer = new JapaneseTokenizer();