@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435)
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,288 @@
1
+ /**
2
+ * Japanese Morphological Normalizer
3
+ *
4
+ * Reduces Japanese verb conjugations to their stem forms.
5
+ * Japanese verbs conjugate by modifying their endings:
6
+ *
7
+ * Base: 切り替え (kiri-kae) - "toggle"
8
+ * て-form: 切り替えて (kiri-kaete) - "toggle and..."
9
+ * た-form: 切り替えた (kiri-kaeta) - "toggled" (past)
10
+ * ます-form: 切り替えます (kiri-kaemasu) - polite present
11
+ * ている: 切り替えている (kiri-kaeteiru) - "is toggling" (progressive)
12
+ * ない: 切り替えない (kiri-kaenai) - "don't toggle" (negative)
13
+ *
14
+ * This normalizer strips these suffixes to find the stem,
15
+ * which can then be matched against keyword dictionaries.
16
+ */
17
+
18
+ import type {
19
+ MorphologicalNormalizer,
20
+ NormalizationResult,
21
+ SuffixRule,
22
+ ConjugationType,
23
+ } from './types';
24
+ import { noChange, normalized } from './types';
25
+
26
/**
 * Suffix rules for Japanese verb conjugation.
 *
 * The normalizer walks this table top to bottom and applies the first rule
 * whose pattern the word ends with (subject to the rule's minimum stem length).
 *
 * Ordering invariant: whenever one pattern is itself a suffix of another
 * (for example 'た' is a suffix of 'なかった' and 'られた'), the longer pattern
 * MUST come first, otherwise the longer rule can never match. Patterns that
 * are not suffixes of one another may appear in any relative order.
 */
const JAPANESE_SUFFIX_RULES: readonly SuffixRule[] = [
  // Conditional forms - very common for event handlers (longest first)
  // したら/すると/すれば are する verb conditionals
  { pattern: 'したら', confidence: 0.88, conjugationType: 'conditional-tara', minStemLength: 2 },
  { pattern: 'すると', confidence: 0.88, conjugationType: 'conditional-to', minStemLength: 2 },
  { pattern: 'すれば', confidence: 0.85, conjugationType: 'conditional-ba', minStemLength: 2 },
  // たら/れば are regular verb conditionals
  { pattern: 'たら', confidence: 0.85, conjugationType: 'conditional-tara', minStemLength: 2 },
  { pattern: 'れば', confidence: 0.82, conjugationType: 'conditional-ba', minStemLength: 2 },

  // Compound forms (longest first)
  { pattern: 'ていました', confidence: 0.82, conjugationType: 'past', minStemLength: 2 },
  { pattern: 'ています', confidence: 0.85, conjugationType: 'progressive', minStemLength: 2 },
  { pattern: 'てください', confidence: 0.85, conjugationType: 'request', minStemLength: 2 },
  { pattern: 'でください', confidence: 0.85, conjugationType: 'request', minStemLength: 2 },
  { pattern: 'ている', confidence: 0.85, conjugationType: 'progressive', minStemLength: 2 },
  { pattern: 'ておく', confidence: 0.82, conjugationType: 'progressive', minStemLength: 2 },
  { pattern: 'てみる', confidence: 0.82, conjugationType: 'progressive', minStemLength: 2 },
  { pattern: 'てある', confidence: 0.82, conjugationType: 'progressive', minStemLength: 2 },

  // Casual request forms
  { pattern: 'てくれ', confidence: 0.8, conjugationType: 'casual-request', minStemLength: 2 },
  { pattern: 'でくれ', confidence: 0.8, conjugationType: 'casual-request', minStemLength: 2 },

  // Contracted/colloquial forms (ちゃう/じゃう = てしまう/でしまう)
  { pattern: 'ちゃった', confidence: 0.82, conjugationType: 'contracted-past', minStemLength: 2 },
  { pattern: 'じゃった', confidence: 0.82, conjugationType: 'contracted-past', minStemLength: 2 },
  { pattern: 'ちゃう', confidence: 0.82, conjugationType: 'contracted', minStemLength: 2 },
  { pattern: 'じゃう', confidence: 0.82, conjugationType: 'contracted', minStemLength: 2 },

  // Polite forms
  { pattern: 'ました', confidence: 0.85, conjugationType: 'past', minStemLength: 2 },
  { pattern: 'ません', confidence: 0.85, conjugationType: 'negative', minStemLength: 2 },
  { pattern: 'ます', confidence: 0.85, conjugationType: 'polite', minStemLength: 2 },

  // Negative forms (なかった ends in た, so it must precede the bare た rule below)
  { pattern: 'なかった', confidence: 0.82, conjugationType: 'past', minStemLength: 2 },
  { pattern: 'ない', confidence: 0.82, conjugationType: 'negative', minStemLength: 2 },

  // Potential forms
  { pattern: 'られる', confidence: 0.8, conjugationType: 'potential', minStemLength: 2 },
  { pattern: 'れる', confidence: 0.78, conjugationType: 'potential', minStemLength: 2 },

  // Passive forms (られた ends in た, so it must precede the bare た rule below)
  { pattern: 'られた', confidence: 0.8, conjugationType: 'passive', minStemLength: 2 },

  // て/た forms (very common) - kept after every longer pattern that ends in た
  { pattern: 'て', confidence: 0.85, conjugationType: 'te-form', minStemLength: 2 },
  { pattern: 'た', confidence: 0.85, conjugationType: 'past', minStemLength: 2 },

  // Causative forms
  { pattern: 'させる', confidence: 0.8, conjugationType: 'causative', minStemLength: 2 },
  { pattern: 'せる', confidence: 0.78, conjugationType: 'causative', minStemLength: 2 },

  // Volitional forms
  { pattern: 'よう', confidence: 0.8, conjugationType: 'volitional', minStemLength: 2 },

  // Dictionary form ending (る-verbs) - lower confidence due to ambiguity
  { pattern: 'る', confidence: 0.75, conjugationType: 'dictionary', minStemLength: 3 },
];
90
+
91
/**
 * Conjugated endings of する ("to do") verbs.
 *
 * A する verb is a noun followed by する; stripping one of these endings leaves
 * the noun. Entries are scanned in order, so any ending that is a suffix of
 * another (e.g. 'します' of 'しています') is listed after the longer one.
 */
const SURU_PATTERNS: ReadonlyArray<{
  pattern: string;
  confidence: number;
  conjugationType: ConjugationType;
}> = [
  // Conditionals (the forms native idioms rely on most)
  { pattern: 'したら', confidence: 0.88, conjugationType: 'conditional-tara' },
  { pattern: 'すると', confidence: 0.88, conjugationType: 'conditional-to' },
  { pattern: 'すれば', confidence: 0.85, conjugationType: 'conditional-ba' },

  // Progressive
  { pattern: 'しています', confidence: 0.85, conjugationType: 'progressive' },
  { pattern: 'している', confidence: 0.85, conjugationType: 'progressive' },

  // Polite, negative, te-form, plain past and dictionary form
  { pattern: 'しました', confidence: 0.85, conjugationType: 'past' },
  { pattern: 'します', confidence: 0.85, conjugationType: 'polite' },
  { pattern: 'しない', confidence: 0.82, conjugationType: 'negative' },
  { pattern: 'して', confidence: 0.85, conjugationType: 'te-form' },
  { pattern: 'した', confidence: 0.85, conjugationType: 'past' },
  { pattern: 'する', confidence: 0.88, conjugationType: 'dictionary' },
];
116
+
117
/**
 * Tell whether the first UTF-16 code unit of `char` lies in the
 * Hiragana block (U+3040 through U+309F, inclusive).
 *
 * @param char - String to inspect; only its first code unit is read.
 * @returns `true` when that code unit is in the Hiragana block, `false`
 *   otherwise (including for an empty string).
 */
function isHiragana(char: string): boolean {
  const blockStart = 0x3040;
  const blockEnd = 0x309f;
  const unit = char.charCodeAt(0);
  return blockStart <= unit && unit <= blockEnd;
}
124
+
125
/**
 * Tell whether the first UTF-16 code unit of `char` lies in the
 * Katakana block (U+30A0 through U+30FF, inclusive).
 *
 * @param char - String to inspect; only its first code unit is read.
 * @returns `true` when that code unit is in the Katakana block, `false`
 *   otherwise (including for an empty string).
 */
function isKatakana(char: string): boolean {
  const blockStart = 0x30a0;
  const blockEnd = 0x30ff;
  const unit = char.charCodeAt(0);
  return blockStart <= unit && unit <= blockEnd;
}
132
+
133
/**
 * Check if a character is kanji (a CJK ideograph).
 *
 * The first full Unicode code point of `char` is examined rather than its
 * first UTF-16 code unit, so ideographs outside the Basic Multilingual Plane
 * (which are stored as surrogate pairs) are recognized as well.
 *
 * @param char - String to inspect; only its first code point is read.
 * @returns `true` when that code point is in one of the ideograph ranges
 *   listed below, `false` otherwise (including for an empty string).
 */
function isKanji(char: string): boolean {
  const code = char.codePointAt(0);
  // codePointAt yields undefined for an empty string.
  if (code === undefined) return false;

  return (
    (code >= 0x4e00 && code <= 0x9fff) || // CJK Unified Ideographs
    (code >= 0x3400 && code <= 0x4dbf) || // CJK Unified Ideographs Extension A
    (code >= 0xf900 && code <= 0xfaff) || // CJK Compatibility Ideographs (e.g. U+FA11)
    (code >= 0x20000 && code <= 0x2fa1f) // Supplementary plane: Extension B onward + Compatibility Supplement
  );
}
140
+
141
/**
 * Report whether any character of `word` is hiragana, katakana or kanji.
 *
 * The word is split by code point (not by UTF-16 code unit) and each piece is
 * handed to the three script predicates; the scan stops at the first hit.
 *
 * @param word - Text to scan.
 * @returns `true` if at least one character passes a script predicate,
 *   `false` otherwise (including for an empty string).
 */
function containsJapanese(word: string): boolean {
  return Array.from(word).some(ch => isHiragana(ch) || isKatakana(ch) || isKanji(ch));
}
152
+
153
/**
 * Morphological normalizer for Japanese (`language` is 'ja').
 *
 * Strips a conjugation ending from a word by trying three strategies in
 * order: multi-layer compound endings, する-verb endings, then the general
 * suffix rule table. The first strategy that matches produces the result.
 */
export class JapaneseMorphologicalNormalizer implements MorphologicalNormalizer {
  readonly language = 'ja';

  /**
   * Report whether a word is a candidate for normalization.
   *
   * @param word - Word to test.
   * @returns `true` only when the word is at least two UTF-16 code units long,
   *   contains at least one Japanese character, and ends in hiragana.
   */
  isNormalizable(word: string): boolean {
    if (word.length < 2 || !containsJapanese(word)) return false;

    // Conjugated verbs typically end in hiragana.
    return isHiragana(word.charAt(word.length - 1));
  }

  /**
   * Reduce a Japanese word to its stem.
   *
   * @param word - Word to normalize.
   * @returns The result of the first matching strategy, or `noChange(word)`
   *   when no ending applies.
   */
  normalize(word: string): NormalizationResult {
    // Multi-layer endings take priority, then する-verb endings.
    const special = this.normalizeCompound(word) ?? this.trySuruNormalization(word);
    if (special) return special;

    // General suffix table: the first rule that fits wins.
    for (const rule of JAPANESE_SUFFIX_RULES) {
      if (!word.endsWith(rule.pattern)) continue;

      const stem = word.slice(0, word.length - rule.pattern.length);

      // Skip rules that would leave too short a stem.
      if (stem.length < (rule.minStemLength ?? 2)) continue;

      // conjugationType is only included when the rule provides one.
      const metadata: {
        removedSuffixes: string[];
        conjugationType?: typeof rule.conjugationType;
      } = {
        removedSuffixes: [rule.pattern],
        ...(rule.conjugationType ? { conjugationType: rule.conjugationType } : {}),
      };
      return normalized(stem, rule.confidence, metadata);
    }

    return noChange(word);
  }

  /**
   * Strip a する-verb ending, returning the noun part.
   *
   * @param word - Word to inspect.
   * @returns A normalized result tagged `originalForm: 'suru-verb'`, or `null`
   *   when no する ending leaves at least one character of stem.
   */
  private trySuruNormalization(word: string): NormalizationResult | null {
    // The noun part must be non-empty, i.e. the word is longer than the ending.
    const hit = SURU_PATTERNS.find(
      candidate => word.endsWith(candidate.pattern) && word.length > candidate.pattern.length
    );
    if (!hit) return null;

    return normalized(word.slice(0, word.length - hit.pattern.length), hit.confidence, {
      removedSuffixes: [hit.pattern],
      conjugationType: hit.conjugationType,
      originalForm: 'suru-verb',
    });
  }

  /**
   * Strip a multi-layer progressive ending such as ていなかった (was not doing)
   * or でいない (is not doing), which the single-suffix rules would miss.
   *
   * @param word - Word to inspect.
   * @returns A normalized result with `conjugationType: 'compound'` listing
   *   each removed layer, or `null` when no compound ending applies.
   */
  private normalizeCompound(word: string): NormalizationResult | null {
    // Built per call so every result owns its own `suffixes` array.
    const table: ReadonlyArray<{
      pattern: string;
      suffixes: string[];
      confidence: number;
      minStemLength: number;
    }> = [
      // Progressive + negative + past
      { pattern: 'ていなかった', suffixes: ['て', 'い', 'なかった'], confidence: 0.8, minStemLength: 2 },
      { pattern: 'でいなかった', suffixes: ['で', 'い', 'なかった'], confidence: 0.8, minStemLength: 2 },
      // Progressive + negative
      { pattern: 'ていない', suffixes: ['て', 'い', 'ない'], confidence: 0.82, minStemLength: 2 },
      { pattern: 'でいない', suffixes: ['で', 'い', 'ない'], confidence: 0.82, minStemLength: 2 },
      // Progressive + past
      { pattern: 'ていた', suffixes: ['て', 'い', 'た'], confidence: 0.85, minStemLength: 2 },
      { pattern: 'でいた', suffixes: ['で', 'い', 'た'], confidence: 0.85, minStemLength: 2 },
    ];

    const hit = table.find(
      entry =>
        word.endsWith(entry.pattern) && word.length - entry.pattern.length >= entry.minStemLength
    );
    if (!hit) return null;

    return normalized(word.slice(0, word.length - hit.pattern.length), hit.confidence, {
      removedSuffixes: hit.suffixes,
      conjugationType: 'compound',
    });
  }
}
286
+
287
/** Module-level shared instance of the Japanese normalizer, exported as a singleton. */
export const japaneseMorphologicalNormalizer = new JapaneseMorphologicalNormalizer();