@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,428 @@
1
+ /**
2
+ * Korean Morphological Normalizer
3
+ *
4
+ * Reduces Korean verb conjugations to their stem forms.
5
+ * Korean verbs conjugate by modifying their endings:
6
+ *
7
+ * Base: 토글 (togeul) - "toggle" (loanword)
8
+ * 다 ending: 토글하다 (togeul-hada) - "to toggle" (dictionary form)
9
+ * 요 ending: 토글해요 (togeul-haeyo) - polite present
10
+ * 니다 ending: 토글합니다 (togeul-hamnida) - formal present
11
+ * 세요 ending: 토글하세요 (togeul-haseyo) - honorific request
12
+ * 았/었 past: 토글했어 (togeul-haesseo) - informal past
13
+ *
14
+ * Korean also has vowel harmony affecting suffix forms.
15
+ *
16
+ * This normalizer strips these suffixes to find the stem,
17
+ * which can then be matched against keyword dictionaries.
18
+ */
19
+
20
+ import type {
21
+ MorphologicalNormalizer,
22
+ NormalizationResult,
23
+ SuffixRule,
24
+ ConjugationType,
25
+ } from './types';
26
+ import { noChange, normalized } from './types';
27
+
28
/**
 * Tell whether a string starts with a precomposed Hangul syllable.
 *
 * Only the first UTF-16 code unit is inspected, and only the Hangul
 * Syllables range U+AC00..U+D7A3 counts. An empty string yields `false`.
 *
 * @param char - The character to classify (only its first code unit is read).
 * @returns `true` when that code unit lies inside U+AC00..U+D7A3.
 */
function isHangul(char: string): boolean {
  const firstSyllable = 0xac00;
  const lastSyllable = 0xd7a3;
  const unit = char.charCodeAt(0);

  // NaN (empty input) fails both comparisons and falls through to `false`.
  if (unit < firstSyllable) return false;
  return unit <= lastSyllable;
}
36
+
37
/**
 * Tell whether any character of a word is a Hangul syllable.
 *
 * The word is walked by code point and each one is classified with
 * `isHangul`; the scan stops at the first hit.
 *
 * @param word - The text to scan.
 * @returns `true` if at least one character passes `isHangul`, otherwise `false`
 *   (including for an empty string).
 */
function containsKorean(word: string): boolean {
  return Array.from(word).some((symbol) => isHangul(symbol));
}
46
+
47
/**
 * Suffix rules for Korean verb conjugation.
 *
 * Each rule names a literal word ending (`pattern`), the confidence to report
 * when it is stripped, the conjugation it represents, and the minimum stem
 * length that must remain.
 *
 * Ordering invariant: rules are grouped by category, and whenever one pattern
 * is itself a suffix of another (e.g. '하다' and '안하다'), the longer pattern
 * comes first. This keeps matching greedy for a consumer that takes the first
 * rule whose pattern ends the word. The table is not globally sorted by length;
 * only suffix-related pairs need a relative order.
 */
const KOREAN_SUFFIX_RULES: readonly SuffixRule[] = [
  // Honorific conditional/temporal forms (-시- infix)
  // These are critical for polite/formal Korean
  { pattern: '하시니까', confidence: 0.85, conjugationType: 'honorific-causal', minStemLength: 1 },
  { pattern: '하실때', confidence: 0.88, conjugationType: 'honorific-temporal', minStemLength: 1 },
  { pattern: '하실 때', confidence: 0.88, conjugationType: 'honorific-temporal', minStemLength: 1 },
  {
    pattern: '하시면',
    confidence: 0.88,
    conjugationType: 'honorific-conditional',
    minStemLength: 1,
  },
  {
    pattern: '으시면',
    confidence: 0.85,
    conjugationType: 'honorific-conditional',
    minStemLength: 2,
  },
  { pattern: '시면', confidence: 0.82, conjugationType: 'honorific-conditional', minStemLength: 2 },

  // Sequential/temporal forms - "after doing", "before doing", "as soon as"
  { pattern: '하고나서', confidence: 0.85, conjugationType: 'sequential-after', minStemLength: 1 },
  { pattern: '하고 나서', confidence: 0.85, conjugationType: 'sequential-after', minStemLength: 1 },
  { pattern: '하고서', confidence: 0.85, conjugationType: 'sequential-after', minStemLength: 1 },
  { pattern: '고나서', confidence: 0.82, conjugationType: 'sequential-after', minStemLength: 2 },
  { pattern: '고 나서', confidence: 0.82, conjugationType: 'sequential-after', minStemLength: 2 },
  { pattern: '고서', confidence: 0.82, conjugationType: 'sequential-after', minStemLength: 2 },
  { pattern: '하기전에', confidence: 0.85, conjugationType: 'sequential-before', minStemLength: 1 },
  {
    pattern: '하기 전에',
    confidence: 0.85,
    conjugationType: 'sequential-before',
    minStemLength: 1,
  },
  { pattern: '기전에', confidence: 0.82, conjugationType: 'sequential-before', minStemLength: 2 },
  { pattern: '기 전에', confidence: 0.82, conjugationType: 'sequential-before', minStemLength: 2 },
  { pattern: '하자마자', confidence: 0.88, conjugationType: 'immediate', minStemLength: 1 },
  { pattern: '자마자', confidence: 0.85, conjugationType: 'immediate', minStemLength: 2 },

  // Obligation forms - "must do", "should do"
  { pattern: '해야해요', confidence: 0.85, conjugationType: 'obligation', minStemLength: 1 },
  { pattern: '해야해', confidence: 0.85, conjugationType: 'obligation', minStemLength: 1 },
  { pattern: '해야하다', confidence: 0.85, conjugationType: 'obligation', minStemLength: 1 },
  { pattern: '어야해요', confidence: 0.82, conjugationType: 'obligation', minStemLength: 2 },
  { pattern: '어야해', confidence: 0.82, conjugationType: 'obligation', minStemLength: 2 },
  { pattern: '아야해요', confidence: 0.82, conjugationType: 'obligation', minStemLength: 2 },
  { pattern: '아야해', confidence: 0.82, conjugationType: 'obligation', minStemLength: 2 },

  // Conditional forms - most natural for event handlers
  // These are critical for native Korean idioms like "클릭하면 증가"
  { pattern: '하니까', confidence: 0.85, conjugationType: 'causal-nikka', minStemLength: 1 },
  { pattern: '할때', confidence: 0.88, conjugationType: 'temporal-ttae', minStemLength: 1 },
  { pattern: '할 때', confidence: 0.88, conjugationType: 'temporal-ttae', minStemLength: 1 },
  { pattern: '을때', confidence: 0.85, conjugationType: 'temporal-ttae', minStemLength: 2 },
  { pattern: '을 때', confidence: 0.85, conjugationType: 'temporal-ttae', minStemLength: 2 },
  { pattern: '하면', confidence: 0.88, conjugationType: 'conditional-myeon', minStemLength: 1 },
  { pattern: '으면', confidence: 0.85, conjugationType: 'conditional-myeon', minStemLength: 2 },
  { pattern: '니까', confidence: 0.82, conjugationType: 'causal-nikka', minStemLength: 2 },
  { pattern: '면', confidence: 0.8, conjugationType: 'conditional-myeon', minStemLength: 2 },

  // Formal polite forms
  { pattern: '하였습니다', confidence: 0.85, conjugationType: 'past', minStemLength: 1 },
  { pattern: '했습니다', confidence: 0.85, conjugationType: 'past', minStemLength: 1 },
  { pattern: '합니다', confidence: 0.85, conjugationType: 'polite', minStemLength: 1 },
  { pattern: '습니다', confidence: 0.82, conjugationType: 'polite', minStemLength: 2 },
  { pattern: '됩니다', confidence: 0.82, conjugationType: 'polite', minStemLength: 1 },
  // NOTE(review): this pattern starts with a standalone jamo (ㅂ). In precomposed
  // Hangul text the ㅂ is fused into the preceding syllable block (e.g. 갑니다),
  // so a plain suffix comparison presumably never matches it — confirm whether
  // the consumer decomposes syllables before applying this rule.
  { pattern: 'ㅂ니다', confidence: 0.82, conjugationType: 'polite', minStemLength: 2 },

  // Honorific request forms
  { pattern: '하세요', confidence: 0.85, conjugationType: 'honorific', minStemLength: 1 },
  { pattern: '하십시오', confidence: 0.85, conjugationType: 'honorific', minStemLength: 1 },
  { pattern: '세요', confidence: 0.82, conjugationType: 'honorific', minStemLength: 2 },
  { pattern: '십시오', confidence: 0.82, conjugationType: 'honorific', minStemLength: 2 },

  // Informal polite (요) forms
  { pattern: '하고있어요', confidence: 0.82, conjugationType: 'progressive', minStemLength: 1 },
  { pattern: '하고있어', confidence: 0.82, conjugationType: 'progressive', minStemLength: 1 },
  { pattern: '했어요', confidence: 0.85, conjugationType: 'past', minStemLength: 1 },
  { pattern: '해요', confidence: 0.85, conjugationType: 'polite', minStemLength: 1 },
  { pattern: '어요', confidence: 0.82, conjugationType: 'polite', minStemLength: 2 },
  { pattern: '아요', confidence: 0.82, conjugationType: 'polite', minStemLength: 2 },

  // Informal (반말) forms
  { pattern: '했어', confidence: 0.85, conjugationType: 'past', minStemLength: 1 },
  { pattern: '해', confidence: 0.8, conjugationType: 'present', minStemLength: 1 },
  { pattern: '었어', confidence: 0.82, conjugationType: 'past', minStemLength: 2 },
  { pattern: '았어', confidence: 0.82, conjugationType: 'past', minStemLength: 2 },

  // Progressive forms
  { pattern: '하고있다', confidence: 0.82, conjugationType: 'progressive', minStemLength: 1 },
  { pattern: '고있다', confidence: 0.8, conjugationType: 'progressive', minStemLength: 2 },
  { pattern: '고있어', confidence: 0.8, conjugationType: 'progressive', minStemLength: 2 },

  // Negative forms
  // Must precede the dictionary rule below: '안하다' ends in '하다', so listing
  // '하다' first would shadow it and leave the negation marker in the stem.
  { pattern: '하지않다', confidence: 0.82, conjugationType: 'negative', minStemLength: 1 },
  { pattern: '안하다', confidence: 0.82, conjugationType: 'negative', minStemLength: 1 },
  { pattern: '지않다', confidence: 0.8, conjugationType: 'negative', minStemLength: 2 },

  // Dictionary/infinitive form (하다 verbs)
  { pattern: '하다', confidence: 0.88, conjugationType: 'dictionary', minStemLength: 1 },

  // Imperative forms
  { pattern: '해라', confidence: 0.82, conjugationType: 'imperative', minStemLength: 1 },
  { pattern: '하라', confidence: 0.82, conjugationType: 'imperative', minStemLength: 1 },

  // Generic verb endings (lower confidence) - kept last because '다' is a
  // suffix of every other pattern ending in 다.
  { pattern: '다', confidence: 0.75, conjugationType: 'dictionary', minStemLength: 2 },
];
159
+
160
/**
 * 하다 verb patterns - a very common verb-forming construction in Korean.
 * Noun + 하다 forms a verb, e.g. 토글 + 하다 = 토글하다 (to toggle).
 *
 * Each entry is a conjugated ending of 하다 that is stripped from the end of
 * a word to recover the noun part.
 *
 * ORDER MATTERS: the table is scanned top to bottom with `endsWith` and the
 * first hit wins. An entry must therefore appear BEFORE any other entry that
 * is a proper suffix of it (e.g. '안해요' before '해요', '해야해' before '해'),
 * otherwise the longer, more specific ending can never match.
 */
const HADA_PATTERNS: readonly {
  pattern: string;
  confidence: number;
  conjugationType: ConjugationType;
}[] = [
  // Honorific forms (-시- infix) - polite/formal Korean
  // 클릭하시면 → 클릭 (if you click - honorific)
  { pattern: '하시니까', confidence: 0.88, conjugationType: 'honorific-causal' },
  { pattern: '하실때', confidence: 0.88, conjugationType: 'honorific-temporal' },
  { pattern: '하실 때', confidence: 0.88, conjugationType: 'honorific-temporal' },
  { pattern: '하시면', confidence: 0.88, conjugationType: 'honorific-conditional' },
  { pattern: '하셨어요', confidence: 0.85, conjugationType: 'honorific-past' },
  { pattern: '하셨어', confidence: 0.85, conjugationType: 'honorific-past' },
  { pattern: '하십니다', confidence: 0.85, conjugationType: 'honorific-polite' },

  // Sequential/temporal forms - "after doing", "before doing", "as soon as"
  { pattern: '하고나서', confidence: 0.88, conjugationType: 'sequential-after' },
  { pattern: '하고 나서', confidence: 0.88, conjugationType: 'sequential-after' },
  { pattern: '하고서', confidence: 0.88, conjugationType: 'sequential-after' },
  { pattern: '하기전에', confidence: 0.88, conjugationType: 'sequential-before' },
  { pattern: '하기 전에', confidence: 0.88, conjugationType: 'sequential-before' },
  { pattern: '하자마자', confidence: 0.88, conjugationType: 'immediate' },

  // Obligation forms - "must do", "should do"
  { pattern: '해야해요', confidence: 0.88, conjugationType: 'obligation' },
  { pattern: '해야해', confidence: 0.88, conjugationType: 'obligation' },
  { pattern: '해야하다', confidence: 0.88, conjugationType: 'obligation' },

  // Conditional forms - most natural for event handlers
  // 클릭하면 → 클릭 (if clicked)
  { pattern: '하니까', confidence: 0.88, conjugationType: 'causal-nikka' },
  { pattern: '할때', confidence: 0.88, conjugationType: 'temporal-ttae' },
  { pattern: '할 때', confidence: 0.88, conjugationType: 'temporal-ttae' },
  { pattern: '하면', confidence: 0.88, conjugationType: 'conditional-myeon' },

  // Negative
  // Listed before the plain polite/informal endings: '안해요' and '안해' end
  // in '해요' / '해', so they would be shadowed if those came first.
  { pattern: '하지않아요', confidence: 0.82, conjugationType: 'negative' },
  { pattern: '하지않다', confidence: 0.82, conjugationType: 'negative' },
  { pattern: '안해요', confidence: 0.82, conjugationType: 'negative' },
  { pattern: '안해', confidence: 0.82, conjugationType: 'negative' },

  // Formal
  { pattern: '하였습니다', confidence: 0.85, conjugationType: 'past' },
  { pattern: '했습니다', confidence: 0.85, conjugationType: 'past' },
  { pattern: '합니다', confidence: 0.85, conjugationType: 'polite' },
  { pattern: '하십시오', confidence: 0.85, conjugationType: 'honorific' },
  { pattern: '하세요', confidence: 0.85, conjugationType: 'honorific' },
  // Informal polite
  { pattern: '했어요', confidence: 0.85, conjugationType: 'past' },
  { pattern: '해요', confidence: 0.85, conjugationType: 'polite' },
  // Informal
  { pattern: '했어', confidence: 0.85, conjugationType: 'past' },
  { pattern: '해', confidence: 0.8, conjugationType: 'present' },
  // Progressive
  { pattern: '하고있어요', confidence: 0.82, conjugationType: 'progressive' },
  { pattern: '하고있어', confidence: 0.82, conjugationType: 'progressive' },
  { pattern: '하고있다', confidence: 0.82, conjugationType: 'progressive' },
  // Connective forms (해서 = because/so, 하고 = and)
  { pattern: '해서', confidence: 0.82, conjugationType: 'connective' },
  { pattern: '하고', confidence: 0.8, conjugationType: 'connective' },
  // Imperative
  { pattern: '해라', confidence: 0.82, conjugationType: 'imperative' },
  { pattern: '하라', confidence: 0.82, conjugationType: 'imperative' },
  // Dictionary form
  { pattern: '하다', confidence: 0.88, conjugationType: 'dictionary' },
];
230
+
231
/**
 * Korean morphological normalizer.
 *
 * Strips conjugation endings from the end of a word. `normalize` tries, in
 * order: compound (multi-layer) 하다 endings, single 하다 endings, then the
 * general suffix rules. The first strategy that matches decides the result.
 */
export class KoreanMorphologicalNormalizer implements MorphologicalNormalizer {
  readonly language = 'ko';

  /**
   * Compound conjugations: endings that stack several grammatical elements.
   *
   * Built once for the class rather than on every `normalize` call. Scanned
   * in order with `endsWith`, so longer endings are listed before the shorter
   * endings they contain (e.g. '하고나서였어' before '하고나서').
   */
  private static readonly COMPOUND_PATTERNS: readonly {
    pattern: string;
    suffixes: readonly string[];
    confidence: number;
    conjugationType: ConjugationType;
    minStemLength: number;
  }[] = [
    // Sequential past forms (after doing, was)
    {
      pattern: '하고나서였어',
      suffixes: ['하고나서', '였어'],
      confidence: 0.78,
      conjugationType: 'sequential-after',
      minStemLength: 2,
    },
    {
      pattern: '하고나서였다',
      suffixes: ['하고나서', '였다'],
      confidence: 0.78,
      conjugationType: 'sequential-after',
      minStemLength: 2,
    },
    {
      pattern: '하고나서',
      suffixes: ['하고', '나서'],
      confidence: 0.85,
      conjugationType: 'sequential-after',
      minStemLength: 2,
    },

    // Modal necessity past forms (had to do)
    {
      pattern: '해야했어',
      suffixes: ['해야', '했어'],
      confidence: 0.8,
      conjugationType: 'obligation',
      minStemLength: 2,
    },
    {
      pattern: '해야했다',
      suffixes: ['해야', '했다'],
      confidence: 0.8,
      conjugationType: 'obligation',
      minStemLength: 2,
    },
    {
      pattern: '해야했습니다',
      suffixes: ['해야', '했습니다'],
      confidence: 0.8,
      conjugationType: 'obligation',
      minStemLength: 2,
    },

    // Honorific simultaneous forms (while doing, honorific)
    {
      pattern: '하시면서',
      suffixes: ['하시', '면서'],
      confidence: 0.82,
      conjugationType: 'connective',
      minStemLength: 2,
    },
    {
      pattern: '하시며',
      suffixes: ['하시', '며'],
      confidence: 0.82,
      conjugationType: 'connective',
      minStemLength: 2,
    },

    // Progressive forms with copula
    {
      pattern: '하고있었어',
      suffixes: ['하고', '있었어'],
      confidence: 0.8,
      conjugationType: 'progressive',
      minStemLength: 2,
    },
    {
      pattern: '하고있었다',
      suffixes: ['하고', '있었다'],
      confidence: 0.8,
      conjugationType: 'progressive',
      minStemLength: 2,
    },
  ];

  /**
   * Check if a word might be a Korean verb that can be normalized.
   *
   * @param word - Candidate token.
   * @returns `true` when `containsKorean(word)` holds and the word is at
   *   least 2 UTF-16 code units long.
   */
  isNormalizable(word: string): boolean {
    if (!containsKorean(word)) return false;
    return word.length >= 2;
  }

  /**
   * Normalize a Korean word to its stem form.
   *
   * @param word - Word to normalize.
   * @returns The result of the first matching strategy, or `noChange(word)`
   *   when no ending applies.
   */
  normalize(word: string): NormalizationResult {
    // Compound endings first: they contain the simpler endings as suffixes.
    const compoundResult = this.normalizeCompound(word);
    if (compoundResult) return compoundResult;

    // 하다 verb endings next.
    const hadaResult = this.tryHadaNormalization(word);
    if (hadaResult) return hadaResult;

    // Finally the general suffix rules, first match wins.
    for (const rule of KOREAN_SUFFIX_RULES) {
      if (!word.endsWith(rule.pattern)) continue;

      const stem = word.slice(0, -rule.pattern.length);

      // Reject matches that would leave too little stem behind.
      if (stem.length < (rule.minStemLength ?? 2)) continue;

      const metadata: {
        removedSuffixes: string[];
        conjugationType?: typeof rule.conjugationType;
      } = {
        removedSuffixes: [rule.pattern],
      };
      // Only attach the key when the rule actually carries a type.
      if (rule.conjugationType) {
        metadata.conjugationType = rule.conjugationType;
      }
      return normalized(stem, rule.confidence, metadata);
    }

    // No normalization needed
    return noChange(word);
  }

  /**
   * Try to normalize a 하다 verb (noun + 하다).
   *
   * @param word - Word to inspect.
   * @returns A result whose stem is the noun part, tagged with
   *   `originalForm: 'hada-verb'`, or `null` when no 하다 ending matches.
   */
  private tryHadaNormalization(word: string): NormalizationResult | null {
    for (const entry of HADA_PATTERNS) {
      if (!word.endsWith(entry.pattern)) continue;

      const stem = word.slice(0, -entry.pattern.length);

      // The word must have a noun part; the bare ending alone is skipped.
      if (stem.length < 1) continue;

      return normalized(stem, entry.confidence, {
        removedSuffixes: [entry.pattern],
        conjugationType: entry.conjugationType,
        originalForm: 'hada-verb',
      });
    }
    return null;
  }

  /**
   * Normalize compound conjugations (multi-layer suffixes).
   *
   * @param word - Word to inspect.
   * @returns A result listing each removed layer in `removedSuffixes`, or
   *   `null` when no compound ending matches with a long enough stem.
   */
  private normalizeCompound(word: string): NormalizationResult | null {
    for (const entry of KoreanMorphologicalNormalizer.COMPOUND_PATTERNS) {
      if (!word.endsWith(entry.pattern)) continue;

      const stem = word.slice(0, -entry.pattern.length);

      // Validate minimum stem length
      if (stem.length < entry.minStemLength) continue;

      return normalized(stem, entry.confidence, {
        // Copy so a caller mutating the result cannot corrupt the shared table.
        removedSuffixes: [...entry.suffixes],
        conjugationType: entry.conjugationType,
      });
    }

    return null;
  }
}

// Shared module-level instance of the Korean normalizer.
export const koreanMorphologicalNormalizer = new KoreanMorphologicalNormalizer();
@@ -0,0 +1,264 @@
1
+ /**
2
+ * Polish Morphological Normalizer
3
+ *
4
+ * Normalizes Polish verb forms to their base/infinitive form.
5
+ *
6
+ * Polish verb conjugation is complex with:
7
+ * - Three main conjugation classes (determined by infinitive ending)
8
+ * - Person/number agreement (6 forms per tense)
9
+ * - Aspect pairs (perfective/imperfective)
10
+ *
11
+ * For software UI, Polish uses IMPERATIVE form (unlike most languages):
12
+ * - zapisz (save), otwórz (open), usuń (delete)
13
+ *
14
+ * This normalizer focuses on recognizing imperative forms and
15
+ * mapping them back to their base form for keyword matching.
16
+ */
17
+
18
/**
 * Outcome of normalizing a single Polish word.
 */
export interface NormalizationResult {
  /** Normalized form; the lowercased input itself when no rule applied. */
  stem: string;

  /** Ending or rule tag that triggered the normalization, when one did. */
  suffix?: string;

  /** Heuristic score for how trustworthy `stem` is (higher is better). */
  confidence: number;

  /** The word the result was derived from, when the normalizer records it. */
  originalForm?: string;
}
24
+
25
/**
 * Polish Morphological Normalizer
 *
 * Maps inflected Polish verb forms back to an infinitive so they can be
 * matched against keywords.
 *
 * Strategies, tried in this order by `normalize`:
 * 1. Already an infinitive (-ać, -eć, -ić, -yć, -ąć, -ować, ...)
 * 2. Imperative: lookup table of UI commands, then -aj / -uj / -ij heuristics
 * 3. Past tense: -ałem/-ałam, -ał/-ała, -iłem/-iłam, -ił/-iła
 * 4. Present tense: -uję/-uje, -am, -em, -ę
 *
 * Past tense is tried before present tense because every first-person past
 * ending (-ałem, -ałam, -iłem, -iłam) also ends in the present endings
 * -em / -am and would otherwise never be reached.
 */
export class PolishMorphologicalNormalizer {
  /** Endings that mark a word as already being an infinitive. */
  private static readonly INFINITIVE_ENDINGS: readonly string[] = [
    'ać',
    'eć',
    'ić',
    'yć',
    'ąć',
    'ować',
    'iwać',
    'ywać',
  ];

  /**
   * Imperative forms used in Polish software UI, mapped to the infinitive
   * used for keyword matching. Includes diacritic-free spellings.
   * Built once for the class rather than on every lookup.
   */
  private static readonly IMPERATIVE_TO_INFINITIVE: ReadonlyMap<string, string> = new Map([
    // Core commands
    ['przełącz', 'przełączać'],
    ['przelacz', 'przelaczac'],
    ['dodaj', 'dodawać'],
    ['usuń', 'usuwać'],
    ['usun', 'usuwac'],
    ['umieść', 'umieszczać'],
    ['umiesc', 'umieszczac'],
    ['wstaw', 'wstawiać'],
    ['ustaw', 'ustawiać'],
    ['pobierz', 'pobierać'],
    ['weź', 'brać'],
    ['wez', 'brac'],
    ['zwiększ', 'zwiększać'],
    ['zwieksz', 'zwiekszac'],
    ['zmniejsz', 'zmniejszać'],
    ['pokaż', 'pokazywać'],
    ['pokaz', 'pokazywac'],
    ['ukryj', 'ukrywać'],
    ['schowaj', 'schowywać'],
    ['czekaj', 'czekać'],
    ['poczekaj', 'poczekać'],
    ['idź', 'iść'],
    ['idz', 'isc'],
    ['przejdź', 'przejść'],
    ['przejdz', 'przejsc'],
    ['wywołaj', 'wywoływać'],
    ['wywolaj', 'wywolywac'],
    ['wyślij', 'wysyłać'],
    ['wyslij', 'wysylac'],
    ['loguj', 'logować'],
    ['wypisz', 'wypisywać'],
    ['sklonuj', 'sklonować'],
    ['kopiuj', 'kopiować'],
    ['zamień', 'zamieniać'],
    ['zamien', 'zamieniac'],
    ['utwórz', 'tworzyć'],
    ['utworz', 'tworzyc'],
    ['stwórz', 'stwarzać'],
    ['stworz', 'stwarzac'],
    ['skup', 'skupiać'],
    ['rozmyj', 'rozmywać'],
    ['nawiguj', 'nawigować'],
    ['załaduj', 'ładować'],
    ['zaladuj', 'ladowac'],
    ['powtórz', 'powtarzać'],
    ['powtorz', 'powtarzac'],
    ['kontynuuj', 'kontynuować'],
    ['zatrzymaj', 'zatrzymywać'],
    ['przerwij', 'przerywać'],
    ['rzuć', 'rzucać'],
    ['rzuc', 'rzucac'],
    ['zwróć', 'zwracać'],
    ['zwroc', 'zwracac'],
    ['inicjuj', 'inicjować'],
    ['zainstaluj', 'instalować'],
    ['zmierz', 'mierzyć'],
  ]);

  /**
   * Shortest stem accepted by the suffix-containment check in
   * `areMorphologicallyRelated`; keeps empty or tiny stems from matching
   * everything.
   */
  private static readonly MIN_RELATED_STEM_LENGTH = 3;

  /**
   * Normalize a Polish verb to its base/infinitive form.
   *
   * @param word - Word in any letter case.
   * @returns The lowercased infinitive guess with a confidence score. When no
   *   rule applies, the lowercased word is returned with confidence 0.5.
   */
  normalize(word: string): NormalizationResult {
    const lower = word.toLowerCase();

    // Already in infinitive form?
    if (this.isInfinitive(lower)) {
      return { stem: lower, confidence: 1.0 };
    }

    const imperativeResult = this.tryImperativeNormalization(lower);
    if (imperativeResult) return imperativeResult;

    // Past before present: -ałem/-ałam/-iłem/-iłam also end in -em/-am.
    // Known trade-off: a present form whose stem ends in -ał (e.g. działam)
    // is read as feminine past; telling them apart would need a lexicon.
    const pastResult = this.tryPastTenseNormalization(lower);
    if (pastResult) return pastResult;

    const presentResult = this.tryPresentTenseNormalization(lower);
    if (presentResult) return presentResult;

    // Return as-is if no normalization found
    return { stem: lower, confidence: 0.5, originalForm: word };
  }

  /**
   * Check if word is already in infinitive form.
   *
   * @param word - Lowercased word.
   */
  private isInfinitive(word: string): boolean {
    return PolishMorphologicalNormalizer.INFINITIVE_ENDINGS.some(ending =>
      word.endsWith(ending)
    );
  }

  /**
   * Try to normalize an imperative (2nd person singular) form to infinitive.
   *
   * Known UI commands are resolved through the lookup table (confidence
   * 0.95, suffix tag 'imperative'). Otherwise three ending heuristics apply:
   * - -aj → -ać   (czytaj → czytać)
   * - -uj → -ować (kopiuj → kopiować)
   * - -ij → -ić   (lower confidence than the other two heuristics)
   *
   * Imperatives that end in a bare consonant (pisz, rób, mów) are only
   * recognized when they are in the lookup table.
   *
   * @param word - Lowercased word.
   * @returns The normalization, or `null` when the word does not look like
   *   an imperative.
   */
  private tryImperativeNormalization(word: string): NormalizationResult | null {
    const known = PolishMorphologicalNormalizer.IMPERATIVE_TO_INFINITIVE.get(word);
    if (known !== undefined) {
      return {
        stem: known,
        suffix: 'imperative',
        confidence: 0.95,
        originalForm: word,
      };
    }

    // Pattern: -aj → -ać (czytaj → czytać)
    if (word.endsWith('aj')) {
      return {
        stem: word.slice(0, -2) + 'ać',
        suffix: 'aj',
        confidence: 0.8,
        originalForm: word,
      };
    }

    // Pattern: -uj → -ować (kopiuj → kopiować)
    if (word.endsWith('uj')) {
      return {
        stem: word.slice(0, -2) + 'ować',
        suffix: 'uj',
        confidence: 0.8,
        originalForm: word,
      };
    }

    // Pattern: -ij → -ić
    if (word.endsWith('ij')) {
      return {
        stem: word.slice(0, -2) + 'ić',
        suffix: 'ij',
        confidence: 0.75,
        originalForm: word,
      };
    }

    return null;
  }

  /**
   * Try to normalize a present tense form to infinitive.
   *
   * @param word - Lowercased word.
   * @returns The normalization, or `null` when no present ending matches.
   */
  private tryPresentTenseNormalization(word: string): NormalizationResult | null {
    // Pattern: -uję/-uje → -ować (pracuję → pracować).
    // Must come before the bare -ę rule, which would otherwise claim -uję.
    if (word.endsWith('uję') || word.endsWith('uje')) {
      return {
        stem: word.slice(0, -3) + 'ować',
        suffix: word.slice(-3),
        confidence: 0.85,
        originalForm: word,
      };
    }

    // Pattern: -am → -ać (czytam → czytać)
    if (word.endsWith('am')) {
      return {
        stem: word.slice(0, -2) + 'ać',
        suffix: 'am',
        confidence: 0.8,
        originalForm: word,
      };
    }

    // Pattern: -em → -eć (rozumiem → rozumieć)
    if (word.endsWith('em') && word.length > 3) {
      return {
        stem: word.slice(0, -2) + 'eć',
        suffix: 'em',
        confidence: 0.75,
        originalForm: word,
      };
    }

    // Pattern: -ę → -ać. Rough guess only: consonant alternations in the
    // stem are not undone, hence the low confidence.
    if (word.endsWith('ę')) {
      return {
        stem: word.slice(0, -1) + 'ać',
        suffix: 'ę',
        confidence: 0.7,
        originalForm: word,
      };
    }

    return null;
  }

  /**
   * Try to normalize a past tense form to infinitive.
   *
   * The reported `suffix` is the ending actually found on the word.
   *
   * @param word - Lowercased word.
   * @returns The normalization, or `null` when no past ending matches.
   */
  private tryPastTenseNormalization(word: string): NormalizationResult | null {
    // Pattern: -ałem/-ałam → -ać (czytałem → czytać)
    if (word.endsWith('ałem') || word.endsWith('ałam')) {
      return {
        stem: word.slice(0, -4) + 'ać',
        suffix: word.slice(-4),
        confidence: 0.85,
        originalForm: word,
      };
    }

    // Pattern: -ał/-ała → -ać (czytał → czytać)
    if (word.endsWith('ał') || word.endsWith('ała')) {
      const suffixLen = word.endsWith('ała') ? 3 : 2;
      return {
        stem: word.slice(0, -suffixLen) + 'ać',
        suffix: word.slice(-suffixLen),
        confidence: 0.8,
        originalForm: word,
      };
    }

    // Pattern: -iłem/-iłam → -ić (robiłem → robić), with or without ł
    if (
      word.endsWith('iłem') ||
      word.endsWith('iłam') ||
      word.endsWith('ilem') ||
      word.endsWith('ilam')
    ) {
      return {
        stem: word.slice(0, -4) + 'ić',
        suffix: word.slice(-4),
        confidence: 0.85,
        originalForm: word,
      };
    }

    // Pattern: -ił/-iła → -ić (robił → robić), with or without ł
    if (
      word.endsWith('ił') ||
      word.endsWith('iła') ||
      word.endsWith('il') ||
      word.endsWith('ila')
    ) {
      const suffixLen = word.endsWith('iła') || word.endsWith('ila') ? 3 : 2;
      return {
        stem: word.slice(0, -suffixLen) + 'ić',
        suffix: word.slice(-suffixLen),
        confidence: 0.8,
        originalForm: word,
      };
    }

    return null;
  }

  /**
   * Check if two words are morphologically related.
   *
   * Two words are related when they normalize to the same stem, or when the
   * longer stem is the shorter stem plus a prefix (prefix-derived aspect
   * pairs such as robić / zrobić). The whole shorter stem must be contained:
   * comparing only the shared infinitive ending would relate every pair of
   * -ować verbs.
   *
   * @param word1 - First word, any form.
   * @param word2 - Second word, any form.
   */
  areMorphologicallyRelated(word1: string, word2: string): boolean {
    const stem1 = this.normalize(word1).stem;
    const stem2 = this.normalize(word2).stem;

    // Same stem
    if (stem1 === stem2) return true;

    const shorter = stem1.length <= stem2.length ? stem1 : stem2;
    const longer = shorter === stem1 ? stem2 : stem1;

    // Empty or very short stems are a suffix of almost anything; ignore them.
    if (shorter.length < PolishMorphologicalNormalizer.MIN_RELATED_STEM_LENGTH) {
      return false;
    }

    return longer.endsWith(shorter);
  }
}