@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,242 @@
1
+ /**
2
+ * Arabic Morphological Normalizer
3
+ *
4
+ * Arabic uses a complex root-pattern morphology system where most words
5
+ * are derived from triliteral (3-consonant) roots. This normalizer focuses
6
+ * on prefix/suffix stripping rather than full root extraction.
7
+ *
8
+ * Key features:
9
+ * - Definite article prefix: ال (al-)
10
+ * - Conjunction/preposition prefixes: و (wa-), ف (fa-), ب (bi-), ل (li-), ك (ka-)
11
+ * - Verb prefixes (present tense markers): ي (ya-), ت (ta-), ن (na-), أ (a-)
12
+ * - Plural/gender suffixes: ون (ūn), ين (īn), ات (āt), ة (a)
13
+ * - Pronoun suffixes: ها (hā), هم (hum), etc.
14
+ * - Diacritics handling: Words with and without diacritics should match
15
+ *
16
+ * Examples:
17
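+ * والتبديل → تبديل (and the changing → changing; reducing further to بدّل "change!" would need root extraction, which this normalizer does not attempt)
18
+ * يبدّل → بدل (he changes → change!; diacritics such as the shadda are removed before the prefix is stripped)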
19
+ * المستخدمين → مستخدم (the users → user)
20
+ */
21
+
22
+ import type { MorphologicalNormalizer, NormalizationResult, PrefixRule } from './types';
23
+ import { noChange, normalized } from './types';
24
+
25
/**
 * Check if a character is Arabic.
 *
 * Only the first UTF-16 code unit of `char` is inspected. An empty string
 * yields `NaN` from `charCodeAt(0)` and is therefore reported as non-Arabic.
 *
 * U+FEFF is the last code point of the Arabic Presentation Forms-B block, but
 * it is the byte-order mark / zero-width no-break space rather than an Arabic
 * letter, so it is deliberately excluded: a stray BOM must not make a
 * non-Arabic word look Arabic.
 *
 * @param char - String whose first code unit is classified.
 * @returns `true` when that code unit lies in one of the Arabic blocks below.
 */
function isArabic(char: string): boolean {
  const code = char.charCodeAt(0);
  return (
    (code >= 0x0600 && code <= 0x06ff) || // Arabic
    (code >= 0x0750 && code <= 0x077f) || // Arabic Supplement
    (code >= 0x08a0 && code <= 0x08ff) || // Arabic Extended-A
    (code >= 0xfb50 && code <= 0xfdff) || // Arabic Presentation Forms-A
    (code >= 0xfe70 && code <= 0xfefe) // Arabic Presentation Forms-B, minus U+FEFF (BOM)
  );
}
38
+
39
/**
 * Report whether at least one character of `word` is classified as Arabic
 * by `isArabic`. The word is walked by Unicode code point.
 *
 * @param word - Text to scan.
 * @returns `true` as soon as one Arabic character is found, otherwise `false`
 *   (including for the empty string).
 */
function containsArabic(word: string): boolean {
  return Array.from(word).some((ch) => isArabic(ch));
}
48
+
49
/**
 * Strip Arabic vocalization marks (tashkeel) from a word so that vocalized
 * and unvocalized spellings compare equal.
 *
 * @param word - Text that may carry diacritics.
 * @returns `word` with every code point in U+064B–U+0652 and U+0670 removed.
 */
function removeDiacritics(word: string): string {
  // U+064B–U+0652 covers the tanween forms, fatha, damma, kasra, shadda and
  // sukun; U+0670 is the superscript alef.
  const tashkeel = /[\u064B-\u0652\u0670]/g;
  return word.replace(tashkeel, '');
}
57
+
58
+ /**
59
+ * Combined prefix rules: a one-letter proclitic fused with the definite article.
60
+ * normalize() tries these before the single prefixes, strips at most one, and only when at least 2 characters remain.
61
+ */
62
+ const COMBINED_PREFIXES: readonly PrefixRule[] = [
63
+ // Conjunction/preposition + article combinations (3 letters each; the assimilated لل form is 2)
64
+ { pattern: 'وال', confidencePenalty: 0.15, prefixType: 'conjunction' }, // wa + al
65
+ { pattern: 'فال', confidencePenalty: 0.15, prefixType: 'conjunction' }, // fa + al
66
+ { pattern: 'بال', confidencePenalty: 0.15, prefixType: 'preposition' }, // bi + al
67
+ { pattern: 'كال', confidencePenalty: 0.15, prefixType: 'preposition' }, // ka + al
68
+ { pattern: 'لل', confidencePenalty: 0.12, prefixType: 'preposition' }, // li + al (assimilation)
69
+ ];
70
+
71
+ /**
72
+ * Single prefix rules, consulted by normalize() only when no combined prefix was stripped; the first rule that fits wins.
73
+ * Note: Single-character prefixes require minimum 3-char remaining stem
74
+ * to avoid over-stripping words where the character is part of the root.
75
+ */
76
+ const SINGLE_PREFIXES: readonly PrefixRule[] = [
77
+ // Definite article (2 chars) - can leave 2-char stem
78
+ { pattern: 'ال', confidencePenalty: 0.08, prefixType: 'article', minRemaining: 2 },
79
+
80
+ // Conjunctions and prepositions (1 char) - need longer stem to be safe
81
+ { pattern: 'و', confidencePenalty: 0.08, prefixType: 'conjunction', minRemaining: 3 }, // wa- (and)
82
+ { pattern: 'ف', confidencePenalty: 0.08, prefixType: 'conjunction', minRemaining: 3 }, // fa- (then/so)
83
+ { pattern: 'ب', confidencePenalty: 0.1, prefixType: 'preposition', minRemaining: 3 }, // bi- (with/by)
84
+ { pattern: 'ل', confidencePenalty: 0.1, prefixType: 'preposition', minRemaining: 3 }, // li- (to/for)
85
+ { pattern: 'ك', confidencePenalty: 0.1, prefixType: 'preposition', minRemaining: 3 }, // ka- (like/as)
86
+ ];
87
+
88
+ /**
89
+ * Verb prefixes (present tense markers).
90
+ * These are more tentative as they change verb meaning; normalize() skips them when the stem ends in a noun or pronoun suffix, or when a prefix other than و / ف was already removed.
91
+ * Require minimum 3-char remaining to avoid over-stripping.
92
+ */
93
+ const VERB_PREFIXES: readonly PrefixRule[] = [
94
+ { pattern: 'ي', confidencePenalty: 0.12, prefixType: 'verb-marker', minRemaining: 3 }, // ya- (he/it)
95
+ { pattern: 'ت', confidencePenalty: 0.12, prefixType: 'verb-marker', minRemaining: 3 }, // ta- (she/you)
96
+ { pattern: 'ن', confidencePenalty: 0.12, prefixType: 'verb-marker', minRemaining: 3 }, // na- (we)
97
+ { pattern: 'أ', confidencePenalty: 0.12, prefixType: 'verb-marker', minRemaining: 3 }, // a- (I)
98
+ { pattern: 'ا', confidencePenalty: 0.12, prefixType: 'verb-marker', minRemaining: 3 }, // a- without hamza
99
+ ];
100
+
101
+ /**
102
+ * Suffix rules for Arabic. normalize() walks this list once, in order, stripping every entry that matches and leaves at least 2 characters.
103
+ */
104
+ const SUFFIXES: readonly { pattern: string; confidencePenalty: number; type: string }[] = [
105
+ // Plural forms
106
+ { pattern: 'ون', confidencePenalty: 0.1, type: 'masculine-plural' },
107
+ { pattern: 'ين', confidencePenalty: 0.1, type: 'masculine-plural-accusative' },
108
+ { pattern: 'ات', confidencePenalty: 0.1, type: 'feminine-plural' },
109
+ // Dual forms (NOTE(review): the 'ين' entry below repeats the masculine-plural-accusative pattern above — confirm the duplicate is intended)
110
+ { pattern: 'ان', confidencePenalty: 0.1, type: 'dual-nominative' },
111
+ { pattern: 'ين', confidencePenalty: 0.1, type: 'dual-accusative' },
112
+ // Pronoun suffixes
113
+ { pattern: 'ها', confidencePenalty: 0.1, type: 'pronoun-her' },
114
+ { pattern: 'هم', confidencePenalty: 0.1, type: 'pronoun-them' },
115
+ { pattern: 'هن', confidencePenalty: 0.1, type: 'pronoun-them-f' },
116
+ { pattern: 'نا', confidencePenalty: 0.1, type: 'pronoun-us' },
117
+ { pattern: 'كم', confidencePenalty: 0.1, type: 'pronoun-you-pl' },
118
+ { pattern: 'ك', confidencePenalty: 0.08, type: 'pronoun-you' },
119
+ { pattern: 'ه', confidencePenalty: 0.08, type: 'pronoun-him' },
120
+ { pattern: 'ي', confidencePenalty: 0.08, type: 'pronoun-me' },
121
+ // Feminine marker
122
+ { pattern: 'ة', confidencePenalty: 0.08, type: 'feminine' },
123
+ ];
124
+
125
/**
 * Arabic morphological normalizer.
 *
 * Removes diacritics, then strips at most one proclitic prefix (optionally
 * followed by one present-tense verb marker) and any matching suffixes,
 * lowering a confidence score for each affix removed.
 */
export class ArabicMorphologicalNormalizer implements MorphologicalNormalizer {
  readonly language = 'ar';

  /**
   * Check if a word might be an Arabic word that can be normalized.
   *
   * @param word - Candidate token.
   * @returns `true` when the word contains at least one Arabic character and
   *   is at least two UTF-16 code units long.
   */
  isNormalizable(word: string): boolean {
    return containsArabic(word) && word.length >= 2;
  }

  /**
   * Normalize an Arabic word by removing diacritics and stripping prefixes
   * and suffixes.
   *
   * Steps, in order:
   * 1. Remove diacritics.
   * 2. Strip one combined prefix; if none applied, strip one single prefix.
   * 3. Strip one verb prefix, but only when the stem does not end in a
   *    noun/pronoun suffix and the prefix removed so far (if any) is the
   *    conjunction و or ف.
   * 4. Walk the suffix table once, in table order, stripping every entry
   *    that matches and leaves at least 2 characters.
   *
   * @param word - The word to normalize.
   * @returns The result of `noChange(word)` when nothing at all changed;
   *   otherwise the result of `normalized(...)` carrying the stem, a
   *   confidence clamped to at least 0.5, and the removed affixes. A word
   *   whose only change is diacritic removal is returned through
   *   `normalized(...)` with confidence 1.0 and empty affix lists, so that
   *   vocalized and unvocalized spellings yield the same stem.
   */
  normalize(word: string): NormalizationResult {
    // Remove diacritics for consistent matching
    let stem = removeDiacritics(word);
    let confidence = 1.0;
    const removedPrefixes: string[] = [];
    const removedSuffixes: string[] = [];

    // Strip the first rule, in table order, that matches the stem and leaves
    // enough characters behind. A rule's own minRemaining takes precedence
    // over the fallback (no combined rule currently sets one).
    const stripOnePrefix = (rules: readonly PrefixRule[], fallbackMinRemaining: number): void => {
      for (const rule of rules) {
        if (!stem.startsWith(rule.pattern)) continue;
        const remaining = stem.slice(rule.pattern.length);
        if (remaining.length < (rule.minRemaining ?? fallbackMinRemaining)) continue;
        stem = remaining;
        confidence -= rule.confidencePenalty;
        removedPrefixes.push(rule.pattern);
        return; // Only one prefix per table
      }
    };

    // Combined prefixes first (longest match), single prefixes as fallback.
    stripOnePrefix(COMBINED_PREFIXES, 2);
    if (removedPrefixes.length === 0) {
      stripOnePrefix(SINGLE_PREFIXES, 2);
    }

    // Try verb prefixes ONLY for words that look like verbs (not nouns).
    // Skip if the stem carries a noun-pattern or pronoun suffix; this prevents
    // stripping ت from تغييرات (changes) or تغييرها (her change).
    const nounLikeEndings = ['ات', 'ة', 'ون', 'ين', 'ها', 'هم', 'هن', 'نا', 'كم'];
    const looksLikeNoun = nounLikeEndings.some((ending) => stem.endsWith(ending));
    const firstPrefix = removedPrefixes[0];
    const verbPrefixAllowed =
      firstPrefix === undefined || firstPrefix === 'و' || firstPrefix === 'ف';
    if (!looksLikeNoun && verbPrefixAllowed) {
      stripOnePrefix(VERB_PREFIXES, 3);
    }

    // Single pass over the suffix table: every matching entry is stripped in
    // table order (no break), so several suffixes can come off one word.
    for (const rule of SUFFIXES) {
      if (!stem.endsWith(rule.pattern)) continue;
      const remaining = stem.slice(0, -rule.pattern.length);
      // Must leave a meaningful stem
      if (remaining.length >= 2) {
        stem = remaining;
        confidence -= rule.confidencePenalty;
        removedSuffixes.push(rule.pattern);
      }
    }

    // Ensure confidence stays reasonable
    confidence = Math.max(0.5, confidence);

    // Report "unchanged" only when truly nothing changed. If no affix came
    // off but diacritics did, fall through so the caller still receives the
    // diacritic-free stem instead of the original vocalized word.
    const affixRemoved = removedPrefixes.length > 0 || removedSuffixes.length > 0;
    if (!affixRemoved && stem === word) {
      return noChange(word);
    }

    return normalized(stem, confidence, {
      removedPrefixes,
      removedSuffixes,
    });
  }
}
240
+
241
+ // Export singleton instance: one shared, module-level ArabicMorphologicalNormalizer
242
+ export const arabicMorphologicalNormalizer = new ArabicMorphologicalNormalizer();
@@ -0,0 +1,268 @@
1
+ /**
2
+ * French Morphological Normalizer
3
+ *
4
+ * Reduces French verb conjugations to their infinitive forms.
5
+ * French has three verb conjugation groups:
6
+ * - 1st group: -er verbs (parler, montrer, afficher)
7
+ * - 2nd group: -ir verbs with -iss- forms (finir, choisir)
8
+ * - 3rd group: irregular -ir, -re, -oir verbs (partir, prendre, voir)
9
+ *
10
+ * Key features:
11
+ * - Reflexive imperative handling (hyphen-attached pronoun): montrez-vous → montrer
12
+ * - Regular conjugation patterns for all three groups
13
+ * - Past participle (-é, -i, -u) and present participle (-ant) forms
14
+ *
15
+ * Examples:
16
+ * affiche → afficher (3rd person present)
17
+ * montrant → montrer (present participle)
18
+ * caché → cacher (past participle)
19
+ * finissons → finir (1st person plural present)
20
+ */
21
+
22
+ import type { MorphologicalNormalizer, NormalizationResult, ConjugationType } from './types';
23
+ import { noChange, normalized } from './types';
24
+
25
/**
 * Permissive heuristic: could this word be a French verb form?
 *
 * Answers true when the lowercased word ends in an infinitive ending
 * (-er, -ir, -re), the present-participle ending -ant, or a past-participle
 * vowel (-é, -i, -u), or when the word (as given) contains any of the
 * accented letters / ligatures listed in the character class below.
 * Everything else yields false.
 *
 * @param word - Candidate word; casing does not matter.
 * @returns Whether the word passes any of the checks above.
 */
function looksLikeFrenchVerb(word: string): boolean {
  // Infinitive endings, then present participle, then past-participle vowels.
  const verbLikeEndings = ['er', 'ir', 're', 'ant', 'é', 'i', 'u'];
  const folded = word.toLowerCase();
  const hasVerbLikeEnding = verbLikeEndings.some(ending => folded.endsWith(ending));

  // Accented characters are taken as a sign of French text in general.
  return hasVerbLikeEnding || /[àâäéèêëïîôùûüÿçœæ]/i.test(word);
}
39
+
40
/**
 * Pronouns that can be attached (after a hyphen) to an imperative verb form,
 * e.g. the "vous" in "montrez-vous". Stored without the leading hyphen.
 *
 * Declared readonly, like the ending tables in this module, so the shared
 * lookup list cannot be mutated by accident.
 */
const REFLEXIVE_SUFFIXES: readonly string[] = ['toi', 'vous', 'nous'];
44
+
45
/**
 * Ending rules for -er verbs (1st group).
 *
 * Each rule says: when a word ends in `ending`, replace that ending with
 * `stem` (always "er" here) to rebuild the infinitive, and report the given
 * `confidence` and conjugation `type`.
 *
 * The table is written as compact [ending, confidence, type] rows and then
 * expanded. Some endings ('ions', 'iez', 'ons', 'ez') are listed twice with
 * different types; row order is preserved exactly.
 */
const ER_ENDINGS: readonly {
  ending: string;
  stem: string;
  confidence: number;
  type: ConjugationType;
}[] = (
  [
    // Present participle
    ['ant', 0.88, 'gerund'],
    // Past participle
    ['é', 0.88, 'participle'],
    ['ée', 0.88, 'participle'],
    ['és', 0.88, 'participle'],
    ['ées', 0.88, 'participle'],
    // Present indicative
    ['e', 0.75, 'present'], // je/il/elle
    ['es', 0.78, 'present'], // tu
    ['ons', 0.85, 'present'], // nous
    ['ez', 0.85, 'present'], // vous
    ['ent', 0.82, 'present'], // ils/elles
    // Imperfect
    ['ais', 0.82, 'past'], // je/tu
    ['ait', 0.82, 'past'], // il/elle
    ['ions', 0.85, 'past'], // nous
    ['iez', 0.85, 'past'], // vous
    ['aient', 0.85, 'past'], // ils/elles
    // Simple past (passé simple)
    ['ai', 0.8, 'past'], // je
    ['as', 0.78, 'past'], // tu
    ['a', 0.75, 'past'], // il/elle
    ['âmes', 0.88, 'past'], // nous
    ['âtes', 0.88, 'past'], // vous
    ['èrent', 0.88, 'past'], // ils/elles
    // Future
    ['erai', 0.85, 'future'], // je
    ['eras', 0.85, 'future'], // tu
    ['era', 0.82, 'future'], // il/elle
    ['erons', 0.88, 'future'], // nous
    ['erez', 0.88, 'future'], // vous
    ['eront', 0.88, 'future'], // ils/elles
    // Conditional
    ['erais', 0.85, 'conditional'], // je/tu
    ['erait', 0.85, 'conditional'], // il/elle
    ['erions', 0.88, 'conditional'], // nous
    ['eriez', 0.88, 'conditional'], // vous
    ['eraient', 0.88, 'conditional'], // ils/elles
    // Subjunctive
    ['ions', 0.8, 'subjunctive'], // nous
    ['iez', 0.8, 'subjunctive'], // vous
    // Imperative
    ['ons', 0.82, 'imperative'], // nous
    ['ez', 0.82, 'imperative'], // vous
    // Infinitive
    ['er', 0.92, 'dictionary'],
  ] as [string, number, ConjugationType][]
).map(([ending, confidence, type]) => ({ ending, stem: 'er', confidence, type }));
102
+
103
/**
 * Ending rules for -ir verbs of the 2nd group (the ones with -iss- forms),
 * e.g. finir → finissons, choisir → choisissons.
 *
 * Each rule replaces `ending` with `stem` (always "ir" here) to rebuild the
 * infinitive. The table is written as compact [ending, confidence, type]
 * rows and then expanded; 'is' is listed twice (participle and present) and
 * row order is preserved exactly.
 */
const IR_ENDINGS: readonly {
  ending: string;
  stem: string;
  confidence: number;
  type: ConjugationType;
}[] = (
  [
    // Present participle
    ['issant', 0.88, 'gerund'],
    // Past participle
    ['i', 0.8, 'participle'],
    ['ie', 0.82, 'participle'],
    ['is', 0.78, 'participle'],
    ['ies', 0.82, 'participle'],
    // Present indicative (plural forms carry -iss-)
    ['is', 0.78, 'present'], // je/tu
    ['it', 0.78, 'present'], // il/elle
    ['issons', 0.88, 'present'], // nous
    ['issez', 0.88, 'present'], // vous
    ['issent', 0.88, 'present'], // ils/elles
    // Imperfect
    ['issais', 0.85, 'past'], // je/tu
    ['issait', 0.85, 'past'], // il/elle
    ['issions', 0.88, 'past'], // nous
    ['issiez', 0.88, 'past'], // vous
    ['issaient', 0.88, 'past'], // ils/elles
    // Future
    ['irai', 0.85, 'future'], // je
    ['iras', 0.85, 'future'], // tu
    ['ira', 0.82, 'future'], // il/elle
    ['irons', 0.88, 'future'], // nous
    ['irez', 0.88, 'future'], // vous
    ['iront', 0.88, 'future'], // ils/elles
    // Infinitive
    ['ir', 0.9, 'dictionary'],
  ] as [string, number, ConjugationType][]
).map(([ending, confidence, type]) => ({ ending, stem: 'ir', confidence, type }));
142
+
143
/**
 * Ending rules for -re verbs (3rd group), e.g. prendre, vendre, attendre.
 *
 * Each rule says: when a word ends in `ending`, strip it and append `stem`
 * to rebuild the infinitive.
 *
 * Note on the 'd' rule: in forms such as "vend" or "prend" the final "d"
 * belongs to the verb stem, so after stripping it the replacement must put
 * it back ("dre"), giving "vendre" / "prendre". Appending only "re" would
 * produce the non-words "venre" / "prenre".
 *
 * The 'ant', 'ons', 'ez' and 'ent' endings also appear in the -er table.
 */
const RE_ENDINGS: readonly {
  ending: string;
  stem: string;
  confidence: number;
  type: ConjugationType;
}[] = [
  // Present participle
  { ending: 'ant', stem: 're', confidence: 0.82, type: 'gerund' },
  // Past participle (common patterns)
  { ending: 'u', stem: 're', confidence: 0.8, type: 'participle' },
  { ending: 'ue', stem: 're', confidence: 0.82, type: 'participle' },
  { ending: 'us', stem: 're', confidence: 0.82, type: 'participle' },
  { ending: 'ues', stem: 're', confidence: 0.82, type: 'participle' },
  // Present indicative
  { ending: 's', stem: 're', confidence: 0.72, type: 'present' }, // je/tu (vends → vendre)
  { ending: 'd', stem: 'dre', confidence: 0.75, type: 'present' }, // il/elle (prend → prendre, vend → vendre)
  { ending: 'ons', stem: 're', confidence: 0.82, type: 'present' }, // nous
  { ending: 'ez', stem: 're', confidence: 0.82, type: 'present' }, // vous
  { ending: 'ent', stem: 're', confidence: 0.8, type: 'present' }, // ils/elles
  // Infinitive
  { ending: 're', stem: 're', confidence: 0.9, type: 'dictionary' },
];
169
+
170
/**
 * The -er, -ir and -re rule tables merged into one list, ordered from the
 * longest ending to the shortest so that lookups try the most specific
 * ending first.
 *
 * The comparator only looks at ending length; on engines with a stable
 * Array.prototype.sort (ES2019+), rules whose endings have equal length keep
 * their merge order (-er rules, then -ir, then -re).
 */
const ALL_ENDINGS = ER_ENDINGS.concat(IR_ENDINGS, RE_ENDINGS).sort(
  (left, right) => right.ending.length - left.ending.length
);
176
+
177
/**
 * Suffix-based morphological normalizer for French.
 *
 * Rewrites conjugated verb forms to a candidate infinitive by matching the
 * word's ending against the rule list in ALL_ENDINGS. The matching is purely
 * pattern based: no lexicon is consulted, so the result is a best guess
 * accompanied by the matched rule's confidence.
 */
export class FrenchMorphologicalNormalizer implements MorphologicalNormalizer {
  readonly language = 'fr';

  /**
   * Cheap pre-check: words shorter than three characters are rejected,
   * everything else is decided by looksLikeFrenchVerb.
   */
  isNormalizable(word: string): boolean {
    return word.length >= 3 && looksLikeFrenchVerb(word);
  }

  /**
   * Normalize a French word towards its infinitive.
   *
   * Order of attempts (all on the lowercased word):
   *  1. A word of four or more characters ending in -er, -ir or -re is
   *     treated as already being an infinitive and returned via noChange.
   *  2. Imperative with a hyphen-attached pronoun (e.g. "montrez-vous").
   *  3. Plain ending replacement.
   * If none applies, the original word is returned via noChange.
   *
   * @param word - The word to normalize, in any casing.
   * @returns The normalization result produced by noChange / normalized.
   */
  normalize(word: string): NormalizationResult {
    const lowered = word.toLowerCase();

    const endsLikeInfinitive = ['er', 'ir', 're'].some(ending => lowered.endsWith(ending));
    if (endsLikeInfinitive && lowered.length >= 4) {
      return noChange(word);
    }

    return (
      this.tryReflexiveNormalization(lowered) ??
      this.tryConjugationNormalization(lowered) ??
      noChange(word)
    );
  }

  /**
   * Handle imperative forms carrying a hyphen-attached pronoun from
   * REFLEXIVE_SUFFIXES, e.g. "montrez-vous" → "montrer".
   *
   * The pronoun (with its hyphen) is cut off and the remainder is run through
   * the ending rules. A result is produced only if those rules actually
   * changed the remainder; its confidence is the inner confidence scaled by
   * 0.95, and the removed pronoun is listed first in removedSuffixes.
   *
   * @returns The reflexive result, or null when the word does not qualify.
   */
  private tryReflexiveNormalization(word: string): NormalizationResult | null {
    for (const pronoun of REFLEXIVE_SUFFIXES) {
      const attached = `-${pronoun}`;
      if (!word.endsWith(attached)) continue;

      const verbPart = word.slice(0, -attached.length);
      const inner = this.tryConjugationNormalization(verbPart);
      if (!inner || inner.stem === verbPart) continue;

      const innerSuffixes = inner.metadata?.removedSuffixes ?? [];
      return normalized(inner.stem, inner.confidence * 0.95, {
        removedSuffixes: [attached, ...innerSuffixes],
        conjugationType: 'reflexive',
      });
    }

    return null;
  }

  /**
   * Apply the first rule in ALL_ENDINGS whose ending matches the word and
   * leaves at least two characters once removed; the rule's replacement is
   * appended to what is left to form the infinitive.
   *
   * @returns The normalized result, or null when no rule applies.
   */
  private tryConjugationNormalization(word: string): NormalizationResult | null {
    for (const { ending, stem, confidence, type } of ALL_ENDINGS) {
      if (!word.endsWith(ending)) continue;

      const base = word.slice(0, -ending.length);
      // A remainder shorter than two characters is not a usable stem;
      // keep looking for a shorter ending instead.
      if (base.length >= 2) {
        return normalized(base + stem, confidence, {
          removedSuffixes: [ending],
          conjugationType: type,
        });
      }
    }

    return null;
  }
}

// Shared module-level instance of the French normalizer.
export const frenchMorphologicalNormalizer = new FrenchMorphologicalNormalizer();