@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,256 @@
1
+ /**
2
+ * German Morphological Normalizer
3
+ *
4
+ * Reduces German verb conjugations to their infinitive forms.
5
+ * German verbs have:
6
+ * - Weak verbs (regular): machen → machte (past)
7
+ * - Strong verbs (stem changes): fahren → fuhr (past)
8
+ * - Mixed verbs: kennen → kannte
9
+ * - Separable prefixes: an-, auf-, aus-, ein-, mit-, vor-, zu-
10
+ *
11
+ * Key features:
12
+ * - Handles common conjugation endings
13
+ * - Recognizes past participle ge- prefix
14
+ * - Handles separable prefix verbs
15
+ *
16
+ * Examples:
17
+ * zeigt → zeigen (3rd person present)
18
+ * gemacht → machen (past participle)
19
+ * anzeigen → anzeigen (separable prefix verb)
20
+ */
21
+
22
+ import type { MorphologicalNormalizer, NormalizationResult, ConjugationType } from './types';
23
+ import { noChange, normalized } from './types';
24
+
25
/**
 * Separable verb prefixes that are tried, in this order, when a past
 * participle has the shape prefix + "ge" + … (e.g. the "an" in "angemacht").
 *
 * No entry is a leading substring of another, so at most one can match.
 */
const SEPARABLE_PREFIXES = 'an auf aus ein mit vor zu ab bei nach weg um her hin'.split(' ');
44
+
45
/**
 * Heuristic check for whether `word` could be a German verb form.
 *
 * @param word - Token to inspect; suffix/prefix checks are case-insensitive.
 * @returns `true` when the word ends in an infinitive suffix (-en, -eln,
 *   -ern), has the participle shape ge-…-t, or contains ä, ö, ü or ß.
 */
function looksLikeGermanVerb(word: string): boolean {
  const candidate = word.toLowerCase();

  const hasInfinitiveSuffix = ['en', 'eln', 'ern'].some((suffix) => candidate.endsWith(suffix));
  if (hasInfinitiveSuffix) return true;

  // ge-…-en participles already passed the -en suffix test above, so only
  // the weak ge-…-t shape needs a dedicated check here.
  if (candidate.startsWith('ge') && candidate.endsWith('t')) return true;

  // Fall back to the presence of a German-specific character.
  return /[äöüß]/i.test(word);
}
59
+
60
/**
 * Suffix rules for regular (weak) verbs whose infinitive ends in -en.
 *
 * Each rule names a conjugated `ending`, the suffix that replaces it to
 * rebuild the infinitive (`stem`), a confidence score, and the conjugation
 * type to report.
 *
 * Every ending is listed exactly once. Weak-verb subjunctive II forms
 * (-te, -test, -ten, -tet) and imperative forms (-e, -t, -en) are spelled
 * exactly like the past and present forms below, so separate rules for them
 * could never be selected by a first-match lookup. Listing them again only
 * made the reported type depend on sort stability, so they are omitted.
 */
const VERB_ENDINGS: readonly {
  ending: string;
  stem: string;
  confidence: number;
  type: ConjugationType;
}[] = [
  // Present participle
  { ending: 'end', stem: 'en', confidence: 0.88, type: 'gerund' },

  // Present indicative (also covers the identically spelled imperatives)
  { ending: 'e', stem: 'en', confidence: 0.75, type: 'present' }, // ich; imperative du
  { ending: 'st', stem: 'en', confidence: 0.8, type: 'present' }, // du
  { ending: 't', stem: 'en', confidence: 0.78, type: 'present' }, // er/sie/es, ihr; imperative ihr
  { ending: 'en', stem: 'en', confidence: 0.85, type: 'dictionary' }, // wir/sie/Sie, infinitive

  // Past tense of weak verbs (also covers the identically spelled subjunctive II)
  { ending: 'test', stem: 'en', confidence: 0.85, type: 'past' }, // du
  { ending: 'ten', stem: 'en', confidence: 0.82, type: 'past' }, // wir/sie/Sie
  { ending: 'tet', stem: 'en', confidence: 0.85, type: 'past' }, // ihr
  { ending: 'te', stem: 'en', confidence: 0.82, type: 'past' }, // ich/er/sie/es
];
96
+
97
/**
 * Suffix rules for verbs whose infinitive ends in -eln or -ern
 * (for example sammeln and wandern).
 *
 * The rules have the same shape as the -en table: the conjugated `ending`
 * is replaced by `stem` to rebuild the infinitive.
 */
const ELN_ERN_ENDINGS: readonly {
  ending: string;
  stem: string;
  confidence: number;
  type: ConjugationType;
}[] = [
  // -eln verbs: ich sammle, du sammelst, er/sie/es sammelt, sammeln
  { ending: 'le', stem: 'eln', confidence: 0.82, type: 'present' },
  { ending: 'elst', stem: 'eln', confidence: 0.85, type: 'present' },
  { ending: 'elt', stem: 'eln', confidence: 0.85, type: 'present' },
  { ending: 'eln', stem: 'eln', confidence: 0.88, type: 'dictionary' },

  // -ern verbs: ich wandre, du wanderst, er/sie/es wandert, wandern
  { ending: 're', stem: 'ern', confidence: 0.82, type: 'present' },
  { ending: 'erst', stem: 'ern', confidence: 0.85, type: 'present' },
  { ending: 'ert', stem: 'ern', confidence: 0.85, type: 'present' },
  { ending: 'ern', stem: 'ern', confidence: 0.88, type: 'dictionary' },
];
117
+
118
/**
 * Every suffix rule from both tables in a single list, ordered so that
 * longer endings are tried before shorter ones.
 */
const ALL_ENDINGS = VERB_ENDINGS.concat(ELN_ERN_ENDINGS).sort(
  (left, right) => right.ending.length - left.ending.length
);
124
+
125
/**
 * German morphological normalizer.
 *
 * Maps conjugated German verb forms back to an infinitive using prefix and
 * suffix heuristics only, so every result is a best-effort guess that comes
 * with a confidence score.
 */
export class GermanMorphologicalNormalizer implements MorphologicalNormalizer {
  readonly language = 'de';

  /**
   * Matches weak-verb stems that end in an unstressed -el/-er syllable
   * (sammel-, wander-, feier-), whose infinitive takes -n rather than -en.
   * The syllable must follow a consonant that itself follows a vowel, or one
   * of the diphthongs ai/au/ei/eu/äu. Stems such as spiel-, leer-, schmier-
   * and scher- do not match and keep the regular -en infinitive.
   */
  private static readonly UNSTRESSED_LIQUID_STEM =
    /(?:[aeiouäöü][^aeiouäöü]+|[ae][iu]|äu)e[lr]$/;

  /**
   * Check if a word might be a German verb that can be normalized.
   *
   * @param word - Candidate token.
   * @returns `false` for words shorter than three characters, otherwise the
   *   result of `looksLikeGermanVerb`.
   */
  isNormalizable(word: string): boolean {
    return word.length >= 3 && looksLikeGermanVerb(word);
  }

  /**
   * Normalize a German word to its infinitive form.
   *
   * Words that already end in -en (4+ characters) or -eln/-ern (5+
   * characters) are treated as infinitives and returned unchanged. This
   * includes ge-…-en participles such as "gegangen", which are therefore not
   * rewritten. Otherwise participle rules are tried first, then the
   * conjugation suffix rules.
   *
   * @param word - Word to normalize; matching is done on its lower-cased form.
   * @returns The normalization result, or a no-change result carrying the
   *   original `word` when no rule applies.
   */
  normalize(word: string): NormalizationResult {
    const lower = word.toLowerCase();

    const looksLikeInfinitive =
      (lower.endsWith('en') && lower.length >= 4) ||
      ((lower.endsWith('eln') || lower.endsWith('ern')) && lower.length >= 5);
    if (looksLikeInfinitive) return noChange(word);

    return (
      this.tryParticipleNormalization(lower) ??
      this.tryConjugationNormalization(lower) ??
      noChange(word)
    );
  }

  /**
   * Try to normalize a past participle, with or without a separable prefix.
   *
   * Examples:
   *   gemacht      → machen     (weak verb)
   *   gewartet     → warten     (weak verb, linking -e- before -t)
   *   gesammelt    → sammeln    (weak -eln verb)
   *   angemacht    → anmachen   (separable prefix)
   *   umgeschaltet → umschalten (separable prefix, linking -e-)
   *
   * @param word - Lower-cased word.
   * @returns The normalization result, or `null` when the word does not have
   *   a participle shape.
   */
  private tryParticipleNormalization(word: string): NormalizationResult | null {
    // Separable-prefix participles place "ge" between prefix and stem.
    for (const prefix of SEPARABLE_PREFIXES) {
      if (!word.startsWith(prefix + 'ge')) continue;

      const inner = this.trySimpleParticipleNormalization(word.slice(prefix.length));
      if (!inner) continue;

      const metadata: {
        removedPrefixes: string[];
        removedSuffixes?: readonly string[];
        conjugationType: 'participle';
      } = {
        removedPrefixes: ['ge'],
        conjugationType: 'participle',
      };
      if (inner.metadata?.removedSuffixes) {
        metadata.removedSuffixes = inner.metadata.removedSuffixes;
      }
      // The separable prefix is re-attached; confidence is reduced slightly
      // because two heuristics had to agree.
      return normalized(prefix + inner.stem, inner.confidence * 0.95, metadata);
    }

    return this.trySimpleParticipleNormalization(word);
  }

  /**
   * Try to normalize a simple ge-…-t or ge-…-en participle.
   *
   * @param word - Lower-cased word, expected to start with "ge".
   * @returns The normalization result, or `null` when the shape does not match.
   */
  private trySimpleParticipleNormalization(word: string): NormalizationResult | null {
    if (!word.startsWith('ge')) return null;

    const withoutGe = word.slice(2);

    // Weak verb participle: ge-…-(e)t
    if (withoutGe.endsWith('t') && withoutGe.length >= 3) {
      // Stems ending in -t/-d (and some in -n/-m) insert a linking -e- before
      // the -t (ge-wart-e-t), so drop "et" as a unit whenever that still
      // leaves a stem of at least two characters.
      const hasLinkingE = withoutGe.endsWith('et') && withoutGe.length >= 4;
      const removedSuffix = hasLinkingE ? 'et' : 't';
      const stem = withoutGe.slice(0, -removedSuffix.length);

      return normalized(this.weakInfinitive(stem), 0.85, {
        removedPrefixes: ['ge'],
        removedSuffixes: [removedSuffix],
        conjugationType: 'participle',
      });
    }

    // Strong verb participle: ge-…-en keeps its -en ending. normalize()
    // returns -en words unchanged before reaching this point, so this branch
    // only runs when this method is entered with such a word directly.
    if (withoutGe.endsWith('en') && withoutGe.length >= 4) {
      return normalized(withoutGe, 0.82, {
        removedPrefixes: ['ge'],
        conjugationType: 'participle',
      });
    }

    return null;
  }

  /**
   * Build the infinitive of a weak verb from its bare stem.
   *
   * @param stem - Stem with prefix "ge" and participle suffix removed.
   * @returns `stem + 'n'` for unstressed -el/-er stems (sammel → sammeln),
   *   otherwise `stem + 'en'` (mach → machen).
   */
  private weakInfinitive(stem: string): string {
    return GermanMorphologicalNormalizer.UNSTRESSED_LIQUID_STEM.test(stem)
      ? stem + 'n'
      : stem + 'en';
  }

  /**
   * Try to normalize a conjugated verb to its infinitive using the suffix
   * rule list; the first rule that matches and leaves a stem of at least two
   * characters wins.
   *
   * @param word - Lower-cased word.
   * @returns The normalization result, or `null` when no rule applies.
   */
  private tryConjugationNormalization(word: string): NormalizationResult | null {
    for (const rule of ALL_ENDINGS) {
      if (!word.endsWith(rule.ending)) continue;

      const stemBase = word.slice(0, -rule.ending.length);

      // Must have a meaningful stem (at least 2 characters)
      if (stemBase.length < 2) continue;

      return normalized(stemBase + rule.stem, rule.confidence, {
        removedSuffixes: [rule.ending],
        conjugationType: rule.type,
      });
    }

    return null;
  }
}

// Export singleton instance
export const germanMorphologicalNormalizer = new GermanMorphologicalNormalizer();
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Morphological Normalizers
3
+ *
4
+ * Re-exports all morphological normalizer types and implementations.
5
+ */
6
+
7
+ // Types
8
+ export type {
9
+ NormalizationResult,
10
+ NormalizationMetadata,
11
+ ConjugationType,
12
+ MorphologicalNormalizer,
13
+ SuffixRule,
14
+ PrefixRule,
15
+ } from './types';
16
+
17
+ export { noChange, normalized } from './types';
18
+
19
+ // Language-specific normalizers
20
+ export {
21
+ JapaneseMorphologicalNormalizer,
22
+ japaneseMorphologicalNormalizer,
23
+ } from './japanese-normalizer';
24
+
25
+ export { KoreanMorphologicalNormalizer, koreanMorphologicalNormalizer } from './korean-normalizer';
26
+
27
+ export {
28
+ SpanishMorphologicalNormalizer,
29
+ spanishMorphologicalNormalizer,
30
+ } from './spanish-normalizer';
31
+
32
+ export { ArabicMorphologicalNormalizer, arabicMorphologicalNormalizer } from './arabic-normalizer';
33
+
34
+ export {
35
+ TurkishMorphologicalNormalizer,
36
+ turkishMorphologicalNormalizer,
37
+ } from './turkish-normalizer';
38
+
39
+ export {
40
+ PortugueseMorphologicalNormalizer,
41
+ portugueseMorphologicalNormalizer,
42
+ } from './portuguese-normalizer';
43
+
44
+ export { FrenchMorphologicalNormalizer, frenchMorphologicalNormalizer } from './french-normalizer';
45
+
46
+ export { GermanMorphologicalNormalizer, germanMorphologicalNormalizer } from './german-normalizer';
@@ -0,0 +1,329 @@
1
+ /**
2
+ * Italian Morphological Normalizer
3
+ *
4
+ * Reduces Italian verb conjugations to their infinitive forms.
5
+ * Italian has three verb conjugation classes (-are, -ere, -ire) and
6
+ * supports reflexive verbs (verbs with -si suffix).
7
+ *
8
+ * Key features:
9
+ * - Reflexive verb handling: mostrarsi → mostrare, nascondersi → nascondere
10
+ * - Regular conjugation patterns for -are, -ere, -ire verbs
11
+ * - Suffix rules only: irregular forms (e.g. nascosto, fatto) are not mapped to their true infinitive
12
+ *
13
+ * Examples:
14
+ * mostrarsi → mostrare (reflexive infinitive)
15
+ * alternando → alternare (gerund)
16
+ * mostrato → mostrare (past participle)
17
+ * mostra → mostrare (3rd person present)
18
+ */
19
+
20
+ import type { MorphologicalNormalizer, NormalizationResult, ConjugationType } from './types';
21
+ import { noChange, normalized } from './types';
22
+
23
/**
 * Report whether the given text contains one of the accented vowels
 * à è é ì í î ò ó ù ú, in either case.
 *
 * @param char - Text to inspect; the caller in this file passes one character at a time.
 * @returns `true` when at least one of the listed accented vowels is present.
 */
function isItalianSpecificLetter(char: string): boolean {
  const accentedVowels = 'àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ';
  return Array.from(char).some(letter => accentedVowels.includes(letter));
}
29
+
30
/**
 * Cheap orthographic test for "could this be an Italian verb form?".
 *
 * A word qualifies when its lower-cased form ends in an infinitive ending
 * (-are/-ere/-ire), a gerund ending (-ando/-endo), a masculine singular
 * participle ending (-ato/-uto/-ito) or a reflexive infinitive ending
 * (-arsi/-ersi/-irsi), or when it contains any accented vowel accepted by
 * isItalianSpecificLetter.
 *
 * @param word - Candidate word in any letter case.
 * @returns `true` when one of the conditions above holds.
 */
function looksLikeItalianVerb(word: string): boolean {
  const candidate = word.toLowerCase();
  const verbalSuffixes = [
    // infinitives
    'are',
    'ere',
    'ire',
    // gerunds
    'ando',
    'endo',
    // past participles
    'ato',
    'uto',
    'ito',
    // reflexive infinitives
    'arsi',
    'ersi',
    'irsi',
  ];

  if (verbalSuffixes.some(suffix => candidate.endsWith(suffix))) {
    return true;
  }

  // Accented vowels are looked up in the original spelling, one code point at a time.
  return Array.from(word).some(letter => isItalianSpecificLetter(letter));
}
49
+
50
/**
 * Enclitic pronouns (-si, -mi, -ti, -ci, -vi) that the normalizer strips from
 * the end of a verb form.
 *
 * Declared readonly, like the ending tables in this module, so the shared
 * list cannot be mutated by accident.
 */
const REFLEXIVE_SUFFIXES: readonly string[] = ['si', 'mi', 'ti', 'ci', 'vi'];
54
+
55
/**
 * Suffix rules for -are verbs.
 *
 * Each entry below is `[ending, confidence, conjugation type]`; it is expanded
 * into a rule object whose `stem` (the text appended after the ending is
 * removed) is always 'are'. Table order is preserved.
 */
const ARE_ENDINGS: readonly {
  ending: string;
  stem: string;
  confidence: number;
  type: ConjugationType;
}[] = (
  [
    // Gerund (-ando)
    ['ando', 0.88, 'gerund'],
    // Past participle (-ato and its gender/number variants)
    ['ato', 0.88, 'participle'],
    ['ata', 0.88, 'participle'],
    ['ati', 0.88, 'participle'],
    ['ate', 0.88, 'participle'],
    // Present indicative: io, tu, lui/lei, noi, voi, loro
    ['o', 0.75, 'present'],
    ['i', 0.72, 'present'],
    ['a', 0.75, 'present'],
    ['iamo', 0.85, 'present'],
    ['ate', 0.85, 'present'],
    ['ano', 0.85, 'present'],
    // Imperfect: io, tu, lui/lei, noi, voi, loro
    ['avo', 0.88, 'past'],
    ['avi', 0.88, 'past'],
    ['ava', 0.88, 'past'],
    ['avamo', 0.88, 'past'],
    ['avate', 0.88, 'past'],
    ['avano', 0.88, 'past'],
    // Preterite (passato remoto): io, tu, lui/lei, noi, voi, loro
    ['ai', 0.85, 'past'],
    ['asti', 0.88, 'past'],
    ['ò', 0.85, 'past'],
    ['ammo', 0.88, 'past'],
    ['aste', 0.88, 'past'],
    ['arono', 0.88, 'past'],
    // Subjunctive present: io/tu/lui (ambiguous with present "tu"), loro
    ['i', 0.72, 'subjunctive'],
    ['ino', 0.82, 'subjunctive'],
    // Imperative: tu
    ['a', 0.75, 'imperative'],
    // Infinitive
    ['are', 0.92, 'dictionary'],
  ] as const
).map(([ending, confidence, type]) => ({ ending, stem: 'are', confidence, type }));
100
+
101
/**
 * Suffix rules for -ere verbs.
 *
 * Each entry below is `[ending, confidence, conjugation type]`; it is expanded
 * into a rule object whose `stem` (the text appended after the ending is
 * removed) is always 'ere'. Table order is preserved.
 */
const ERE_ENDINGS: readonly {
  ending: string;
  stem: string;
  confidence: number;
  type: ConjugationType;
}[] = (
  [
    // Gerund (-endo)
    ['endo', 0.88, 'gerund'],
    // Past participle (-uto and its gender/number variants)
    ['uto', 0.85, 'participle'],
    ['uta', 0.85, 'participle'],
    ['uti', 0.85, 'participle'],
    ['ute', 0.85, 'participle'],
    // Present indicative: io, tu, lui/lei, noi, voi, loro
    ['o', 0.72, 'present'],
    ['i', 0.72, 'present'],
    ['e', 0.72, 'present'],
    ['iamo', 0.85, 'present'],
    ['ete', 0.85, 'present'],
    ['ono', 0.82, 'present'],
    // Imperfect: io, tu, lui/lei, noi, voi, loro
    ['evo', 0.88, 'past'],
    ['evi', 0.88, 'past'],
    ['eva', 0.88, 'past'],
    ['evamo', 0.88, 'past'],
    ['evate', 0.88, 'past'],
    ['evano', 0.88, 'past'],
    // Preterite: io, io (variant), tu, lui/lei, lui/lei (variant), noi, voi, loro, loro (variant)
    ['ei', 0.85, 'past'],
    ['etti', 0.85, 'past'],
    ['esti', 0.88, 'past'],
    ['é', 0.85, 'past'],
    ['ette', 0.85, 'past'],
    ['emmo', 0.88, 'past'],
    ['este', 0.88, 'past'],
    ['erono', 0.88, 'past'],
    ['ettero', 0.88, 'past'],
    // Infinitive
    ['ere', 0.92, 'dictionary'],
  ] as const
).map(([ending, confidence, type]) => ({ ending, stem: 'ere', confidence, type }));
144
+
145
/**
 * Suffix rules for -ire verbs, including the -isco present-tense pattern.
 *
 * Each entry below is `[ending, confidence, conjugation type]`; it is expanded
 * into a rule object whose `stem` (the text appended after the ending is
 * removed) is always 'ire'. Table order is preserved.
 */
const IRE_ENDINGS: readonly {
  ending: string;
  stem: string;
  confidence: number;
  type: ConjugationType;
}[] = (
  [
    // Gerund (-endo)
    ['endo', 0.85, 'gerund'],
    // Past participle (-ito and its gender/number variants)
    ['ito', 0.85, 'participle'],
    ['ita', 0.85, 'participle'],
    ['iti', 0.85, 'participle'],
    ['ite', 0.85, 'participle'],
    // Present indicative (standard): io, tu, lui/lei, noi, voi, loro
    ['o', 0.7, 'present'],
    ['i', 0.7, 'present'],
    ['e', 0.7, 'present'],
    ['iamo', 0.85, 'present'],
    ['ite', 0.85, 'present'],
    ['ono', 0.78, 'present'],
    // Present indicative (-isco verbs): io, tu, lui/lei, loro
    ['isco', 0.85, 'present'],
    ['isci', 0.85, 'present'],
    ['isce', 0.85, 'present'],
    ['iscono', 0.88, 'present'],
    // Imperfect: io, tu, lui/lei, noi, voi, loro
    ['ivo', 0.88, 'past'],
    ['ivi', 0.88, 'past'],
    ['iva', 0.88, 'past'],
    ['ivamo', 0.88, 'past'],
    ['ivate', 0.88, 'past'],
    ['ivano', 0.88, 'past'],
    // Preterite: io, tu, lui/lei, noi, voi, loro
    ['ii', 0.85, 'past'],
    ['isti', 0.88, 'past'],
    ['ì', 0.85, 'past'],
    ['immo', 0.88, 'past'],
    ['iste', 0.88, 'past'],
    ['irono', 0.88, 'past'],
    // Infinitive
    ['ire', 0.92, 'dictionary'],
  ] as const
).map(([ending, confidence, type]) => ({ ending, stem: 'ire', confidence, type }));
190
+
191
/**
 * The -are, -ere and -ire rule tables merged into one list, ordered from the
 * longest ending to the shortest so that more specific endings are tried
 * before shorter ones they contain.
 */
const ALL_ENDINGS = ARE_ENDINGS.concat(ERE_ENDINGS, IRE_ENDINGS).sort(
  (first, second) => second.ending.length - first.ending.length
);
197
+
198
/**
 * Suffix-rule normalizer that maps Italian verb forms to a candidate infinitive.
 *
 * The analysis is purely orthographic: it relies on the endings listed in
 * ALL_ENDINGS and the enclitic pronouns in REFLEXIVE_SUFFIXES. There is no
 * lexicon, so irregular forms are not mapped to their true infinitive and the
 * reported confidence reflects only how specific the matched ending is.
 */
export class ItalianMorphologicalNormalizer implements MorphologicalNormalizer {
  /** Language code reported by this normalizer. */
  readonly language = 'it';

  /**
   * Cheap pre-check: does the word look like an Italian verb form?
   *
   * @param word - Candidate word.
   * @returns `false` for words shorter than three characters, otherwise the
   *   verdict of looksLikeItalianVerb.
   */
  isNormalizable(word: string): boolean {
    if (word.length < 3) return false;
    return looksLikeItalianVerb(word);
  }

  /**
   * Normalize an Italian word to a candidate infinitive.
   *
   * Order of analysis:
   *   1. a word already ending in -are/-ere/-ire is returned unchanged;
   *   2. enclitic pronoun stripping (mostrarsi → mostrare);
   *   3. plain conjugation endings (mostrava → mostrare);
   *   4. otherwise the word is returned unchanged.
   *
   * @param word - Word in any letter case; matching is done on the lower-cased form.
   * @returns The result built by `normalized` for a match, or `noChange(word)`
   *   (original casing) when nothing applies.
   */
  normalize(word: string): NormalizationResult {
    const lower = word.toLowerCase();

    // Infinitive spelling: nothing to strip. Reflexive infinitives end in
    // -arsi/-ersi/-irsi, never in -are/-ere/-ire, so no reflexive check is
    // needed here; stems that merely end in si/mi/ti/ci/vi (lasciare,
    // cominciare, inviare) are ordinary infinitives too.
    if (lower.endsWith('are') || lower.endsWith('ere') || lower.endsWith('ire')) {
      return noChange(word);
    }

    // Enclitic pronouns first, so that "mostrarsi" is not read as "mostrars" + "i".
    const reflexiveResult = this.tryReflexiveNormalization(lower);
    if (reflexiveResult) return reflexiveResult;

    // Plain conjugation endings.
    const conjugationResult = this.tryConjugationNormalization(lower);
    if (conjugationResult) return conjugationResult;

    // No rule applied.
    return noChange(word);
  }

  /**
   * Try to read the word as a verb form followed by an enclitic pronoun
   * (-si, -mi, -ti, -ci, -vi).
   *
   * Reflexive infinitives drop the final -e before the pronoun:
   *   mostrare + si → mostrarsi
   *   nascondere + si → nascondersi
   *
   * A conjugated form plus pronoun (alzandosi → alzare) is accepted only when
   * the whole word does not match a longer plain ending. Without that check,
   * forms such as "sentiti" or "dormivi" would be split into "senti" + "ti" /
   * "dormi" + "vi" and rebuilt with the one-letter -are rule ("sentare",
   * "dormare") instead of being matched by -iti / -ivi.
   *
   * @param word - Lower-cased word.
   * @returns A result tagged 'reflexive', or `null` when this reading does not apply.
   */
  private tryReflexiveNormalization(word: string): NormalizationResult | null {
    for (const suffix of REFLEXIVE_SUFFIXES) {
      if (!word.endsWith(suffix)) continue;

      const withoutReflexive = word.slice(0, -suffix.length);

      // Truncated infinitive (mostrar-, nasconder-, vestir-): restore the final -e.
      if (
        withoutReflexive.endsWith('ar') ||
        withoutReflexive.endsWith('er') ||
        withoutReflexive.endsWith('ir')
      ) {
        return normalized(withoutReflexive + 'e', 0.88, {
          removedSuffixes: [suffix],
          conjugationType: 'reflexive',
        });
      }

      // Full infinitive followed by the pronoun (less common spelling).
      if (
        withoutReflexive.endsWith('are') ||
        withoutReflexive.endsWith('ere') ||
        withoutReflexive.endsWith('ire')
      ) {
        return normalized(withoutReflexive, 0.88, {
          removedSuffixes: [suffix],
          conjugationType: 'reflexive',
        });
      }

      // Conjugated form + pronoun. Compare with the plain reading of the whole
      // word and keep the clitic reading only if its ending is at least as long.
      const innerRule = this.findEndingRule(withoutReflexive);
      if (!innerRule) continue;

      const directRule = this.findEndingRule(word);
      if (directRule && directRule.ending.length > innerRule.ending.length) continue;

      const innerResult = this.tryConjugationNormalization(withoutReflexive);
      if (innerResult && innerResult.stem !== withoutReflexive) {
        // Slightly lower confidence: two suffixes had to be guessed.
        return normalized(innerResult.stem, innerResult.confidence * 0.95, {
          removedSuffixes: [suffix, ...(innerResult.metadata?.removedSuffixes || [])],
          conjugationType: 'reflexive',
        });
      }
    }

    return null;
  }

  /**
   * Try to read the word as a plain conjugated form and rebuild its infinitive.
   *
   * @param word - Lower-cased word.
   * @returns A result carrying the rule's confidence and conjugation type, or
   *   `null` when no ending rule applies.
   */
  private tryConjugationNormalization(word: string): NormalizationResult | null {
    const rule = this.findEndingRule(word);
    if (!rule) return null;

    // Swap the matched ending for the infinitive ending of the rule's class.
    const stemBase = word.slice(0, -rule.ending.length);
    return normalized(stemBase + rule.stem, rule.confidence, {
      removedSuffixes: [rule.ending],
      conjugationType: rule.type,
    });
  }

  /**
   * Find the first rule in ALL_ENDINGS (longest endings first) whose ending
   * terminates the word while leaving a stem of at least two characters.
   *
   * @param word - Lower-cased word.
   * @returns The matching rule, or `null` when none applies.
   */
  private findEndingRule(word: string): (typeof ALL_ENDINGS)[number] | null {
    for (const rule of ALL_ENDINGS) {
      // Require a meaningful stem (two or more characters) before the ending.
      if (word.endsWith(rule.ending) && word.length - rule.ending.length >= 2) {
        return rule;
      }
    }
    return null;
  }
}
327
+
328
+ // Default shared instance, created once when this module is evaluated (the class is exported as well).
329
+ export const italianMorphologicalNormalizer = new ItalianMorphologicalNormalizer();