@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,403 @@
1
+ /**
2
+ * Spanish Tokenizer
3
+ *
4
+ * Tokenizes Spanish hyperscript input.
5
+ * Spanish is relatively straightforward as it:
6
+ * - Uses space-separated words like English
7
+ * - Follows SVO word order and marks roles with prepositions, like English
8
+ * - Uses accented letters (á, é, í, ó, ú, ü, ñ) that must be treated as word characters
9
+ */
10
+
11
+ import type { LanguageToken, TokenKind, TokenStream } from '../types';
12
+ import {
13
+ BaseTokenizer,
14
+ TokenStreamImpl,
15
+ createToken,
16
+ createPosition,
17
+ createLatinCharClassifiers,
18
+ isWhitespace,
19
+ isSelectorStart,
20
+ isQuote,
21
+ isDigit,
22
+ isUrlStart,
23
+ type KeywordEntry,
24
+ type TimeUnitMapping,
25
+ } from './base';
26
+ import { SpanishMorphologicalNormalizer } from './morphology/spanish-normalizer';
27
+ import { spanishProfile } from '../generators/profiles/spanish';
28
+
29
+ // =============================================================================
30
+ // Spanish Character Classification
31
+ // =============================================================================
32
+
33
// Character predicates for Spanish words, built from the Latin alphabet plus the
// accented vowels, ü and ñ (both cases).
const spanishCharClassifiers = createLatinCharClassifiers(/[a-zA-ZáéíóúüñÁÉÍÓÚÜÑ]/);
const isSpanishLetter = spanishCharClassifiers.isLetter;
const isSpanishIdentifierChar = spanishCharClassifiers.isIdentifierChar;
35
+
36
+ // =============================================================================
37
+ // Spanish Time Units
38
+ // =============================================================================
39
+
40
/**
 * Spanish time-unit words recognized directly after a number, each mapped to
 * the canonical suffix it stands for.
 *
 * Entries are listed longest-first so that a plural form ("segundos") is
 * tried before the singular it contains ("segundo"). `length` is derived from
 * the pattern itself so the two can never drift apart.
 */
const SPANISH_TIME_UNITS: readonly TimeUnitMapping[] = (
  [
    ['milisegundos', 'ms'],
    ['milisegundo', 'ms'],
    ['segundos', 's'],
    ['segundo', 's'],
    ['minutos', 'm'],
    ['minuto', 'm'],
    ['horas', 'h'],
    ['hora', 'h'],
  ] as const
).map(([pattern, suffix]) => ({
  pattern,
  suffix,
  length: pattern.length,
  caseInsensitive: true,
}));
54
+
55
+ // =============================================================================
56
+ // Spanish Prepositions
57
+ // =============================================================================
58
+
59
/**
 * Spanish prepositions that mark grammatical roles.
 *
 * Lookups are done on the lower-cased word. Both `classifyToken` and
 * `extractSpanishWord` consult this table BEFORE the keyword table, so a word
 * listed here is always reported as a 'particle', even when a keyword entry
 * with the same spelling exists (for example 'despues').
 *
 * Typed as `ReadonlySet` because the table is a shared module-level constant
 * that is only ever queried with `.has`.
 */
const PREPOSITIONS: ReadonlySet<string> = new Set([
  'en', // in, on
  'a', // to
  'de', // of, from
  'desde', // from
  'hasta', // until, to
  'con', // with
  'sin', // without
  'por', // by, for
  'para', // for
  'sobre', // on, about
  'entre', // between
  'antes', // before
  'después', // after
  'despues', // after (no accent)
  'dentro', // inside
  'fuera', // outside
  'al', // a + el (contraction)
  'del', // de + el (contraction)
]);
82
+
83
+ // =============================================================================
84
+ // Spanish Extras (keywords not in profile)
85
+ // =============================================================================
86
+
87
/**
 * Extra keywords not covered by the profile:
 * - Literals (true, false)
 * - Positional words
 * - Event names
 * - Time units
 * - Multi-word phrases
 * - Additional synonyms
 * - Accent variations
 *
 * Each entry maps a Spanish spelling (`native`) to the canonical keyword
 * (`normalized`). Wherever a word carries an accent, the unaccented spelling
 * is listed as well so that input typed without accents is still recognized.
 *
 * Entries containing a space are matched as a single unit by
 * `tryMultiWordPhrase`, which compares against the exact spelling (with a
 * single space between words).
 */
const SPANISH_EXTRAS: KeywordEntry[] = [
  // Values/Literals
  { native: 'verdadero', normalized: 'true' },
  { native: 'falso', normalized: 'false' },
  { native: 'nulo', normalized: 'null' },
  { native: 'indefinido', normalized: 'undefined' },

  // Positional (both genders; accented and unaccented spellings of "last")
  { native: 'primero', normalized: 'first' },
  { native: 'primera', normalized: 'first' },
  { native: 'último', normalized: 'last' },
  { native: 'ultimo', normalized: 'last' },
  { native: 'última', normalized: 'last' },
  { native: 'ultima', normalized: 'last' },
  { native: 'siguiente', normalized: 'next' },
  { native: 'anterior', normalized: 'previous' },
  { native: 'cercano', normalized: 'closest' },
  { native: 'padre', normalized: 'parent' },

  // Events
  { native: 'clic', normalized: 'click' },
  { native: 'click', normalized: 'click' },
  { native: 'hacer clic', normalized: 'click' },
  { native: 'entrada', normalized: 'input' },
  { native: 'cambio', normalized: 'change' },
  { native: 'envío', normalized: 'submit' },
  { native: 'envio', normalized: 'submit' },
  { native: 'tecla abajo', normalized: 'keydown' },
  { native: 'tecla arriba', normalized: 'keyup' },
  { native: 'ratón encima', normalized: 'mouseover' },
  { native: 'raton encima', normalized: 'mouseover' },
  { native: 'ratón fuera', normalized: 'mouseout' },
  { native: 'raton fuera', normalized: 'mouseout' },
  { native: 'enfoque', normalized: 'focus' },
  { native: 'desenfoque', normalized: 'blur' },
  { native: 'carga', normalized: 'load' },
  { native: 'desplazamiento', normalized: 'scroll' },

  // References
  { native: 'yo', normalized: 'me' },
  { native: 'mí', normalized: 'me' },
  { native: 'mi', normalized: 'me' },
  { native: 'ello', normalized: 'it' },
  { native: 'resultado', normalized: 'result' },
  { native: 'objetivo', normalized: 'target' },
  { native: 'destino', normalized: 'target' },

  // Time units
  { native: 'segundo', normalized: 's' },
  { native: 'segundos', normalized: 's' },
  { native: 'milisegundo', normalized: 'ms' },
  { native: 'milisegundos', normalized: 'ms' },
  { native: 'minuto', normalized: 'm' },
  { native: 'minutos', normalized: 'm' },
  { native: 'hora', normalized: 'h' },
  { native: 'horas', normalized: 'h' },

  // Multi-word phrases
  { native: 'de lo contrario', normalized: 'else' },
  { native: 'hasta que', normalized: 'until' },
  { native: 'antes de', normalized: 'before' },
  { native: 'después de', normalized: 'after' },
  { native: 'despues de', normalized: 'after' },
  { native: 'dentro de', normalized: 'into' },
  { native: 'fuera de', normalized: 'out' },

  // Accent variations not in profile
  { native: 'asincrono', normalized: 'async' },
  // NOTE(review): 'despues' is also in PREPOSITIONS, which the tokenizer checks
  // first, so this entry looks unreachable for a standalone word. Confirm
  // whether the base class uses it elsewhere.
  { native: 'despues', normalized: 'after' },

  // Command overrides (ensure correct mapping when profile has multiple meanings)
  { native: 'añadir', normalized: 'add' }, // Profile may have this as 'append'

  // Synonyms not in profile
  { native: 'toggle', normalized: 'toggle' },
  { native: 'borrar', normalized: 'remove' },
  { native: 'pon', normalized: 'put' },
  { native: 'crear', normalized: 'make' },

  // Logical/conditional
  { native: 'y', normalized: 'and' },
  { native: 'o', normalized: 'or' },
  { native: 'no', normalized: 'not' },
  { native: 'es', normalized: 'is' },
  { native: 'existe', normalized: 'exists' },
  { native: 'vacío', normalized: 'empty' },
  { native: 'vacio', normalized: 'empty' },
];
183
+
184
+ // =============================================================================
185
+ // Spanish Tokenizer Implementation
186
+ // =============================================================================
187
+
188
/**
 * Tokenizer for Spanish hyperscript input.
 *
 * Keywords come from the Spanish language profile merged with
 * `SPANISH_EXTRAS`; conjugated and reflexive verb forms are resolved through
 * `SpanishMorphologicalNormalizer`.
 */
export class SpanishTokenizer extends BaseTokenizer {
  readonly language = 'es';
  readonly direction = 'ltr' as const;

  constructor() {
    super();
    // The profile plus the extras form the single keyword source of truth.
    this.initializeKeywordsFromProfile(spanishProfile, SPANISH_EXTRAS);
    // Needed so conjugated verbs can be mapped back to their keyword.
    this.normalizer = new SpanishMorphologicalNormalizer();
  }

  /**
   * Split `input` into a token stream.
   *
   * Whitespace is skipped. Any character that no recognizer accepts is
   * dropped silently and scanning resumes at the next character.
   *
   * @param input - Raw Spanish hyperscript source.
   * @returns A token stream tagged with the language code 'es'.
   */
  tokenize(input: string): TokenStream {
    const tokens: LanguageToken[] = [];
    let pos = 0;

    while (pos < input.length) {
      if (isWhitespace(input[pos])) {
        pos++;
        continue;
      }

      const token = this.scanSpanishToken(input, pos);
      if (token) {
        tokens.push(token);
        pos = token.position.end;
      } else {
        // Unknown character: skip it.
        pos++;
      }
    }

    return new TokenStreamImpl(tokens, 'es');
  }

  /**
   * Classify an already-isolated token string.
   *
   * Prepositions win over keywords; after that the first character decides
   * between selector and literal, and anything left that is not a known
   * comparison/logical operator is an identifier.
   *
   * @param token - Token text to classify.
   * @returns The token kind.
   */
  classifyToken(token: string): TokenKind {
    const normalized = token.toLowerCase();

    if (PREPOSITIONS.has(normalized)) return 'particle';
    if (this.isKeyword(normalized)) return 'keyword';

    switch (token.charAt(0)) {
      case '#':
      case '.':
      case '[':
        return 'selector';
      case '"':
      case "'":
        return 'literal';
      default:
        break;
    }

    if (/^\d/.test(token)) return 'literal';

    const isOperator = ['==', '!=', '<=', '>=', '<', '>', '&&', '||', '!'].includes(token);
    return isOperator ? 'operator' : 'identifier';
  }

  /**
   * Run every recognizer, in priority order, at a non-whitespace position and
   * return the first token produced.
   *
   * Order: event modifier / CSS selector, string literal, URL, number,
   * variable reference, multi-word phrase, Spanish word, operator.
   *
   * @param input - Full source text.
   * @param pos - Index of the character to start scanning at.
   * @returns The recognized token, or null when nothing matches at `pos`.
   */
  private scanSpanishToken(input: string, pos: number): LanguageToken | null {
    const ch = input[pos];

    if (isSelectorStart(ch)) {
      // Event modifiers (.once, .debounce(), ...) share the selector prefix,
      // so they are tried before a plain selector.
      const selectorLike = this.tryEventModifier(input, pos) || this.trySelector(input, pos);
      if (selectorLike) return selectorLike;
    }

    if (isQuote(ch)) {
      const stringToken = this.tryString(input, pos);
      if (stringToken) return stringToken;
    }

    // URLs: /path, ./path, http://, etc.
    if (isUrlStart(input, pos)) {
      const urlToken = this.tryUrl(input, pos);
      if (urlToken) return urlToken;
    }

    const startsNegativeNumber =
      ch === '-' && pos + 1 < input.length && isDigit(input[pos + 1]);
    if (isDigit(ch) || startsNegativeNumber) {
      const numberToken = this.extractSpanishNumber(input, pos);
      if (numberToken) return numberToken;
    }

    // Variable reference (:varname)
    const variableToken = this.tryVariableRef(input, pos);
    if (variableToken) return variableToken;

    // Phrases such as "de lo contrario" must win over their first word.
    const phraseToken = this.tryMultiWordPhrase(input, pos);
    if (phraseToken) return phraseToken;

    if (isSpanishLetter(ch)) {
      const wordToken = this.extractSpanishWord(input, pos);
      if (wordToken) return wordToken;
    }

    return this.tryOperator(input, pos) || null;
  }

  /**
   * Match a multi-word keyword phrase starting at `pos`.
   *
   * Only keyword entries whose native spelling contains a space are
   * considered. The comparison is case-insensitive, and the phrase must be
   * followed by the end of input, whitespace, or a non-letter.
   *
   * @param input - Full source text.
   * @param pos - Index where the phrase would start.
   * @returns A 'keyword' token carrying the original casing, or null.
   */
  private tryMultiWordPhrase(input: string, pos: number): LanguageToken | null {
    for (const entry of this.profileKeywords) {
      const phrase = entry.native;
      if (!phrase.includes(' ')) continue;

      const end = pos + phrase.length;
      const raw = input.slice(pos, end);
      if (raw.toLowerCase() !== phrase.toLowerCase()) continue;

      // Word boundary: reject "hasta quedar" matching the phrase "hasta que".
      const atBoundary =
        end >= input.length || isWhitespace(input[end]) || !isSpanishLetter(input[end]);
      if (atBoundary) {
        return createToken(raw, 'keyword', createPosition(pos, end), entry.normalized);
      }
    }

    return null;
  }

  /**
   * Read one Spanish word starting at `startPos` and classify it.
   *
   * Resolution order: preposition ('particle'), exact keyword, keyword
   * reached through morphological normalization (e.g. mostrarse → mostrar,
   * alternando → alternar), and finally plain identifier.
   *
   * @param input - Full source text.
   * @param startPos - Index of the first character of the word.
   * @returns The token, or null when no identifier character is at `startPos`.
   */
  private extractSpanishWord(input: string, startPos: number): LanguageToken | null {
    let end = startPos;
    while (end < input.length && isSpanishIdentifierChar(input[end])) {
      end++;
    }
    if (end === startPos) return null;

    const word = input.slice(startPos, end);
    const lower = word.toLowerCase();
    const span = () => createPosition(startPos, end);

    if (PREPOSITIONS.has(lower)) {
      return createToken(word, 'particle', span());
    }

    const keywordEntry = this.lookupKeyword(lower);
    if (keywordEntry) {
      return createToken(word, 'keyword', span(), keywordEntry.normalized);
    }

    // Conjugated / reflexive forms; otherwise the word is an identifier.
    return (
      this.tryMorphKeywordMatch(lower, startPos, end) ||
      createToken(word, 'identifier', span())
    );
  }

  /**
   * Read a number at `startPos`, accepting a leading sign and an optional
   * Spanish time-unit word (possibly separated by whitespace).
   *
   * @param input - Full source text.
   * @param startPos - Index of the sign or first digit.
   * @returns The number token, or null when the base helper finds none.
   */
  private extractSpanishNumber(input: string, startPos: number): LanguageToken | null {
    return this.tryNumberWithTimeUnits(input, startPos, SPANISH_TIME_UNITS, {
      allowSign: true,
      skipWhitespace: true,
    });
  }
}

/**
 * Shared, ready-to-use Spanish tokenizer instance.
 */
export const spanishTokenizer = new SpanishTokenizer();
@@ -0,0 +1,303 @@
1
+ /**
2
+ * Swahili Tokenizer
3
+ *
4
+ * Tokenizes Swahili (Kiswahili) hyperscript input.
5
+ * Swahili characteristics:
6
+ * - SVO word order
7
+ * - Agglutinative morphology
8
+ * - Noun class prefixes (m-, wa-, ki-, vi-, etc.)
9
+ * - Verb prefixes for subject/object agreement
10
+ * - No grammatical gender, but noun classes
11
+ */
12
+
13
+ import type { LanguageToken, TokenKind, TokenStream } from '../types';
14
+ import {
15
+ BaseTokenizer,
16
+ TokenStreamImpl,
17
+ createToken,
18
+ createPosition,
19
+ createLatinCharClassifiers,
20
+ isWhitespace,
21
+ isSelectorStart,
22
+ isQuote,
23
+ isDigit,
24
+ isUrlStart,
25
+ type KeywordEntry,
26
+ } from './base';
27
+ import { swahiliProfile } from '../generators/profiles/swahili';
28
+
29
+ // =============================================================================
30
+ // Swahili Character Classification
31
+ // =============================================================================
32
+
33
// Build the classifier pair once from the plain ASCII letter pattern, then
// expose each predicate under a Swahili-specific name.
const swahiliCharClassifiers = createLatinCharClassifiers(/[a-zA-Z]/);
const isSwahiliLetter = swahiliCharClassifiers.isLetter;
const isSwahiliIdentifierChar = swahiliCharClassifiers.isIdentifierChar;
35
+
36
+ // =============================================================================
37
+ // Swahili Prepositions
38
+ // =============================================================================
39
+
40
/**
 * Swahili preposition words, all lowercase. Callers lowercase a word before
 * testing membership.
 */
const SWAHILI_PREPOSITION_WORDS: readonly string[] = [
  // General relations
  'kwa', // to, for, with, by
  'na', // and, with
  'katika', // in, at
  'kwenye', // on, at
  'kutoka', // from

  // Limits and time
  'hadi', // until, to
  'mpaka', // until, up to
  'kabla', // before
  'baada', // after
  'wakati', // during, when

  // Manner and topic
  'bila', // without
  'kuhusu', // about

  // Location
  'karibu', // near
  'mbele', // in front of
  'nyuma', // behind
  'ndani', // inside
  'nje', // outside
  'juu', // above, on
  'chini', // below, under
  'kati', // between
];

const PREPOSITIONS = new Set(SWAHILI_PREPOSITION_WORDS);
62
+
63
+ // =============================================================================
64
+ // Swahili Extras (keywords not in profile)
65
+ // =============================================================================
66
+
67
+ /**
68
+ * Extra keywords not covered by the profile:
69
+ * - Literals (true, false, null, undefined)
70
+ * - Positional words
71
+ * - Event names
72
+ * - Time units
73
+ * - Noun class possessive variants
74
+ */
75
const SWAHILI_EXTRAS: KeywordEntry[] = (
  [
    // Values/Literals
    ['kweli', 'true'],
    ['uongo', 'false'],
    ['null', 'null'],
    ['tupu', 'null'],
    ['haijafafanuliwa', 'undefined'],

    // Positional
    ['kwanza', 'first'],
    ['mwisho', 'last'],
    ['inayofuata', 'next'],
    ['iliyopita', 'previous'],
    ['karibu zaidi', 'closest'],
    ['mzazi', 'parent'],

    // Events
    ['bonyeza', 'click'],
    ['click', 'click'],
    ['ingiza', 'input'],
    ['badiliko', 'change'],
    ['wasilisha', 'submit'],
    ['funguo chini', 'keydown'],
    ['funguo juu', 'keyup'],
    ['kipanya juu', 'mouseover'],
    ['kipanya nje', 'mouseout'],
    ['ukungu', 'blur'],
    ['sogeza', 'scroll'],

    // Additional references
    ['yenyewe', 'it'],
    ['wangu', 'my'],
    ['langu', 'my'],
    ['changu', 'my'],

    // Time units
    ['sekunde', 's'],
    ['milisekunde', 'ms'],
    ['dakika', 'm'],
    ['saa', 'h'],

    // Additional synonyms and multi-word phrases
    ['ondoa lenga', 'blur'],
    ['piga simu', 'call'],
    ['basi', 'then'],
    ['mpaka', 'until'],
  ] as ReadonlyArray<readonly [string, string]>
).map(([native, normalized]) => ({ native, normalized }));
122
+
123
+ // =============================================================================
124
+ // Swahili Tokenizer Implementation
125
+ // =============================================================================
126
+
127
/**
 * Swahili time-unit words paired with the suffix appended to a numeric
 * literal when the word follows a number.
 */
const SWAHILI_TIME_UNIT_SUFFIXES: ReadonlyArray<readonly [string, string]> = [
  ['milisekunde', 'ms'],
  ['sekunde', 's'],
  ['dakika', 'm'],
  ['saa', 'h'],
];

/**
 * Entries of SWAHILI_EXTRAS whose native form contains a space, longest
 * first. Single-word lookup can never match these, so the tokenizer tries
 * them explicitly before extracting a single word. `filter` returns a new
 * array, so sorting here does not reorder SWAHILI_EXTRAS itself.
 */
const SWAHILI_MULTI_WORD_EXTRAS = SWAHILI_EXTRAS.filter((entry) => entry.native.includes(' ')).sort(
  (a, b) => b.native.length - a.native.length
);

/**
 * Tokenizer for Swahili (Kiswahili) hyperscript input.
 *
 * Keywords are initialized from the Swahili profile plus SWAHILI_EXTRAS.
 * `tokenize` makes a single left-to-right pass and, at each position, tries
 * the token kinds in a fixed order: selector/event modifier, string, URL,
 * number, variable reference, multi-word phrase, word, operator. A character
 * that matches none of these is skipped.
 */
export class SwahiliTokenizer extends BaseTokenizer {
  readonly language = 'sw';
  readonly direction = 'ltr' as const;

  constructor() {
    super();
    this.initializeKeywordsFromProfile(swahiliProfile, SWAHILI_EXTRAS);
  }

  /**
   * Split `input` into a token stream.
   *
   * @param input - Raw Swahili hyperscript source.
   * @returns A token stream tagged with language 'sw'.
   */
  tokenize(input: string): TokenStream {
    const tokens: LanguageToken[] = [];
    let pos = 0;

    while (pos < input.length) {
      if (isWhitespace(input[pos])) {
        pos++;
        continue;
      }

      if (isSelectorStart(input[pos])) {
        // Check for event modifier first (.once, .debounce(), etc.)
        const modifierToken = this.tryEventModifier(input, pos);
        if (modifierToken) {
          tokens.push(modifierToken);
          pos = modifierToken.position.end;
          continue;
        }

        const selectorToken = this.trySelector(input, pos);
        if (selectorToken) {
          tokens.push(selectorToken);
          pos = selectorToken.position.end;
          continue;
        }
      }

      if (isQuote(input[pos])) {
        const stringToken = this.tryString(input, pos);
        if (stringToken) {
          tokens.push(stringToken);
          pos = stringToken.position.end;
          continue;
        }
      }

      if (isUrlStart(input, pos)) {
        const urlToken = this.tryUrl(input, pos);
        if (urlToken) {
          tokens.push(urlToken);
          pos = urlToken.position.end;
          continue;
        }
      }

      // A number starts at a digit, or at '-' immediately followed by a digit.
      if (
        isDigit(input[pos]) ||
        (input[pos] === '-' && pos + 1 < input.length && isDigit(input[pos + 1]))
      ) {
        const numberToken = this.extractNumber(input, pos);
        if (numberToken) {
          tokens.push(numberToken);
          pos = numberToken.position.end;
          continue;
        }
      }

      const varToken = this.tryVariableRef(input, pos);
      if (varToken) {
        tokens.push(varToken);
        pos = varToken.position.end;
        continue;
      }

      if (isSwahiliLetter(input[pos])) {
        // Multi-word phrases must be tried before single words; otherwise
        // their first word is consumed on its own and the phrase never matches.
        const phraseToken = this.matchSwahiliPhrase(input, pos);
        if (phraseToken) {
          tokens.push(phraseToken);
          pos = phraseToken.position.end;
          continue;
        }

        const wordToken = this.extractWord(input, pos);
        if (wordToken) {
          tokens.push(wordToken);
          pos = wordToken.position.end;
          continue;
        }
      }

      const operatorToken = this.tryOperator(input, pos);
      if (operatorToken) {
        tokens.push(operatorToken);
        pos = operatorToken.position.end;
        continue;
      }

      // Nothing recognised this character: skip it.
      pos++;
    }

    return new TokenStreamImpl(tokens, 'sw');
  }

  /**
   * Classify a standalone token string.
   *
   * NOTE(review): prepositions are checked before keywords here, whereas
   * `extractWord` checks keywords first. A word present in both sets (e.g.
   * 'mpaka') is therefore 'particle' here but 'keyword' when tokenized.
   * Confirm which precedence is intended before aligning them.
   *
   * @param token - Token text to classify.
   * @returns The token kind for `token`.
   */
  classifyToken(token: string): TokenKind {
    const lower = token.toLowerCase();
    if (PREPOSITIONS.has(lower)) return 'particle';
    // O(1) Map lookup instead of O(n) array search
    if (this.isKeyword(lower)) return 'keyword';
    if (token.startsWith('#') || token.startsWith('.') || token.startsWith('[')) return 'selector';
    if (token.startsWith('"') || token.startsWith("'")) return 'literal';
    if (/^\d/.test(token)) return 'literal';
    return 'identifier';
  }

  /**
   * Try to match one of the multi-word SWAHILI_EXTRAS phrases at `pos`.
   *
   * Matching is case-insensitive, requires the words to be separated exactly
   * as written in the entry, and requires that the phrase is not immediately
   * followed by another identifier character (so 'piga simulizi' does not
   * match 'piga simu').
   *
   * @param input - Full source text.
   * @param pos - Index at which the phrase would start.
   * @returns A keyword token covering the phrase, or null when none matches.
   */
  private matchSwahiliPhrase(input: string, pos: number): LanguageToken | null {
    for (const entry of SWAHILI_MULTI_WORD_EXTRAS) {
      const end = pos + entry.native.length;
      if (end > input.length) continue;

      const candidate = input.slice(pos, end);
      if (candidate.toLowerCase() !== entry.native) continue;

      // Reject a match that stops in the middle of a longer word.
      if (end < input.length && isSwahiliIdentifierChar(input[end])) continue;

      return createToken(candidate, 'keyword', createPosition(pos, end), entry.normalized);
    }

    return null;
  }

  /**
   * Extract a single word starting at `startPos` and classify it as a
   * keyword, a particle (preposition), or a plain identifier, in that order.
   *
   * @param input - Full source text.
   * @param startPos - Index of the word's first character.
   * @returns The word token, or null when no identifier character is found.
   */
  private extractWord(input: string, startPos: number): LanguageToken | null {
    let pos = startPos;
    let word = '';

    while (pos < input.length && isSwahiliIdentifierChar(input[pos])) {
      word += input[pos++];
    }

    if (!word) return null;

    const lower = word.toLowerCase();

    // O(1) Map lookup instead of O(n) array search
    const keywordEntry = this.lookupKeyword(lower);
    if (keywordEntry) {
      return createToken(word, 'keyword', createPosition(startPos, pos), keywordEntry.normalized);
    }

    if (PREPOSITIONS.has(lower)) {
      return createToken(word, 'particle', createPosition(startPos, pos));
    }

    return createToken(word, 'identifier', createPosition(startPos, pos));
  }

  /**
   * Extract a numeric literal, folding a following Swahili time-unit word
   * into a suffix (e.g. '5 sekunde' becomes the literal '5s').
   *
   * The unit may be separated from the number by whitespace. It is only
   * accepted when it is a whole word: 'saa' in '5 saana' is not treated as
   * a unit, and the token then ends right after the digits.
   *
   * @param input - Full source text.
   * @param startPos - Index of the sign or first digit.
   * @returns A literal token, or null when no number text was consumed.
   */
  private extractNumber(input: string, startPos: number): LanguageToken | null {
    let pos = startPos;
    let number = '';

    if (input[pos] === '-' || input[pos] === '+') {
      number += input[pos++];
    }

    while (pos < input.length && isDigit(input[pos])) {
      number += input[pos++];
    }

    if (pos < input.length && input[pos] === '.') {
      number += input[pos++];
      while (pos < input.length && isDigit(input[pos])) {
        number += input[pos++];
      }
    }

    // Look past whitespace for a unit word; `pos` only moves if one matches,
    // so trailing whitespace is not swallowed by a unit-less number.
    let unitPos = pos;
    while (unitPos < input.length && isWhitespace(input[unitPos])) {
      unitPos++;
    }

    for (const [unitWord, suffix] of SWAHILI_TIME_UNIT_SUFFIXES) {
      const unitEnd = unitPos + unitWord.length;
      // Compare only the candidate span instead of lowercasing the whole tail.
      if (input.slice(unitPos, unitEnd).toLowerCase() !== unitWord) continue;
      // The unit must end at a word boundary.
      if (unitEnd < input.length && isSwahiliIdentifierChar(input[unitEnd])) continue;

      number += suffix;
      pos = unitEnd;
      break;
    }

    if (!number || number === '-' || number === '+') return null;

    return createToken(number, 'literal', createPosition(startPos, pos));
  }
}
302
+
303
// Constructed once when this module is evaluated; importers share this instance.
export const swahiliTokenizer: SwahiliTokenizer = new SwahiliTokenizer();