@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,326 @@
1
+ /**
2
+ * French Tokenizer
3
+ *
4
+ * Tokenizes French hyperscript input.
5
+ * French characteristics:
6
+ * - SVO word order
7
+ * - Space-separated words
8
+ * - Prepositions
9
+ * - Accent marks (é, è, ê, ë, à, â, ù, û, ô, î, ï, ç, œ, æ)
10
+ */
11
+
12
+ import type { LanguageToken, TokenKind, TokenStream } from '../types';
13
+ import {
14
+ BaseTokenizer,
15
+ TokenStreamImpl,
16
+ createToken,
17
+ createPosition,
18
+ createLatinCharClassifiers,
19
+ isWhitespace,
20
+ isSelectorStart,
21
+ isQuote,
22
+ isDigit,
23
+ isUrlStart,
24
+ type KeywordEntry,
25
+ type TimeUnitMapping,
26
+ } from './base';
27
+ import { frenchProfile } from '../generators/profiles/french';
28
+
29
+ // =============================================================================
30
+ // French Character Classification
31
+ // =============================================================================
32
+
33
// Character predicates for French text, built from a single-character pattern:
// ASCII letters plus the accented letters and ligatures listed in the class.
const frenchCharClassifiers = createLatinCharClassifiers(
  /[a-zA-ZàâäéèêëîïôùûüçœæÀÂÄÉÈÊËÎÏÔÙÛÜÇŒÆ]/
);
const isFrenchLetter = frenchCharClassifiers.isLetter;
const isFrenchIdentifierChar = frenchCharClassifiers.isIdentifierChar;
35
+
36
+ // =============================================================================
37
+ // French Prepositions
38
+ // =============================================================================
39
+
40
/**
 * French prepositions and contracted forms, stored lower-case.
 * The tokenizer uses membership in this set to label a word as a `particle`.
 */
const PREPOSITIONS = new Set<string>([
  // à / de and their contractions with le / les
  'à', // to, at
  'a', // to, at (accent-free spelling)
  'de', // of, from
  'du', // de + le
  'des', // de + les

  // Place and accompaniment
  'dans', // in
  'sur', // on
  'sous', // under
  'avec', // with
  'sans', // without
  'par', // by
  'pour', // for
  'entre', // between

  // Time and direction
  'avant', // before
  'après', // after
  'apres', // after (accent-free spelling)
  'depuis', // since, from
  'vers', // towards
  'chez', // at (someone's place)
  'contre', // against

  // Contractions of à
  'au', // à + le
  'aux', // à + les
]);
64
+
65
+ // =============================================================================
66
+ // French Extras (keywords not in profile)
67
+ // =============================================================================
68
+
69
/**
 * Supplementary French keywords passed to the tokenizer alongside the
 * language profile. Each row is `[native spelling, normalized keyword]`:
 * - literals (true, false, null, undefined)
 * - positional words
 * - event names
 * - references and time units
 * - accent-free spellings of accented keywords
 *
 * NOTE(review): rows whose native spelling contains a space are multi-word
 * phrases; they can only match if the keyword lookup is handed the whole
 * phrase rather than a single word.
 */
const FRENCH_EXTRAS: KeywordEntry[] = (
  [
    // Values / literals
    ['vrai', 'true'],
    ['faux', 'false'],
    ['nul', 'null'],
    ['indéfini', 'undefined'],
    ['indefini', 'undefined'],

    // Positional words
    ['premier', 'first'],
    ['première', 'first'],
    ['premiere', 'first'],
    ['dernier', 'last'],
    ['dernière', 'last'],
    ['derniere', 'last'],
    ['suivant', 'next'],
    ['précédent', 'previous'],
    ['precedent', 'previous'],
    ['plus proche', 'closest'],
    ['parent', 'parent'],

    // Event names
    ['clic', 'click'],
    ['click', 'click'],
    ['entrée', 'input'],
    ['entree', 'input'],
    ['changement', 'change'],
    ['soumission', 'submit'],
    ['touche bas', 'keydown'],
    ['touche haut', 'keyup'],
    ['souris dessus', 'mouseover'],
    ['souris dehors', 'mouseout'],
    ['focus', 'focus'],
    ['flou', 'blur'],
    ['chargement', 'load'],
    ['défilement', 'scroll'],
    ['defilement', 'scroll'],

    // Additional references
    ['je', 'me'],
    ['mon', 'my'],
    ['ma', 'my'],
    ['mes', 'my'],
    ['ça', 'it'],
    ['ca', 'it'],
    ['resultat', 'result'],
    ['evenement', 'event'],

    // Time units
    ['seconde', 's'],
    ['secondes', 's'],
    ['milliseconde', 'ms'],
    ['millisecondes', 'ms'],
    ['minute', 'm'],
    ['minutes', 'm'],
    ['heure', 'h'],
    ['heures', 'h'],

    // Accent-free variants (for user convenience)
    ['prefixer', 'prepend'],
    ['creer', 'make'],
    ['definir', 'set'],
    ['etablir', 'set'],
    ['incrementer', 'increment'],
    ['decrementer', 'decrement'],
    ['declencher', 'trigger'],
    ['defocaliser', 'blur'],
    ['recuperer', 'fetch'],
    ['repeter', 'repeat'],
    ['arreter', 'halt'],
    ['defaut', 'default'],
    ['jusqua', 'until'],
    ['apres', 'after'],

    // Additional log synonym
    ['journaliser', 'log'],

    // Additional morph synonym
    ['transmuter', 'morph'],

    // Multi-word phrases
    ['tant que', 'while'],
  ] as const
).map(([native, normalized]) => ({ native, normalized }));
160
+
161
+ // =============================================================================
162
+ // French Time Units
163
+ // =============================================================================
164
+
165
/**
 * French time-unit words recognised after a number, as
 * `[unit word, normalized suffix]` rows. Rows are ordered longest word
 * first so that a plural is tried before the singular it starts with.
 * Every mapping is case-insensitive and records the length of its word.
 */
const FRENCH_TIME_UNITS: readonly TimeUnitMapping[] = (
  [
    ['millisecondes', 'ms'],
    ['milliseconde', 'ms'],
    ['secondes', 's'],
    ['seconde', 's'],
    ['minutes', 'm'],
    ['minute', 'm'],
    ['heures', 'h'],
    ['heure', 'h'],
  ] as const
).map(([pattern, suffix]) => ({
  pattern,
  suffix,
  length: pattern.length,
  caseInsensitive: true,
}));
179
+
180
+ // =============================================================================
181
+ // French Tokenizer Implementation
182
+ // =============================================================================
183
+
184
/**
 * Tokenizer for French hyperscript input.
 *
 * The keyword table is populated in the constructor from `frenchProfile`
 * plus the `FRENCH_EXTRAS` entries declared in this file. The individual
 * scanners (`trySelector`, `tryString`, `tryUrl`, ...) come from
 * `BaseTokenizer`; this class fixes the order in which they are tried and
 * adds French word and number extraction.
 */
export class FrenchTokenizer extends BaseTokenizer {
  readonly language = 'fr';
  readonly direction = 'ltr' as const;

  constructor() {
    super();
    this.initializeKeywordsFromProfile(frenchProfile, FRENCH_EXTRAS);
  }

  /**
   * Convert `input` into a token stream.
   *
   * At each position the scanners are tried in a fixed order: whitespace
   * (skipped), event modifier, selector, quoted string, URL, number,
   * variable reference, word, operator. A character that no scanner accepts
   * is skipped without producing a token.
   *
   * @param input Raw French hyperscript text.
   * @returns A token stream tagged with the language code 'fr'.
   */
  tokenize(input: string): TokenStream {
    const tokens: LanguageToken[] = [];
    let pos = 0;

    while (pos < input.length) {
      if (isWhitespace(input[pos])) {
        pos++;
        continue;
      }

      if (isSelectorStart(input[pos])) {
        // Check for event modifier first (.once, .debounce(), etc.)
        const modifierToken = this.tryEventModifier(input, pos);
        if (modifierToken) {
          tokens.push(modifierToken);
          pos = modifierToken.position.end;
          continue;
        }

        const selectorToken = this.trySelector(input, pos);
        if (selectorToken) {
          tokens.push(selectorToken);
          pos = selectorToken.position.end;
          continue;
        }
      }

      if (isQuote(input[pos])) {
        const stringToken = this.tryString(input, pos);
        if (stringToken) {
          tokens.push(stringToken);
          pos = stringToken.position.end;
          continue;
        }
      }

      if (isUrlStart(input, pos)) {
        const urlToken = this.tryUrl(input, pos);
        if (urlToken) {
          tokens.push(urlToken);
          pos = urlToken.position.end;
          continue;
        }
      }

      // A leading '-' only starts a number when a digit follows immediately.
      if (
        isDigit(input[pos]) ||
        (input[pos] === '-' && pos + 1 < input.length && isDigit(input[pos + 1]))
      ) {
        const numberToken = this.extractNumber(input, pos);
        if (numberToken) {
          tokens.push(numberToken);
          pos = numberToken.position.end;
          continue;
        }
      }

      const varToken = this.tryVariableRef(input, pos);
      if (varToken) {
        tokens.push(varToken);
        pos = varToken.position.end;
        continue;
      }

      if (isFrenchLetter(input[pos])) {
        const wordToken = this.extractWord(input, pos);
        if (wordToken) {
          tokens.push(wordToken);
          pos = wordToken.position.end;
          continue;
        }
      }

      const operatorToken = this.tryOperator(input, pos);
      if (operatorToken) {
        tokens.push(operatorToken);
        pos = operatorToken.position.end;
        continue;
      }

      // Nothing recognised this character: drop it and move on.
      pos++;
    }

    return new TokenStreamImpl(tokens, 'fr');
  }

  /**
   * Classify an already-isolated token string.
   *
   * NOTE(review): prepositions win over keywords here, whereas `extractWord`
   * checks the keyword table first. A word present in both (e.g. 'apres') is
   * therefore a 'particle' here but a 'keyword' in the token stream. Confirm
   * this difference is intended before changing either order.
   *
   * @param token Token text to classify.
   * @returns The token kind inferred from the text alone.
   */
  classifyToken(token: string): TokenKind {
    const lower = token.toLowerCase();
    if (PREPOSITIONS.has(lower)) return 'particle';
    // O(1) Map lookup instead of O(n) array search
    if (this.isKeyword(lower)) return 'keyword';
    if (token.startsWith('#') || token.startsWith('.') || token.startsWith('[')) return 'selector';
    if (token.startsWith('"') || token.startsWith("'")) return 'literal';
    if (/^\d/.test(token)) return 'literal';
    return 'identifier';
  }

  /**
   * Read the word starting at `startPos` and turn it into a token.
   *
   * Lookup order: two-word keyword phrase (longest match), single-word
   * keyword, preposition, then plain identifier. The phrase step is what
   * makes the multi-word `FRENCH_EXTRAS` entries ('tant que', 'plus proche',
   * 'touche bas', ...) reachable; a single-word lookup can never match them.
   *
   * @param input Full input text.
   * @param startPos Index of the first character of the word.
   * @returns The token, or null when no identifier character is at `startPos`.
   */
  private extractWord(input: string, startPos: number): LanguageToken | null {
    const wordEnd = this.scanFrenchWordEnd(input, startPos);
    if (wordEnd === startPos) return null;

    const word = input.slice(startPos, wordEnd);
    const lower = word.toLowerCase();

    // Prefer the longer match so 'tant que' is not split into two words.
    const phraseToken = this.tryFrenchKeywordPhrase(input, startPos, wordEnd, lower);
    if (phraseToken) return phraseToken;

    // O(1) Map lookup instead of O(n) array search
    const keywordEntry = this.lookupKeyword(lower);
    if (keywordEntry) {
      return createToken(
        word,
        'keyword',
        createPosition(startPos, wordEnd),
        keywordEntry.normalized
      );
    }

    if (PREPOSITIONS.has(lower)) {
      return createToken(word, 'particle', createPosition(startPos, wordEnd));
    }

    return createToken(word, 'identifier', createPosition(startPos, wordEnd));
  }

  /** Return the index just past the run of identifier characters starting at `from`. */
  private scanFrenchWordEnd(input: string, from: number): number {
    let pos = from;
    while (pos < input.length && isFrenchIdentifierChar(input[pos])) {
      pos++;
    }
    return pos;
  }

  /**
   * Try to match "<first word> <next word>" as one keyword.
   *
   * Only spaces and tabs may separate the two words, so a phrase never spans
   * a line break. The words are joined with a single space and lower-cased
   * before the lookup; the token text is the original slice of the input.
   *
   * @returns A keyword token covering both words, or null when the next word
   *          is missing or the phrase is not a known keyword.
   */
  private tryFrenchKeywordPhrase(
    input: string,
    startPos: number,
    firstEnd: number,
    firstLower: string
  ): LanguageToken | null {
    let nextStart = firstEnd;
    while (
      nextStart < input.length &&
      (input[nextStart] === ' ' || input[nextStart] === '\t')
    ) {
      nextStart++;
    }

    // Need at least one separator and a following word that starts with a letter.
    if (nextStart === firstEnd || nextStart >= input.length) return null;
    if (!isFrenchLetter(input[nextStart])) return null;

    const secondEnd = this.scanFrenchWordEnd(input, nextStart);
    const phrase = `${firstLower} ${input.slice(nextStart, secondEnd).toLowerCase()}`;

    const phraseEntry = this.lookupKeyword(phrase);
    if (!phraseEntry) return null;

    return createToken(
      input.slice(startPos, secondEnd),
      'keyword',
      createPosition(startPos, secondEnd),
      phraseEntry.normalized
    );
  }

  /**
   * Extract a number, including French time unit suffixes.
   * Delegates to the base helper with sign handling and whitespace skipping
   * enabled, using `FRENCH_TIME_UNITS` as the unit table.
   */
  private extractNumber(input: string, startPos: number): LanguageToken | null {
    return this.tryNumberWithTimeUnits(input, startPos, FRENCH_TIME_UNITS, {
      allowSign: true,
      skipWhitespace: true,
    });
  }
}

/** Shared French tokenizer instance, created once when the module loads. */
export const frenchTokenizer = new FrenchTokenizer();
@@ -0,0 +1,324 @@
1
+ /**
2
+ * German Tokenizer
3
+ *
4
+ * Tokenizes German hyperscript input.
5
+ * German characteristics:
6
+ * - SVO word order (V2 in main clauses, but SVO for our purposes)
7
+ * - Space-separated words
8
+ * - Prepositions
9
+ * - Umlauts (ä, ö, ü) and ß
10
+ * - Compound nouns
11
+ */
12
+
13
+ import type { LanguageToken, TokenKind, TokenStream } from '../types';
14
+ import {
15
+ BaseTokenizer,
16
+ TokenStreamImpl,
17
+ createToken,
18
+ createPosition,
19
+ createLatinCharClassifiers,
20
+ isWhitespace,
21
+ isSelectorStart,
22
+ isQuote,
23
+ isDigit,
24
+ isUrlStart,
25
+ type KeywordEntry,
26
+ type TimeUnitMapping,
27
+ } from './base';
28
+ import { germanProfile } from '../generators/profiles/german';
29
+
30
+ // =============================================================================
31
+ // German Character Classification
32
+ // =============================================================================
33
+
34
// Letter class: ASCII a-z plus the German-specific letters in both cases.
// Capital sharp s (ẞ, U+1E9E) is included next to ß so that a word written
// with it is not split mid-word; it lower-cases to ß, so lower-cased lookups
// see the same spelling as for the small letter.
const { isLetter: isGermanLetter, isIdentifierChar: isGermanIdentifierChar } =
  createLatinCharClassifiers(/[a-zA-ZäöüÄÖÜßẞ]/);
36
+
37
+ // =============================================================================
38
+ // German Prepositions
39
+ // =============================================================================
40
+
41
/**
 * German prepositions, stored lower-case. The tokenizer consults this set to
 * tag a word as a `particle`.
 *
 * Words containing ä/ö/ü/ß are followed by an ASCII fallback spelling
 * (diacritic dropped, ß written as "ss").
 */
const PREPOSITIONS = new Set<string>([
  // an (at, on) · auf (on) · aus (from, out of) · bei (at, near) · durch (through)
  'an', 'auf', 'aus', 'bei', 'durch',
  // für (for) + ASCII fallback
  'für', 'fur',
  // gegen (against) · in (in) · mit (with) · nach (after, to) · ohne (without) · seit (since)
  'gegen', 'in', 'mit', 'nach', 'ohne', 'seit',
  // über (over, about) + ASCII fallback
  'über', 'uber',
  // um (around, at) · unter (under) · von (from, of) · vor (before, in front of)
  'um', 'unter', 'von', 'vor',
  // zu (to) · zwischen (between) · bis (until)
  'zu', 'zwischen', 'bis',
  // gegenüber (opposite) + ASCII fallback
  'gegenüber', 'gegenuber',
  // während (during) + ASCII fallback
  'während', 'wahrend',
  // wegen (because of) · trotz (despite) · statt (instead of) · innerhalb (inside)
  'wegen', 'trotz', 'statt', 'innerhalb',
  // außerhalb (outside) + "ss" fallback for ß
  'außerhalb', 'ausserhalb',
]);
75
+
76
+ // =============================================================================
77
+ // German Extras (keywords not in profile)
78
+ // =============================================================================
79
+
80
/**
 * Keywords passed to `initializeKeywordsFromProfile` in addition to the
 * German profile:
 * - literals (true, false, null, undefined)
 * - positional words
 * - event names
 * - extra references
 * - time units
 * - spellings without umlauts / ß, for user convenience
 * - imperative verb forms
 *
 * Each row below is `[native, normalized]`; rows are expanded, in the listed
 * order, into `{ native, normalized }` entries.
 */
const GERMAN_EXTRAS: KeywordEntry[] = (
  [
    // Values / literals
    ['wahr', 'true'],
    ['falsch', 'false'],
    ['null', 'null'],
    ['undefiniert', 'undefined'],

    // Positional
    ['erste', 'first'],
    ['erster', 'first'],
    ['erstes', 'first'],
    ['letzte', 'last'],
    ['letzter', 'last'],
    ['letztes', 'last'],
    ['nächste', 'next'],
    ['nachste', 'next'],
    ['vorherige', 'previous'],
    // NOTE(review): 'nächste' is listed a second time with a different
    // normalized value; which mapping is effective depends on how the
    // keyword table resolves duplicate natives - confirm.
    ['nächste', 'closest'],
    ['eltern', 'parent'],

    // Events
    // NOTE(review): several natives below contain a space; whether the
    // word scanner can ever produce such a token is not visible here - verify.
    ['klick', 'click'],
    ['click', 'click'],
    ['eingabe', 'input'],
    ['änderung', 'change'],
    ['anderung', 'change'],
    ['absenden', 'submit'],
    ['taste unten', 'keydown'],
    ['taste oben', 'keyup'],
    ['maus drüber', 'mouseover'],
    ['maus druber', 'mouseover'],
    ['maus weg', 'mouseout'],
    ['fokus', 'focus'],
    ['unschärfe', 'blur'],
    ['unscharfe', 'blur'],
    ['scrollen', 'scroll'],

    // Additional references
    ['meine', 'my'],
    ['meinen', 'my'],
    ['ergebnis', 'result'],
    ['ziel', 'target'],

    // Time units
    ['sekunde', 's'],
    ['sekunden', 's'],
    ['millisekunde', 'ms'],
    ['millisekunden', 'ms'],
    ['minute', 'm'],
    ['minuten', 'm'],
    ['stunde', 'h'],
    ['stunden', 'h'],

    // Umlaut-free variants (for user convenience)
    ['hinzufugen', 'add'],
    ['hinzufgen', 'add'],
    ['loschen', 'remove'],
    ['anhangen', 'append'],
    ['erhohen', 'increment'],
    ['ubergang', 'transition'],
    ['auslosen', 'trigger'],
    ['zuruckgeben', 'return'],
    ['anschliessend', 'then'],

    // Verb conjugation variants (imperatives for test cases)
    ['erhöhe', 'increment'],
    ['erhohe', 'increment'],
    ['verringere', 'decrement'],
  ] as const
).map(([native, normalized]) => ({ native, normalized }));
158
+
159
+ // =============================================================================
160
+ // German Time Units
161
+ // =============================================================================
162
+
163
/**
 * German time unit patterns for number parsing.
 *
 * Entries are ordered longest pattern first, so a plural such as "sekunden"
 * is always listed before its prefix "sekunde". `length` is derived from the
 * pattern rather than typed by hand, and the final stable sort guarantees the
 * longest-first order even if a row is later added out of place (rows of
 * equal length keep their listed order).
 */
const GERMAN_TIME_UNITS: readonly TimeUnitMapping[] = (
  [
    ['millisekunden', 'ms'],
    ['millisekunde', 'ms'],
    ['sekunden', 's'],
    ['sekunde', 's'],
    ['minuten', 'm'],
    ['stunden', 'h'],
    ['minute', 'm'],
    ['stunde', 'h'],
  ] as const
)
  .map(([pattern, suffix]) => ({
    pattern,
    suffix,
    length: pattern.length,
    caseInsensitive: true,
  }))
  .sort((a, b) => b.length - a.length);
177
+
178
+ // =============================================================================
179
+ // German Tokenizer Implementation
180
+ // =============================================================================
181
+
182
/**
 * Tokenizer for German hyperscript input.
 *
 * Scans the input left to right. At every non-whitespace position the
 * recognisers are tried in a fixed priority order (event modifier / selector,
 * string, URL, number, variable reference, word, operator); the first one
 * that yields a token wins. A character no recogniser accepts is skipped.
 */
export class GermanTokenizer extends BaseTokenizer {
  readonly language = 'de';
  readonly direction = 'ltr' as const;

  constructor() {
    super();
    // Keyword table = German profile vocabulary + the extras declared in this file.
    this.initializeKeywordsFromProfile(germanProfile, GERMAN_EXTRAS);
  }

  /**
   * Split `input` into a stream of language tokens.
   *
   * @param input German hyperscript source text.
   * @returns A token stream tagged with language code `'de'`.
   */
  tokenize(input: string): TokenStream {
    // Try every recogniser at index `at`; yields the first token found, or
    // whatever the operator recogniser returns when nothing else matched.
    const readTokenAt = (at: number) => {
      const ch = input[at];

      if (isSelectorStart(ch)) {
        // Event modifiers (.once, .debounce(), ...) take precedence over selectors.
        const selectorLike = this.tryEventModifier(input, at) ?? this.trySelector(input, at);
        if (selectorLike) return selectorLike;
      }

      if (isQuote(ch)) {
        const quoted = this.tryString(input, at);
        if (quoted) return quoted;
      }

      if (isUrlStart(input, at)) {
        const url = this.tryUrl(input, at);
        if (url) return url;
      }

      // A number starts with a digit, or with '-' immediately followed by a digit.
      const startsNumber =
        isDigit(ch) || (ch === '-' && at + 1 < input.length && isDigit(input[at + 1]));
      if (startsNumber) {
        const numeric = this.extractNumber(input, at);
        if (numeric) return numeric;
      }

      const variable = this.tryVariableRef(input, at);
      if (variable) return variable;

      if (isGermanLetter(ch)) {
        const word = this.extractWord(input, at);
        if (word) return word;
      }

      return this.tryOperator(input, at);
    };

    const tokens: LanguageToken[] = [];
    let pos = 0;

    while (pos < input.length) {
      const token = isWhitespace(input[pos]) ? null : readTokenAt(pos);
      if (token) {
        tokens.push(token);
        pos = token.position.end;
      } else {
        // Whitespace or an unrecognised character: step over it.
        pos += 1;
      }
    }

    return new TokenStreamImpl(tokens, 'de');
  }

  /**
   * Classify an already-isolated token string.
   *
   * Checks run in this order: preposition -> keyword -> selector prefix
   * (`#`, `.`, `[`) -> quoted or digit-led literal -> identifier.
   *
   * NOTE(review): prepositions are tested before keywords here, whereas
   * extractWord tests keywords first; a word present in both sets is
   * therefore reported differently by the two methods - confirm intended.
   *
   * @param token Raw token text.
   * @returns The token kind for `token`.
   */
  classifyToken(token: string): TokenKind {
    const lowered = token.toLowerCase();
    const startsWithAny = (...prefixes: string[]): boolean =>
      prefixes.some(prefix => token.startsWith(prefix));

    if (PREPOSITIONS.has(lowered)) {
      return 'particle';
    }
    if (this.isKeyword(lowered)) {
      return 'keyword';
    }
    if (startsWithAny('#', '.', '[')) {
      return 'selector';
    }
    if (startsWithAny('"', "'") || /^\d/.test(token)) {
      return 'literal';
    }
    return 'identifier';
  }

  /**
   * Read the run of German identifier characters beginning at `startPos`
   * and wrap it in a token.
   *
   * The lower-cased word is looked up as a keyword first (the token then
   * carries the keyword's normalized form), then as a preposition
   * (`particle`); anything else becomes an `identifier`.
   *
   * @param input Full source text.
   * @param startPos Index at which the word starts.
   * @returns The word token, or `null` when no identifier character is
   *   found at `startPos`.
   */
  private extractWord(input: string, startPos: number): LanguageToken | null {
    // Advance to the first character that is not part of the word.
    let end = startPos;
    while (end < input.length && isGermanIdentifierChar(input[end])) {
      end += 1;
    }

    const word = input.slice(startPos, end);
    if (word === '') {
      return null;
    }

    const lowered = word.toLowerCase();
    const keyword = this.lookupKeyword(lowered);
    const span = createPosition(startPos, end);

    if (keyword) {
      return createToken(word, 'keyword', span, keyword.normalized);
    }

    const kind: TokenKind = PREPOSITIONS.has(lowered) ? 'particle' : 'identifier';
    return createToken(word, kind, span);
  }

  /**
   * Extract a number, including German time unit suffixes.
   *
   * Delegates to the shared number scanner with the German unit table; both
   * the sign and the whitespace-skipping options are switched on.
   */
  private extractNumber(input: string, startPos: number): LanguageToken | null {
    const numberOptions = { allowSign: true, skipWhitespace: true };
    return this.tryNumberWithTimeUnits(input, startPos, GERMAN_TIME_UNITS, numberOptions);
  }
}
323
+
324
/** Module-level German tokenizer instance, constructed once at import time. */
export const germanTokenizer: GermanTokenizer = new GermanTokenizer();