@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,1181 @@
1
+ /**
2
+ * Pattern Matcher
3
+ *
4
+ * Matches tokenized input against language patterns to extract semantic roles.
5
+ * This is the core algorithm for multilingual parsing.
6
+ */
7
+
8
+ import type {
9
+ LanguagePattern,
10
+ PatternToken,
11
+ PatternMatchResult,
12
+ SemanticRole,
13
+ SemanticValue,
14
+ TokenStream,
15
+ LanguageToken,
16
+ } from '../types';
17
+ import { createSelector, createLiteral, createReference, createPropertyPath } from '../types';
18
+ import { isTypeCompatible } from './utils/type-validation';
19
+ import { getPossessiveReference } from './utils/possessive-keywords';
20
+ import type { LanguageProfile } from '../generators/profiles/types';
21
+ import { tryGetProfile } from '../registry';
22
+
23
+ // =============================================================================
24
+ // Pattern Matcher
25
+ // =============================================================================
26
+
27
+ export class PatternMatcher {
28
+ /** Current language profile for the pattern being matched */
29
+ private currentProfile: LanguageProfile | undefined;
30
+
31
/**
 * Attempt to match one pattern at the stream's current position.
 *
 * When the template matches, the stream stays positioned after the tokens it
 * consumed. When it does not, the stream is rewound to where it started.
 *
 * @param tokens - Token stream to read from.
 * @param pattern - Language pattern whose template tokens are matched.
 * @returns The match result, or `null` when the pattern does not match.
 */
matchPattern(tokens: TokenStream, pattern: LanguagePattern): PatternMatchResult | null {
  const start = tokens.mark();
  const roles = new Map<SemanticRole, SemanticValue>();

  // The profile of the pattern's language drives possessive keyword lookup.
  this.currentProfile = tryGetProfile(pattern.language);

  // Per-pattern counters start from zero on every attempt.
  this.stemMatchCount = 0;
  this.totalKeywordMatches = 0;

  if (!this.matchTokenSequence(tokens, pattern.template.tokens, roles)) {
    tokens.reset(start);
    return null;
  }

  // Confidence is computed from what was actually captured, so roles filled
  // in by defaults afterwards cannot inflate it.
  const confidence = this.calculateConfidence(pattern, roles);

  // Fill any roles that are still missing using the pattern's extraction rules.
  this.applyExtractionRules(pattern, roles);

  const consumedTokens = tokens.position() - start.position;
  return { pattern, captured: roles, consumedTokens, confidence };
}
67
+
68
/**
 * Try every pattern from the same starting position and return the winner.
 *
 * Candidates are ranked by pattern priority (higher first) and, for equal
 * priority, by confidence (higher first). The winning pattern is matched once
 * more so the stream ends up advanced past its tokens.
 *
 * @param tokens - Token stream to read from.
 * @param patterns - Patterns to attempt.
 * @returns The best match, or `null` when no pattern matches.
 */
matchBest(tokens: TokenStream, patterns: LanguagePattern[]): PatternMatchResult | null {
  const candidates: PatternMatchResult[] = [];

  for (const candidate of patterns) {
    const start = tokens.mark();
    const outcome = this.matchPattern(tokens, candidate);
    if (outcome) candidates.push(outcome);
    // Every attempt must begin at the same position, so rewind either way.
    tokens.reset(start);
  }

  if (candidates.length === 0) return null;

  candidates.sort((left, right) => {
    const byPriority = right.pattern.priority - left.pattern.priority;
    return byPriority !== 0 ? byPriority : right.confidence - left.confidence;
  });

  const [winner] = candidates;

  // All attempts above were rewound; replay the winner to consume its tokens.
  this.matchPattern(tokens, winner.pattern);

  return winner;
}
105
+
106
/**
 * Match an ordered list of pattern tokens against the stream.
 *
 * @param tokens - Token stream to read from.
 * @param patternTokens - Pattern tokens to match, in order.
 * @param captured - Map that receives the semantic values of matched roles.
 * @returns `true` when every pattern token either matched or is optional.
 */
private matchTokenSequence(
  tokens: TokenStream,
  patternTokens: PatternToken[],
  captured: Map<SemanticRole, SemanticValue>
): boolean {
  // Arabic input may start with conjunction proclitics (و, ف, ول, وب, etc.);
  // drop them before matching.
  if (this.currentProfile?.code === 'ar') {
    for (let next = tokens.peek(); next?.kind === 'conjunction'; next = tokens.peek()) {
      tokens.advance();
    }
  }

  // Stop at the first pattern token that neither matches nor is optional.
  return patternTokens.every(
    (expected) => this.matchPatternToken(tokens, expected, captured) || this.isOptional(expected)
  );
}
135
+
136
/**
 * Dispatch one pattern token to the matcher for its `type`.
 *
 * @param tokens - Token stream to read from.
 * @param patternToken - Pattern token to match at the current position.
 * @param captured - Map that receives the semantic values of matched roles.
 * @returns The matcher's result, or `false` for an unrecognised token type.
 */
private matchPatternToken(
  tokens: TokenStream,
  patternToken: PatternToken,
  captured: Map<SemanticRole, SemanticValue>
): boolean {
  if (patternToken.type === 'literal') {
    return this.matchLiteralToken(tokens, patternToken);
  }
  if (patternToken.type === 'role') {
    return this.matchRoleToken(tokens, patternToken, captured);
  }
  if (patternToken.type === 'group') {
    return this.matchGroupToken(tokens, patternToken, captured);
  }
  return false;
}
158
+
159
/**
 * Match a literal pattern token (keyword or particle).
 *
 * The token's main value is tried first, then each alternative in order. The
 * first candidate that matches consumes the current stream token and updates
 * the keyword and stem match counters.
 *
 * @param tokens - Token stream to read from.
 * @param patternToken - Literal pattern token with its value and alternatives.
 * @returns `true` when the current stream token matched and was consumed.
 */
private matchLiteralToken(
  tokens: TokenStream,
  patternToken: PatternToken & { type: 'literal' }
): boolean {
  const current = tokens.peek();
  if (!current) return false;

  const candidates = [patternToken.value, ...(patternToken.alternatives ?? [])];

  for (const candidate of candidates) {
    const matchType = this.getMatchType(current, candidate);
    if (matchType === 'none') continue;

    this.totalKeywordMatches++;
    if (matchType === 'stem') {
      this.stemMatchCount++;
    }
    tokens.advance();
    return true;
  }

  return false;
}
197
+
198
/**
 * Match a role pattern token and capture its semantic value.
 *
 * Multi-token expressions are tried first, in this order:
 *  - possessive keyword + property ('my value', 'its innerHTML')
 *  - method call ('#dialog.showModal()')
 *  - possessive selector + property ("#element's *opacity")
 *  - property access ('userData.name', 'it.data')
 *  - selector + property ('#output.innerText', tokenized as two selectors)
 * If none of them produces a value, the current single token is converted.
 *
 * The first strategy that produces a value decides the outcome: the value is
 * captured if it satisfies `expectedTypes`, otherwise the role is treated as
 * unmatched. In the unmatched case the stream is rewound to where the
 * expression started, so an optional role never discards tokens it did not
 * capture.
 *
 * @param tokens - Token stream to read from.
 * @param patternToken - Role pattern token (role name, expected types, optional flag).
 * @param captured - Map that receives the captured value under the role.
 * @returns `true` when a value was captured, or when nothing acceptable was
 *   found and the role is optional; `false` otherwise.
 */
private matchRoleToken(
  tokens: TokenStream,
  patternToken: PatternToken & { type: 'role' },
  captured: Map<SemanticRole, SemanticValue>
): boolean {
  // Skip noise words like "the" before selectors (English idiom support)
  this.skipNoiseWords(tokens);

  // Result reported whenever no acceptable value is found.
  const onMiss = patternToken.optional || false;

  const token = tokens.peek();
  if (!token) {
    return onMiss;
  }

  const expectedTypes = patternToken.expectedTypes;

  // Accepts the value's own type, or any value when 'expression' is expected.
  const acceptsExactOrExpression = (value: SemanticValue): boolean =>
    !expectedTypes ||
    expectedTypes.length === 0 ||
    expectedTypes.includes(value.type) ||
    expectedTypes.includes('expression');

  // Accepts according to the shared compatibility rules (used for the
  // property-path values built from selectors).
  const acceptsCompatible = (value: SemanticValue): boolean =>
    !expectedTypes ||
    expectedTypes.length === 0 ||
    isTypeCompatible(value.type, expectedTypes);

  // Multi-token strategies, each paired with the type check applied to it.
  const strategies: Array<
    [() => SemanticValue | null | undefined, (value: SemanticValue) => boolean]
  > = [
    [() => this.tryMatchPossessiveExpression(tokens), acceptsExactOrExpression],
    [() => this.tryMatchMethodCallExpression(tokens), acceptsExactOrExpression],
    [() => this.tryMatchPossessiveSelectorExpression(tokens), acceptsCompatible],
    [() => this.tryMatchPropertyAccessExpression(tokens), acceptsExactOrExpression],
    [() => this.tryMatchSelectorPropertyExpression(tokens), acceptsCompatible],
  ];

  // A successful strategy has already advanced the stream. Remember the
  // starting point so a type rejection can undo that consumption.
  const expressionStart = tokens.mark();

  for (const [tryMatch, accepts] of strategies) {
    const value = tryMatch();
    if (!value) continue;

    if (!accepts(value)) {
      tokens.reset(expressionStart);
      return onMiss;
    }

    captured.set(patternToken.role, value);
    return true;
  }

  // Fall back to extracting a semantic value from the single current token.
  const value = this.tokenToSemanticValue(token);
  if (!value) {
    return onMiss;
  }

  // The single-token path requires an exact type match when types are given.
  if (expectedTypes && expectedTypes.length > 0 && !expectedTypes.includes(value.type)) {
    return onMiss;
  }

  captured.set(patternToken.role, value);
  tokens.advance();
  return true;
}
307
+
308
/**
 * Try to read a possessive expression such as 'my value' or 'its innerHTML'.
 *
 * The first token must be a possessive keyword of the current language
 * profile. The token after it must be an identifier, a non-structural
 * keyword, or a selector starting with '*' (style properties such as
 * 'my *opacity'). On success both tokens are consumed; otherwise the stream
 * is left where it was.
 *
 * @param tokens - Token stream to read from.
 * @returns A property-path value (e.g. my value -> { object: me, property: 'value' }),
 *   or `null` when no possessive expression starts here.
 */
private tryMatchPossessiveExpression(tokens: TokenStream): SemanticValue | null {
  const owner = tokens.peek();
  const profile = this.currentProfile;
  // Possessive keywords come from the language profile; without one, bail out.
  if (!owner || !profile) return null;

  const ownerKey = (owner.normalized || owner.value).toLowerCase();
  const baseRef = getPossessiveReference(profile, ownerKey);
  if (!baseRef) return null;

  // Step past the possessive keyword, keeping a way back.
  const rewind = tokens.mark();
  tokens.advance();

  const property = tokens.peek();
  const isPropertyName = property
    ? property.kind === 'identifier' ||
      (property.kind === 'keyword' && !this.isStructuralKeyword(property.value)) ||
      (property.kind === 'selector' && property.value.startsWith('*'))
    : false;

  if (!property || !isPropertyName) {
    // Either nothing follows the keyword or it is not a usable property name.
    tokens.reset(rewind);
    return null;
  }

  tokens.advance();
  return createPropertyPath(createReference(baseRef as any), property.value);
}
352
+
353
/**
 * Words that must never be consumed as a property name: prepositions,
 * control-flow words and command names. Built once for the class instead of
 * being re-created on every lookup.
 */
private static readonly STRUCTURAL_KEYWORDS: ReadonlySet<string> = new Set([
  // Prepositions
  'into',
  'in',
  'to',
  'from',
  'at',
  'by',
  'with',
  'without',
  'before',
  'after',
  'of',
  'as',
  'on',
  // Control flow
  'then',
  'end',
  'else',
  'if',
  'repeat',
  'while',
  'for',
  // Commands (shouldn't be property names)
  'toggle',
  'add',
  'remove',
  'put',
  'set',
  'show',
  'hide',
  'increment',
  'decrement',
  'send',
  'trigger',
  'call',
]);

/**
 * Check if a keyword is a structural keyword (preposition, control flow,
 * command name) that shouldn't be consumed as a property name.
 *
 * @param value Raw keyword text; compared case-insensitively.
 * @returns true when the lower-cased value is in the structural keyword set.
 */
private isStructuralKeyword(value: string): boolean {
  return PatternMatcher.STRUCTURAL_KEYWORDS.has(value.toLowerCase());
}
397
+
398
/**
 * Try to match a method call expression like '#dialog.showModal()'.
 * Pattern: selector + '.' + identifier + '(' + [args] + ')'
 *
 * The argument list is collected as raw token text, skipping commas, and is
 * capped at MAX_METHOD_ARGS entries. The call is only accepted when its
 * closing ')' is actually found; an unterminated or over-long argument list
 * rewinds the stream and yields null rather than being reported as a
 * complete call.
 *
 * @returns An expression value whose `raw` is the reconstructed call text,
 *          or null (with the stream position restored) if nothing matched.
 */
private tryMatchMethodCallExpression(tokens: TokenStream): SemanticValue | null {
  const token = tokens.peek();
  if (!token || token.kind !== 'selector') return null;

  // Look ahead for: . identifier (
  const mark = tokens.mark();
  tokens.advance(); // consume selector

  const dotToken = tokens.peek();
  if (!dotToken || dotToken.kind !== 'operator' || dotToken.value !== '.') {
    tokens.reset(mark);
    return null;
  }
  tokens.advance(); // consume .

  const methodToken = tokens.peek();
  if (!methodToken || methodToken.kind !== 'identifier') {
    tokens.reset(mark);
    return null;
  }
  tokens.advance(); // consume method name

  const openParen = tokens.peek();
  if (!openParen || openParen.kind !== 'punctuation' || openParen.value !== '(') {
    tokens.reset(mark);
    return null;
  }
  tokens.advance(); // consume (

  // Collect arguments until the closing paren.
  const args: string[] = [];
  let closed = false;
  while (!tokens.isAtEnd()) {
    const argToken = tokens.peek();
    if (!argToken) break;

    if (argToken.kind === 'punctuation') {
      if (argToken.value === ')') {
        tokens.advance(); // consume )
        closed = true;
        break;
      }
      if (argToken.value === ',') {
        tokens.advance(); // commas only separate arguments
        continue;
      }
    }

    // Checking the cap here (not in the loop condition) lets a ')' that
    // directly follows the last permitted argument still be consumed.
    if (args.length >= PatternMatcher.MAX_METHOD_ARGS) break;

    args.push(argToken.value);
    tokens.advance();
  }

  if (!closed) {
    // No closing paren (or too many arguments): not a well-formed call.
    tokens.reset(mark);
    return null;
  }

  // Create expression value: #dialog.showModal()
  const methodCall = `${token.value}.${methodToken.value}(${args.join(', ')})`;
  return {
    type: 'expression',
    raw: methodCall,
  } as SemanticValue;
}
458
+
459
/**
 * Try to match a property access expression like 'userData.name' or 'it.data',
 * optionally followed by a call such as 'me.insertBefore(a, b)'.
 * Pattern: (identifier | keyword) + '.' + identifier [+ '.' + identifier ...] [+ '(' args ')']
 *
 * Behaviour worth knowing:
 * - The chain is limited to MAX_PROPERTY_DEPTH property accesses.
 * - A '.' that is not followed by an identifier is left in the stream
 *   (the position is restored to just before that dot).
 * - Call arguments are collected as raw token text, commas are skipped and
 *   nested parentheses are tracked so an inner ')' does not end the call.
 *   Nested '(' and ')' tokens are themselves recorded as argument entries.
 * - A call is only accepted if its closing ')' is found within
 *   MAX_METHOD_ARGS collected entries; otherwise the whole match is
 *   abandoned and the stream rewound.
 *
 * @returns An expression value whose `raw` is the reconstructed text, or
 *          null (with the stream position restored) if nothing matched.
 */
private tryMatchPropertyAccessExpression(tokens: TokenStream): SemanticValue | null {
  const token = tokens.peek();
  if (!token) return null;

  // Must start with an identifier or keyword reference
  if (token.kind !== 'identifier' && token.kind !== 'keyword') return null;

  // Look ahead for: . identifier
  const mark = tokens.mark();
  tokens.advance(); // consume first token

  const dotToken = tokens.peek();
  if (!dotToken || dotToken.kind !== 'operator' || dotToken.value !== '.') {
    tokens.reset(mark);
    return null;
  }
  tokens.advance(); // consume .

  const propertyToken = tokens.peek();
  if (!propertyToken || propertyToken.kind !== 'identifier') {
    tokens.reset(mark);
    return null;
  }
  tokens.advance(); // consume property name

  // Build the property chain
  let chain = `${token.value}.${propertyToken.value}`;
  let depth = 1; // Already have one property access

  // Continue for nested property access (e.g., userData.address.city)
  while (!tokens.isAtEnd() && depth < PatternMatcher.MAX_PROPERTY_DEPTH) {
    const nextDot = tokens.peek();
    if (!nextDot || nextDot.kind !== 'operator' || nextDot.value !== '.') {
      break;
    }
    const beforeDot = tokens.mark();
    tokens.advance(); // consume .

    const nextProp = tokens.peek();
    if (!nextProp || nextProp.kind !== 'identifier') {
      // Dot without a property: give the dot back so it is not lost.
      tokens.reset(beforeDot);
      break;
    }
    tokens.advance(); // consume property
    chain += `.${nextProp.value}`;
    depth++;
  }

  // Plain property access when no call follows: userData.name
  const openParen = tokens.peek();
  if (!openParen || openParen.kind !== 'punctuation' || openParen.value !== '(') {
    return {
      type: 'expression',
      raw: chain,
    } as SemanticValue;
  }
  tokens.advance(); // consume (

  // Method call: chain + '(' + args + ')', e.g. me.insertBefore(draggedItem, dropTarget)
  const args: string[] = [];
  let argDepth = 0; // Track nested parentheses
  let closed = false;
  while (!tokens.isAtEnd()) {
    const argToken = tokens.peek();
    if (!argToken) break;

    if (argToken.kind === 'punctuation') {
      if (argToken.value === ')') {
        if (argDepth === 0) {
          tokens.advance(); // consume the call's own )
          closed = true;
          break;
        }
        argDepth--; // closes a nested paren; recorded as an entry below
      } else if (argToken.value === '(') {
        argDepth++; // opens a nested paren; recorded as an entry below
      } else if (argToken.value === ',') {
        tokens.advance(); // commas only separate arguments
        continue;
      }
    }

    // Checking the cap here (not in the loop condition) lets a ')' that
    // directly follows the last permitted entry still be consumed.
    if (args.length >= PatternMatcher.MAX_METHOD_ARGS) break;

    args.push(argToken.value);
    tokens.advance();
  }

  if (!closed) {
    // Unterminated or over-long argument list: not a well-formed call.
    tokens.reset(mark);
    return null;
  }

  // Create expression value with method call: me.insertBefore(a, b)
  const methodCall = `${chain}(${args.join(', ')})`;
  return {
    type: 'expression',
    raw: methodCall,
  } as SemanticValue;
}
562
+
563
/**
 * Recognise a selector followed by a possessive marker and a property,
 * e.g. "#element's *opacity".
 * Shape: selector, "'s", then a selector or identifier.
 *
 * Consumes all three tokens and returns a property-path on success; on any
 * mismatch the stream is rewound and null is returned.
 */
private tryMatchPossessiveSelectorExpression(tokens: TokenStream): SemanticValue | null {
  const owner = tokens.peek();
  if (!owner || owner.kind !== 'selector') return null;

  const start = tokens.mark();
  const rewind = (): null => {
    tokens.reset(start);
    return null;
  };

  tokens.advance(); // past the selector

  const marker = tokens.peek();
  const isPossessive = !!marker && marker.kind === 'punctuation' && marker.value === "'s";
  if (!isPossessive) return rewind();
  tokens.advance(); // past 's

  // The property is either a style selector (*opacity) or a plain identifier.
  const prop = tokens.peek();
  if (!prop || (prop.kind !== 'selector' && prop.kind !== 'identifier')) {
    return rewind();
  }
  tokens.advance(); // past the property

  return createPropertyPath(createSelector(owner.value), prop.value);
}
603
+
604
/**
 * Try to match a selector + property expression like "#output.innerText".
 * This handles cases where the tokenizer produces two selector tokens:
 * - #output (id selector)
 * - .innerText (looks like a class selector, but is actually a property)
 *
 * Only the first two selector tokens are ever consumed; anything after the
 * property-like selector is left in the stream.
 *
 * @returns A property-path value (selector object, property name without the
 *          leading dot), or null with the stream position restored.
 */
private tryMatchSelectorPropertyExpression(tokens: TokenStream): SemanticValue | null {
  const token = tokens.peek();
  if (!token || token.kind !== 'selector') return null;

  // Must be an ID selector (starts with #)
  if (!token.value.startsWith('#')) return null;

  // Look ahead for: selector that looks like a property (.something)
  const mark = tokens.mark();
  tokens.advance(); // consume first selector

  const propertyToken = tokens.peek();
  if (
    !propertyToken ||
    propertyToken.kind !== 'selector' ||
    // Must look like a class selector (starts with .); it is read as a property access
    !propertyToken.value.startsWith('.')
  ) {
    tokens.reset(mark);
    return null;
  }

  tokens.advance(); // consume property selector

  // Create property-path: #output.innerText
  // Extract property name without the leading dot
  const propertyName = propertyToken.value.slice(1);

  return createPropertyPath(createSelector(token.value), propertyName);
}
652
+
653
/**
 * Match a group pattern token, i.e. a nested token sequence.
 *
 * If the nested sequence fails, the stream is rewound and every role that
 * was captured during the attempt is removed again, so a failed group
 * leaves no trace. The result is then the group's `optional` flag.
 */
private matchGroupToken(
  tokens: TokenStream,
  patternToken: PatternToken & { type: 'group' },
  captured: Map<SemanticRole, SemanticValue>
): boolean {
  const start = tokens.mark();

  // Snapshot of the roles present before the group is attempted.
  const preexisting = new Set(captured.keys());

  if (this.matchTokenSequence(tokens, patternToken.tokens, captured)) {
    return true;
  }

  tokens.reset(start);

  // Drop anything the failed attempt managed to capture.
  for (const role of Array.from(captured.keys())) {
    if (!preexisting.has(role)) {
      captured.delete(role);
    }
  }

  return patternToken.optional || false;
}
681
+
682
/**
 * Classify how a token corresponds to an expected value, from strongest to
 * weakest kind of agreement. The result feeds the confidence calculation.
 *
 * Order of checks: raw value, normalized form, stem (only when the stem
 * confidence is at least 0.7), then a case-insensitive comparison that
 * applies to keyword tokens only.
 */
private getMatchType(
  token: LanguageToken,
  value: string
): 'exact' | 'normalized' | 'stem' | 'case-insensitive' | 'none' {
  if (token.value === value) {
    return 'exact';
  }

  if (token.normalized === value) {
    return 'normalized';
  }

  if (token.stem === value) {
    const confidence = token.stemConfidence;
    // Low-confidence stems are not trusted as a match.
    if (confidence !== undefined && confidence >= 0.7) {
      return 'stem';
    }
  }

  const sameIgnoringCase =
    token.kind === 'keyword' && token.value.toLowerCase() === value.toLowerCase();

  return sameIgnoringCase ? 'case-insensitive' : 'none';
}
709
+
710
/**
 * Counters used by the confidence calculation: how many keyword matches
 * were made in total and how many of those were stem matches.
 * They are written during matching and read when confidence is computed.
 */
private stemMatchCount = 0;
private totalKeywordMatches = 0;

// ==========================================================================
// Depth Limits for Expression Parsing (security hardening)
// ==========================================================================

/** Upper bound on chained property accesses (a.b.c.d...) in one expression. */
private static readonly MAX_PROPERTY_DEPTH = 10;

/** Upper bound on the number of argument entries collected for a method call. */
private static readonly MAX_METHOD_ARGS = 20;
726
+
727
/**
 * Convert a language token to a semantic value.
 *
 * - selector   -> selector value
 * - literal    -> parsed via parseLiteralValue
 * - keyword    -> built-in reference (me, you, it, ...) when the normalized
 *                 form names one, otherwise a literal of the normalized text
 * - identifier -> ':name' and built-in names become references; any other
 *                 identifier becomes an 'expression' value carrying the raw text
 * - url        -> string literal
 *
 * @returns The semantic value, or null for token kinds not listed above.
 */
private tokenToSemanticValue(token: LanguageToken): SemanticValue | null {
  // Names that always resolve to a context reference rather than a literal.
  const builtinReferences = ['me', 'you', 'it', 'result', 'event', 'target', 'body'];

  switch (token.kind) {
    case 'selector':
      return createSelector(token.value);

    case 'literal':
      return this.parseLiteralValue(token.value);

    case 'keyword': {
      // Keywords might be references or values
      const lower = (token.normalized || token.value).toLowerCase();
      if (builtinReferences.includes(lower)) {
        return createReference(lower as any);
      }
      return createLiteral(token.normalized || token.value);
    }

    case 'identifier': {
      // Variable reference written as :varname
      if (token.value.startsWith(':')) {
        return createReference(token.value as any);
      }
      // Built-in reference spelled as an identifier
      const identLower = token.value.toLowerCase();
      if (builtinReferences.includes(identLower)) {
        return createReference(identLower as any);
      }
      // Regular identifiers are variable references - use 'expression' type
      // which gets converted to 'identifier' AST nodes by semantic-integration.ts
      return { type: 'expression', raw: token.value } as const;
    }

    case 'url':
      // URLs are treated as string literals (paths/URLs for navigation/fetch)
      return createLiteral(token.value, 'string');

    default:
      return null;
  }
}
768
+
769
/**
 * Parse a literal token's text into a typed literal value.
 *
 * Recognised forms, in order:
 * 1. Quoted strings opened by ", ', ` or 「. The opening delimiter is always
 *    removed; the closing one is removed only when it is actually present,
 *    so an unterminated string keeps its last character.
 * 2. The exact words 'true' / 'false' -> boolean.
 * 3. Digits with an optional decimal part and an optional ms/s/m/h suffix:
 *    with a suffix the original text is kept as a 'duration', without one the
 *    numeric value is returned as a 'number'.
 * 4. Anything else parseFloat can read -> number.
 * 5. Everything else -> string.
 *
 * @param value Raw literal text from the tokenizer.
 */
private parseLiteralValue(value: string): SemanticValue {
  // Opening delimiter -> matching closing delimiter.
  const closingFor = new Map<string, string>([
    ['"', '"'],
    ["'", "'"],
    ['`', '`'],
    ['「', '」'],
  ]);

  // String literal
  const closer = closingFor.get(value.charAt(0));
  if (closer !== undefined) {
    const terminated = value.length >= 2 && value.endsWith(closer);
    const inner = terminated ? value.slice(1, -1) : value.slice(1);
    return createLiteral(inner, 'string');
  }

  // Boolean
  if (value === 'true') return createLiteral(true, 'boolean');
  if (value === 'false') return createLiteral(false, 'boolean');

  // Duration (number with suffix) or plain unsigned decimal number
  const durationMatch = value.match(/^(\d+(?:\.\d+)?)(ms|s|m|h)?$/);
  if (durationMatch) {
    if (durationMatch[2]) {
      return createLiteral(value, 'duration');
    }
    return createLiteral(parseFloat(durationMatch[1]), 'number');
  }

  // Other numeric spellings (sign, exponent, leading dot).
  // Note: parseFloat reads a numeric prefix, so text such as '12px' yields 12.
  const num = parseFloat(value);
  if (!isNaN(num)) {
    return createLiteral(num, 'number');
  }

  // Default to string
  return createLiteral(value, 'string');
}
808
+
809
/**
 * Apply extraction rules to fill in default values for missing roles.
 *
 * For every rule in `pattern.extraction` that carries a default, the default
 * is stored in `captured` unless that role has already been captured.
 * A pattern without extraction rules is left untouched.
 *
 * @param pattern  Pattern whose extraction rules supply the defaults.
 * @param captured Role map to complete; mutated in place.
 */
private applyExtractionRules(
  pattern: LanguagePattern,
  captured: Map<SemanticRole, SemanticValue>
): void {
  // Object.entries throws on undefined/null, so bail out when no rules exist.
  if (!pattern.extraction) return;

  for (const [role, rule] of Object.entries(pattern.extraction)) {
    if (!captured.has(role as SemanticRole) && rule.default) {
      captured.set(role as SemanticRole, rule.default);
    }
  }
}
822
+
823
/**
 * Report whether a pattern token is flagged optional.
 * Only a literal `true` counts; a missing or non-boolean flag is not optional.
 */
private isOptional(patternToken: PatternToken): boolean {
  const { optional } = patternToken as { optional?: unknown };
  return optional === true;
}
829
+
830
/**
 * Calculate confidence score for a match (0-1).
 *
 * Base score: captured weight divided by possible weight, where
 * - each top-level role token weighs 1,
 * - each role inside a group weighs 0.8; if it was not captured but its
 *   extraction rule has a default it still earns 60% of that weight.
 * A pattern with no role tokens scores 1.
 *
 * Adjustments applied afterwards, in order:
 * - Stem penalty: up to 0.15, scaled by the share of keyword matches that
 *   were stem matches. The penalty never pushes a score below 0.5 and never
 *   raises a score that was already below 0.5.
 * - VSO boost for Arabic verb-first patterns (capped at 1.0).
 * - Arabic preposition adjustment (result clamped to [0, 1]).
 *
 * @param pattern  The pattern that matched.
 * @param captured Roles captured from the input.
 * @returns Confidence in the range 0..1.
 */
private calculateConfidence(
  pattern: LanguagePattern,
  captured: Map<SemanticRole, SemanticValue>
): number {
  let score = 0;
  let maxScore = 0;

  // Helper to check if a role has a default value in extraction rules
  const hasDefault = (role: SemanticRole): boolean => {
    return pattern.extraction?.[role]?.default !== undefined;
  };

  // Score based on captured roles
  for (const token of pattern.template.tokens) {
    if (token.type === 'role') {
      maxScore += 1;
      if (captured.has(token.role)) {
        score += 1;
      }
    } else if (token.type === 'group') {
      for (const subToken of token.tokens) {
        if (subToken.type !== 'role') continue;

        const weight = 0.8; // Roles inside a group: 80% weight
        maxScore += weight;

        if (captured.has(subToken.role)) {
          // Role was explicitly provided by user
          score += weight;
        } else if (hasDefault(subToken.role)) {
          // Role has a default - give 60% partial credit since the command is
          // semantically complete, e.g. "toggle .active" (default: me)
          score += weight * 0.6;
        }
        // No default and not captured: contributes nothing
      }
    }
  }

  let baseConfidence = maxScore > 0 ? score / maxScore : 1;

  // Penalise stem matches so exact matches are preferred over morphological ones.
  if (this.stemMatchCount > 0 && this.totalKeywordMatches > 0) {
    const stemPenalty = (this.stemMatchCount / this.totalKeywordMatches) * 0.15;
    // The floor is 0.5, but it must not lift a score that is already lower.
    const floor = Math.min(0.5, baseConfidence);
    baseConfidence = Math.max(floor, baseConfidence - stemPenalty);
  }

  // Apply VSO confidence boost for Arabic verb-first patterns
  const vsoBoost = this.calculateVSOConfidenceBoost(pattern);
  baseConfidence = Math.min(1.0, baseConfidence + vsoBoost);

  // Apply preposition disambiguation adjustment for Arabic
  const prepositionAdjustment = this.arabicPrepositionDisambiguation(pattern, captured);
  baseConfidence = Math.max(0.0, Math.min(1.0, baseConfidence + prepositionAdjustment));

  return baseConfidence;
}
901
+
902
/**
 * Confidence boost for verb-first (VSO) Arabic patterns.
 *
 * A boost of 0.15 is granted when the pattern's language is 'ar', its first
 * template token is a literal, and that literal's value or one of its
 * alternatives appears in the verb list below. In every other case the
 * boost is 0.
 *
 * @param pattern The language pattern being matched
 * @returns 0 or 0.15
 */
private calculateVSOConfidenceBoost(pattern: LanguagePattern): number {
  if (pattern.language !== 'ar') {
    return 0;
  }

  const leading = pattern.template.tokens[0];
  if (!leading || leading.type !== 'literal') {
    return 0;
  }

  // Arabic command keywords treated as verbs for this check.
  // NOTE(review): 'أو' and 'انتقال' do not look like verbs - confirm they belong here.
  const verbs = new Set([
    'بدل',
    'غير',
    'أضف',
    'أزل',
    'ضع',
    'اجعل',
    'عين',
    'زد',
    'انقص',
    'سجل',
    'أظهر',
    'أخف',
    'شغل',
    'أرسل',
    'ركز',
    'شوش',
    'توقف',
    'انسخ',
    'احذف',
    'اصنع',
    'انتظر',
    'انتقال',
    'أو',
  ]);

  // The primary spelling first, then any alternatives the token declares.
  const spellings = [leading.value, ...(leading.alternatives ?? [])];

  return spellings.some(spelling => verbs.has(spelling)) ? 0.15 : 0;
}
969
+
970
/**
 * Confidence adjustment based on which Arabic preposition accompanied a role.
 *
 * Each role listed in the table below has one or more preferred prepositions:
 * - على (on/upon) for patient
 * - إلى / الى (to) for destination and goal
 * - من (from) for source and agent
 * - ب (with/by) for manner, style and method
 *
 * For every captured value that carries a string `metadata.prepositionValue`
 * and whose role is in the table, the running total moves by +0.1 when the
 * preposition is preferred and by -0.1 when it is not. Values without that
 * metadata are ignored. The total is clamped to [-0.1, +0.1].
 *
 * @param pattern The language pattern being matched (only 'ar' is adjusted)
 * @param captured The captured semantic values
 * @returns Confidence adjustment between -0.10 and +0.10
 */
private arabicPrepositionDisambiguation(
  pattern: LanguagePattern,
  captured: Map<SemanticRole, SemanticValue>
): number {
  if (pattern.language !== 'ar') {
    return 0;
  }

  const preferredByRole: Partial<Record<SemanticRole, string[]>> = {
    patient: ['على'],
    destination: ['إلى', 'الى'],
    source: ['من'],
    agent: ['من'],
    manner: ['ب'],
    style: ['ب'],
    goal: ['إلى', 'الى'],
    method: ['ب'],
  };

  let total = 0;

  captured.forEach((value, role) => {
    const preferred = preferredByRole[role];
    if (!preferred || preferred.length === 0) {
      return; // no preference recorded for this role
    }

    // Preposition metadata is read off the captured value when present.
    const metadata = (value as any).metadata;
    if (!metadata || typeof metadata.prepositionValue !== 'string') {
      return;
    }

    total += preferred.includes(metadata.prepositionValue) ? 0.1 : -0.1;
  });

  // Several roles may contribute, but the net effect stays within ±0.10.
  return Math.max(-0.1, Math.min(0.1, total));
}
1040
+
1041
+ // ===========================================================================
1042
+ // English Idiom Support - Noise Word Handling
1043
+ // ===========================================================================
1044
+
1045
/**
 * English articles that may be dropped in front of a selector so that
 * phrasing like "toggle the .active" reads the same as "toggle .active".
 * Contains only 'the', 'a' and 'an'.
 */
private static readonly ENGLISH_NOISE_WORDS = new Set<string>(['the', 'a', 'an']);
1051
+
1052
/**
 * Step over a filler word at the current position, if there is one.
 *
 * - The word "class" is always skipped (".visible class" → ".visible").
 * - An article from ENGLISH_NOISE_WORDS is skipped only when the token right
 *   after it is a selector ("toggle the .active"); otherwise the stream is
 *   left where it was.
 */
private skipNoiseWords(tokens: TokenStream): void {
  const current = tokens.peek();
  if (!current) return;

  const word = current.value.toLowerCase();

  if (word === 'class') {
    tokens.advance();
    return;
  }

  if (!PatternMatcher.ENGLISH_NOISE_WORDS.has(word)) {
    return;
  }

  // Tentatively step past the article and inspect what follows.
  const beforeArticle = tokens.mark();
  tokens.advance();

  const following = tokens.peek();
  if (!following || following.kind !== 'selector') {
    // The article is not introducing a selector, so put it back.
    tokens.reset(beforeArticle);
  }
}
1086
+
1087
/**
 * Collect consecutive event-modifier tokens (.once, .debounce(N),
 * .throttle(N), .queue(strategy)) from the current stream position.
 *
 * Consumption stops at the first token that is not an event modifier or
 * that has no metadata. A modifier with an unknown name or an unusable
 * value is still consumed and still counts as "found", but sets nothing.
 *
 * @returns The collected modifiers, or undefined when none were found.
 */
extractEventModifiers(tokens: TokenStream): import('../types').EventModifiers | undefined {
  const collected: {
    once?: boolean;
    debounce?: number;
    throttle?: number;
    queue?: 'first' | 'last' | 'all' | 'none';
    from?: SemanticValue;
  } = {};

  let sawModifier = false;

  while (!tokens.isAtEnd()) {
    const token = tokens.peek();
    if (!token || token.kind !== 'event-modifier') {
      break;
    }

    const info = token.metadata as
      | { modifierName: string; value?: number | string }
      | undefined;
    if (!info) {
      break;
    }

    sawModifier = true;

    const { modifierName, value } = info;
    if (modifierName === 'once') {
      collected.once = true;
    } else if (modifierName === 'debounce') {
      if (typeof value === 'number') {
        collected.debounce = value;
      }
    } else if (modifierName === 'throttle') {
      if (typeof value === 'number') {
        collected.throttle = value;
      }
    } else if (modifierName === 'queue') {
      if (value === 'first' || value === 'last' || value === 'all' || value === 'none') {
        collected.queue = value;
      }
    }

    tokens.advance();
  }

  return sawModifier ? collected : undefined;
}
1152
+ }
1153
+
1154
+ // =============================================================================
1155
+ // Convenience Functions
1156
+ // =============================================================================
1157
+
1158
/**
 * Shared PatternMatcher instance used by the convenience functions in this
 * module. The instance keeps per-match state, so it is not a pure utility.
 */
export const patternMatcher = new PatternMatcher();
1162
+
1163
/**
 * Match a token stream against a single pattern using the shared matcher.
 *
 * @returns The match result, or null when the pattern does not match.
 */
export function matchPattern(
  tokens: TokenStream,
  pattern: LanguagePattern
): PatternMatchResult | null {
  const result = patternMatcher.matchPattern(tokens, pattern);
  return result;
}
1172
+
1173
/**
 * Match a token stream against several candidate patterns using the shared
 * matcher and return the best result.
 *
 * @returns The best match, or null when no pattern matches.
 */
export function matchBest(
  tokens: TokenStream,
  patterns: LanguagePattern[]
): PatternMatchResult | null {
  const best = patternMatcher.matchBest(tokens, patterns);
  return best;
}