@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435)
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
@@ -0,0 +1,3426 @@
+ /**
+ * Grammar Types for Semantic Multilingual Parsing
+ *
+ * These types define the semantic role system used across all 13 supported languages.
+ * Originally from @lokascript/i18n, now consolidated here for package independence.
+ *
+ * Key Linguistic Concepts:
+ * - Word Order: SVO, SOV, VSO (and variations)
+ * - Adposition Type: Preposition (English) vs Postposition (Japanese/Korean)
+ * - Morphology: Isolating (Chinese) vs Agglutinative (Turkish) vs Fusional (Arabic)
+ * - Text Direction: LTR vs RTL
+ */
+ /**
+ * Semantic roles in hyperscript commands.
+ * These are universal across all 13 supported languages - only the surface form changes.
+ *
+ * ## Core Thematic Roles (from linguistic theory)
+ * | Role | Usage | Purpose | Example |
+ * |-------------|-------|-----------------------------|---------------------------|
+ * | action | 100% | Command verb | toggle, put, fetch |
+ * | patient | 90% | What is acted upon | .active, #count |
+ * | destination | 40% | Where something goes | into #output, to .class |
+ * | source | 13% | Where something comes from | from #input, from URL |
+ * | event | 106% | Trigger events | click, keydown, submit |
+ * | condition | 8% | Boolean expressions | if x > 5, when visible |
+ * | agent | 0% | Who performs action | Reserved for future use |
+ * | goal | 1% | Target value/state | to 'red' (in transition) |
+ *
+ * ## Quantitative Roles (answer "how much/long")
+ * | Role | Usage | Purpose | Example |
+ * |----------|-------|----------------|----------------------|
+ * | quantity | 7% | Numeric amount | by 5, 3 times |
+ * | duration | 1% | Time span | for 5 seconds, 500ms |
+ *
+ * ## Adverbial/Modifier Roles (answer "how/by what means")
+ * | Role | Usage | Purpose | Example |
+ * |--------------|-------|---------------------------|-------------------|
+ * | style | 2% | Animation/behavior | with fade |
+ * | manner | 2% | Insertion position | before, after |
+ * | method | 1% | HTTP method/technique | via POST, as GET |
+ * | responseType | 1% | Response format | as json, as html |
+ *
+ * ## Control Flow Roles
+ * | Role | Usage | Purpose | Example |
+ * |----------|-------|--------------|-----------------------|
+ * | loopType | 6% | Loop variant | forever, until, times |
+ *
+ * ## Design Notes
+ * - Low-usage roles (agent, goal, method, responseType) are intentionally kept for:
+ * - Linguistic completeness across all 13 languages
+ * - Future extensibility (AI agents, server-side execution)
+ * - Command-specific semantics (fetch, transition)
+ * - Each role has distinct grammatical markers per language (see profiles/index.ts)
+ * - Usage percentages based on pattern database analysis
+ */
+ type SemanticRole = 'action' | 'agent' | 'patient' | 'source' | 'destination' | 'goal' | 'event' | 'condition' | 'quantity' | 'duration' | 'responseType' | 'method' | 'style' | 'manner' | 'loopType' | 'continues';
+ /**
+ * Word order patterns
+ * These represent the major typological categories
+ */
+ type WordOrder$1 = 'SVO' | 'SOV' | 'VSO' | 'VOS' | 'OVS' | 'OSV' | 'free';
+ /**
+ * Where grammatical markers appear relative to their noun/verb
+ */
+ type AdpositionType = 'preposition' | 'postposition' | 'circumposition' | 'none';
+ /**
+ * Morphological typology - how words are constructed
+ */
+ type MorphologyType = 'isolating' | 'agglutinative' | 'fusional' | 'polysynthetic';
+ /**
+ * A grammatical marker (particle, case ending, preposition)
+ */
+ interface GrammaticalMarker {
+ form: string;
+ role: SemanticRole;
+ position: AdpositionType;
+ required: boolean;
+ alternatives?: string[];
+ }
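To make the marker model concrete, here is an illustrative sketch (not part of the package diff) of two GrammaticalMarker values built against the interface above; the specific forms chosen are assumptions for the example:

// Illustrative only: example markers assuming the GrammaticalMarker interface above.
const englishInto: GrammaticalMarker = {
  form: 'into',
  role: 'destination',      // SemanticRole
  position: 'preposition',  // AdpositionType: the marker precedes its noun
  required: true,
  alternatives: ['to'],
};

const japaneseWo: GrammaticalMarker = {
  form: 'を',
  role: 'patient',
  position: 'postposition', // the particle follows the noun it marks
  required: true,
};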
+
+ /**
+ * Semantic-First Multilingual Hyperscript Types
+ *
+ * This module defines the canonical semantic representation that all languages
+ * parse to and render from. The semantic layer is language-neutral - it captures
+ * the MEANING of hyperscript commands independent of surface syntax.
+ */
+
+ /**
+ * Canonical action names (English-based internally, but not visible to users)
+ * These map to hyperscript commands and are used in the semantic AST.
+ */
+ type ActionType = 'toggle' | 'add' | 'remove' | 'put' | 'append' | 'prepend' | 'take' | 'make' | 'clone' | 'swap' | 'morph' | 'set' | 'get' | 'increment' | 'decrement' | 'log' | 'show' | 'hide' | 'transition' | 'on' | 'trigger' | 'send' | 'focus' | 'blur' | 'go' | 'wait' | 'fetch' | 'settle' | 'measure' | 'install' | 'if' | 'unless' | 'else' | 'repeat' | 'for' | 'while' | 'continue' | 'halt' | 'throw' | 'call' | 'return' | 'js' | 'async' | 'tell' | 'default' | 'init' | 'behavior' | 'compound';
+ /**
+ * A semantic value represents a typed piece of data in a semantic node.
+ * Values are language-neutral - they capture what something IS, not how it's written.
+ */
+ type SemanticValue = LiteralValue | SelectorValue | ReferenceValue | PropertyPathValue | ExpressionValue;
+ interface LiteralValue {
+ readonly type: 'literal';
+ readonly value: string | number | boolean;
+ readonly dataType?: 'string' | 'number' | 'boolean' | 'duration';
+ }
+ interface SelectorValue {
+ readonly type: 'selector';
+ readonly value: string;
+ readonly selectorKind: 'id' | 'class' | 'attribute' | 'element' | 'complex';
+ }
+ interface ReferenceValue {
+ readonly type: 'reference';
+ readonly value: 'me' | 'you' | 'it' | 'result' | 'event' | 'target' | 'body';
+ }
+ interface PropertyPathValue {
+ readonly type: 'property-path';
+ readonly object: SemanticValue;
+ readonly property: string;
+ }
+ interface ExpressionValue {
+ readonly type: 'expression';
+ /** Raw expression string for complex expressions that need further parsing */
+ readonly raw: string;
+ }
+ /**
+ * Base interface for all semantic nodes.
+ * Semantic nodes capture the MEANING of hyperscript constructs.
+ */
+ interface SemanticNode {
+ readonly kind: 'command' | 'event-handler' | 'conditional' | 'compound' | 'loop';
+ readonly action: ActionType;
+ readonly roles: ReadonlyMap<SemanticRole, SemanticValue>;
+ readonly metadata?: SemanticMetadata;
+ }
+ /**
+ * Metadata about the source of a semantic node.
+ * Useful for debugging, error messages, and round-trip conversion.
+ */
+ interface SemanticMetadata {
+ readonly sourceLanguage?: string;
+ readonly sourceText?: string;
+ readonly sourcePosition?: SourcePosition;
+ readonly patternId?: string;
+ }
+ interface SourcePosition {
+ readonly start: number;
+ readonly end: number;
+ readonly line?: number;
+ readonly column?: number;
+ }
+ /**
+ * A command semantic node - represents a single hyperscript command.
+ */
+ interface CommandSemanticNode extends SemanticNode {
+ readonly kind: 'command';
+ }
+ /**
+ * An event handler semantic node - represents "on [event] [commands]".
+ */
+ interface EventHandlerSemanticNode extends SemanticNode {
+ readonly kind: 'event-handler';
+ readonly action: 'on';
+ readonly body: SemanticNode[];
+ readonly eventModifiers?: EventModifiers;
+ /**
+ * Event parameter names for destructuring.
+ * E.g., for "on click(clientX, clientY)", this would be ['clientX', 'clientY']
+ */
+ readonly parameterNames?: readonly string[];
+ }
+ interface EventModifiers {
+ readonly once?: boolean;
+ readonly debounce?: number;
+ readonly throttle?: number;
+ readonly queue?: 'first' | 'last' | 'all' | 'none';
+ readonly from?: SemanticValue;
+ }
+ /**
+ * A conditional semantic node - represents "if [condition] then [body] else [body]".
+ */
+ interface ConditionalSemanticNode extends SemanticNode {
+ readonly kind: 'conditional';
+ readonly action: 'if';
+ readonly thenBranch: SemanticNode[];
+ readonly elseBranch?: SemanticNode[];
+ }
+ /**
+ * A compound semantic node - represents multiple chained statements.
+ */
+ interface CompoundSemanticNode extends SemanticNode {
+ readonly kind: 'compound';
+ readonly statements: SemanticNode[];
+ readonly chainType: 'then' | 'and' | 'async';
+ }
+ /**
+ * Loop variant discriminant for different loop types.
+ */
+ type LoopVariant = 'forever' | 'times' | 'for' | 'while' | 'until';
+ /**
+ * A loop semantic node - represents repeat/for/while loops.
+ */
+ interface LoopSemanticNode extends SemanticNode {
+ readonly kind: 'loop';
+ readonly action: 'repeat' | 'for' | 'while';
+ /** The type of loop (forever, times, for, while, until) */
+ readonly loopVariant: LoopVariant;
+ /** Commands to execute in each iteration */
+ readonly body: SemanticNode[];
+ /** Loop variable name for 'for' loops (e.g., 'item' in 'for item in list') */
+ readonly loopVariable?: string;
+ /** Index variable name if specified (e.g., 'i' in 'for item with index i') */
+ readonly indexVariable?: string;
+ }
+ /**
+ * A pattern defines how a semantic structure appears in a specific language.
+ * Patterns enable bidirectional conversion: parse (natural → semantic) and
+ * render (semantic → natural).
+ */
+ interface LanguagePattern {
+ /** Unique identifier for this pattern */
+ readonly id: string;
+ /** ISO 639-1 language code */
+ readonly language: string;
+ /** Which command this pattern matches */
+ readonly command: ActionType;
+ /** Priority for disambiguation (higher = checked first) */
+ readonly priority: number;
+ /** The pattern template with role placeholders */
+ readonly template: PatternTemplate;
+ /** Rules for extracting semantic roles from matched tokens */
+ readonly extraction: ExtractionRules;
+ /** Optional constraints on when this pattern applies */
+ readonly constraints?: PatternConstraints;
+ }
+ /**
+ * A pattern template defines the expected token sequence.
+ *
+ * Template syntax:
+ * - Literal tokens: "toggle", "を", "على"
+ * - Role placeholders: {patient}, {target}, {destination}
+ * - Optional groups: [on {target}]
+ * - Alternatives in extraction (not in template string)
+ *
+ * Example templates:
+ * - English: "toggle {patient} [on {target}]"
+ * - Japanese: "{target} の {patient} を 切り替え"
+ * - Arabic: "بدّل {patient} [على {target}]"
+ */
+ interface PatternTemplate {
+ /** Human-readable template string */
+ readonly format: string;
+ /** Parsed token sequence for matching */
+ readonly tokens: PatternToken[];
+ }
+ type PatternToken = LiteralPatternToken | RolePatternToken | GroupPatternToken;
+ interface LiteralPatternToken {
+ readonly type: 'literal';
+ readonly value: string;
+ /** Alternative spellings/forms that also match */
+ readonly alternatives?: string[];
+ }
+ interface RolePatternToken {
+ readonly type: 'role';
+ readonly role: SemanticRole;
+ readonly optional?: boolean;
+ /** Expected value types (for validation) */
+ readonly expectedTypes?: Array<SemanticValue['type']>;
+ }
+ interface GroupPatternToken {
+ readonly type: 'group';
+ readonly tokens: PatternToken[];
+ readonly optional?: boolean;
+ }
+ /**
+ * Rules for extracting semantic values from matched tokens.
+ */
+ interface ExtractionRules {
+ readonly [role: string]: ExtractionRule;
+ }
+ interface ExtractionRule {
+ /** Position-based extraction (0-indexed from pattern start) */
+ readonly position?: number;
+ /** Marker-based extraction (find value after this marker) */
+ readonly marker?: string;
+ /** Alternative markers that also work */
+ readonly markerAlternatives?: string[];
+ /** Transform the extracted value */
+ readonly transform?: (raw: string) => SemanticValue;
+ /** Default value if not found (for optional roles) */
+ readonly default?: SemanticValue;
+ }
+ /**
+ * Additional constraints on pattern applicability.
+ */
+ interface PatternConstraints {
+ /** Required roles that must be present */
+ readonly requiredRoles?: SemanticRole[];
+ /** Roles that must NOT be present */
+ readonly forbiddenRoles?: SemanticRole[];
+ /** Valid selector types for the patient role */
+ readonly validPatientTypes?: Array<SelectorValue['selectorKind']>;
+ /** Pattern IDs this conflicts with */
+ readonly conflictsWith?: string[];
+ }
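As an illustration of how the template, extraction, and constraint types fit together, the following sketch builds a hypothetical English toggle pattern against the interfaces above; the id, priority, and marker words are invented for the example and are not taken from the package source:

// Hypothetical pattern; values chosen only to exercise the declared types.
const toggleEn: LanguagePattern = {
  id: 'en-toggle-example',
  language: 'en',
  command: 'toggle',
  priority: 10,
  template: {
    format: 'toggle {patient} [on {destination}]',
    tokens: [
      { type: 'literal', value: 'toggle', alternatives: ['switch'] },
      { type: 'role', role: 'patient', expectedTypes: ['selector'] },
      {
        type: 'group',
        optional: true,
        tokens: [
          { type: 'literal', value: 'on' },
          { type: 'role', role: 'destination' },
        ],
      },
    ],
  },
  extraction: {
    patient: { position: 1 },       // the token right after the verb
    destination: { marker: 'on' },  // the value following the optional marker
  },
  constraints: { requiredRoles: ['patient'] },
};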
+ /**
+ * A token from language-specific tokenization.
+ */
+ interface LanguageToken {
+ readonly value: string;
+ readonly kind: TokenKind;
+ readonly position: SourcePosition;
+ /** Normalized form from explicit keyword map (e.g., 切り替え → toggle) */
+ readonly normalized?: string;
+ /** Morphologically normalized stem (e.g., 切り替えた → 切り替え) */
+ readonly stem?: string;
+ /** Confidence in the morphological stem (0.0-1.0) */
+ readonly stemConfidence?: number;
+ /** Additional metadata for specific token types (e.g., event modifier data) */
+ readonly metadata?: Record<string, unknown>;
+ }
+ type TokenKind = 'keyword' | 'selector' | 'literal' | 'particle' | 'conjunction' | 'event-modifier' | 'identifier' | 'operator' | 'punctuation' | 'url';
+ /**
+ * A stream of tokens with navigation capabilities.
+ */
+ interface TokenStream {
+ readonly tokens: readonly LanguageToken[];
+ readonly language: string;
+ /** Look at token at current position + offset without consuming */
+ peek(offset?: number): LanguageToken | null;
+ /** Consume and return current token, advance position */
+ advance(): LanguageToken;
+ /** Check if we've consumed all tokens */
+ isAtEnd(): boolean;
+ /** Save current position for backtracking */
+ mark(): StreamMark;
+ /** Restore to a saved position */
+ reset(mark: StreamMark): void;
+ /** Get current position */
+ position(): number;
+ }
+ interface StreamMark {
+ readonly position: number;
+ }
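A minimal sketch of how a caller might navigate a TokenStream using mark/reset for backtracking; this is illustrative only and is not code from the package:

// Scan for the next keyword token, restoring the stream position if none is found.
function nextKeyword(stream: TokenStream): LanguageToken | null {
  const start = stream.mark();        // remember where we began
  while (!stream.isAtEnd()) {
    const token = stream.advance();   // consume the current token
    if (token.kind === 'keyword') return token;
  }
  stream.reset(start);                // nothing found: backtrack to the saved position
  return null;
}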
+ /**
+ * Result of successfully matching a pattern.
+ */
+ interface PatternMatchResult {
+ readonly pattern: LanguagePattern;
+ readonly captured: ReadonlyMap<SemanticRole, SemanticValue>;
+ readonly consumedTokens: number;
+ readonly confidence: number;
+ }
+ /**
+ * Error when pattern matching fails.
+ */
+ interface PatternMatchError {
+ readonly message: string;
+ readonly position: SourcePosition;
+ readonly expectedPatterns?: string[];
+ readonly partialMatch?: Partial<PatternMatchResult>;
+ }
+ /**
+ * Language-specific tokenizer interface.
+ * Each language implements its own tokenizer to handle:
+ * - Word boundaries (spaces for English, particles for Japanese)
+ * - Character sets (ASCII, CJK, Arabic, etc.)
+ * - Special markers (particles, prefixes, suffixes)
+ */
+ interface LanguageTokenizer {
+ readonly language: string;
+ readonly direction: 'ltr' | 'rtl';
+ /** Convert input string to token stream */
+ tokenize(input: string): TokenStream;
+ /** Classify a single token */
+ classifyToken(token: string): TokenKind;
+ }
+ /**
+ * Main parser interface - converts natural language to semantic nodes.
+ */
+ interface SemanticParser {
+ /** Parse input in specified language to semantic node */
+ parse(input: string, language: string): SemanticNode;
+ /** Check if input can be parsed in the specified language */
+ canParse(input: string, language: string): boolean;
+ /** Get all supported languages */
+ supportedLanguages(): string[];
+ }
+ /**
+ * Renderer interface - converts semantic nodes to natural language.
+ */
+ interface SemanticRenderer {
+ /** Render semantic node in specified language */
+ render(node: SemanticNode, language: string): string;
+ /** Render semantic node in explicit mode */
+ renderExplicit(node: SemanticNode): string;
+ /** Get all supported languages */
+ supportedLanguages(): string[];
+ }
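An illustrative round trip through the two interfaces above, assuming a parser and renderer instance are available; the package's concrete exports are not shown in this hunk, so the function and its parameters are hypothetical:

// Translate a command between two languages via the language-neutral semantic layer.
function translate(
  parser: SemanticParser,
  renderer: SemanticRenderer,
  input: string,
  from: string,
  to: string,
): string {
  if (!parser.canParse(input, from)) {
    throw new Error(`cannot parse "${input}" as ${from}`);
  }
  const node = parser.parse(input, from);   // natural language -> semantic node
  return renderer.render(node, to);         // semantic node -> natural language
}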
+ /**
+ * Create a selector semantic value from a CSS selector string.
+ */
+ declare function createSelector(value: string): SelectorValue;
+ /**
+ * Create a literal semantic value.
+ */
+ declare function createLiteral(value: string | number | boolean, dataType?: LiteralValue['dataType']): LiteralValue;
+ /**
+ * Create a reference semantic value.
+ */
+ declare function createReference(value: ReferenceValue['value']): ReferenceValue;
+ /**
+ * Create a property path semantic value.
+ */
+ declare function createPropertyPath(object: SemanticValue, property: string): PropertyPathValue;
+ /**
+ * Create a semantic node with the given action and roles.
+ */
+ declare function createCommandNode(action: ActionType, roles: Record<string, SemanticValue>, metadata?: SemanticMetadata): CommandSemanticNode;
+ /**
+ * Create an event handler semantic node.
+ */
+ declare function createEventHandler(event: SemanticValue, body: SemanticNode[], modifiers?: EventModifiers, metadata?: SemanticMetadata, parameterNames?: string[]): EventHandlerSemanticNode;
+ /**
+ * Create a compound semantic node (for chained statements).
+ */
+ declare function createCompoundNode(statements: SemanticNode[], chainType?: 'then' | 'and' | 'async', metadata?: SemanticMetadata): CompoundSemanticNode;
+ /**
+ * Create a conditional semantic node (if/else).
+ */
+ declare function createConditionalNode(condition: SemanticValue, thenBranch: SemanticNode[], elseBranch?: SemanticNode[], metadata?: SemanticMetadata): ConditionalSemanticNode;
+ /**
+ * Create a loop semantic node.
+ */
+ declare function createLoopNode(action: 'repeat' | 'for' | 'while', loopVariant: LoopVariant, roles: Record<string, SemanticValue>, body: SemanticNode[], options?: {
+ loopVariable?: string;
+ indexVariable?: string;
+ metadata?: SemanticMetadata;
+ }): LoopSemanticNode;
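A hypothetical use of the factory helpers declared above; the command and values are example choices, not taken from the package:

// Build a command node for "toggle .active on me" using the declared factories.
const node = createCommandNode(
  'toggle',
  {
    patient: createSelector('.active'),   // what is acted upon
    destination: createReference('me'),   // where the class is toggled
  },
  { sourceLanguage: 'en', sourceText: 'toggle .active on me' },
);
// node.kind === 'command'; node.roles is a ReadonlyMap keyed by SemanticRole.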
+ /**
+ * Argument with semantic role attached.
+ */
+ type SemanticArgument = SemanticValue & {
+ role?: SemanticRole;
+ };
+ /**
+ * Result of semantic parsing (used by command validator).
+ */
+ interface SemanticParseResult {
+ /** The action/command type */
+ readonly action: ActionType;
+ /** Confidence score (0-1) */
+ readonly confidence: number;
+ /** Source language code */
+ readonly language: string;
+ /** Parsed arguments with roles */
+ readonly arguments: SemanticArgument[];
+ }
+
+ /**
+ * Morphological Normalizer Types
+ *
+ * Defines interfaces for language-specific morphological analysis.
+ * Normalizers reduce conjugated/inflected forms to canonical stems
+ * that can be matched against keyword dictionaries.
+ */
+ /**
+ * Result of morphological normalization.
+ */
+ interface NormalizationResult {
+ /** The extracted stem/root form */
+ readonly stem: string;
+ /** Confidence in the normalization (0.0-1.0) */
+ readonly confidence: number;
+ /** Optional metadata about the transformation */
+ readonly metadata?: NormalizationMetadata;
+ }
+ /**
+ * Metadata about morphological transformations applied.
+ */
+ interface NormalizationMetadata {
+ /** Prefixes that were removed */
+ readonly removedPrefixes?: readonly string[];
+ /** Suffixes that were removed */
+ readonly removedSuffixes?: readonly string[];
+ /** Type of conjugation detected */
+ readonly conjugationType?: ConjugationType;
+ /** Original form classification */
+ readonly originalForm?: string;
+ /** Applied transformation rules (for debugging) */
+ readonly appliedRules?: readonly string[];
+ }
+ /**
+ * Types of verb conjugation/inflection.
+ */
+ type ConjugationType = 'present' | 'past' | 'future' | 'progressive' | 'perfect' | 'imperative' | 'subjunctive' | 'conditional' | 'passive' | 'causative' | 'polite' | 'humble' | 'honorific' | 'negative' | 'potential' | 'volitional' | 'conditional-tara' | 'conditional-to' | 'conditional-ba' | 'connective' | 'conditional-myeon' | 'temporal-ttae' | 'causal-nikka' | 'honorific-conditional' | 'honorific-temporal' | 'honorific-causal' | 'honorific-past' | 'honorific-polite' | 'sequential-after' | 'sequential-before' | 'immediate' | 'obligation' | 'reflexive' | 'reflexive-imperative' | 'gerund' | 'participle' | 'conditional-idha' | 'temporal-indama' | 'temporal-hina' | 'temporal-lamma' | 'past-verb' | 'conditional-se' | 'temporal-ince' | 'temporal-dikce' | 'aorist' | 'optative' | 'necessitative' | 'request' | 'casual-request' | 'contracted' | 'contracted-past' | 'compound' | 'te-form' | 'dictionary';
+ /**
+ * Interface for language-specific morphological normalizers.
+ *
+ * Normalizers attempt to reduce inflected word forms to their
+ * canonical stems. This enables matching conjugated verbs against
+ * keyword dictionaries that only contain base forms.
+ *
+ * Example (Japanese):
+ * 切り替えた (past) → { stem: '切り替え', confidence: 0.85 }
+ * 切り替えます (polite) → { stem: '切り替え', confidence: 0.85 }
+ *
+ * Example (Spanish):
+ * mostrarse (reflexive infinitive) → { stem: 'mostrar', confidence: 0.85 }
+ * alternando (gerund) → { stem: 'alternar', confidence: 0.85 }
+ */
+ interface MorphologicalNormalizer {
+ /** Language code this normalizer handles */
+ readonly language: string;
+ /**
+ * Normalize a word to its canonical stem form.
+ *
+ * @param word - The word to normalize
+ * @returns Normalization result with stem and confidence
+ */
+ normalize(word: string): NormalizationResult;
+ /**
+ * Check if a word appears to be a verb form that can be normalized.
+ * Optional optimization to skip normalization for non-verb tokens.
+ *
+ * @param word - The word to check
+ * @returns true if the word might be a normalizable verb form
+ */
+ isNormalizable?(word: string): boolean;
+ }
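A minimal sketch of an object implementing MorphologicalNormalizer, using a deliberately crude English suffix heuristic for illustration; the package ships real per-language normalizers, and this is not one of them:

// Illustrative normalizer: strips a trailing "d" from "-ed" forms, nothing more.
const naiveNormalizer: MorphologicalNormalizer = {
  language: 'en',
  normalize(word: string): NormalizationResult {
    if (word.endsWith('ed') && word.length > 3) {
      return {
        stem: word.slice(0, -1),          // e.g. "toggled" -> "toggle"
        confidence: 0.7,
        metadata: { removedSuffixes: ['d'], conjugationType: 'past' },
      };
    }
    return { stem: word, confidence: 1.0 };  // no change needed
  },
  isNormalizable(word: string): boolean {
    return word.endsWith('ed');
  },
};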
+
+ /**
+ * Base Tokenizer
+ *
+ * Provides the TokenStream implementation and shared tokenization utilities.
+ * Language-specific tokenizers extend these base utilities.
+ */
+
+ /**
+ * Configuration for a native language time unit pattern.
+ * Used by tryNumberWithTimeUnits() to match language-specific time units.
+ */
+ interface TimeUnitMapping {
+ /** The pattern to match (e.g., 'segundos', 'ミリ秒') */
+ readonly pattern: string;
+ /** The standard suffix to use (ms, s, m, h) */
+ readonly suffix: string;
+ /** Length of the pattern (for optimization) */
+ readonly length: number;
+ /** Whether to check for word boundary after the pattern */
+ readonly checkBoundary?: boolean;
+ /** Character that cannot follow the pattern (e.g., 's' for 'm' to avoid 'ms') */
+ readonly notFollowedBy?: string;
+ /** Whether to do case-insensitive matching */
+ readonly caseInsensitive?: boolean;
+ }
+ /**
+ * Concrete implementation of TokenStream.
+ */
+ declare class TokenStreamImpl implements TokenStream {
+ readonly tokens: readonly LanguageToken[];
+ readonly language: string;
+ private pos;
+ constructor(tokens: LanguageToken[], language: string);
+ peek(offset?: number): LanguageToken | null;
+ advance(): LanguageToken;
+ isAtEnd(): boolean;
+ mark(): StreamMark;
+ reset(mark: StreamMark): void;
+ position(): number;
+ /**
+ * Get remaining tokens as an array.
+ */
+ remaining(): LanguageToken[];
+ /**
+ * Consume tokens while predicate is true.
+ */
+ takeWhile(predicate: (token: LanguageToken) => boolean): LanguageToken[];
+ /**
+ * Skip tokens while predicate is true.
+ */
+ skipWhile(predicate: (token: LanguageToken) => boolean): void;
+ }
+ /**
+ * Keyword entry for tokenizer - maps native word to normalized English form.
+ */
+ interface KeywordEntry {
+ readonly native: string;
+ readonly normalized: string;
+ }
+ /**
+ * Profile interface for keyword derivation.
+ * Matches the structure of LanguageProfile but only includes fields needed for tokenization.
+ */
+ interface TokenizerProfile {
+ readonly keywords?: Record<string, {
+ primary: string;
+ alternatives?: string[];
+ normalized?: string;
+ }>;
+ readonly references?: Record<string, string>;
+ readonly roleMarkers?: Record<string, {
+ primary: string;
+ alternatives?: string[];
+ position?: string;
+ }>;
+ }
+ /**
+ * Abstract base class for language-specific tokenizers.
+ * Provides common functionality for CSS selectors, strings, and numbers.
+ */
+ declare abstract class BaseTokenizer implements LanguageTokenizer {
+ abstract readonly language: string;
+ abstract readonly direction: 'ltr' | 'rtl';
+ /** Optional morphological normalizer for this language */
+ protected normalizer?: MorphologicalNormalizer;
+ /** Keywords derived from profile, sorted longest-first for greedy matching */
+ protected profileKeywords: KeywordEntry[];
+ /** Map for O(1) keyword lookups by lowercase native word */
+ protected profileKeywordMap: Map<string, KeywordEntry>;
+ abstract tokenize(input: string): TokenStream;
+ abstract classifyToken(token: string): TokenKind;
+ /**
+ * Initialize keyword mappings from a language profile.
+ * Builds a list of native→english mappings from:
+ * - profile.keywords (primary + alternatives)
+ * - profile.references (me, it, you, etc.)
+ * - profile.roleMarkers (into, from, with, etc.)
+ *
+ * Results are sorted longest-first for greedy matching (important for non-space languages).
+ * Extras take precedence over profile entries when there are duplicates.
+ *
+ * @param profile - Language profile containing keyword translations
+ * @param extras - Additional keyword entries to include (literals, positional, events)
+ */
+ protected initializeKeywordsFromProfile(profile: TokenizerProfile, extras?: KeywordEntry[]): void;
+ /**
+ * Remove diacritical marks from a word for normalization.
+ * Primarily for Arabic (shadda, fatha, kasra, damma, sukun, etc.)
+ * but could be extended for other languages.
+ *
+ * @param word - Word to normalize
+ * @returns Word without diacritics
+ */
+ protected removeDiacritics(word: string): string;
+ /**
+ * Try to match a keyword from profile at the current position.
+ * Uses longest-first greedy matching (important for non-space languages).
+ *
+ * @param input - Input string
+ * @param pos - Current position
+ * @returns Token if matched, null otherwise
+ */
+ protected tryProfileKeyword(input: string, pos: number): LanguageToken | null;
+ /**
+ * Check if the remaining input starts with any known keyword.
+ * Useful for non-space languages to detect word boundaries.
+ *
+ * @param input - Input string
+ * @param pos - Current position
+ * @returns true if a keyword starts at this position
+ */
+ protected isKeywordStart(input: string, pos: number): boolean;
+ /**
+ * Look up a keyword by native word (case-insensitive).
+ * O(1) lookup using the keyword map.
+ *
+ * @param native - Native word to look up
+ * @returns KeywordEntry if found, undefined otherwise
+ */
+ protected lookupKeyword(native: string): KeywordEntry | undefined;
+ /**
+ * Check if a word is a known keyword (case-insensitive).
+ * O(1) lookup using the keyword map.
+ *
+ * @param native - Native word to check
+ * @returns true if the word is a keyword
+ */
+ protected isKeyword(native: string): boolean;
+ /**
+ * Set the morphological normalizer for this tokenizer.
+ */
+ setNormalizer(normalizer: MorphologicalNormalizer): void;
+ /**
+ * Try to normalize a word using the morphological normalizer.
+ * Returns null if no normalizer is set or normalization fails.
+ *
+ * Note: We don't check isNormalizable() here because the individual tokenizers
+ * historically called normalize() directly without that check. The normalize()
+ * method itself handles returning noChange() for words that can't be normalized.
+ */
+ protected tryNormalize(word: string): NormalizationResult | null;
+ /**
+ * Try morphological normalization and keyword lookup.
+ *
+ * If the word can be normalized to a stem that matches a known keyword,
+ * returns a keyword token with morphological metadata (stem, stemConfidence).
+ *
+ * This is the common pattern for handling conjugated verbs across languages:
+ * 1. Normalize the word (e.g., "toggled" → "toggle")
+ * 2. Look up the stem in the keyword map
+ * 3. Create a token with both the original form and stem metadata
+ *
+ * @param word - The word to normalize and look up
+ * @param startPos - Start position for the token
+ * @param endPos - End position for the token
+ * @returns Token if stem matches a keyword, null otherwise
+ */
+ protected tryMorphKeywordMatch(word: string, startPos: number, endPos: number): LanguageToken | null;
+ /**
+ * Try to extract a CSS selector at the current position.
+ */
+ protected trySelector(input: string, pos: number): LanguageToken | null;
+ /**
+ * Try to extract an event modifier at the current position.
+ * Event modifiers are .once, .debounce(N), .throttle(N), .queue(strategy)
+ */
+ protected tryEventModifier(input: string, pos: number): LanguageToken | null;
+ /**
+ * Try to extract a string literal at the current position.
+ */
+ protected tryString(input: string, pos: number): LanguageToken | null;
+ /**
+ * Try to extract a number at the current position.
+ */
+ protected tryNumber(input: string, pos: number): LanguageToken | null;
+ /**
+ * Configuration for native language time units.
+ * Maps patterns to their standard suffix (ms, s, m, h).
+ */
+ protected static readonly STANDARD_TIME_UNITS: readonly TimeUnitMapping[];
+ /**
+ * Try to match a time unit from a list of patterns.
+ *
+ * @param input - Input string
+ * @param pos - Position after the number
+ * @param timeUnits - Array of time unit mappings (native pattern → standard suffix)
+ * @param skipWhitespace - Whether to skip whitespace before time unit (default: false)
+ * @returns Object with matched suffix and new position, or null if no match
+ */
+ protected tryMatchTimeUnit(input: string, pos: number, timeUnits: readonly TimeUnitMapping[], skipWhitespace?: boolean): {
+ suffix: string;
+ endPos: number;
+ } | null;
+ /**
+ * Parse a base number (sign, integer, decimal) without time units.
+ * Returns the number string and end position.
+ *
+ * @param input - Input string
+ * @param startPos - Start position
+ * @param allowSign - Whether to allow +/- sign (default: true)
+ * @returns Object with number string and end position, or null
+ */
+ protected parseBaseNumber(input: string, startPos: number, allowSign?: boolean): {
+ number: string;
+ endPos: number;
+ } | null;
+ /**
+ * Try to extract a number with native language time units.
+ *
+ * This is a template method that handles the common pattern:
+ * 1. Parse the base number (sign, integer, decimal)
+ * 2. Try to match native language time units
+ * 3. Fall back to standard time units (ms, s, m, h)
+ *
+ * @param input - Input string
+ * @param pos - Start position
+ * @param nativeTimeUnits - Language-specific time unit mappings
+ * @param options - Configuration options
+ * @returns Token if number found, null otherwise
+ */
+ protected tryNumberWithTimeUnits(input: string, pos: number, nativeTimeUnits: readonly TimeUnitMapping[], options?: {
+ allowSign?: boolean;
+ skipWhitespace?: boolean;
+ }): LanguageToken | null;
+ /**
+ * Try to extract a URL at the current position.
+ * Handles /path, ./path, ../path, //domain.com, http://, https://
+ */
+ protected tryUrl(input: string, pos: number): LanguageToken | null;
+ /**
+ * Try to extract a variable reference (:varname) at the current position.
+ * In hyperscript, :x refers to a local variable named x.
+ */
+ protected tryVariableRef(input: string, pos: number): LanguageToken | null;
+ /**
+ * Try to extract an operator or punctuation token at the current position.
+ * Handles two-character operators (==, !=, etc.) and single-character operators.
+ */
+ protected tryOperator(input: string, pos: number): LanguageToken | null;
+ /**
+ * Try to match a multi-character particle from a list.
+ *
+ * Used by languages like Japanese, Korean, and Chinese that have
+ * multi-character particles (e.g., Japanese から, まで, より).
+ *
+ * @param input - Input string
+ * @param pos - Current position
+ * @param particles - Array of multi-character particles to match
+ * @returns Token if matched, null otherwise
+ */
+ protected tryMultiCharParticle(input: string, pos: number, particles: readonly string[]): LanguageToken | null;
+ }
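A hypothetical subclass sketch showing how the protected helpers of BaseTokenizer might be wired up for a simple space-delimited language; the 'xx' language code, the profile contents, and the simplified word-index positions are assumptions for the example, not how the package's tokenizers are actually implemented:

// Illustrative only: a minimal whitespace tokenizer built on the declared base class.
class SpaceTokenizer extends BaseTokenizer {
  readonly language = 'xx';
  readonly direction = 'ltr' as const;
  constructor(profile: TokenizerProfile) {
    super();
    this.initializeKeywordsFromProfile(profile);  // build the native -> normalized keyword map
  }
  classifyToken(token: string): TokenKind {
    return this.isKeyword(token) ? 'keyword' : 'identifier';
  }
  tokenize(input: string): TokenStream {
    const tokens = input.split(/\s+/).filter(Boolean).map((value, i): LanguageToken => ({
      value,
      kind: this.classifyToken(value),
      position: { start: i, end: i },             // simplified: word index, not char offsets
      normalized: this.lookupKeyword(value)?.normalized,
    }));
    return new TokenStreamImpl(tokens, this.language);
  }
}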
801
+
802
+ /**
803
+ * Arabic Tokenizer
804
+ *
805
+ * Tokenizes Arabic hyperscript input.
806
+ * Arabic is challenging because:
807
+ * - Right-to-left (RTL) text direction
808
+ * - Prefix prepositions that attach to words (بـ, لـ, كـ)
809
+ * - Root-pattern morphology
810
+ * - CSS selectors are LTR islands within RTL text
811
+ */
812
+
813
+ declare class ArabicTokenizer extends BaseTokenizer {
814
+ readonly language = "ar";
815
+ readonly direction: "rtl";
816
+ constructor();
817
+ tokenize(input: string): TokenStream;
818
+ classifyToken(token: string): TokenKind;
819
+ /**
820
+ * Try to match an Arabic preposition.
821
+ * Attaches prepositionValue metadata for disambiguation in pattern matching.
822
+ */
823
+ private tryPreposition;
824
+ /**
825
+ * Try to extract a proclitic (conjunction or preposition) that's attached to the following word.
826
+ *
827
+ * Arabic proclitics attach directly to words without space:
828
+ * - والنقر → و + النقر (and + the-click)
829
+ * - فالتبديل → ف + التبديل (then + the-toggle)
830
+ * - بالنقر → ب + النقر (with + the-click)
831
+ * - ولالنقر → و + ل + النقر (and + to + the-click)
832
+ *
833
+ * This enables:
834
+ * - Polysyndetic coordination: A وB وC
835
+ * - Attached prepositions: بالنقر (with-the-click)
836
+ * - Multi-proclitic sequences: ولالنقر (and-to-the-click)
837
+ *
838
+ * Returns null if:
839
+ * - Not a proclitic character/sequence
840
+ * - Proclitic is standalone (followed by space)
841
+ * - Remaining word is too short (< 2 chars, to avoid false positives)
842
+ * - Full word is a recognized keyword (e.g., بدل should NOT be split to ب + دل)
843
+ *
844
+ * @see NATIVE_REVIEW_NEEDED.md for implementation rationale
845
+ */
846
+ private tryProclitic;
847
+ /**
848
+ * Extract an Arabic word.
849
+ * Uses morphological normalization to handle prefix/suffix variations.
850
+ * Attaches metadata for temporal markers (formality, confidence).
851
+ */
852
+ private extractArabicWord;
853
+ /**
854
+ * Extract an ASCII word.
855
+ */
856
+ private extractAsciiWord;
857
+ /**
858
+ * Extract a number, including Arabic time unit suffixes.
859
+      * Arabic allows a space between the number and the unit.
860
+ */
861
+ private extractArabicNumber;
862
+ }
863
+ /**
864
+ * Singleton instance.
865
+ */
866
+ declare const arabicTokenizer: ArabicTokenizer;
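As a quick illustration of the tokenizer described above, the sketch below runs an Arabic statement through the singleton. The root-level import is an assumption; depending on how the package is consumed, the Arabic language module may need to be loaded first (the '/languages/ar' path mirrors the '/languages/en' registration shown later in this file).

```typescript
import '@lokascript/semantic/languages/ar';             // assumed registration path
import { arabicTokenizer } from '@lokascript/semantic'; // assumed root export

// RTL input with an LTR CSS-selector island. The keyword بدل is matched whole
// (not split into ب + دل), while attached proclitics such as بالنقر would be
// split into ب + النقر before classification.
const stream = arabicTokenizer.tokenize('بدل .active على #button');

arabicTokenizer.language;  // "ar"
arabicTokenizer.direction; // "rtl"
```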
867
+
868
+ /**
869
+ * Language Profile Types
870
+ *
871
+ * Type definitions for language profiles, separated for tree-shaking.
872
+ */
873
+
874
+ /**
875
+ * Word order in a language (for declarative statements).
876
+ */
877
+ type WordOrder = 'SVO' | 'SOV' | 'VSO' | 'VOS' | 'OSV' | 'OVS';
878
+ /**
879
+ * How grammatical relationships are marked.
880
+ */
881
+ type MarkingStrategy$1 = 'preposition' | 'postposition' | 'particle' | 'case-suffix';
882
+ /**
883
+ * A grammatical marker (preposition, particle, etc.) for a semantic role.
884
+ */
885
+ interface RoleMarker {
886
+ /** Primary marker for this role */
887
+ readonly primary: string;
888
+ /** Alternative markers that also work */
889
+ readonly alternatives?: string[];
890
+ /** Position relative to the role value */
891
+ readonly position: 'before' | 'after';
892
+ }
893
+ /**
894
+ * Verb form configuration for a language.
895
+ */
896
+ interface VerbConfig$1 {
897
+ /** Position of verb in the sentence */
898
+ readonly position: 'start' | 'end' | 'second';
899
+ /** Common verb suffixes/conjugations to recognize */
900
+ readonly suffixes?: string[];
901
+ /** Whether the language commonly drops subjects */
902
+ readonly subjectDrop?: boolean;
903
+ }
904
+ /**
905
+ * Configuration for possessive expression construction.
906
+ * Defines how "X's property" is expressed in a language.
907
+ */
908
+ interface PossessiveConfig {
909
+ /** Possessive marker (e.g., "'s" in English, "の" in Japanese) */
910
+ readonly marker: string;
911
+ /** Position of marker: 'after-object' (X's Y), 'between' (X の Y), 'before-property' */
912
+ readonly markerPosition: 'after-object' | 'between' | 'before-property';
913
+ /** Special possessive forms (e.g., 'me' → 'my' in English) */
914
+ readonly specialForms?: Record<string, string>;
915
+ /** Whether to use possessive adjectives instead of marker (e.g., Spanish mi/tu/su) */
916
+ readonly usePossessiveAdjectives?: boolean;
917
+ /**
918
+ * Possessive keywords mapped to their corresponding reference.
919
+ * Used by pattern-matcher to recognize possessive expressions.
920
+ * Example: { my: 'me', your: 'you', its: 'it' }
921
+ */
922
+ readonly keywords?: Record<string, string>;
923
+ }
924
+ /**
925
+ * Complete language profile for pattern generation.
926
+ */
927
+ interface LanguageProfile {
928
+ /** ISO 639-1 language code */
929
+ readonly code: string;
930
+ /** Human-readable language name */
931
+ readonly name: string;
932
+ /** Native name */
933
+ readonly nativeName: string;
934
+ /** Text direction */
935
+ readonly direction: 'ltr' | 'rtl';
936
+ /** Primary word order */
937
+ readonly wordOrder: WordOrder;
938
+ /** How this language marks grammatical roles */
939
+ readonly markingStrategy: MarkingStrategy$1;
940
+ /** Markers for each semantic role */
941
+ readonly roleMarkers: Partial<Record<SemanticRole, RoleMarker>>;
942
+ /** Verb configuration */
943
+ readonly verb: VerbConfig$1;
944
+ /** Command keyword translations */
945
+ readonly keywords: Record<string, KeywordTranslation$1>;
946
+ /** Whether the language uses spaces between words */
947
+ readonly usesSpaces: boolean;
948
+ /** Special tokenization notes */
949
+ readonly tokenization?: TokenizationConfig$1;
950
+ /** Reference translations (me, it, you, etc.) */
951
+ readonly references?: Record<string, string>;
952
+ /** Possessive expression configuration */
953
+ readonly possessive?: PossessiveConfig;
954
+ /** Event handler pattern configuration (for simple SVO languages) */
955
+ readonly eventHandler?: EventHandlerConfig;
956
+ /**
957
+ * Default verb form for command keywords. Defaults to 'infinitive'.
958
+ *
959
+ * Based on software UI localization research:
960
+ * - 'infinitive': Spanish, French, German, Portuguese, Russian (industry standard)
961
+ * - 'imperative': Polish
962
+ * - 'base': English, Japanese, Korean (no distinction or same form)
963
+ *
964
+ * Individual keywords can override this via KeywordTranslation.form
965
+ */
966
+ readonly defaultVerbForm?: VerbForm;
967
+ }
968
+ /**
969
+ * Configuration for event handler pattern generation.
970
+ * Used by simple SVO languages that don't need hand-crafted patterns.
971
+ */
972
+ interface EventHandlerConfig {
973
+ /** Primary event keyword (e.g., 'on', 'bei', 'sur') */
974
+ readonly keyword: KeywordTranslation$1;
975
+ /** Source filter marker (e.g., 'from', 'von', 'de') */
976
+ readonly sourceMarker: RoleMarker;
977
+ /** Conditional keyword (e.g., 'when', 'wenn', 'quand') */
978
+ readonly conditionalKeyword?: KeywordTranslation$1;
979
+ }
980
+ /**
981
+ * Verb form used for command keywords.
982
+ *
983
+ * Based on software localization research:
984
+ * - 'infinitive': Standard for most languages (Spanish, French, German, Russian)
985
+ * Example: "Guardar", "Enregistrer", "Speichern"
986
+ * - 'imperative': Used by some languages (Polish)
987
+ * Example: "Zapisz", "Otwórz"
988
+ * - 'base': For languages where forms are identical (English, Japanese, Korean)
989
+ * or where the distinction doesn't apply
990
+ */
991
+ type VerbForm = 'infinitive' | 'imperative' | 'base';
992
+ /**
993
+ * Translation of a command keyword.
994
+ */
995
+ interface KeywordTranslation$1 {
996
+ /** Primary translation (used for output/rendering) */
997
+ readonly primary: string;
998
+ /** Alternative forms for parsing (conjugations, synonyms, informal variants) */
999
+ readonly alternatives?: string[];
1000
+ /** Normalized English form for internal matching */
1001
+ readonly normalized?: string;
1002
+ /**
1003
+ * The grammatical form of 'primary'. Defaults to 'infinitive'.
1004
+ * This documents the form used and enables future form-switching features.
1005
+ * - 'infinitive': Dictionary form (alternar, basculer) - industry standard
1006
+ * - 'imperative': Command form (alterna, bascule) - for Polish, etc.
1007
+ * - 'base': Same form for both (toggle, トグル) - English, Japanese, Korean
1008
+ */
1009
+ readonly form?: VerbForm;
1010
+ }
1011
+ /**
1012
+ * Special tokenization configuration.
1013
+ */
1014
+ interface TokenizationConfig$1 {
1015
+ /** Particles to recognize (for particle languages) */
1016
+ readonly particles?: string[];
1017
+ /** Prefixes to recognize (for prefixing languages) */
1018
+ readonly prefixes?: string[];
1019
+ /** Word boundary detection strategy */
1020
+ readonly boundaryStrategy?: 'space' | 'particle' | 'character';
1021
+ }
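To make these types concrete, here is a small illustrative profile. It is not the English profile the package ships; the root-level type import and the use of 'destination' and 'source' as SemanticRole members are assumptions based on the role-to-preposition mapping later in this file.

```typescript
import type { LanguageProfile } from '@lokascript/semantic'; // assumed type export

// A minimal, hypothetical SVO profile showing how the pieces fit together.
const demoProfile: LanguageProfile = {
  code: 'xx', // placeholder code for the sketch
  name: 'Demo',
  nativeName: 'Demo',
  direction: 'ltr',
  wordOrder: 'SVO',
  markingStrategy: 'preposition',
  roleMarkers: {
    destination: { primary: 'on', alternatives: ['onto'], position: 'before' },
    source: { primary: 'from', position: 'before' },
  },
  verb: { position: 'start', subjectDrop: false },
  keywords: {
    toggle: { primary: 'toggle', normalized: 'toggle', form: 'base' },
  },
  usesSpaces: true,
  possessive: {
    marker: "'s",
    markerPosition: 'after-object',
    keywords: { my: 'me', its: 'it' },
  },
};
```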
1022
+
1023
+ /**
1024
+ * Arabic Language Profile
1025
+ *
1026
+ * VSO word order, prepositions, RTL (right-to-left), space-separated.
1027
+ * Features root-based morphology and rich verb conjugation.
1028
+ */
1029
+
1030
+ declare const arabicProfile: LanguageProfile;
1031
+
1032
+ /**
1033
+ * German Language Profile
1034
+ *
1035
+ * SVO word order (V2 in main clauses), prepositions, space-separated.
1036
+ * Features case system, compound words, and verb-second word order in main clauses.
1037
+ */
1038
+
1039
+ declare const germanProfile: LanguageProfile;
1040
+
1041
+ /**
1042
+ * English Tokenizer
1043
+ *
1044
+ * Tokenizes English hyperscript input.
1045
+ * English uses space-separated words with prepositions.
1046
+ */
1047
+
1048
+ declare class EnglishTokenizer extends BaseTokenizer {
1049
+ readonly language = "en";
1050
+ readonly direction: "ltr";
1051
+ constructor();
1052
+ tokenize(input: string): TokenStream;
1053
+ classifyToken(token: string): TokenKind;
1054
+ /**
1055
+ * Extract a word (identifier or keyword) from the input.
1056
+ * Handles namespaced event names like "draggable:start".
1057
+ */
1058
+ private extractWord;
1059
+ /**
1060
+ * Try to convert an identifier followed by "class" to a class selector.
1061
+ * E.g., "active class" → ".active"
1062
+ *
1063
+ * This enables natural English syntax like:
1064
+ * - "toggle the active class" → "toggle .active"
1065
+ * - "add the visible class" → "add .visible"
1066
+ */
1067
+ private tryConvertToClassSelector;
1068
+ }
1069
+ /**
1070
+ * Singleton instance.
1071
+ */
1072
+ declare const englishTokenizer: EnglishTokenizer;
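For instance, the class-selector conversion means an input like "toggle the active class" tokenizes with .active as a selector token. A sketch (root export assumed; registering the language module first mirrors the registry usage shown later in this file):

```typescript
import '@lokascript/semantic/languages/en';
import { englishTokenizer } from '@lokascript/semantic'; // assumed root export

// "active class" is folded into the selector ".active" during tokenization;
// the word "the" is left for the pattern matcher to skip as a noise word.
const stream = englishTokenizer.tokenize('toggle the active class');

englishTokenizer.classifyToken('toggle'); // TokenKind for a known keyword
```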
1073
+
1074
+ /**
1075
+ * English Language Profile
1076
+ *
1077
+ * SVO word order, prepositions, space-separated.
1078
+ */
1079
+
1080
+ declare const englishProfile: LanguageProfile;
1081
+
1082
+ /**
1083
+ * Spanish Tokenizer
1084
+ *
1085
+ * Tokenizes Spanish hyperscript input.
1086
+ * Spanish is relatively straightforward as it:
1087
+ * - Uses space-separated words like English
1088
+  * - Has SVO word order and a similar preposition structure
1089
+ * - Uses accent marks that need proper handling
1090
+ */
1091
+
1092
+ declare class SpanishTokenizer extends BaseTokenizer {
1093
+ readonly language = "es";
1094
+ readonly direction: "ltr";
1095
+ constructor();
1096
+ tokenize(input: string): TokenStream;
1097
+ classifyToken(token: string): TokenKind;
1098
+ /**
1099
+ * Try to match multi-word phrases that function as single units.
1100
+ * Multi-word phrases are included in profileKeywords and sorted longest-first,
1101
+ * so they'll be matched before their constituent words.
1102
+ */
1103
+ private tryMultiWordPhrase;
1104
+ /**
1105
+ * Extract a Spanish word.
1106
+ *
1107
+ * Uses morphological normalization to handle:
1108
+ * - Reflexive verbs (mostrarse → mostrar)
1109
+ * - Verb conjugations (alternando → alternar)
1110
+ */
1111
+ private extractSpanishWord;
1112
+ /**
1113
+ * Extract a number, including Spanish time unit suffixes.
1114
+ */
1115
+ private extractSpanishNumber;
1116
+ }
1117
+ /**
1118
+ * Singleton instance.
1119
+ */
1120
+ declare const spanishTokenizer: SpanishTokenizer;
1121
+
1122
+ /**
1123
+ * Spanish Language Profile
1124
+ *
1125
+ * SVO word order, prepositions, space-separated.
1126
+ * Features rich verb conjugation with pro-drop (subject omission).
1127
+ */
1128
+
1129
+ declare const spanishProfile: LanguageProfile;
1130
+
1131
+ /**
1132
+ * French Language Profile
1133
+ *
1134
+ * SVO word order, prepositions, space-separated.
1135
+ * Features rich verb conjugation and gendered articles.
1136
+ */
1137
+
1138
+ declare const frenchProfile: LanguageProfile;
1139
+
1140
+ /**
1141
+ * Indonesian Language Profile
1142
+ *
1143
+ * SVO word order, prepositions, space-separated, agglutinative.
1144
+ * Features affixation for verb derivation (me-, ber-, di-, -kan, -i).
1145
+ */
1146
+
1147
+ declare const indonesianProfile: LanguageProfile;
1148
+
1149
+ /**
1150
+ * Japanese Tokenizer
1151
+ *
1152
+ * Tokenizes Japanese hyperscript input.
1153
+ * Japanese is challenging because:
1154
+ * - No spaces between words
1155
+ * - Particles (助詞) mark grammatical roles
1156
+ * - Mixed scripts (hiragana, katakana, kanji, romaji)
1157
+ * - CSS selectors are embedded ASCII
1158
+ */
1159
+
1160
+ declare class JapaneseTokenizer extends BaseTokenizer {
1161
+ readonly language = "ja";
1162
+ readonly direction: "ltr";
1163
+ constructor();
1164
+ tokenize(input: string): TokenStream;
1165
+ classifyToken(token: string): TokenKind;
1166
+ /**
1167
+ * Extract a Japanese word (sequence of kanji/kana).
1168
+ * Stops at particles, ASCII, or whitespace.
1169
+ *
1170
+ * Uses morphological normalization to handle verb conjugations:
1171
+ * 1. First checks if the exact word is in the keyword map
1172
+ * 2. If not found, tries to strip conjugation suffixes and check again
1173
+ */
1174
+ private extractJapaneseWord;
1175
+ /**
1176
+ * Extract an ASCII word (for mixed Japanese/English content).
1177
+ */
1178
+ private extractAsciiWord;
1179
+ /**
1180
+ * Extract a number, including Japanese time unit suffixes.
1181
+ * Japanese time units attach directly without whitespace.
1182
+ */
1183
+ private extractJapaneseNumber;
1184
+ }
1185
+ /**
1186
+ * Singleton instance.
1187
+ */
1188
+ declare const japaneseTokenizer: JapaneseTokenizer;
1189
+
1190
+ /**
1191
+ * Japanese Language Profile
1192
+ *
1193
+ * SOV word order, particles (を, に, で, etc.), no spaces between words.
1194
+ * Agglutinative language with rich verb conjugation.
1195
+ */
1196
+
1197
+ declare const japaneseProfile: LanguageProfile;
1198
+
1199
+ /**
1200
+ * Korean Tokenizer
1201
+ *
1202
+ * Tokenizes Korean hyperscript input.
1203
+ * Korean is an agglutinative language with:
1204
+ * - Hangul syllable blocks (가-힣)
1205
+ * - Particles (조사) mark grammatical roles
1206
+ * - 하다 verbs (noun + 하다)
1207
+ * - CSS selectors are embedded ASCII
1208
+ */
1209
+
1210
+ declare class KoreanTokenizer extends BaseTokenizer {
1211
+ readonly language = "ko";
1212
+ readonly direction: "ltr";
1213
+ constructor();
1214
+ tokenize(input: string): TokenStream;
1215
+ classifyToken(token: string): TokenKind;
1216
+ /**
1217
+ * Extract a Korean word (sequence of Hangul).
1218
+ * Prioritizes known keywords, then uses particle-based word boundaries.
1219
+ *
1220
+ * Uses morphological normalization to handle verb conjugations.
1221
+ */
1222
+ private extractKoreanWord;
1223
+ /**
1224
+ * Extract an ASCII word (for mixed Korean/English content).
1225
+ */
1226
+ private extractAsciiWord;
1227
+ /**
1228
+ * Extract a number, including Korean time unit suffixes.
1229
+ * Korean time units attach directly without whitespace.
1230
+ */
1231
+ private extractKoreanNumber;
1232
+ }
1233
+ /**
1234
+ * Singleton instance.
1235
+ */
1236
+ declare const koreanTokenizer: KoreanTokenizer;
1237
+
1238
+ /**
1239
+ * Korean Language Profile
1240
+ *
1241
+  * SOV word order, particles (을/를, 에, 에서, etc.), spaces between words.
1242
+ * Agglutinative language with particles attaching to words.
1243
+ */
1244
+
1245
+ declare const koreanProfile: LanguageProfile;
1246
+
1247
+ /**
1248
+ * Portuguese Language Profile
1249
+ *
1250
+ * SVO word order, prepositions, space-separated.
1251
+ * Features rich verb conjugation with pro-drop (subject omission).
1252
+ */
1253
+
1254
+ declare const portugueseProfile: LanguageProfile;
1255
+
1256
+ /**
1257
+ * Quechua Language Profile
1258
+ *
1259
+ * SOV word order, postpositions (suffixes), polysynthetic/agglutinative.
1260
+ * Indigenous language of the Andean region with rich morphology.
1261
+ */
1262
+
1263
+ declare const quechuaProfile: LanguageProfile;
1264
+
1265
+ /**
1266
+ * Swahili Language Profile
1267
+ *
1268
+ * SVO word order, prepositions, space-separated, agglutinative.
1269
+ * Features noun class system (18 classes) and verb agreement prefixes.
1270
+ */
1271
+
1272
+ declare const swahiliProfile: LanguageProfile;
1273
+
1274
+ /**
1275
+ * Turkish Tokenizer
1276
+ *
1277
+ * Tokenizes Turkish hyperscript input.
1278
+ * Turkish is challenging because:
1279
+ * - Highly agglutinative (many suffixes attach to words)
1280
+ * - Strict vowel harmony rules
1281
+ * - Postpositions instead of prepositions
1282
+ * - No grammatical gender
1283
+ * - Word order is typically SOV
1284
+ */
1285
+
1286
+ declare class TurkishTokenizer extends BaseTokenizer {
1287
+ readonly language = "tr";
1288
+ readonly direction: "ltr";
1289
+ constructor();
1290
+ tokenize(input: string): TokenStream;
1291
+ classifyToken(token: string): TokenKind;
1292
+ /**
1293
+ * Extract a Turkish word.
1294
+ * Uses morphological normalization to handle verb conjugations.
1295
+ */
1296
+ private extractTurkishWord;
1297
+ /**
1298
+ * Extract a number, including Turkish time unit suffixes.
1299
+ */
1300
+ private extractTurkishNumber;
1301
+ }
1302
+ /**
1303
+ * Singleton instance.
1304
+ */
1305
+ declare const turkishTokenizer: TurkishTokenizer;
1306
+
1307
+ /**
1308
+ * Turkish Language Profile
1309
+ *
1310
+ * SOV word order, case suffixes (agglutinative), space-separated.
1311
+ * Features vowel harmony and extensive suffixation.
1312
+ */
1313
+
1314
+ declare const turkishProfile: LanguageProfile;
1315
+
1316
+ /**
1317
+ * Chinese Tokenizer
1318
+ *
1319
+ * Tokenizes Chinese hyperscript input.
1320
+ * Chinese is challenging because:
1321
+ * - No spaces between words (like Japanese)
1322
+ * - Uses CJK characters (shared with Japanese Kanji)
1323
+ * - SVO word order (like English)
1324
+ * - Uses prepositions (把, 在, 从, etc.) for grammatical roles
1325
+ * - No conjugation (unlike Japanese/Korean)
1326
+ * - CSS selectors are embedded ASCII
1327
+ */
1328
+
1329
+ declare class ChineseTokenizer extends BaseTokenizer {
1330
+ readonly language = "zh";
1331
+ readonly direction: "ltr";
1332
+ constructor();
1333
+ tokenize(input: string): TokenStream;
1334
+ classifyToken(token: string): TokenKind;
1335
+ /**
1336
+ * Extract a Chinese word.
1337
+ * Uses greedy matching to find the longest known keyword.
1338
+ * Chinese doesn't have inflection, so we don't need morphological normalization.
1339
+ * profileKeywords is already sorted longest-first, enabling greedy matching.
1340
+ */
1341
+ private extractChineseWord;
1342
+ /**
1343
+ * Extract an ASCII word (for mixed Chinese/English content).
1344
+ */
1345
+ private extractAsciiWord;
1346
+ /**
1347
+ * Try to extract a string literal, including Chinese quotes.
1348
+      * Chinese quotes: " (U+201C, open), " (U+201D, close), ' (U+2018, open), ' (U+2019, close)
1349
+ */
1350
+ private tryChineseString;
1351
+ /**
1352
+ * Extract a number, including Chinese time unit suffixes.
1353
+ * Chinese time units attach directly without whitespace.
1354
+ */
1355
+ private extractChineseNumber;
1356
+ }
1357
+ /**
1358
+ * Singleton instance.
1359
+ */
1360
+ declare const chineseTokenizer: ChineseTokenizer;
1361
+
1362
+ /**
1363
+ * Chinese (Simplified) Language Profile
1364
+ *
1365
+ * SVO word order, no markers (relies on word order), no spaces between words.
1366
+  * Isolating language with topic-comment structure and an optional 把 (bǎ) construction.
1367
+ */
1368
+
1369
+ declare const chineseProfile: LanguageProfile;
1370
+
1371
+ /**
1372
+ * Pattern Registry
1373
+ *
1374
+ * Pattern cache and lookup functions for the semantic parser.
1375
+ */
1376
+
1377
+ /**
1378
+ * Get all patterns.
1379
+ * @deprecated Use getPatternsForLanguage() for tree-shaking.
1380
+ */
1381
+ declare function getAllPatterns(): LanguagePattern[];
1382
+ /**
1383
+ * Get all patterns for a specific language.
1384
+ * Uses caching for performance.
1385
+ */
1386
+ declare function getPatternsForLanguage(language: string): LanguagePattern[];
1387
+ /**
1388
+ * Get patterns for a specific language and command.
1389
+ */
1390
+ declare function getPatternsForLanguageAndCommand(language: string, command: ActionType): LanguagePattern[];
1391
+ /**
1392
+ * Get all supported languages.
1393
+ */
1394
+ declare function getSupportedLanguages$3(): string[];
1395
+ /**
1396
+ * Get all supported commands.
1397
+ */
1398
+ declare function getSupportedCommands(): ActionType[];
1399
+ /**
1400
+ * Find a pattern by ID.
1401
+ */
1402
+ declare function getPatternById(id: string): LanguagePattern | undefined;
1403
+ interface PatternStats {
1404
+ totalPatterns: number;
1405
+ byLanguage: Record<string, number>;
1406
+ byCommand: Record<string, number>;
1407
+ }
1408
+ /**
1409
+ * Get statistics about registered patterns.
1410
+ */
1411
+ declare function getPatternStats(): PatternStats;
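A small usage sketch for the registry lookups. The un-suffixed names are assumed to be the public exports (the $3 suffix above is a declaration-bundling artifact), and patterns exist only for languages that have been registered.

```typescript
import '@lokascript/semantic/languages/en';
import { getPatternsForLanguage, getPatternStats, getPatternById } from '@lokascript/semantic'; // assumed root exports

const enPatterns = getPatternsForLanguage('en'); // cached after the first call
const stats = getPatternStats();
stats.totalPatterns;    // total registered patterns
stats.byLanguage['en']; // how many of those are English

getPatternById('unknown-id'); // undefined for ids that were never registered
```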
1412
+
1413
+ /**
1414
+ * Toggle Command Patterns
1415
+ *
1416
+ * Hand-crafted patterns for "toggle" command across languages.
1417
+ *
1418
+ * @generated This file is auto-generated. Do not edit manually.
1419
+ */
1420
+
1421
+ /**
1422
+ * Get toggle patterns for a specific language.
1423
+ */
1424
+ declare function getTogglePatternsForLanguage(language: string): LanguagePattern[];
1425
+
1426
+ /**
1427
+ * Put Command Patterns
1428
+ *
1429
+ * Hand-crafted patterns for "put" command across languages.
1430
+ *
1431
+ * @generated This file is auto-generated. Do not edit manually.
1432
+ */
1433
+
1434
+ /**
1435
+ * Get put patterns for a specific language.
1436
+ */
1437
+ declare function getPutPatternsForLanguage(language: string): LanguagePattern[];
1438
+
1439
+ /**
1440
+ * EventHandler Command Patterns
1441
+ *
1442
+ * Hand-crafted patterns for "event-handler" command across languages.
1443
+ *
1444
+ * @generated This file is auto-generated. Do not edit manually.
1445
+ */
1446
+
1447
+ /**
1448
+ * Get event-handler patterns for a specific language.
1449
+ */
1450
+ declare function getEventHandlerPatternsForLanguage(language: string): LanguagePattern[];
1451
+
1452
+ /**
1453
+ * Shared Event Handler Utilities
1454
+ *
1455
+ * Event name translations and normalization used across all languages.
1456
+ */
1457
+ /**
1458
+ * Common event names translated across languages.
1459
+ * Used by tokenizers to normalize event names to English.
1460
+ */
1461
+ declare const eventNameTranslations: Record<string, Record<string, string>>;
1462
+ /**
1463
+ * Normalize an event name to English.
1464
+ */
1465
+ declare function normalizeEventName(event: string, language: string): string;
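A brief sketch of event-name normalization. Whether the Japanese word below is actually present in the translation table is an assumption of this example.

```typescript
import { normalizeEventName } from '@lokascript/semantic'; // assumed root export

normalizeEventName('クリック', 'ja'); // expected: 'click', if the table maps it
normalizeEventName('click', 'en');    // already English
```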
1466
+
1467
+ /**
1468
+ * Tokenizer Registry
1469
+ *
1470
+ * Provides a unified interface for tokenization.
1471
+ * Delegates to the central registry for language lookups.
1472
+ *
1473
+ * For tree-shaking, import specific tokenizers directly:
1474
+ * import { englishTokenizer } from './tokenizers/english';
1475
+ *
1476
+ * To register languages, import the language modules:
1477
+ * import '@lokascript/semantic/languages/en';
1478
+ */
1479
+
1480
+ /**
1481
+ * Get a tokenizer for the specified language.
1482
+ * Returns undefined if language is not registered.
1483
+ */
1484
+ declare function getTokenizer(language: string): LanguageTokenizer | undefined;
1485
+ /**
1486
+ * Tokenize input in the specified language.
1487
+ * @throws Error if language is not registered
1488
+ */
1489
+ declare function tokenize(input: string, language: string): TokenStream;
1490
+ /**
1491
+ * Get all supported languages.
1492
+ * Returns only languages that have been registered.
1493
+ */
1494
+ declare function getSupportedLanguages$2(): string[];
1495
+ /**
1496
+ * Check if a language is supported.
1497
+ */
1498
+ declare function isLanguageSupported$1(language: string): boolean;
1499
+ /**
1500
+ * Register a custom tokenizer.
1501
+ * Note: For full language support, use registerLanguage() from registry instead.
1502
+ */
1503
+ declare function registerTokenizer(tokenizer: LanguageTokenizer): void;
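Putting the registry functions together (the $1/$2 suffixes above are declaration-bundling artifacts; the un-suffixed export names used below are an assumption):

```typescript
import '@lokascript/semantic/languages/en';
import { getTokenizer, tokenize, isLanguageSupported } from '@lokascript/semantic';

if (isLanguageSupported('en')) {
  const stream = tokenize('toggle .active on #button', 'en'); // throws for unregistered languages
}

getTokenizer('ja'); // undefined until '@lokascript/semantic/languages/ja' is imported
```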
1504
+
1505
+ declare class PatternMatcher$1 {
1506
+ /** Current language profile for the pattern being matched */
1507
+ private currentProfile;
1508
+ /**
1509
+ * Try to match a single pattern against the token stream.
1510
+ * Returns the match result or null if no match.
1511
+ */
1512
+ matchPattern(tokens: TokenStream, pattern: LanguagePattern): PatternMatchResult | null;
1513
+ /**
1514
+ * Try to match multiple patterns, return the best match.
1515
+ */
1516
+ matchBest(tokens: TokenStream, patterns: LanguagePattern[]): PatternMatchResult | null;
1517
+ /**
1518
+ * Match a sequence of pattern tokens against the token stream.
1519
+ */
1520
+ private matchTokenSequence;
1521
+ /**
1522
+ * Match a single pattern token against the current position in the stream.
1523
+ */
1524
+ private matchPatternToken;
1525
+ /**
1526
+ * Match a literal pattern token (keyword or particle).
1527
+ */
1528
+ private matchLiteralToken;
1529
+ /**
1530
+ * Match a role pattern token (captures a semantic value).
1531
+ * Handles multi-token expressions like:
1532
+ * - 'my value' (possessive keyword + property)
1533
+ * - '#dialog.showModal()' (method call)
1534
+ * - "#element's *opacity" (possessive selector + property)
1535
+ */
1536
+ private matchRoleToken;
1537
+ /**
1538
+ * Try to match a possessive expression like 'my value' or 'its innerHTML'.
1539
+ * Returns the PropertyPathValue if matched, or null if not.
1540
+ */
1541
+ private tryMatchPossessiveExpression;
1542
+ /**
1543
+ * Check if a keyword is a structural keyword (preposition, control flow, etc.)
1544
+ * that shouldn't be consumed as a property name.
1545
+ */
1546
+ private isStructuralKeyword;
1547
+ /**
1548
+ * Try to match a method call expression like '#dialog.showModal()'.
1549
+ * Pattern: selector + '.' + identifier + '(' + [args] + ')'
1550
+ * Returns an expression value if matched, or null if not.
1551
+ */
1552
+ private tryMatchMethodCallExpression;
1553
+ /**
1554
+ * Try to match a property access expression like 'userData.name' or 'it.data'.
1555
+ * Pattern: (identifier | keyword) + '.' + identifier [+ '.' + identifier ...]
1556
+ * Returns an expression value if matched, or null if not.
1557
+ */
1558
+ private tryMatchPropertyAccessExpression;
1559
+ /**
1560
+ * Try to match a possessive selector expression like "#element's *opacity".
1561
+ * Pattern: selector + "'s" + (selector | identifier)
1562
+ * Returns a property-path value if matched, or null if not.
1563
+ */
1564
+ private tryMatchPossessiveSelectorExpression;
1565
+ /**
1566
+ * Try to match a selector + property expression like "#output.innerText".
1567
+ * This handles cases where the tokenizer produces two selector tokens:
1568
+ * - #output (id selector)
1569
+ * - .innerText (looks like class selector, but is actually property)
1570
+ *
1571
+ * Pattern: id-selector + class-selector-that-is-actually-property
1572
+ * Returns a property-path value if matched, or null if not.
1573
+ */
1574
+ private tryMatchSelectorPropertyExpression;
1575
+ /**
1576
+ * Match a group pattern token (optional sequence).
1577
+ */
1578
+ private matchGroupToken;
1579
+ /**
1580
+ * Get the type of match for a token against a value.
1581
+ * Used for confidence calculation.
1582
+ */
1583
+ private getMatchType;
1584
+ /**
1585
+ * Track stem matches for confidence calculation.
1586
+ * This is set during matching and read during confidence calculation.
1587
+ */
1588
+ private stemMatchCount;
1589
+ private totalKeywordMatches;
1590
+ /** Maximum depth for nested property access (e.g., a.b.c.d...) */
1591
+ private static readonly MAX_PROPERTY_DEPTH;
1592
+ /** Maximum number of arguments in method calls */
1593
+ private static readonly MAX_METHOD_ARGS;
1594
+ /**
1595
+ * Convert a language token to a semantic value.
1596
+ */
1597
+ private tokenToSemanticValue;
1598
+ /**
1599
+ * Parse a literal value (string, number, boolean).
1600
+ */
1601
+ private parseLiteralValue;
1602
+ /**
1603
+ * Apply extraction rules to fill in default values for missing roles.
1604
+ */
1605
+ private applyExtractionRules;
1606
+ /**
1607
+ * Check if a pattern token is optional.
1608
+ */
1609
+ private isOptional;
1610
+ /**
1611
+ * Calculate confidence score for a match (0-1).
1612
+ *
1613
+ * Confidence is reduced for:
1614
+ * - Stem matches (morphological normalization has inherent uncertainty)
1615
+ * - Missing optional roles (but less penalty if role has a default value)
1616
+ *
1617
+ * Confidence is increased for:
1618
+ * - VSO languages (Arabic) when pattern starts with a verb
1619
+ */
1620
+ private calculateConfidence;
1621
+ /**
1622
+ * Calculate confidence boost for VSO (Verb-Subject-Object) language patterns.
1623
+ * Arabic naturally uses VSO word order, so patterns that start with a verb
1624
+ * should receive a confidence boost.
1625
+ *
1626
+ * Returns +0.15 confidence boost if:
1627
+ * - Language is Arabic ('ar')
1628
+ * - Pattern's first token is a verb keyword
1629
+ *
1630
+ * @param pattern The language pattern being matched
1631
+ * @returns Confidence boost (0 or 0.15)
1632
+ */
1633
+ private calculateVSOConfidenceBoost;
1634
+ /**
1635
+ * Arabic preposition disambiguation for confidence adjustment.
1636
+ *
1637
+ * Different Arabic prepositions are more or less natural for different semantic roles:
1638
+ * - على (on/upon) is preferred for patient/target roles (element selectors)
1639
+ * - إلى (to) is preferred for destination roles
1640
+ * - من (from) is preferred for source roles
1641
+ * - في (in) is preferred for location roles
1642
+ *
1643
+ * This method analyzes the prepositions used with captured semantic roles and
1644
+ * adjusts confidence based on idiomaticity:
1645
+ * - +0.10 for highly idiomatic preposition choices
1646
+ * - -0.10 for less natural preposition choices
1647
+ *
1648
+ * @param pattern The language pattern being matched
1649
+ * @param captured The captured semantic values
1650
+ * @returns Confidence adjustment (-0.10 to +0.10)
1651
+ */
1652
+ private arabicPrepositionDisambiguation;
1653
+ /**
1654
+ * Noise words that can be skipped in English for more natural syntax.
1655
+ * - "the" before selectors: "toggle the .active" → "toggle .active"
1656
+ * - "class" after class selectors: "add the .visible class" → "add .visible"
1657
+ */
1658
+ private static readonly ENGLISH_NOISE_WORDS;
1659
+ /**
1660
+ * Skip noise words like "the" before selectors.
1661
+ * This enables more natural English syntax like "toggle the .active".
1662
+ */
1663
+ private skipNoiseWords;
1664
+ /**
1665
+ * Extract event modifiers from the token stream.
1666
+ * Event modifiers are .once, .debounce(N), .throttle(N), .queue(strategy)
1667
+ * that can appear after event names.
1668
+ *
1669
+ * Returns EventModifiers object or undefined if no modifiers found.
1670
+ */
1671
+ extractEventModifiers(tokens: TokenStream): EventModifiers | undefined;
1672
+ }
1673
+ /**
1674
+ * Singleton pattern matcher instance.
1675
+ */
1676
+ declare const patternMatcher: PatternMatcher$1;
1677
+ /**
1678
+ * Match tokens against a pattern.
1679
+ */
1680
+ declare function matchPattern(tokens: TokenStream, pattern: LanguagePattern): PatternMatchResult | null;
1681
+ /**
1682
+ * Match tokens against multiple patterns, return best match.
1683
+ */
1684
+ declare function matchBest(tokens: TokenStream, patterns: LanguagePattern[]): PatternMatchResult | null;
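A minimal end-to-end matching sketch, assuming the helpers involved are re-exported from the package root:

```typescript
import '@lokascript/semantic/languages/en';
import { tokenize, getPatternsForLanguage, matchBest } from '@lokascript/semantic';

const tokens = tokenize('toggle .active on #button', 'en');
const match = matchBest(tokens, getPatternsForLanguage('en'));
if (match) {
  // PatternMatchResult (defined in the shared types module) carries the captured
  // semantic values and the confidence score described above.
}
```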
1685
+
1686
+ declare class SemanticParserImpl implements SemanticParser {
1687
+ /**
1688
+ * Parse input in the specified language to a semantic node.
1689
+ */
1690
+ parse(input: string, language: string): SemanticNode;
1691
+ /**
1692
+ * Check if input can be parsed in the specified language.
1693
+ */
1694
+ canParse(input: string, language: string): boolean;
1695
+ /**
1696
+ * Get all supported languages.
1697
+ */
1698
+ supportedLanguages(): string[];
1699
+ /**
1700
+ * Build a command semantic node from a pattern match.
1701
+ */
1702
+ private buildCommand;
1703
+ /**
1704
+ * Build an event handler semantic node from a pattern match.
1705
+ */
1706
+ private buildEventHandler;
1707
+ /**
1708
+ * Parse body with proper clause separation.
1709
+ * Splits the token stream at conjunction boundaries (then/それから/ثم/etc.)
1710
+ * and parses each clause independently.
1711
+ *
1712
+ * This handles multi-clause patterns like:
1713
+ * - "toggle .active then remove .hidden"
1714
+ * - ".active を 切り替え それから .hidden を 削除"
1715
+ * - "بدل .active ثم احذف .hidden"
1716
+ *
1717
+ * @param tokens Token stream to parse
1718
+ * @param commandPatterns Command patterns for the language
1719
+ * @param language Language code
1720
+ * @returns Array of semantic nodes (one per clause)
1721
+ */
1722
+ private parseBodyWithClauses;
1723
+ /**
1724
+ * Parse a single clause (sequence of tokens between conjunctions).
1725
+ * Returns array of semantic nodes parsed from the clause.
1726
+ */
1727
+ private parseClause;
1728
+ /**
1729
+ * Parse body commands with support for grammar-transformed patterns.
1730
+ * Used after a grammar-transformed pattern with continuation marker.
1731
+ */
1732
+ private parseBodyWithGrammarPatterns;
1733
+ /**
1734
+ * Check if a token is a 'then' keyword in the given language.
1735
+ */
1736
+ private isThenKeyword;
1737
+ /**
1738
+ * Check if a token is an 'end' keyword in the given language.
1739
+ */
1740
+ private isEndKeyword;
1741
+ }
1742
+ /**
1743
+ * Singleton parser instance.
1744
+ */
1745
+ declare const semanticParser: SemanticParserImpl;
1746
+ /**
1747
+ * Parse input in the specified language.
1748
+ */
1749
+ declare function parse(input: string, language: string): SemanticNode;
1750
+ /**
1751
+ * Check if input can be parsed.
1752
+ */
1753
+ declare function canParse(input: string, language: string): boolean;
1754
+ /**
1755
+ * Parse and return command type if parseable.
1756
+ */
1757
+ declare function getCommandType(input: string, language: string): ActionType | null;
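In practice the function helpers are the simplest entry point. A sketch, assuming root exports and remembering that language modules must be registered first:

```typescript
import '@lokascript/semantic/languages/en';
import '@lokascript/semantic/languages/ja';
import { parse, canParse, getCommandType } from '@lokascript/semantic';

// One meaning, two surface languages:
const en = parse('toggle .active on #button', 'en');
const ja = parse('#button の .active を 切り替え', 'ja');

// Event-handler bodies are split at 'then' (and its translations) into one node per clause:
parse('on click toggle .active then remove .hidden', 'en');

canParse('not hyperscript at all', 'en'); // false when nothing matches
getCommandType('toggle .active', 'en');   // e.g. 'toggle', or null when unparseable
```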
1758
+
1759
+ /**
1760
+ * Explicit Mode Parser
1761
+ *
1762
+ * Parses the explicit [command role:value ...] syntax.
1763
+ * This syntax is universal across all languages and makes
1764
+ * semantic roles visible for learning and debugging.
1765
+ *
1766
+ * Syntax:
1767
+ * [command role1:value1 role2:value2 ...]
1768
+ *
1769
+ * Examples:
1770
+ * [toggle class:.active target:#button]
1771
+ * [put content:"hello" destination:#output]
1772
+ * [on event:click body:[toggle class:.active]]
1773
+ */
1774
+
1775
+ /**
1776
+ * Parse explicit syntax into a semantic node.
1777
+ */
1778
+ declare function parseExplicit(input: string): SemanticNode;
1779
+ /**
1780
+ * Check if input is explicit syntax.
1781
+ */
1782
+ declare function isExplicitSyntax(input: string): boolean;
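A sketch using the explicit syntax shown above (root exports assumed):

```typescript
import { parseExplicit, isExplicitSyntax } from '@lokascript/semantic';

const src = '[toggle class:.active target:#button]';
if (isExplicitSyntax(src)) {
  const node = parseExplicit(src); // language-independent SemanticNode
}
```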
1783
+
1784
+ /**
1785
+ * Explicit Mode Renderer
1786
+ *
1787
+ * Renders semantic nodes to explicit [command role:value] syntax.
1788
+ * Also renders to natural language syntax for any supported language.
1789
+ */
1790
+
1791
+ declare class SemanticRendererImpl implements SemanticRenderer {
1792
+ /**
1793
+ * Render a semantic node in the specified language.
1794
+ */
1795
+ render(node: SemanticNode, language: string): string;
1796
+ /**
1797
+ * Render a compound node (multiple statements chained with then/and).
1798
+ */
1799
+ private renderCompound;
1800
+ /**
1801
+ * Get the translated chain word for the given language.
1802
+ */
1803
+ private getChainWord;
1804
+ /**
1805
+ * Render a semantic node in explicit mode.
1806
+ */
1807
+ renderExplicit(node: SemanticNode): string;
1808
+ /**
1809
+ * Get all supported languages.
1810
+ */
1811
+ supportedLanguages(): string[];
1812
+ /**
1813
+ * Find the best pattern for rendering a semantic node.
1814
+ *
1815
+ * For rendering, we prefer "standard" patterns (e.g., "on click") over
1816
+ * native idiom patterns (e.g., "when clicked") because standard patterns
1817
+ * are more recognizable and closer to the original hyperscript syntax.
1818
+ */
1819
+ private findBestPattern;
1820
+ /**
1821
+ * Render a semantic node using a specific pattern.
1822
+ */
1823
+ private renderWithPattern;
1824
+ /**
1825
+ * Render a single pattern token.
1826
+ */
1827
+ private renderPatternToken;
1828
+ /**
1829
+ * Convert a semantic value to a string for explicit syntax.
1830
+ */
1831
+ private valueToString;
1832
+ /**
1833
+ * Convert a semantic value to natural language string.
1834
+ * Uses language-specific possessive rendering when language is provided.
1835
+ */
1836
+ private valueToNaturalString;
1837
+ /**
1838
+ * Render a reference value in the target language.
1839
+ */
1840
+ private renderReference;
1841
+ /**
1842
+ * Render a property-path value (possessive expression) in the target language.
1843
+ *
1844
+ * Examples by language:
1845
+ * - English: "my value", "its opacity", "#el's value"
1846
+ * - Japanese: "自分の value", "それの opacity"
1847
+ * - Korean: "내 value", "그것의 opacity"
1848
+ * - Spanish: "mi value", "su opacity"
1849
+ * - Chinese: "我的 value", "它的 opacity"
1850
+ */
1851
+ private renderPropertyPath;
1852
+ }
1853
+ /**
1854
+ * Singleton renderer instance.
1855
+ */
1856
+ declare const semanticRenderer: SemanticRendererImpl;
1857
+ /**
1858
+ * Render a semantic node in the specified language.
1859
+ */
1860
+ declare function render(node: SemanticNode, language: string): string;
1861
+ /**
1862
+ * Render a semantic node in explicit mode.
1863
+ */
1864
+ declare function renderExplicit(node: SemanticNode): string;
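Combined with the parser, the renderer gives a parse-in-one-language, render-in-another round trip. A sketch (root exports assumed):

```typescript
import '@lokascript/semantic/languages/en';
import '@lokascript/semantic/languages/ja';
import { parse, render, renderExplicit } from '@lokascript/semantic';

const node = parse('toggle .active on #button', 'en');
render(node, 'ja');   // natural Japanese rendering of the same semantics
renderExplicit(node); // the '[command role:value ...]' explicit form
```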
1865
+
1866
+ /**
1867
+ * Bidirectional Converter
1868
+ *
1869
+ * Converts between natural language syntax and explicit syntax,
1870
+ * and between different natural languages.
1871
+ */
1872
+
1873
+ /**
1874
+ * Convert natural language hyperscript to explicit syntax.
1875
+ *
1876
+ * @param input Natural language hyperscript
1877
+ * @param sourceLanguage Source language code
1878
+ * @returns Explicit syntax string
1879
+ *
1880
+ * @example
1881
+ * toExplicit('toggle .active on #button', 'en')
1882
+ * // → '[toggle patient:.active destination:#button]'
1883
+ *
1884
+ * toExplicit('#button の .active を 切り替え', 'ja')
1885
+ * // → '[toggle patient:.active destination:#button]'
1886
+ */
1887
+ declare function toExplicit(input: string, sourceLanguage: string): string;
1888
+ /**
1889
+ * Convert explicit syntax to natural language.
1890
+ *
1891
+ * @param explicit Explicit syntax string
1892
+ * @param targetLanguage Target language code
1893
+ * @returns Natural language hyperscript
1894
+ *
1895
+ * @example
1896
+ * fromExplicit('[toggle patient:.active destination:#button]', 'en')
1897
+ * // → 'toggle .active on #button'
1898
+ *
1899
+ * fromExplicit('[toggle patient:.active destination:#button]', 'ja')
1900
+ * // → '#button の .active を 切り替え'
1901
+ */
1902
+ declare function fromExplicit(explicit: string, targetLanguage: string): string;
1903
+ /**
1904
+ * Translate hyperscript from one language to another.
1905
+ *
1906
+ * @param input Natural language hyperscript
1907
+ * @param sourceLanguage Source language code
1908
+ * @param targetLanguage Target language code
1909
+ * @returns Translated hyperscript
1910
+ *
1911
+ * @example
1912
+ * translate('toggle .active on #button', 'en', 'ja')
1913
+ * // → '#button の .active を 切り替え'
1914
+ *
1915
+ * translate('#button の .active を 切り替え', 'ja', 'ar')
1916
+ * // → 'بدّل .active على #button'
1917
+ */
1918
+ declare function translate(input: string, sourceLanguage: string, targetLanguage: string): string;
1919
+ /**
1920
+ * Parse input (either explicit or natural language) to semantic node.
1921
+ *
1922
+ * @param input Hyperscript input (explicit or natural)
1923
+ * @param language Language code (required for natural, ignored for explicit)
1924
+ * @returns Semantic node
1925
+ */
1926
+ declare function parseAny(input: string, language: string): SemanticNode;
1927
+ /**
1928
+ * Round-trip validation: parse and re-render to verify consistency.
1929
+ *
1930
+ * When called with 2 arguments, returns an object with validation info.
1931
+ * When called with 3 arguments, returns the rendered string directly.
1932
+ *
1933
+ * @param input Original input
1934
+ * @param sourceLanguage Source language code
1935
+ * @param targetLanguage Target language code (optional, if provided returns string only)
1936
+ * @returns Object with original, semantic, re-rendered, and match status (or just string if targetLanguage provided)
1937
+ */
1938
+ declare function roundTrip(input: string, sourceLanguage: string, targetLanguage?: string): string | {
1939
+ original: string;
1940
+ semantic: SemanticNode;
1941
+ rendered: string;
1942
+ matches: boolean;
1943
+ };
1944
+ /**
1945
+ * Get all translations of a hyperscript statement.
1946
+ *
1947
+ * @param input Hyperscript input
1948
+ * @param sourceLanguage Source language (or 'explicit')
1949
+ * @param targetLanguages List of target language codes (defaults to all 13 supported languages)
1950
+ * @returns Object mapping language codes to translations
1951
+ */
1952
+ declare function getAllTranslations(input: string, sourceLanguage: string, targetLanguages?: string[]): Record<string, string>;
1953
+ /**
1954
+ * Validate that a translation is semantically equivalent.
1955
+ *
1956
+ * @param original Original hyperscript
1957
+ * @param translated Translated hyperscript
1958
+ * @param originalLang Original language
1959
+ * @param translatedLang Translated language
1960
+ * @returns true if semantically equivalent
1961
+ */
1962
+ declare function validateTranslation(original: string, translated: string, originalLang: string, translatedLang: string): boolean;
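The converters above compose naturally; a sketch using the same example statement as the JSDoc (root exports assumed, language modules registered first):

```typescript
import '@lokascript/semantic/languages/en';
import '@lokascript/semantic/languages/ja';
import '@lokascript/semantic/languages/es';
import { translate, getAllTranslations, roundTrip, validateTranslation } from '@lokascript/semantic';

const src = 'toggle .active on #button';
const ja = translate(src, 'en', 'ja');    // per the example above: '#button の .active を 切り替え'
validateTranslation(src, ja, 'en', 'ja'); // true when both sides parse to equivalent semantics

getAllTranslations(src, 'en', ['ja', 'es']); // { ja: '...', es: '...' }

// Two-argument form returns diagnostics; the three-argument form returns a string.
const check = roundTrip(src, 'en');
if (typeof check !== 'string') {
  check.matches; // did the re-rendered output equal the original?
}
```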
1963
+
1964
+ /**
1965
+ * Language Profiles
1966
+ *
1967
+ * Re-exports from individual profile files for backwards compatibility.
1968
+ * For minimal bundles, import specific profiles directly:
1969
+ *
1970
+ * @example
1971
+ * ```typescript
1972
+ * // Tree-shakeable import
1973
+ * import { englishProfile } from './profiles/english';
1974
+ *
1975
+ * // Full import (all profiles bundled)
1976
+ * import { englishProfile, languageProfiles } from './language-profiles';
1977
+ * ```
1978
+ *
1979
+ * @generated This file is auto-generated. Do not edit manually.
1980
+ */
1981
+
1982
+ /**
1983
+ * All available language profiles.
1984
+ * @deprecated Import individual profiles for tree-shaking.
1985
+ */
1986
+ declare const languageProfiles: Record<string, LanguageProfile>;
1987
+ /**
1988
+ * Get a language profile by code.
1989
+ * @deprecated Use the registry's getProfile instead.
1990
+ */
1991
+ declare function getProfile(code: string): LanguageProfile | undefined;
1992
+ /**
1993
+ * Get all supported language codes.
1994
+ * @deprecated Use the registry's getRegisteredLanguages instead.
1995
+ */
1996
+ declare function getSupportedLanguages$1(): string[];
1997
+ /**
1998
+ * Check if a language is supported.
1999
+ * @deprecated Use the registry's isLanguageRegistered instead.
2000
+ */
2001
+ declare function isLanguageSupported(code: string): boolean;
2002
+
2003
+ /**
2004
+ * Language Registry
2005
+ *
2006
+ * Central registration point for language support in the semantic parser.
2007
+ * Languages self-register when their modules are imported, enabling
2008
+ * tree-shaking for minimal bundles.
2009
+ *
2010
+ * @example
2011
+ * ```typescript
2012
+ * // Import only the languages you need
2013
+ * import '@lokascript/semantic/languages/en';
2014
+ * import '@lokascript/semantic/languages/es';
2015
+ *
2016
+ * // Now parse works for registered languages
2017
+ * import { parse } from '@lokascript/semantic';
2018
+ * parse('toggle .active', 'en'); // Works
2019
+ * parse('alternar .activo', 'es'); // Works
2020
+ * parse('切り替え .active', 'ja'); // Error: Language not registered
2021
+ * ```
2022
+ */
2023
+
2024
+ /**
2025
+ * Try to get a profile, returning undefined if not registered.
2026
+ */
2027
+ declare function tryGetProfile(code: string): LanguageProfile | undefined;
2028
+ /**
2029
+ * Get all registered language codes.
2030
+ */
2031
+ declare function getRegisteredLanguages(): string[];
2032
+
2033
+ /**
2034
+ * Semantic Result Cache
2035
+ *
2036
+ * LRU cache for semantic analysis results to optimize repeated parsing.
2037
+ *
2038
+ * Design:
2039
+ * - Cache key: `${language}:${input}` for simple, fast lookups
2040
+ * - LRU eviction when max size reached
2041
+ * - Optional TTL (time-to-live) for cache entries
2042
+ * - Statistics for monitoring cache effectiveness
2043
+  * - Safe for single-threaded browser environments (no locking needed)
2044
+ */
2045
+
2046
+ /**
2047
+ * Cache configuration options.
2048
+ */
2049
+ interface SemanticCacheConfig {
2050
+ /** Maximum number of entries to cache. Default: 1000 */
2051
+ maxSize?: number;
2052
+ /** Time-to-live in milliseconds. 0 = no expiration. Default: 0 */
2053
+ ttlMs?: number;
2054
+ /** Enable/disable caching. Default: true */
2055
+ enabled?: boolean;
2056
+ }
2057
+ /**
2058
+ * Cache statistics.
2059
+ */
2060
+ interface CacheStats {
2061
+ /** Total cache hits */
2062
+ hits: number;
2063
+ /** Total cache misses */
2064
+ misses: number;
2065
+ /** Current cache size */
2066
+ size: number;
2067
+ /** Maximum cache size */
2068
+ maxSize: number;
2069
+ /** Hit rate (0-1) */
2070
+ hitRate: number;
2071
+ /** Total evictions due to size limit */
2072
+ evictions: number;
2073
+ /** Total expirations due to TTL */
2074
+ expirations: number;
2075
+ /** Whether caching is enabled */
2076
+ enabled: boolean;
2077
+ }
2078
+ /**
2079
+ * LRU Cache for semantic analysis results.
2080
+ *
2081
+ * Uses Map's insertion order for LRU eviction - when we access an entry,
2082
+ * we delete and re-insert it to move it to the end (most recently used).
2083
+ */
2084
+ declare class SemanticCache {
2085
+ private cache;
2086
+ private config;
2087
+ private stats;
2088
+ constructor(config?: SemanticCacheConfig);
2089
+ /**
2090
+ * Generate cache key from input and language.
2091
+ */
2092
+ private makeKey;
2093
+ /**
2094
+ * Check if an entry has expired.
2095
+ */
2096
+ private isExpired;
2097
+ /**
2098
+ * Evict the least recently used entry.
2099
+ */
2100
+ private evictLRU;
2101
+ /**
2102
+ * Get a cached result.
2103
+ *
2104
+ * @param input - The input string
2105
+ * @param language - The language code
2106
+ * @returns The cached result, or undefined if not found/expired
2107
+ */
2108
+ get(input: string, language: string): SemanticAnalysisResult | undefined;
2109
+ /**
2110
+ * Store a result in the cache.
2111
+ *
2112
+ * @param input - The input string
2113
+ * @param language - The language code
2114
+ * @param result - The analysis result to cache
2115
+ */
2116
+ set(input: string, language: string, result: SemanticAnalysisResult): void;
2117
+ /**
2118
+ * Check if a result is cached (without updating LRU).
2119
+ */
2120
+ has(input: string, language: string): boolean;
2121
+ /**
2122
+ * Remove a specific entry from the cache.
2123
+ */
2124
+ delete(input: string, language: string): boolean;
2125
+ /**
2126
+ * Clear all cached entries.
2127
+ */
2128
+ clear(): void;
2129
+ /**
2130
+ * Reset statistics.
2131
+ */
2132
+ resetStats(): void;
2133
+ /**
2134
+ * Get cache statistics.
2135
+ */
2136
+ getStats(): CacheStats;
2137
+ /**
2138
+ * Update cache configuration.
2139
+ */
2140
+ configure(config: Partial<SemanticCacheConfig>): void;
2141
+ /**
2142
+ * Enable caching.
2143
+ */
2144
+ enable(): void;
2145
+ /**
2146
+ * Disable caching.
2147
+ */
2148
+ disable(): void;
2149
+ /**
2150
+ * Get current configuration.
2151
+ */
2152
+ getConfig(): Readonly<Required<SemanticCacheConfig>>;
2153
+ }
2154
+ /**
2155
+ * Default global cache instance.
2156
+ */
2157
+ declare const semanticCache: SemanticCache;
2158
+ /**
2159
+ * Create a cache with custom configuration.
2160
+ */
2161
+ declare function createSemanticCache(config?: SemanticCacheConfig): SemanticCache;
2162
+ /**
2163
+ * Decorator/wrapper for adding caching to an analyze function.
2164
+ *
2165
+ * @param analyzeFn - The analyze function to wrap
2166
+ * @param cache - The cache instance to use
2167
+ * @returns Wrapped function with caching
2168
+ */
2169
+ declare function withCache<T extends (input: string, language: string) => SemanticAnalysisResult>(analyzeFn: T, cache?: SemanticCache): T;
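A usage sketch for the cache utilities, assuming root exports. The wrapped analyze function here comes from createSemanticAnalyzer, declared in the next section.

```typescript
import { createSemanticCache, withCache, createSemanticAnalyzer } from '@lokascript/semantic';

// A dedicated cache: at most 200 entries, each expiring after one minute.
const cache = createSemanticCache({ maxSize: 200, ttlMs: 60_000 });

// Wrap any (input, language) => SemanticAnalysisResult function with caching.
const analyzer = createSemanticAnalyzer({ cache: false }); // avoid double caching
const cachedAnalyze = withCache(
  (input: string, language: string) => analyzer.analyze(input, language),
  cache
);

cachedAnalyze('toggle .active', 'en');
cachedAnalyze('toggle .active', 'en'); // served from the cache
cache.getStats().hitRate;              // climbs as repeated inputs hit the cache
```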
2170
+
2171
+ /**
2172
+ * Core Parser Bridge
2173
+ *
2174
+ * Provides the SemanticAnalyzer interface that integrates semantic parsing
2175
+ * into the core hyperscript parser. This bridge enables confidence-driven
2176
+ * fallback between semantic and traditional parsing.
2177
+ */
2178
+
2179
+ /**
2180
+ * Result of semantic analysis.
2181
+ */
2182
+ interface SemanticAnalysisResult {
2183
+ /** Confidence score (0-1) for this analysis */
2184
+ readonly confidence: number;
2185
+ /** The parsed command info (if successful) */
2186
+ readonly command?: {
2187
+ readonly name: ActionType;
2188
+ readonly roles: ReadonlyMap<SemanticRole, SemanticValue>;
2189
+ };
2190
+ /** The full semantic node (if successful) */
2191
+ readonly node?: SemanticNode;
2192
+ /** Any errors encountered */
2193
+ readonly errors?: string[];
2194
+ /** Number of tokens consumed */
2195
+ readonly tokensConsumed?: number;
2196
+ }
2197
+ /**
2198
+ * Interface for semantic analysis that can be integrated into the core parser.
2199
+ * This allows the core parser to optionally use semantic parsing with
2200
+ * confidence-based fallback to traditional parsing.
2201
+ */
2202
+ interface SemanticAnalyzer {
2203
+ /**
2204
+ * Analyze input in the specified language.
2205
+ *
2206
+ * @param input The input string to analyze
2207
+ * @param language ISO 639-1 language code
2208
+ * @returns Analysis result with confidence score
2209
+ */
2210
+ analyze(input: string, language: string): SemanticAnalysisResult;
2211
+ /**
2212
+ * Check if semantic parsing is available for a language.
2213
+ */
2214
+ supportsLanguage(language: string): boolean;
2215
+ /**
2216
+ * Get the list of supported languages.
2217
+ */
2218
+ supportedLanguages(): string[];
2219
+ /**
2220
+ * Get cache statistics.
2221
+ */
2222
+ getCacheStats(): CacheStats;
2223
+ /**
2224
+ * Clear the result cache.
2225
+ */
2226
+ clearCache(): void;
2227
+ /**
2228
+ * Configure the cache.
2229
+ */
2230
+ configureCache(config: Partial<SemanticCacheConfig>): void;
2231
+ }
2232
+ /**
2233
+ * Options for creating a SemanticAnalyzer.
2234
+ */
2235
+ interface SemanticAnalyzerOptions {
2236
+ /** Cache configuration. Pass false to disable caching. */
2237
+ cache?: SemanticCacheConfig | false;
2238
+ }
2239
+ /**
2240
+ * Implementation of SemanticAnalyzer that wraps the semantic parser.
2241
+ * Includes LRU caching for performance optimization on repeated inputs.
2242
+ */
2243
+ declare class SemanticAnalyzerImpl implements SemanticAnalyzer {
2244
+ private readonly patternMatcher;
2245
+ private readonly languages;
2246
+ private readonly cache;
2247
+ constructor(options?: SemanticAnalyzerOptions);
2248
+ analyze(input: string, language: string): SemanticAnalysisResult;
2249
+ /**
2250
+ * Perform analysis without cache lookup.
2251
+ */
2252
+ private analyzeUncached;
2253
+ supportsLanguage(language: string): boolean;
2254
+ supportedLanguages(): string[];
2255
+ getCacheStats(): CacheStats;
2256
+ clearCache(): void;
2257
+ configureCache(config: Partial<SemanticCacheConfig>): void;
2258
+ private buildSemanticNode;
2259
+ }
2260
+ /**
2261
+ * Create a SemanticAnalyzer instance.
2262
+ *
2263
+ * @param options - Configuration options including cache settings
2264
+ * @returns A new SemanticAnalyzer
2265
+ *
2266
+ * @example
2267
+ * // Default: uses shared global cache
2268
+ * const analyzer = createSemanticAnalyzer();
2269
+ *
2270
+ * @example
2271
+ * // Custom cache size
2272
+ * const analyzer = createSemanticAnalyzer({ cache: { maxSize: 500 } });
2273
+ *
2274
+ * @example
2275
+ * // Disable caching
2276
+ * const analyzer = createSemanticAnalyzer({ cache: false });
2277
+ */
2278
+ declare function createSemanticAnalyzer(options?: SemanticAnalyzerOptions): SemanticAnalyzer;
2279
+
2280
+ /**
2281
+ * Default confidence threshold for preferring semantic parsing.
2282
+ * If confidence is above this, use semantic result; otherwise fallback.
2283
+ */
2284
+ declare const DEFAULT_CONFIDENCE_THRESHOLD = 0.5;
2285
+ /**
2286
+ * High confidence threshold for very certain matches.
2287
+ */
2288
+ declare const HIGH_CONFIDENCE_THRESHOLD = 0.8;
2289
+ /**
2290
+ * Determine if semantic analysis should be used based on confidence.
2291
+ */
2292
+ declare function shouldUseSemanticResult(result: SemanticAnalysisResult, threshold?: number): boolean;
2293
+ /**
2294
+ * Convert semantic roles to the format expected by core parser commands.
2295
+ * This maps semantic roles to the positional/modifier structure used by
2296
+ * the core command implementations.
2297
+ *
2298
+ * Role to preposition mapping:
2299
+ * - patient → first positional arg
2300
+ * - event → first positional arg
2301
+ * - destination → 'into' (put) or 'on' (others)
2302
+ * - source → 'from'
2303
+ * - quantity → 'by'
2304
+ * - duration → 'over' or 'for'
2305
+ * - method → 'as'
2306
+ * - style → 'with'
2307
+ * - condition → 'if'
2308
+ */
2309
+ declare function rolesToCommandArgs(roles: ReadonlyMap<SemanticRole, SemanticValue>, command: ActionType): {
2310
+ args: SemanticValue[];
2311
+ modifiers: Record<string, SemanticValue>;
2312
+ };
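A sketch of the confidence-driven fallback this bridge enables, assuming root exports:

```typescript
import '@lokascript/semantic/languages/en';
import {
  createSemanticAnalyzer,
  shouldUseSemanticResult,
  rolesToCommandArgs,
  DEFAULT_CONFIDENCE_THRESHOLD,
} from '@lokascript/semantic';

const analyzer = createSemanticAnalyzer();
const result = analyzer.analyze('toggle .active on #button', 'en');

if (shouldUseSemanticResult(result, DEFAULT_CONFIDENCE_THRESHOLD) && result.command) {
  // Map semantic roles onto the positional/modifier shape the core commands expect.
  const { args, modifiers } = rolesToCommandArgs(result.command.roles, result.command.name);
} else {
  // Confidence too low: fall back to the traditional parser.
}
```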
2313
+
2314
+ /**
2315
+ * Command Schemas
2316
+ *
2317
+ * Defines the semantic structure of each hyperscript command.
2318
+ * Used by the pattern generator to create language-specific patterns.
2319
+ */
2320
+
2321
+ /**
2322
+ * A role specification in a command schema.
2323
+ */
2324
+ interface RoleSpec {
2325
+ /** The semantic role */
2326
+ readonly role: SemanticRole;
2327
+ /** Description of what this role represents */
2328
+ readonly description: string;
2329
+ /** Whether this role is required */
2330
+ readonly required: boolean;
2331
+ /** Expected value types */
2332
+ readonly expectedTypes: Array<'selector' | 'literal' | 'reference' | 'expression'>;
2333
+ /** Default value if not provided */
2334
+ readonly default?: SemanticValue;
2335
+ /** Position hint for SVO languages (higher = earlier) */
2336
+ readonly svoPosition?: number;
2337
+ /** Position hint for SOV languages (higher = earlier) */
2338
+ readonly sovPosition?: number;
2339
+ /**
2340
+ * Override the default role marker for this command.
2341
+ * Maps language code to the marker to use (e.g., { en: 'to', es: 'a' }).
2342
+ * If not specified, uses the language profile's default roleMarker.
2343
+ */
2344
+ readonly markerOverride?: Record<string, string>;
2345
+ }
2346
+ /**
2347
+ * A precondition that must be met before command execution.
2348
+ * Used for runtime error documentation.
2349
+ */
2350
+ interface CommandPrecondition {
2351
+ /** Human-readable condition description */
2352
+ readonly condition: string;
2353
+ /** Error code thrown when precondition fails */
2354
+ readonly errorCode: string;
2355
+ /** Error message template */
2356
+ readonly message: string;
2357
+ }
2358
+ /**
2359
+ * A command schema defines the semantic structure of a command.
2360
+ */
2361
+ interface CommandSchema {
2362
+ /** The action type (command name) */
2363
+ readonly action: ActionType;
2364
+ /** Human-readable description */
2365
+ readonly description: string;
2366
+ /** Roles this command accepts */
2367
+ readonly roles: RoleSpec[];
2368
+ /** The primary role (what the command acts on) */
2369
+ readonly primaryRole: SemanticRole;
2370
+ /** Category for grouping */
2371
+ readonly category: CommandCategory;
2372
+ /** Whether this command typically has a body (like event handlers) */
2373
+ readonly hasBody?: boolean;
2374
+ /** Notes about special handling */
2375
+ readonly notes?: string;
2376
+ /** Possible runtime error codes this command can throw */
2377
+ readonly errorCodes?: readonly string[];
2378
+ /** Preconditions that must be met before execution */
2379
+ readonly preconditions?: readonly CommandPrecondition[];
2380
+ /** Recovery hints mapping error code to suggestion */
2381
+ readonly recoveryHints?: Readonly<Record<string, string>>;
2382
+ }
2383
+ /**
2384
+ * Command categories for organization.
2385
+ */
2386
+ type CommandCategory = 'dom-class' | 'dom-content' | 'dom-visibility' | 'variable' | 'event' | 'async' | 'navigation' | 'control-flow';
2387
+ /**
2388
+ * Toggle command: adds class/attribute if absent, removes if present.
2389
+ *
2390
+ * Patterns:
2391
+ * - EN: toggle .active on #button
2392
+ * - JA: #button の .active を 切り替え
2393
+ * - AR: بدّل .active على #button
2394
+ */
2395
+ declare const toggleSchema: CommandSchema;
2396
+ /**
2397
+ * Add command: adds a class or attribute.
2398
+ */
2399
+ declare const addSchema: CommandSchema;
2400
+ /**
2401
+ * Remove command: removes a class or attribute.
2402
+ */
2403
+ declare const removeSchema: CommandSchema;
2404
+ /**
2405
+ * Put command: puts content into a target.
2406
+ *
2407
+ * Patterns:
2408
+ * - EN: put "hello" into #output
2409
+ * - JA: #output に "hello" を 置く
2410
+ * - AR: ضع "hello" في #output
2411
+ */
2412
+ declare const putSchema: CommandSchema;
2413
+ /**
2414
+ * Set command: sets a property or variable.
2415
+ *
2416
+ * Patterns:
2417
+ * - EN: set :count to 10
2418
+ * - ES: establecer :count a 10
2419
+ * - JA: :count を 10 に 設定
2420
+ * - KO: :x 에 5 을 설정 (uses default markers)
2421
+ * - TR: :x e 5 i ayarla (uses default markers)
2422
+ *
2423
+ * Note: Only override markers for SVO languages where the patient role has no default marker.
2424
+ * SOV languages (Korean, Japanese, Turkish) already have correct object markers.
2425
+ */
2426
+ declare const setSchema: CommandSchema;
2427
+ /**
2428
+ * Show command: makes an element visible.
2429
+ */
2430
+ declare const showSchema: CommandSchema;
2431
+ /**
2432
+ * Hide command: makes an element invisible.
2433
+ */
2434
+ declare const hideSchema: CommandSchema;
2435
+ /**
2436
+ * On command: event handler.
2437
+ */
2438
+ declare const onSchema: CommandSchema;
2439
+ /**
2440
+ * Trigger command: dispatches an event.
2441
+ * Supports namespaced events like "draggable:start".
2442
+ */
2443
+ declare const triggerSchema: CommandSchema;
2444
+ /**
2445
+ * Wait command: pauses execution.
2446
+ */
2447
+ declare const waitSchema: CommandSchema;
2448
+ /**
2449
+ * Fetch command: makes an HTTP request.
2450
+ */
2451
+ declare const fetchSchema: CommandSchema;
2452
+ /**
2453
+ * Increment command: increases a numeric value.
2454
+ */
2455
+ declare const incrementSchema: CommandSchema;
2456
+ /**
2457
+ * Decrement command: decreases a numeric value.
2458
+ */
2459
+ declare const decrementSchema: CommandSchema;
2460
+ /**
2461
+ * Append command: appends content to an element.
2462
+ */
2463
+ declare const appendSchema: CommandSchema;
2464
+ /**
2465
+ * Prepend command: prepends content to an element.
2466
+ */
2467
+ declare const prependSchema: CommandSchema;
2468
+ /**
2469
+ * All available command schemas.
2470
+ */
2471
+ declare const commandSchemas: Record<ActionType, CommandSchema>;
2472
+ /**
2473
+ * Get a command schema by action type.
2474
+ */
2475
+ declare function getSchema$1(action: ActionType): CommandSchema | undefined;
2476
+ /**
2477
+ * Get all schemas for a category.
2478
+ */
2479
+ declare function getSchemasByCategory(category: CommandCategory): CommandSchema[];
2480
+ /**
2481
+ * Get all fully-defined schemas (with roles).
2482
+ */
2483
+ declare function getDefinedSchemas(): CommandSchema[];
2484
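As a concrete illustration of `CommandSchema` and `RoleSpec`, here is a minimal sketch that registers a hypothetical `highlight` command. The role names `patient` and `destination` are taken from the explicit-mode examples later in this file; the `highlight` action itself, its markers, and the cast around it are assumptions, since the concrete `ActionType` union is declared elsewhere.

```typescript
import { registerSchema, type ActionType, type CommandSchema } from '@lokascript/semantic';

// Hypothetical command: "highlight .warning on #form".
// Neither the action name nor the markers are part of the shipped package.
const highlightSchema: CommandSchema = {
  // Double cast because ActionType is a closed union declared elsewhere in this file.
  action: 'highlight' as unknown as ActionType,
  description: 'Temporarily highlights an element',
  category: 'dom-visibility',
  primaryRole: 'patient',
  roles: [
    {
      role: 'patient',
      description: 'The class or selector to highlight',
      required: true,
      expectedTypes: ['selector', 'literal'],
      svoPosition: 2,
      sovPosition: 2,
    },
    {
      role: 'destination',
      description: 'The element the highlight is applied to',
      required: false,
      expectedTypes: ['selector', 'reference'],
      // Per-command marker override, as described by RoleSpec.markerOverride above.
      markerOverride: { en: 'on', es: 'en' },
    },
  ],
};

registerSchema(highlightSchema.action, highlightSchema);
```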
+
2485
+ /**
2486
+ * Pattern Generator
2487
+ *
2488
+ * Generates LanguagePattern objects from CommandSchema + LanguageProfile.
2489
+ * This solves the pattern explosion problem by deriving patterns from
2490
+ * high-level definitions rather than hand-writing each one.
2491
+ */
2492
+
2493
+ /**
2494
+ * Configuration for pattern generation.
2495
+ */
2496
+ interface GeneratorConfig {
2497
+ /** Base priority for generated patterns (higher = checked first) */
2498
+ basePriority?: number;
2499
+ /** Whether to generate simple patterns (without optional roles) */
2500
+ generateSimpleVariants?: boolean;
2501
+ /** Whether to generate alternative keyword patterns */
2502
+ generateAlternatives?: boolean;
2503
+ }
2504
+ /**
2505
+ * Generate a pattern for a command in a specific language.
2506
+ */
2507
+ declare function generatePattern(schema: CommandSchema, profile: LanguageProfile, config?: GeneratorConfig): LanguagePattern;
2508
+ /**
2509
+ * Generate a simple variant pattern (without optional roles).
2510
+ */
2511
+ declare function generateSimplePattern(schema: CommandSchema, profile: LanguageProfile, config?: GeneratorConfig): LanguagePattern | null;
2512
+ /**
2513
+ * Generate all pattern variants for a command in a language.
2514
+ */
2515
+ declare function generatePatternVariants(schema: CommandSchema, profile: LanguageProfile, config?: GeneratorConfig): LanguagePattern[];
2516
+ /**
2517
+ * Generate patterns for all commands in a specific language.
2518
+ */
2519
+ declare function generatePatternsForLanguage(profile: LanguageProfile, config?: GeneratorConfig): LanguagePattern[];
2520
+ /**
2521
+ * Generate patterns for a command across specified profiles.
2522
+ *
2523
+ * @param schema Command schema to generate patterns for
2524
+ * @param profiles Array of language profiles to generate patterns for (defaults to all registered)
2525
+ * @param config Generator configuration
2526
+ */
2527
+ declare function generatePatternsForCommand(schema: CommandSchema, profiles?: LanguageProfile[], config?: GeneratorConfig): LanguagePattern[];
2528
+ /**
2529
+ * Generate all patterns for all commands across specified profiles.
2530
+ *
2531
+ * @param profiles Array of language profiles to generate patterns for (defaults to all registered)
2532
+ * @param config Generator configuration
2533
+ */
2534
+ declare function generateAllPatterns(profiles?: LanguageProfile[], config?: GeneratorConfig): LanguagePattern[];
2535
+ /**
2536
+ * Get a summary of what patterns can be generated.
2537
+ * Note: This requires the registry to have languages registered.
2538
+ */
2539
+ declare function getGeneratorSummary(): {
2540
+ languages: string[];
2541
+ commands: string[];
2542
+ totalPatterns: number;
2543
+ };
2544
+ /**
2545
+ * Validate that all required keywords exist for a language.
2546
+ */
2547
+ declare function validateLanguageKeywords(profile: LanguageProfile, schemas?: CommandSchema[]): {
2548
+ missing: string[];
2549
+ available: string[];
2550
+ };
2551
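A minimal sketch of driving the generator for one language. It assumes the exported `japaneseProfile` satisfies the `LanguageProfile` shape these functions expect; the `GeneratorConfig` values are illustrative, not recommended defaults.

```typescript
import {
  generatePatternsForLanguage,
  validateLanguageKeywords,
  japaneseProfile,
  type GeneratorConfig,
} from '@lokascript/semantic';

const config: GeneratorConfig = {
  basePriority: 100,            // higher-priority patterns are checked first
  generateSimpleVariants: true, // also emit patterns without optional roles
  generateAlternatives: true,   // also emit alternative-keyword patterns
};

// Check keyword coverage before generating, so missing translations surface early.
const { missing } = validateLanguageKeywords(japaneseProfile);
if (missing.length > 0) {
  console.warn('Missing keyword translations for ja:', missing);
}

const patterns = generatePatternsForLanguage(japaneseProfile, config);
console.log(`Generated ${patterns.length} Japanese patterns`);
```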
+
2552
+ /**
2553
+ * Per-Command Semantic Validation
2554
+ *
2555
+ * Validates semantic parse results against command schemas.
2556
+ * Ensures that role assignments match expected types and constraints.
2557
+ *
2558
+ * Design Philosophy:
2559
+ * - Command schemas are the source of truth for validation rules
2560
+ * - Validation happens AFTER pattern matching, BEFORE AST conversion
2561
+ * - Provides detailed error messages for debugging
2562
+ * - Supports confidence scoring for ambiguous parses
2563
+ *
2564
+ * Integration with core validators:
2565
+ * - Uses patterns from @lokascript/core's lightweight-validators where applicable
2566
+ * - Type validation follows same patterns as runtime validation
2567
+ * - Can be extended with custom validators using the same API
2568
+ */
2569
+
2570
+ /**
2571
+ * Validation error with detailed context.
2572
+ */
2573
+ interface ValidationError {
2574
+ /** Error code for programmatic handling */
2575
+ code: 'MISSING_REQUIRED_ROLE' | 'INVALID_TYPE' | 'UNKNOWN_ROLE' | 'CONSTRAINT_VIOLATION';
2576
+ /** Human-readable message */
2577
+ message: string;
2578
+ /** The role that failed validation */
2579
+ role?: SemanticRole;
2580
+ /** Expected types */
2581
+ expected?: string[];
2582
+ /** Actual value */
2583
+ actual?: SemanticValue;
2584
+ /** Severity: 'error' blocks execution, 'warning' is logged */
2585
+ severity: 'error' | 'warning';
2586
+ }
2587
+ /**
2588
+ * Result of command validation.
2589
+ */
2590
+ interface ValidationResult {
2591
+ /** Whether validation passed */
2592
+ valid: boolean;
2593
+ /** Validation errors (if any) */
2594
+ errors: ValidationError[];
2595
+ /** Warnings (non-blocking issues) */
2596
+ warnings: ValidationError[];
2597
+ /** Confidence adjustment (-1 to +1) based on validation */
2598
+ confidenceAdjustment: number;
2599
+ /** Suggested fixes for errors */
2600
+ suggestions: string[];
2601
+ }
2602
+ /**
2603
+ * Maps action types to their schemas.
2604
+ */
2605
+ declare const schemaRegistry: Map<ActionType, CommandSchema>;
2606
+ /**
2607
+ * Get schema for an action type.
2608
+ */
2609
+ declare function getSchema(action: ActionType): CommandSchema | undefined;
2610
+ /**
2611
+ * Register a custom schema.
2612
+ */
2613
+ declare function registerSchema(action: ActionType, schema: CommandSchema): void;
2614
+ /**
2615
+ * Validate a semantic parse result against its command schema.
2616
+ *
2617
+ * @param result - The semantic parse result to validate
2618
+ * @returns Validation result with errors, warnings, and confidence adjustment
2619
+ */
2620
+ declare function validateSemanticResult(result: SemanticParseResult): ValidationResult;
2621
+ /**
2622
+ * Apply validation to a parse result and adjust confidence.
2623
+ *
2624
+ * @param result - The semantic parse result
2625
+ * @returns Updated result with adjusted confidence
2626
+ */
2627
+ declare function validateAndAdjustConfidence(result: SemanticParseResult): SemanticParseResult & {
2628
+ validation: ValidationResult;
2629
+ };
2630
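The sketch below only consumes a `ValidationResult`; producing the parse result that feeds `validateSemanticResult` goes through the pattern matcher, whose types are declared elsewhere in this file. Clamping the adjusted confidence to the 0 to 1 range is an assumption about how the adjustment is meant to be applied.

```typescript
import type { ValidationResult } from '@lokascript/semantic';

// Summarize a validation result produced by validateSemanticResult().
function summarizeValidation(validation: ValidationResult, baseConfidence: number): string {
  // Assumed application of the adjustment: add it, then clamp to [0, 1].
  const confidence = Math.min(1, Math.max(0, baseConfidence + validation.confidenceAdjustment));

  if (!validation.valid) {
    const details = validation.errors
      .map(e => `${e.code}${e.role ? ` (${e.role})` : ''}: ${e.message}`)
      .join('; ');
    return `rejected: ${details}; suggestions: ${validation.suggestions.join(', ')}`;
  }

  return `accepted with confidence ${confidence.toFixed(2)} and ${validation.warnings.length} warning(s)`;
}
```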
+
2631
+ /**
2632
+ * Unified Language Profile
2633
+ *
2634
+ * Combines parsing-focused features (from semantic) with generation-focused
2635
+ * features (from i18n) into a single profile structure.
2636
+ *
2637
+ * This enables:
2638
+ * - Single source of truth for language configuration
2639
+ * - Bidirectional conversion: parse (natural → semantic) and render (semantic → natural)
2640
+ * - Consistent language support across packages
2641
+ */
2642
+
2643
+ /**
2644
+ * How grammatical relationships are marked (unified from both packages).
2645
+ *
2646
+ * Maps to i18n's AdpositionType:
2647
+ * - preposition → 'preposition'
2648
+ * - postposition → 'postposition'
2649
+ * - particle → 'postposition' (particles are typically postpositional)
2650
+ * - case-suffix → 'postposition' (suffixes attach after)
2651
+ */
2652
+ type MarkingStrategy = 'preposition' | 'postposition' | 'particle' | 'case-suffix';
2653
+ /**
2654
+ * A grammatical marker for a semantic role.
2655
+ * Unified from both packages' marker types.
2656
+ */
2657
+ interface UnifiedRoleMarker {
2658
+ /** Primary marker form */
2659
+ readonly primary: string;
2660
+ /** Alternative forms (conjugations, vowel harmony variants) */
2661
+ readonly alternatives?: string[];
2662
+ /** Position relative to the role value */
2663
+ readonly position: 'before' | 'after';
2664
+ /** Whether this marker is required */
2665
+ readonly required?: boolean;
2666
+ }
2667
+ /**
2668
+ * Verb form configuration for a language.
2669
+ */
2670
+ interface VerbConfig {
2671
+ /** Position of verb in the sentence */
2672
+ readonly position: 'start' | 'end' | 'second';
2673
+ /** Common verb suffixes/conjugations to recognize */
2674
+ readonly suffixes?: string[];
2675
+ /** Whether the language commonly drops subjects */
2676
+ readonly subjectDrop?: boolean;
2677
+ }
2678
+ /**
2679
+ * Translation of a command keyword.
2680
+ */
2681
+ interface KeywordTranslation {
2682
+ /** Primary translation */
2683
+ readonly primary: string;
2684
+ /** Alternative forms (conjugations, synonyms) */
2685
+ readonly alternatives?: string[];
2686
+ /** Normalized form for matching */
2687
+ readonly normalized?: string;
2688
+ }
2689
+ /**
2690
+ * Special tokenization configuration.
2691
+ */
2692
+ interface TokenizationConfig {
2693
+ /** Particles to recognize (for particle languages) */
2694
+ readonly particles?: string[];
2695
+ /** Prefixes to recognize (for prefixing languages) */
2696
+ readonly prefixes?: string[];
2697
+ /** Word boundary detection strategy */
2698
+ readonly boundaryStrategy?: 'space' | 'particle' | 'character';
2699
+ }
2700
+ /**
2701
+ * Unified Language Profile
2702
+ *
2703
+ * Combines all fields needed for both parsing and generation:
2704
+ *
2705
+ * **Shared fields:**
2706
+ * - code, name, nativeName, direction, wordOrder
2707
+ *
2708
+ * **Parsing fields (from semantic):**
2709
+ * - keywords: Command keyword translations
2710
+ * - verb: Verb position and conjugation info
2711
+ * - tokenization: Language-specific tokenization
2712
+ * - usesSpaces: Word boundary info
2713
+ *
2714
+ * **Generation fields (from i18n):**
2715
+ * - morphology: Morphological type for transformation
2716
+ * - canonicalOrder: Role ordering for output
2717
+ * - markers: Grammatical markers for roles
2718
+ */
2719
+ interface UnifiedLanguageProfile {
2720
+ /** ISO 639-1 language code */
2721
+ readonly code: string;
2722
+ /** Human-readable language name */
2723
+ readonly name: string;
2724
+ /** Native language name */
2725
+ readonly nativeName: string;
2726
+ /** Text direction */
2727
+ readonly direction: 'ltr' | 'rtl';
2728
+ /** Primary word order (SVO, SOV, VSO, etc.) */
2729
+ readonly wordOrder: WordOrder$1;
2730
+ /** How grammatical roles are marked */
2731
+ readonly markingStrategy: MarkingStrategy;
2732
+ /** Morphological typology */
2733
+ readonly morphology: MorphologyType;
2734
+ /** Whether the language uses spaces between words */
2735
+ readonly usesSpaces: boolean;
2736
+ /** Markers for each semantic role */
2737
+ readonly roleMarkers: Partial<Record<SemanticRole, UnifiedRoleMarker>>;
2738
+ /** Canonical role order for generation */
2739
+ readonly canonicalOrder: SemanticRole[];
2740
+ /** Verb configuration */
2741
+ readonly verb: VerbConfig;
2742
+ /** Command keyword translations */
2743
+ readonly keywords: Record<string, KeywordTranslation>;
2744
+ /** Special tokenization configuration */
2745
+ readonly tokenization?: TokenizationConfig;
2746
+ /** Special transformation rules */
2747
+ readonly rules?: GrammarRule[];
2748
+ }
2749
+ /**
2750
+ * Grammar rule for special transformations.
2751
+ * (Imported from i18n for consistency)
2752
+ */
2753
+ interface GrammarRule {
2754
+ name: string;
2755
+ description: string;
2756
+ match: PatternMatcher;
2757
+ transform: PatternTransform;
2758
+ priority: number;
2759
+ }
2760
+ interface PatternMatcher {
2761
+ commands?: string[];
2762
+ requiredRoles: SemanticRole[];
2763
+ optionalRoles?: SemanticRole[];
2764
+ predicate?: (parsed: unknown) => boolean;
2765
+ }
2766
+ interface PatternTransform {
2767
+ roleOrder: SemanticRole[];
2768
+ insertMarkers?: boolean;
2769
+ custom?: (parsed: unknown, profile: UnifiedLanguageProfile) => string;
2770
+ }
2771
+ /**
2772
+ * Convert marking strategy to AdpositionType for i18n compatibility.
2773
+ */
2774
+ declare function markingStrategyToAdpositionType(strategy: MarkingStrategy): AdpositionType;
2775
+ /**
2776
+ * Convert UnifiedRoleMarker to GrammaticalMarker for i18n compatibility.
2777
+ */
2778
+ declare function toGrammaticalMarker(role: SemanticRole, marker: UnifiedRoleMarker, strategy: MarkingStrategy): GrammaticalMarker;
2779
+ /**
2780
+ * Convert UnifiedLanguageProfile to i18n LanguageProfile.
2781
+ * This enables using unified profiles with existing i18n code.
2782
+ */
2783
+ declare function toI18nProfile(unified: UnifiedLanguageProfile): {
2784
+ code: string;
2785
+ name: string;
2786
+ wordOrder: WordOrder$1;
2787
+ adpositionType: AdpositionType;
2788
+ morphology: MorphologyType;
2789
+ direction: 'ltr' | 'rtl';
2790
+ markers: GrammaticalMarker[];
2791
+ canonicalOrder: SemanticRole[];
2792
+ };
2793
+ /**
2794
+ * Check if an object is a UnifiedLanguageProfile.
2795
+ */
2796
+ declare function isUnifiedProfile(obj: unknown): obj is UnifiedLanguageProfile;
2797
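A small sketch of the compatibility helpers. It relies only on the `isUnifiedProfile` type guard and the return shape of `toI18nProfile` declared above, and assumes `wordOrder` and `adpositionType` interpolate as plain strings.

```typescript
import { isUnifiedProfile, toI18nProfile } from '@lokascript/semantic';

// Accept either profile flavour and, when it is a unified profile,
// normalize it to the i18n shape for downstream generation code.
function describeProfile(candidate: unknown): string {
  if (!isUnifiedProfile(candidate)) {
    return 'not a unified language profile';
  }
  const i18n = toI18nProfile(candidate);
  return `${i18n.name} (${i18n.code}): ${i18n.wordOrder}, ${i18n.adpositionType}, ${i18n.direction}`;
}
```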
+
2798
+ /**
2799
+ * Semantic Static Analysis
2800
+ *
2801
+ * Analyzes semantic nodes for potential issues:
2802
+ * - Conflicting actions on same trigger
2803
+ * - Accessibility problems (hover-only interactions)
2804
+ * - Performance concerns (high-frequency triggers)
2805
+ * - Invalid role combinations
2806
+ *
2807
+ * Can be used:
2808
+ * - Standalone: analyze(input, lang)
2809
+ * - Dev mode: Enabled via config, auto-warns on parse
2810
+ * - Build time: Integrate with bundlers
2811
+ */
2812
+
2813
+ type WarningSeverity = 'error' | 'warning' | 'info';
2814
+ type WarningCode = 'HOVER_ONLY_INTERACTION' | 'HIGH_FREQUENCY_TRIGGER' | 'MISSING_REQUIRED_ROLE' | 'INVALID_ROLE_FOR_COMMAND' | 'CONFLICTING_ACTIONS' | 'UNREACHABLE_BEHAVIOR' | 'POTENTIAL_RACE_CONDITION';
2815
+ interface AnalysisWarning {
2816
+ code: WarningCode;
2817
+ severity: WarningSeverity;
2818
+ message: string;
2819
+ suggestion?: string;
2820
+ location?: {
2821
+ input: string;
2822
+ role?: SemanticRole;
2823
+ };
2824
+ }
2825
+ interface AnalysisResult {
2826
+ valid: boolean;
2827
+ warnings: AnalysisWarning[];
2828
+ node: SemanticNode | null;
2829
+ }
2830
+ interface AnalysisConfig {
2831
+ /** Enable accessibility checks (default: true) */
2832
+ accessibility?: boolean;
2833
+ /** Enable performance checks (default: true) */
2834
+ performance?: boolean;
2835
+ /** Enable schema validation (default: true) */
2836
+ schema?: boolean;
2837
+ /** Treat warnings as errors (default: false) */
2838
+ strict?: boolean;
2839
+ }
2840
+ /**
2841
+ * Check for hover-only interactions (accessibility issue).
2842
+ */
2843
+ declare function checkAccessibility(node: SemanticNode, input: string): AnalysisWarning[];
2844
+ /**
2845
+ * Check for high-frequency events without throttling.
2846
+ */
2847
+ declare function checkPerformance(node: SemanticNode, input: string): AnalysisWarning[];
2848
+ /**
2849
+ * Validate role combinations against command schema.
2850
+ */
2851
+ declare function checkSchema(node: SemanticNode, input: string): AnalysisWarning[];
2852
+ /**
2853
+ * Analyze multiple nodes together to detect conflicts.
2854
+ */
2855
+ declare function analyzeMultiple(nodes: SemanticNode[], _config?: AnalysisConfig): AnalysisWarning[];
2856
+ /**
2857
+ * Analyze a single hyperscript input for potential issues.
2858
+ *
2859
+ * @param input - The hyperscript text to analyze
2860
+ * @param lang - The language of the input (default: 'en')
2861
+ * @param config - Analysis configuration
2862
+ * @returns Analysis result with warnings
2863
+ *
2864
+ * @example
2865
+ * ```typescript
2866
+ * const result = analyze('on hover show .tooltip', 'en');
2867
+ * // result.warnings[0].code === 'HOVER_ONLY_INTERACTION'
2868
+ * ```
2869
+ */
2870
+ declare function analyze(input: string, lang?: string, config?: AnalysisConfig): AnalysisResult;
2871
+ /**
2872
+ * Analyze multiple hyperscript inputs together.
2873
+ *
2874
+ * @param inputs - Array of hyperscript texts
2875
+ * @param lang - The language of the inputs
2876
+ * @param config - Analysis configuration
2877
+ * @returns Combined analysis result
2878
+ */
2879
+ declare function analyzeAll(inputs: string[], lang?: string, config?: AnalysisConfig): AnalysisResult;
2880
+ /**
2881
+ * Enable dev mode analysis.
2882
+ * When enabled, every parse() call will run analysis and log warnings.
2883
+ */
2884
+ declare function enableDevMode(config?: AnalysisConfig): void;
2885
+ /**
2886
+ * Disable dev mode analysis.
2887
+ */
2888
+ declare function disableDevMode(): void;
2889
+ /**
2890
+ * Check if dev mode is enabled.
2891
+ */
2892
+ declare function isDevModeEnabled(): boolean;
2893
+ /**
2894
+ * Get current dev mode config.
2895
+ */
2896
+ declare function getDevModeConfig(): AnalysisConfig;
2897
+ /**
2898
+ * Run dev mode analysis if enabled.
2899
+ * Called internally by parser when dev mode is on.
2900
+ */
2901
+ declare function devModeAnalyze(input: string, lang: string, node: SemanticNode | null): void;
2902
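A sketch of a build-time style check plus dev-mode toggling. The first snippet is the hover example from the `analyze` docs above; `strict: true` is the documented way to treat warnings as errors, while the console output format here is just illustrative.

```typescript
import { analyzeAll, enableDevMode, disableDevMode } from '@lokascript/semantic';

// Batch-check a set of behaviours, treating warnings as errors.
const snippets = [
  'on hover show .tooltip',      // expected to flag HOVER_ONLY_INTERACTION
  'toggle .active on #button',
];

const result = analyzeAll(snippets, 'en', { strict: true });
if (!result.valid) {
  for (const warning of result.warnings) {
    console.warn(`[${warning.severity}] ${warning.code}: ${warning.message}`);
    if (warning.suggestion) console.warn(`  suggestion: ${warning.suggestion}`);
  }
}

// During development the same checks can run on every parse() automatically.
enableDevMode({ accessibility: true, performance: true });
// ...and be switched off again, e.g. for production builds.
disableDevMode();
```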
+
2903
+ /**
2904
+ * Expression Parser Types
2905
+ *
2906
+ * Defines AST node types for expressions that can be shared between
2907
+ * the semantic package (AST building) and core package (runtime).
2908
+ *
2909
+ * These types are intentionally minimal and focused on expressions only.
2910
+ */
2911
+ /**
2912
+ * Base interface for all expression AST nodes
2913
+ */
2914
+ interface ExpressionNode {
2915
+ readonly type: string;
2916
+ readonly start?: number | undefined;
2917
+ readonly end?: number | undefined;
2918
+ readonly line?: number | undefined;
2919
+ readonly column?: number | undefined;
2920
+ }
2921
+ interface LiteralNode extends ExpressionNode {
2922
+ readonly type: 'literal';
2923
+ readonly value: string | number | boolean | null | undefined;
2924
+ readonly raw?: string | undefined;
2925
+ readonly dataType?: 'string' | 'number' | 'boolean' | 'null' | 'undefined' | 'duration' | undefined;
2926
+ }
2927
+ type SelectorKind = 'id' | 'class' | 'attribute' | 'element' | 'query' | 'complex';
2928
+ interface SelectorNode extends ExpressionNode {
2929
+ readonly type: 'selector' | 'cssSelector' | 'idRef' | 'classRef';
2930
+ readonly value?: string;
2931
+ readonly selector?: string;
2932
+ readonly selectorType?: SelectorKind;
2933
+ }
2934
+ type ContextType = 'me' | 'you' | 'it' | 'its' | 'my' | 'your' | 'result' | 'event' | 'target' | 'body' | 'detail';
2935
+ interface ContextReferenceNode extends ExpressionNode {
2936
+ readonly type: 'contextReference' | 'symbol';
2937
+ readonly contextType?: ContextType;
2938
+ readonly name?: string;
2939
+ }
2940
+ interface PropertyAccessNode extends ExpressionNode {
2941
+ readonly type: 'propertyAccess';
2942
+ readonly object: ExpressionNode;
2943
+ readonly property: string;
2944
+ }
2945
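To make the node shapes concrete, here is a hand-built illustration of how an expression like `me.value` could be represented with the interfaces above. These interfaces are internal to the declaration file (they do not appear in the export list at the bottom), so the objects are written structurally rather than imported.

```typescript
// ContextReferenceNode for the hyperscript context keyword `me`.
const meReference = {
  type: 'contextReference' as const,
  contextType: 'me' as const,
};

// PropertyAccessNode for `me.value`: the object is the reference above,
// the property is the plain member name.
const meValue = {
  type: 'propertyAccess' as const,
  object: meReference,
  property: 'value',
};
```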
+
2946
+ /**
2947
+ * Semantic Value to AST Node Converters
2948
+ *
2949
+ * Converts SemanticValue types to AST expression nodes.
2950
+ * Used by the AST builder to construct expression trees from semantic parsing results.
2951
+ */
2952
+
2953
+ /**
2954
+ * Convert a SemanticValue to an AST ExpressionNode.
2955
+ *
2956
+ * @param value - The semantic value to convert
2957
+ * @param warnings - Optional array to collect warnings about potentially incorrect type choices
2958
+ * @returns The corresponding AST expression node
2959
+ */
2960
+ declare function convertValue(value: SemanticValue, warnings?: string[]): ExpressionNode;
2961
+ /**
2962
+ * Convert a LiteralValue to a LiteralNode.
2963
+ */
2964
+ declare function convertLiteral(value: LiteralValue): LiteralNode;
2965
+ /**
2966
+ * Convert a SelectorValue to a SelectorNode.
2967
+ *
2968
+ * @param value - The selector value to convert
2969
+ * @param warnings - Optional array to collect warnings
2970
+ */
2971
+ declare function convertSelector(value: SelectorValue, warnings?: string[]): SelectorNode;
2972
+ /**
2973
+ * Convert a ReferenceValue to a ContextReferenceNode.
2974
+ */
2975
+ declare function convertReference(value: ReferenceValue): ContextReferenceNode;
2976
+ /**
2977
+ * Convert a PropertyPathValue to a PropertyAccessNode.
2978
+ * Recursively converts the object part.
2979
+ *
2980
+ * @param value - The property path value to convert
2981
+ * @param warnings - Optional array to collect warnings
2982
+ */
2983
+ declare function convertPropertyPath(value: PropertyPathValue, warnings?: string[]): PropertyAccessNode;
2984
+ /**
2985
+ * Convert an ExpressionValue (raw string) by parsing it with the expression parser.
2986
+ * This is the fallback for complex expressions that couldn't be fully parsed
2987
+ * at the semantic level.
2988
+ */
2989
+ declare function convertExpression(value: ExpressionValue): ExpressionNode;
2990
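A thin wrapper over `convertValue` that surfaces its optional warnings channel. The concrete shapes of the `SemanticValue` variants are declared elsewhere in this file, so the sketch stays generic over the union.

```typescript
import { convertValue, type SemanticValue } from '@lokascript/semantic';

// Convert a semantic value and return any type-inference warnings alongside the node.
function convertWithWarnings(value: SemanticValue) {
  const warnings: string[] = [];
  const node = convertValue(value, warnings);
  return { node, warnings };
}
```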
+
2991
+ /**
2992
+ * Command-specific AST Mappers
2993
+ *
2994
+ * Each command can have a custom mapper that knows how to convert
2995
+ * its semantic roles to the appropriate AST structure.
2996
+ */
2997
+
2998
+ /**
2999
+ * Result from command mapping, including the AST and any warnings.
3000
+ */
3001
+ interface CommandMapperResult {
3002
+ ast: CommandNode;
3003
+ warnings: string[];
3004
+ }
3005
+ /**
3006
+ * Interface for command-specific AST mappers.
3007
+ */
3008
+ interface CommandMapper {
3009
+ /**
3010
+ * The action type this mapper handles.
3011
+ */
3012
+ readonly action: ActionType;
3013
+ /**
3014
+ * Convert a CommandSemanticNode to a CommandNode.
3015
+ *
3016
+ * @param node - The semantic command node
3017
+ * @param builder - The AST builder (for recursive building if needed)
3018
+ * @returns The AST command node with any warnings, or just the AST node for backward compatibility
3019
+ */
3020
+ toAST(node: CommandSemanticNode, builder: ASTBuilder): CommandMapperResult | CommandNode;
3021
+ }
3022
+ /**
3023
+ * Get the command mapper for an action type.
3024
+ *
3025
+ * @param action - The action type
3026
+ * @returns The mapper, or undefined if no specific mapper exists
3027
+ */
3028
+ declare function getCommandMapper(action: ActionType): CommandMapper | undefined;
3029
+ /**
3030
+ * Register a custom command mapper.
3031
+ *
3032
+ * @param mapper - The command mapper to register
3033
+ */
3034
+ declare function registerCommandMapper(mapper: CommandMapper): void;
3035
+ /**
3036
+ * Get all registered command mappers.
3037
+ */
3038
+ declare function getRegisteredMappers(): Map<ActionType, CommandMapper>;
3039
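A sketch showing the registration mechanics only: it wraps the built-in mapper for `toggle` (assumed to be a member of `ActionType`, as `toggleSchema` above suggests) and delegates to it. A real custom mapper would build its own `CommandNode` from the semantic node's roles instead of delegating.

```typescript
import {
  getCommandMapper,
  registerCommandMapper,
  type ActionType,
  type CommandMapper,
} from '@lokascript/semantic';

const toggleAction = 'toggle' as ActionType; // assumed member of the ActionType union
const builtIn = getCommandMapper(toggleAction);

if (builtIn) {
  const instrumented: CommandMapper = {
    action: toggleAction,
    toAST(node, builder) {
      console.debug('custom toggle mapper invoked');
      // Delegate to the built-in mapping; a custom mapper would construct
      // its own CommandNode (and warnings) here instead.
      return builtIn.toAST(node, builder);
    },
  };
  registerCommandMapper(instrumented);
}
```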
+
3040
+ /**
3041
+ * Semantic to AST Builder
3042
+ *
3043
+ * Converts SemanticNodes directly to AST nodes, bypassing the English text
3044
+ * generation and re-parsing step.
3045
+ *
3046
+ * Flow:
3047
+ * Japanese → Semantic Parser → SemanticNode → AST Builder → AST
3048
+ *
3049
+ * Instead of:
3050
+ * Japanese → Semantic Parser → SemanticNode → English Text → Parser → AST
3051
+ */
3052
+
3053
+ /**
3054
+ * Base AST node interface
3055
+ */
3056
+ interface ASTNode {
3057
+ readonly type: string;
3058
+ readonly start?: number;
3059
+ readonly end?: number;
3060
+ readonly line?: number;
3061
+ readonly column?: number;
3062
+ [key: string]: unknown;
3063
+ }
3064
+ /**
3065
+ * Command AST node
3066
+ */
3067
+ interface CommandNode extends ASTNode {
3068
+ readonly type: 'command';
3069
+ readonly name: string;
3070
+ readonly args: ExpressionNode[];
3071
+ readonly modifiers?: Record<string, ExpressionNode>;
3072
+ readonly isBlocking?: boolean;
3073
+ readonly implicitTarget?: ExpressionNode;
3074
+ }
3075
+ /**
3076
+ * Event handler AST node (compatible with @lokascript/core)
3077
+ */
3078
+ interface EventHandlerNode extends ASTNode {
3079
+ readonly type: 'eventHandler';
3080
+ /** Primary event name */
3081
+ readonly event: string;
3082
+ /** All event names when using "on event1 or event2" syntax */
3083
+ readonly events?: string[];
3084
+ /** CSS selector for event delegation ("from" keyword) */
3085
+ readonly selector?: string;
3086
+ /** Target for "from" clause (as string or expression) */
3087
+ readonly target?: string;
3088
+ /** Optional event condition ("[condition]" syntax) */
3089
+ readonly condition?: ASTNode;
3090
+ /** Attribute name for mutation events ("of @attribute" syntax) */
3091
+ readonly attributeName?: string;
3092
+ /** Target element to watch for changes ("in <target>" syntax) */
3093
+ readonly watchTarget?: ExpressionNode;
3094
+ /** Event parameter names to destructure (e.g., ['clientX', 'clientY']) */
3095
+ readonly args?: string[];
3096
+ /** Event parameters (alias for args) */
3097
+ readonly params?: string[];
3098
+ /** Handler commands */
3099
+ readonly commands: ASTNode[];
3100
+ }
3101
+ /**
3102
+ * Conditional AST node (if/else)
3103
+ *
3104
+ * Note: For runtime compatibility, buildConditional() now produces a CommandNode
3105
+ * with condition and branches as args, matching what IfCommand expects.
3106
+ * This interface is retained for reference but not used as output.
3107
+ */
3108
+ interface ConditionalNode extends ASTNode {
3109
+ readonly type: 'if';
3110
+ readonly condition: ExpressionNode;
3111
+ readonly thenBranch: ASTNode[];
3112
+ readonly elseBranch?: ASTNode[];
3113
+ }
3114
+ /**
3115
+ * Command sequence node (runtime-compatible format for chained commands)
3116
+ */
3117
+ interface CommandSequenceNode extends ASTNode {
3118
+ readonly type: 'CommandSequence';
3119
+ /** Commands in the sequence */
3120
+ readonly commands: ASTNode[];
3121
+ }
3122
+ /**
3123
+ * Block node (for grouping commands)
3124
+ */
3125
+ interface BlockNode extends ASTNode {
3126
+ readonly type: 'block';
3127
+ readonly commands: ASTNode[];
3128
+ }
3129
+ interface ASTBuilderOptions {
3130
+ /**
3131
+ * Fallback function to parse complex expressions that can't be handled
3132
+ * directly by the AST builder. Uses the expression-parser by default.
3133
+ */
3134
+ parseExpression?: (input: string) => ExpressionNode | null;
3135
+ }
3136
+ /**
3137
+ * Builds AST nodes directly from SemanticNodes.
3138
+ */
3139
+ declare class ASTBuilder {
3140
+ /**
3141
+ * Warnings collected during AST building (e.g., type inference issues).
3142
+ */
3143
+ warnings: string[];
3144
+ constructor(_options?: ASTBuilderOptions);
3145
+ /**
3146
+ * Build an AST from a SemanticNode.
3147
+ *
3148
+ * @param node - The semantic node to convert
3149
+ * @returns The corresponding AST node
3150
+ */
3151
+ build(node: SemanticNode): ASTNode;
3152
+ /**
3153
+ * Build a CommandNode from a CommandSemanticNode.
3154
+ */
3155
+ private buildCommand;
3156
+ /**
3157
+ * Generic command builder when no specific mapper is available.
3158
+ * Maps roles to args in a predictable order.
3159
+ */
3160
+ private buildGenericCommand;
3161
+ /**
3162
+ * Map semantic roles to hyperscript modifier keywords.
3163
+ */
3164
+ private roleToModifierKey;
3165
+ /**
3166
+ * Build an EventHandlerNode from an EventHandlerSemanticNode.
3167
+ */
3168
+ private buildEventHandler;
3169
+ /**
3170
+ * Build a CommandNode from a ConditionalSemanticNode.
3171
+ *
3172
+ * Produces a command node with:
3173
+ * - args[0]: condition expression
3174
+ * - args[1]: then block (wrapped in { type: 'block', commands: [...] })
3175
+ * - args[2]: else block (optional, same format)
3176
+ *
3177
+ * This format matches what IfCommand.parseInput() expects.
3178
+ */
3179
+ private buildConditional;
3180
+ /**
3181
+ * Build AST nodes from a CompoundSemanticNode.
3182
+ *
3183
+ * Converts to CommandSequence for runtime compatibility.
3184
+ * The runtime recognizes 'CommandSequence' type and executes commands in order.
3185
+ */
3186
+ private buildCompound;
3187
+ /**
3188
+ * Build a CommandNode from a LoopSemanticNode.
3189
+ *
3190
+ * Produces a 'repeat' command with:
3191
+ * - args[0]: loop type identifier (forever, times, for, while, until)
3192
+ * - args[1]: count/condition/variable depending on loop type
3193
+ * - args[2]: collection (for 'for' loops)
3194
+ * - args[last]: body block
3195
+ *
3196
+ * This format matches what the repeat command parser produces.
3197
+ */
3198
+ private buildLoop;
3199
+ /**
3200
+ * Build a BlockNode from an array of semantic nodes.
3201
+ * Useful for grouping commands in if/else branches.
3202
+ */
3203
+ buildBlock(nodes: SemanticNode[]): BlockNode;
3204
+ }
3205
+ /**
3206
+ * Result from building an AST, including any warnings.
3207
+ */
3208
+ interface BuildASTResult {
3209
+ ast: ASTNode;
3210
+ warnings: string[];
3211
+ }
3212
+ /**
3213
+ * Build an AST from a SemanticNode using default options.
3214
+ *
3215
+ * @param node - The semantic node to convert
3216
+ * @returns The corresponding AST node and any warnings
3217
+ */
3218
+ declare function buildAST(node: SemanticNode): BuildASTResult;
3219
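A sketch of the direct flow described above: parse Japanese input and convert the semantic node straight to an AST. The Japanese sample and the `parse` call come from this file's own examples; the null guard is defensive, since whether `parse` returns null or throws on failure is declared elsewhere in this file.

```typescript
import { parse, buildAST } from '@lokascript/semantic';

// Japanese → SemanticNode → AST, with no English round trip.
const node = parse('#button の .active を 切り替え', 'ja');

if (node) {
  const { ast, warnings } = buildAST(node);
  console.log('AST node type:', ast.type);
  for (const warning of warnings) {
    console.warn('AST builder warning:', warning);
  }
}
```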
+
3220
+ /**
3221
+ * Language Loader
3222
+ *
3223
+ * Provides lazy loading capabilities for language modules.
3224
+ * Languages can be loaded:
3225
+ * 1. Via dynamic import from a package subpath
3226
+ * 2. From a URL (for CDN usage)
3227
+ * 3. From a pre-loaded module object
3228
+ *
3229
+ * @example
3230
+ * ```typescript
3231
+ * import { loadLanguage, parse } from '@lokascript/semantic/browser/lazy';
3232
+ *
3233
+ * // Load Japanese on demand
3234
+ * await loadLanguage('ja');
3235
+ *
3236
+ * // Now parsing works for Japanese
3237
+ * parse('トグル .active', 'ja');
3238
+ * ```
3239
+ */
3240
+
3241
+ /**
3242
+ * Options for loading a language.
3243
+ */
3244
+ interface LoadLanguageOptions {
3245
+ /**
3246
+ * URL to fetch the language module from (for CDN usage).
3247
+ * The module should export: tokenizer, profile, patterns (or buildPatterns)
3248
+ */
3249
+ url?: string;
3250
+ /**
3251
+ * Pre-loaded module object.
3252
+ * Use this if you've already imported the module.
3253
+ */
3254
+ module?: LanguageModule;
3255
+ /**
3256
+ * Skip loading if the language is already registered.
3257
+ * Defaults to true.
3258
+ */
3259
+ skipIfRegistered?: boolean;
3260
+ }
3261
+ /**
3262
+ * A language module that can be registered with the semantic parser.
3263
+ */
3264
+ interface LanguageModule {
3265
+ /** The language tokenizer */
3266
+ tokenizer: LanguageTokenizer;
3267
+ /** The language profile for pattern generation */
3268
+ profile: LanguageProfile;
3269
+ /** Pre-built patterns (optional) */
3270
+ patterns?: LanguagePattern[];
3271
+ /** Function to build patterns lazily (optional) */
3272
+ buildPatterns?: () => LanguagePattern[];
3273
+ }
3274
+ /**
3275
+ * Result of a language loading operation.
3276
+ */
3277
+ interface LoadLanguageResult {
3278
+ /** The language code that was loaded */
3279
+ code: string;
3280
+ /** Whether the language was newly loaded (false if already registered) */
3281
+ loaded: boolean;
3282
+ /** Error message if loading failed */
3283
+ error?: string;
3284
+ }
3285
+ /**
3286
+ * List of all supported language codes.
3287
+ */
3288
+ declare const SUPPORTED_LANGUAGES: string[];
3289
+ /**
3290
+ * Load a single language.
3291
+ *
3292
+ * @param code - The language code (e.g., 'en', 'ja', 'es')
3293
+ * @param options - Loading options
3294
+ * @returns Result indicating success or failure
3295
+ *
3296
+ * @example
3297
+ * ```typescript
3298
+ * // Load from package
3299
+ * await loadLanguage('ja');
3300
+ *
3301
+ * // Load from CDN
3302
+ * await loadLanguage('ko', {
3303
+ * url: 'https://cdn.example.com/hyperfixi-semantic-ko.js'
3304
+ * });
3305
+ *
3306
+ * // Load from pre-loaded module
3307
+ * await loadLanguage('en', { module: myEnglishModule });
3308
+ * ```
3309
+ */
3310
+ declare function loadLanguage(code: string, options?: LoadLanguageOptions): Promise<LoadLanguageResult>;
3311
+ /**
3312
+ * Load multiple languages in parallel.
3313
+ *
3314
+ * @param codes - Array of language codes to load
3315
+ * @param options - Loading options (applied to all languages)
3316
+ * @returns Array of results for each language
3317
+ *
3318
+ * @example
3319
+ * ```typescript
3320
+ * // Load multiple languages
3321
+ * const results = await loadLanguages(['en', 'es', 'ja']);
3322
+ *
3323
+ * // Check results
3324
+ * for (const result of results) {
3325
+ * if (result.error) {
3326
+ * console.error(`Failed to load ${result.code}: ${result.error}`);
3327
+ * }
3328
+ * }
3329
+ * ```
3330
+ */
3331
+ declare function loadLanguages(codes: string[], options?: Omit<LoadLanguageOptions, 'module'>): Promise<LoadLanguageResult[]>;
3332
+ /**
3333
+ * Check if a language can be loaded (is supported).
3334
+ */
3335
+ declare function canLoadLanguage(code: string): boolean;
3336
+ /**
3337
+ * Get list of languages that are currently loaded.
3338
+ */
3339
+ declare function getLoadedLanguages(): string[];
3340
+ /**
3341
+ * Get list of languages that are not yet loaded.
3342
+ */
3343
+ declare function getUnloadedLanguages(): string[];
3344
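A small load-on-demand helper combining the loader utilities with `parse`. All of these names appear in the package's main export list; substituting the lazy browser subpath shown in the docs above is an assumption about that bundle's surface.

```typescript
import { canLoadLanguage, getLoadedLanguages, loadLanguage, parse } from '@lokascript/semantic';

// Load a language only when it is first needed, then parse the input.
async function parseLazily(input: string, lang: string) {
  if (!canLoadLanguage(lang)) {
    throw new Error(`Unsupported language: ${lang}`);
  }
  if (!getLoadedLanguages().includes(lang)) {
    const result = await loadLanguage(lang);
    if (result.error) {
      throw new Error(`Failed to load ${result.code}: ${result.error}`);
    }
  }
  return parse(input, lang);
}
```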
+
3345
+ /**
3346
+ * Confidence Calculator Utility
3347
+ *
3348
+ * Provides standalone confidence calculation for translations.
3349
+ * Exposes the pattern matcher's confidence scoring for use in scripts.
3350
+ */
3351
+
3352
+ interface ConfidenceResult {
3353
+ /** Confidence score from 0 to 1 */
3354
+ confidence: number;
3355
+ /** Whether the input parsed successfully */
3356
+ parseSuccess: boolean;
3357
+ /** Pattern ID that matched, if any */
3358
+ patternId?: string;
3359
+ /** The action type (command) that was parsed */
3360
+ action?: ActionType;
3361
+ /** Number of tokens consumed during matching */
3362
+ tokensConsumed?: number;
3363
+ /** Error message if parsing failed */
3364
+ error?: string;
3365
+ }
3366
+ /**
3367
+ * Calculate confidence score for a hyperscript translation.
3368
+ *
3369
+ * Uses the pattern matcher to determine how well the input matches
3370
+ * available patterns for the given language.
3371
+ *
3372
+ * @param hyperscript - The hyperscript code to analyze
3373
+ * @param language - The language code (e.g., 'ja', 'es', 'en')
3374
+ * @returns Confidence result with score and match details
3375
+ */
3376
+ declare function calculateTranslationConfidence(hyperscript: string, language: string): ConfidenceResult;
3377
+ interface ParseWithConfidenceResult {
3378
+ node: SemanticNode | null;
3379
+ confidence: number;
3380
+ error: string | undefined;
3381
+ }
3382
+ /**
3383
+ * Calculate confidence and parse to a semantic node in one call.
3384
+ * Returns both the parsed node and the confidence score.
3385
+ */
3386
+ declare function parseWithConfidence(hyperscript: string, language: string): ParseWithConfidenceResult;
3387
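A sketch that gates translation on the confidence score. It assumes the exported `HIGH_CONFIDENCE_THRESHOLD` is a number on the same 0 to 1 scale as `ConfidenceResult.confidence`, and that `translate(source, from, to)` returns the translated string as in the package overview below.

```typescript
import {
  calculateTranslationConfidence,
  HIGH_CONFIDENCE_THRESHOLD,
  translate,
} from '@lokascript/semantic';

// Only translate when the source parses with high confidence in its own language.
function translateIfConfident(source: string, from: string, to: string) {
  const report = calculateTranslationConfidence(source, from);
  if (!report.parseSuccess || report.confidence < HIGH_CONFIDENCE_THRESHOLD) {
    console.warn(
      `Skipping translation of "${source}" (confidence ${report.confidence}): ` +
        (report.error ?? 'no high-confidence pattern match')
    );
    return null;
  }
  return translate(source, from, to);
}
```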
+
3388
+ /**
3389
+ * Semantic-First Multilingual Hyperscript
3390
+ *
3391
+ * This package provides a semantic-first approach to multilingual hyperscript,
3392
+ * enabling true native-language syntax that feels natural to speakers of any language.
3393
+ *
3394
+ * Key Features:
3395
+ * - Parse hyperscript from any supported language (e.g., en, ja, ar, es)
3396
+ * - Translate between languages while preserving semantic meaning
3397
+ * - Explicit mode syntax for learning and debugging
3398
+ * - Bidirectional conversion: natural ↔ explicit ↔ natural
3399
+ *
3400
+ * @example
3401
+ * // Parse Japanese to semantic
3402
+ * const node = parse('#button の .active を 切り替え', 'ja');
3403
+ *
3404
+ * // Render in English
3405
+ * const english = render(node, 'en');
3406
+ * // → 'toggle .active on #button'
3407
+ *
3408
+ * // Render in explicit mode
3409
+ * const explicit = renderExplicit(node);
3410
+ * // → '[toggle patient:.active destination:#button]'
3411
+ *
3412
+ * // Translate directly
3413
+ * const arabic = translate('toggle .active on #button', 'en', 'ar');
3414
+ * // → 'بدّل .active على #button'
3415
+ */
3416
+
3417
+ /**
3418
+ * Get all supported languages for parsing.
3419
+ */
3420
+ declare function getSupportedLanguages(): string[];
3421
+ /**
3422
+ * Version of the semantic package.
3423
+ */
3424
+ declare const VERSION = "0.1.0";
3425
+
3426
+ export { ASTBuilder, type ASTBuilderOptions, type ASTNode, type ActionType, type AnalysisConfig, type AnalysisResult, type AnalysisWarning, type BlockNode, type BuildASTResult, type CacheStats, type CommandCategory, type CommandMapper, type CommandMapperResult, type CommandNode, type CommandSchema, type CommandSemanticNode, type CommandSequenceNode, type CompoundSemanticNode, type ConditionalNode, type ConditionalSemanticNode, type ConfidenceResult, DEFAULT_CONFIDENCE_THRESHOLD, type EventHandlerNode, type EventHandlerSemanticNode, type EventModifiers, type ExpressionValue, type ExtractionRule, type ExtractionRules, type GeneratorConfig, type GroupPatternToken, HIGH_CONFIDENCE_THRESHOLD, type KeywordTranslation$1 as KeywordTranslation, SUPPORTED_LANGUAGES as LAZY_LOAD_LANGUAGES, type LanguageModule, type LanguagePattern, type LanguageProfile, type LanguageToken, type LanguageTokenizer, type LiteralPatternToken, type LiteralValue, type LoadLanguageOptions, type LoadLanguageResult, type LoopSemanticNode, type LoopVariant, type MarkingStrategy$1 as MarkingStrategy, type ParseWithConfidenceResult, type PatternConstraints, type PatternMatchError, type PatternMatchResult, PatternMatcher$1 as PatternMatcher, type PatternTemplate, type PatternToken, type PossessiveConfig, type PropertyPathValue, type ReferenceValue, type RoleMarker, type RolePatternToken, type RoleSpec, type SelectorValue, type SemanticAnalysisResult, type SemanticAnalyzer, SemanticAnalyzerImpl, SemanticCache, type SemanticCacheConfig, type SemanticMetadata, type SemanticNode, type SemanticParser, SemanticParserImpl, type SemanticRenderer, SemanticRendererImpl, type SemanticRole, type SemanticValue, type SourcePosition, type StreamMark, type TokenKind, type TokenStream, TokenStreamImpl, type TokenizationConfig$1 as TokenizationConfig, type GrammarRule as UnifiedGrammarRule, type UnifiedLanguageProfile, type PatternMatcher as UnifiedPatternMatcher, type PatternTransform as UnifiedPatternTransform, type UnifiedRoleMarker, VERSION, type ValidationError, type ValidationResult, type VerbConfig$1 as VerbConfig, type VerbForm, type WarningCode, type WarningSeverity, type WordOrder, addSchema, analyze, analyzeAll, analyzeMultiple, appendSchema, arabicProfile, arabicTokenizer, buildAST, calculateTranslationConfidence, canLoadLanguage, canParse, checkAccessibility, checkPerformance, checkSchema, chineseProfile, chineseTokenizer, commandSchemas, convertExpression, convertLiteral, convertPropertyPath, convertReference, convertSelector, convertValue, createCommandNode, createCompoundNode, createConditionalNode, createEventHandler, createLiteral, createLoopNode, createPropertyPath, createReference, createSelector, createSemanticAnalyzer, createSemanticCache, decrementSchema, devModeAnalyze, disableDevMode, enableDevMode, englishProfile, englishTokenizer, eventNameTranslations, fetchSchema, frenchProfile, fromExplicit, generateAllPatterns, generatePattern, generatePatternVariants, generatePatternsForCommand, generatePatternsForLanguage, generateSimplePattern, germanProfile, getAllPatterns, getAllTranslations, getCommandMapper, getCommandType, getDefinedSchemas, getDevModeConfig, getEventHandlerPatternsForLanguage, getSupportedLanguages$1 as getGeneratorLanguages, getGeneratorSummary, getLoadedLanguages, getPatternById, getPatternStats, getPatternsForLanguage, getPatternsForLanguageAndCommand, getProfile, getPutPatternsForLanguage, getRegisteredLanguages, getRegisteredMappers, getSchema$1 as getSchema, getSchemasByCategory, 
getSupportedCommands, getSupportedLanguages, getSupportedLanguages$3 as getSupportedPatternLanguages, getSupportedLanguages$2 as getSupportedTokenizerLanguages, getTogglePatternsForLanguage, getTokenizer, getUnloadedLanguages, getSchema as getValidatorSchema, hideSchema, incrementSchema, indonesianProfile, isDevModeEnabled, isExplicitSyntax, isLanguageSupported as isGeneratorLanguageSupported, isLanguageSupported$1 as isLanguageSupported, isUnifiedProfile, japaneseProfile, japaneseTokenizer, koreanProfile, koreanTokenizer, languageProfiles, loadLanguage, loadLanguages, markingStrategyToAdpositionType, matchBest, matchPattern, normalizeEventName, onSchema, parse, parseAny, parseExplicit, parseWithConfidence, patternMatcher, portugueseProfile, prependSchema, putSchema, quechuaProfile, registerCommandMapper, registerSchema, registerTokenizer, removeSchema, render, renderExplicit, rolesToCommandArgs, roundTrip, schemaRegistry, semanticCache, semanticParser, semanticRenderer, setSchema, shouldUseSemanticResult, showSchema, spanishProfile, spanishTokenizer, swahiliProfile, toExplicit, toGrammaticalMarker, toI18nProfile, toggleSchema, tokenize, translate, triggerSchema, tryGetProfile, turkishProfile, turkishTokenizer, validateAndAdjustConfidence, validateLanguageKeywords, validateSemanticResult, validateTranslation, waitSchema, withCache };