@lokascript/semantic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +686 -0
  3. package/dist/browser-ar.ar.global.js +2 -0
  4. package/dist/browser-core.core.global.js +2 -0
  5. package/dist/browser-de.de.global.js +2 -0
  6. package/dist/browser-east-asian.east-asian.global.js +2 -0
  7. package/dist/browser-en-tr.en-tr.global.js +2 -0
  8. package/dist/browser-en.en.global.js +2 -0
  9. package/dist/browser-es-en.es-en.global.js +2 -0
  10. package/dist/browser-es.es.global.js +2 -0
  11. package/dist/browser-fr.fr.global.js +2 -0
  12. package/dist/browser-id.id.global.js +2 -0
  13. package/dist/browser-ja.ja.global.js +2 -0
  14. package/dist/browser-ko.ko.global.js +2 -0
  15. package/dist/browser-lazy.lazy.global.js +2 -0
  16. package/dist/browser-priority.priority.global.js +2 -0
  17. package/dist/browser-pt.pt.global.js +2 -0
  18. package/dist/browser-qu.qu.global.js +2 -0
  19. package/dist/browser-sw.sw.global.js +2 -0
  20. package/dist/browser-tr.tr.global.js +2 -0
  21. package/dist/browser-western.western.global.js +2 -0
  22. package/dist/browser-zh.zh.global.js +2 -0
  23. package/dist/browser.global.js +3 -0
  24. package/dist/browser.global.js.map +1 -0
  25. package/dist/index.cjs +35051 -0
  26. package/dist/index.cjs.map +1 -0
  27. package/dist/index.d.cts +3426 -0
  28. package/dist/index.d.ts +3426 -0
  29. package/dist/index.js +34890 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/languages/ar.d.ts +78 -0
  32. package/dist/languages/ar.js +1622 -0
  33. package/dist/languages/ar.js.map +1 -0
  34. package/dist/languages/de.d.ts +38 -0
  35. package/dist/languages/de.js +1168 -0
  36. package/dist/languages/de.js.map +1 -0
  37. package/dist/languages/en.d.ts +44 -0
  38. package/dist/languages/en.js +3491 -0
  39. package/dist/languages/en.js.map +1 -0
  40. package/dist/languages/es.d.ts +52 -0
  41. package/dist/languages/es.js +1493 -0
  42. package/dist/languages/es.js.map +1 -0
  43. package/dist/languages/fr.d.ts +37 -0
  44. package/dist/languages/fr.js +1159 -0
  45. package/dist/languages/fr.js.map +1 -0
  46. package/dist/languages/id.d.ts +35 -0
  47. package/dist/languages/id.js +1152 -0
  48. package/dist/languages/id.js.map +1 -0
  49. package/dist/languages/ja.d.ts +53 -0
  50. package/dist/languages/ja.js +1430 -0
  51. package/dist/languages/ja.js.map +1 -0
  52. package/dist/languages/ko.d.ts +51 -0
  53. package/dist/languages/ko.js +1729 -0
  54. package/dist/languages/ko.js.map +1 -0
  55. package/dist/languages/pt.d.ts +37 -0
  56. package/dist/languages/pt.js +1127 -0
  57. package/dist/languages/pt.js.map +1 -0
  58. package/dist/languages/qu.d.ts +36 -0
  59. package/dist/languages/qu.js +1143 -0
  60. package/dist/languages/qu.js.map +1 -0
  61. package/dist/languages/sw.d.ts +35 -0
  62. package/dist/languages/sw.js +1147 -0
  63. package/dist/languages/sw.js.map +1 -0
  64. package/dist/languages/tr.d.ts +45 -0
  65. package/dist/languages/tr.js +1529 -0
  66. package/dist/languages/tr.js.map +1 -0
  67. package/dist/languages/zh.d.ts +58 -0
  68. package/dist/languages/zh.js +1257 -0
  69. package/dist/languages/zh.js.map +1 -0
  70. package/dist/types-C4dcj53L.d.ts +600 -0
  71. package/package.json +202 -0
  72. package/src/__test-utils__/index.ts +7 -0
  73. package/src/__test-utils__/test-helpers.ts +8 -0
  74. package/src/__types__/test-helpers.ts +122 -0
  75. package/src/analysis/index.ts +479 -0
  76. package/src/ast-builder/command-mappers.ts +1133 -0
  77. package/src/ast-builder/expression-parser/index.ts +41 -0
  78. package/src/ast-builder/expression-parser/parser.ts +563 -0
  79. package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
  80. package/src/ast-builder/expression-parser/types.ts +208 -0
  81. package/src/ast-builder/index.ts +536 -0
  82. package/src/ast-builder/value-converters.ts +172 -0
  83. package/src/bridge.ts +275 -0
  84. package/src/browser-ar.ts +162 -0
  85. package/src/browser-core.ts +231 -0
  86. package/src/browser-de.ts +162 -0
  87. package/src/browser-east-asian.ts +173 -0
  88. package/src/browser-en-tr.ts +165 -0
  89. package/src/browser-en.ts +157 -0
  90. package/src/browser-es-en.ts +200 -0
  91. package/src/browser-es.ts +170 -0
  92. package/src/browser-fr.ts +162 -0
  93. package/src/browser-id.ts +162 -0
  94. package/src/browser-ja.ts +162 -0
  95. package/src/browser-ko.ts +162 -0
  96. package/src/browser-lazy.ts +189 -0
  97. package/src/browser-priority.ts +214 -0
  98. package/src/browser-pt.ts +162 -0
  99. package/src/browser-qu.ts +162 -0
  100. package/src/browser-sw.ts +162 -0
  101. package/src/browser-tr.ts +162 -0
  102. package/src/browser-western.ts +181 -0
  103. package/src/browser-zh.ts +162 -0
  104. package/src/browser.ts +268 -0
  105. package/src/cache/index.ts +14 -0
  106. package/src/cache/semantic-cache.ts +344 -0
  107. package/src/core-bridge.ts +372 -0
  108. package/src/explicit/converter.ts +258 -0
  109. package/src/explicit/index.ts +18 -0
  110. package/src/explicit/parser.ts +236 -0
  111. package/src/explicit/renderer.ts +424 -0
  112. package/src/generators/command-schemas.ts +1636 -0
  113. package/src/generators/event-handler-generator.ts +109 -0
  114. package/src/generators/index.ts +117 -0
  115. package/src/generators/language-profiles.ts +139 -0
  116. package/src/generators/pattern-generator.ts +537 -0
  117. package/src/generators/profiles/arabic.ts +131 -0
  118. package/src/generators/profiles/bengali.ts +132 -0
  119. package/src/generators/profiles/chinese.ts +124 -0
  120. package/src/generators/profiles/english.ts +113 -0
  121. package/src/generators/profiles/french.ts +125 -0
  122. package/src/generators/profiles/german.ts +126 -0
  123. package/src/generators/profiles/hindi.ts +146 -0
  124. package/src/generators/profiles/index.ts +46 -0
  125. package/src/generators/profiles/indonesian.ts +125 -0
  126. package/src/generators/profiles/italian.ts +139 -0
  127. package/src/generators/profiles/japanese.ts +149 -0
  128. package/src/generators/profiles/korean.ts +127 -0
  129. package/src/generators/profiles/marker-templates.ts +288 -0
  130. package/src/generators/profiles/ms.ts +130 -0
  131. package/src/generators/profiles/polish.ts +249 -0
  132. package/src/generators/profiles/portuguese.ts +115 -0
  133. package/src/generators/profiles/quechua.ts +113 -0
  134. package/src/generators/profiles/russian.ts +260 -0
  135. package/src/generators/profiles/spanish.ts +130 -0
  136. package/src/generators/profiles/swahili.ts +129 -0
  137. package/src/generators/profiles/thai.ts +132 -0
  138. package/src/generators/profiles/tl.ts +128 -0
  139. package/src/generators/profiles/turkish.ts +124 -0
  140. package/src/generators/profiles/types.ts +165 -0
  141. package/src/generators/profiles/ukrainian.ts +270 -0
  142. package/src/generators/profiles/vietnamese.ts +133 -0
  143. package/src/generators/schema-error-codes.ts +160 -0
  144. package/src/generators/schema-validator.ts +391 -0
  145. package/src/index.ts +429 -0
  146. package/src/language-building-schema.ts +3170 -0
  147. package/src/language-loader.ts +394 -0
  148. package/src/languages/_all.ts +65 -0
  149. package/src/languages/ar.ts +15 -0
  150. package/src/languages/bn.ts +16 -0
  151. package/src/languages/de.ts +15 -0
  152. package/src/languages/en.ts +29 -0
  153. package/src/languages/es.ts +15 -0
  154. package/src/languages/fr.ts +15 -0
  155. package/src/languages/hi.ts +26 -0
  156. package/src/languages/id.ts +15 -0
  157. package/src/languages/index.ts +18 -0
  158. package/src/languages/it.ts +15 -0
  159. package/src/languages/ja.ts +15 -0
  160. package/src/languages/ko.ts +15 -0
  161. package/src/languages/ms.ts +16 -0
  162. package/src/languages/pl.ts +18 -0
  163. package/src/languages/pt.ts +15 -0
  164. package/src/languages/qu.ts +15 -0
  165. package/src/languages/ru.ts +26 -0
  166. package/src/languages/sw.ts +15 -0
  167. package/src/languages/th.ts +16 -0
  168. package/src/languages/tl.ts +16 -0
  169. package/src/languages/tr.ts +15 -0
  170. package/src/languages/uk.ts +26 -0
  171. package/src/languages/vi.ts +16 -0
  172. package/src/languages/zh.ts +15 -0
  173. package/src/parser/index.ts +15 -0
  174. package/src/parser/pattern-matcher.ts +1181 -0
  175. package/src/parser/semantic-parser.ts +573 -0
  176. package/src/parser/utils/index.ts +35 -0
  177. package/src/parser/utils/marker-resolution.ts +111 -0
  178. package/src/parser/utils/possessive-keywords.ts +43 -0
  179. package/src/parser/utils/role-positioning.ts +70 -0
  180. package/src/parser/utils/type-validation.ts +134 -0
  181. package/src/patterns/add/ar.ts +71 -0
  182. package/src/patterns/add/bn.ts +70 -0
  183. package/src/patterns/add/hi.ts +69 -0
  184. package/src/patterns/add/index.ts +87 -0
  185. package/src/patterns/add/it.ts +61 -0
  186. package/src/patterns/add/ja.ts +93 -0
  187. package/src/patterns/add/ko.ts +74 -0
  188. package/src/patterns/add/ms.ts +30 -0
  189. package/src/patterns/add/pl.ts +62 -0
  190. package/src/patterns/add/ru.ts +62 -0
  191. package/src/patterns/add/th.ts +49 -0
  192. package/src/patterns/add/tl.ts +30 -0
  193. package/src/patterns/add/tr.ts +71 -0
  194. package/src/patterns/add/uk.ts +62 -0
  195. package/src/patterns/add/vi.ts +61 -0
  196. package/src/patterns/add/zh.ts +71 -0
  197. package/src/patterns/builders.ts +207 -0
  198. package/src/patterns/decrement/bn.ts +70 -0
  199. package/src/patterns/decrement/de.ts +42 -0
  200. package/src/patterns/decrement/hi.ts +68 -0
  201. package/src/patterns/decrement/index.ts +79 -0
  202. package/src/patterns/decrement/it.ts +69 -0
  203. package/src/patterns/decrement/ms.ts +30 -0
  204. package/src/patterns/decrement/pl.ts +58 -0
  205. package/src/patterns/decrement/ru.ts +58 -0
  206. package/src/patterns/decrement/th.ts +49 -0
  207. package/src/patterns/decrement/tl.ts +30 -0
  208. package/src/patterns/decrement/tr.ts +48 -0
  209. package/src/patterns/decrement/uk.ts +58 -0
  210. package/src/patterns/decrement/vi.ts +61 -0
  211. package/src/patterns/decrement/zh.ts +32 -0
  212. package/src/patterns/en.ts +302 -0
  213. package/src/patterns/event-handler/ar.ts +151 -0
  214. package/src/patterns/event-handler/bn.ts +72 -0
  215. package/src/patterns/event-handler/de.ts +117 -0
  216. package/src/patterns/event-handler/en.ts +117 -0
  217. package/src/patterns/event-handler/es.ts +136 -0
  218. package/src/patterns/event-handler/fr.ts +117 -0
  219. package/src/patterns/event-handler/hi.ts +64 -0
  220. package/src/patterns/event-handler/id.ts +117 -0
  221. package/src/patterns/event-handler/index.ts +119 -0
  222. package/src/patterns/event-handler/it.ts +54 -0
  223. package/src/patterns/event-handler/ja.ts +118 -0
  224. package/src/patterns/event-handler/ko.ts +133 -0
  225. package/src/patterns/event-handler/ms.ts +30 -0
  226. package/src/patterns/event-handler/pl.ts +62 -0
  227. package/src/patterns/event-handler/pt.ts +117 -0
  228. package/src/patterns/event-handler/qu.ts +66 -0
  229. package/src/patterns/event-handler/ru.ts +62 -0
  230. package/src/patterns/event-handler/shared.ts +270 -0
  231. package/src/patterns/event-handler/sw.ts +117 -0
  232. package/src/patterns/event-handler/th.ts +53 -0
  233. package/src/patterns/event-handler/tl.ts +30 -0
  234. package/src/patterns/event-handler/tr.ts +170 -0
  235. package/src/patterns/event-handler/uk.ts +62 -0
  236. package/src/patterns/event-handler/vi.ts +61 -0
  237. package/src/patterns/event-handler/zh.ts +150 -0
  238. package/src/patterns/get/ar.ts +49 -0
  239. package/src/patterns/get/bn.ts +47 -0
  240. package/src/patterns/get/de.ts +32 -0
  241. package/src/patterns/get/hi.ts +52 -0
  242. package/src/patterns/get/index.ts +83 -0
  243. package/src/patterns/get/it.ts +56 -0
  244. package/src/patterns/get/ja.ts +53 -0
  245. package/src/patterns/get/ko.ts +53 -0
  246. package/src/patterns/get/ms.ts +30 -0
  247. package/src/patterns/get/pl.ts +57 -0
  248. package/src/patterns/get/ru.ts +57 -0
  249. package/src/patterns/get/th.ts +29 -0
  250. package/src/patterns/get/tl.ts +30 -0
  251. package/src/patterns/get/uk.ts +57 -0
  252. package/src/patterns/get/vi.ts +48 -0
  253. package/src/patterns/grammar-transformed/index.ts +39 -0
  254. package/src/patterns/grammar-transformed/ja.ts +1713 -0
  255. package/src/patterns/grammar-transformed/ko.ts +1311 -0
  256. package/src/patterns/grammar-transformed/tr.ts +1067 -0
  257. package/src/patterns/hide/ar.ts +67 -0
  258. package/src/patterns/hide/bn.ts +47 -0
  259. package/src/patterns/hide/de.ts +36 -0
  260. package/src/patterns/hide/hi.ts +61 -0
  261. package/src/patterns/hide/index.ts +91 -0
  262. package/src/patterns/hide/it.ts +56 -0
  263. package/src/patterns/hide/ja.ts +69 -0
  264. package/src/patterns/hide/ko.ts +69 -0
  265. package/src/patterns/hide/ms.ts +30 -0
  266. package/src/patterns/hide/pl.ts +57 -0
  267. package/src/patterns/hide/ru.ts +57 -0
  268. package/src/patterns/hide/th.ts +29 -0
  269. package/src/patterns/hide/tl.ts +30 -0
  270. package/src/patterns/hide/tr.ts +65 -0
  271. package/src/patterns/hide/uk.ts +57 -0
  272. package/src/patterns/hide/vi.ts +56 -0
  273. package/src/patterns/hide/zh.ts +68 -0
  274. package/src/patterns/increment/bn.ts +70 -0
  275. package/src/patterns/increment/de.ts +36 -0
  276. package/src/patterns/increment/hi.ts +68 -0
  277. package/src/patterns/increment/index.ts +79 -0
  278. package/src/patterns/increment/it.ts +69 -0
  279. package/src/patterns/increment/ms.ts +30 -0
  280. package/src/patterns/increment/pl.ts +58 -0
  281. package/src/patterns/increment/ru.ts +58 -0
  282. package/src/patterns/increment/th.ts +49 -0
  283. package/src/patterns/increment/tl.ts +30 -0
  284. package/src/patterns/increment/tr.ts +52 -0
  285. package/src/patterns/increment/uk.ts +58 -0
  286. package/src/patterns/increment/vi.ts +61 -0
  287. package/src/patterns/increment/zh.ts +32 -0
  288. package/src/patterns/index.ts +84 -0
  289. package/src/patterns/languages/en/control-flow.ts +93 -0
  290. package/src/patterns/languages/en/fetch.ts +62 -0
  291. package/src/patterns/languages/en/index.ts +42 -0
  292. package/src/patterns/languages/en/repeat.ts +67 -0
  293. package/src/patterns/languages/en/set.ts +48 -0
  294. package/src/patterns/languages/en/swap.ts +38 -0
  295. package/src/patterns/languages/en/temporal.ts +57 -0
  296. package/src/patterns/put/ar.ts +74 -0
  297. package/src/patterns/put/bn.ts +53 -0
  298. package/src/patterns/put/en.ts +74 -0
  299. package/src/patterns/put/es.ts +74 -0
  300. package/src/patterns/put/hi.ts +69 -0
  301. package/src/patterns/put/id.ts +96 -0
  302. package/src/patterns/put/index.ts +99 -0
  303. package/src/patterns/put/it.ts +56 -0
  304. package/src/patterns/put/ja.ts +75 -0
  305. package/src/patterns/put/ko.ts +67 -0
  306. package/src/patterns/put/ms.ts +30 -0
  307. package/src/patterns/put/pl.ts +81 -0
  308. package/src/patterns/put/ru.ts +85 -0
  309. package/src/patterns/put/th.ts +32 -0
  310. package/src/patterns/put/tl.ts +30 -0
  311. package/src/patterns/put/tr.ts +67 -0
  312. package/src/patterns/put/uk.ts +85 -0
  313. package/src/patterns/put/vi.ts +72 -0
  314. package/src/patterns/put/zh.ts +62 -0
  315. package/src/patterns/registry.ts +163 -0
  316. package/src/patterns/remove/ar.ts +71 -0
  317. package/src/patterns/remove/bn.ts +68 -0
  318. package/src/patterns/remove/hi.ts +69 -0
  319. package/src/patterns/remove/index.ts +87 -0
  320. package/src/patterns/remove/it.ts +69 -0
  321. package/src/patterns/remove/ja.ts +74 -0
  322. package/src/patterns/remove/ko.ts +78 -0
  323. package/src/patterns/remove/ms.ts +30 -0
  324. package/src/patterns/remove/pl.ts +62 -0
  325. package/src/patterns/remove/ru.ts +62 -0
  326. package/src/patterns/remove/th.ts +49 -0
  327. package/src/patterns/remove/tl.ts +30 -0
  328. package/src/patterns/remove/tr.ts +78 -0
  329. package/src/patterns/remove/uk.ts +62 -0
  330. package/src/patterns/remove/vi.ts +61 -0
  331. package/src/patterns/remove/zh.ts +72 -0
  332. package/src/patterns/set/ar.ts +84 -0
  333. package/src/patterns/set/bn.ts +53 -0
  334. package/src/patterns/set/de.ts +84 -0
  335. package/src/patterns/set/es.ts +92 -0
  336. package/src/patterns/set/fr.ts +88 -0
  337. package/src/patterns/set/hi.ts +56 -0
  338. package/src/patterns/set/id.ts +84 -0
  339. package/src/patterns/set/index.ts +107 -0
  340. package/src/patterns/set/it.ts +56 -0
  341. package/src/patterns/set/ja.ts +86 -0
  342. package/src/patterns/set/ko.ts +85 -0
  343. package/src/patterns/set/ms.ts +30 -0
  344. package/src/patterns/set/pl.ts +57 -0
  345. package/src/patterns/set/pt.ts +84 -0
  346. package/src/patterns/set/ru.ts +57 -0
  347. package/src/patterns/set/th.ts +31 -0
  348. package/src/patterns/set/tl.ts +30 -0
  349. package/src/patterns/set/tr.ts +107 -0
  350. package/src/patterns/set/uk.ts +57 -0
  351. package/src/patterns/set/vi.ts +53 -0
  352. package/src/patterns/set/zh.ts +84 -0
  353. package/src/patterns/show/ar.ts +67 -0
  354. package/src/patterns/show/bn.ts +47 -0
  355. package/src/patterns/show/de.ts +32 -0
  356. package/src/patterns/show/fr.ts +32 -0
  357. package/src/patterns/show/hi.ts +61 -0
  358. package/src/patterns/show/index.ts +95 -0
  359. package/src/patterns/show/it.ts +56 -0
  360. package/src/patterns/show/ja.ts +69 -0
  361. package/src/patterns/show/ko.ts +73 -0
  362. package/src/patterns/show/ms.ts +30 -0
  363. package/src/patterns/show/pl.ts +57 -0
  364. package/src/patterns/show/ru.ts +57 -0
  365. package/src/patterns/show/th.ts +29 -0
  366. package/src/patterns/show/tl.ts +30 -0
  367. package/src/patterns/show/tr.ts +65 -0
  368. package/src/patterns/show/uk.ts +57 -0
  369. package/src/patterns/show/vi.ts +56 -0
  370. package/src/patterns/show/zh.ts +68 -0
  371. package/src/patterns/take/ar.ts +51 -0
  372. package/src/patterns/take/index.ts +31 -0
  373. package/src/patterns/toggle/ar.ts +61 -0
  374. package/src/patterns/toggle/bn.ts +70 -0
  375. package/src/patterns/toggle/en.ts +61 -0
  376. package/src/patterns/toggle/es.ts +61 -0
  377. package/src/patterns/toggle/hi.ts +80 -0
  378. package/src/patterns/toggle/index.ts +95 -0
  379. package/src/patterns/toggle/it.ts +69 -0
  380. package/src/patterns/toggle/ja.ts +156 -0
  381. package/src/patterns/toggle/ko.ts +113 -0
  382. package/src/patterns/toggle/ms.ts +30 -0
  383. package/src/patterns/toggle/pl.ts +62 -0
  384. package/src/patterns/toggle/ru.ts +62 -0
  385. package/src/patterns/toggle/th.ts +50 -0
  386. package/src/patterns/toggle/tl.ts +30 -0
  387. package/src/patterns/toggle/tr.ts +88 -0
  388. package/src/patterns/toggle/uk.ts +62 -0
  389. package/src/patterns/toggle/vi.ts +61 -0
  390. package/src/patterns/toggle/zh.ts +99 -0
  391. package/src/public-api.ts +286 -0
  392. package/src/registry.ts +441 -0
  393. package/src/tokenizers/arabic.ts +723 -0
  394. package/src/tokenizers/base.ts +1300 -0
  395. package/src/tokenizers/bengali.ts +289 -0
  396. package/src/tokenizers/chinese.ts +481 -0
  397. package/src/tokenizers/english.ts +416 -0
  398. package/src/tokenizers/french.ts +326 -0
  399. package/src/tokenizers/german.ts +324 -0
  400. package/src/tokenizers/hindi.ts +319 -0
  401. package/src/tokenizers/index.ts +127 -0
  402. package/src/tokenizers/indonesian.ts +306 -0
  403. package/src/tokenizers/italian.ts +458 -0
  404. package/src/tokenizers/japanese.ts +447 -0
  405. package/src/tokenizers/korean.ts +642 -0
  406. package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
  407. package/src/tokenizers/morphology/french-normalizer.ts +268 -0
  408. package/src/tokenizers/morphology/german-normalizer.ts +256 -0
  409. package/src/tokenizers/morphology/index.ts +46 -0
  410. package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
  411. package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
  412. package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
  413. package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
  414. package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
  415. package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
  416. package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
  417. package/src/tokenizers/morphology/types.ts +211 -0
  418. package/src/tokenizers/ms.ts +198 -0
  419. package/src/tokenizers/polish.ts +354 -0
  420. package/src/tokenizers/portuguese.ts +304 -0
  421. package/src/tokenizers/quechua.ts +339 -0
  422. package/src/tokenizers/russian.ts +375 -0
  423. package/src/tokenizers/spanish.ts +403 -0
  424. package/src/tokenizers/swahili.ts +303 -0
  425. package/src/tokenizers/thai.ts +236 -0
  426. package/src/tokenizers/tl.ts +198 -0
  427. package/src/tokenizers/turkish.ts +411 -0
  428. package/src/tokenizers/ukrainian.ts +369 -0
  429. package/src/tokenizers/vietnamese.ts +410 -0
  430. package/src/types/grammar-types.ts +617 -0
  431. package/src/types/unified-profile.ts +267 -0
  432. package/src/types.ts +709 -0
  433. package/src/utils/confidence-calculator.ts +147 -0
  434. package/src/validators/command-validator.ts +380 -0
  435. package/src/validators/index.ts +15 -0
package/README.md ADDED
@@ -0,0 +1,686 @@
1
+ # @lokascript/semantic
2
+
3
+ Semantic-first multilingual parser for LokaScript. Parses hyperscript-like syntax from 23 languages into a language-agnostic semantic representation.
4
+
5
+ ## Features
6
+
7
+ - **23 Languages**: Arabic, Bengali, Chinese, English, French, German, Hindi, Indonesian, Italian, Japanese, Korean, Malay, Polish, Portuguese, Quechua, Russian, Spanish, Swahili, Thai, Tagalog, Turkish, Ukrainian, Vietnamese
8
+ - **Semantic Roles**: Language-agnostic intermediate representation (patient, destination, source, etc.)
9
+ - **Confidence Scoring**: Graceful degradation with scored parse results
10
+ - **Morphological Normalization**: Handles verb conjugations in agglutinative languages
11
+
12
+ ## Installation
13
+
14
+ ```bash
15
+ npm install @lokascript/semantic
16
+ ```
17
+
18
+ ## Bundle Selection Guide
19
+
20
+ Choose the right bundle for your use case. All bundles are minified IIFE for direct browser usage.
21
+
22
+ ### Bundle Matrix
23
+
24
+ #### Tier 1: Single Language (~14-20 KB gzip)
25
+
26
+ | Bundle | Global | Size | Languages |
27
+ | ------------------------- | ---------------------- | ------ | --------- |
28
+ | `browser-en.en.global.js` | `LokaScriptSemanticEn` | ~20 KB | en |
29
+ | `browser-es.es.global.js` | `LokaScriptSemanticEs` | ~16 KB | es |
30
+ | `browser-ja.ja.global.js` | `LokaScriptSemanticJa` | ~17 KB | ja |
31
+ | `browser-ar.ar.global.js` | `LokaScriptSemanticAr` | ~17 KB | ar |
32
+ | `browser-ko.ko.global.js` | `LokaScriptSemanticKo` | ~18 KB | ko |
33
+ | `browser-zh.zh.global.js` | `LokaScriptSemanticZh` | ~15 KB | zh |
34
+ | `browser-tr.tr.global.js` | `LokaScriptSemanticTr` | ~18 KB | tr |
35
+ | `browser-pt.pt.global.js` | `LokaScriptSemanticPt` | ~14 KB | pt |
36
+ | `browser-fr.fr.global.js` | `LokaScriptSemanticFr` | ~14 KB | fr |
37
+ | `browser-de.de.global.js` | `LokaScriptSemanticDe` | ~14 KB | de |
38
+ | `browser-id.id.global.js` | `LokaScriptSemanticId` | ~14 KB | id |
39
+ | `browser-qu.qu.global.js` | `LokaScriptSemanticQu` | ~14 KB | qu |
40
+ | `browser-sw.sw.global.js` | `LokaScriptSemanticSw` | ~14 KB | sw |
41
+
42
+ #### Tier 2-5: Multi-Language Bundles
43
+
44
+ | Bundle | Global | Size | Languages |
45
+ | ----------------------------------------- | ----------------------------- | ------ | ------------------ |
46
+ | `browser-es-en.es-en.global.js` | `LokaScriptSemanticEsEn` | ~25 KB | en, es |
47
+ | `browser-western.western.global.js` | `LokaScriptSemanticWestern` | ~30 KB | en, es, pt, fr, de |
48
+ | `browser-east-asian.east-asian.global.js` | `LokaScriptSemanticEastAsian` | ~24 KB | ja, zh, ko |
49
+ | `browser-priority.priority.global.js` | `LokaScriptSemanticPriority` | ~48 KB | 11 languages |
50
+ | `browser.global.js` | `LokaScriptSemantic` | ~61 KB | All 13 |
51
+ | `browser-lazy.lazy.global.js` | `LokaScriptSemanticLazy` | ~15 KB | On-demand |
52
+
53
+ ### Lazy Loading (Smallest Initial Bundle)
54
+
55
+ For applications that need minimal initial bundle size:
56
+
57
+ ```html
58
+ <script src="node_modules/@lokascript/semantic/dist/browser-lazy.lazy.global.js"></script>
59
+ <script type="module">
60
+ const { loadLanguage, parse } = LokaScriptSemanticLazy;
61
+
62
+ // Load languages on demand
63
+ await loadLanguage('en');
64
+ await loadLanguage('ja');
65
+
66
+ // Now parsing works for loaded languages
67
+ parse('toggle .active', 'en');
68
+ parse('トグル .active', 'ja');
69
+ </script>
70
+ ```
71
+
72
+ ### Custom Bundle Generation
73
+
74
+ Generate bundles with only the languages you need:
75
+
76
+ ```bash
77
+ # Preview size estimate
78
+ node scripts/generate-bundle.mjs --estimate ja ko zh
79
+
80
+ # Generate with auto-config
81
+ node scripts/generate-bundle.mjs --auto es pt fr
82
+
83
+ # Use predefined groups
84
+ node scripts/generate-bundle.mjs --group western
85
+ ```
86
+
87
+ ### Browser Usage
88
+
89
+ ```html
90
+ <!-- Spanish only (smallest for Spanish devs) -->
91
+ <script src="node_modules/@lokascript/semantic/dist/browser-es.es.global.js"></script>
92
+ <script>
93
+ const { parse, tokenize } = LokaScriptSemanticEs;
94
+ parse('alternar .activo', 'es');
95
+ </script>
96
+
97
+ <!-- English only (smallest for English devs) -->
98
+ <script src="node_modules/@lokascript/semantic/dist/browser-en.en.global.js"></script>
99
+ <script>
100
+ const { parse, canParse } = LokaScriptSemanticEn;
101
+ </script>
102
+
103
+ <!-- Western languages (European users) -->
104
+ <script src="node_modules/@lokascript/semantic/dist/browser-western.western.global.js"></script>
105
+ <script>
106
+ const { parse, canParse } = LokaScriptSemanticWestern;
107
+ </script>
108
+
109
+ <!-- East Asian languages (CJK users) -->
110
+ <script src="node_modules/@lokascript/semantic/dist/browser-east-asian.east-asian.global.js"></script>
111
+ <script>
112
+ const { parse, canParse } = LokaScriptSemanticEastAsian;
113
+ </script>
114
+
115
+ <!-- Priority languages (11 most common) -->
116
+ <script src="node_modules/@lokascript/semantic/dist/browser-priority.priority.global.js"></script>
117
+ <script>
118
+ const { parse, canParse } = LokaScriptSemanticPriority;
119
+ </script>
120
+
121
+ <!-- Full bundle (all 13 bundled languages) -->
122
+ <script src="node_modules/@lokascript/semantic/dist/browser.global.js"></script>
123
+ <script>
124
+ const { parse, canParse } = LokaScriptSemantic;
125
+ </script>
126
+ ```
127
+
128
+ ### Node.js / Bundler Usage
129
+
130
+ For tree-shakeable imports in bundlers:
131
+
132
+ ```typescript
133
+ // Import specific languages for optimal tree-shaking
134
+ import '@lokascript/semantic/languages/en';
135
+ import '@lokascript/semantic/languages/ja';
136
+
137
+ import { parse } from '@lokascript/semantic';
138
+ ```
139
+
140
+ ## Usage
141
+
142
+ ```typescript
143
+ import { parse, translate, canParse } from '@lokascript/semantic';
144
+
145
+ // Parse from any language
146
+ const node = parse('toggle .active on #button', 'en');
147
+ const nodeJa = parse('.active を 切り替え', 'ja');
148
+
149
+ // Translate between languages
150
+ const arabic = translate('toggle .active', 'en', 'ar');
151
+ // → 'بدّل .active'
152
+
153
+ // Check if input can be parsed
154
+ const result = canParse('クリックしたら 増加', 'ja');
155
+ if (result.canParse) {
156
+ console.log('Confidence:', result.confidence);
157
+ }
158
+ ```
159
+
160
+ ## English Native Idioms
161
+
162
+ LokaScript accepts multiple beginner-friendly English forms to make hyperscript easier to learn and more readable.
163
+
164
+ ### Event Handler Alternatives
165
+
166
+ | Form | Example | Notes |
167
+ | ------------------ | ------------------------------------------ | -------------------------------- |
168
+ | Standard | `on click toggle .active` | Original hyperscript |
169
+ | When (temporal) | `when clicked toggle .active` | **Recommended** - most intuitive |
170
+ | When (with source) | `when clicked from #button toggle .active` | With element filter |
171
+ | Upon (formal) | `upon clicking toggle .active` | Formal/technical style |
172
+ | If (conditional) | `if clicked toggle .active` | Conditional framing |
173
+
174
+ ```typescript
175
+ // All parse to equivalent event handler nodes:
176
+ parse('on click toggle .active', 'en');
177
+ parse('when clicked toggle .active', 'en'); // Native idiom
178
+ parse('upon clicking toggle .active', 'en'); // Formal alternative
179
+ parse('if clicked toggle .active', 'en'); // Conditional style
180
+ ```
181
+
182
+ ### Command Synonyms
183
+
184
+ Alternative verbs that map to standard hyperscript commands:
185
+
186
+ | Synonym | Maps To | Example |
187
+ | ---------- | ----------- | --------------------------- |
188
+ | `flip` | `toggle` | `flip .active` |
189
+ | `switch` | `toggle` | `switch .visible on #panel` |
190
+ | `increase` | `increment` | `increase #counter` |
191
+ | `decrease` | `decrement` | `decrease #counter` |
192
+ | `display` | `show` | `display #tooltip` |
193
+ | `reveal` | `show` | `reveal #modal` |
194
+ | `conceal` | `hide` | `conceal #dropdown` |
195
+
196
+ ```typescript
197
+ // These are equivalent:
198
+ parse('toggle .active', 'en');
199
+ parse('flip .active', 'en');
200
+ parse('switch .active', 'en');
201
+ ```
202
+
203
+ ### Natural Articles
204
+
205
+ Optional articles make code read more like natural English:
206
+
207
+ | Natural Form | Equivalent To |
208
+ | ------------------------- | ---------------- |
209
+ | `toggle the .active` | `toggle .active` |
210
+ | `show the #tooltip` | `show #tooltip` |
211
+ | `add the .visible class` | `add .visible` |
212
+ | `toggle the active class` | `toggle .active` |
213
+
214
+ ```typescript
215
+ // All equivalent:
216
+ parse('toggle .active', 'en');
217
+ parse('toggle the .active', 'en');
218
+ parse('toggle the active class', 'en');
219
+ ```
220
+
221
+ ### Temporal Expressions
222
+
223
+ Natural delay syntax as an alternative to `wait`:
224
+
225
+ | Natural Form | Equivalent To |
226
+ | ------------ | ------------- |
227
+ | `in 2s` | `wait 2s` |
228
+ | `in 500ms` | `wait 500ms` |
229
+ | `after 2s` | `wait 2s` |
230
+
231
+ ```typescript
232
+ // These parse as wait commands:
233
+ parse('in 2s', 'en'); // action: 'wait', duration: '2s'
234
+ parse('after 500ms', 'en'); // action: 'wait', duration: '500ms'
235
+ ```
236
+
237
+ ### British Spelling
238
+
239
+ Common British spellings are automatically normalized:
240
+
241
+ | British | American (normalized) |
242
+ | ------------ | --------------------- |
243
+ | `colour` | `color` |
244
+ | `grey` | `gray` |
245
+ | `centre` | `center` |
246
+ | `behaviour` | `behavior` |
247
+ | `initialise` | `initialize` |
248
+ | `favourite` | `favorite` |
249
+
250
+ ```typescript
251
+ // Both work identically:
252
+ parse('set colour to red', 'en');
253
+ parse('set color to red', 'en');
254
+ ```
255
+
256
+ ## Japanese Native Idioms
257
+
258
+ LokaScript accepts multiple natural Japanese forms, following Nadeshiko's approach of "accepting multiple orthodox native Japanese expressions."
259
+
260
+ | Form | Example | Notes |
261
+ | -------------------- | ------------------------------------ | -------------------------------- |
262
+ | Standard | `クリック で .active を 切り替え` | Instrumental particle (で) |
263
+ | Conditional (したら) | `クリックしたら .active を 切り替え` | Most natural - "if/when clicked" |
264
+ | Conditional (すると) | `クリックすると .active を 切り替え` | Habitual/expected outcome |
265
+ | Conditional (すれば) | `クリックすれば .active を 切り替え` | Hypothetical condition |
266
+ | Temporal (時に) | `クリック時に .active を 切り替え` | Formal - "at the time of click" |
267
+ | Compact | `.activeを切り替え` | No spaces - natural writing |
268
+
269
+ ### Event Handlers
270
+
271
+ ```typescript
272
+ // All parse to equivalent event handler nodes:
273
+ parse('クリック で 増加', 'ja'); // Standard (instrumental)
274
+ parse('クリックしたら 増加', 'ja'); // Conditional (native)
275
+ parse('クリック時に 増加', 'ja'); // Temporal (formal)
276
+
277
+ // With source filter:
278
+ parse('#button から クリックしたら 増加', 'ja');
279
+ ```
280
+
281
+ ### Toggle Commands
282
+
283
+ ```typescript
284
+ // All parse to equivalent toggle nodes:
285
+ parse('.active を 切り替え', 'ja'); // Standard (spaced)
286
+ parse('.activeを切り替え', 'ja'); // Compact (no spaces)
287
+ parse('.active を 切り替える', 'ja'); // With verb ending (る)
288
+ parse('.active を トグルする', 'ja'); // With katakana loanword
289
+ ```
290
+
291
+ ### Morphological Normalization
292
+
293
+ The parser handles Japanese verb conjugations automatically:
294
+
295
+ | Conjugation | Example | Normalized |
296
+ | ------------------ | -------------- | ---------- |
297
+ | Dictionary | クリックする | クリック |
298
+ | Conditional (たら) | クリックしたら | クリック |
299
+ | Conditional (と) | クリックすると | クリック |
300
+ | Conditional (ば) | クリックすれば | クリック |
301
+ | て-form | 切り替えて | 切り替え |
302
+ | Past | 切り替えた | 切り替え |
303
+ | Polite | 切り替えます | 切り替え |
304
+ | Progressive | 切り替えている | 切り替え |
305
+
306
+ ## Korean Native Idioms
307
+
308
+ LokaScript accepts multiple natural Korean forms, applying to Korean the approach Nadeshiko takes for Japanese: accepting multiple orthodox native expressions.
309
+
310
+ | Form | Example | Notes |
311
+ | --------------------- | ---------------------------- | ------------------------------ |
312
+ | Conditional (-면) | `클릭하면 .active 를 토글` | **Recommended** - "if clicked" |
313
+ | Honorific (-시면) | `클릭하시면 .active 를 토글` | Polite/formal - "if you click" |
314
+ | Temporal (-ㄹ 때) | `클릭할때 .active 를 토글` | "When clicking" |
315
+ | Immediate (-자마자) | `클릭하자마자 증가` | "As soon as clicked" |
316
+ | Sequential (-고 나서) | `클릭하고 나서 증가` | "After clicking" |
317
+ | With source | `#button 에서 클릭하면 증가` | "When clicked from #button" |
318
+ | Compact | `.active를토글` | No spaces - natural writing |
319
+
320
+ > **Note**: The `클릭 에` pattern is intentionally omitted because `에` is ambiguous (event marker vs. destination marker). Use the native conditional (`-하면`) or temporal (`-할때`) forms instead—they are also more natural Korean.
321
+
322
+ ### Event Handlers
323
+
324
+ ```typescript
325
+ // All parse to equivalent event handler nodes:
326
+ parse('클릭하자마자 증가', 'ko'); // Immediate (native)
327
+ parse('클릭하면 증가', 'ko'); // Conditional (native)
328
+ parse('클릭할때 증가', 'ko'); // Temporal (native)
329
+
330
+ // With source filter:
331
+ parse('#button 에서 클릭하면 증가', 'ko');
332
+ ```
333
+
334
+ ### Toggle Commands
335
+
336
+ ```typescript
337
+ // All parse to equivalent toggle nodes:
338
+ parse('.active 를 토글', 'ko'); // Standard (spaced)
339
+ parse('.active를토글', 'ko'); // Compact (no spaces)
340
+ parse('.active 를 토글하다', 'ko'); // With dictionary form
341
+ parse('.active 를 토글해요', 'ko'); // With polite ending
342
+ ```
343
+
344
+ ### Morphological Normalization
345
+
346
+ The parser handles Korean verb conjugations automatically:
347
+
348
+ | Conjugation | Example | Normalized |
349
+ | ------------------- | ---------- | ---------- |
350
+ | Dictionary (-하다) | 클릭하다 | 클릭 |
351
+ | Conditional (-하면) | 클릭하면 | 클릭 |
352
+ | Temporal (-할때) | 클릭할때 | 클릭 |
353
+ | Causal (-하니까) | 클릭하니까 | 클릭 |
354
+ | Polite (-해요) | 토글해요 | 토글 |
355
+ | Formal (-합니다) | 토글합니다 | 토글 |
356
+ | Honorific (-하세요) | 토글하세요 | 토글 |
357
+ | Past (-했어요) | 토글했어요 | 토글 |
358
+
359
+ ## Spanish Native Idioms
360
+
361
+ LokaScript accepts multiple natural Spanish forms.
362
+
363
+ | Form | Example | Notes |
364
+ | ------------------------ | ------------------------------ | ------------------------------------ |
365
+ | Native (al + infinitive) | `al hacer clic toggle .active` | **Most idiomatic** - "upon clicking" |
366
+ | Conditional (si) | `si hace clic toggle .active` | "If (the user) clicks" |
367
+ | Standard (en) | `en clic toggle .active` | Direct translation |
368
+ | Temporal (cuando) | `cuando clic toggle .active` | "When" |
369
+
370
+ ### Event Handlers
371
+
372
+ ```typescript
373
+ // All parse to equivalent event handler nodes:
374
+ parse('al hacer clic aumentar', 'es'); // Native (al + infinitive)
375
+ parse('si hace clic aumentar', 'es'); // Conditional
376
+ parse('en clic aumentar', 'es'); // Standard
377
+
378
+ // With source filter:
379
+ parse('al hacer clic en #button aumentar', 'es');
380
+ ```
381
+
382
+ ### Toggle Commands
383
+
384
+ ```typescript
385
+ // All parse to equivalent toggle nodes:
386
+ parse('toggle .active', 'es'); // English loanword
387
+ parse('cambiar .active', 'es'); // Native verb
388
+ parse('alternar .active', 'es'); // Formal alternative
389
+ ```
390
+
391
+ ## Chinese Native Idioms
392
+
393
+ LokaScript accepts multiple natural Chinese forms using native temporal and aspect markers.
394
+
395
+ | Form | Example | Notes |
396
+ | ------------------- | -------------------------- | ----------------- |
397
+ | Standard (当) | `当 点击 切换 .active` | "When" - formal |
398
+ | Temporal (的时候) | `点击 的时候 切换 .active` | "At the time of" |
399
+ | Immediate (一...就) | `一 点击 就 切换 .active` | "As soon as" |
400
+ | Completion (了) | `点击 了 切换 .active` | Perfective aspect |
401
+ | Whenever (每当) | `每当 点击 切换 .active` | "Whenever" |
402
+ | Conditional (如果) | `如果 点击 切换 .active` | "If" |
403
+
404
+ ### Event Handlers
405
+
406
+ ```typescript
407
+ // All parse to equivalent event handler nodes:
408
+ parse('当 点击 切换 .active', 'zh'); // Standard (当)
409
+ parse('点击 的时候 切换 .active', 'zh'); // Temporal (的时候)
410
+ parse('一 点击 就 切换 .active', 'zh'); // Immediate (一...就)
411
+ parse('每当 点击 切换 .active', 'zh'); // Whenever (每当)
412
+
413
+ // With source filter:
414
+ parse('当 从 #button 点击 切换 .active', 'zh');
415
+ ```
416
+
417
+ ### Toggle Commands
418
+
419
+ ```typescript
420
+ // All parse to equivalent toggle nodes:
421
+ parse('切换 .active', 'zh'); // Native verb
422
+ parse('把 .active 切换', 'zh'); // BA construction (把)
423
+ ```
424
+
425
+ ## Arabic Native Idioms
426
+
427
+ LokaScript accepts multiple natural Arabic forms, supporting VSO word order.
428
+
429
+ | Form | Example | Notes |
430
+ | ----------------- | ----------------------------------- | ------------------------- |
431
+ | Standard (عندما) | `عندما نقر بدّل .active` | "When" - formal |
432
+ | Classical (حين) | `حين نقر بدّل .active` | Classical Arabic |
433
+ | Conditional (إذا) | `إذا نقر بدّل .active` | "If" |
434
+ | With source | `عندما نقر من #button بدّل .active` | "When click from #button" |
435
+
436
+ ### Event Handlers
437
+
438
+ ```typescript
439
+ // All parse to equivalent event handler nodes:
440
+ parse('عندما نقر زيادة', 'ar'); // Standard (عندما)
441
+ parse('إذا نقر زيادة', 'ar'); // Conditional (إذا)
442
+ parse('حين نقر زيادة', 'ar'); // Classical (حين)
443
+ ```
444
+
445
+ ### Morphological Normalization
446
+
447
+ The parser handles Arabic verb patterns and prefix stripping:
448
+
449
+ | Input | Normalized | Notes |
450
+ | ------ | ---------- | ------------------------ |
451
+ | النقر | نقر | Article ال stripped |
452
+ | بالنقر | نقر | Prefix بال stripped |
453
+ | والنقر | نقر | Conjunction وال stripped |
454
+
455
+ ## Turkish Native Idioms
456
+
457
+ LokaScript accepts multiple natural Turkish forms with full vowel harmony support.
458
+
459
+ | Form | Example | Notes |
460
+ | ---------------------- | ------------------------------ | --------------------------------- |
461
+ | Conditional (-dığında) | `tıklandığında toggle .active` | **Most natural** - "when clicked" |
462
+ | Temporal (-ınca) | `tıklayınca toggle .active` | "When/upon" |
463
+ | Hypothetical (-rsa) | `tıklarsa toggle .active` | "If" |
464
+ | Simultaneous (-ken) | `tıklarken toggle .active` | "While" |
465
+ | Repetitive (-dikçe) | `tıkladıkça toggle .active` | "Whenever" |
466
+
467
+ ### Vowel Harmony
468
+
469
+ All Turkish suffixes support 4-way vowel harmony + consonant devoicing:
470
+
471
+ | Base | Back Unrounded | Front Unrounded | Back Rounded | Front Rounded |
472
+ | -------- | -------------- | --------------- | ------------ | ------------- |
473
+ | -dığında | -dığında | -diğinde | -duğunda | -düğünde |
474
+ | -ınca | -ınca | -ince | -unca | -ünce |
475
+ | -dikçe | -dıkça | -dikçe | -dukça | -dükçe |
476
+
477
+ Consonant devoicing (suffix-initial d→t after voiceless consonants): `-tığında`, `-tikçe`, etc.
478
+
479
+ ### Event Handlers
480
+
481
+ ```typescript
482
+ // All parse to equivalent event handler nodes:
483
+ parse('tıklandığında artır', 'tr'); // Conditional
484
+ parse('tıklayınca artır', 'tr'); // Temporal
485
+ parse('tıklarsa artır', 'tr'); // Hypothetical
486
+
487
+ // With source filter:
488
+ parse('#button den tıklandığında artır', 'tr');
489
+ ```
490
+
491
+ ## Portuguese Native Idioms
492
+
493
+ LokaScript accepts multiple natural Portuguese forms.
494
+
495
+ | Form | Example | Notes |
496
+ | ------------------------ | --------------------------------------- | ------------------------------------ |
497
+ | Native (ao + infinitive) | `ao clicar alternar .active` | **Most idiomatic** - "upon clicking" |
498
+ | Standard (quando) | `quando clicar alternar .active` | "When" |
499
+ | Conditional (se) | `se clicar alternar .active` | "If" |
500
+ | With source | `ao clicar em #button alternar .active` | "Upon clicking on #button" |
501
+
502
+ ### Portuguese Event Handlers
503
+
504
+ ```typescript
505
+ // All parse to equivalent event handler nodes:
506
+ parse('ao clicar incrementar', 'pt'); // Native (ao + infinitive)
507
+ parse('quando clicar incrementar', 'pt'); // Standard (quando)
508
+ parse('se clicar incrementar', 'pt'); // Conditional (se)
509
+
510
+ // With source filter:
511
+ parse('ao clicar em #button incrementar', 'pt');
512
+ ```
513
+
514
+ ### Portuguese Morphological Normalization
515
+
516
+ The parser handles Portuguese verb conjugations automatically:
517
+
518
+ | Conjugation | Example | Normalized |
519
+ | ------------- | -------- | ---------- |
520
+ | Infinitive | clicar | clica |
521
+ | Gerund | clicando | clica |
522
+ | Present (3sg) | clica | clica |
523
+ | Past | clicou | clica |
524
+ | Subjunctive | clique | clica |
525
+
526
+ ## Direct AST Building
527
+
528
+ The semantic package can build AST nodes directly from semantic parsing results, bypassing English text generation and re-parsing.
529
+
530
+ ### The Direct Path
531
+
532
+ ```text
533
+ Input (any language) → Semantic Parser → AST Builder → AST
534
+ ```
535
+
536
+ Instead of the traditional path:
537
+
538
+ ```text
539
+ Input → Semantic Parser → English Text → Core Parser → AST
540
+ ```
541
+
542
+ ### Basic AST Building
543
+
544
+ ```typescript
545
+ import { parse, buildAST } from '@lokascript/semantic';
546
+
547
+ // Parse input to semantic node
548
+ const node = parse('#button の .active を 切り替え', 'ja');
549
+
550
+ // Build AST directly
551
+ const ast = buildAST(node);
552
+ // {
553
+ // type: 'command',
554
+ // name: 'toggle',
555
+ // args: [{ type: 'selector', value: '.active' }],
556
+ // modifiers: { on: { type: 'selector', value: '#button' } }
557
+ // }
558
+ ```
559
+
560
+ ### With MultilingualHyperscript (Core Package)
561
+
562
+ ```typescript
563
+ import { MultilingualHyperscript } from '@lokascript/core';
564
+
565
+ const ml = new MultilingualHyperscript();
566
+ await ml.initialize();
567
+
568
+ // Parse directly to AST
569
+ const ast = await ml.parseToAST('#button の .active を 切り替え', 'ja');
570
+
571
+ // With detailed result
572
+ const result = await ml.parseToASTWithDetails('toggle .active', 'en');
573
+ if (result.usedDirectPath) {
574
+ console.log('Direct AST:', result.ast);
575
+ } else if (result.fallbackText) {
576
+ // Use fallback text with core parser
577
+ console.log('Fallback:', result.fallbackText);
578
+ }
579
+ ```
580
+
581
+ ### AST Node Types
582
+
583
+ | Semantic Node | AST Node |
584
+ | ----------------------- | ---------------------- |
585
+ | `kind: 'command'` | `type: 'command'` |
586
+ | `kind: 'event-handler'` | `type: 'eventHandler'` |
587
+ | `kind: 'conditional'` | `type: 'if'` |
588
+ | `kind: 'compound'` | `type: 'compound'` |
589
+
590
+ ### Command Mappers
591
+
592
+ 46 commands have dedicated mappers that convert semantic roles to AST structure:
593
+
594
+ ```typescript
595
+ // Semantic: toggle patient:.active destination:#button
596
+ // AST: { name: 'toggle', args: ['.active'], modifiers: { on: '#button' } }
597
+
598
+ import { getCommandMapper, registerCommandMapper } from '@lokascript/semantic';
599
+
600
+ // Get mapper for a command
601
+ const mapper = getCommandMapper('toggle');
602
+
603
+ // Register custom mapper
604
+ registerCommandMapper({
605
+ action: 'myCommand',
606
+ toAST(node, builder) {
607
+ return { type: 'command', name: 'myCommand', args: [/* mapped arguments */] };
608
+ }
609
+ });
610
+ ```
611
+
612
+ ## API Reference
613
+
614
+ ### Core Functions
615
+
616
+ - `parse(input, language)` - Parse input to semantic node
617
+ - `canParse(input, language)` - Check if input can be parsed with confidence
618
+ - `translate(input, fromLang, toLang)` - Translate between languages
619
+ - `tokenize(input, language)` - Get token stream for input
620
+ - `render(node, language)` - Render semantic node to language
621
+ - `buildAST(node)` - Build AST directly from semantic node
622
+
623
+ ### Supported Languages
624
+
625
+ | Code | Language | Word Order | Tier |
626
+ | ---- | ---------- | ---------- | ------ |
627
+ | en | English | SVO | Tier 1 |
628
+ | ja | Japanese | SOV | Tier 1 |
629
+ | ko | Korean | SOV | Tier 1 |
630
+ | es | Spanish | SVO | Tier 1 |
631
+ | zh | Chinese | SVO | Tier 1 |
632
+ | ar | Arabic | VSO | Tier 2 |
633
+ | tr | Turkish | SOV | Tier 2 |
634
+ | de | German | SVO/SOV | Tier 2 |
635
+ | fr | French | SVO | Tier 2 |
636
+ | pt | Portuguese | SVO | Tier 2 |
637
+ | id | Indonesian | SVO | Tier 3 |
638
+ | qu | Quechua | SOV | Tier 3 |
639
+ | sw | Swahili | SVO | Tier 3 |
640
+
641
+ ## Language Support Tiers
642
+
643
+ ### Tier 1: Native Idiom Support (en, ja, ko, es, zh)
644
+
645
+ - Multiple natural phrasings accepted (conditional, temporal, compact forms)
646
+ - Native speaker reviewed patterns
647
+ - Comprehensive morphological normalization
648
+ - Dedicated idiom test suites (50-400+ tests per language)
649
+ - English: Event alternatives (`when`, `upon`, `if`), command synonyms, natural articles, British spelling
650
+
651
+ ### Tier 2: Full Grammar Support (ar, tr, de, fr, pt)
652
+
653
+ - Complete word order transformation (SVO/SOV/VSO)
654
+ - Morphological normalization for verb conjugations
655
+ - Dictionary-based translation
656
+ - Language-specific grammar rules
657
+
658
+ ### Tier 3: Functional (id, qu, sw)
659
+
660
+ - Basic parsing and translation
661
+ - Standard tokenization
662
+ - Community contributions welcome
663
+
664
+ ## Language Editor
665
+
666
+ A browser-based editor for viewing and editing language profiles is available at `editor/index.html`.
667
+
668
+ ```bash
669
+ # From packages/semantic/editor:
670
+ ./serve.sh
671
+
672
+ # Or from project root:
673
+ npx http-server . -p 3000 -c-1
674
+ # Then open: http://127.0.0.1:3000/packages/semantic/editor/
675
+ ```
676
+
677
+ Features:
678
+
679
+ - Edit keywords, role markers, references, possessive config
680
+ - View patterns per language (read-only)
681
+ - Live parse testing
682
+ - Export as TypeScript for PRs
683
+
684
+ ## License
685
+
686
+ MIT