@lokascript/semantic 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +686 -0
- package/dist/browser-ar.ar.global.js +2 -0
- package/dist/browser-core.core.global.js +2 -0
- package/dist/browser-de.de.global.js +2 -0
- package/dist/browser-east-asian.east-asian.global.js +2 -0
- package/dist/browser-en-tr.en-tr.global.js +2 -0
- package/dist/browser-en.en.global.js +2 -0
- package/dist/browser-es-en.es-en.global.js +2 -0
- package/dist/browser-es.es.global.js +2 -0
- package/dist/browser-fr.fr.global.js +2 -0
- package/dist/browser-id.id.global.js +2 -0
- package/dist/browser-ja.ja.global.js +2 -0
- package/dist/browser-ko.ko.global.js +2 -0
- package/dist/browser-lazy.lazy.global.js +2 -0
- package/dist/browser-priority.priority.global.js +2 -0
- package/dist/browser-pt.pt.global.js +2 -0
- package/dist/browser-qu.qu.global.js +2 -0
- package/dist/browser-sw.sw.global.js +2 -0
- package/dist/browser-tr.tr.global.js +2 -0
- package/dist/browser-western.western.global.js +2 -0
- package/dist/browser-zh.zh.global.js +2 -0
- package/dist/browser.global.js +3 -0
- package/dist/browser.global.js.map +1 -0
- package/dist/index.cjs +35051 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +3426 -0
- package/dist/index.d.ts +3426 -0
- package/dist/index.js +34890 -0
- package/dist/index.js.map +1 -0
- package/dist/languages/ar.d.ts +78 -0
- package/dist/languages/ar.js +1622 -0
- package/dist/languages/ar.js.map +1 -0
- package/dist/languages/de.d.ts +38 -0
- package/dist/languages/de.js +1168 -0
- package/dist/languages/de.js.map +1 -0
- package/dist/languages/en.d.ts +44 -0
- package/dist/languages/en.js +3491 -0
- package/dist/languages/en.js.map +1 -0
- package/dist/languages/es.d.ts +52 -0
- package/dist/languages/es.js +1493 -0
- package/dist/languages/es.js.map +1 -0
- package/dist/languages/fr.d.ts +37 -0
- package/dist/languages/fr.js +1159 -0
- package/dist/languages/fr.js.map +1 -0
- package/dist/languages/id.d.ts +35 -0
- package/dist/languages/id.js +1152 -0
- package/dist/languages/id.js.map +1 -0
- package/dist/languages/ja.d.ts +53 -0
- package/dist/languages/ja.js +1430 -0
- package/dist/languages/ja.js.map +1 -0
- package/dist/languages/ko.d.ts +51 -0
- package/dist/languages/ko.js +1729 -0
- package/dist/languages/ko.js.map +1 -0
- package/dist/languages/pt.d.ts +37 -0
- package/dist/languages/pt.js +1127 -0
- package/dist/languages/pt.js.map +1 -0
- package/dist/languages/qu.d.ts +36 -0
- package/dist/languages/qu.js +1143 -0
- package/dist/languages/qu.js.map +1 -0
- package/dist/languages/sw.d.ts +35 -0
- package/dist/languages/sw.js +1147 -0
- package/dist/languages/sw.js.map +1 -0
- package/dist/languages/tr.d.ts +45 -0
- package/dist/languages/tr.js +1529 -0
- package/dist/languages/tr.js.map +1 -0
- package/dist/languages/zh.d.ts +58 -0
- package/dist/languages/zh.js +1257 -0
- package/dist/languages/zh.js.map +1 -0
- package/dist/types-C4dcj53L.d.ts +600 -0
- package/package.json +202 -0
- package/src/__test-utils__/index.ts +7 -0
- package/src/__test-utils__/test-helpers.ts +8 -0
- package/src/__types__/test-helpers.ts +122 -0
- package/src/analysis/index.ts +479 -0
- package/src/ast-builder/command-mappers.ts +1133 -0
- package/src/ast-builder/expression-parser/index.ts +41 -0
- package/src/ast-builder/expression-parser/parser.ts +563 -0
- package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
- package/src/ast-builder/expression-parser/types.ts +208 -0
- package/src/ast-builder/index.ts +536 -0
- package/src/ast-builder/value-converters.ts +172 -0
- package/src/bridge.ts +275 -0
- package/src/browser-ar.ts +162 -0
- package/src/browser-core.ts +231 -0
- package/src/browser-de.ts +162 -0
- package/src/browser-east-asian.ts +173 -0
- package/src/browser-en-tr.ts +165 -0
- package/src/browser-en.ts +157 -0
- package/src/browser-es-en.ts +200 -0
- package/src/browser-es.ts +170 -0
- package/src/browser-fr.ts +162 -0
- package/src/browser-id.ts +162 -0
- package/src/browser-ja.ts +162 -0
- package/src/browser-ko.ts +162 -0
- package/src/browser-lazy.ts +189 -0
- package/src/browser-priority.ts +214 -0
- package/src/browser-pt.ts +162 -0
- package/src/browser-qu.ts +162 -0
- package/src/browser-sw.ts +162 -0
- package/src/browser-tr.ts +162 -0
- package/src/browser-western.ts +181 -0
- package/src/browser-zh.ts +162 -0
- package/src/browser.ts +268 -0
- package/src/cache/index.ts +14 -0
- package/src/cache/semantic-cache.ts +344 -0
- package/src/core-bridge.ts +372 -0
- package/src/explicit/converter.ts +258 -0
- package/src/explicit/index.ts +18 -0
- package/src/explicit/parser.ts +236 -0
- package/src/explicit/renderer.ts +424 -0
- package/src/generators/command-schemas.ts +1636 -0
- package/src/generators/event-handler-generator.ts +109 -0
- package/src/generators/index.ts +117 -0
- package/src/generators/language-profiles.ts +139 -0
- package/src/generators/pattern-generator.ts +537 -0
- package/src/generators/profiles/arabic.ts +131 -0
- package/src/generators/profiles/bengali.ts +132 -0
- package/src/generators/profiles/chinese.ts +124 -0
- package/src/generators/profiles/english.ts +113 -0
- package/src/generators/profiles/french.ts +125 -0
- package/src/generators/profiles/german.ts +126 -0
- package/src/generators/profiles/hindi.ts +146 -0
- package/src/generators/profiles/index.ts +46 -0
- package/src/generators/profiles/indonesian.ts +125 -0
- package/src/generators/profiles/italian.ts +139 -0
- package/src/generators/profiles/japanese.ts +149 -0
- package/src/generators/profiles/korean.ts +127 -0
- package/src/generators/profiles/marker-templates.ts +288 -0
- package/src/generators/profiles/ms.ts +130 -0
- package/src/generators/profiles/polish.ts +249 -0
- package/src/generators/profiles/portuguese.ts +115 -0
- package/src/generators/profiles/quechua.ts +113 -0
- package/src/generators/profiles/russian.ts +260 -0
- package/src/generators/profiles/spanish.ts +130 -0
- package/src/generators/profiles/swahili.ts +129 -0
- package/src/generators/profiles/thai.ts +132 -0
- package/src/generators/profiles/tl.ts +128 -0
- package/src/generators/profiles/turkish.ts +124 -0
- package/src/generators/profiles/types.ts +165 -0
- package/src/generators/profiles/ukrainian.ts +270 -0
- package/src/generators/profiles/vietnamese.ts +133 -0
- package/src/generators/schema-error-codes.ts +160 -0
- package/src/generators/schema-validator.ts +391 -0
- package/src/index.ts +429 -0
- package/src/language-building-schema.ts +3170 -0
- package/src/language-loader.ts +394 -0
- package/src/languages/_all.ts +65 -0
- package/src/languages/ar.ts +15 -0
- package/src/languages/bn.ts +16 -0
- package/src/languages/de.ts +15 -0
- package/src/languages/en.ts +29 -0
- package/src/languages/es.ts +15 -0
- package/src/languages/fr.ts +15 -0
- package/src/languages/hi.ts +26 -0
- package/src/languages/id.ts +15 -0
- package/src/languages/index.ts +18 -0
- package/src/languages/it.ts +15 -0
- package/src/languages/ja.ts +15 -0
- package/src/languages/ko.ts +15 -0
- package/src/languages/ms.ts +16 -0
- package/src/languages/pl.ts +18 -0
- package/src/languages/pt.ts +15 -0
- package/src/languages/qu.ts +15 -0
- package/src/languages/ru.ts +26 -0
- package/src/languages/sw.ts +15 -0
- package/src/languages/th.ts +16 -0
- package/src/languages/tl.ts +16 -0
- package/src/languages/tr.ts +15 -0
- package/src/languages/uk.ts +26 -0
- package/src/languages/vi.ts +16 -0
- package/src/languages/zh.ts +15 -0
- package/src/parser/index.ts +15 -0
- package/src/parser/pattern-matcher.ts +1181 -0
- package/src/parser/semantic-parser.ts +573 -0
- package/src/parser/utils/index.ts +35 -0
- package/src/parser/utils/marker-resolution.ts +111 -0
- package/src/parser/utils/possessive-keywords.ts +43 -0
- package/src/parser/utils/role-positioning.ts +70 -0
- package/src/parser/utils/type-validation.ts +134 -0
- package/src/patterns/add/ar.ts +71 -0
- package/src/patterns/add/bn.ts +70 -0
- package/src/patterns/add/hi.ts +69 -0
- package/src/patterns/add/index.ts +87 -0
- package/src/patterns/add/it.ts +61 -0
- package/src/patterns/add/ja.ts +93 -0
- package/src/patterns/add/ko.ts +74 -0
- package/src/patterns/add/ms.ts +30 -0
- package/src/patterns/add/pl.ts +62 -0
- package/src/patterns/add/ru.ts +62 -0
- package/src/patterns/add/th.ts +49 -0
- package/src/patterns/add/tl.ts +30 -0
- package/src/patterns/add/tr.ts +71 -0
- package/src/patterns/add/uk.ts +62 -0
- package/src/patterns/add/vi.ts +61 -0
- package/src/patterns/add/zh.ts +71 -0
- package/src/patterns/builders.ts +207 -0
- package/src/patterns/decrement/bn.ts +70 -0
- package/src/patterns/decrement/de.ts +42 -0
- package/src/patterns/decrement/hi.ts +68 -0
- package/src/patterns/decrement/index.ts +79 -0
- package/src/patterns/decrement/it.ts +69 -0
- package/src/patterns/decrement/ms.ts +30 -0
- package/src/patterns/decrement/pl.ts +58 -0
- package/src/patterns/decrement/ru.ts +58 -0
- package/src/patterns/decrement/th.ts +49 -0
- package/src/patterns/decrement/tl.ts +30 -0
- package/src/patterns/decrement/tr.ts +48 -0
- package/src/patterns/decrement/uk.ts +58 -0
- package/src/patterns/decrement/vi.ts +61 -0
- package/src/patterns/decrement/zh.ts +32 -0
- package/src/patterns/en.ts +302 -0
- package/src/patterns/event-handler/ar.ts +151 -0
- package/src/patterns/event-handler/bn.ts +72 -0
- package/src/patterns/event-handler/de.ts +117 -0
- package/src/patterns/event-handler/en.ts +117 -0
- package/src/patterns/event-handler/es.ts +136 -0
- package/src/patterns/event-handler/fr.ts +117 -0
- package/src/patterns/event-handler/hi.ts +64 -0
- package/src/patterns/event-handler/id.ts +117 -0
- package/src/patterns/event-handler/index.ts +119 -0
- package/src/patterns/event-handler/it.ts +54 -0
- package/src/patterns/event-handler/ja.ts +118 -0
- package/src/patterns/event-handler/ko.ts +133 -0
- package/src/patterns/event-handler/ms.ts +30 -0
- package/src/patterns/event-handler/pl.ts +62 -0
- package/src/patterns/event-handler/pt.ts +117 -0
- package/src/patterns/event-handler/qu.ts +66 -0
- package/src/patterns/event-handler/ru.ts +62 -0
- package/src/patterns/event-handler/shared.ts +270 -0
- package/src/patterns/event-handler/sw.ts +117 -0
- package/src/patterns/event-handler/th.ts +53 -0
- package/src/patterns/event-handler/tl.ts +30 -0
- package/src/patterns/event-handler/tr.ts +170 -0
- package/src/patterns/event-handler/uk.ts +62 -0
- package/src/patterns/event-handler/vi.ts +61 -0
- package/src/patterns/event-handler/zh.ts +150 -0
- package/src/patterns/get/ar.ts +49 -0
- package/src/patterns/get/bn.ts +47 -0
- package/src/patterns/get/de.ts +32 -0
- package/src/patterns/get/hi.ts +52 -0
- package/src/patterns/get/index.ts +83 -0
- package/src/patterns/get/it.ts +56 -0
- package/src/patterns/get/ja.ts +53 -0
- package/src/patterns/get/ko.ts +53 -0
- package/src/patterns/get/ms.ts +30 -0
- package/src/patterns/get/pl.ts +57 -0
- package/src/patterns/get/ru.ts +57 -0
- package/src/patterns/get/th.ts +29 -0
- package/src/patterns/get/tl.ts +30 -0
- package/src/patterns/get/uk.ts +57 -0
- package/src/patterns/get/vi.ts +48 -0
- package/src/patterns/grammar-transformed/index.ts +39 -0
- package/src/patterns/grammar-transformed/ja.ts +1713 -0
- package/src/patterns/grammar-transformed/ko.ts +1311 -0
- package/src/patterns/grammar-transformed/tr.ts +1067 -0
- package/src/patterns/hide/ar.ts +67 -0
- package/src/patterns/hide/bn.ts +47 -0
- package/src/patterns/hide/de.ts +36 -0
- package/src/patterns/hide/hi.ts +61 -0
- package/src/patterns/hide/index.ts +91 -0
- package/src/patterns/hide/it.ts +56 -0
- package/src/patterns/hide/ja.ts +69 -0
- package/src/patterns/hide/ko.ts +69 -0
- package/src/patterns/hide/ms.ts +30 -0
- package/src/patterns/hide/pl.ts +57 -0
- package/src/patterns/hide/ru.ts +57 -0
- package/src/patterns/hide/th.ts +29 -0
- package/src/patterns/hide/tl.ts +30 -0
- package/src/patterns/hide/tr.ts +65 -0
- package/src/patterns/hide/uk.ts +57 -0
- package/src/patterns/hide/vi.ts +56 -0
- package/src/patterns/hide/zh.ts +68 -0
- package/src/patterns/increment/bn.ts +70 -0
- package/src/patterns/increment/de.ts +36 -0
- package/src/patterns/increment/hi.ts +68 -0
- package/src/patterns/increment/index.ts +79 -0
- package/src/patterns/increment/it.ts +69 -0
- package/src/patterns/increment/ms.ts +30 -0
- package/src/patterns/increment/pl.ts +58 -0
- package/src/patterns/increment/ru.ts +58 -0
- package/src/patterns/increment/th.ts +49 -0
- package/src/patterns/increment/tl.ts +30 -0
- package/src/patterns/increment/tr.ts +52 -0
- package/src/patterns/increment/uk.ts +58 -0
- package/src/patterns/increment/vi.ts +61 -0
- package/src/patterns/increment/zh.ts +32 -0
- package/src/patterns/index.ts +84 -0
- package/src/patterns/languages/en/control-flow.ts +93 -0
- package/src/patterns/languages/en/fetch.ts +62 -0
- package/src/patterns/languages/en/index.ts +42 -0
- package/src/patterns/languages/en/repeat.ts +67 -0
- package/src/patterns/languages/en/set.ts +48 -0
- package/src/patterns/languages/en/swap.ts +38 -0
- package/src/patterns/languages/en/temporal.ts +57 -0
- package/src/patterns/put/ar.ts +74 -0
- package/src/patterns/put/bn.ts +53 -0
- package/src/patterns/put/en.ts +74 -0
- package/src/patterns/put/es.ts +74 -0
- package/src/patterns/put/hi.ts +69 -0
- package/src/patterns/put/id.ts +96 -0
- package/src/patterns/put/index.ts +99 -0
- package/src/patterns/put/it.ts +56 -0
- package/src/patterns/put/ja.ts +75 -0
- package/src/patterns/put/ko.ts +67 -0
- package/src/patterns/put/ms.ts +30 -0
- package/src/patterns/put/pl.ts +81 -0
- package/src/patterns/put/ru.ts +85 -0
- package/src/patterns/put/th.ts +32 -0
- package/src/patterns/put/tl.ts +30 -0
- package/src/patterns/put/tr.ts +67 -0
- package/src/patterns/put/uk.ts +85 -0
- package/src/patterns/put/vi.ts +72 -0
- package/src/patterns/put/zh.ts +62 -0
- package/src/patterns/registry.ts +163 -0
- package/src/patterns/remove/ar.ts +71 -0
- package/src/patterns/remove/bn.ts +68 -0
- package/src/patterns/remove/hi.ts +69 -0
- package/src/patterns/remove/index.ts +87 -0
- package/src/patterns/remove/it.ts +69 -0
- package/src/patterns/remove/ja.ts +74 -0
- package/src/patterns/remove/ko.ts +78 -0
- package/src/patterns/remove/ms.ts +30 -0
- package/src/patterns/remove/pl.ts +62 -0
- package/src/patterns/remove/ru.ts +62 -0
- package/src/patterns/remove/th.ts +49 -0
- package/src/patterns/remove/tl.ts +30 -0
- package/src/patterns/remove/tr.ts +78 -0
- package/src/patterns/remove/uk.ts +62 -0
- package/src/patterns/remove/vi.ts +61 -0
- package/src/patterns/remove/zh.ts +72 -0
- package/src/patterns/set/ar.ts +84 -0
- package/src/patterns/set/bn.ts +53 -0
- package/src/patterns/set/de.ts +84 -0
- package/src/patterns/set/es.ts +92 -0
- package/src/patterns/set/fr.ts +88 -0
- package/src/patterns/set/hi.ts +56 -0
- package/src/patterns/set/id.ts +84 -0
- package/src/patterns/set/index.ts +107 -0
- package/src/patterns/set/it.ts +56 -0
- package/src/patterns/set/ja.ts +86 -0
- package/src/patterns/set/ko.ts +85 -0
- package/src/patterns/set/ms.ts +30 -0
- package/src/patterns/set/pl.ts +57 -0
- package/src/patterns/set/pt.ts +84 -0
- package/src/patterns/set/ru.ts +57 -0
- package/src/patterns/set/th.ts +31 -0
- package/src/patterns/set/tl.ts +30 -0
- package/src/patterns/set/tr.ts +107 -0
- package/src/patterns/set/uk.ts +57 -0
- package/src/patterns/set/vi.ts +53 -0
- package/src/patterns/set/zh.ts +84 -0
- package/src/patterns/show/ar.ts +67 -0
- package/src/patterns/show/bn.ts +47 -0
- package/src/patterns/show/de.ts +32 -0
- package/src/patterns/show/fr.ts +32 -0
- package/src/patterns/show/hi.ts +61 -0
- package/src/patterns/show/index.ts +95 -0
- package/src/patterns/show/it.ts +56 -0
- package/src/patterns/show/ja.ts +69 -0
- package/src/patterns/show/ko.ts +73 -0
- package/src/patterns/show/ms.ts +30 -0
- package/src/patterns/show/pl.ts +57 -0
- package/src/patterns/show/ru.ts +57 -0
- package/src/patterns/show/th.ts +29 -0
- package/src/patterns/show/tl.ts +30 -0
- package/src/patterns/show/tr.ts +65 -0
- package/src/patterns/show/uk.ts +57 -0
- package/src/patterns/show/vi.ts +56 -0
- package/src/patterns/show/zh.ts +68 -0
- package/src/patterns/take/ar.ts +51 -0
- package/src/patterns/take/index.ts +31 -0
- package/src/patterns/toggle/ar.ts +61 -0
- package/src/patterns/toggle/bn.ts +70 -0
- package/src/patterns/toggle/en.ts +61 -0
- package/src/patterns/toggle/es.ts +61 -0
- package/src/patterns/toggle/hi.ts +80 -0
- package/src/patterns/toggle/index.ts +95 -0
- package/src/patterns/toggle/it.ts +69 -0
- package/src/patterns/toggle/ja.ts +156 -0
- package/src/patterns/toggle/ko.ts +113 -0
- package/src/patterns/toggle/ms.ts +30 -0
- package/src/patterns/toggle/pl.ts +62 -0
- package/src/patterns/toggle/ru.ts +62 -0
- package/src/patterns/toggle/th.ts +50 -0
- package/src/patterns/toggle/tl.ts +30 -0
- package/src/patterns/toggle/tr.ts +88 -0
- package/src/patterns/toggle/uk.ts +62 -0
- package/src/patterns/toggle/vi.ts +61 -0
- package/src/patterns/toggle/zh.ts +99 -0
- package/src/public-api.ts +286 -0
- package/src/registry.ts +441 -0
- package/src/tokenizers/arabic.ts +723 -0
- package/src/tokenizers/base.ts +1300 -0
- package/src/tokenizers/bengali.ts +289 -0
- package/src/tokenizers/chinese.ts +481 -0
- package/src/tokenizers/english.ts +416 -0
- package/src/tokenizers/french.ts +326 -0
- package/src/tokenizers/german.ts +324 -0
- package/src/tokenizers/hindi.ts +319 -0
- package/src/tokenizers/index.ts +127 -0
- package/src/tokenizers/indonesian.ts +306 -0
- package/src/tokenizers/italian.ts +458 -0
- package/src/tokenizers/japanese.ts +447 -0
- package/src/tokenizers/korean.ts +642 -0
- package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
- package/src/tokenizers/morphology/french-normalizer.ts +268 -0
- package/src/tokenizers/morphology/german-normalizer.ts +256 -0
- package/src/tokenizers/morphology/index.ts +46 -0
- package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
- package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
- package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
- package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
- package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
- package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
- package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
- package/src/tokenizers/morphology/types.ts +211 -0
- package/src/tokenizers/ms.ts +198 -0
- package/src/tokenizers/polish.ts +354 -0
- package/src/tokenizers/portuguese.ts +304 -0
- package/src/tokenizers/quechua.ts +339 -0
- package/src/tokenizers/russian.ts +375 -0
- package/src/tokenizers/spanish.ts +403 -0
- package/src/tokenizers/swahili.ts +303 -0
- package/src/tokenizers/thai.ts +236 -0
- package/src/tokenizers/tl.ts +198 -0
- package/src/tokenizers/turkish.ts +411 -0
- package/src/tokenizers/ukrainian.ts +369 -0
- package/src/tokenizers/vietnamese.ts +410 -0
- package/src/types/grammar-types.ts +617 -0
- package/src/types/unified-profile.ts +267 -0
- package/src/types.ts +709 -0
- package/src/utils/confidence-calculator.ts +147 -0
- package/src/validators/command-validator.ts +380 -0
- package/src/validators/index.ts +15 -0
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,3426 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Grammar Types for Semantic Multilingual Parsing
|
|
3
|
+
*
|
|
4
|
+
* These types define the semantic role system used across all 13 supported languages.
|
|
5
|
+
* Originally from @lokascript/i18n, now consolidated here for package independence.
|
|
6
|
+
*
|
|
7
|
+
* Key Linguistic Concepts:
|
|
8
|
+
* - Word Order: SVO, SOV, VSO (and variations)
|
|
9
|
+
* - Adposition Type: Preposition (English) vs Postposition (Japanese/Korean)
|
|
10
|
+
* - Morphology: Isolating (Chinese) vs Agglutinative (Turkish) vs Fusional (Arabic)
|
|
11
|
+
* - Text Direction: LTR vs RTL
|
|
12
|
+
*/
|
|
13
|
+
/**
|
|
14
|
+
* Semantic roles in hyperscript commands.
|
|
15
|
+
* These are universal across all 13 supported languages - only the surface form changes.
|
|
16
|
+
*
|
|
17
|
+
* ## Core Thematic Roles (from linguistic theory)
|
|
18
|
+
* | Role | Usage | Purpose | Example |
|
|
19
|
+
* |-------------|-------|-----------------------------|---------------------------|
|
|
20
|
+
* | action | 100% | Command verb | toggle, put, fetch |
|
|
21
|
+
* | patient | 90% | What is acted upon | .active, #count |
|
|
22
|
+
* | destination | 40% | Where something goes | into #output, to .class |
|
|
23
|
+
* | source | 13% | Where something comes from | from #input, from URL |
|
|
24
|
+
* | event | 106% | Trigger events | click, keydown, submit |
|
|
25
|
+
* | condition | 8% | Boolean expressions | if x > 5, when visible |
|
|
26
|
+
* | agent | 0% | Who performs action | Reserved for future use |
|
|
27
|
+
* | goal | 1% | Target value/state | to 'red' (in transition) |
|
|
28
|
+
*
|
|
29
|
+
* ## Quantitative Roles (answer "how much/long")
|
|
30
|
+
* | Role | Usage | Purpose | Example |
|
|
31
|
+
* |----------|-------|----------------|----------------------|
|
|
32
|
+
* | quantity | 7% | Numeric amount | by 5, 3 times |
|
|
33
|
+
* | duration | 1% | Time span | for 5 seconds, 500ms |
|
|
34
|
+
*
|
|
35
|
+
* ## Adverbial/Modifier Roles (answer "how/by what means")
|
|
36
|
+
* | Role | Usage | Purpose | Example |
|
|
37
|
+
* |--------------|-------|---------------------------|-------------------|
|
|
38
|
+
* | style | 2% | Animation/behavior | with fade |
|
|
39
|
+
* | manner | 2% | Insertion position | before, after |
|
|
40
|
+
* | method | 1% | HTTP method/technique | via POST, as GET |
|
|
41
|
+
* | responseType | 1% | Response format | as json, as html |
|
|
42
|
+
*
|
|
43
|
+
* ## Control Flow Roles
|
|
44
|
+
* | Role | Usage | Purpose | Example |
|
|
45
|
+
* |----------|-------|--------------|-----------------------|
|
|
46
|
+
* | loopType | 6% | Loop variant | forever, until, times |
|
|
47
|
+
*
|
|
48
|
+
* ## Design Notes
|
|
49
|
+
* - Low-usage roles (agent, goal, method, responseType) are intentionally kept for:
|
|
50
|
+
* - Linguistic completeness across all 13 languages
|
|
51
|
+
* - Future extensibility (AI agents, server-side execution)
|
|
52
|
+
* - Command-specific semantics (fetch, transition)
|
|
53
|
+
* - Each role has distinct grammatical markers per language (see profiles/index.ts)
|
|
54
|
+
* - Usage percentages based on pattern database analysis
|
|
55
|
+
*/
|
|
56
|
+
type SemanticRole = 'action' | 'agent' | 'patient' | 'source' | 'destination' | 'goal' | 'event' | 'condition' | 'quantity' | 'duration' | 'responseType' | 'method' | 'style' | 'manner' | 'loopType' | 'continues';
|
|
57
|
+
/**
|
|
58
|
+
* Word order patterns
|
|
59
|
+
* These represent the major typological categories
|
|
60
|
+
*/
|
|
61
|
+
type WordOrder$1 = 'SVO' | 'SOV' | 'VSO' | 'VOS' | 'OVS' | 'OSV' | 'free';
|
|
62
|
+
/**
|
|
63
|
+
* Where grammatical markers appear relative to their noun/verb
|
|
64
|
+
*/
|
|
65
|
+
type AdpositionType = 'preposition' | 'postposition' | 'circumposition' | 'none';
|
|
66
|
+
/**
|
|
67
|
+
* Morphological typology - how words are constructed
|
|
68
|
+
*/
|
|
69
|
+
type MorphologyType = 'isolating' | 'agglutinative' | 'fusional' | 'polysynthetic';
|
|
70
|
+
/**
|
|
71
|
+
* A grammatical marker (particle, case ending, preposition)
|
|
72
|
+
*/
|
|
73
|
+
interface GrammaticalMarker {
|
|
74
|
+
form: string;
|
|
75
|
+
role: SemanticRole;
|
|
76
|
+
position: AdpositionType;
|
|
77
|
+
required: boolean;
|
|
78
|
+
alternatives?: string[];
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Semantic-First Multilingual Hyperscript Types
|
|
83
|
+
*
|
|
84
|
+
* This module defines the canonical semantic representation that all languages
|
|
85
|
+
* parse to and render from. The semantic layer is language-neutral - it captures
|
|
86
|
+
* the MEANING of hyperscript commands independent of surface syntax.
|
|
87
|
+
*/
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Canonical action names (English-based internally, but not visible to users)
|
|
91
|
+
* These map to hyperscript commands and are used in the semantic AST.
|
|
92
|
+
*/
|
|
93
|
+
type ActionType = 'toggle' | 'add' | 'remove' | 'put' | 'append' | 'prepend' | 'take' | 'make' | 'clone' | 'swap' | 'morph' | 'set' | 'get' | 'increment' | 'decrement' | 'log' | 'show' | 'hide' | 'transition' | 'on' | 'trigger' | 'send' | 'focus' | 'blur' | 'go' | 'wait' | 'fetch' | 'settle' | 'measure' | 'install' | 'if' | 'unless' | 'else' | 'repeat' | 'for' | 'while' | 'continue' | 'halt' | 'throw' | 'call' | 'return' | 'js' | 'async' | 'tell' | 'default' | 'init' | 'behavior' | 'compound';
|
|
94
|
+
/**
|
|
95
|
+
* A semantic value represents a typed piece of data in a semantic node.
|
|
96
|
+
* Values are language-neutral - they capture what something IS, not how it's written.
|
|
97
|
+
*/
|
|
98
|
+
type SemanticValue = LiteralValue | SelectorValue | ReferenceValue | PropertyPathValue | ExpressionValue;
|
|
99
|
+
interface LiteralValue {
|
|
100
|
+
readonly type: 'literal';
|
|
101
|
+
readonly value: string | number | boolean;
|
|
102
|
+
readonly dataType?: 'string' | 'number' | 'boolean' | 'duration';
|
|
103
|
+
}
|
|
104
|
+
interface SelectorValue {
|
|
105
|
+
readonly type: 'selector';
|
|
106
|
+
readonly value: string;
|
|
107
|
+
readonly selectorKind: 'id' | 'class' | 'attribute' | 'element' | 'complex';
|
|
108
|
+
}
|
|
109
|
+
interface ReferenceValue {
|
|
110
|
+
readonly type: 'reference';
|
|
111
|
+
readonly value: 'me' | 'you' | 'it' | 'result' | 'event' | 'target' | 'body';
|
|
112
|
+
}
|
|
113
|
+
interface PropertyPathValue {
|
|
114
|
+
readonly type: 'property-path';
|
|
115
|
+
readonly object: SemanticValue;
|
|
116
|
+
readonly property: string;
|
|
117
|
+
}
|
|
118
|
+
interface ExpressionValue {
|
|
119
|
+
readonly type: 'expression';
|
|
120
|
+
/** Raw expression string for complex expressions that need further parsing */
|
|
121
|
+
readonly raw: string;
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Base interface for all semantic nodes.
|
|
125
|
+
* Semantic nodes capture the MEANING of hyperscript constructs.
|
|
126
|
+
*/
|
|
127
|
+
interface SemanticNode {
|
|
128
|
+
readonly kind: 'command' | 'event-handler' | 'conditional' | 'compound' | 'loop';
|
|
129
|
+
readonly action: ActionType;
|
|
130
|
+
readonly roles: ReadonlyMap<SemanticRole, SemanticValue>;
|
|
131
|
+
readonly metadata?: SemanticMetadata;
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Metadata about the source of a semantic node.
|
|
135
|
+
* Useful for debugging, error messages, and round-trip conversion.
|
|
136
|
+
*/
|
|
137
|
+
interface SemanticMetadata {
|
|
138
|
+
readonly sourceLanguage?: string;
|
|
139
|
+
readonly sourceText?: string;
|
|
140
|
+
readonly sourcePosition?: SourcePosition;
|
|
141
|
+
readonly patternId?: string;
|
|
142
|
+
}
|
|
143
|
+
interface SourcePosition {
|
|
144
|
+
readonly start: number;
|
|
145
|
+
readonly end: number;
|
|
146
|
+
readonly line?: number;
|
|
147
|
+
readonly column?: number;
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* A command semantic node - represents a single hyperscript command.
|
|
151
|
+
*/
|
|
152
|
+
interface CommandSemanticNode extends SemanticNode {
|
|
153
|
+
readonly kind: 'command';
|
|
154
|
+
}
|
|
155
|
+
/**
|
|
156
|
+
* An event handler semantic node - represents "on [event] [commands]".
|
|
157
|
+
*/
|
|
158
|
+
interface EventHandlerSemanticNode extends SemanticNode {
|
|
159
|
+
readonly kind: 'event-handler';
|
|
160
|
+
readonly action: 'on';
|
|
161
|
+
readonly body: SemanticNode[];
|
|
162
|
+
readonly eventModifiers?: EventModifiers;
|
|
163
|
+
/**
|
|
164
|
+
* Event parameter names for destructuring.
|
|
165
|
+
* E.g., for "on click(clientX, clientY)", this would be ['clientX', 'clientY']
|
|
166
|
+
*/
|
|
167
|
+
readonly parameterNames?: readonly string[];
|
|
168
|
+
}
|
|
169
|
+
interface EventModifiers {
|
|
170
|
+
readonly once?: boolean;
|
|
171
|
+
readonly debounce?: number;
|
|
172
|
+
readonly throttle?: number;
|
|
173
|
+
readonly queue?: 'first' | 'last' | 'all' | 'none';
|
|
174
|
+
readonly from?: SemanticValue;
|
|
175
|
+
}
|
|
176
|
+
/**
|
|
177
|
+
* A conditional semantic node - represents "if [condition] then [body] else [body]".
|
|
178
|
+
*/
|
|
179
|
+
interface ConditionalSemanticNode extends SemanticNode {
|
|
180
|
+
readonly kind: 'conditional';
|
|
181
|
+
readonly action: 'if';
|
|
182
|
+
readonly thenBranch: SemanticNode[];
|
|
183
|
+
readonly elseBranch?: SemanticNode[];
|
|
184
|
+
}
|
|
185
|
+
/**
|
|
186
|
+
* A compound semantic node - represents multiple chained statements.
|
|
187
|
+
*/
|
|
188
|
+
interface CompoundSemanticNode extends SemanticNode {
|
|
189
|
+
readonly kind: 'compound';
|
|
190
|
+
readonly statements: SemanticNode[];
|
|
191
|
+
readonly chainType: 'then' | 'and' | 'async';
|
|
192
|
+
}
|
|
193
|
+
/**
|
|
194
|
+
* Loop variant discriminant for different loop types.
|
|
195
|
+
*/
|
|
196
|
+
type LoopVariant = 'forever' | 'times' | 'for' | 'while' | 'until';
|
|
197
|
+
/**
|
|
198
|
+
* A loop semantic node - represents repeat/for/while loops.
|
|
199
|
+
*/
|
|
200
|
+
interface LoopSemanticNode extends SemanticNode {
|
|
201
|
+
readonly kind: 'loop';
|
|
202
|
+
readonly action: 'repeat' | 'for' | 'while';
|
|
203
|
+
/** The type of loop (forever, times, for, while, until) */
|
|
204
|
+
readonly loopVariant: LoopVariant;
|
|
205
|
+
/** Commands to execute in each iteration */
|
|
206
|
+
readonly body: SemanticNode[];
|
|
207
|
+
/** Loop variable name for 'for' loops (e.g., 'item' in 'for item in list') */
|
|
208
|
+
readonly loopVariable?: string;
|
|
209
|
+
/** Index variable name if specified (e.g., 'i' in 'for item with index i') */
|
|
210
|
+
readonly indexVariable?: string;
|
|
211
|
+
}
|
|
212
|
+
/**
|
|
213
|
+
* A pattern defines how a semantic structure appears in a specific language.
|
|
214
|
+
* Patterns enable bidirectional conversion: parse (natural → semantic) and
|
|
215
|
+
* render (semantic → natural).
|
|
216
|
+
*/
|
|
217
|
+
interface LanguagePattern {
|
|
218
|
+
/** Unique identifier for this pattern */
|
|
219
|
+
readonly id: string;
|
|
220
|
+
/** ISO 639-1 language code */
|
|
221
|
+
readonly language: string;
|
|
222
|
+
/** Which command this pattern matches */
|
|
223
|
+
readonly command: ActionType;
|
|
224
|
+
/** Priority for disambiguation (higher = checked first) */
|
|
225
|
+
readonly priority: number;
|
|
226
|
+
/** The pattern template with role placeholders */
|
|
227
|
+
readonly template: PatternTemplate;
|
|
228
|
+
/** Rules for extracting semantic roles from matched tokens */
|
|
229
|
+
readonly extraction: ExtractionRules;
|
|
230
|
+
/** Optional constraints on when this pattern applies */
|
|
231
|
+
readonly constraints?: PatternConstraints;
|
|
232
|
+
}
|
|
233
|
+
/**
|
|
234
|
+
* A pattern template defines the expected token sequence.
|
|
235
|
+
*
|
|
236
|
+
* Template syntax:
|
|
237
|
+
* - Literal tokens: "toggle", "を", "على"
|
|
238
|
+
* - Role placeholders: {patient}, {target}, {destination}
|
|
239
|
+
* - Optional groups: [on {target}]
|
|
240
|
+
* - Alternatives in extraction (not in template string)
|
|
241
|
+
*
|
|
242
|
+
* Example templates:
|
|
243
|
+
* - English: "toggle {patient} [on {target}]"
|
|
244
|
+
* - Japanese: "{target} の {patient} を 切り替え"
|
|
245
|
+
* - Arabic: "بدّل {patient} [على {target}]"
|
|
246
|
+
*/
|
|
247
|
+
interface PatternTemplate {
|
|
248
|
+
/** Human-readable template string */
|
|
249
|
+
readonly format: string;
|
|
250
|
+
/** Parsed token sequence for matching */
|
|
251
|
+
readonly tokens: PatternToken[];
|
|
252
|
+
}
|
|
253
|
+
/**
 * One element of a pattern template's token sequence: a literal,
 * a role placeholder, or a group of nested tokens.
 */
type PatternToken =
    | LiteralPatternToken
    | RolePatternToken
    | GroupPatternToken;
|
|
254
|
+
/**
 * A template token that matches fixed text.
 */
interface LiteralPatternToken {
    /** Discriminant identifying this token as a literal */
    readonly type: 'literal';
    /** The literal text to match */
    readonly value: string;
    /** Alternative spellings/forms that also match */
    readonly alternatives?: string[];
}
|
|
260
|
+
/**
 * A template token that stands for a semantic role placeholder.
 */
interface RolePatternToken {
    /** Discriminant identifying this token as a role placeholder */
    readonly type: 'role';
    /** The semantic role this placeholder captures */
    readonly role: SemanticRole;
    /** Whether the role may be absent from the input */
    readonly optional?: boolean;
    /** Expected value types (for validation) */
    readonly expectedTypes?: Array<SemanticValue['type']>;
}
|
|
267
|
+
/**
 * A template token that groups a nested token sequence.
 */
interface GroupPatternToken {
    /** Discriminant identifying this token as a group */
    readonly type: 'group';
    /** The tokens contained in this group */
    readonly tokens: PatternToken[];
    /** Whether the whole group may be absent from the input */
    readonly optional?: boolean;
}
|
|
272
|
+
/**
 * Rules for extracting semantic values from matched tokens.
 * Keyed by role name; each entry describes how that role's value is found.
 */
interface ExtractionRules {
    readonly [role: string]: ExtractionRule;
}
|
|
278
|
+
/**
 * Describes how a single role's value is extracted from matched tokens.
 * All fields are optional.
 */
interface ExtractionRule {
    /** Position-based extraction (0-indexed from pattern start) */
    readonly position?: number;
    /** Marker-based extraction (find value after this marker) */
    readonly marker?: string;
    /** Alternative markers that also work */
    readonly markerAlternatives?: string[];
    /** Transform the extracted value */
    readonly transform?: (raw: string) => SemanticValue;
    /** Default value if not found (for optional roles) */
    readonly default?: SemanticValue;
}
|
|
290
|
+
/**
 * Additional constraints on pattern applicability.
 */
interface PatternConstraints {
    /** Required roles that must be present */
    readonly requiredRoles?: SemanticRole[];
    /** Roles that must NOT be present */
    readonly forbiddenRoles?: SemanticRole[];
    /** Valid selector types for the patient role */
    readonly validPatientTypes?: Array<SelectorValue['selectorKind']>;
    /** Pattern IDs this conflicts with */
    readonly conflictsWith?: string[];
}
|
|
303
|
+
/**
 * A token from language-specific tokenization.
 */
interface LanguageToken {
    /** Token text */
    readonly value: string;
    /** Classification of the token */
    readonly kind: TokenKind;
    /** Location of the token in the source */
    readonly position: SourcePosition;
    /** Normalized form from explicit keyword map (e.g., 切り替え → toggle) */
    readonly normalized?: string;
    /** Morphologically normalized stem (e.g., 切り替えた → 切り替え) */
    readonly stem?: string;
    /** Confidence in the morphological stem (0.0-1.0) */
    readonly stemConfidence?: number;
    /** Additional metadata for specific token types (e.g., event modifier data) */
    readonly metadata?: Record<string, unknown>;
}
|
|
319
|
+
/**
 * Classification assigned to a token by a language tokenizer.
 */
type TokenKind =
    | 'keyword'
    | 'selector'
    | 'literal'
    | 'particle'
    | 'conjunction'
    | 'event-modifier'
    | 'identifier'
    | 'operator'
    | 'punctuation'
    | 'url';
|
|
320
|
+
/**
 * A stream of tokens with navigation capabilities.
 */
interface TokenStream {
    /** All tokens in the stream */
    readonly tokens: readonly LanguageToken[];
    /** Language code of the tokenized input */
    readonly language: string;
    /** Look at token at current position + offset without consuming */
    peek(offset?: number): LanguageToken | null;
    /** Consume and return current token, advance position */
    advance(): LanguageToken;
    /** Check if we've consumed all tokens */
    isAtEnd(): boolean;
    /** Save current position for backtracking */
    mark(): StreamMark;
    /** Restore to a saved position */
    reset(mark: StreamMark): void;
    /** Get current position */
    position(): number;
}
|
|
339
|
+
/**
 * A saved stream position, as produced by `TokenStream.mark()` and
 * consumed by `TokenStream.reset()`.
 */
interface StreamMark {
    /** The saved position */
    readonly position: number;
}
|
|
342
|
+
/**
 * Result of successfully matching a pattern.
 */
interface PatternMatchResult {
    /** The pattern that matched */
    readonly pattern: LanguagePattern;
    /** Values captured during the match, keyed by semantic role */
    readonly captured: ReadonlyMap<SemanticRole, SemanticValue>;
    /** Number of tokens consumed by the match */
    readonly consumedTokens: number;
    /** Confidence score for the match (NOTE(review): range not stated here — confirm) */
    readonly confidence: number;
}
|
|
351
|
+
/**
 * Error when pattern matching fails.
 */
interface PatternMatchError {
    /** Description of the failure */
    readonly message: string;
    /** Source location associated with the failure */
    readonly position: SourcePosition;
    /** Patterns that were expected (presumably pattern IDs or descriptions — confirm) */
    readonly expectedPatterns?: string[];
    /** Whatever part of a match result was obtained before the failure */
    readonly partialMatch?: Partial<PatternMatchResult>;
}
|
|
360
|
+
/**
 * Language-specific tokenizer interface.
 * Each language implements its own tokenizer to handle:
 * - Word boundaries (spaces for English, particles for Japanese)
 * - Character sets (ASCII, CJK, Arabic, etc.)
 * - Special markers (particles, prefixes, suffixes)
 */
interface LanguageTokenizer {
    /** Language code this tokenizer handles */
    readonly language: string;
    /** Text direction of the language */
    readonly direction: 'ltr' | 'rtl';
    /** Convert input string to token stream */
    tokenize(input: string): TokenStream;
    /** Classify a single token */
    classifyToken(token: string): TokenKind;
}
|
|
375
|
+
/**
 * Main parser interface - converts natural language to semantic nodes.
 */
interface SemanticParser {
    /** Parse input in specified language to semantic node */
    parse(input: string, language: string): SemanticNode;
    /** Check if input can be parsed in the specified language */
    canParse(input: string, language: string): boolean;
    /** Get all supported languages */
    supportedLanguages(): string[];
}
|
|
386
|
+
/**
 * Renderer interface - converts semantic nodes to natural language.
 */
interface SemanticRenderer {
    /** Render semantic node in specified language */
    render(node: SemanticNode, language: string): string;
    /** Render semantic node in explicit mode */
    renderExplicit(node: SemanticNode): string;
    /** Get all supported languages */
    supportedLanguages(): string[];
}
|
|
397
|
+
/**
 * Create a selector semantic value from a CSS selector string.
 *
 * @param value - The CSS selector string
 * @returns The selector value
 */
declare function createSelector(value: string): SelectorValue;
|
|
401
|
+
/**
 * Create a literal semantic value.
 *
 * @param value - The literal's string, number, or boolean value
 * @param dataType - Optional data type tag for the literal
 * @returns The literal value
 */
declare function createLiteral(value: string | number | boolean, dataType?: LiteralValue['dataType']): LiteralValue;
|
|
405
|
+
/**
 * Create a reference semantic value.
 *
 * @param value - The reference target (one of the values allowed by `ReferenceValue['value']`)
 * @returns The reference value
 */
declare function createReference(value: ReferenceValue['value']): ReferenceValue;
|
|
409
|
+
/**
 * Create a property path semantic value.
 *
 * @param object - The semantic value whose property is accessed
 * @param property - The property name
 * @returns The property path value
 */
declare function createPropertyPath(object: SemanticValue, property: string): PropertyPathValue;
|
|
413
|
+
/**
 * Create a semantic node with the given action and roles.
 *
 * @param action - The action (command) type of the node
 * @param roles - Semantic values keyed by role name
 * @param metadata - Optional metadata to attach to the node
 * @returns The command node
 */
declare function createCommandNode(action: ActionType, roles: Record<string, SemanticValue>, metadata?: SemanticMetadata): CommandSemanticNode;
|
|
417
|
+
/**
 * Create an event handler semantic node.
 *
 * @param event - Semantic value identifying the event
 * @param body - Nodes that make up the handler body
 * @param modifiers - Optional event modifiers
 * @param metadata - Optional metadata to attach to the node
 * @param parameterNames - Optional parameter names for the handler
 * @returns The event handler node
 */
declare function createEventHandler(event: SemanticValue, body: SemanticNode[], modifiers?: EventModifiers, metadata?: SemanticMetadata, parameterNames?: string[]): EventHandlerSemanticNode;
|
|
421
|
+
/**
 * Create a compound semantic node (for chained statements).
 *
 * @param statements - The chained statements
 * @param chainType - Optional chain type: 'then', 'and', or 'async'
 * @param metadata - Optional metadata to attach to the node
 * @returns The compound node
 */
declare function createCompoundNode(statements: SemanticNode[], chainType?: 'then' | 'and' | 'async', metadata?: SemanticMetadata): CompoundSemanticNode;
|
|
425
|
+
/**
 * Create a conditional semantic node (if/else).
 *
 * @param condition - Semantic value representing the condition
 * @param thenBranch - Nodes for the "if" branch
 * @param elseBranch - Optional nodes for the "else" branch
 * @param metadata - Optional metadata to attach to the node
 * @returns The conditional node
 */
declare function createConditionalNode(condition: SemanticValue, thenBranch: SemanticNode[], elseBranch?: SemanticNode[], metadata?: SemanticMetadata): ConditionalSemanticNode;
|
|
429
|
+
/**
 * Create a loop semantic node.
 *
 * @param action - The loop keyword: 'repeat', 'for', or 'while'
 * @param loopVariant - The loop variant
 * @param roles - Semantic values keyed by role name
 * @param body - Nodes that make up the loop body
 * @param options - Optional loop variable name, index variable name, and metadata
 * @returns The loop node
 */
declare function createLoopNode(action: 'repeat' | 'for' | 'while', loopVariant: LoopVariant, roles: Record<string, SemanticValue>, body: SemanticNode[], options?: {
    loopVariable?: string;
    indexVariable?: string;
    metadata?: SemanticMetadata;
}): LoopSemanticNode;
|
|
437
|
+
/**
 * Argument with semantic role attached.
 * A semantic value intersected with an optional `role` field.
 */
type SemanticArgument = SemanticValue & {
    role?: SemanticRole;
};
|
|
443
|
+
/**
 * Result of semantic parsing (used by command validator).
 */
interface SemanticParseResult {
    /** The action/command type */
    readonly action: ActionType;
    /** Confidence score (0-1) */
    readonly confidence: number;
    /** Source language code */
    readonly language: string;
    /** Parsed arguments with roles */
    readonly arguments: SemanticArgument[];
}
|
|
456
|
+
|
|
457
|
+
/**
|
|
458
|
+
* Morphological Normalizer Types
|
|
459
|
+
*
|
|
460
|
+
* Defines interfaces for language-specific morphological analysis.
|
|
461
|
+
* Normalizers reduce conjugated/inflected forms to canonical stems
|
|
462
|
+
* that can be matched against keyword dictionaries.
|
|
463
|
+
*/
|
|
464
|
+
/**
 * Result of morphological normalization.
 */
interface NormalizationResult {
    /** The extracted stem/root form */
    readonly stem: string;
    /** Confidence in the normalization (0.0-1.0) */
    readonly confidence: number;
    /** Optional metadata about the transformation */
    readonly metadata?: NormalizationMetadata;
}
|
|
475
|
+
/**
 * Metadata about morphological transformations applied.
 */
interface NormalizationMetadata {
    /** Prefixes that were removed */
    readonly removedPrefixes?: readonly string[];
    /** Suffixes that were removed */
    readonly removedSuffixes?: readonly string[];
    /** Type of conjugation detected */
    readonly conjugationType?: ConjugationType;
    /** Original form classification */
    readonly originalForm?: string;
    /** Applied transformation rules (for debugging) */
    readonly appliedRules?: readonly string[];
}
|
|
490
|
+
/**
 * Types of verb conjugation/inflection.
 */
type ConjugationType =
    | 'present'
    | 'past'
    | 'future'
    | 'progressive'
    | 'perfect'
    | 'imperative'
    | 'subjunctive'
    | 'conditional'
    | 'passive'
    | 'causative'
    | 'polite'
    | 'humble'
    | 'honorific'
    | 'negative'
    | 'potential'
    | 'volitional'
    | 'conditional-tara'
    | 'conditional-to'
    | 'conditional-ba'
    | 'connective'
    | 'conditional-myeon'
    | 'temporal-ttae'
    | 'causal-nikka'
    | 'honorific-conditional'
    | 'honorific-temporal'
    | 'honorific-causal'
    | 'honorific-past'
    | 'honorific-polite'
    | 'sequential-after'
    | 'sequential-before'
    | 'immediate'
    | 'obligation'
    | 'reflexive'
    | 'reflexive-imperative'
    | 'gerund'
    | 'participle'
    | 'conditional-idha'
    | 'temporal-indama'
    | 'temporal-hina'
    | 'temporal-lamma'
    | 'past-verb'
    | 'conditional-se'
    | 'temporal-ince'
    | 'temporal-dikce'
    | 'aorist'
    | 'optative'
    | 'necessitative'
    | 'request'
    | 'casual-request'
    | 'contracted'
    | 'contracted-past'
    | 'compound'
    | 'te-form'
    | 'dictionary';
|
|
494
|
+
/**
 * Interface for language-specific morphological normalizers.
 *
 * Normalizers attempt to reduce inflected word forms to their
 * canonical stems. This enables matching conjugated verbs against
 * keyword dictionaries that only contain base forms.
 *
 * Example (Japanese):
 *   切り替えた (past) → { stem: '切り替え', confidence: 0.85 }
 *   切り替えます (polite) → { stem: '切り替え', confidence: 0.85 }
 *
 * Example (Spanish):
 *   mostrarse (reflexive infinitive) → { stem: 'mostrar', confidence: 0.85 }
 *   alternando (gerund) → { stem: 'alternar', confidence: 0.85 }
 */
interface MorphologicalNormalizer {
    /** Language code this normalizer handles */
    readonly language: string;
    /**
     * Normalize a word to its canonical stem form.
     *
     * @param word - The word to normalize
     * @returns Normalization result with stem and confidence
     */
    normalize(word: string): NormalizationResult;
    /**
     * Check if a word appears to be a verb form that can be normalized.
     * Optional optimization to skip normalization for non-verb tokens.
     *
     * @param word - The word to check
     * @returns true if the word might be a normalizable verb form
     */
    isNormalizable?(word: string): boolean;
}
|
|
528
|
+
|
|
529
|
+
/**
|
|
530
|
+
* Base Tokenizer
|
|
531
|
+
*
|
|
532
|
+
* Provides the TokenStream implementation and shared tokenization utilities.
|
|
533
|
+
* Language-specific tokenizers extend these base utilities.
|
|
534
|
+
*/
|
|
535
|
+
|
|
536
|
+
/**
 * Configuration for a native language time unit pattern.
 * Used by tryNumberWithTimeUnits() to match language-specific time units.
 */
interface TimeUnitMapping {
    /** The pattern to match (e.g., 'segundos', 'ミリ秒') */
    readonly pattern: string;
    /** The standard suffix to use (ms, s, m, h) */
    readonly suffix: string;
    /** Length of the pattern (for optimization) */
    readonly length: number;
    /** Whether to check for word boundary after the pattern */
    readonly checkBoundary?: boolean;
    /** Character that cannot follow the pattern (e.g., 's' for 'm' to avoid 'ms') */
    readonly notFollowedBy?: string;
    /** Whether to do case-insensitive matching */
    readonly caseInsensitive?: boolean;
}
|
|
554
|
+
/**
 * Concrete implementation of TokenStream.
 */
declare class TokenStreamImpl implements TokenStream {
    /** All tokens in the stream */
    readonly tokens: readonly LanguageToken[];
    /** Language code of the tokenized input */
    readonly language: string;
    /** Internal cursor (private; type not exposed in the declaration) */
    private pos;
    /**
     * @param tokens - The tokens the stream navigates over
     * @param language - Language code of the tokenized input
     */
    constructor(tokens: LanguageToken[], language: string);
    /** Look at token at current position + offset without consuming */
    peek(offset?: number): LanguageToken | null;
    /** Consume and return current token, advance position */
    advance(): LanguageToken;
    /** Check if all tokens have been consumed */
    isAtEnd(): boolean;
    /** Save current position for backtracking */
    mark(): StreamMark;
    /** Restore to a saved position */
    reset(mark: StreamMark): void;
    /** Get current position */
    position(): number;
    /**
     * Get remaining tokens as an array.
     */
    remaining(): LanguageToken[];
    /**
     * Consume tokens while predicate is true.
     */
    takeWhile(predicate: (token: LanguageToken) => boolean): LanguageToken[];
    /**
     * Skip tokens while predicate is true.
     */
    skipWhile(predicate: (token: LanguageToken) => boolean): void;
}
|
|
581
|
+
/**
 * Keyword entry for tokenizer - maps native word to normalized English form.
 */
interface KeywordEntry {
    /** The word as written in the native language */
    readonly native: string;
    /** The normalized English form the native word maps to */
    readonly normalized: string;
}
|
|
588
|
+
/**
 * Profile interface for keyword derivation.
 * Matches the structure of LanguageProfile but only includes fields needed for tokenization.
 */
interface TokenizerProfile {
    /** Keyword translations: a primary form plus optional alternatives and normalized form */
    readonly keywords?: Record<string, {
        primary: string;
        alternatives?: string[];
        normalized?: string;
    }>;
    /** Reference translations (string to string) */
    readonly references?: Record<string, string>;
    /** Role markers: a primary form plus optional alternatives and position */
    readonly roleMarkers?: Record<string, {
        primary: string;
        alternatives?: string[];
        position?: string;
    }>;
}
|
|
605
|
+
/**
 * Abstract base class for language-specific tokenizers.
 * Provides common functionality for CSS selectors, strings, and numbers.
 */
declare abstract class BaseTokenizer implements LanguageTokenizer {
    /** Language code handled by the concrete tokenizer */
    abstract readonly language: string;
    /** Text direction of the language */
    abstract readonly direction: 'ltr' | 'rtl';
    /** Optional morphological normalizer for this language */
    protected normalizer?: MorphologicalNormalizer;
    /** Keywords derived from profile, sorted longest-first for greedy matching */
    protected profileKeywords: KeywordEntry[];
    /** Map for O(1) keyword lookups by lowercase native word */
    protected profileKeywordMap: Map<string, KeywordEntry>;
    /** Convert input string to token stream */
    abstract tokenize(input: string): TokenStream;
    /** Classify a single token */
    abstract classifyToken(token: string): TokenKind;
    /**
     * Initialize keyword mappings from a language profile.
     * Builds a list of native→english mappings from:
     * - profile.keywords (primary + alternatives)
     * - profile.references (me, it, you, etc.)
     * - profile.roleMarkers (into, from, with, etc.)
     *
     * Results are sorted longest-first for greedy matching (important for non-space languages).
     * Extras take precedence over profile entries when there are duplicates.
     *
     * @param profile - Language profile containing keyword translations
     * @param extras - Additional keyword entries to include (literals, positional, events)
     */
    protected initializeKeywordsFromProfile(profile: TokenizerProfile, extras?: KeywordEntry[]): void;
    /**
     * Remove diacritical marks from a word for normalization.
     * Primarily for Arabic (shadda, fatha, kasra, damma, sukun, etc.)
     * but could be extended for other languages.
     *
     * @param word - Word to normalize
     * @returns Word without diacritics
     */
    protected removeDiacritics(word: string): string;
    /**
     * Try to match a keyword from profile at the current position.
     * Uses longest-first greedy matching (important for non-space languages).
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryProfileKeyword(input: string, pos: number): LanguageToken | null;
    /**
     * Check if the remaining input starts with any known keyword.
     * Useful for non-space languages to detect word boundaries.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns true if a keyword starts at this position
     */
    protected isKeywordStart(input: string, pos: number): boolean;
    /**
     * Look up a keyword by native word (case-insensitive).
     * O(1) lookup using the keyword map.
     *
     * @param native - Native word to look up
     * @returns KeywordEntry if found, undefined otherwise
     */
    protected lookupKeyword(native: string): KeywordEntry | undefined;
    /**
     * Check if a word is a known keyword (case-insensitive).
     * O(1) lookup using the keyword map.
     *
     * @param native - Native word to check
     * @returns true if the word is a keyword
     */
    protected isKeyword(native: string): boolean;
    /**
     * Set the morphological normalizer for this tokenizer.
     *
     * @param normalizer - The normalizer to use
     */
    setNormalizer(normalizer: MorphologicalNormalizer): void;
    /**
     * Try to normalize a word using the morphological normalizer.
     * Returns null if no normalizer is set or normalization fails.
     *
     * Note: We don't check isNormalizable() here because the individual tokenizers
     * historically called normalize() directly without that check. The normalize()
     * method itself handles returning noChange() for words that can't be normalized.
     *
     * @param word - The word to normalize
     * @returns Normalization result, or null
     */
    protected tryNormalize(word: string): NormalizationResult | null;
    /**
     * Try morphological normalization and keyword lookup.
     *
     * If the word can be normalized to a stem that matches a known keyword,
     * returns a keyword token with morphological metadata (stem, stemConfidence).
     *
     * This is the common pattern for handling conjugated verbs across languages:
     * 1. Normalize the word (e.g., "toggled" → "toggle")
     * 2. Look up the stem in the keyword map
     * 3. Create a token with both the original form and stem metadata
     *
     * @param word - The word to normalize and look up
     * @param startPos - Start position for the token
     * @param endPos - End position for the token
     * @returns Token if stem matches a keyword, null otherwise
     */
    protected tryMorphKeywordMatch(word: string, startPos: number, endPos: number): LanguageToken | null;
    /**
     * Try to extract a CSS selector at the current position.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected trySelector(input: string, pos: number): LanguageToken | null;
    /**
     * Try to extract an event modifier at the current position.
     * Event modifiers are .once, .debounce(N), .throttle(N), .queue(strategy)
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryEventModifier(input: string, pos: number): LanguageToken | null;
    /**
     * Try to extract a string literal at the current position.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryString(input: string, pos: number): LanguageToken | null;
    /**
     * Try to extract a number at the current position.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryNumber(input: string, pos: number): LanguageToken | null;
    /**
     * Configuration for native language time units.
     * Maps patterns to their standard suffix (ms, s, m, h).
     *
     * NOTE(review): the constant's name suggests these are the standard
     * (ms, s, m, h) units that tryNumberWithTimeUnits() falls back to, rather
     * than native-language units — confirm and reword the first line if so.
     */
    protected static readonly STANDARD_TIME_UNITS: readonly TimeUnitMapping[];
    /**
     * Try to match a time unit from a list of patterns.
     *
     * @param input - Input string
     * @param pos - Position after the number
     * @param timeUnits - Array of time unit mappings (native pattern → standard suffix)
     * @param skipWhitespace - Whether to skip whitespace before time unit (default: false)
     * @returns Object with matched suffix and new position, or null if no match
     */
    protected tryMatchTimeUnit(input: string, pos: number, timeUnits: readonly TimeUnitMapping[], skipWhitespace?: boolean): {
        suffix: string;
        endPos: number;
    } | null;
    /**
     * Parse a base number (sign, integer, decimal) without time units.
     * Returns the number string and end position.
     *
     * @param input - Input string
     * @param startPos - Start position
     * @param allowSign - Whether to allow +/- sign (default: true)
     * @returns Object with number string and end position, or null
     */
    protected parseBaseNumber(input: string, startPos: number, allowSign?: boolean): {
        number: string;
        endPos: number;
    } | null;
    /**
     * Try to extract a number with native language time units.
     *
     * This is a template method that handles the common pattern:
     * 1. Parse the base number (sign, integer, decimal)
     * 2. Try to match native language time units
     * 3. Fall back to standard time units (ms, s, m, h)
     *
     * @param input - Input string
     * @param pos - Start position
     * @param nativeTimeUnits - Language-specific time unit mappings
     * @param options - Configuration options
     * @returns Token if number found, null otherwise
     */
    protected tryNumberWithTimeUnits(input: string, pos: number, nativeTimeUnits: readonly TimeUnitMapping[], options?: {
        allowSign?: boolean;
        skipWhitespace?: boolean;
    }): LanguageToken | null;
    /**
     * Try to extract a URL at the current position.
     * Handles /path, ./path, ../path, //domain.com, http://, https://
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryUrl(input: string, pos: number): LanguageToken | null;
    /**
     * Try to extract a variable reference (:varname) at the current position.
     * In hyperscript, :x refers to a local variable named x.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryVariableRef(input: string, pos: number): LanguageToken | null;
    /**
     * Try to extract an operator or punctuation token at the current position.
     * Handles two-character operators (==, !=, etc.) and single-character operators.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryOperator(input: string, pos: number): LanguageToken | null;
    /**
     * Try to match a multi-character particle from a list.
     *
     * Used by languages like Japanese, Korean, and Chinese that have
     * multi-character particles (e.g., Japanese から, まで, より).
     *
     * @param input - Input string
     * @param pos - Current position
     * @param particles - Array of multi-character particles to match
     * @returns Token if matched, null otherwise
     */
    protected tryMultiCharParticle(input: string, pos: number, particles: readonly string[]): LanguageToken | null;
}
|
|
801
|
+
|
|
802
|
+
/**
|
|
803
|
+
* Arabic Tokenizer
|
|
804
|
+
*
|
|
805
|
+
* Tokenizes Arabic hyperscript input.
|
|
806
|
+
* Arabic is challenging because:
|
|
807
|
+
* - Right-to-left (RTL) text direction
|
|
808
|
+
* - Prefix prepositions that attach to words (بـ, لـ, كـ)
|
|
809
|
+
* - Root-pattern morphology
|
|
810
|
+
* - CSS selectors are LTR islands within RTL text
|
|
811
|
+
*/
|
|
812
|
+
|
|
813
|
+
/**
 * Tokenizer for Arabic hyperscript input (language "ar", right-to-left).
 */
declare class ArabicTokenizer extends BaseTokenizer {
    /** Language code handled by this tokenizer */
    readonly language = "ar";
    /** Arabic is written right-to-left */
    readonly direction: "rtl";
    constructor();
    /** Convert input string to token stream */
    tokenize(input: string): TokenStream;
    /** Classify a single token */
    classifyToken(token: string): TokenKind;
    /**
     * Try to match an Arabic preposition.
     * Attaches prepositionValue metadata for disambiguation in pattern matching.
     */
    private tryPreposition;
    /**
     * Try to extract a proclitic (conjunction or preposition) that's attached to the following word.
     *
     * Arabic proclitics attach directly to words without space:
     * - والنقر → و + النقر (and + the-click)
     * - فالتبديل → ف + التبديل (then + the-toggle)
     * - بالنقر → ب + النقر (with + the-click)
     * - ولالنقر → و + ل + النقر (and + to + the-click)
     *
     * This enables:
     * - Polysyndetic coordination: A وB وC
     * - Attached prepositions: بالنقر (with-the-click)
     * - Multi-proclitic sequences: ولالنقر (and-to-the-click)
     *
     * Returns null if:
     * - Not a proclitic character/sequence
     * - Proclitic is standalone (followed by space)
     * - Remaining word is too short (< 2 chars, to avoid false positives)
     * - Full word is a recognized keyword (e.g., بدل should NOT be split to ب + دل)
     *
     * @see NATIVE_REVIEW_NEEDED.md for implementation rationale
     */
    private tryProclitic;
    /**
     * Extract an Arabic word.
     * Uses morphological normalization to handle prefix/suffix variations.
     * Attaches metadata for temporal markers (formality, confidence).
     */
    private extractArabicWord;
    /**
     * Extract an ASCII word.
     */
    private extractAsciiWord;
    /**
     * Extract a number, including Arabic time unit suffixes.
     * Arabic allows space between number and unit.
     */
    private extractArabicNumber;
}
|
|
863
|
+
/**
 * Singleton instance of the Arabic tokenizer.
 */
declare const arabicTokenizer: ArabicTokenizer;
|
|
867
|
+
|
|
868
|
+
/**
|
|
869
|
+
* Language Profile Types
|
|
870
|
+
*
|
|
871
|
+
* Type definitions for language profiles, separated for tree-shaking.
|
|
872
|
+
*/
|
|
873
|
+
|
|
874
|
+
/**
 * Word order in a language (for declarative statements).
 * S = subject, V = verb, O = object.
 */
type WordOrder =
    | 'SVO'
    | 'SOV'
    | 'VSO'
    | 'VOS'
    | 'OSV'
    | 'OVS';
|
|
878
|
+
/**
 * How grammatical relationships are marked.
 */
type MarkingStrategy$1 =
    | 'preposition'
    | 'postposition'
    | 'particle'
    | 'case-suffix';
|
|
882
|
+
/**
 * A grammatical marker (preposition, particle, etc.) for a semantic role.
 */
interface RoleMarker {
    /** Primary marker for this role */
    readonly primary: string;
    /** Alternative markers that also work */
    readonly alternatives?: string[];
    /** Position relative to the role value */
    readonly position: 'before' | 'after';
}
|
|
893
|
+
/**
 * Verb form configuration for a language.
 */
interface VerbConfig$1 {
    /** Position of verb in the sentence */
    readonly position: 'start' | 'end' | 'second';
    /** Common verb suffixes/conjugations to recognize */
    readonly suffixes?: string[];
    /** Whether the language commonly drops subjects */
    readonly subjectDrop?: boolean;
}
|
|
904
|
+
/**
 * Configuration for possessive expression construction.
 * Defines how "X's property" is expressed in a language.
 */
interface PossessiveConfig {
    /** Possessive marker (e.g., "'s" in English, "の" in Japanese) */
    readonly marker: string;
    /** Position of marker: 'after-object' (X's Y), 'between' (X の Y), 'before-property' */
    readonly markerPosition: 'after-object' | 'between' | 'before-property';
    /** Special possessive forms (e.g., 'me' → 'my' in English) */
    readonly specialForms?: Record<string, string>;
    /** Whether to use possessive adjectives instead of marker (e.g., Spanish mi/tu/su) */
    readonly usePossessiveAdjectives?: boolean;
    /**
     * Possessive keywords mapped to their corresponding reference.
     * Used by pattern-matcher to recognize possessive expressions.
     * Example: { my: 'me', your: 'you', its: 'it' }
     */
    readonly keywords?: Record<string, string>;
}
|
|
924
|
+
/**
|
|
925
|
+
* Complete language profile for pattern generation.
|
|
926
|
+
*/
|
|
927
|
+
interface LanguageProfile {
|
|
928
|
+
/** ISO 639-1 language code */
|
|
929
|
+
readonly code: string;
|
|
930
|
+
/** Human-readable language name */
|
|
931
|
+
readonly name: string;
|
|
932
|
+
/** Native name */
|
|
933
|
+
readonly nativeName: string;
|
|
934
|
+
/** Text direction */
|
|
935
|
+
readonly direction: 'ltr' | 'rtl';
|
|
936
|
+
/** Primary word order */
|
|
937
|
+
readonly wordOrder: WordOrder;
|
|
938
|
+
/** How this language marks grammatical roles */
|
|
939
|
+
readonly markingStrategy: MarkingStrategy$1;
|
|
940
|
+
/** Markers for each semantic role */
|
|
941
|
+
readonly roleMarkers: Partial<Record<SemanticRole, RoleMarker>>;
|
|
942
|
+
/** Verb configuration */
|
|
943
|
+
readonly verb: VerbConfig$1;
|
|
944
|
+
/** Command keyword translations */
|
|
945
|
+
readonly keywords: Record<string, KeywordTranslation$1>;
|
|
946
|
+
/** Whether the language uses spaces between words */
|
|
947
|
+
readonly usesSpaces: boolean;
|
|
948
|
+
/** Special tokenization configuration */
|
|
949
|
+
readonly tokenization?: TokenizationConfig$1;
|
|
950
|
+
/** Reference translations (me, it, you, etc.) */
|
|
951
|
+
readonly references?: Record<string, string>;
|
|
952
|
+
/** Possessive expression configuration */
|
|
953
|
+
readonly possessive?: PossessiveConfig;
|
|
954
|
+
/** Event handler pattern configuration (for simple SVO languages) */
|
|
955
|
+
readonly eventHandler?: EventHandlerConfig;
|
|
956
|
+
/**
|
|
957
|
+
* Default verb form for command keywords. Defaults to 'infinitive'.
|
|
958
|
+
*
|
|
959
|
+
* Based on software UI localization research:
|
|
960
|
+
* - 'infinitive': Spanish, French, German, Portuguese, Russian (industry standard)
|
|
961
|
+
* - 'imperative': Polish
|
|
962
|
+
* - 'base': English, Japanese, Korean (no distinction or same form)
|
|
963
|
+
*
|
|
964
|
+
* Individual keywords can override this via KeywordTranslation.form
|
|
965
|
+
*/
|
|
966
|
+
readonly defaultVerbForm?: VerbForm;
|
|
967
|
+
}
|
|
968
|
+
/**
|
|
969
|
+
* Configuration for event handler pattern generation.
|
|
970
|
+
* Used by simple SVO languages that don't need hand-crafted patterns.
|
|
971
|
+
*/
|
|
972
|
+
interface EventHandlerConfig {
|
|
973
|
+
/** Primary event keyword (e.g., 'on', 'bei', 'sur') */
|
|
974
|
+
readonly keyword: KeywordTranslation$1;
|
|
975
|
+
/** Source filter marker (e.g., 'from', 'von', 'de') */
|
|
976
|
+
readonly sourceMarker: RoleMarker;
|
|
977
|
+
/** Conditional keyword (e.g., 'when', 'wenn', 'quand') */
|
|
978
|
+
readonly conditionalKeyword?: KeywordTranslation$1;
|
|
979
|
+
}
|
|
980
|
+
/**
|
|
981
|
+
* Verb form used for command keywords.
|
|
982
|
+
*
|
|
983
|
+
* Based on software localization research:
|
|
984
|
+
* - 'infinitive': Standard for most languages (Spanish, French, German, Russian)
|
|
985
|
+
* Example: "Guardar", "Enregistrer", "Speichern"
|
|
986
|
+
* - 'imperative': Used by some languages (Polish)
|
|
987
|
+
* Example: "Zapisz", "Otwórz"
|
|
988
|
+
* - 'base': For languages where forms are identical (English, Japanese, Korean)
|
|
989
|
+
* or where the distinction doesn't apply
|
|
990
|
+
*/
|
|
991
|
+
type VerbForm = 'infinitive' | 'imperative' | 'base';
|
|
992
|
+
/**
|
|
993
|
+
* Translation of a command keyword.
|
|
994
|
+
*/
|
|
995
|
+
interface KeywordTranslation$1 {
|
|
996
|
+
/** Primary translation (used for output/rendering) */
|
|
997
|
+
readonly primary: string;
|
|
998
|
+
/** Alternative forms for parsing (conjugations, synonyms, informal variants) */
|
|
999
|
+
readonly alternatives?: string[];
|
|
1000
|
+
/** Normalized English form for internal matching */
|
|
1001
|
+
readonly normalized?: string;
|
|
1002
|
+
/**
|
|
1003
|
+
* The grammatical form of 'primary'. Defaults to 'infinitive'.
|
|
1004
|
+
* This documents the form used and enables future form-switching features.
|
|
1005
|
+
* - 'infinitive': Dictionary form (alternar, basculer) - industry standard
|
|
1006
|
+
* - 'imperative': Command form (alterna, bascule) - used by some languages (e.g., Polish)
|
|
1007
|
+
* - 'base': Same form for both (toggle, トグル) - English, Japanese, Korean
|
|
1008
|
+
*/
|
|
1009
|
+
readonly form?: VerbForm;
|
|
1010
|
+
}
|
|
1011
|
+
/**
|
|
1012
|
+
* Special tokenization configuration.
|
|
1013
|
+
*/
|
|
1014
|
+
interface TokenizationConfig$1 {
|
|
1015
|
+
/** Particles to recognize (for particle languages) */
|
|
1016
|
+
readonly particles?: string[];
|
|
1017
|
+
/** Prefixes to recognize (for prefixing languages) */
|
|
1018
|
+
readonly prefixes?: string[];
|
|
1019
|
+
/** Word boundary detection strategy */
|
|
1020
|
+
readonly boundaryStrategy?: 'space' | 'particle' | 'character';
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
/**
|
|
1024
|
+
* Arabic Language Profile
|
|
1025
|
+
*
|
|
1026
|
+
* VSO word order, prepositions, RTL (right-to-left), space-separated.
|
|
1027
|
+
* Features root-based morphology and rich verb conjugation.
|
|
1028
|
+
*/
|
|
1029
|
+
|
|
1030
|
+
declare const arabicProfile: LanguageProfile;
|
|
1031
|
+
|
|
1032
|
+
/**
|
|
1033
|
+
* German Language Profile
|
|
1034
|
+
*
|
|
1035
|
+
* SVO word order (V2 in main clauses), prepositions, space-separated.
|
|
1036
|
+
* Features case system, compound words, and verb-second word order in main clauses.
|
|
1037
|
+
*/
|
|
1038
|
+
|
|
1039
|
+
declare const germanProfile: LanguageProfile;
|
|
1040
|
+
|
|
1041
|
+
/**
|
|
1042
|
+
* English Tokenizer
|
|
1043
|
+
*
|
|
1044
|
+
* Tokenizes English hyperscript input.
|
|
1045
|
+
* English uses space-separated words with prepositions.
|
|
1046
|
+
*/
|
|
1047
|
+
|
|
1048
|
+
declare class EnglishTokenizer extends BaseTokenizer {
|
|
1049
|
+
readonly language = "en";
|
|
1050
|
+
readonly direction: "ltr";
|
|
1051
|
+
constructor();
|
|
1052
|
+
tokenize(input: string): TokenStream;
|
|
1053
|
+
classifyToken(token: string): TokenKind;
|
|
1054
|
+
/**
|
|
1055
|
+
* Extract a word (identifier or keyword) from the input.
|
|
1056
|
+
* Handles namespaced event names like "draggable:start".
|
|
1057
|
+
*/
|
|
1058
|
+
private extractWord;
|
|
1059
|
+
/**
|
|
1060
|
+
* Try to convert an identifier followed by "class" to a class selector.
|
|
1061
|
+
* E.g., "active class" → ".active"
|
|
1062
|
+
*
|
|
1063
|
+
* This enables natural English syntax like:
|
|
1064
|
+
* - "toggle the active class" → "toggle .active"
|
|
1065
|
+
* - "add the visible class" → "add .visible"
|
|
1066
|
+
*/
|
|
1067
|
+
private tryConvertToClassSelector;
|
|
1068
|
+
}
|
|
1069
|
+
/**
|
|
1070
|
+
* Singleton instance.
|
|
1071
|
+
*/
|
|
1072
|
+
declare const englishTokenizer: EnglishTokenizer;
|
|
1073
|
+
|
|
1074
|
+
/**
|
|
1075
|
+
* English Language Profile
|
|
1076
|
+
*
|
|
1077
|
+
* SVO word order, prepositions, space-separated.
|
|
1078
|
+
*/
|
|
1079
|
+
|
|
1080
|
+
declare const englishProfile: LanguageProfile;
|
|
1081
|
+
|
|
1082
|
+
/**
|
|
1083
|
+
* Spanish Tokenizer
|
|
1084
|
+
*
|
|
1085
|
+
* Tokenizes Spanish hyperscript input.
|
|
1086
|
+
* Spanish is relatively straightforward as it:
|
|
1087
|
+
* - Uses space-separated words like English
|
|
1088
|
+
* - Has similar preposition structure (SVO)
|
|
1089
|
+
* - Uses accent marks that need proper handling
|
|
1090
|
+
*/
|
|
1091
|
+
|
|
1092
|
+
declare class SpanishTokenizer extends BaseTokenizer {
|
|
1093
|
+
readonly language = "es";
|
|
1094
|
+
readonly direction: "ltr";
|
|
1095
|
+
constructor();
|
|
1096
|
+
tokenize(input: string): TokenStream;
|
|
1097
|
+
classifyToken(token: string): TokenKind;
|
|
1098
|
+
/**
|
|
1099
|
+
* Try to match multi-word phrases that function as single units.
|
|
1100
|
+
* Multi-word phrases are included in profileKeywords and sorted longest-first,
|
|
1101
|
+
* so they'll be matched before their constituent words.
|
|
1102
|
+
*/
|
|
1103
|
+
private tryMultiWordPhrase;
|
|
1104
|
+
/**
|
|
1105
|
+
* Extract a Spanish word.
|
|
1106
|
+
*
|
|
1107
|
+
* Uses morphological normalization to handle:
|
|
1108
|
+
* - Reflexive verbs (mostrarse → mostrar)
|
|
1109
|
+
* - Verb conjugations (alternando → alternar)
|
|
1110
|
+
*/
|
|
1111
|
+
private extractSpanishWord;
|
|
1112
|
+
/**
|
|
1113
|
+
* Extract a number, including Spanish time unit suffixes.
|
|
1114
|
+
*/
|
|
1115
|
+
private extractSpanishNumber;
|
|
1116
|
+
}
|
|
1117
|
+
/**
|
|
1118
|
+
* Singleton instance.
|
|
1119
|
+
*/
|
|
1120
|
+
declare const spanishTokenizer: SpanishTokenizer;
|
|
1121
|
+
|
|
1122
|
+
/**
|
|
1123
|
+
* Spanish Language Profile
|
|
1124
|
+
*
|
|
1125
|
+
* SVO word order, prepositions, space-separated.
|
|
1126
|
+
* Features rich verb conjugation with pro-drop (subject omission).
|
|
1127
|
+
*/
|
|
1128
|
+
|
|
1129
|
+
declare const spanishProfile: LanguageProfile;
|
|
1130
|
+
|
|
1131
|
+
/**
|
|
1132
|
+
* French Language Profile
|
|
1133
|
+
*
|
|
1134
|
+
* SVO word order, prepositions, space-separated.
|
|
1135
|
+
* Features rich verb conjugation and gendered articles.
|
|
1136
|
+
*/
|
|
1137
|
+
|
|
1138
|
+
declare const frenchProfile: LanguageProfile;
|
|
1139
|
+
|
|
1140
|
+
/**
|
|
1141
|
+
* Indonesian Language Profile
|
|
1142
|
+
*
|
|
1143
|
+
* SVO word order, prepositions, space-separated, agglutinative.
|
|
1144
|
+
* Features affixation for verb derivation (me-, ber-, di-, -kan, -i).
|
|
1145
|
+
*/
|
|
1146
|
+
|
|
1147
|
+
declare const indonesianProfile: LanguageProfile;
|
|
1148
|
+
|
|
1149
|
+
/**
|
|
1150
|
+
* Japanese Tokenizer
|
|
1151
|
+
*
|
|
1152
|
+
* Tokenizes Japanese hyperscript input.
|
|
1153
|
+
* Japanese is challenging because:
|
|
1154
|
+
* - No spaces between words
|
|
1155
|
+
* - Particles (助詞) mark grammatical roles
|
|
1156
|
+
* - Mixed scripts (hiragana, katakana, kanji, romaji)
|
|
1157
|
+
* - CSS selectors are embedded ASCII
|
|
1158
|
+
*/
|
|
1159
|
+
|
|
1160
|
+
declare class JapaneseTokenizer extends BaseTokenizer {
|
|
1161
|
+
readonly language = "ja";
|
|
1162
|
+
readonly direction: "ltr";
|
|
1163
|
+
constructor();
|
|
1164
|
+
tokenize(input: string): TokenStream;
|
|
1165
|
+
classifyToken(token: string): TokenKind;
|
|
1166
|
+
/**
|
|
1167
|
+
* Extract a Japanese word (sequence of kanji/kana).
|
|
1168
|
+
* Stops at particles, ASCII, or whitespace.
|
|
1169
|
+
*
|
|
1170
|
+
* Uses morphological normalization to handle verb conjugations:
|
|
1171
|
+
* 1. First checks if the exact word is in the keyword map
|
|
1172
|
+
* 2. If not found, tries to strip conjugation suffixes and check again
|
|
1173
|
+
*/
|
|
1174
|
+
private extractJapaneseWord;
|
|
1175
|
+
/**
|
|
1176
|
+
* Extract an ASCII word (for mixed Japanese/English content).
|
|
1177
|
+
*/
|
|
1178
|
+
private extractAsciiWord;
|
|
1179
|
+
/**
|
|
1180
|
+
* Extract a number, including Japanese time unit suffixes.
|
|
1181
|
+
* Japanese time units attach directly without whitespace.
|
|
1182
|
+
*/
|
|
1183
|
+
private extractJapaneseNumber;
|
|
1184
|
+
}
|
|
1185
|
+
/**
|
|
1186
|
+
* Singleton instance.
|
|
1187
|
+
*/
|
|
1188
|
+
declare const japaneseTokenizer: JapaneseTokenizer;
|
|
1189
|
+
|
|
1190
|
+
/**
|
|
1191
|
+
* Japanese Language Profile
|
|
1192
|
+
*
|
|
1193
|
+
* SOV word order, particles (を, に, で, etc.), no spaces between words.
|
|
1194
|
+
* Agglutinative language with rich verb conjugation.
|
|
1195
|
+
*/
|
|
1196
|
+
|
|
1197
|
+
declare const japaneseProfile: LanguageProfile;
|
|
1198
|
+
|
|
1199
|
+
/**
|
|
1200
|
+
* Korean Tokenizer
|
|
1201
|
+
*
|
|
1202
|
+
* Tokenizes Korean hyperscript input.
|
|
1203
|
+
* Korean is an agglutinative language with:
|
|
1204
|
+
* - Hangul syllable blocks (가-힣)
|
|
1205
|
+
* - Particles (조사) mark grammatical roles
|
|
1206
|
+
* - 하다 verbs (noun + 하다)
|
|
1207
|
+
* - CSS selectors are embedded ASCII
|
|
1208
|
+
*/
|
|
1209
|
+
|
|
1210
|
+
declare class KoreanTokenizer extends BaseTokenizer {
|
|
1211
|
+
readonly language = "ko";
|
|
1212
|
+
readonly direction: "ltr";
|
|
1213
|
+
constructor();
|
|
1214
|
+
tokenize(input: string): TokenStream;
|
|
1215
|
+
classifyToken(token: string): TokenKind;
|
|
1216
|
+
/**
|
|
1217
|
+
* Extract a Korean word (sequence of Hangul).
|
|
1218
|
+
* Prioritizes known keywords, then uses particle-based word boundaries.
|
|
1219
|
+
*
|
|
1220
|
+
* Uses morphological normalization to handle verb conjugations.
|
|
1221
|
+
*/
|
|
1222
|
+
private extractKoreanWord;
|
|
1223
|
+
/**
|
|
1224
|
+
* Extract an ASCII word (for mixed Korean/English content).
|
|
1225
|
+
*/
|
|
1226
|
+
private extractAsciiWord;
|
|
1227
|
+
/**
|
|
1228
|
+
* Extract a number, including Korean time unit suffixes.
|
|
1229
|
+
* Korean time units attach directly without whitespace.
|
|
1230
|
+
*/
|
|
1231
|
+
private extractKoreanNumber;
|
|
1232
|
+
}
|
|
1233
|
+
/**
|
|
1234
|
+
* Singleton instance.
|
|
1235
|
+
*/
|
|
1236
|
+
declare const koreanTokenizer: KoreanTokenizer;
|
|
1237
|
+
|
|
1238
|
+
/**
|
|
1239
|
+
* Korean Language Profile
|
|
1240
|
+
*
|
|
1241
|
+
* SOV word order, particles (을/를, 에, 에서, etc.), words separated by spaces.
|
|
1242
|
+
* Agglutinative language with particles attaching to words.
|
|
1243
|
+
*/
|
|
1244
|
+
|
|
1245
|
+
declare const koreanProfile: LanguageProfile;
|
|
1246
|
+
|
|
1247
|
+
/**
|
|
1248
|
+
* Portuguese Language Profile
|
|
1249
|
+
*
|
|
1250
|
+
* SVO word order, prepositions, space-separated.
|
|
1251
|
+
* Features rich verb conjugation with pro-drop (subject omission).
|
|
1252
|
+
*/
|
|
1253
|
+
|
|
1254
|
+
declare const portugueseProfile: LanguageProfile;
|
|
1255
|
+
|
|
1256
|
+
/**
|
|
1257
|
+
* Quechua Language Profile
|
|
1258
|
+
*
|
|
1259
|
+
* SOV word order, postpositions (suffixes), polysynthetic/agglutinative.
|
|
1260
|
+
* Indigenous language of the Andean region with rich morphology.
|
|
1261
|
+
*/
|
|
1262
|
+
|
|
1263
|
+
declare const quechuaProfile: LanguageProfile;
|
|
1264
|
+
|
|
1265
|
+
/**
|
|
1266
|
+
* Swahili Language Profile
|
|
1267
|
+
*
|
|
1268
|
+
* SVO word order, prepositions, space-separated, agglutinative.
|
|
1269
|
+
* Features noun class system (18 classes) and verb agreement prefixes.
|
|
1270
|
+
*/
|
|
1271
|
+
|
|
1272
|
+
declare const swahiliProfile: LanguageProfile;
|
|
1273
|
+
|
|
1274
|
+
/**
|
|
1275
|
+
* Turkish Tokenizer
|
|
1276
|
+
*
|
|
1277
|
+
* Tokenizes Turkish hyperscript input.
|
|
1278
|
+
* Turkish is challenging because:
|
|
1279
|
+
* - Highly agglutinative (many suffixes attach to words)
|
|
1280
|
+
* - Strict vowel harmony rules
|
|
1281
|
+
* - Postpositions instead of prepositions
|
|
1282
|
+
* - No grammatical gender
|
|
1283
|
+
* - Word order is typically SOV
|
|
1284
|
+
*/
|
|
1285
|
+
|
|
1286
|
+
declare class TurkishTokenizer extends BaseTokenizer {
|
|
1287
|
+
readonly language = "tr";
|
|
1288
|
+
readonly direction: "ltr";
|
|
1289
|
+
constructor();
|
|
1290
|
+
tokenize(input: string): TokenStream;
|
|
1291
|
+
classifyToken(token: string): TokenKind;
|
|
1292
|
+
/**
|
|
1293
|
+
* Extract a Turkish word.
|
|
1294
|
+
* Uses morphological normalization to handle verb conjugations.
|
|
1295
|
+
*/
|
|
1296
|
+
private extractTurkishWord;
|
|
1297
|
+
/**
|
|
1298
|
+
* Extract a number, including Turkish time unit suffixes.
|
|
1299
|
+
*/
|
|
1300
|
+
private extractTurkishNumber;
|
|
1301
|
+
}
|
|
1302
|
+
/**
|
|
1303
|
+
* Singleton instance.
|
|
1304
|
+
*/
|
|
1305
|
+
declare const turkishTokenizer: TurkishTokenizer;
|
|
1306
|
+
|
|
1307
|
+
/**
|
|
1308
|
+
* Turkish Language Profile
|
|
1309
|
+
*
|
|
1310
|
+
* SOV word order, case suffixes (agglutinative), space-separated.
|
|
1311
|
+
* Features vowel harmony and extensive suffixation.
|
|
1312
|
+
*/
|
|
1313
|
+
|
|
1314
|
+
declare const turkishProfile: LanguageProfile;
|
|
1315
|
+
|
|
1316
|
+
/**
|
|
1317
|
+
* Chinese Tokenizer
|
|
1318
|
+
*
|
|
1319
|
+
* Tokenizes Chinese hyperscript input.
|
|
1320
|
+
* Chinese is challenging because:
|
|
1321
|
+
* - No spaces between words (like Japanese)
|
|
1322
|
+
* - Uses CJK characters (shared with Japanese Kanji)
|
|
1323
|
+
* - SVO word order (like English)
|
|
1324
|
+
* - Uses prepositions (把, 在, 从, etc.) for grammatical roles
|
|
1325
|
+
* - No conjugation (unlike Japanese/Korean)
|
|
1326
|
+
* - CSS selectors are embedded ASCII
|
|
1327
|
+
*/
|
|
1328
|
+
|
|
1329
|
+
declare class ChineseTokenizer extends BaseTokenizer {
|
|
1330
|
+
readonly language = "zh";
|
|
1331
|
+
readonly direction: "ltr";
|
|
1332
|
+
constructor();
|
|
1333
|
+
tokenize(input: string): TokenStream;
|
|
1334
|
+
classifyToken(token: string): TokenKind;
|
|
1335
|
+
/**
|
|
1336
|
+
* Extract a Chinese word.
|
|
1337
|
+
* Uses greedy matching to find the longest known keyword.
|
|
1338
|
+
* Chinese doesn't have inflection, so we don't need morphological normalization.
|
|
1339
|
+
* profileKeywords is already sorted longest-first, enabling greedy matching.
|
|
1340
|
+
*/
|
|
1341
|
+
private extractChineseWord;
|
|
1342
|
+
/**
|
|
1343
|
+
* Extract an ASCII word (for mixed Chinese/English content).
|
|
1344
|
+
*/
|
|
1345
|
+
private extractAsciiWord;
|
|
1346
|
+
/**
|
|
1347
|
+
* Try to extract a string literal, including Chinese quotes.
|
|
1348
|
+
* Chinese quotes: \u201C “ (open) \u201D ” (close) \u2018 ‘ (open) \u2019 ’ (close)
|
|
1349
|
+
*/
|
|
1350
|
+
private tryChineseString;
|
|
1351
|
+
/**
|
|
1352
|
+
* Extract a number, including Chinese time unit suffixes.
|
|
1353
|
+
* Chinese time units attach directly without whitespace.
|
|
1354
|
+
*/
|
|
1355
|
+
private extractChineseNumber;
|
|
1356
|
+
}
|
|
1357
|
+
/**
|
|
1358
|
+
* Singleton instance.
|
|
1359
|
+
*/
|
|
1360
|
+
declare const chineseTokenizer: ChineseTokenizer;
|
|
1361
|
+
|
|
1362
|
+
/**
|
|
1363
|
+
* Chinese (Simplified) Language Profile
|
|
1364
|
+
*
|
|
1365
|
+
* SVO word order, no markers (relies on word order), no spaces between words.
|
|
1366
|
+
* Isolating language with topic-comment structure and optional BA construction.
|
|
1367
|
+
*/
|
|
1368
|
+
|
|
1369
|
+
declare const chineseProfile: LanguageProfile;
|
|
1370
|
+
|
|
1371
|
+
/**
|
|
1372
|
+
* Pattern Registry
|
|
1373
|
+
*
|
|
1374
|
+
* Pattern cache and lookup functions for the semantic parser.
|
|
1375
|
+
*/
|
|
1376
|
+
|
|
1377
|
+
/**
|
|
1378
|
+
* Get all patterns.
|
|
1379
|
+
* @deprecated Use getPatternsForLanguage() for tree-shaking.
|
|
1380
|
+
*/
|
|
1381
|
+
declare function getAllPatterns(): LanguagePattern[];
|
|
1382
|
+
/**
|
|
1383
|
+
* Get all patterns for a specific language.
|
|
1384
|
+
* Uses caching for performance.
|
|
1385
|
+
*/
|
|
1386
|
+
declare function getPatternsForLanguage(language: string): LanguagePattern[];
|
|
1387
|
+
/**
|
|
1388
|
+
* Get patterns for a specific language and command.
|
|
1389
|
+
*/
|
|
1390
|
+
declare function getPatternsForLanguageAndCommand(language: string, command: ActionType): LanguagePattern[];
|
|
1391
|
+
/**
|
|
1392
|
+
* Get all supported languages.
|
|
1393
|
+
*/
|
|
1394
|
+
declare function getSupportedLanguages$3(): string[];
|
|
1395
|
+
/**
|
|
1396
|
+
* Get all supported commands.
|
|
1397
|
+
*/
|
|
1398
|
+
declare function getSupportedCommands(): ActionType[];
|
|
1399
|
+
/**
|
|
1400
|
+
* Find a pattern by ID.
|
|
1401
|
+
*/
|
|
1402
|
+
declare function getPatternById(id: string): LanguagePattern | undefined;
|
|
1403
|
+
interface PatternStats {
|
|
1404
|
+
totalPatterns: number;
|
|
1405
|
+
byLanguage: Record<string, number>;
|
|
1406
|
+
byCommand: Record<string, number>;
|
|
1407
|
+
}
|
|
1408
|
+
/**
|
|
1409
|
+
* Get statistics about registered patterns.
|
|
1410
|
+
*/
|
|
1411
|
+
declare function getPatternStats(): PatternStats;
|
|
1412
|
+
|
|
1413
|
+
/**
|
|
1414
|
+
* Toggle Command Patterns
|
|
1415
|
+
*
|
|
1416
|
+
* Hand-crafted patterns for "toggle" command across languages.
|
|
1417
|
+
*
|
|
1418
|
+
* @generated This file is auto-generated. Do not edit manually.
|
|
1419
|
+
*/
|
|
1420
|
+
|
|
1421
|
+
/**
|
|
1422
|
+
* Get toggle patterns for a specific language.
|
|
1423
|
+
*/
|
|
1424
|
+
declare function getTogglePatternsForLanguage(language: string): LanguagePattern[];
|
|
1425
|
+
|
|
1426
|
+
/**
|
|
1427
|
+
* Put Command Patterns
|
|
1428
|
+
*
|
|
1429
|
+
* Hand-crafted patterns for "put" command across languages.
|
|
1430
|
+
*
|
|
1431
|
+
* @generated This file is auto-generated. Do not edit manually.
|
|
1432
|
+
*/
|
|
1433
|
+
|
|
1434
|
+
/**
|
|
1435
|
+
* Get put patterns for a specific language.
|
|
1436
|
+
*/
|
|
1437
|
+
declare function getPutPatternsForLanguage(language: string): LanguagePattern[];
|
|
1438
|
+
|
|
1439
|
+
/**
|
|
1440
|
+
* EventHandler Command Patterns
|
|
1441
|
+
*
|
|
1442
|
+
* Hand-crafted patterns for "event-handler" command across languages.
|
|
1443
|
+
*
|
|
1444
|
+
* @generated This file is auto-generated. Do not edit manually.
|
|
1445
|
+
*/
|
|
1446
|
+
|
|
1447
|
+
/**
|
|
1448
|
+
* Get event-handler patterns for a specific language.
|
|
1449
|
+
*/
|
|
1450
|
+
declare function getEventHandlerPatternsForLanguage(language: string): LanguagePattern[];
|
|
1451
|
+
|
|
1452
|
+
/**
|
|
1453
|
+
* Shared Event Handler Utilities
|
|
1454
|
+
*
|
|
1455
|
+
* Event name translations and normalization used across all languages.
|
|
1456
|
+
*/
|
|
1457
|
+
/**
|
|
1458
|
+
* Common event names translated across languages.
|
|
1459
|
+
* Used by tokenizers to normalize event names to English.
|
|
1460
|
+
*/
|
|
1461
|
+
declare const eventNameTranslations: Record<string, Record<string, string>>;
|
|
1462
|
+
/**
|
|
1463
|
+
* Normalize an event name to English.
|
|
1464
|
+
*/
|
|
1465
|
+
declare function normalizeEventName(event: string, language: string): string;
|
|
1466
|
+
|
|
1467
|
+
/**
|
|
1468
|
+
* Tokenizer Registry
|
|
1469
|
+
*
|
|
1470
|
+
* Provides a unified interface for tokenization.
|
|
1471
|
+
* Delegates to the central registry for language lookups.
|
|
1472
|
+
*
|
|
1473
|
+
* For tree-shaking, import specific tokenizers directly:
|
|
1474
|
+
* import { englishTokenizer } from './tokenizers/english';
|
|
1475
|
+
*
|
|
1476
|
+
* To register languages, import the language modules:
|
|
1477
|
+
* import '@lokascript/semantic/languages/en';
|
|
1478
|
+
*/
|
|
1479
|
+
|
|
1480
|
+
/**
|
|
1481
|
+
* Get a tokenizer for the specified language.
|
|
1482
|
+
* Returns undefined if language is not registered.
|
|
1483
|
+
*/
|
|
1484
|
+
declare function getTokenizer(language: string): LanguageTokenizer | undefined;
|
|
1485
|
+
/**
|
|
1486
|
+
* Tokenize input in the specified language.
|
|
1487
|
+
* @throws Error if language is not registered
|
|
1488
|
+
*/
|
|
1489
|
+
declare function tokenize(input: string, language: string): TokenStream;
|
|
1490
|
+
/**
|
|
1491
|
+
* Get all supported languages.
|
|
1492
|
+
* Returns only languages that have been registered.
|
|
1493
|
+
*/
|
|
1494
|
+
declare function getSupportedLanguages$2(): string[];
|
|
1495
|
+
/**
|
|
1496
|
+
* Check if a language is supported.
|
|
1497
|
+
*/
|
|
1498
|
+
declare function isLanguageSupported$1(language: string): boolean;
|
|
1499
|
+
/**
|
|
1500
|
+
* Register a custom tokenizer.
|
|
1501
|
+
* Note: For full language support, use registerLanguage() from registry instead.
|
|
1502
|
+
*/
|
|
1503
|
+
declare function registerTokenizer(tokenizer: LanguageTokenizer): void;
|
|
1504
|
+
|
|
1505
|
+
declare class PatternMatcher$1 {
|
|
1506
|
+
/** Current language profile for the pattern being matched */
|
|
1507
|
+
private currentProfile;
|
|
1508
|
+
/**
|
|
1509
|
+
* Try to match a single pattern against the token stream.
|
|
1510
|
+
* Returns the match result or null if no match.
|
|
1511
|
+
*/
|
|
1512
|
+
matchPattern(tokens: TokenStream, pattern: LanguagePattern): PatternMatchResult | null;
|
|
1513
|
+
/**
|
|
1514
|
+
* Try to match multiple patterns, return the best match.
|
|
1515
|
+
*/
|
|
1516
|
+
matchBest(tokens: TokenStream, patterns: LanguagePattern[]): PatternMatchResult | null;
|
|
1517
|
+
/**
|
|
1518
|
+
* Match a sequence of pattern tokens against the token stream.
|
|
1519
|
+
*/
|
|
1520
|
+
private matchTokenSequence;
|
|
1521
|
+
/**
|
|
1522
|
+
* Match a single pattern token against the current position in the stream.
|
|
1523
|
+
*/
|
|
1524
|
+
private matchPatternToken;
|
|
1525
|
+
/**
|
|
1526
|
+
* Match a literal pattern token (keyword or particle).
|
|
1527
|
+
*/
|
|
1528
|
+
private matchLiteralToken;
|
|
1529
|
+
/**
|
|
1530
|
+
* Match a role pattern token (captures a semantic value).
|
|
1531
|
+
* Handles multi-token expressions like:
|
|
1532
|
+
* - 'my value' (possessive keyword + property)
|
|
1533
|
+
* - '#dialog.showModal()' (method call)
|
|
1534
|
+
* - "#element's *opacity" (possessive selector + property)
|
|
1535
|
+
*/
|
|
1536
|
+
private matchRoleToken;
|
|
1537
|
+
/**
|
|
1538
|
+
* Try to match a possessive expression like 'my value' or 'its innerHTML'.
|
|
1539
|
+
* Returns the PropertyPathValue if matched, or null if not.
|
|
1540
|
+
*/
|
|
1541
|
+
private tryMatchPossessiveExpression;
|
|
1542
|
+
/**
|
|
1543
|
+
* Check if a keyword is a structural keyword (preposition, control flow, etc.)
|
|
1544
|
+
* that shouldn't be consumed as a property name.
|
|
1545
|
+
*/
|
|
1546
|
+
private isStructuralKeyword;
|
|
1547
|
+
/**
|
|
1548
|
+
* Try to match a method call expression like '#dialog.showModal()'.
|
|
1549
|
+
* Pattern: selector + '.' + identifier + '(' + [args] + ')'
|
|
1550
|
+
* Returns an expression value if matched, or null if not.
|
|
1551
|
+
*/
|
|
1552
|
+
private tryMatchMethodCallExpression;
|
|
1553
|
+
/**
|
|
1554
|
+
* Try to match a property access expression like 'userData.name' or 'it.data'.
|
|
1555
|
+
* Pattern: (identifier | keyword) + '.' + identifier [+ '.' + identifier ...]
|
|
1556
|
+
* Returns an expression value if matched, or null if not.
|
|
1557
|
+
*/
|
|
1558
|
+
private tryMatchPropertyAccessExpression;
|
|
1559
|
+
/**
|
|
1560
|
+
* Try to match a possessive selector expression like "#element's *opacity".
|
|
1561
|
+
* Pattern: selector + "'s" + (selector | identifier)
|
|
1562
|
+
* Returns a property-path value if matched, or null if not.
|
|
1563
|
+
*/
|
|
1564
|
+
private tryMatchPossessiveSelectorExpression;
|
|
1565
|
+
/**
|
|
1566
|
+
* Try to match a selector + property expression like "#output.innerText".
|
|
1567
|
+
* This handles cases where the tokenizer produces two selector tokens:
|
|
1568
|
+
* - #output (id selector)
|
|
1569
|
+
* - .innerText (looks like class selector, but is actually property)
|
|
1570
|
+
*
|
|
1571
|
+
* Pattern: id-selector + class-selector-that-is-actually-property
|
|
1572
|
+
* Returns a property-path value if matched, or null if not.
|
|
1573
|
+
*/
|
|
1574
|
+
private tryMatchSelectorPropertyExpression;
|
|
1575
|
+
/**
|
|
1576
|
+
* Match a group pattern token (optional sequence).
|
|
1577
|
+
*/
|
|
1578
|
+
private matchGroupToken;
|
|
1579
|
+
/**
|
|
1580
|
+
* Get the type of match for a token against a value.
|
|
1581
|
+
* Used for confidence calculation.
|
|
1582
|
+
*/
|
|
1583
|
+
private getMatchType;
|
|
1584
|
+
/**
|
|
1585
|
+
* Track stem matches for confidence calculation.
|
|
1586
|
+
* This is set during matching and read during confidence calculation.
|
|
1587
|
+
*/
|
|
1588
|
+
private stemMatchCount;
|
|
1589
|
+
private totalKeywordMatches;
|
|
1590
|
+
/** Maximum depth for nested property access (e.g., a.b.c.d...) */
|
|
1591
|
+
private static readonly MAX_PROPERTY_DEPTH;
|
|
1592
|
+
/** Maximum number of arguments in method calls */
|
|
1593
|
+
private static readonly MAX_METHOD_ARGS;
|
|
1594
|
+
/**
|
|
1595
|
+
* Convert a language token to a semantic value.
|
|
1596
|
+
*/
|
|
1597
|
+
private tokenToSemanticValue;
|
|
1598
|
+
/**
|
|
1599
|
+
* Parse a literal value (string, number, boolean).
|
|
1600
|
+
*/
|
|
1601
|
+
private parseLiteralValue;
|
|
1602
|
+
/**
|
|
1603
|
+
* Apply extraction rules to fill in default values for missing roles.
|
|
1604
|
+
*/
|
|
1605
|
+
private applyExtractionRules;
|
|
1606
|
+
/**
|
|
1607
|
+
* Check if a pattern token is optional.
|
|
1608
|
+
*/
|
|
1609
|
+
private isOptional;
|
|
1610
|
+
/**
|
|
1611
|
+
* Calculate confidence score for a match (0-1).
|
|
1612
|
+
*
|
|
1613
|
+
* Confidence is reduced for:
|
|
1614
|
+
* - Stem matches (morphological normalization has inherent uncertainty)
|
|
1615
|
+
* - Missing optional roles (but less penalty if role has a default value)
|
|
1616
|
+
*
|
|
1617
|
+
* Confidence is increased for:
|
|
1618
|
+
* - VSO languages (Arabic) when pattern starts with a verb
|
|
1619
|
+
*/
|
|
1620
|
+
private calculateConfidence;
|
|
1621
|
+
/**
|
|
1622
|
+
* Calculate confidence boost for VSO (Verb-Subject-Object) language patterns.
|
|
1623
|
+
* Arabic naturally uses VSO word order, so patterns that start with a verb
|
|
1624
|
+
* should receive a confidence boost.
|
|
1625
|
+
*
|
|
1626
|
+
* Returns +0.15 confidence boost if:
|
|
1627
|
+
* - Language is Arabic ('ar')
|
|
1628
|
+
* - Pattern's first token is a verb keyword
|
|
1629
|
+
*
|
|
1630
|
+
* @param pattern The language pattern being matched
|
|
1631
|
+
* @returns Confidence boost (0 or 0.15)
|
|
1632
|
+
*/
|
|
1633
|
+
private calculateVSOConfidenceBoost;
|
|
1634
|
+
/**
|
|
1635
|
+
* Arabic preposition disambiguation for confidence adjustment.
|
|
1636
|
+
*
|
|
1637
|
+
* Different Arabic prepositions are more or less natural for different semantic roles:
|
|
1638
|
+
* - على (on/upon) is preferred for patient/target roles (element selectors)
|
|
1639
|
+
* - إلى (to) is preferred for destination roles
|
|
1640
|
+
* - من (from) is preferred for source roles
|
|
1641
|
+
* - في (in) is preferred for location roles
|
|
1642
|
+
*
|
|
1643
|
+
* This method analyzes the prepositions used with captured semantic roles and
|
|
1644
|
+
* adjusts confidence based on idiomaticity:
|
|
1645
|
+
* - +0.10 for highly idiomatic preposition choices
|
|
1646
|
+
* - -0.10 for less natural preposition choices
|
|
1647
|
+
*
|
|
1648
|
+
* @param pattern The language pattern being matched
|
|
1649
|
+
* @param captured The captured semantic values
|
|
1650
|
+
* @returns Confidence adjustment (-0.10 to +0.10)
|
|
1651
|
+
*/
|
|
1652
|
+
private arabicPrepositionDisambiguation;
|
|
1653
|
+
/**
|
|
1654
|
+
* Noise words that can be skipped in English for more natural syntax.
|
|
1655
|
+
* - "the" before selectors: "toggle the .active" → "toggle .active"
|
|
1656
|
+
* - "class" after class selectors: "add the .visible class" → "add .visible"
|
|
1657
|
+
*/
|
|
1658
|
+
private static readonly ENGLISH_NOISE_WORDS;
|
|
1659
|
+
/**
|
|
1660
|
+
* Skip noise words like "the" before selectors.
|
|
1661
|
+
* This enables more natural English syntax like "toggle the .active".
|
|
1662
|
+
*/
|
|
1663
|
+
private skipNoiseWords;
|
|
1664
|
+
/**
|
|
1665
|
+
* Extract event modifiers from the token stream.
|
|
1666
|
+
* Event modifiers are .once, .debounce(N), .throttle(N), .queue(strategy)
|
|
1667
|
+
* that can appear after event names.
|
|
1668
|
+
*
|
|
1669
|
+
* Returns EventModifiers object or undefined if no modifiers found.
|
|
1670
|
+
*/
|
|
1671
|
+
extractEventModifiers(tokens: TokenStream): EventModifiers | undefined;
|
|
1672
|
+
}
|
|
1673
|
+
/**
|
|
1674
|
+
* Singleton pattern matcher instance.
|
|
1675
|
+
*/
|
|
1676
|
+
declare const patternMatcher: PatternMatcher$1;
|
|
1677
|
+
/**
|
|
1678
|
+
* Match tokens against a pattern.
|
|
1679
|
+
*/
|
|
1680
|
+
declare function matchPattern(tokens: TokenStream, pattern: LanguagePattern): PatternMatchResult | null;
|
|
1681
|
+
/**
|
|
1682
|
+
* Match tokens against multiple patterns, return best match.
|
|
1683
|
+
*/
|
|
1684
|
+
declare function matchBest(tokens: TokenStream, patterns: LanguagePattern[]): PatternMatchResult | null;
|
|
1685
|
+
|
|
1686
|
+
declare class SemanticParserImpl implements SemanticParser {
|
|
1687
|
+
/**
|
|
1688
|
+
* Parse input in the specified language to a semantic node.
|
|
1689
|
+
*/
|
|
1690
|
+
parse(input: string, language: string): SemanticNode;
|
|
1691
|
+
/**
|
|
1692
|
+
* Check if input can be parsed in the specified language.
|
|
1693
|
+
*/
|
|
1694
|
+
canParse(input: string, language: string): boolean;
|
|
1695
|
+
/**
|
|
1696
|
+
* Get all supported languages.
|
|
1697
|
+
*/
|
|
1698
|
+
supportedLanguages(): string[];
|
|
1699
|
+
/**
|
|
1700
|
+
* Build a command semantic node from a pattern match.
|
|
1701
|
+
*/
|
|
1702
|
+
private buildCommand;
|
|
1703
|
+
/**
|
|
1704
|
+
* Build an event handler semantic node from a pattern match.
|
|
1705
|
+
*/
|
|
1706
|
+
private buildEventHandler;
|
|
1707
|
+
/**
|
|
1708
|
+
* Parse body with proper clause separation.
|
|
1709
|
+
* Splits the token stream at conjunction boundaries (then/それから/ثم/etc.)
|
|
1710
|
+
* and parses each clause independently.
|
|
1711
|
+
*
|
|
1712
|
+
* This handles multi-clause patterns like:
|
|
1713
|
+
* - "toggle .active then remove .hidden"
|
|
1714
|
+
* - ".active を 切り替え それから .hidden を 削除"
|
|
1715
|
+
* - "بدل .active ثم احذف .hidden"
|
|
1716
|
+
*
|
|
1717
|
+
* @param tokens Token stream to parse
|
|
1718
|
+
* @param commandPatterns Command patterns for the language
|
|
1719
|
+
* @param language Language code
|
|
1720
|
+
* @returns Array of semantic nodes (one per clause)
|
|
1721
|
+
*/
|
|
1722
|
+
private parseBodyWithClauses;
|
|
1723
|
+
/**
|
|
1724
|
+
* Parse a single clause (sequence of tokens between conjunctions).
|
|
1725
|
+
* Returns array of semantic nodes parsed from the clause.
|
|
1726
|
+
*/
|
|
1727
|
+
private parseClause;
|
|
1728
|
+
/**
|
|
1729
|
+
* Parse body commands with support for grammar-transformed patterns.
|
|
1730
|
+
* Used after a grammar-transformed pattern with continuation marker.
|
|
1731
|
+
*/
|
|
1732
|
+
private parseBodyWithGrammarPatterns;
|
|
1733
|
+
/**
|
|
1734
|
+
* Check if a token is a 'then' keyword in the given language.
|
|
1735
|
+
*/
|
|
1736
|
+
private isThenKeyword;
|
|
1737
|
+
/**
|
|
1738
|
+
* Check if a token is an 'end' keyword in the given language.
|
|
1739
|
+
*/
|
|
1740
|
+
private isEndKeyword;
|
|
1741
|
+
}
|
|
1742
|
+
/**
|
|
1743
|
+
* Singleton parser instance.
|
|
1744
|
+
*/
|
|
1745
|
+
declare const semanticParser: SemanticParserImpl;
|
|
1746
|
+
/**
|
|
1747
|
+
* Parse input in the specified language.
|
|
1748
|
+
*/
|
|
1749
|
+
declare function parse(input: string, language: string): SemanticNode;
|
|
1750
|
+
/**
|
|
1751
|
+
* Check if input can be parsed in the specified language.
|
|
1752
|
+
*/
|
|
1753
|
+
declare function canParse(input: string, language: string): boolean;
|
|
1754
|
+
/**
|
|
1755
|
+
* Parse input and return its command type, or null if it is not parseable.
|
|
1756
|
+
*/
|
|
1757
|
+
declare function getCommandType(input: string, language: string): ActionType | null;
|
|
1758
|
+
|
|
1759
|
+
/**
|
|
1760
|
+
* Explicit Mode Parser
|
|
1761
|
+
*
|
|
1762
|
+
* Parses the explicit [command role:value ...] syntax.
|
|
1763
|
+
* This syntax is universal across all languages and makes
|
|
1764
|
+
* semantic roles visible for learning and debugging.
|
|
1765
|
+
*
|
|
1766
|
+
* Syntax:
|
|
1767
|
+
* [command role1:value1 role2:value2 ...]
|
|
1768
|
+
*
|
|
1769
|
+
* Examples:
|
|
1770
|
+
* [toggle patient:.active destination:#button]
|
|
1771
|
+
* [put content:"hello" destination:#output]
|
|
1772
|
+
* [on event:click body:[toggle patient:.active]]
|
|
1773
|
+
*/
|
|
1774
|
+
|
|
1775
|
+
/**
|
|
1776
|
+
* Parse explicit syntax into a semantic node.
|
|
1777
|
+
*/
|
|
1778
|
+
declare function parseExplicit(input: string): SemanticNode;
|
|
1779
|
+
/**
|
|
1780
|
+
* Check if input is explicit syntax.
|
|
1781
|
+
*/
|
|
1782
|
+
declare function isExplicitSyntax(input: string): boolean;
|
|
1783
|
+
|
|
1784
|
+
/**
|
|
1785
|
+
* Explicit Mode Renderer
|
|
1786
|
+
*
|
|
1787
|
+
* Renders semantic nodes to explicit [command role:value] syntax.
|
|
1788
|
+
* Also renders to natural language syntax for any supported language.
|
|
1789
|
+
*/
|
|
1790
|
+
|
|
1791
|
+
declare class SemanticRendererImpl implements SemanticRenderer {
|
|
1792
|
+
/**
|
|
1793
|
+
* Render a semantic node in the specified language.
|
|
1794
|
+
*/
|
|
1795
|
+
render(node: SemanticNode, language: string): string;
|
|
1796
|
+
/**
|
|
1797
|
+
* Render a compound node (multiple statements chained with then/and).
|
|
1798
|
+
*/
|
|
1799
|
+
private renderCompound;
|
|
1800
|
+
/**
|
|
1801
|
+
* Get the translated chain word for the given language.
|
|
1802
|
+
*/
|
|
1803
|
+
private getChainWord;
|
|
1804
|
+
/**
|
|
1805
|
+
* Render a semantic node in explicit mode.
|
|
1806
|
+
*/
|
|
1807
|
+
renderExplicit(node: SemanticNode): string;
|
|
1808
|
+
/**
|
|
1809
|
+
* Get all supported languages.
|
|
1810
|
+
*/
|
|
1811
|
+
supportedLanguages(): string[];
|
|
1812
|
+
/**
|
|
1813
|
+
* Find the best pattern for rendering a semantic node.
|
|
1814
|
+
*
|
|
1815
|
+
* For rendering, we prefer "standard" patterns (e.g., "on click") over
|
|
1816
|
+
* native idiom patterns (e.g., "when clicked") because standard patterns
|
|
1817
|
+
* are more recognizable and closer to the original hyperscript syntax.
|
|
1818
|
+
*/
|
|
1819
|
+
private findBestPattern;
|
|
1820
|
+
/**
|
|
1821
|
+
* Render a semantic node using a specific pattern.
|
|
1822
|
+
*/
|
|
1823
|
+
private renderWithPattern;
|
|
1824
|
+
/**
|
|
1825
|
+
* Render a single pattern token.
|
|
1826
|
+
*/
|
|
1827
|
+
private renderPatternToken;
|
|
1828
|
+
/**
|
|
1829
|
+
* Convert a semantic value to a string for explicit syntax.
|
|
1830
|
+
*/
|
|
1831
|
+
private valueToString;
|
|
1832
|
+
/**
|
|
1833
|
+
* Convert a semantic value to natural language string.
|
|
1834
|
+
* Uses language-specific possessive rendering when language is provided.
|
|
1835
|
+
*/
|
|
1836
|
+
private valueToNaturalString;
|
|
1837
|
+
/**
|
|
1838
|
+
* Render a reference value in the target language.
|
|
1839
|
+
*/
|
|
1840
|
+
private renderReference;
|
|
1841
|
+
/**
|
|
1842
|
+
* Render a property-path value (possessive expression) in the target language.
|
|
1843
|
+
*
|
|
1844
|
+
* Examples by language:
|
|
1845
|
+
* - English: "my value", "its opacity", "#el's value"
|
|
1846
|
+
* - Japanese: "自分の value", "それの opacity"
|
|
1847
|
+
* - Korean: "내 value", "그것의 opacity"
|
|
1848
|
+
* - Spanish: "mi value", "su opacity"
|
|
1849
|
+
* - Chinese: "我的 value", "它的 opacity"
|
|
1850
|
+
*/
|
|
1851
|
+
private renderPropertyPath;
|
|
1852
|
+
}
|
|
1853
|
+
/**
|
|
1854
|
+
* Singleton renderer instance.
|
|
1855
|
+
*/
|
|
1856
|
+
declare const semanticRenderer: SemanticRendererImpl;
|
|
1857
|
+
/**
|
|
1858
|
+
* Render a semantic node in the specified language.
|
|
1859
|
+
*/
|
|
1860
|
+
declare function render(node: SemanticNode, language: string): string;
|
|
1861
|
+
/**
|
|
1862
|
+
* Render a semantic node in explicit mode.
|
|
1863
|
+
*/
|
|
1864
|
+
declare function renderExplicit(node: SemanticNode): string;
|
|
1865
|
+
|
|
1866
|
+
/**
|
|
1867
|
+
* Bidirectional Converter
|
|
1868
|
+
*
|
|
1869
|
+
* Converts between natural language syntax and explicit syntax,
|
|
1870
|
+
* and between different natural languages.
|
|
1871
|
+
*/
|
|
1872
|
+
|
|
1873
|
+
/**
|
|
1874
|
+
* Convert natural language hyperscript to explicit syntax.
|
|
1875
|
+
*
|
|
1876
|
+
* @param input Natural language hyperscript
|
|
1877
|
+
* @param sourceLanguage Source language code
|
|
1878
|
+
* @returns Explicit syntax string
|
|
1879
|
+
*
|
|
1880
|
+
* @example
|
|
1881
|
+
* toExplicit('toggle .active on #button', 'en')
|
|
1882
|
+
* // → '[toggle patient:.active destination:#button]'
|
|
1883
|
+
*
|
|
1884
|
+
* toExplicit('#button の .active を 切り替え', 'ja')
|
|
1885
|
+
* // → '[toggle patient:.active destination:#button]'
|
|
1886
|
+
*/
|
|
1887
|
+
declare function toExplicit(input: string, sourceLanguage: string): string;
|
|
1888
|
+
/**
|
|
1889
|
+
* Convert explicit syntax to natural language.
|
|
1890
|
+
*
|
|
1891
|
+
* @param explicit Explicit syntax string
|
|
1892
|
+
* @param targetLanguage Target language code
|
|
1893
|
+
* @returns Natural language hyperscript
|
|
1894
|
+
*
|
|
1895
|
+
* @example
|
|
1896
|
+
* fromExplicit('[toggle patient:.active destination:#button]', 'en')
|
|
1897
|
+
* // → 'toggle .active on #button'
|
|
1898
|
+
*
|
|
1899
|
+
* fromExplicit('[toggle patient:.active destination:#button]', 'ja')
|
|
1900
|
+
* // → '#button の .active を 切り替え'
|
|
1901
|
+
*/
|
|
1902
|
+
declare function fromExplicit(explicit: string, targetLanguage: string): string;
|
|
1903
|
+
/**
|
|
1904
|
+
* Translate hyperscript from one language to another.
|
|
1905
|
+
*
|
|
1906
|
+
* @param input Natural language hyperscript
|
|
1907
|
+
* @param sourceLanguage Source language code
|
|
1908
|
+
* @param targetLanguage Target language code
|
|
1909
|
+
* @returns Translated hyperscript
|
|
1910
|
+
*
|
|
1911
|
+
* @example
|
|
1912
|
+
* translate('toggle .active on #button', 'en', 'ja')
|
|
1913
|
+
* // → '#button の .active を 切り替え'
|
|
1914
|
+
*
|
|
1915
|
+
* translate('#button の .active を 切り替え', 'ja', 'ar')
|
|
1916
|
+
* // → 'بدّل .active على #button'
|
|
1917
|
+
*/
|
|
1918
|
+
declare function translate(input: string, sourceLanguage: string, targetLanguage: string): string;
|
|
1919
|
+
/**
|
|
1920
|
+
* Parse input (either explicit or natural language) to semantic node.
|
|
1921
|
+
*
|
|
1922
|
+
* @param input Hyperscript input (explicit or natural)
|
|
1923
|
+
* @param language Language code (required for natural, ignored for explicit)
|
|
1924
|
+
* @returns Semantic node
|
|
1925
|
+
*/
|
|
1926
|
+
declare function parseAny(input: string, language: string): SemanticNode;
|
|
1927
|
+
/**
|
|
1928
|
+
* Round-trip validation: parse and re-render to verify consistency.
|
|
1929
|
+
*
|
|
1930
|
+
* When called with 2 arguments, returns an object with validation info.
|
|
1931
|
+
* When called with 3 arguments, returns the rendered string directly.
|
|
1932
|
+
*
|
|
1933
|
+
* @param input Original input
|
|
1934
|
+
* @param sourceLanguage Source language code
|
|
1935
|
+
* @param targetLanguage Target language code (optional, if provided returns string only)
|
|
1936
|
+
* @returns Object with original, semantic, re-rendered, and match status (or just string if targetLanguage provided)
|
|
1937
|
+
*/
|
|
1938
|
+
declare function roundTrip(input: string, sourceLanguage: string, targetLanguage?: string): string | {
|
|
1939
|
+
original: string;
|
|
1940
|
+
semantic: SemanticNode;
|
|
1941
|
+
rendered: string;
|
|
1942
|
+
matches: boolean;
|
|
1943
|
+
};
|
|
1944
|
+
/**
|
|
1945
|
+
* Get all translations of a hyperscript statement.
|
|
1946
|
+
*
|
|
1947
|
+
* @param input Hyperscript input
|
|
1948
|
+
* @param sourceLanguage Source language (or 'explicit')
|
|
1949
|
+
* @param targetLanguages List of target language codes (defaults to all 13 supported languages)
|
|
1950
|
+
* @returns Object mapping language codes to translations
|
|
1951
|
+
*/
|
|
1952
|
+
declare function getAllTranslations(input: string, sourceLanguage: string, targetLanguages?: string[]): Record<string, string>;
|
|
1953
|
+
/**
|
|
1954
|
+
* Validate that a translation is semantically equivalent.
|
|
1955
|
+
*
|
|
1956
|
+
* @param original Original hyperscript
|
|
1957
|
+
* @param translated Translated hyperscript
|
|
1958
|
+
* @param originalLang Original language
|
|
1959
|
+
* @param translatedLang Translated language
|
|
1960
|
+
* @returns true if semantically equivalent
|
|
1961
|
+
*/
|
|
1962
|
+
declare function validateTranslation(original: string, translated: string, originalLang: string, translatedLang: string): boolean;
|
|
1963
|
+
|
|
1964
|
+
/**
|
|
1965
|
+
* Language Profiles
|
|
1966
|
+
*
|
|
1967
|
+
* Re-exports from individual profile files for backwards compatibility.
|
|
1968
|
+
* For minimal bundles, import specific profiles directly:
|
|
1969
|
+
*
|
|
1970
|
+
* @example
|
|
1971
|
+
* ```typescript
|
|
1972
|
+
* // Tree-shakeable import
|
|
1973
|
+
* import { englishProfile } from './profiles/english';
|
|
1974
|
+
*
|
|
1975
|
+
* // Full import (all profiles bundled)
|
|
1976
|
+
* import { englishProfile, languageProfiles } from './language-profiles';
|
|
1977
|
+
* ```
|
|
1978
|
+
*
|
|
1979
|
+
* @generated This file is auto-generated. Do not edit manually.
|
|
1980
|
+
*/
|
|
1981
|
+
|
|
1982
|
+
/**
|
|
1983
|
+
* All available language profiles.
|
|
1984
|
+
* @deprecated Import individual profiles for tree-shaking.
|
|
1985
|
+
*/
|
|
1986
|
+
declare const languageProfiles: Record<string, LanguageProfile>;
|
|
1987
|
+
/**
|
|
1988
|
+
* Get a language profile by code.
|
|
1989
|
+
* @deprecated Use the registry's getProfile instead.
|
|
1990
|
+
*/
|
|
1991
|
+
declare function getProfile(code: string): LanguageProfile | undefined;
|
|
1992
|
+
/**
|
|
1993
|
+
* Get all supported language codes.
|
|
1994
|
+
* @deprecated Use the registry's getRegisteredLanguages instead.
|
|
1995
|
+
*/
|
|
1996
|
+
declare function getSupportedLanguages$1(): string[];
|
|
1997
|
+
/**
|
|
1998
|
+
* Check if a language is supported.
|
|
1999
|
+
* @deprecated Use the registry's isLanguageRegistered instead.
|
|
2000
|
+
*/
|
|
2001
|
+
declare function isLanguageSupported(code: string): boolean;
|
|
2002
|
+
|
|
2003
|
+
/**
|
|
2004
|
+
* Language Registry
|
|
2005
|
+
*
|
|
2006
|
+
* Central registration point for language support in the semantic parser.
|
|
2007
|
+
* Languages self-register when their modules are imported, enabling
|
|
2008
|
+
* tree-shaking for minimal bundles.
|
|
2009
|
+
*
|
|
2010
|
+
* @example
|
|
2011
|
+
* ```typescript
|
|
2012
|
+
* // Import only the languages you need
|
|
2013
|
+
* import '@lokascript/semantic/languages/en';
|
|
2014
|
+
* import '@lokascript/semantic/languages/es';
|
|
2015
|
+
*
|
|
2016
|
+
* // Now parse works for registered languages
|
|
2017
|
+
* import { parse } from '@lokascript/semantic';
|
|
2018
|
+
* parse('toggle .active', 'en'); // Works
|
|
2019
|
+
* parse('alternar .activo', 'es'); // Works
|
|
2020
|
+
* parse('切り替え .active', 'ja'); // Error: Language not registered
|
|
2021
|
+
* ```
|
|
2022
|
+
*/
|
|
2023
|
+
|
|
2024
|
+
/**
|
|
2025
|
+
* Try to get a profile, returning undefined if not registered.
|
|
2026
|
+
*/
|
|
2027
|
+
declare function tryGetProfile(code: string): LanguageProfile | undefined;
|
|
2028
|
+
/**
|
|
2029
|
+
* Get all registered language codes.
|
|
2030
|
+
*/
|
|
2031
|
+
declare function getRegisteredLanguages(): string[];
|
|
2032
|
+
|
|
2033
|
+
/**
|
|
2034
|
+
* Semantic Result Cache
|
|
2035
|
+
*
|
|
2036
|
+
* LRU cache for semantic analysis results to optimize repeated parsing.
|
|
2037
|
+
*
|
|
2038
|
+
* Design:
|
|
2039
|
+
* - Cache key: `${language}:${input}` for simple, fast lookups
|
|
2040
|
+
* - LRU eviction when max size reached
|
|
2041
|
+
* - Optional TTL (time-to-live) for cache entries
|
|
2042
|
+
* - Statistics for monitoring cache effectiveness
|
|
2043
|
+
* - No synchronization: assumes single-threaded execution (e.g. browser environments)
|
|
2044
|
+
*/
|
|
2045
|
+
|
|
2046
|
+
/**
|
|
2047
|
+
* Cache configuration options.
|
|
2048
|
+
*/
|
|
2049
|
+
interface SemanticCacheConfig {
|
|
2050
|
+
/** Maximum number of entries to cache. Default: 1000 */
|
|
2051
|
+
maxSize?: number;
|
|
2052
|
+
/** Time-to-live in milliseconds. 0 = no expiration. Default: 0 */
|
|
2053
|
+
ttlMs?: number;
|
|
2054
|
+
/** Enable/disable caching. Default: true */
|
|
2055
|
+
enabled?: boolean;
|
|
2056
|
+
}
|
|
2057
|
+
/**
|
|
2058
|
+
* Cache statistics.
|
|
2059
|
+
*/
|
|
2060
|
+
interface CacheStats {
|
|
2061
|
+
/** Total cache hits */
|
|
2062
|
+
hits: number;
|
|
2063
|
+
/** Total cache misses */
|
|
2064
|
+
misses: number;
|
|
2065
|
+
/** Current cache size */
|
|
2066
|
+
size: number;
|
|
2067
|
+
/** Maximum cache size */
|
|
2068
|
+
maxSize: number;
|
|
2069
|
+
/** Hit rate (0-1) */
|
|
2070
|
+
hitRate: number;
|
|
2071
|
+
/** Total evictions due to size limit */
|
|
2072
|
+
evictions: number;
|
|
2073
|
+
/** Total expirations due to TTL */
|
|
2074
|
+
expirations: number;
|
|
2075
|
+
/** Whether caching is enabled */
|
|
2076
|
+
enabled: boolean;
|
|
2077
|
+
}
|
|
2078
|
+
/**
|
|
2079
|
+
* LRU Cache for semantic analysis results.
|
|
2080
|
+
*
|
|
2081
|
+
* Uses Map's insertion order for LRU eviction - when we access an entry,
|
|
2082
|
+
* we delete and re-insert it to move it to the end (most recently used).
|
|
2083
|
+
*/
|
|
2084
|
+
declare class SemanticCache {
|
|
2085
|
+
private cache;
|
|
2086
|
+
private config;
|
|
2087
|
+
private stats;
|
|
2088
|
+
constructor(config?: SemanticCacheConfig);
|
|
2089
|
+
/**
|
|
2090
|
+
* Generate cache key from input and language.
|
|
2091
|
+
*/
|
|
2092
|
+
private makeKey;
|
|
2093
|
+
/**
|
|
2094
|
+
* Check if an entry has expired.
|
|
2095
|
+
*/
|
|
2096
|
+
private isExpired;
|
|
2097
|
+
/**
|
|
2098
|
+
* Evict the least recently used entry.
|
|
2099
|
+
*/
|
|
2100
|
+
private evictLRU;
|
|
2101
|
+
/**
|
|
2102
|
+
* Get a cached result.
|
|
2103
|
+
*
|
|
2104
|
+
* @param input - The input string
|
|
2105
|
+
* @param language - The language code
|
|
2106
|
+
* @returns The cached result, or undefined if not found/expired
|
|
2107
|
+
*/
|
|
2108
|
+
get(input: string, language: string): SemanticAnalysisResult | undefined;
|
|
2109
|
+
/**
|
|
2110
|
+
* Store a result in the cache.
|
|
2111
|
+
*
|
|
2112
|
+
* @param input - The input string
|
|
2113
|
+
* @param language - The language code
|
|
2114
|
+
* @param result - The analysis result to cache
|
|
2115
|
+
*/
|
|
2116
|
+
set(input: string, language: string, result: SemanticAnalysisResult): void;
|
|
2117
|
+
/**
|
|
2118
|
+
* Check if a result is cached (without updating LRU order).
|
|
2119
|
+
*/
|
|
2120
|
+
has(input: string, language: string): boolean;
|
|
2121
|
+
/**
|
|
2122
|
+
* Remove a specific entry from the cache.
|
|
2123
|
+
*/
|
|
2124
|
+
delete(input: string, language: string): boolean;
|
|
2125
|
+
/**
|
|
2126
|
+
* Clear all cached entries.
|
|
2127
|
+
*/
|
|
2128
|
+
clear(): void;
|
|
2129
|
+
/**
|
|
2130
|
+
* Reset statistics.
|
|
2131
|
+
*/
|
|
2132
|
+
resetStats(): void;
|
|
2133
|
+
/**
|
|
2134
|
+
* Get cache statistics.
|
|
2135
|
+
*/
|
|
2136
|
+
getStats(): CacheStats;
|
|
2137
|
+
/**
|
|
2138
|
+
* Update cache configuration.
|
|
2139
|
+
*/
|
|
2140
|
+
configure(config: Partial<SemanticCacheConfig>): void;
|
|
2141
|
+
/**
|
|
2142
|
+
* Enable caching.
|
|
2143
|
+
*/
|
|
2144
|
+
enable(): void;
|
|
2145
|
+
/**
|
|
2146
|
+
* Disable caching.
|
|
2147
|
+
*/
|
|
2148
|
+
disable(): void;
|
|
2149
|
+
/**
|
|
2150
|
+
* Get current configuration.
|
|
2151
|
+
*/
|
|
2152
|
+
getConfig(): Readonly<Required<SemanticCacheConfig>>;
|
|
2153
|
+
}
|
|
2154
|
+
/**
|
|
2155
|
+
* Default global cache instance.
|
|
2156
|
+
*/
|
|
2157
|
+
declare const semanticCache: SemanticCache;
|
|
2158
|
+
/**
|
|
2159
|
+
* Create a cache with custom configuration.
|
|
2160
|
+
*/
|
|
2161
|
+
declare function createSemanticCache(config?: SemanticCacheConfig): SemanticCache;
|
|
2162
|
+
/**
|
|
2163
|
+
* Decorator/wrapper for adding caching to an analyze function.
|
|
2164
|
+
*
|
|
2165
|
+
* @param analyzeFn - The analyze function to wrap
|
|
2166
|
+
* @param cache - The cache instance to use
|
|
2167
|
+
* @returns Wrapped function with caching
|
|
2168
|
+
*/
|
|
2169
|
+
declare function withCache<T extends (input: string, language: string) => SemanticAnalysisResult>(analyzeFn: T, cache?: SemanticCache): T;
|
|
2170
|
+
|
|
2171
|
+
/**
|
|
2172
|
+
* Core Parser Bridge
|
|
2173
|
+
*
|
|
2174
|
+
* Provides the SemanticAnalyzer interface that integrates semantic parsing
|
|
2175
|
+
* into the core hyperscript parser. This bridge enables confidence-driven
|
|
2176
|
+
* fallback between semantic and traditional parsing.
|
|
2177
|
+
*/
|
|
2178
|
+
|
|
2179
|
+
/**
|
|
2180
|
+
* Result of semantic analysis.
|
|
2181
|
+
*/
|
|
2182
|
+
interface SemanticAnalysisResult {
|
|
2183
|
+
/** Confidence score (0-1) for this analysis */
|
|
2184
|
+
readonly confidence: number;
|
|
2185
|
+
/** The parsed command info (if successful) */
|
|
2186
|
+
readonly command?: {
|
|
2187
|
+
readonly name: ActionType;
|
|
2188
|
+
readonly roles: ReadonlyMap<SemanticRole, SemanticValue>;
|
|
2189
|
+
};
|
|
2190
|
+
/** The full semantic node (if successful) */
|
|
2191
|
+
readonly node?: SemanticNode;
|
|
2192
|
+
/** Any errors encountered */
|
|
2193
|
+
readonly errors?: string[];
|
|
2194
|
+
/** Number of tokens consumed */
|
|
2195
|
+
readonly tokensConsumed?: number;
|
|
2196
|
+
}
|
|
2197
|
+
/**
|
|
2198
|
+
* Interface for semantic analysis that can be integrated into the core parser.
|
|
2199
|
+
* This allows the core parser to optionally use semantic parsing with
|
|
2200
|
+
* confidence-based fallback to traditional parsing.
|
|
2201
|
+
*/
|
|
2202
|
+
interface SemanticAnalyzer {
|
|
2203
|
+
/**
|
|
2204
|
+
* Analyze input in the specified language.
|
|
2205
|
+
*
|
|
2206
|
+
* @param input The input string to analyze
|
|
2207
|
+
* @param language ISO 639-1 language code
|
|
2208
|
+
* @returns Analysis result with confidence score
|
|
2209
|
+
*/
|
|
2210
|
+
analyze(input: string, language: string): SemanticAnalysisResult;
|
|
2211
|
+
/**
|
|
2212
|
+
* Check if semantic parsing is available for a language.
|
|
2213
|
+
*/
|
|
2214
|
+
supportsLanguage(language: string): boolean;
|
|
2215
|
+
/**
|
|
2216
|
+
* Get the list of supported languages.
|
|
2217
|
+
*/
|
|
2218
|
+
supportedLanguages(): string[];
|
|
2219
|
+
/**
|
|
2220
|
+
* Get cache statistics.
|
|
2221
|
+
*/
|
|
2222
|
+
getCacheStats(): CacheStats;
|
|
2223
|
+
/**
|
|
2224
|
+
* Clear the result cache.
|
|
2225
|
+
*/
|
|
2226
|
+
clearCache(): void;
|
|
2227
|
+
/**
|
|
2228
|
+
* Configure the cache.
|
|
2229
|
+
*/
|
|
2230
|
+
configureCache(config: Partial<SemanticCacheConfig>): void;
|
|
2231
|
+
}
|
|
2232
|
+
/**
|
|
2233
|
+
* Options for creating a SemanticAnalyzer.
|
|
2234
|
+
*/
|
|
2235
|
+
interface SemanticAnalyzerOptions {
|
|
2236
|
+
/** Cache configuration. Pass false to disable caching. */
|
|
2237
|
+
cache?: SemanticCacheConfig | false;
|
|
2238
|
+
}
|
|
2239
|
+
/**
|
|
2240
|
+
* Implementation of SemanticAnalyzer that wraps the semantic parser.
|
|
2241
|
+
* Includes LRU caching for performance optimization on repeated inputs.
|
|
2242
|
+
*/
|
|
2243
|
+
declare class SemanticAnalyzerImpl implements SemanticAnalyzer {
|
|
2244
|
+
private readonly patternMatcher;
|
|
2245
|
+
private readonly languages;
|
|
2246
|
+
private readonly cache;
|
|
2247
|
+
constructor(options?: SemanticAnalyzerOptions);
|
|
2248
|
+
analyze(input: string, language: string): SemanticAnalysisResult;
|
|
2249
|
+
/**
|
|
2250
|
+
* Perform analysis without cache lookup.
|
|
2251
|
+
*/
|
|
2252
|
+
private analyzeUncached;
|
|
2253
|
+
supportsLanguage(language: string): boolean;
|
|
2254
|
+
supportedLanguages(): string[];
|
|
2255
|
+
getCacheStats(): CacheStats;
|
|
2256
|
+
clearCache(): void;
|
|
2257
|
+
configureCache(config: Partial<SemanticCacheConfig>): void;
|
|
2258
|
+
private buildSemanticNode;
|
|
2259
|
+
}
|
|
2260
|
+
/**
|
|
2261
|
+
* Create a SemanticAnalyzer instance.
|
|
2262
|
+
*
|
|
2263
|
+
* @param options - Configuration options including cache settings
|
|
2264
|
+
* @returns A new SemanticAnalyzer
|
|
2265
|
+
*
|
|
2266
|
+
* @example
|
|
2267
|
+
* // Default: uses shared global cache
|
|
2268
|
+
* const analyzer = createSemanticAnalyzer();
|
|
2269
|
+
*
|
|
2270
|
+
* @example
|
|
2271
|
+
* // Custom cache size
|
|
2272
|
+
* const analyzer = createSemanticAnalyzer({ cache: { maxSize: 500 } });
|
|
2273
|
+
*
|
|
2274
|
+
* @example
|
|
2275
|
+
* // Disable caching
|
|
2276
|
+
* const analyzer = createSemanticAnalyzer({ cache: false });
|
|
2277
|
+
*/
|
|
2278
|
+
declare function createSemanticAnalyzer(options?: SemanticAnalyzerOptions): SemanticAnalyzer;
|
|
2279
|
+
|
|
2280
|
+
/**
|
|
2281
|
+
* Default confidence threshold for preferring semantic parsing.
|
|
2282
|
+
* If confidence is above this, use semantic result; otherwise fall back to traditional parsing.
|
|
2283
|
+
*/
|
|
2284
|
+
declare const DEFAULT_CONFIDENCE_THRESHOLD = 0.5;
|
|
2285
|
+
/**
|
|
2286
|
+
* High confidence threshold for very certain matches.
|
|
2287
|
+
*/
|
|
2288
|
+
declare const HIGH_CONFIDENCE_THRESHOLD = 0.8;
|
|
2289
|
+
/**
|
|
2290
|
+
* Determine if semantic analysis should be used based on confidence.
|
|
2291
|
+
*/
|
|
2292
|
+
declare function shouldUseSemanticResult(result: SemanticAnalysisResult, threshold?: number): boolean;
|
|
2293
|
+
/**
|
|
2294
|
+
* Convert semantic roles to the format expected by core parser commands.
|
|
2295
|
+
* This maps semantic roles to the positional/modifier structure used by
|
|
2296
|
+
* the core command implementations.
|
|
2297
|
+
*
|
|
2298
|
+
* Role to argument/modifier mapping:
|
|
2299
|
+
* - patient → first positional arg
|
|
2300
|
+
* - event → first positional arg
|
|
2301
|
+
* - destination → 'into' (put) or 'on' (others)
|
|
2302
|
+
* - source → 'from'
|
|
2303
|
+
* - quantity → 'by'
|
|
2304
|
+
* - duration → 'over' or 'for'
|
|
2305
|
+
* - method → 'as'
|
|
2306
|
+
* - style → 'with'
|
|
2307
|
+
* - condition → 'if'
|
|
2308
|
+
*/
|
|
2309
|
+
declare function rolesToCommandArgs(roles: ReadonlyMap<SemanticRole, SemanticValue>, command: ActionType): {
|
|
2310
|
+
args: SemanticValue[];
|
|
2311
|
+
modifiers: Record<string, SemanticValue>;
|
|
2312
|
+
};
|
|
2313
|
+
|
|
2314
|
+
/**
|
|
2315
|
+
* Command Schemas
|
|
2316
|
+
*
|
|
2317
|
+
* Defines the semantic structure of each hyperscript command.
|
|
2318
|
+
* Used by the pattern generator to create language-specific patterns.
|
|
2319
|
+
*/
|
|
2320
|
+
|
|
2321
|
+
/**
|
|
2322
|
+
* A role specification in a command schema.
|
|
2323
|
+
*/
|
|
2324
|
+
interface RoleSpec {
|
|
2325
|
+
/** The semantic role */
|
|
2326
|
+
readonly role: SemanticRole;
|
|
2327
|
+
/** Description of what this role represents */
|
|
2328
|
+
readonly description: string;
|
|
2329
|
+
/** Whether this role is required */
|
|
2330
|
+
readonly required: boolean;
|
|
2331
|
+
/** Expected value types */
|
|
2332
|
+
readonly expectedTypes: Array<'selector' | 'literal' | 'reference' | 'expression'>;
|
|
2333
|
+
/** Default value if not provided */
|
|
2334
|
+
readonly default?: SemanticValue;
|
|
2335
|
+
/** Position hint for SVO languages (higher = earlier) */
|
|
2336
|
+
readonly svoPosition?: number;
|
|
2337
|
+
/** Position hint for SOV languages (higher = earlier) */
|
|
2338
|
+
readonly sovPosition?: number;
|
|
2339
|
+
/**
|
|
2340
|
+
* Override the default role marker for this command.
|
|
2341
|
+
* Maps language code to the marker to use (e.g., { en: 'to', es: 'a' }).
|
|
2342
|
+
* If not specified, uses the language profile's default roleMarker.
|
|
2343
|
+
*/
|
|
2344
|
+
readonly markerOverride?: Record<string, string>;
|
|
2345
|
+
}
|
|
2346
|
+
/**
|
|
2347
|
+
* A precondition that must be met before command execution.
|
|
2348
|
+
* Used for runtime error documentation.
|
|
2349
|
+
*/
|
|
2350
|
+
interface CommandPrecondition {
|
|
2351
|
+
/** Human-readable condition description */
|
|
2352
|
+
readonly condition: string;
|
|
2353
|
+
/** Error code thrown when precondition fails */
|
|
2354
|
+
readonly errorCode: string;
|
|
2355
|
+
/** Error message template */
|
|
2356
|
+
readonly message: string;
|
|
2357
|
+
}
|
|
2358
|
+
/**
|
|
2359
|
+
* A command schema defines the semantic structure of a command.
|
|
2360
|
+
*/
|
|
2361
|
+
interface CommandSchema {
|
|
2362
|
+
/** The action type (command name) */
|
|
2363
|
+
readonly action: ActionType;
|
|
2364
|
+
/** Human-readable description */
|
|
2365
|
+
readonly description: string;
|
|
2366
|
+
/** Roles this command accepts */
|
|
2367
|
+
readonly roles: RoleSpec[];
|
|
2368
|
+
/** The primary role (what the command acts on) */
|
|
2369
|
+
readonly primaryRole: SemanticRole;
|
|
2370
|
+
/** Category for grouping */
|
|
2371
|
+
readonly category: CommandCategory;
|
|
2372
|
+
/** Whether this command typically has a body (like event handlers) */
|
|
2373
|
+
readonly hasBody?: boolean;
|
|
2374
|
+
/** Notes about special handling */
|
|
2375
|
+
readonly notes?: string;
|
|
2376
|
+
/** Possible runtime error codes this command can throw */
|
|
2377
|
+
readonly errorCodes?: readonly string[];
|
|
2378
|
+
/** Preconditions that must be met before execution */
|
|
2379
|
+
readonly preconditions?: readonly CommandPrecondition[];
|
|
2380
|
+
/** Recovery hints mapping error code to suggestion */
|
|
2381
|
+
readonly recoveryHints?: Readonly<Record<string, string>>;
|
|
2382
|
+
}
|
|
2383
|
+
/**
|
|
2384
|
+
* Command categories for organization.
|
|
2385
|
+
*/
|
|
2386
|
+
type CommandCategory = 'dom-class' | 'dom-content' | 'dom-visibility' | 'variable' | 'event' | 'async' | 'navigation' | 'control-flow';
|
|
2387
|
+
/**
|
|
2388
|
+
* Toggle command: adds class/attribute if absent, removes if present.
|
|
2389
|
+
*
|
|
2390
|
+
* Patterns:
|
|
2391
|
+
* - EN: toggle .active on #button
|
|
2392
|
+
* - JA: #button の .active を 切り替え
|
|
2393
|
+
* - AR: بدّل .active على #button
|
|
2394
|
+
*/
|
|
2395
|
+
declare const toggleSchema: CommandSchema;
|
|
2396
|
+
/**
|
|
2397
|
+
* Add command: adds a class or attribute.
|
|
2398
|
+
*/
|
|
2399
|
+
declare const addSchema: CommandSchema;
|
|
2400
|
+
/**
|
|
2401
|
+
* Remove command: removes a class or attribute.
|
|
2402
|
+
*/
|
|
2403
|
+
declare const removeSchema: CommandSchema;
|
|
2404
|
+
/**
|
|
2405
|
+
* Put command: puts content into a target.
|
|
2406
|
+
*
|
|
2407
|
+
* Patterns:
|
|
2408
|
+
* - EN: put "hello" into #output
|
|
2409
|
+
* - JA: #output に "hello" を 置く
|
|
2410
|
+
* - AR: ضع "hello" في #output
|
|
2411
|
+
*/
|
|
2412
|
+
declare const putSchema: CommandSchema;
|
|
2413
|
+
/**
|
|
2414
|
+
* Set command: sets a property or variable.
|
|
2415
|
+
*
|
|
2416
|
+
* Patterns:
|
|
2417
|
+
* - EN: set :count to 10
|
|
2418
|
+
* - ES: establecer :count a 10
|
|
2419
|
+
* - JA: :count を 10 に 設定
|
|
2420
|
+
* - KO: :x 에 5 을 설정 (uses default markers)
|
|
2421
|
+
* - TR: :x e 5 i ayarla (uses default markers)
|
|
2422
|
+
*
|
|
2423
|
+
* Note: Only override markers for SVO languages where patient has no default marker.
|
|
2424
|
+
* SOV languages (Korean, Japanese, Turkish) already have correct object markers.
|
|
2425
|
+
*/
|
|
2426
|
+
declare const setSchema: CommandSchema;
|
|
2427
|
+
/**
|
|
2428
|
+
* Show command: makes an element visible.
|
|
2429
|
+
*/
|
|
2430
|
+
declare const showSchema: CommandSchema;
|
|
2431
|
+
/**
|
|
2432
|
+
* Hide command: makes an element invisible.
|
|
2433
|
+
*/
|
|
2434
|
+
declare const hideSchema: CommandSchema;
|
|
2435
|
+
/**
|
|
2436
|
+
* On command: event handler.
|
|
2437
|
+
*/
|
|
2438
|
+
declare const onSchema: CommandSchema;
|
|
2439
|
+
/**
|
|
2440
|
+
* Trigger command: dispatches an event.
|
|
2441
|
+
* Supports namespaced events like "draggable:start".
|
|
2442
|
+
*/
|
|
2443
|
+
declare const triggerSchema: CommandSchema;
|
|
2444
|
+
/**
|
|
2445
|
+
* Wait command: pauses execution.
|
|
2446
|
+
*/
|
|
2447
|
+
declare const waitSchema: CommandSchema;
|
|
2448
|
+
/**
|
|
2449
|
+
* Fetch command: makes HTTP request.
|
|
2450
|
+
*/
|
|
2451
|
+
declare const fetchSchema: CommandSchema;
|
|
2452
|
+
/**
|
|
2453
|
+
* Increment command: increases a numeric value.
|
|
2454
|
+
*/
|
|
2455
|
+
declare const incrementSchema: CommandSchema;
|
|
2456
|
+
/**
|
|
2457
|
+
* Decrement command: decreases a numeric value.
|
|
2458
|
+
*/
|
|
2459
|
+
declare const decrementSchema: CommandSchema;
|
|
2460
|
+
/**
|
|
2461
|
+
* Append command: appends content to an element.
|
|
2462
|
+
*/
|
|
2463
|
+
declare const appendSchema: CommandSchema;
|
|
2464
|
+
/**
|
|
2465
|
+
* Prepend command: prepends content to an element.
|
|
2466
|
+
*/
|
|
2467
|
+
declare const prependSchema: CommandSchema;
|
|
2468
|
+
/**
|
|
2469
|
+
* All available command schemas.
|
|
2470
|
+
*/
|
|
2471
|
+
declare const commandSchemas: Record<ActionType, CommandSchema>;
|
|
2472
|
+
/**
|
|
2473
|
+
* Get a command schema by action type.
|
|
2474
|
+
*/
|
|
2475
|
+
declare function getSchema$1(action: ActionType): CommandSchema | undefined;
|
|
2476
|
+
/**
|
|
2477
|
+
* Get all schemas for a category.
|
|
2478
|
+
*/
|
|
2479
|
+
declare function getSchemasByCategory(category: CommandCategory): CommandSchema[];
|
|
2480
|
+
/**
|
|
2481
|
+
* Get all fully-defined schemas (with roles).
|
|
2482
|
+
*/
|
|
2483
|
+
declare function getDefinedSchemas(): CommandSchema[];
|
|
2484
|
+
|
|
2485
|
+
/**
|
|
2486
|
+
* Pattern Generator
|
|
2487
|
+
*
|
|
2488
|
+
* Generates LanguagePattern objects from CommandSchema + LanguageProfile.
|
|
2489
|
+
* This solves the pattern explosion problem by deriving patterns from
|
|
2490
|
+
* high-level definitions rather than hand-writing each one.
|
|
2491
|
+
*/
|
|
2492
|
+
|
|
2493
|
+
/**
|
|
2494
|
+
* Configuration for pattern generation.
|
|
2495
|
+
*/
|
|
2496
|
+
interface GeneratorConfig {
|
|
2497
|
+
/** Base priority for generated patterns (higher = checked first) */
|
|
2498
|
+
basePriority?: number;
|
|
2499
|
+
/** Whether to generate simple patterns (without optional roles) */
|
|
2500
|
+
generateSimpleVariants?: boolean;
|
|
2501
|
+
/** Whether to generate alternative keyword patterns */
|
|
2502
|
+
generateAlternatives?: boolean;
|
|
2503
|
+
}
|
|
2504
|
+
/**
|
|
2505
|
+
* Generate a pattern for a command in a specific language.
|
|
2506
|
+
*/
|
|
2507
|
+
declare function generatePattern(schema: CommandSchema, profile: LanguageProfile, config?: GeneratorConfig): LanguagePattern;
|
|
2508
|
+
/**
|
|
2509
|
+
* Generate a simple variant pattern (without optional roles).
|
|
2510
|
+
*/
|
|
2511
|
+
declare function generateSimplePattern(schema: CommandSchema, profile: LanguageProfile, config?: GeneratorConfig): LanguagePattern | null;
|
|
2512
|
+
/**
|
|
2513
|
+
* Generate all pattern variants for a command in a language.
|
|
2514
|
+
*/
|
|
2515
|
+
declare function generatePatternVariants(schema: CommandSchema, profile: LanguageProfile, config?: GeneratorConfig): LanguagePattern[];
|
|
2516
|
+
/**
|
|
2517
|
+
* Generate patterns for all commands in a specific language.
|
|
2518
|
+
*/
|
|
2519
|
+
declare function generatePatternsForLanguage(profile: LanguageProfile, config?: GeneratorConfig): LanguagePattern[];
|
|
2520
|
+
/**
|
|
2521
|
+
* Generate patterns for a command across specified profiles.
|
|
2522
|
+
*
|
|
2523
|
+
* @param schema Command schema to generate patterns for
|
|
2524
|
+
* @param profiles Array of language profiles to generate patterns for (defaults to all registered)
|
|
2525
|
+
* @param config Generator configuration
|
|
2526
|
+
*/
|
|
2527
|
+
declare function generatePatternsForCommand(schema: CommandSchema, profiles?: LanguageProfile[], config?: GeneratorConfig): LanguagePattern[];
|
|
2528
|
+
/**
|
|
2529
|
+
* Generate all patterns for all commands across specified profiles.
|
|
2530
|
+
*
|
|
2531
|
+
* @param profiles Array of language profiles to generate patterns for (defaults to all registered)
|
|
2532
|
+
* @param config Generator configuration
|
|
2533
|
+
*/
|
|
2534
|
+
declare function generateAllPatterns(profiles?: LanguageProfile[], config?: GeneratorConfig): LanguagePattern[];
|
|
2535
|
+
/**
|
|
2536
|
+
* Get a summary of what patterns can be generated.
|
|
2537
|
+
* Note: This requires the registry to have languages registered.
|
|
2538
|
+
*/
|
|
2539
|
+
declare function getGeneratorSummary(): {
|
|
2540
|
+
languages: string[];
|
|
2541
|
+
commands: string[];
|
|
2542
|
+
totalPatterns: number;
|
|
2543
|
+
};
|
|
2544
|
+
/**
|
|
2545
|
+
* Validate that all required keywords exist for a language.
|
|
2546
|
+
*/
|
|
2547
|
+
declare function validateLanguageKeywords(profile: LanguageProfile, schemas?: CommandSchema[]): {
|
|
2548
|
+
missing: string[];
|
|
2549
|
+
available: string[];
|
|
2550
|
+
};
|
|
2551
|
+
|
|
2552
|
+
/**
|
|
2553
|
+
* Per-Command Semantic Validation
|
|
2554
|
+
*
|
|
2555
|
+
* Validates semantic parse results against command schemas.
|
|
2556
|
+
* Ensures that role assignments match expected types and constraints.
|
|
2557
|
+
*
|
|
2558
|
+
* Design Philosophy:
|
|
2559
|
+
* - Command schemas are the source of truth for validation rules
|
|
2560
|
+
* - Validation happens AFTER pattern matching, BEFORE AST conversion
|
|
2561
|
+
* - Provides detailed error messages for debugging
|
|
2562
|
+
* - Supports confidence scoring for ambiguous parses
|
|
2563
|
+
*
|
|
2564
|
+
* Integration with core validators:
|
|
2565
|
+
* - Uses patterns from @lokascript/core's lightweight-validators where applicable
|
|
2566
|
+
* - Type validation follows same patterns as runtime validation
|
|
2567
|
+
* - Can be extended with custom validators using the same API
|
|
2568
|
+
*/
|
|
2569
|
+
|
|
2570
|
+
/**
|
|
2571
|
+
* Validation error with detailed context.
|
|
2572
|
+
*/
|
|
2573
|
+
interface ValidationError {
|
|
2574
|
+
/** Error code for programmatic handling */
|
|
2575
|
+
code: 'MISSING_REQUIRED_ROLE' | 'INVALID_TYPE' | 'UNKNOWN_ROLE' | 'CONSTRAINT_VIOLATION';
|
|
2576
|
+
/** Human-readable message */
|
|
2577
|
+
message: string;
|
|
2578
|
+
/** The role that failed validation */
|
|
2579
|
+
role?: SemanticRole;
|
|
2580
|
+
/** Expected types */
|
|
2581
|
+
expected?: string[];
|
|
2582
|
+
/** Actual value */
|
|
2583
|
+
actual?: SemanticValue;
|
|
2584
|
+
/** Severity: 'error' blocks execution, 'warning' is logged */
|
|
2585
|
+
severity: 'error' | 'warning';
|
|
2586
|
+
}
|
|
2587
|
+
/**
|
|
2588
|
+
* Result of command validation.
|
|
2589
|
+
*/
|
|
2590
|
+
interface ValidationResult {
|
|
2591
|
+
/** Whether validation passed */
|
|
2592
|
+
valid: boolean;
|
|
2593
|
+
/** Validation errors (if any) */
|
|
2594
|
+
errors: ValidationError[];
|
|
2595
|
+
/** Warnings (non-blocking issues) */
|
|
2596
|
+
warnings: ValidationError[];
|
|
2597
|
+
/** Confidence adjustment (-1 to +1) based on validation */
|
|
2598
|
+
confidenceAdjustment: number;
|
|
2599
|
+
/** Suggested fixes for errors */
|
|
2600
|
+
suggestions: string[];
|
|
2601
|
+
}
|
|
2602
|
+
/**
|
|
2603
|
+
* Maps action types to their schemas.
|
|
2604
|
+
*/
|
|
2605
|
+
declare const schemaRegistry: Map<ActionType, CommandSchema>;
|
|
2606
|
+
/**
|
|
2607
|
+
* Get schema for an action type.
|
|
2608
|
+
*/
|
|
2609
|
+
declare function getSchema(action: ActionType): CommandSchema | undefined;
|
|
2610
|
+
/**
|
|
2611
|
+
* Register a custom schema.
|
|
2612
|
+
*/
|
|
2613
|
+
declare function registerSchema(action: ActionType, schema: CommandSchema): void;
|
|
2614
|
+
/**
|
|
2615
|
+
* Validate a semantic parse result against its command schema.
|
|
2616
|
+
*
|
|
2617
|
+
* @param result - The semantic parse result to validate
|
|
2618
|
+
* @returns Validation result with errors, warnings, and confidence adjustment
|
|
2619
|
+
*/
|
|
2620
|
+
declare function validateSemanticResult(result: SemanticParseResult): ValidationResult;
|
|
2621
|
+
/**
|
|
2622
|
+
* Apply validation to a parse result and adjust confidence.
|
|
2623
|
+
*
|
|
2624
|
+
* @param result - The semantic parse result
|
|
2625
|
+
* @returns Updated result with adjusted confidence
|
|
2626
|
+
*/
|
|
2627
|
+
declare function validateAndAdjustConfidence(result: SemanticParseResult): SemanticParseResult & {
|
|
2628
|
+
validation: ValidationResult;
|
|
2629
|
+
};
|
|
2630
|
+
|
|
2631
|
+
/**
|
|
2632
|
+
* Unified Language Profile
|
|
2633
|
+
*
|
|
2634
|
+
* Combines parsing-focused features (from semantic) with generation-focused
|
|
2635
|
+
* features (from i18n) into a single profile structure.
|
|
2636
|
+
*
|
|
2637
|
+
* This enables:
|
|
2638
|
+
* - Single source of truth for language configuration
|
|
2639
|
+
* - Bidirectional conversion: parse (natural → semantic) and render (semantic → natural)
|
|
2640
|
+
* - Consistent language support across packages
|
|
2641
|
+
*/
|
|
2642
|
+
|
|
2643
|
+
/**
|
|
2644
|
+
* How grammatical relationships are marked (unified from both packages).
|
|
2645
|
+
*
|
|
2646
|
+
* Maps to i18n's AdpositionType:
|
|
2647
|
+
* - preposition → 'preposition'
|
|
2648
|
+
* - postposition → 'postposition'
|
|
2649
|
+
* - particle → 'postposition' (particles are typically postpositional)
|
|
2650
|
+
* - case-suffix → 'postposition' (suffixes attach after)
|
|
2651
|
+
*/
|
|
2652
|
+
type MarkingStrategy = 'preposition' | 'postposition' | 'particle' | 'case-suffix';
|
|
2653
|
+
/**
|
|
2654
|
+
* A grammatical marker for a semantic role.
|
|
2655
|
+
* Unified from both packages' marker types.
|
|
2656
|
+
*/
|
|
2657
|
+
interface UnifiedRoleMarker {
|
|
2658
|
+
/** Primary marker form */
|
|
2659
|
+
readonly primary: string;
|
|
2660
|
+
/** Alternative forms (conjugations, vowel harmony variants) */
|
|
2661
|
+
readonly alternatives?: string[];
|
|
2662
|
+
/** Position relative to the role value */
|
|
2663
|
+
readonly position: 'before' | 'after';
|
|
2664
|
+
/** Whether this marker is required */
|
|
2665
|
+
readonly required?: boolean;
|
|
2666
|
+
}
|
|
2667
|
+
/**
|
|
2668
|
+
* Verb form configuration for a language.
|
|
2669
|
+
*/
|
|
2670
|
+
interface VerbConfig {
|
|
2671
|
+
/** Position of verb in the sentence */
|
|
2672
|
+
readonly position: 'start' | 'end' | 'second';
|
|
2673
|
+
/** Common verb suffixes/conjugations to recognize */
|
|
2674
|
+
readonly suffixes?: string[];
|
|
2675
|
+
/** Whether the language commonly drops subjects */
|
|
2676
|
+
readonly subjectDrop?: boolean;
|
|
2677
|
+
}
|
|
2678
|
+
/**
|
|
2679
|
+
* Translation of a command keyword.
|
|
2680
|
+
*/
|
|
2681
|
+
interface KeywordTranslation {
|
|
2682
|
+
/** Primary translation */
|
|
2683
|
+
readonly primary: string;
|
|
2684
|
+
/** Alternative forms (conjugations, synonyms) */
|
|
2685
|
+
readonly alternatives?: string[];
|
|
2686
|
+
/** Normalized form for matching */
|
|
2687
|
+
readonly normalized?: string;
|
|
2688
|
+
}
|
|
2689
|
+
/**
|
|
2690
|
+
* Special tokenization configuration.
|
|
2691
|
+
*/
|
|
2692
|
+
interface TokenizationConfig {
|
|
2693
|
+
/** Particles to recognize (for particle languages) */
|
|
2694
|
+
readonly particles?: string[];
|
|
2695
|
+
/** Prefixes to recognize (for prefixing languages) */
|
|
2696
|
+
readonly prefixes?: string[];
|
|
2697
|
+
/** Word boundary detection strategy */
|
|
2698
|
+
readonly boundaryStrategy?: 'space' | 'particle' | 'character';
|
|
2699
|
+
}
|
|
2700
|
+
/**
|
|
2701
|
+
* Unified Language Profile
|
|
2702
|
+
*
|
|
2703
|
+
* Combines all fields needed for both parsing and generation:
|
|
2704
|
+
*
|
|
2705
|
+
* **Shared fields:**
|
|
2706
|
+
* - code, name, nativeName, direction, wordOrder
|
|
2707
|
+
*
|
|
2708
|
+
* **Parsing fields (from semantic):**
|
|
2709
|
+
* - keywords: Command keyword translations
|
|
2710
|
+
* - verb: Verb position and conjugation info
|
|
2711
|
+
* - tokenization: Language-specific tokenization
|
|
2712
|
+
* - usesSpaces: Word boundary info
|
|
2713
|
+
*
|
|
2714
|
+
* **Generation fields (from i18n):**
|
|
2715
|
+
* - morphology: Morphological type for transformation
|
|
2716
|
+
* - canonicalOrder: Role ordering for output
|
|
2717
|
+
* - roleMarkers: Grammatical markers for roles
|
|
2718
|
+
*/
|
|
2719
|
+
interface UnifiedLanguageProfile {
|
|
2720
|
+
/** ISO 639-1 language code */
|
|
2721
|
+
readonly code: string;
|
|
2722
|
+
/** Human-readable language name */
|
|
2723
|
+
readonly name: string;
|
|
2724
|
+
/** Native language name */
|
|
2725
|
+
readonly nativeName: string;
|
|
2726
|
+
/** Text direction */
|
|
2727
|
+
readonly direction: 'ltr' | 'rtl';
|
|
2728
|
+
/** Primary word order (SVO, SOV, VSO, etc.) */
|
|
2729
|
+
readonly wordOrder: WordOrder$1;
|
|
2730
|
+
/** How grammatical roles are marked */
|
|
2731
|
+
readonly markingStrategy: MarkingStrategy;
|
|
2732
|
+
/** Morphological typology */
|
|
2733
|
+
readonly morphology: MorphologyType;
|
|
2734
|
+
/** Whether the language uses spaces between words */
|
|
2735
|
+
readonly usesSpaces: boolean;
|
|
2736
|
+
/** Markers for each semantic role */
|
|
2737
|
+
readonly roleMarkers: Partial<Record<SemanticRole, UnifiedRoleMarker>>;
|
|
2738
|
+
/** Canonical role order for generation */
|
|
2739
|
+
readonly canonicalOrder: SemanticRole[];
|
|
2740
|
+
/** Verb configuration */
|
|
2741
|
+
readonly verb: VerbConfig;
|
|
2742
|
+
/** Command keyword translations */
|
|
2743
|
+
readonly keywords: Record<string, KeywordTranslation>;
|
|
2744
|
+
/** Special tokenization configuration */
|
|
2745
|
+
readonly tokenization?: TokenizationConfig;
|
|
2746
|
+
/** Special transformation rules */
|
|
2747
|
+
readonly rules?: GrammarRule[];
|
|
2748
|
+
}
|
|
2749
|
+
/**
|
|
2750
|
+
* Grammar rule for special transformations.
|
|
2751
|
+
* (Imported from i18n for consistency)
|
|
2752
|
+
*/
|
|
2753
|
+
interface GrammarRule {
|
|
2754
|
+
name: string;
|
|
2755
|
+
description: string;
|
|
2756
|
+
match: PatternMatcher;
|
|
2757
|
+
transform: PatternTransform;
|
|
2758
|
+
priority: number;
|
|
2759
|
+
}
|
|
2760
|
+
interface PatternMatcher {
|
|
2761
|
+
commands?: string[];
|
|
2762
|
+
requiredRoles: SemanticRole[];
|
|
2763
|
+
optionalRoles?: SemanticRole[];
|
|
2764
|
+
predicate?: (parsed: unknown) => boolean;
|
|
2765
|
+
}
|
|
2766
|
+
interface PatternTransform {
|
|
2767
|
+
roleOrder: SemanticRole[];
|
|
2768
|
+
insertMarkers?: boolean;
|
|
2769
|
+
custom?: (parsed: unknown, profile: UnifiedLanguageProfile) => string;
|
|
2770
|
+
}
|
|
2771
|
+
/**
|
|
2772
|
+
* Convert marking strategy to AdpositionType for i18n compatibility.
|
|
2773
|
+
*/
|
|
2774
|
+
declare function markingStrategyToAdpositionType(strategy: MarkingStrategy): AdpositionType;
|
|
2775
|
+
/**
|
|
2776
|
+
* Convert UnifiedRoleMarker to GrammaticalMarker for i18n compatibility.
|
|
2777
|
+
*/
|
|
2778
|
+
declare function toGrammaticalMarker(role: SemanticRole, marker: UnifiedRoleMarker, strategy: MarkingStrategy): GrammaticalMarker;
|
|
2779
|
+
/**
|
|
2780
|
+
* Convert UnifiedLanguageProfile to i18n LanguageProfile.
|
|
2781
|
+
* This enables using unified profiles with existing i18n code.
|
|
2782
|
+
*/
|
|
2783
|
+
declare function toI18nProfile(unified: UnifiedLanguageProfile): {
|
|
2784
|
+
code: string;
|
|
2785
|
+
name: string;
|
|
2786
|
+
wordOrder: WordOrder$1;
|
|
2787
|
+
adpositionType: AdpositionType;
|
|
2788
|
+
morphology: MorphologyType;
|
|
2789
|
+
direction: 'ltr' | 'rtl';
|
|
2790
|
+
markers: GrammaticalMarker[];
|
|
2791
|
+
canonicalOrder: SemanticRole[];
|
|
2792
|
+
};
|
|
2793
|
+
/**
|
|
2794
|
+
* Check if an object is a UnifiedLanguageProfile.
|
|
2795
|
+
*/
|
|
2796
|
+
declare function isUnifiedProfile(obj: unknown): obj is UnifiedLanguageProfile;
|
|
2797
|
+
|
|
2798
|
+
/**
|
|
2799
|
+
* Semantic Static Analysis
|
|
2800
|
+
*
|
|
2801
|
+
* Analyzes semantic nodes for potential issues:
|
|
2802
|
+
* - Conflicting actions on same trigger
|
|
2803
|
+
* - Accessibility problems (hover-only interactions)
|
|
2804
|
+
* - Performance concerns (high-frequency triggers)
|
|
2805
|
+
* - Invalid role combinations
|
|
2806
|
+
*
|
|
2807
|
+
* Can be used:
|
|
2808
|
+
* - Standalone: analyze(input, lang)
|
|
2809
|
+
* - Dev mode: Enabled via config, auto-warns on parse
|
|
2810
|
+
* - Build time: Integrate with bundlers
|
|
2811
|
+
*/
|
|
2812
|
+
|
|
2813
|
+
type WarningSeverity = 'error' | 'warning' | 'info';
|
|
2814
|
+
type WarningCode = 'HOVER_ONLY_INTERACTION' | 'HIGH_FREQUENCY_TRIGGER' | 'MISSING_REQUIRED_ROLE' | 'INVALID_ROLE_FOR_COMMAND' | 'CONFLICTING_ACTIONS' | 'UNREACHABLE_BEHAVIOR' | 'POTENTIAL_RACE_CONDITION';
|
|
2815
|
+
interface AnalysisWarning {
|
|
2816
|
+
code: WarningCode;
|
|
2817
|
+
severity: WarningSeverity;
|
|
2818
|
+
message: string;
|
|
2819
|
+
suggestion?: string;
|
|
2820
|
+
location?: {
|
|
2821
|
+
input: string;
|
|
2822
|
+
role?: SemanticRole;
|
|
2823
|
+
};
|
|
2824
|
+
}
|
|
2825
|
+
interface AnalysisResult {
|
|
2826
|
+
valid: boolean;
|
|
2827
|
+
warnings: AnalysisWarning[];
|
|
2828
|
+
node: SemanticNode | null;
|
|
2829
|
+
}
|
|
2830
|
+
interface AnalysisConfig {
|
|
2831
|
+
/** Enable accessibility checks (default: true) */
|
|
2832
|
+
accessibility?: boolean;
|
|
2833
|
+
/** Enable performance checks (default: true) */
|
|
2834
|
+
performance?: boolean;
|
|
2835
|
+
/** Enable schema validation (default: true) */
|
|
2836
|
+
schema?: boolean;
|
|
2837
|
+
/** Treat warnings as errors (default: false) */
|
|
2838
|
+
strict?: boolean;
|
|
2839
|
+
}
|
|
2840
|
+
/**
|
|
2841
|
+
* Check for hover-only interactions (accessibility issue).
|
|
2842
|
+
*/
|
|
2843
|
+
declare function checkAccessibility(node: SemanticNode, input: string): AnalysisWarning[];
|
|
2844
|
+
/**
|
|
2845
|
+
* Check for high-frequency events without throttling.
|
|
2846
|
+
*/
|
|
2847
|
+
declare function checkPerformance(node: SemanticNode, input: string): AnalysisWarning[];
|
|
2848
|
+
/**
|
|
2849
|
+
* Validate role combinations against command schema.
|
|
2850
|
+
*/
|
|
2851
|
+
declare function checkSchema(node: SemanticNode, input: string): AnalysisWarning[];
|
|
2852
|
+
/**
|
|
2853
|
+
* Analyze multiple nodes together to detect conflicts.
|
|
2854
|
+
*/
|
|
2855
|
+
declare function analyzeMultiple(nodes: SemanticNode[], _config?: AnalysisConfig): AnalysisWarning[];
|
|
2856
|
+
/**
|
|
2857
|
+
* Analyze a single hyperscript input for potential issues.
|
|
2858
|
+
*
|
|
2859
|
+
* @param input - The hyperscript text to analyze
|
|
2860
|
+
* @param lang - The language of the input (default: 'en')
|
|
2861
|
+
* @param config - Analysis configuration
|
|
2862
|
+
* @returns Analysis result with warnings
|
|
2863
|
+
*
|
|
2864
|
+
* @example
|
|
2865
|
+
* ```typescript
|
|
2866
|
+
* const result = analyze('on hover show .tooltip', 'en');
|
|
2867
|
+
* // result.warnings[0].code === 'HOVER_ONLY_INTERACTION'
|
|
2868
|
+
* ```
|
|
2869
|
+
*/
|
|
2870
|
+
declare function analyze(input: string, lang?: string, config?: AnalysisConfig): AnalysisResult;
|
|
2871
|
+
/**
|
|
2872
|
+
* Analyze multiple hyperscript inputs together.
|
|
2873
|
+
*
|
|
2874
|
+
* @param inputs - Array of hyperscript texts
|
|
2875
|
+
* @param lang - The language of the inputs
|
|
2876
|
+
* @param config - Analysis configuration
|
|
2877
|
+
* @returns Combined analysis result
|
|
2878
|
+
*/
|
|
2879
|
+
declare function analyzeAll(inputs: string[], lang?: string, config?: AnalysisConfig): AnalysisResult;
|
|
2880
|
+
/**
|
|
2881
|
+
* Enable dev mode analysis.
|
|
2882
|
+
* When enabled, every parse() call will run analysis and log warnings.
|
|
2883
|
+
*/
|
|
2884
|
+
declare function enableDevMode(config?: AnalysisConfig): void;
|
|
2885
|
+
/**
|
|
2886
|
+
* Disable dev mode analysis.
|
|
2887
|
+
*/
|
|
2888
|
+
declare function disableDevMode(): void;
|
|
2889
|
+
/**
|
|
2890
|
+
* Check if dev mode is enabled.
|
|
2891
|
+
*/
|
|
2892
|
+
declare function isDevModeEnabled(): boolean;
|
|
2893
|
+
/**
|
|
2894
|
+
* Get current dev mode config.
|
|
2895
|
+
*/
|
|
2896
|
+
declare function getDevModeConfig(): AnalysisConfig;
|
|
2897
|
+
/**
|
|
2898
|
+
* Run dev mode analysis if enabled.
|
|
2899
|
+
* Called internally by parser when dev mode is on.
|
|
2900
|
+
*/
|
|
2901
|
+
declare function devModeAnalyze(input: string, lang: string, node: SemanticNode | null): void;
|
|
2902
|
+
|
|
2903
|
+
/**
|
|
2904
|
+
* Expression Parser Types
|
|
2905
|
+
*
|
|
2906
|
+
* Defines AST node types for expressions that can be shared between
|
|
2907
|
+
* the semantic package (AST building) and core package (runtime).
|
|
2908
|
+
*
|
|
2909
|
+
* These types are intentionally minimal and focused on expressions only.
|
|
2910
|
+
*/
|
|
2911
|
+
/**
|
|
2912
|
+
* Base interface for all expression AST nodes
|
|
2913
|
+
*/
|
|
2914
|
+
interface ExpressionNode {
|
|
2915
|
+
readonly type: string;
|
|
2916
|
+
readonly start?: number | undefined;
|
|
2917
|
+
readonly end?: number | undefined;
|
|
2918
|
+
readonly line?: number | undefined;
|
|
2919
|
+
readonly column?: number | undefined;
|
|
2920
|
+
}
|
|
2921
|
+
interface LiteralNode extends ExpressionNode {
|
|
2922
|
+
readonly type: 'literal';
|
|
2923
|
+
readonly value: string | number | boolean | null | undefined;
|
|
2924
|
+
readonly raw?: string | undefined;
|
|
2925
|
+
readonly dataType?: 'string' | 'number' | 'boolean' | 'null' | 'undefined' | 'duration' | undefined;
|
|
2926
|
+
}
|
|
2927
|
+
type SelectorKind = 'id' | 'class' | 'attribute' | 'element' | 'query' | 'complex';
|
|
2928
|
+
interface SelectorNode extends ExpressionNode {
|
|
2929
|
+
readonly type: 'selector' | 'cssSelector' | 'idRef' | 'classRef';
|
|
2930
|
+
readonly value?: string;
|
|
2931
|
+
readonly selector?: string;
|
|
2932
|
+
readonly selectorType?: SelectorKind;
|
|
2933
|
+
}
|
|
2934
|
+
type ContextType = 'me' | 'you' | 'it' | 'its' | 'my' | 'your' | 'result' | 'event' | 'target' | 'body' | 'detail';
|
|
2935
|
+
interface ContextReferenceNode extends ExpressionNode {
|
|
2936
|
+
readonly type: 'contextReference' | 'symbol';
|
|
2937
|
+
readonly contextType?: ContextType;
|
|
2938
|
+
readonly name?: string;
|
|
2939
|
+
}
|
|
2940
|
+
interface PropertyAccessNode extends ExpressionNode {
|
|
2941
|
+
readonly type: 'propertyAccess';
|
|
2942
|
+
readonly object: ExpressionNode;
|
|
2943
|
+
readonly property: string;
|
|
2944
|
+
}
|
|
2945
|
+
|
|
2946
|
+
/**
|
|
2947
|
+
* Semantic Value to AST Node Converters
|
|
2948
|
+
*
|
|
2949
|
+
* Converts SemanticValue types to AST expression nodes.
|
|
2950
|
+
* Used by the AST builder to construct expression trees from semantic parsing results.
|
|
2951
|
+
*/
|
|
2952
|
+
|
|
2953
|
+
/**
|
|
2954
|
+
* Convert a SemanticValue to an AST ExpressionNode.
|
|
2955
|
+
*
|
|
2956
|
+
* @param value - The semantic value to convert
|
|
2957
|
+
* @param warnings - Optional array to collect warnings about potentially incorrect type choices
|
|
2958
|
+
* @returns The corresponding AST expression node
|
|
2959
|
+
*/
|
|
2960
|
+
declare function convertValue(value: SemanticValue, warnings?: string[]): ExpressionNode;
|
|
2961
|
+
/**
|
|
2962
|
+
* Convert a LiteralValue to a LiteralNode.
|
|
2963
|
+
*/
|
|
2964
|
+
declare function convertLiteral(value: LiteralValue): LiteralNode;
|
|
2965
|
+
/**
|
|
2966
|
+
* Convert a SelectorValue to a SelectorNode.
|
|
2967
|
+
*
|
|
2968
|
+
* @param value - The selector value to convert
|
|
2969
|
+
* @param warnings - Optional array to collect warnings
|
|
2970
|
+
*/
|
|
2971
|
+
declare function convertSelector(value: SelectorValue, warnings?: string[]): SelectorNode;
|
|
2972
|
+
/**
|
|
2973
|
+
* Convert a ReferenceValue to a ContextReferenceNode.
|
|
2974
|
+
*/
|
|
2975
|
+
declare function convertReference(value: ReferenceValue): ContextReferenceNode;
|
|
2976
|
+
/**
|
|
2977
|
+
* Convert a PropertyPathValue to a PropertyAccessNode.
|
|
2978
|
+
* Recursively converts the object part.
|
|
2979
|
+
*
|
|
2980
|
+
* @param value - The property path value to convert
|
|
2981
|
+
* @param warnings - Optional array to collect warnings
|
|
2982
|
+
*/
|
|
2983
|
+
declare function convertPropertyPath(value: PropertyPathValue, warnings?: string[]): PropertyAccessNode;
|
|
2984
|
+
/**
|
|
2985
|
+
* Convert an ExpressionValue (raw string) by parsing it with the expression parser.
|
|
2986
|
+
* This is the fallback for complex expressions that couldn't be fully parsed
|
|
2987
|
+
* at the semantic level.
|
|
2988
|
+
*/
|
|
2989
|
+
declare function convertExpression(value: ExpressionValue): ExpressionNode;
|
|
2990
|
+
|
|
2991
|
+
/**
|
|
2992
|
+
* Command-specific AST Mappers
|
|
2993
|
+
*
|
|
2994
|
+
* Each command can have a custom mapper that knows how to convert
|
|
2995
|
+
* its semantic roles to the appropriate AST structure.
|
|
2996
|
+
*/
|
|
2997
|
+
|
|
2998
|
+
/**
|
|
2999
|
+
* Result from command mapping, including the AST and any warnings.
|
|
3000
|
+
*/
|
|
3001
|
+
interface CommandMapperResult {
|
|
3002
|
+
ast: CommandNode;
|
|
3003
|
+
warnings: string[];
|
|
3004
|
+
}
|
|
3005
|
+
/**
|
|
3006
|
+
* Interface for command-specific AST mappers.
|
|
3007
|
+
*/
|
|
3008
|
+
interface CommandMapper {
|
|
3009
|
+
/**
|
|
3010
|
+
* The action type this mapper handles.
|
|
3011
|
+
*/
|
|
3012
|
+
readonly action: ActionType;
|
|
3013
|
+
/**
|
|
3014
|
+
* Convert a CommandSemanticNode to a CommandNode.
|
|
3015
|
+
*
|
|
3016
|
+
* @param node - The semantic command node
|
|
3017
|
+
* @param builder - The AST builder (for recursive building if needed)
|
|
3018
|
+
* @returns The AST command node with any warnings, or just the AST node for backward compatibility
|
|
3019
|
+
*/
|
|
3020
|
+
toAST(node: CommandSemanticNode, builder: ASTBuilder): CommandMapperResult | CommandNode;
|
|
3021
|
+
}
|
|
3022
|
+
/**
|
|
3023
|
+
* Get the command mapper for an action type.
|
|
3024
|
+
*
|
|
3025
|
+
* @param action - The action type
|
|
3026
|
+
* @returns The mapper, or undefined if no specific mapper exists
|
|
3027
|
+
*/
|
|
3028
|
+
declare function getCommandMapper(action: ActionType): CommandMapper | undefined;
|
|
3029
|
+
/**
|
|
3030
|
+
* Register a custom command mapper.
|
|
3031
|
+
*
|
|
3032
|
+
* @param mapper - The command mapper to register
|
|
3033
|
+
*/
|
|
3034
|
+
declare function registerCommandMapper(mapper: CommandMapper): void;
|
|
3035
|
+
/**
|
|
3036
|
+
* Get all registered command mappers.
|
|
3037
|
+
*/
|
|
3038
|
+
declare function getRegisteredMappers(): Map<ActionType, CommandMapper>;
|
|
3039
|
+
|
|
3040
|
+
/**
|
|
3041
|
+
* Semantic to AST Builder
|
|
3042
|
+
*
|
|
3043
|
+
* Converts SemanticNodes directly to AST nodes, bypassing the English text
|
|
3044
|
+
* generation and re-parsing step.
|
|
3045
|
+
*
|
|
3046
|
+
* Flow:
|
|
3047
|
+
* Japanese → Semantic Parser → SemanticNode → AST Builder → AST
|
|
3048
|
+
*
|
|
3049
|
+
* Instead of:
|
|
3050
|
+
* Japanese → Semantic Parser → SemanticNode → English Text → Parser → AST
|
|
3051
|
+
*/
|
|
3052
|
+
|
|
3053
|
+
/**
|
|
3054
|
+
* Base AST node interface
|
|
3055
|
+
*/
|
|
3056
|
+
interface ASTNode {
|
|
3057
|
+
readonly type: string;
|
|
3058
|
+
readonly start?: number;
|
|
3059
|
+
readonly end?: number;
|
|
3060
|
+
readonly line?: number;
|
|
3061
|
+
readonly column?: number;
|
|
3062
|
+
[key: string]: unknown;
|
|
3063
|
+
}
|
|
3064
|
+
/**
|
|
3065
|
+
* Command AST node
|
|
3066
|
+
*/
|
|
3067
|
+
interface CommandNode extends ASTNode {
|
|
3068
|
+
readonly type: 'command';
|
|
3069
|
+
readonly name: string;
|
|
3070
|
+
readonly args: ExpressionNode[];
|
|
3071
|
+
readonly modifiers?: Record<string, ExpressionNode>;
|
|
3072
|
+
readonly isBlocking?: boolean;
|
|
3073
|
+
readonly implicitTarget?: ExpressionNode;
|
|
3074
|
+
}
|
|
3075
|
+
/**
|
|
3076
|
+
* Event handler AST node (compatible with @lokascript/core)
|
|
3077
|
+
*/
|
|
3078
|
+
interface EventHandlerNode extends ASTNode {
|
|
3079
|
+
readonly type: 'eventHandler';
|
|
3080
|
+
/** Primary event name */
|
|
3081
|
+
readonly event: string;
|
|
3082
|
+
/** All event names when using "on event1 or event2" syntax */
|
|
3083
|
+
readonly events?: string[];
|
|
3084
|
+
/** CSS selector for event delegation ("from" keyword) */
|
|
3085
|
+
readonly selector?: string;
|
|
3086
|
+
/** Target for "from" clause (as a string) */
|
|
3087
|
+
readonly target?: string;
|
|
3088
|
+
/** Optional event condition ("[condition]" syntax) */
|
|
3089
|
+
readonly condition?: ASTNode;
|
|
3090
|
+
/** Attribute name for mutation events ("of @attribute" syntax) */
|
|
3091
|
+
readonly attributeName?: string;
|
|
3092
|
+
/** Target element to watch for changes ("in <target>" syntax) */
|
|
3093
|
+
readonly watchTarget?: ExpressionNode;
|
|
3094
|
+
/** Event parameter names to destructure (e.g., ['clientX', 'clientY']) */
|
|
3095
|
+
readonly args?: string[];
|
|
3096
|
+
/** Event parameters (alias for args) */
|
|
3097
|
+
readonly params?: string[];
|
|
3098
|
+
/** Handler commands */
|
|
3099
|
+
readonly commands: ASTNode[];
|
|
3100
|
+
}
|
|
3101
|
+
/**
|
|
3102
|
+
* Conditional AST node (if/else)
|
|
3103
|
+
*
|
|
3104
|
+
* Note: For runtime compatibility, buildConditional() now produces a CommandNode
|
|
3105
|
+
* with condition and branches as args, matching what IfCommand expects.
|
|
3106
|
+
* This interface is retained for reference but not used as output.
|
|
3107
|
+
*/
|
|
3108
|
+
interface ConditionalNode extends ASTNode {
|
|
3109
|
+
readonly type: 'if';
|
|
3110
|
+
readonly condition: ExpressionNode;
|
|
3111
|
+
readonly thenBranch: ASTNode[];
|
|
3112
|
+
readonly elseBranch?: ASTNode[];
|
|
3113
|
+
}
|
|
3114
|
+
/**
|
|
3115
|
+
* Command sequence node (runtime-compatible format for chained commands)
|
|
3116
|
+
*/
|
|
3117
|
+
interface CommandSequenceNode extends ASTNode {
|
|
3118
|
+
readonly type: 'CommandSequence';
|
|
3119
|
+
/** Commands in the sequence */
|
|
3120
|
+
readonly commands: ASTNode[];
|
|
3121
|
+
}
|
|
3122
|
+
/**
|
|
3123
|
+
* Block node (for grouping commands)
|
|
3124
|
+
*/
|
|
3125
|
+
interface BlockNode extends ASTNode {
|
|
3126
|
+
readonly type: 'block';
|
|
3127
|
+
readonly commands: ASTNode[];
|
|
3128
|
+
}
|
|
3129
|
+
interface ASTBuilderOptions {
|
|
3130
|
+
/**
|
|
3131
|
+
* Fallback function to parse complex expressions that can't be handled
|
|
3132
|
+
* directly by the AST builder. Uses the expression-parser by default.
|
|
3133
|
+
*/
|
|
3134
|
+
parseExpression?: (input: string) => ExpressionNode | null;
|
|
3135
|
+
}
|
|
3136
|
+
/**
|
|
3137
|
+
* Builds AST nodes directly from SemanticNodes.
|
|
3138
|
+
*/
|
|
3139
|
+
declare class ASTBuilder {
|
|
3140
|
+
/**
|
|
3141
|
+
* Warnings collected during AST building (e.g., type inference issues).
|
|
3142
|
+
*/
|
|
3143
|
+
warnings: string[];
|
|
3144
|
+
constructor(_options?: ASTBuilderOptions);
|
|
3145
|
+
/**
|
|
3146
|
+
* Build an AST from a SemanticNode.
|
|
3147
|
+
*
|
|
3148
|
+
* @param node - The semantic node to convert
|
|
3149
|
+
* @returns The corresponding AST node
|
|
3150
|
+
*/
|
|
3151
|
+
build(node: SemanticNode): ASTNode;
|
|
3152
|
+
/**
|
|
3153
|
+
* Build a CommandNode from a CommandSemanticNode.
|
|
3154
|
+
*/
|
|
3155
|
+
private buildCommand;
|
|
3156
|
+
/**
|
|
3157
|
+
* Generic command builder when no specific mapper is available.
|
|
3158
|
+
* Maps roles to args in a predictable order.
|
|
3159
|
+
*/
|
|
3160
|
+
private buildGenericCommand;
|
|
3161
|
+
/**
|
|
3162
|
+
* Map semantic roles to hyperscript modifier keywords.
|
|
3163
|
+
*/
|
|
3164
|
+
private roleToModifierKey;
|
|
3165
|
+
/**
|
|
3166
|
+
* Build an EventHandlerNode from an EventHandlerSemanticNode.
|
|
3167
|
+
*/
|
|
3168
|
+
private buildEventHandler;
|
|
3169
|
+
/**
|
|
3170
|
+
* Build a CommandNode from a ConditionalSemanticNode.
|
|
3171
|
+
*
|
|
3172
|
+
* Produces a command node with:
|
|
3173
|
+
* - args[0]: condition expression
|
|
3174
|
+
* - args[1]: then block (wrapped in { type: 'block', commands: [...] })
|
|
3175
|
+
* - args[2]: else block (optional, same format)
|
|
3176
|
+
*
|
|
3177
|
+
* This format matches what IfCommand.parseInput() expects.
|
|
3178
|
+
*/
|
|
3179
|
+
private buildConditional;
|
|
3180
|
+
/**
|
|
3181
|
+
* Build AST nodes from a CompoundSemanticNode.
|
|
3182
|
+
*
|
|
3183
|
+
* Converts to CommandSequence for runtime compatibility.
|
|
3184
|
+
* The runtime recognizes 'CommandSequence' type and executes commands in order.
|
|
3185
|
+
*/
|
|
3186
|
+
private buildCompound;
|
|
3187
|
+
/**
|
|
3188
|
+
* Build a CommandNode from a LoopSemanticNode.
|
|
3189
|
+
*
|
|
3190
|
+
* Produces a 'repeat' command with:
|
|
3191
|
+
* - args[0]: loop type identifier (forever, times, for, while, until)
|
|
3192
|
+
* - args[1]: count/condition/variable depending on loop type
|
|
3193
|
+
* - args[2]: collection (for 'for' loops)
|
|
3194
|
+
* - args[last]: body block
|
|
3195
|
+
*
|
|
3196
|
+
* This format matches what the repeat command parser produces.
|
|
3197
|
+
*/
|
|
3198
|
+
private buildLoop;
|
|
3199
|
+
/**
|
|
3200
|
+
* Build a BlockNode from an array of semantic nodes.
|
|
3201
|
+
* Useful for grouping commands in if/else branches.
|
|
3202
|
+
*/
|
|
3203
|
+
buildBlock(nodes: SemanticNode[]): BlockNode;
|
|
3204
|
+
}
|
|
3205
|
+
/**
|
|
3206
|
+
* Result from building an AST, including any warnings.
|
|
3207
|
+
*/
|
|
3208
|
+
interface BuildASTResult {
|
|
3209
|
+
ast: ASTNode;
|
|
3210
|
+
warnings: string[];
|
|
3211
|
+
}
|
|
3212
|
+
/**
|
|
3213
|
+
* Build an AST from a SemanticNode using default options.
|
|
3214
|
+
*
|
|
3215
|
+
* @param node - The semantic node to convert
|
|
3216
|
+
* @returns The corresponding AST node and any warnings
|
|
3217
|
+
*/
|
|
3218
|
+
declare function buildAST(node: SemanticNode): BuildASTResult;
|
|
3219
|
+
|
|
3220
|
+
/**
|
|
3221
|
+
* Language Loader
|
|
3222
|
+
*
|
|
3223
|
+
* Provides lazy loading capabilities for language modules.
|
|
3224
|
+
* Languages can be loaded:
|
|
3225
|
+
* 1. Via dynamic import from package subpath
|
|
3226
|
+
* 2. From a URL (for CDN usage)
|
|
3227
|
+
* 3. From a pre-loaded module object
|
|
3228
|
+
*
|
|
3229
|
+
* @example
|
|
3230
|
+
* ```typescript
|
|
3231
|
+
* import { loadLanguage, parse } from '@lokascript/semantic/browser/lazy';
|
|
3232
|
+
*
|
|
3233
|
+
* // Load Japanese on demand
|
|
3234
|
+
* await loadLanguage('ja');
|
|
3235
|
+
*
|
|
3236
|
+
* // Now parsing works for Japanese
|
|
3237
|
+
* parse('トグル .active', 'ja');
|
|
3238
|
+
* ```
|
|
3239
|
+
*/
|
|
3240
|
+
|
|
3241
|
+
/**
|
|
3242
|
+
* Options for loading a language.
|
|
3243
|
+
*/
|
|
3244
|
+
interface LoadLanguageOptions {
|
|
3245
|
+
/**
|
|
3246
|
+
* URL to fetch the language module from (for CDN usage).
|
|
3247
|
+
* The module should export: tokenizer, profile, patterns (or buildPatterns)
|
|
3248
|
+
*/
|
|
3249
|
+
url?: string;
|
|
3250
|
+
/**
|
|
3251
|
+
* Pre-loaded module object.
|
|
3252
|
+
* Use this if you've already imported the module.
|
|
3253
|
+
*/
|
|
3254
|
+
module?: LanguageModule;
|
|
3255
|
+
/**
|
|
3256
|
+
* Skip loading if the language is already registered.
|
|
3257
|
+
* Defaults to true.
|
|
3258
|
+
*/
|
|
3259
|
+
skipIfRegistered?: boolean;
|
|
3260
|
+
}
|
|
3261
|
+
/**
|
|
3262
|
+
* A language module that can be registered with the semantic parser.
|
|
3263
|
+
*/
|
|
3264
|
+
interface LanguageModule {
|
|
3265
|
+
/** The language tokenizer */
|
|
3266
|
+
tokenizer: LanguageTokenizer;
|
|
3267
|
+
/** The language profile for pattern generation */
|
|
3268
|
+
profile: LanguageProfile;
|
|
3269
|
+
/** Pre-built patterns (optional) */
|
|
3270
|
+
patterns?: LanguagePattern[];
|
|
3271
|
+
/** Function to build patterns lazily (optional) */
|
|
3272
|
+
buildPatterns?: () => LanguagePattern[];
|
|
3273
|
+
}
|
|
3274
|
+
/**
|
|
3275
|
+
* Result of a language loading operation.
|
|
3276
|
+
*/
|
|
3277
|
+
interface LoadLanguageResult {
|
|
3278
|
+
/** The language code that was loaded */
|
|
3279
|
+
code: string;
|
|
3280
|
+
/** Whether the language was newly loaded (false if already registered) */
|
|
3281
|
+
loaded: boolean;
|
|
3282
|
+
/** Error message if loading failed */
|
|
3283
|
+
error?: string;
|
|
3284
|
+
}
|
|
3285
|
+
/**
|
|
3286
|
+
* List of all supported language codes.
|
|
3287
|
+
*/
|
|
3288
|
+
declare const SUPPORTED_LANGUAGES: string[];
|
|
3289
|
+
/**
|
|
3290
|
+
* Load a single language.
|
|
3291
|
+
*
|
|
3292
|
+
* @param code - The language code (e.g., 'en', 'ja', 'es')
|
|
3293
|
+
* @param options - Loading options
|
|
3294
|
+
* @returns Promise resolving to a result indicating success or failure
|
|
3295
|
+
*
|
|
3296
|
+
* @example
|
|
3297
|
+
* ```typescript
|
|
3298
|
+
* // Load from package
|
|
3299
|
+
* await loadLanguage('ja');
|
|
3300
|
+
*
|
|
3301
|
+
* // Load from CDN
|
|
3302
|
+
* await loadLanguage('ko', {
|
|
3303
|
+
* url: 'https://cdn.example.com/lokascript-semantic-ko.js'
|
|
3304
|
+
* });
|
|
3305
|
+
*
|
|
3306
|
+
* // Load from pre-loaded module
|
|
3307
|
+
* await loadLanguage('en', { module: myEnglishModule });
|
|
3308
|
+
* ```
|
|
3309
|
+
*/
|
|
3310
|
+
declare function loadLanguage(code: string, options?: LoadLanguageOptions): Promise<LoadLanguageResult>;
|
|
3311
|
+
/**
|
|
3312
|
+
* Load multiple languages in parallel.
|
|
3313
|
+
*
|
|
3314
|
+
* @param codes - Array of language codes to load
|
|
3315
|
+
* @param options - Loading options (applied to all languages; `module` is not accepted here)
|
|
3316
|
+
* @returns Promise resolving to an array with one result per language
|
|
3317
|
+
*
|
|
3318
|
+
* @example
|
|
3319
|
+
* ```typescript
|
|
3320
|
+
* // Load multiple languages
|
|
3321
|
+
* const results = await loadLanguages(['en', 'es', 'ja']);
|
|
3322
|
+
*
|
|
3323
|
+
* // Check results
|
|
3324
|
+
* for (const result of results) {
|
|
3325
|
+
* if (result.error) {
|
|
3326
|
+
* console.error(`Failed to load ${result.code}: ${result.error}`);
|
|
3327
|
+
* }
|
|
3328
|
+
* }
|
|
3329
|
+
* ```
|
|
3330
|
+
*/
|
|
3331
|
+
declare function loadLanguages(codes: string[], options?: Omit<LoadLanguageOptions, 'module'>): Promise<LoadLanguageResult[]>;
|
|
3332
|
+
/**
|
|
3333
|
+
* Check if a language can be loaded (is supported).
|
|
3334
|
+
*/
|
|
3335
|
+
declare function canLoadLanguage(code: string): boolean;
|
|
3336
|
+
/**
|
|
3337
|
+
* Get list of languages that are currently loaded.
|
|
3338
|
+
*/
|
|
3339
|
+
declare function getLoadedLanguages(): string[];
|
|
3340
|
+
/**
|
|
3341
|
+
* Get list of languages that are not yet loaded.
|
|
3342
|
+
*/
|
|
3343
|
+
declare function getUnloadedLanguages(): string[];
|
|
3344
|
+
|
|
3345
|
+
/**
|
|
3346
|
+
* Confidence Calculator Utility
|
|
3347
|
+
*
|
|
3348
|
+
* Provides standalone confidence calculation for translations.
|
|
3349
|
+
* Exposes the pattern matcher's confidence scoring for use in scripts.
|
|
3350
|
+
*/
|
|
3351
|
+
|
|
3352
|
+
interface ConfidenceResult {
|
|
3353
|
+
/** Confidence score from 0-1 */
|
|
3354
|
+
confidence: number;
|
|
3355
|
+
/** Whether the input parsed successfully */
|
|
3356
|
+
parseSuccess: boolean;
|
|
3357
|
+
/** Pattern ID that matched, if any */
|
|
3358
|
+
patternId?: string;
|
|
3359
|
+
/** The action type (command) that was parsed */
|
|
3360
|
+
action?: ActionType;
|
|
3361
|
+
/** Number of tokens consumed during matching */
|
|
3362
|
+
tokensConsumed?: number;
|
|
3363
|
+
/** Error message if parsing failed */
|
|
3364
|
+
error?: string;
|
|
3365
|
+
}
|
|
3366
|
+
/**
|
|
3367
|
+
* Calculate confidence score for a hyperscript translation.
|
|
3368
|
+
*
|
|
3369
|
+
* Uses the pattern matcher to determine how well the input matches
|
|
3370
|
+
* available patterns for the given language.
|
|
3371
|
+
*
|
|
3372
|
+
* @param hyperscript - The hyperscript code to analyze
|
|
3373
|
+
* @param language - The language code (e.g., 'ja', 'es', 'en')
|
|
3374
|
+
* @returns Confidence result with score and match details
|
|
3375
|
+
*/
|
|
3376
|
+
declare function calculateTranslationConfidence(hyperscript: string, language: string): ConfidenceResult;
|
|
3377
|
+
interface ParseWithConfidenceResult {
|
|
3378
|
+
node: SemanticNode | null;
|
|
3379
|
+
confidence: number;
|
|
3380
|
+
error: string | undefined;
|
|
3381
|
+
}
|
|
3382
|
+
/**
|
|
3383
|
+
* Calculate confidence and parse to a semantic node in one call.
|
|
3384
|
+
* Returns both the parsed node and the confidence score.
|
|
3385
|
+
*/
|
|
3386
|
+
declare function parseWithConfidence(hyperscript: string, language: string): ParseWithConfidenceResult;
|
|
3387
|
+
|
|
3388
|
+
/**
|
|
3389
|
+
* Semantic-First Multilingual Hyperscript
|
|
3390
|
+
*
|
|
3391
|
+
* This package provides a semantic-first approach to multilingual hyperscript,
|
|
3392
|
+
* enabling true native-language syntax that feels natural to speakers of any language.
|
|
3393
|
+
*
|
|
3394
|
+
* Key Features:
|
|
3395
|
+
* - Parse hyperscript from any supported language (e.g., en, ja, ar, es)
|
|
3396
|
+
* - Translate between languages while preserving semantic meaning
|
|
3397
|
+
* - Explicit mode syntax for learning and debugging
|
|
3398
|
+
* - Bidirectional conversion: natural ↔ explicit ↔ natural
|
|
3399
|
+
*
|
|
3400
|
+
* @example
|
|
3401
|
+
* // Parse Japanese to semantic
|
|
3402
|
+
* const node = parse('#button の .active を 切り替え', 'ja');
|
|
3403
|
+
*
|
|
3404
|
+
* // Render in English
|
|
3405
|
+
* const english = render(node, 'en');
|
|
3406
|
+
* // → 'toggle .active on #button'
|
|
3407
|
+
*
|
|
3408
|
+
* // Render in explicit mode
|
|
3409
|
+
* const explicit = renderExplicit(node);
|
|
3410
|
+
* // → '[toggle patient:.active destination:#button]'
|
|
3411
|
+
*
|
|
3412
|
+
* // Translate directly
|
|
3413
|
+
* const arabic = translate('toggle .active on #button', 'en', 'ar');
|
|
3414
|
+
* // → 'بدّل .active على #button'
|
|
3415
|
+
*/
|
|
3416
|
+
|
|
3417
|
+
/**
|
|
3418
|
+
* Get all supported languages for parsing.
|
|
3419
|
+
*/
|
|
3420
|
+
declare function getSupportedLanguages(): string[];
|
|
3421
|
+
/**
|
|
3422
|
+
* Version of the semantic package.
|
|
3423
|
+
*/
|
|
3424
|
+
declare const VERSION = "0.1.0";
|
|
3425
|
+
|
|
3426
|
+
export { ASTBuilder, type ASTBuilderOptions, type ASTNode, type ActionType, type AnalysisConfig, type AnalysisResult, type AnalysisWarning, type BlockNode, type BuildASTResult, type CacheStats, type CommandCategory, type CommandMapper, type CommandMapperResult, type CommandNode, type CommandSchema, type CommandSemanticNode, type CommandSequenceNode, type CompoundSemanticNode, type ConditionalNode, type ConditionalSemanticNode, type ConfidenceResult, DEFAULT_CONFIDENCE_THRESHOLD, type EventHandlerNode, type EventHandlerSemanticNode, type EventModifiers, type ExpressionValue, type ExtractionRule, type ExtractionRules, type GeneratorConfig, type GroupPatternToken, HIGH_CONFIDENCE_THRESHOLD, type KeywordTranslation$1 as KeywordTranslation, SUPPORTED_LANGUAGES as LAZY_LOAD_LANGUAGES, type LanguageModule, type LanguagePattern, type LanguageProfile, type LanguageToken, type LanguageTokenizer, type LiteralPatternToken, type LiteralValue, type LoadLanguageOptions, type LoadLanguageResult, type LoopSemanticNode, type LoopVariant, type MarkingStrategy$1 as MarkingStrategy, type ParseWithConfidenceResult, type PatternConstraints, type PatternMatchError, type PatternMatchResult, PatternMatcher$1 as PatternMatcher, type PatternTemplate, type PatternToken, type PossessiveConfig, type PropertyPathValue, type ReferenceValue, type RoleMarker, type RolePatternToken, type RoleSpec, type SelectorValue, type SemanticAnalysisResult, type SemanticAnalyzer, SemanticAnalyzerImpl, SemanticCache, type SemanticCacheConfig, type SemanticMetadata, type SemanticNode, type SemanticParser, SemanticParserImpl, type SemanticRenderer, SemanticRendererImpl, type SemanticRole, type SemanticValue, type SourcePosition, type StreamMark, type TokenKind, type TokenStream, TokenStreamImpl, type TokenizationConfig$1 as TokenizationConfig, type GrammarRule as UnifiedGrammarRule, type UnifiedLanguageProfile, type PatternMatcher as UnifiedPatternMatcher, type PatternTransform as UnifiedPatternTransform, type 
UnifiedRoleMarker, VERSION, type ValidationError, type ValidationResult, type VerbConfig$1 as VerbConfig, type VerbForm, type WarningCode, type WarningSeverity, type WordOrder, addSchema, analyze, analyzeAll, analyzeMultiple, appendSchema, arabicProfile, arabicTokenizer, buildAST, calculateTranslationConfidence, canLoadLanguage, canParse, checkAccessibility, checkPerformance, checkSchema, chineseProfile, chineseTokenizer, commandSchemas, convertExpression, convertLiteral, convertPropertyPath, convertReference, convertSelector, convertValue, createCommandNode, createCompoundNode, createConditionalNode, createEventHandler, createLiteral, createLoopNode, createPropertyPath, createReference, createSelector, createSemanticAnalyzer, createSemanticCache, decrementSchema, devModeAnalyze, disableDevMode, enableDevMode, englishProfile, englishTokenizer, eventNameTranslations, fetchSchema, frenchProfile, fromExplicit, generateAllPatterns, generatePattern, generatePatternVariants, generatePatternsForCommand, generatePatternsForLanguage, generateSimplePattern, germanProfile, getAllPatterns, getAllTranslations, getCommandMapper, getCommandType, getDefinedSchemas, getDevModeConfig, getEventHandlerPatternsForLanguage, getSupportedLanguages$1 as getGeneratorLanguages, getGeneratorSummary, getLoadedLanguages, getPatternById, getPatternStats, getPatternsForLanguage, getPatternsForLanguageAndCommand, getProfile, getPutPatternsForLanguage, getRegisteredLanguages, getRegisteredMappers, getSchema$1 as getSchema, getSchemasByCategory, getSupportedCommands, getSupportedLanguages, getSupportedLanguages$3 as getSupportedPatternLanguages, getSupportedLanguages$2 as getSupportedTokenizerLanguages, getTogglePatternsForLanguage, getTokenizer, getUnloadedLanguages, getSchema as getValidatorSchema, hideSchema, incrementSchema, indonesianProfile, isDevModeEnabled, isExplicitSyntax, isLanguageSupported as isGeneratorLanguageSupported, isLanguageSupported$1 as isLanguageSupported, isUnifiedProfile, 
japaneseProfile, japaneseTokenizer, koreanProfile, koreanTokenizer, languageProfiles, loadLanguage, loadLanguages, markingStrategyToAdpositionType, matchBest, matchPattern, normalizeEventName, onSchema, parse, parseAny, parseExplicit, parseWithConfidence, patternMatcher, portugueseProfile, prependSchema, putSchema, quechuaProfile, registerCommandMapper, registerSchema, registerTokenizer, removeSchema, render, renderExplicit, rolesToCommandArgs, roundTrip, schemaRegistry, semanticCache, semanticParser, semanticRenderer, setSchema, shouldUseSemanticResult, showSchema, spanishProfile, spanishTokenizer, swahiliProfile, toExplicit, toGrammaticalMarker, toI18nProfile, toggleSchema, tokenize, translate, triggerSchema, tryGetProfile, turkishProfile, turkishTokenizer, validateAndAdjustConfidence, validateLanguageKeywords, validateSemanticResult, validateTranslation, waitSchema, withCache };
|