@lokascript/semantic 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +686 -0
- package/dist/browser-ar.ar.global.js +2 -0
- package/dist/browser-core.core.global.js +2 -0
- package/dist/browser-de.de.global.js +2 -0
- package/dist/browser-east-asian.east-asian.global.js +2 -0
- package/dist/browser-en-tr.en-tr.global.js +2 -0
- package/dist/browser-en.en.global.js +2 -0
- package/dist/browser-es-en.es-en.global.js +2 -0
- package/dist/browser-es.es.global.js +2 -0
- package/dist/browser-fr.fr.global.js +2 -0
- package/dist/browser-id.id.global.js +2 -0
- package/dist/browser-ja.ja.global.js +2 -0
- package/dist/browser-ko.ko.global.js +2 -0
- package/dist/browser-lazy.lazy.global.js +2 -0
- package/dist/browser-priority.priority.global.js +2 -0
- package/dist/browser-pt.pt.global.js +2 -0
- package/dist/browser-qu.qu.global.js +2 -0
- package/dist/browser-sw.sw.global.js +2 -0
- package/dist/browser-tr.tr.global.js +2 -0
- package/dist/browser-western.western.global.js +2 -0
- package/dist/browser-zh.zh.global.js +2 -0
- package/dist/browser.global.js +3 -0
- package/dist/browser.global.js.map +1 -0
- package/dist/index.cjs +35051 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +3426 -0
- package/dist/index.d.ts +3426 -0
- package/dist/index.js +34890 -0
- package/dist/index.js.map +1 -0
- package/dist/languages/ar.d.ts +78 -0
- package/dist/languages/ar.js +1622 -0
- package/dist/languages/ar.js.map +1 -0
- package/dist/languages/de.d.ts +38 -0
- package/dist/languages/de.js +1168 -0
- package/dist/languages/de.js.map +1 -0
- package/dist/languages/en.d.ts +44 -0
- package/dist/languages/en.js +3491 -0
- package/dist/languages/en.js.map +1 -0
- package/dist/languages/es.d.ts +52 -0
- package/dist/languages/es.js +1493 -0
- package/dist/languages/es.js.map +1 -0
- package/dist/languages/fr.d.ts +37 -0
- package/dist/languages/fr.js +1159 -0
- package/dist/languages/fr.js.map +1 -0
- package/dist/languages/id.d.ts +35 -0
- package/dist/languages/id.js +1152 -0
- package/dist/languages/id.js.map +1 -0
- package/dist/languages/ja.d.ts +53 -0
- package/dist/languages/ja.js +1430 -0
- package/dist/languages/ja.js.map +1 -0
- package/dist/languages/ko.d.ts +51 -0
- package/dist/languages/ko.js +1729 -0
- package/dist/languages/ko.js.map +1 -0
- package/dist/languages/pt.d.ts +37 -0
- package/dist/languages/pt.js +1127 -0
- package/dist/languages/pt.js.map +1 -0
- package/dist/languages/qu.d.ts +36 -0
- package/dist/languages/qu.js +1143 -0
- package/dist/languages/qu.js.map +1 -0
- package/dist/languages/sw.d.ts +35 -0
- package/dist/languages/sw.js +1147 -0
- package/dist/languages/sw.js.map +1 -0
- package/dist/languages/tr.d.ts +45 -0
- package/dist/languages/tr.js +1529 -0
- package/dist/languages/tr.js.map +1 -0
- package/dist/languages/zh.d.ts +58 -0
- package/dist/languages/zh.js +1257 -0
- package/dist/languages/zh.js.map +1 -0
- package/dist/types-C4dcj53L.d.ts +600 -0
- package/package.json +202 -0
- package/src/__test-utils__/index.ts +7 -0
- package/src/__test-utils__/test-helpers.ts +8 -0
- package/src/__types__/test-helpers.ts +122 -0
- package/src/analysis/index.ts +479 -0
- package/src/ast-builder/command-mappers.ts +1133 -0
- package/src/ast-builder/expression-parser/index.ts +41 -0
- package/src/ast-builder/expression-parser/parser.ts +563 -0
- package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
- package/src/ast-builder/expression-parser/types.ts +208 -0
- package/src/ast-builder/index.ts +536 -0
- package/src/ast-builder/value-converters.ts +172 -0
- package/src/bridge.ts +275 -0
- package/src/browser-ar.ts +162 -0
- package/src/browser-core.ts +231 -0
- package/src/browser-de.ts +162 -0
- package/src/browser-east-asian.ts +173 -0
- package/src/browser-en-tr.ts +165 -0
- package/src/browser-en.ts +157 -0
- package/src/browser-es-en.ts +200 -0
- package/src/browser-es.ts +170 -0
- package/src/browser-fr.ts +162 -0
- package/src/browser-id.ts +162 -0
- package/src/browser-ja.ts +162 -0
- package/src/browser-ko.ts +162 -0
- package/src/browser-lazy.ts +189 -0
- package/src/browser-priority.ts +214 -0
- package/src/browser-pt.ts +162 -0
- package/src/browser-qu.ts +162 -0
- package/src/browser-sw.ts +162 -0
- package/src/browser-tr.ts +162 -0
- package/src/browser-western.ts +181 -0
- package/src/browser-zh.ts +162 -0
- package/src/browser.ts +268 -0
- package/src/cache/index.ts +14 -0
- package/src/cache/semantic-cache.ts +344 -0
- package/src/core-bridge.ts +372 -0
- package/src/explicit/converter.ts +258 -0
- package/src/explicit/index.ts +18 -0
- package/src/explicit/parser.ts +236 -0
- package/src/explicit/renderer.ts +424 -0
- package/src/generators/command-schemas.ts +1636 -0
- package/src/generators/event-handler-generator.ts +109 -0
- package/src/generators/index.ts +117 -0
- package/src/generators/language-profiles.ts +139 -0
- package/src/generators/pattern-generator.ts +537 -0
- package/src/generators/profiles/arabic.ts +131 -0
- package/src/generators/profiles/bengali.ts +132 -0
- package/src/generators/profiles/chinese.ts +124 -0
- package/src/generators/profiles/english.ts +113 -0
- package/src/generators/profiles/french.ts +125 -0
- package/src/generators/profiles/german.ts +126 -0
- package/src/generators/profiles/hindi.ts +146 -0
- package/src/generators/profiles/index.ts +46 -0
- package/src/generators/profiles/indonesian.ts +125 -0
- package/src/generators/profiles/italian.ts +139 -0
- package/src/generators/profiles/japanese.ts +149 -0
- package/src/generators/profiles/korean.ts +127 -0
- package/src/generators/profiles/marker-templates.ts +288 -0
- package/src/generators/profiles/ms.ts +130 -0
- package/src/generators/profiles/polish.ts +249 -0
- package/src/generators/profiles/portuguese.ts +115 -0
- package/src/generators/profiles/quechua.ts +113 -0
- package/src/generators/profiles/russian.ts +260 -0
- package/src/generators/profiles/spanish.ts +130 -0
- package/src/generators/profiles/swahili.ts +129 -0
- package/src/generators/profiles/thai.ts +132 -0
- package/src/generators/profiles/tl.ts +128 -0
- package/src/generators/profiles/turkish.ts +124 -0
- package/src/generators/profiles/types.ts +165 -0
- package/src/generators/profiles/ukrainian.ts +270 -0
- package/src/generators/profiles/vietnamese.ts +133 -0
- package/src/generators/schema-error-codes.ts +160 -0
- package/src/generators/schema-validator.ts +391 -0
- package/src/index.ts +429 -0
- package/src/language-building-schema.ts +3170 -0
- package/src/language-loader.ts +394 -0
- package/src/languages/_all.ts +65 -0
- package/src/languages/ar.ts +15 -0
- package/src/languages/bn.ts +16 -0
- package/src/languages/de.ts +15 -0
- package/src/languages/en.ts +29 -0
- package/src/languages/es.ts +15 -0
- package/src/languages/fr.ts +15 -0
- package/src/languages/hi.ts +26 -0
- package/src/languages/id.ts +15 -0
- package/src/languages/index.ts +18 -0
- package/src/languages/it.ts +15 -0
- package/src/languages/ja.ts +15 -0
- package/src/languages/ko.ts +15 -0
- package/src/languages/ms.ts +16 -0
- package/src/languages/pl.ts +18 -0
- package/src/languages/pt.ts +15 -0
- package/src/languages/qu.ts +15 -0
- package/src/languages/ru.ts +26 -0
- package/src/languages/sw.ts +15 -0
- package/src/languages/th.ts +16 -0
- package/src/languages/tl.ts +16 -0
- package/src/languages/tr.ts +15 -0
- package/src/languages/uk.ts +26 -0
- package/src/languages/vi.ts +16 -0
- package/src/languages/zh.ts +15 -0
- package/src/parser/index.ts +15 -0
- package/src/parser/pattern-matcher.ts +1181 -0
- package/src/parser/semantic-parser.ts +573 -0
- package/src/parser/utils/index.ts +35 -0
- package/src/parser/utils/marker-resolution.ts +111 -0
- package/src/parser/utils/possessive-keywords.ts +43 -0
- package/src/parser/utils/role-positioning.ts +70 -0
- package/src/parser/utils/type-validation.ts +134 -0
- package/src/patterns/add/ar.ts +71 -0
- package/src/patterns/add/bn.ts +70 -0
- package/src/patterns/add/hi.ts +69 -0
- package/src/patterns/add/index.ts +87 -0
- package/src/patterns/add/it.ts +61 -0
- package/src/patterns/add/ja.ts +93 -0
- package/src/patterns/add/ko.ts +74 -0
- package/src/patterns/add/ms.ts +30 -0
- package/src/patterns/add/pl.ts +62 -0
- package/src/patterns/add/ru.ts +62 -0
- package/src/patterns/add/th.ts +49 -0
- package/src/patterns/add/tl.ts +30 -0
- package/src/patterns/add/tr.ts +71 -0
- package/src/patterns/add/uk.ts +62 -0
- package/src/patterns/add/vi.ts +61 -0
- package/src/patterns/add/zh.ts +71 -0
- package/src/patterns/builders.ts +207 -0
- package/src/patterns/decrement/bn.ts +70 -0
- package/src/patterns/decrement/de.ts +42 -0
- package/src/patterns/decrement/hi.ts +68 -0
- package/src/patterns/decrement/index.ts +79 -0
- package/src/patterns/decrement/it.ts +69 -0
- package/src/patterns/decrement/ms.ts +30 -0
- package/src/patterns/decrement/pl.ts +58 -0
- package/src/patterns/decrement/ru.ts +58 -0
- package/src/patterns/decrement/th.ts +49 -0
- package/src/patterns/decrement/tl.ts +30 -0
- package/src/patterns/decrement/tr.ts +48 -0
- package/src/patterns/decrement/uk.ts +58 -0
- package/src/patterns/decrement/vi.ts +61 -0
- package/src/patterns/decrement/zh.ts +32 -0
- package/src/patterns/en.ts +302 -0
- package/src/patterns/event-handler/ar.ts +151 -0
- package/src/patterns/event-handler/bn.ts +72 -0
- package/src/patterns/event-handler/de.ts +117 -0
- package/src/patterns/event-handler/en.ts +117 -0
- package/src/patterns/event-handler/es.ts +136 -0
- package/src/patterns/event-handler/fr.ts +117 -0
- package/src/patterns/event-handler/hi.ts +64 -0
- package/src/patterns/event-handler/id.ts +117 -0
- package/src/patterns/event-handler/index.ts +119 -0
- package/src/patterns/event-handler/it.ts +54 -0
- package/src/patterns/event-handler/ja.ts +118 -0
- package/src/patterns/event-handler/ko.ts +133 -0
- package/src/patterns/event-handler/ms.ts +30 -0
- package/src/patterns/event-handler/pl.ts +62 -0
- package/src/patterns/event-handler/pt.ts +117 -0
- package/src/patterns/event-handler/qu.ts +66 -0
- package/src/patterns/event-handler/ru.ts +62 -0
- package/src/patterns/event-handler/shared.ts +270 -0
- package/src/patterns/event-handler/sw.ts +117 -0
- package/src/patterns/event-handler/th.ts +53 -0
- package/src/patterns/event-handler/tl.ts +30 -0
- package/src/patterns/event-handler/tr.ts +170 -0
- package/src/patterns/event-handler/uk.ts +62 -0
- package/src/patterns/event-handler/vi.ts +61 -0
- package/src/patterns/event-handler/zh.ts +150 -0
- package/src/patterns/get/ar.ts +49 -0
- package/src/patterns/get/bn.ts +47 -0
- package/src/patterns/get/de.ts +32 -0
- package/src/patterns/get/hi.ts +52 -0
- package/src/patterns/get/index.ts +83 -0
- package/src/patterns/get/it.ts +56 -0
- package/src/patterns/get/ja.ts +53 -0
- package/src/patterns/get/ko.ts +53 -0
- package/src/patterns/get/ms.ts +30 -0
- package/src/patterns/get/pl.ts +57 -0
- package/src/patterns/get/ru.ts +57 -0
- package/src/patterns/get/th.ts +29 -0
- package/src/patterns/get/tl.ts +30 -0
- package/src/patterns/get/uk.ts +57 -0
- package/src/patterns/get/vi.ts +48 -0
- package/src/patterns/grammar-transformed/index.ts +39 -0
- package/src/patterns/grammar-transformed/ja.ts +1713 -0
- package/src/patterns/grammar-transformed/ko.ts +1311 -0
- package/src/patterns/grammar-transformed/tr.ts +1067 -0
- package/src/patterns/hide/ar.ts +67 -0
- package/src/patterns/hide/bn.ts +47 -0
- package/src/patterns/hide/de.ts +36 -0
- package/src/patterns/hide/hi.ts +61 -0
- package/src/patterns/hide/index.ts +91 -0
- package/src/patterns/hide/it.ts +56 -0
- package/src/patterns/hide/ja.ts +69 -0
- package/src/patterns/hide/ko.ts +69 -0
- package/src/patterns/hide/ms.ts +30 -0
- package/src/patterns/hide/pl.ts +57 -0
- package/src/patterns/hide/ru.ts +57 -0
- package/src/patterns/hide/th.ts +29 -0
- package/src/patterns/hide/tl.ts +30 -0
- package/src/patterns/hide/tr.ts +65 -0
- package/src/patterns/hide/uk.ts +57 -0
- package/src/patterns/hide/vi.ts +56 -0
- package/src/patterns/hide/zh.ts +68 -0
- package/src/patterns/increment/bn.ts +70 -0
- package/src/patterns/increment/de.ts +36 -0
- package/src/patterns/increment/hi.ts +68 -0
- package/src/patterns/increment/index.ts +79 -0
- package/src/patterns/increment/it.ts +69 -0
- package/src/patterns/increment/ms.ts +30 -0
- package/src/patterns/increment/pl.ts +58 -0
- package/src/patterns/increment/ru.ts +58 -0
- package/src/patterns/increment/th.ts +49 -0
- package/src/patterns/increment/tl.ts +30 -0
- package/src/patterns/increment/tr.ts +52 -0
- package/src/patterns/increment/uk.ts +58 -0
- package/src/patterns/increment/vi.ts +61 -0
- package/src/patterns/increment/zh.ts +32 -0
- package/src/patterns/index.ts +84 -0
- package/src/patterns/languages/en/control-flow.ts +93 -0
- package/src/patterns/languages/en/fetch.ts +62 -0
- package/src/patterns/languages/en/index.ts +42 -0
- package/src/patterns/languages/en/repeat.ts +67 -0
- package/src/patterns/languages/en/set.ts +48 -0
- package/src/patterns/languages/en/swap.ts +38 -0
- package/src/patterns/languages/en/temporal.ts +57 -0
- package/src/patterns/put/ar.ts +74 -0
- package/src/patterns/put/bn.ts +53 -0
- package/src/patterns/put/en.ts +74 -0
- package/src/patterns/put/es.ts +74 -0
- package/src/patterns/put/hi.ts +69 -0
- package/src/patterns/put/id.ts +96 -0
- package/src/patterns/put/index.ts +99 -0
- package/src/patterns/put/it.ts +56 -0
- package/src/patterns/put/ja.ts +75 -0
- package/src/patterns/put/ko.ts +67 -0
- package/src/patterns/put/ms.ts +30 -0
- package/src/patterns/put/pl.ts +81 -0
- package/src/patterns/put/ru.ts +85 -0
- package/src/patterns/put/th.ts +32 -0
- package/src/patterns/put/tl.ts +30 -0
- package/src/patterns/put/tr.ts +67 -0
- package/src/patterns/put/uk.ts +85 -0
- package/src/patterns/put/vi.ts +72 -0
- package/src/patterns/put/zh.ts +62 -0
- package/src/patterns/registry.ts +163 -0
- package/src/patterns/remove/ar.ts +71 -0
- package/src/patterns/remove/bn.ts +68 -0
- package/src/patterns/remove/hi.ts +69 -0
- package/src/patterns/remove/index.ts +87 -0
- package/src/patterns/remove/it.ts +69 -0
- package/src/patterns/remove/ja.ts +74 -0
- package/src/patterns/remove/ko.ts +78 -0
- package/src/patterns/remove/ms.ts +30 -0
- package/src/patterns/remove/pl.ts +62 -0
- package/src/patterns/remove/ru.ts +62 -0
- package/src/patterns/remove/th.ts +49 -0
- package/src/patterns/remove/tl.ts +30 -0
- package/src/patterns/remove/tr.ts +78 -0
- package/src/patterns/remove/uk.ts +62 -0
- package/src/patterns/remove/vi.ts +61 -0
- package/src/patterns/remove/zh.ts +72 -0
- package/src/patterns/set/ar.ts +84 -0
- package/src/patterns/set/bn.ts +53 -0
- package/src/patterns/set/de.ts +84 -0
- package/src/patterns/set/es.ts +92 -0
- package/src/patterns/set/fr.ts +88 -0
- package/src/patterns/set/hi.ts +56 -0
- package/src/patterns/set/id.ts +84 -0
- package/src/patterns/set/index.ts +107 -0
- package/src/patterns/set/it.ts +56 -0
- package/src/patterns/set/ja.ts +86 -0
- package/src/patterns/set/ko.ts +85 -0
- package/src/patterns/set/ms.ts +30 -0
- package/src/patterns/set/pl.ts +57 -0
- package/src/patterns/set/pt.ts +84 -0
- package/src/patterns/set/ru.ts +57 -0
- package/src/patterns/set/th.ts +31 -0
- package/src/patterns/set/tl.ts +30 -0
- package/src/patterns/set/tr.ts +107 -0
- package/src/patterns/set/uk.ts +57 -0
- package/src/patterns/set/vi.ts +53 -0
- package/src/patterns/set/zh.ts +84 -0
- package/src/patterns/show/ar.ts +67 -0
- package/src/patterns/show/bn.ts +47 -0
- package/src/patterns/show/de.ts +32 -0
- package/src/patterns/show/fr.ts +32 -0
- package/src/patterns/show/hi.ts +61 -0
- package/src/patterns/show/index.ts +95 -0
- package/src/patterns/show/it.ts +56 -0
- package/src/patterns/show/ja.ts +69 -0
- package/src/patterns/show/ko.ts +73 -0
- package/src/patterns/show/ms.ts +30 -0
- package/src/patterns/show/pl.ts +57 -0
- package/src/patterns/show/ru.ts +57 -0
- package/src/patterns/show/th.ts +29 -0
- package/src/patterns/show/tl.ts +30 -0
- package/src/patterns/show/tr.ts +65 -0
- package/src/patterns/show/uk.ts +57 -0
- package/src/patterns/show/vi.ts +56 -0
- package/src/patterns/show/zh.ts +68 -0
- package/src/patterns/take/ar.ts +51 -0
- package/src/patterns/take/index.ts +31 -0
- package/src/patterns/toggle/ar.ts +61 -0
- package/src/patterns/toggle/bn.ts +70 -0
- package/src/patterns/toggle/en.ts +61 -0
- package/src/patterns/toggle/es.ts +61 -0
- package/src/patterns/toggle/hi.ts +80 -0
- package/src/patterns/toggle/index.ts +95 -0
- package/src/patterns/toggle/it.ts +69 -0
- package/src/patterns/toggle/ja.ts +156 -0
- package/src/patterns/toggle/ko.ts +113 -0
- package/src/patterns/toggle/ms.ts +30 -0
- package/src/patterns/toggle/pl.ts +62 -0
- package/src/patterns/toggle/ru.ts +62 -0
- package/src/patterns/toggle/th.ts +50 -0
- package/src/patterns/toggle/tl.ts +30 -0
- package/src/patterns/toggle/tr.ts +88 -0
- package/src/patterns/toggle/uk.ts +62 -0
- package/src/patterns/toggle/vi.ts +61 -0
- package/src/patterns/toggle/zh.ts +99 -0
- package/src/public-api.ts +286 -0
- package/src/registry.ts +441 -0
- package/src/tokenizers/arabic.ts +723 -0
- package/src/tokenizers/base.ts +1300 -0
- package/src/tokenizers/bengali.ts +289 -0
- package/src/tokenizers/chinese.ts +481 -0
- package/src/tokenizers/english.ts +416 -0
- package/src/tokenizers/french.ts +326 -0
- package/src/tokenizers/german.ts +324 -0
- package/src/tokenizers/hindi.ts +319 -0
- package/src/tokenizers/index.ts +127 -0
- package/src/tokenizers/indonesian.ts +306 -0
- package/src/tokenizers/italian.ts +458 -0
- package/src/tokenizers/japanese.ts +447 -0
- package/src/tokenizers/korean.ts +642 -0
- package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
- package/src/tokenizers/morphology/french-normalizer.ts +268 -0
- package/src/tokenizers/morphology/german-normalizer.ts +256 -0
- package/src/tokenizers/morphology/index.ts +46 -0
- package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
- package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
- package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
- package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
- package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
- package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
- package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
- package/src/tokenizers/morphology/types.ts +211 -0
- package/src/tokenizers/ms.ts +198 -0
- package/src/tokenizers/polish.ts +354 -0
- package/src/tokenizers/portuguese.ts +304 -0
- package/src/tokenizers/quechua.ts +339 -0
- package/src/tokenizers/russian.ts +375 -0
- package/src/tokenizers/spanish.ts +403 -0
- package/src/tokenizers/swahili.ts +303 -0
- package/src/tokenizers/thai.ts +236 -0
- package/src/tokenizers/tl.ts +198 -0
- package/src/tokenizers/turkish.ts +411 -0
- package/src/tokenizers/ukrainian.ts +369 -0
- package/src/tokenizers/vietnamese.ts +410 -0
- package/src/types/grammar-types.ts +617 -0
- package/src/types/unified-profile.ts +267 -0
- package/src/types.ts +709 -0
- package/src/utils/confidence-calculator.ts +147 -0
- package/src/validators/command-validator.ts +380 -0
- package/src/validators/index.ts +15 -0
|
@@ -0,0 +1,600 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Grammar Types for Semantic Multilingual Parsing
|
|
3
|
+
*
|
|
4
|
+
* These types define the semantic role system shared by every supported language.
|
|
5
|
+
* Originally from @lokascript/i18n, now consolidated here for package independence.
|
|
6
|
+
*
|
|
7
|
+
* Key Linguistic Concepts:
|
|
8
|
+
* - Word Order: SVO, SOV, VSO (and variations)
|
|
9
|
+
* - Adposition Type: Preposition (English) vs Postposition (Japanese/Korean)
|
|
10
|
+
* - Morphology: Isolating (Chinese) vs Agglutinative (Turkish) vs Fusional (Arabic)
|
|
11
|
+
* - Text Direction: LTR vs RTL
|
|
12
|
+
*/
|
|
13
|
+
/**
|
|
14
|
+
* Semantic roles in hyperscript commands.
|
|
15
|
+
* These are universal across every supported language - only the surface form changes.
|
|
16
|
+
*
|
|
17
|
+
* ## Core Thematic Roles (from linguistic theory)
|
|
18
|
+
* | Role | Usage | Purpose | Example |
|
|
19
|
+
* |-------------|-------|-----------------------------|---------------------------|
|
|
20
|
+
* | action | 100% | Command verb | toggle, put, fetch |
|
|
21
|
+
* | patient | 90% | What is acted upon | .active, #count |
|
|
22
|
+
* | destination | 40% | Where something goes | into #output, to .class |
|
|
23
|
+
* | source | 13% | Where something comes from | from #input, from URL |
|
|
24
|
+
* | event | 106% | Trigger events | click, keydown, submit |
|
|
25
|
+
* | condition | 8% | Boolean expressions | if x > 5, when visible |
|
|
26
|
+
* | agent | 0% | Who performs action | Reserved for future use |
|
|
27
|
+
* | goal | 1% | Target value/state | to 'red' (in transition) |
|
|
28
|
+
*
|
|
29
|
+
* ## Quantitative Roles (answer "how much/long")
|
|
30
|
+
* | Role | Usage | Purpose | Example |
|
|
31
|
+
* |----------|-------|----------------|----------------------|
|
|
32
|
+
* | quantity | 7% | Numeric amount | by 5, 3 times |
|
|
33
|
+
* | duration | 1% | Time span | for 5 seconds, 500ms |
|
|
34
|
+
*
|
|
35
|
+
* ## Adverbial/Modifier Roles (answer "how/by what means")
|
|
36
|
+
* | Role | Usage | Purpose | Example |
|
|
37
|
+
* |--------------|-------|---------------------------|-------------------|
|
|
38
|
+
* | style | 2% | Animation/behavior | with fade |
|
|
39
|
+
* | manner | 2% | Insertion position | before, after |
|
|
40
|
+
* | method | 1% | HTTP method/technique | via POST, as GET |
|
|
41
|
+
* | responseType | 1% | Response format | as json, as html |
|
|
42
|
+
*
|
|
43
|
+
* ## Control Flow Roles
|
|
44
|
+
* | Role | Usage | Purpose | Example |
|
|
45
|
+
* |----------|-------|--------------|-----------------------|
|
|
46
|
+
* | loopType | 6% | Loop variant | forever, until, times |
|
|
47
|
+
*
|
|
48
|
+
* ## Design Notes
|
|
49
|
+
* - Low-usage roles (agent, goal, method, responseType) are intentionally kept for:
|
|
50
|
+
* - Linguistic completeness across all supported languages
|
|
51
|
+
* - Future extensibility (AI agents, server-side execution)
|
|
52
|
+
* - Command-specific semantics (fetch, transition)
|
|
53
|
+
* - Each role has distinct grammatical markers per language (see profiles/index.ts)
|
|
54
|
+
* - Usage percentages based on pattern database analysis
|
|
55
|
+
*/
|
|
56
|
+
type SemanticRole = 'action' | 'agent' | 'patient' | 'source' | 'destination' | 'goal' | 'event' | 'condition' | 'quantity' | 'duration' | 'responseType' | 'method' | 'style' | 'manner' | 'loopType' | 'continues';
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Semantic-First Multilingual Hyperscript Types
|
|
60
|
+
*
|
|
61
|
+
* This module defines the canonical semantic representation that all languages
|
|
62
|
+
* parse to and render from. The semantic layer is language-neutral - it captures
|
|
63
|
+
* the MEANING of hyperscript commands independent of surface syntax.
|
|
64
|
+
*/
|
|
65
|
+
|
|
66
|
+
interface SourcePosition {
|
|
67
|
+
readonly start: number;
|
|
68
|
+
readonly end: number;
|
|
69
|
+
readonly line?: number;
|
|
70
|
+
readonly column?: number;
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* A token from language-specific tokenization.
|
|
74
|
+
*/
|
|
75
|
+
interface LanguageToken {
|
|
76
|
+
readonly value: string;
|
|
77
|
+
readonly kind: TokenKind;
|
|
78
|
+
readonly position: SourcePosition;
|
|
79
|
+
/** Normalized form from explicit keyword map (e.g., 切り替え → toggle) */
|
|
80
|
+
readonly normalized?: string;
|
|
81
|
+
/** Morphologically normalized stem (e.g., 切り替えた → 切り替え) */
|
|
82
|
+
readonly stem?: string;
|
|
83
|
+
/** Confidence in the morphological stem (0.0-1.0) */
|
|
84
|
+
readonly stemConfidence?: number;
|
|
85
|
+
/** Additional metadata for specific token types (e.g., event modifier data) */
|
|
86
|
+
readonly metadata?: Record<string, unknown>;
|
|
87
|
+
}
|
|
88
|
+
type TokenKind = 'keyword' | 'selector' | 'literal' | 'particle' | 'conjunction' | 'event-modifier' | 'identifier' | 'operator' | 'punctuation' | 'url';
|
|
89
|
+
/**
|
|
90
|
+
* A stream of tokens with navigation capabilities.
|
|
91
|
+
*/
|
|
92
|
+
interface TokenStream {
|
|
93
|
+
readonly tokens: readonly LanguageToken[];
|
|
94
|
+
readonly language: string;
|
|
95
|
+
/** Look at token at current position + offset without consuming */
|
|
96
|
+
peek(offset?: number): LanguageToken | null;
|
|
97
|
+
/** Consume and return current token, advance position */
|
|
98
|
+
advance(): LanguageToken;
|
|
99
|
+
/** Check if we've consumed all tokens */
|
|
100
|
+
isAtEnd(): boolean;
|
|
101
|
+
/** Save current position for backtracking */
|
|
102
|
+
mark(): StreamMark;
|
|
103
|
+
/** Restore to a saved position */
|
|
104
|
+
reset(mark: StreamMark): void;
|
|
105
|
+
/** Get current position */
|
|
106
|
+
position(): number;
|
|
107
|
+
}
|
|
108
|
+
interface StreamMark {
|
|
109
|
+
readonly position: number;
|
|
110
|
+
}
|
|
111
|
+
/**
|
|
112
|
+
* Language-specific tokenizer interface.
|
|
113
|
+
* Each language implements its own tokenizer to handle:
|
|
114
|
+
* - Word boundaries (spaces for English, particles for Japanese)
|
|
115
|
+
* - Character sets (ASCII, CJK, Arabic, etc.)
|
|
116
|
+
* - Special markers (particles, prefixes, suffixes)
|
|
117
|
+
*/
|
|
118
|
+
interface LanguageTokenizer {
|
|
119
|
+
readonly language: string;
|
|
120
|
+
readonly direction: 'ltr' | 'rtl';
|
|
121
|
+
/** Convert input string to token stream */
|
|
122
|
+
tokenize(input: string): TokenStream;
|
|
123
|
+
/** Classify a single token */
|
|
124
|
+
classifyToken(token: string): TokenKind;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
/**
|
|
128
|
+
* Morphological Normalizer Types
|
|
129
|
+
*
|
|
130
|
+
* Defines interfaces for language-specific morphological analysis.
|
|
131
|
+
* Normalizers reduce conjugated/inflected forms to canonical stems
|
|
132
|
+
* that can be matched against keyword dictionaries.
|
|
133
|
+
*/
|
|
134
|
+
/**
|
|
135
|
+
* Result of morphological normalization.
|
|
136
|
+
*/
|
|
137
|
+
interface NormalizationResult {
|
|
138
|
+
/** The extracted stem/root form */
|
|
139
|
+
readonly stem: string;
|
|
140
|
+
/** Confidence in the normalization (0.0-1.0) */
|
|
141
|
+
readonly confidence: number;
|
|
142
|
+
/** Optional metadata about the transformation */
|
|
143
|
+
readonly metadata?: NormalizationMetadata;
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Metadata about morphological transformations applied.
|
|
147
|
+
*/
|
|
148
|
+
interface NormalizationMetadata {
|
|
149
|
+
/** Prefixes that were removed */
|
|
150
|
+
readonly removedPrefixes?: readonly string[];
|
|
151
|
+
/** Suffixes that were removed */
|
|
152
|
+
readonly removedSuffixes?: readonly string[];
|
|
153
|
+
/** Type of conjugation detected */
|
|
154
|
+
readonly conjugationType?: ConjugationType;
|
|
155
|
+
/** Original form classification */
|
|
156
|
+
readonly originalForm?: string;
|
|
157
|
+
/** Applied transformation rules (for debugging) */
|
|
158
|
+
readonly appliedRules?: readonly string[];
|
|
159
|
+
}
|
|
160
|
+
/**
|
|
161
|
+
* Types of verb conjugation/inflection.
|
|
162
|
+
*/
|
|
163
|
+
type ConjugationType = 'present' | 'past' | 'future' | 'progressive' | 'perfect' | 'imperative' | 'subjunctive' | 'conditional' | 'passive' | 'causative' | 'polite' | 'humble' | 'honorific' | 'negative' | 'potential' | 'volitional' | 'conditional-tara' | 'conditional-to' | 'conditional-ba' | 'connective' | 'conditional-myeon' | 'temporal-ttae' | 'causal-nikka' | 'honorific-conditional' | 'honorific-temporal' | 'honorific-causal' | 'honorific-past' | 'honorific-polite' | 'sequential-after' | 'sequential-before' | 'immediate' | 'obligation' | 'reflexive' | 'reflexive-imperative' | 'gerund' | 'participle' | 'conditional-idha' | 'temporal-indama' | 'temporal-hina' | 'temporal-lamma' | 'past-verb' | 'conditional-se' | 'temporal-ince' | 'temporal-dikce' | 'aorist' | 'optative' | 'necessitative' | 'request' | 'casual-request' | 'contracted' | 'contracted-past' | 'compound' | 'te-form' | 'dictionary';
|
|
164
|
+
/**
|
|
165
|
+
* Interface for language-specific morphological normalizers.
|
|
166
|
+
*
|
|
167
|
+
* Normalizers attempt to reduce inflected word forms to their
|
|
168
|
+
* canonical stems. This enables matching conjugated verbs against
|
|
169
|
+
* keyword dictionaries that only contain base forms.
|
|
170
|
+
*
|
|
171
|
+
* Example (Japanese):
|
|
172
|
+
* 切り替えた (past) → { stem: '切り替え', confidence: 0.85 }
|
|
173
|
+
* 切り替えます (polite) → { stem: '切り替え', confidence: 0.85 }
|
|
174
|
+
*
|
|
175
|
+
* Example (Spanish):
|
|
176
|
+
* mostrarse (reflexive infinitive) → { stem: 'mostrar', confidence: 0.85 }
|
|
177
|
+
* alternando (gerund) → { stem: 'alternar', confidence: 0.85 }
|
|
178
|
+
*/
|
|
179
|
+
interface MorphologicalNormalizer {
|
|
180
|
+
/** Language code this normalizer handles */
|
|
181
|
+
readonly language: string;
|
|
182
|
+
/**
|
|
183
|
+
* Normalize a word to its canonical stem form.
|
|
184
|
+
*
|
|
185
|
+
* @param word - The word to normalize
|
|
186
|
+
* @returns Normalization result with stem and confidence
|
|
187
|
+
*/
|
|
188
|
+
normalize(word: string): NormalizationResult;
|
|
189
|
+
/**
|
|
190
|
+
* Check if a word appears to be a verb form that can be normalized.
|
|
191
|
+
* Optional optimization to skip normalization for non-verb tokens.
|
|
192
|
+
*
|
|
193
|
+
* @param word - The word to check
|
|
194
|
+
* @returns true if the word might be a normalizable verb form
|
|
195
|
+
*/
|
|
196
|
+
isNormalizable?(word: string): boolean;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
/**
|
|
200
|
+
* Base Tokenizer
|
|
201
|
+
*
|
|
202
|
+
* Provides the TokenStream implementation and shared tokenization utilities.
|
|
203
|
+
* Language-specific tokenizers extend these base utilities.
|
|
204
|
+
*/
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* Configuration for a native language time unit pattern.
|
|
208
|
+
* Used by tryNumberWithTimeUnits() to match language-specific time units.
|
|
209
|
+
*/
|
|
210
|
+
interface TimeUnitMapping {
|
|
211
|
+
/** The pattern to match (e.g., 'segundos', 'ミリ秒') */
|
|
212
|
+
readonly pattern: string;
|
|
213
|
+
/** The standard suffix to use (ms, s, m, h) */
|
|
214
|
+
readonly suffix: string;
|
|
215
|
+
/** Length of the pattern (for optimization) */
|
|
216
|
+
readonly length: number;
|
|
217
|
+
/** Whether to check for word boundary after the pattern */
|
|
218
|
+
readonly checkBoundary?: boolean;
|
|
219
|
+
/** Character that must not immediately follow the pattern (e.g., 's' for the 'm' pattern, so 'ms' is not matched as 'm') */
|
|
220
|
+
readonly notFollowedBy?: string;
|
|
221
|
+
/** Whether to do case-insensitive matching */
|
|
222
|
+
readonly caseInsensitive?: boolean;
|
|
223
|
+
}
|
|
224
|
+
/**
|
|
225
|
+
* Keyword entry for tokenizer - maps native word to normalized English form.
|
|
226
|
+
*/
|
|
227
|
+
interface KeywordEntry {
|
|
228
|
+
readonly native: string;
|
|
229
|
+
readonly normalized: string;
|
|
230
|
+
}
|
|
/**
 * Profile interface for keyword derivation.
 * Matches the structure of LanguageProfile but only includes fields needed for tokenization.
 *
 * Every member is optional, so an empty object is a valid profile.
 */
interface TokenizerProfile {
    /** Keyword translations: a primary form plus optional alternatives and normalized form. */
    readonly keywords?: Record<string, {
        primary: string;
        alternatives?: string[];
        normalized?: string;
    }>;
    /** Reference translations (string to string). */
    readonly references?: Record<string, string>;
    /**
     * Role markers: a primary form plus optional alternatives and position.
     * `position` is a plain string here rather than a fixed set of values.
     */
    readonly roleMarkers?: Record<string, {
        primary: string;
        alternatives?: string[];
        position?: string;
    }>;
}
/**
 * Abstract base class for language-specific tokenizers.
 * Provides common functionality for CSS selectors, strings, and numbers,
 * along with keyword lookup, morphological normalization, URLs, variable
 * references, operators and multi-character particles.
 *
 * Concrete tokenizers supply `language`, `direction`, `tokenize()` and
 * `classifyToken()`.
 */
declare abstract class BaseTokenizer implements LanguageTokenizer {
    /** Language handled by the concrete tokenizer. */
    abstract readonly language: string;
    /** Text direction of the language: left-to-right or right-to-left. */
    abstract readonly direction: 'ltr' | 'rtl';
    /** Optional morphological normalizer for this language */
    protected normalizer?: MorphologicalNormalizer;
    /** Keywords derived from profile, sorted longest-first for greedy matching */
    protected profileKeywords: KeywordEntry[];
    /** Map for O(1) keyword lookups by lowercase native word */
    protected profileKeywordMap: Map<string, KeywordEntry>;
    /**
     * Tokenize an input string. Implemented by each language-specific tokenizer.
     *
     * @param input - Input string
     * @returns The resulting token stream
     */
    abstract tokenize(input: string): TokenStream;
    /**
     * Classify a single token. Implemented by each language-specific tokenizer.
     *
     * @param token - Token text to classify
     * @returns The kind assigned to the token
     */
    abstract classifyToken(token: string): TokenKind;
    /**
     * Initialize keyword mappings from a language profile.
     * Builds a list of native→english mappings from:
     * - profile.keywords (primary + alternatives)
     * - profile.references (me, it, you, etc.)
     * - profile.roleMarkers (into, from, with, etc.)
     *
     * Results are sorted longest-first for greedy matching (important for non-space languages).
     * Extras take precedence over profile entries when there are duplicates.
     *
     * @param profile - Language profile containing keyword translations
     * @param extras - Additional keyword entries to include (literals, positional, events)
     */
    protected initializeKeywordsFromProfile(profile: TokenizerProfile, extras?: KeywordEntry[]): void;
    /**
     * Remove diacritical marks from a word for normalization.
     * Primarily for Arabic (shadda, fatha, kasra, damma, sukun, etc.)
     * but could be extended for other languages.
     *
     * @param word - Word to normalize
     * @returns Word without diacritics
     */
    protected removeDiacritics(word: string): string;
    /**
     * Try to match a keyword from profile at the current position.
     * Uses longest-first greedy matching (important for non-space languages).
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryProfileKeyword(input: string, pos: number): LanguageToken | null;
    /**
     * Check if the remaining input starts with any known keyword.
     * Useful for non-space languages to detect word boundaries.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns true if a keyword starts at this position
     */
    protected isKeywordStart(input: string, pos: number): boolean;
    /**
     * Look up a keyword by native word (case-insensitive).
     * O(1) lookup using the keyword map.
     *
     * @param native - Native word to look up
     * @returns KeywordEntry if found, undefined otherwise
     */
    protected lookupKeyword(native: string): KeywordEntry | undefined;
    /**
     * Check if a word is a known keyword (case-insensitive).
     * O(1) lookup using the keyword map.
     *
     * @param native - Native word to check
     * @returns true if the word is a keyword
     */
    protected isKeyword(native: string): boolean;
    /**
     * Set the morphological normalizer for this tokenizer.
     *
     * @param normalizer - Normalizer to use for this tokenizer
     */
    setNormalizer(normalizer: MorphologicalNormalizer): void;
    /**
     * Try to normalize a word using the morphological normalizer.
     * Returns null if no normalizer is set or normalization fails.
     *
     * Note: We don't check isNormalizable() here because the individual tokenizers
     * historically called normalize() directly without that check. The normalize()
     * method itself handles returning noChange() for words that can't be normalized.
     *
     * @param word - The word to normalize
     * @returns Normalization result, or null as described above
     */
    protected tryNormalize(word: string): NormalizationResult | null;
    /**
     * Try morphological normalization and keyword lookup.
     *
     * If the word can be normalized to a stem that matches a known keyword,
     * returns a keyword token with morphological metadata (stem, stemConfidence).
     *
     * This is the common pattern for handling conjugated verbs across languages:
     * 1. Normalize the word (e.g., "toggled" → "toggle")
     * 2. Look up the stem in the keyword map
     * 3. Create a token with both the original form and stem metadata
     *
     * @param word - The word to normalize and look up
     * @param startPos - Start position for the token
     * @param endPos - End position for the token
     * @returns Token if stem matches a keyword, null otherwise
     */
    protected tryMorphKeywordMatch(word: string, startPos: number, endPos: number): LanguageToken | null;
    /**
     * Try to extract a CSS selector at the current position.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected trySelector(input: string, pos: number): LanguageToken | null;
    /**
     * Try to extract an event modifier at the current position.
     * Event modifiers are .once, .debounce(N), .throttle(N), .queue(strategy)
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryEventModifier(input: string, pos: number): LanguageToken | null;
    /**
     * Try to extract a string literal at the current position.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryString(input: string, pos: number): LanguageToken | null;
    /**
     * Try to extract a number at the current position.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryNumber(input: string, pos: number): LanguageToken | null;
    /**
     * Standard time unit mappings.
     * Maps patterns to their standard suffix (ms, s, m, h).
     * NOTE(review): presumably the fallback list that tryNumberWithTimeUnits()
     * consults after the native units; confirm against the implementation.
     */
    protected static readonly STANDARD_TIME_UNITS: readonly TimeUnitMapping[];
    /**
     * Try to match a time unit from a list of patterns.
     *
     * @param input - Input string
     * @param pos - Position after the number
     * @param timeUnits - Array of time unit mappings (native pattern → standard suffix)
     * @param skipWhitespace - Whether to skip whitespace before time unit (default: false)
     * @returns Object with matched suffix and new position, or null if no match
     */
    protected tryMatchTimeUnit(input: string, pos: number, timeUnits: readonly TimeUnitMapping[], skipWhitespace?: boolean): {
        suffix: string;
        endPos: number;
    } | null;
    /**
     * Parse a base number (sign, integer, decimal) without time units.
     * Returns the number string and end position.
     *
     * @param input - Input string
     * @param startPos - Start position
     * @param allowSign - Whether to allow +/- sign (default: true)
     * @returns Object with number string and end position, or null
     */
    protected parseBaseNumber(input: string, startPos: number, allowSign?: boolean): {
        number: string;
        endPos: number;
    } | null;
    /**
     * Try to extract a number with native language time units.
     *
     * This is a template method that handles the common pattern:
     * 1. Parse the base number (sign, integer, decimal)
     * 2. Try to match native language time units
     * 3. Fall back to standard time units (ms, s, m, h)
     *
     * @param input - Input string
     * @param pos - Start position
     * @param nativeTimeUnits - Language-specific time unit mappings
     * @param options - Configuration options
     * @returns Token if number found, null otherwise
     */
    protected tryNumberWithTimeUnits(input: string, pos: number, nativeTimeUnits: readonly TimeUnitMapping[], options?: {
        allowSign?: boolean;
        skipWhitespace?: boolean;
    }): LanguageToken | null;
    /**
     * Try to extract a URL at the current position.
     * Handles /path, ./path, ../path, //domain.com, http://, https://
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryUrl(input: string, pos: number): LanguageToken | null;
    /**
     * Try to extract a variable reference (:varname) at the current position.
     * In hyperscript, :x refers to a local variable named x.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryVariableRef(input: string, pos: number): LanguageToken | null;
    /**
     * Try to extract an operator or punctuation token at the current position.
     * Handles two-character operators (==, !=, etc.) and single-character operators.
     *
     * @param input - Input string
     * @param pos - Current position
     * @returns Token if matched, null otherwise
     */
    protected tryOperator(input: string, pos: number): LanguageToken | null;
    /**
     * Try to match a multi-character particle from a list.
     *
     * Used by languages like Japanese, Korean, and Chinese that have
     * multi-character particles (e.g., Japanese から, まで, より).
     *
     * @param input - Input string
     * @param pos - Current position
     * @param particles - Array of multi-character particles to match
     * @returns Token if matched, null otherwise
     */
    protected tryMultiCharParticle(input: string, pos: number, particles: readonly string[]): LanguageToken | null;
}
444
|
+
|
|
/**
 * Language Profile Types
 *
 * Type definitions for language profiles, separated for tree-shaking.
 */
450
|
+
|
|
/**
 * Word order in a language (for declarative statements).
 * Each value spells the order of subject (S), verb (V) and object (O).
 */
type WordOrder = 'SVO' | 'SOV' | 'VSO' | 'VOS' | 'OSV' | 'OVS';
/**
 * How grammatical relationships are marked.
 */
type MarkingStrategy = 'preposition' | 'postposition' | 'particle' | 'case-suffix';
/**
 * A grammatical marker (preposition, particle, etc.) for a semantic role.
 */
interface RoleMarker {
    /** Primary marker for this role */
    readonly primary: string;
    /** Alternative markers that also work */
    readonly alternatives?: string[];
    /** Position relative to the role value */
    readonly position: 'before' | 'after';
}
/**
 * Verb form configuration for a language.
 */
interface VerbConfig {
    /** Position of verb in the sentence */
    readonly position: 'start' | 'end' | 'second';
    /** Common verb suffixes/conjugations to recognize */
    readonly suffixes?: string[];
    /** Whether the language commonly drops subjects */
    readonly subjectDrop?: boolean;
}
/**
 * Configuration for possessive expression construction.
 * Defines how "X's property" is expressed in a language.
 */
interface PossessiveConfig {
    /** Possessive marker (e.g., "'s" in English, "の" in Japanese) */
    readonly marker: string;
    /** Position of marker: 'after-object' (X's Y), 'between' (X の Y), 'before-property' */
    readonly markerPosition: 'after-object' | 'between' | 'before-property';
    /** Special possessive forms (e.g., 'me' → 'my' in English) */
    readonly specialForms?: Record<string, string>;
    /** Whether to use possessive adjectives instead of marker (e.g., Spanish mi/tu/su) */
    readonly usePossessiveAdjectives?: boolean;
    /**
     * Possessive keywords mapped to their corresponding reference.
     * Used by pattern-matcher to recognize possessive expressions.
     * Example: { my: 'me', your: 'you', its: 'it' }
     */
    readonly keywords?: Record<string, string>;
}
/**
 * Complete language profile for pattern generation.
 */
interface LanguageProfile {
    /** ISO 639-1 language code */
    readonly code: string;
    /** Human-readable language name */
    readonly name: string;
    /** Native name */
    readonly nativeName: string;
    /** Text direction */
    readonly direction: 'ltr' | 'rtl';
    /** Primary word order */
    readonly wordOrder: WordOrder;
    /** How this language marks grammatical roles */
    readonly markingStrategy: MarkingStrategy;
    /** Markers for each semantic role */
    readonly roleMarkers: Partial<Record<SemanticRole, RoleMarker>>;
    /** Verb configuration */
    readonly verb: VerbConfig;
    /** Command keyword translations */
    readonly keywords: Record<string, KeywordTranslation>;
    /** Whether the language uses spaces between words */
    readonly usesSpaces: boolean;
    /** Special tokenization notes */
    readonly tokenization?: TokenizationConfig;
    /** Reference translations (me, it, you, etc.) */
    readonly references?: Record<string, string>;
    /** Possessive expression configuration */
    readonly possessive?: PossessiveConfig;
    /** Event handler pattern configuration (for simple SVO languages) */
    readonly eventHandler?: EventHandlerConfig;
    /**
     * Default verb form for command keywords. Defaults to 'infinitive'.
     *
     * Based on software UI localization research:
     * - 'infinitive': Spanish, French, German, Portuguese, Russian (industry standard)
     * - 'imperative': Polish
     * - 'base': English, Japanese, Korean (no distinction or same form)
     *
     * Individual keywords can override this via KeywordTranslation.form
     */
    readonly defaultVerbForm?: VerbForm;
}
/**
 * Configuration for event handler pattern generation.
 * Used by simple SVO languages that don't need hand-crafted patterns.
 */
interface EventHandlerConfig {
    /** Primary event keyword (e.g., 'on', 'bei', 'sur') */
    readonly keyword: KeywordTranslation;
    /** Source filter marker (e.g., 'from', 'von', 'de') */
    readonly sourceMarker: RoleMarker;
    /** Conditional keyword (e.g., 'when', 'wenn', 'quand') */
    readonly conditionalKeyword?: KeywordTranslation;
}
/**
 * Verb form used for command keywords.
 *
 * Based on software localization research:
 * - 'infinitive': Standard for most languages (Spanish, French, German, Russian)
 *   Example: "Guardar", "Enregistrer", "Speichern"
 * - 'imperative': Used by some languages (Polish)
 *   Example: "Zapisz", "Otwórz"
 * - 'base': For languages where forms are identical (English, Japanese, Korean)
 *   or where the distinction doesn't apply
 */
type VerbForm = 'infinitive' | 'imperative' | 'base';
/**
 * Translation of a command keyword.
 */
interface KeywordTranslation {
    /** Primary translation (used for output/rendering) */
    readonly primary: string;
    /** Alternative forms for parsing (conjugations, synonyms, informal variants) */
    readonly alternatives?: string[];
    /** Normalized English form for internal matching */
    readonly normalized?: string;
    /**
     * The grammatical form of 'primary'. Defaults to 'infinitive'.
     * This documents the form used and enables future form-switching features.
     * - 'infinitive': Dictionary form (alternar, basculer) - industry standard
     * - 'imperative': Command form (alterna, bascule) - for Polish, etc.
     * - 'base': Same form for both (toggle, トグル) - English, Japanese, Korean
     */
    readonly form?: VerbForm;
}
/**
 * Special tokenization configuration.
 */
interface TokenizationConfig {
    /** Particles to recognize (for particle languages) */
    readonly particles?: string[];
    /** Prefixes to recognize (for prefixing languages) */
    readonly prefixes?: string[];
    /** Word boundary detection strategy */
    readonly boundaryStrategy?: 'space' | 'particle' | 'character';
}
599
|
+
|
|
// Exports of this declaration chunk, each under a short alias:
// BaseTokenizer as a value, the remaining three names as types only.
export { BaseTokenizer as B, type LanguageProfile as L, type TokenStream as T, type TokenKind as a };