@lokascript/semantic 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +686 -0
- package/dist/browser-ar.ar.global.js +2 -0
- package/dist/browser-core.core.global.js +2 -0
- package/dist/browser-de.de.global.js +2 -0
- package/dist/browser-east-asian.east-asian.global.js +2 -0
- package/dist/browser-en-tr.en-tr.global.js +2 -0
- package/dist/browser-en.en.global.js +2 -0
- package/dist/browser-es-en.es-en.global.js +2 -0
- package/dist/browser-es.es.global.js +2 -0
- package/dist/browser-fr.fr.global.js +2 -0
- package/dist/browser-id.id.global.js +2 -0
- package/dist/browser-ja.ja.global.js +2 -0
- package/dist/browser-ko.ko.global.js +2 -0
- package/dist/browser-lazy.lazy.global.js +2 -0
- package/dist/browser-priority.priority.global.js +2 -0
- package/dist/browser-pt.pt.global.js +2 -0
- package/dist/browser-qu.qu.global.js +2 -0
- package/dist/browser-sw.sw.global.js +2 -0
- package/dist/browser-tr.tr.global.js +2 -0
- package/dist/browser-western.western.global.js +2 -0
- package/dist/browser-zh.zh.global.js +2 -0
- package/dist/browser.global.js +3 -0
- package/dist/browser.global.js.map +1 -0
- package/dist/index.cjs +35051 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +3426 -0
- package/dist/index.d.ts +3426 -0
- package/dist/index.js +34890 -0
- package/dist/index.js.map +1 -0
- package/dist/languages/ar.d.ts +78 -0
- package/dist/languages/ar.js +1622 -0
- package/dist/languages/ar.js.map +1 -0
- package/dist/languages/de.d.ts +38 -0
- package/dist/languages/de.js +1168 -0
- package/dist/languages/de.js.map +1 -0
- package/dist/languages/en.d.ts +44 -0
- package/dist/languages/en.js +3491 -0
- package/dist/languages/en.js.map +1 -0
- package/dist/languages/es.d.ts +52 -0
- package/dist/languages/es.js +1493 -0
- package/dist/languages/es.js.map +1 -0
- package/dist/languages/fr.d.ts +37 -0
- package/dist/languages/fr.js +1159 -0
- package/dist/languages/fr.js.map +1 -0
- package/dist/languages/id.d.ts +35 -0
- package/dist/languages/id.js +1152 -0
- package/dist/languages/id.js.map +1 -0
- package/dist/languages/ja.d.ts +53 -0
- package/dist/languages/ja.js +1430 -0
- package/dist/languages/ja.js.map +1 -0
- package/dist/languages/ko.d.ts +51 -0
- package/dist/languages/ko.js +1729 -0
- package/dist/languages/ko.js.map +1 -0
- package/dist/languages/pt.d.ts +37 -0
- package/dist/languages/pt.js +1127 -0
- package/dist/languages/pt.js.map +1 -0
- package/dist/languages/qu.d.ts +36 -0
- package/dist/languages/qu.js +1143 -0
- package/dist/languages/qu.js.map +1 -0
- package/dist/languages/sw.d.ts +35 -0
- package/dist/languages/sw.js +1147 -0
- package/dist/languages/sw.js.map +1 -0
- package/dist/languages/tr.d.ts +45 -0
- package/dist/languages/tr.js +1529 -0
- package/dist/languages/tr.js.map +1 -0
- package/dist/languages/zh.d.ts +58 -0
- package/dist/languages/zh.js +1257 -0
- package/dist/languages/zh.js.map +1 -0
- package/dist/types-C4dcj53L.d.ts +600 -0
- package/package.json +202 -0
- package/src/__test-utils__/index.ts +7 -0
- package/src/__test-utils__/test-helpers.ts +8 -0
- package/src/__types__/test-helpers.ts +122 -0
- package/src/analysis/index.ts +479 -0
- package/src/ast-builder/command-mappers.ts +1133 -0
- package/src/ast-builder/expression-parser/index.ts +41 -0
- package/src/ast-builder/expression-parser/parser.ts +563 -0
- package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
- package/src/ast-builder/expression-parser/types.ts +208 -0
- package/src/ast-builder/index.ts +536 -0
- package/src/ast-builder/value-converters.ts +172 -0
- package/src/bridge.ts +275 -0
- package/src/browser-ar.ts +162 -0
- package/src/browser-core.ts +231 -0
- package/src/browser-de.ts +162 -0
- package/src/browser-east-asian.ts +173 -0
- package/src/browser-en-tr.ts +165 -0
- package/src/browser-en.ts +157 -0
- package/src/browser-es-en.ts +200 -0
- package/src/browser-es.ts +170 -0
- package/src/browser-fr.ts +162 -0
- package/src/browser-id.ts +162 -0
- package/src/browser-ja.ts +162 -0
- package/src/browser-ko.ts +162 -0
- package/src/browser-lazy.ts +189 -0
- package/src/browser-priority.ts +214 -0
- package/src/browser-pt.ts +162 -0
- package/src/browser-qu.ts +162 -0
- package/src/browser-sw.ts +162 -0
- package/src/browser-tr.ts +162 -0
- package/src/browser-western.ts +181 -0
- package/src/browser-zh.ts +162 -0
- package/src/browser.ts +268 -0
- package/src/cache/index.ts +14 -0
- package/src/cache/semantic-cache.ts +344 -0
- package/src/core-bridge.ts +372 -0
- package/src/explicit/converter.ts +258 -0
- package/src/explicit/index.ts +18 -0
- package/src/explicit/parser.ts +236 -0
- package/src/explicit/renderer.ts +424 -0
- package/src/generators/command-schemas.ts +1636 -0
- package/src/generators/event-handler-generator.ts +109 -0
- package/src/generators/index.ts +117 -0
- package/src/generators/language-profiles.ts +139 -0
- package/src/generators/pattern-generator.ts +537 -0
- package/src/generators/profiles/arabic.ts +131 -0
- package/src/generators/profiles/bengali.ts +132 -0
- package/src/generators/profiles/chinese.ts +124 -0
- package/src/generators/profiles/english.ts +113 -0
- package/src/generators/profiles/french.ts +125 -0
- package/src/generators/profiles/german.ts +126 -0
- package/src/generators/profiles/hindi.ts +146 -0
- package/src/generators/profiles/index.ts +46 -0
- package/src/generators/profiles/indonesian.ts +125 -0
- package/src/generators/profiles/italian.ts +139 -0
- package/src/generators/profiles/japanese.ts +149 -0
- package/src/generators/profiles/korean.ts +127 -0
- package/src/generators/profiles/marker-templates.ts +288 -0
- package/src/generators/profiles/ms.ts +130 -0
- package/src/generators/profiles/polish.ts +249 -0
- package/src/generators/profiles/portuguese.ts +115 -0
- package/src/generators/profiles/quechua.ts +113 -0
- package/src/generators/profiles/russian.ts +260 -0
- package/src/generators/profiles/spanish.ts +130 -0
- package/src/generators/profiles/swahili.ts +129 -0
- package/src/generators/profiles/thai.ts +132 -0
- package/src/generators/profiles/tl.ts +128 -0
- package/src/generators/profiles/turkish.ts +124 -0
- package/src/generators/profiles/types.ts +165 -0
- package/src/generators/profiles/ukrainian.ts +270 -0
- package/src/generators/profiles/vietnamese.ts +133 -0
- package/src/generators/schema-error-codes.ts +160 -0
- package/src/generators/schema-validator.ts +391 -0
- package/src/index.ts +429 -0
- package/src/language-building-schema.ts +3170 -0
- package/src/language-loader.ts +394 -0
- package/src/languages/_all.ts +65 -0
- package/src/languages/ar.ts +15 -0
- package/src/languages/bn.ts +16 -0
- package/src/languages/de.ts +15 -0
- package/src/languages/en.ts +29 -0
- package/src/languages/es.ts +15 -0
- package/src/languages/fr.ts +15 -0
- package/src/languages/hi.ts +26 -0
- package/src/languages/id.ts +15 -0
- package/src/languages/index.ts +18 -0
- package/src/languages/it.ts +15 -0
- package/src/languages/ja.ts +15 -0
- package/src/languages/ko.ts +15 -0
- package/src/languages/ms.ts +16 -0
- package/src/languages/pl.ts +18 -0
- package/src/languages/pt.ts +15 -0
- package/src/languages/qu.ts +15 -0
- package/src/languages/ru.ts +26 -0
- package/src/languages/sw.ts +15 -0
- package/src/languages/th.ts +16 -0
- package/src/languages/tl.ts +16 -0
- package/src/languages/tr.ts +15 -0
- package/src/languages/uk.ts +26 -0
- package/src/languages/vi.ts +16 -0
- package/src/languages/zh.ts +15 -0
- package/src/parser/index.ts +15 -0
- package/src/parser/pattern-matcher.ts +1181 -0
- package/src/parser/semantic-parser.ts +573 -0
- package/src/parser/utils/index.ts +35 -0
- package/src/parser/utils/marker-resolution.ts +111 -0
- package/src/parser/utils/possessive-keywords.ts +43 -0
- package/src/parser/utils/role-positioning.ts +70 -0
- package/src/parser/utils/type-validation.ts +134 -0
- package/src/patterns/add/ar.ts +71 -0
- package/src/patterns/add/bn.ts +70 -0
- package/src/patterns/add/hi.ts +69 -0
- package/src/patterns/add/index.ts +87 -0
- package/src/patterns/add/it.ts +61 -0
- package/src/patterns/add/ja.ts +93 -0
- package/src/patterns/add/ko.ts +74 -0
- package/src/patterns/add/ms.ts +30 -0
- package/src/patterns/add/pl.ts +62 -0
- package/src/patterns/add/ru.ts +62 -0
- package/src/patterns/add/th.ts +49 -0
- package/src/patterns/add/tl.ts +30 -0
- package/src/patterns/add/tr.ts +71 -0
- package/src/patterns/add/uk.ts +62 -0
- package/src/patterns/add/vi.ts +61 -0
- package/src/patterns/add/zh.ts +71 -0
- package/src/patterns/builders.ts +207 -0
- package/src/patterns/decrement/bn.ts +70 -0
- package/src/patterns/decrement/de.ts +42 -0
- package/src/patterns/decrement/hi.ts +68 -0
- package/src/patterns/decrement/index.ts +79 -0
- package/src/patterns/decrement/it.ts +69 -0
- package/src/patterns/decrement/ms.ts +30 -0
- package/src/patterns/decrement/pl.ts +58 -0
- package/src/patterns/decrement/ru.ts +58 -0
- package/src/patterns/decrement/th.ts +49 -0
- package/src/patterns/decrement/tl.ts +30 -0
- package/src/patterns/decrement/tr.ts +48 -0
- package/src/patterns/decrement/uk.ts +58 -0
- package/src/patterns/decrement/vi.ts +61 -0
- package/src/patterns/decrement/zh.ts +32 -0
- package/src/patterns/en.ts +302 -0
- package/src/patterns/event-handler/ar.ts +151 -0
- package/src/patterns/event-handler/bn.ts +72 -0
- package/src/patterns/event-handler/de.ts +117 -0
- package/src/patterns/event-handler/en.ts +117 -0
- package/src/patterns/event-handler/es.ts +136 -0
- package/src/patterns/event-handler/fr.ts +117 -0
- package/src/patterns/event-handler/hi.ts +64 -0
- package/src/patterns/event-handler/id.ts +117 -0
- package/src/patterns/event-handler/index.ts +119 -0
- package/src/patterns/event-handler/it.ts +54 -0
- package/src/patterns/event-handler/ja.ts +118 -0
- package/src/patterns/event-handler/ko.ts +133 -0
- package/src/patterns/event-handler/ms.ts +30 -0
- package/src/patterns/event-handler/pl.ts +62 -0
- package/src/patterns/event-handler/pt.ts +117 -0
- package/src/patterns/event-handler/qu.ts +66 -0
- package/src/patterns/event-handler/ru.ts +62 -0
- package/src/patterns/event-handler/shared.ts +270 -0
- package/src/patterns/event-handler/sw.ts +117 -0
- package/src/patterns/event-handler/th.ts +53 -0
- package/src/patterns/event-handler/tl.ts +30 -0
- package/src/patterns/event-handler/tr.ts +170 -0
- package/src/patterns/event-handler/uk.ts +62 -0
- package/src/patterns/event-handler/vi.ts +61 -0
- package/src/patterns/event-handler/zh.ts +150 -0
- package/src/patterns/get/ar.ts +49 -0
- package/src/patterns/get/bn.ts +47 -0
- package/src/patterns/get/de.ts +32 -0
- package/src/patterns/get/hi.ts +52 -0
- package/src/patterns/get/index.ts +83 -0
- package/src/patterns/get/it.ts +56 -0
- package/src/patterns/get/ja.ts +53 -0
- package/src/patterns/get/ko.ts +53 -0
- package/src/patterns/get/ms.ts +30 -0
- package/src/patterns/get/pl.ts +57 -0
- package/src/patterns/get/ru.ts +57 -0
- package/src/patterns/get/th.ts +29 -0
- package/src/patterns/get/tl.ts +30 -0
- package/src/patterns/get/uk.ts +57 -0
- package/src/patterns/get/vi.ts +48 -0
- package/src/patterns/grammar-transformed/index.ts +39 -0
- package/src/patterns/grammar-transformed/ja.ts +1713 -0
- package/src/patterns/grammar-transformed/ko.ts +1311 -0
- package/src/patterns/grammar-transformed/tr.ts +1067 -0
- package/src/patterns/hide/ar.ts +67 -0
- package/src/patterns/hide/bn.ts +47 -0
- package/src/patterns/hide/de.ts +36 -0
- package/src/patterns/hide/hi.ts +61 -0
- package/src/patterns/hide/index.ts +91 -0
- package/src/patterns/hide/it.ts +56 -0
- package/src/patterns/hide/ja.ts +69 -0
- package/src/patterns/hide/ko.ts +69 -0
- package/src/patterns/hide/ms.ts +30 -0
- package/src/patterns/hide/pl.ts +57 -0
- package/src/patterns/hide/ru.ts +57 -0
- package/src/patterns/hide/th.ts +29 -0
- package/src/patterns/hide/tl.ts +30 -0
- package/src/patterns/hide/tr.ts +65 -0
- package/src/patterns/hide/uk.ts +57 -0
- package/src/patterns/hide/vi.ts +56 -0
- package/src/patterns/hide/zh.ts +68 -0
- package/src/patterns/increment/bn.ts +70 -0
- package/src/patterns/increment/de.ts +36 -0
- package/src/patterns/increment/hi.ts +68 -0
- package/src/patterns/increment/index.ts +79 -0
- package/src/patterns/increment/it.ts +69 -0
- package/src/patterns/increment/ms.ts +30 -0
- package/src/patterns/increment/pl.ts +58 -0
- package/src/patterns/increment/ru.ts +58 -0
- package/src/patterns/increment/th.ts +49 -0
- package/src/patterns/increment/tl.ts +30 -0
- package/src/patterns/increment/tr.ts +52 -0
- package/src/patterns/increment/uk.ts +58 -0
- package/src/patterns/increment/vi.ts +61 -0
- package/src/patterns/increment/zh.ts +32 -0
- package/src/patterns/index.ts +84 -0
- package/src/patterns/languages/en/control-flow.ts +93 -0
- package/src/patterns/languages/en/fetch.ts +62 -0
- package/src/patterns/languages/en/index.ts +42 -0
- package/src/patterns/languages/en/repeat.ts +67 -0
- package/src/patterns/languages/en/set.ts +48 -0
- package/src/patterns/languages/en/swap.ts +38 -0
- package/src/patterns/languages/en/temporal.ts +57 -0
- package/src/patterns/put/ar.ts +74 -0
- package/src/patterns/put/bn.ts +53 -0
- package/src/patterns/put/en.ts +74 -0
- package/src/patterns/put/es.ts +74 -0
- package/src/patterns/put/hi.ts +69 -0
- package/src/patterns/put/id.ts +96 -0
- package/src/patterns/put/index.ts +99 -0
- package/src/patterns/put/it.ts +56 -0
- package/src/patterns/put/ja.ts +75 -0
- package/src/patterns/put/ko.ts +67 -0
- package/src/patterns/put/ms.ts +30 -0
- package/src/patterns/put/pl.ts +81 -0
- package/src/patterns/put/ru.ts +85 -0
- package/src/patterns/put/th.ts +32 -0
- package/src/patterns/put/tl.ts +30 -0
- package/src/patterns/put/tr.ts +67 -0
- package/src/patterns/put/uk.ts +85 -0
- package/src/patterns/put/vi.ts +72 -0
- package/src/patterns/put/zh.ts +62 -0
- package/src/patterns/registry.ts +163 -0
- package/src/patterns/remove/ar.ts +71 -0
- package/src/patterns/remove/bn.ts +68 -0
- package/src/patterns/remove/hi.ts +69 -0
- package/src/patterns/remove/index.ts +87 -0
- package/src/patterns/remove/it.ts +69 -0
- package/src/patterns/remove/ja.ts +74 -0
- package/src/patterns/remove/ko.ts +78 -0
- package/src/patterns/remove/ms.ts +30 -0
- package/src/patterns/remove/pl.ts +62 -0
- package/src/patterns/remove/ru.ts +62 -0
- package/src/patterns/remove/th.ts +49 -0
- package/src/patterns/remove/tl.ts +30 -0
- package/src/patterns/remove/tr.ts +78 -0
- package/src/patterns/remove/uk.ts +62 -0
- package/src/patterns/remove/vi.ts +61 -0
- package/src/patterns/remove/zh.ts +72 -0
- package/src/patterns/set/ar.ts +84 -0
- package/src/patterns/set/bn.ts +53 -0
- package/src/patterns/set/de.ts +84 -0
- package/src/patterns/set/es.ts +92 -0
- package/src/patterns/set/fr.ts +88 -0
- package/src/patterns/set/hi.ts +56 -0
- package/src/patterns/set/id.ts +84 -0
- package/src/patterns/set/index.ts +107 -0
- package/src/patterns/set/it.ts +56 -0
- package/src/patterns/set/ja.ts +86 -0
- package/src/patterns/set/ko.ts +85 -0
- package/src/patterns/set/ms.ts +30 -0
- package/src/patterns/set/pl.ts +57 -0
- package/src/patterns/set/pt.ts +84 -0
- package/src/patterns/set/ru.ts +57 -0
- package/src/patterns/set/th.ts +31 -0
- package/src/patterns/set/tl.ts +30 -0
- package/src/patterns/set/tr.ts +107 -0
- package/src/patterns/set/uk.ts +57 -0
- package/src/patterns/set/vi.ts +53 -0
- package/src/patterns/set/zh.ts +84 -0
- package/src/patterns/show/ar.ts +67 -0
- package/src/patterns/show/bn.ts +47 -0
- package/src/patterns/show/de.ts +32 -0
- package/src/patterns/show/fr.ts +32 -0
- package/src/patterns/show/hi.ts +61 -0
- package/src/patterns/show/index.ts +95 -0
- package/src/patterns/show/it.ts +56 -0
- package/src/patterns/show/ja.ts +69 -0
- package/src/patterns/show/ko.ts +73 -0
- package/src/patterns/show/ms.ts +30 -0
- package/src/patterns/show/pl.ts +57 -0
- package/src/patterns/show/ru.ts +57 -0
- package/src/patterns/show/th.ts +29 -0
- package/src/patterns/show/tl.ts +30 -0
- package/src/patterns/show/tr.ts +65 -0
- package/src/patterns/show/uk.ts +57 -0
- package/src/patterns/show/vi.ts +56 -0
- package/src/patterns/show/zh.ts +68 -0
- package/src/patterns/take/ar.ts +51 -0
- package/src/patterns/take/index.ts +31 -0
- package/src/patterns/toggle/ar.ts +61 -0
- package/src/patterns/toggle/bn.ts +70 -0
- package/src/patterns/toggle/en.ts +61 -0
- package/src/patterns/toggle/es.ts +61 -0
- package/src/patterns/toggle/hi.ts +80 -0
- package/src/patterns/toggle/index.ts +95 -0
- package/src/patterns/toggle/it.ts +69 -0
- package/src/patterns/toggle/ja.ts +156 -0
- package/src/patterns/toggle/ko.ts +113 -0
- package/src/patterns/toggle/ms.ts +30 -0
- package/src/patterns/toggle/pl.ts +62 -0
- package/src/patterns/toggle/ru.ts +62 -0
- package/src/patterns/toggle/th.ts +50 -0
- package/src/patterns/toggle/tl.ts +30 -0
- package/src/patterns/toggle/tr.ts +88 -0
- package/src/patterns/toggle/uk.ts +62 -0
- package/src/patterns/toggle/vi.ts +61 -0
- package/src/patterns/toggle/zh.ts +99 -0
- package/src/public-api.ts +286 -0
- package/src/registry.ts +441 -0
- package/src/tokenizers/arabic.ts +723 -0
- package/src/tokenizers/base.ts +1300 -0
- package/src/tokenizers/bengali.ts +289 -0
- package/src/tokenizers/chinese.ts +481 -0
- package/src/tokenizers/english.ts +416 -0
- package/src/tokenizers/french.ts +326 -0
- package/src/tokenizers/german.ts +324 -0
- package/src/tokenizers/hindi.ts +319 -0
- package/src/tokenizers/index.ts +127 -0
- package/src/tokenizers/indonesian.ts +306 -0
- package/src/tokenizers/italian.ts +458 -0
- package/src/tokenizers/japanese.ts +447 -0
- package/src/tokenizers/korean.ts +642 -0
- package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
- package/src/tokenizers/morphology/french-normalizer.ts +268 -0
- package/src/tokenizers/morphology/german-normalizer.ts +256 -0
- package/src/tokenizers/morphology/index.ts +46 -0
- package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
- package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
- package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
- package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
- package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
- package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
- package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
- package/src/tokenizers/morphology/types.ts +211 -0
- package/src/tokenizers/ms.ts +198 -0
- package/src/tokenizers/polish.ts +354 -0
- package/src/tokenizers/portuguese.ts +304 -0
- package/src/tokenizers/quechua.ts +339 -0
- package/src/tokenizers/russian.ts +375 -0
- package/src/tokenizers/spanish.ts +403 -0
- package/src/tokenizers/swahili.ts +303 -0
- package/src/tokenizers/thai.ts +236 -0
- package/src/tokenizers/tl.ts +198 -0
- package/src/tokenizers/turkish.ts +411 -0
- package/src/tokenizers/ukrainian.ts +369 -0
- package/src/tokenizers/vietnamese.ts +410 -0
- package/src/types/grammar-types.ts +617 -0
- package/src/types/unified-profile.ts +267 -0
- package/src/types.ts +709 -0
- package/src/utils/confidence-calculator.ts +147 -0
- package/src/validators/command-validator.ts +380 -0
- package/src/validators/index.ts +15 -0
|
@@ -0,0 +1,617 @@
|
|
|
1
|
+
/**
 * Grammar Types for Semantic Multilingual Parsing
 *
 * These types define the semantic role system used across all 13 supported languages.
 * Originally from @lokascript/i18n, now consolidated here for package independence.
 *
 * Key Linguistic Concepts:
 * - Word Order: SVO, SOV, VSO (and variations)
 * - Adposition Type: Preposition (English) vs Postposition (Japanese/Korean)
 * - Morphology: Isolating (Chinese) vs Agglutinative (Turkish) vs Fusional (Arabic)
 * - Text Direction: LTR vs RTL
 */
|
|
13
|
+
|
|
14
|
+
// =============================================================================
|
|
15
|
+
// Core Types
|
|
16
|
+
// =============================================================================
|
|
17
|
+
|
|
18
|
+
/**
 * Semantic roles in hyperscript commands.
 * These are universal across all 13 supported languages - only the surface form changes.
 *
 * ## Core Thematic Roles (from linguistic theory)
 * | Role        | Usage | Purpose                     | Example                   |
 * |-------------|-------|-----------------------------|---------------------------|
 * | action      | 100%  | Command verb                | toggle, put, fetch        |
 * | patient     | 90%   | What is acted upon          | .active, #count           |
 * | destination | 40%   | Where something goes        | into #output, to .class   |
 * | source      | 13%   | Where something comes from  | from #input, from URL     |
 * | event       | 106%  | Trigger events              | click, keydown, submit    |
 * | condition   | 8%    | Boolean expressions         | if x > 5, when visible    |
 * | agent       | 0%    | Who performs action         | Reserved for future use   |
 * | goal        | 1%    | Target value/state          | to 'red' (in transition)  |
 *
 * NOTE(review): the 106% usage figure for `event` exceeds 100%. It may be a
 * counting artifact or a typo - confirm against the pattern database analysis.
 *
 * ## Quantitative Roles (answer "how much/long")
 * | Role     | Usage | Purpose        | Example              |
 * |----------|-------|----------------|----------------------|
 * | quantity | 7%    | Numeric amount | by 5, 3 times        |
 * | duration | 1%    | Time span      | for 5 seconds, 500ms |
 *
 * ## Adverbial/Modifier Roles (answer "how/by what means")
 * | Role         | Usage | Purpose                   | Example           |
 * |--------------|-------|---------------------------|-------------------|
 * | style        | 2%    | Animation/behavior        | with fade         |
 * | manner       | 2%    | Insertion position        | before, after     |
 * | method       | 1%    | HTTP method/technique     | via POST, as GET  |
 * | responseType | 1%    | Response format           | as json, as html  |
 *
 * ## Control Flow Roles
 * | Role     | Usage | Purpose      | Example               |
 * |----------|-------|--------------|-----------------------|
 * | loopType | 6%    | Loop variant | forever, until, times |
 *
 * ## Structural Roles (for parser control)
 * - `continues`: continuation marker used for then-chains.
 *
 * ## Design Notes
 * - Low-usage roles (agent, goal, method, responseType) are intentionally kept for:
 *   - Linguistic completeness across all 13 languages
 *   - Future extensibility (AI agents, server-side execution)
 *   - Command-specific semantics (fetch, transition)
 * - Each role has distinct grammatical markers per language (see profiles/index.ts)
 * - Usage percentages based on pattern database analysis
 */
export type SemanticRole =
  // Core thematic roles
  | 'action' // The command/verb (increment, put, toggle)
  | 'agent' // Who/what performs action (reserved for future: AI agents, server-side)
  | 'patient' // What is acted upon (the counter, .active)
  | 'source' // Origin (from #input, from URL)
  | 'destination' // Target location (into #output, to .class)
  | 'goal' // Target value/state (to 'red', to 100)
  | 'event' // Trigger (click, input, keydown)
  | 'condition' // Boolean expression (if x > 5)
  // Quantitative roles
  | 'quantity' // Numeric amount (by 5, 3 times)
  | 'duration' // Time span (for 5 seconds, over 500ms)
  // Adverbial roles
  | 'responseType' // Response format (as json, as text, as html)
  | 'method' // HTTP method/technique (via POST, using GET)
  | 'style' // Visual/behavioral manner (with fade, smoothly)
  | 'manner' // Insertion position (before, after)
  // Control flow roles
  | 'loopType' // Loop variant: forever, times, for, while, until, until-event
  // Structural roles (for parser control)
  | 'continues'; // Continuation marker (then-chains)
|
|
83
|
+
|
|
84
|
+
/**
 * Word order patterns.
 *
 * These represent the major typological categories: the six permutations of
 * subject (S), verb (V) and object (O), plus `'free'`.
 */
export type WordOrder = 'SVO' | 'SOV' | 'VSO' | 'VOS' | 'OVS' | 'OSV' | 'free';
|
|
89
|
+
|
|
90
|
+
/**
 * Where grammatical markers appear relative to their noun/verb.
 */
export type AdpositionType = 'preposition' | 'postposition' | 'circumposition' | 'none';
|
|
94
|
+
|
|
95
|
+
/**
 * Morphological typology - how words are constructed.
 *
 * NOTE(review): Quechua is labelled polysynthetic below, but it is usually
 * described as agglutinative in the typology literature - confirm the label.
 */
export type MorphologyType =
  | 'isolating' // Chinese - no inflection, word order matters
  | 'agglutinative' // Turkish, Japanese - morphemes stack predictably
  | 'fusional' // Arabic, Spanish - morphemes blend together
  | 'polysynthetic'; // Quechua - complex words encode full sentences
|
|
103
|
+
|
|
104
|
+
/**
 * A grammatical marker (particle, case ending, preposition).
 */
export interface GrammaticalMarker {
  /** The actual text (を, に, to, 的). */
  form: string;
  /** What semantic role it marks. */
  role: SemanticRole;
  /** Where it appears. */
  position: AdpositionType;
  /** Is it mandatory? */
  required: boolean;
  /** Alternative forms (e.g., 을/를 in Korean). */
  alternatives?: string[];
}
|
|
114
|
+
|
|
115
|
+
// =============================================================================
|
|
116
|
+
// Language Profile
|
|
117
|
+
// =============================================================================
|
|
118
|
+
|
|
119
|
+
/**
 * Complete grammatical profile for a language.
 * This captures the essential typological features needed for transformation.
 */
export interface LanguageProfile {
  /** ISO 639-1 code. */
  code: string;
  /** Native name. */
  name: string;

  // Typological features
  wordOrder: WordOrder;
  adpositionType: AdpositionType;
  morphology: MorphologyType;
  direction: 'ltr' | 'rtl';

  /** Grammatical markers for each semantic role. */
  markers: GrammaticalMarker[];

  /**
   * Role ordering - which semantic roles come in what order.
   * E.g., Japanese: ['patient', 'source', 'destination', 'action']
   * E.g., English: ['action', 'patient', 'source', 'destination']
   */
  canonicalOrder: SemanticRole[];

  /** Special rules. */
  rules?: GrammarRule[];
}
|
|
144
|
+
|
|
145
|
+
/**
 * Pattern for transforming hyperscript structures.
 */
export interface GrammarRule {
  name: string;
  description: string;

  /** Pattern matching (in canonical English form). */
  match: PatternMatcher;

  /** How to transform for this language. */
  transform: PatternTransform;

  /** Priority (higher = checked first). */
  priority: number;
}
|
|
161
|
+
|
|
162
|
+
/**
 * Matches a hyperscript pattern.
 */
export interface PatternMatcher {
  /** Command type(s) this matches. */
  commands?: string[];

  /** Required semantic roles. */
  requiredRoles: SemanticRole[];

  /** Optional roles. */
  optionalRoles?: SemanticRole[];

  /** Custom predicate for complex matching. */
  predicate?: (parsed: ParsedStatement) => boolean;
}
|
|
178
|
+
|
|
179
|
+
/**
 * Defines how to transform a matched pattern.
 */
export interface PatternTransform {
  /** Reorder roles for target language. */
  roleOrder: SemanticRole[];

  /** Insert markers between roles. */
  insertMarkers?: boolean;

  /** Custom transformation function. */
  custom?: (parsed: ParsedStatement, profile: LanguageProfile) => string;
}
|
|
192
|
+
|
|
193
|
+
// =============================================================================
|
|
194
|
+
// Parsed Structures
|
|
195
|
+
// =============================================================================
|
|
196
|
+
|
|
197
|
+
/**
 * A parsed hyperscript statement broken into semantic components.
 */
export interface ParsedStatement {
  /** Statement category. */
  type: 'event-handler' | 'command' | 'conditional' | 'loop';
  /** Parsed elements keyed by the semantic role they fill. */
  roles: Map<SemanticRole, ParsedElement>;
  /** The statement text as a string. */
  original: string;
}
|
|
205
|
+
|
|
206
|
+
/**
 * A single element with its semantic role.
 */
export interface ParsedElement {
  role: SemanticRole;
  /** Original English value. */
  value: string;
  /** Translated value. */
  translated?: string;
  /** CSS selector (don't translate). */
  isSelector?: boolean;
  /** Literal value (don't translate). */
  isLiteral?: boolean;
}
|
|
216
|
+
|
|
217
|
+
// =============================================================================
|
|
218
|
+
// Universal Pattern Templates
|
|
219
|
+
// =============================================================================
|
|
220
|
+
|
|
221
|
+
/**
 * Universal templates for common hyperscript patterns.
 * These define the semantic structure independent of surface form.
 *
 * Each entry carries:
 * - `name`: kebab-case identifier of the template
 * - `roles`: the semantic roles the template uses, in English order
 * - `english`: the English surface form with `{role}` placeholders
 */
export const UNIVERSAL_PATTERNS = {
  // on click increment #count
  eventIncrement: {
    name: 'event-increment',
    roles: ['event', 'action', 'patient'] as SemanticRole[],
    english: 'on {event} {action} {patient}',
  },

  // put X into Y
  putInto: {
    name: 'put-into',
    roles: ['action', 'patient', 'destination'] as SemanticRole[],
    english: '{action} {patient} into {destination}',
  },

  // add .class to element
  addTo: {
    name: 'add-to',
    roles: ['action', 'patient', 'destination'] as SemanticRole[],
    english: '{action} {patient} to {destination}',
  },

  // toggle .class on element
  toggleOn: {
    name: 'toggle-on',
    roles: ['action', 'patient', 'destination'] as SemanticRole[],
    english: '{action} {patient} on {destination}',
  },

  // wait 2 seconds
  // NOTE(review): the example is a time span, which the SemanticRole docs
  // assign to `duration`; this template uses `quantity`. Values kept as-is
  // because callers may depend on them - confirm intent.
  waitDuration: {
    name: 'wait-duration',
    roles: ['action', 'quantity'] as SemanticRole[],
    english: '{action} {quantity}',
  },

  // if condition then ... end
  conditional: {
    name: 'conditional',
    roles: ['action', 'condition'] as SemanticRole[],
    english: '{action} {condition} then ... end',
  },

  // fetch URL as type
  // NOTE(review): the SemanticRole docs list "as json, as html" under
  // `responseType`; this template uses `method`. Values kept as-is because
  // callers may depend on them - confirm intent.
  fetchAs: {
    name: 'fetch-as',
    roles: ['action', 'source', 'method'] as SemanticRole[],
    english: '{action} {source} as {method}',
  },

  // show element with animation
  showWith: {
    name: 'show-with',
    roles: ['action', 'patient', 'style'] as SemanticRole[],
    english: '{action} {patient} with {style}',
  },

  // transition property over duration
  transitionOver: {
    name: 'transition-over',
    roles: ['action', 'patient', 'duration'] as SemanticRole[],
    english: '{action} {patient} over {duration}',
  },
} as const;
|
|
289
|
+
|
|
290
|
+
// =============================================================================
|
|
291
|
+
// Language Family Defaults
|
|
292
|
+
// =============================================================================
|
|
293
|
+
|
|
294
|
+
/** Role sequence with the action first; copied into each family that uses it. */
const HEAD_INITIAL_ROLE_ORDER: SemanticRole[] = [
  'action',
  'patient',
  'source',
  'destination',
  'quantity',
  'duration',
  'method',
  'style',
];

/** Role sequence with the action last; copied into each family that uses it. */
const HEAD_FINAL_ROLE_ORDER: SemanticRole[] = [
  'patient',
  'source',
  'destination',
  'quantity',
  'duration',
  'method',
  'style',
  'action',
];

/**
 * Baseline profile settings for the major language families.
 *
 * A concrete language starts from its family's entry and overrides whatever
 * differs. Every entry owns a separate `canonicalOrder` array, so changing one
 * family's order never leaks into another.
 */
export const LANGUAGE_FAMILY_DEFAULTS: Record<string, Partial<LanguageProfile>> = {
  // Germanic: English, German, Dutch
  germanic: {
    wordOrder: 'SVO',
    adpositionType: 'preposition',
    morphology: 'fusional',
    direction: 'ltr',
    canonicalOrder: [...HEAD_INITIAL_ROLE_ORDER],
  },

  // Romance: Spanish, French, Italian, Portuguese
  romance: {
    wordOrder: 'SVO',
    adpositionType: 'preposition',
    morphology: 'fusional',
    direction: 'ltr',
    canonicalOrder: [...HEAD_INITIAL_ROLE_ORDER],
  },

  // Japonic: Japanese
  japonic: {
    wordOrder: 'SOV',
    adpositionType: 'postposition',
    morphology: 'agglutinative',
    direction: 'ltr',
    canonicalOrder: [...HEAD_FINAL_ROLE_ORDER],
  },

  // Koreanic: Korean
  koreanic: {
    wordOrder: 'SOV',
    adpositionType: 'postposition',
    morphology: 'agglutinative',
    direction: 'ltr',
    canonicalOrder: [...HEAD_FINAL_ROLE_ORDER],
  },

  // Turkic: Turkish, Azerbaijani
  turkic: {
    wordOrder: 'SOV',
    adpositionType: 'postposition',
    morphology: 'agglutinative',
    direction: 'ltr',
    canonicalOrder: [...HEAD_FINAL_ROLE_ORDER],
  },

  // Sinitic: Chinese, Cantonese
  sinitic: {
    // Topic-prominent with flexible ordering; SVO is the default used here.
    wordOrder: 'SVO',
    adpositionType: 'preposition',
    morphology: 'isolating',
    direction: 'ltr',
    canonicalOrder: [...HEAD_INITIAL_ROLE_ORDER],
  },

  // Semitic: Arabic, Hebrew
  semitic: {
    wordOrder: 'VSO',
    adpositionType: 'preposition',
    // Root-and-pattern system, filed under 'fusional'.
    morphology: 'fusional',
    direction: 'rtl',
    // Unlike the other families, this order names 'agent' and lists
    // 'destination' ahead of 'source'.
    canonicalOrder: [
      'action',
      'agent',
      'patient',
      'destination',
      'source',
      'quantity',
      'duration',
      'method',
      'style',
    ],
  },

  // Austronesian: Indonesian, Tagalog
  austronesian: {
    wordOrder: 'SVO',
    adpositionType: 'preposition',
    morphology: 'agglutinative',
    direction: 'ltr',
    canonicalOrder: [...HEAD_INITIAL_ROLE_ORDER],
  },

  // Quechuan: Quechua
  quechuan: {
    wordOrder: 'SOV',
    adpositionType: 'postposition',
    // Recorded as 'agglutinative' as a deliberate simplification.
    morphology: 'agglutinative',
    direction: 'ltr',
    canonicalOrder: [...HEAD_FINAL_ROLE_ORDER],
  },

  // Bantu: Swahili
  bantu: {
    wordOrder: 'SVO',
    adpositionType: 'preposition',
    morphology: 'agglutinative',
    direction: 'ltr',
    canonicalOrder: [...HEAD_INITIAL_ROLE_ORDER],
  },
};
|
|
480
|
+
|
|
481
|
+
// =============================================================================
|
|
482
|
+
// Transformation Utilities
|
|
483
|
+
// =============================================================================
|
|
484
|
+
|
|
485
|
+
/**
 * Arrange parsed elements in the role order expected by the target language.
 *
 * Roles named in `targetOrder` come first, in that order; a role with no entry
 * in `roles` is skipped. Entries of `roles` that `targetOrder` never mentions
 * (e.g. 'manner' or 'instrument' when the profile omits them) are appended
 * afterwards in the map's own iteration order, so no parsed element is lost.
 *
 * Each element is emitted at most once, even when `targetOrder` lists the same
 * role more than once.
 *
 * @param roles - Parsed elements keyed by semantic role.
 * @param targetOrder - Preferred role sequence for the target language.
 * @returns The elements of `roles` in target order, followed by any leftovers.
 */
export function reorderRoles(
  roles: Map<SemanticRole, ParsedElement>,
  targetOrder: SemanticRole[]
): ParsedElement[] {
  const result: ParsedElement[] = [];
  const usedRoles = new Set<SemanticRole>();

  // 1. Place the roles the target order names explicitly.
  for (const role of targetOrder) {
    // A role repeated in targetOrder must not duplicate its element.
    if (usedRoles.has(role)) continue;

    const element = roles.get(role);
    if (element) {
      result.push(element);
      usedRoles.add(role);
    }
  }

  // 2. Safety net: keep whatever the target order did not account for.
  for (const [role, element] of roles) {
    if (!usedRoles.has(role)) {
      result.push(element);
    }
  }

  return result;
}
|
|
516
|
+
|
|
517
|
+
/**
 * Turn ordered elements into output tokens, attaching each element's
 * grammatical marker on the side dictated by the adposition type.
 *
 * For every element the emitted text is `translated` when it is non-empty,
 * otherwise `value`. The marker used is the first entry of `markers` whose
 * role equals the element's role. A marker with an empty `form`, or an
 * adposition type other than 'preposition' / 'postposition', contributes no
 * token.
 *
 * @param elements - Elements in their final order.
 * @param markers - Markers available for the target language.
 * @param adpositionType - Whether markers go before or after their element.
 * @returns Flat token list, ready to be joined.
 */
export function insertMarkers(
  elements: ParsedElement[],
  markers: GrammaticalMarker[],
  adpositionType: AdpositionType
): string[] {
  const tokens: string[] = [];

  for (const item of elements) {
    const text = item.translated || item.value;
    const matched = markers.find(candidate => candidate.role === item.role);
    const form = matched ? matched.form : undefined;

    if (form && adpositionType === 'preposition') {
      // Marker precedes the element: "to element"
      tokens.push(form, text);
    } else if (form && adpositionType === 'postposition') {
      // Marker follows the element: "element を"
      tokens.push(text, form);
    } else {
      tokens.push(text);
    }
  }

  return tokens;
}
|
|
549
|
+
|
|
550
|
+
/**
 * Join tokens into a single string, attaching agglutinative prefixes and
 * suffixes to their neighbours instead of separating them with a space.
 *
 * Rules:
 * 1. A token ending with '-' is a prefix: no space is written after it.
 * 2. A token starting with '-' is a suffix: no space is written before it.
 * 3. The '-' indicators are removed from the output. A token carrying both
 *    (e.g. '-x-') loses both and attaches on both sides.
 * 4. Empty-string tokens are ignored, so they never produce stray spaces.
 *
 * Examples:
 * - ['#count', '-ta'] → '#countta' (Quechua accusative suffix)
 * - ['بـ-', 'الماوس'] → 'بـالماوس' (Arabic prefix attachment)
 * - ['value', 'を'] → 'value を' (Japanese particle, normal spacing)
 *
 * NOTE(review): the hyphen test is purely textual, so any token that happens
 * to start or end with '-' is treated as an affix, whatever its origin.
 *
 * @param tokens - Tokens in output order.
 * @returns The joined text; '' when there are no tokens.
 */
export function joinTokens(tokens: string[]): string {
  let result = '';
  let hasPrevious = false;
  let previousIsPrefix = false;

  for (const token of tokens) {
    // Nothing to display and nothing to attach to.
    if (token === '') continue;

    const isPrefix = token.endsWith('-');
    const isSuffix = token.startsWith('-');

    // A space separates two tokens unless one of them attaches to the other.
    if (hasPrevious && !previousIsPrefix && !isSuffix) {
      result += ' ';
    }

    // Strip the indicator on each side independently; the Math.max guard
    // keeps a lone '-' from producing an inverted slice range.
    const start = isSuffix ? 1 : 0;
    const end = isPrefix ? token.length - 1 : token.length;
    result += token.slice(start, Math.max(start, end));

    hasPrevious = true;
    previousIsPrefix = isPrefix;
  }

  return result;
}
|
|
599
|
+
|
|
600
|
+
/**
 * Render a parsed statement in the target language.
 *
 * Pipeline: reorder the parsed roles by the target profile's canonical order,
 * attach the target profile's grammatical markers, then join the tokens with
 * affix-aware spacing (suffixes like -ta, prefixes like بـ-, etc.).
 *
 * @param parsed - Statement whose `roles` are to be rendered.
 * @param _sourceProfile - Accepted as part of the signature; not read here.
 * @param targetProfile - Supplies `canonicalOrder`, `markers` and `adpositionType`.
 * @returns The statement text in the target language.
 */
export function transformStatement(
  parsed: ParsedStatement,
  _sourceProfile: LanguageProfile,
  targetProfile: LanguageProfile
): string {
  const { canonicalOrder, markers, adpositionType } = targetProfile;

  const orderedElements = reorderRoles(parsed.roles, canonicalOrder);
  const markedTokens = insertMarkers(orderedElements, markers, adpositionType);

  return joinTokens(markedTokens);
}
|