@lokascript/semantic 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +686 -0
- package/dist/browser-ar.ar.global.js +2 -0
- package/dist/browser-core.core.global.js +2 -0
- package/dist/browser-de.de.global.js +2 -0
- package/dist/browser-east-asian.east-asian.global.js +2 -0
- package/dist/browser-en-tr.en-tr.global.js +2 -0
- package/dist/browser-en.en.global.js +2 -0
- package/dist/browser-es-en.es-en.global.js +2 -0
- package/dist/browser-es.es.global.js +2 -0
- package/dist/browser-fr.fr.global.js +2 -0
- package/dist/browser-id.id.global.js +2 -0
- package/dist/browser-ja.ja.global.js +2 -0
- package/dist/browser-ko.ko.global.js +2 -0
- package/dist/browser-lazy.lazy.global.js +2 -0
- package/dist/browser-priority.priority.global.js +2 -0
- package/dist/browser-pt.pt.global.js +2 -0
- package/dist/browser-qu.qu.global.js +2 -0
- package/dist/browser-sw.sw.global.js +2 -0
- package/dist/browser-tr.tr.global.js +2 -0
- package/dist/browser-western.western.global.js +2 -0
- package/dist/browser-zh.zh.global.js +2 -0
- package/dist/browser.global.js +3 -0
- package/dist/browser.global.js.map +1 -0
- package/dist/index.cjs +35051 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +3426 -0
- package/dist/index.d.ts +3426 -0
- package/dist/index.js +34890 -0
- package/dist/index.js.map +1 -0
- package/dist/languages/ar.d.ts +78 -0
- package/dist/languages/ar.js +1622 -0
- package/dist/languages/ar.js.map +1 -0
- package/dist/languages/de.d.ts +38 -0
- package/dist/languages/de.js +1168 -0
- package/dist/languages/de.js.map +1 -0
- package/dist/languages/en.d.ts +44 -0
- package/dist/languages/en.js +3491 -0
- package/dist/languages/en.js.map +1 -0
- package/dist/languages/es.d.ts +52 -0
- package/dist/languages/es.js +1493 -0
- package/dist/languages/es.js.map +1 -0
- package/dist/languages/fr.d.ts +37 -0
- package/dist/languages/fr.js +1159 -0
- package/dist/languages/fr.js.map +1 -0
- package/dist/languages/id.d.ts +35 -0
- package/dist/languages/id.js +1152 -0
- package/dist/languages/id.js.map +1 -0
- package/dist/languages/ja.d.ts +53 -0
- package/dist/languages/ja.js +1430 -0
- package/dist/languages/ja.js.map +1 -0
- package/dist/languages/ko.d.ts +51 -0
- package/dist/languages/ko.js +1729 -0
- package/dist/languages/ko.js.map +1 -0
- package/dist/languages/pt.d.ts +37 -0
- package/dist/languages/pt.js +1127 -0
- package/dist/languages/pt.js.map +1 -0
- package/dist/languages/qu.d.ts +36 -0
- package/dist/languages/qu.js +1143 -0
- package/dist/languages/qu.js.map +1 -0
- package/dist/languages/sw.d.ts +35 -0
- package/dist/languages/sw.js +1147 -0
- package/dist/languages/sw.js.map +1 -0
- package/dist/languages/tr.d.ts +45 -0
- package/dist/languages/tr.js +1529 -0
- package/dist/languages/tr.js.map +1 -0
- package/dist/languages/zh.d.ts +58 -0
- package/dist/languages/zh.js +1257 -0
- package/dist/languages/zh.js.map +1 -0
- package/dist/types-C4dcj53L.d.ts +600 -0
- package/package.json +202 -0
- package/src/__test-utils__/index.ts +7 -0
- package/src/__test-utils__/test-helpers.ts +8 -0
- package/src/__types__/test-helpers.ts +122 -0
- package/src/analysis/index.ts +479 -0
- package/src/ast-builder/command-mappers.ts +1133 -0
- package/src/ast-builder/expression-parser/index.ts +41 -0
- package/src/ast-builder/expression-parser/parser.ts +563 -0
- package/src/ast-builder/expression-parser/tokenizer.ts +394 -0
- package/src/ast-builder/expression-parser/types.ts +208 -0
- package/src/ast-builder/index.ts +536 -0
- package/src/ast-builder/value-converters.ts +172 -0
- package/src/bridge.ts +275 -0
- package/src/browser-ar.ts +162 -0
- package/src/browser-core.ts +231 -0
- package/src/browser-de.ts +162 -0
- package/src/browser-east-asian.ts +173 -0
- package/src/browser-en-tr.ts +165 -0
- package/src/browser-en.ts +157 -0
- package/src/browser-es-en.ts +200 -0
- package/src/browser-es.ts +170 -0
- package/src/browser-fr.ts +162 -0
- package/src/browser-id.ts +162 -0
- package/src/browser-ja.ts +162 -0
- package/src/browser-ko.ts +162 -0
- package/src/browser-lazy.ts +189 -0
- package/src/browser-priority.ts +214 -0
- package/src/browser-pt.ts +162 -0
- package/src/browser-qu.ts +162 -0
- package/src/browser-sw.ts +162 -0
- package/src/browser-tr.ts +162 -0
- package/src/browser-western.ts +181 -0
- package/src/browser-zh.ts +162 -0
- package/src/browser.ts +268 -0
- package/src/cache/index.ts +14 -0
- package/src/cache/semantic-cache.ts +344 -0
- package/src/core-bridge.ts +372 -0
- package/src/explicit/converter.ts +258 -0
- package/src/explicit/index.ts +18 -0
- package/src/explicit/parser.ts +236 -0
- package/src/explicit/renderer.ts +424 -0
- package/src/generators/command-schemas.ts +1636 -0
- package/src/generators/event-handler-generator.ts +109 -0
- package/src/generators/index.ts +117 -0
- package/src/generators/language-profiles.ts +139 -0
- package/src/generators/pattern-generator.ts +537 -0
- package/src/generators/profiles/arabic.ts +131 -0
- package/src/generators/profiles/bengali.ts +132 -0
- package/src/generators/profiles/chinese.ts +124 -0
- package/src/generators/profiles/english.ts +113 -0
- package/src/generators/profiles/french.ts +125 -0
- package/src/generators/profiles/german.ts +126 -0
- package/src/generators/profiles/hindi.ts +146 -0
- package/src/generators/profiles/index.ts +46 -0
- package/src/generators/profiles/indonesian.ts +125 -0
- package/src/generators/profiles/italian.ts +139 -0
- package/src/generators/profiles/japanese.ts +149 -0
- package/src/generators/profiles/korean.ts +127 -0
- package/src/generators/profiles/marker-templates.ts +288 -0
- package/src/generators/profiles/ms.ts +130 -0
- package/src/generators/profiles/polish.ts +249 -0
- package/src/generators/profiles/portuguese.ts +115 -0
- package/src/generators/profiles/quechua.ts +113 -0
- package/src/generators/profiles/russian.ts +260 -0
- package/src/generators/profiles/spanish.ts +130 -0
- package/src/generators/profiles/swahili.ts +129 -0
- package/src/generators/profiles/thai.ts +132 -0
- package/src/generators/profiles/tl.ts +128 -0
- package/src/generators/profiles/turkish.ts +124 -0
- package/src/generators/profiles/types.ts +165 -0
- package/src/generators/profiles/ukrainian.ts +270 -0
- package/src/generators/profiles/vietnamese.ts +133 -0
- package/src/generators/schema-error-codes.ts +160 -0
- package/src/generators/schema-validator.ts +391 -0
- package/src/index.ts +429 -0
- package/src/language-building-schema.ts +3170 -0
- package/src/language-loader.ts +394 -0
- package/src/languages/_all.ts +65 -0
- package/src/languages/ar.ts +15 -0
- package/src/languages/bn.ts +16 -0
- package/src/languages/de.ts +15 -0
- package/src/languages/en.ts +29 -0
- package/src/languages/es.ts +15 -0
- package/src/languages/fr.ts +15 -0
- package/src/languages/hi.ts +26 -0
- package/src/languages/id.ts +15 -0
- package/src/languages/index.ts +18 -0
- package/src/languages/it.ts +15 -0
- package/src/languages/ja.ts +15 -0
- package/src/languages/ko.ts +15 -0
- package/src/languages/ms.ts +16 -0
- package/src/languages/pl.ts +18 -0
- package/src/languages/pt.ts +15 -0
- package/src/languages/qu.ts +15 -0
- package/src/languages/ru.ts +26 -0
- package/src/languages/sw.ts +15 -0
- package/src/languages/th.ts +16 -0
- package/src/languages/tl.ts +16 -0
- package/src/languages/tr.ts +15 -0
- package/src/languages/uk.ts +26 -0
- package/src/languages/vi.ts +16 -0
- package/src/languages/zh.ts +15 -0
- package/src/parser/index.ts +15 -0
- package/src/parser/pattern-matcher.ts +1181 -0
- package/src/parser/semantic-parser.ts +573 -0
- package/src/parser/utils/index.ts +35 -0
- package/src/parser/utils/marker-resolution.ts +111 -0
- package/src/parser/utils/possessive-keywords.ts +43 -0
- package/src/parser/utils/role-positioning.ts +70 -0
- package/src/parser/utils/type-validation.ts +134 -0
- package/src/patterns/add/ar.ts +71 -0
- package/src/patterns/add/bn.ts +70 -0
- package/src/patterns/add/hi.ts +69 -0
- package/src/patterns/add/index.ts +87 -0
- package/src/patterns/add/it.ts +61 -0
- package/src/patterns/add/ja.ts +93 -0
- package/src/patterns/add/ko.ts +74 -0
- package/src/patterns/add/ms.ts +30 -0
- package/src/patterns/add/pl.ts +62 -0
- package/src/patterns/add/ru.ts +62 -0
- package/src/patterns/add/th.ts +49 -0
- package/src/patterns/add/tl.ts +30 -0
- package/src/patterns/add/tr.ts +71 -0
- package/src/patterns/add/uk.ts +62 -0
- package/src/patterns/add/vi.ts +61 -0
- package/src/patterns/add/zh.ts +71 -0
- package/src/patterns/builders.ts +207 -0
- package/src/patterns/decrement/bn.ts +70 -0
- package/src/patterns/decrement/de.ts +42 -0
- package/src/patterns/decrement/hi.ts +68 -0
- package/src/patterns/decrement/index.ts +79 -0
- package/src/patterns/decrement/it.ts +69 -0
- package/src/patterns/decrement/ms.ts +30 -0
- package/src/patterns/decrement/pl.ts +58 -0
- package/src/patterns/decrement/ru.ts +58 -0
- package/src/patterns/decrement/th.ts +49 -0
- package/src/patterns/decrement/tl.ts +30 -0
- package/src/patterns/decrement/tr.ts +48 -0
- package/src/patterns/decrement/uk.ts +58 -0
- package/src/patterns/decrement/vi.ts +61 -0
- package/src/patterns/decrement/zh.ts +32 -0
- package/src/patterns/en.ts +302 -0
- package/src/patterns/event-handler/ar.ts +151 -0
- package/src/patterns/event-handler/bn.ts +72 -0
- package/src/patterns/event-handler/de.ts +117 -0
- package/src/patterns/event-handler/en.ts +117 -0
- package/src/patterns/event-handler/es.ts +136 -0
- package/src/patterns/event-handler/fr.ts +117 -0
- package/src/patterns/event-handler/hi.ts +64 -0
- package/src/patterns/event-handler/id.ts +117 -0
- package/src/patterns/event-handler/index.ts +119 -0
- package/src/patterns/event-handler/it.ts +54 -0
- package/src/patterns/event-handler/ja.ts +118 -0
- package/src/patterns/event-handler/ko.ts +133 -0
- package/src/patterns/event-handler/ms.ts +30 -0
- package/src/patterns/event-handler/pl.ts +62 -0
- package/src/patterns/event-handler/pt.ts +117 -0
- package/src/patterns/event-handler/qu.ts +66 -0
- package/src/patterns/event-handler/ru.ts +62 -0
- package/src/patterns/event-handler/shared.ts +270 -0
- package/src/patterns/event-handler/sw.ts +117 -0
- package/src/patterns/event-handler/th.ts +53 -0
- package/src/patterns/event-handler/tl.ts +30 -0
- package/src/patterns/event-handler/tr.ts +170 -0
- package/src/patterns/event-handler/uk.ts +62 -0
- package/src/patterns/event-handler/vi.ts +61 -0
- package/src/patterns/event-handler/zh.ts +150 -0
- package/src/patterns/get/ar.ts +49 -0
- package/src/patterns/get/bn.ts +47 -0
- package/src/patterns/get/de.ts +32 -0
- package/src/patterns/get/hi.ts +52 -0
- package/src/patterns/get/index.ts +83 -0
- package/src/patterns/get/it.ts +56 -0
- package/src/patterns/get/ja.ts +53 -0
- package/src/patterns/get/ko.ts +53 -0
- package/src/patterns/get/ms.ts +30 -0
- package/src/patterns/get/pl.ts +57 -0
- package/src/patterns/get/ru.ts +57 -0
- package/src/patterns/get/th.ts +29 -0
- package/src/patterns/get/tl.ts +30 -0
- package/src/patterns/get/uk.ts +57 -0
- package/src/patterns/get/vi.ts +48 -0
- package/src/patterns/grammar-transformed/index.ts +39 -0
- package/src/patterns/grammar-transformed/ja.ts +1713 -0
- package/src/patterns/grammar-transformed/ko.ts +1311 -0
- package/src/patterns/grammar-transformed/tr.ts +1067 -0
- package/src/patterns/hide/ar.ts +67 -0
- package/src/patterns/hide/bn.ts +47 -0
- package/src/patterns/hide/de.ts +36 -0
- package/src/patterns/hide/hi.ts +61 -0
- package/src/patterns/hide/index.ts +91 -0
- package/src/patterns/hide/it.ts +56 -0
- package/src/patterns/hide/ja.ts +69 -0
- package/src/patterns/hide/ko.ts +69 -0
- package/src/patterns/hide/ms.ts +30 -0
- package/src/patterns/hide/pl.ts +57 -0
- package/src/patterns/hide/ru.ts +57 -0
- package/src/patterns/hide/th.ts +29 -0
- package/src/patterns/hide/tl.ts +30 -0
- package/src/patterns/hide/tr.ts +65 -0
- package/src/patterns/hide/uk.ts +57 -0
- package/src/patterns/hide/vi.ts +56 -0
- package/src/patterns/hide/zh.ts +68 -0
- package/src/patterns/increment/bn.ts +70 -0
- package/src/patterns/increment/de.ts +36 -0
- package/src/patterns/increment/hi.ts +68 -0
- package/src/patterns/increment/index.ts +79 -0
- package/src/patterns/increment/it.ts +69 -0
- package/src/patterns/increment/ms.ts +30 -0
- package/src/patterns/increment/pl.ts +58 -0
- package/src/patterns/increment/ru.ts +58 -0
- package/src/patterns/increment/th.ts +49 -0
- package/src/patterns/increment/tl.ts +30 -0
- package/src/patterns/increment/tr.ts +52 -0
- package/src/patterns/increment/uk.ts +58 -0
- package/src/patterns/increment/vi.ts +61 -0
- package/src/patterns/increment/zh.ts +32 -0
- package/src/patterns/index.ts +84 -0
- package/src/patterns/languages/en/control-flow.ts +93 -0
- package/src/patterns/languages/en/fetch.ts +62 -0
- package/src/patterns/languages/en/index.ts +42 -0
- package/src/patterns/languages/en/repeat.ts +67 -0
- package/src/patterns/languages/en/set.ts +48 -0
- package/src/patterns/languages/en/swap.ts +38 -0
- package/src/patterns/languages/en/temporal.ts +57 -0
- package/src/patterns/put/ar.ts +74 -0
- package/src/patterns/put/bn.ts +53 -0
- package/src/patterns/put/en.ts +74 -0
- package/src/patterns/put/es.ts +74 -0
- package/src/patterns/put/hi.ts +69 -0
- package/src/patterns/put/id.ts +96 -0
- package/src/patterns/put/index.ts +99 -0
- package/src/patterns/put/it.ts +56 -0
- package/src/patterns/put/ja.ts +75 -0
- package/src/patterns/put/ko.ts +67 -0
- package/src/patterns/put/ms.ts +30 -0
- package/src/patterns/put/pl.ts +81 -0
- package/src/patterns/put/ru.ts +85 -0
- package/src/patterns/put/th.ts +32 -0
- package/src/patterns/put/tl.ts +30 -0
- package/src/patterns/put/tr.ts +67 -0
- package/src/patterns/put/uk.ts +85 -0
- package/src/patterns/put/vi.ts +72 -0
- package/src/patterns/put/zh.ts +62 -0
- package/src/patterns/registry.ts +163 -0
- package/src/patterns/remove/ar.ts +71 -0
- package/src/patterns/remove/bn.ts +68 -0
- package/src/patterns/remove/hi.ts +69 -0
- package/src/patterns/remove/index.ts +87 -0
- package/src/patterns/remove/it.ts +69 -0
- package/src/patterns/remove/ja.ts +74 -0
- package/src/patterns/remove/ko.ts +78 -0
- package/src/patterns/remove/ms.ts +30 -0
- package/src/patterns/remove/pl.ts +62 -0
- package/src/patterns/remove/ru.ts +62 -0
- package/src/patterns/remove/th.ts +49 -0
- package/src/patterns/remove/tl.ts +30 -0
- package/src/patterns/remove/tr.ts +78 -0
- package/src/patterns/remove/uk.ts +62 -0
- package/src/patterns/remove/vi.ts +61 -0
- package/src/patterns/remove/zh.ts +72 -0
- package/src/patterns/set/ar.ts +84 -0
- package/src/patterns/set/bn.ts +53 -0
- package/src/patterns/set/de.ts +84 -0
- package/src/patterns/set/es.ts +92 -0
- package/src/patterns/set/fr.ts +88 -0
- package/src/patterns/set/hi.ts +56 -0
- package/src/patterns/set/id.ts +84 -0
- package/src/patterns/set/index.ts +107 -0
- package/src/patterns/set/it.ts +56 -0
- package/src/patterns/set/ja.ts +86 -0
- package/src/patterns/set/ko.ts +85 -0
- package/src/patterns/set/ms.ts +30 -0
- package/src/patterns/set/pl.ts +57 -0
- package/src/patterns/set/pt.ts +84 -0
- package/src/patterns/set/ru.ts +57 -0
- package/src/patterns/set/th.ts +31 -0
- package/src/patterns/set/tl.ts +30 -0
- package/src/patterns/set/tr.ts +107 -0
- package/src/patterns/set/uk.ts +57 -0
- package/src/patterns/set/vi.ts +53 -0
- package/src/patterns/set/zh.ts +84 -0
- package/src/patterns/show/ar.ts +67 -0
- package/src/patterns/show/bn.ts +47 -0
- package/src/patterns/show/de.ts +32 -0
- package/src/patterns/show/fr.ts +32 -0
- package/src/patterns/show/hi.ts +61 -0
- package/src/patterns/show/index.ts +95 -0
- package/src/patterns/show/it.ts +56 -0
- package/src/patterns/show/ja.ts +69 -0
- package/src/patterns/show/ko.ts +73 -0
- package/src/patterns/show/ms.ts +30 -0
- package/src/patterns/show/pl.ts +57 -0
- package/src/patterns/show/ru.ts +57 -0
- package/src/patterns/show/th.ts +29 -0
- package/src/patterns/show/tl.ts +30 -0
- package/src/patterns/show/tr.ts +65 -0
- package/src/patterns/show/uk.ts +57 -0
- package/src/patterns/show/vi.ts +56 -0
- package/src/patterns/show/zh.ts +68 -0
- package/src/patterns/take/ar.ts +51 -0
- package/src/patterns/take/index.ts +31 -0
- package/src/patterns/toggle/ar.ts +61 -0
- package/src/patterns/toggle/bn.ts +70 -0
- package/src/patterns/toggle/en.ts +61 -0
- package/src/patterns/toggle/es.ts +61 -0
- package/src/patterns/toggle/hi.ts +80 -0
- package/src/patterns/toggle/index.ts +95 -0
- package/src/patterns/toggle/it.ts +69 -0
- package/src/patterns/toggle/ja.ts +156 -0
- package/src/patterns/toggle/ko.ts +113 -0
- package/src/patterns/toggle/ms.ts +30 -0
- package/src/patterns/toggle/pl.ts +62 -0
- package/src/patterns/toggle/ru.ts +62 -0
- package/src/patterns/toggle/th.ts +50 -0
- package/src/patterns/toggle/tl.ts +30 -0
- package/src/patterns/toggle/tr.ts +88 -0
- package/src/patterns/toggle/uk.ts +62 -0
- package/src/patterns/toggle/vi.ts +61 -0
- package/src/patterns/toggle/zh.ts +99 -0
- package/src/public-api.ts +286 -0
- package/src/registry.ts +441 -0
- package/src/tokenizers/arabic.ts +723 -0
- package/src/tokenizers/base.ts +1300 -0
- package/src/tokenizers/bengali.ts +289 -0
- package/src/tokenizers/chinese.ts +481 -0
- package/src/tokenizers/english.ts +416 -0
- package/src/tokenizers/french.ts +326 -0
- package/src/tokenizers/german.ts +324 -0
- package/src/tokenizers/hindi.ts +319 -0
- package/src/tokenizers/index.ts +127 -0
- package/src/tokenizers/indonesian.ts +306 -0
- package/src/tokenizers/italian.ts +458 -0
- package/src/tokenizers/japanese.ts +447 -0
- package/src/tokenizers/korean.ts +642 -0
- package/src/tokenizers/morphology/arabic-normalizer.ts +242 -0
- package/src/tokenizers/morphology/french-normalizer.ts +268 -0
- package/src/tokenizers/morphology/german-normalizer.ts +256 -0
- package/src/tokenizers/morphology/index.ts +46 -0
- package/src/tokenizers/morphology/italian-normalizer.ts +329 -0
- package/src/tokenizers/morphology/japanese-normalizer.ts +288 -0
- package/src/tokenizers/morphology/korean-normalizer.ts +428 -0
- package/src/tokenizers/morphology/polish-normalizer.ts +264 -0
- package/src/tokenizers/morphology/portuguese-normalizer.ts +310 -0
- package/src/tokenizers/morphology/spanish-normalizer.ts +327 -0
- package/src/tokenizers/morphology/turkish-normalizer.ts +412 -0
- package/src/tokenizers/morphology/types.ts +211 -0
- package/src/tokenizers/ms.ts +198 -0
- package/src/tokenizers/polish.ts +354 -0
- package/src/tokenizers/portuguese.ts +304 -0
- package/src/tokenizers/quechua.ts +339 -0
- package/src/tokenizers/russian.ts +375 -0
- package/src/tokenizers/spanish.ts +403 -0
- package/src/tokenizers/swahili.ts +303 -0
- package/src/tokenizers/thai.ts +236 -0
- package/src/tokenizers/tl.ts +198 -0
- package/src/tokenizers/turkish.ts +411 -0
- package/src/tokenizers/ukrainian.ts +369 -0
- package/src/tokenizers/vietnamese.ts +410 -0
- package/src/types/grammar-types.ts +617 -0
- package/src/types/unified-profile.ts +267 -0
- package/src/types.ts +709 -0
- package/src/utils/confidence-calculator.ts +147 -0
- package/src/validators/command-validator.ts +380 -0
- package/src/validators/index.ts +15 -0
package/src/registry.ts
ADDED
|
@@ -0,0 +1,441 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Language Registry
|
|
3
|
+
*
|
|
4
|
+
* Central registration point for language support in the semantic parser.
|
|
5
|
+
* Languages self-register when their modules are imported, enabling
|
|
6
|
+
* tree-shaking for minimal bundles.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```typescript
|
|
10
|
+
* // Import only the languages you need
|
|
11
|
+
* import '@lokascript/semantic/languages/en';
|
|
12
|
+
* import '@lokascript/semantic/languages/es';
|
|
13
|
+
*
|
|
14
|
+
* // Now parse works for registered languages
|
|
15
|
+
* import { parse } from '@lokascript/semantic';
|
|
16
|
+
* parse('toggle .active', 'en'); // Works
|
|
17
|
+
* parse('alternar .activo', 'es'); // Works
|
|
18
|
+
* parse('切り替え .active', 'ja'); // Error: Language not registered
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import type { LanguageTokenizer, LanguagePattern, TokenStream } from './types';
|
|
23
|
+
|
|
24
|
+
// Re-export profile types from generators for convenience
|
|
25
|
+
export type {
|
|
26
|
+
LanguageProfile,
|
|
27
|
+
WordOrder,
|
|
28
|
+
MarkingStrategy,
|
|
29
|
+
RoleMarker,
|
|
30
|
+
VerbConfig,
|
|
31
|
+
PossessiveConfig,
|
|
32
|
+
KeywordTranslation,
|
|
33
|
+
TokenizationConfig,
|
|
34
|
+
} from './generators/language-profiles';
|
|
35
|
+
|
|
36
|
+
import type { LanguageProfile } from './generators/language-profiles';
|
|
37
|
+
|
|
38
|
+
// =============================================================================
|
|
39
|
+
// External Pattern Source Interface
|
|
40
|
+
// =============================================================================
|
|
41
|
+
|
|
42
|
+
/**
 * Contract implemented by external pattern providers
 * (e.g., the @lokascript/patterns-reference database).
 *
 * A provider is stored in the registry under its `id` and is queried
 * asynchronously for additional patterns at runtime.
 */
export interface ExternalPatternsSource {
  /** Unique identifier for the source; used as the registry key. */
  id: string;

  /** Human-readable name; appears in warnings when a query against the source fails. */
  name: string;

  /** Resolve the patterns this source holds for the given language. */
  getPatternsForLanguage(language: string): Promise<ExternalPatternEntry[]>;

  /** Resolve the patterns this source holds for the given command, optionally for one language. */
  getPatternsForCommand(command: string, language?: string): Promise<ExternalPatternEntry[]>;

  /** Resolve whether the source has any patterns for the given language. */
  hasPatterns(language: string): Promise<boolean>;

  /** Resolve every language the source supports. */
  getSupportedLanguages(): Promise<string[]>;
}
|
|
60
|
+
|
|
61
|
+
/**
 * A single pattern record as returned by an external pattern source.
 */
export interface ExternalPatternEntry {
  /** Identifier of the entry. */
  id: string;

  /** The pattern's code text (presumably a hyperscript snippet; confirm with the provider). */
  code: string;

  /** Command the pattern belongs to, or `null` when none is recorded. */
  command: string | null;

  /** Language of the pattern. */
  language: string;

  /** Numeric score; registry queries return entries ordered highest first. */
  confidence: number;

  /** Verification flag supplied by the source. */
  verified: boolean;

  /** Optional title. */
  title?: string;

  /** Optional category. */
  category?: string;
}
|
|
74
|
+
|
|
75
|
+
// =============================================================================
|
|
76
|
+
// Registry State
|
|
77
|
+
// =============================================================================
|
|
78
|
+
|
|
79
|
+
// Registered tokenizers, keyed by language code.
const tokenizers: Map<string, LanguageTokenizer> = new Map();

// Registered language profiles, keyed by language code.
const profiles: Map<string, LanguageProfile> = new Map();

// Cached patterns per language code; an entry is dropped whenever that
// language's profile or pattern set is (re)registered.
const patternCache: Map<string, LanguagePattern[]> = new Map();

// External pattern sources (e.g., @lokascript/patterns-reference database), keyed by source id.
const externalSources: Map<string, ExternalPatternsSource> = new Map();

// Pattern generator function - injected through setPatternGenerator so this
// module avoids a circular dependency on the patterns module; null until injected.
let patternGenerator: ((profile: LanguageProfile) => LanguagePattern[]) | null = null;
|
|
88
|
+
|
|
89
|
+
// =============================================================================
|
|
90
|
+
// Registration Functions
|
|
91
|
+
// =============================================================================
|
|
92
|
+
|
|
93
|
+
/**
 * Store a tokenizer and a profile under one language code.
 * Language modules invoke this themselves when they are imported.
 *
 * @param code - Key under which both the tokenizer and the profile are stored.
 * @param tokenizer - Tokenizer to associate with `code`.
 * @param profile - Profile to associate with `code`.
 */
export function registerLanguage(
  code: string,
  tokenizer: LanguageTokenizer,
  profile: LanguageProfile
): void {
  // Patterns cached for an earlier registration of this code are stale now.
  patternCache.delete(code);

  profiles.set(code, profile);
  tokenizers.set(code, tokenizer);
}
|
|
107
|
+
|
|
108
|
+
/**
 * Store a tokenizer on its own, keyed by the tokenizer's `language`
 * property (kept for backwards compatibility).
 *
 * @param tokenizer - Tokenizer to register.
 */
export function registerTokenizer(tokenizer: LanguageTokenizer): void {
  const { language } = tokenizer;
  tokenizers.set(language, tokenizer);
}
|
|
114
|
+
|
|
115
|
+
/**
 * Store a profile on its own, keyed by the profile's `code` property
 * (kept for backwards compatibility). Any patterns cached for that code
 * are discarded.
 *
 * @param profile - Profile to register.
 */
export function registerProfile(profile: LanguageProfile): void {
  const { code } = profile;
  patternCache.delete(code);
  profiles.set(code, profile);
}
|
|
122
|
+
|
|
123
|
+
/**
 * Install the function that derives patterns from a language profile.
 * The patterns module calls this to inject its generator, which keeps this
 * module free of a circular import.
 *
 * @param generator - Function producing the patterns for a given profile.
 */
export function setPatternGenerator(
  generator: (profile: LanguageProfile) => LanguagePattern[]
): void {
  patternGenerator = generator;
}
|
|
132
|
+
|
|
133
|
+
// Patterns registered directly per language code (supports tree-shaking).
const registeredPatterns: Map<string, LanguagePattern[]> = new Map();

/**
 * Store a ready-made pattern list for a language.
 * Each language module can register just its own patterns, which keeps
 * unused languages out of the bundle.
 *
 * @param code - Language code the patterns belong to.
 * @param patterns - Patterns to store for `code`; replaces any earlier list.
 */
export function registerPatterns(code: string, patterns: LanguagePattern[]): void {
  // Whatever was cached for this language no longer reflects the registered list.
  patternCache.delete(code);
  registeredPatterns.set(code, patterns);
}
|
|
146
|
+
|
|
147
|
+
/**
 * Tell whether a pattern list was registered directly for a language.
 *
 * @param code - Language code to look up.
 * @returns `true` when a directly registered list exists for `code`.
 */
export function hasRegisteredPatterns(code: string): boolean {
  const present = registeredPatterns.has(code);
  return present;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Look up the pattern list registered directly for a language.
 *
 * @param code - Language code to look up.
 * @returns The registered list, or `undefined` when none was registered.
 */
export function getRegisteredPatterns(code: string): LanguagePattern[] | undefined {
  const direct = registeredPatterns.get(code);
  return direct;
}
|
|
160
|
+
|
|
161
|
+
// =============================================================================
|
|
162
|
+
// External Pattern Sources
|
|
163
|
+
// =============================================================================
|
|
164
|
+
|
|
165
|
+
/**
 * Add an external pattern source to the registry, keyed by its `id`.
 * A source registered later under the same id replaces the earlier one.
 * Sources such as @lokascript/patterns-reference supply extra patterns
 * at runtime.
 *
 * @example
 * ```typescript
 * import { registerPatternsSource } from '@lokascript/semantic';
 * import { createPatternsProvider } from '@lokascript/patterns-reference';
 *
 * const provider = createPatternsProvider();
 * registerPatternsSource(provider);
 * ```
 *
 * @param source - The source to register.
 */
export function registerPatternsSource(source: ExternalPatternsSource): void {
  const { id } = source;
  externalSources.set(id, source);
}
|
|
182
|
+
|
|
183
|
+
/**
 * Remove an external pattern source from the registry.
 *
 * @param sourceId - Id the source was registered under.
 * @returns `true` when a source was removed, `false` when the id was unknown.
 */
export function unregisterPatternsSource(sourceId: string): boolean {
  const removed = externalSources.delete(sourceId);
  return removed;
}
|
|
189
|
+
|
|
190
|
+
/**
 * Look up an external pattern source by id.
 *
 * @param sourceId - Id the source was registered under.
 * @returns The source, or `undefined` when no source has that id.
 */
export function getPatternsSource(sourceId: string): ExternalPatternsSource | undefined {
  const source = externalSources.get(sourceId);
  return source;
}
|
|
196
|
+
|
|
197
|
+
/**
 * List every registered external pattern source.
 *
 * @returns A new array of the sources, in registration order.
 */
export function getAllPatternsSources(): ExternalPatternsSource[] {
  const sources: ExternalPatternsSource[] = [];
  externalSources.forEach((source) => {
    sources.push(source);
  });
  return sources;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Tell whether at least one external pattern source is registered.
 *
 * @returns `true` when the source registry is non-empty.
 */
export function hasExternalSources(): boolean {
  return externalSources.size !== 0;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Run one query against every registered external source and merge the results.
 *
 * Shared by the public query functions so both follow the same rules:
 * - sources are queried in parallel, since they are independent of each other;
 * - a source that throws or rejects is reported with `console.warn` and
 *   contributes no entries, so one broken source cannot fail the whole query;
 * - the merged list is ordered by `confidence`, highest first.
 *
 * @param query - Performs the actual lookup on a single source.
 * @returns A new array holding the entries of every source that answered.
 */
async function collectExternalPatterns(
  query: (source: ExternalPatternsSource) => Promise<ExternalPatternEntry[]>
): Promise<ExternalPatternEntry[]> {
  if (externalSources.size === 0) {
    return [];
  }

  // Snapshot the sources: the map may be modified by (un)registration while
  // the queries below are still in flight.
  const sources = Array.from(externalSources.values());

  const batches = await Promise.all(
    sources.map(async (source): Promise<ExternalPatternEntry[]> => {
      try {
        // Calling and copying inside the try means a synchronous throw or a
        // non-iterable result is reported like any other source failure.
        return [...(await query(source))];
      } catch (error) {
        console.warn(
          `[Registry] Failed to query patterns from ${source.name}:`,
          error instanceof Error ? error.message : String(error)
        );
        return [];
      }
    })
  );

  // Append entry by entry: spreading a whole batch into push() passes every
  // entry as a call argument and can overflow the stack for very large batches.
  const merged: ExternalPatternEntry[] = [];
  for (const batch of batches) {
    for (const entry of batch) {
      merged.push(entry);
    }
  }

  // Sort by confidence (highest first)
  return merged.sort((a, b) => b.confidence - a.confidence);
}

/**
 * Query patterns from all external sources for a language.
 *
 * @param language - Language to request patterns for.
 * @returns Entries from every source that answered, sorted by confidence
 *   (highest first); an empty array when no source is registered.
 */
export async function queryExternalPatterns(language: string): Promise<ExternalPatternEntry[]> {
  return collectExternalPatterns((source) => source.getPatternsForLanguage(language));
}

/**
 * Query patterns from all external sources for a command.
 *
 * @param command - Command to request patterns for.
 * @param language - Optional language, passed through to each source unchanged.
 * @returns Entries from every source that answered, sorted by confidence
 *   (highest first); an empty array when no source is registered.
 */
export async function queryExternalPatternsForCommand(
  command: string,
  language?: string
): Promise<ExternalPatternEntry[]> {
  return collectExternalPatterns((source) => source.getPatternsForCommand(command, language));
}
|
|
265
|
+
|
|
266
|
+
// =============================================================================
|
|
267
|
+
// Query Functions
|
|
268
|
+
// =============================================================================
|
|
269
|
+
|
|
270
|
+
/**
 * Fetch the tokenizer registered for a language.
 *
 * @param code - Language code to look up.
 * @returns The registered tokenizer.
 * @throws Error listing the registered languages and the import to add
 *   when no tokenizer is registered for `code`.
 */
export function getTokenizer(code: string): LanguageTokenizer {
  const found = tokenizers.get(code);
  if (found) {
    return found;
  }

  // Falls back to 'none' when nothing is registered at all.
  const known = Array.from(tokenizers.keys()).join(', ') || 'none';
  const message = [
    `Language '${code}' is not registered.`,
    `Registered languages: ${known}.`,
    `Import the language module first: import '@lokascript/semantic/languages/${code}';`,
  ].join(' ');
  throw new Error(message);
}
|
|
286
|
+
|
|
287
|
+
/**
 * Fetch the profile registered for a language.
 *
 * @param code - Language code to look up.
 * @returns The registered profile.
 * @throws Error listing the registered profile codes and the import to add
 *   when no profile is registered for `code`.
 */
export function getProfile(code: string): LanguageProfile {
  const found = profiles.get(code);
  if (found) {
    return found;
  }

  // Falls back to 'none' when nothing is registered at all.
  const known = Array.from(profiles.keys()).join(', ') || 'none';
  const message = [
    `Language profile '${code}' is not registered.`,
    `Registered languages: ${known}.`,
    `Import the language module first: import '@lokascript/semantic/languages/${code}';`,
  ].join(' ');
  throw new Error(message);
}
|
|
303
|
+
|
|
304
|
+
/**
 * Non-throwing tokenizer lookup.
 *
 * @param code - Language code to look up.
 * @returns The registered tokenizer, or `undefined` when there is none.
 */
export function tryGetTokenizer(code: string): LanguageTokenizer | undefined {
  const candidate = tokenizers.get(code);
  return candidate;
}
|
|
310
|
+
|
|
311
|
+
/**
 * Non-throwing profile lookup.
 *
 * @param code - Language code used as the key in the profile registry.
 * @returns The profile stored for `code`, or `undefined` when there is none.
 */
export function tryGetProfile(code: string): LanguageProfile | undefined {
  const maybeProfile = profiles.get(code);
  return maybeProfile;
}
|
|
317
|
+
|
|
318
|
+
/**
 * List the language codes that currently have a tokenizer registered.
 *
 * @returns A new array of the keys of the tokenizer registry, in the
 *   registry's iteration order.
 */
export function getRegisteredLanguages(): string[] {
  return [...tokenizers.keys()];
}
|
|
324
|
+
|
|
325
|
+
/**
 * Report whether a language is fully registered, i.e. has BOTH a tokenizer
 * and a profile stored under the given code.
 *
 * @param code - Language code to check.
 * @returns `true` only when both registries contain `code`.
 */
export function isLanguageRegistered(code: string): boolean {
  if (!tokenizers.has(code)) {
    return false;
  }
  return profiles.has(code);
}
|
|
331
|
+
|
|
332
|
+
/**
 * Check whether a tokenizer is registered for the given language code.
 *
 * Kept for backwards compatibility with the tokenizers API.
 *
 * Note: this is NOT an exact alias for `isLanguageRegistered`. That function
 * also requires a profile to be registered; this one consults only the
 * tokenizer registry, so it can return `true` for a language whose profile
 * is missing.
 *
 * @param code - Language code to check.
 * @returns `true` when the tokenizer registry contains `code`.
 */
export function isLanguageSupported(code: string): boolean {
  return tokenizers.has(code);
}
|
|
339
|
+
|
|
340
|
+
// =============================================================================
|
|
341
|
+
// Tokenization
|
|
342
|
+
// =============================================================================
|
|
343
|
+
|
|
344
|
+
/**
 * Tokenize `input` with the tokenizer registered for `language`.
 *
 * @param input - Source text to tokenize.
 * @param language - Language code whose tokenizer should be used.
 * @returns The token stream produced by that tokenizer.
 * @throws Error (from `getTokenizer`) when no tokenizer is registered for `language`.
 */
export function tokenize(input: string, language: string): TokenStream {
  return getTokenizer(language).tokenize(input);
}
|
|
352
|
+
|
|
353
|
+
// =============================================================================
|
|
354
|
+
// Pattern Access (Lazy Generation)
|
|
355
|
+
// =============================================================================
|
|
356
|
+
|
|
357
|
+
/**
 * Resolve the pattern list for a language.
 *
 * Resolution order:
 *   1. the pattern cache;
 *   2. patterns registered directly for the language (tree-shakeable path),
 *      which are copied into the cache on first use;
 *   3. the configured pattern generator, run against the language profile,
 *      whose output is cached.
 *
 * @param code - Language code to resolve patterns for.
 * @returns The cached, registered, or freshly generated patterns.
 * @throws Error when nothing is cached or registered for `code` and no pattern
 *   generator is set, or (from `getProfile`) when the generator is needed but
 *   no profile is registered for `code`.
 */
export function getPatternsForLanguage(code: string): LanguagePattern[] {
  // 1. Cache hit: return as-is.
  const fromCache = patternCache.get(code);
  if (fromCache) {
    return fromCache;
  }

  // 2. Directly registered patterns: remember them in the cache for next time.
  const direct = registeredPatterns.get(code);
  if (direct) {
    patternCache.set(code, direct);
    return direct;
  }

  // 3. Generate from the profile when a generator is available.
  if (patternGenerator) {
    const profile = getProfile(code); // throws if the profile is not registered
    const generated = patternGenerator(profile);
    patternCache.set(code, generated);
    return generated;
  }

  throw new Error(
    `No patterns registered for language '${code}'. ` +
      'Either import the language module or set a pattern generator.'
  );
}
|
|
391
|
+
|
|
392
|
+
/**
 * Resolve the patterns of one language that belong to one command,
 * ordered from highest to lowest `priority`.
 *
 * @param language - Language code to resolve patterns for.
 * @param command - Command name each returned pattern must match exactly.
 * @returns A new array; the list held by `getPatternsForLanguage` is not reordered.
 * @throws Error propagated from `getPatternsForLanguage`.
 */
export function getPatternsForLanguageAndCommand(
  language: string,
  command: string
): LanguagePattern[] {
  // `filter` yields a fresh array, so the in-place sort below cannot disturb the cache.
  const matching = getPatternsForLanguage(language).filter(
    pattern => pattern.command === command
  );
  matching.sort((left, right) => right.priority - left.priority);
  return matching;
}
|
|
403
|
+
|
|
404
|
+
/**
 * Clear cached patterns (useful for testing).
 *
 * @param code - Language code whose cache entry should be dropped. When
 *   omitted (or nullish), the entire pattern cache is cleared.
 *
 * The "omitted" case is detected with an explicit nullish check rather than
 * truthiness, so an empty-string code removes only the `''` entry instead of
 * unexpectedly wiping the cache for every language.
 */
export function clearPatternCache(code?: string): void {
  if (code == null) {
    patternCache.clear();
    return;
  }
  patternCache.delete(code);
}
|
|
414
|
+
|
|
415
|
+
// =============================================================================
|
|
416
|
+
// Backwards Compatibility
|
|
417
|
+
// =============================================================================
|
|
418
|
+
|
|
419
|
+
/**
 * Snapshot the profile registry as a plain object keyed by language code
 * (kept for backwards compatibility).
 *
 * Only profiles that are registered at call time are included.
 *
 * @returns A new record mapping each registered code to its profile.
 */
export function getAllProfiles(): Record<string, LanguageProfile> {
  const byCode: Record<string, LanguageProfile> = {};
  profiles.forEach((profile, code) => {
    byCode[code] = profile;
  });
  return byCode;
}
|
|
430
|
+
|
|
431
|
+
/**
 * Snapshot the tokenizer registry as a plain object keyed by language code
 * (kept for backwards compatibility).
 *
 * Only tokenizers that are registered at call time are included.
 *
 * @returns A new record mapping each registered code to its tokenizer.
 */
export function getAllTokenizers(): Record<string, LanguageTokenizer> {
  const byCode: Record<string, LanguageTokenizer> = {};
  tokenizers.forEach((tokenizer, code) => {
    byCode[code] = tokenizer;
  });
  return byCode;
}
|