@lokascript/semantic 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-ar.ar.global.js +2 -2
- package/dist/browser-core.core.global.js +2 -2
- package/dist/browser-de.de.global.js +2 -2
- package/dist/browser-east-asian.east-asian.global.js +2 -2
- package/dist/browser-en-tr.en-tr.global.js +2 -2
- package/dist/browser-en.en.global.js +2 -2
- package/dist/browser-es-en.es-en.global.js +2 -2
- package/dist/browser-es.es.global.js +2 -2
- package/dist/browser-fr.fr.global.js +2 -2
- package/dist/browser-id.id.global.js +2 -2
- package/dist/browser-ja.ja.global.js +2 -2
- package/dist/browser-ko.ko.global.js +2 -2
- package/dist/browser-lazy.lazy.global.js +2 -2
- package/dist/browser-priority.priority.global.js +2 -2
- package/dist/browser-pt.pt.global.js +2 -2
- package/dist/browser-qu.qu.global.js +2 -2
- package/dist/browser-sw.sw.global.js +2 -2
- package/dist/browser-tr.tr.global.js +2 -2
- package/dist/browser-western.western.global.js +2 -2
- package/dist/browser-zh.zh.global.js +2 -2
- package/dist/browser.global.js +2 -2
- package/dist/browser.global.js.map +1 -1
- package/dist/index.cjs +13042 -17462
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +49 -5
- package/dist/index.d.ts +49 -5
- package/dist/index.js +14044 -18464
- package/dist/index.js.map +1 -1
- package/dist/languages/ar.d.ts +1 -1
- package/dist/languages/ar.js +31 -44
- package/dist/languages/ar.js.map +1 -1
- package/dist/languages/de.d.ts +1 -1
- package/dist/languages/de.js +14 -2
- package/dist/languages/de.js.map +1 -1
- package/dist/languages/en.d.ts +1 -1
- package/dist/languages/en.js +558 -12
- package/dist/languages/en.js.map +1 -1
- package/dist/languages/es.d.ts +1 -1
- package/dist/languages/es.js +16 -0
- package/dist/languages/es.js.map +1 -1
- package/dist/languages/fr.d.ts +1 -1
- package/dist/languages/fr.js +14 -2
- package/dist/languages/fr.js.map +1 -1
- package/dist/languages/id.d.ts +1 -1
- package/dist/languages/id.js +14 -2
- package/dist/languages/id.js.map +1 -1
- package/dist/languages/ja.d.ts +1 -1
- package/dist/languages/ja.js +18 -3
- package/dist/languages/ja.js.map +1 -1
- package/dist/languages/ko.d.ts +8 -1
- package/dist/languages/ko.js +75 -43
- package/dist/languages/ko.js.map +1 -1
- package/dist/languages/pt.d.ts +1 -1
- package/dist/languages/pt.js +17 -0
- package/dist/languages/pt.js.map +1 -1
- package/dist/languages/qu.d.ts +12 -1
- package/dist/languages/qu.js +77 -2
- package/dist/languages/qu.js.map +1 -1
- package/dist/languages/sw.d.ts +1 -1
- package/dist/languages/sw.js.map +1 -1
- package/dist/languages/tr.d.ts +9 -1
- package/dist/languages/tr.js +96 -72
- package/dist/languages/tr.js.map +1 -1
- package/dist/languages/zh.d.ts +1 -1
- package/dist/languages/zh.js +16 -0
- package/dist/languages/zh.js.map +1 -1
- package/dist/{types-C4dcj53L.d.ts → types-BY3Id07j.d.ts} +20 -5
- package/package.json +20 -29
- package/src/generators/command-schemas.ts +21 -10
- package/src/generators/event-handler-generator.ts +50 -44
- package/src/generators/language-profiles.ts +6 -0
- package/src/generators/pattern-generator.ts +883 -1
- package/src/generators/profiles/arabic.ts +19 -3
- package/src/generators/profiles/bengali.ts +12 -1
- package/src/generators/profiles/chinese.ts +15 -0
- package/src/generators/profiles/french.ts +12 -1
- package/src/generators/profiles/german.ts +12 -1
- package/src/generators/profiles/hebrew.ts +148 -0
- package/src/generators/profiles/hindi.ts +12 -1
- package/src/generators/profiles/index.ts +2 -0
- package/src/generators/profiles/indonesian.ts +12 -1
- package/src/generators/profiles/italian.ts +16 -0
- package/src/generators/profiles/japanese.ts +11 -2
- package/src/generators/profiles/korean.ts +15 -1
- package/src/generators/profiles/polish.ts +12 -0
- package/src/generators/profiles/portuguese.ts +16 -0
- package/src/generators/profiles/russian.ts +11 -0
- package/src/generators/profiles/spanish.ts +15 -0
- package/src/generators/profiles/spanishMexico.ts +176 -0
- package/src/generators/profiles/thai.ts +11 -0
- package/src/generators/profiles/turkish.ts +49 -7
- package/src/generators/profiles/types.ts +21 -5
- package/src/generators/profiles/ukrainian.ts +11 -0
- package/src/generators/profiles/vietnamese.ts +11 -0
- package/src/language-building-schema.ts +111 -0
- package/src/languages/_all.ts +5 -1
- package/src/languages/es-MX.ts +32 -0
- package/src/languages/he.ts +15 -0
- package/src/parser/pattern-matcher.ts +10 -1
- package/src/parser/semantic-parser.ts +3 -0
- package/src/patterns/add/ar.ts +3 -59
- package/src/patterns/add/index.ts +5 -1
- package/src/patterns/add/ja.ts +3 -81
- package/src/patterns/add/ko.ts +3 -62
- package/src/patterns/add/qu.ts +69 -0
- package/src/patterns/add/tr.ts +3 -59
- package/src/patterns/builders.ts +1 -0
- package/src/patterns/decrement/tr.ts +3 -36
- package/src/patterns/event-handler/ar.ts +3 -139
- package/src/patterns/event-handler/he.ts +15 -0
- package/src/patterns/event-handler/index.ts +5 -1
- package/src/patterns/event-handler/ja.ts +3 -106
- package/src/patterns/event-handler/ko.ts +3 -121
- package/src/patterns/event-handler/ms.ts +45 -20
- package/src/patterns/event-handler/tr.ts +3 -158
- package/src/patterns/get/ar.ts +3 -37
- package/src/patterns/get/ja.ts +3 -41
- package/src/patterns/get/ko.ts +3 -41
- package/src/patterns/grammar-transformed/ja.ts +3 -1701
- package/src/patterns/grammar-transformed/ko.ts +3 -1299
- package/src/patterns/grammar-transformed/tr.ts +3 -1055
- package/src/patterns/hide/ar.ts +3 -55
- package/src/patterns/hide/ja.ts +3 -57
- package/src/patterns/hide/ko.ts +3 -57
- package/src/patterns/hide/tr.ts +3 -53
- package/src/patterns/increment/tr.ts +3 -40
- package/src/patterns/put/ar.ts +3 -62
- package/src/patterns/put/ja.ts +3 -63
- package/src/patterns/put/ko.ts +3 -55
- package/src/patterns/put/tr.ts +3 -55
- package/src/patterns/remove/ar.ts +3 -59
- package/src/patterns/remove/index.ts +5 -1
- package/src/patterns/remove/ja.ts +3 -62
- package/src/patterns/remove/ko.ts +3 -66
- package/src/patterns/remove/qu.ts +69 -0
- package/src/patterns/remove/tr.ts +3 -66
- package/src/patterns/set/ar.ts +3 -72
- package/src/patterns/set/ja.ts +3 -74
- package/src/patterns/set/ko.ts +3 -73
- package/src/patterns/set/tr.ts +3 -95
- package/src/patterns/show/ar.ts +3 -55
- package/src/patterns/show/ja.ts +3 -57
- package/src/patterns/show/ko.ts +3 -61
- package/src/patterns/show/tr.ts +3 -53
- package/src/patterns/take/ar.ts +3 -39
- package/src/patterns/toggle/ar.ts +3 -49
- package/src/patterns/toggle/index.ts +5 -1
- package/src/patterns/toggle/ja.ts +3 -144
- package/src/patterns/toggle/ko.ts +3 -101
- package/src/patterns/toggle/qu.ts +90 -0
- package/src/patterns/toggle/tr.ts +3 -76
- package/src/registry.ts +179 -15
- package/src/tokenizers/arabic.ts +13 -46
- package/src/tokenizers/bengali.ts +2 -16
- package/src/tokenizers/he.ts +542 -0
- package/src/tokenizers/index.ts +1 -0
- package/src/tokenizers/japanese.ts +3 -1
- package/src/tokenizers/korean.ts +104 -48
- package/src/tokenizers/ms.ts +3 -0
- package/src/tokenizers/quechua.ts +101 -2
- package/src/tokenizers/turkish.ts +64 -69
- package/src/types.ts +13 -0
package/dist/index.d.cts
CHANGED
|
@@ -139,6 +139,15 @@ interface SemanticMetadata {
|
|
|
139
139
|
readonly sourceText?: string;
|
|
140
140
|
readonly sourcePosition?: SourcePosition;
|
|
141
141
|
readonly patternId?: string;
|
|
142
|
+
/**
|
|
143
|
+
* Confidence score for the parse (0-1).
|
|
144
|
+
* Higher values indicate more certain matches.
|
|
145
|
+
* - 1.0: Exact match with all roles captured
|
|
146
|
+
* - 0.8-0.99: High confidence with minor uncertainty (stem matching, optional roles)
|
|
147
|
+
* - 0.6-0.8: Medium confidence (morphological normalization, defaults applied)
|
|
148
|
+
* - <0.6: Low confidence (may need fallback to traditional parser)
|
|
149
|
+
*/
|
|
150
|
+
readonly confidence?: number;
|
|
142
151
|
}
|
|
143
152
|
interface SourcePosition {
|
|
144
153
|
readonly start: number;
|
|
@@ -286,6 +295,10 @@ interface ExtractionRule {
|
|
|
286
295
|
readonly transform?: (raw: string) => SemanticValue;
|
|
287
296
|
/** Default value if not found (for optional roles) */
|
|
288
297
|
readonly default?: SemanticValue;
|
|
298
|
+
/** Static value extraction (for event handler wrapped commands) */
|
|
299
|
+
readonly value?: string;
|
|
300
|
+
/** Extract value from a pattern role by name */
|
|
301
|
+
readonly fromRole?: string;
|
|
289
302
|
}
|
|
290
303
|
/**
|
|
291
304
|
* Additional constraints on pattern applicability.
|
|
@@ -925,7 +938,7 @@ interface PossessiveConfig {
|
|
|
925
938
|
* Complete language profile for pattern generation.
|
|
926
939
|
*/
|
|
927
940
|
interface LanguageProfile {
|
|
928
|
-
/** ISO 639-1 language code */
|
|
941
|
+
/** ISO 639-1 or BCP 47 language code (e.g., 'es' or 'es-MX') */
|
|
929
942
|
readonly code: string;
|
|
930
943
|
/** Human-readable language name */
|
|
931
944
|
readonly name: string;
|
|
@@ -964,18 +977,33 @@ interface LanguageProfile {
|
|
|
964
977
|
* Individual keywords can override this via KeywordTranslation.form
|
|
965
978
|
*/
|
|
966
979
|
readonly defaultVerbForm?: VerbForm;
|
|
980
|
+
/**
|
|
981
|
+
* Base language code to extend (for regional variants).
|
|
982
|
+
* When set, this profile inherits from the base and overrides specific fields.
|
|
983
|
+
* Example: 'es-MX' profile with extends: 'es' inherits from Spanish base.
|
|
984
|
+
*/
|
|
985
|
+
readonly extends?: string;
|
|
967
986
|
}
|
|
968
987
|
/**
|
|
969
988
|
* Configuration for event handler pattern generation.
|
|
970
|
-
*
|
|
989
|
+
* Supports both SVO and SOV/VSO languages.
|
|
971
990
|
*/
|
|
972
991
|
interface EventHandlerConfig {
|
|
973
|
-
/** Primary event keyword (e.g., 'on', 'bei', 'sur') */
|
|
974
|
-
readonly keyword: KeywordTranslation$1;
|
|
992
|
+
/** Primary event keyword (e.g., 'on', 'bei', 'sur') for SVO */
|
|
993
|
+
readonly keyword?: KeywordTranslation$1;
|
|
975
994
|
/** Source filter marker (e.g., 'from', 'von', 'de') */
|
|
976
|
-
readonly sourceMarker: RoleMarker;
|
|
995
|
+
readonly sourceMarker?: RoleMarker;
|
|
977
996
|
/** Conditional keyword (e.g., 'when', 'wenn', 'quand') */
|
|
978
997
|
readonly conditionalKeyword?: KeywordTranslation$1;
|
|
998
|
+
/** Event marker for SOV/VSO languages (e.g., で (Japanese), 할 때 (Korean), da (Turkish), عند (Arabic)) */
|
|
999
|
+
readonly eventMarker?: RoleMarker;
|
|
1000
|
+
/** Temporal/conditional markers that can optionally appear with events */
|
|
1001
|
+
readonly temporalMarkers?: string[];
|
|
1002
|
+
/**
|
|
1003
|
+
* Negation marker for expressing negated events (e.g., Arabic عدم = "not/lack of").
|
|
1004
|
+
* Used in patterns like: عند عدم التركيز = "when not focusing" = "on blur"
|
|
1005
|
+
*/
|
|
1006
|
+
readonly negationMarker?: RoleMarker;
|
|
979
1007
|
}
|
|
980
1008
|
/**
|
|
981
1009
|
* Verb form used for command keywords.
|
|
@@ -1229,6 +1257,13 @@ declare class KoreanTokenizer extends BaseTokenizer {
|
|
|
1229
1257
|
* Korean time units attach directly without whitespace.
|
|
1230
1258
|
*/
|
|
1231
1259
|
private extractKoreanNumber;
|
|
1260
|
+
/**
|
|
1261
|
+
* Try to split a temporal event suffix from a word token.
|
|
1262
|
+
* This handles compact forms like 클릭할때 → 클릭 + 할때
|
|
1263
|
+
*
|
|
1264
|
+
* @returns Split tokens if a suffix is found, null otherwise
|
|
1265
|
+
*/
|
|
1266
|
+
private trySplitTemporalSuffix;
|
|
1232
1267
|
}
|
|
1233
1268
|
/**
|
|
1234
1269
|
* Singleton instance.
|
|
@@ -1289,6 +1324,14 @@ declare class TurkishTokenizer extends BaseTokenizer {
|
|
|
1289
1324
|
constructor();
|
|
1290
1325
|
tokenize(input: string): TokenStream;
|
|
1291
1326
|
classifyToken(token: string): TokenKind;
|
|
1327
|
+
/**
|
|
1328
|
+
* Try to match multi-word phrases that function as single units.
|
|
1329
|
+
* Multi-word phrases are included in profileKeywords and sorted longest-first,
|
|
1330
|
+
* so they'll be matched before their constituent words.
|
|
1331
|
+
*
|
|
1332
|
+
* Examples: "üzerine gelme" (hover), "fare üzerinde" (mouseover)
|
|
1333
|
+
*/
|
|
1334
|
+
private tryMultiWordPhrase;
|
|
1292
1335
|
/**
|
|
1293
1336
|
* Extract a Turkish word.
|
|
1294
1337
|
* Uses morphological normalization to handle verb conjugations.
|
|
@@ -2023,6 +2066,7 @@ declare function isLanguageSupported(code: string): boolean;
|
|
|
2023
2066
|
|
|
2024
2067
|
/**
|
|
2025
2068
|
* Try to get a profile, returning undefined if not registered.
|
|
2069
|
+
* Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
|
|
2026
2070
|
*/
|
|
2027
2071
|
declare function tryGetProfile(code: string): LanguageProfile | undefined;
|
|
2028
2072
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -139,6 +139,15 @@ interface SemanticMetadata {
|
|
|
139
139
|
readonly sourceText?: string;
|
|
140
140
|
readonly sourcePosition?: SourcePosition;
|
|
141
141
|
readonly patternId?: string;
|
|
142
|
+
/**
|
|
143
|
+
* Confidence score for the parse (0-1).
|
|
144
|
+
* Higher values indicate more certain matches.
|
|
145
|
+
* - 1.0: Exact match with all roles captured
|
|
146
|
+
* - 0.8-0.99: High confidence with minor uncertainty (stem matching, optional roles)
|
|
147
|
+
* - 0.6-0.8: Medium confidence (morphological normalization, defaults applied)
|
|
148
|
+
* - <0.6: Low confidence (may need fallback to traditional parser)
|
|
149
|
+
*/
|
|
150
|
+
readonly confidence?: number;
|
|
142
151
|
}
|
|
143
152
|
interface SourcePosition {
|
|
144
153
|
readonly start: number;
|
|
@@ -286,6 +295,10 @@ interface ExtractionRule {
|
|
|
286
295
|
readonly transform?: (raw: string) => SemanticValue;
|
|
287
296
|
/** Default value if not found (for optional roles) */
|
|
288
297
|
readonly default?: SemanticValue;
|
|
298
|
+
/** Static value extraction (for event handler wrapped commands) */
|
|
299
|
+
readonly value?: string;
|
|
300
|
+
/** Extract value from a pattern role by name */
|
|
301
|
+
readonly fromRole?: string;
|
|
289
302
|
}
|
|
290
303
|
/**
|
|
291
304
|
* Additional constraints on pattern applicability.
|
|
@@ -925,7 +938,7 @@ interface PossessiveConfig {
|
|
|
925
938
|
* Complete language profile for pattern generation.
|
|
926
939
|
*/
|
|
927
940
|
interface LanguageProfile {
|
|
928
|
-
/** ISO 639-1 language code */
|
|
941
|
+
/** ISO 639-1 or BCP 47 language code (e.g., 'es' or 'es-MX') */
|
|
929
942
|
readonly code: string;
|
|
930
943
|
/** Human-readable language name */
|
|
931
944
|
readonly name: string;
|
|
@@ -964,18 +977,33 @@ interface LanguageProfile {
|
|
|
964
977
|
* Individual keywords can override this via KeywordTranslation.form
|
|
965
978
|
*/
|
|
966
979
|
readonly defaultVerbForm?: VerbForm;
|
|
980
|
+
/**
|
|
981
|
+
* Base language code to extend (for regional variants).
|
|
982
|
+
* When set, this profile inherits from the base and overrides specific fields.
|
|
983
|
+
* Example: 'es-MX' profile with extends: 'es' inherits from Spanish base.
|
|
984
|
+
*/
|
|
985
|
+
readonly extends?: string;
|
|
967
986
|
}
|
|
968
987
|
/**
|
|
969
988
|
* Configuration for event handler pattern generation.
|
|
970
|
-
*
|
|
989
|
+
* Supports both SVO and SOV/VSO languages.
|
|
971
990
|
*/
|
|
972
991
|
interface EventHandlerConfig {
|
|
973
|
-
/** Primary event keyword (e.g., 'on', 'bei', 'sur') */
|
|
974
|
-
readonly keyword: KeywordTranslation$1;
|
|
992
|
+
/** Primary event keyword (e.g., 'on', 'bei', 'sur') for SVO */
|
|
993
|
+
readonly keyword?: KeywordTranslation$1;
|
|
975
994
|
/** Source filter marker (e.g., 'from', 'von', 'de') */
|
|
976
|
-
readonly sourceMarker: RoleMarker;
|
|
995
|
+
readonly sourceMarker?: RoleMarker;
|
|
977
996
|
/** Conditional keyword (e.g., 'when', 'wenn', 'quand') */
|
|
978
997
|
readonly conditionalKeyword?: KeywordTranslation$1;
|
|
998
|
+
/** Event marker for SOV/VSO languages (e.g., で (Japanese), 할 때 (Korean), da (Turkish), عند (Arabic)) */
|
|
999
|
+
readonly eventMarker?: RoleMarker;
|
|
1000
|
+
/** Temporal/conditional markers that can optionally appear with events */
|
|
1001
|
+
readonly temporalMarkers?: string[];
|
|
1002
|
+
/**
|
|
1003
|
+
* Negation marker for expressing negated events (e.g., Arabic عدم = "not/lack of").
|
|
1004
|
+
* Used in patterns like: عند عدم التركيز = "when not focusing" = "on blur"
|
|
1005
|
+
*/
|
|
1006
|
+
readonly negationMarker?: RoleMarker;
|
|
979
1007
|
}
|
|
980
1008
|
/**
|
|
981
1009
|
* Verb form used for command keywords.
|
|
@@ -1229,6 +1257,13 @@ declare class KoreanTokenizer extends BaseTokenizer {
|
|
|
1229
1257
|
* Korean time units attach directly without whitespace.
|
|
1230
1258
|
*/
|
|
1231
1259
|
private extractKoreanNumber;
|
|
1260
|
+
/**
|
|
1261
|
+
* Try to split a temporal event suffix from a word token.
|
|
1262
|
+
* This handles compact forms like 클릭할때 → 클릭 + 할때
|
|
1263
|
+
*
|
|
1264
|
+
* @returns Split tokens if a suffix is found, null otherwise
|
|
1265
|
+
*/
|
|
1266
|
+
private trySplitTemporalSuffix;
|
|
1232
1267
|
}
|
|
1233
1268
|
/**
|
|
1234
1269
|
* Singleton instance.
|
|
@@ -1289,6 +1324,14 @@ declare class TurkishTokenizer extends BaseTokenizer {
|
|
|
1289
1324
|
constructor();
|
|
1290
1325
|
tokenize(input: string): TokenStream;
|
|
1291
1326
|
classifyToken(token: string): TokenKind;
|
|
1327
|
+
/**
|
|
1328
|
+
* Try to match multi-word phrases that function as single units.
|
|
1329
|
+
* Multi-word phrases are included in profileKeywords and sorted longest-first,
|
|
1330
|
+
* so they'll be matched before their constituent words.
|
|
1331
|
+
*
|
|
1332
|
+
* Examples: "üzerine gelme" (hover), "fare üzerinde" (mouseover)
|
|
1333
|
+
*/
|
|
1334
|
+
private tryMultiWordPhrase;
|
|
1292
1335
|
/**
|
|
1293
1336
|
* Extract a Turkish word.
|
|
1294
1337
|
* Uses morphological normalization to handle verb conjugations.
|
|
@@ -2023,6 +2066,7 @@ declare function isLanguageSupported(code: string): boolean;
|
|
|
2023
2066
|
|
|
2024
2067
|
/**
|
|
2025
2068
|
* Try to get a profile, returning undefined if not registered.
|
|
2069
|
+
* Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
|
|
2026
2070
|
*/
|
|
2027
2071
|
declare function tryGetProfile(code: string): LanguageProfile | undefined;
|
|
2028
2072
|
/**
|