@lokascript/semantic 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. package/dist/browser-ar.ar.global.js +2 -2
  2. package/dist/browser-core.core.global.js +2 -2
  3. package/dist/browser-de.de.global.js +2 -2
  4. package/dist/browser-east-asian.east-asian.global.js +2 -2
  5. package/dist/browser-en-tr.en-tr.global.js +2 -2
  6. package/dist/browser-en.en.global.js +2 -2
  7. package/dist/browser-es-en.es-en.global.js +2 -2
  8. package/dist/browser-es.es.global.js +2 -2
  9. package/dist/browser-fr.fr.global.js +2 -2
  10. package/dist/browser-id.id.global.js +2 -2
  11. package/dist/browser-ja.ja.global.js +2 -2
  12. package/dist/browser-ko.ko.global.js +2 -2
  13. package/dist/browser-lazy.lazy.global.js +2 -2
  14. package/dist/browser-priority.priority.global.js +2 -2
  15. package/dist/browser-pt.pt.global.js +2 -2
  16. package/dist/browser-qu.qu.global.js +2 -2
  17. package/dist/browser-sw.sw.global.js +2 -2
  18. package/dist/browser-tr.tr.global.js +2 -2
  19. package/dist/browser-western.western.global.js +2 -2
  20. package/dist/browser-zh.zh.global.js +2 -2
  21. package/dist/browser.global.js +2 -2
  22. package/dist/browser.global.js.map +1 -1
  23. package/dist/index.cjs +13042 -17462
  24. package/dist/index.cjs.map +1 -1
  25. package/dist/index.d.cts +49 -5
  26. package/dist/index.d.ts +49 -5
  27. package/dist/index.js +14044 -18464
  28. package/dist/index.js.map +1 -1
  29. package/dist/languages/ar.d.ts +1 -1
  30. package/dist/languages/ar.js +31 -44
  31. package/dist/languages/ar.js.map +1 -1
  32. package/dist/languages/de.d.ts +1 -1
  33. package/dist/languages/de.js +14 -2
  34. package/dist/languages/de.js.map +1 -1
  35. package/dist/languages/en.d.ts +1 -1
  36. package/dist/languages/en.js +558 -12
  37. package/dist/languages/en.js.map +1 -1
  38. package/dist/languages/es.d.ts +1 -1
  39. package/dist/languages/es.js +16 -0
  40. package/dist/languages/es.js.map +1 -1
  41. package/dist/languages/fr.d.ts +1 -1
  42. package/dist/languages/fr.js +14 -2
  43. package/dist/languages/fr.js.map +1 -1
  44. package/dist/languages/id.d.ts +1 -1
  45. package/dist/languages/id.js +14 -2
  46. package/dist/languages/id.js.map +1 -1
  47. package/dist/languages/ja.d.ts +1 -1
  48. package/dist/languages/ja.js +18 -3
  49. package/dist/languages/ja.js.map +1 -1
  50. package/dist/languages/ko.d.ts +8 -1
  51. package/dist/languages/ko.js +75 -43
  52. package/dist/languages/ko.js.map +1 -1
  53. package/dist/languages/pt.d.ts +1 -1
  54. package/dist/languages/pt.js +17 -0
  55. package/dist/languages/pt.js.map +1 -1
  56. package/dist/languages/qu.d.ts +12 -1
  57. package/dist/languages/qu.js +77 -2
  58. package/dist/languages/qu.js.map +1 -1
  59. package/dist/languages/sw.d.ts +1 -1
  60. package/dist/languages/sw.js.map +1 -1
  61. package/dist/languages/tr.d.ts +9 -1
  62. package/dist/languages/tr.js +96 -72
  63. package/dist/languages/tr.js.map +1 -1
  64. package/dist/languages/zh.d.ts +1 -1
  65. package/dist/languages/zh.js +16 -0
  66. package/dist/languages/zh.js.map +1 -1
  67. package/dist/{types-C4dcj53L.d.ts → types-BY3Id07j.d.ts} +20 -5
  68. package/package.json +20 -29
  69. package/src/generators/command-schemas.ts +21 -10
  70. package/src/generators/event-handler-generator.ts +50 -44
  71. package/src/generators/language-profiles.ts +6 -0
  72. package/src/generators/pattern-generator.ts +883 -1
  73. package/src/generators/profiles/arabic.ts +19 -3
  74. package/src/generators/profiles/bengali.ts +12 -1
  75. package/src/generators/profiles/chinese.ts +15 -0
  76. package/src/generators/profiles/french.ts +12 -1
  77. package/src/generators/profiles/german.ts +12 -1
  78. package/src/generators/profiles/hebrew.ts +148 -0
  79. package/src/generators/profiles/hindi.ts +12 -1
  80. package/src/generators/profiles/index.ts +2 -0
  81. package/src/generators/profiles/indonesian.ts +12 -1
  82. package/src/generators/profiles/italian.ts +16 -0
  83. package/src/generators/profiles/japanese.ts +11 -2
  84. package/src/generators/profiles/korean.ts +15 -1
  85. package/src/generators/profiles/polish.ts +12 -0
  86. package/src/generators/profiles/portuguese.ts +16 -0
  87. package/src/generators/profiles/russian.ts +11 -0
  88. package/src/generators/profiles/spanish.ts +15 -0
  89. package/src/generators/profiles/spanishMexico.ts +176 -0
  90. package/src/generators/profiles/thai.ts +11 -0
  91. package/src/generators/profiles/turkish.ts +49 -7
  92. package/src/generators/profiles/types.ts +21 -5
  93. package/src/generators/profiles/ukrainian.ts +11 -0
  94. package/src/generators/profiles/vietnamese.ts +11 -0
  95. package/src/language-building-schema.ts +111 -0
  96. package/src/languages/_all.ts +5 -1
  97. package/src/languages/es-MX.ts +32 -0
  98. package/src/languages/he.ts +15 -0
  99. package/src/parser/pattern-matcher.ts +10 -1
  100. package/src/parser/semantic-parser.ts +3 -0
  101. package/src/patterns/add/ar.ts +3 -59
  102. package/src/patterns/add/index.ts +5 -1
  103. package/src/patterns/add/ja.ts +3 -81
  104. package/src/patterns/add/ko.ts +3 -62
  105. package/src/patterns/add/qu.ts +69 -0
  106. package/src/patterns/add/tr.ts +3 -59
  107. package/src/patterns/builders.ts +1 -0
  108. package/src/patterns/decrement/tr.ts +3 -36
  109. package/src/patterns/event-handler/ar.ts +3 -139
  110. package/src/patterns/event-handler/he.ts +15 -0
  111. package/src/patterns/event-handler/index.ts +5 -1
  112. package/src/patterns/event-handler/ja.ts +3 -106
  113. package/src/patterns/event-handler/ko.ts +3 -121
  114. package/src/patterns/event-handler/ms.ts +45 -20
  115. package/src/patterns/event-handler/tr.ts +3 -158
  116. package/src/patterns/get/ar.ts +3 -37
  117. package/src/patterns/get/ja.ts +3 -41
  118. package/src/patterns/get/ko.ts +3 -41
  119. package/src/patterns/grammar-transformed/ja.ts +3 -1701
  120. package/src/patterns/grammar-transformed/ko.ts +3 -1299
  121. package/src/patterns/grammar-transformed/tr.ts +3 -1055
  122. package/src/patterns/hide/ar.ts +3 -55
  123. package/src/patterns/hide/ja.ts +3 -57
  124. package/src/patterns/hide/ko.ts +3 -57
  125. package/src/patterns/hide/tr.ts +3 -53
  126. package/src/patterns/increment/tr.ts +3 -40
  127. package/src/patterns/put/ar.ts +3 -62
  128. package/src/patterns/put/ja.ts +3 -63
  129. package/src/patterns/put/ko.ts +3 -55
  130. package/src/patterns/put/tr.ts +3 -55
  131. package/src/patterns/remove/ar.ts +3 -59
  132. package/src/patterns/remove/index.ts +5 -1
  133. package/src/patterns/remove/ja.ts +3 -62
  134. package/src/patterns/remove/ko.ts +3 -66
  135. package/src/patterns/remove/qu.ts +69 -0
  136. package/src/patterns/remove/tr.ts +3 -66
  137. package/src/patterns/set/ar.ts +3 -72
  138. package/src/patterns/set/ja.ts +3 -74
  139. package/src/patterns/set/ko.ts +3 -73
  140. package/src/patterns/set/tr.ts +3 -95
  141. package/src/patterns/show/ar.ts +3 -55
  142. package/src/patterns/show/ja.ts +3 -57
  143. package/src/patterns/show/ko.ts +3 -61
  144. package/src/patterns/show/tr.ts +3 -53
  145. package/src/patterns/take/ar.ts +3 -39
  146. package/src/patterns/toggle/ar.ts +3 -49
  147. package/src/patterns/toggle/index.ts +5 -1
  148. package/src/patterns/toggle/ja.ts +3 -144
  149. package/src/patterns/toggle/ko.ts +3 -101
  150. package/src/patterns/toggle/qu.ts +90 -0
  151. package/src/patterns/toggle/tr.ts +3 -76
  152. package/src/registry.ts +179 -15
  153. package/src/tokenizers/arabic.ts +13 -46
  154. package/src/tokenizers/bengali.ts +2 -16
  155. package/src/tokenizers/he.ts +542 -0
  156. package/src/tokenizers/index.ts +1 -0
  157. package/src/tokenizers/japanese.ts +3 -1
  158. package/src/tokenizers/korean.ts +104 -48
  159. package/src/tokenizers/ms.ts +3 -0
  160. package/src/tokenizers/quechua.ts +101 -2
  161. package/src/tokenizers/turkish.ts +64 -69
  162. package/src/types.ts +13 -0
package/dist/index.d.cts CHANGED
@@ -139,6 +139,15 @@ interface SemanticMetadata {
139
139
  readonly sourceText?: string;
140
140
  readonly sourcePosition?: SourcePosition;
141
141
  readonly patternId?: string;
142
+ /**
143
+ * Confidence score for the parse (0-1).
144
+ * Higher values indicate more certain matches.
145
+ * - 1.0: Exact match with all roles captured
146
+ * - 0.8-0.99: High confidence with minor uncertainty (stem matching, optional roles)
147
+ * - 0.6-0.8: Medium confidence (morphological normalization, defaults applied)
148
+ * - <0.6: Low confidence (may need fallback to traditional parser)
149
+ */
150
+ readonly confidence?: number;
142
151
  }
143
152
  interface SourcePosition {
144
153
  readonly start: number;
@@ -286,6 +295,10 @@ interface ExtractionRule {
286
295
  readonly transform?: (raw: string) => SemanticValue;
287
296
  /** Default value if not found (for optional roles) */
288
297
  readonly default?: SemanticValue;
298
+ /** Static value extraction (for event handler wrapped commands) */
299
+ readonly value?: string;
300
+ /** Extract value from a pattern role by name */
301
+ readonly fromRole?: string;
289
302
  }
290
303
  /**
291
304
  * Additional constraints on pattern applicability.
@@ -925,7 +938,7 @@ interface PossessiveConfig {
925
938
  * Complete language profile for pattern generation.
926
939
  */
927
940
  interface LanguageProfile {
928
- /** ISO 639-1 language code */
941
+ /** ISO 639-1 or BCP 47 language code (e.g., 'es' or 'es-MX') */
929
942
  readonly code: string;
930
943
  /** Human-readable language name */
931
944
  readonly name: string;
@@ -964,18 +977,33 @@ interface LanguageProfile {
964
977
  * Individual keywords can override this via KeywordTranslation.form
965
978
  */
966
979
  readonly defaultVerbForm?: VerbForm;
980
+ /**
981
+ * Base language code to extend (for regional variants).
982
+ * When set, this profile inherits from the base and overrides specific fields.
983
+ * Example: 'es-MX' profile with extends: 'es' inherits from Spanish base.
984
+ */
985
+ readonly extends?: string;
967
986
  }
968
987
  /**
969
988
  * Configuration for event handler pattern generation.
970
- * Used by simple SVO languages that don't need hand-crafted patterns.
989
+ * Supports both SVO and SOV/VSO languages.
971
990
  */
972
991
  interface EventHandlerConfig {
973
- /** Primary event keyword (e.g., 'on', 'bei', 'sur') */
974
- readonly keyword: KeywordTranslation$1;
992
+ /** Primary event keyword (e.g., 'on', 'bei', 'sur') for SVO */
993
+ readonly keyword?: KeywordTranslation$1;
975
994
  /** Source filter marker (e.g., 'from', 'von', 'de') */
976
- readonly sourceMarker: RoleMarker;
995
+ readonly sourceMarker?: RoleMarker;
977
996
  /** Conditional keyword (e.g., 'when', 'wenn', 'quand') */
978
997
  readonly conditionalKeyword?: KeywordTranslation$1;
998
+ /** Event marker for SOV/VSO languages (e.g., で (Japanese), 할 때 (Korean), da (Turkish), عند (Arabic)) */
999
+ readonly eventMarker?: RoleMarker;
1000
+ /** Temporal/conditional markers that can optionally appear with events */
1001
+ readonly temporalMarkers?: string[];
1002
+ /**
1003
+ * Negation marker for expressing negated events (e.g., Arabic عدم = "not/lack of").
1004
+ * Used in patterns like: عند عدم التركيز = "when not focusing" = "on blur"
1005
+ */
1006
+ readonly negationMarker?: RoleMarker;
979
1007
  }
980
1008
  /**
981
1009
  * Verb form used for command keywords.
@@ -1229,6 +1257,13 @@ declare class KoreanTokenizer extends BaseTokenizer {
1229
1257
  * Korean time units attach directly without whitespace.
1230
1258
  */
1231
1259
  private extractKoreanNumber;
1260
+ /**
1261
+ * Try to split a temporal event suffix from a word token.
1262
+ * This handles compact forms like 클릭할때 → 클릭 + 할때
1263
+ *
1264
+ * @returns Split tokens if a suffix is found, null otherwise
1265
+ */
1266
+ private trySplitTemporalSuffix;
1232
1267
  }
1233
1268
  /**
1234
1269
  * Singleton instance.
@@ -1289,6 +1324,14 @@ declare class TurkishTokenizer extends BaseTokenizer {
1289
1324
  constructor();
1290
1325
  tokenize(input: string): TokenStream;
1291
1326
  classifyToken(token: string): TokenKind;
1327
+ /**
1328
+ * Try to match multi-word phrases that function as single units.
1329
+ * Multi-word phrases are included in profileKeywords and sorted longest-first,
1330
+ * so they'll be matched before their constituent words.
1331
+ *
1332
+ * Examples: "üzerine gelme" (hover), "fare üzerinde" (mouseover)
1333
+ */
1334
+ private tryMultiWordPhrase;
1292
1335
  /**
1293
1336
  * Extract a Turkish word.
1294
1337
  * Uses morphological normalization to handle verb conjugations.
@@ -2023,6 +2066,7 @@ declare function isLanguageSupported(code: string): boolean;
2023
2066
 
2024
2067
  /**
2025
2068
  * Try to get a profile, returning undefined if not registered.
2069
+ * Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
2026
2070
  */
2027
2071
  declare function tryGetProfile(code: string): LanguageProfile | undefined;
2028
2072
  /**
package/dist/index.d.ts CHANGED
@@ -139,6 +139,15 @@ interface SemanticMetadata {
139
139
  readonly sourceText?: string;
140
140
  readonly sourcePosition?: SourcePosition;
141
141
  readonly patternId?: string;
142
+ /**
143
+ * Confidence score for the parse (0-1).
144
+ * Higher values indicate more certain matches.
145
+ * - 1.0: Exact match with all roles captured
146
+ * - 0.8-0.99: High confidence with minor uncertainty (stem matching, optional roles)
147
+ * - 0.6-0.8: Medium confidence (morphological normalization, defaults applied)
148
+ * - <0.6: Low confidence (may need fallback to traditional parser)
149
+ */
150
+ readonly confidence?: number;
142
151
  }
143
152
  interface SourcePosition {
144
153
  readonly start: number;
@@ -286,6 +295,10 @@ interface ExtractionRule {
286
295
  readonly transform?: (raw: string) => SemanticValue;
287
296
  /** Default value if not found (for optional roles) */
288
297
  readonly default?: SemanticValue;
298
+ /** Static value extraction (for event handler wrapped commands) */
299
+ readonly value?: string;
300
+ /** Extract value from a pattern role by name */
301
+ readonly fromRole?: string;
289
302
  }
290
303
  /**
291
304
  * Additional constraints on pattern applicability.
@@ -925,7 +938,7 @@ interface PossessiveConfig {
925
938
  * Complete language profile for pattern generation.
926
939
  */
927
940
  interface LanguageProfile {
928
- /** ISO 639-1 language code */
941
+ /** ISO 639-1 or BCP 47 language code (e.g., 'es' or 'es-MX') */
929
942
  readonly code: string;
930
943
  /** Human-readable language name */
931
944
  readonly name: string;
@@ -964,18 +977,33 @@ interface LanguageProfile {
964
977
  * Individual keywords can override this via KeywordTranslation.form
965
978
  */
966
979
  readonly defaultVerbForm?: VerbForm;
980
+ /**
981
+ * Base language code to extend (for regional variants).
982
+ * When set, this profile inherits from the base and overrides specific fields.
983
+ * Example: 'es-MX' profile with extends: 'es' inherits from Spanish base.
984
+ */
985
+ readonly extends?: string;
967
986
  }
968
987
  /**
969
988
  * Configuration for event handler pattern generation.
970
- * Used by simple SVO languages that don't need hand-crafted patterns.
989
+ * Supports both SVO and SOV/VSO languages.
971
990
  */
972
991
  interface EventHandlerConfig {
973
- /** Primary event keyword (e.g., 'on', 'bei', 'sur') */
974
- readonly keyword: KeywordTranslation$1;
992
+ /** Primary event keyword (e.g., 'on', 'bei', 'sur') for SVO */
993
+ readonly keyword?: KeywordTranslation$1;
975
994
  /** Source filter marker (e.g., 'from', 'von', 'de') */
976
- readonly sourceMarker: RoleMarker;
995
+ readonly sourceMarker?: RoleMarker;
977
996
  /** Conditional keyword (e.g., 'when', 'wenn', 'quand') */
978
997
  readonly conditionalKeyword?: KeywordTranslation$1;
998
+ /** Event marker for SOV/VSO languages (e.g., で (Japanese), 할 때 (Korean), da (Turkish), عند (Arabic)) */
999
+ readonly eventMarker?: RoleMarker;
1000
+ /** Temporal/conditional markers that can optionally appear with events */
1001
+ readonly temporalMarkers?: string[];
1002
+ /**
1003
+ * Negation marker for expressing negated events (e.g., Arabic عدم = "not/lack of").
1004
+ * Used in patterns like: عند عدم التركيز = "when not focusing" = "on blur"
1005
+ */
1006
+ readonly negationMarker?: RoleMarker;
979
1007
  }
980
1008
  /**
981
1009
  * Verb form used for command keywords.
@@ -1229,6 +1257,13 @@ declare class KoreanTokenizer extends BaseTokenizer {
1229
1257
  * Korean time units attach directly without whitespace.
1230
1258
  */
1231
1259
  private extractKoreanNumber;
1260
+ /**
1261
+ * Try to split a temporal event suffix from a word token.
1262
+ * This handles compact forms like 클릭할때 → 클릭 + 할때
1263
+ *
1264
+ * @returns Split tokens if a suffix is found, null otherwise
1265
+ */
1266
+ private trySplitTemporalSuffix;
1232
1267
  }
1233
1268
  /**
1234
1269
  * Singleton instance.
@@ -1289,6 +1324,14 @@ declare class TurkishTokenizer extends BaseTokenizer {
1289
1324
  constructor();
1290
1325
  tokenize(input: string): TokenStream;
1291
1326
  classifyToken(token: string): TokenKind;
1327
+ /**
1328
+ * Try to match multi-word phrases that function as single units.
1329
+ * Multi-word phrases are included in profileKeywords and sorted longest-first,
1330
+ * so they'll be matched before their constituent words.
1331
+ *
1332
+ * Examples: "üzerine gelme" (hover), "fare üzerinde" (mouseover)
1333
+ */
1334
+ private tryMultiWordPhrase;
1292
1335
  /**
1293
1336
  * Extract a Turkish word.
1294
1337
  * Uses morphological normalization to handle verb conjugations.
@@ -2023,6 +2066,7 @@ declare function isLanguageSupported(code: string): boolean;
2023
2066
 
2024
2067
  /**
2025
2068
  * Try to get a profile, returning undefined if not registered.
2069
+ * Supports fallback: if 'es-MX' is not registered, falls back to 'es'.
2026
2070
  */
2027
2071
  declare function tryGetProfile(code: string): LanguageProfile | undefined;
2028
2072
  /**