@lokascript/semantic 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162):
  1. package/dist/browser-ar.ar.global.js +2 -2
  2. package/dist/browser-core.core.global.js +2 -2
  3. package/dist/browser-de.de.global.js +2 -2
  4. package/dist/browser-east-asian.east-asian.global.js +2 -2
  5. package/dist/browser-en-tr.en-tr.global.js +2 -2
  6. package/dist/browser-en.en.global.js +2 -2
  7. package/dist/browser-es-en.es-en.global.js +2 -2
  8. package/dist/browser-es.es.global.js +2 -2
  9. package/dist/browser-fr.fr.global.js +2 -2
  10. package/dist/browser-id.id.global.js +2 -2
  11. package/dist/browser-ja.ja.global.js +2 -2
  12. package/dist/browser-ko.ko.global.js +2 -2
  13. package/dist/browser-lazy.lazy.global.js +2 -2
  14. package/dist/browser-priority.priority.global.js +2 -2
  15. package/dist/browser-pt.pt.global.js +2 -2
  16. package/dist/browser-qu.qu.global.js +2 -2
  17. package/dist/browser-sw.sw.global.js +2 -2
  18. package/dist/browser-tr.tr.global.js +2 -2
  19. package/dist/browser-western.western.global.js +2 -2
  20. package/dist/browser-zh.zh.global.js +2 -2
  21. package/dist/browser.global.js +2 -2
  22. package/dist/browser.global.js.map +1 -1
  23. package/dist/index.cjs +13042 -17462
  24. package/dist/index.cjs.map +1 -1
  25. package/dist/index.d.cts +49 -5
  26. package/dist/index.d.ts +49 -5
  27. package/dist/index.js +14044 -18464
  28. package/dist/index.js.map +1 -1
  29. package/dist/languages/ar.d.ts +1 -1
  30. package/dist/languages/ar.js +31 -44
  31. package/dist/languages/ar.js.map +1 -1
  32. package/dist/languages/de.d.ts +1 -1
  33. package/dist/languages/de.js +14 -2
  34. package/dist/languages/de.js.map +1 -1
  35. package/dist/languages/en.d.ts +1 -1
  36. package/dist/languages/en.js +558 -12
  37. package/dist/languages/en.js.map +1 -1
  38. package/dist/languages/es.d.ts +1 -1
  39. package/dist/languages/es.js +16 -0
  40. package/dist/languages/es.js.map +1 -1
  41. package/dist/languages/fr.d.ts +1 -1
  42. package/dist/languages/fr.js +14 -2
  43. package/dist/languages/fr.js.map +1 -1
  44. package/dist/languages/id.d.ts +1 -1
  45. package/dist/languages/id.js +14 -2
  46. package/dist/languages/id.js.map +1 -1
  47. package/dist/languages/ja.d.ts +1 -1
  48. package/dist/languages/ja.js +18 -3
  49. package/dist/languages/ja.js.map +1 -1
  50. package/dist/languages/ko.d.ts +8 -1
  51. package/dist/languages/ko.js +75 -43
  52. package/dist/languages/ko.js.map +1 -1
  53. package/dist/languages/pt.d.ts +1 -1
  54. package/dist/languages/pt.js +17 -0
  55. package/dist/languages/pt.js.map +1 -1
  56. package/dist/languages/qu.d.ts +12 -1
  57. package/dist/languages/qu.js +77 -2
  58. package/dist/languages/qu.js.map +1 -1
  59. package/dist/languages/sw.d.ts +1 -1
  60. package/dist/languages/sw.js.map +1 -1
  61. package/dist/languages/tr.d.ts +9 -1
  62. package/dist/languages/tr.js +96 -72
  63. package/dist/languages/tr.js.map +1 -1
  64. package/dist/languages/zh.d.ts +1 -1
  65. package/dist/languages/zh.js +16 -0
  66. package/dist/languages/zh.js.map +1 -1
  67. package/dist/{types-C4dcj53L.d.ts → types-BY3Id07j.d.ts} +20 -5
  68. package/package.json +20 -29
  69. package/src/generators/command-schemas.ts +21 -10
  70. package/src/generators/event-handler-generator.ts +50 -44
  71. package/src/generators/language-profiles.ts +6 -0
  72. package/src/generators/pattern-generator.ts +883 -1
  73. package/src/generators/profiles/arabic.ts +19 -3
  74. package/src/generators/profiles/bengali.ts +12 -1
  75. package/src/generators/profiles/chinese.ts +15 -0
  76. package/src/generators/profiles/french.ts +12 -1
  77. package/src/generators/profiles/german.ts +12 -1
  78. package/src/generators/profiles/hebrew.ts +148 -0
  79. package/src/generators/profiles/hindi.ts +12 -1
  80. package/src/generators/profiles/index.ts +2 -0
  81. package/src/generators/profiles/indonesian.ts +12 -1
  82. package/src/generators/profiles/italian.ts +16 -0
  83. package/src/generators/profiles/japanese.ts +11 -2
  84. package/src/generators/profiles/korean.ts +15 -1
  85. package/src/generators/profiles/polish.ts +12 -0
  86. package/src/generators/profiles/portuguese.ts +16 -0
  87. package/src/generators/profiles/russian.ts +11 -0
  88. package/src/generators/profiles/spanish.ts +15 -0
  89. package/src/generators/profiles/spanishMexico.ts +176 -0
  90. package/src/generators/profiles/thai.ts +11 -0
  91. package/src/generators/profiles/turkish.ts +49 -7
  92. package/src/generators/profiles/types.ts +21 -5
  93. package/src/generators/profiles/ukrainian.ts +11 -0
  94. package/src/generators/profiles/vietnamese.ts +11 -0
  95. package/src/language-building-schema.ts +111 -0
  96. package/src/languages/_all.ts +5 -1
  97. package/src/languages/es-MX.ts +32 -0
  98. package/src/languages/he.ts +15 -0
  99. package/src/parser/pattern-matcher.ts +10 -1
  100. package/src/parser/semantic-parser.ts +3 -0
  101. package/src/patterns/add/ar.ts +3 -59
  102. package/src/patterns/add/index.ts +5 -1
  103. package/src/patterns/add/ja.ts +3 -81
  104. package/src/patterns/add/ko.ts +3 -62
  105. package/src/patterns/add/qu.ts +69 -0
  106. package/src/patterns/add/tr.ts +3 -59
  107. package/src/patterns/builders.ts +1 -0
  108. package/src/patterns/decrement/tr.ts +3 -36
  109. package/src/patterns/event-handler/ar.ts +3 -139
  110. package/src/patterns/event-handler/he.ts +15 -0
  111. package/src/patterns/event-handler/index.ts +5 -1
  112. package/src/patterns/event-handler/ja.ts +3 -106
  113. package/src/patterns/event-handler/ko.ts +3 -121
  114. package/src/patterns/event-handler/ms.ts +45 -20
  115. package/src/patterns/event-handler/tr.ts +3 -158
  116. package/src/patterns/get/ar.ts +3 -37
  117. package/src/patterns/get/ja.ts +3 -41
  118. package/src/patterns/get/ko.ts +3 -41
  119. package/src/patterns/grammar-transformed/ja.ts +3 -1701
  120. package/src/patterns/grammar-transformed/ko.ts +3 -1299
  121. package/src/patterns/grammar-transformed/tr.ts +3 -1055
  122. package/src/patterns/hide/ar.ts +3 -55
  123. package/src/patterns/hide/ja.ts +3 -57
  124. package/src/patterns/hide/ko.ts +3 -57
  125. package/src/patterns/hide/tr.ts +3 -53
  126. package/src/patterns/increment/tr.ts +3 -40
  127. package/src/patterns/put/ar.ts +3 -62
  128. package/src/patterns/put/ja.ts +3 -63
  129. package/src/patterns/put/ko.ts +3 -55
  130. package/src/patterns/put/tr.ts +3 -55
  131. package/src/patterns/remove/ar.ts +3 -59
  132. package/src/patterns/remove/index.ts +5 -1
  133. package/src/patterns/remove/ja.ts +3 -62
  134. package/src/patterns/remove/ko.ts +3 -66
  135. package/src/patterns/remove/qu.ts +69 -0
  136. package/src/patterns/remove/tr.ts +3 -66
  137. package/src/patterns/set/ar.ts +3 -72
  138. package/src/patterns/set/ja.ts +3 -74
  139. package/src/patterns/set/ko.ts +3 -73
  140. package/src/patterns/set/tr.ts +3 -95
  141. package/src/patterns/show/ar.ts +3 -55
  142. package/src/patterns/show/ja.ts +3 -57
  143. package/src/patterns/show/ko.ts +3 -61
  144. package/src/patterns/show/tr.ts +3 -53
  145. package/src/patterns/take/ar.ts +3 -39
  146. package/src/patterns/toggle/ar.ts +3 -49
  147. package/src/patterns/toggle/index.ts +5 -1
  148. package/src/patterns/toggle/ja.ts +3 -144
  149. package/src/patterns/toggle/ko.ts +3 -101
  150. package/src/patterns/toggle/qu.ts +90 -0
  151. package/src/patterns/toggle/tr.ts +3 -76
  152. package/src/registry.ts +179 -15
  153. package/src/tokenizers/arabic.ts +13 -46
  154. package/src/tokenizers/bengali.ts +2 -16
  155. package/src/tokenizers/he.ts +542 -0
  156. package/src/tokenizers/index.ts +1 -0
  157. package/src/tokenizers/japanese.ts +3 -1
  158. package/src/tokenizers/korean.ts +104 -48
  159. package/src/tokenizers/ms.ts +3 -0
  160. package/src/tokenizers/quechua.ts +101 -2
  161. package/src/tokenizers/turkish.ts +64 -69
  162. package/src/types.ts +13 -0
@@ -1,4 +1,4 @@
1
- import { B as BaseTokenizer, T as TokenStream, a as TokenKind, L as LanguageProfile } from '../types-C4dcj53L.js';
1
+ import { B as BaseTokenizer, T as TokenStream, a as TokenKind, L as LanguageProfile } from '../types-BY3Id07j.js';
2
2
 
3
3
  /**
4
4
  * Korean Tokenizer
@@ -33,6 +33,13 @@ declare class KoreanTokenizer extends BaseTokenizer {
33
33
  * Korean time units attach directly without whitespace.
34
34
  */
35
35
  private extractKoreanNumber;
36
+ /**
37
+ * Try to split a temporal event suffix from a word token.
38
+ * This handles compact forms like 클릭할때 → 클릭 + 할때
39
+ *
40
+ * @returns Split tokens if a suffix is found, null otherwise
41
+ */
42
+ private trySplitTemporalSuffix;
36
43
  }
37
44
  /**
38
45
  * Singleton instance.
@@ -1162,7 +1162,7 @@ var koreanProfile = {
1162
1162
  },
1163
1163
  roleMarkers: {
1164
1164
  patient: { primary: "\uC744", alternatives: ["\uB97C"], position: "after" },
1165
- destination: { primary: "\uC5D0", alternatives: ["\uC73C\uB85C", "\uB85C", "\uC5D0\uC11C"], position: "after" },
1165
+ destination: { primary: "\uC5D0", alternatives: ["\uC73C\uB85C", "\uB85C", "\uC5D0\uC11C", "\uC758"], position: "after" },
1166
1166
  source: { primary: "\uC5D0\uC11C", alternatives: ["\uBD80\uD130"], position: "after" },
1167
1167
  style: { primary: "\uB85C", alternatives: ["\uC73C\uB85C"], position: "after" },
1168
1168
  event: { primary: "\uC744", alternatives: ["\uB97C"], position: "after" }
@@ -1198,6 +1198,12 @@ var koreanProfile = {
1198
1198
  // DOM focus
1199
1199
  focus: { primary: "\uD3EC\uCEE4\uC2A4", normalized: "focus" },
1200
1200
  blur: { primary: "\uBE14\uB7EC", normalized: "blur" },
1201
+ // Common event names (for event handler patterns)
1202
+ click: { primary: "\uD074\uB9AD", normalized: "click" },
1203
+ hover: { primary: "\uD638\uBC84", normalized: "hover" },
1204
+ submit: { primary: "\uC81C\uCD9C", normalized: "submit" },
1205
+ input: { primary: "\uC785\uB825", normalized: "input" },
1206
+ change: { primary: "\uBCC0\uACBD", normalized: "change" },
1201
1207
  // Navigation
1202
1208
  go: { primary: "\uC774\uB3D9", normalized: "go" },
1203
1209
  // Async
@@ -1241,6 +1247,15 @@ var koreanProfile = {
1241
1247
  tokenization: {
1242
1248
  particles: ["\uC744", "\uB97C", "\uC774", "\uAC00", "\uC740", "\uB294", "\uC5D0", "\uC5D0\uC11C", "\uC73C\uB85C", "\uB85C", "\uC640", "\uACFC", "\uB3C4"],
1243
1249
  boundaryStrategy: "space"
1250
+ },
1251
+ eventHandler: {
1252
+ // Event marker: 할 때 (when/at the time of), used in SOV pattern
1253
+ // Pattern: [event] 할 때 [destination] 의 [patient] 를 [action]
1254
+ // Example: 클릭 할 때 #button 의 .active 를 토글
1255
+ // Compact forms (no space): 클릭할때 .active를토글
1256
+ eventMarker: { primary: "\uD560 \uB54C", alternatives: ["\uD560\uB54C", "\uB54C", "\uC5D0"], position: "after" },
1257
+ temporalMarkers: ["\uD560 \uB54C", "\uD560\uB54C", "\uB54C"]
1258
+ // temporal markers (with and without space)
1244
1259
  }
1245
1260
  };
1246
1261
 
@@ -1314,6 +1329,7 @@ var SINGLE_CHAR_PARTICLES = /* @__PURE__ */ new Set([
1314
1329
  "\uB9CC"
1315
1330
  ]);
1316
1331
  var MULTI_CHAR_PARTICLES = ["\uC5D0\uC11C", "\uC73C\uB85C", "\uBD80\uD130", "\uAE4C\uC9C0", "\uCC98\uB7FC", "\uBCF4\uB2E4"];
1332
+ var TEMPORAL_EVENT_SUFFIXES = ["\uD560\uB54C", "\uD558\uBA74", "\uD558\uB2C8\uAE4C", "\uD560 \uB54C"];
1317
1333
  var PARTICLE_ROLES = /* @__PURE__ */ new Map([
1318
1334
  // Subject markers (vowel harmony pair)
1319
1335
  [
@@ -1442,59 +1458,23 @@ var KOREAN_EXTRAS = [
1442
1458
  { native: "\uD0A4\uC5C5", normalized: "keyup" },
1443
1459
  { native: "\uB9C8\uC6B0\uC2A4\uC624\uBC84", normalized: "mouseover" },
1444
1460
  { native: "\uB9C8\uC6B0\uC2A4\uC544\uC6C3", normalized: "mouseout" },
1445
- // References (additional forms)
1461
+ // References (additional forms not in profile)
1446
1462
  { native: "\uB0B4", normalized: "my" },
1447
1463
  { native: "\uADF8\uAC83\uC758", normalized: "its" },
1448
- // Conditional event forms (native idioms)
1449
- { native: "\uD558\uBA74", normalized: "on" },
1450
- { native: "\uC73C\uBA74", normalized: "on" },
1451
- { native: "\uBA74", normalized: "on" },
1452
- { native: "\uD560\uB54C", normalized: "on" },
1453
- { native: "\uD560 \uB54C", normalized: "on" },
1454
- { native: "\uC744\uB54C", normalized: "on" },
1455
- { native: "\uC744 \uB54C", normalized: "on" },
1456
- { native: "\uD558\uB2C8\uAE4C", normalized: "on" },
1457
- { native: "\uB2C8\uAE4C", normalized: "on" },
1458
- // Control flow helpers
1459
- { native: "\uADF8\uB7EC\uBA74", normalized: "then" },
1460
- { native: "\uADF8\uB807\uC9C0\uC54A\uC73C\uBA74", normalized: "otherwise" },
1461
- { native: "\uC911\uB2E8", normalized: "break" },
1462
1464
  // Logical
1463
1465
  { native: "\uADF8\uB9AC\uACE0", normalized: "and" },
1464
1466
  { native: "\uB610\uB294", normalized: "or" },
1465
1467
  { native: "\uC544\uB2C8", normalized: "not" },
1466
1468
  { native: "\uC774\uB2E4", normalized: "is" },
1467
- // Command overrides (ensure correct mapping when profile has multiple meanings)
1468
- { native: "\uCD94\uAC00", normalized: "add" },
1469
- // Profile may have this as 'append'
1470
- // Attached particle forms (native idioms - particle + verb without space)
1471
- // Object particle 를 (after vowel)
1472
- { native: "\uB97C\uD1A0\uAE00", normalized: "toggle" },
1473
- { native: "\uB97C\uC804\uD658", normalized: "toggle" },
1474
- { native: "\uB97C\uCD94\uAC00", normalized: "add" },
1475
- { native: "\uB97C\uC81C\uAC70", normalized: "remove" },
1476
- { native: "\uB97C\uC0AD\uC81C", normalized: "remove" },
1477
- { native: "\uB97C\uC99D\uAC00", normalized: "increment" },
1478
- { native: "\uB97C\uAC10\uC18C", normalized: "decrement" },
1479
- { native: "\uB97C\uD45C\uC2DC", normalized: "show" },
1480
- { native: "\uB97C\uC228\uAE30\uB2E4", normalized: "hide" },
1481
- { native: "\uB97C\uC124\uC815", normalized: "set" },
1482
- // Object particle 을 (after consonant)
1483
- { native: "\uC744\uD1A0\uAE00", normalized: "toggle" },
1484
- { native: "\uC744\uC804\uD658", normalized: "toggle" },
1485
- { native: "\uC744\uCD94\uAC00", normalized: "add" },
1486
- { native: "\uC744\uC81C\uAC70", normalized: "remove" },
1487
- { native: "\uC744\uC0AD\uC81C", normalized: "remove" },
1488
- { native: "\uC744\uC99D\uAC00", normalized: "increment" },
1489
- { native: "\uC744\uAC10\uC18C", normalized: "decrement" },
1490
- { native: "\uC744\uD45C\uC2DC", normalized: "show" },
1491
- { native: "\uC744\uC228\uAE30\uB2E4", normalized: "hide" },
1492
- { native: "\uC744\uC124\uC815", normalized: "set" },
1493
1469
  // Time units
1494
1470
  { native: "\uCD08", normalized: "s" },
1495
1471
  { native: "\uBC00\uB9AC\uCD08", normalized: "ms" },
1496
1472
  { native: "\uBD84", normalized: "m" },
1497
1473
  { native: "\uC2DC\uAC04", normalized: "h" }
1474
+ // Note: Attached particle+verb forms (를토글, 을토글, etc.) are intentionally
1475
+ // NOT included because they cause ambiguous parsing. The separate particle + verb
1476
+ // pattern (를 + 토글) is preferred for consistent semantic analysis.
1477
+ // This follows the same approach as the Japanese tokenizer.
1498
1478
  ];
1499
1479
  var KOREAN_TIME_UNITS = [
1500
1480
  { pattern: "\uBC00\uB9AC\uCD08", suffix: "ms", length: 3 },
@@ -1565,7 +1545,13 @@ var KoreanTokenizer = class extends BaseTokenizer {
1565
1545
  if (isKorean(input[pos])) {
1566
1546
  const wordToken = this.extractKoreanWord(input, pos);
1567
1547
  if (wordToken) {
1568
- tokens.push(wordToken);
1548
+ const splitResult = this.trySplitTemporalSuffix(wordToken);
1549
+ if (splitResult) {
1550
+ tokens.push(splitResult.stemToken);
1551
+ tokens.push(splitResult.suffixToken);
1552
+ } else {
1553
+ tokens.push(wordToken);
1554
+ }
1569
1555
  pos = wordToken.position.end;
1570
1556
  continue;
1571
1557
  }
@@ -1644,6 +1630,13 @@ var KoreanTokenizer = class extends BaseTokenizer {
1644
1630
  }
1645
1631
  }
1646
1632
  if (!allKorean) continue;
1633
+ if (PARTICLES.has(candidate) && startPos === startPos) {
1634
+ const afterCandidate = startPos + len;
1635
+ const nextChar = afterCandidate < input.length ? input[afterCandidate] : "";
1636
+ if (nextChar === "" || isWhitespace(nextChar) || !isKorean(nextChar)) {
1637
+ return null;
1638
+ }
1639
+ }
1647
1640
  const keywordEntry2 = this.lookupKeyword(candidate);
1648
1641
  if (keywordEntry2) {
1649
1642
  return createToken(
@@ -1661,6 +1654,9 @@ var KoreanTokenizer = class extends BaseTokenizer {
1661
1654
  while (pos < input.length) {
1662
1655
  const char = input[pos];
1663
1656
  const nextChar = pos + 1 < input.length ? input[pos + 1] : "";
1657
+ if (word.length === 0 && SINGLE_CHAR_PARTICLES.has(char)) {
1658
+ return null;
1659
+ }
1664
1660
  if (SINGLE_CHAR_PARTICLES.has(char) && word.length > 0) {
1665
1661
  const isWordBoundary = nextChar === "" || isWhitespace(nextChar) || !isKorean(nextChar) || SINGLE_CHAR_PARTICLES.has(nextChar);
1666
1662
  if (isWordBoundary) {
@@ -1687,6 +1683,9 @@ var KoreanTokenizer = class extends BaseTokenizer {
1687
1683
  }
1688
1684
  }
1689
1685
  if (!word) return null;
1686
+ if (PARTICLES.has(word)) {
1687
+ return null;
1688
+ }
1690
1689
  const keywordEntry = this.lookupKeyword(word);
1691
1690
  if (keywordEntry) {
1692
1691
  return createToken(word, "keyword", createPosition(startPos, pos), keywordEntry.normalized);
@@ -1717,6 +1716,39 @@ var KoreanTokenizer = class extends BaseTokenizer {
1717
1716
  skipWhitespace: false
1718
1717
  });
1719
1718
  }
1719
+ /**
1720
+ * Try to split a temporal event suffix from a word token.
1721
+ * This handles compact forms like 클릭할때 → 클릭 + 할때
1722
+ *
1723
+ * @returns Split tokens if a suffix is found, null otherwise
1724
+ */
1725
+ trySplitTemporalSuffix(wordToken) {
1726
+ const word = wordToken.value;
1727
+ for (const suffix of TEMPORAL_EVENT_SUFFIXES) {
1728
+ if (word.endsWith(suffix) && word.length > suffix.length) {
1729
+ const stem = word.slice(0, -suffix.length);
1730
+ const stemLower = stem.toLowerCase();
1731
+ const keywordEntry = this.lookupKeyword(stemLower);
1732
+ if (!keywordEntry) continue;
1733
+ const stemEnd = wordToken.position.start + stem.length;
1734
+ const stemToken = createToken(
1735
+ stem,
1736
+ "keyword",
1737
+ createPosition(wordToken.position.start, stemEnd),
1738
+ keywordEntry.normalized
1739
+ );
1740
+ const suffixToken = createToken(
1741
+ suffix,
1742
+ "keyword",
1743
+ createPosition(stemEnd, wordToken.position.end),
1744
+ "when"
1745
+ // Normalize temporal suffixes to 'when'
1746
+ );
1747
+ return { stemToken, suffixToken };
1748
+ }
1749
+ }
1750
+ return null;
1751
+ }
1720
1752
  };
1721
1753
  var koreanTokenizer = new KoreanTokenizer();
1722
1754