@lokascript/semantic 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. package/dist/browser-ar.ar.global.js +2 -2
  2. package/dist/browser-core.core.global.js +2 -2
  3. package/dist/browser-de.de.global.js +2 -2
  4. package/dist/browser-east-asian.east-asian.global.js +2 -2
  5. package/dist/browser-en-tr.en-tr.global.js +2 -2
  6. package/dist/browser-en.en.global.js +2 -2
  7. package/dist/browser-es-en.es-en.global.js +2 -2
  8. package/dist/browser-es.es.global.js +2 -2
  9. package/dist/browser-fr.fr.global.js +2 -2
  10. package/dist/browser-id.id.global.js +2 -2
  11. package/dist/browser-ja.ja.global.js +2 -2
  12. package/dist/browser-ko.ko.global.js +2 -2
  13. package/dist/browser-lazy.lazy.global.js +2 -2
  14. package/dist/browser-priority.priority.global.js +2 -2
  15. package/dist/browser-pt.pt.global.js +2 -2
  16. package/dist/browser-qu.qu.global.js +2 -2
  17. package/dist/browser-sw.sw.global.js +2 -2
  18. package/dist/browser-tr.tr.global.js +2 -2
  19. package/dist/browser-western.western.global.js +2 -2
  20. package/dist/browser-zh.zh.global.js +2 -2
  21. package/dist/browser.global.js +2 -2
  22. package/dist/browser.global.js.map +1 -1
  23. package/dist/index.cjs +13042 -17462
  24. package/dist/index.cjs.map +1 -1
  25. package/dist/index.d.cts +49 -5
  26. package/dist/index.d.ts +49 -5
  27. package/dist/index.js +14044 -18464
  28. package/dist/index.js.map +1 -1
  29. package/dist/languages/ar.d.ts +1 -1
  30. package/dist/languages/ar.js +31 -44
  31. package/dist/languages/ar.js.map +1 -1
  32. package/dist/languages/de.d.ts +1 -1
  33. package/dist/languages/de.js +14 -2
  34. package/dist/languages/de.js.map +1 -1
  35. package/dist/languages/en.d.ts +1 -1
  36. package/dist/languages/en.js +558 -12
  37. package/dist/languages/en.js.map +1 -1
  38. package/dist/languages/es.d.ts +1 -1
  39. package/dist/languages/es.js +16 -0
  40. package/dist/languages/es.js.map +1 -1
  41. package/dist/languages/fr.d.ts +1 -1
  42. package/dist/languages/fr.js +14 -2
  43. package/dist/languages/fr.js.map +1 -1
  44. package/dist/languages/id.d.ts +1 -1
  45. package/dist/languages/id.js +14 -2
  46. package/dist/languages/id.js.map +1 -1
  47. package/dist/languages/ja.d.ts +1 -1
  48. package/dist/languages/ja.js +18 -3
  49. package/dist/languages/ja.js.map +1 -1
  50. package/dist/languages/ko.d.ts +8 -1
  51. package/dist/languages/ko.js +75 -43
  52. package/dist/languages/ko.js.map +1 -1
  53. package/dist/languages/pt.d.ts +1 -1
  54. package/dist/languages/pt.js +17 -0
  55. package/dist/languages/pt.js.map +1 -1
  56. package/dist/languages/qu.d.ts +12 -1
  57. package/dist/languages/qu.js +77 -2
  58. package/dist/languages/qu.js.map +1 -1
  59. package/dist/languages/sw.d.ts +1 -1
  60. package/dist/languages/sw.js.map +1 -1
  61. package/dist/languages/tr.d.ts +9 -1
  62. package/dist/languages/tr.js +96 -72
  63. package/dist/languages/tr.js.map +1 -1
  64. package/dist/languages/zh.d.ts +1 -1
  65. package/dist/languages/zh.js +16 -0
  66. package/dist/languages/zh.js.map +1 -1
  67. package/dist/{types-C4dcj53L.d.ts → types-BY3Id07j.d.ts} +20 -5
  68. package/package.json +20 -29
  69. package/src/generators/command-schemas.ts +21 -10
  70. package/src/generators/event-handler-generator.ts +50 -44
  71. package/src/generators/language-profiles.ts +6 -0
  72. package/src/generators/pattern-generator.ts +883 -1
  73. package/src/generators/profiles/arabic.ts +19 -3
  74. package/src/generators/profiles/bengali.ts +12 -1
  75. package/src/generators/profiles/chinese.ts +15 -0
  76. package/src/generators/profiles/french.ts +12 -1
  77. package/src/generators/profiles/german.ts +12 -1
  78. package/src/generators/profiles/hebrew.ts +148 -0
  79. package/src/generators/profiles/hindi.ts +12 -1
  80. package/src/generators/profiles/index.ts +2 -0
  81. package/src/generators/profiles/indonesian.ts +12 -1
  82. package/src/generators/profiles/italian.ts +16 -0
  83. package/src/generators/profiles/japanese.ts +11 -2
  84. package/src/generators/profiles/korean.ts +15 -1
  85. package/src/generators/profiles/polish.ts +12 -0
  86. package/src/generators/profiles/portuguese.ts +16 -0
  87. package/src/generators/profiles/russian.ts +11 -0
  88. package/src/generators/profiles/spanish.ts +15 -0
  89. package/src/generators/profiles/spanishMexico.ts +176 -0
  90. package/src/generators/profiles/thai.ts +11 -0
  91. package/src/generators/profiles/turkish.ts +49 -7
  92. package/src/generators/profiles/types.ts +21 -5
  93. package/src/generators/profiles/ukrainian.ts +11 -0
  94. package/src/generators/profiles/vietnamese.ts +11 -0
  95. package/src/language-building-schema.ts +111 -0
  96. package/src/languages/_all.ts +5 -1
  97. package/src/languages/es-MX.ts +32 -0
  98. package/src/languages/he.ts +15 -0
  99. package/src/parser/pattern-matcher.ts +10 -1
  100. package/src/parser/semantic-parser.ts +3 -0
  101. package/src/patterns/add/ar.ts +3 -59
  102. package/src/patterns/add/index.ts +5 -1
  103. package/src/patterns/add/ja.ts +3 -81
  104. package/src/patterns/add/ko.ts +3 -62
  105. package/src/patterns/add/qu.ts +69 -0
  106. package/src/patterns/add/tr.ts +3 -59
  107. package/src/patterns/builders.ts +1 -0
  108. package/src/patterns/decrement/tr.ts +3 -36
  109. package/src/patterns/event-handler/ar.ts +3 -139
  110. package/src/patterns/event-handler/he.ts +15 -0
  111. package/src/patterns/event-handler/index.ts +5 -1
  112. package/src/patterns/event-handler/ja.ts +3 -106
  113. package/src/patterns/event-handler/ko.ts +3 -121
  114. package/src/patterns/event-handler/ms.ts +45 -20
  115. package/src/patterns/event-handler/tr.ts +3 -158
  116. package/src/patterns/get/ar.ts +3 -37
  117. package/src/patterns/get/ja.ts +3 -41
  118. package/src/patterns/get/ko.ts +3 -41
  119. package/src/patterns/grammar-transformed/ja.ts +3 -1701
  120. package/src/patterns/grammar-transformed/ko.ts +3 -1299
  121. package/src/patterns/grammar-transformed/tr.ts +3 -1055
  122. package/src/patterns/hide/ar.ts +3 -55
  123. package/src/patterns/hide/ja.ts +3 -57
  124. package/src/patterns/hide/ko.ts +3 -57
  125. package/src/patterns/hide/tr.ts +3 -53
  126. package/src/patterns/increment/tr.ts +3 -40
  127. package/src/patterns/put/ar.ts +3 -62
  128. package/src/patterns/put/ja.ts +3 -63
  129. package/src/patterns/put/ko.ts +3 -55
  130. package/src/patterns/put/tr.ts +3 -55
  131. package/src/patterns/remove/ar.ts +3 -59
  132. package/src/patterns/remove/index.ts +5 -1
  133. package/src/patterns/remove/ja.ts +3 -62
  134. package/src/patterns/remove/ko.ts +3 -66
  135. package/src/patterns/remove/qu.ts +69 -0
  136. package/src/patterns/remove/tr.ts +3 -66
  137. package/src/patterns/set/ar.ts +3 -72
  138. package/src/patterns/set/ja.ts +3 -74
  139. package/src/patterns/set/ko.ts +3 -73
  140. package/src/patterns/set/tr.ts +3 -95
  141. package/src/patterns/show/ar.ts +3 -55
  142. package/src/patterns/show/ja.ts +3 -57
  143. package/src/patterns/show/ko.ts +3 -61
  144. package/src/patterns/show/tr.ts +3 -53
  145. package/src/patterns/take/ar.ts +3 -39
  146. package/src/patterns/toggle/ar.ts +3 -49
  147. package/src/patterns/toggle/index.ts +5 -1
  148. package/src/patterns/toggle/ja.ts +3 -144
  149. package/src/patterns/toggle/ko.ts +3 -101
  150. package/src/patterns/toggle/qu.ts +90 -0
  151. package/src/patterns/toggle/tr.ts +3 -76
  152. package/src/registry.ts +179 -15
  153. package/src/tokenizers/arabic.ts +13 -46
  154. package/src/tokenizers/bengali.ts +2 -16
  155. package/src/tokenizers/he.ts +542 -0
  156. package/src/tokenizers/index.ts +1 -0
  157. package/src/tokenizers/japanese.ts +3 -1
  158. package/src/tokenizers/korean.ts +104 -48
  159. package/src/tokenizers/ms.ts +3 -0
  160. package/src/tokenizers/quechua.ts +101 -2
  161. package/src/tokenizers/turkish.ts +64 -69
  162. package/src/types.ts +13 -0
@@ -1128,10 +1128,38 @@ var turkishProfile = {
1128
1128
  }
1129
1129
  },
1130
1130
  roleMarkers: {
1131
- patient: { primary: "i", alternatives: ["\u0131", "u", "\xFC"], position: "after" },
1132
- // Accusative
1133
- destination: { primary: "e", alternatives: ["a", "de", "da", "te", "ta"], position: "after" },
1134
- // Dative/Locative
1131
+ patient: {
1132
+ primary: "i",
1133
+ alternatives: ["\u0131", "u", "\xFC", "yi", "y\u0131", "yu", "y\xFC", "ni", "n\u0131", "nu", "n\xFC"],
1134
+ position: "after"
1135
+ },
1136
+ // Accusative (with buffer consonants y/n)
1137
+ destination: {
1138
+ primary: "e",
1139
+ // Include both dative (e/a) and genitive (ın/in/un/ün) for possessive patterns
1140
+ // Genitive is used in "X's Y" patterns: #button ın .active = "#button's .active"
1141
+ alternatives: [
1142
+ "a",
1143
+ "ye",
1144
+ "ya",
1145
+ "ne",
1146
+ "na",
1147
+ "de",
1148
+ "da",
1149
+ "te",
1150
+ "ta",
1151
+ "\u0131n",
1152
+ "in",
1153
+ "un",
1154
+ "\xFCn",
1155
+ "n\u0131n",
1156
+ "nin",
1157
+ "nun",
1158
+ "n\xFCn"
1159
+ ],
1160
+ position: "after"
1161
+ },
1162
+ // Dative/Locative + Genitive (with buffer consonants)
1135
1163
  source: { primary: "den", alternatives: ["dan", "ten", "tan"], position: "after" },
1136
1164
  // Ablative
1137
1165
  style: { primary: "le", alternatives: ["la", "yle", "yla"], position: "after" },
@@ -1147,10 +1175,12 @@ var turkishProfile = {
1147
1175
  // Content operations
1148
1176
  put: { primary: "koy", normalized: "put" },
1149
1177
  append: { primary: "ekle", normalized: "append" },
1150
- take: { primary: "al", normalized: "take" },
1178
+ take: { primary: "tut", normalized: "take" },
1179
+ // al removed to avoid collision with get
1151
1180
  make: { primary: "yap", normalized: "make" },
1152
1181
  clone: { primary: "kopyala", normalized: "clone" },
1153
- swap: { primary: "de\u011Fi\u015Ftir", alternatives: ["takas"], normalized: "swap" },
1182
+ swap: { primary: "takas", normalized: "swap" },
1183
+ // Removed değiştir alternative to avoid collision with toggle
1154
1184
  morph: { primary: "d\xF6n\xFC\u015Ft\xFCr", alternatives: ["\u015Fekil de\u011Fi\u015Ftir"], normalized: "morph" },
1155
1185
  // Variable operations
1156
1186
  set: { primary: "ayarla", alternatives: ["yap", "belirle"], normalized: "set" },
@@ -1167,8 +1197,14 @@ var turkishProfile = {
1167
1197
  trigger: { primary: "tetikle", normalized: "trigger" },
1168
1198
  send: { primary: "g\xF6nder", normalized: "send" },
1169
1199
  // DOM focus
1170
- focus: { primary: "odak", normalized: "focus" },
1171
- blur: { primary: "bulan\u0131k", normalized: "blur" },
1200
+ focus: { primary: "odak", alternatives: ["odaklanma"], normalized: "focus" },
1201
+ blur: { primary: "bulan\u0131k", alternatives: ["bulan\u0131kl\u0131k"], normalized: "blur" },
1202
+ // Common event names (for event handler patterns)
1203
+ click: { primary: "t\u0131klama", alternatives: ["t\u0131kla"], normalized: "click" },
1204
+ hover: { primary: "\xFCzerine gelme", alternatives: ["\xFCzerinde gezinme"], normalized: "hover" },
1205
+ submit: { primary: "g\xF6nderme", alternatives: ["g\xF6nder"], normalized: "submit" },
1206
+ input: { primary: "giri\u015F", alternatives: ["girdi"], normalized: "input" },
1207
+ change: { primary: "de\u011Fi\u015Fiklik", alternatives: ["de\u011Fi\u015Fim"], normalized: "change" },
1172
1208
  // Navigation
1173
1209
  go: { primary: "git", normalized: "go" },
1174
1210
  // Async
@@ -1207,7 +1243,16 @@ var turkishProfile = {
1207
1243
  // Event modifiers (for repeat until event)
1208
1244
  until: { primary: "kadar", normalized: "until" },
1209
1245
  event: { primary: "olay", normalized: "event" },
1210
- from: { primary: "-den", alternatives: ["-dan"], normalized: "from" }
1246
+ from: { primary: "den", alternatives: ["dan"], normalized: "from" }
1247
+ },
1248
+ eventHandler: {
1249
+ // Event marker: da/de/ta/te (locative case suffix with vowel harmony), used in SOV pattern
1250
+ // Pattern: [event] da [patient] i [action]
1251
+ // Example: tıklama da .active i değiştir
1252
+ // Note: Vowel harmony variants (da/de/ta/te) should be handled by vowel harmony expansion
1253
+ eventMarker: { primary: "da", alternatives: ["de", "ta", "te"], position: "after" },
1254
+ temporalMarkers: ["d\u0131\u011F\u0131nda", "di\u011Finde"]
1255
+ // temporal converb suffixes (when)
1211
1256
  }
1212
1257
  };
1213
1258
 
@@ -1321,15 +1366,9 @@ var TURKISH_EXTRAS = [
1321
1366
  { native: "tus_bas", normalized: "keydown" },
1322
1367
  { native: "tu\u015F_b\u0131rak", normalized: "keyup" },
1323
1368
  { native: "tus_birak", normalized: "keyup" },
1324
- // References
1325
- { native: "ben", normalized: "me" },
1369
+ // References (possessive forms not in profile)
1326
1370
  { native: "benim", normalized: "my" },
1327
- { native: "o", normalized: "it" },
1328
1371
  { native: "onun", normalized: "its" },
1329
- { native: "sonu\xE7", normalized: "result" },
1330
- { native: "sonuc", normalized: "result" },
1331
- { native: "olay", normalized: "event" },
1332
- { native: "hedef", normalized: "target" },
1333
1372
  // Time units
1334
1373
  { native: "saniye", normalized: "s" },
1335
1374
  { native: "milisaniye", normalized: "ms" },
@@ -1339,59 +1378,9 @@ var TURKISH_EXTRAS = [
1339
1378
  { native: "ve", normalized: "and" },
1340
1379
  { native: "veya", normalized: "or" },
1341
1380
  { native: "de\u011Fil", normalized: "not" },
1342
- { native: "degil", normalized: "not" },
1343
- // Event triggers (on)
1344
- { native: "\xFCzerinde", normalized: "on" },
1345
- { native: "uzerinde", normalized: "on" },
1346
- { native: "oldu\u011Funda", normalized: "on" },
1347
- { native: "oldugunda", normalized: "on" },
1348
- // Command overrides (ensure correct mapping when profile has multiple meanings)
1349
- { native: "ekle", normalized: "add" },
1350
- // Profile may have this as 'append'
1351
- { native: "de\u011Fi\u015Ftir", normalized: "toggle" },
1352
- // Profile has this as 'swap'
1353
- // Diacritic-free variants of commands
1354
- { native: "de\u011Fistir", normalized: "toggle" },
1355
- { native: "kaldir", normalized: "remove" },
1356
- { native: "yerlestir", normalized: "put" },
1357
- { native: "olustur", normalized: "make" },
1358
- { native: "artir", normalized: "increment" },
1359
- { native: "yazdir", normalized: "log" },
1360
- { native: "goster", normalized: "show" },
1361
- { native: "gecis", normalized: "transition" },
1362
- { native: "atesle", normalized: "trigger" },
1363
- { native: "gonder", normalized: "send" },
1364
- { native: "bulaniklastir", normalized: "blur" },
1365
- { native: "odak_kaldir", normalized: "blur" },
1366
- { native: "yonlendir", normalized: "go" },
1367
- { native: "cek", normalized: "fetch" },
1368
- { native: "yerles", normalized: "settle" },
1369
- { native: "eger", normalized: "if" },
1370
- { native: "degilse", normalized: "else" },
1371
- { native: "firlat", normalized: "throw" },
1372
- { native: "cagir", normalized: "call" },
1373
- { native: "don", normalized: "return" },
1374
- { native: "dondur", normalized: "return" },
1375
- { native: "eszamansiz", normalized: "async" },
1376
- { native: "soyle", normalized: "tell" },
1377
- { native: "varsayilan", normalized: "default" },
1378
- { native: "baslat", normalized: "init" },
1379
- { native: "basla", normalized: "init" },
1380
- { native: "davranis", normalized: "behavior" },
1381
- { native: "yukle", normalized: "install" },
1382
- { native: "olc", normalized: "measure" },
1383
- { native: "icine", normalized: "into" },
1384
- { native: "once", normalized: "before" },
1385
- { native: "icin", normalized: "for" },
1386
- // Colloquial forms
1387
- { native: "al", normalized: "get" },
1388
- { native: "yap", normalized: "set" },
1389
- // Control flow helpers
1390
- { native: "o_zaman", normalized: "then" },
1391
- { native: "bitir", normalized: "end" },
1392
- // Case suffix modifiers
1393
- { native: "-den", normalized: "from" },
1394
- { native: "-dan", normalized: "from" }
1381
+ { native: "degil", normalized: "not" }
1382
+ // Note: Command synonyms and diacritic-free variants should be in profile alternatives.
1383
+ // Event triggers (üzerinde, olduğunda) should be in profile as 'on' alternatives.
1395
1384
  ];
1396
1385
  var TURKISH_TIME_UNITS = [
1397
1386
  { pattern: "milisaniye", suffix: "ms", length: 10, caseInsensitive: true },
@@ -1463,6 +1452,12 @@ var TurkishTokenizer = class extends BaseTokenizer {
1463
1452
  pos = varToken.position.end;
1464
1453
  continue;
1465
1454
  }
1455
+ const phraseToken = this.tryMultiWordPhrase(input, pos);
1456
+ if (phraseToken) {
1457
+ tokens.push(phraseToken);
1458
+ pos = phraseToken.position.end;
1459
+ continue;
1460
+ }
1466
1461
  if (isTurkishLetter2(input[pos])) {
1467
1462
  const wordToken = this.extractTurkishWord(input, pos);
1468
1463
  if (wordToken) {
@@ -1485,6 +1480,32 @@ var TurkishTokenizer = class extends BaseTokenizer {
1485
1480
  if (/^\d/.test(token)) return "literal";
1486
1481
  return "identifier";
1487
1482
  }
1483
+ /**
1484
+ * Try to match multi-word phrases that function as single units.
1485
+ * Multi-word phrases are included in profileKeywords and sorted longest-first,
1486
+ * so they'll be matched before their constituent words.
1487
+ *
1488
+ * Examples: "üzerine gelme" (hover), "fare üzerinde" (mouseover)
1489
+ */
1490
+ tryMultiWordPhrase(input, pos) {
1491
+ for (const entry of this.profileKeywords) {
1492
+ if (!entry.native.includes(" ")) continue;
1493
+ const phrase = entry.native;
1494
+ const candidate = input.slice(pos, pos + phrase.length).toLowerCase();
1495
+ if (candidate === phrase.toLowerCase()) {
1496
+ const nextPos = pos + phrase.length;
1497
+ if (nextPos >= input.length || isWhitespace(input[nextPos]) || !isTurkishLetter2(input[nextPos])) {
1498
+ return createToken(
1499
+ input.slice(pos, pos + phrase.length),
1500
+ "keyword",
1501
+ createPosition(pos, nextPos),
1502
+ entry.normalized
1503
+ );
1504
+ }
1505
+ }
1506
+ }
1507
+ return null;
1508
+ }
1488
1509
  /**
1489
1510
  * Extract a Turkish word.
1490
1511
  * Uses morphological normalization to handle verb conjugations.
@@ -1497,13 +1518,16 @@ var TurkishTokenizer = class extends BaseTokenizer {
1497
1518
  }
1498
1519
  if (!word) return null;
1499
1520
  const lowerWord = word.toLowerCase();
1500
- const keywordEntry = this.lookupKeyword(lowerWord);
1501
- if (keywordEntry) {
1502
- return createToken(word, "keyword", createPosition(startPos, pos), keywordEntry.normalized);
1521
+ if (CASE_SUFFIXES.has(lowerWord)) {
1522
+ return createToken(word, "particle", createPosition(startPos, pos));
1503
1523
  }
1504
1524
  if (POSTPOSITIONS.has(lowerWord)) {
1505
1525
  return createToken(word, "particle", createPosition(startPos, pos));
1506
1526
  }
1527
+ const keywordEntry = this.lookupKeyword(lowerWord);
1528
+ if (keywordEntry) {
1529
+ return createToken(word, "keyword", createPosition(startPos, pos), keywordEntry.normalized);
1530
+ }
1507
1531
  const morphToken = this.tryMorphKeywordMatch(lowerWord, startPos, pos);
1508
1532
  if (morphToken) return morphToken;
1509
1533
  return createToken(word, "identifier", createPosition(startPos, pos));