@lokascript/semantic 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-ar.ar.global.js +2 -2
- package/dist/browser-core.core.global.js +2 -2
- package/dist/browser-de.de.global.js +2 -2
- package/dist/browser-east-asian.east-asian.global.js +2 -2
- package/dist/browser-en-tr.en-tr.global.js +2 -2
- package/dist/browser-en.en.global.js +2 -2
- package/dist/browser-es-en.es-en.global.js +2 -2
- package/dist/browser-es.es.global.js +2 -2
- package/dist/browser-fr.fr.global.js +2 -2
- package/dist/browser-id.id.global.js +2 -2
- package/dist/browser-ja.ja.global.js +2 -2
- package/dist/browser-ko.ko.global.js +2 -2
- package/dist/browser-lazy.lazy.global.js +2 -2
- package/dist/browser-priority.priority.global.js +2 -2
- package/dist/browser-pt.pt.global.js +2 -2
- package/dist/browser-qu.qu.global.js +2 -2
- package/dist/browser-sw.sw.global.js +2 -2
- package/dist/browser-tr.tr.global.js +2 -2
- package/dist/browser-western.western.global.js +2 -2
- package/dist/browser-zh.zh.global.js +2 -2
- package/dist/browser.global.js +2 -2
- package/dist/browser.global.js.map +1 -1
- package/dist/index.cjs +13042 -17462
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +49 -5
- package/dist/index.d.ts +49 -5
- package/dist/index.js +14044 -18464
- package/dist/index.js.map +1 -1
- package/dist/languages/ar.d.ts +1 -1
- package/dist/languages/ar.js +31 -44
- package/dist/languages/ar.js.map +1 -1
- package/dist/languages/de.d.ts +1 -1
- package/dist/languages/de.js +14 -2
- package/dist/languages/de.js.map +1 -1
- package/dist/languages/en.d.ts +1 -1
- package/dist/languages/en.js +558 -12
- package/dist/languages/en.js.map +1 -1
- package/dist/languages/es.d.ts +1 -1
- package/dist/languages/es.js +16 -0
- package/dist/languages/es.js.map +1 -1
- package/dist/languages/fr.d.ts +1 -1
- package/dist/languages/fr.js +14 -2
- package/dist/languages/fr.js.map +1 -1
- package/dist/languages/id.d.ts +1 -1
- package/dist/languages/id.js +14 -2
- package/dist/languages/id.js.map +1 -1
- package/dist/languages/ja.d.ts +1 -1
- package/dist/languages/ja.js +18 -3
- package/dist/languages/ja.js.map +1 -1
- package/dist/languages/ko.d.ts +8 -1
- package/dist/languages/ko.js +75 -43
- package/dist/languages/ko.js.map +1 -1
- package/dist/languages/pt.d.ts +1 -1
- package/dist/languages/pt.js +17 -0
- package/dist/languages/pt.js.map +1 -1
- package/dist/languages/qu.d.ts +12 -1
- package/dist/languages/qu.js +77 -2
- package/dist/languages/qu.js.map +1 -1
- package/dist/languages/sw.d.ts +1 -1
- package/dist/languages/sw.js.map +1 -1
- package/dist/languages/tr.d.ts +9 -1
- package/dist/languages/tr.js +96 -72
- package/dist/languages/tr.js.map +1 -1
- package/dist/languages/zh.d.ts +1 -1
- package/dist/languages/zh.js +16 -0
- package/dist/languages/zh.js.map +1 -1
- package/dist/{types-C4dcj53L.d.ts → types-BY3Id07j.d.ts} +20 -5
- package/package.json +20 -29
- package/src/generators/command-schemas.ts +21 -10
- package/src/generators/event-handler-generator.ts +50 -44
- package/src/generators/language-profiles.ts +6 -0
- package/src/generators/pattern-generator.ts +883 -1
- package/src/generators/profiles/arabic.ts +19 -3
- package/src/generators/profiles/bengali.ts +12 -1
- package/src/generators/profiles/chinese.ts +15 -0
- package/src/generators/profiles/french.ts +12 -1
- package/src/generators/profiles/german.ts +12 -1
- package/src/generators/profiles/hebrew.ts +148 -0
- package/src/generators/profiles/hindi.ts +12 -1
- package/src/generators/profiles/index.ts +2 -0
- package/src/generators/profiles/indonesian.ts +12 -1
- package/src/generators/profiles/italian.ts +16 -0
- package/src/generators/profiles/japanese.ts +11 -2
- package/src/generators/profiles/korean.ts +15 -1
- package/src/generators/profiles/polish.ts +12 -0
- package/src/generators/profiles/portuguese.ts +16 -0
- package/src/generators/profiles/russian.ts +11 -0
- package/src/generators/profiles/spanish.ts +15 -0
- package/src/generators/profiles/spanishMexico.ts +176 -0
- package/src/generators/profiles/thai.ts +11 -0
- package/src/generators/profiles/turkish.ts +49 -7
- package/src/generators/profiles/types.ts +21 -5
- package/src/generators/profiles/ukrainian.ts +11 -0
- package/src/generators/profiles/vietnamese.ts +11 -0
- package/src/language-building-schema.ts +111 -0
- package/src/languages/_all.ts +5 -1
- package/src/languages/es-MX.ts +32 -0
- package/src/languages/he.ts +15 -0
- package/src/parser/pattern-matcher.ts +10 -1
- package/src/parser/semantic-parser.ts +3 -0
- package/src/patterns/add/ar.ts +3 -59
- package/src/patterns/add/index.ts +5 -1
- package/src/patterns/add/ja.ts +3 -81
- package/src/patterns/add/ko.ts +3 -62
- package/src/patterns/add/qu.ts +69 -0
- package/src/patterns/add/tr.ts +3 -59
- package/src/patterns/builders.ts +1 -0
- package/src/patterns/decrement/tr.ts +3 -36
- package/src/patterns/event-handler/ar.ts +3 -139
- package/src/patterns/event-handler/he.ts +15 -0
- package/src/patterns/event-handler/index.ts +5 -1
- package/src/patterns/event-handler/ja.ts +3 -106
- package/src/patterns/event-handler/ko.ts +3 -121
- package/src/patterns/event-handler/ms.ts +45 -20
- package/src/patterns/event-handler/tr.ts +3 -158
- package/src/patterns/get/ar.ts +3 -37
- package/src/patterns/get/ja.ts +3 -41
- package/src/patterns/get/ko.ts +3 -41
- package/src/patterns/grammar-transformed/ja.ts +3 -1701
- package/src/patterns/grammar-transformed/ko.ts +3 -1299
- package/src/patterns/grammar-transformed/tr.ts +3 -1055
- package/src/patterns/hide/ar.ts +3 -55
- package/src/patterns/hide/ja.ts +3 -57
- package/src/patterns/hide/ko.ts +3 -57
- package/src/patterns/hide/tr.ts +3 -53
- package/src/patterns/increment/tr.ts +3 -40
- package/src/patterns/put/ar.ts +3 -62
- package/src/patterns/put/ja.ts +3 -63
- package/src/patterns/put/ko.ts +3 -55
- package/src/patterns/put/tr.ts +3 -55
- package/src/patterns/remove/ar.ts +3 -59
- package/src/patterns/remove/index.ts +5 -1
- package/src/patterns/remove/ja.ts +3 -62
- package/src/patterns/remove/ko.ts +3 -66
- package/src/patterns/remove/qu.ts +69 -0
- package/src/patterns/remove/tr.ts +3 -66
- package/src/patterns/set/ar.ts +3 -72
- package/src/patterns/set/ja.ts +3 -74
- package/src/patterns/set/ko.ts +3 -73
- package/src/patterns/set/tr.ts +3 -95
- package/src/patterns/show/ar.ts +3 -55
- package/src/patterns/show/ja.ts +3 -57
- package/src/patterns/show/ko.ts +3 -61
- package/src/patterns/show/tr.ts +3 -53
- package/src/patterns/take/ar.ts +3 -39
- package/src/patterns/toggle/ar.ts +3 -49
- package/src/patterns/toggle/index.ts +5 -1
- package/src/patterns/toggle/ja.ts +3 -144
- package/src/patterns/toggle/ko.ts +3 -101
- package/src/patterns/toggle/qu.ts +90 -0
- package/src/patterns/toggle/tr.ts +3 -76
- package/src/registry.ts +179 -15
- package/src/tokenizers/arabic.ts +13 -46
- package/src/tokenizers/bengali.ts +2 -16
- package/src/tokenizers/he.ts +542 -0
- package/src/tokenizers/index.ts +1 -0
- package/src/tokenizers/japanese.ts +3 -1
- package/src/tokenizers/korean.ts +104 -48
- package/src/tokenizers/ms.ts +3 -0
- package/src/tokenizers/quechua.ts +101 -2
- package/src/tokenizers/turkish.ts +64 -69
- package/src/types.ts +13 -0
package/dist/languages/ko.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { B as BaseTokenizer, T as TokenStream, a as TokenKind, L as LanguageProfile } from '../types-C4dcj53L.js';
|
|
1
|
+
import { B as BaseTokenizer, T as TokenStream, a as TokenKind, L as LanguageProfile } from '../types-BY3Id07j.js';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Korean Tokenizer
|
|
@@ -33,6 +33,13 @@ declare class KoreanTokenizer extends BaseTokenizer {
|
|
|
33
33
|
* Korean time units attach directly without whitespace.
|
|
34
34
|
*/
|
|
35
35
|
private extractKoreanNumber;
|
|
36
|
+
/**
|
|
37
|
+
* Try to split a temporal event suffix from a word token.
|
|
38
|
+
* This handles compact forms like 클릭할때 → 클릭 + 할때
|
|
39
|
+
*
|
|
40
|
+
* @returns Split tokens if a suffix is found, null otherwise
|
|
41
|
+
*/
|
|
42
|
+
private trySplitTemporalSuffix;
|
|
36
43
|
}
|
|
37
44
|
/**
|
|
38
45
|
* Singleton instance.
|
package/dist/languages/ko.js
CHANGED
|
@@ -1162,7 +1162,7 @@ var koreanProfile = {
|
|
|
1162
1162
|
},
|
|
1163
1163
|
roleMarkers: {
|
|
1164
1164
|
patient: { primary: "\uC744", alternatives: ["\uB97C"], position: "after" },
|
|
1165
|
-
destination: { primary: "\uC5D0", alternatives: ["\uC73C\uB85C", "\uB85C", "\uC5D0\uC11C"], position: "after" },
|
|
1165
|
+
destination: { primary: "\uC5D0", alternatives: ["\uC73C\uB85C", "\uB85C", "\uC5D0\uC11C", "\uC758"], position: "after" },
|
|
1166
1166
|
source: { primary: "\uC5D0\uC11C", alternatives: ["\uBD80\uD130"], position: "after" },
|
|
1167
1167
|
style: { primary: "\uB85C", alternatives: ["\uC73C\uB85C"], position: "after" },
|
|
1168
1168
|
event: { primary: "\uC744", alternatives: ["\uB97C"], position: "after" }
|
|
@@ -1198,6 +1198,12 @@ var koreanProfile = {
|
|
|
1198
1198
|
// DOM focus
|
|
1199
1199
|
focus: { primary: "\uD3EC\uCEE4\uC2A4", normalized: "focus" },
|
|
1200
1200
|
blur: { primary: "\uBE14\uB7EC", normalized: "blur" },
|
|
1201
|
+
// Common event names (for event handler patterns)
|
|
1202
|
+
click: { primary: "\uD074\uB9AD", normalized: "click" },
|
|
1203
|
+
hover: { primary: "\uD638\uBC84", normalized: "hover" },
|
|
1204
|
+
submit: { primary: "\uC81C\uCD9C", normalized: "submit" },
|
|
1205
|
+
input: { primary: "\uC785\uB825", normalized: "input" },
|
|
1206
|
+
change: { primary: "\uBCC0\uACBD", normalized: "change" },
|
|
1201
1207
|
// Navigation
|
|
1202
1208
|
go: { primary: "\uC774\uB3D9", normalized: "go" },
|
|
1203
1209
|
// Async
|
|
@@ -1241,6 +1247,15 @@ var koreanProfile = {
|
|
|
1241
1247
|
tokenization: {
|
|
1242
1248
|
particles: ["\uC744", "\uB97C", "\uC774", "\uAC00", "\uC740", "\uB294", "\uC5D0", "\uC5D0\uC11C", "\uC73C\uB85C", "\uB85C", "\uC640", "\uACFC", "\uB3C4"],
|
|
1243
1249
|
boundaryStrategy: "space"
|
|
1250
|
+
},
|
|
1251
|
+
eventHandler: {
|
|
1252
|
+
// Event marker: 할 때 (when/at the time of), used in SOV pattern
|
|
1253
|
+
// Pattern: [event] 할 때 [destination] 의 [patient] 를 [action]
|
|
1254
|
+
// Example: 클릭 할 때 #button 의 .active 를 토글
|
|
1255
|
+
// Compact forms (no space): 클릭할때 .active를토글
|
|
1256
|
+
eventMarker: { primary: "\uD560 \uB54C", alternatives: ["\uD560\uB54C", "\uB54C", "\uC5D0"], position: "after" },
|
|
1257
|
+
temporalMarkers: ["\uD560 \uB54C", "\uD560\uB54C", "\uB54C"]
|
|
1258
|
+
// temporal markers (with and without space)
|
|
1244
1259
|
}
|
|
1245
1260
|
};
|
|
1246
1261
|
|
|
@@ -1314,6 +1329,7 @@ var SINGLE_CHAR_PARTICLES = /* @__PURE__ */ new Set([
|
|
|
1314
1329
|
"\uB9CC"
|
|
1315
1330
|
]);
|
|
1316
1331
|
var MULTI_CHAR_PARTICLES = ["\uC5D0\uC11C", "\uC73C\uB85C", "\uBD80\uD130", "\uAE4C\uC9C0", "\uCC98\uB7FC", "\uBCF4\uB2E4"];
|
|
1332
|
+
var TEMPORAL_EVENT_SUFFIXES = ["\uD560\uB54C", "\uD558\uBA74", "\uD558\uB2C8\uAE4C", "\uD560 \uB54C"];
|
|
1317
1333
|
var PARTICLE_ROLES = /* @__PURE__ */ new Map([
|
|
1318
1334
|
// Subject markers (vowel harmony pair)
|
|
1319
1335
|
[
|
|
@@ -1442,59 +1458,23 @@ var KOREAN_EXTRAS = [
|
|
|
1442
1458
|
{ native: "\uD0A4\uC5C5", normalized: "keyup" },
|
|
1443
1459
|
{ native: "\uB9C8\uC6B0\uC2A4\uC624\uBC84", normalized: "mouseover" },
|
|
1444
1460
|
{ native: "\uB9C8\uC6B0\uC2A4\uC544\uC6C3", normalized: "mouseout" },
|
|
1445
|
-
// References (additional forms)
|
|
1461
|
+
// References (additional forms not in profile)
|
|
1446
1462
|
{ native: "\uB0B4", normalized: "my" },
|
|
1447
1463
|
{ native: "\uADF8\uAC83\uC758", normalized: "its" },
|
|
1448
|
-
// Conditional event forms (native idioms)
|
|
1449
|
-
{ native: "\uD558\uBA74", normalized: "on" },
|
|
1450
|
-
{ native: "\uC73C\uBA74", normalized: "on" },
|
|
1451
|
-
{ native: "\uBA74", normalized: "on" },
|
|
1452
|
-
{ native: "\uD560\uB54C", normalized: "on" },
|
|
1453
|
-
{ native: "\uD560 \uB54C", normalized: "on" },
|
|
1454
|
-
{ native: "\uC744\uB54C", normalized: "on" },
|
|
1455
|
-
{ native: "\uC744 \uB54C", normalized: "on" },
|
|
1456
|
-
{ native: "\uD558\uB2C8\uAE4C", normalized: "on" },
|
|
1457
|
-
{ native: "\uB2C8\uAE4C", normalized: "on" },
|
|
1458
|
-
// Control flow helpers
|
|
1459
|
-
{ native: "\uADF8\uB7EC\uBA74", normalized: "then" },
|
|
1460
|
-
{ native: "\uADF8\uB807\uC9C0\uC54A\uC73C\uBA74", normalized: "otherwise" },
|
|
1461
|
-
{ native: "\uC911\uB2E8", normalized: "break" },
|
|
1462
1464
|
// Logical
|
|
1463
1465
|
{ native: "\uADF8\uB9AC\uACE0", normalized: "and" },
|
|
1464
1466
|
{ native: "\uB610\uB294", normalized: "or" },
|
|
1465
1467
|
{ native: "\uC544\uB2C8", normalized: "not" },
|
|
1466
1468
|
{ native: "\uC774\uB2E4", normalized: "is" },
|
|
1467
|
-
// Command overrides (ensure correct mapping when profile has multiple meanings)
|
|
1468
|
-
{ native: "\uCD94\uAC00", normalized: "add" },
|
|
1469
|
-
// Profile may have this as 'append'
|
|
1470
|
-
// Attached particle forms (native idioms - particle + verb without space)
|
|
1471
|
-
// Object particle 를 (after vowel)
|
|
1472
|
-
{ native: "\uB97C\uD1A0\uAE00", normalized: "toggle" },
|
|
1473
|
-
{ native: "\uB97C\uC804\uD658", normalized: "toggle" },
|
|
1474
|
-
{ native: "\uB97C\uCD94\uAC00", normalized: "add" },
|
|
1475
|
-
{ native: "\uB97C\uC81C\uAC70", normalized: "remove" },
|
|
1476
|
-
{ native: "\uB97C\uC0AD\uC81C", normalized: "remove" },
|
|
1477
|
-
{ native: "\uB97C\uC99D\uAC00", normalized: "increment" },
|
|
1478
|
-
{ native: "\uB97C\uAC10\uC18C", normalized: "decrement" },
|
|
1479
|
-
{ native: "\uB97C\uD45C\uC2DC", normalized: "show" },
|
|
1480
|
-
{ native: "\uB97C\uC228\uAE30\uB2E4", normalized: "hide" },
|
|
1481
|
-
{ native: "\uB97C\uC124\uC815", normalized: "set" },
|
|
1482
|
-
// Object particle 을 (after consonant)
|
|
1483
|
-
{ native: "\uC744\uD1A0\uAE00", normalized: "toggle" },
|
|
1484
|
-
{ native: "\uC744\uC804\uD658", normalized: "toggle" },
|
|
1485
|
-
{ native: "\uC744\uCD94\uAC00", normalized: "add" },
|
|
1486
|
-
{ native: "\uC744\uC81C\uAC70", normalized: "remove" },
|
|
1487
|
-
{ native: "\uC744\uC0AD\uC81C", normalized: "remove" },
|
|
1488
|
-
{ native: "\uC744\uC99D\uAC00", normalized: "increment" },
|
|
1489
|
-
{ native: "\uC744\uAC10\uC18C", normalized: "decrement" },
|
|
1490
|
-
{ native: "\uC744\uD45C\uC2DC", normalized: "show" },
|
|
1491
|
-
{ native: "\uC744\uC228\uAE30\uB2E4", normalized: "hide" },
|
|
1492
|
-
{ native: "\uC744\uC124\uC815", normalized: "set" },
|
|
1493
1469
|
// Time units
|
|
1494
1470
|
{ native: "\uCD08", normalized: "s" },
|
|
1495
1471
|
{ native: "\uBC00\uB9AC\uCD08", normalized: "ms" },
|
|
1496
1472
|
{ native: "\uBD84", normalized: "m" },
|
|
1497
1473
|
{ native: "\uC2DC\uAC04", normalized: "h" }
|
|
1474
|
+
// Note: Attached particle+verb forms (를토글, 을토글, etc.) are intentionally
|
|
1475
|
+
// NOT included because they cause ambiguous parsing. The separate particle + verb
|
|
1476
|
+
// pattern (를 + 토글) is preferred for consistent semantic analysis.
|
|
1477
|
+
// This follows the same approach as the Japanese tokenizer.
|
|
1498
1478
|
];
|
|
1499
1479
|
var KOREAN_TIME_UNITS = [
|
|
1500
1480
|
{ pattern: "\uBC00\uB9AC\uCD08", suffix: "ms", length: 3 },
|
|
@@ -1565,7 +1545,13 @@ var KoreanTokenizer = class extends BaseTokenizer {
|
|
|
1565
1545
|
if (isKorean(input[pos])) {
|
|
1566
1546
|
const wordToken = this.extractKoreanWord(input, pos);
|
|
1567
1547
|
if (wordToken) {
|
|
1568
|
-
|
|
1548
|
+
const splitResult = this.trySplitTemporalSuffix(wordToken);
|
|
1549
|
+
if (splitResult) {
|
|
1550
|
+
tokens.push(splitResult.stemToken);
|
|
1551
|
+
tokens.push(splitResult.suffixToken);
|
|
1552
|
+
} else {
|
|
1553
|
+
tokens.push(wordToken);
|
|
1554
|
+
}
|
|
1569
1555
|
pos = wordToken.position.end;
|
|
1570
1556
|
continue;
|
|
1571
1557
|
}
|
|
@@ -1644,6 +1630,13 @@ var KoreanTokenizer = class extends BaseTokenizer {
|
|
|
1644
1630
|
}
|
|
1645
1631
|
}
|
|
1646
1632
|
if (!allKorean) continue;
|
|
1633
|
+
if (PARTICLES.has(candidate) && startPos === startPos) {
|
|
1634
|
+
const afterCandidate = startPos + len;
|
|
1635
|
+
const nextChar = afterCandidate < input.length ? input[afterCandidate] : "";
|
|
1636
|
+
if (nextChar === "" || isWhitespace(nextChar) || !isKorean(nextChar)) {
|
|
1637
|
+
return null;
|
|
1638
|
+
}
|
|
1639
|
+
}
|
|
1647
1640
|
const keywordEntry2 = this.lookupKeyword(candidate);
|
|
1648
1641
|
if (keywordEntry2) {
|
|
1649
1642
|
return createToken(
|
|
@@ -1661,6 +1654,9 @@ var KoreanTokenizer = class extends BaseTokenizer {
|
|
|
1661
1654
|
while (pos < input.length) {
|
|
1662
1655
|
const char = input[pos];
|
|
1663
1656
|
const nextChar = pos + 1 < input.length ? input[pos + 1] : "";
|
|
1657
|
+
if (word.length === 0 && SINGLE_CHAR_PARTICLES.has(char)) {
|
|
1658
|
+
return null;
|
|
1659
|
+
}
|
|
1664
1660
|
if (SINGLE_CHAR_PARTICLES.has(char) && word.length > 0) {
|
|
1665
1661
|
const isWordBoundary = nextChar === "" || isWhitespace(nextChar) || !isKorean(nextChar) || SINGLE_CHAR_PARTICLES.has(nextChar);
|
|
1666
1662
|
if (isWordBoundary) {
|
|
@@ -1687,6 +1683,9 @@ var KoreanTokenizer = class extends BaseTokenizer {
|
|
|
1687
1683
|
}
|
|
1688
1684
|
}
|
|
1689
1685
|
if (!word) return null;
|
|
1686
|
+
if (PARTICLES.has(word)) {
|
|
1687
|
+
return null;
|
|
1688
|
+
}
|
|
1690
1689
|
const keywordEntry = this.lookupKeyword(word);
|
|
1691
1690
|
if (keywordEntry) {
|
|
1692
1691
|
return createToken(word, "keyword", createPosition(startPos, pos), keywordEntry.normalized);
|
|
@@ -1717,6 +1716,39 @@ var KoreanTokenizer = class extends BaseTokenizer {
|
|
|
1717
1716
|
skipWhitespace: false
|
|
1718
1717
|
});
|
|
1719
1718
|
}
|
|
1719
|
+
/**
|
|
1720
|
+
* Try to split a temporal event suffix from a word token.
|
|
1721
|
+
* This handles compact forms like 클릭할때 → 클릭 + 할때
|
|
1722
|
+
*
|
|
1723
|
+
* @returns Split tokens if a suffix is found, null otherwise
|
|
1724
|
+
*/
|
|
1725
|
+
trySplitTemporalSuffix(wordToken) {
|
|
1726
|
+
const word = wordToken.value;
|
|
1727
|
+
for (const suffix of TEMPORAL_EVENT_SUFFIXES) {
|
|
1728
|
+
if (word.endsWith(suffix) && word.length > suffix.length) {
|
|
1729
|
+
const stem = word.slice(0, -suffix.length);
|
|
1730
|
+
const stemLower = stem.toLowerCase();
|
|
1731
|
+
const keywordEntry = this.lookupKeyword(stemLower);
|
|
1732
|
+
if (!keywordEntry) continue;
|
|
1733
|
+
const stemEnd = wordToken.position.start + stem.length;
|
|
1734
|
+
const stemToken = createToken(
|
|
1735
|
+
stem,
|
|
1736
|
+
"keyword",
|
|
1737
|
+
createPosition(wordToken.position.start, stemEnd),
|
|
1738
|
+
keywordEntry.normalized
|
|
1739
|
+
);
|
|
1740
|
+
const suffixToken = createToken(
|
|
1741
|
+
suffix,
|
|
1742
|
+
"keyword",
|
|
1743
|
+
createPosition(stemEnd, wordToken.position.end),
|
|
1744
|
+
"when"
|
|
1745
|
+
// Normalize temporal suffixes to 'when'
|
|
1746
|
+
);
|
|
1747
|
+
return { stemToken, suffixToken };
|
|
1748
|
+
}
|
|
1749
|
+
}
|
|
1750
|
+
return null;
|
|
1751
|
+
}
|
|
1720
1752
|
};
|
|
1721
1753
|
var koreanTokenizer = new KoreanTokenizer();
|
|
1722
1754
|
|