glost-ko 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +38 -0
- package/dist/constants.d.ts +281 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +315 -0
- package/dist/constants.js.map +1 -0
- package/dist/helpers.d.ts +35 -0
- package/dist/helpers.d.ts.map +1 -0
- package/dist/helpers.js +43 -0
- package/dist/helpers.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +10 -0
- package/dist/index.js.map +1 -0
- package/package.json +57 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 GLOST Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# glost-ko
|
|
2
|
+
|
|
3
|
+
Korean language support for GLOST (Glossed Syntax Tree).
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This package provides Korean-specific helper functions for the GLOST framework.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
npm install glost-ko glost glost-common
|
|
13
|
+
# or
|
|
14
|
+
pnpm add glost-ko glost glost-common
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Features
|
|
18
|
+
|
|
19
|
+
- **Helper Functions**: Convenience functions for creating Korean GLOST word nodes
|
|
20
|
+
- **Romanization Support**: Built-in support for Revised Romanization (RR)
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
|
|
24
|
+
### Creating Korean Words
|
|
25
|
+
|
|
26
|
+
```typescript
|
|
27
|
+
import { createKoreanWord } from 'glost-ko';
|
|
28
|
+
|
|
29
|
+
const word = createKoreanWord({
|
|
30
|
+
text: '안녕하세요',
|
|
31
|
+
romanization: 'annyeonghaseyo',
|
|
32
|
+
partOfSpeech: 'interjection'
|
|
33
|
+
});
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## License
|
|
37
|
+
|
|
38
|
+
MIT
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Korean Language Constants and Utilities
|
|
3
|
+
*
|
|
4
|
+
* Korean-specific constants for Hangul script classification, transcription schemes,
|
|
5
|
+
* and language-specific information.
|
|
6
|
+
*
|
|
7
|
+
* @packageDocumentation
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Identifiers of the Korean transcription/romanization systems known to this package.
 *
 * Every property is read-only and typed as its exact string literal.
 */
export declare const KOREAN_TRANSCRIPTION_SCHEMES: Readonly<{
    /** Revised Romanization (official South Korean standard since 2000) */
    RR: "rr";
    /** McCune-Reischauer (older academic standard) */
    MCCUNE_REISCHAUER: "mr";
    /** Yale Romanization (linguistics and academic) */
    YALE: "yale";
    /** Hangul (native Korean script) */
    HANGUL: "hangul";
    /** International Phonetic Alphabet */
    IPA: "ipa";
}>;
|
|
26
|
+
// Union of the value literals of KOREAN_TRANSCRIPTION_SCHEMES:
// "rr" | "mr" | "yale" | "hangul" | "ipa".
export type KoreanTranscriptionScheme = typeof KOREAN_TRANSCRIPTION_SCHEMES[keyof typeof KOREAN_TRANSCRIPTION_SCHEMES];
|
|
27
|
+
/**
|
|
28
|
+
* Display names for Korean transcription schemes
|
|
29
|
+
*/
|
|
30
|
+
// Keyed by scheme code (the values of KOREAN_TRANSCRIPTION_SCHEMES, e.g. "rr").
// The key type is plain `string`, so indexing with an unknown code still type-checks as `string`.
export declare const KOREAN_TRANSCRIPTION_SCHEME_NAMES: Record<string, string>;
|
|
31
|
+
/**
 * Inclusive code point bounds for groups of Hangul Jamo.
 *
 * The literal types are the decimal forms of the hexadecimal code points
 * given in each comment.
 */
export declare const HANGUL_JAMO_RANGES: Readonly<{
    /** Initial consonants (choseong): U+1100 to U+1112 */
    INITIAL: Readonly<{ start: 4352; end: 4370 }>;
    /** Medial vowels (jungseong): U+1161 to U+1175 */
    MEDIAL: Readonly<{ start: 4449; end: 4469 }>;
    /** Final consonants (jongseong): U+11A8 to U+11C2 */
    FINAL: Readonly<{ start: 4520; end: 4546 }>;
    /** Compatibility Jamo (standalone characters): U+3131 to U+318E */
    COMPATIBILITY: Readonly<{ start: 12593; end: 12686 }>;
}>;
|
|
56
|
+
/**
 * Inclusive code point bounds of the Unicode ranges used for Korean script.
 *
 * The literal types are the decimal forms of the hexadecimal code points
 * given in each comment.
 */
export declare const KOREAN_UNICODE_RANGES: Readonly<{
    /** Hangul syllables (composed syllables): U+AC00 to U+D7AF */
    SYLLABLES: Readonly<{ start: 44032; end: 55215 }>;
    /** Hangul Jamo (all components): U+1100 to U+11FF */
    JAMO: Readonly<{ start: 4352; end: 4607 }>;
    /** Hangul Compatibility Jamo: U+3130 to U+318F */
    JAMO_COMPAT: Readonly<{ start: 12592; end: 12687 }>;
    /** Hangul Jamo Extended-A: U+A960 to U+A97F */
    JAMO_EXTENDED_A: Readonly<{ start: 43360; end: 43391 }>;
    /** Hangul Jamo Extended-B: U+D7B0 to U+D7FF */
    JAMO_EXTENDED_B: Readonly<{ start: 55216; end: 55295 }>;
}>;
|
|
86
|
+
/**
 * Regular expressions used to detect Korean script.
 */
export declare const KOREAN_REGEX: Readonly<{
    /** Match any Hangul syllable */
    SYLLABLE: RegExp;
    /** Match any Hangul Jamo */
    JAMO: RegExp;
    /** Match any Korean character (syllables or Jamo) */
    ANY_KOREAN: RegExp;
    /** Match Korean word (syllables only) */
    WORD: RegExp;
    /** Match Korean punctuation and symbols */
    PUNCTUATION: RegExp;
}>;
|
|
101
|
+
/**
 * Static metadata describing the Korean language.
 */
export declare const KOREAN_LANGUAGE_INFO: Readonly<{
    /** BCP-47 language code */
    code: "ko";
    /** BCP-47 with region (South Korea) */
    codeWithRegion: "ko-KR";
    /** Script name */
    script: "Hangul";
    /** ISO 15924 script code */
    scriptCode: "Hang";
    /** Writing direction */
    direction: "ltr";
    /** Language name in English */
    nameEn: "Korean";
    /** Language name in native script */
    nameNative: "한국어";
}>;
|
|
120
|
+
/**
 * Numeric constants for the arithmetic that maps between a composed Hangul
 * syllable and the indices of its Jamo components.
 */
export declare const HANGUL_DECOMPOSITION: Readonly<{
    /** Base code point for Hangul syllables (U+AC00) */
    BASE: 44032;
    /** Number of initial consonants (choseong) */
    INITIAL_COUNT: 19;
    /** Number of medial vowels (jungseong) */
    MEDIAL_COUNT: 21;
    /** Number of final consonants (jongseong) + 1 for none */
    FINAL_COUNT: 28;
}>;
|
|
135
|
+
/**
|
|
136
|
+
* Initial consonants (choseong) list in order
|
|
137
|
+
*/
|
|
138
|
+
// 19-element tuple; an element's position is the initial-consonant index that
// decomposeHangul/composeHangul use when converting to and from a syllable.
export declare const INITIAL_CONSONANTS: readonly ["ㄱ", "ㄲ", "ㄴ", "ㄷ", "ㄸ", "ㄹ", "ㅁ", "ㅂ", "ㅃ", "ㅅ", "ㅆ", "ㅇ", "ㅈ", "ㅉ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ"];
|
|
139
|
+
/**
|
|
140
|
+
* Medial vowels (jungseong) list in order
|
|
141
|
+
*/
|
|
142
|
+
// 21-element tuple; an element's position is the medial-vowel index that
// decomposeHangul/composeHangul use when converting to and from a syllable.
export declare const MEDIAL_VOWELS: readonly ["ㅏ", "ㅐ", "ㅑ", "ㅒ", "ㅓ", "ㅔ", "ㅕ", "ㅖ", "ㅗ", "ㅘ", "ㅙ", "ㅚ", "ㅛ", "ㅜ", "ㅝ", "ㅞ", "ㅟ", "ㅠ", "ㅡ", "ㅢ", "ㅣ"];
|
|
143
|
+
/**
|
|
144
|
+
* Final consonants (jongseong) list in order (empty string for no final)
|
|
145
|
+
*/
|
|
146
|
+
// 28-element tuple; index 0 is the empty string, meaning "no final consonant".
// An element's position is the final-consonant index used by decomposeHangul/composeHangul.
export declare const FINAL_CONSONANTS: readonly ["", "ㄱ", "ㄲ", "ㄳ", "ㄴ", "ㄵ", "ㄶ", "ㄷ", "ㄹ", "ㄺ", "ㄻ", "ㄼ", "ㄽ", "ㄾ", "ㄿ", "ㅀ", "ㅁ", "ㅂ", "ㅄ", "ㅅ", "ㅆ", "ㅇ", "ㅈ", "ㅊ", "ㅋ", "ㅌ", "ㅍ", "ㅎ"];
|
|
147
|
+
/**
|
|
148
|
+
* Check if a character is a Hangul syllable
|
|
149
|
+
*
|
|
150
|
+
* @param char - Character to check
|
|
151
|
+
* @returns true if the character is a Hangul syllable
|
|
152
|
+
*
|
|
153
|
+
* @example
|
|
154
|
+
* ```typescript
|
|
155
|
+
* isHangulSyllable('가'); // true
|
|
156
|
+
* isHangulSyllable('ㄱ'); // false (Jamo)
|
|
157
|
+
* isHangulSyllable('a'); // false
|
|
158
|
+
* ```
|
|
159
|
+
*/
|
|
160
|
+
// Only the first UTF-16 code unit of `char` is inspected; an empty string yields false.
export declare function isHangulSyllable(char: string): boolean;
|
|
161
|
+
/**
|
|
162
|
+
* Check if a character is a Hangul Jamo
|
|
163
|
+
*
|
|
164
|
+
* @param char - Character to check
|
|
165
|
+
* @returns true if the character is a Hangul Jamo
|
|
166
|
+
*
|
|
167
|
+
* @example
|
|
168
|
+
* ```typescript
|
|
169
|
+
* isHangulJamo('ㄱ'); // true
|
|
170
|
+
* isHangulJamo('가'); // false (syllable)
|
|
171
|
+
* isHangulJamo('a'); // false
|
|
172
|
+
* ```
|
|
173
|
+
*/
|
|
174
|
+
// Only the first UTF-16 code unit of `char` is inspected. It is compared against the
// JAMO and JAMO_COMPAT ranges of KOREAN_UNICODE_RANGES; the Extended-A/B ranges are not consulted.
export declare function isHangulJamo(char: string): boolean;
|
|
175
|
+
/**
|
|
176
|
+
* Check if a character is any Korean script
|
|
177
|
+
*
|
|
178
|
+
* @param char - Character to check
|
|
179
|
+
* @returns true if the character is Hangul (syllable or Jamo)
|
|
180
|
+
*
|
|
181
|
+
* @example
|
|
182
|
+
* ```typescript
|
|
183
|
+
* isKoreanCharacter('가'); // true
|
|
184
|
+
* isKoreanCharacter('ㄱ'); // true
|
|
185
|
+
* isKoreanCharacter('a'); // false
|
|
186
|
+
* ```
|
|
187
|
+
*/
|
|
188
|
+
// True when either isHangulSyllable(char) or isHangulJamo(char) is true.
export declare function isKoreanCharacter(char: string): boolean;
|
|
189
|
+
/**
|
|
190
|
+
* Check if a string contains Korean characters
|
|
191
|
+
*
|
|
192
|
+
* @param text - Text to check
|
|
193
|
+
* @returns true if the text contains at least one Korean character
|
|
194
|
+
*
|
|
195
|
+
* @example
|
|
196
|
+
* ```typescript
|
|
197
|
+
* containsKoreanCharacters('안녕하세요'); // true
|
|
198
|
+
* containsKoreanCharacters('hello'); // false
|
|
199
|
+
* containsKoreanCharacters('hello 안녕'); // true
|
|
200
|
+
* ```
|
|
201
|
+
*/
|
|
202
|
+
// Implemented as a test of `text` against KOREAN_REGEX.ANY_KOREAN.
export declare function containsKoreanCharacters(text: string): boolean;
|
|
203
|
+
/**
|
|
204
|
+
* Check if a string is entirely Korean script
|
|
205
|
+
*
|
|
206
|
+
* @param text - Text to check
|
|
207
|
+
* @param allowSpaces - Whether to allow spaces (default: true)
|
|
208
|
+
* @returns true if the text is entirely Korean (and optionally spaces)
|
|
209
|
+
*
|
|
210
|
+
* @example
|
|
211
|
+
* ```typescript
|
|
212
|
+
* isKoreanText('안녕하세요'); // true
|
|
213
|
+
* isKoreanText('안녕 하세요'); // true
|
|
214
|
+
* isKoreanText('안녕 hello'); // false
|
|
215
|
+
* ```
|
|
216
|
+
*/
|
|
217
|
+
// When allowSpaces is true, every whitespace character matched by \s (not only U+0020)
// is removed before checking. Empty or whitespace-only text yields false.
export declare function isKoreanText(text: string, allowSpaces?: boolean): boolean;
|
|
218
|
+
/**
|
|
219
|
+
* Decompose a Hangul syllable into its Jamo components
|
|
220
|
+
*
|
|
221
|
+
* @param syllable - Hangul syllable character
|
|
222
|
+
* @returns Object with initial, medial, and final Jamo, or null if not a syllable
|
|
223
|
+
*
|
|
224
|
+
* @example
|
|
225
|
+
* ```typescript
|
|
226
|
+
* decomposeHangul('한');
|
|
227
|
+
* // { initial: 'ㅎ', medial: 'ㅏ', final: 'ㄴ' }
|
|
228
|
+
*
|
|
229
|
+
* decomposeHangul('가');
|
|
230
|
+
* // { initial: 'ㄱ', medial: 'ㅏ', final: '' }
|
|
231
|
+
* ```
|
|
232
|
+
*/
|
|
233
|
+
// The returned Jamo are taken from INITIAL_CONSONANTS, MEDIAL_VOWELS and FINAL_CONSONANTS;
// `final` is the empty string when the syllable has no final consonant.
export declare function decomposeHangul(syllable: string): { initial: string; medial: string; final: string } | null;
|
|
238
|
+
/**
|
|
239
|
+
* Compose Jamo components into a Hangul syllable
|
|
240
|
+
*
|
|
241
|
+
* @param initial - Initial consonant (choseong)
|
|
242
|
+
* @param medial - Medial vowel (jungseong)
|
|
243
|
+
* @param final - Final consonant (jongseong), optional
|
|
244
|
+
* @returns Composed Hangul syllable or null if invalid
|
|
245
|
+
*
|
|
246
|
+
* @example
|
|
247
|
+
* ```typescript
|
|
248
|
+
* composeHangul('ㅎ', 'ㅏ', 'ㄴ'); // '한'
|
|
249
|
+
* composeHangul('ㄱ', 'ㅏ', ''); // '가'
|
|
250
|
+
* composeHangul('ㄱ', 'ㅏ'); // '가'
|
|
251
|
+
* ```
|
|
252
|
+
*/
|
|
253
|
+
// Each argument is looked up in INITIAL_CONSONANTS / MEDIAL_VOWELS / FINAL_CONSONANTS;
// null is returned when any of them is not found. `final` defaults to '' (no final consonant).
export declare function composeHangul(initial: string, medial: string, final?: string): string | null;
|
|
254
|
+
/**
|
|
255
|
+
* Validate a Korean transcription scheme
|
|
256
|
+
*
|
|
257
|
+
* @param scheme - Scheme to validate
|
|
258
|
+
* @returns true if the scheme is a valid Korean transcription scheme
|
|
259
|
+
*
|
|
260
|
+
* @example
|
|
261
|
+
* ```typescript
|
|
262
|
+
* isValidKoreanTranscriptionScheme('rr'); // true
|
|
263
|
+
* isValidKoreanTranscriptionScheme('mr'); // true
|
|
264
|
+
* isValidKoreanTranscriptionScheme('invalid'); // false
|
|
265
|
+
* ```
|
|
266
|
+
*/
|
|
267
|
+
// Type guard: checks `scheme` against the values of KOREAN_TRANSCRIPTION_SCHEMES.
export declare function isValidKoreanTranscriptionScheme(scheme: string): scheme is KoreanTranscriptionScheme;
|
|
268
|
+
/**
|
|
269
|
+
* Get display name for a Korean transcription scheme
|
|
270
|
+
*
|
|
271
|
+
* @param scheme - Transcription scheme
|
|
272
|
+
* @returns Display name or the scheme itself if not found
|
|
273
|
+
*
|
|
274
|
+
* @example
|
|
275
|
+
* ```typescript
|
|
276
|
+
* getKoreanTranscriptionSchemeName('rr'); // 'Revised Romanization'
|
|
277
|
+
* getKoreanTranscriptionSchemeName('mr'); // 'McCune-Reischauer'
|
|
278
|
+
* ```
|
|
279
|
+
*/
|
|
280
|
+
// The lookup table is KOREAN_TRANSCRIPTION_SCHEME_NAMES, keyed by scheme code.
export declare function getKoreanTranscriptionSchemeName(scheme: string): string;
|
|
281
|
+
//# sourceMappingURL=constants.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH;;;;GAIG;AACH,eAAO,MAAM,4BAA4B;IACvC,uEAAuE;;IAEvE,kDAAkD;;IAElD,mDAAmD;;IAEnD,oCAAoC;;IAEpC,sCAAsC;;CAE9B,CAAC;AAEX,MAAM,MAAM,yBAAyB,GAAG,OAAO,4BAA4B,CAAC,MAAM,OAAO,4BAA4B,CAAC,CAAC;AAEvH;;GAEG;AACH,eAAO,MAAM,iCAAiC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAMpE,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,kBAAkB;IAC7B,4DAA4D;;;;;IAE5D,wDAAwD;;;;;IAExD,2DAA2D;;;;;IAE3D,uEAAuE;;;;;CAE/D,CAAC;AAEX;;GAEG;AACH,eAAO,MAAM,qBAAqB;IAChC,oEAAoE;;;;;IAEpE,qDAAqD;;;;;IAErD,wDAAwD;;;;;IAExD,iDAAiD;;;;;IAEjD,iDAAiD;;;;;CAEzC,CAAC;AAEX;;GAEG;AACH,eAAO,MAAM,YAAY;IACvB,gCAAgC;;IAEhC,4BAA4B;;IAE5B,qDAAqD;;IAErD,yCAAyC;;IAEzC,2CAA2C;;CAEnC,CAAC;AAEX;;GAEG;AACH,eAAO,MAAM,oBAAoB;IAC/B,2BAA2B;;IAE3B,uCAAuC;;IAEvC,kBAAkB;;IAElB,4BAA4B;;IAE5B,wBAAwB;;IAExB,+BAA+B;;IAE/B,qCAAqC;;CAE7B,CAAC;AAEX;;;;GAIG;AACH,eAAO,MAAM,oBAAoB;IAC/B,2CAA2C;;IAE3C,8CAA8C;;IAE9C,0CAA0C;;IAE1C,0DAA0D;;CAElD,CAAC;AAEX;;GAEG;AACH,eAAO,MAAM,kBAAkB,0GAGrB,CAAC;AAEX;;GAEG;AACH,eAAO,MAAM,aAAa,oHAGhB,CAAC;AAEX;;GAEG;AACH,eAAO,MAAM,gBAAgB,sJAInB,CAAC;AAEX;;;;;;;;;;;;GAYG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAItD;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAKlD;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAEvD;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,wBAAwB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAE9D;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,UAAO,GAAG,OAAO,CAKtE;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAc3G;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,GAAE,MAAW,GAAG,MAAM,GAAG,IAAI,CAehG;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,gCAAgC,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,IAAI,yBAAyB,CAEpG;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,gCAAgC,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAEvE"}
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Korean Language Constants and Utilities
|
|
3
|
+
*
|
|
4
|
+
* Korean-specific constants for Hangul script classification, transcription schemes,
|
|
5
|
+
* and language-specific information.
|
|
6
|
+
*
|
|
7
|
+
* @packageDocumentation
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Identifiers of the Korean transcription/romanization systems known to this package.
 *
 * Keys are symbolic names; values are the short scheme codes used at runtime.
 */
export const KOREAN_TRANSCRIPTION_SCHEMES = {
  RR: "rr", // Revised Romanization (official South Korean standard since 2000)
  MCCUNE_REISCHAUER: "mr", // McCune-Reischauer (older academic standard)
  YALE: "yale", // Yale Romanization (linguistics and academic)
  HANGUL: "hangul", // Hangul (native Korean script)
  IPA: "ipa", // International Phonetic Alphabet
};
|
|
26
|
+
/**
 * Human-readable display name for each scheme code in KOREAN_TRANSCRIPTION_SCHEMES.
 *
 * Built from [code, name] pairs; the resulting plain object is keyed by scheme code.
 */
export const KOREAN_TRANSCRIPTION_SCHEME_NAMES = Object.fromEntries([
  [KOREAN_TRANSCRIPTION_SCHEMES.RR, 'Revised Romanization'],
  [KOREAN_TRANSCRIPTION_SCHEMES.MCCUNE_REISCHAUER, 'McCune-Reischauer'],
  [KOREAN_TRANSCRIPTION_SCHEMES.YALE, 'Yale'],
  [KOREAN_TRANSCRIPTION_SCHEMES.HANGUL, 'Hangul'],
  [KOREAN_TRANSCRIPTION_SCHEMES.IPA, 'IPA'],
]);
|
|
36
|
+
/**
 * Inclusive code point bounds for groups of Hangul Jamo.
 */
export const HANGUL_JAMO_RANGES = {
  // Initial consonants (choseong): U+1100 to U+1112
  INITIAL: {
    start: 0x1100,
    end: 0x1112,
  },
  // Medial vowels (jungseong): U+1161 to U+1175
  MEDIAL: {
    start: 0x1161,
    end: 0x1175,
  },
  // Final consonants (jongseong): U+11A8 to U+11C2
  FINAL: {
    start: 0x11a8,
    end: 0x11c2,
  },
  // Compatibility Jamo (standalone characters): U+3131 to U+318E
  COMPATIBILITY: {
    start: 0x3131,
    end: 0x318e,
  },
};
|
|
49
|
+
/**
 * Inclusive code point bounds of the Unicode ranges used for Korean script.
 */
export const KOREAN_UNICODE_RANGES = {
  // Hangul syllables (composed syllables): U+AC00 to U+D7AF
  SYLLABLES: {
    start: 0xac00,
    end: 0xd7af,
  },
  // Hangul Jamo (all components): U+1100 to U+11FF
  JAMO: {
    start: 0x1100,
    end: 0x11ff,
  },
  // Hangul Compatibility Jamo: U+3130 to U+318F
  JAMO_COMPAT: {
    start: 0x3130,
    end: 0x318f,
  },
  // Hangul Jamo Extended-A: U+A960 to U+A97F
  JAMO_EXTENDED_A: {
    start: 0xa960,
    end: 0xa97f,
  },
  // Hangul Jamo Extended-B: U+D7B0 to U+D7FF
  JAMO_EXTENDED_B: {
    start: 0xd7b0,
    end: 0xd7ff,
  },
};
|
|
64
|
+
/**
 * Regular expressions used to detect Korean script.
 *
 * None of the patterns carries flags, so `test`/`exec` keep no state between calls.
 */
export const KOREAN_REGEX = {
  // One Hangul syllable
  SYLLABLE: /[\uAC00-\uD7AF]/,

  // One Hangul Jamo (conjoining or compatibility)
  JAMO: /[\u1100-\u11FF\u3130-\u318F]/,

  // One Korean character: a syllable or a Jamo
  ANY_KOREAN: /[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F]/,

  // A run of one or more Hangul syllables
  WORD: /[\uAC00-\uD7AF]+/,

  // One character from U+3000 to U+303F (punctuation and symbols)
  PUNCTUATION: /[\u3000-\u303F]/,
};
|
|
79
|
+
/**
 * Static metadata describing the Korean language.
 */
export const KOREAN_LANGUAGE_INFO = {
  code: "ko", // BCP-47 language code
  codeWithRegion: "ko-KR", // BCP-47 with region (South Korea)
  script: "Hangul", // Script name
  scriptCode: "Hang", // ISO 15924 script code
  direction: "ltr", // Writing direction
  nameEn: "Korean", // Language name in English
  nameNative: "한국어", // Language name in native script
};
|
|
98
|
+
/**
 * Numeric constants for the arithmetic that maps between a composed Hangul
 * syllable and the indices of its Jamo components.
 */
export const HANGUL_DECOMPOSITION = {
  BASE: 0xac00, // Base code point for Hangul syllables
  INITIAL_COUNT: 19, // Number of initial consonants (choseong)
  MEDIAL_COUNT: 21, // Number of medial vowels (jungseong)
  FINAL_COUNT: 28, // Number of final consonants (jongseong) + 1 for none
};
|
|
113
|
+
/**
 * Initial consonants (choseong), ordered by their index within a composed syllable.
 *
 * Spreading the string yields one array element per character.
 */
export const INITIAL_CONSONANTS = [...'ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ'];
|
|
120
|
+
/**
 * Medial vowels (jungseong), ordered by their index within a composed syllable.
 *
 * Spreading the string yields one array element per character.
 */
export const MEDIAL_VOWELS = [...'ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ'];
|
|
127
|
+
/**
 * Final consonants (jongseong), ordered by their index within a composed syllable.
 *
 * Index 0 is the empty string, which stands for "no final consonant"; the
 * remaining entries come from spreading the string, one element per character.
 */
export const FINAL_CONSONANTS = ['', ...'ㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ'];
|
|
135
|
+
/**
 * Test whether a character lies in the Hangul syllable range.
 *
 * Only the first UTF-16 code unit of `char` is examined; the inclusive
 * bounds come from `KOREAN_UNICODE_RANGES.SYLLABLES`.
 *
 * @param char - Character to check
 * @returns true when the first code unit is inside the syllable range;
 *   false for empty or missing input
 *
 * @example
 * ```typescript
 * isHangulSyllable('가'); // true
 * isHangulSyllable('ㄱ'); // false (Jamo)
 * isHangulSyllable('a'); // false
 * ```
 */
export function isHangulSyllable(char) {
  const isEmpty = !char || char.length === 0;
  if (isEmpty) {
    return false;
  }
  const { start, end } = KOREAN_UNICODE_RANGES.SYLLABLES;
  const unit = char.charCodeAt(0);
  return start <= unit && unit <= end;
}
|
|
154
|
+
/**
 * Test whether a character is a Hangul Jamo.
 *
 * Only the first UTF-16 code unit of `char` is examined. It is accepted when
 * it falls inside either `KOREAN_UNICODE_RANGES.JAMO` or
 * `KOREAN_UNICODE_RANGES.JAMO_COMPAT` (bounds inclusive).
 *
 * @param char - Character to check
 * @returns true when the first code unit is inside one of those two ranges;
 *   false for empty or missing input
 *
 * @example
 * ```typescript
 * isHangulJamo('ㄱ'); // true
 * isHangulJamo('가'); // false (syllable)
 * isHangulJamo('a'); // false
 * ```
 */
export function isHangulJamo(char) {
  const isEmpty = !char || char.length === 0;
  if (isEmpty) {
    return false;
  }
  const unit = char.charCodeAt(0);
  const { JAMO, JAMO_COMPAT } = KOREAN_UNICODE_RANGES;
  return [JAMO, JAMO_COMPAT].some(({ start, end }) => start <= unit && unit <= end);
}
|
|
174
|
+
/**
 * Test whether a character belongs to Korean script.
 *
 * A character qualifies when `isHangulSyllable` or `isHangulJamo` accepts it.
 *
 * @param char - Character to check
 * @returns true if the character is Hangul (syllable or Jamo)
 *
 * @example
 * ```typescript
 * isKoreanCharacter('가'); // true
 * isKoreanCharacter('ㄱ'); // true
 * isKoreanCharacter('a'); // false
 * ```
 */
export function isKoreanCharacter(char) {
  if (isHangulSyllable(char)) {
    return true;
  }
  return isHangulJamo(char);
}
|
|
190
|
+
/**
 * Test whether a string holds at least one Korean character.
 *
 * The check is a single match attempt against `KOREAN_REGEX.ANY_KOREAN`.
 *
 * @param text - Text to check
 * @returns true if the text contains at least one Korean character
 *
 * @example
 * ```typescript
 * containsKoreanCharacters('안녕하세요'); // true
 * containsKoreanCharacters('hello'); // false
 * containsKoreanCharacters('hello 안녕'); // true
 * ```
 */
export function containsKoreanCharacters(text) {
  const { ANY_KOREAN: koreanPattern } = KOREAN_REGEX;
  return koreanPattern.test(text);
}
|
|
206
|
+
/**
 * Test whether a string consists entirely of Korean script.
 *
 * When `allowSpaces` is true, every whitespace character matched by `\s`
 * is removed before the check. Text that is empty, or empty after that
 * removal, is never considered Korean.
 *
 * @param text - Text to check
 * @param allowSpaces - Whether to ignore whitespace (default: true)
 * @returns true if every remaining character passes `isKoreanCharacter`
 *
 * @example
 * ```typescript
 * isKoreanText('안녕하세요'); // true
 * isKoreanText('안녕 하세요'); // true
 * isKoreanText('안녕 hello'); // false
 * ```
 */
export function isKoreanText(text, allowSpaces = true) {
  if (!text) {
    return false;
  }
  let candidate = text;
  if (allowSpaces) {
    candidate = text.replace(/\s/g, '');
  }
  if (candidate.length === 0) {
    return false;
  }
  // Iterating the string walks it by code point, one character at a time.
  for (const character of candidate) {
    if (!isKoreanCharacter(character)) {
      return false;
    }
  }
  return true;
}
|
|
228
|
+
/**
 * Decompose a Hangul syllable into its Jamo components.
 *
 * Only the first UTF-16 code unit of `syllable` is used. Its offset from
 * `HANGUL_DECOMPOSITION.BASE` is split into initial, medial and final indices,
 * which are looked up in `INITIAL_CONSONANTS`, `MEDIAL_VOWELS` and
 * `FINAL_CONSONANTS`.
 *
 * @param syllable - Hangul syllable character
 * @returns Object with initial, medial, and final Jamo (`final` is '' when
 *   there is no final consonant), or null if the input is not a composed
 *   syllable that can be decomposed
 *
 * @example
 * ```typescript
 * decomposeHangul('한');
 * // { initial: 'ㅎ', medial: 'ㅏ', final: 'ㄴ' }
 *
 * decomposeHangul('가');
 * // { initial: 'ㄱ', medial: 'ㅏ', final: '' }
 *
 * decomposeHangul('\uD7A4');
 * // null (inside the syllable range but past the last composed syllable)
 * ```
 */
export function decomposeHangul(syllable) {
  if (!isHangulSyllable(syllable)) {
    return null;
  }
  const { BASE, INITIAL_COUNT, MEDIAL_COUNT, FINAL_COUNT } = HANGUL_DECOMPOSITION;
  const offset = syllable.charCodeAt(0) - BASE;
  // isHangulSyllable accepts everything up to KOREAN_UNICODE_RANGES.SYLLABLES.end,
  // but only INITIAL_COUNT * MEDIAL_COUNT * FINAL_COUNT offsets map to a syllable.
  // Anything beyond that would index past the end of INITIAL_CONSONANTS and
  // return `undefined` as the initial, so reject it here.
  const syllableCount = INITIAL_COUNT * MEDIAL_COUNT * FINAL_COUNT;
  if (offset < 0 || offset >= syllableCount) {
    return null;
  }
  const finalIndex = offset % FINAL_COUNT;
  const medialIndex = Math.floor(offset / FINAL_COUNT) % MEDIAL_COUNT;
  const initialIndex = Math.floor(offset / (FINAL_COUNT * MEDIAL_COUNT));
  return {
    initial: INITIAL_CONSONANTS[initialIndex],
    medial: MEDIAL_VOWELS[medialIndex],
    final: FINAL_CONSONANTS[finalIndex],
  };
}
|
|
256
|
+
/**
 * Compose Jamo components into a single Hangul syllable.
 *
 * Each argument is looked up in `INITIAL_CONSONANTS`, `MEDIAL_VOWELS` and
 * `FINAL_CONSONANTS` respectively; the three positions are then combined
 * with the constants in `HANGUL_DECOMPOSITION` to get the syllable's code.
 *
 * @param initial - Initial consonant (choseong)
 * @param medial - Medial vowel (jungseong)
 * @param final - Final consonant (jongseong), optional; '' means none
 * @returns Composed Hangul syllable, or null if any component is not in its list
 *
 * @example
 * ```typescript
 * composeHangul('ㅎ', 'ㅏ', 'ㄴ'); // '한'
 * composeHangul('ㄱ', 'ㅏ', ''); // '가'
 * composeHangul('ㄱ', 'ㅏ'); // '가'
 * ```
 */
export function composeHangul(initial, medial, final = '') {
  const positions = [
    INITIAL_CONSONANTS.indexOf(initial),
    MEDIAL_VOWELS.indexOf(medial),
    FINAL_CONSONANTS.indexOf(final),
  ];
  if (positions.includes(-1)) {
    return null;
  }
  const [lead, vowel, tail] = positions;
  const { BASE, MEDIAL_COUNT, FINAL_COUNT } = HANGUL_DECOMPOSITION;
  // Same value as BASE + lead*MEDIAL_COUNT*FINAL_COUNT + vowel*FINAL_COUNT + tail.
  const codeUnit = BASE + (lead * MEDIAL_COUNT + vowel) * FINAL_COUNT + tail;
  return String.fromCharCode(codeUnit);
}
|
|
284
|
+
/**
 * Check whether a value is one of the known Korean transcription scheme codes.
 *
 * The known codes are the values of `KOREAN_TRANSCRIPTION_SCHEMES`.
 *
 * @param scheme - Scheme to validate
 * @returns true if the scheme is a valid Korean transcription scheme
 *
 * @example
 * ```typescript
 * isValidKoreanTranscriptionScheme('rr'); // true
 * isValidKoreanTranscriptionScheme('mr'); // true
 * isValidKoreanTranscriptionScheme('invalid'); // false
 * ```
 */
export function isValidKoreanTranscriptionScheme(scheme) {
  const knownSchemes = new Set(Object.values(KOREAN_TRANSCRIPTION_SCHEMES));
  return knownSchemes.has(scheme);
}
|
|
300
|
+
/**
 * Get the display name for a Korean transcription scheme.
 *
 * Only entries defined directly on `KOREAN_TRANSCRIPTION_SCHEME_NAMES` are
 * considered, so names inherited from `Object.prototype` (such as
 * 'constructor' or 'toString') are treated as unknown schemes.
 *
 * @param scheme - Transcription scheme
 * @returns Display name or the scheme itself if not found
 *
 * @example
 * ```typescript
 * getKoreanTranscriptionSchemeName('rr'); // 'Revised Romanization'
 * getKoreanTranscriptionSchemeName('mr'); // 'McCune-Reischauer'
 * getKoreanTranscriptionSchemeName('constructor'); // 'constructor'
 * ```
 */
export function getKoreanTranscriptionSchemeName(scheme) {
  // A bare property read on a plain object also finds inherited members, which
  // would return a function instead of a string for keys like 'constructor'.
  const isKnown = Object.prototype.hasOwnProperty.call(KOREAN_TRANSCRIPTION_SCHEME_NAMES, scheme);
  if (!isKnown) {
    return scheme;
  }
  return KOREAN_TRANSCRIPTION_SCHEME_NAMES[scheme] ?? scheme;
}
|
|
315
|
+
//# sourceMappingURL=constants.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH;;;;GAIG;AACH,MAAM,CAAC,MAAM,4BAA4B,GAAG;IAC1C,uEAAuE;IACvE,EAAE,EAAE,IAAI;IACR,kDAAkD;IAClD,iBAAiB,EAAE,IAAI;IACvB,mDAAmD;IACnD,IAAI,EAAE,MAAM;IACZ,oCAAoC;IACpC,MAAM,EAAE,QAAQ;IAChB,sCAAsC;IACtC,GAAG,EAAE,KAAK;CACF,CAAC;AAIX;;GAEG;AACH,MAAM,CAAC,MAAM,iCAAiC,GAA2B;IACvE,CAAC,4BAA4B,CAAC,EAAE,CAAC,EAAE,sBAAsB;IACzD,CAAC,4BAA4B,CAAC,iBAAiB,CAAC,EAAE,mBAAmB;IACrE,CAAC,4BAA4B,CAAC,IAAI,CAAC,EAAE,MAAM;IAC3C,CAAC,4BAA4B,CAAC,MAAM,CAAC,EAAE,QAAQ;IAC/C,CAAC,4BAA4B,CAAC,GAAG,CAAC,EAAE,KAAK;CAC1C,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAG;IAChC,4DAA4D;IAC5D,OAAO,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE;IACvC,wDAAwD;IACxD,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE;IACtC,2DAA2D;IAC3D,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE;IACrC,uEAAuE;IACvE,aAAa,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE;CACrC,CAAC;AAEX;;GAEG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG;IACnC,oEAAoE;IACpE,SAAS,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE;IACzC,qDAAqD;IACrD,IAAI,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE;IACpC,wDAAwD;IACxD,WAAW,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE;IAC3C,iDAAiD;IACjD,eAAe,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE;IAC/C,iDAAiD;IACjD,eAAe,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE;CACvC,CAAC;AAEX;;GAEG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG;IAC1B,gCAAgC;IAChC,QAAQ,EAAE,iBAAiB;IAC3B,4BAA4B;IAC5B,IAAI,EAAE,8BAA8B;IACpC,qDAAqD;IACrD,UAAU,EAAE,2CAA2C;IACvD,yCAAyC;IACzC,IAAI,EAAE,kBAAkB;IACxB,2CAA2C;IAC3C,WAAW,EAAE,iBAAiB;CACtB,CAAC;AAEX;;GAEG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAG;IAClC,2BAA2B;IAC3B,IAAI,EAAE,IAAI;IACV,uCAAuC;IACvC,cAAc,EAAE,OAAO;IACvB,kBAAkB;IAClB,MAAM,EAAE,QAAQ;IAChB,4BAA4B;IAC5B,UAAU,EAAE,MAAM;IAClB,wBAAwB;IACxB,SAAS,EAAE,KAAc;IACzB,+BAA+B;IAC/B,MAAM,EAAE,QAAQ;IAChB,qCAAqC;IACrC,UAAU,EAAE,KAAK;CACT,CAAC;AAEX;;;;GAIG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAG;IAClC,2CAA2C;IAC3C,IAAI,EAAE,MAAM;
IACZ,8CAA8C;IAC9C,aAAa,EAAE,EAAE;IACjB,0CAA0C;IAC1C,YAAY,EAAE,EAAE;IAChB,0DAA0D;IAC1D,WAAW,EAAE,EAAE;CACP,CAAC;AAEX;;GAEG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAG;IAChC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;IAChD,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;CACnC,CAAC;AAEX;;GAEG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG;IAC3B,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;IAChD,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;CAC7C,CAAC;AAEX;;GAEG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC9B,EAAE,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;IAC/C,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;IAChD,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;CAC9B,CAAC;AAEX;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAC7C,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IAChC,OAAO,IAAI,IAAI,qBAAqB,CAAC,SAAS,CAAC,KAAK,IAAI,IAAI,IAAI,qBAAqB,CAAC,SAAS,CAAC,GAAG,CAAC;AACtG,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAC7C,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IAChC,OAAO,CAAC,IAAI,IAAI,qBAAqB,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,IAAI,qBAAqB,CAAC,IAAI,CAAC,GAAG,CAAC;QACpF,CAAC,IAAI,IAAI,qBAAqB,CAAC,WAAW,CAAC,KAAK,IAAI,IAAI,IAAI,qBAAqB,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;AAC5G,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,OAAO,gBAAgB,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC;AACtD,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,wBAAwB,CAAC,IAAY;IACnD,OAAO,YAAY,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,WAAW,GAAG,IAAI;IAC3D,IAAI,CAAC,IAAI;QAAE,OAAO,KAAK,CAAC;IACxB,M
AAM,QAAQ,GAAG,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC9D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;AACrE,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,eAAe,CAAC,QAAgB;IAC9C,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAC;IAE7C,MAAM,IAAI,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,oBAAoB,CAAC,IAAI,CAAC;IAEhE,MAAM,UAAU,GAAG,IAAI,GAAG,oBAAoB,CAAC,WAAW,CAAC;IAC3D,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,oBAAoB,CAAC,WAAW,CAAC,GAAG,oBAAoB,CAAC,YAAY,CAAC;IAC5G,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,oBAAoB,CAAC,WAAW,GAAG,oBAAoB,CAAC,YAAY,CAAC,CAAC,CAAC;IAE/G,OAAO;QACL,OAAO,EAAE,kBAAkB,CAAC,YAAY,CAAC;QACzC,MAAM,EAAE,aAAa,CAAC,WAAW,CAAC;QAClC,KAAK,EAAE,gBAAgB,CAAC,UAAU,CAAC;KACpC,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,aAAa,CAAC,OAAe,EAAE,MAAc,EAAE,QAAgB,EAAE;IAC/E,MAAM,YAAY,GAAG,kBAAkB,CAAC,OAAO,CAAC,OAAc,CAAC,CAAC;IAChE,MAAM,WAAW,GAAG,aAAa,CAAC,OAAO,CAAC,MAAa,CAAC,CAAC;IACzD,MAAM,UAAU,GAAG,gBAAgB,CAAC,OAAO,CAAC,KAAY,CAAC,CAAC;IAE1D,IAAI,YAAY,KAAK,CAAC,CAAC,IAAI,WAAW,KAAK,CAAC,CAAC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;QACnE,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,IAAI,GAAG,oBAAoB,CAAC,IAAI;QACpC,CAAC,YAAY,GAAG,oBAAoB,CAAC,YAAY,GAAG,oBAAoB,CAAC,WAAW,CAAC;QACrF,CAAC,WAAW,GAAG,oBAAoB,CAAC,WAAW,CAAC;QAChD,UAAU,CAAC;IAEb,OAAO,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;AACnC,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,gCAAgC,CAAC,MAAc;IAC7D,OAAO,MAAM,CAAC,MAAM,CAAC,4BAA4B,CAAC,CAAC,QAAQ,CAAC,MAAmC,CAAC,CAAC;AACnG,CAAC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,gCAAgC,CAAC,MAAc;IAC7D,OAAO,iCAAiC,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC;AAC7D,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Korean Language Helper Functions
|
|
3
|
+
*
|
|
4
|
+
* Convenience functions for creating Korean GLOST nodes with romanization.
|
|
5
|
+
*/
|
|
6
|
+
import type { GLOSTWord } from 'glost';
|
|
7
|
+
/**
|
|
8
|
+
* Options accepted by createKoreanWord when building a Korean word node
|
|
9
|
+
*/
|
|
10
|
+
export interface CreateKoreanWordOptions {
|
|
11
|
+
/** Korean text (Hangul); becomes the value of the created word node */
|
|
12
|
+
text: string;
|
|
13
|
+
/** Romanization (RR: Revised Romanization) - optional; when omitted or empty the node is created without a transcription */
|
|
14
|
+
romanization?: string;
|
|
15
|
+
/** Part of speech - optional (default: "unknown") */
|
|
16
|
+
partOfSpeech?: string;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Create a GLOST word node for Korean text, attaching a romanization transcription only when one is supplied
|
|
20
|
+
* @param options - Hangul text plus optional romanization and part of speech
|
|
21
|
+
* @example
|
|
22
|
+
* ```typescript
|
|
23
|
+
* // With romanization
|
|
24
|
+
* const word = createKoreanWord({
|
|
25
|
+
* text: "안녕하세요",
|
|
26
|
+
* romanization: "annyeonghaseyo",
|
|
27
|
+
* partOfSpeech: "interjection"
|
|
28
|
+
* });
|
|
29
|
+
*
|
|
30
|
+
* // Without romanization (to be added by extensions)
|
|
31
|
+
* const word = createKoreanWord({ text: "안녕하세요" });
|
|
32
|
+
* ```
|
|
33
|
+
*/
|
|
34
|
+
export declare function createKoreanWord(options: CreateKoreanWordOptions): GLOSTWord;
|
|
35
|
+
//# sourceMappingURL=helpers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../src/helpers.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,SAAS,EAA2C,MAAM,OAAO,CAAC;AAGhF;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,2BAA2B;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,yDAAyD;IACzD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,0CAA0C;IAC1C,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,uBAAuB,GAC/B,SAAS,CAsBX"}
|
package/dist/helpers.js
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Korean Language Helper Functions
|
|
3
|
+
*
|
|
4
|
+
* Convenience functions for creating Korean GLOST nodes with romanization.
|
|
5
|
+
*/
|
|
6
|
+
import { createGLOSTWordNode } from 'glost';
|
|
7
|
+
/**
 * Build a GLOST word node for a piece of Korean (Hangul) text.
 *
 * The node is always tagged with lang "ko" and script "hangul". A
 * transcription is attached only when a non-empty romanization is given;
 * in that case the whole Hangul text is passed as a single-element
 * `syllables` list alongside the romanized text. When `partOfSpeech` is
 * omitted it is recorded as "unknown".
 *
 * @param options - Hangul `text`, plus optional `romanization` and `partOfSpeech`.
 * @returns The node produced by `createGLOSTWordNode`.
 *
 * @example
 * ```typescript
 * // With romanization
 * const word = createKoreanWord({
 *   text: "안녕하세요",
 *   romanization: "annyeonghaseyo",
 *   partOfSpeech: "interjection"
 * });
 *
 * // Without romanization (to be added by extensions)
 * const word = createKoreanWord({ text: "안녕하세요" });
 * ```
 */
export function createKoreanWord(options) {
    const hangul = options.text;
    const romanized = options.romanization;
    const requestedPos = options.partOfSpeech;
    // Only an undefined part of speech falls back to the default.
    const partOfSpeech = requestedPos === undefined ? "unknown" : requestedPos;

    // Stays undefined unless a non-empty romanization was supplied.
    let transcription;
    if (romanized) {
        transcription = {
            romanization: { text: romanized, syllables: [hangul] },
        };
    }

    return createGLOSTWordNode({
        value: hangul,
        transcription,
        metadata: { partOfSpeech },
        lang: "ko",
        script: "hangul",
    });
}
|
|
43
|
+
//# sourceMappingURL=helpers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.js","sourceRoot":"","sources":["../src/helpers.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EAAE,mBAAmB,EAAE,MAAM,OAAO,CAAC;AAc5C;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,gBAAgB,CAC9B,OAAgC;IAEhC,MAAM,EAAE,IAAI,EAAE,YAAY,EAAE,YAAY,GAAG,SAAS,EAAE,GAAG,OAAO,CAAC;IAEjE,wDAAwD;IACxD,MAAM,aAAa,GAAoC,YAAY,CAAC,CAAC,CAAC;QACpE,YAAY,EAAE;YACZ,IAAI,EAAE,YAAY;YAClB,SAAS,EAAE,CAAC,IAAI,CAAC;SAClB;KACF,CAAC,CAAC,CAAC,SAAS,CAAC;IAEd,MAAM,QAAQ,GAAuB;QACnC,YAAY;KACb,CAAC;IAEF,OAAO,mBAAmB,CAAC;QACzB,KAAK,EAAE,IAAI;QACX,aAAa;QACb,QAAQ;QACR,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,QAAQ;KACjB,CAAC,CAAC;AACL,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* glost-ko - Korean Language Support for GLOST
|
|
3
|
+
*
|
|
4
|
+
* Provides Korean-specific helper functions, constants, and utilities.
|
|
5
|
+
*
|
|
6
|
+
* @packageDocumentation
|
|
7
|
+
*/
|
|
8
|
+
export * from './constants.js';
|
|
9
|
+
export * from './helpers.js';
|
|
10
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,cAAc,gBAAgB,CAAC;AAC/B,cAAc,cAAc,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* glost-ko - Korean Language Support for GLOST
|
|
3
|
+
*
|
|
4
|
+
* Provides Korean-specific helper functions, constants, and utilities.
|
|
5
|
+
*
|
|
6
|
+
* @packageDocumentation
|
|
7
|
+
*/
|
|
8
|
+
export * from './constants.js';
|
|
9
|
+
export * from './helpers.js';
|
|
10
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,cAAc,gBAAgB,CAAC;AAC/B,cAAc,cAAc,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "glost-ko",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Korean language support for GLOST - helper functions",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"glost",
|
|
8
|
+
"korean",
|
|
9
|
+
"language",
|
|
10
|
+
"romanization",
|
|
11
|
+
"nlp"
|
|
12
|
+
],
|
|
13
|
+
"author": "fustilio",
|
|
14
|
+
"license": "MIT",
|
|
15
|
+
"repository": {
|
|
16
|
+
"type": "git",
|
|
17
|
+
"url": "https://github.com/fustilio/glost.git",
|
|
18
|
+
"directory": "packages/languages/ko"
|
|
19
|
+
},
|
|
20
|
+
"main": "./dist/index.js",
|
|
21
|
+
"types": "./dist/index.d.ts",
|
|
22
|
+
"exports": {
|
|
23
|
+
".": {
|
|
24
|
+
"types": "./dist/index.d.ts",
|
|
25
|
+
"default": "./dist/index.js"
|
|
26
|
+
},
|
|
27
|
+
"./constants": {
|
|
28
|
+
"types": "./dist/constants.d.ts",
|
|
29
|
+
"default": "./dist/constants.js"
|
|
30
|
+
},
|
|
31
|
+
"./helpers": {
|
|
32
|
+
"types": "./dist/helpers.d.ts",
|
|
33
|
+
"default": "./dist/helpers.js"
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
"files": [
|
|
37
|
+
"dist",
|
|
38
|
+
"README.md"
|
|
39
|
+
],
|
|
40
|
+
"dependencies": {
|
|
41
|
+
"glost": "0.5.0",
|
|
42
|
+
"glost-common": "0.2.0"
|
|
43
|
+
},
|
|
44
|
+
"devDependencies": {
|
|
45
|
+
"@types/node": "^20.0.0",
|
|
46
|
+
"typescript": "^5.8.3",
|
|
47
|
+
"vitest": "^3.0.5"
|
|
48
|
+
},
|
|
49
|
+
"publishConfig": {
|
|
50
|
+
"access": "public"
|
|
51
|
+
},
|
|
52
|
+
"scripts": {
|
|
53
|
+
"build": "tsc",
|
|
54
|
+
"clean": "rm -rf dist",
|
|
55
|
+
"test": "vitest --passWithNoTests"
|
|
56
|
+
}
|
|
57
|
+
}
|