mecab-ko-wasm 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +315 -0
- package/index.d.ts +90 -0
- package/index.js +19 -0
- package/mecab_ko_wasm.d.ts +148 -0
- package/mecab_ko_wasm.js +9 -0
- package/mecab_ko_wasm_bg.js +548 -0
- package/mecab_ko_wasm_bg.wasm +0 -0
- package/package.json +62 -0
package/README.md
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
# mecab-ko-wasm
|
|
2
|
+
|
|
3
|
+
WebAssembly bindings for MeCab-Ko, a Korean morphological analyzer.
|
|
4
|
+
|
|
5
|
+
This package enables Korean morphological analysis in web browsers and Node.js environments through WebAssembly.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Fast**: Compiled to WebAssembly for near-native performance
|
|
10
|
+
- **Lightweight**: No external dependencies required in the browser
|
|
11
|
+
- **Cross-platform**: Works in both browser and Node.js environments
|
|
12
|
+
- **Type-safe**: Full TypeScript type definitions included
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
### Using npm
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install mecab-ko-wasm
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
### Using yarn
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
yarn add mecab-ko-wasm
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Usage
|
|
29
|
+
|
|
30
|
+
### Browser (ES Modules)
|
|
31
|
+
|
|
32
|
+
```javascript
|
|
33
|
+
import init, { Mecab } from 'mecab-ko-wasm';
|
|
34
|
+
|
|
35
|
+
async function analyze() {
|
|
36
|
+
// Initialize the WASM module
|
|
37
|
+
await init();
|
|
38
|
+
|
|
39
|
+
// Create a Mecab instance
|
|
40
|
+
const mecab = new Mecab();
|
|
41
|
+
|
|
42
|
+
// Extract morphemes
|
|
43
|
+
const morphs = mecab.morphs("안녕하세요");
|
|
44
|
+
console.log(morphs); // ["안녕", "하", "세요"]
|
|
45
|
+
|
|
46
|
+
// Get part-of-speech tags
|
|
47
|
+
const posJson = mecab.pos("형태소 분석");
|
|
48
|
+
const pos = JSON.parse(posJson);
|
|
49
|
+
console.log(pos); // [["형태소", "NNG"], ["분석", "NNG"]]
|
|
50
|
+
|
|
51
|
+
// Get detailed token information
|
|
52
|
+
const tokens = mecab.tokenize("한국어 분석기");
|
|
53
|
+
tokens.forEach(token => {
|
|
54
|
+
console.log(`${token.surface}: ${token.pos}`);
|
|
55
|
+
});
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
analyze();
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Node.js
|
|
62
|
+
|
|
63
|
+
```javascript
|
|
64
|
+
const { Mecab } = require('mecab-ko-wasm');
|
|
65
|
+
|
|
66
|
+
const mecab = new Mecab();
|
|
67
|
+
|
|
68
|
+
// Extract morphemes
|
|
69
|
+
const morphs = mecab.morphs("안녕하세요");
|
|
70
|
+
console.log(morphs); // ["안녕", "하", "세요"]
|
|
71
|
+
|
|
72
|
+
// Extract nouns
|
|
73
|
+
const nouns = mecab.nouns("형태소 분석기입니다");
|
|
74
|
+
console.log(nouns); // ["형태소", "분석기"]
|
|
75
|
+
|
|
76
|
+
// Wakati tokenization
|
|
77
|
+
const words = mecab.wakati("한국어 처리");
|
|
78
|
+
console.log(words); // ["한국어", "처리"]
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### TypeScript
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
import init, { Mecab, WasmToken } from 'mecab-ko-wasm';
|
|
85
|
+
|
|
86
|
+
async function analyze(text: string): Promise<void> {
|
|
87
|
+
await init();
|
|
88
|
+
|
|
89
|
+
const mecab = new Mecab();
|
|
90
|
+
|
|
91
|
+
// Tokenize with full information
|
|
92
|
+
const tokens: WasmToken[] = mecab.tokenize(text);
|
|
93
|
+
tokens.forEach((token: WasmToken) => {
|
|
94
|
+
console.log({
|
|
95
|
+
surface: token.surface,
|
|
96
|
+
pos: token.pos,
|
|
97
|
+
start: token.start,
|
|
98
|
+
end: token.end,
|
|
99
|
+
});
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
// Extract morphemes
|
|
103
|
+
const morphs: string[] = mecab.morphs(text);
|
|
104
|
+
console.log('Morphemes:', morphs);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
analyze("한국어 형태소 분석");
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## API Reference
|
|
111
|
+
|
|
112
|
+
### `Mecab`
|
|
113
|
+
|
|
114
|
+
The main class for Korean morphological analysis.
|
|
115
|
+
|
|
116
|
+
#### Constructor
|
|
117
|
+
|
|
118
|
+
```typescript
|
|
119
|
+
new Mecab(): Mecab
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Creates a new Mecab instance with the default dictionary.
|
|
123
|
+
|
|
124
|
+
**Throws**: Error if initialization fails
|
|
125
|
+
|
|
126
|
+
#### Methods
|
|
127
|
+
|
|
128
|
+
##### `tokenize(text: string): WasmToken[]`
|
|
129
|
+
|
|
130
|
+
Tokenizes the input text and returns detailed token information.
|
|
131
|
+
|
|
132
|
+
**Parameters:**
|
|
133
|
+
- `text`: Input text to analyze
|
|
134
|
+
|
|
135
|
+
**Returns**: Array of `WasmToken` objects containing surface form, POS tag, and position information
|
|
136
|
+
|
|
137
|
+
**Example:**
|
|
138
|
+
```javascript
|
|
139
|
+
const tokens = mecab.tokenize("안녕하세요");
|
|
140
|
+
// [
|
|
141
|
+
// { surface: "안녕", pos: "NNG", start: 0, end: 6, ... },
|
|
142
|
+
// { surface: "하", pos: "XSV", start: 6, end: 9, ... },
|
|
143
|
+
// ...
|
|
144
|
+
// ]
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
##### `morphs(text: string): string[]`
|
|
148
|
+
|
|
149
|
+
Extracts morphemes from the input text.
|
|
150
|
+
|
|
151
|
+
**Parameters:**
|
|
152
|
+
- `text`: Input text to analyze
|
|
153
|
+
|
|
154
|
+
**Returns**: Array of morpheme strings
|
|
155
|
+
|
|
156
|
+
**Example:**
|
|
157
|
+
```javascript
|
|
158
|
+
const morphs = mecab.morphs("안녕하세요");
|
|
159
|
+
// ["안녕", "하", "세요"]
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
##### `pos(text: string): string`
|
|
163
|
+
|
|
164
|
+
Extracts part-of-speech tagged pairs as a JSON string.
|
|
165
|
+
|
|
166
|
+
**Parameters:**
|
|
167
|
+
- `text`: Input text to analyze
|
|
168
|
+
|
|
169
|
+
**Returns**: JSON string containing an array of `[surface, pos]` pairs
|
|
170
|
+
|
|
171
|
+
**Example:**
|
|
172
|
+
```javascript
|
|
173
|
+
const posJson = mecab.pos("안녕하세요");
|
|
174
|
+
const pos = JSON.parse(posJson);
|
|
175
|
+
// [["안녕", "NNG"], ["하", "XSV"], ["세요", "EP+EF"]]
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
##### `nouns(text: string): string[]`
|
|
179
|
+
|
|
180
|
+
Extracts only nouns from the input text.
|
|
181
|
+
|
|
182
|
+
**Parameters:**
|
|
183
|
+
- `text`: Input text to analyze
|
|
184
|
+
|
|
185
|
+
**Returns**: Array of noun strings
|
|
186
|
+
|
|
187
|
+
**Example:**
|
|
188
|
+
```javascript
|
|
189
|
+
const nouns = mecab.nouns("형태소 분석기입니다");
|
|
190
|
+
// ["형태소", "분석기"]
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
##### `wakati(text: string): string[]`
|
|
194
|
+
|
|
195
|
+
Performs wakati (분리, word-segmentation) tokenization, returning the morphemes as an array, similar to `morphs()`.
|
|
196
|
+
|
|
197
|
+
**Parameters:**
|
|
198
|
+
- `text`: Input text to analyze
|
|
199
|
+
|
|
200
|
+
**Returns**: Array of morpheme strings
|
|
201
|
+
|
|
202
|
+
**Example:**
|
|
203
|
+
```javascript
|
|
204
|
+
const words = mecab.wakati("형태소 분석");
|
|
205
|
+
// ["형태소", "분석"]
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### `WasmToken`
|
|
209
|
+
|
|
210
|
+
Represents a single token with detailed morphological information.
|
|
211
|
+
|
|
212
|
+
#### Properties
|
|
213
|
+
|
|
214
|
+
- `surface: string` - The surface form (표면형) of the token
|
|
215
|
+
- `pos: string` - Part-of-speech tag (품사 태그)
|
|
216
|
+
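- `start: number` - Start position as a UTF-8 byte offset into the input text (not a JavaScript string index; each Hangul syllable occupies 3 bytes)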
|
|
217
|
+
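- `end: number` - End position as a UTF-8 byte offset into the input text (exclusive; equals the next token's `start` when tokens are adjacent)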
|
|
218
|
+
- `reading: string | undefined` - Reading of the token (if available)
|
|
219
|
+
- `lemma: string | undefined` - Base form/lemma (if available)
|
|
220
|
+
|
|
221
|
+
#### Methods
|
|
222
|
+
|
|
223
|
+
##### `toJSON(): string`
|
|
224
|
+
|
|
225
|
+
Converts the token to a JSON string.
|
|
226
|
+
|
|
227
|
+
**Returns**: JSON string representation of the token
|
|
228
|
+
|
|
229
|
+
## Building from Source
|
|
230
|
+
|
|
231
|
+
### Prerequisites
|
|
232
|
+
|
|
233
|
+
- Rust (1.75+)
|
|
234
|
+
- wasm-pack
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
cargo install wasm-pack
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
### Build
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
# Build for browser
|
|
244
|
+
wasm-pack build --target web
|
|
245
|
+
|
|
246
|
+
# Build for Node.js
|
|
247
|
+
wasm-pack build --target nodejs
|
|
248
|
+
|
|
249
|
+
# Build for bundlers (webpack, etc.)
|
|
250
|
+
wasm-pack build --target bundler
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
### Development
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
# Run tests
|
|
257
|
+
wasm-pack test --node
|
|
258
|
+
|
|
259
|
+
# Run tests in a headless browser (Firefox shown; pass --chrome to use Chrome instead)
|
|
260
|
+
wasm-pack test --headless --firefox
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## Part-of-Speech Tags
|
|
264
|
+
|
|
265
|
+
MeCab-Ko uses the Sejong corpus POS tag set. Common tags include:
|
|
266
|
+
|
|
267
|
+
- `NNG`: General noun (일반 명사)
|
|
268
|
+
- `NNP`: Proper noun (고유 명사)
|
|
269
|
+
- `VV`: Verb (동사)
|
|
270
|
+
- `VA`: Adjective (형용사)
|
|
271
|
+
- `MAG`: General adverb (일반 부사)
|
|
272
|
+
- `JKS`: Subject case particle (주격 조사)
|
|
273
|
+
- `JKO`: Object case particle (목적격 조사)
|
|
274
|
+
- `EP`: Pre-final ending (선어말 어미)
|
|
275
|
+
- `EF`: Sentence-final ending (종결 어미)
|
|
276
|
+
|
|
277
|
+
For a complete list, see [Sejong POS Tags](https://docs.google.com/spreadsheets/d/1-9blXKjtjeKZqsf4NzHeYJCrr49-nXeRF6D80udfcwY).
|
|
278
|
+
|
|
279
|
+
## Performance
|
|
280
|
+
|
|
281
|
+
MeCab-Ko WASM provides near-native performance through WebAssembly compilation:
|
|
282
|
+
|
|
283
|
+
- **Tokenization**: ~1-2ms for typical sentences (10-20 words)
|
|
284
|
+
- **Memory**: ~2-5MB WASM module size (with dictionary)
|
|
285
|
+
- **Initialization**: ~10-50ms first load (cached afterwards)
|
|
286
|
+
|
|
287
|
+
## Browser Compatibility
|
|
288
|
+
|
|
289
|
+
- Chrome/Edge 57+
|
|
290
|
+
- Firefox 52+
|
|
291
|
+
- Safari 11+
|
|
292
|
+
- Node.js 12+
|
|
293
|
+
|
|
294
|
+
## License
|
|
295
|
+
|
|
296
|
+
Licensed under either of:
|
|
297
|
+
|
|
298
|
+
- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
|
|
299
|
+
- MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
|
|
300
|
+
|
|
301
|
+
at your option.
|
|
302
|
+
|
|
303
|
+
## Contributing
|
|
304
|
+
|
|
305
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](../../CONTRIBUTING.md) for guidelines.
|
|
306
|
+
|
|
307
|
+
## Related Projects
|
|
308
|
+
|
|
309
|
+
- [mecab-ko](https://bitbucket.org/eunjeon/mecab-ko) - Original C++ implementation
|
|
310
|
+
- [mecab-ko-dic](https://bitbucket.org/eunjeon/mecab-ko-dic) - Korean dictionary for MeCab
|
|
311
|
+
- [konlpy](https://konlpy.org/) - Python Korean NLP library
|
|
312
|
+
|
|
313
|
+
## Acknowledgments
|
|
314
|
+
|
|
315
|
+
This project is based on MeCab-Ko, originally developed by the Eunjeon project.
|
package/index.d.ts
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MeCab-Ko WebAssembly TypeScript Definitions
|
|
3
|
+
* Korean morphological analyzer for browser and Node.js
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * One morpheme produced by the tokenizer.
 *
 * `start` and `end` are byte offsets into the input text, not JavaScript
 * string indices (the README example reports "안녕" as bytes 0 to 6).
 */
export interface Token {
  /** Surface form: the token's text as it appears in the input */
  readonly surface: string;
  /** Part-of-speech tag */
  readonly pos: string;
  /** Byte offset in the input text at which the token starts */
  readonly start: number;
  /** Byte offset in the input text at which the token ends */
  readonly end: number;
  /** Reading form; absent when not available */
  readonly reading?: string;
  /** Lemma (dictionary form); absent when not available */
  readonly lemma?: string;
  /** Serialize the token to a JSON string */
  toJSON(): string;
}
|
|
25
|
+
|
|
26
|
+
/**
 * MeCab-Ko tokenizer class
 *
 * @example
 * ```typescript
 * import init, { Mecab } from 'mecab-ko-wasm';
 *
 * await init();
 * const mecab = new Mecab();
 *
 * const tokens = mecab.tokenize('안녕하세요');
 * console.log(tokens);
 *
 * const morphs = mecab.morphs('형태소 분석');
 * console.log(morphs); // ['형태소', '분석']
 *
 * const nouns = mecab.nouns('한국어 형태소 분석기');
 * console.log(nouns); // ['한국어', '형태소', '분석기']
 *
 * // pos() returns JSON text, so parse it to get the pairs
 * const pairs: [string, string][] = JSON.parse(mecab.pos('형태소 분석'));
 * console.log(pairs); // [['형태소', 'NNG'], ['분석', 'NNG']]
 * ```
 */
export class Mecab {
  /**
   * Create a new MeCab tokenizer instance with the default dictionary
   * @throws If tokenizer initialization fails
   */
  constructor();

  /**
   * Release the WebAssembly-side tokenizer owned by this instance.
   * The instance must not be used after this call.
   */
  free(): void;

  /**
   * Tokenize Korean text into morphemes
   * @param text - Input Korean text
   * @returns Array of Token objects
   */
  tokenize(text: string): Token[];

  /**
   * Extract morpheme surface forms
   * @param text - Input Korean text
   * @returns Array of surface form strings
   */
  morphs(text: string): string[];

  /**
   * Extract part-of-speech tagged pairs
   *
   * The binding returns the pairs serialized as JSON text, not as an array;
   * call `JSON.parse()` on the result to obtain `[surface, pos]` tuples.
   *
   * @param text - Input Korean text
   * @returns JSON string encoding an array of [surface, pos] pairs
   * @throws If JSON serialization fails
   */
  pos(text: string): string;

  /**
   * Extract only nouns from text
   * @param text - Input Korean text
   * @returns Array of noun strings
   */
  nouns(text: string): string[];

  /**
   * Split text into morphemes (wakati mode), similar to `morphs()`
   * @param text - Input Korean text
   * @returns Array of morpheme strings
   */
  wakati(text: string): string[];
}
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
 * Initialize the WebAssembly module
|
|
88
|
+
 * Must be called before creating Mecab instances
 *
 * NOTE(review): the generated typings in mecab_ko_wasm.d.ts declare `init`
 * as a synchronous named export (`export function init(): void`), and
 * index.js default-exports an object rather than a function. Confirm which
 * entry point this default-export, Promise-returning declaration describes.
|
|
89
|
+
 */
|
|
90
|
+
export default function init(): Promise<void>;
|
package/index.js
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * Function-style convenience API over the class-based bindings in
 * `./mecab_ko_wasm.js`.
 *
 * That binding module exposes `Mecab`, `WasmToken` and a *named* `init`
 * export. It has no default export and no free `tokenize`/`morphs`/...
 * functions, so a default import fails to link and `wasm.tokenize` etc. are
 * `undefined`. The helpers below therefore use a namespace import and
 * delegate to a single, lazily created `Mecab` instance.
 */
import * as wasm from './mecab_ko_wasm.js';

/** Whether the binding's one-time `init()` has already been run. */
let initialized = false;

/** Shared tokenizer instance, created on first use by `getMecab()`. */
let sharedMecab;

/**
 * Run the binding's `init()` once; later calls are no-ops.
 *
 * Kept `async` so existing `await initialize()` callers keep working, even
 * though the underlying `init()` is synchronous.
 *
 * @returns {Promise<void>}
 */
export async function initialize() {
  if (!initialized) {
    wasm.init();
    initialized = true;
  }
}

/**
 * Return the shared `Mecab` instance, constructing it on first call.
 * @returns {wasm.Mecab}
 * @throws If the tokenizer cannot be constructed
 */
function getMecab() {
  if (sharedMecab === undefined) {
    sharedMecab = new wasm.Mecab();
  }
  return sharedMecab;
}

/** Tokenize `text` into detailed tokens (see `Mecab.tokenize`). */
export const tokenize = (text) => getMecab().tokenize(text);

/** Extract morpheme strings from `text` (see `Mecab.morphs`). */
export const morphs = (text) => getMecab().morphs(text);

/** Extract noun strings from `text` (see `Mecab.nouns`). */
export const nouns = (text) => getMecab().nouns(text);

/** Return `[surface, pos]` pairs for `text` as a JSON string (see `Mecab.pos`). */
export const pos = (text) => getMecab().pos(text);

/** Wakati tokenization of `text` (see `Mecab.wakati`). */
export const wakati = (text) => getMecab().wakati(text);

// NOTE(review): the binding module re-exports only Mecab, WasmToken and init,
// so this resolves to `undefined` unless a `getVersion` export is added there.
// Kept as-is to preserve the export name; confirm the intended source.
export const getVersion = wasm.getVersion;

export default { initialize, tokenize, morphs, nouns, pos, wakati, getVersion };
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* The main MeCab-Ko tokenizer for WebAssembly
|
|
6
|
+
*
|
|
7
|
+
* This class provides Korean morphological analysis capabilities
|
|
8
|
+
* in JavaScript/TypeScript environments.
|
|
9
|
+
*/
|
|
10
|
+
export class Mecab {
|
|
11
|
+
free(): void;
|
|
12
|
+
[Symbol.dispose](): void;
|
|
13
|
+
/**
|
|
14
|
+
* Extract morphemes (형태소) from text
|
|
15
|
+
*
|
|
16
|
+
* Returns an array of morpheme strings without POS information.
|
|
17
|
+
*
|
|
18
|
+
* # Example (JavaScript)
|
|
19
|
+
*
|
|
20
|
+
* ```javascript
|
|
21
|
+
* const morphs = mecab.morphs("안녕하세요");
|
|
22
|
+
* console.log(morphs); // ["안녕", "하", "세요"]
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
morphs(text: string): string[];
|
|
26
|
+
/**
|
|
27
|
+
* Create a new Mecab instance with the default dictionary
|
|
28
|
+
*
|
|
29
|
+
* # Example (JavaScript)
|
|
30
|
+
*
|
|
31
|
+
* ```javascript
|
|
32
|
+
* const mecab = new Mecab();
|
|
33
|
+
* ```
|
|
34
|
+
*
|
|
35
|
+
* # Errors
|
|
36
|
+
*
|
|
37
|
+
* Returns an error if tokenizer initialization fails
|
|
38
|
+
*/
|
|
39
|
+
constructor();
|
|
40
|
+
/**
|
|
41
|
+
* Extract nouns (명사) from text
|
|
42
|
+
*
|
|
43
|
+
* Returns an array of noun strings.
|
|
44
|
+
*
|
|
45
|
+
* # Example (JavaScript)
|
|
46
|
+
*
|
|
47
|
+
* ```javascript
|
|
48
|
+
* const nouns = mecab.nouns("형태소 분석기입니다");
|
|
49
|
+
* console.log(nouns); // ["형태소", "분석기"]
|
|
50
|
+
* ```
|
|
51
|
+
*/
|
|
52
|
+
nouns(text: string): string[];
|
|
53
|
+
/**
|
|
54
|
+
* Extract part-of-speech tagged pairs
|
|
55
|
+
*
|
|
56
|
+
* Returns a JSON string containing an array of [surface, pos] pairs.
|
|
57
|
+
*
|
|
58
|
+
* # Example (JavaScript)
|
|
59
|
+
*
|
|
60
|
+
* ```javascript
|
|
61
|
+
* const posJson = mecab.pos("안녕하세요");
|
|
62
|
+
* const pos = JSON.parse(posJson);
|
|
63
|
+
* console.log(pos); // [["안녕", "NNG"], ["하", "XSV"], ["세요", "EP+EF"]]
|
|
64
|
+
* ```
|
|
65
|
+
*
|
|
66
|
+
* # Errors
|
|
67
|
+
*
|
|
68
|
+
* Returns an error if JSON serialization fails
|
|
69
|
+
*/
|
|
70
|
+
pos(text: string): string;
|
|
71
|
+
/**
|
|
72
|
+
* Tokenize text and return detailed token information
|
|
73
|
+
*
|
|
74
|
+
* Returns an array of tokens with surface form, POS tag, and position information.
|
|
75
|
+
*
|
|
76
|
+
* # Example (JavaScript)
|
|
77
|
+
*
|
|
78
|
+
* ```javascript
|
|
79
|
+
* const tokens = mecab.tokenize("안녕하세요");
|
|
80
|
+
* tokens.forEach(token => {
|
|
81
|
+
* console.log(`${token.surface}: ${token.pos}`);
|
|
82
|
+
* });
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
tokenize(text: string): WasmToken[];
|
|
86
|
+
/**
|
|
87
|
+
* Perform wakati (분리) tokenization
|
|
88
|
+
*
|
|
89
|
+
* Returns an array of morpheme strings, similar to `morphs()`.
|
|
90
|
+
*
|
|
91
|
+
* # Example (JavaScript)
|
|
92
|
+
*
|
|
93
|
+
* ```javascript
|
|
94
|
+
* const words = mecab.wakati("형태소 분석");
|
|
95
|
+
* console.log(words); // ["형태소", "분석"]
|
|
96
|
+
* ```
|
|
97
|
+
*/
|
|
98
|
+
wakati(text: string): string[];
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
/**
|
|
102
|
+
* A JavaScript-friendly token representation
|
|
103
|
+
*/
|
|
104
|
+
export class WasmToken {
|
|
105
|
+
private constructor();
|
|
106
|
+
free(): void;
|
|
107
|
+
[Symbol.dispose](): void;
|
|
108
|
+
/**
|
|
109
|
+
* Convert to JSON string for easier JavaScript interop
|
|
110
|
+
*
|
|
111
|
+
* # Errors
|
|
112
|
+
*
|
|
113
|
+
* Returns an error if serialization fails
|
|
114
|
+
*/
|
|
115
|
+
toJSON(): string;
|
|
116
|
+
/**
|
|
117
|
+
* Get the end position in bytes
|
|
118
|
+
*/
|
|
119
|
+
readonly end: number;
|
|
120
|
+
/**
|
|
121
|
+
* Get the lemma/base form (if available)
|
|
122
|
+
*/
|
|
123
|
+
readonly lemma: string | undefined;
|
|
124
|
+
/**
|
|
125
|
+
* Get the part-of-speech tag (품사)
|
|
126
|
+
*/
|
|
127
|
+
readonly pos: string;
|
|
128
|
+
/**
|
|
129
|
+
* Get the reading (if available)
|
|
130
|
+
*/
|
|
131
|
+
readonly reading: string | undefined;
|
|
132
|
+
/**
|
|
133
|
+
* Get the start position in bytes
|
|
134
|
+
*/
|
|
135
|
+
readonly start: number;
|
|
136
|
+
/**
|
|
137
|
+
* Get the surface form (표면형)
|
|
138
|
+
*/
|
|
139
|
+
readonly surface: string;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Initialize the WASM module
|
|
144
|
+
*
|
|
145
|
+
* This function should be called once before using the library.
|
|
146
|
+
* It sets up panic hooks for better error messages in development.
|
|
147
|
+
*/
|
|
148
|
+
export function init(): void;
|
package/mecab_ko_wasm.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/* @ts-self-types="./mecab_ko_wasm.d.ts" */
|
|
2
|
+
|
|
3
|
+
// Import the compiled WebAssembly module directly. A bare `.wasm` import
// requires a bundler or runtime that supports WebAssembly ES-module imports.
import * as wasm from "./mecab_ko_wasm_bg.wasm";
|
|
4
|
+
import { __wbg_set_wasm } from "./mecab_ko_wasm_bg.js";
|
|
5
|
+
// Hand the wasm module to the JS glue before anything else runs
// (presumably stored as the `wasm` binding the glue's methods call into).
__wbg_set_wasm(wasm);
|
|
6
|
+
// Invoke the module's start routine once, at import time.
wasm.__wbindgen_start();
|
|
7
|
+
// Re-export the public API implemented by the glue module. Note that there
// is no default export: `init` is a named export.
export {
|
|
8
|
+
Mecab, WasmToken, init
|
|
9
|
+
} from "./mecab_ko_wasm_bg.js";
|
|
@@ -0,0 +1,548 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The main MeCab-Ko tokenizer for WebAssembly
|
|
3
|
+
*
|
|
4
|
+
* This class provides Korean morphological analysis capabilities
|
|
5
|
+
* in JavaScript/TypeScript environments.
|
|
6
|
+
*/
|
|
7
|
+
export class Mecab {
|
|
8
|
+
__destroy_into_raw() {
|
|
9
|
+
const ptr = this.__wbg_ptr;
|
|
10
|
+
this.__wbg_ptr = 0;
|
|
11
|
+
MecabFinalization.unregister(this);
|
|
12
|
+
return ptr;
|
|
13
|
+
}
|
|
14
|
+
free() {
|
|
15
|
+
const ptr = this.__destroy_into_raw();
|
|
16
|
+
wasm.__wbg_mecab_free(ptr, 0);
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Extract morphemes (형태소) from text
|
|
20
|
+
*
|
|
21
|
+
* Returns an array of morpheme strings without POS information.
|
|
22
|
+
*
|
|
23
|
+
* # Example (JavaScript)
|
|
24
|
+
*
|
|
25
|
+
* ```javascript
|
|
26
|
+
* const morphs = mecab.morphs("안녕하세요");
|
|
27
|
+
* console.log(morphs); // ["안녕", "하", "세요"]
|
|
28
|
+
* ```
|
|
29
|
+
* @param {string} text
|
|
30
|
+
* @returns {string[]}
|
|
31
|
+
*/
|
|
32
|
+
morphs(text) {
|
|
33
|
+
try {
|
|
34
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
35
|
+
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_export2, wasm.__wbindgen_export3);
|
|
36
|
+
const len0 = WASM_VECTOR_LEN;
|
|
37
|
+
wasm.mecab_morphs(retptr, this.__wbg_ptr, ptr0, len0);
|
|
38
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
39
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
40
|
+
var v2 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
41
|
+
wasm.__wbindgen_export(r0, r1 * 4, 4);
|
|
42
|
+
return v2;
|
|
43
|
+
} finally {
|
|
44
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Create a new Mecab instance with the default dictionary
|
|
49
|
+
*
|
|
50
|
+
* # Example (JavaScript)
|
|
51
|
+
*
|
|
52
|
+
* ```javascript
|
|
53
|
+
* const mecab = new Mecab();
|
|
54
|
+
* ```
|
|
55
|
+
*
|
|
56
|
+
* # Errors
|
|
57
|
+
*
|
|
58
|
+
* Returns an error if tokenizer initialization fails
|
|
59
|
+
*/
|
|
60
|
+
constructor() {
|
|
61
|
+
try {
|
|
62
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
63
|
+
wasm.mecab_new(retptr);
|
|
64
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
65
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
66
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
67
|
+
if (r2) {
|
|
68
|
+
throw takeObject(r1);
|
|
69
|
+
}
|
|
70
|
+
this.__wbg_ptr = r0 >>> 0;
|
|
71
|
+
MecabFinalization.register(this, this.__wbg_ptr, this);
|
|
72
|
+
return this;
|
|
73
|
+
} finally {
|
|
74
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* Extract nouns (명사) from text
|
|
79
|
+
*
|
|
80
|
+
* Returns an array of noun strings.
|
|
81
|
+
*
|
|
82
|
+
* # Example (JavaScript)
|
|
83
|
+
*
|
|
84
|
+
* ```javascript
|
|
85
|
+
* const nouns = mecab.nouns("형태소 분석기입니다");
|
|
86
|
+
* console.log(nouns); // ["형태소", "분석기"]
|
|
87
|
+
* ```
|
|
88
|
+
* @param {string} text
|
|
89
|
+
* @returns {string[]}
|
|
90
|
+
*/
|
|
91
|
+
nouns(text) {
|
|
92
|
+
try {
|
|
93
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
94
|
+
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_export2, wasm.__wbindgen_export3);
|
|
95
|
+
const len0 = WASM_VECTOR_LEN;
|
|
96
|
+
wasm.mecab_nouns(retptr, this.__wbg_ptr, ptr0, len0);
|
|
97
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
98
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
99
|
+
var v2 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
100
|
+
wasm.__wbindgen_export(r0, r1 * 4, 4);
|
|
101
|
+
return v2;
|
|
102
|
+
} finally {
|
|
103
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
/**
|
|
107
|
+
* Extract part-of-speech tagged pairs
|
|
108
|
+
*
|
|
109
|
+
* Returns a JSON string containing an array of [surface, pos] pairs.
|
|
110
|
+
*
|
|
111
|
+
* # Example (JavaScript)
|
|
112
|
+
*
|
|
113
|
+
* ```javascript
|
|
114
|
+
* const posJson = mecab.pos("안녕하세요");
|
|
115
|
+
* const pos = JSON.parse(posJson);
|
|
116
|
+
* console.log(pos); // [["안녕", "NNG"], ["하", "XSV"], ["세요", "EP+EF"]]
|
|
117
|
+
* ```
|
|
118
|
+
*
|
|
119
|
+
* # Errors
|
|
120
|
+
*
|
|
121
|
+
* Returns an error if JSON serialization fails
|
|
122
|
+
* @param {string} text
|
|
123
|
+
* @returns {string}
|
|
124
|
+
*/
|
|
125
|
+
pos(text) {
|
|
126
|
+
let deferred3_0;
|
|
127
|
+
let deferred3_1;
|
|
128
|
+
try {
|
|
129
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
130
|
+
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_export2, wasm.__wbindgen_export3);
|
|
131
|
+
const len0 = WASM_VECTOR_LEN;
|
|
132
|
+
wasm.mecab_pos(retptr, this.__wbg_ptr, ptr0, len0);
|
|
133
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
134
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
135
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
136
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
137
|
+
var ptr2 = r0;
|
|
138
|
+
var len2 = r1;
|
|
139
|
+
if (r3) {
|
|
140
|
+
ptr2 = 0; len2 = 0;
|
|
141
|
+
throw takeObject(r2);
|
|
142
|
+
}
|
|
143
|
+
deferred3_0 = ptr2;
|
|
144
|
+
deferred3_1 = len2;
|
|
145
|
+
return getStringFromWasm0(ptr2, len2);
|
|
146
|
+
} finally {
|
|
147
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
148
|
+
wasm.__wbindgen_export(deferred3_0, deferred3_1, 1);
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
/**
|
|
152
|
+
* Tokenize text and return detailed token information
|
|
153
|
+
*
|
|
154
|
+
* Returns an array of tokens with surface form, POS tag, and position information.
|
|
155
|
+
*
|
|
156
|
+
* # Example (JavaScript)
|
|
157
|
+
*
|
|
158
|
+
* ```javascript
|
|
159
|
+
* const tokens = mecab.tokenize("안녕하세요");
|
|
160
|
+
* tokens.forEach(token => {
|
|
161
|
+
* console.log(`${token.surface}: ${token.pos}`);
|
|
162
|
+
* });
|
|
163
|
+
* ```
|
|
164
|
+
* @param {string} text
|
|
165
|
+
* @returns {WasmToken[]}
|
|
166
|
+
*/
|
|
167
|
+
tokenize(text) {
|
|
168
|
+
try {
|
|
169
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
170
|
+
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_export2, wasm.__wbindgen_export3);
|
|
171
|
+
const len0 = WASM_VECTOR_LEN;
|
|
172
|
+
wasm.mecab_tokenize(retptr, this.__wbg_ptr, ptr0, len0);
|
|
173
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
174
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
175
|
+
var v2 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
176
|
+
wasm.__wbindgen_export(r0, r1 * 4, 4);
|
|
177
|
+
return v2;
|
|
178
|
+
} finally {
|
|
179
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
/**
|
|
183
|
+
* Perform wakati (분리) tokenization
|
|
184
|
+
*
|
|
185
|
+
* Returns an array of morpheme strings, similar to `morphs()`.
|
|
186
|
+
*
|
|
187
|
+
* # Example (JavaScript)
|
|
188
|
+
*
|
|
189
|
+
* ```javascript
|
|
190
|
+
* const words = mecab.wakati("형태소 분석");
|
|
191
|
+
* console.log(words); // ["형태소", "분석"]
|
|
192
|
+
* ```
|
|
193
|
+
* @param {string} text
|
|
194
|
+
* @returns {string[]}
|
|
195
|
+
*/
|
|
196
|
+
wakati(text) {
|
|
197
|
+
try {
|
|
198
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
199
|
+
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_export2, wasm.__wbindgen_export3);
|
|
200
|
+
const len0 = WASM_VECTOR_LEN;
|
|
201
|
+
wasm.mecab_wakati(retptr, this.__wbg_ptr, ptr0, len0);
|
|
202
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
203
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
204
|
+
var v2 = getArrayJsValueFromWasm0(r0, r1).slice();
|
|
205
|
+
wasm.__wbindgen_export(r0, r1 * 4, 4);
|
|
206
|
+
return v2;
|
|
207
|
+
} finally {
|
|
208
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
if (Symbol.dispose) Mecab.prototype[Symbol.dispose] = Mecab.prototype.free;
|
|
213
|
+
|
|
214
|
+
/**
|
|
215
|
+
* A JavaScript-friendly token representation
|
|
216
|
+
*/
|
|
217
|
+
export class WasmToken {
|
|
218
|
+
static __wrap(ptr) {
|
|
219
|
+
ptr = ptr >>> 0;
|
|
220
|
+
const obj = Object.create(WasmToken.prototype);
|
|
221
|
+
obj.__wbg_ptr = ptr;
|
|
222
|
+
WasmTokenFinalization.register(obj, obj.__wbg_ptr, obj);
|
|
223
|
+
return obj;
|
|
224
|
+
}
|
|
225
|
+
__destroy_into_raw() {
|
|
226
|
+
const ptr = this.__wbg_ptr;
|
|
227
|
+
this.__wbg_ptr = 0;
|
|
228
|
+
WasmTokenFinalization.unregister(this);
|
|
229
|
+
return ptr;
|
|
230
|
+
}
|
|
231
|
+
free() {
|
|
232
|
+
const ptr = this.__destroy_into_raw();
|
|
233
|
+
wasm.__wbg_wasmtoken_free(ptr, 0);
|
|
234
|
+
}
|
|
235
|
+
/**
|
|
236
|
+
* Get the end position in bytes
|
|
237
|
+
* @returns {number}
|
|
238
|
+
*/
|
|
239
|
+
get end() {
|
|
240
|
+
const ret = wasm.wasmtoken_end(this.__wbg_ptr);
|
|
241
|
+
return ret >>> 0;
|
|
242
|
+
}
|
|
243
|
+
/**
|
|
244
|
+
* Get the lemma/base form (if available)
|
|
245
|
+
* @returns {string | undefined}
|
|
246
|
+
*/
|
|
247
|
+
get lemma() {
|
|
248
|
+
try {
|
|
249
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
250
|
+
wasm.wasmtoken_lemma(retptr, this.__wbg_ptr);
|
|
251
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
252
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
253
|
+
let v1;
|
|
254
|
+
if (r0 !== 0) {
|
|
255
|
+
v1 = getStringFromWasm0(r0, r1).slice();
|
|
256
|
+
wasm.__wbindgen_export(r0, r1 * 1, 1);
|
|
257
|
+
}
|
|
258
|
+
return v1;
|
|
259
|
+
} finally {
|
|
260
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
/**
|
|
264
|
+
* Get the part-of-speech tag (품사)
|
|
265
|
+
* @returns {string}
|
|
266
|
+
*/
|
|
267
|
+
get pos() {
|
|
268
|
+
let deferred1_0;
|
|
269
|
+
let deferred1_1;
|
|
270
|
+
try {
|
|
271
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
272
|
+
wasm.wasmtoken_pos(retptr, this.__wbg_ptr);
|
|
273
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
274
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
275
|
+
deferred1_0 = r0;
|
|
276
|
+
deferred1_1 = r1;
|
|
277
|
+
return getStringFromWasm0(r0, r1);
|
|
278
|
+
} finally {
|
|
279
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
280
|
+
wasm.__wbindgen_export(deferred1_0, deferred1_1, 1);
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
/**
|
|
284
|
+
* Get the reading (if available)
|
|
285
|
+
* @returns {string | undefined}
|
|
286
|
+
*/
|
|
287
|
+
get reading() {
|
|
288
|
+
try {
|
|
289
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
290
|
+
wasm.wasmtoken_reading(retptr, this.__wbg_ptr);
|
|
291
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
292
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
293
|
+
let v1;
|
|
294
|
+
if (r0 !== 0) {
|
|
295
|
+
v1 = getStringFromWasm0(r0, r1).slice();
|
|
296
|
+
wasm.__wbindgen_export(r0, r1 * 1, 1);
|
|
297
|
+
}
|
|
298
|
+
return v1;
|
|
299
|
+
} finally {
|
|
300
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
/**
|
|
304
|
+
* Get the start position in bytes
|
|
305
|
+
* @returns {number}
|
|
306
|
+
*/
|
|
307
|
+
get start() {
|
|
308
|
+
const ret = wasm.wasmtoken_start(this.__wbg_ptr);
|
|
309
|
+
return ret >>> 0;
|
|
310
|
+
}
|
|
311
|
+
/**
|
|
312
|
+
* Get the surface form (표면형)
|
|
313
|
+
* @returns {string}
|
|
314
|
+
*/
|
|
315
|
+
get surface() {
|
|
316
|
+
let deferred1_0;
|
|
317
|
+
let deferred1_1;
|
|
318
|
+
try {
|
|
319
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
320
|
+
wasm.wasmtoken_surface(retptr, this.__wbg_ptr);
|
|
321
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
322
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
323
|
+
deferred1_0 = r0;
|
|
324
|
+
deferred1_1 = r1;
|
|
325
|
+
return getStringFromWasm0(r0, r1);
|
|
326
|
+
} finally {
|
|
327
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
328
|
+
wasm.__wbindgen_export(deferred1_0, deferred1_1, 1);
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
/**
|
|
332
|
+
* Convert to JSON string for easier JavaScript interop
|
|
333
|
+
*
|
|
334
|
+
* # Errors
|
|
335
|
+
*
|
|
336
|
+
* Returns an error if serialization fails
|
|
337
|
+
* @returns {string}
|
|
338
|
+
*/
|
|
339
|
+
toJSON() {
|
|
340
|
+
let deferred2_0;
|
|
341
|
+
let deferred2_1;
|
|
342
|
+
try {
|
|
343
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
344
|
+
wasm.wasmtoken_toJSON(retptr, this.__wbg_ptr);
|
|
345
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
346
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
347
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
348
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
349
|
+
var ptr1 = r0;
|
|
350
|
+
var len1 = r1;
|
|
351
|
+
if (r3) {
|
|
352
|
+
ptr1 = 0; len1 = 0;
|
|
353
|
+
throw takeObject(r2);
|
|
354
|
+
}
|
|
355
|
+
deferred2_0 = ptr1;
|
|
356
|
+
deferred2_1 = len1;
|
|
357
|
+
return getStringFromWasm0(ptr1, len1);
|
|
358
|
+
} finally {
|
|
359
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
360
|
+
wasm.__wbindgen_export(deferred2_0, deferred2_1, 1);
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
// Where the runtime defines `Symbol.dispose`, alias it to `free()` on WasmToken.
if (Symbol.dispose) {
    WasmToken.prototype[Symbol.dispose] = WasmToken.prototype.free;
}
|
|
365
|
+
|
|
366
|
+
/**
 * Initialize the WASM module.
 *
 * Call this once before using anything else from the library. It forwards to
 * the module's own `init` export, which (per the upstream description)
 * installs panic hooks so development-time errors are more readable.
 */
export function init() {
    wasm.init();
}
|
|
375
|
+
/**
 * Import shim: decode the (pointer, length) string from wasm memory and throw
 * it as a JS `Error`.
 */
export function __wbg___wbindgen_throw_6ddd609b62940d55(arg0, arg1) {
    const message = getStringFromWasm0(arg0, arg1);
    throw new Error(message);
}
|
|
378
|
+
/**
 * Import shim: print the (pointer, length) string from wasm memory with
 * `console.error`, then pass the buffer to `__wbindgen_export` whether or not
 * logging succeeded (presumably the mangled deallocator — confirm).
 */
export function __wbg_error_a6fa202b58aa1cd3(arg0, arg1) {
    try {
        const message = getStringFromWasm0(arg0, arg1);
        console.error(message);
    } finally {
        wasm.__wbindgen_export(arg0, arg1, 1);
    }
}
|
|
389
|
+
/**
 * Import shim: create a fresh `Error` and return the heap-table index it was
 * stored under.
 */
export function __wbg_new_227d7c05414eb861() {
    return addHeapObject(new Error());
}
|
|
393
|
+
/**
 * Import shim: read `.stack` from the heap object at index `arg1`, copy it
 * into wasm memory, and write the resulting pointer and length to `arg0`.
 *
 * Layout at `arg0`: pointer at offset 0, length at offset 4 (little-endian).
 */
export function __wbg_stack_3b0d974bbf31e44f(arg0, arg1) {
    const stackText = getObject(arg1).stack;
    // `__wbindgen_export2` / `__wbindgen_export3` fill the malloc / realloc parameters.
    const strPtr = passStringToWasm0(stackText, wasm.__wbindgen_export2, wasm.__wbindgen_export3);
    const strLen = WASM_VECTOR_LEN;
    getDataViewMemory0().setInt32(arg0 + 4, strLen, true);
    getDataViewMemory0().setInt32(arg0, strPtr, true);
}
|
|
400
|
+
/**
 * Import shim: wrap the raw pointer `arg0` in a `WasmToken` and return the
 * heap-table index the wrapper was stored under.
 */
export function __wbg_wasmtoken_new(arg0) {
    return addHeapObject(WasmToken.__wrap(arg0));
}
|
|
404
|
+
/**
 * Cast intrinsic for `Ref(String) -> Externref`: decode the (pointer, length)
 * string from wasm memory and return the heap-table index of the JS string.
 */
export function __wbindgen_cast_0000000000000001(arg0, arg1) {
    return addHeapObject(getStringFromWasm0(arg0, arg1));
}
|
|
409
|
+
/**
 * Import shim: release the heap-table slot at index `arg0`.
 */
export function __wbindgen_object_drop_ref(arg0) {
    // The stored value is not needed here; only the slot release matters.
    void takeObject(arg0);
}
|
|
412
|
+
// Registry whose callback calls `__wbg_mecab_free` on the registered pointer
// (as unsigned 32-bit). Falls back to a no-op stand-in with the same
// `register` / `unregister` surface when `FinalizationRegistry` is unavailable.
const MecabFinalization = (typeof FinalizationRegistry !== 'undefined')
    ? new FinalizationRegistry((ptr) => wasm.__wbg_mecab_free(ptr >>> 0, 1))
    : { register: () => {}, unregister: () => {} };
|
|
415
|
+
// Registry whose callback calls `__wbg_wasmtoken_free` on the registered
// pointer (as unsigned 32-bit). Falls back to a no-op stand-in with the same
// `register` / `unregister` surface when `FinalizationRegistry` is unavailable.
const WasmTokenFinalization = (typeof FinalizationRegistry !== 'undefined')
    ? new FinalizationRegistry((ptr) => wasm.__wbg_wasmtoken_free(ptr >>> 0, 1))
    : { register: () => {}, unregister: () => {} };
|
|
418
|
+
|
|
419
|
+
/**
 * Store `obj` in the heap table and return the index it was stored at.
 *
 * Free slots form a linked list: `heap_next` is the first free index and each
 * free slot holds the index of the next one. When the list runs off the end
 * of the table, the table grows by one slot.
 */
function addHeapObject(obj) {
    const slot = heap_next;
    if (slot === heap.length) {
        // No free slot left: append one whose "next free" link points past itself.
        heap.push(slot + 1);
    }
    heap_next = heap[slot];
    heap[slot] = obj;
    return slot;
}
|
|
427
|
+
|
|
428
|
+
/**
 * Return heap-table slot `idx` to the free list.
 *
 * Indices below 1028 are never released; that range matches the 1024
 * pre-filled entries plus the four constants the table is created with.
 */
function dropObject(idx) {
    if (!(idx < 1028)) {
        // Push the slot onto the front of the free list.
        heap[idx] = heap_next;
        heap_next = idx;
    }
}
|
|
433
|
+
|
|
434
|
+
/**
 * Read `len` little-endian 32-bit heap-table indices starting at `ptr` in
 * wasm memory and return the corresponding JS values as an array.
 *
 * Each index is consumed with `takeObject`, so its slot is released.
 */
function getArrayJsValueFromWasm0(ptr, len) {
    const base = ptr >>> 0;
    const end = base + 4 * len;
    const view = getDataViewMemory0();
    const values = [];
    for (let addr = base; addr < end; addr += 4) {
        const handle = view.getUint32(addr, true);
        values.push(takeObject(handle));
    }
    return values;
}
|
|
443
|
+
|
|
444
|
+
let cachedDataViewMemory0 = null;

/**
 * Return a `DataView` over the wasm memory, rebuilding the cached one when
 * it is missing or no longer backed by the current memory buffer.
 */
function getDataViewMemory0() {
    let stale = cachedDataViewMemory0 === null;
    if (!stale) {
        const backing = cachedDataViewMemory0.buffer;
        if (backing.detached === true) {
            stale = true;
        } else if (backing.detached === undefined) {
            // No `detached` flag on this runtime: compare buffer identity instead.
            stale = backing !== wasm.memory.buffer;
        }
    }
    if (stale) {
        cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
    }
    return cachedDataViewMemory0;
}
|
|
451
|
+
|
|
452
|
+
/**
 * Decode `len` bytes at `ptr` in wasm memory into a JS string.
 * `ptr` is first reinterpreted as an unsigned 32-bit value.
 */
function getStringFromWasm0(ptr, len) {
    return decodeText(ptr >>> 0, len);
}
|
|
456
|
+
|
|
457
|
+
let cachedUint8ArrayMemory0 = null;

/**
 * Return a `Uint8Array` over the wasm memory, rebuilding the cached view when
 * it is missing or has zero length (presumably because its buffer was
 * detached — confirm).
 */
function getUint8ArrayMemory0() {
    const needsRefresh = cachedUint8ArrayMemory0 === null
        || cachedUint8ArrayMemory0.byteLength === 0;
    if (needsRefresh) {
        cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
    }
    return cachedUint8ArrayMemory0;
}
|
|
464
|
+
|
|
465
|
+
/** Look up the value stored at heap-table index `idx` without releasing it. */
function getObject(idx) {
    return heap[idx];
}
|
|
466
|
+
|
|
467
|
+
// Heap table: 1024 slots pre-filled with `undefined`, followed by four fixed
// constants (undefined, null, true, false).
let heap = [];
for (let slot = 0; slot < 1024; slot += 1) {
    heap.push(undefined);
}
heap.push(undefined, null, true, false);

// First free index; starts just past the entries created above.
let heap_next = heap.length;
|
|
471
|
+
|
|
472
|
+
/**
 * Copy the JS string `arg` into wasm memory as UTF-8 and return its pointer.
 * The number of bytes written is left in `WASM_VECTOR_LEN`.
 *
 * @param arg     string to copy
 * @param malloc  allocator, called as `malloc(size, 1)`
 * @param realloc optional reallocator, called as `realloc(ptr, oldSize, newSize, 1)`;
 *                when omitted the string is encoded up front and copied in one go
 */
function passStringToWasm0(arg, malloc, realloc) {
    if (realloc === undefined) {
        const encoded = cachedTextEncoder.encode(arg);
        const dest = malloc(encoded.length, 1) >>> 0;
        getUint8ArrayMemory0().subarray(dest, dest + encoded.length).set(encoded);
        WASM_VECTOR_LEN = encoded.length;
        return dest;
    }

    // Start by assuming one byte per UTF-16 code unit (true for pure ASCII).
    let capacity = arg.length;
    let ptr = malloc(capacity, 1) >>> 0;
    const mem = getUint8ArrayMemory0();

    // Copy the leading run of ASCII code units directly.
    let filled = 0;
    while (filled < capacity) {
        const unit = arg.charCodeAt(filled);
        if (unit > 0x7F) break;
        mem[ptr + filled] = unit;
        filled += 1;
    }

    if (filled !== capacity) {
        // Non-ASCII remainder: grow to 3 bytes per remaining code unit, encode
        // the remainder, then shrink the allocation to what was actually used.
        const rest = filled === 0 ? arg : arg.slice(filled);
        const grown = filled + rest.length * 3;
        ptr = realloc(ptr, capacity, grown, 1) >>> 0;
        capacity = grown;
        // Re-fetch the byte view: it is obtained again after the realloc call.
        const tail = getUint8ArrayMemory0().subarray(ptr + filled, ptr + capacity);
        const outcome = cachedTextEncoder.encodeInto(rest, tail);
        filled += outcome.written;
        ptr = realloc(ptr, capacity, filled, 1) >>> 0;
    }

    WASM_VECTOR_LEN = filled;
    return ptr;
}
|
|
508
|
+
|
|
509
|
+
/**
 * Fetch the value stored at heap-table index `idx`, release the slot, and
 * return the value.
 */
function takeObject(idx) {
    const value = getObject(idx);
    dropObject(idx);
    return value;
}
|
|
514
|
+
|
|
515
|
+
// Strict UTF-8 decoder (throws on malformed input, keeps a leading BOM).
let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
cachedTextDecoder.decode();

// Byte budget per decoder instance; judging by the name this works around a
// Safari limit — TODO confirm.
const MAX_SAFARI_DECODE_BYTES = 2146435072;
let numBytesDecoded = 0;

/**
 * Decode `len` bytes of wasm memory starting at `ptr` as UTF-8.
 *
 * Keeps a running total of decoded bytes; once the total reaches
 * `MAX_SAFARI_DECODE_BYTES` the decoder is replaced with a new one and the
 * total restarts from this call's length.
 */
function decodeText(ptr, len) {
    numBytesDecoded += len;
    const overBudget = numBytesDecoded >= MAX_SAFARI_DECODE_BYTES;
    if (overBudget) {
        cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
        cachedTextDecoder.decode();
        numBytesDecoded = len;
    }
    const end = ptr + len;
    return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, end));
}
|
|
528
|
+
|
|
529
|
+
const cachedTextEncoder = new TextEncoder();

// Fallback for runtimes whose TextEncoder lacks `encodeInto`: encode into a
// temporary buffer, copy it into `view`, and report the same result shape.
if (!('encodeInto' in cachedTextEncoder)) {
    cachedTextEncoder.encodeInto = function (arg, view) {
        const encoded = cachedTextEncoder.encode(arg);
        view.set(encoded);
        const outcome = { read: arg.length, written: encoded.length };
        return outcome;
    };
}
|
|
541
|
+
|
|
542
|
+
// Byte length of the most recent string written by passStringToWasm0.
let WASM_VECTOR_LEN = 0;

// Handle to the wasm exports object; stays `undefined` until __wbg_set_wasm runs.
let wasm;

/**
 * Install the wasm exports object that the rest of this module calls into.
 * @param val exports object to store in `wasm`
 */
export function __wbg_set_wasm(val) {
    wasm = val;
}
|
|
Binary file
|
package/package.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "mecab-ko-wasm",
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "WebAssembly bindings for MeCab-Ko Korean morphological analyzer",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"module": "mecab_ko_wasm.js",
|
|
7
|
+
"types": "index.d.ts",
|
|
8
|
+
"sideEffects": [
|
|
9
|
+
"./mecab_ko_wasm.js"
|
|
10
|
+
],
|
|
11
|
+
"files": [
|
|
12
|
+
"index.js",
|
|
13
|
+
"index.d.ts",
|
|
14
|
+
"mecab_ko_wasm_bg.wasm",
|
|
15
|
+
"mecab_ko_wasm.js",
|
|
16
|
+
"mecab_ko_wasm.d.ts",
|
|
17
|
+
"mecab_ko_wasm_bg.js",
|
|
18
|
+
"README.md"
|
|
19
|
+
],
|
|
20
|
+
"scripts": {
|
|
21
|
+
"build": "wasm-pack build --target bundler --out-dir pkg",
|
|
22
|
+
"build:node": "wasm-pack build --target nodejs --out-dir pkg-node",
|
|
23
|
+
"build:web": "wasm-pack build --target web --out-dir pkg-web",
|
|
24
|
+
"test": "wasm-pack test --headless --firefox"
|
|
25
|
+
},
|
|
26
|
+
"repository": {
|
|
27
|
+
"type": "git",
|
|
28
|
+
"url": "https://github.com/hephaex/mecab-ko"
|
|
29
|
+
},
|
|
30
|
+
"keywords": [
|
|
31
|
+
"korean",
|
|
32
|
+
"nlp",
|
|
33
|
+
"morphological-analysis",
|
|
34
|
+
"tokenizer",
|
|
35
|
+
"mecab",
|
|
36
|
+
"wasm",
|
|
37
|
+
"webassembly"
|
|
38
|
+
],
|
|
39
|
+
"author": "hephaex <hephaex@gmail.com>",
|
|
40
|
+
"license": "MIT OR Apache-2.0",
|
|
41
|
+
"bugs": {
|
|
42
|
+
"url": "https://github.com/hephaex/mecab-ko/issues"
|
|
43
|
+
},
|
|
44
|
+
"homepage": "https://github.com/hephaex/mecab-ko#readme",
|
|
45
|
+
"exports": {
|
|
46
|
+
".": {
|
|
47
|
+
"import": "./mecab_ko_wasm.js",
|
|
48
|
+
"require": "./index.js",
|
|
49
|
+
"types": "./index.d.ts"
|
|
50
|
+
},
|
|
51
|
+
"./package.json": "./package.json"
|
|
52
|
+
},
|
|
53
|
+
"engines": {
|
|
54
|
+
"node": ">=16"
|
|
55
|
+
},
|
|
56
|
+
"publishConfig": {
|
|
57
|
+
"access": "public"
|
|
58
|
+
},
|
|
59
|
+
"devDependencies": {
|
|
60
|
+
"@types/node": "^20.0.0"
|
|
61
|
+
}
|
|
62
|
+
}
|