garu-ko 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +100 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +14 -2
- package/package.json +1 -1
package/README.md
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# garu-ko
|
|
2
|
+
|
|
3
|
+
**Browser-native Korean morphological analyzer.** No server required.
|
|
4
|
+
|
|
5
|
+
- **2.2MB model** bundled in npm package (no CDN needed)
|
|
6
|
+
- **93KB WASM** engine -- runs in any modern browser
|
|
7
|
+
- **F1 95.3%** on NIKL MP benchmark (vs. Kiwi 87.9%)
|
|
8
|
+
- **< 1ms** inference per sentence
|
|
9
|
+
- **Offline-ready** -- works without network
|
|
10
|
+
|
|
11
|
+
## Comparison
|
|
12
|
+
|
|
13
|
+
| | Kiwi | MeCab-ko | garu-ko |
|
|
14
|
+
|---|---|---|---|
|
|
15
|
+
| Model size | ~40MB | ~50MB | **2.2MB** |
|
|
16
|
+
| npm package | No | No | **Yes** |
|
|
17
|
+
| F1 (NIKL MP) | 87.9% | ~85% | **95.3%** |
|
|
18
|
+
| Browser support | Impractical | No | **Yes** |
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
npm install garu-ko
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
```typescript
|
|
27
|
+
import { Garu } from 'garu-ko';
|
|
28
|
+
|
|
29
|
+
const garu = await Garu.load();
|
|
30
|
+
|
|
31
|
+
// Morphological analysis
|
|
32
|
+
const result = garu.analyze('배가 아파서 약을 먹었다');
|
|
33
|
+
console.log(result.tokens);
|
|
34
|
+
// [
|
|
35
|
+
// { text: '배', pos: 'NNG', start: 0, end: 2 },
|
|
36
|
+
// { text: '가', pos: 'JKS', start: 0, end: 2 },
|
|
37
|
+
// { text: '아프', pos: 'VA', start: 3, end: 6 },
|
|
38
|
+
// { text: '어서', pos: 'EC', start: 3, end: 6 },
|
|
39
|
+
// { text: '약', pos: 'NNG', start: 7, end: 9 },
|
|
40
|
+
// { text: '을', pos: 'JKO', start: 7, end: 9 },
|
|
41
|
+
// { text: '먹', pos: 'VV', start: 10, end: 13 },
|
|
42
|
+
// { text: '었', pos: 'EP', start: 10, end: 13 },
|
|
43
|
+
// { text: '다', pos: 'EF', start: 10, end: 13 },
|
|
44
|
+
// ]
|
|
45
|
+
|
|
46
|
+
// Simple tokenization
|
|
47
|
+
const tokens = garu.tokenize('나는 학교에 간다');
|
|
48
|
+
// ['나', '는', '학교', '에', '간다']
|
|
49
|
+
|
|
50
|
+
garu.destroy(); // free WASM memory
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Custom Model
|
|
54
|
+
|
|
55
|
+
```typescript
|
|
56
|
+
// Load from custom URL
|
|
57
|
+
const garu = await Garu.load({ modelUrl: '/models/custom.gmdl' });
|
|
58
|
+
|
|
59
|
+
// Load from ArrayBuffer
|
|
60
|
+
const res = await fetch('/models/custom.gmdl');
|
|
61
|
+
const garu = await Garu.load({ modelData: await res.arrayBuffer() });
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## API
|
|
65
|
+
|
|
66
|
+
### `Garu.load(options?): Promise<Garu>`
|
|
67
|
+
|
|
68
|
+
Initializes the WASM engine and loads the model. Uses the bundled model by default.
|
|
69
|
+
|
|
70
|
+
| Option | Type | Description |
|
|
71
|
+
|---|---|---|
|
|
72
|
+
| `modelData` | `ArrayBuffer` | Provide model bytes directly |
|
|
73
|
+
| `modelUrl` | `string` | Fetch model from URL |
|
|
74
|
+
|
|
75
|
+
### `garu.analyze(text, options?): AnalyzeResult`
|
|
76
|
+
|
|
77
|
+
Returns morphological tokens with POS tags (Sejong tagset).
|
|
78
|
+
|
|
79
|
+
```typescript
|
|
80
|
+
interface Token {
|
|
81
|
+
text: string; // morpheme text (may differ from the input, e.g. '아프' + '어서' for '아파서')
|
|
82
|
+
pos: POS; // POS tag
|
|
83
|
+
start: number; // start offset of the eojeol containing this token (shared by all tokens in that eojeol)
|
|
84
|
+
end: number; // end offset (exclusive) of the eojeol containing this token
|
|
85
|
+
}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Set `options.topN` to a value greater than 1 to get the N-best results as an array.
|
|
89
|
+
|
|
90
|
+
### `garu.tokenize(text): string[]`
|
|
91
|
+
|
|
92
|
+
Returns surface-form strings only. Lightweight alternative to `analyze()`.
|
|
93
|
+
|
|
94
|
+
### `garu.destroy(): void`
|
|
95
|
+
|
|
96
|
+
Frees the WASM memory. The instance is unusable after this call.
|
|
97
|
+
|
|
98
|
+
## License
|
|
99
|
+
|
|
100
|
+
MIT
|
package/dist/index.d.ts
CHANGED
|
@@ -47,6 +47,10 @@ export declare class Garu {
|
|
|
47
47
|
* Quick tokenisation — returns an array of surface-form strings.
|
|
48
48
|
*/
|
|
49
49
|
tokenize(text: string): string[];
|
|
50
|
+
/**
|
|
51
|
+
* Extract nouns (NNG, NNP) from text.
|
|
52
|
+
*/
|
|
53
|
+
nouns(text: string): string[];
|
|
50
54
|
/**
|
|
51
55
|
* Whether the WASM analyzer is loaded and ready.
|
|
52
56
|
*/
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const DEFAULT_MODEL_URL = '
|
|
1
|
+
const DEFAULT_MODEL_URL = new URL('../models/base.gmdl', import.meta.url).href;
|
|
2
2
|
const EMPTY_RESULT = Object.freeze({
|
|
3
3
|
tokens: [],
|
|
4
4
|
score: 0,
|
|
@@ -68,6 +68,18 @@ export class Garu {
|
|
|
68
68
|
}
|
|
69
69
|
return this._wasm.tokenize(text);
|
|
70
70
|
}
|
|
71
|
+
/**
|
|
72
|
+
* Extract nouns (NNG, NNP) from text.
|
|
73
|
+
*/
|
|
74
|
+
nouns(text) {
|
|
75
|
+
if (text === '') {
|
|
76
|
+
return [];
|
|
77
|
+
}
|
|
78
|
+
const result = this._wasm.analyze(text);
|
|
79
|
+
return result.tokens
|
|
80
|
+
.filter((t) => t.pos === 'NNG' || t.pos === 'NNP')
|
|
81
|
+
.map((t) => t.text);
|
|
82
|
+
}
|
|
71
83
|
/**
|
|
72
84
|
* Whether the WASM analyzer is loaded and ready.
|
|
73
85
|
*/
|
|
@@ -81,7 +93,7 @@ export class Garu {
|
|
|
81
93
|
return {
|
|
82
94
|
version: this._wasm.constructor.version(),
|
|
83
95
|
size: this._modelSize,
|
|
84
|
-
accuracy: 0.
|
|
96
|
+
accuracy: 0.953,
|
|
85
97
|
};
|
|
86
98
|
}
|
|
87
99
|
/**
|