garu-ko 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,100 @@
1
+ # garu-ko
2
+
3
+ **Browser-native Korean morphological analyzer.** No server required.
4
+
5
+ - **2.2MB model** bundled in npm package (no CDN needed)
6
+ - **93KB WASM** engine -- runs in any modern browser
7
+ - **F1 95.3%** on NIKL MP benchmark (vs. Kiwi 87.9%)
8
+ - **< 1ms** inference per sentence
9
+ - **Offline-ready** -- works without network
10
+
11
+ ## Comparison
12
+
13
+ | | Kiwi | MeCab-ko | garu-ko |
14
+ |---|---|---|---|
15
+ | Model size | ~40MB | ~50MB | **2.2MB** |
16
+ | npm package | No | No | **Yes** |
17
+ | F1 (NIKL MP) | 87.9% | ~85% | **95.3%** |
18
+ | Browser support | Impractical | No | **Yes** |
19
+
20
+ ## Quick Start
21
+
22
+ ```bash
23
+ npm install garu-ko
24
+ ```
25
+
26
+ ```typescript
27
+ import { Garu } from 'garu-ko';
28
+
29
+ const garu = await Garu.load();
30
+
31
+ // Morphological analysis
32
+ const result = garu.analyze('배가 아파서 약을 먹었다');
33
+ console.log(result.tokens);
34
+ // [
35
+ // { text: '배', pos: 'NNG', start: 0, end: 2 },
36
+ // { text: '가', pos: 'JKS', start: 0, end: 2 },
37
+ // { text: '아프', pos: 'VA', start: 3, end: 6 },
38
+ // { text: '어서', pos: 'EC', start: 3, end: 6 },
39
+ // { text: '약', pos: 'NNG', start: 7, end: 9 },
40
+ // { text: '을', pos: 'JKO', start: 7, end: 9 },
41
+ // { text: '먹', pos: 'VV', start: 10, end: 13 },
42
+ // { text: '었', pos: 'EP', start: 10, end: 13 },
43
+ // { text: '다', pos: 'EF', start: 10, end: 13 },
44
+ // ]
45
+
46
+ // Simple tokenization
47
+ const tokens = garu.tokenize('나는 학교에 간다');
48
+ // ['나', '는', '학교', '에', '간다']
49
+
50
+ garu.destroy(); // free WASM memory
51
+ ```
52
+
53
+ ## Custom Model
54
+
55
+ ```typescript
56
+ // Load from custom URL
57
+ const garu = await Garu.load({ modelUrl: '/models/custom.gmdl' });
58
+
59
+ // Alternatively, load from an ArrayBuffer (use this instead of the URL form above)
60
+ const res = await fetch('/models/custom.gmdl');
61
+ const garu = await Garu.load({ modelData: await res.arrayBuffer() });
62
+ ```
63
+
64
+ ## API
65
+
66
+ ### `Garu.load(options?): Promise<Garu>`
67
+
68
+ Initializes the WASM engine and loads the model. Uses the bundled model by default.
69
+
70
+ | Option | Type | Description |
71
+ |---|---|---|
72
+ | `modelData` | `ArrayBuffer` | Provide model bytes directly |
73
+ | `modelUrl` | `string` | Fetch model from URL |
74
+
75
+ ### `garu.analyze(text, options?): AnalyzeResult`
76
+
77
+ Returns morphological tokens with POS tags (Sejong tagset).
78
+
79
+ ```typescript
80
+ interface Token {
81
+ text: string; // surface form
82
+ pos: POS; // POS tag
83
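+ start: number; // start offset, in characters, of the eojeol (space-delimited word) containing this token
84
+ end: number; // end offset (exclusive), in characters, of that same eojeol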
85
+ }
86
+ ```
87
+
88
+ Set `options.topN` to a value greater than 1 to get the N-best results as an array.
89
+
90
+ ### `garu.tokenize(text): string[]`
91
+
92
+ Returns surface-form strings only. Lightweight alternative to `analyze()`.
93
+
94
+ ### `garu.destroy(): void`
95
+
96
+ Frees the WASM memory. The instance is unusable after this call.
97
+
98
+ ## License
99
+
100
+ MIT
package/dist/index.d.ts CHANGED
@@ -47,6 +47,10 @@ export declare class Garu {
47
47
  * Quick tokenisation — returns an array of surface-form strings.
48
48
  */
49
49
  tokenize(text: string): string[];
50
+ /**
51
+ * Extract nouns (NNG, NNP) from text.
52
+ */
53
+ nouns(text: string): string[];
50
54
  /**
51
55
  * Whether the WASM analyzer is loaded and ready.
52
56
  */
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- const DEFAULT_MODEL_URL = 'https://cdn.jsdelivr.net/npm/garu@latest/models/base.gmdl';
1
+ const DEFAULT_MODEL_URL = new URL('../models/base.gmdl', import.meta.url).href;
2
2
  const EMPTY_RESULT = Object.freeze({
3
3
  tokens: [],
4
4
  score: 0,
@@ -68,6 +68,18 @@ export class Garu {
68
68
  }
69
69
  return this._wasm.tokenize(text);
70
70
  }
71
+ /**
72
+ * Extract nouns (NNG, NNP) from text.
73
+ */
74
+ nouns(text) {
75
+ if (text === '') {
76
+ return [];
77
+ }
78
+ const result = this._wasm.analyze(text);
79
+ return result.tokens
80
+ .filter((t) => t.pos === 'NNG' || t.pos === 'NNP')
81
+ .map((t) => t.text);
82
+ }
71
83
  /**
72
84
  * Whether the WASM analyzer is loaded and ready.
73
85
  */
@@ -81,7 +93,7 @@ export class Garu {
81
93
  return {
82
94
  version: this._wasm.constructor.version(),
83
95
  size: this._modelSize,
84
- accuracy: 0.8,
96
+ accuracy: 0.953,
85
97
  };
86
98
  }
87
99
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "garu-ko",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "Ultra-lightweight Korean morphological analyzer for the web (2.2MB model, WASM 93KB, F1 95.3%)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",