opencc-wasm 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +373 -70
- package/README.zh.md +432 -0
- package/dist/data/config/t2cngov.json +29 -0
- package/dist/data/config/t2cngov_keep_simp.json +29 -0
- package/dist/data/dict/STPhrases.ocd2 +0 -0
- package/dist/data/dict/TWPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters_keep_simp.ocd2 +0 -0
- package/dist/data/dict/cngov/TGPhrases.ocd2 +0 -0
- package/dist/esm/index.js +44 -6
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,74 +1,330 @@
|
|
|
1
1
|
# opencc-wasm
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
[npm package](https://www.npmjs.com/package/opencc-wasm)
|
|
4
|
+
[jsDelivr CDN](https://cdn.jsdelivr.net/npm/opencc-wasm@latest/dist/esm/index.js)
|
|
5
|
+
[License: Apache-2.0](LICENSE)
|
|
5
6
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
[繁體中文](README.zh.md)
|
|
8
|
+
|
|
9
|
+
> 🚀 **Out-of-the-box Chinese text conversion library** - 3 lines of code, auto-loads configs and dictionaries from CDN!
|
|
10
|
+
|
|
11
|
+
WebAssembly port of OpenCC (Open Chinese Convert) with full API compatibility. Bundles the official OpenCC C++ core compiled via Emscripten, plus all official configs and prebuilt `.ocd2` dictionaries.
|
|
12
|
+
|
|
13
|
+
**License:** Apache-2.0
|
|
14
|
+
|
|
15
|
+
## ✨ Features
|
|
16
|
+
|
|
17
|
+
- 🎯 **Zero Configuration** - Auto-loads all configs and dictionaries from CDN
|
|
18
|
+
- 🔥 **3 Lines to Start** - Simplest API, just import and use
|
|
19
|
+
- 🌐 **CDN Ready** - Use directly from jsDelivr/unpkg without bundler
|
|
20
|
+
- 📦 **All-in-One** - Includes all 14+ official conversion types
|
|
21
|
+
- ⚡ **Auto Caching** - Resources cached after first load
|
|
22
|
+
- 🔧 **Full Compatibility** - Compatible with `opencc-js` API
|
|
23
|
+
- 🚫 **No Native Bindings** - Pure WASM, cross-platform
|
|
24
|
+
- 💻 **Universal** - Works in Node.js, browsers, Deno, etc.
|
|
25
|
+
|
|
26
|
+
## 🚀 Quick Start
|
|
27
|
+
|
|
28
|
+
### Browser (CDN - Zero Install!)
|
|
29
|
+
|
|
30
|
+
```html
|
|
31
|
+
<script type="module">
|
|
32
|
+
// 1. Import from CDN
|
|
33
|
+
import OpenCC from "https://cdn.jsdelivr.net/npm/opencc-wasm@0.4.0/dist/esm/index.js";
|
|
34
|
+
|
|
35
|
+
// 2. Create converter (auto-downloads everything!)
|
|
36
|
+
const converter = OpenCC.Converter({ from: "cn", to: "tw" });
|
|
37
|
+
|
|
38
|
+
// 3. Convert - Done!
|
|
39
|
+
const result = await converter("简体中文");
|
|
40
|
+
console.log(result); // 簡體中文
|
|
41
|
+
</script>
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
**That's it!** All configs and dictionaries are automatically downloaded from CDN.
|
|
45
|
+
|
|
46
|
+
### CDN (Converter API)
|
|
47
|
+
|
|
48
|
+
```javascript
|
|
49
|
+
import OpenCC from "https://cdn.jsdelivr.net/npm/opencc-wasm@0.4.0/dist/esm/index.js";
|
|
50
|
+
|
|
51
|
+
const converter = OpenCC.Converter({ from: "cn", to: "t" });
|
|
52
|
+
const result = await converter("简体中文");
|
|
53
|
+
console.log(result); // 簡體中文
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Example source: `test/cdn-simple.mjs`
|
|
57
|
+
|
|
58
|
+
### Node.js (NPM)
|
|
10
59
|
|
|
11
|
-
## Installation
|
|
12
60
|
```bash
|
|
13
61
|
npm install opencc-wasm
|
|
14
62
|
```
|
|
15
63
|
|
|
16
|
-
|
|
17
|
-
```js
|
|
64
|
+
```javascript
|
|
18
65
|
import OpenCC from "opencc-wasm";
|
|
19
66
|
|
|
20
|
-
|
|
21
|
-
const
|
|
22
|
-
console.log(
|
|
67
|
+
const converter = OpenCC.Converter({ from: "cn", to: "tw" });
|
|
68
|
+
const result = await converter("简体中文");
|
|
69
|
+
console.log(result); // 簡體中文
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## 📖 API Reference
|
|
73
|
+
|
|
74
|
+
### OpenCC.Converter() - Create Converter
|
|
75
|
+
|
|
76
|
+
Two ways to specify conversions:
|
|
77
|
+
|
|
78
|
+
#### Method 1: Using `config` parameter (Recommended)
|
|
79
|
+
|
|
80
|
+
Directly specify OpenCC config file name:
|
|
81
|
+
|
|
82
|
+
```javascript
|
|
83
|
+
// Simplified → Traditional (Taiwan phrases)
|
|
84
|
+
const converter = OpenCC.Converter({ config: "s2twp" });
|
|
85
|
+
const result = await converter("服务器软件"); // 伺服器軟體
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
**Supported configs:**
|
|
89
|
+
|
|
90
|
+
| Config | Description | Example |
|
|
91
|
+
|--------|-------------|---------|
|
|
92
|
+
| `s2t` | Simplified → Traditional | 简体 → 簡體 |
|
|
93
|
+
| `s2tw` | Simplified → Taiwan | 软件 → 軟件 |
|
|
94
|
+
| `s2twp` | Simplified → Taiwan (phrases) | 软件 → 軟體 |
|
|
95
|
+
| `s2hk` | Simplified → Hong Kong | 打印机 → 打印機 |
|
|
96
|
+
| `t2s` | Traditional → Simplified | 繁體 → 繁体 |
|
|
97
|
+
| `t2tw` | Traditional → Taiwan | 台灣 → 臺灣 |
|
|
98
|
+
| `t2hk` | Traditional → Hong Kong | 香港 → 香港 |
|
|
99
|
+
| `t2jp` | Traditional → Japanese Shinjitai | 繁體 → 繁体 |
|
|
100
|
+
| `tw2s` | Taiwan → Simplified | 軟體 → 软件 |
|
|
101
|
+
| `tw2sp` | Taiwan → Simplified (phrases) | 滑鼠 → 鼠标 |
|
|
102
|
+
| `tw2t` | Taiwan → Traditional | 臺灣 → 台灣 |
|
|
103
|
+
| `hk2s` | Hong Kong → Simplified | 打印機 → 打印机 |
|
|
104
|
+
| `hk2t` | Hong Kong → Traditional | 香港 → 香港 |
|
|
105
|
+
| `jp2t` | Japanese Shinjitai → Traditional | 繁体 → 繁體 |
|
|
106
|
+
| `t2cngov` | Traditional → CN Gov Standard | 潮溼 → 潮濕 |
|
|
107
|
+
| `t2cngov_keep_simp` | Traditional → CN Gov (Keep Simp) | 简体繁體 → 简体繁體 |
|
|
23
108
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
109
|
+
#### Method 2: Using `from`/`to` parameters (Legacy)
|
|
110
|
+
|
|
111
|
+
Specify source and target locales:
|
|
112
|
+
|
|
113
|
+
```javascript
|
|
114
|
+
const converter = OpenCC.Converter({ from: "cn", to: "twp" });
|
|
115
|
+
const result = await converter("服务器"); // 伺服器
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
**Locale codes:**
|
|
119
|
+
|
|
120
|
+
| Code | Description |
|
|
121
|
+
|------|-------------|
|
|
122
|
+
| `cn` | Simplified Chinese (Mainland) |
|
|
123
|
+
| `tw` | Traditional Chinese (Taiwan) |
|
|
124
|
+
| `twp` | Taiwan with phrases |
|
|
125
|
+
| `hk` | Traditional Chinese (Hong Kong) |
|
|
126
|
+
| `t` | Traditional Chinese (general) |
|
|
127
|
+
| `s` | Simplified Chinese (alias) |
|
|
128
|
+
| `sp` | Simplified with phrases |
|
|
129
|
+
| `jp` | Japanese Shinjitai |
|
|
130
|
+
|
|
131
|
+
**Both methods work identically!** Choose what you prefer.
|
|
132
|
+
|
|
133
|
+
### OpenCC.ConverterFactory() - With Custom Dictionary
|
|
134
|
+
|
|
135
|
+
```javascript
|
|
136
|
+
const converter = OpenCC.ConverterFactory(
|
|
137
|
+
"cn", // from
|
|
138
|
+
"tw", // to
|
|
139
|
+
[ // custom dictionaries
|
|
140
|
+
[["服务器", "伺服器"], ["文件", "檔案"]],
|
|
141
|
+
"網路 网络 | 檔案 文件"
|
|
142
|
+
]
|
|
143
|
+
);
|
|
144
|
+
|
|
145
|
+
const result = await converter("服务器上的文件通过网络传输");
|
|
146
|
+
// Output: 伺服器上的檔案通過網路傳輸
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### OpenCC.CustomConverter() - Pure Custom Converter
|
|
150
|
+
|
|
151
|
+
```javascript
|
|
152
|
+
const converter = OpenCC.CustomConverter([
|
|
153
|
+
["“", "「"],
|
|
154
|
+
["”", "」"],
|
|
155
|
+
["‘", "『"],
|
|
156
|
+
["’", "』"],
|
|
30
157
|
]);
|
|
31
|
-
|
|
32
|
-
|
|
158
|
+
|
|
159
|
+
const result = converter("这是“引号”和‘单引号’");
|
|
160
|
+
// Output: 这是「引号」和『单引号』
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## 💡 Usage Examples
|
|
164
|
+
|
|
165
|
+
### React
|
|
166
|
+
|
|
167
|
+
```jsx
|
|
168
|
+
import { useState } from 'react';
|
|
169
|
+
import OpenCC from 'opencc-wasm';
|
|
170
|
+
|
|
171
|
+
function App() {
|
|
172
|
+
const [output, setOutput] = useState('');
|
|
173
|
+
|
|
174
|
+
const handleConvert = async () => {
|
|
175
|
+
const converter = OpenCC.Converter({ config: "s2tw" });
|
|
176
|
+
setOutput(await converter("简体中文"));
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
return (
|
|
180
|
+
<div>
|
|
181
|
+
<button onClick={handleConvert}>Convert</button>
|
|
182
|
+
<div>{output}</div>
|
|
183
|
+
</div>
|
|
184
|
+
);
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Vue 3
|
|
189
|
+
|
|
190
|
+
```vue
|
|
191
|
+
<script setup>
|
|
192
|
+
import { ref } from 'vue';
|
|
193
|
+
import OpenCC from 'opencc-wasm';
|
|
194
|
+
|
|
195
|
+
const output = ref('');
|
|
196
|
+
|
|
197
|
+
async function handleConvert() {
|
|
198
|
+
const converter = OpenCC.Converter({ config: "s2tw" });
|
|
199
|
+
output.value = await converter("简体中文");
|
|
200
|
+
}
|
|
201
|
+
</script>
|
|
202
|
+
|
|
203
|
+
<template>
|
|
204
|
+
<button @click="handleConvert">Convert</button>
|
|
205
|
+
<div>{{ output }}</div>
|
|
206
|
+
</template>
|
|
33
207
|
```
|
|
34
208
|
|
|
35
|
-
### Node
|
|
36
|
-
|
|
37
|
-
|
|
209
|
+
### Node.js CLI
|
|
210
|
+
|
|
211
|
+
```javascript
|
|
212
|
+
#!/usr/bin/env node
|
|
213
|
+
import OpenCC from 'opencc-wasm';
|
|
214
|
+
|
|
215
|
+
const text = process.argv[2] || "简体中文";
|
|
216
|
+
const converter = OpenCC.Converter({ config: "s2tw" });
|
|
217
|
+
console.log(await converter(text));
|
|
38
218
|
```
|
|
39
219
|
|
|
40
|
-
|
|
220
|
+
### Web Worker
|
|
41
221
|
|
|
42
|
-
|
|
222
|
+
```javascript
|
|
223
|
+
// worker.js
|
|
224
|
+
import OpenCC from 'opencc-wasm';
|
|
43
225
|
|
|
44
|
-
|
|
226
|
+
let converters = {};
|
|
227
|
+
|
|
228
|
+
self.onmessage = async (e) => {
|
|
229
|
+
const { config, text } = e.data;
|
|
230
|
+
|
|
231
|
+
if (!converters[config]) {
|
|
232
|
+
converters[config] = OpenCC.Converter({ config });
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
const result = await converters[config](text);
|
|
236
|
+
self.postMessage(result);
|
|
237
|
+
};
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
```javascript
|
|
241
|
+
// main.js
|
|
242
|
+
const worker = new Worker('worker.js', { type: 'module' });
|
|
243
|
+
|
|
244
|
+
worker.onmessage = (e) => {
|
|
245
|
+
console.log('Result:', e.data);
|
|
246
|
+
};
|
|
247
|
+
|
|
248
|
+
worker.postMessage({ config: 's2tw', text: '简体中文' });
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## 🔧 Best Practices
|
|
252
|
+
|
|
253
|
+
### ✅ Reuse Converter Instances
|
|
254
|
+
|
|
255
|
+
```javascript
|
|
256
|
+
// ✅ Good: Create once, use many times
|
|
257
|
+
const converter = OpenCC.Converter({ config: "s2tw" });
|
|
258
|
+
|
|
259
|
+
for (const text of manyTexts) {
|
|
260
|
+
await converter(text); // Fast!
|
|
261
|
+
}
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
```javascript
|
|
265
|
+
// ❌ Avoid: Creating new instances every time
|
|
266
|
+
for (const text of manyTexts) {
|
|
267
|
+
const converter = OpenCC.Converter({ config: "s2tw" }); // Slow!
|
|
268
|
+
await converter(text);
|
|
269
|
+
}
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### Multiple Converters (Auto-cached)
|
|
273
|
+
|
|
274
|
+
```javascript
|
|
275
|
+
// Create multiple converters (resources auto-cached)
|
|
276
|
+
const s2t = OpenCC.Converter({ config: "s2t" });
|
|
277
|
+
const s2tw = OpenCC.Converter({ config: "s2tw" });
|
|
278
|
+
const t2s = OpenCC.Converter({ config: "t2s" });
|
|
279
|
+
|
|
280
|
+
// Use independently
|
|
281
|
+
console.log(await s2t("简体")); // 簡體
|
|
282
|
+
console.log(await s2tw("软件")); // 軟件
|
|
283
|
+
console.log(await t2s("繁體")); // 繁体
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### TypeScript
|
|
287
|
+
|
|
288
|
+
```typescript
|
|
289
|
+
import OpenCC from 'opencc-wasm';
|
|
290
|
+
|
|
291
|
+
type ConfigName = 's2t' | 's2tw' | 's2twp' | 't2s';
|
|
292
|
+
|
|
293
|
+
async function convert(config: ConfigName, text: string): Promise<string> {
|
|
294
|
+
const converter = OpenCC.Converter({ config });
|
|
295
|
+
return await converter(text);
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
const result = await convert('s2tw', '简体中文');
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
## 🏗️ Build
|
|
302
|
+
|
|
303
|
+
The project uses a two-stage build process:
|
|
304
|
+
|
|
305
|
+
### Stage 1: Build WASM
|
|
45
306
|
|
|
46
307
|
```bash
|
|
47
308
|
./build.sh
|
|
48
309
|
```
|
|
49
310
|
|
|
50
|
-
Compiles OpenCC + marisa-trie to WASM
|
|
51
|
-
- `build/opencc-wasm.esm.js` - ESM WASM glue
|
|
52
|
-
- `build/opencc-wasm.cjs` - CJS WASM glue
|
|
311
|
+
Compiles OpenCC + marisa-trie to WASM, outputs to `build/`:
|
|
312
|
+
- `build/opencc-wasm.esm.js` - ESM WASM glue
|
|
313
|
+
- `build/opencc-wasm.cjs` - CJS WASM glue
|
|
53
314
|
- `build/opencc-wasm.wasm` - WASM binary
|
|
54
315
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
### Stage 2: Build API wrappers (publishable dist)
|
|
316
|
+
### Stage 2: Build API
|
|
58
317
|
|
|
59
318
|
```bash
|
|
60
319
|
node scripts/build-api.js
|
|
61
320
|
```
|
|
62
321
|
|
|
63
322
|
Generates publishable distribution in `dist/`:
|
|
64
|
-
- Copies WASM
|
|
65
|
-
- Transforms source
|
|
66
|
-
- Generates `dist/cjs/index.cjs` with CJS-compatible wrapper
|
|
323
|
+
- Copies WASM files to `dist/esm/` and `dist/cjs/`
|
|
324
|
+
- Transforms source to production paths
|
|
67
325
|
- Copies data files to `dist/data/`
|
|
68
326
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
### Complete build
|
|
327
|
+
### Complete Build
|
|
72
328
|
|
|
73
329
|
```bash
|
|
74
330
|
npm run build
|
|
@@ -76,54 +332,101 @@ npm run build
|
|
|
76
332
|
|
|
77
333
|
Runs both stages automatically.
|
|
78
334
|
|
|
79
|
-
## Testing
|
|
335
|
+
## 🧪 Testing
|
|
336
|
+
|
|
80
337
|
```bash
|
|
81
338
|
npm test
|
|
82
339
|
```
|
|
83
340
|
|
|
84
|
-
|
|
85
|
-
This ensures tests validate the actual build output, not stale dist files.
|
|
86
|
-
|
|
87
|
-
Runs the upstream OpenCC testcases (converted to JSON) against the WASM build.
|
|
341
|
+
Runs the upstream OpenCC test cases against the WASM build.
|
|
88
342
|
|
|
89
|
-
## Project Structure
|
|
343
|
+
## 📁 Project Structure
|
|
90
344
|
|
|
91
345
|
```
|
|
92
346
|
wasm-lib/
|
|
93
|
-
├── build/ ← Intermediate WASM artifacts (gitignored
|
|
94
|
-
|
|
95
|
-
│ ├── opencc-wasm.cjs
|
|
96
|
-
│ └── opencc-wasm.wasm
|
|
97
|
-
├── dist/ ← Publishable distribution (committed to git)
|
|
347
|
+
├── build/ ← Intermediate WASM artifacts (gitignored)
|
|
348
|
+
├── dist/ ← Publishable distribution (committed)
|
|
98
349
|
│ ├── esm/
|
|
99
350
|
│ │ ├── index.js
|
|
100
|
-
│ │
|
|
351
|
+
│ │ ├── opencc-wasm.js
|
|
352
|
+
│ │ └── opencc-wasm.wasm
|
|
101
353
|
│ ├── cjs/
|
|
102
354
|
│ │ ├── index.cjs
|
|
103
|
-
│ │
|
|
104
|
-
│
|
|
105
|
-
│ └── data/ ← OpenCC
|
|
106
|
-
├── index.js ← Source API
|
|
355
|
+
│ │ ├── opencc-wasm.cjs
|
|
356
|
+
│ │ └── opencc-wasm.wasm
|
|
357
|
+
│ └── data/ ← OpenCC configs + dicts
|
|
358
|
+
├── index.js ← Source API
|
|
107
359
|
├── index.d.ts ← TypeScript definitions
|
|
108
360
|
└── scripts/
|
|
109
|
-
└── build-api.js ←
|
|
361
|
+
└── build-api.js ← Build script
|
|
110
362
|
```
|
|
111
363
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
-
|
|
115
|
-
|
|
364
|
+
## ❓ FAQ
|
|
365
|
+
|
|
366
|
+
**Q: Do configs and dicts auto-load or do I need to download them?**
|
|
367
|
+
|
|
368
|
+
A: Auto-load! The high-level API (`OpenCC.Converter()`) automatically downloads everything from CDN.
|
|
369
|
+
|
|
370
|
+
**Q: Does it re-download every time?**
|
|
371
|
+
|
|
372
|
+
A: No! Resources are cached after first load.
|
|
373
|
+
|
|
374
|
+
**Q: Does it work offline?**
|
|
375
|
+
|
|
376
|
+
A: Yes! If installed via npm, all resources are bundled. For browsers, use a Service Worker for offline caching.
|
|
377
|
+
|
|
378
|
+
**Q: Which method to use: `config` or `from`/`to`?**
|
|
379
|
+
|
|
380
|
+
A: Both work identically. Use `config` if you know the OpenCC config names, or `from`/`to` for a locale-based approach.
|
|
381
|
+
|
|
382
|
+
**Q: Why is the first conversion slow?**
|
|
383
|
+
|
|
384
|
+
A: Initial load downloads configs + dicts (~1-2MB). Subsequent conversions are fast (cached).
|
|
385
|
+
|
|
386
|
+
## 📝 Notes
|
|
387
|
+
|
|
388
|
+
- Uses persistent OpenCC handles to avoid reloading configs
|
|
389
|
+
- Dictionaries stored in `/data/dict/` in virtual FS
|
|
390
|
+
- Memory grows on demand (`ALLOW_MEMORY_GROWTH=1`)
|
|
391
|
+
- Performance: Focuses on fidelity and compatibility with official OpenCC. May be slower than pure-JS implementations for raw throughput, but guarantees full OpenCC behavior.
|
|
392
|
+
|
|
393
|
+
## 📜 Changelog
|
|
394
|
+
|
|
395
|
+
### 0.4.0 - 2026-01-04
|
|
396
|
+
|
|
397
|
+
**Added:**
|
|
398
|
+
- `config` parameter in `Converter()` for direct OpenCC config names
|
|
399
|
+
- New CN Government Standard conversions: `t2cngov`, `t2cngov_keep_simp`
|
|
400
|
+
- New demo page and regression tests for new configs
|
|
401
|
+
|
|
402
|
+
**Fixed:**
|
|
403
|
+
- s2twp duplication bug (issue #950)
|
|
404
|
+
- tw2sp `方程式` conversion regression and dictionary sync
|
|
405
|
+
- Missing cngov configs/dicts in wasm-lib distribution
|
|
406
|
+
|
|
407
|
+
### 0.3.0 - 2026-01-03
|
|
408
|
+
|
|
409
|
+
**🚨 BREAKING: New Distribution Layout**
|
|
410
|
+
|
|
411
|
+
`.wasm` files moved to be co-located with glue code:
|
|
412
|
+
- `dist/esm/opencc-wasm.wasm` (was: `dist/opencc-wasm.esm.wasm`)
|
|
413
|
+
- `dist/cjs/opencc-wasm.wasm` (was: `dist/opencc-wasm.cjs.wasm`)
|
|
414
|
+
|
|
415
|
+
**Added:**
|
|
416
|
+
- CDN support for direct browser usage
|
|
417
|
+
- Comprehensive test suite
|
|
418
|
+
- Auto-loading of configs and dictionaries
|
|
419
|
+
|
|
420
|
+
### 0.2.1
|
|
421
|
+
|
|
422
|
+
- Ship both wasm filenames for compatibility
|
|
423
|
+
|
|
424
|
+
### 0.2.0
|
|
116
425
|
|
|
117
|
-
|
|
118
|
-
-
|
|
119
|
-
-
|
|
120
|
-
- Memory grows on demand (`ALLOW_MEMORY_GROWTH=1`); no native dependencies needed.
|
|
121
|
-
- Performance note: opencc-wasm focuses on fidelity and compatibility (uses official configs and `.ocd2`, matches Node OpenCC output 1:1). Raw throughput can be slower than pure JS implementations like `opencc-js`, but the WASM version guarantees full OpenCC behavior and config coverage.
|
|
426
|
+
- Rebuilt from OpenCC commit [`36c7cbbc`](https://github.com/frankslin/OpenCC/commit/36c7cbbc9702d2a46a89ea7a55ff8ba5656455df)
|
|
427
|
+
- New dist layout with ESM/CJS separation
|
|
428
|
+
- Tests rewritten using `node:test`
|
|
122
429
|
|
|
123
|
-
|
|
124
|
-
- Ship both wasm filenames (`opencc-wasm.wasm` and `opencc-wasm.esm.wasm`) in `dist/` so either glue name resolves without patches; glues remain at `dist/esm/opencc-wasm.js` and `dist/cjs/opencc-wasm.cjs`.
|
|
430
|
+
---
|
|
125
431
|
|
|
126
|
-
|
|
127
|
-
- Conversion rules and bundled dictionaries are rebuilt from OpenCC commit [`36c7cbbc`](https://github.com/frankslin/OpenCC/commit/36c7cbbc9702d2a46a89ea7a55ff8ba5656455df). This aligns the WASM build with the upstream configs in that revision (including updated `.ocd2` data).
|
|
128
|
-
- Output layout now mirrors the new `dist/` structure: ESM glue under `dist/esm/`, CJS glue under `dist/cjs/`, shared `opencc-wasm.wasm` at `dist/opencc-wasm.wasm`, and configs/dicts in `dist/data/`. Adjust your bundler/static hosting paths accordingly.
|
|
129
|
-
- Tests are rewritten to use `node:test` with data-driven cases (`test/testcases.json`) instead of ad-hoc assertions, keeping coverage aligned with upstream OpenCC fixtures.
|
|
432
|
+
**Made with ❤️ for the Chinese NLP community**
|
package/README.zh.md
ADDED
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
# opencc-wasm
|
|
2
|
+
|
|
3
|
+
[npm 套件](https://www.npmjs.com/package/opencc-wasm)
|
|
4
|
+
[jsDelivr CDN](https://cdn.jsdelivr.net/npm/opencc-wasm@latest/dist/esm/index.js)
|
|
5
|
+
[授權條款:Apache-2.0](LICENSE)
|
|
6
|
+
|
|
7
|
+
[English](README.md)
|
|
8
|
+
|
|
9
|
+
> 🚀 **開箱即用的中文簡繁轉換程式庫** - 3 行程式碼搞定,自動從 CDN 載入設定和字典!
|
|
10
|
+
|
|
11
|
+
OpenCC(Open Chinese Convert)的 WebAssembly 移植版本,完全相容原版 API。內建官方 OpenCC C++ 核心(透過 Emscripten 編譯),以及所有官方設定檔和預先建置的 `.ocd2` 字典檔。
|
|
12
|
+
|
|
13
|
+
**授權條款:** Apache-2.0
|
|
14
|
+
|
|
15
|
+
## ✨ 特色功能
|
|
16
|
+
|
|
17
|
+
- 🎯 **零設定** - 自動從 CDN 載入所有設定檔和字典檔
|
|
18
|
+
- 🔥 **3 行開始** - 最簡單的 API,匯入即用
|
|
19
|
+
- 🌐 **CDN 就緒** - 可直接從 jsDelivr/unpkg 使用,無需打包工具
|
|
20
|
+
- 📦 **一應俱全** - 包含所有 14+ 種官方轉換類型
|
|
21
|
+
- ⚡ **自動快取** - 資源首次載入後自動快取
|
|
22
|
+
- 🔧 **完全相容** - 相容 `opencc-js` API
|
|
23
|
+
- 🚫 **無需原生綁定** - 純 WASM,跨平台
|
|
24
|
+
- 💻 **通用支援** - 支援 Node.js、瀏覽器、Deno 等環境
|
|
25
|
+
|
|
26
|
+
## 🚀 快速開始
|
|
27
|
+
|
|
28
|
+
### 瀏覽器(CDN - 零安裝!)
|
|
29
|
+
|
|
30
|
+
```html
|
|
31
|
+
<script type="module">
|
|
32
|
+
// 1. 從 CDN 匯入
|
|
33
|
+
import OpenCC from "https://cdn.jsdelivr.net/npm/opencc-wasm@0.4.0/dist/esm/index.js";
|
|
34
|
+
|
|
35
|
+
// 2. 建立轉換器(自動下載所有資源!)
|
|
36
|
+
const converter = OpenCC.Converter({ from: "cn", to: "tw" });
|
|
37
|
+
|
|
38
|
+
// 3. 轉換 - 完成!
|
|
39
|
+
const result = await converter("简体中文");
|
|
40
|
+
console.log(result); // 簡體中文
|
|
41
|
+
</script>
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
**就是這麼簡單!** 所有設定檔和字典檔都會自動從 CDN 下載。
|
|
45
|
+
|
|
46
|
+
### CDN(Converter API)
|
|
47
|
+
|
|
48
|
+
```javascript
|
|
49
|
+
import OpenCC from "https://cdn.jsdelivr.net/npm/opencc-wasm@0.4.0/dist/esm/index.js";
|
|
50
|
+
|
|
51
|
+
const converter = OpenCC.Converter({ from: "cn", to: "t" });
|
|
52
|
+
const result = await converter("简体中文");
|
|
53
|
+
console.log(result); // 簡體中文
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
範例來源:`test/cdn-simple.mjs`
|
|
57
|
+
|
|
58
|
+
### Node.js(NPM)
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
npm install opencc-wasm
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
```javascript
|
|
65
|
+
import OpenCC from "opencc-wasm";
|
|
66
|
+
|
|
67
|
+
const converter = OpenCC.Converter({ from: "cn", to: "tw" });
|
|
68
|
+
const result = await converter("简体中文");
|
|
69
|
+
console.log(result); // 簡體中文
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## 📖 API 參考
|
|
73
|
+
|
|
74
|
+
### OpenCC.Converter() - 建立轉換器
|
|
75
|
+
|
|
76
|
+
兩種方式指定轉換:
|
|
77
|
+
|
|
78
|
+
#### 方式 1:使用 `config` 參數(推薦)
|
|
79
|
+
|
|
80
|
+
直接指定 OpenCC 設定檔名稱:
|
|
81
|
+
|
|
82
|
+
```javascript
|
|
83
|
+
// 簡體 → 繁體(台灣慣用詞)
|
|
84
|
+
const converter = OpenCC.Converter({ config: "s2twp" });
|
|
85
|
+
const result = await converter("服务器软件"); // 伺服器軟體
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
**支援的設定檔:**
|
|
89
|
+
|
|
90
|
+
| 設定檔 | 說明 | 範例 |
|
|
91
|
+
|--------|------|------|
|
|
92
|
+
| `s2t` | 簡體 → 繁體 | 简体 → 簡體 |
|
|
93
|
+
| `s2tw` | 簡體 → 台灣正體 | 软件 → 軟件 |
|
|
94
|
+
| `s2twp` | 簡體 → 台灣正體(慣用詞) | 软件 → 軟體 |
|
|
95
|
+
| `s2hk` | 簡體 → 香港繁體 | 打印机 → 打印機 |
|
|
96
|
+
| `t2s` | 繁體 → 簡體 | 繁體 → 繁体 |
|
|
97
|
+
| `t2tw` | 繁體 → 台灣正體 | 台灣 → 臺灣 |
|
|
98
|
+
| `t2hk` | 繁體 → 香港繁體 | 香港 → 香港 |
|
|
99
|
+
| `t2jp` | 繁體 → 日文新字體 | 繁體 → 繁体 |
|
|
100
|
+
| `tw2s` | 台灣 → 簡體 | 軟體 → 软件 |
|
|
101
|
+
| `tw2sp` | 台灣 → 簡體(慣用詞) | 滑鼠 → 鼠标 |
|
|
102
|
+
| `tw2t` | 台灣 → 繁體 | 臺灣 → 台灣 |
|
|
103
|
+
| `hk2s` | 香港 → 簡體 | 打印機 → 打印机 |
|
|
104
|
+
| `hk2t` | 香港 → 繁體 | 香港 → 香港 |
|
|
105
|
+
| `jp2t` | 日文新字體 → 繁體 | 繁体 → 繁體 |
|
|
106
|
+
| `t2cngov` | 繁體 → 大陸政府標準繁體 | 潮溼 → 潮濕 |
|
|
107
|
+
| `t2cngov_keep_simp` | 繁體 → 大陸政府標準(保留簡體) | 简体繁體 → 简体繁體 |
|
|
108
|
+
|
|
109
|
+
#### 方式 2:使用 `from`/`to` 參數(傳統)
|
|
110
|
+
|
|
111
|
+
指定來源和目標語系:
|
|
112
|
+
|
|
113
|
+
```javascript
|
|
114
|
+
const converter = OpenCC.Converter({ from: "cn", to: "twp" });
|
|
115
|
+
const result = await converter("服务器"); // 伺服器
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
**語系代碼:**
|
|
119
|
+
|
|
120
|
+
| 代碼 | 說明 |
|
|
121
|
+
|------|------|
|
|
122
|
+
| `cn` | 簡體中文(中國大陸) |
|
|
123
|
+
| `tw` | 繁體中文(台灣) |
|
|
124
|
+
| `twp` | 台灣正體(含慣用詞) |
|
|
125
|
+
| `hk` | 繁體中文(香港) |
|
|
126
|
+
| `t` | 繁體中文(通用) |
|
|
127
|
+
| `s` | 簡體中文(別名) |
|
|
128
|
+
| `sp` | 簡體(含慣用詞) |
|
|
129
|
+
| `jp` | 日文新字體 |
|
|
130
|
+
|
|
131
|
+
**兩種方式功能完全相同!** 選擇您喜歡的即可。
|
|
132
|
+
|
|
133
|
+
### OpenCC.ConverterFactory() - 含自訂字典的轉換器
|
|
134
|
+
|
|
135
|
+
```javascript
|
|
136
|
+
const converter = OpenCC.ConverterFactory(
|
|
137
|
+
"cn", // from
|
|
138
|
+
"tw", // to
|
|
139
|
+
[ // 自訂字典
|
|
140
|
+
[["服务器", "伺服器"], ["文件", "檔案"]],
|
|
141
|
+
"網路 网络 | 檔案 文件"
|
|
142
|
+
]
|
|
143
|
+
);
|
|
144
|
+
|
|
145
|
+
const result = await converter("服务器上的文件通过网络传输");
|
|
146
|
+
// 輸出:伺服器上的檔案通過網路傳輸
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### OpenCC.CustomConverter() - 純自訂轉換器
|
|
150
|
+
|
|
151
|
+
```javascript
|
|
152
|
+
const converter = OpenCC.CustomConverter([
|
|
153
|
+
["“", "「"],
|
|
154
|
+
["”", "」"],
|
|
155
|
+
["‘", "『"],
|
|
156
|
+
["’", "』"],
|
|
157
|
+
]);
|
|
158
|
+
|
|
159
|
+
const result = converter("这是“引号”和‘单引号’");
|
|
160
|
+
// 輸出:这是「引号」和『单引号』
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## 💡 使用範例
|
|
164
|
+
|
|
165
|
+
### React
|
|
166
|
+
|
|
167
|
+
```jsx
|
|
168
|
+
import { useState } from 'react';
|
|
169
|
+
import OpenCC from 'opencc-wasm';
|
|
170
|
+
|
|
171
|
+
function App() {
|
|
172
|
+
const [output, setOutput] = useState('');
|
|
173
|
+
|
|
174
|
+
const handleConvert = async () => {
|
|
175
|
+
const converter = OpenCC.Converter({ config: "s2tw" });
|
|
176
|
+
setOutput(await converter("简体中文"));
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
return (
|
|
180
|
+
<div>
|
|
181
|
+
<button onClick={handleConvert}>轉換</button>
|
|
182
|
+
<div>{output}</div>
|
|
183
|
+
</div>
|
|
184
|
+
);
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Vue 3
|
|
189
|
+
|
|
190
|
+
```vue
|
|
191
|
+
<script setup>
|
|
192
|
+
import { ref } from 'vue';
|
|
193
|
+
import OpenCC from 'opencc-wasm';
|
|
194
|
+
|
|
195
|
+
const output = ref('');
|
|
196
|
+
|
|
197
|
+
async function handleConvert() {
|
|
198
|
+
const converter = OpenCC.Converter({ config: "s2tw" });
|
|
199
|
+
output.value = await converter("简体中文");
|
|
200
|
+
}
|
|
201
|
+
</script>
|
|
202
|
+
|
|
203
|
+
<template>
|
|
204
|
+
<button @click="handleConvert">轉換</button>
|
|
205
|
+
<div>{{ output }}</div>
|
|
206
|
+
</template>
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Node.js CLI 工具
|
|
210
|
+
|
|
211
|
+
```javascript
|
|
212
|
+
#!/usr/bin/env node
|
|
213
|
+
import OpenCC from 'opencc-wasm';
|
|
214
|
+
|
|
215
|
+
const text = process.argv[2] || "简体中文";
|
|
216
|
+
const converter = OpenCC.Converter({ config: "s2tw" });
|
|
217
|
+
console.log(await converter(text));
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Web Worker
|
|
221
|
+
|
|
222
|
+
```javascript
|
|
223
|
+
// worker.js
|
|
224
|
+
import OpenCC from 'opencc-wasm';
|
|
225
|
+
|
|
226
|
+
let converters = {};
|
|
227
|
+
|
|
228
|
+
self.onmessage = async (e) => {
|
|
229
|
+
const { config, text } = e.data;
|
|
230
|
+
|
|
231
|
+
if (!converters[config]) {
|
|
232
|
+
converters[config] = OpenCC.Converter({ config });
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
const result = await converters[config](text);
|
|
236
|
+
self.postMessage(result);
|
|
237
|
+
};
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
```javascript
|
|
241
|
+
// main.js
|
|
242
|
+
const worker = new Worker('worker.js', { type: 'module' });
|
|
243
|
+
|
|
244
|
+
worker.onmessage = (e) => {
|
|
245
|
+
console.log('結果:', e.data);
|
|
246
|
+
};
|
|
247
|
+
|
|
248
|
+
worker.postMessage({ config: 's2tw', text: '简体中文' });
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## 🔧 最佳實務
|
|
252
|
+
|
|
253
|
+
### ✅ 重複使用轉換器實例
|
|
254
|
+
|
|
255
|
+
```javascript
|
|
256
|
+
// ✅ 好:建立一次,多次使用
|
|
257
|
+
const converter = OpenCC.Converter({ config: "s2tw" });
|
|
258
|
+
|
|
259
|
+
for (const text of manyTexts) {
|
|
260
|
+
await converter(text); // 快!
|
|
261
|
+
}
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
```javascript
|
|
265
|
+
// ❌ 避免:每次都建立新實例
|
|
266
|
+
for (const text of manyTexts) {
|
|
267
|
+
const converter = OpenCC.Converter({ config: "s2tw" }); // 慢!
|
|
268
|
+
await converter(text);
|
|
269
|
+
}
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### 多個轉換器(自動快取)
|
|
273
|
+
|
|
274
|
+
```javascript
|
|
275
|
+
// 建立多個轉換器(資源自動快取)
|
|
276
|
+
const s2t = OpenCC.Converter({ config: "s2t" });
|
|
277
|
+
const s2tw = OpenCC.Converter({ config: "s2tw" });
|
|
278
|
+
const t2s = OpenCC.Converter({ config: "t2s" });
|
|
279
|
+
|
|
280
|
+
// 獨立使用
|
|
281
|
+
console.log(await s2t("简体")); // 簡體
|
|
282
|
+
console.log(await s2tw("软件")); // 軟件
|
|
283
|
+
console.log(await t2s("繁體")); // 繁体
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### TypeScript
|
|
287
|
+
|
|
288
|
+
```typescript
|
|
289
|
+
import OpenCC from 'opencc-wasm';
|
|
290
|
+
|
|
291
|
+
type ConfigName = 's2t' | 's2tw' | 's2twp' | 't2s';
|
|
292
|
+
|
|
293
|
+
async function convert(config: ConfigName, text: string): Promise<string> {
|
|
294
|
+
const converter = OpenCC.Converter({ config });
|
|
295
|
+
return await converter(text);
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
const result = await convert('s2tw', '简体中文');
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
## 🏗️ 建置
|
|
302
|
+
|
|
303
|
+
專案使用兩階段建置流程:
|
|
304
|
+
|
|
305
|
+
### 階段 1:建置 WASM
|
|
306
|
+
|
|
307
|
+
```bash
|
|
308
|
+
./build.sh
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
編譯 OpenCC + marisa-trie 成 WASM,輸出至 `build/`:
|
|
312
|
+
- `build/opencc-wasm.esm.js` - ESM WASM 膠合程式
|
|
313
|
+
- `build/opencc-wasm.cjs` - CJS WASM 膠合程式
|
|
314
|
+
- `build/opencc-wasm.wasm` - WASM 二進位檔
|
|
315
|
+
|
|
316
|
+
### 階段 2:建置 API
|
|
317
|
+
|
|
318
|
+
```bash
|
|
319
|
+
node scripts/build-api.js
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
產生可發佈的發行版至 `dist/`:
|
|
323
|
+
- 複製 WASM 檔案至 `dist/esm/` 和 `dist/cjs/`
|
|
324
|
+
- 轉換原始碼至生產環境路徑
|
|
325
|
+
- 複製資料檔至 `dist/data/`
|
|
326
|
+
|
|
327
|
+
### 完整建置
|
|
328
|
+
|
|
329
|
+
```bash
|
|
330
|
+
npm run build
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
自動執行兩個階段。
|
|
334
|
+
|
|
335
|
+
## 🧪 測試
|
|
336
|
+
|
|
337
|
+
```bash
|
|
338
|
+
npm test
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
執行上游 OpenCC 測試案例來驗證 WASM 建置。
|
|
342
|
+
|
|
343
|
+
## 📁 專案結構
|
|
344
|
+
|
|
345
|
+
```
|
|
346
|
+
wasm-lib/
|
|
347
|
+
├── build/ ← 中間產物(gitignored)
|
|
348
|
+
├── dist/ ← 可發佈版本(已提交)
|
|
349
|
+
│ ├── esm/
|
|
350
|
+
│ │ ├── index.js
|
|
351
|
+
│ │ ├── opencc-wasm.js
|
|
352
|
+
│ │ └── opencc-wasm.wasm
|
|
353
|
+
│ ├── cjs/
|
|
354
|
+
│ │ ├── index.cjs
|
|
355
|
+
│ │ ├── opencc-wasm.cjs
|
|
356
|
+
│ │ └── opencc-wasm.wasm
|
|
357
|
+
│ └── data/ ← OpenCC 設定檔 + 字典
|
|
358
|
+
├── index.js ← 原始碼 API
|
|
359
|
+
├── index.d.ts ← TypeScript 型別定義
|
|
360
|
+
└── scripts/
|
|
361
|
+
└── build-api.js ← 建置腳本
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
## ❓ 常見問題
|
|
365
|
+
|
|
366
|
+
**Q:設定檔和字典會自動載入嗎?還是需要我手動下載?**
|
|
367
|
+
|
|
368
|
+
A:自動載入!高階 API(`OpenCC.Converter()`)會自動從 CDN 下載所有需要的檔案。
|
|
369
|
+
|
|
370
|
+
**Q:每次轉換都會重新下載嗎?**
|
|
371
|
+
|
|
372
|
+
A:不會!資源在首次載入後會快取起來。
|
|
373
|
+
|
|
374
|
+
**Q:可以離線使用嗎?**
|
|
375
|
+
|
|
376
|
+
A:可以!透過 npm 安裝時,所有資源都已包含在套件中。瀏覽器環境可使用 Service Worker 實現離線快取。
|
|
377
|
+
|
|
378
|
+
**Q:應該用 `config` 還是 `from`/`to` 參數?**
|
|
379
|
+
|
|
380
|
+
A:兩者功能完全相同。如果您熟悉 OpenCC 設定檔名稱,用 `config`;若偏好語系導向的方式,用 `from`/`to`。
|
|
381
|
+
|
|
382
|
+
**Q:為什麼第一次轉換比較慢?**
|
|
383
|
+
|
|
384
|
+
A:首次載入需要下載設定檔和字典檔(約 1-2MB)。後續轉換會很快(已快取)。
|
|
385
|
+
|
|
386
|
+
## 📝 注意事項
|
|
387
|
+
|
|
388
|
+
- 使用持久的 OpenCC 控制代碼避免重複載入設定
|
|
389
|
+
- 字典儲存在虛擬檔案系統的 `/data/dict/` 中
|
|
390
|
+
- 記憶體按需成長(`ALLOW_MEMORY_GROWTH=1`)
|
|
391
|
+
- 效能:專注於精確度和與官方 OpenCC 的相容性。原始吞吐量可能比純 JavaScript 實作慢,但保證完整的 OpenCC 行為。
|
|
392
|
+
|
|
393
|
+
## 📜 變更歷史
|
|
394
|
+
|
|
395
|
+
### 0.4.0 - 2026-01-04
|
|
396
|
+
|
|
397
|
+
**新增:**
|
|
398
|
+
- `Converter()` 新增 `config` 參數,可直接使用 OpenCC 設定檔名稱
|
|
399
|
+
- 新增中國政府規範字轉換:`t2cngov`、`t2cngov_keep_simp`
|
|
400
|
+
- 新增示範頁與回歸測試,涵蓋新設定
|
|
401
|
+
|
|
402
|
+
**修正:**
|
|
403
|
+
- 修復 s2twp 重複字元問題(issue #950)
|
|
404
|
+
- 修正 tw2sp `方程式` 轉換錯誤並同步字典
|
|
405
|
+
- 補齊 wasm-lib 發行包中的 cngov 設定與字典
|
|
406
|
+
|
|
407
|
+
### 0.3.0 - 2026-01-03
|
|
408
|
+
|
|
409
|
+
**🚨 重大變更:新的發行版佈局**
|
|
410
|
+
|
|
411
|
+
`.wasm` 檔案已移至與膠合程式同目錄:
|
|
412
|
+
- `dist/esm/opencc-wasm.wasm`(舊:`dist/opencc-wasm.esm.wasm`)
|
|
413
|
+
- `dist/cjs/opencc-wasm.wasm`(舊:`dist/opencc-wasm.cjs.wasm`)
|
|
414
|
+
|
|
415
|
+
**新增:**
|
|
416
|
+
- CDN 支援,可直接在瀏覽器中使用
|
|
417
|
+
- 完整測試套件
|
|
418
|
+
- 自動載入設定檔和字典檔
|
|
419
|
+
|
|
420
|
+
### 0.2.1
|
|
421
|
+
|
|
422
|
+
- 提供兩種 wasm 檔名以維持相容性
|
|
423
|
+
|
|
424
|
+
### 0.2.0
|
|
425
|
+
|
|
426
|
+
- 從 OpenCC commit [`36c7cbbc`](https://github.com/frankslin/OpenCC/commit/36c7cbbc9702d2a46a89ea7a55ff8ba5656455df) 重新建置
|
|
427
|
+
- 新的 dist 佈局,ESM/CJS 分離
|
|
428
|
+
- 測試改用 `node:test` 重寫
|
|
429
|
+
|
|
430
|
+
---
|
|
431
|
+
|
|
432
|
+
**用 ❤️ 為中文 NLP 社群打造**
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Traditional Chinese to CN Government Standard",
|
|
3
|
+
"author": "TerryTian-tech",
|
|
4
|
+
"license": "Apache License 2.0",
|
|
5
|
+
"source": "https://github.com/TerryTian-tech/OpenCC-Traditional-Chinese-characters-according-to-Chinese-government-standards",
|
|
6
|
+
"contributors": ["TerryTian-tech", "Yi Jianpeng", "Hu Xinmei", "Duan Yatong"],
|
|
7
|
+
"reference": "《通用规范汉字表》(2013)",
|
|
8
|
+
"description": "Converts traditional Chinese (from various standards) to China's government standard traditional characters. Includes simplified-to-standard conversion for mixed documents.",
|
|
9
|
+
|
|
10
|
+
"segmentation": {
|
|
11
|
+
"type": "mmseg",
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "ocd2",
|
|
14
|
+
"file": "cngov/TGPhrases.ocd2"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"conversion_chain": [{
|
|
18
|
+
"dict": {
|
|
19
|
+
"type": "group",
|
|
20
|
+
"dicts": [{
|
|
21
|
+
"type": "ocd2",
|
|
22
|
+
"file": "cngov/TGPhrases.ocd2"
|
|
23
|
+
}, {
|
|
24
|
+
"type": "ocd2",
|
|
25
|
+
"file": "cngov/TGCharacters.ocd2"
|
|
26
|
+
}]
|
|
27
|
+
}
|
|
28
|
+
}]
|
|
29
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Traditional Chinese to CN Government Standard (Keep Simplified)",
|
|
3
|
+
"author": "TerryTian-tech",
|
|
4
|
+
"license": "Apache License 2.0",
|
|
5
|
+
"source": "https://github.com/TerryTian-tech/OpenCC-Traditional-Chinese-characters-according-to-Chinese-government-standards",
|
|
6
|
+
"contributors": ["TerryTian-tech", "Yi Jianpeng", "Hu Xinmei", "Duan Yatong"],
|
|
7
|
+
"reference": "《通用规范汉字表》(2013)",
|
|
8
|
+
"description": "Conservative conversion that preserves intentional simplified characters in mixed documents while standardizing traditional characters only.",
|
|
9
|
+
|
|
10
|
+
"segmentation": {
|
|
11
|
+
"type": "mmseg",
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "ocd2",
|
|
14
|
+
"file": "cngov/TGPhrases.ocd2"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"conversion_chain": [{
|
|
18
|
+
"dict": {
|
|
19
|
+
"type": "group",
|
|
20
|
+
"dicts": [{
|
|
21
|
+
"type": "ocd2",
|
|
22
|
+
"file": "cngov/TGPhrases.ocd2"
|
|
23
|
+
}, {
|
|
24
|
+
"type": "ocd2",
|
|
25
|
+
"file": "cngov/TGCharacters_keep_simp.ocd2"
|
|
26
|
+
}]
|
|
27
|
+
}
|
|
28
|
+
}]
|
|
29
|
+
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/dist/esm/index.js
CHANGED
|
@@ -29,11 +29,37 @@ const readFileBuffer = (url) => {
|
|
|
29
29
|
|
|
30
30
|
// 预设映射:from -> to -> config 文件名
|
|
31
31
|
const CONFIG_MAP = {
|
|
32
|
-
cn: {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
32
|
+
cn: {
|
|
33
|
+
t: "s2t.json",
|
|
34
|
+
tw: "s2tw.json",
|
|
35
|
+
twp: "s2twp.json", // 台湾惯用词
|
|
36
|
+
hk: "s2hk.json",
|
|
37
|
+
cn: null
|
|
38
|
+
},
|
|
39
|
+
tw: {
|
|
40
|
+
cn: "tw2s.json",
|
|
41
|
+
s: "tw2s.json", // 别名
|
|
42
|
+
sp: "tw2sp.json", // 简体惯用词
|
|
43
|
+
t: "tw2t.json",
|
|
44
|
+
tw: null
|
|
45
|
+
},
|
|
46
|
+
hk: {
|
|
47
|
+
cn: "hk2s.json",
|
|
48
|
+
s: "hk2s.json", // 别名
|
|
49
|
+
t: "hk2t.json",
|
|
50
|
+
hk: null
|
|
51
|
+
},
|
|
52
|
+
t: {
|
|
53
|
+
cn: "t2s.json",
|
|
54
|
+
s: "t2s.json", // 别名
|
|
55
|
+
tw: "t2tw.json",
|
|
56
|
+
hk: "t2hk.json",
|
|
57
|
+
jp: "t2jp.json",
|
|
58
|
+
t: null
|
|
59
|
+
},
|
|
60
|
+
jp: {
|
|
61
|
+
t: "jp2t.json"
|
|
62
|
+
},
|
|
37
63
|
};
|
|
38
64
|
|
|
39
65
|
// 缓存已加载的配置/字典与打开的句柄,避免重复加载和重复构建
|
|
@@ -158,7 +184,19 @@ function resolveConfig(from, to) {
|
|
|
158
184
|
}
|
|
159
185
|
|
|
160
186
|
function createConverter({ from, to, config }) {
|
|
161
|
-
|
|
187
|
+
// Support direct config name (e.g., "s2twp.json" or "s2twp")
|
|
188
|
+
let configName;
|
|
189
|
+
|
|
190
|
+
if (config) {
|
|
191
|
+
// Direct config parameter takes priority
|
|
192
|
+
configName = config.endsWith('.json') ? config : `${config}.json`;
|
|
193
|
+
} else if (from && to) {
|
|
194
|
+
// Legacy from/to parameters
|
|
195
|
+
configName = resolveConfig(from, to);
|
|
196
|
+
} else {
|
|
197
|
+
throw new Error('Either "config" or both "from" and "to" must be specified');
|
|
198
|
+
}
|
|
199
|
+
|
|
162
200
|
return async (text) => {
|
|
163
201
|
if (configName === null) return text; // no-op
|
|
164
202
|
const handle = await ensureConfig(configName);
|