opencc-wasm 0.8.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/README.md +50 -5
  2. package/README.zh.md +51 -6
  3. package/dist/cjs/index.cjs +9 -3
  4. package/dist/cjs/opencc-wasm.cjs +1 -1
  5. package/dist/cjs/opencc-wasm.wasm +0 -0
  6. package/dist/data/config/hk2s.json +30 -26
  7. package/dist/data/config/hk2sp.json +38 -0
  8. package/dist/data/config/hk2sp_jieba.json +61 -0
  9. package/dist/data/config/hk2t.json +14 -18
  10. package/dist/data/config/jp2t.json +15 -22
  11. package/dist/data/config/opencc_config.schema.json +45 -94
  12. package/dist/data/config/s2hk.json +37 -18
  13. package/dist/data/config/s2hk_jieba.json +51 -16
  14. package/dist/data/config/s2hkp.json +47 -0
  15. package/dist/data/config/s2hkp_jieba.json +66 -0
  16. package/dist/data/config/s2t.json +22 -19
  17. package/dist/data/config/s2t_cngov.json +16 -18
  18. package/dist/data/config/s2t_jieba.json +35 -11
  19. package/dist/data/config/s2tw.json +38 -19
  20. package/dist/data/config/s2tw_jieba.json +51 -16
  21. package/dist/data/config/s2twp.json +39 -24
  22. package/dist/data/config/s2twp_jieba.json +55 -21
  23. package/dist/data/config/t2cngov.json +16 -18
  24. package/dist/data/config/t2cngov_keep_simp.json +16 -18
  25. package/dist/data/config/t2hk.json +15 -13
  26. package/dist/data/config/t2jp.json +7 -14
  27. package/dist/data/config/t2s.json +20 -19
  28. package/dist/data/config/t2s_cngov.json +16 -18
  29. package/dist/data/config/t2tw.json +15 -13
  30. package/dist/data/config/tw2s.json +31 -27
  31. package/dist/data/config/tw2sp.json +32 -30
  32. package/dist/data/config/tw2sp_jieba.json +50 -25
  33. package/dist/data/config/tw2t.json +14 -18
  34. package/dist/data/dict/CJK_Compatibility_Ideographs.ocd2 +0 -0
  35. package/dist/data/dict/HKPhrases.ocd2 +0 -0
  36. package/dist/data/dict/HKPhrasesRev.ocd2 +0 -0
  37. package/dist/data/dict/HKVariants.ocd2 +0 -0
  38. package/dist/data/dict/HKVariantsPhrases.ocd2 +0 -0
  39. package/dist/data/dict/HKVariantsRev.ocd2 +0 -0
  40. package/dist/data/dict/HKVariantsRevPhrases.ocd2 +0 -0
  41. package/dist/data/dict/JPShinjitaiCharacters.ocd2 +0 -0
  42. package/dist/data/dict/JPShinjitaiCharactersRev.ocd2 +0 -0
  43. package/dist/data/dict/JPShinjitaiPhrases.ocd2 +0 -0
  44. package/dist/data/dict/STCharacters.ocd2 +0 -0
  45. package/dist/data/dict/STPhrases.ocd2 +0 -0
  46. package/dist/data/dict/STPhrases_GeneratedFromRegionalPhrases.ocd2 +0 -0
  47. package/dist/data/dict/TSCharacters.ocd2 +0 -0
  48. package/dist/data/dict/TSCharactersExt.ocd2 +0 -0
  49. package/dist/data/dict/TSPhrases.ocd2 +0 -0
  50. package/dist/data/dict/TWPhrases.ocd2 +0 -0
  51. package/dist/data/dict/TWPhrasesRev.ocd2 +0 -0
  52. package/dist/data/dict/TWVariants.ocd2 +0 -0
  53. package/dist/data/dict/TWVariantsPhrases.ocd2 +0 -0
  54. package/dist/data/dict/TWVariantsRev.ocd2 +0 -0
  55. package/dist/data/dict/TWVariantsRevPhrases.ocd2 +0 -0
  56. package/dist/data/dict/cngov/STCharacters.ocd2 +0 -0
  57. package/dist/data/dict/cngov/STPhrases.ocd2 +0 -0
  58. package/dist/data/dict/cngov/TGCharacters.ocd2 +0 -0
  59. package/dist/data/dict/cngov/TGCharacters_keep_simp.ocd2 +0 -0
  60. package/dist/data/dict/cngov/TGPhrases.ocd2 +0 -0
  61. package/dist/data/dict/cngov/TSCharacters.ocd2 +0 -0
  62. package/dist/data/dict/cngov/TSPhrases.ocd2 +0 -0
  63. package/dist/data/jieba_dict/jieba.dict.utf8 +1 -1
  64. package/dist/data/jieba_dict/jieba_merged.ocd2 +0 -0
  65. package/dist/data/jieba_dict/user.dict.utf8 +2 -1
  66. package/dist/esm/index.js +8 -0
  67. package/dist/esm/opencc-wasm.js +1 -1
  68. package/dist/esm/opencc-wasm.wasm +0 -0
  69. package/dist/opencc-wasm.wasm +0 -0
  70. package/index.d.ts +1 -0
  71. package/package.json +1 -1
  72. package/dist/data/dict/JPVariants.ocd2 +0 -0
  73. package/dist/data/dict/JPVariantsRev.ocd2 +0 -0
  74. package/dist/data/dict/cngov/GovVariants.ocd2 +0 -0
  75. package/dist/data/dict/cngov/GovVariants_keep_simp.ocd2 +0 -0
package/README.md CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
  > 🚀 **Out-of-the-box Chinese text conversion library** - 3 lines of code, auto-loads configs and dictionaries from CDN!
10
10
 
11
- WebAssembly port of OpenCC (Open Chinese Convert) with full API compatibility. Bundles the official OpenCC C++ core compiled via Emscripten, plus all official configs and prebuilt `.ocd2` dictionaries.
11
+ WebAssembly port of OpenCC (Open Chinese Convert) with full API compatibility. Bundles the official OpenCC C++ core compiled via Emscripten, the official configs, CN Government Standard configs, Jieba-backed configs, and prebuilt `.ocd2` dictionaries.
12
12
 
13
13
  **License:** Apache-2.0
14
14
 
@@ -17,7 +17,7 @@ WebAssembly port of OpenCC (Open Chinese Convert) with full API compatibility. B
17
17
  - 🎯 **Zero Configuration** - Auto-loads all configs and dictionaries from CDN
18
18
  - 🔥 **3 Lines to Start** - Simplest API, just import and use
19
19
  - 🌐 **CDN Ready** - Use directly from jsDelivr/unpkg without bundler
20
- - 📦 **All-in-One** - Includes all 14+ official conversion types
20
+ - 📦 **All-in-One** - Includes official conversion types, Jieba variants, and bundled CN Government Standard conversions
21
21
  - ⚡ **Auto Caching** - Resources cached after first load
22
22
  - 🔧 **Full Compatibility** - Compatible with `opencc-js` API
23
23
  - 🚫 **No Native Bindings** - Pure WASM, cross-platform
@@ -30,7 +30,7 @@ WebAssembly port of OpenCC (Open Chinese Convert) with full API compatibility. B
30
30
  ```html
31
31
  <script type="module">
32
32
  // 1. Import from CDN
33
- import OpenCC from "https://cdn.jsdelivr.net/npm/opencc-wasm@0.4.1/dist/esm/index.js";
33
+ import OpenCC from "https://cdn.jsdelivr.net/npm/opencc-wasm@0.10.0/dist/esm/index.js";
34
34
 
35
35
  // 2. Create converter (auto-downloads everything!)
36
36
  const converter = OpenCC.Converter({ config: "s2twp" });
@@ -98,24 +98,45 @@ console.log(await keepSimp("測試简体混繁體")); // 測試简体混繁體
98
98
  | Config | Description | Example |
99
99
  |--------|-------------|---------|
100
100
  | `s2twp` | Simplified → Taiwan Traditional (with regional phrases) | 软件 → 軟體 |
101
+ | `s2twp_jieba` | Simplified → Taiwan Traditional (with phrases, Jieba segmentation) | 服务器软件 → 伺服器軟體 |
101
102
  | `s2tw` | Simplified → Taiwan Traditional | 心里 → 心裡 |
103
+ | `s2tw_jieba` | Simplified → Taiwan Traditional (Jieba segmentation) | 心里 → 心裡 |
102
104
  | `s2hk` | Simplified → Hong Kong Traditional | 心里 → 心裏 |
105
+ | `s2hk_jieba` | Simplified → Hong Kong Traditional (Jieba segmentation) | 心里 → 心裏 |
103
106
  | `s2t` | Simplified → OpenCC Standard Traditional | 简体 → 簡體 |
107
+ | `s2t_jieba` | Simplified → OpenCC Standard Traditional (Jieba segmentation) | 简体 → 簡體 |
108
+ | `s2t_cngov` | Simplified → CN Gov Standard Traditional | 简体 → 簡體 |
104
109
  | `tw2sp` | Taiwan → Simplified (with regional phrases) | 滑鼠 → 鼠标 |
110
+ | `tw2sp_jieba` | Taiwan → Simplified (with phrases, Jieba segmentation) | 伺服器軟體 → 服务器软件 |
105
111
  | `tw2s` | Taiwan → Simplified | 軟體 → 软件 |
106
112
  | `tw2t` | Taiwan → Traditional | 吃飯 → 喫飯 |
107
113
  | `hk2s` | Hong Kong → Simplified | 打印機 → 打印机 |
108
114
  | `hk2t` | Hong Kong → Traditional | 為 → 爲 |
109
115
  | `t2s` | OpenCC Standard Traditional → Simplified | 繁體 → 繁体 |
116
+ | `t2s_cngov` | OpenCC Standard Traditional → CN Gov Simplified | 潮溼 → 潮湿 |
110
117
  | `t2tw` | OpenCC Standard Traditional → Taiwan | 牀 → 床 |
111
118
  | `t2hk` | OpenCC Standard Traditional → Hong Kong | 爲 → 為 |
112
- | `jp2t` | Japanese Shinjitai → Traditional | 桜花 → 櫻花 |
113
- | `t2jp` | Traditional → Japanese Shinjitai | 櫻花 → 桜花 |
114
119
  | `t2cngov` | Traditional → CN Gov Standard Traditional | 潮溼 → 潮濕 |
115
120
  | `t2cngov_keep_simp` | Traditional → CN Gov (Keep Simp) | 简体繁體 → 简体繁體 |
116
121
  | `t2cngov_jieba` | Traditional → CN Gov Standard (Jieba segmentation) | 測試简体混繁體 → 測試簡體混繁體 |
117
122
  | `t2cngov_keep_simp_jieba` | Traditional → CN Gov (Keep Simp, Jieba segmentation) | 測試简体混繁體 → 測試简体混繁體 |
118
123
 
124
+ Hong Kong phrase configs are bundled but still under active development, matching their upstream status:
125
+
126
+ | Config | Description | Example |
127
+ |--------|-------------|---------|
128
+ | `s2hkp` | Simplified → Hong Kong Traditional (with Hong Kong phrases) | 软件 → 軟件 |
129
+ | `s2hkp_jieba` | Simplified → Hong Kong Traditional (with Hong Kong phrases, Jieba segmentation) | 服务器软件 → 伺服器軟件 |
130
+ | `hk2sp` | Hong Kong Traditional → Simplified (with Mainland China phrases) | 軟件 → 软件 |
131
+ | `hk2sp_jieba` | Hong Kong Traditional → Simplified (with Mainland China phrases, Jieba segmentation) | 伺服器軟件 → 服务器软件 |
132
+
133
+ Japanese Kanji configs are bundled for exploratory use and are not recommended for production:
134
+
135
+ | Config | Description | Example |
136
+ |--------|-------------|---------|
137
+ | `t2jp` | Old Japanese Kanji (Kyujitai) → New Japanese Kanji (Shinjitai) | 櫻花 → 桜花 |
138
+ | `jp2t` | New Japanese Kanji (Shinjitai) → Old Japanese Kanji (Kyujitai), with a few Japanese phrases converted to Chinese equivalents | 桜花 → 櫻花 |
139
+
119
140
  #### Method 2: Using `from`/`to` parameters (compatible with `opencc-js`)
120
141
 
121
142
  Specify source and target locales:
@@ -133,6 +154,7 @@ const result = await converter("服务器"); // 伺服器
133
154
  | `tw` | Traditional Chinese (Taiwan) |
134
155
  | `twp` | Taiwan with phrases |
135
156
  | `hk` | Traditional Chinese (Hong Kong) |
157
+ | `hkp` | Hong Kong with phrases |
136
158
  | `t` | Traditional Chinese (general) |
137
159
  | `s` | Simplified Chinese (alias) |
138
160
  | `sp` | Simplified with phrases |
@@ -140,6 +162,12 @@ const result = await converter("服务器"); // 伺服器
140
162
 
141
163
  **Both methods work identically!** Choose what you prefer.
142
164
 
165
+ ## 📦 Bundled Data Versions
166
+
167
+ - OpenCC upstream: `1.3.2` (0.9.0 shipped commit `71964afa6c7f`)
168
+ - CN Government Standard dictionaries: `80a8b40` (0.9.0 shipped `Transformer(1.3.7)`, `da403c620a17`)
169
+ - Release assets are regenerated into `dist/data/` with the current bundled configs and `.ocd2` dictionaries.
170
+
143
171
  ### OpenCC.ConverterFactory() - With Custom Dictionary
144
172
 
145
173
  ```javascript
@@ -402,6 +430,23 @@ A: Initial load downloads configs + dicts (~1-2MB). Subsequent conversions are f
402
430
 
403
431
  ## 📜 Changelog
404
432
 
433
+ ### 0.10.0 - 2026-06-29
434
+
435
+ - Aligned bundled assets with **OpenCC 1.3.2** (configs now include a `normalization` step and the `STPhrases_GeneratedFromRegionalPhrases` dictionary)
436
+ - Updated CN Government Standard dictionaries to `80a8b40` (additional non-standard variant characters from 《古代汉语词典》第3版)
437
+ - Fixed normalization dict loading: ocd2 files in the `normalization` array are now mounted into the WASM VFS before `opencc_create` is called (previously `s2twp`, `s2hkp`, and similar configs threw a file-not-found error)
438
+ - Fixed build: added `SingleStageConverter.cpp` and `PipelineConverter.cpp` to the Emscripten source list after the upstream `Converter` split
439
+ - `converter.inspect()` now returns `pipelineStages` and always populates top-level `segments` from the last pipeline stage
440
+ - TypeScript: `InspectionResult` gains optional `pipelineStages?: InspectionResult[]`
441
+
442
+ ### 0.9.0 - 2026-06-15
443
+
444
+ - Aligned bundled assets with OpenCC upstream `71964afa6c7f`
445
+ - Updated CN Government Standard dictionaries to `Transformer(1.3.7)` (`da403c620a17`)
446
+ - Bundled upstream Hong Kong phrase configs: `s2hkp`, `hk2sp`, plus Jieba-backed variants; these configs remain under active development
447
+ - Added bundled `s2t_cngov` and `t2s_cngov` configs and refreshed wasm dictionary assets
448
+ - Updated wasm build inputs for the upstream resource/dictionary split and regenerated publishable `dist/` artifacts
449
+
405
450
  ### 0.8.1 - 2026-04-22
406
451
 
407
452
  - Fixed the publish pipeline so `npm run build` / `prepack` refresh bundled assets before generating `dist`
package/README.zh.md CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
  > 🚀 **開箱即用的中文簡繁轉換程式庫** - 3 行程式碼搞定,自動從 CDN 載入設定和字典!
10
10
 
11
- OpenCC(Open Chinese Convert)的 WebAssembly 移植版本,完全相容原版 API。內建官方 OpenCC C++ 核心(透過 Emscripten 編譯),以及所有官方設定檔和預先建置的 `.ocd2` 字典檔。
11
+ OpenCC(Open Chinese Convert)的 WebAssembly 移植版本,完全相容原版 API。內建官方 OpenCC C++ 核心(透過 Emscripten 編譯)、官方設定檔、隨附的大陸政府標準設定、Jieba 設定,以及預先建置的 `.ocd2` 字典檔。
12
12
 
13
13
  **授權條款:** Apache-2.0
14
14
 
@@ -17,7 +17,7 @@ OpenCC(Open Chinese Convert)的 WebAssembly 移植版本,完全相容原
17
17
  - 🎯 **零設定** - 自動從 CDN 載入所有設定檔和字典檔
18
18
  - 🔥 **3 行開始** - 最簡單的 API,匯入即用
19
19
  - 🌐 **CDN 就緒** - 可直接從 jsDelivr/unpkg 使用,無需打包工具
20
- - 📦 **一應俱全** - 包含所有 14+ 種官方轉換類型
20
+ - 📦 **一應俱全** - 包含官方轉換類型、Jieba 變體與隨附的大陸政府標準轉換
21
21
  - ⚡ **自動快取** - 資源首次載入後自動快取
22
22
  - 🔧 **完全相容** - 相容 `opencc-js` API
23
23
  - 🚫 **無需原生綁定** - 純 WASM,跨平台
@@ -30,7 +30,7 @@ OpenCC(Open Chinese Convert)的 WebAssembly 移植版本,完全相容原
30
30
  ```html
31
31
  <script type="module">
32
32
  // 1. 從 CDN 匯入
33
- import OpenCC from "https://cdn.jsdelivr.net/npm/opencc-wasm@0.4.1/dist/esm/index.js";
33
+ import OpenCC from "https://cdn.jsdelivr.net/npm/opencc-wasm@0.10.0/dist/esm/index.js";
34
34
 
35
35
  // 2. 建立轉換器(自動下載所有資源!)
36
36
  const converter = OpenCC.Converter({ config: "s2twp" });
@@ -97,25 +97,46 @@ console.log(await keepSimp("測試简体混繁體")); // 測試简体混繁體
97
97
 
98
98
  | 設定檔 | 說明 | 範例 |
99
99
  |--------|------|------|
100
- | `s2twp` | 簡體 → 台灣正體(含地域用詞轉換) | 軟體 → 軟體 |
100
+ | `s2twp` | 簡體 → 台灣正體(含地域用詞轉換) | 软件 → 軟體 |
101
+ | `s2twp_jieba` | 簡體 → 台灣正體(含地域用詞,Jieba 分詞) | 服务器软件 → 伺服器軟體 |
101
102
  | `s2tw` | 簡體 → 台灣正體 | 心里 → 心裡 |
103
+ | `s2tw_jieba` | 簡體 → 台灣正體(Jieba 分詞) | 心里 → 心裡 |
102
104
  | `s2hk` | 簡體 → 香港繁體 | 心里 → 心裏 |
105
+ | `s2hk_jieba` | 簡體 → 香港繁體(Jieba 分詞) | 心里 → 心裏 |
103
106
  | `s2t` | 簡體 → OpenCC 標準繁體 | 简体 → 簡體 |
107
+ | `s2t_jieba` | 簡體 → OpenCC 標準繁體(Jieba 分詞) | 简体 → 簡體 |
108
+ | `s2t_cngov` | 簡體 → 大陸政府標準繁體 | 简体 → 簡體 |
104
109
  | `tw2sp` | 台灣正體 → 簡體(含地域用詞轉換) | 滑鼠 → 鼠标 |
110
+ | `tw2sp_jieba` | 台灣正體 → 簡體(含地域用詞,Jieba 分詞) | 伺服器軟體 → 服务器软件 |
105
111
  | `tw2s` | 台灣正體 → 簡體 | 軟體 → 软件 |
106
112
  | `tw2t` | 台灣正體 → OpenCC 標準繁體 | 吃飯 → 喫飯 |
107
113
  | `hk2s` | 香港繁體 → 簡體 | 打印機 → 打印机 |
108
114
  | `hk2t` | 香港繁體 → OpenCC 標準繁體 | 為 → 爲 |
109
115
  | `t2s` | OpenCC 標準繁體 → 簡體 | 繁體 → 繁体 |
116
+ | `t2s_cngov` | OpenCC 標準繁體 → 大陸政府標準簡體 | 潮溼 → 潮湿 |
110
117
  | `t2tw` | OpenCC 標準繁體 → 台灣正體 | 牀 → 床 |
111
118
  | `t2hk` | OpenCC 標準繁體 → 香港繁體 | 爲 → 為 |
112
- | `jp2t` | 日文新字體 → 日文舊字體 | 桜花 → 櫻花 |
113
- | `t2jp` | 日文舊字體 → 日文新字體 | 櫻花 → 桜花 |
114
119
  | `t2cngov` | 繁體 → 大陸政府標準繁體 | 潮溼 → 潮濕 |
115
120
  | `t2cngov_keep_simp` | 繁體 → 大陸政府標準繁體(保留簡體) | 简体繁體 → 简体繁體 |
116
121
  | `t2cngov_jieba` | 繁體 → 大陸政府標準繁體(Jieba 分詞) | 測試简体混繁體 → 測試簡體混繁體 |
117
122
  | `t2cngov_keep_simp_jieba` | 繁體 → 大陸政府標準繁體(保留簡體,Jieba 分詞) | 測試简体混繁體 → 測試简体混繁體 |
118
123
 
124
+ 香港常用詞設定檔已隨包提供,但比照上游仍在開發中,歡迎補充詞組:
125
+
126
+ | 設定檔 | 說明 | 範例 |
127
+ |--------|------|------|
128
+ | `s2hkp` | 簡體 → 香港繁體(含香港常用詞彙) | 软件 → 軟件 |
129
+ | `s2hkp_jieba` | 簡體 → 香港繁體(含香港常用詞彙,Jieba 分詞) | 服务器软件 → 伺服器軟件 |
130
+ | `hk2sp` | 香港繁體 → 簡體(含中國大陸常用詞彙) | 軟件 → 软件 |
131
+ | `hk2sp_jieba` | 香港繁體 → 簡體(含中國大陸常用詞彙,Jieba 分詞) | 伺服器軟件 → 服务器软件 |
132
+
133
+ 日文漢字設定檔已隨包提供,但比照上游僅供探索性研究,不建議用於生產環境:
134
+
135
+ | 設定檔 | 說明 | 範例 |
136
+ |--------|------|------|
137
+ | `t2jp` | 日文舊字體 → 日文新字體 | 櫻花 → 桜花 |
138
+ | `jp2t` | 日文新字體 → 日文舊字體,並將少量日文詞組轉換為對應中文 | 桜花 → 櫻花 |
139
+
119
140
  #### 方式 2:使用 `from`/`to` 參數(與 `opencc-js` 相容)
120
141
 
121
142
  指定來源和目標語系:
@@ -133,6 +154,7 @@ const result = await converter("服务器"); // 伺服器
133
154
  | `tw` | 繁體中文(台灣) |
134
155
  | `twp` | 台灣正體(含慣用詞) |
135
156
  | `hk` | 繁體中文(香港) |
157
+ | `hkp` | 香港繁體(含慣用詞) |
136
158
  | `t` | 繁體中文(通用) |
137
159
  | `s` | 簡體中文(別名) |
138
160
  | `sp` | 簡體(含慣用詞) |
@@ -140,6 +162,12 @@ const result = await converter("服务器"); // 伺服器
140
162
 
141
163
  **兩種方式功能完全相同!** 選擇您喜歡的即可。
142
164
 
165
+ ## 📦 內建資料版本
166
+
167
+ - OpenCC 上游:`1.3.2`(0.9.0 隨附的提交為 `71964afa6c7f`)
168
+ - 大陸政府標準詞典:`80a8b40`(0.9.0 隨附 `Transformer(1.3.7)`,`da403c620a17`)
169
+ - 發行資源已用目前隨附的設定檔與 `.ocd2` 詞典重新產生到 `dist/data/`。
170
+
143
171
  ### OpenCC.ConverterFactory() - 含自訂字典的轉換器
144
172
 
145
173
  ```javascript
@@ -402,6 +430,23 @@ A:首次載入需要下載設定檔和字典檔(約 1-2MB)。後續轉換
402
430
 
403
431
  ## 📜 變更歷史
404
432
 
433
+ ### 0.10.0 - 2026-06-29
434
+
435
+ - 將隨附資源對齊 **OpenCC 1.3.2**(設定檔新增 `normalization` 預處理步驟,並隨附 `STPhrases_GeneratedFromRegionalPhrases` 詞典)
436
+ - 大陸政府標準詞典更新到 `80a8b40`(新增來自《古代漢語詞典》第3版的表外異體字)
437
+ - 修正 normalization 詞典載入問題:`normalization` 陣列中引用的 ocd2 檔案現在會在呼叫 `opencc_create` 前掛載到 WASM VFS(舊版對 `s2twp`、`s2hkp` 等設定會丟出找不到檔案的錯誤)
438
+ - 修正 build:在 Emscripten 來源清單中補上 `SingleStageConverter.cpp` 和 `PipelineConverter.cpp`(上游將 `Converter` 拆分後必要)
439
+ - `converter.inspect()` 現在會回傳 `pipelineStages`,並始終從最末 pipeline stage 的 segments 填充頂層 `segments` 欄位
440
+ - TypeScript:`InspectionResult` 新增選用欄位 `pipelineStages?: InspectionResult[]`
441
+
442
+ ### 0.9.0 - 2026-06-15
443
+
444
+ - 將隨附資源對齊 OpenCC 上游 `71964afa6c7f`
445
+ - 大陸政府標準詞典更新到 `Transformer(1.3.7)`(`da403c620a17`)
446
+ - 隨包提供上游香港常用詞轉換設定:`s2hkp`、`hk2sp`,以及對應 Jieba 變體;這些設定仍在開發中
447
+ - 新增隨附的 `s2t_cngov`、`t2s_cngov` 設定並刷新 wasm 詞典資源
448
+ - 配合上游 resource/dictionary 拆分更新 wasm build input,並重新產生可發布的 `dist/` 產物
449
+
405
450
  ### 0.8.1 - 2026-04-22
406
451
 
407
452
  - 修正發布流程,讓 `npm run build` / `prepack` 會先刷新資源,再產生 `dist`
package/dist/cjs/index.cjs CHANGED
@@ -9,9 +9,9 @@ const readFileText = (url) => fs.readFileSync(fileURLToPath(url), "utf-8");
9
9
  const readFileBuffer = (url) => fs.readFileSync(fileURLToPath(url));
10
10
 
11
11
  const CONFIG_MAP = {
12
- cn: { t: "s2t.json", tw: "s2tw.json", hk: "s2hk.json", cn: null },
13
- tw: { cn: "tw2s.json", t: "tw2t.json", tw: null },
14
- hk: { cn: "hk2s.json", t: "hk2t.json", hk: null },
12
+ cn: { t: "s2t.json", tw: "s2tw.json", twp: "s2twp.json", hk: "s2hk.json", hkp: "s2hkp.json", cn: null },
13
+ tw: { cn: "tw2s.json", s: "tw2s.json", sp: "tw2sp.json", t: "tw2t.json", tw: null },
14
+ hk: { cn: "hk2s.json", s: "hk2s.json", sp: "hk2sp.json", t: "hk2t.json", hk: null },
15
15
  t: { cn: "t2s.json", tw: "t2tw.json", hk: "t2hk.json", jp: "t2jp.json", t: null },
16
16
  jp: { t: "jp2t.json" },
17
17
  };
@@ -97,6 +97,9 @@ async function ensureConfig(configName) {
97
97
 
98
98
  const dicts = new Set();
99
99
  const resources = new Set();
100
+ if (Array.isArray(cfgJson.normalization)) {
101
+ cfgJson.normalization.forEach((item) => collectOcd2Files(item?.dict, dicts));
102
+ }
100
103
  collectOcd2Files(cfgJson.segmentation?.dict, dicts);
101
104
  collectSegmentationResources(cfgJson.segmentation, resources);
102
105
  if (Array.isArray(cfgJson.conversion_chain)) {
@@ -125,6 +128,9 @@ async function ensureConfig(configName) {
125
128
  if (node.type === "ocd2" && node.file) node.file = "/data/dict/" + node.file;
126
129
  if (node.type === "group" && Array.isArray(node.dicts)) node.dicts.forEach(patchPaths);
127
130
  };
131
+ if (Array.isArray(cfgJson.normalization)) {
132
+ cfgJson.normalization.forEach((item) => patchPaths(item?.dict));
133
+ }
128
134
  patchPaths(cfgJson.segmentation?.dict);
129
135
  if (Array.isArray(cfgJson.conversion_chain)) {
130
136
  cfgJson.conversion_chain.forEach((item) => patchPaths(item?.dict));