opencc-wasm 0.8.2 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -5
- package/README.zh.md +51 -6
- package/dist/cjs/index.cjs +9 -3
- package/dist/cjs/opencc-wasm.cjs +1 -1
- package/dist/cjs/opencc-wasm.wasm +0 -0
- package/dist/data/config/hk2s.json +30 -26
- package/dist/data/config/hk2sp.json +38 -0
- package/dist/data/config/hk2sp_jieba.json +61 -0
- package/dist/data/config/hk2t.json +14 -18
- package/dist/data/config/jp2t.json +15 -22
- package/dist/data/config/opencc_config.schema.json +45 -94
- package/dist/data/config/s2hk.json +37 -18
- package/dist/data/config/s2hk_jieba.json +51 -16
- package/dist/data/config/s2hkp.json +47 -0
- package/dist/data/config/s2hkp_jieba.json +66 -0
- package/dist/data/config/s2t.json +22 -19
- package/dist/data/config/s2t_cngov.json +16 -18
- package/dist/data/config/s2t_jieba.json +35 -11
- package/dist/data/config/s2tw.json +38 -19
- package/dist/data/config/s2tw_jieba.json +51 -16
- package/dist/data/config/s2twp.json +39 -24
- package/dist/data/config/s2twp_jieba.json +55 -21
- package/dist/data/config/t2cngov.json +16 -18
- package/dist/data/config/t2cngov_keep_simp.json +16 -18
- package/dist/data/config/t2hk.json +15 -13
- package/dist/data/config/t2jp.json +7 -14
- package/dist/data/config/t2s.json +20 -19
- package/dist/data/config/t2s_cngov.json +16 -18
- package/dist/data/config/t2tw.json +15 -13
- package/dist/data/config/tw2s.json +31 -27
- package/dist/data/config/tw2sp.json +32 -30
- package/dist/data/config/tw2sp_jieba.json +50 -25
- package/dist/data/config/tw2t.json +14 -18
- package/dist/data/dict/CJK_Compatibility_Ideographs.ocd2 +0 -0
- package/dist/data/dict/HKPhrases.ocd2 +0 -0
- package/dist/data/dict/HKPhrasesRev.ocd2 +0 -0
- package/dist/data/dict/HKVariants.ocd2 +0 -0
- package/dist/data/dict/HKVariantsPhrases.ocd2 +0 -0
- package/dist/data/dict/HKVariantsRev.ocd2 +0 -0
- package/dist/data/dict/HKVariantsRevPhrases.ocd2 +0 -0
- package/dist/data/dict/JPShinjitaiCharacters.ocd2 +0 -0
- package/dist/data/dict/JPShinjitaiCharactersRev.ocd2 +0 -0
- package/dist/data/dict/JPShinjitaiPhrases.ocd2 +0 -0
- package/dist/data/dict/STCharacters.ocd2 +0 -0
- package/dist/data/dict/STPhrases.ocd2 +0 -0
- package/dist/data/dict/STPhrases_GeneratedFromRegionalPhrases.ocd2 +0 -0
- package/dist/data/dict/TSCharacters.ocd2 +0 -0
- package/dist/data/dict/TSCharactersExt.ocd2 +0 -0
- package/dist/data/dict/TSPhrases.ocd2 +0 -0
- package/dist/data/dict/TWPhrases.ocd2 +0 -0
- package/dist/data/dict/TWPhrasesRev.ocd2 +0 -0
- package/dist/data/dict/TWVariants.ocd2 +0 -0
- package/dist/data/dict/TWVariantsPhrases.ocd2 +0 -0
- package/dist/data/dict/TWVariantsRev.ocd2 +0 -0
- package/dist/data/dict/TWVariantsRevPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/STCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/STPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters_keep_simp.ocd2 +0 -0
- package/dist/data/dict/cngov/TGPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/TSCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/TSPhrases.ocd2 +0 -0
- package/dist/data/jieba_dict/jieba.dict.utf8 +1 -1
- package/dist/data/jieba_dict/jieba_merged.ocd2 +0 -0
- package/dist/data/jieba_dict/user.dict.utf8 +2 -1
- package/dist/esm/index.js +8 -0
- package/dist/esm/opencc-wasm.js +1 -1
- package/dist/esm/opencc-wasm.wasm +0 -0
- package/dist/opencc-wasm.wasm +0 -0
- package/index.d.ts +1 -0
- package/package.json +1 -1
- package/dist/data/dict/JPVariants.ocd2 +0 -0
- package/dist/data/dict/JPVariantsRev.ocd2 +0 -0
- package/dist/data/dict/cngov/GovVariants.ocd2 +0 -0
- package/dist/data/dict/cngov/GovVariants_keep_simp.ocd2 +0 -0
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese (Taiwan standard) to Simplified Chinese (with phrases, Jieba Segmentation - Experimental)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{
|
|
5
|
+
"dict": {
|
|
6
|
+
"type": "ocd2",
|
|
7
|
+
"file": "CJK_Compatibility_Ideographs.ocd2"
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
],
|
|
3
11
|
"segmentation": {
|
|
4
12
|
"type": "jieba",
|
|
5
13
|
"resources": {
|
|
@@ -7,30 +15,47 @@
|
|
|
7
15
|
"model_path": "jieba_dict/hmm_model.utf8"
|
|
8
16
|
}
|
|
9
17
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
18
|
+
"conversion_chain": [
|
|
19
|
+
{
|
|
20
|
+
"dict": {
|
|
21
|
+
"type": "group",
|
|
22
|
+
"match_policy": "short_circuit",
|
|
23
|
+
"dicts": [
|
|
24
|
+
{
|
|
25
|
+
"type": "ocd2",
|
|
26
|
+
"file": "TWPhrasesRev.ocd2"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"type": "ocd2",
|
|
30
|
+
"file": "TWVariantsRevPhrases.ocd2"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"type": "ocd2",
|
|
34
|
+
"file": "TWVariantsRev.ocd2"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"dict": {
|
|
41
|
+
"type": "group",
|
|
42
|
+
"match_policy": "short_circuit",
|
|
43
|
+
"dicts": [
|
|
44
|
+
{
|
|
45
|
+
"type": "ocd2",
|
|
46
|
+
"file": "TSPhrases.ocd2"
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"type": "ocd2",
|
|
50
|
+
"file": "TSCharactersExt.ocd2",
|
|
51
|
+
"may_output_tofu": true
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"type": "ocd2",
|
|
55
|
+
"file": "TSCharacters.ocd2"
|
|
56
|
+
}
|
|
57
|
+
]
|
|
58
|
+
}
|
|
34
59
|
}
|
|
35
|
-
|
|
60
|
+
]
|
|
36
61
|
}
|
|
@@ -1,22 +1,18 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese (Taiwan Standard) to Traditional Chinese (OpenCC Standard)",
|
|
3
|
-
"
|
|
4
|
-
"type": "
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
6
|
+
"conversion_chain": [
|
|
7
|
+
{
|
|
8
|
+
"dict": {
|
|
9
|
+
"type": "group",
|
|
10
|
+
"match_policy": "short_circuit",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "TWVariantsRev.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
8
16
|
}
|
|
9
|
-
|
|
10
|
-
"conversion_chain": [{
|
|
11
|
-
"dict": {
|
|
12
|
-
"type": "group",
|
|
13
|
-
"dicts": [{
|
|
14
|
-
"type": "ocd2",
|
|
15
|
-
"file": "TWVariantsRevPhrases.ocd2"
|
|
16
|
-
}, {
|
|
17
|
-
"type": "ocd2",
|
|
18
|
-
"file": "TWVariantsRev.ocd2"
|
|
19
|
-
}]
|
|
20
|
-
}
|
|
21
|
-
}]
|
|
17
|
+
]
|
|
22
18
|
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -887,7 +887,6 @@ T恤 4 n
|
|
|
887
887
|
一准儿 2 l
|
|
888
888
|
一凉 3 a
|
|
889
889
|
一几 10 m
|
|
890
|
-
一出 964 m
|
|
891
890
|
一出场 3 m
|
|
892
891
|
一出头 3 m
|
|
893
892
|
一出戏 3 i
|
|
@@ -18653,6 +18652,7 @@ T恤 4 n
|
|
|
18653
18652
|
也门 234 d
|
|
18654
18653
|
也门共和国 14 ns
|
|
18655
18654
|
习 1216 v
|
|
18655
|
+
习总书记 4 nr
|
|
18656
18656
|
习与性成 3 i
|
|
18657
18657
|
习中学 3 n
|
|
18658
18658
|
习举业 2 n
|
|
Binary file
|
package/dist/esm/index.js
CHANGED
|
@@ -34,6 +34,7 @@ const CONFIG_MAP = {
|
|
|
34
34
|
tw: "s2tw.json",
|
|
35
35
|
twp: "s2twp.json", // 台湾惯用词
|
|
36
36
|
hk: "s2hk.json",
|
|
37
|
+
hkp: "s2hkp.json", // 香港惯用词
|
|
37
38
|
cn: null
|
|
38
39
|
},
|
|
39
40
|
tw: {
|
|
@@ -46,6 +47,7 @@ const CONFIG_MAP = {
|
|
|
46
47
|
hk: {
|
|
47
48
|
cn: "hk2s.json",
|
|
48
49
|
s: "hk2s.json", // 别名
|
|
50
|
+
sp: "hk2sp.json", // 简体惯用词
|
|
49
51
|
t: "hk2t.json",
|
|
50
52
|
hk: null
|
|
51
53
|
},
|
|
@@ -163,6 +165,9 @@ async function ensureConfig(configName) {
|
|
|
163
165
|
|
|
164
166
|
const dicts = new Set();
|
|
165
167
|
const resources = new Set();
|
|
168
|
+
if (Array.isArray(cfgJson.normalization)) {
|
|
169
|
+
cfgJson.normalization.forEach((item) => collectOcd2Files(item?.dict, dicts));
|
|
170
|
+
}
|
|
166
171
|
collectOcd2Files(cfgJson.segmentation?.dict, dicts);
|
|
167
172
|
collectSegmentationResources(cfgJson.segmentation, resources);
|
|
168
173
|
if (Array.isArray(cfgJson.conversion_chain)) {
|
|
@@ -196,6 +201,9 @@ async function ensureConfig(configName) {
|
|
|
196
201
|
node.dicts.forEach(patchPaths);
|
|
197
202
|
}
|
|
198
203
|
};
|
|
204
|
+
if (Array.isArray(cfgJson.normalization)) {
|
|
205
|
+
cfgJson.normalization.forEach((item) => patchPaths(item?.dict));
|
|
206
|
+
}
|
|
199
207
|
patchPaths(cfgJson.segmentation?.dict);
|
|
200
208
|
if (Array.isArray(cfgJson.conversion_chain)) {
|
|
201
209
|
cfgJson.conversion_chain.forEach((item) => patchPaths(item?.dict));
|