opencc-wasm 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -1
- package/README.zh.md +10 -1
- package/dist/cjs/index.cjs +6 -0
- package/dist/cjs/opencc-wasm.cjs +1 -1
- package/dist/cjs/opencc-wasm.wasm +0 -0
- package/dist/data/config/hk2s.json +5 -0
- package/dist/data/config/hk2sp.json +5 -0
- package/dist/data/config/hk2sp_jieba.json +30 -5
- package/dist/data/config/hk2t.json +4 -4
- package/dist/data/config/jp2t.json +4 -4
- package/dist/data/config/opencc_config.schema.json +9 -4
- package/dist/data/config/s2hk.json +21 -2
- package/dist/data/config/s2hk_jieba.json +36 -4
- package/dist/data/config/s2hkp.json +21 -2
- package/dist/data/config/s2hkp_jieba.json +40 -5
- package/dist/data/config/s2t.json +12 -5
- package/dist/data/config/s2t_cngov.json +16 -11
- package/dist/data/config/s2t_jieba.json +27 -2
- package/dist/data/config/s2tw.json +21 -2
- package/dist/data/config/s2tw_jieba.json +36 -4
- package/dist/data/config/s2twp.json +21 -2
- package/dist/data/config/s2twp_jieba.json +40 -5
- package/dist/data/config/t2cngov.json +16 -11
- package/dist/data/config/t2cngov_keep_simp.json +16 -11
- package/dist/data/config/t2hk.json +4 -10
- package/dist/data/config/t2jp.json +3 -4
- package/dist/data/config/t2s.json +4 -4
- package/dist/data/config/t2s_cngov.json +16 -11
- package/dist/data/config/t2tw.json +4 -10
- package/dist/data/config/tw2s.json +5 -0
- package/dist/data/config/tw2sp.json +5 -0
- package/dist/data/config/tw2sp_jieba.json +30 -5
- package/dist/data/config/tw2t.json +4 -4
- package/dist/data/dict/CJK_Compatibility_Ideographs.ocd2 +0 -0
- package/dist/data/dict/HKPhrases.ocd2 +0 -0
- package/dist/data/dict/HKPhrasesRev.ocd2 +0 -0
- package/dist/data/dict/HKVariants.ocd2 +0 -0
- package/dist/data/dict/HKVariantsRev.ocd2 +0 -0
- package/dist/data/dict/HKVariantsRevPhrases.ocd2 +0 -0
- package/dist/data/dict/JPShinjitaiPhrases.ocd2 +0 -0
- package/dist/data/dict/STPhrases.ocd2 +0 -0
- package/dist/data/dict/STPhrases_GeneratedFromRegionalPhrases.ocd2 +0 -0
- package/dist/data/dict/TSCharacters.ocd2 +0 -0
- package/dist/data/dict/TSPhrases.ocd2 +0 -0
- package/dist/data/dict/TWPhrases.ocd2 +0 -0
- package/dist/data/dict/TWPhrasesRev.ocd2 +0 -0
- package/dist/data/dict/cngov/STCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/STPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters_keep_simp.ocd2 +0 -0
- package/dist/data/dict/cngov/TGPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/TSCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/TSPhrases.ocd2 +0 -0
- package/dist/data/jieba_dict/jieba.dict.utf8 +1 -1
- package/dist/data/jieba_dict/jieba_merged.ocd2 +0 -0
- package/dist/data/jieba_dict/user.dict.utf8 +0 -1
- package/dist/esm/index.js +6 -0
- package/dist/esm/opencc-wasm.js +1 -1
- package/dist/esm/opencc-wasm.wasm +0 -0
- package/dist/opencc-wasm.wasm +0 -0
- package/index.d.ts +1 -0
- package/package.json +1 -1
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Old Japanese Kanji (Kyūjitai) to New Japanese Kanji (Shinjitai)",
|
|
3
|
-
"
|
|
4
|
-
"type": "
|
|
5
|
-
|
|
6
|
-
},
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
7
6
|
"conversion_chain": [
|
|
8
7
|
{ "dict": { "type": "ocd2", "file": "JPShinjitaiCharactersRev.ocd2" } }
|
|
9
8
|
]
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese (OpenCC Standard) to Simplified Chinese",
|
|
3
|
-
"
|
|
4
|
-
"type": "
|
|
5
|
-
|
|
6
|
-
},
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
7
6
|
"conversion_chain": [
|
|
8
7
|
{
|
|
9
8
|
"dict": {
|
|
10
9
|
"type": "group",
|
|
10
|
+
"match_policy": "short_circuit",
|
|
11
11
|
"dicts": [
|
|
12
12
|
{ "type": "ocd2", "file": "TSPhrases.ocd2" },
|
|
13
13
|
{
|
|
@@ -7,16 +7,21 @@
|
|
|
7
7
|
"file": "cngov/TSPhrases.ocd2"
|
|
8
8
|
}
|
|
9
9
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{
|
|
16
|
+
"type": "ocd2",
|
|
17
|
+
"file": "cngov/TSPhrases.ocd2"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"type": "ocd2",
|
|
21
|
+
"file": "cngov/TSCharacters.ocd2"
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
}
|
|
20
25
|
}
|
|
21
|
-
|
|
26
|
+
]
|
|
22
27
|
}
|
|
@@ -1,19 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese (OpenCC Standard) to Traditional Chinese (Taiwan Standard)",
|
|
3
|
-
"
|
|
4
|
-
"type": "
|
|
5
|
-
|
|
6
|
-
"type": "group",
|
|
7
|
-
"dicts": [
|
|
8
|
-
{ "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
|
|
9
|
-
{ "type": "ocd2", "file": "TWVariants.ocd2" }
|
|
10
|
-
]
|
|
11
|
-
}
|
|
12
|
-
},
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
13
6
|
"conversion_chain": [
|
|
14
7
|
{
|
|
15
8
|
"dict": {
|
|
16
9
|
"type": "group",
|
|
10
|
+
"match_policy": "short_circuit",
|
|
17
11
|
"dicts": [
|
|
18
12
|
{ "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
|
|
19
13
|
{ "type": "ocd2", "file": "TWVariants.ocd2" }
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese (Taiwan Standard) to Simplified Chinese",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
3
6
|
"segmentation": {
|
|
4
7
|
"type": "mmseg",
|
|
5
8
|
"dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
|
|
@@ -8,6 +11,7 @@
|
|
|
8
11
|
{
|
|
9
12
|
"dict": {
|
|
10
13
|
"type": "group",
|
|
14
|
+
"match_policy": "short_circuit",
|
|
11
15
|
"dicts": [
|
|
12
16
|
{ "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
|
|
13
17
|
{ "type": "ocd2", "file": "TWVariantsRev.ocd2" }
|
|
@@ -17,6 +21,7 @@
|
|
|
17
21
|
{
|
|
18
22
|
"dict": {
|
|
19
23
|
"type": "group",
|
|
24
|
+
"match_policy": "short_circuit",
|
|
20
25
|
"dicts": [
|
|
21
26
|
{ "type": "ocd2", "file": "TSPhrases.ocd2" },
|
|
22
27
|
{
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese (Taiwan Standard) to Simplified Chinese (Mainland China Phrases)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
3
6
|
"segmentation": {
|
|
4
7
|
"type": "mmseg",
|
|
5
8
|
"dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
|
|
@@ -8,6 +11,7 @@
|
|
|
8
11
|
{
|
|
9
12
|
"dict": {
|
|
10
13
|
"type": "group",
|
|
14
|
+
"match_policy": "short_circuit",
|
|
11
15
|
"dicts": [
|
|
12
16
|
{ "type": "ocd2", "file": "TWPhrasesRev.ocd2" },
|
|
13
17
|
{ "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
|
|
@@ -18,6 +22,7 @@
|
|
|
18
22
|
{
|
|
19
23
|
"dict": {
|
|
20
24
|
"type": "group",
|
|
25
|
+
"match_policy": "short_circuit",
|
|
21
26
|
"dicts": [
|
|
22
27
|
{ "type": "ocd2", "file": "TSPhrases.ocd2" },
|
|
23
28
|
{
|
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese (Taiwan standard) to Simplified Chinese (with phrases, Jieba Segmentation - Experimental)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{
|
|
5
|
+
"dict": {
|
|
6
|
+
"type": "ocd2",
|
|
7
|
+
"file": "CJK_Compatibility_Ideographs.ocd2"
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
],
|
|
3
11
|
"segmentation": {
|
|
4
12
|
"type": "jieba",
|
|
5
13
|
"resources": {
|
|
@@ -11,24 +19,41 @@
|
|
|
11
19
|
{
|
|
12
20
|
"dict": {
|
|
13
21
|
"type": "group",
|
|
22
|
+
"match_policy": "short_circuit",
|
|
14
23
|
"dicts": [
|
|
15
|
-
{
|
|
16
|
-
|
|
17
|
-
|
|
24
|
+
{
|
|
25
|
+
"type": "ocd2",
|
|
26
|
+
"file": "TWPhrasesRev.ocd2"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"type": "ocd2",
|
|
30
|
+
"file": "TWVariantsRevPhrases.ocd2"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"type": "ocd2",
|
|
34
|
+
"file": "TWVariantsRev.ocd2"
|
|
35
|
+
}
|
|
18
36
|
]
|
|
19
37
|
}
|
|
20
38
|
},
|
|
21
39
|
{
|
|
22
40
|
"dict": {
|
|
23
41
|
"type": "group",
|
|
42
|
+
"match_policy": "short_circuit",
|
|
24
43
|
"dicts": [
|
|
25
|
-
{
|
|
44
|
+
{
|
|
45
|
+
"type": "ocd2",
|
|
46
|
+
"file": "TSPhrases.ocd2"
|
|
47
|
+
},
|
|
26
48
|
{
|
|
27
49
|
"type": "ocd2",
|
|
28
50
|
"file": "TSCharactersExt.ocd2",
|
|
29
51
|
"may_output_tofu": true
|
|
30
52
|
},
|
|
31
|
-
{
|
|
53
|
+
{
|
|
54
|
+
"type": "ocd2",
|
|
55
|
+
"file": "TSCharacters.ocd2"
|
|
56
|
+
}
|
|
32
57
|
]
|
|
33
58
|
}
|
|
34
59
|
}
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese (Taiwan Standard) to Traditional Chinese (OpenCC Standard)",
|
|
3
|
-
"
|
|
4
|
-
"type": "
|
|
5
|
-
|
|
6
|
-
},
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
7
6
|
"conversion_chain": [
|
|
8
7
|
{
|
|
9
8
|
"dict": {
|
|
10
9
|
"type": "group",
|
|
10
|
+
"match_policy": "short_circuit",
|
|
11
11
|
"dicts": [
|
|
12
12
|
{ "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
|
|
13
13
|
{ "type": "ocd2", "file": "TWVariantsRev.ocd2" }
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -887,7 +887,6 @@ T恤 4 n
|
|
|
887
887
|
一准儿 2 l
|
|
888
888
|
一凉 3 a
|
|
889
889
|
一几 10 m
|
|
890
|
-
一出 964 m
|
|
891
890
|
一出场 3 m
|
|
892
891
|
一出头 3 m
|
|
893
892
|
一出戏 3 i
|
|
@@ -18653,6 +18652,7 @@ T恤 4 n
|
|
|
18653
18652
|
也门 234 d
|
|
18654
18653
|
也门共和国 14 ns
|
|
18655
18654
|
习 1216 v
|
|
18655
|
+
习总书记 4 nr
|
|
18656
18656
|
习与性成 3 i
|
|
18657
18657
|
习中学 3 n
|
|
18658
18658
|
习举业 2 n
|
|
Binary file
|
package/dist/esm/index.js
CHANGED
|
@@ -165,6 +165,9 @@ async function ensureConfig(configName) {
|
|
|
165
165
|
|
|
166
166
|
const dicts = new Set();
|
|
167
167
|
const resources = new Set();
|
|
168
|
+
if (Array.isArray(cfgJson.normalization)) {
|
|
169
|
+
cfgJson.normalization.forEach((item) => collectOcd2Files(item?.dict, dicts));
|
|
170
|
+
}
|
|
168
171
|
collectOcd2Files(cfgJson.segmentation?.dict, dicts);
|
|
169
172
|
collectSegmentationResources(cfgJson.segmentation, resources);
|
|
170
173
|
if (Array.isArray(cfgJson.conversion_chain)) {
|
|
@@ -198,6 +201,9 @@ async function ensureConfig(configName) {
|
|
|
198
201
|
node.dicts.forEach(patchPaths);
|
|
199
202
|
}
|
|
200
203
|
};
|
|
204
|
+
if (Array.isArray(cfgJson.normalization)) {
|
|
205
|
+
cfgJson.normalization.forEach((item) => patchPaths(item?.dict));
|
|
206
|
+
}
|
|
201
207
|
patchPaths(cfgJson.segmentation?.dict);
|
|
202
208
|
if (Array.isArray(cfgJson.conversion_chain)) {
|
|
203
209
|
cfgJson.conversion_chain.forEach((item) => patchPaths(item?.dict));
|