opencc-wasm 0.8.2 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -5
- package/README.zh.md +51 -6
- package/dist/cjs/index.cjs +9 -3
- package/dist/cjs/opencc-wasm.cjs +1 -1
- package/dist/cjs/opencc-wasm.wasm +0 -0
- package/dist/data/config/hk2s.json +30 -26
- package/dist/data/config/hk2sp.json +38 -0
- package/dist/data/config/hk2sp_jieba.json +61 -0
- package/dist/data/config/hk2t.json +14 -18
- package/dist/data/config/jp2t.json +15 -22
- package/dist/data/config/opencc_config.schema.json +45 -94
- package/dist/data/config/s2hk.json +37 -18
- package/dist/data/config/s2hk_jieba.json +51 -16
- package/dist/data/config/s2hkp.json +47 -0
- package/dist/data/config/s2hkp_jieba.json +66 -0
- package/dist/data/config/s2t.json +22 -19
- package/dist/data/config/s2t_cngov.json +16 -18
- package/dist/data/config/s2t_jieba.json +35 -11
- package/dist/data/config/s2tw.json +38 -19
- package/dist/data/config/s2tw_jieba.json +51 -16
- package/dist/data/config/s2twp.json +39 -24
- package/dist/data/config/s2twp_jieba.json +55 -21
- package/dist/data/config/t2cngov.json +16 -18
- package/dist/data/config/t2cngov_keep_simp.json +16 -18
- package/dist/data/config/t2hk.json +15 -13
- package/dist/data/config/t2jp.json +7 -14
- package/dist/data/config/t2s.json +20 -19
- package/dist/data/config/t2s_cngov.json +16 -18
- package/dist/data/config/t2tw.json +15 -13
- package/dist/data/config/tw2s.json +31 -27
- package/dist/data/config/tw2sp.json +32 -30
- package/dist/data/config/tw2sp_jieba.json +50 -25
- package/dist/data/config/tw2t.json +14 -18
- package/dist/data/dict/CJK_Compatibility_Ideographs.ocd2 +0 -0
- package/dist/data/dict/HKPhrases.ocd2 +0 -0
- package/dist/data/dict/HKPhrasesRev.ocd2 +0 -0
- package/dist/data/dict/HKVariants.ocd2 +0 -0
- package/dist/data/dict/HKVariantsPhrases.ocd2 +0 -0
- package/dist/data/dict/HKVariantsRev.ocd2 +0 -0
- package/dist/data/dict/HKVariantsRevPhrases.ocd2 +0 -0
- package/dist/data/dict/JPShinjitaiCharacters.ocd2 +0 -0
- package/dist/data/dict/JPShinjitaiCharactersRev.ocd2 +0 -0
- package/dist/data/dict/JPShinjitaiPhrases.ocd2 +0 -0
- package/dist/data/dict/STCharacters.ocd2 +0 -0
- package/dist/data/dict/STPhrases.ocd2 +0 -0
- package/dist/data/dict/STPhrases_GeneratedFromRegionalPhrases.ocd2 +0 -0
- package/dist/data/dict/TSCharacters.ocd2 +0 -0
- package/dist/data/dict/TSCharactersExt.ocd2 +0 -0
- package/dist/data/dict/TSPhrases.ocd2 +0 -0
- package/dist/data/dict/TWPhrases.ocd2 +0 -0
- package/dist/data/dict/TWPhrasesRev.ocd2 +0 -0
- package/dist/data/dict/TWVariants.ocd2 +0 -0
- package/dist/data/dict/TWVariantsPhrases.ocd2 +0 -0
- package/dist/data/dict/TWVariantsRev.ocd2 +0 -0
- package/dist/data/dict/TWVariantsRevPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/STCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/STPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters_keep_simp.ocd2 +0 -0
- package/dist/data/dict/cngov/TGPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/TSCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/TSPhrases.ocd2 +0 -0
- package/dist/data/jieba_dict/jieba.dict.utf8 +1 -1
- package/dist/data/jieba_dict/jieba_merged.ocd2 +0 -0
- package/dist/data/jieba_dict/user.dict.utf8 +2 -1
- package/dist/esm/index.js +8 -0
- package/dist/esm/opencc-wasm.js +1 -1
- package/dist/esm/opencc-wasm.wasm +0 -0
- package/dist/opencc-wasm.wasm +0 -0
- package/index.d.ts +1 -0
- package/package.json +1 -1
- package/dist/data/dict/JPVariants.ocd2 +0 -0
- package/dist/data/dict/JPVariantsRev.ocd2 +0 -0
- package/dist/data/dict/cngov/GovVariants.ocd2 +0 -0
- package/dist/data/dict/cngov/GovVariants_keep_simp.ocd2 +0 -0
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Taiwan standard, Jieba Segmentation - Experimental)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{
|
|
5
|
+
"dict": {
|
|
6
|
+
"type": "ocd2",
|
|
7
|
+
"file": "CJK_Compatibility_Ideographs.ocd2"
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
],
|
|
3
11
|
"segmentation": {
|
|
4
12
|
"type": "jieba",
|
|
5
13
|
"resources": {
|
|
@@ -7,21 +15,48 @@
|
|
|
7
15
|
"model_path": "jieba_dict/hmm_model.utf8"
|
|
8
16
|
}
|
|
9
17
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
18
|
+
"conversion_chain": [
|
|
19
|
+
{
|
|
20
|
+
"dict": {
|
|
21
|
+
"type": "group",
|
|
22
|
+
"match_policy": "short_circuit",
|
|
23
|
+
"dicts": [
|
|
24
|
+
{
|
|
25
|
+
"type": "group",
|
|
26
|
+
"match_policy": "union",
|
|
27
|
+
"dicts": [
|
|
28
|
+
{
|
|
29
|
+
"type": "ocd2",
|
|
30
|
+
"file": "STPhrases.ocd2"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"type": "ocd2",
|
|
34
|
+
"file": "STPhrases_GeneratedFromRegionalPhrases.ocd2"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"type": "ocd2",
|
|
40
|
+
"file": "STCharacters.ocd2"
|
|
41
|
+
}
|
|
42
|
+
]
|
|
43
|
+
}
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"dict": {
|
|
47
|
+
"type": "group",
|
|
48
|
+
"match_policy": "short_circuit",
|
|
49
|
+
"dicts": [
|
|
50
|
+
{
|
|
51
|
+
"type": "ocd2",
|
|
52
|
+
"file": "TWVariantsPhrases.ocd2"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"type": "ocd2",
|
|
56
|
+
"file": "TWVariants.ocd2"
|
|
57
|
+
}
|
|
58
|
+
]
|
|
59
|
+
}
|
|
25
60
|
}
|
|
26
|
-
|
|
61
|
+
]
|
|
27
62
|
}
|
|
@@ -1,32 +1,47 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "Simplified Chinese to Traditional Chinese (Taiwan
|
|
2
|
+
"name": "Simplified Chinese to Traditional Chinese (Taiwan Standard, with Taiwan Phrases)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
3
6
|
"segmentation": {
|
|
4
7
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
6
|
-
"type": "ocd2",
|
|
7
|
-
"file": "STPhrases.ocd2"
|
|
8
|
-
}
|
|
9
|
-
},
|
|
10
|
-
"conversion_chain": [{
|
|
11
8
|
"dict": {
|
|
12
9
|
"type": "group",
|
|
13
|
-
"
|
|
14
|
-
|
|
15
|
-
"file": "STPhrases.ocd2"
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
"file": "STCharacters.ocd2"
|
|
19
|
-
}]
|
|
10
|
+
"match_policy": "union",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "STPhrases_GeneratedFromRegionalPhrases.ocd2" }
|
|
14
|
+
]
|
|
20
15
|
}
|
|
21
|
-
},
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
"
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
16
|
+
},
|
|
17
|
+
"conversion_chain": [
|
|
18
|
+
{
|
|
19
|
+
"dict": {
|
|
20
|
+
"type": "group",
|
|
21
|
+
"match_policy": "short_circuit",
|
|
22
|
+
"dicts": [
|
|
23
|
+
{
|
|
24
|
+
"type": "group",
|
|
25
|
+
"match_policy": "union",
|
|
26
|
+
"dicts": [
|
|
27
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
28
|
+
{ "type": "ocd2", "file": "STPhrases_GeneratedFromRegionalPhrases.ocd2" }
|
|
29
|
+
]
|
|
30
|
+
},
|
|
31
|
+
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"dict": {
|
|
37
|
+
"type": "group",
|
|
38
|
+
"match_policy": "short_circuit",
|
|
39
|
+
"dicts": [
|
|
40
|
+
{ "type": "ocd2", "file": "TWPhrases.ocd2" },
|
|
41
|
+
{ "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
|
|
42
|
+
{ "type": "ocd2", "file": "TWVariants.ocd2" }
|
|
43
|
+
]
|
|
44
|
+
}
|
|
30
45
|
}
|
|
31
|
-
|
|
46
|
+
]
|
|
32
47
|
}
|
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Taiwan standard, with phrases, Jieba Segmentation - Experimental)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{
|
|
5
|
+
"dict": {
|
|
6
|
+
"type": "ocd2",
|
|
7
|
+
"file": "CJK_Compatibility_Ideographs.ocd2"
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
],
|
|
3
11
|
"segmentation": {
|
|
4
12
|
"type": "jieba",
|
|
5
13
|
"resources": {
|
|
@@ -7,26 +15,52 @@
|
|
|
7
15
|
"model_path": "jieba_dict/hmm_model.utf8"
|
|
8
16
|
}
|
|
9
17
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
18
|
+
"conversion_chain": [
|
|
19
|
+
{
|
|
20
|
+
"dict": {
|
|
21
|
+
"type": "group",
|
|
22
|
+
"match_policy": "short_circuit",
|
|
23
|
+
"dicts": [
|
|
24
|
+
{
|
|
25
|
+
"type": "group",
|
|
26
|
+
"match_policy": "union",
|
|
27
|
+
"dicts": [
|
|
28
|
+
{
|
|
29
|
+
"type": "ocd2",
|
|
30
|
+
"file": "STPhrases.ocd2"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"type": "ocd2",
|
|
34
|
+
"file": "STPhrases_GeneratedFromRegionalPhrases.ocd2"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"type": "ocd2",
|
|
40
|
+
"file": "STCharacters.ocd2"
|
|
41
|
+
}
|
|
42
|
+
]
|
|
43
|
+
}
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"dict": {
|
|
47
|
+
"type": "group",
|
|
48
|
+
"match_policy": "short_circuit",
|
|
49
|
+
"dicts": [
|
|
50
|
+
{
|
|
51
|
+
"type": "ocd2",
|
|
52
|
+
"file": "TWPhrases.ocd2"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"type": "ocd2",
|
|
56
|
+
"file": "TWVariantsPhrases.ocd2"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"type": "ocd2",
|
|
60
|
+
"file": "TWVariants.ocd2"
|
|
61
|
+
}
|
|
62
|
+
]
|
|
63
|
+
}
|
|
30
64
|
}
|
|
31
|
-
|
|
65
|
+
]
|
|
32
66
|
}
|
|
@@ -1,12 +1,5 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese to Traditional Chinese (Chinese Government standard)",
|
|
3
|
-
"author": "TerryTian-tech",
|
|
4
|
-
"license": "Apache License 2.0",
|
|
5
|
-
"source": "https://github.com/TerryTian-tech/OpenCC-Traditional-Chinese-characters-according-to-Chinese-government-standards",
|
|
6
|
-
"contributors": ["TerryTian-tech", "Yi Jianpeng", "Hu Xinmei", "Duan Yatong"],
|
|
7
|
-
"reference": "《通用规范汉字表》(2013)",
|
|
8
|
-
"description": "Converts traditional Chinese (from various standards) to China's government standard traditional characters. Includes simplified-to-standard conversion for mixed documents.",
|
|
9
|
-
|
|
10
3
|
"segmentation": {
|
|
11
4
|
"type": "mmseg",
|
|
12
5
|
"dict": {
|
|
@@ -14,16 +7,21 @@
|
|
|
14
7
|
"file": "cngov/TGPhrases.ocd2"
|
|
15
8
|
}
|
|
16
9
|
},
|
|
17
|
-
"conversion_chain": [
|
|
18
|
-
|
|
19
|
-
"
|
|
20
|
-
|
|
21
|
-
"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{
|
|
16
|
+
"type": "ocd2",
|
|
17
|
+
"file": "cngov/TGPhrases.ocd2"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"type": "ocd2",
|
|
21
|
+
"file": "cngov/TGCharacters.ocd2"
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
}
|
|
27
25
|
}
|
|
28
|
-
|
|
26
|
+
]
|
|
29
27
|
}
|
|
@@ -1,12 +1,5 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese to Traditional Chinese (Chinese Government standard) with Simplified Chinese remaining unchanged",
|
|
3
|
-
"author": "TerryTian-tech",
|
|
4
|
-
"license": "Apache License 2.0",
|
|
5
|
-
"source": "https://github.com/TerryTian-tech/OpenCC-Traditional-Chinese-characters-according-to-Chinese-government-standards",
|
|
6
|
-
"contributors": ["TerryTian-tech", "Yi Jianpeng", "Hu Xinmei", "Duan Yatong"],
|
|
7
|
-
"reference": "《通用规范汉字表》(2013)",
|
|
8
|
-
"description": "Conservative conversion that preserves intentional simplified characters in mixed documents while standardizing traditional characters only.",
|
|
9
|
-
|
|
10
3
|
"segmentation": {
|
|
11
4
|
"type": "mmseg",
|
|
12
5
|
"dict": {
|
|
@@ -14,16 +7,21 @@
|
|
|
14
7
|
"file": "cngov/TGPhrases.ocd2"
|
|
15
8
|
}
|
|
16
9
|
},
|
|
17
|
-
"conversion_chain": [
|
|
18
|
-
|
|
19
|
-
"
|
|
20
|
-
|
|
21
|
-
"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{
|
|
16
|
+
"type": "ocd2",
|
|
17
|
+
"file": "cngov/TGPhrases.ocd2"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"type": "ocd2",
|
|
21
|
+
"file": "cngov/TGCharacters_keep_simp.ocd2"
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
}
|
|
27
25
|
}
|
|
28
|
-
|
|
26
|
+
]
|
|
29
27
|
}
|
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "Traditional Chinese to Traditional Chinese (Hong Kong variant)",
|
|
3
|
-
"
|
|
4
|
-
"type": "
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
2
|
+
"name": "Traditional Chinese (OpenCC Standard) to Traditional Chinese (Hong Kong variant)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
6
|
+
"conversion_chain": [
|
|
7
|
+
{
|
|
8
|
+
"dict": {
|
|
9
|
+
"type": "group",
|
|
10
|
+
"match_policy": "short_circuit",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "HKVariantsPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "HKVariants.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
8
16
|
}
|
|
9
|
-
|
|
10
|
-
"conversion_chain": [{
|
|
11
|
-
"dict": {
|
|
12
|
-
"type": "ocd2",
|
|
13
|
-
"file": "HKVariants.ocd2"
|
|
14
|
-
}
|
|
15
|
-
}]
|
|
17
|
+
]
|
|
16
18
|
}
|
|
@@ -1,16 +1,9 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "
|
|
3
|
-
"
|
|
4
|
-
"type": "
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
},
|
|
10
|
-
"conversion_chain": [{
|
|
11
|
-
"dict": {
|
|
12
|
-
"type": "ocd2",
|
|
13
|
-
"file": "JPVariants.ocd2"
|
|
14
|
-
}
|
|
15
|
-
}]
|
|
2
|
+
"name": "Old Japanese Kanji (Kyūjitai) to New Japanese Kanji (Shinjitai)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
6
|
+
"conversion_chain": [
|
|
7
|
+
{ "dict": { "type": "ocd2", "file": "JPShinjitaiCharactersRev.ocd2" } }
|
|
8
|
+
]
|
|
16
9
|
}
|
|
@@ -1,22 +1,23 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "Traditional Chinese to Simplified Chinese",
|
|
3
|
-
"
|
|
4
|
-
"type": "
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
2
|
+
"name": "Traditional Chinese (OpenCC Standard) to Simplified Chinese",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
6
|
+
"conversion_chain": [
|
|
7
|
+
{
|
|
8
|
+
"dict": {
|
|
9
|
+
"type": "group",
|
|
10
|
+
"match_policy": "short_circuit",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "TSPhrases.ocd2" },
|
|
13
|
+
{
|
|
14
|
+
"type": "ocd2",
|
|
15
|
+
"file": "TSCharactersExt.ocd2",
|
|
16
|
+
"may_output_tofu": true
|
|
17
|
+
},
|
|
18
|
+
{ "type": "ocd2", "file": "TSCharacters.ocd2" }
|
|
19
|
+
]
|
|
20
|
+
}
|
|
8
21
|
}
|
|
9
|
-
|
|
10
|
-
"conversion_chain": [{
|
|
11
|
-
"dict": {
|
|
12
|
-
"type": "group",
|
|
13
|
-
"dicts": [{
|
|
14
|
-
"type": "ocd2",
|
|
15
|
-
"file": "TSPhrases.ocd2"
|
|
16
|
-
}, {
|
|
17
|
-
"type": "ocd2",
|
|
18
|
-
"file": "TSCharacters.ocd2"
|
|
19
|
-
}]
|
|
20
|
-
}
|
|
21
|
-
}]
|
|
22
|
+
]
|
|
22
23
|
}
|
|
@@ -1,12 +1,5 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese to Simplified Chinese",
|
|
3
|
-
"author": "TerryTian-tech",
|
|
4
|
-
"license": "Apache License 2.0",
|
|
5
|
-
"source": "https://github.com/TerryTian-tech/OpenCC-Traditional-Chinese-characters-according-to-Chinese-government-standards",
|
|
6
|
-
"contributors": ["TerryTian-tech", "Yi Jianpeng", "Hu Xinmei", "Duan Yatong"],
|
|
7
|
-
"reference": "《通用规范汉字表》(2013)",
|
|
8
|
-
"description": "Conservative conversion that preserves intentional simplified characters in mixed documents while standardizing traditional characters only.",
|
|
9
|
-
|
|
10
3
|
"segmentation": {
|
|
11
4
|
"type": "mmseg",
|
|
12
5
|
"dict": {
|
|
@@ -14,16 +7,21 @@
|
|
|
14
7
|
"file": "cngov/TSPhrases.ocd2"
|
|
15
8
|
}
|
|
16
9
|
},
|
|
17
|
-
"conversion_chain": [
|
|
18
|
-
|
|
19
|
-
"
|
|
20
|
-
|
|
21
|
-
"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{
|
|
16
|
+
"type": "ocd2",
|
|
17
|
+
"file": "cngov/TSPhrases.ocd2"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"type": "ocd2",
|
|
21
|
+
"file": "cngov/TSCharacters.ocd2"
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
}
|
|
27
25
|
}
|
|
28
|
-
|
|
26
|
+
]
|
|
29
27
|
}
|
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "Traditional Chinese to Traditional Chinese (Taiwan
|
|
3
|
-
"
|
|
4
|
-
"type": "
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
2
|
+
"name": "Traditional Chinese (OpenCC Standard) to Traditional Chinese (Taiwan Standard)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
6
|
+
"conversion_chain": [
|
|
7
|
+
{
|
|
8
|
+
"dict": {
|
|
9
|
+
"type": "group",
|
|
10
|
+
"match_policy": "short_circuit",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "TWVariants.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
8
16
|
}
|
|
9
|
-
|
|
10
|
-
"conversion_chain": [{
|
|
11
|
-
"dict": {
|
|
12
|
-
"type": "ocd2",
|
|
13
|
-
"file": "TWVariants.ocd2"
|
|
14
|
-
}
|
|
15
|
-
}]
|
|
17
|
+
]
|
|
16
18
|
}
|
|
@@ -1,33 +1,37 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "Traditional Chinese (Taiwan
|
|
2
|
+
"name": "Traditional Chinese (Taiwan Standard) to Simplified Chinese",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
3
6
|
"segmentation": {
|
|
4
7
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
6
|
-
"type": "ocd2",
|
|
7
|
-
"file": "TSPhrases.ocd2"
|
|
8
|
-
}
|
|
8
|
+
"dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
|
|
9
9
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
"
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"match_policy": "short_circuit",
|
|
15
|
+
"dicts": [
|
|
16
|
+
{ "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
|
|
17
|
+
{ "type": "ocd2", "file": "TWVariantsRev.ocd2" }
|
|
18
|
+
]
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"dict": {
|
|
23
|
+
"type": "group",
|
|
24
|
+
"match_policy": "short_circuit",
|
|
25
|
+
"dicts": [
|
|
26
|
+
{ "type": "ocd2", "file": "TSPhrases.ocd2" },
|
|
27
|
+
{
|
|
28
|
+
"type": "ocd2",
|
|
29
|
+
"file": "TSCharactersExt.ocd2",
|
|
30
|
+
"may_output_tofu": true
|
|
31
|
+
},
|
|
32
|
+
{ "type": "ocd2", "file": "TSCharacters.ocd2" }
|
|
33
|
+
]
|
|
34
|
+
}
|
|
31
35
|
}
|
|
32
|
-
|
|
36
|
+
]
|
|
33
37
|
}
|
|
@@ -1,36 +1,38 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "Traditional Chinese (Taiwan
|
|
2
|
+
"name": "Traditional Chinese (Taiwan Standard) to Simplified Chinese (Mainland China Phrases)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
3
6
|
"segmentation": {
|
|
4
7
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
6
|
-
"type": "ocd2",
|
|
7
|
-
"file": "TSPhrases.ocd2"
|
|
8
|
-
}
|
|
8
|
+
"dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
|
|
9
9
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"match_policy": "short_circuit",
|
|
15
|
+
"dicts": [
|
|
16
|
+
{ "type": "ocd2", "file": "TWPhrasesRev.ocd2" },
|
|
17
|
+
{ "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
|
|
18
|
+
{ "type": "ocd2", "file": "TWVariantsRev.ocd2" }
|
|
19
|
+
]
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"dict": {
|
|
24
|
+
"type": "group",
|
|
25
|
+
"match_policy": "short_circuit",
|
|
26
|
+
"dicts": [
|
|
27
|
+
{ "type": "ocd2", "file": "TSPhrases.ocd2" },
|
|
28
|
+
{
|
|
29
|
+
"type": "ocd2",
|
|
30
|
+
"file": "TSCharactersExt.ocd2",
|
|
31
|
+
"may_output_tofu": true
|
|
32
|
+
},
|
|
33
|
+
{ "type": "ocd2", "file": "TSCharacters.ocd2" }
|
|
34
|
+
]
|
|
35
|
+
}
|
|
34
36
|
}
|
|
35
|
-
|
|
37
|
+
]
|
|
36
38
|
}
|