opencc-wasm 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -1
- package/README.zh.md +10 -1
- package/dist/cjs/index.cjs +6 -0
- package/dist/cjs/opencc-wasm.cjs +1 -1
- package/dist/cjs/opencc-wasm.wasm +0 -0
- package/dist/data/config/hk2s.json +5 -0
- package/dist/data/config/hk2sp.json +5 -0
- package/dist/data/config/hk2sp_jieba.json +30 -5
- package/dist/data/config/hk2t.json +4 -4
- package/dist/data/config/jp2t.json +4 -4
- package/dist/data/config/opencc_config.schema.json +9 -4
- package/dist/data/config/s2hk.json +21 -2
- package/dist/data/config/s2hk_jieba.json +36 -4
- package/dist/data/config/s2hkp.json +21 -2
- package/dist/data/config/s2hkp_jieba.json +40 -5
- package/dist/data/config/s2t.json +12 -5
- package/dist/data/config/s2t_cngov.json +16 -11
- package/dist/data/config/s2t_jieba.json +27 -2
- package/dist/data/config/s2tw.json +21 -2
- package/dist/data/config/s2tw_jieba.json +36 -4
- package/dist/data/config/s2twp.json +21 -2
- package/dist/data/config/s2twp_jieba.json +40 -5
- package/dist/data/config/t2cngov.json +16 -11
- package/dist/data/config/t2cngov_keep_simp.json +16 -11
- package/dist/data/config/t2hk.json +4 -10
- package/dist/data/config/t2jp.json +3 -4
- package/dist/data/config/t2s.json +4 -4
- package/dist/data/config/t2s_cngov.json +16 -11
- package/dist/data/config/t2tw.json +4 -10
- package/dist/data/config/tw2s.json +5 -0
- package/dist/data/config/tw2sp.json +5 -0
- package/dist/data/config/tw2sp_jieba.json +30 -5
- package/dist/data/config/tw2t.json +4 -4
- package/dist/data/dict/CJK_Compatibility_Ideographs.ocd2 +0 -0
- package/dist/data/dict/HKPhrases.ocd2 +0 -0
- package/dist/data/dict/HKPhrasesRev.ocd2 +0 -0
- package/dist/data/dict/HKVariants.ocd2 +0 -0
- package/dist/data/dict/HKVariantsRev.ocd2 +0 -0
- package/dist/data/dict/HKVariantsRevPhrases.ocd2 +0 -0
- package/dist/data/dict/JPShinjitaiPhrases.ocd2 +0 -0
- package/dist/data/dict/STPhrases.ocd2 +0 -0
- package/dist/data/dict/STPhrases_GeneratedFromRegionalPhrases.ocd2 +0 -0
- package/dist/data/dict/TSCharacters.ocd2 +0 -0
- package/dist/data/dict/TSPhrases.ocd2 +0 -0
- package/dist/data/dict/TWPhrases.ocd2 +0 -0
- package/dist/data/dict/TWPhrasesRev.ocd2 +0 -0
- package/dist/data/dict/cngov/STCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/STPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters_keep_simp.ocd2 +0 -0
- package/dist/data/dict/cngov/TGPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/TSCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/TSPhrases.ocd2 +0 -0
- package/dist/data/jieba_dict/jieba.dict.utf8 +1 -1
- package/dist/data/jieba_dict/jieba_merged.ocd2 +0 -0
- package/dist/data/jieba_dict/user.dict.utf8 +0 -1
- package/dist/esm/index.js +6 -0
- package/dist/esm/opencc-wasm.js +1 -1
- package/dist/esm/opencc-wasm.wasm +0 -0
- package/dist/opencc-wasm.wasm +0 -0
- package/index.d.ts +1 -0
- package/package.json +1 -1
|
@@ -3,10 +3,15 @@
|
|
|
3
3
|
"id": "https://opencc.byvoid.com/schema/opencc_config.schema.json",
|
|
4
4
|
"title": "OpenCC configuration",
|
|
5
5
|
"type": "object",
|
|
6
|
-
"required": ["
|
|
6
|
+
"required": ["name", "conversion_chain"],
|
|
7
7
|
"additionalProperties": false,
|
|
8
8
|
"properties": {
|
|
9
9
|
"name": { "type": "string" },
|
|
10
|
+
"normalization": {
|
|
11
|
+
"type": "array",
|
|
12
|
+
"minItems": 1,
|
|
13
|
+
"items": { "$ref": "#/definitions/conversion" }
|
|
14
|
+
},
|
|
10
15
|
"segmentation": { "$ref": "#/definitions/segmentation" },
|
|
11
16
|
"conversion_chain": {
|
|
12
17
|
"type": "array",
|
|
@@ -77,17 +82,17 @@
|
|
|
77
82
|
"type": { "enum": ["inline"] },
|
|
78
83
|
"entries": {
|
|
79
84
|
"type": "object",
|
|
80
|
-
"
|
|
81
|
-
"additionalProperties": false
|
|
85
|
+
"additionalProperties": { "type": "string", "minLength": 1 }
|
|
82
86
|
}
|
|
83
87
|
}
|
|
84
88
|
},
|
|
85
89
|
"group_dict": {
|
|
86
90
|
"type": "object",
|
|
87
|
-
"required": ["type", "dicts"],
|
|
91
|
+
"required": ["type", "dicts", "match_policy"],
|
|
88
92
|
"additionalProperties": false,
|
|
89
93
|
"properties": {
|
|
90
94
|
"type": { "enum": ["group"] },
|
|
95
|
+
"match_policy": { "enum": ["short_circuit", "union"] },
|
|
91
96
|
"dicts": {
|
|
92
97
|
"type": "array",
|
|
93
98
|
"minItems": 1,
|
|
@@ -1,15 +1,33 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Hong Kong variant)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
3
6
|
"segmentation": {
|
|
4
7
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
8
|
+
"dict": {
|
|
9
|
+
"type": "group",
|
|
10
|
+
"match_policy": "union",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "STPhrases_GeneratedFromRegionalPhrases.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
6
16
|
},
|
|
7
17
|
"conversion_chain": [
|
|
8
18
|
{
|
|
9
19
|
"dict": {
|
|
10
20
|
"type": "group",
|
|
21
|
+
"match_policy": "short_circuit",
|
|
11
22
|
"dicts": [
|
|
12
|
-
{
|
|
23
|
+
{
|
|
24
|
+
"type": "group",
|
|
25
|
+
"match_policy": "union",
|
|
26
|
+
"dicts": [
|
|
27
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
28
|
+
{ "type": "ocd2", "file": "STPhrases_GeneratedFromRegionalPhrases.ocd2" }
|
|
29
|
+
]
|
|
30
|
+
},
|
|
13
31
|
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
14
32
|
]
|
|
15
33
|
}
|
|
@@ -17,6 +35,7 @@
|
|
|
17
35
|
{
|
|
18
36
|
"dict": {
|
|
19
37
|
"type": "group",
|
|
38
|
+
"match_policy": "short_circuit",
|
|
20
39
|
"dicts": [
|
|
21
40
|
{ "type": "ocd2", "file": "HKVariantsPhrases.ocd2" },
|
|
22
41
|
{ "type": "ocd2", "file": "HKVariants.ocd2" }
|
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Hong Kong variant, Jieba Segmentation - Experimental)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{
|
|
5
|
+
"dict": {
|
|
6
|
+
"type": "ocd2",
|
|
7
|
+
"file": "CJK_Compatibility_Ideographs.ocd2"
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
],
|
|
3
11
|
"segmentation": {
|
|
4
12
|
"type": "jieba",
|
|
5
13
|
"resources": {
|
|
@@ -11,18 +19,42 @@
|
|
|
11
19
|
{
|
|
12
20
|
"dict": {
|
|
13
21
|
"type": "group",
|
|
22
|
+
"match_policy": "short_circuit",
|
|
14
23
|
"dicts": [
|
|
15
|
-
{
|
|
16
|
-
|
|
24
|
+
{
|
|
25
|
+
"type": "group",
|
|
26
|
+
"match_policy": "union",
|
|
27
|
+
"dicts": [
|
|
28
|
+
{
|
|
29
|
+
"type": "ocd2",
|
|
30
|
+
"file": "STPhrases.ocd2"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"type": "ocd2",
|
|
34
|
+
"file": "STPhrases_GeneratedFromRegionalPhrases.ocd2"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"type": "ocd2",
|
|
40
|
+
"file": "STCharacters.ocd2"
|
|
41
|
+
}
|
|
17
42
|
]
|
|
18
43
|
}
|
|
19
44
|
},
|
|
20
45
|
{
|
|
21
46
|
"dict": {
|
|
22
47
|
"type": "group",
|
|
48
|
+
"match_policy": "short_circuit",
|
|
23
49
|
"dicts": [
|
|
24
|
-
{
|
|
25
|
-
|
|
50
|
+
{
|
|
51
|
+
"type": "ocd2",
|
|
52
|
+
"file": "HKVariantsPhrases.ocd2"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"type": "ocd2",
|
|
56
|
+
"file": "HKVariants.ocd2"
|
|
57
|
+
}
|
|
26
58
|
]
|
|
27
59
|
}
|
|
28
60
|
}
|
|
@@ -1,15 +1,33 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Hong Kong variant, with phrases)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
3
6
|
"segmentation": {
|
|
4
7
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
8
|
+
"dict": {
|
|
9
|
+
"type": "group",
|
|
10
|
+
"match_policy": "union",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "STPhrases_GeneratedFromRegionalPhrases.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
6
16
|
},
|
|
7
17
|
"conversion_chain": [
|
|
8
18
|
{
|
|
9
19
|
"dict": {
|
|
10
20
|
"type": "group",
|
|
21
|
+
"match_policy": "short_circuit",
|
|
11
22
|
"dicts": [
|
|
12
|
-
{
|
|
23
|
+
{
|
|
24
|
+
"type": "group",
|
|
25
|
+
"match_policy": "union",
|
|
26
|
+
"dicts": [
|
|
27
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
28
|
+
{ "type": "ocd2", "file": "STPhrases_GeneratedFromRegionalPhrases.ocd2" }
|
|
29
|
+
]
|
|
30
|
+
},
|
|
13
31
|
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
14
32
|
]
|
|
15
33
|
}
|
|
@@ -17,6 +35,7 @@
|
|
|
17
35
|
{
|
|
18
36
|
"dict": {
|
|
19
37
|
"type": "group",
|
|
38
|
+
"match_policy": "short_circuit",
|
|
20
39
|
"dicts": [
|
|
21
40
|
{ "type": "ocd2", "file": "HKPhrases.ocd2" },
|
|
22
41
|
{ "type": "ocd2", "file": "HKVariantsPhrases.ocd2" },
|
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Hong Kong variant, with phrases, Jieba Segmentation - Experimental)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{
|
|
5
|
+
"dict": {
|
|
6
|
+
"type": "ocd2",
|
|
7
|
+
"file": "CJK_Compatibility_Ideographs.ocd2"
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
],
|
|
3
11
|
"segmentation": {
|
|
4
12
|
"type": "jieba",
|
|
5
13
|
"resources": {
|
|
@@ -11,19 +19,46 @@
|
|
|
11
19
|
{
|
|
12
20
|
"dict": {
|
|
13
21
|
"type": "group",
|
|
22
|
+
"match_policy": "short_circuit",
|
|
14
23
|
"dicts": [
|
|
15
|
-
{
|
|
16
|
-
|
|
24
|
+
{
|
|
25
|
+
"type": "group",
|
|
26
|
+
"match_policy": "union",
|
|
27
|
+
"dicts": [
|
|
28
|
+
{
|
|
29
|
+
"type": "ocd2",
|
|
30
|
+
"file": "STPhrases.ocd2"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"type": "ocd2",
|
|
34
|
+
"file": "STPhrases_GeneratedFromRegionalPhrases.ocd2"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"type": "ocd2",
|
|
40
|
+
"file": "STCharacters.ocd2"
|
|
41
|
+
}
|
|
17
42
|
]
|
|
18
43
|
}
|
|
19
44
|
},
|
|
20
45
|
{
|
|
21
46
|
"dict": {
|
|
22
47
|
"type": "group",
|
|
48
|
+
"match_policy": "short_circuit",
|
|
23
49
|
"dicts": [
|
|
24
|
-
{
|
|
25
|
-
|
|
26
|
-
|
|
50
|
+
{
|
|
51
|
+
"type": "ocd2",
|
|
52
|
+
"file": "HKPhrases.ocd2"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"type": "ocd2",
|
|
56
|
+
"file": "HKVariantsPhrases.ocd2"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"type": "ocd2",
|
|
60
|
+
"file": "HKVariants.ocd2"
|
|
61
|
+
}
|
|
27
62
|
]
|
|
28
63
|
}
|
|
29
64
|
}
|
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (OpenCC Standard)",
|
|
3
|
-
"
|
|
4
|
-
"type": "
|
|
5
|
-
|
|
6
|
-
},
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
7
6
|
"conversion_chain": [
|
|
8
7
|
{
|
|
9
8
|
"dict": {
|
|
10
9
|
"type": "group",
|
|
10
|
+
"match_policy": "short_circuit",
|
|
11
11
|
"dicts": [
|
|
12
|
-
{
|
|
12
|
+
{
|
|
13
|
+
"type": "group",
|
|
14
|
+
"match_policy": "union",
|
|
15
|
+
"dicts": [
|
|
16
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
17
|
+
{ "type": "ocd2", "file": "STPhrases_GeneratedFromRegionalPhrases.ocd2" }
|
|
18
|
+
]
|
|
19
|
+
},
|
|
13
20
|
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
14
21
|
]
|
|
15
22
|
}
|
|
@@ -7,16 +7,21 @@
|
|
|
7
7
|
"file": "cngov/STPhrases.ocd2"
|
|
8
8
|
}
|
|
9
9
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{
|
|
16
|
+
"type": "ocd2",
|
|
17
|
+
"file": "cngov/STPhrases.ocd2"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"type": "ocd2",
|
|
21
|
+
"file": "cngov/STCharacters.ocd2"
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
}
|
|
20
25
|
}
|
|
21
|
-
|
|
26
|
+
]
|
|
22
27
|
}
|
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Jieba Segmentation - Experimental)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{
|
|
5
|
+
"dict": {
|
|
6
|
+
"type": "ocd2",
|
|
7
|
+
"file": "CJK_Compatibility_Ideographs.ocd2"
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
],
|
|
3
11
|
"segmentation": {
|
|
4
12
|
"type": "jieba",
|
|
5
13
|
"resources": {
|
|
@@ -11,9 +19,26 @@
|
|
|
11
19
|
{
|
|
12
20
|
"dict": {
|
|
13
21
|
"type": "group",
|
|
22
|
+
"match_policy": "short_circuit",
|
|
14
23
|
"dicts": [
|
|
15
|
-
{
|
|
16
|
-
|
|
24
|
+
{
|
|
25
|
+
"type": "group",
|
|
26
|
+
"match_policy": "union",
|
|
27
|
+
"dicts": [
|
|
28
|
+
{
|
|
29
|
+
"type": "ocd2",
|
|
30
|
+
"file": "STPhrases.ocd2"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"type": "ocd2",
|
|
34
|
+
"file": "STPhrases_GeneratedFromRegionalPhrases.ocd2"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"type": "ocd2",
|
|
40
|
+
"file": "STCharacters.ocd2"
|
|
41
|
+
}
|
|
17
42
|
]
|
|
18
43
|
}
|
|
19
44
|
}
|
|
@@ -1,15 +1,33 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Taiwan Standard)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
3
6
|
"segmentation": {
|
|
4
7
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
8
|
+
"dict": {
|
|
9
|
+
"type": "group",
|
|
10
|
+
"match_policy": "union",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "STPhrases_GeneratedFromRegionalPhrases.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
6
16
|
},
|
|
7
17
|
"conversion_chain": [
|
|
8
18
|
{
|
|
9
19
|
"dict": {
|
|
10
20
|
"type": "group",
|
|
21
|
+
"match_policy": "short_circuit",
|
|
11
22
|
"dicts": [
|
|
12
|
-
{
|
|
23
|
+
{
|
|
24
|
+
"type": "group",
|
|
25
|
+
"match_policy": "union",
|
|
26
|
+
"dicts": [
|
|
27
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
28
|
+
{ "type": "ocd2", "file": "STPhrases_GeneratedFromRegionalPhrases.ocd2" }
|
|
29
|
+
]
|
|
30
|
+
},
|
|
13
31
|
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
14
32
|
]
|
|
15
33
|
}
|
|
@@ -17,6 +35,7 @@
|
|
|
17
35
|
{
|
|
18
36
|
"dict": {
|
|
19
37
|
"type": "group",
|
|
38
|
+
"match_policy": "short_circuit",
|
|
20
39
|
"dicts": [
|
|
21
40
|
{ "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
|
|
22
41
|
{ "type": "ocd2", "file": "TWVariants.ocd2" }
|
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Taiwan standard, Jieba Segmentation - Experimental)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{
|
|
5
|
+
"dict": {
|
|
6
|
+
"type": "ocd2",
|
|
7
|
+
"file": "CJK_Compatibility_Ideographs.ocd2"
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
],
|
|
3
11
|
"segmentation": {
|
|
4
12
|
"type": "jieba",
|
|
5
13
|
"resources": {
|
|
@@ -11,18 +19,42 @@
|
|
|
11
19
|
{
|
|
12
20
|
"dict": {
|
|
13
21
|
"type": "group",
|
|
22
|
+
"match_policy": "short_circuit",
|
|
14
23
|
"dicts": [
|
|
15
|
-
{
|
|
16
|
-
|
|
24
|
+
{
|
|
25
|
+
"type": "group",
|
|
26
|
+
"match_policy": "union",
|
|
27
|
+
"dicts": [
|
|
28
|
+
{
|
|
29
|
+
"type": "ocd2",
|
|
30
|
+
"file": "STPhrases.ocd2"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"type": "ocd2",
|
|
34
|
+
"file": "STPhrases_GeneratedFromRegionalPhrases.ocd2"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"type": "ocd2",
|
|
40
|
+
"file": "STCharacters.ocd2"
|
|
41
|
+
}
|
|
17
42
|
]
|
|
18
43
|
}
|
|
19
44
|
},
|
|
20
45
|
{
|
|
21
46
|
"dict": {
|
|
22
47
|
"type": "group",
|
|
48
|
+
"match_policy": "short_circuit",
|
|
23
49
|
"dicts": [
|
|
24
|
-
{
|
|
25
|
-
|
|
50
|
+
{
|
|
51
|
+
"type": "ocd2",
|
|
52
|
+
"file": "TWVariantsPhrases.ocd2"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"type": "ocd2",
|
|
56
|
+
"file": "TWVariants.ocd2"
|
|
57
|
+
}
|
|
26
58
|
]
|
|
27
59
|
}
|
|
28
60
|
}
|
|
@@ -1,15 +1,33 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Taiwan Standard, with Taiwan Phrases)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
3
6
|
"segmentation": {
|
|
4
7
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
8
|
+
"dict": {
|
|
9
|
+
"type": "group",
|
|
10
|
+
"match_policy": "union",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "STPhrases_GeneratedFromRegionalPhrases.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
6
16
|
},
|
|
7
17
|
"conversion_chain": [
|
|
8
18
|
{
|
|
9
19
|
"dict": {
|
|
10
20
|
"type": "group",
|
|
21
|
+
"match_policy": "short_circuit",
|
|
11
22
|
"dicts": [
|
|
12
|
-
{
|
|
23
|
+
{
|
|
24
|
+
"type": "group",
|
|
25
|
+
"match_policy": "union",
|
|
26
|
+
"dicts": [
|
|
27
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
28
|
+
{ "type": "ocd2", "file": "STPhrases_GeneratedFromRegionalPhrases.ocd2" }
|
|
29
|
+
]
|
|
30
|
+
},
|
|
13
31
|
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
14
32
|
]
|
|
15
33
|
}
|
|
@@ -17,6 +35,7 @@
|
|
|
17
35
|
{
|
|
18
36
|
"dict": {
|
|
19
37
|
"type": "group",
|
|
38
|
+
"match_policy": "short_circuit",
|
|
20
39
|
"dicts": [
|
|
21
40
|
{ "type": "ocd2", "file": "TWPhrases.ocd2" },
|
|
22
41
|
{ "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
|
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Taiwan standard, with phrases, Jieba Segmentation - Experimental)",
|
|
3
|
+
"normalization": [
|
|
4
|
+
{
|
|
5
|
+
"dict": {
|
|
6
|
+
"type": "ocd2",
|
|
7
|
+
"file": "CJK_Compatibility_Ideographs.ocd2"
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
],
|
|
3
11
|
"segmentation": {
|
|
4
12
|
"type": "jieba",
|
|
5
13
|
"resources": {
|
|
@@ -11,19 +19,46 @@
|
|
|
11
19
|
{
|
|
12
20
|
"dict": {
|
|
13
21
|
"type": "group",
|
|
22
|
+
"match_policy": "short_circuit",
|
|
14
23
|
"dicts": [
|
|
15
|
-
{
|
|
16
|
-
|
|
24
|
+
{
|
|
25
|
+
"type": "group",
|
|
26
|
+
"match_policy": "union",
|
|
27
|
+
"dicts": [
|
|
28
|
+
{
|
|
29
|
+
"type": "ocd2",
|
|
30
|
+
"file": "STPhrases.ocd2"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"type": "ocd2",
|
|
34
|
+
"file": "STPhrases_GeneratedFromRegionalPhrases.ocd2"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"type": "ocd2",
|
|
40
|
+
"file": "STCharacters.ocd2"
|
|
41
|
+
}
|
|
17
42
|
]
|
|
18
43
|
}
|
|
19
44
|
},
|
|
20
45
|
{
|
|
21
46
|
"dict": {
|
|
22
47
|
"type": "group",
|
|
48
|
+
"match_policy": "short_circuit",
|
|
23
49
|
"dicts": [
|
|
24
|
-
{
|
|
25
|
-
|
|
26
|
-
|
|
50
|
+
{
|
|
51
|
+
"type": "ocd2",
|
|
52
|
+
"file": "TWPhrases.ocd2"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"type": "ocd2",
|
|
56
|
+
"file": "TWVariantsPhrases.ocd2"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"type": "ocd2",
|
|
60
|
+
"file": "TWVariants.ocd2"
|
|
61
|
+
}
|
|
27
62
|
]
|
|
28
63
|
}
|
|
29
64
|
}
|
|
@@ -7,16 +7,21 @@
|
|
|
7
7
|
"file": "cngov/TGPhrases.ocd2"
|
|
8
8
|
}
|
|
9
9
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{
|
|
16
|
+
"type": "ocd2",
|
|
17
|
+
"file": "cngov/TGPhrases.ocd2"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"type": "ocd2",
|
|
21
|
+
"file": "cngov/TGCharacters.ocd2"
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
}
|
|
20
25
|
}
|
|
21
|
-
|
|
26
|
+
]
|
|
22
27
|
}
|
|
@@ -7,16 +7,21 @@
|
|
|
7
7
|
"file": "cngov/TGPhrases.ocd2"
|
|
8
8
|
}
|
|
9
9
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{
|
|
16
|
+
"type": "ocd2",
|
|
17
|
+
"file": "cngov/TGPhrases.ocd2"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"type": "ocd2",
|
|
21
|
+
"file": "cngov/TGCharacters_keep_simp.ocd2"
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
}
|
|
20
25
|
}
|
|
21
|
-
|
|
26
|
+
]
|
|
22
27
|
}
|
|
@@ -1,19 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Traditional Chinese (OpenCC Standard) to Traditional Chinese (Hong Kong variant)",
|
|
3
|
-
"
|
|
4
|
-
"type": "
|
|
5
|
-
|
|
6
|
-
"type": "group",
|
|
7
|
-
"dicts": [
|
|
8
|
-
{ "type": "ocd2", "file": "HKVariantsPhrases.ocd2" },
|
|
9
|
-
{ "type": "ocd2", "file": "HKVariants.ocd2" }
|
|
10
|
-
]
|
|
11
|
-
}
|
|
12
|
-
},
|
|
3
|
+
"normalization": [
|
|
4
|
+
{ "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
|
|
5
|
+
],
|
|
13
6
|
"conversion_chain": [
|
|
14
7
|
{
|
|
15
8
|
"dict": {
|
|
16
9
|
"type": "group",
|
|
10
|
+
"match_policy": "short_circuit",
|
|
17
11
|
"dicts": [
|
|
18
12
|
{ "type": "ocd2", "file": "HKVariantsPhrases.ocd2" },
|
|
19
13
|
{ "type": "ocd2", "file": "HKVariants.ocd2" }
|