opencc-wasm 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -5
- package/README.zh.md +42 -6
- package/dist/cjs/index.cjs +3 -3
- package/dist/cjs/opencc-wasm.cjs +1 -1
- package/dist/cjs/opencc-wasm.wasm +0 -0
- package/dist/data/config/hk2s.json +25 -26
- package/dist/data/config/hk2sp.json +33 -0
- package/dist/data/config/hk2sp_jieba.json +36 -0
- package/dist/data/config/hk2t.json +11 -15
- package/dist/data/config/jp2t.json +12 -19
- package/dist/data/config/opencc_config.schema.json +100 -0
- package/dist/data/config/s2hk.json +20 -20
- package/dist/data/config/s2hk_jieba.json +30 -0
- package/dist/data/config/s2hkp.json +28 -0
- package/dist/data/config/s2hkp_jieba.json +31 -0
- package/dist/data/config/s2t.json +12 -16
- package/dist/data/config/s2t_cngov.json +0 -7
- package/dist/data/config/s2t_jieba.json +21 -0
- package/dist/data/config/s2tw.json +21 -21
- package/dist/data/config/s2tw_jieba.json +30 -0
- package/dist/data/config/s2twp.json +22 -26
- package/dist/data/config/s2twp_jieba.json +20 -21
- package/dist/data/config/t2cngov.json +0 -7
- package/dist/data/config/t2cngov_keep_simp.json +0 -7
- package/dist/data/config/t2hk.json +16 -8
- package/dist/data/config/t2jp.json +5 -11
- package/dist/data/config/t2s.json +17 -16
- package/dist/data/config/t2s_cngov.json +0 -7
- package/dist/data/config/t2tw.json +16 -8
- package/dist/data/config/tw2s.json +26 -27
- package/dist/data/config/tw2sp.json +27 -30
- package/dist/data/config/tw2sp_jieba.json +25 -25
- package/dist/data/config/tw2t.json +11 -15
- package/dist/data/dict/HKPhrases.ocd2 +0 -0
- package/dist/data/dict/HKPhrasesRev.ocd2 +0 -0
- package/dist/data/dict/HKVariants.ocd2 +0 -0
- package/dist/data/dict/HKVariantsPhrases.ocd2 +0 -0
- package/dist/data/dict/HKVariantsRev.ocd2 +0 -0
- package/dist/data/dict/HKVariantsRevPhrases.ocd2 +0 -0
- package/dist/data/dict/JPShinjitaiCharacters.ocd2 +0 -0
- package/dist/data/dict/JPShinjitaiCharactersRev.ocd2 +0 -0
- package/dist/data/dict/JPShinjitaiPhrases.ocd2 +0 -0
- package/dist/data/dict/STCharacters.ocd2 +0 -0
- package/dist/data/dict/STPhrases.ocd2 +0 -0
- package/dist/data/dict/TSCharacters.ocd2 +0 -0
- package/dist/data/dict/TSCharactersExt.ocd2 +0 -0
- package/dist/data/dict/TSPhrases.ocd2 +0 -0
- package/dist/data/dict/TWPhrases.ocd2 +0 -0
- package/dist/data/dict/TWPhrasesRev.ocd2 +0 -0
- package/dist/data/dict/TWVariants.ocd2 +0 -0
- package/dist/data/dict/TWVariantsPhrases.ocd2 +0 -0
- package/dist/data/dict/TWVariantsRev.ocd2 +0 -0
- package/dist/data/dict/TWVariantsRevPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/STCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/STPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/TGCharacters_keep_simp.ocd2 +0 -0
- package/dist/data/dict/cngov/TGPhrases.ocd2 +0 -0
- package/dist/data/dict/cngov/TSCharacters.ocd2 +0 -0
- package/dist/data/dict/cngov/TSPhrases.ocd2 +0 -0
- package/dist/data/jieba_dict/jieba_merged.ocd2 +0 -0
- package/dist/data/jieba_dict/user.dict.utf8 +2 -0
- package/dist/esm/index.js +2 -0
- package/dist/esm/opencc-wasm.js +1 -1
- package/dist/esm/opencc-wasm.wasm +0 -0
- package/dist/opencc-wasm.wasm +0 -0
- package/package.json +2 -1
- package/dist/data/dict/JPVariants.ocd2 +0 -0
- package/dist/data/dict/JPVariantsRev.ocd2 +0 -0
- package/dist/data/dict/cngov/GovVariants.ocd2 +0 -0
- package/dist/data/dict/cngov/GovVariants_keep_simp.ocd2 +0 -0
|
@@ -2,32 +2,31 @@
|
|
|
2
2
|
"name": "Traditional Chinese (Hong Kong variant) to Simplified Chinese",
|
|
3
3
|
"segmentation": {
|
|
4
4
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
6
|
-
"type": "ocd2",
|
|
7
|
-
"file": "TSPhrases.ocd2"
|
|
8
|
-
}
|
|
5
|
+
"dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
|
|
9
6
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
7
|
+
"conversion_chain": [
|
|
8
|
+
{
|
|
9
|
+
"dict": {
|
|
10
|
+
"type": "group",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "HKVariantsRevPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "HKVariantsRev.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"dict": {
|
|
19
|
+
"type": "group",
|
|
20
|
+
"dicts": [
|
|
21
|
+
{ "type": "ocd2", "file": "TSPhrases.ocd2" },
|
|
22
|
+
{
|
|
23
|
+
"type": "ocd2",
|
|
24
|
+
"file": "TSCharactersExt.ocd2",
|
|
25
|
+
"may_output_tofu": true
|
|
26
|
+
},
|
|
27
|
+
{ "type": "ocd2", "file": "TSCharacters.ocd2" }
|
|
28
|
+
]
|
|
29
|
+
}
|
|
31
30
|
}
|
|
32
|
-
|
|
31
|
+
]
|
|
33
32
|
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Traditional Chinese (Hong Kong variant) to Simplified Chinese (with phrases)",
|
|
3
|
+
"segmentation": {
|
|
4
|
+
"type": "mmseg",
|
|
5
|
+
"dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
|
|
6
|
+
},
|
|
7
|
+
"conversion_chain": [
|
|
8
|
+
{
|
|
9
|
+
"dict": {
|
|
10
|
+
"type": "group",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "HKPhrasesRev.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "HKVariantsRevPhrases.ocd2" },
|
|
14
|
+
{ "type": "ocd2", "file": "HKVariantsRev.ocd2" }
|
|
15
|
+
]
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"dict": {
|
|
20
|
+
"type": "group",
|
|
21
|
+
"dicts": [
|
|
22
|
+
{ "type": "ocd2", "file": "TSPhrases.ocd2" },
|
|
23
|
+
{
|
|
24
|
+
"type": "ocd2",
|
|
25
|
+
"file": "TSCharactersExt.ocd2",
|
|
26
|
+
"may_output_tofu": true
|
|
27
|
+
},
|
|
28
|
+
{ "type": "ocd2", "file": "TSCharacters.ocd2" }
|
|
29
|
+
]
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Traditional Chinese (Hong Kong variant) to Simplified Chinese (with phrases, Jieba Segmentation - Experimental)",
|
|
3
|
+
"segmentation": {
|
|
4
|
+
"type": "jieba",
|
|
5
|
+
"resources": {
|
|
6
|
+
"dict_path": "jieba_dict/jieba_merged.ocd2",
|
|
7
|
+
"model_path": "jieba_dict/hmm_model.utf8"
|
|
8
|
+
}
|
|
9
|
+
},
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{ "type": "ocd2", "file": "HKPhrasesRev.ocd2" },
|
|
16
|
+
{ "type": "ocd2", "file": "HKVariantsRevPhrases.ocd2" },
|
|
17
|
+
{ "type": "ocd2", "file": "HKVariantsRev.ocd2" }
|
|
18
|
+
]
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"dict": {
|
|
23
|
+
"type": "group",
|
|
24
|
+
"dicts": [
|
|
25
|
+
{ "type": "ocd2", "file": "TSPhrases.ocd2" },
|
|
26
|
+
{
|
|
27
|
+
"type": "ocd2",
|
|
28
|
+
"file": "TSCharactersExt.ocd2",
|
|
29
|
+
"may_output_tofu": true
|
|
30
|
+
},
|
|
31
|
+
{ "type": "ocd2", "file": "TSCharacters.ocd2" }
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
]
|
|
36
|
+
}
|
|
@@ -2,21 +2,17 @@
|
|
|
2
2
|
"name": "Traditional Chinese (Hong Kong variant) to Traditional Chinese (OpenCC Standard)",
|
|
3
3
|
"segmentation": {
|
|
4
4
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
6
|
-
"type": "ocd2",
|
|
7
|
-
"file": "HKVariantsRevPhrases.ocd2"
|
|
8
|
-
}
|
|
5
|
+
"dict": { "type": "ocd2", "file": "HKVariantsRevPhrases.ocd2" }
|
|
9
6
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}]
|
|
7
|
+
"conversion_chain": [
|
|
8
|
+
{
|
|
9
|
+
"dict": {
|
|
10
|
+
"type": "group",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "HKVariantsRevPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "HKVariantsRev.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
20
16
|
}
|
|
21
|
-
|
|
17
|
+
]
|
|
22
18
|
}
|
|
@@ -1,25 +1,18 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "New Japanese Kanji (Shinjitai) to
|
|
2
|
+
"name": "New Japanese Kanji (Shinjitai) to Old Japanese Kanji (Kyūjitai)",
|
|
3
3
|
"segmentation": {
|
|
4
4
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
6
|
-
"type": "ocd2",
|
|
7
|
-
"file": "JPShinjitaiPhrases.ocd2"
|
|
8
|
-
}
|
|
5
|
+
"dict": { "type": "ocd2", "file": "JPShinjitaiPhrases.ocd2" }
|
|
9
6
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}, {
|
|
20
|
-
"type": "ocd2",
|
|
21
|
-
"file": "JPVariantsRev.ocd2"
|
|
22
|
-
}]
|
|
7
|
+
"conversion_chain": [
|
|
8
|
+
{
|
|
9
|
+
"dict": {
|
|
10
|
+
"type": "group",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "JPShinjitaiPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "JPShinjitaiCharacters.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
23
16
|
}
|
|
24
|
-
|
|
17
|
+
]
|
|
25
18
|
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
|
3
|
+
"id": "https://opencc.byvoid.com/schema/opencc_config.schema.json",
|
|
4
|
+
"title": "OpenCC configuration",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"required": ["segmentation", "conversion_chain"],
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"properties": {
|
|
9
|
+
"name": { "type": "string" },
|
|
10
|
+
"segmentation": { "$ref": "#/definitions/segmentation" },
|
|
11
|
+
"conversion_chain": {
|
|
12
|
+
"type": "array",
|
|
13
|
+
"minItems": 1,
|
|
14
|
+
"items": { "$ref": "#/definitions/conversion" }
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"definitions": {
|
|
18
|
+
"segmentation": {
|
|
19
|
+
"anyOf": [
|
|
20
|
+
{ "$ref": "#/definitions/mmseg_segmentation" },
|
|
21
|
+
{ "$ref": "#/definitions/plugin_segmentation" }
|
|
22
|
+
]
|
|
23
|
+
},
|
|
24
|
+
"mmseg_segmentation": {
|
|
25
|
+
"type": "object",
|
|
26
|
+
"required": ["type", "dict"],
|
|
27
|
+
"additionalProperties": false,
|
|
28
|
+
"properties": {
|
|
29
|
+
"type": { "enum": ["mmseg"] },
|
|
30
|
+
"dict": { "$ref": "#/definitions/dict" }
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
"plugin_segmentation": {
|
|
34
|
+
"type": "object",
|
|
35
|
+
"required": ["type"],
|
|
36
|
+
"not": {
|
|
37
|
+
"properties": { "type": { "enum": ["mmseg"] } },
|
|
38
|
+
"required": ["type"]
|
|
39
|
+
},
|
|
40
|
+
"properties": {
|
|
41
|
+
"type": { "type": "string", "minLength": 1 },
|
|
42
|
+
"resources": {
|
|
43
|
+
"type": "object",
|
|
44
|
+
"additionalProperties": { "type": "string" }
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"additionalProperties": { "type": "string" }
|
|
48
|
+
},
|
|
49
|
+
"conversion": {
|
|
50
|
+
"type": "object",
|
|
51
|
+
"required": ["dict"],
|
|
52
|
+
"additionalProperties": false,
|
|
53
|
+
"properties": { "dict": { "$ref": "#/definitions/dict" } }
|
|
54
|
+
},
|
|
55
|
+
"dict": {
|
|
56
|
+
"anyOf": [
|
|
57
|
+
{ "$ref": "#/definitions/file_dict" },
|
|
58
|
+
{ "$ref": "#/definitions/inline_dict" },
|
|
59
|
+
{ "$ref": "#/definitions/group_dict" }
|
|
60
|
+
]
|
|
61
|
+
},
|
|
62
|
+
"file_dict": {
|
|
63
|
+
"type": "object",
|
|
64
|
+
"required": ["type", "file"],
|
|
65
|
+
"additionalProperties": false,
|
|
66
|
+
"properties": {
|
|
67
|
+
"type": { "enum": ["text", "ocd", "ocd2"] },
|
|
68
|
+
"file": { "type": "string", "minLength": 1 },
|
|
69
|
+
"may_output_tofu": { "type": "boolean" }
|
|
70
|
+
}
|
|
71
|
+
},
|
|
72
|
+
"inline_dict": {
|
|
73
|
+
"type": "object",
|
|
74
|
+
"required": ["type", "entries"],
|
|
75
|
+
"additionalProperties": false,
|
|
76
|
+
"properties": {
|
|
77
|
+
"type": { "enum": ["inline"] },
|
|
78
|
+
"entries": {
|
|
79
|
+
"type": "object",
|
|
80
|
+
"patternProperties": { ".+": { "type": "string", "minLength": 1 } },
|
|
81
|
+
"additionalProperties": false
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
},
|
|
85
|
+
"group_dict": {
|
|
86
|
+
"type": "object",
|
|
87
|
+
"required": ["type", "dicts"],
|
|
88
|
+
"additionalProperties": false,
|
|
89
|
+
"properties": {
|
|
90
|
+
"type": { "enum": ["group"] },
|
|
91
|
+
"dicts": {
|
|
92
|
+
"type": "array",
|
|
93
|
+
"minItems": 1,
|
|
94
|
+
"items": { "$ref": "#/definitions/dict" }
|
|
95
|
+
},
|
|
96
|
+
"may_output_tofu": { "type": "boolean" }
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
}
|
|
@@ -2,26 +2,26 @@
|
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (Hong Kong variant)",
|
|
3
3
|
"segmentation": {
|
|
4
4
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
6
|
-
"type": "ocd2",
|
|
7
|
-
"file": "STPhrases.ocd2"
|
|
8
|
-
}
|
|
5
|
+
"dict": { "type": "ocd2", "file": "STPhrases.ocd2" }
|
|
9
6
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
7
|
+
"conversion_chain": [
|
|
8
|
+
{
|
|
9
|
+
"dict": {
|
|
10
|
+
"type": "group",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"dict": {
|
|
19
|
+
"type": "group",
|
|
20
|
+
"dicts": [
|
|
21
|
+
{ "type": "ocd2", "file": "HKVariantsPhrases.ocd2" },
|
|
22
|
+
{ "type": "ocd2", "file": "HKVariants.ocd2" }
|
|
23
|
+
]
|
|
24
|
+
}
|
|
25
25
|
}
|
|
26
|
-
|
|
26
|
+
]
|
|
27
27
|
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Simplified Chinese to Traditional Chinese (Hong Kong variant, Jieba Segmentation - Experimental)",
|
|
3
|
+
"segmentation": {
|
|
4
|
+
"type": "jieba",
|
|
5
|
+
"resources": {
|
|
6
|
+
"dict_path": "jieba_dict/jieba_merged.ocd2",
|
|
7
|
+
"model_path": "jieba_dict/hmm_model.utf8"
|
|
8
|
+
}
|
|
9
|
+
},
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
16
|
+
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
17
|
+
]
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"dict": {
|
|
22
|
+
"type": "group",
|
|
23
|
+
"dicts": [
|
|
24
|
+
{ "type": "ocd2", "file": "HKVariantsPhrases.ocd2" },
|
|
25
|
+
{ "type": "ocd2", "file": "HKVariants.ocd2" }
|
|
26
|
+
]
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
]
|
|
30
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Simplified Chinese to Traditional Chinese (Hong Kong variant, with phrases)",
|
|
3
|
+
"segmentation": {
|
|
4
|
+
"type": "mmseg",
|
|
5
|
+
"dict": { "type": "ocd2", "file": "STPhrases.ocd2" }
|
|
6
|
+
},
|
|
7
|
+
"conversion_chain": [
|
|
8
|
+
{
|
|
9
|
+
"dict": {
|
|
10
|
+
"type": "group",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"dict": {
|
|
19
|
+
"type": "group",
|
|
20
|
+
"dicts": [
|
|
21
|
+
{ "type": "ocd2", "file": "HKPhrases.ocd2" },
|
|
22
|
+
{ "type": "ocd2", "file": "HKVariantsPhrases.ocd2" },
|
|
23
|
+
{ "type": "ocd2", "file": "HKVariants.ocd2" }
|
|
24
|
+
]
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
]
|
|
28
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Simplified Chinese to Traditional Chinese (Hong Kong variant, with phrases, Jieba Segmentation - Experimental)",
|
|
3
|
+
"segmentation": {
|
|
4
|
+
"type": "jieba",
|
|
5
|
+
"resources": {
|
|
6
|
+
"dict_path": "jieba_dict/jieba_merged.ocd2",
|
|
7
|
+
"model_path": "jieba_dict/hmm_model.utf8"
|
|
8
|
+
}
|
|
9
|
+
},
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
16
|
+
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
17
|
+
]
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"dict": {
|
|
22
|
+
"type": "group",
|
|
23
|
+
"dicts": [
|
|
24
|
+
{ "type": "ocd2", "file": "HKPhrases.ocd2" },
|
|
25
|
+
{ "type": "ocd2", "file": "HKVariantsPhrases.ocd2" },
|
|
26
|
+
{ "type": "ocd2", "file": "HKVariants.ocd2" }
|
|
27
|
+
]
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
@@ -1,22 +1,18 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "Simplified Chinese to Traditional Chinese",
|
|
2
|
+
"name": "Simplified Chinese to Traditional Chinese (OpenCC Standard)",
|
|
3
3
|
"segmentation": {
|
|
4
4
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
6
|
-
"type": "ocd2",
|
|
7
|
-
"file": "STPhrases.ocd2"
|
|
8
|
-
}
|
|
5
|
+
"dict": { "type": "ocd2", "file": "STPhrases.ocd2" }
|
|
9
6
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}]
|
|
7
|
+
"conversion_chain": [
|
|
8
|
+
{
|
|
9
|
+
"dict": {
|
|
10
|
+
"type": "group",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
20
16
|
}
|
|
21
|
-
|
|
17
|
+
]
|
|
22
18
|
}
|
|
@@ -1,12 +1,5 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "Simplified Chinese to Traditional Chinese (The Chinese Government Standard)",
|
|
3
|
-
"author": "TerryTian-tech",
|
|
4
|
-
"license": "Apache License 2.0",
|
|
5
|
-
"source": "https://github.com/TerryTian-tech/OpenCC-Traditional-Chinese-characters-according-to-Chinese-government-standards",
|
|
6
|
-
"contributors": ["TerryTian-tech", "Yi Jianpeng", "Hu Xinmei", "Duan Yatong"],
|
|
7
|
-
"reference": "《通用规范汉字表》(2013)",
|
|
8
|
-
"description": "Conservative conversion that preserves intentional simplified characters in mixed documents while standardizing traditional characters only.",
|
|
9
|
-
|
|
10
3
|
"segmentation": {
|
|
11
4
|
"type": "mmseg",
|
|
12
5
|
"dict": {
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Simplified Chinese to Traditional Chinese (Jieba Segmentation - Experimental)",
|
|
3
|
+
"segmentation": {
|
|
4
|
+
"type": "jieba",
|
|
5
|
+
"resources": {
|
|
6
|
+
"dict_path": "jieba_dict/jieba_merged.ocd2",
|
|
7
|
+
"model_path": "jieba_dict/hmm_model.utf8"
|
|
8
|
+
}
|
|
9
|
+
},
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
16
|
+
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
17
|
+
]
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
]
|
|
21
|
+
}
|
|
@@ -1,27 +1,27 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "Simplified Chinese to Traditional Chinese (Taiwan
|
|
2
|
+
"name": "Simplified Chinese to Traditional Chinese (Taiwan Standard)",
|
|
3
3
|
"segmentation": {
|
|
4
4
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
6
|
-
"type": "ocd2",
|
|
7
|
-
"file": "STPhrases.ocd2"
|
|
8
|
-
}
|
|
5
|
+
"dict": { "type": "ocd2", "file": "STPhrases.ocd2" }
|
|
9
6
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
7
|
+
"conversion_chain": [
|
|
8
|
+
{
|
|
9
|
+
"dict": {
|
|
10
|
+
"type": "group",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"dict": {
|
|
19
|
+
"type": "group",
|
|
20
|
+
"dicts": [
|
|
21
|
+
{ "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
|
|
22
|
+
{ "type": "ocd2", "file": "TWVariants.ocd2" }
|
|
23
|
+
]
|
|
24
|
+
}
|
|
25
25
|
}
|
|
26
|
-
|
|
26
|
+
]
|
|
27
27
|
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Simplified Chinese to Traditional Chinese (Taiwan standard, Jieba Segmentation - Experimental)",
|
|
3
|
+
"segmentation": {
|
|
4
|
+
"type": "jieba",
|
|
5
|
+
"resources": {
|
|
6
|
+
"dict_path": "jieba_dict/jieba_merged.ocd2",
|
|
7
|
+
"model_path": "jieba_dict/hmm_model.utf8"
|
|
8
|
+
}
|
|
9
|
+
},
|
|
10
|
+
"conversion_chain": [
|
|
11
|
+
{
|
|
12
|
+
"dict": {
|
|
13
|
+
"type": "group",
|
|
14
|
+
"dicts": [
|
|
15
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
16
|
+
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
17
|
+
]
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"dict": {
|
|
22
|
+
"type": "group",
|
|
23
|
+
"dicts": [
|
|
24
|
+
{ "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
|
|
25
|
+
{ "type": "ocd2", "file": "TWVariants.ocd2" }
|
|
26
|
+
]
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
]
|
|
30
|
+
}
|
|
@@ -1,32 +1,28 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "Simplified Chinese to Traditional Chinese (Taiwan
|
|
2
|
+
"name": "Simplified Chinese to Traditional Chinese (Taiwan Standard, with Taiwan Phrases)",
|
|
3
3
|
"segmentation": {
|
|
4
4
|
"type": "mmseg",
|
|
5
|
-
"dict": {
|
|
6
|
-
"type": "ocd2",
|
|
7
|
-
"file": "STPhrases.ocd2"
|
|
8
|
-
}
|
|
5
|
+
"dict": { "type": "ocd2", "file": "STPhrases.ocd2" }
|
|
9
6
|
},
|
|
10
|
-
"conversion_chain": [
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
"file": "TWVariants.ocd2"
|
|
7
|
+
"conversion_chain": [
|
|
8
|
+
{
|
|
9
|
+
"dict": {
|
|
10
|
+
"type": "group",
|
|
11
|
+
"dicts": [
|
|
12
|
+
{ "type": "ocd2", "file": "STPhrases.ocd2" },
|
|
13
|
+
{ "type": "ocd2", "file": "STCharacters.ocd2" }
|
|
14
|
+
]
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"dict": {
|
|
19
|
+
"type": "group",
|
|
20
|
+
"dicts": [
|
|
21
|
+
{ "type": "ocd2", "file": "TWPhrases.ocd2" },
|
|
22
|
+
{ "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
|
|
23
|
+
{ "type": "ocd2", "file": "TWVariants.ocd2" }
|
|
24
|
+
]
|
|
25
|
+
}
|
|
30
26
|
}
|
|
31
|
-
|
|
27
|
+
]
|
|
32
28
|
}
|