opencc-data 1.3.2.dev20260628__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. opencc_data/__init__.py +29 -0
  2. opencc_data/data/CJK_Compatibility_Ideographs.txt +1007 -0
  3. opencc_data/data/HKPhrases.txt +43 -0
  4. opencc_data/data/HKPhrasesRev.txt +40 -0
  5. opencc_data/data/HKVariants.txt +71 -0
  6. opencc_data/data/HKVariantsPhrases.txt +277 -0
  7. opencc_data/data/HKVariantsRev.txt +79 -0
  8. opencc_data/data/HKVariantsRevPhrases.txt +312 -0
  9. opencc_data/data/JPShinjitaiCharacters.txt +397 -0
  10. opencc_data/data/JPShinjitaiCharactersRev.txt +465 -0
  11. opencc_data/data/JPShinjitaiPhrases.txt +240 -0
  12. opencc_data/data/STCharacters.txt +4016 -0
  13. opencc_data/data/STPhrases.txt +49141 -0
  14. opencc_data/data/STPhrases_GeneratedFromRegionalPhrases.txt +512 -0
  15. opencc_data/data/TSCharacters.txt +4149 -0
  16. opencc_data/data/TSCharactersExt.txt +899 -0
  17. opencc_data/data/TSPhrases.txt +474 -0
  18. opencc_data/data/TWPhrases.txt +780 -0
  19. opencc_data/data/TWPhrasesRev.txt +774 -0
  20. opencc_data/data/TWVariants.txt +43 -0
  21. opencc_data/data/TWVariantsPhrases.txt +9 -0
  22. opencc_data/data/TWVariantsRev.txt +43 -0
  23. opencc_data/data/TWVariantsRevPhrases.txt +1009 -0
  24. opencc_data/data/config/hk2s.json +57 -0
  25. opencc_data/data/config/hk2sp.json +61 -0
  26. opencc_data/data/config/hk2t.json +36 -0
  27. opencc_data/data/config/jp2t.json +36 -0
  28. opencc_data/data/config/s2hk.json +72 -0
  29. opencc_data/data/config/s2hkp.json +76 -0
  30. opencc_data/data/config/s2t.json +56 -0
  31. opencc_data/data/config/s2tw.json +72 -0
  32. opencc_data/data/config/s2twp.json +76 -0
  33. opencc_data/data/config/t2hk.json +46 -0
  34. opencc_data/data/config/t2jp.json +19 -0
  35. opencc_data/data/config/t2s.json +41 -0
  36. opencc_data/data/config/t2tw.json +46 -0
  37. opencc_data/data/config/tw2s.json +57 -0
  38. opencc_data/data/config/tw2sp.json +61 -0
  39. opencc_data/data/config/tw2t.json +36 -0
  40. opencc_data/test_data/testcases.json +1591 -0
  41. opencc_data-1.3.2.dev20260628.dist-info/METADATA +65 -0
  42. opencc_data-1.3.2.dev20260628.dist-info/RECORD +45 -0
  43. opencc_data-1.3.2.dev20260628.dist-info/WHEEL +5 -0
  44. opencc_data-1.3.2.dev20260628.dist-info/licenses/LICENSE +56 -0
  45. opencc_data-1.3.2.dev20260628.dist-info/top_level.txt +1 -0
@@ -0,0 +1,57 @@
1
+ {
2
+ "name": "Traditional Chinese (Hong Kong variant) to Simplified Chinese",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "text",
15
+ "file": "TSPhrases.txt"
16
+ }
17
+ },
18
+ "conversion_chain": [
19
+ {
20
+ "dict": {
21
+ "type": "group",
22
+ "match_policy": "short_circuit",
23
+ "dicts": [
24
+ {
25
+ "type": "text",
26
+ "file": "HKVariantsRevPhrases.txt"
27
+ },
28
+ {
29
+ "type": "text",
30
+ "file": "HKVariantsRev.txt"
31
+ }
32
+ ]
33
+ }
34
+ },
35
+ {
36
+ "dict": {
37
+ "type": "group",
38
+ "match_policy": "short_circuit",
39
+ "dicts": [
40
+ {
41
+ "type": "text",
42
+ "file": "TSPhrases.txt"
43
+ },
44
+ {
45
+ "type": "text",
46
+ "file": "TSCharactersExt.txt",
47
+ "may_output_tofu": true
48
+ },
49
+ {
50
+ "type": "text",
51
+ "file": "TSCharacters.txt"
52
+ }
53
+ ]
54
+ }
55
+ }
56
+ ]
57
+ }
@@ -0,0 +1,61 @@
1
+ {
2
+ "name": "Traditional Chinese (Hong Kong variant) to Simplified Chinese (with phrases)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "text",
15
+ "file": "TSPhrases.txt"
16
+ }
17
+ },
18
+ "conversion_chain": [
19
+ {
20
+ "dict": {
21
+ "type": "group",
22
+ "match_policy": "short_circuit",
23
+ "dicts": [
24
+ {
25
+ "type": "text",
26
+ "file": "HKPhrasesRev.txt"
27
+ },
28
+ {
29
+ "type": "text",
30
+ "file": "HKVariantsRevPhrases.txt"
31
+ },
32
+ {
33
+ "type": "text",
34
+ "file": "HKVariantsRev.txt"
35
+ }
36
+ ]
37
+ }
38
+ },
39
+ {
40
+ "dict": {
41
+ "type": "group",
42
+ "match_policy": "short_circuit",
43
+ "dicts": [
44
+ {
45
+ "type": "text",
46
+ "file": "TSPhrases.txt"
47
+ },
48
+ {
49
+ "type": "text",
50
+ "file": "TSCharactersExt.txt",
51
+ "may_output_tofu": true
52
+ },
53
+ {
54
+ "type": "text",
55
+ "file": "TSCharacters.txt"
56
+ }
57
+ ]
58
+ }
59
+ }
60
+ ]
61
+ }
@@ -0,0 +1,36 @@
1
+ {
2
+ "name": "Traditional Chinese (Hong Kong variant) to Traditional Chinese (OpenCC Standard)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "text",
15
+ "file": "HKVariantsRevPhrases.txt"
16
+ }
17
+ },
18
+ "conversion_chain": [
19
+ {
20
+ "dict": {
21
+ "type": "group",
22
+ "match_policy": "short_circuit",
23
+ "dicts": [
24
+ {
25
+ "type": "text",
26
+ "file": "HKVariantsRevPhrases.txt"
27
+ },
28
+ {
29
+ "type": "text",
30
+ "file": "HKVariantsRev.txt"
31
+ }
32
+ ]
33
+ }
34
+ }
35
+ ]
36
+ }
@@ -0,0 +1,36 @@
1
+ {
2
+ "name": "New Japanese Kanji (Shinjitai) to Old Japanese Kanji (Kyūjitai)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "text",
15
+ "file": "JPShinjitaiPhrases.txt"
16
+ }
17
+ },
18
+ "conversion_chain": [
19
+ {
20
+ "dict": {
21
+ "type": "group",
22
+ "match_policy": "short_circuit",
23
+ "dicts": [
24
+ {
25
+ "type": "text",
26
+ "file": "JPShinjitaiPhrases.txt"
27
+ },
28
+ {
29
+ "type": "text",
30
+ "file": "JPShinjitaiCharacters.txt"
31
+ }
32
+ ]
33
+ }
34
+ }
35
+ ]
36
+ }
@@ -0,0 +1,72 @@
1
+ {
2
+ "name": "Simplified Chinese to Traditional Chinese (Hong Kong variant)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "group",
15
+ "match_policy": "union",
16
+ "dicts": [
17
+ {
18
+ "type": "text",
19
+ "file": "STPhrases.txt"
20
+ },
21
+ {
22
+ "type": "text",
23
+ "file": "STPhrases_GeneratedFromRegionalPhrases.txt"
24
+ }
25
+ ]
26
+ }
27
+ },
28
+ "conversion_chain": [
29
+ {
30
+ "dict": {
31
+ "type": "group",
32
+ "match_policy": "short_circuit",
33
+ "dicts": [
34
+ {
35
+ "type": "group",
36
+ "match_policy": "union",
37
+ "dicts": [
38
+ {
39
+ "type": "text",
40
+ "file": "STPhrases.txt"
41
+ },
42
+ {
43
+ "type": "text",
44
+ "file": "STPhrases_GeneratedFromRegionalPhrases.txt"
45
+ }
46
+ ]
47
+ },
48
+ {
49
+ "type": "text",
50
+ "file": "STCharacters.txt"
51
+ }
52
+ ]
53
+ }
54
+ },
55
+ {
56
+ "dict": {
57
+ "type": "group",
58
+ "match_policy": "short_circuit",
59
+ "dicts": [
60
+ {
61
+ "type": "text",
62
+ "file": "HKVariantsPhrases.txt"
63
+ },
64
+ {
65
+ "type": "text",
66
+ "file": "HKVariants.txt"
67
+ }
68
+ ]
69
+ }
70
+ }
71
+ ]
72
+ }
@@ -0,0 +1,76 @@
1
+ {
2
+ "name": "Simplified Chinese to Traditional Chinese (Hong Kong variant, with phrases)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "group",
15
+ "match_policy": "union",
16
+ "dicts": [
17
+ {
18
+ "type": "text",
19
+ "file": "STPhrases.txt"
20
+ },
21
+ {
22
+ "type": "text",
23
+ "file": "STPhrases_GeneratedFromRegionalPhrases.txt"
24
+ }
25
+ ]
26
+ }
27
+ },
28
+ "conversion_chain": [
29
+ {
30
+ "dict": {
31
+ "type": "group",
32
+ "match_policy": "short_circuit",
33
+ "dicts": [
34
+ {
35
+ "type": "group",
36
+ "match_policy": "union",
37
+ "dicts": [
38
+ {
39
+ "type": "text",
40
+ "file": "STPhrases.txt"
41
+ },
42
+ {
43
+ "type": "text",
44
+ "file": "STPhrases_GeneratedFromRegionalPhrases.txt"
45
+ }
46
+ ]
47
+ },
48
+ {
49
+ "type": "text",
50
+ "file": "STCharacters.txt"
51
+ }
52
+ ]
53
+ }
54
+ },
55
+ {
56
+ "dict": {
57
+ "type": "group",
58
+ "match_policy": "short_circuit",
59
+ "dicts": [
60
+ {
61
+ "type": "text",
62
+ "file": "HKPhrases.txt"
63
+ },
64
+ {
65
+ "type": "text",
66
+ "file": "HKVariantsPhrases.txt"
67
+ },
68
+ {
69
+ "type": "text",
70
+ "file": "HKVariants.txt"
71
+ }
72
+ ]
73
+ }
74
+ }
75
+ ]
76
+ }
@@ -0,0 +1,56 @@
1
+ {
2
+ "name": "Simplified Chinese to Traditional Chinese (OpenCC Standard)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "group",
15
+ "match_policy": "union",
16
+ "dicts": [
17
+ {
18
+ "type": "text",
19
+ "file": "STPhrases.txt"
20
+ },
21
+ {
22
+ "type": "text",
23
+ "file": "STPhrases_GeneratedFromRegionalPhrases.txt"
24
+ }
25
+ ]
26
+ }
27
+ },
28
+ "conversion_chain": [
29
+ {
30
+ "dict": {
31
+ "type": "group",
32
+ "match_policy": "short_circuit",
33
+ "dicts": [
34
+ {
35
+ "type": "group",
36
+ "match_policy": "union",
37
+ "dicts": [
38
+ {
39
+ "type": "text",
40
+ "file": "STPhrases.txt"
41
+ },
42
+ {
43
+ "type": "text",
44
+ "file": "STPhrases_GeneratedFromRegionalPhrases.txt"
45
+ }
46
+ ]
47
+ },
48
+ {
49
+ "type": "text",
50
+ "file": "STCharacters.txt"
51
+ }
52
+ ]
53
+ }
54
+ }
55
+ ]
56
+ }
@@ -0,0 +1,72 @@
1
+ {
2
+ "name": "Simplified Chinese to Traditional Chinese (Taiwan Standard)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "group",
15
+ "match_policy": "union",
16
+ "dicts": [
17
+ {
18
+ "type": "text",
19
+ "file": "STPhrases.txt"
20
+ },
21
+ {
22
+ "type": "text",
23
+ "file": "STPhrases_GeneratedFromRegionalPhrases.txt"
24
+ }
25
+ ]
26
+ }
27
+ },
28
+ "conversion_chain": [
29
+ {
30
+ "dict": {
31
+ "type": "group",
32
+ "match_policy": "short_circuit",
33
+ "dicts": [
34
+ {
35
+ "type": "group",
36
+ "match_policy": "union",
37
+ "dicts": [
38
+ {
39
+ "type": "text",
40
+ "file": "STPhrases.txt"
41
+ },
42
+ {
43
+ "type": "text",
44
+ "file": "STPhrases_GeneratedFromRegionalPhrases.txt"
45
+ }
46
+ ]
47
+ },
48
+ {
49
+ "type": "text",
50
+ "file": "STCharacters.txt"
51
+ }
52
+ ]
53
+ }
54
+ },
55
+ {
56
+ "dict": {
57
+ "type": "group",
58
+ "match_policy": "short_circuit",
59
+ "dicts": [
60
+ {
61
+ "type": "text",
62
+ "file": "TWVariantsPhrases.txt"
63
+ },
64
+ {
65
+ "type": "text",
66
+ "file": "TWVariants.txt"
67
+ }
68
+ ]
69
+ }
70
+ }
71
+ ]
72
+ }
@@ -0,0 +1,76 @@
1
+ {
2
+ "name": "Simplified Chinese to Traditional Chinese (Taiwan Standard, with Taiwan Phrases)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "group",
15
+ "match_policy": "union",
16
+ "dicts": [
17
+ {
18
+ "type": "text",
19
+ "file": "STPhrases.txt"
20
+ },
21
+ {
22
+ "type": "text",
23
+ "file": "STPhrases_GeneratedFromRegionalPhrases.txt"
24
+ }
25
+ ]
26
+ }
27
+ },
28
+ "conversion_chain": [
29
+ {
30
+ "dict": {
31
+ "type": "group",
32
+ "match_policy": "short_circuit",
33
+ "dicts": [
34
+ {
35
+ "type": "group",
36
+ "match_policy": "union",
37
+ "dicts": [
38
+ {
39
+ "type": "text",
40
+ "file": "STPhrases.txt"
41
+ },
42
+ {
43
+ "type": "text",
44
+ "file": "STPhrases_GeneratedFromRegionalPhrases.txt"
45
+ }
46
+ ]
47
+ },
48
+ {
49
+ "type": "text",
50
+ "file": "STCharacters.txt"
51
+ }
52
+ ]
53
+ }
54
+ },
55
+ {
56
+ "dict": {
57
+ "type": "group",
58
+ "match_policy": "short_circuit",
59
+ "dicts": [
60
+ {
61
+ "type": "text",
62
+ "file": "TWPhrases.txt"
63
+ },
64
+ {
65
+ "type": "text",
66
+ "file": "TWVariantsPhrases.txt"
67
+ },
68
+ {
69
+ "type": "text",
70
+ "file": "TWVariants.txt"
71
+ }
72
+ ]
73
+ }
74
+ }
75
+ ]
76
+ }
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "Traditional Chinese (OpenCC Standard) to Traditional Chinese (Hong Kong variant)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "group",
15
+ "match_policy": "short_circuit",
16
+ "dicts": [
17
+ {
18
+ "type": "text",
19
+ "file": "HKVariantsPhrases.txt"
20
+ },
21
+ {
22
+ "type": "text",
23
+ "file": "HKVariants.txt"
24
+ }
25
+ ]
26
+ }
27
+ },
28
+ "conversion_chain": [
29
+ {
30
+ "dict": {
31
+ "type": "group",
32
+ "match_policy": "short_circuit",
33
+ "dicts": [
34
+ {
35
+ "type": "text",
36
+ "file": "HKVariantsPhrases.txt"
37
+ },
38
+ {
39
+ "type": "text",
40
+ "file": "HKVariants.txt"
41
+ }
42
+ ]
43
+ }
44
+ }
45
+ ]
46
+ }
@@ -0,0 +1,19 @@
1
+ {
2
+ "name": "Old Japanese Kanji (Kyūjitai) to New Japanese Kanji (Shinjitai)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "conversion_chain": [
12
+ {
13
+ "dict": {
14
+ "type": "text",
15
+ "file": "JPShinjitaiCharactersRev.txt"
16
+ }
17
+ }
18
+ ]
19
+ }
@@ -0,0 +1,41 @@
1
+ {
2
+ "name": "Traditional Chinese (OpenCC Standard) to Simplified Chinese",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "text",
15
+ "file": "TSPhrases.txt"
16
+ }
17
+ },
18
+ "conversion_chain": [
19
+ {
20
+ "dict": {
21
+ "type": "group",
22
+ "match_policy": "short_circuit",
23
+ "dicts": [
24
+ {
25
+ "type": "text",
26
+ "file": "TSPhrases.txt"
27
+ },
28
+ {
29
+ "type": "text",
30
+ "file": "TSCharactersExt.txt",
31
+ "may_output_tofu": true
32
+ },
33
+ {
34
+ "type": "text",
35
+ "file": "TSCharacters.txt"
36
+ }
37
+ ]
38
+ }
39
+ }
40
+ ]
41
+ }
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "Traditional Chinese (OpenCC Standard) to Traditional Chinese (Taiwan Standard)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "text",
7
+ "file": "CJK_Compatibility_Ideographs.txt"
8
+ }
9
+ }
10
+ ],
11
+ "segmentation": {
12
+ "type": "mmseg",
13
+ "dict": {
14
+ "type": "group",
15
+ "match_policy": "short_circuit",
16
+ "dicts": [
17
+ {
18
+ "type": "text",
19
+ "file": "TWVariantsPhrases.txt"
20
+ },
21
+ {
22
+ "type": "text",
23
+ "file": "TWVariants.txt"
24
+ }
25
+ ]
26
+ }
27
+ },
28
+ "conversion_chain": [
29
+ {
30
+ "dict": {
31
+ "type": "group",
32
+ "match_policy": "short_circuit",
33
+ "dicts": [
34
+ {
35
+ "type": "text",
36
+ "file": "TWVariantsPhrases.txt"
37
+ },
38
+ {
39
+ "type": "text",
40
+ "file": "TWVariants.txt"
41
+ }
42
+ ]
43
+ }
44
+ }
45
+ ]
46
+ }