opencc-wasm 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +41 -5
  2. package/README.zh.md +42 -6
  3. package/dist/cjs/index.cjs +3 -3
  4. package/dist/cjs/opencc-wasm.cjs +1 -1
  5. package/dist/cjs/opencc-wasm.wasm +0 -0
  6. package/dist/data/config/hk2s.json +25 -26
  7. package/dist/data/config/hk2sp.json +33 -0
  8. package/dist/data/config/hk2sp_jieba.json +36 -0
  9. package/dist/data/config/hk2t.json +11 -15
  10. package/dist/data/config/jp2t.json +12 -19
  11. package/dist/data/config/opencc_config.schema.json +100 -0
  12. package/dist/data/config/s2hk.json +20 -20
  13. package/dist/data/config/s2hk_jieba.json +30 -0
  14. package/dist/data/config/s2hkp.json +28 -0
  15. package/dist/data/config/s2hkp_jieba.json +31 -0
  16. package/dist/data/config/s2t.json +12 -16
  17. package/dist/data/config/s2t_cngov.json +0 -7
  18. package/dist/data/config/s2t_jieba.json +21 -0
  19. package/dist/data/config/s2tw.json +21 -21
  20. package/dist/data/config/s2tw_jieba.json +30 -0
  21. package/dist/data/config/s2twp.json +22 -26
  22. package/dist/data/config/s2twp_jieba.json +20 -21
  23. package/dist/data/config/t2cngov.json +0 -7
  24. package/dist/data/config/t2cngov_keep_simp.json +0 -7
  25. package/dist/data/config/t2hk.json +16 -8
  26. package/dist/data/config/t2jp.json +5 -11
  27. package/dist/data/config/t2s.json +17 -16
  28. package/dist/data/config/t2s_cngov.json +0 -7
  29. package/dist/data/config/t2tw.json +16 -8
  30. package/dist/data/config/tw2s.json +26 -27
  31. package/dist/data/config/tw2sp.json +27 -30
  32. package/dist/data/config/tw2sp_jieba.json +25 -25
  33. package/dist/data/config/tw2t.json +11 -15
  34. package/dist/data/dict/HKPhrases.ocd2 +0 -0
  35. package/dist/data/dict/HKPhrasesRev.ocd2 +0 -0
  36. package/dist/data/dict/HKVariants.ocd2 +0 -0
  37. package/dist/data/dict/HKVariantsPhrases.ocd2 +0 -0
  38. package/dist/data/dict/HKVariantsRev.ocd2 +0 -0
  39. package/dist/data/dict/HKVariantsRevPhrases.ocd2 +0 -0
  40. package/dist/data/dict/JPShinjitaiCharacters.ocd2 +0 -0
  41. package/dist/data/dict/JPShinjitaiCharactersRev.ocd2 +0 -0
  42. package/dist/data/dict/JPShinjitaiPhrases.ocd2 +0 -0
  43. package/dist/data/dict/STCharacters.ocd2 +0 -0
  44. package/dist/data/dict/STPhrases.ocd2 +0 -0
  45. package/dist/data/dict/TSCharacters.ocd2 +0 -0
  46. package/dist/data/dict/TSCharactersExt.ocd2 +0 -0
  47. package/dist/data/dict/TSPhrases.ocd2 +0 -0
  48. package/dist/data/dict/TWPhrases.ocd2 +0 -0
  49. package/dist/data/dict/TWPhrasesRev.ocd2 +0 -0
  50. package/dist/data/dict/TWVariants.ocd2 +0 -0
  51. package/dist/data/dict/TWVariantsPhrases.ocd2 +0 -0
  52. package/dist/data/dict/TWVariantsRev.ocd2 +0 -0
  53. package/dist/data/dict/TWVariantsRevPhrases.ocd2 +0 -0
  54. package/dist/data/dict/cngov/STCharacters.ocd2 +0 -0
  55. package/dist/data/dict/cngov/STPhrases.ocd2 +0 -0
  56. package/dist/data/dict/cngov/TGCharacters.ocd2 +0 -0
  57. package/dist/data/dict/cngov/TGCharacters_keep_simp.ocd2 +0 -0
  58. package/dist/data/dict/cngov/TGPhrases.ocd2 +0 -0
  59. package/dist/data/dict/cngov/TSCharacters.ocd2 +0 -0
  60. package/dist/data/dict/cngov/TSPhrases.ocd2 +0 -0
  61. package/dist/data/jieba_dict/jieba_merged.ocd2 +0 -0
  62. package/dist/data/jieba_dict/user.dict.utf8 +2 -0
  63. package/dist/esm/index.js +2 -0
  64. package/dist/esm/opencc-wasm.js +1 -1
  65. package/dist/esm/opencc-wasm.wasm +0 -0
  66. package/dist/opencc-wasm.wasm +0 -0
  67. package/package.json +2 -1
  68. package/dist/data/dict/JPVariants.ocd2 +0 -0
  69. package/dist/data/dict/JPVariantsRev.ocd2 +0 -0
  70. package/dist/data/dict/cngov/GovVariants.ocd2 +0 -0
  71. package/dist/data/dict/cngov/GovVariants_keep_simp.ocd2 +0 -0
@@ -7,26 +7,25 @@
7
7
  "model_path": "jieba_dict/hmm_model.utf8"
8
8
  }
9
9
  },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "group",
13
- "dicts": [{
14
- "type": "ocd2",
15
- "file": "STPhrases.ocd2"
16
- }, {
17
- "type": "ocd2",
18
- "file": "STCharacters.ocd2"
19
- }]
10
+ "conversion_chain": [
11
+ {
12
+ "dict": {
13
+ "type": "group",
14
+ "dicts": [
15
+ { "type": "ocd2", "file": "STPhrases.ocd2" },
16
+ { "type": "ocd2", "file": "STCharacters.ocd2" }
17
+ ]
18
+ }
19
+ },
20
+ {
21
+ "dict": {
22
+ "type": "group",
23
+ "dicts": [
24
+ { "type": "ocd2", "file": "TWPhrases.ocd2" },
25
+ { "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
26
+ { "type": "ocd2", "file": "TWVariants.ocd2" }
27
+ ]
28
+ }
20
29
  }
21
- }, {
22
- "dict": {
23
- "type": "ocd2",
24
- "file": "TWPhrases.ocd2"
25
- }
26
- }, {
27
- "dict": {
28
- "type": "ocd2",
29
- "file": "TWVariants.ocd2"
30
- }
31
- }]
30
+ ]
32
31
  }
@@ -1,12 +1,5 @@
1
1
  {
2
2
  "name": "Traditional Chinese to Traditional Chinese (Chinese Government standard)",
3
- "author": "TerryTian-tech",
4
- "license": "Apache License 2.0",
5
- "source": "https://github.com/TerryTian-tech/OpenCC-Traditional-Chinese-characters-according-to-Chinese-government-standards",
6
- "contributors": ["TerryTian-tech", "Yi Jianpeng", "Hu Xinmei", "Duan Yatong"],
7
- "reference": "《通用规范汉字表》(2013)",
8
- "description": "Converts traditional Chinese (from various standards) to China's government standard traditional characters. Includes simplified-to-standard conversion for mixed documents.",
9
-
10
3
  "segmentation": {
11
4
  "type": "mmseg",
12
5
  "dict": {
@@ -1,12 +1,5 @@
1
1
  {
2
2
  "name": "Traditional Chinese to Traditional Chinese (Chinese Government standard) with Simplified Chinese remaining unchanged",
3
- "author": "TerryTian-tech",
4
- "license": "Apache License 2.0",
5
- "source": "https://github.com/TerryTian-tech/OpenCC-Traditional-Chinese-characters-according-to-Chinese-government-standards",
6
- "contributors": ["TerryTian-tech", "Yi Jianpeng", "Hu Xinmei", "Duan Yatong"],
7
- "reference": "《通用规范汉字表》(2013)",
8
- "description": "Conservative conversion that preserves intentional simplified characters in mixed documents while standardizing traditional characters only.",
9
-
10
3
  "segmentation": {
11
4
  "type": "mmseg",
12
5
  "dict": {
@@ -1,16 +1,24 @@
1
1
  {
2
- "name": "Traditional Chinese to Traditional Chinese (Hong Kong variant)",
2
+ "name": "Traditional Chinese (OpenCC Standard) to Traditional Chinese (Hong Kong variant)",
3
3
  "segmentation": {
4
4
  "type": "mmseg",
5
5
  "dict": {
6
- "type": "ocd2",
7
- "file": "HKVariants.ocd2"
6
+ "type": "group",
7
+ "dicts": [
8
+ { "type": "ocd2", "file": "HKVariantsPhrases.ocd2" },
9
+ { "type": "ocd2", "file": "HKVariants.ocd2" }
10
+ ]
8
11
  }
9
12
  },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "ocd2",
13
- "file": "HKVariants.ocd2"
13
+ "conversion_chain": [
14
+ {
15
+ "dict": {
16
+ "type": "group",
17
+ "dicts": [
18
+ { "type": "ocd2", "file": "HKVariantsPhrases.ocd2" },
19
+ { "type": "ocd2", "file": "HKVariants.ocd2" }
20
+ ]
21
+ }
14
22
  }
15
- }]
23
+ ]
16
24
  }
@@ -1,16 +1,10 @@
1
1
  {
2
- "name": "Traditional Chinese Characters (Kyūjitai) to New Japanese Kanji (Shinjitai)",
2
+ "name": "Old Japanese Kanji (Kyūjitai) to New Japanese Kanji (Shinjitai)",
3
3
  "segmentation": {
4
4
  "type": "mmseg",
5
- "dict": {
6
- "type": "ocd2",
7
- "file": "JPVariants.ocd2"
8
- }
5
+ "dict": { "type": "ocd2", "file": "JPShinjitaiCharactersRev.ocd2" }
9
6
  },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "ocd2",
13
- "file": "JPVariants.ocd2"
14
- }
15
- }]
7
+ "conversion_chain": [
8
+ { "dict": { "type": "ocd2", "file": "JPShinjitaiCharactersRev.ocd2" } }
9
+ ]
16
10
  }
@@ -1,22 +1,23 @@
1
1
  {
2
- "name": "Traditional Chinese to Simplified Chinese",
2
+ "name": "Traditional Chinese (OpenCC Standard) to Simplified Chinese",
3
3
  "segmentation": {
4
4
  "type": "mmseg",
5
- "dict": {
6
- "type": "ocd2",
7
- "file": "TSPhrases.ocd2"
8
- }
5
+ "dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
9
6
  },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "group",
13
- "dicts": [{
14
- "type": "ocd2",
15
- "file": "TSPhrases.ocd2"
16
- }, {
17
- "type": "ocd2",
18
- "file": "TSCharacters.ocd2"
19
- }]
7
+ "conversion_chain": [
8
+ {
9
+ "dict": {
10
+ "type": "group",
11
+ "dicts": [
12
+ { "type": "ocd2", "file": "TSPhrases.ocd2" },
13
+ {
14
+ "type": "ocd2",
15
+ "file": "TSCharactersExt.ocd2",
16
+ "may_output_tofu": true
17
+ },
18
+ { "type": "ocd2", "file": "TSCharacters.ocd2" }
19
+ ]
20
+ }
20
21
  }
21
- }]
22
+ ]
22
23
  }
@@ -1,12 +1,5 @@
1
1
  {
2
2
  "name": "Traditional Chinese to Simplified Chinese",
3
- "author": "TerryTian-tech",
4
- "license": "Apache License 2.0",
5
- "source": "https://github.com/TerryTian-tech/OpenCC-Traditional-Chinese-characters-according-to-Chinese-government-standards",
6
- "contributors": ["TerryTian-tech", "Yi Jianpeng", "Hu Xinmei", "Duan Yatong"],
7
- "reference": "《通用规范汉字表》(2013)",
8
- "description": "Conservative conversion that preserves intentional simplified characters in mixed documents while standardizing traditional characters only.",
9
-
10
3
  "segmentation": {
11
4
  "type": "mmseg",
12
5
  "dict": {
@@ -1,16 +1,24 @@
1
1
  {
2
- "name": "Traditional Chinese to Traditional Chinese (Taiwan standard)",
2
+ "name": "Traditional Chinese (OpenCC Standard) to Traditional Chinese (Taiwan Standard)",
3
3
  "segmentation": {
4
4
  "type": "mmseg",
5
5
  "dict": {
6
- "type": "ocd2",
7
- "file": "TWVariants.ocd2"
6
+ "type": "group",
7
+ "dicts": [
8
+ { "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
9
+ { "type": "ocd2", "file": "TWVariants.ocd2" }
10
+ ]
8
11
  }
9
12
  },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "ocd2",
13
- "file": "TWVariants.ocd2"
13
+ "conversion_chain": [
14
+ {
15
+ "dict": {
16
+ "type": "group",
17
+ "dicts": [
18
+ { "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
19
+ { "type": "ocd2", "file": "TWVariants.ocd2" }
20
+ ]
21
+ }
14
22
  }
15
- }]
23
+ ]
16
24
  }
@@ -1,33 +1,32 @@
1
1
  {
2
- "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese",
2
+ "name": "Traditional Chinese (Taiwan Standard) to Simplified Chinese",
3
3
  "segmentation": {
4
4
  "type": "mmseg",
5
- "dict": {
6
- "type": "ocd2",
7
- "file": "TSPhrases.ocd2"
8
- }
5
+ "dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
9
6
  },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "group",
13
- "dicts": [{
14
- "type": "ocd2",
15
- "file": "TWVariantsRevPhrases.ocd2"
16
- }, {
17
- "type": "ocd2",
18
- "file": "TWVariantsRev.ocd2"
19
- }]
20
- }
21
- }, {
22
- "dict": {
23
- "type": "group",
24
- "dicts": [{
25
- "type": "ocd2",
26
- "file": "TSPhrases.ocd2"
27
- }, {
28
- "type": "ocd2",
29
- "file": "TSCharacters.ocd2"
30
- }]
7
+ "conversion_chain": [
8
+ {
9
+ "dict": {
10
+ "type": "group",
11
+ "dicts": [
12
+ { "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
13
+ { "type": "ocd2", "file": "TWVariantsRev.ocd2" }
14
+ ]
15
+ }
16
+ },
17
+ {
18
+ "dict": {
19
+ "type": "group",
20
+ "dicts": [
21
+ { "type": "ocd2", "file": "TSPhrases.ocd2" },
22
+ {
23
+ "type": "ocd2",
24
+ "file": "TSCharactersExt.ocd2",
25
+ "may_output_tofu": true
26
+ },
27
+ { "type": "ocd2", "file": "TSCharacters.ocd2" }
28
+ ]
29
+ }
31
30
  }
32
- }]
31
+ ]
33
32
  }
@@ -1,36 +1,33 @@
1
1
  {
2
- "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese (with phrases)",
2
+ "name": "Traditional Chinese (Taiwan Standard) to Simplified Chinese (Mainland China Phrases)",
3
3
  "segmentation": {
4
4
  "type": "mmseg",
5
- "dict": {
6
- "type": "ocd2",
7
- "file": "TSPhrases.ocd2"
8
- }
5
+ "dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
9
6
  },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "group",
13
- "dicts": [{
14
- "type": "ocd2",
15
- "file": "TWPhrasesRev.ocd2"
16
- }, {
17
- "type": "ocd2",
18
- "file": "TWVariantsRevPhrases.ocd2"
19
- }, {
20
- "type": "ocd2",
21
- "file": "TWVariantsRev.ocd2"
22
- }]
23
- }
24
- }, {
25
- "dict": {
26
- "type": "group",
27
- "dicts": [{
28
- "type": "ocd2",
29
- "file": "TSPhrases.ocd2"
30
- }, {
31
- "type": "ocd2",
32
- "file": "TSCharacters.ocd2"
33
- }]
7
+ "conversion_chain": [
8
+ {
9
+ "dict": {
10
+ "type": "group",
11
+ "dicts": [
12
+ { "type": "ocd2", "file": "TWPhrasesRev.ocd2" },
13
+ { "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
14
+ { "type": "ocd2", "file": "TWVariantsRev.ocd2" }
15
+ ]
16
+ }
17
+ },
18
+ {
19
+ "dict": {
20
+ "type": "group",
21
+ "dicts": [
22
+ { "type": "ocd2", "file": "TSPhrases.ocd2" },
23
+ {
24
+ "type": "ocd2",
25
+ "file": "TSCharactersExt.ocd2",
26
+ "may_output_tofu": true
27
+ },
28
+ { "type": "ocd2", "file": "TSCharacters.ocd2" }
29
+ ]
30
+ }
34
31
  }
35
- }]
32
+ ]
36
33
  }
@@ -7,30 +7,30 @@
7
7
  "model_path": "jieba_dict/hmm_model.utf8"
8
8
  }
9
9
  },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "group",
13
- "dicts": [{
14
- "type": "ocd2",
15
- "file": "TWPhrasesRev.ocd2"
16
- }, {
17
- "type": "ocd2",
18
- "file": "TWVariantsRevPhrases.ocd2"
19
- }, {
20
- "type": "ocd2",
21
- "file": "TWVariantsRev.ocd2"
22
- }]
10
+ "conversion_chain": [
11
+ {
12
+ "dict": {
13
+ "type": "group",
14
+ "dicts": [
15
+ { "type": "ocd2", "file": "TWPhrasesRev.ocd2" },
16
+ { "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
17
+ { "type": "ocd2", "file": "TWVariantsRev.ocd2" }
18
+ ]
19
+ }
20
+ },
21
+ {
22
+ "dict": {
23
+ "type": "group",
24
+ "dicts": [
25
+ { "type": "ocd2", "file": "TSPhrases.ocd2" },
26
+ {
27
+ "type": "ocd2",
28
+ "file": "TSCharactersExt.ocd2",
29
+ "may_output_tofu": true
30
+ },
31
+ { "type": "ocd2", "file": "TSCharacters.ocd2" }
32
+ ]
33
+ }
23
34
  }
24
- }, {
25
- "dict": {
26
- "type": "group",
27
- "dicts": [{
28
- "type": "ocd2",
29
- "file": "TSPhrases.ocd2"
30
- }, {
31
- "type": "ocd2",
32
- "file": "TSCharacters.ocd2"
33
- }]
34
- }
35
- }]
35
+ ]
36
36
  }
@@ -2,21 +2,17 @@
2
2
  "name": "Traditional Chinese (Taiwan Standard) to Traditional Chinese (OpenCC Standard)",
3
3
  "segmentation": {
4
4
  "type": "mmseg",
5
- "dict": {
6
- "type": "ocd2",
7
- "file": "TWVariantsRevPhrases.ocd2"
8
- }
5
+ "dict": { "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" }
9
6
  },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "group",
13
- "dicts": [{
14
- "type": "ocd2",
15
- "file": "TWVariantsRevPhrases.ocd2"
16
- }, {
17
- "type": "ocd2",
18
- "file": "TWVariantsRev.ocd2"
19
- }]
7
+ "conversion_chain": [
8
+ {
9
+ "dict": {
10
+ "type": "group",
11
+ "dicts": [
12
+ { "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
13
+ { "type": "ocd2", "file": "TWVariantsRev.ocd2" }
14
+ ]
15
+ }
20
16
  }
21
- }]
17
+ ]
22
18
  }
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -2,3 +2,5 @@
2
2
  韩玉鉴赏
3
3
  蓝翔 nz
4
4
  区块链 10 nz
5
+ 牛骥同一皁
6
+ 牛骥同一皂
package/dist/esm/index.js CHANGED
@@ -34,6 +34,7 @@ const CONFIG_MAP = {
34
34
  tw: "s2tw.json",
35
35
  twp: "s2twp.json", // 台湾惯用词
36
36
  hk: "s2hk.json",
37
+ hkp: "s2hkp.json", // 香港惯用词
37
38
  cn: null
38
39
  },
39
40
  tw: {
@@ -46,6 +47,7 @@ const CONFIG_MAP = {
46
47
  hk: {
47
48
  cn: "hk2s.json",
48
49
  s: "hk2s.json", // 别名
50
+ sp: "hk2sp.json", // 简体惯用词
49
51
  t: "hk2t.json",
50
52
  hk: null
51
53
  },