opencc-wasm 0.8.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/README.md +50 -5
  2. package/README.zh.md +51 -6
  3. package/dist/cjs/index.cjs +9 -3
  4. package/dist/cjs/opencc-wasm.cjs +1 -1
  5. package/dist/cjs/opencc-wasm.wasm +0 -0
  6. package/dist/data/config/hk2s.json +30 -26
  7. package/dist/data/config/hk2sp.json +38 -0
  8. package/dist/data/config/hk2sp_jieba.json +61 -0
  9. package/dist/data/config/hk2t.json +14 -18
  10. package/dist/data/config/jp2t.json +15 -22
  11. package/dist/data/config/opencc_config.schema.json +45 -94
  12. package/dist/data/config/s2hk.json +37 -18
  13. package/dist/data/config/s2hk_jieba.json +51 -16
  14. package/dist/data/config/s2hkp.json +47 -0
  15. package/dist/data/config/s2hkp_jieba.json +66 -0
  16. package/dist/data/config/s2t.json +22 -19
  17. package/dist/data/config/s2t_cngov.json +16 -18
  18. package/dist/data/config/s2t_jieba.json +35 -11
  19. package/dist/data/config/s2tw.json +38 -19
  20. package/dist/data/config/s2tw_jieba.json +51 -16
  21. package/dist/data/config/s2twp.json +39 -24
  22. package/dist/data/config/s2twp_jieba.json +55 -21
  23. package/dist/data/config/t2cngov.json +16 -18
  24. package/dist/data/config/t2cngov_keep_simp.json +16 -18
  25. package/dist/data/config/t2hk.json +15 -13
  26. package/dist/data/config/t2jp.json +7 -14
  27. package/dist/data/config/t2s.json +20 -19
  28. package/dist/data/config/t2s_cngov.json +16 -18
  29. package/dist/data/config/t2tw.json +15 -13
  30. package/dist/data/config/tw2s.json +31 -27
  31. package/dist/data/config/tw2sp.json +32 -30
  32. package/dist/data/config/tw2sp_jieba.json +50 -25
  33. package/dist/data/config/tw2t.json +14 -18
  34. package/dist/data/dict/CJK_Compatibility_Ideographs.ocd2 +0 -0
  35. package/dist/data/dict/HKPhrases.ocd2 +0 -0
  36. package/dist/data/dict/HKPhrasesRev.ocd2 +0 -0
  37. package/dist/data/dict/HKVariants.ocd2 +0 -0
  38. package/dist/data/dict/HKVariantsPhrases.ocd2 +0 -0
  39. package/dist/data/dict/HKVariantsRev.ocd2 +0 -0
  40. package/dist/data/dict/HKVariantsRevPhrases.ocd2 +0 -0
  41. package/dist/data/dict/JPShinjitaiCharacters.ocd2 +0 -0
  42. package/dist/data/dict/JPShinjitaiCharactersRev.ocd2 +0 -0
  43. package/dist/data/dict/JPShinjitaiPhrases.ocd2 +0 -0
  44. package/dist/data/dict/STCharacters.ocd2 +0 -0
  45. package/dist/data/dict/STPhrases.ocd2 +0 -0
  46. package/dist/data/dict/STPhrases_GeneratedFromRegionalPhrases.ocd2 +0 -0
  47. package/dist/data/dict/TSCharacters.ocd2 +0 -0
  48. package/dist/data/dict/TSCharactersExt.ocd2 +0 -0
  49. package/dist/data/dict/TSPhrases.ocd2 +0 -0
  50. package/dist/data/dict/TWPhrases.ocd2 +0 -0
  51. package/dist/data/dict/TWPhrasesRev.ocd2 +0 -0
  52. package/dist/data/dict/TWVariants.ocd2 +0 -0
  53. package/dist/data/dict/TWVariantsPhrases.ocd2 +0 -0
  54. package/dist/data/dict/TWVariantsRev.ocd2 +0 -0
  55. package/dist/data/dict/TWVariantsRevPhrases.ocd2 +0 -0
  56. package/dist/data/dict/cngov/STCharacters.ocd2 +0 -0
  57. package/dist/data/dict/cngov/STPhrases.ocd2 +0 -0
  58. package/dist/data/dict/cngov/TGCharacters.ocd2 +0 -0
  59. package/dist/data/dict/cngov/TGCharacters_keep_simp.ocd2 +0 -0
  60. package/dist/data/dict/cngov/TGPhrases.ocd2 +0 -0
  61. package/dist/data/dict/cngov/TSCharacters.ocd2 +0 -0
  62. package/dist/data/dict/cngov/TSPhrases.ocd2 +0 -0
  63. package/dist/data/jieba_dict/jieba.dict.utf8 +1 -1
  64. package/dist/data/jieba_dict/jieba_merged.ocd2 +0 -0
  65. package/dist/data/jieba_dict/user.dict.utf8 +2 -1
  66. package/dist/esm/index.js +8 -0
  67. package/dist/esm/opencc-wasm.js +1 -1
  68. package/dist/esm/opencc-wasm.wasm +0 -0
  69. package/dist/opencc-wasm.wasm +0 -0
  70. package/index.d.ts +1 -0
  71. package/package.json +1 -1
  72. package/dist/data/dict/JPVariants.ocd2 +0 -0
  73. package/dist/data/dict/JPVariantsRev.ocd2 +0 -0
  74. package/dist/data/dict/cngov/GovVariants.ocd2 +0 -0
  75. package/dist/data/dict/cngov/GovVariants_keep_simp.ocd2 +0 -0
@@ -1,5 +1,13 @@
1
1
  {
2
2
  "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese (with phrases, Jieba Segmentation - Experimental)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "ocd2",
7
+ "file": "CJK_Compatibility_Ideographs.ocd2"
8
+ }
9
+ }
10
+ ],
3
11
  "segmentation": {
4
12
  "type": "jieba",
5
13
  "resources": {
@@ -7,30 +15,47 @@
7
15
  "model_path": "jieba_dict/hmm_model.utf8"
8
16
  }
9
17
  },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "group",
13
- "dicts": [{
14
- "type": "ocd2",
15
- "file": "TWPhrasesRev.ocd2"
16
- }, {
17
- "type": "ocd2",
18
- "file": "TWVariantsRevPhrases.ocd2"
19
- }, {
20
- "type": "ocd2",
21
- "file": "TWVariantsRev.ocd2"
22
- }]
23
- }
24
- }, {
25
- "dict": {
26
- "type": "group",
27
- "dicts": [{
28
- "type": "ocd2",
29
- "file": "TSPhrases.ocd2"
30
- }, {
31
- "type": "ocd2",
32
- "file": "TSCharacters.ocd2"
33
- }]
18
+ "conversion_chain": [
19
+ {
20
+ "dict": {
21
+ "type": "group",
22
+ "match_policy": "short_circuit",
23
+ "dicts": [
24
+ {
25
+ "type": "ocd2",
26
+ "file": "TWPhrasesRev.ocd2"
27
+ },
28
+ {
29
+ "type": "ocd2",
30
+ "file": "TWVariantsRevPhrases.ocd2"
31
+ },
32
+ {
33
+ "type": "ocd2",
34
+ "file": "TWVariantsRev.ocd2"
35
+ }
36
+ ]
37
+ }
38
+ },
39
+ {
40
+ "dict": {
41
+ "type": "group",
42
+ "match_policy": "short_circuit",
43
+ "dicts": [
44
+ {
45
+ "type": "ocd2",
46
+ "file": "TSPhrases.ocd2"
47
+ },
48
+ {
49
+ "type": "ocd2",
50
+ "file": "TSCharactersExt.ocd2",
51
+ "may_output_tofu": true
52
+ },
53
+ {
54
+ "type": "ocd2",
55
+ "file": "TSCharacters.ocd2"
56
+ }
57
+ ]
58
+ }
34
59
  }
35
- }]
60
+ ]
36
61
  }
@@ -1,22 +1,18 @@
1
1
  {
2
2
  "name": "Traditional Chinese (Taiwan Standard) to Traditional Chinese (OpenCC Standard)",
3
- "segmentation": {
4
- "type": "mmseg",
5
- "dict": {
6
- "type": "ocd2",
7
- "file": "TWVariantsRevPhrases.ocd2"
3
+ "normalization": [
4
+ { "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
5
+ ],
6
+ "conversion_chain": [
7
+ {
8
+ "dict": {
9
+ "type": "group",
10
+ "match_policy": "short_circuit",
11
+ "dicts": [
12
+ { "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
13
+ { "type": "ocd2", "file": "TWVariantsRev.ocd2" }
14
+ ]
15
+ }
8
16
  }
9
- },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "group",
13
- "dicts": [{
14
- "type": "ocd2",
15
- "file": "TWVariantsRevPhrases.ocd2"
16
- }, {
17
- "type": "ocd2",
18
- "file": "TWVariantsRev.ocd2"
19
- }]
20
- }
21
- }]
17
+ ]
22
18
  }
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -887,7 +887,6 @@ T恤 4 n
887
887
  一准儿 2 l
888
888
  一凉 3 a
889
889
  一几 10 m
890
- 一出 964 m
891
890
  一出场 3 m
892
891
  一出头 3 m
893
892
  一出戏 3 i
@@ -18653,6 +18652,7 @@ T恤 4 n
18653
18652
  也门 234 d
18654
18653
  也门共和国 14 ns
18655
18654
  习 1216 v
18655
+ 习总书记 4 nr
18656
18656
  习与性成 3 i
18657
18657
  习中学 3 n
18658
18658
  习举业 2 n
@@ -1,4 +1,5 @@
1
1
  云计算
2
- 韩玉鉴赏
3
2
  蓝翔 nz
4
3
  区块链 10 nz
4
+ 牛骥同一皁
5
+ 牛骥同一皂
package/dist/esm/index.js CHANGED
@@ -34,6 +34,7 @@ const CONFIG_MAP = {
34
34
  tw: "s2tw.json",
35
35
  twp: "s2twp.json", // 台湾惯用词
36
36
  hk: "s2hk.json",
37
+ hkp: "s2hkp.json", // 香港惯用词
37
38
  cn: null
38
39
  },
39
40
  tw: {
@@ -46,6 +47,7 @@ const CONFIG_MAP = {
46
47
  hk: {
47
48
  cn: "hk2s.json",
48
49
  s: "hk2s.json", // 别名
50
+ sp: "hk2sp.json", // 简体惯用词
49
51
  t: "hk2t.json",
50
52
  hk: null
51
53
  },
@@ -163,6 +165,9 @@ async function ensureConfig(configName) {
163
165
 
164
166
  const dicts = new Set();
165
167
  const resources = new Set();
168
+ if (Array.isArray(cfgJson.normalization)) {
169
+ cfgJson.normalization.forEach((item) => collectOcd2Files(item?.dict, dicts));
170
+ }
166
171
  collectOcd2Files(cfgJson.segmentation?.dict, dicts);
167
172
  collectSegmentationResources(cfgJson.segmentation, resources);
168
173
  if (Array.isArray(cfgJson.conversion_chain)) {
@@ -196,6 +201,9 @@ async function ensureConfig(configName) {
196
201
  node.dicts.forEach(patchPaths);
197
202
  }
198
203
  };
204
+ if (Array.isArray(cfgJson.normalization)) {
205
+ cfgJson.normalization.forEach((item) => patchPaths(item?.dict));
206
+ }
199
207
  patchPaths(cfgJson.segmentation?.dict);
200
208
  if (Array.isArray(cfgJson.conversion_chain)) {
201
209
  cfgJson.conversion_chain.forEach((item) => patchPaths(item?.dict));