opencc-wasm 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +10 -1
  2. package/README.zh.md +10 -1
  3. package/dist/cjs/index.cjs +6 -0
  4. package/dist/cjs/opencc-wasm.cjs +1 -1
  5. package/dist/cjs/opencc-wasm.wasm +0 -0
  6. package/dist/data/config/hk2s.json +5 -0
  7. package/dist/data/config/hk2sp.json +5 -0
  8. package/dist/data/config/hk2sp_jieba.json +30 -5
  9. package/dist/data/config/hk2t.json +4 -4
  10. package/dist/data/config/jp2t.json +4 -4
  11. package/dist/data/config/opencc_config.schema.json +9 -4
  12. package/dist/data/config/s2hk.json +21 -2
  13. package/dist/data/config/s2hk_jieba.json +36 -4
  14. package/dist/data/config/s2hkp.json +21 -2
  15. package/dist/data/config/s2hkp_jieba.json +40 -5
  16. package/dist/data/config/s2t.json +12 -5
  17. package/dist/data/config/s2t_cngov.json +16 -11
  18. package/dist/data/config/s2t_jieba.json +27 -2
  19. package/dist/data/config/s2tw.json +21 -2
  20. package/dist/data/config/s2tw_jieba.json +36 -4
  21. package/dist/data/config/s2twp.json +21 -2
  22. package/dist/data/config/s2twp_jieba.json +40 -5
  23. package/dist/data/config/t2cngov.json +16 -11
  24. package/dist/data/config/t2cngov_keep_simp.json +16 -11
  25. package/dist/data/config/t2hk.json +4 -10
  26. package/dist/data/config/t2jp.json +3 -4
  27. package/dist/data/config/t2s.json +4 -4
  28. package/dist/data/config/t2s_cngov.json +16 -11
  29. package/dist/data/config/t2tw.json +4 -10
  30. package/dist/data/config/tw2s.json +5 -0
  31. package/dist/data/config/tw2sp.json +5 -0
  32. package/dist/data/config/tw2sp_jieba.json +30 -5
  33. package/dist/data/config/tw2t.json +4 -4
  34. package/dist/data/dict/CJK_Compatibility_Ideographs.ocd2 +0 -0
  35. package/dist/data/dict/HKPhrases.ocd2 +0 -0
  36. package/dist/data/dict/HKPhrasesRev.ocd2 +0 -0
  37. package/dist/data/dict/HKVariants.ocd2 +0 -0
  38. package/dist/data/dict/HKVariantsRev.ocd2 +0 -0
  39. package/dist/data/dict/HKVariantsRevPhrases.ocd2 +0 -0
  40. package/dist/data/dict/JPShinjitaiPhrases.ocd2 +0 -0
  41. package/dist/data/dict/STPhrases.ocd2 +0 -0
  42. package/dist/data/dict/STPhrases_GeneratedFromRegionalPhrases.ocd2 +0 -0
  43. package/dist/data/dict/TSCharacters.ocd2 +0 -0
  44. package/dist/data/dict/TSPhrases.ocd2 +0 -0
  45. package/dist/data/dict/TWPhrases.ocd2 +0 -0
  46. package/dist/data/dict/TWPhrasesRev.ocd2 +0 -0
  47. package/dist/data/dict/cngov/STCharacters.ocd2 +0 -0
  48. package/dist/data/dict/cngov/STPhrases.ocd2 +0 -0
  49. package/dist/data/dict/cngov/TGCharacters.ocd2 +0 -0
  50. package/dist/data/dict/cngov/TGCharacters_keep_simp.ocd2 +0 -0
  51. package/dist/data/dict/cngov/TGPhrases.ocd2 +0 -0
  52. package/dist/data/dict/cngov/TSCharacters.ocd2 +0 -0
  53. package/dist/data/dict/cngov/TSPhrases.ocd2 +0 -0
  54. package/dist/data/jieba_dict/jieba.dict.utf8 +1 -1
  55. package/dist/data/jieba_dict/jieba_merged.ocd2 +0 -0
  56. package/dist/data/jieba_dict/user.dict.utf8 +0 -1
  57. package/dist/esm/index.js +6 -0
  58. package/dist/esm/opencc-wasm.js +1 -1
  59. package/dist/esm/opencc-wasm.wasm +0 -0
  60. package/dist/opencc-wasm.wasm +0 -0
  61. package/index.d.ts +1 -0
  62. package/package.json +1 -1
@@ -1,9 +1,8 @@
1
1
  {
2
2
  "name": "Old Japanese Kanji (Kyūjitai) to New Japanese Kanji (Shinjitai)",
3
- "segmentation": {
4
- "type": "mmseg",
5
- "dict": { "type": "ocd2", "file": "JPShinjitaiCharactersRev.ocd2" }
6
- },
3
+ "normalization": [
4
+ { "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
5
+ ],
7
6
  "conversion_chain": [
8
7
  { "dict": { "type": "ocd2", "file": "JPShinjitaiCharactersRev.ocd2" } }
9
8
  ]
@@ -1,13 +1,13 @@
1
1
  {
2
2
  "name": "Traditional Chinese (OpenCC Standard) to Simplified Chinese",
3
- "segmentation": {
4
- "type": "mmseg",
5
- "dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
6
- },
3
+ "normalization": [
4
+ { "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
5
+ ],
7
6
  "conversion_chain": [
8
7
  {
9
8
  "dict": {
10
9
  "type": "group",
10
+ "match_policy": "short_circuit",
11
11
  "dicts": [
12
12
  { "type": "ocd2", "file": "TSPhrases.ocd2" },
13
13
  {
@@ -7,16 +7,21 @@
7
7
  "file": "cngov/TSPhrases.ocd2"
8
8
  }
9
9
  },
10
- "conversion_chain": [{
11
- "dict": {
12
- "type": "group",
13
- "dicts": [{
14
- "type": "ocd2",
15
- "file": "cngov/TSPhrases.ocd2"
16
- }, {
17
- "type": "ocd2",
18
- "file": "cngov/TSCharacters.ocd2"
19
- }]
10
+ "conversion_chain": [
11
+ {
12
+ "dict": {
13
+ "type": "group",
14
+ "dicts": [
15
+ {
16
+ "type": "ocd2",
17
+ "file": "cngov/TSPhrases.ocd2"
18
+ },
19
+ {
20
+ "type": "ocd2",
21
+ "file": "cngov/TSCharacters.ocd2"
22
+ }
23
+ ]
24
+ }
20
25
  }
21
- }]
26
+ ]
22
27
  }
@@ -1,19 +1,13 @@
1
1
  {
2
2
  "name": "Traditional Chinese (OpenCC Standard) to Traditional Chinese (Taiwan Standard)",
3
- "segmentation": {
4
- "type": "mmseg",
5
- "dict": {
6
- "type": "group",
7
- "dicts": [
8
- { "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
9
- { "type": "ocd2", "file": "TWVariants.ocd2" }
10
- ]
11
- }
12
- },
3
+ "normalization": [
4
+ { "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
5
+ ],
13
6
  "conversion_chain": [
14
7
  {
15
8
  "dict": {
16
9
  "type": "group",
10
+ "match_policy": "short_circuit",
17
11
  "dicts": [
18
12
  { "type": "ocd2", "file": "TWVariantsPhrases.ocd2" },
19
13
  { "type": "ocd2", "file": "TWVariants.ocd2" }
@@ -1,5 +1,8 @@
1
1
  {
2
2
  "name": "Traditional Chinese (Taiwan Standard) to Simplified Chinese",
3
+ "normalization": [
4
+ { "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
5
+ ],
3
6
  "segmentation": {
4
7
  "type": "mmseg",
5
8
  "dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
@@ -8,6 +11,7 @@
8
11
  {
9
12
  "dict": {
10
13
  "type": "group",
14
+ "match_policy": "short_circuit",
11
15
  "dicts": [
12
16
  { "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
13
17
  { "type": "ocd2", "file": "TWVariantsRev.ocd2" }
@@ -17,6 +21,7 @@
17
21
  {
18
22
  "dict": {
19
23
  "type": "group",
24
+ "match_policy": "short_circuit",
20
25
  "dicts": [
21
26
  { "type": "ocd2", "file": "TSPhrases.ocd2" },
22
27
  {
@@ -1,5 +1,8 @@
1
1
  {
2
2
  "name": "Traditional Chinese (Taiwan Standard) to Simplified Chinese (Mainland China Phrases)",
3
+ "normalization": [
4
+ { "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
5
+ ],
3
6
  "segmentation": {
4
7
  "type": "mmseg",
5
8
  "dict": { "type": "ocd2", "file": "TSPhrases.ocd2" }
@@ -8,6 +11,7 @@
8
11
  {
9
12
  "dict": {
10
13
  "type": "group",
14
+ "match_policy": "short_circuit",
11
15
  "dicts": [
12
16
  { "type": "ocd2", "file": "TWPhrasesRev.ocd2" },
13
17
  { "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
@@ -18,6 +22,7 @@
18
22
  {
19
23
  "dict": {
20
24
  "type": "group",
25
+ "match_policy": "short_circuit",
21
26
  "dicts": [
22
27
  { "type": "ocd2", "file": "TSPhrases.ocd2" },
23
28
  {
@@ -1,5 +1,13 @@
1
1
  {
2
2
  "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese (with phrases, Jieba Segmentation - Experimental)",
3
+ "normalization": [
4
+ {
5
+ "dict": {
6
+ "type": "ocd2",
7
+ "file": "CJK_Compatibility_Ideographs.ocd2"
8
+ }
9
+ }
10
+ ],
3
11
  "segmentation": {
4
12
  "type": "jieba",
5
13
  "resources": {
@@ -11,24 +19,41 @@
11
19
  {
12
20
  "dict": {
13
21
  "type": "group",
22
+ "match_policy": "short_circuit",
14
23
  "dicts": [
15
- { "type": "ocd2", "file": "TWPhrasesRev.ocd2" },
16
- { "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
17
- { "type": "ocd2", "file": "TWVariantsRev.ocd2" }
24
+ {
25
+ "type": "ocd2",
26
+ "file": "TWPhrasesRev.ocd2"
27
+ },
28
+ {
29
+ "type": "ocd2",
30
+ "file": "TWVariantsRevPhrases.ocd2"
31
+ },
32
+ {
33
+ "type": "ocd2",
34
+ "file": "TWVariantsRev.ocd2"
35
+ }
18
36
  ]
19
37
  }
20
38
  },
21
39
  {
22
40
  "dict": {
23
41
  "type": "group",
42
+ "match_policy": "short_circuit",
24
43
  "dicts": [
25
- { "type": "ocd2", "file": "TSPhrases.ocd2" },
44
+ {
45
+ "type": "ocd2",
46
+ "file": "TSPhrases.ocd2"
47
+ },
26
48
  {
27
49
  "type": "ocd2",
28
50
  "file": "TSCharactersExt.ocd2",
29
51
  "may_output_tofu": true
30
52
  },
31
- { "type": "ocd2", "file": "TSCharacters.ocd2" }
53
+ {
54
+ "type": "ocd2",
55
+ "file": "TSCharacters.ocd2"
56
+ }
32
57
  ]
33
58
  }
34
59
  }
@@ -1,13 +1,13 @@
1
1
  {
2
2
  "name": "Traditional Chinese (Taiwan Standard) to Traditional Chinese (OpenCC Standard)",
3
- "segmentation": {
4
- "type": "mmseg",
5
- "dict": { "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" }
6
- },
3
+ "normalization": [
4
+ { "dict": { "type": "ocd2", "file": "CJK_Compatibility_Ideographs.ocd2" } }
5
+ ],
7
6
  "conversion_chain": [
8
7
  {
9
8
  "dict": {
10
9
  "type": "group",
10
+ "match_policy": "short_circuit",
11
11
  "dicts": [
12
12
  { "type": "ocd2", "file": "TWVariantsRevPhrases.ocd2" },
13
13
  { "type": "ocd2", "file": "TWVariantsRev.ocd2" }
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -887,7 +887,6 @@ T恤 4 n
887
887
  一准儿 2 l
888
888
  一凉 3 a
889
889
  一几 10 m
890
- 一出 964 m
891
890
  一出场 3 m
892
891
  一出头 3 m
893
892
  一出戏 3 i
@@ -18653,6 +18652,7 @@ T恤 4 n
18653
18652
  也门 234 d
18654
18653
  也门共和国 14 ns
18655
18654
  习 1216 v
18655
+ 习总书记 4 nr
18656
18656
  习与性成 3 i
18657
18657
  习中学 3 n
18658
18658
  习举业 2 n
@@ -1,5 +1,4 @@
1
1
  云计算
2
- 韩玉鉴赏
3
2
  蓝翔 nz
4
3
  区块链 10 nz
5
4
  牛骥同一皁
package/dist/esm/index.js CHANGED
@@ -165,6 +165,9 @@ async function ensureConfig(configName) {
165
165
 
166
166
  const dicts = new Set();
167
167
  const resources = new Set();
168
+ if (Array.isArray(cfgJson.normalization)) {
169
+ cfgJson.normalization.forEach((item) => collectOcd2Files(item?.dict, dicts));
170
+ }
168
171
  collectOcd2Files(cfgJson.segmentation?.dict, dicts);
169
172
  collectSegmentationResources(cfgJson.segmentation, resources);
170
173
  if (Array.isArray(cfgJson.conversion_chain)) {
@@ -198,6 +201,9 @@ async function ensureConfig(configName) {
198
201
  node.dicts.forEach(patchPaths);
199
202
  }
200
203
  };
204
+ if (Array.isArray(cfgJson.normalization)) {
205
+ cfgJson.normalization.forEach((item) => patchPaths(item?.dict));
206
+ }
201
207
  patchPaths(cfgJson.segmentation?.dict);
202
208
  if (Array.isArray(cfgJson.conversion_chain)) {
203
209
  cfgJson.conversion_chain.forEach((item) => patchPaths(item?.dict));