unihan2 0.2.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -111,6 +111,7 @@ cp1,cp2,age
111
111
  0616,061A,5.1
112
112
  061B,061B,1.1
113
113
  061C,061C,6.3
114
+ 061D,061D,14.0
114
115
  061E,061E,4.1
115
116
  061F,061F,1.1
116
117
  0620,0620,6.0
@@ -147,13 +148,16 @@ cp1,cp2,age
147
148
  0800,083E,5.2
148
149
  0840,085E,6.0
149
150
  0860,086A,10.0
151
+ 0870,089F,14.0
150
152
  08A0,08A0,6.1
151
153
  08A1,08A1,7.0
152
154
  08A2,08AC,6.1
153
155
  08AD,08B2,7.0
154
156
  08B3,08B4,8.0
157
+ 08B5,08B5,14.0
155
158
  08B6,08BD,9.0
156
159
  08BE,08C7,13.0
160
+ 08C8,08D2,14.0
157
161
  08D3,08D3,11.0
158
162
  08D4,08E2,9.0
159
163
  08E3,08E3,8.0
@@ -229,10 +233,12 @@ cp1,cp2,age
229
233
  0C05,0C33,1.1
230
234
  0C34,0C34,7.0
231
235
  0C35,0C39,1.1
236
+ 0C3C,0C3C,14.0
232
237
  0C3D,0C3D,5.1
233
238
  0C3E,0C56,1.1
234
239
  0C58,0C59,5.1
235
240
  0C5A,0C5A,8.0
241
+ 0C5D,0C5D,14.0
236
242
  0C60,0C61,1.1
237
243
  0C62,0C63,5.1
238
244
  0C66,0C6F,1.1
@@ -244,10 +250,13 @@ cp1,cp2,age
244
250
  0C84,0C84,11.0
245
251
  0C85,0CB9,1.1
246
252
  0CBC,0CBD,4.0
247
- 0CBE,0CE1,1.1
253
+ 0CBE,0CD6,1.1
254
+ 0CDD,0CDD,14.0
255
+ 0CDE,0CE1,1.1
248
256
  0CE2,0CE3,5.0
249
257
  0CE6,0CEF,1.1
250
258
  0CF1,0CF2,5.0
259
+ 0CF3,0CF3,15.0
251
260
  0D00,0D00,10.0
252
261
  0D01,0D01,7.0
253
262
  0D02,0D03,1.1
@@ -294,7 +303,9 @@ cp1,cp2,age
294
303
  0EAC,0EAC,12.0
295
304
  0EAD,0EB9,1.1
296
305
  0EBA,0EBA,12.0
297
- 0EBB,0EDD,1.1
306
+ 0EBB,0ECD,1.1
307
+ 0ECE,0ECE,15.0
308
+ 0ED0,0EDD,1.1
298
309
  0EDE,0EDF,6.1
299
310
  0F00,0F69,2.0
300
311
  0F6A,0F6A,3.0
@@ -373,12 +384,18 @@ cp1,cp2,age
373
384
  1677,167F,5.2
374
385
  1680,16F0,3.0
375
386
  16F1,16F8,7.0
376
- 1700,1773,3.2
387
+ 1700,170C,3.2
388
+ 170D,170D,14.0
389
+ 170E,1714,3.2
390
+ 1715,171F,14.0
391
+ 1720,1773,3.2
377
392
  1780,17DC,3.0
378
393
  17DD,17DD,4.0
379
394
  17E0,17E9,3.0
380
395
  17F0,17F9,4.0
381
- 1800,1877,3.0
396
+ 1800,180E,3.0
397
+ 180F,180F,14.0
398
+ 1810,1877,3.0
382
399
  1878,1878,11.0
383
400
  1880,18A9,3.0
384
401
  18AA,18AA,5.1
@@ -396,7 +413,11 @@ cp1,cp2,age
396
413
  1A20,1AAD,5.2
397
414
  1AB0,1ABE,7.0
398
415
  1ABF,1AC0,13.0
399
- 1B00,1B7C,5.0
416
+ 1AC1,1ACE,14.0
417
+ 1B00,1B4B,5.0
418
+ 1B4C,1B4C,14.0
419
+ 1B50,1B7C,5.0
420
+ 1B7D,1B7E,14.0
400
421
  1B80,1BAA,5.1
401
422
  1BAB,1BAD,6.1
402
423
  1BAE,1BB9,5.1
@@ -417,6 +438,7 @@ cp1,cp2,age
417
438
  1DCB,1DE6,5.1
418
439
  1DE7,1DF5,7.0
419
440
  1DF6,1DF9,10.0
441
+ 1DFA,1DFA,14.0
420
442
  1DFB,1DFB,9.0
421
443
  1DFC,1DFC,6.0
422
444
  1DFD,1DFD,5.2
@@ -456,6 +478,7 @@ cp1,cp2,age
456
478
  20BB,20BD,7.0
457
479
  20BE,20BE,8.0
458
480
  20BF,20BF,10.0
481
+ 20C0,20C0,14.0
459
482
  20D0,20E1,1.1
460
483
  20E2,20E3,3.0
461
484
  20E4,20EA,3.2
@@ -591,7 +614,10 @@ cp1,cp2,age
591
614
  2BEC,2BEF,8.0
592
615
  2BF0,2BFE,11.0
593
616
  2BFF,2BFF,12.0
594
- 2C00,2C5E,4.1
617
+ 2C00,2C2E,4.1
618
+ 2C2F,2C2F,14.0
619
+ 2C30,2C5E,4.1
620
+ 2C5F,2C5F,14.0
595
621
  2C60,2C6C,5.0
596
622
  2C6D,2C6F,5.1
597
623
  2C70,2C70,5.2
@@ -622,6 +648,7 @@ cp1,cp2,age
622
648
  2E4A,2E4E,11.0
623
649
  2E4F,2E4F,12.0
624
650
  2E50,2E52,13.0
651
+ 2E53,2E5D,14.0
625
652
  2E80,2FFB,3.0
626
653
  3000,3037,1.1
627
654
  3038,303A,3.0
@@ -677,6 +704,7 @@ cp1,cp2,age
677
704
  9FD6,9FEA,10.0
678
705
  9FEB,9FEF,11.0
679
706
  9FF0,9FFC,13.0
707
+ 9FFD,9FFF,14.0
680
708
  A000,A4A1,3.0
681
709
  A4A2,A4A3,3.2
682
710
  A4A4,A4B3,3.0
@@ -714,8 +742,12 @@ A7AF,A7AF,11.0
714
742
  A7B0,A7B1,7.0
715
743
  A7B2,A7B7,8.0
716
744
  A7B8,A7B9,11.0
717
- A7BA,A7C6,12.0
718
- A7C7,A7F6,13.0
745
+ A7BA,A7BF,12.0
746
+ A7C0,A7C1,14.0
747
+ A7C2,A7C6,12.0
748
+ A7C7,A7CA,13.0
749
+ A7D0,A7F4,14.0
750
+ A7F5,A7F6,13.0
719
751
  A7F7,A7F7,7.0
720
752
  A7F8,A7F9,6.1
721
753
  A7FA,A7FA,6.0
@@ -757,9 +789,15 @@ FB00,FB17,1.1
757
789
  FB1D,FB1D,3.0
758
790
  FB1E,FBB1,1.1
759
791
  FBB2,FBC1,6.0
760
- FBD3,FDFB,1.1
792
+ FBC2,FBC2,14.0
793
+ FBD3,FD3F,1.1
794
+ FD40,FD4F,14.0
795
+ FD50,FDC7,1.1
796
+ FDCF,FDCF,14.0
797
+ FDF0,FDFB,1.1
761
798
  FDFC,FDFC,3.2
762
799
  FDFD,FDFD,4.0
800
+ FDFE,FDFF,14.0
763
801
  FE00,FE0F,3.2
764
802
  FE10,FE19,4.1
765
803
  FE20,FE23,1.1
@@ -799,7 +837,10 @@ FFFD,FFFD,1.1
799
837
  10428,1044D,3.1
800
838
  1044E,104A9,4.0
801
839
  104B0,104FB,9.0
802
- 10500,10767,7.0
840
+ 10500,1056F,7.0
841
+ 10570,105BC,14.0
842
+ 10600,10767,7.0
843
+ 10780,107BA,14.0
803
844
  10800,1083F,4.0
804
845
  10840,1085F,5.2
805
846
  10860,108AF,7.0
@@ -826,12 +867,16 @@ FFFD,FFFD,1.1
826
867
  10D00,10D39,11.0
827
868
  10E60,10E7E,5.2
828
869
  10E80,10EB1,13.0
870
+ 10EFD,10EFF,15.0
829
871
  10F00,10F59,11.0
872
+ 10F70,10F89,14.0
830
873
  10FB0,10FCB,13.0
831
874
  10FE0,10FF6,12.0
832
875
  11000,1106F,6.0
876
+ 11070,11075,14.0
833
877
  1107F,1107F,7.0
834
878
  11080,110C1,5.2
879
+ 110C2,110C2,14.0
835
880
  110CD,110CD,11.0
836
881
  110D0,11143,6.1
837
882
  11144,11146,11.0
@@ -846,6 +891,7 @@ FFFD,FFFD,1.1
846
891
  111DB,111DF,8.0
847
892
  111E1,1123D,7.0
848
893
  1123E,1123E,9.0
894
+ 1123F,11241,15.0
849
895
  11280,112A9,8.0
850
896
  112B0,112F9,7.0
851
897
  11300,11300,8.0
@@ -866,10 +912,12 @@ FFFD,FFFD,1.1
866
912
  11660,1166C,9.0
867
913
  11680,116B7,6.1
868
914
  116B8,116B8,12.0
915
+ 116B9,116B9,14.0
869
916
  116C0,116C9,6.1
870
917
  11700,11719,8.0
871
918
  1171A,1171A,11.0
872
919
  1171D,1173F,8.0
920
+ 11740,11746,14.0
873
921
  11800,1183B,11.0
874
922
  118A0,118FF,7.0
875
923
  11900,11959,13.0
@@ -879,10 +927,13 @@ FFFD,FFFD,1.1
879
927
  11A86,11A9C,10.0
880
928
  11A9D,11A9D,11.0
881
929
  11A9E,11AA2,10.0
930
+ 11AB0,11ABF,14.0
882
931
  11AC0,11AF8,7.0
932
+ 11B00,11B09,15.0
883
933
  11C00,11CB6,9.0
884
934
  11D00,11D59,10.0
885
935
  11D60,11EF8,11.0
936
+ 11F00,11F59,15.0
886
937
  11FB0,11FB0,13.0
887
938
  11FC0,11FFF,12.0
888
939
  12000,1236E,5.0
@@ -893,11 +944,16 @@ FFFD,FFFD,1.1
893
944
  12470,12473,5.0
894
945
  12474,12474,7.0
895
946
  12480,12543,8.0
947
+ 12F90,12FF2,14.0
896
948
  13000,1342E,5.2
949
+ 1342F,1342F,15.0
897
950
  13430,13438,12.0
951
+ 13439,13455,15.0
898
952
  14400,14646,8.0
899
953
  16800,16A38,6.0
900
- 16A40,16B8F,7.0
954
+ 16A40,16A6F,7.0
955
+ 16A70,16AC9,14.0
956
+ 16AD0,16B8F,7.0
901
957
  16E40,16E9A,11.0
902
958
  16F00,16F44,6.1
903
959
  16F45,16F4F,12.0
@@ -913,16 +969,24 @@ FFFD,FFFD,1.1
913
969
  187F2,187F7,12.0
914
970
  18800,18AF2,9.0
915
971
  18AF3,18D08,13.0
972
+ 1AFF0,1AFFE,14.0
916
973
  1B000,1B001,6.0
917
974
  1B002,1B11E,10.0
918
- 1B150,1B167,12.0
975
+ 1B11F,1B122,14.0
976
+ 1B132,1B132,15.0
977
+ 1B150,1B152,12.0
978
+ 1B155,1B155,15.0
979
+ 1B164,1B167,12.0
919
980
  1B170,1B2FB,10.0
920
981
  1BC00,1BCA3,7.0
982
+ 1CF00,1CFC3,14.0
921
983
  1D000,1D126,3.1
922
984
  1D129,1D129,5.1
923
985
  1D12A,1D1DD,3.1
924
986
  1D1DE,1D1E8,8.0
987
+ 1D1E9,1D1EA,14.0
925
988
  1D200,1D245,4.1
989
+ 1D2C0,1D2D3,15.0
926
990
  1D2E0,1D2F3,11.0
927
991
  1D300,1D356,4.0
928
992
  1D360,1D371,5.0
@@ -935,8 +999,15 @@ FFFD,FFFD,1.1
935
999
  1D7CA,1D7CB,5.0
936
1000
  1D7CE,1D7FF,3.1
937
1001
  1D800,1DAAF,8.0
1002
+ 1DF00,1DF1E,14.0
1003
+ 1DF25,1DF2A,15.0
938
1004
  1E000,1E02A,9.0
939
- 1E100,1E2FF,12.0
1005
+ 1E030,1E08F,15.0
1006
+ 1E100,1E14F,12.0
1007
+ 1E290,1E2AE,14.0
1008
+ 1E2C0,1E2FF,12.0
1009
+ 1E4D0,1E4F9,15.0
1010
+ 1E7E0,1E7FE,14.0
940
1011
  1E800,1E8D6,7.0
941
1012
  1E900,1E94A,9.0
942
1013
  1E94B,1E94B,12.0
@@ -1069,6 +1140,8 @@ FFFD,FFFD,1.1
1069
1140
  1F6D3,1F6D4,10.0
1070
1141
  1F6D5,1F6D5,12.0
1071
1142
  1F6D6,1F6D7,13.0
1143
+ 1F6DC,1F6DC,15.0
1144
+ 1F6DD,1F6DF,14.0
1072
1145
  1F6E0,1F6F3,7.0
1073
1146
  1F6F4,1F6F6,9.0
1074
1147
  1F6F7,1F6F8,10.0
@@ -1076,9 +1149,12 @@ FFFD,FFFD,1.1
1076
1149
  1F6FA,1F6FA,12.0
1077
1150
  1F6FB,1F6FC,13.0
1078
1151
  1F700,1F773,6.0
1152
+ 1F774,1F77F,15.0
1079
1153
  1F780,1F7D4,7.0
1080
1154
  1F7D5,1F7D8,11.0
1155
+ 1F7D9,1F7D9,15.0
1081
1156
  1F7E0,1F7EB,12.0
1157
+ 1F7F0,1F7F0,14.0
1082
1158
  1F800,1F8AD,7.0
1083
1159
  1F8B0,1F8B1,13.0
1084
1160
  1F900,1F90B,10.0
@@ -1103,6 +1179,7 @@ FFFD,FFFD,1.1
1103
1179
  1F972,1F972,13.0
1104
1180
  1F973,1F976,11.0
1105
1181
  1F977,1F978,13.0
1182
+ 1F979,1F979,14.0
1106
1183
  1F97A,1F97A,11.0
1107
1184
  1F97B,1F97B,12.0
1108
1185
  1F97C,1F97F,11.0
@@ -1120,6 +1197,7 @@ FFFD,FFFD,1.1
1120
1197
  1F9C1,1F9C2,11.0
1121
1198
  1F9C3,1F9CA,12.0
1122
1199
  1F9CB,1F9CB,13.0
1200
+ 1F9CC,1F9CC,14.0
1123
1201
  1F9CD,1F9CF,12.0
1124
1202
  1F9D0,1F9E6,10.0
1125
1203
  1F9E7,1F9FF,11.0
@@ -1127,18 +1205,42 @@ FFFD,FFFD,1.1
1127
1205
  1FA60,1FA6D,11.0
1128
1206
  1FA70,1FA73,12.0
1129
1207
  1FA74,1FA74,13.0
1130
- 1FA78,1FA82,12.0
1208
+ 1FA75,1FA77,15.0
1209
+ 1FA78,1FA7A,12.0
1210
+ 1FA7B,1FA7C,14.0
1211
+ 1FA80,1FA82,12.0
1131
1212
  1FA83,1FA86,13.0
1213
+ 1FA87,1FA88,15.0
1132
1214
  1FA90,1FA95,12.0
1133
- 1FA96,1FBF9,13.0
1215
+ 1FA96,1FAA8,13.0
1216
+ 1FAA9,1FAAC,14.0
1217
+ 1FAAD,1FAAF,15.0
1218
+ 1FAB0,1FAB6,13.0
1219
+ 1FAB7,1FABA,14.0
1220
+ 1FABB,1FABF,15.0
1221
+ 1FAC0,1FAC2,13.0
1222
+ 1FAC3,1FAC5,14.0
1223
+ 1FACE,1FACF,15.0
1224
+ 1FAD0,1FAD6,13.0
1225
+ 1FAD7,1FAD9,14.0
1226
+ 1FADA,1FADB,15.0
1227
+ 1FAE0,1FAE7,14.0
1228
+ 1FAE8,1FAE8,15.0
1229
+ 1FAF0,1FAF6,14.0
1230
+ 1FAF7,1FAF8,15.0
1231
+ 1FB00,1FBF9,13.0
1134
1232
  20000,2A6D6,3.1
1135
1233
  2A6D7,2A6DD,13.0
1234
+ 2A6DE,2A6DF,14.0
1136
1235
  2A700,2B734,5.2
1236
+ 2B735,2B738,14.0
1237
+ 2B739,2B739,15.0
1137
1238
  2B740,2B81D,6.0
1138
1239
  2B820,2CEA1,8.0
1139
1240
  2CEB0,2EBE0,10.0
1140
1241
  2F800,2FA1D,3.1
1141
1242
  30000,3134A,13.0
1243
+ 31350,323AF,15.0
1142
1244
  E0001,E007F,3.1
1143
1245
  E0100,E01EF,4.0
1144
1246
  F0000,FFFFD,2.0
@@ -0,0 +1,59 @@
1
+ require 'nokogiri'
2
+ require 'csv'
3
+
4
# Converts a flat Unicode Character Database XML file into a CSV table of
# code point ranges and the Unicode version ("age") of each range.
#
# Output columns are +cp1+ (first code point, hex string), +cp2+ (last code
# point, hex string) and +age+, exactly as they appear in the XML attributes.
class UnicodeCharsVer
  # Parse the XML, collapse consecutive characters into ranges and write
  # the result as CSV.
  #
  # @param xml_file_in [String] input unicode xml file path, ex: "ucd.all.flat.xml"
  # @param csv_file_out [String] csv file path for output
  # @return [true] always true once the CSV has been written
  def convert(xml_file_in, csv_file_out)
    print "parse xml..."
    doc = File.open(xml_file_in) { |f| Nokogiri::XML(f) }
    # Drop namespaces so the plain '//char' XPath in read_chars matches.
    doc.remove_namespaces!
    puts 'done.'

    @rows = []
    read_chars(doc)

    CSV.open(csv_file_out, 'wb', headers: true) do |csv|
      csv << %w[cp1 cp2 age]
      @rows.each do |row|
        csv << [row[:cp1], row[:cp2], row[:age]]
      end
    end

    true
  end

  private

  # Append a new row for element +e+.
  #
  # An element carrying a 'cp' attribute is a single code point (cp1 == cp2);
  # otherwise its 'first-cp' / 'last-cp' attributes give the range bounds.
  #
  # @param e [#key?, #[]] a char element (or any object with the same
  #   attribute-access interface)
  def new_range(e)
    single = e.key?('cp')
    @rows << {
      cp1: single ? e['cp'] : e['first-cp'],
      cp2: single ? e['cp'] : e['last-cp'],
      age: e['age']
    }
  end

  # Walk every +char+ element in document order and build @rows.
  #
  # A single-code-point element is folded into the previous row only when it
  # has the same age AND directly follows that row's last code point.
  # Elements that describe a range always start a new row.
  #
  # @param doc [#xpath] parsed document
  def read_chars(doc)
    @rows ||= []
    doc.xpath('//char').each do |e|
      row = @rows.last
      if row && extends?(row, e)
        row[:cp2] = e['cp']
      else
        new_range(e)
      end
    end
  end

  # Whether single code point +e+ continues +row+ without a gap.
  #
  # Checking adjacency matters because only +char+ elements are visited:
  # merging on age alone would silently pull every skipped code point
  # between two same-age characters into the range.
  #
  # @param row [Hash] the most recent row (:cp1, :cp2, :age)
  # @param e [#key?, #[]] candidate element
  # @return [Boolean]
  def extends?(row, e)
    return false unless e.key?('cp') && e['age'] == row[:age]

    last_cp = row[:cp2]
    return false unless last_cp

    # Code points are hex strings; compare them numerically.
    e['cp'].to_i(16) == last_cp.to_i(16) + 1
  end
end
56
+
57
# Sample input/output paths; the input path is pinned to the 13.0.0 UCD directory. NOTE(review): nothing in the visible code reads IN or OUT — confirm whether they are still needed.
+ IN = '../13.0.0/ucd.all.flat.xml'
58
+ OUT = '../out/unicode-chars-ver.csv'
59
+
data/lib/unihan2.rb CHANGED
@@ -8,6 +8,12 @@ class Unihan2
8
8
  read_version
9
9
  end
10
10
 
11
+ # Convert a Unicode XML file to CSV listing each code range with its Unicode version (delegates to UnicodeCharsVer#convert)
12
+ # @param xml_file_in [String] input unicode xml file path, ex: "ucd.all.flat.xml"
13
+ # @param csv_file_out [String] csv file path for output
14
+ def self.chars_ver(xml_file_in, csv_file_out)
15
+ UnicodeCharsVer.new.convert(xml_file_in, csv_file_out)
16
+ end
11
17
 
12
18
  # return total strokes of the character char
13
19
  # @param char [String] the character
@@ -17,7 +23,7 @@ class Unihan2
17
23
  end
18
24
 
19
25
  # return unicode version of specific character
20
- # @param code [String] character or codepoing
26
+ # @param code [String] character or codepoint
21
27
  # @return [Float] unicode version
22
28
  def ver(code)
23
29
  return nil if code.nil?
@@ -36,7 +42,7 @@ class Unihan2
36
42
  private
37
43
 
38
44
  def read_strokes
39
- fn = File.join(DATA_DIR, 'Unihan_DictionaryLikeData.txt')
45
+ fn = File.join(DATA_DIR, 'Unihan_IRGSources.txt')
40
46
  @strokes = {}
41
47
  File.foreach(fn) do |line|
42
48
  next if line.start_with? '#'
@@ -78,5 +84,6 @@ class Unihan2
78
84
  end
79
85
  end
80
86
  end
87
+ end
81
88
 
82
- end
89
+ require_relative 'unihan2/unicode-chars-ver'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unihan2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-31 00:00:00.000000000 Z
11
+ date: 2023-07-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Unihan Database Utilities
14
14
  email: zhoubx@gmail.com
@@ -16,14 +16,16 @@ executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
- - data/Unihan_DictionaryLikeData.txt
19
+ - data/README.md
20
+ - data/Unihan_IRGSources.txt
20
21
  - data/unicode-chars-ver.csv
21
22
  - lib/unihan2.rb
23
+ - lib/unihan2/unicode-chars-ver.rb
22
24
  homepage: https://github.com/RayCHOU/unihan2
23
25
  licenses:
24
26
  - MIT
25
27
  metadata: {}
26
- post_install_message:
28
+ post_install_message:
27
29
  rdoc_options: []
28
30
  require_paths:
29
31
  - lib
@@ -38,8 +40,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
38
40
  - !ruby/object:Gem::Version
39
41
  version: '0'
40
42
  requirements: []
41
- rubygems_version: 3.1.4
42
- signing_key:
43
+ rubygems_version: 3.4.14
44
+ signing_key:
43
45
  specification_version: 4
44
46
  summary: Chinese
45
47
  test_files: []