unicode_utils 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. data/README.txt +5 -5
  2. data/cdata/casefold_c_map +1 -0
  3. data/cdata/casefold_f_map +1 -0
  4. data/cdata/casefold_s_map +1 -0
  5. data/lib/unicode_utils.rb +7 -2
  6. data/lib/unicode_utils/canonical_decomposition.rb +1 -1
  7. data/lib/unicode_utils/case_ignorable_char_q.rb +1 -1
  8. data/lib/unicode_utils/casefold.rb +33 -0
  9. data/lib/unicode_utils/{name.rb → char_name.rb} +15 -8
  10. data/lib/unicode_utils/codepoint.rb +66 -0
  11. data/lib/unicode_utils/compatibility_decomposition.rb +1 -1
  12. data/lib/unicode_utils/downcase.rb +1 -1
  13. data/lib/unicode_utils/grep.rb +21 -0
  14. data/lib/unicode_utils/hangul_syllable_decomposition.rb +3 -2
  15. data/lib/unicode_utils/jamo_short_name.rb +1 -1
  16. data/lib/unicode_utils/lowercase_char_q.rb +1 -1
  17. data/lib/unicode_utils/nfc.rb +1 -1
  18. data/lib/unicode_utils/read_cdata.rb +71 -0
  19. data/lib/unicode_utils/simple_casefold.rb +32 -0
  20. data/lib/unicode_utils/simple_downcase.rb +1 -1
  21. data/lib/unicode_utils/simple_upcase.rb +1 -1
  22. data/lib/unicode_utils/soft_dotted_char_q.rb +1 -1
  23. data/lib/unicode_utils/titlecase_char_q.rb +1 -1
  24. data/lib/unicode_utils/u.rb +12 -0
  25. data/lib/unicode_utils/upcase.rb +1 -1
  26. data/lib/unicode_utils/uppercase_char_q.rb +1 -1
  27. data/lib/unicode_utils/version.rb +1 -1
  28. data/test/test_unicode_utils.rb +25 -8
  29. metadata +12 -9
  30. data/lib/unicode_utils/read_codepoint_map.rb +0 -22
  31. data/lib/unicode_utils/read_codepoint_set.rb +0 -22
  32. data/lib/unicode_utils/read_multivalued_map.rb +0 -27
  33. data/lib/unicode_utils/read_names.rb +0 -22
  34. data/test/test_normalization.rb +0 -95
data/README.txt CHANGED
@@ -6,16 +6,13 @@ Install with RubyGems:
6
6
 
7
7
  Or get the source from Github: http://github.com/lang/unicode_utils
8
8
 
9
- UnicodeUtils works with Ruby 1.9.1-preview1 or later. Though a bug
10
- (http://redmine.ruby-lang.org/issues/show/692) in
11
- 1.9.1-preview1 prevents UnicodeUtils from loading when
12
- Encoding.default_internal is set (e.g. with -U or -E).
9
+ UnicodeUtils works with Ruby 1.9.1-preview2 or later.
13
10
 
14
11
  == Synopsis
15
12
 
16
13
  require "unicode_utils"
17
14
 
18
- UnicodeUtils.name("æ") => "LATIN SMALL LETTER AE"
15
+ UnicodeUtils.char_name("æ") => "LATIN SMALL LETTER AE"
19
16
 
20
17
  UnicodeUtils.upcase("weiß") => "WEISS"
21
18
 
@@ -37,6 +34,9 @@ startup time. Methods that end in a ? are in a file suffixed with
37
34
  +_q+, e.g. <tt>lowercase_char?</tt> can be required with
38
35
  <tt>unicode_utils/lowercase_char_q</tt>.
39
36
 
37
+ There is also a shortcut for IRB usage. See
38
+ U[link:files/lib/unicode_utils/u_rb.html].
39
+
40
40
  == License
41
41
 
42
42
  unicode_utils is licensed under the BSD license. Read the file
@@ -0,0 +1 @@
1
+ 00004100006100004200006200004300006300004400006400004500006500004600006600004700006700004800006800004900006900004a00006a00004b00006b00004c00006c00004d00006d00004e00006e00004f00006f00005000007000005100007100005200007200005300007300005400007400005500007500005600007600005700007700005800007800005900007900005a00007a0000b50003bc0000c00000e00000c10000e10000c20000e20000c30000e30000c40000e40000c50000e50000c60000e60000c70000e70000c80000e80000c90000e90000ca0000ea0000cb0000eb0000cc0000ec0000cd0000ed0000ce0000ee0000cf0000ef0000d00000f00000d10000f10000d20000f20000d30000f30000d40000f40000d50000f50000d60000f60000d80000f80000d90000f90000da0000fa0000db0000fb0000dc0000fc0000dd0000fd0000de0000fe00010000010100010200010300010400010500010600010700010800010900010a00010b00010c00010d00010e00010f00011000011100011200011300011400011500011600011700011800011900011a00011b00011c00011d00011e00011f00012000012100012200012300012400012500012600012700012800012900012a00012b00012c00012d00012e00012f00013200013300013400013500013600013700013900013a00013b00013c00013d00013e00013f00014000014100014200014300014400014500014600014700014800014a00014b00014c00014d00014e00014f00015000015100015200015300015400015500015600015700015800015900015a00015b00015c00015d00015e00015f00016000016100016200016300016400016500016600016700016800016900016a00016b00016c00016d00016e00016f0001700001710001720001730001740001750001760001770001780000ff00017900017a00017b00017c00017d00017e00017f00007300018100025300018200018300018400018500018600025400018700018800018900025600018a00025700018b00018c00018e0001dd00018f00025900019000025b00019100019200019300026000019400026300019600026900019700026800019800019900019c00026f00019d00027200019f0002750001a00001a10001a20001a30001a40001a50001a60002800001a70001a80001a90002830001ac0001ad0001ae0002880001af0001b00001b100028a0001b200028b0001b30001b40001b50001b60001b70002920001b80001b90001bc0001bd0001c40001c60001c50001c60001c70001c90001c80001c90001ca0001cc0001cb0001cc0001cd0001ce0001cf0001d00001d10001d20001d30001d40001d5
0001d60001d70001d80001d90001da0001db0001dc0001de0001df0001e00001e10001e20001e30001e40001e50001e60001e70001e80001e90001ea0001eb0001ec0001ed0001ee0001ef0001f10001f30001f20001f30001f40001f50001f60001950001f70001bf0001f80001f90001fa0001fb0001fc0001fd0001fe0001ff00020000020100020200020300020400020500020600020700020800020900020a00020b00020c00020d00020e00020f00021000021100021200021300021400021500021600021700021800021900021a00021b00021c00021d00021e00021f00022000019e00022200022300022400022500022600022700022800022900022a00022b00022c00022d00022e00022f00023000023100023200023300023a002c6500023b00023c00023d00019a00023e002c6600024100024200024300018000024400028900024500028c00024600024700024800024900024a00024b00024c00024d00024e00024f0003450003b90003700003710003720003730003760003770003860003ac0003880003ad0003890003ae00038a0003af00038c0003cc00038e0003cd00038f0003ce0003910003b10003920003b20003930003b30003940003b40003950003b50003960003b60003970003b70003980003b80003990003b900039a0003ba00039b0003bb00039c0003bc00039d0003bd00039e0003be00039f0003bf0003a00003c00003a10003c10003a30003c30003a40003c40003a50003c50003a60003c60003a70003c70003a80003c80003a90003c90003aa0003ca0003ab0003cb0003c20003c30003cf0003d70003d00003b20003d10003b80003d50003c60003d60003c00003d80003d90003da0003db0003dc0003dd0003de0003df0003e00003e10003e20003e30003e40003e50003e60003e70003e80003e90003ea0003eb0003ec0003ed0003ee0003ef0003f00003ba0003f10003c10003f40003b80003f50003b50003f70003f80003f90003f20003fa0003fb0003fd00037b0003fe00037c0003ff00037d00040000045000040100045100040200045200040300045300040400045400040500045500040600045600040700045700040800045800040900045900040a00045a00040b00045b00040c00045c00040d00045d00040e00045e00040f00045f00041000043000041100043100041200043200041300043300041400043400041500043500041600043600041700043700041800043800041900043900041a00043a00041b00043b00041c00043c00041d00043d00041e00043e00041f00043f00042000044000042100044100042200044200042300044300042400044400042500044500042600044600042700044700042800044800
042900044900042a00044a00042b00044b00042c00044c00042d00044d00042e00044e00042f00044f00046000046100046200046300046400046500046600046700046800046900046a00046b00046c00046d00046e00046f00047000047100047200047300047400047500047600047700047800047900047a00047b00047c00047d00047e00047f00048000048100048a00048b00048c00048d00048e00048f00049000049100049200049300049400049500049600049700049800049900049a00049b00049c00049d00049e00049f0004a00004a10004a20004a30004a40004a50004a60004a70004a80004a90004aa0004ab0004ac0004ad0004ae0004af0004b00004b10004b20004b30004b40004b50004b60004b70004b80004b90004ba0004bb0004bc0004bd0004be0004bf0004c00004cf0004c10004c20004c30004c40004c50004c60004c70004c80004c90004ca0004cb0004cc0004cd0004ce0004d00004d10004d20004d30004d40004d50004d60004d70004d80004d90004da0004db0004dc0004dd0004de0004df0004e00004e10004e20004e30004e40004e50004e60004e70004e80004e90004ea0004eb0004ec0004ed0004ee0004ef0004f00004f10004f20004f30004f40004f50004f60004f70004f80004f90004fa0004fb0004fc0004fd0004fe0004ff00050000050100050200050300050400050500050600050700050800050900050a00050b00050c00050d00050e00050f00051000051100051200051300051400051500051600051700051800051900051a00051b00051c00051d00051e00051f00052000052100052200052300053100056100053200056200053300056300053400056400053500056500053600056600053700056700053800056800053900056900053a00056a00053b00056b00053c00056c00053d00056d00053e00056e00053f00056f00054000057000054100057100054200057200054300057300054400057400054500057500054600057600054700057700054800057800054900057900054a00057a00054b00057b00054c00057c00054d00057d00054e00057e00054f00057f0005500005800005510005810005520005820005530005830005540005840005550005850005560005860010a0002d000010a1002d010010a2002d020010a3002d030010a4002d040010a5002d050010a6002d060010a7002d070010a8002d080010a9002d090010aa002d0a0010ab002d0b0010ac002d0c0010ad002d0d0010ae002d0e0010af002d0f0010b0002d100010b1002d110010b2002d120010b3002d130010b4002d140010b5002d150010b6002d160010b7002d170010b8002d180010b9002d190010ba002d1a0010bb002d
1b0010bc002d1c0010bd002d1d0010be002d1e0010bf002d1f0010c0002d200010c1002d210010c2002d220010c3002d230010c4002d240010c5002d25001e00001e01001e02001e03001e04001e05001e06001e07001e08001e09001e0a001e0b001e0c001e0d001e0e001e0f001e10001e11001e12001e13001e14001e15001e16001e17001e18001e19001e1a001e1b001e1c001e1d001e1e001e1f001e20001e21001e22001e23001e24001e25001e26001e27001e28001e29001e2a001e2b001e2c001e2d001e2e001e2f001e30001e31001e32001e33001e34001e35001e36001e37001e38001e39001e3a001e3b001e3c001e3d001e3e001e3f001e40001e41001e42001e43001e44001e45001e46001e47001e48001e49001e4a001e4b001e4c001e4d001e4e001e4f001e50001e51001e52001e53001e54001e55001e56001e57001e58001e59001e5a001e5b001e5c001e5d001e5e001e5f001e60001e61001e62001e63001e64001e65001e66001e67001e68001e69001e6a001e6b001e6c001e6d001e6e001e6f001e70001e71001e72001e73001e74001e75001e76001e77001e78001e79001e7a001e7b001e7c001e7d001e7e001e7f001e80001e81001e82001e83001e84001e85001e86001e87001e88001e89001e8a001e8b001e8c001e8d001e8e001e8f001e90001e91001e92001e93001e94001e95001e9b001e61001ea0001ea1001ea2001ea3001ea4001ea5001ea6001ea7001ea8001ea9001eaa001eab001eac001ead001eae001eaf001eb0001eb1001eb2001eb3001eb4001eb5001eb6001eb7001eb8001eb9001eba001ebb001ebc001ebd001ebe001ebf001ec0001ec1001ec2001ec3001ec4001ec5001ec6001ec7001ec8001ec9001eca001ecb001ecc001ecd001ece001ecf001ed0001ed1001ed2001ed3001ed4001ed5001ed6001ed7001ed8001ed9001eda001edb001edc001edd001ede001edf001ee0001ee1001ee2001ee3001ee4001ee5001ee6001ee7001ee8001ee9001eea001eeb001eec001eed001eee001eef001ef0001ef1001ef2001ef3001ef4001ef5001ef6001ef7001ef8001ef9001efa001efb001efc001efd001efe001eff001f08001f00001f09001f01001f0a001f02001f0b001f03001f0c001f04001f0d001f05001f0e001f06001f0f001f07001f18001f10001f19001f11001f1a001f12001f1b001f13001f1c001f14001f1d001f15001f28001f20001f29001f21001f2a001f22001f2b001f23001f2c001f24001f2d001f25001f2e001f26001f2f001f27001f38001f30001f39001f31001f3a001f32001f3b001f33001f3c001f34001f3d001f35001f3e001f36001f3f001f37001f48001f40001f49001f41001f4a
001f42001f4b001f43001f4c001f44001f4d001f45001f59001f51001f5b001f53001f5d001f55001f5f001f57001f68001f60001f69001f61001f6a001f62001f6b001f63001f6c001f64001f6d001f65001f6e001f66001f6f001f67001fb8001fb0001fb9001fb1001fba001f70001fbb001f71001fbe0003b9001fc8001f72001fc9001f73001fca001f74001fcb001f75001fd8001fd0001fd9001fd1001fda001f76001fdb001f77001fe8001fe0001fe9001fe1001fea001f7a001feb001f7b001fec001fe5001ff8001f78001ff9001f79001ffa001f7c001ffb001f7d0021260003c900212a00006b00212b0000e500213200214e00216000217000216100217100216200217200216300217300216400217400216500217500216600217600216700217700216800217800216900217900216a00217a00216b00217b00216c00217c00216d00217d00216e00217e00216f00217f0021830021840024b60024d00024b70024d10024b80024d20024b90024d30024ba0024d40024bb0024d50024bc0024d60024bd0024d70024be0024d80024bf0024d90024c00024da0024c10024db0024c20024dc0024c30024dd0024c40024de0024c50024df0024c60024e00024c70024e10024c80024e20024c90024e30024ca0024e40024cb0024e50024cc0024e60024cd0024e70024ce0024e80024cf0024e9002c00002c30002c01002c31002c02002c32002c03002c33002c04002c34002c05002c35002c06002c36002c07002c37002c08002c38002c09002c39002c0a002c3a002c0b002c3b002c0c002c3c002c0d002c3d002c0e002c3e002c0f002c3f002c10002c40002c11002c41002c12002c42002c13002c43002c14002c44002c15002c45002c16002c46002c17002c47002c18002c48002c19002c49002c1a002c4a002c1b002c4b002c1c002c4c002c1d002c4d002c1e002c4e002c1f002c4f002c20002c50002c21002c51002c22002c52002c23002c53002c24002c54002c25002c55002c26002c56002c27002c57002c28002c58002c29002c59002c2a002c5a002c2b002c5b002c2c002c5c002c2d002c5d002c2e002c5e002c60002c61002c6200026b002c63001d7d002c6400027d002c67002c68002c69002c6a002c6b002c6c002c6d000251002c6e000271002c6f000250002c72002c73002c75002c76002c80002c81002c82002c83002c84002c85002c86002c87002c88002c89002c8a002c8b002c8c002c8d002c8e002c8f002c90002c91002c92002c93002c94002c95002c96002c97002c98002c99002c9a002c9b002c9c002c9d002c9e002c9f002ca0002ca1002ca2002ca3002ca4002ca5002ca6002ca7002ca8002ca9002caa002cab002cac002cad00
2cae002caf002cb0002cb1002cb2002cb3002cb4002cb5002cb6002cb7002cb8002cb9002cba002cbb002cbc002cbd002cbe002cbf002cc0002cc1002cc2002cc3002cc4002cc5002cc6002cc7002cc8002cc9002cca002ccb002ccc002ccd002cce002ccf002cd0002cd1002cd2002cd3002cd4002cd5002cd6002cd7002cd8002cd9002cda002cdb002cdc002cdd002cde002cdf002ce0002ce1002ce2002ce300a64000a64100a64200a64300a64400a64500a64600a64700a64800a64900a64a00a64b00a64c00a64d00a64e00a64f00a65000a65100a65200a65300a65400a65500a65600a65700a65800a65900a65a00a65b00a65c00a65d00a65e00a65f00a66200a66300a66400a66500a66600a66700a66800a66900a66a00a66b00a66c00a66d00a68000a68100a68200a68300a68400a68500a68600a68700a68800a68900a68a00a68b00a68c00a68d00a68e00a68f00a69000a69100a69200a69300a69400a69500a69600a69700a72200a72300a72400a72500a72600a72700a72800a72900a72a00a72b00a72c00a72d00a72e00a72f00a73200a73300a73400a73500a73600a73700a73800a73900a73a00a73b00a73c00a73d00a73e00a73f00a74000a74100a74200a74300a74400a74500a74600a74700a74800a74900a74a00a74b00a74c00a74d00a74e00a74f00a75000a75100a75200a75300a75400a75500a75600a75700a75800a75900a75a00a75b00a75c00a75d00a75e00a75f00a76000a76100a76200a76300a76400a76500a76600a76700a76800a76900a76a00a76b00a76c00a76d00a76e00a76f00a77900a77a00a77b00a77c00a77d001d7900a77e00a77f00a78000a78100a78200a78300a78400a78500a78600a78700a78b00a78c00ff2100ff4100ff2200ff4200ff2300ff4300ff2400ff4400ff2500ff4500ff2600ff4600ff2700ff4700ff2800ff4800ff2900ff4900ff2a00ff4a00ff2b00ff4b00ff2c00ff4c00ff2d00ff4d00ff2e00ff4e00ff2f00ff4f00ff3000ff5000ff3100ff5100ff3200ff5200ff3300ff5300ff3400ff5400ff3500ff5500ff3600ff5600ff3700ff5700ff3800ff5800ff3900ff5900ff3a00ff5a01040001042801040101042901040201042a01040301042b01040401042c01040501042d01040601042e01040701042f01040801043001040901043101040a01043201040b01043301040c01043401040d01043501040e01043601040f01043701041001043801041101043901041201043a01041301043b01041401043c01041501043d01041601043e01041701043f01041801044001041901044101041a01044201041b01044301041c01044401041d01044501041e01044601041f0104470104200104
4801042101044901042201044a01042301044b01042401044c01042501044d01042601044e01042701044f
@@ -0,0 +1 @@
1
+ 0000df000073000073xxxxxx000130000069000307xxxxxx0001490002bc00006exxxxxx0001f000006a00030cxxxxxx0003900003b9000308000301xxxxxx0003b00003c5000308000301xxxxxx000587000565000582xxxxxx001e96000068000331xxxxxx001e97000074000308xxxxxx001e9800007700030axxxxxx001e9900007900030axxxxxx001e9a0000610002bexxxxxx001e9e000073000073xxxxxx001f500003c5000313xxxxxx001f520003c5000313000300xxxxxx001f540003c5000313000301xxxxxx001f560003c5000313000342xxxxxx001f80001f000003b9xxxxxx001f81001f010003b9xxxxxx001f82001f020003b9xxxxxx001f83001f030003b9xxxxxx001f84001f040003b9xxxxxx001f85001f050003b9xxxxxx001f86001f060003b9xxxxxx001f87001f070003b9xxxxxx001f88001f000003b9xxxxxx001f89001f010003b9xxxxxx001f8a001f020003b9xxxxxx001f8b001f030003b9xxxxxx001f8c001f040003b9xxxxxx001f8d001f050003b9xxxxxx001f8e001f060003b9xxxxxx001f8f001f070003b9xxxxxx001f90001f200003b9xxxxxx001f91001f210003b9xxxxxx001f92001f220003b9xxxxxx001f93001f230003b9xxxxxx001f94001f240003b9xxxxxx001f95001f250003b9xxxxxx001f96001f260003b9xxxxxx001f97001f270003b9xxxxxx001f98001f200003b9xxxxxx001f99001f210003b9xxxxxx001f9a001f220003b9xxxxxx001f9b001f230003b9xxxxxx001f9c001f240003b9xxxxxx001f9d001f250003b9xxxxxx001f9e001f260003b9xxxxxx001f9f001f270003b9xxxxxx001fa0001f600003b9xxxxxx001fa1001f610003b9xxxxxx001fa2001f620003b9xxxxxx001fa3001f630003b9xxxxxx001fa4001f640003b9xxxxxx001fa5001f650003b9xxxxxx001fa6001f660003b9xxxxxx001fa7001f670003b9xxxxxx001fa8001f600003b9xxxxxx001fa9001f610003b9xxxxxx001faa001f620003b9xxxxxx001fab001f630003b9xxxxxx001fac001f640003b9xxxxxx001fad001f650003b9xxxxxx001fae001f660003b9xxxxxx001faf001f670003b9xxxxxx001fb2001f700003b9xxxxxx001fb30003b10003b9xxxxxx001fb40003ac0003b9xxxxxx001fb60003b1000342xxxxxx001fb70003b10003420003b9xxxxxx001fbc0003b10003b9xxxxxx001fc2001f740003b9xxxxxx001fc30003b70003b9xxxxxx001fc40003ae0003b9xxxxxx001fc60003b7000342xxxxxx001fc70003b70003420003b9xxxxxx001fcc0003b70003b9xxxxxx001fd20003b9000308000300xxxxxx001fd30003b9000308000301xxxxxx001fd60003b9000342xxxxxx001fd70003b9000308000342
xxxxxx001fe20003c5000308000300xxxxxx001fe30003c5000308000301xxxxxx001fe40003c1000313xxxxxx001fe60003c5000342xxxxxx001fe70003c5000308000342xxxxxx001ff2001f7c0003b9xxxxxx001ff30003c90003b9xxxxxx001ff40003ce0003b9xxxxxx001ff60003c9000342xxxxxx001ff70003c90003420003b9xxxxxx001ffc0003c90003b9xxxxxx00fb00000066000066xxxxxx00fb01000066000069xxxxxx00fb0200006600006cxxxxxx00fb03000066000066000069xxxxxx00fb0400006600006600006cxxxxxx00fb05000073000074xxxxxx00fb06000073000074xxxxxx00fb13000574000576xxxxxx00fb14000574000565xxxxxx00fb1500057400056bxxxxxx00fb1600057e000576xxxxxx00fb1700057400056dxxxxxx
@@ -0,0 +1 @@
1
+ 001e9e0000df001f88001f80001f89001f81001f8a001f82001f8b001f83001f8c001f84001f8d001f85001f8e001f86001f8f001f87001f98001f90001f99001f91001f9a001f92001f9b001f93001f9c001f94001f9d001f95001f9e001f96001f9f001f97001fa8001fa0001fa9001fa1001faa001fa2001fab001fa3001fac001fa4001fad001fa5001fae001fa6001faf001fa7001fbc001fb3001fcc001fc3001ffc001ff3
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/version"
4
- require "unicode_utils/name"
4
+ require "unicode_utils/char_name"
5
5
  require "unicode_utils/simple_upcase"
6
6
  require "unicode_utils/simple_downcase"
7
7
  require "unicode_utils/upcase"
@@ -22,6 +22,10 @@ require "unicode_utils/nfc"
22
22
  require "unicode_utils/compatibility_decomposition"
23
23
  require "unicode_utils/nfkd"
24
24
  require "unicode_utils/nfkc"
25
+ require "unicode_utils/codepoint"
26
+ require "unicode_utils/grep"
27
+ require "unicode_utils/simple_casefold"
28
+ require "unicode_utils/casefold"
25
29
 
26
30
  # Read the README[link:files/README_txt.html] for an introduction.
27
31
  #
@@ -33,6 +37,7 @@ require "unicode_utils/nfkc"
33
37
  # UnicodeUtils.nfc:: Normalization Form C
34
38
  # UnicodeUtils.nfkd:: Normalization Form KD
35
39
  # UnicodeUtils.nfkc:: Normalization Form KC
36
- # UnicodeUtils.name:: character names
40
+ # UnicodeUtils.char_name:: character names
41
+ # UnicodeUtils.casefold:: case folding (case insensitive string comparison)
37
42
  module UnicodeUtils
38
43
  end
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_multivalued_map"
3
+ require "unicode_utils/read_cdata"
4
4
  require "unicode_utils/hangul_syllable_decomposition"
5
5
  require "unicode_utils/combining_class"
6
6
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_set"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_cdata"
4
+ require "unicode_utils/simple_casefold"
5
+
6
+ module UnicodeUtils
7
+
8
+ CASEFOLD_F_MAP = Impl.read_multivalued_map("casefold_f_map") # :nodoc:
9
+
10
+ # Perform full case folding. The returned string may be longer than
11
+ # +str+. The purpose of case folding is case insensitive string
12
+ # comparison.
13
+ #
14
+ # Examples:
15
+ #
16
+ # UnicodeUtils.casefold("Ümit") == UnicodeUtils.casefold("ümit") => true
17
+ # UnicodeUtils.casefold("WEISS") == UnicodeUtils.casefold("weiß") => true
18
+ def casefold(str)
19
+ String.new.force_encoding(str.encoding).tap do |res|
20
+ str.each_codepoint { |cp|
21
+ if mapping = CASEFOLD_C_MAP[cp]
22
+ res << mapping
23
+ elsif mapping = CASEFOLD_F_MAP[cp]
24
+ mapping.each { |m| res << m }
25
+ else
26
+ res << cp
27
+ end
28
+ }
29
+ end
30
+ end
31
+ module_function :casefold
32
+
33
+ end
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_names"
3
+ require "unicode_utils/read_cdata"
4
4
  require "unicode_utils/hangul_syllable_decomposition"
5
5
  require "unicode_utils/jamo_short_name"
6
6
 
@@ -18,22 +18,29 @@ module UnicodeUtils
18
18
  #
19
19
  # Example:
20
20
  #
21
- # UnicodeUtils.name "ᾀ" => "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI"
22
- # UnicodeUtils.name "\t" => "<control>"
23
- def name(char)
24
- cp = char.ord
21
+ # UnicodeUtils.char_name "ᾀ" => "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI"
22
+ # UnicodeUtils.char_name "\t" => "<control>"
23
+ def char_name(char)
24
+ if char.kind_of?(Integer)
25
+ cp = char
26
+ str = nil
27
+ else
28
+ cp = char.ord
29
+ str = char
30
+ end
25
31
  NAME_MAP[cp] ||
26
32
  case cp
27
33
  when 0x3400..0x4DB5, 0x4E00..0x9FC3, 0x20000..0x2A6D6
28
- "CJK UNIFIED IDEOGRAPH-#{sprintf('%04x', cp).upcase}"
34
+ "CJK UNIFIED IDEOGRAPH-#{sprintf('%04X', cp)}"
29
35
  when 0xAC00..0xD7A3
36
+ str ||= cp.chr(Encoding::UTF_8)
30
37
  "HANGUL SYLLABLE ".tap do |n|
31
- hangul_syllable_decomposition(char).each_char { |c|
38
+ hangul_syllable_decomposition(str).each_char { |c|
32
39
  n << (jamo_short_name(c) || '')
33
40
  }
34
41
  end
35
42
  end
36
43
  end
37
- module_function :name
44
+ module_function :char_name
38
45
 
39
46
  end
@@ -0,0 +1,66 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/char_name"
4
+
5
+ module UnicodeUtils
6
+
7
+ # A Codepoint instance represents a single Unicode codepoint.
8
+ #
9
+ # UnicodeUtils::Codepoint.new(0x20ac) => #<U+20AC "€" EURO SIGN utf8:e2,82,ac>
10
+ class Codepoint
11
+
12
+ # The Unicode codespace. Any integer in this range is a Unicode
13
+ # codepoint.
14
+ RANGE = 0..0x10FFFF
15
+
16
+ # Create a Codepoint instance that wraps the given Integer. +int+
17
+ # must be in Codepoint::RANGE.
18
+ def initialize(int)
19
+ unless RANGE.include?(int)
20
+ raise ArgumentError, "#{int} not in codespace"
21
+ end
22
+ @int = int
23
+ end
24
+
25
+ # Convert to Integer.
26
+ def ord
27
+ @int
28
+ end
29
+
30
+ # Format in U+ notation.
31
+ #
32
+ # Codepoint.new(0xc5).uplus => "U+00C5"
33
+ def uplus
34
+ sprintf('U+%04X', @int)
35
+ end
36
+
37
+ # Get the normative Unicode name of this codepoint.
38
+ #
39
+ # See also: UnicodeUtils.char_name
40
+ def name
41
+ UnicodeUtils.char_name(@int)
42
+ end
43
+
44
+ # Convert this codepoint to a UTF-8 encoded string. Returns a new
45
+ # string on each call and thus it is allowed to mutate the return
46
+ # value.
47
+ def to_s
48
+ @int.chr(Encoding::UTF_8)
49
+ end
50
+
51
+ # Get the bytes used to encode this codepoint in UTF-8,
52
+ # hex-formatted.
53
+ #
54
+ # Codepoint.new(0xe4).hexbytes => "c3,a4"
55
+ def hexbytes
56
+ to_s.bytes.map { |b| sprintf("%02x", b) }.join(",")
57
+ end
58
+
59
+ # #<U+... char name utf8-hexbytes>
60
+ def inspect
61
+ "#<#{uplus} #{to_s.inspect} #{name || "nil"} utf8:#{hexbytes}>"
62
+ end
63
+
64
+ end
65
+
66
+ end
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
+ require "unicode_utils/read_cdata"
3
4
  require "unicode_utils/canonical_decomposition"
4
- require "unicode_utils/read_multivalued_map"
5
5
  require "unicode_utils/hangul_syllable_decomposition"
6
6
 
7
7
  module UnicodeUtils
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
+ require "unicode_utils/read_cdata"
3
4
  require "unicode_utils/simple_downcase"
4
- require "unicode_utils/read_multivalued_map"
5
5
  require "unicode_utils/conditional_casing"
6
6
 
7
7
  module UnicodeUtils
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/codepoint"
4
+
5
+ module UnicodeUtils
6
+
7
+ # Get an array of all Codepoint instances in Codepoint::RANGE whose
8
+ # name matches regexp. Matching is case insensitive.
9
+ #
10
+ # UnicodeUtils.grep(/angstrom/) => [#<U+212B "Å" ANGSTROM SIGN utf8:e2,84,ab>]
11
+ def grep(regexp)
12
+ unless regexp.casefold?
13
+ regexp = Regexp.new(regexp.source, Regexp::IGNORECASE)
14
+ end
15
+ Codepoint::RANGE.select { |cp|
16
+ regexp =~ UnicodeUtils.char_name(cp)
17
+ }.map { |cp| Codepoint.new(cp) }
18
+ end
19
+ module_function :grep
20
+
21
+ end
@@ -8,8 +8,9 @@ module UnicodeUtils
8
8
  #
9
9
  # UnicodeUtils.hangul_syllable_decomposition("\u{d4db}") => "\u{1111}\u{1171}\u{11b6}"
10
10
  def hangul_syllable_decomposition(char)
11
- Impl.append_hangul_syllable_decomposition(
12
- String.new.force_encoding(char.encoding), char.ord)
11
+ String.new.force_encoding(char.encoding).tap do |str|
12
+ Impl.append_hangul_syllable_decomposition(str , char.ord)
13
+ end
13
14
  end
14
15
  module_function :hangul_syllable_decomposition
15
16
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_names"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_set"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -1,8 +1,8 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
+ require "unicode_utils/read_cdata"
3
4
  require "unicode_utils/canonical_decomposition"
4
5
  require "unicode_utils/combining_class"
5
- require "unicode_utils/read_codepoint_set"
6
6
 
7
7
  module UnicodeUtils
8
8
 
@@ -0,0 +1,71 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ module UnicodeUtils
4
+
5
+ # Absolute path to the directory from which UnicodeUtils loads its
6
+ # compiled Unicode data files at runtime.
7
+ CDATA_DIR =
8
+ File.absolute_path(File.join(File.dirname(__FILE__), "..", "..", "cdata"))
9
+
10
+ module Impl # :nodoc:
11
+
12
+ def self.open_cdata_file(filename, &block)
13
+ File.open(File.join(CDATA_DIR, filename), "r:US-ASCII:-", &block)
14
+ end
15
+
16
+ def self.read_codepoint_set(filename)
17
+ Hash.new.tap { |set|
18
+ open_cdata_file(filename) do |input|
19
+ buffer = "x" * 6
20
+ buffer.force_encoding(Encoding::US_ASCII)
21
+ while input.read(6, buffer)
22
+ set[buffer.to_i(16)] = true
23
+ end
24
+ end
25
+ }
26
+ end
27
+
28
+ def self.read_codepoint_map(filename)
29
+ Hash.new.tap { |map|
30
+ open_cdata_file(filename) do |input|
31
+ buffer = "x" * 6
32
+ buffer.force_encoding(Encoding::US_ASCII)
33
+ while input.read(6, buffer)
34
+ map[buffer.to_i(16)] = input.read(6, buffer).to_i(16)
35
+ end
36
+ end
37
+ }
38
+ end
39
+
40
+ def self.read_multivalued_map(filename)
41
+ Hash.new.tap { |map|
42
+ open_cdata_file(filename) do |input|
43
+ buffer = "x" * 6
44
+ buffer.force_encoding(Encoding::US_ASCII)
45
+ while input.read(6, buffer)
46
+ cp = buffer.to_i(16)
47
+ mapping = []
48
+ while input.read(6, buffer).getbyte(0) != 120
49
+ mapping << buffer.to_i(16)
50
+ end
51
+ map[cp] = mapping
52
+ end
53
+ end
54
+ }
55
+ end
56
+
57
+ def self.read_names(filename)
58
+ Hash.new.tap { |map|
59
+ open_cdata_file(filename) do |input|
60
+ buffer = "x" * 6
61
+ buffer.force_encoding(Encoding::US_ASCII)
62
+ while input.read(6, buffer)
63
+ map[buffer.to_i(16)] = input.gets.tap { |x| x.chomp! }
64
+ end
65
+ end
66
+ }
67
+ end
68
+
69
+ end
70
+
71
+ end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_cdata"
4
+
5
+ module UnicodeUtils
6
+
7
+ CASEFOLD_C_MAP = Impl.read_codepoint_map("casefold_c_map") # :nodoc:
8
+
9
+ CASEFOLD_S_MAP = Impl.read_codepoint_map("casefold_s_map") # :nodoc:
10
+
11
+ # Perform simple case folding. Unlike full case folding, this
12
+ # uses only one-to-one mappings, so that the length of the returned
13
+ # string is equal to the length of +str+.
14
+ #
15
+ # The purpose of case folding is case insensitive string comparison.
16
+ #
17
+ # Examples:
18
+ #
19
+ # UnicodeUtils.simple_casefold("Ümit") == UnicodeUtils.simple_casefold("ümit") => true
20
+ # UnicodeUtils.simple_casefold("WEISS") == UnicodeUtils.simple_casefold("weiß") => false
21
+ #
22
+ # See also: UnicodeUtils.casefold
23
+ def simple_casefold(str)
24
+ String.new.force_encoding(str.encoding).tap do |res|
25
+ str.each_codepoint { |cp|
26
+ res << (CASEFOLD_C_MAP[cp] || CASEFOLD_S_MAP[cp] || cp)
27
+ }
28
+ end
29
+ end
30
+ module_function :simple_casefold
31
+
32
+ end
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_map"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_map"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_set"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_set"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -0,0 +1,12 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils"
4
+
5
+ # Shortcut for usage in irb. This shortcut is only defined when
6
+ # <tt>unicode_utils/u</tt> is explicitly required. It is intended for
7
+ # interactive use only!
8
+ #
9
+ # $ irb -r unicode_utils/u
10
+ # irb(main):001:0> U.grep(/angstrom/)
11
+ # => [#<U+212B "Å" ANGSTROM SIGN utf8:e2,84,ab>]
12
+ U = UnicodeUtils
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
+ require "unicode_utils/read_cdata"
3
4
  require "unicode_utils/simple_upcase"
4
- require "unicode_utils/read_multivalued_map"
5
5
  require "unicode_utils/conditional_casing"
6
6
 
7
7
  module UnicodeUtils
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_set"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -3,6 +3,6 @@
3
3
  module UnicodeUtils
4
4
 
5
5
  # Corresponds to the unicode_utils gem version.
6
- VERSION = "0.4.0"
6
+ VERSION = "0.5.0"
7
7
 
8
8
  end
@@ -4,17 +4,18 @@ require "test/unit"
4
4
 
5
5
  require "unicode_utils"
6
6
 
7
+ # Fast tests for almost all UnicodeUtils functions.
7
8
  class TestUnicodeUtils < Test::Unit::TestCase
8
9
 
9
10
  def test_name
10
- assert_equal "LATIN SMALL LETTER F", UnicodeUtils.name("f")
11
- assert_equal Encoding::US_ASCII, UnicodeUtils.name("f").encoding
12
- assert_equal nil, UnicodeUtils.name("\u{e000}") # private use
13
- assert_equal "<control>", UnicodeUtils.name("\t")
14
- assert_equal "CJK UNIFIED IDEOGRAPH-4E00", UnicodeUtils.name("\u{4e00}")
15
- assert_equal "CJK UNIFIED IDEOGRAPH-2A6D6", UnicodeUtils.name("\u{2a6d6}")
16
- assert_equal "CJK UNIFIED IDEOGRAPH-2A3D6", UnicodeUtils.name("\u{2a3d6}")
17
- assert_equal "HANGUL SYLLABLE PWILH", UnicodeUtils.name("\u{d4db}")
11
+ assert_equal "LATIN SMALL LETTER F", UnicodeUtils.char_name("f")
12
+ assert_equal Encoding::US_ASCII, UnicodeUtils.char_name("f").encoding
13
+ assert_equal nil, UnicodeUtils.char_name("\u{e000}") # private use
14
+ assert_equal "<control>", UnicodeUtils.char_name("\t")
15
+ assert_equal "CJK UNIFIED IDEOGRAPH-4E00", UnicodeUtils.char_name("\u{4e00}")
16
+ assert_equal "CJK UNIFIED IDEOGRAPH-2A6D6", UnicodeUtils.char_name("\u{2a6d6}")
17
+ assert_equal "CJK UNIFIED IDEOGRAPH-2A3D6", UnicodeUtils.char_name("\u{2a3d6}")
18
+ assert_equal "HANGUL SYLLABLE PWILH", UnicodeUtils.char_name("\u{d4db}")
18
19
  end
19
20
 
20
21
  def test_simple_upcase
@@ -160,4 +161,20 @@ class TestUnicodeUtils < Test::Unit::TestCase
160
161
  assert_equal "\u{66}\u{69}\u{e4}", UnicodeUtils.nfkc("\u{fb01}\u{e4}")
161
162
  end
162
163
 
164
+ def test_simple_casefold
165
+ assert_equal "abc123", UnicodeUtils.simple_casefold("ABC123")
166
+ assert UnicodeUtils.simple_casefold("ÜMIT") ==
167
+ UnicodeUtils.simple_casefold("ümit")
168
+ assert UnicodeUtils.simple_casefold("WEISS") !=
169
+ UnicodeUtils.simple_casefold("weiß")
170
+ end
171
+
172
+ def test_casefold
173
+ assert_equal "abc123", UnicodeUtils.casefold("ABC123")
174
+ assert UnicodeUtils.casefold("ÜMIT") ==
175
+ UnicodeUtils.casefold("ümit")
176
+ assert UnicodeUtils.casefold("WEISS") ==
177
+ UnicodeUtils.casefold("weiß")
178
+ end
179
+
163
180
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stefan Lang
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-11-28 00:00:00 +01:00
12
+ date: 2008-12-07 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -22,31 +22,33 @@ extensions: []
22
22
  extra_rdoc_files:
23
23
  - README.txt
24
24
  files:
25
+ - lib/unicode_utils/u.rb
26
+ - lib/unicode_utils/read_cdata.rb
25
27
  - lib/unicode_utils/conditional_casing.rb
26
28
  - lib/unicode_utils/hangul_syllable_decomposition.rb
27
29
  - lib/unicode_utils/simple_downcase.rb
28
- - lib/unicode_utils/read_codepoint_map.rb
29
- - lib/unicode_utils/read_names.rb
30
- - lib/unicode_utils/read_codepoint_set.rb
30
+ - lib/unicode_utils/casefold.rb
31
31
  - lib/unicode_utils/titlecase_char_q.rb
32
32
  - lib/unicode_utils/cased_char_q.rb
33
33
  - lib/unicode_utils/downcase.rb
34
- - lib/unicode_utils/name.rb
35
34
  - lib/unicode_utils/uppercase_char_q.rb
36
- - lib/unicode_utils/read_multivalued_map.rb
37
35
  - lib/unicode_utils/canonical_equivalents_q.rb
36
+ - lib/unicode_utils/char_name.rb
38
37
  - lib/unicode_utils/nfkc.rb
39
38
  - lib/unicode_utils/nfkd.rb
39
+ - lib/unicode_utils/codepoint.rb
40
40
  - lib/unicode_utils/canonical_decomposition.rb
41
41
  - lib/unicode_utils/upcase.rb
42
42
  - lib/unicode_utils/nfc.rb
43
43
  - lib/unicode_utils/nfd.rb
44
44
  - lib/unicode_utils/case_ignorable_char_q.rb
45
45
  - lib/unicode_utils/compatibility_decomposition.rb
46
+ - lib/unicode_utils/grep.rb
46
47
  - lib/unicode_utils/simple_upcase.rb
47
48
  - lib/unicode_utils/lowercase_char_q.rb
48
49
  - lib/unicode_utils/jamo_short_name.rb
49
50
  - lib/unicode_utils/combining_class.rb
51
+ - lib/unicode_utils/simple_casefold.rb
50
52
  - lib/unicode_utils/version.rb
51
53
  - lib/unicode_utils/soft_dotted_char_q.rb
52
54
  - lib/unicode_utils.rb
@@ -54,7 +56,10 @@ files:
54
56
  - cdata/cond_lc_map
55
57
  - cdata/prop_set_lowercase
56
58
  - cdata/cat_set_titlecase
59
+ - cdata/casefold_c_map
60
+ - cdata/casefold_f_map
57
61
  - cdata/special_lc_map
62
+ - cdata/casefold_s_map
58
63
  - cdata/names
59
64
  - cdata/cond_uc_map
60
65
  - cdata/special_uc_map
@@ -67,7 +72,6 @@ files:
67
72
  - cdata/jamo_short_names
68
73
  - cdata/compatibility_decomposition_map
69
74
  - cdata/prop_set_uppercase
70
- - test/test_normalization.rb
71
75
  - test/test_unicode_utils.rb
72
76
  - README.txt
73
77
  - LICENSE.txt
@@ -99,5 +103,4 @@ signing_key:
99
103
  specification_version: 2
100
104
  summary: additional Unicode aware functions for Ruby 1.9
101
105
  test_files:
102
- - test/test_normalization.rb
103
106
  - test/test_unicode_utils.rb
@@ -1,22 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- module UnicodeUtils
4
-
5
- module Impl # :nodoc:
6
-
7
- def self.read_codepoint_map(filename)
8
- path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
- Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII:-") do |input|
11
- buffer = "x" * 6
12
- buffer.force_encoding(Encoding::US_ASCII)
13
- while input.read(6, buffer)
14
- map[buffer.to_i(16)] = input.read(6, buffer).to_i(16)
15
- end
16
- end
17
- }
18
- end
19
-
20
- end
21
-
22
- end
@@ -1,22 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- module UnicodeUtils
4
-
5
- module Impl # :nodoc:
6
-
7
- def self.read_codepoint_set(filename)
8
- path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
- Hash.new.tap { |set|
10
- File.open(path, "r:US-ASCII:-") do |input|
11
- buffer = "x" * 6
12
- buffer.force_encoding(Encoding::US_ASCII)
13
- while input.read(6, buffer)
14
- set[buffer.to_i(16)] = true
15
- end
16
- end
17
- }
18
- end
19
-
20
- end
21
-
22
- end
@@ -1,27 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- module UnicodeUtils
4
-
5
- module Impl # :nodoc:
6
-
7
- def self.read_multivalued_map(filename)
8
- path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
- Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII:-") do |input|
11
- buffer = "x" * 6
12
- buffer.force_encoding(Encoding::US_ASCII)
13
- while input.read(6, buffer)
14
- cp = buffer.to_i(16)
15
- mapping = []
16
- while input.read(6, buffer).getbyte(0) != 120
17
- mapping << buffer.to_i(16)
18
- end
19
- map[cp] = mapping
20
- end
21
- end
22
- }
23
- end
24
-
25
- end
26
-
27
- end
@@ -1,22 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- module UnicodeUtils
4
-
5
- module Impl # :nodoc:
6
-
7
- def self.read_names(filename)
8
- path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
- Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII:-") do |input|
11
- buffer = "x" * 6
12
- buffer.force_encoding(Encoding::US_ASCII)
13
- while input.read(6, buffer)
14
- map[buffer.to_i(16)] = input.gets.tap { |x| x.chomp! }
15
- end
16
- end
17
- }
18
- end
19
-
20
- end
21
-
22
- end
@@ -1,95 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require "test/unit"
4
-
5
- require "unicode_utils/nfd"
6
- require "unicode_utils/nfc"
7
-
8
- # See data/NormalizationTest.txt
9
- class TestNormalization < Test::Unit::TestCase
10
-
11
- class Record
12
- def initialize(ary)
13
- @ary = ary
14
- end
15
- def c1
16
- @ary[0]
17
- end
18
- def c2
19
- @ary[1]
20
- end
21
- def c3
22
- @ary[2]
23
- end
24
- def c4
25
- @ary[3]
26
- end
27
- def c5
28
- @ary[4]
29
- end
30
- end
31
-
32
- def each_testdata_record
33
- fn = File.join(File.dirname(__FILE__),
34
- "..", "data", "NormalizationTest.txt")
35
- File.open(fn, "r:utf-8:-") do |input|
36
- input.each_line { |line|
37
- if line =~ /^([^#]*)#/
38
- line = $1
39
- end
40
- line.strip!
41
- next if line.empty? || line =~ /^@Part/
42
- columns = line.split(";")
43
- ary = columns.map { |column|
44
- String.new.force_encoding(Encoding::UTF_8).tap do |str|
45
- column.split(" ").each { |c|
46
- str << c.strip.to_i(16)
47
- }
48
- end
49
- }
50
- yield Record.new(ary)
51
- }
52
- end
53
- end
54
-
55
- def test_nfd
56
- each_testdata_record { |r|
57
- assert_equal r.c3, UnicodeUtils.nfd(r.c1)
58
- assert_equal r.c3, UnicodeUtils.nfd(r.c2)
59
- assert_equal r.c3, UnicodeUtils.nfd(r.c3)
60
- assert_equal r.c5, UnicodeUtils.nfd(r.c4)
61
- assert_equal r.c5, UnicodeUtils.nfd(r.c5)
62
- }
63
- end
64
-
65
- def test_nfc
66
- each_testdata_record { |r|
67
- assert_equal r.c2, UnicodeUtils.nfc(r.c1)
68
- assert_equal r.c2, UnicodeUtils.nfc(r.c2)
69
- assert_equal r.c2, UnicodeUtils.nfc(r.c3)
70
- assert_equal r.c4, UnicodeUtils.nfc(r.c4)
71
- assert_equal r.c4, UnicodeUtils.nfc(r.c5)
72
- }
73
- end
74
-
75
- def test_nfkd
76
- each_testdata_record { |r|
77
- assert_equal r.c5, UnicodeUtils.nfkd(r.c1)
78
- assert_equal r.c5, UnicodeUtils.nfkd(r.c2)
79
- assert_equal r.c5, UnicodeUtils.nfkd(r.c3)
80
- assert_equal r.c5, UnicodeUtils.nfkd(r.c4)
81
- assert_equal r.c5, UnicodeUtils.nfkd(r.c5)
82
- }
83
- end
84
-
85
- def test_nfkc
86
- each_testdata_record { |r|
87
- assert_equal r.c4, UnicodeUtils.nfkc(r.c1)
88
- assert_equal r.c4, UnicodeUtils.nfkc(r.c2)
89
- assert_equal r.c4, UnicodeUtils.nfkc(r.c3)
90
- assert_equal r.c4, UnicodeUtils.nfkc(r.c4)
91
- assert_equal r.c4, UnicodeUtils.nfkc(r.c5)
92
- }
93
- end
94
-
95
- end