unicode_utils 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. data/README.txt +5 -5
  2. data/cdata/casefold_c_map +1 -0
  3. data/cdata/casefold_f_map +1 -0
  4. data/cdata/casefold_s_map +1 -0
  5. data/lib/unicode_utils.rb +7 -2
  6. data/lib/unicode_utils/canonical_decomposition.rb +1 -1
  7. data/lib/unicode_utils/case_ignorable_char_q.rb +1 -1
  8. data/lib/unicode_utils/casefold.rb +33 -0
  9. data/lib/unicode_utils/{name.rb → char_name.rb} +15 -8
  10. data/lib/unicode_utils/codepoint.rb +66 -0
  11. data/lib/unicode_utils/compatibility_decomposition.rb +1 -1
  12. data/lib/unicode_utils/downcase.rb +1 -1
  13. data/lib/unicode_utils/grep.rb +21 -0
  14. data/lib/unicode_utils/hangul_syllable_decomposition.rb +3 -2
  15. data/lib/unicode_utils/jamo_short_name.rb +1 -1
  16. data/lib/unicode_utils/lowercase_char_q.rb +1 -1
  17. data/lib/unicode_utils/nfc.rb +1 -1
  18. data/lib/unicode_utils/read_cdata.rb +71 -0
  19. data/lib/unicode_utils/simple_casefold.rb +32 -0
  20. data/lib/unicode_utils/simple_downcase.rb +1 -1
  21. data/lib/unicode_utils/simple_upcase.rb +1 -1
  22. data/lib/unicode_utils/soft_dotted_char_q.rb +1 -1
  23. data/lib/unicode_utils/titlecase_char_q.rb +1 -1
  24. data/lib/unicode_utils/u.rb +12 -0
  25. data/lib/unicode_utils/upcase.rb +1 -1
  26. data/lib/unicode_utils/uppercase_char_q.rb +1 -1
  27. data/lib/unicode_utils/version.rb +1 -1
  28. data/test/test_unicode_utils.rb +25 -8
  29. metadata +12 -9
  30. data/lib/unicode_utils/read_codepoint_map.rb +0 -22
  31. data/lib/unicode_utils/read_codepoint_set.rb +0 -22
  32. data/lib/unicode_utils/read_multivalued_map.rb +0 -27
  33. data/lib/unicode_utils/read_names.rb +0 -22
  34. data/test/test_normalization.rb +0 -95
data/README.txt CHANGED
@@ -6,16 +6,13 @@ Install with RubyGems:
6
6
 
7
7
  Or get the source from Github: http://github.com/lang/unicode_utils
8
8
 
9
- UnicodeUtils works with Ruby 1.9.1-preview1 or later. Though a bug
10
- (http://redmine.ruby-lang.org/issues/show/692) in
11
- 1.9.1-preview1 prevents UnicodeUtils from loading when
12
- Encoding.default_internal is set (e.g. with -U or -E).
9
+ UnicodeUtils works with Ruby 1.9.1-preview2 or later.
13
10
 
14
11
  == Synopsis
15
12
 
16
13
  require "unicode_utils"
17
14
 
18
- UnicodeUtils.name("æ") => "LATIN SMALL LETTER AE"
15
+ UnicodeUtils.char_name("æ") => "LATIN SMALL LETTER AE"
19
16
 
20
17
  UnicodeUtils.upcase("weiß") => "WEISS"
21
18
 
@@ -37,6 +34,9 @@ startup time. Methods that end in a ? are in a file suffixed with
37
34
  +_q+, e.g. <tt>lowercase_char?</tt> can be required with
38
35
  <tt>unicode_utils/lowercase_char_q</tt>.
39
36
 
37
+ There is also a shortcut for IRB usage. See
38
+ U[link:files/lib/unicode_utils/u_rb.html].
39
+
40
40
  == License
41
41
 
42
42
  unicode_utils is licensed under the BSD license. Read the file
@@ -0,0 +1 @@
1
+ 00004100006100004200006200004300006300004400006400004500006500004600006600004700006700004800006800004900006900004a00006a00004b00006b00004c00006c00004d00006d00004e00006e00004f00006f00005000007000005100007100005200007200005300007300005400007400005500007500005600007600005700007700005800007800005900007900005a00007a0000b50003bc0000c00000e00000c10000e10000c20000e20000c30000e30000c40000e40000c50000e50000c60000e60000c70000e70000c80000e80000c90000e90000ca0000ea0000cb0000eb0000cc0000ec0000cd0000ed0000ce0000ee0000cf0000ef0000d00000f00000d10000f10000d20000f20000d30000f30000d40000f40000d50000f50000d60000f60000d80000f80000d90000f90000da0000fa0000db0000fb0000dc0000fc0000dd0000fd0000de0000fe00010000010100010200010300010400010500010600010700010800010900010a00010b00010c00010d00010e00010f00011000011100011200011300011400011500011600011700011800011900011a00011b00011c00011d00011e00011f00012000012100012200012300012400012500012600012700012800012900012a00012b00012c00012d00012e00012f00013200013300013400013500013600013700013900013a00013b00013c00013d00013e00013f00014000014100014200014300014400014500014600014700014800014a00014b00014c00014d00014e00014f00015000015100015200015300015400015500015600015700015800015900015a00015b00015c00015d00015e00015f00016000016100016200016300016400016500016600016700016800016900016a00016b00016c00016d00016e00016f0001700001710001720001730001740001750001760001770001780000ff00017900017a00017b00017c00017d00017e00017f00007300018100025300018200018300018400018500018600025400018700018800018900025600018a00025700018b00018c00018e0001dd00018f00025900019000025b00019100019200019300026000019400026300019600026900019700026800019800019900019c00026f00019d00027200019f0002750001a00001a10001a20001a30001a40001a50001a60002800001a70001a80001a90002830001ac0001ad0001ae0002880001af0001b00001b100028a0001b200028b0001b30001b40001b50001b60001b70002920001b80001b90001bc0001bd0001c40001c60001c50001c60001c70001c90001c80001c90001ca0001cc0001cb0001cc0001cd0001ce0001cf0001d00001d10001d20001d30001d40001d5
0001d60001d70001d80001d90001da0001db0001dc0001de0001df0001e00001e10001e20001e30001e40001e50001e60001e70001e80001e90001ea0001eb0001ec0001ed0001ee0001ef0001f10001f30001f20001f30001f40001f50001f60001950001f70001bf0001f80001f90001fa0001fb0001fc0001fd0001fe0001ff00020000020100020200020300020400020500020600020700020800020900020a00020b00020c00020d00020e00020f00021000021100021200021300021400021500021600021700021800021900021a00021b00021c00021d00021e00021f00022000019e00022200022300022400022500022600022700022800022900022a00022b00022c00022d00022e00022f00023000023100023200023300023a002c6500023b00023c00023d00019a00023e002c6600024100024200024300018000024400028900024500028c00024600024700024800024900024a00024b00024c00024d00024e00024f0003450003b90003700003710003720003730003760003770003860003ac0003880003ad0003890003ae00038a0003af00038c0003cc00038e0003cd00038f0003ce0003910003b10003920003b20003930003b30003940003b40003950003b50003960003b60003970003b70003980003b80003990003b900039a0003ba00039b0003bb00039c0003bc00039d0003bd00039e0003be00039f0003bf0003a00003c00003a10003c10003a30003c30003a40003c40003a50003c50003a60003c60003a70003c70003a80003c80003a90003c90003aa0003ca0003ab0003cb0003c20003c30003cf0003d70003d00003b20003d10003b80003d50003c60003d60003c00003d80003d90003da0003db0003dc0003dd0003de0003df0003e00003e10003e20003e30003e40003e50003e60003e70003e80003e90003ea0003eb0003ec0003ed0003ee0003ef0003f00003ba0003f10003c10003f40003b80003f50003b50003f70003f80003f90003f20003fa0003fb0003fd00037b0003fe00037c0003ff00037d00040000045000040100045100040200045200040300045300040400045400040500045500040600045600040700045700040800045800040900045900040a00045a00040b00045b00040c00045c00040d00045d00040e00045e00040f00045f00041000043000041100043100041200043200041300043300041400043400041500043500041600043600041700043700041800043800041900043900041a00043a00041b00043b00041c00043c00041d00043d00041e00043e00041f00043f00042000044000042100044100042200044200042300044300042400044400042500044500042600044600042700044700042800044800
042900044900042a00044a00042b00044b00042c00044c00042d00044d00042e00044e00042f00044f00046000046100046200046300046400046500046600046700046800046900046a00046b00046c00046d00046e00046f00047000047100047200047300047400047500047600047700047800047900047a00047b00047c00047d00047e00047f00048000048100048a00048b00048c00048d00048e00048f00049000049100049200049300049400049500049600049700049800049900049a00049b00049c00049d00049e00049f0004a00004a10004a20004a30004a40004a50004a60004a70004a80004a90004aa0004ab0004ac0004ad0004ae0004af0004b00004b10004b20004b30004b40004b50004b60004b70004b80004b90004ba0004bb0004bc0004bd0004be0004bf0004c00004cf0004c10004c20004c30004c40004c50004c60004c70004c80004c90004ca0004cb0004cc0004cd0004ce0004d00004d10004d20004d30004d40004d50004d60004d70004d80004d90004da0004db0004dc0004dd0004de0004df0004e00004e10004e20004e30004e40004e50004e60004e70004e80004e90004ea0004eb0004ec0004ed0004ee0004ef0004f00004f10004f20004f30004f40004f50004f60004f70004f80004f90004fa0004fb0004fc0004fd0004fe0004ff00050000050100050200050300050400050500050600050700050800050900050a00050b00050c00050d00050e00050f00051000051100051200051300051400051500051600051700051800051900051a00051b00051c00051d00051e00051f00052000052100052200052300053100056100053200056200053300056300053400056400053500056500053600056600053700056700053800056800053900056900053a00056a00053b00056b00053c00056c00053d00056d00053e00056e00053f00056f00054000057000054100057100054200057200054300057300054400057400054500057500054600057600054700057700054800057800054900057900054a00057a00054b00057b00054c00057c00054d00057d00054e00057e00054f00057f0005500005800005510005810005520005820005530005830005540005840005550005850005560005860010a0002d000010a1002d010010a2002d020010a3002d030010a4002d040010a5002d050010a6002d060010a7002d070010a8002d080010a9002d090010aa002d0a0010ab002d0b0010ac002d0c0010ad002d0d0010ae002d0e0010af002d0f0010b0002d100010b1002d110010b2002d120010b3002d130010b4002d140010b5002d150010b6002d160010b7002d170010b8002d180010b9002d190010ba002d1a0010bb002d
1b0010bc002d1c0010bd002d1d0010be002d1e0010bf002d1f0010c0002d200010c1002d210010c2002d220010c3002d230010c4002d240010c5002d25001e00001e01001e02001e03001e04001e05001e06001e07001e08001e09001e0a001e0b001e0c001e0d001e0e001e0f001e10001e11001e12001e13001e14001e15001e16001e17001e18001e19001e1a001e1b001e1c001e1d001e1e001e1f001e20001e21001e22001e23001e24001e25001e26001e27001e28001e29001e2a001e2b001e2c001e2d001e2e001e2f001e30001e31001e32001e33001e34001e35001e36001e37001e38001e39001e3a001e3b001e3c001e3d001e3e001e3f001e40001e41001e42001e43001e44001e45001e46001e47001e48001e49001e4a001e4b001e4c001e4d001e4e001e4f001e50001e51001e52001e53001e54001e55001e56001e57001e58001e59001e5a001e5b001e5c001e5d001e5e001e5f001e60001e61001e62001e63001e64001e65001e66001e67001e68001e69001e6a001e6b001e6c001e6d001e6e001e6f001e70001e71001e72001e73001e74001e75001e76001e77001e78001e79001e7a001e7b001e7c001e7d001e7e001e7f001e80001e81001e82001e83001e84001e85001e86001e87001e88001e89001e8a001e8b001e8c001e8d001e8e001e8f001e90001e91001e92001e93001e94001e95001e9b001e61001ea0001ea1001ea2001ea3001ea4001ea5001ea6001ea7001ea8001ea9001eaa001eab001eac001ead001eae001eaf001eb0001eb1001eb2001eb3001eb4001eb5001eb6001eb7001eb8001eb9001eba001ebb001ebc001ebd001ebe001ebf001ec0001ec1001ec2001ec3001ec4001ec5001ec6001ec7001ec8001ec9001eca001ecb001ecc001ecd001ece001ecf001ed0001ed1001ed2001ed3001ed4001ed5001ed6001ed7001ed8001ed9001eda001edb001edc001edd001ede001edf001ee0001ee1001ee2001ee3001ee4001ee5001ee6001ee7001ee8001ee9001eea001eeb001eec001eed001eee001eef001ef0001ef1001ef2001ef3001ef4001ef5001ef6001ef7001ef8001ef9001efa001efb001efc001efd001efe001eff001f08001f00001f09001f01001f0a001f02001f0b001f03001f0c001f04001f0d001f05001f0e001f06001f0f001f07001f18001f10001f19001f11001f1a001f12001f1b001f13001f1c001f14001f1d001f15001f28001f20001f29001f21001f2a001f22001f2b001f23001f2c001f24001f2d001f25001f2e001f26001f2f001f27001f38001f30001f39001f31001f3a001f32001f3b001f33001f3c001f34001f3d001f35001f3e001f36001f3f001f37001f48001f40001f49001f41001f4a
001f42001f4b001f43001f4c001f44001f4d001f45001f59001f51001f5b001f53001f5d001f55001f5f001f57001f68001f60001f69001f61001f6a001f62001f6b001f63001f6c001f64001f6d001f65001f6e001f66001f6f001f67001fb8001fb0001fb9001fb1001fba001f70001fbb001f71001fbe0003b9001fc8001f72001fc9001f73001fca001f74001fcb001f75001fd8001fd0001fd9001fd1001fda001f76001fdb001f77001fe8001fe0001fe9001fe1001fea001f7a001feb001f7b001fec001fe5001ff8001f78001ff9001f79001ffa001f7c001ffb001f7d0021260003c900212a00006b00212b0000e500213200214e00216000217000216100217100216200217200216300217300216400217400216500217500216600217600216700217700216800217800216900217900216a00217a00216b00217b00216c00217c00216d00217d00216e00217e00216f00217f0021830021840024b60024d00024b70024d10024b80024d20024b90024d30024ba0024d40024bb0024d50024bc0024d60024bd0024d70024be0024d80024bf0024d90024c00024da0024c10024db0024c20024dc0024c30024dd0024c40024de0024c50024df0024c60024e00024c70024e10024c80024e20024c90024e30024ca0024e40024cb0024e50024cc0024e60024cd0024e70024ce0024e80024cf0024e9002c00002c30002c01002c31002c02002c32002c03002c33002c04002c34002c05002c35002c06002c36002c07002c37002c08002c38002c09002c39002c0a002c3a002c0b002c3b002c0c002c3c002c0d002c3d002c0e002c3e002c0f002c3f002c10002c40002c11002c41002c12002c42002c13002c43002c14002c44002c15002c45002c16002c46002c17002c47002c18002c48002c19002c49002c1a002c4a002c1b002c4b002c1c002c4c002c1d002c4d002c1e002c4e002c1f002c4f002c20002c50002c21002c51002c22002c52002c23002c53002c24002c54002c25002c55002c26002c56002c27002c57002c28002c58002c29002c59002c2a002c5a002c2b002c5b002c2c002c5c002c2d002c5d002c2e002c5e002c60002c61002c6200026b002c63001d7d002c6400027d002c67002c68002c69002c6a002c6b002c6c002c6d000251002c6e000271002c6f000250002c72002c73002c75002c76002c80002c81002c82002c83002c84002c85002c86002c87002c88002c89002c8a002c8b002c8c002c8d002c8e002c8f002c90002c91002c92002c93002c94002c95002c96002c97002c98002c99002c9a002c9b002c9c002c9d002c9e002c9f002ca0002ca1002ca2002ca3002ca4002ca5002ca6002ca7002ca8002ca9002caa002cab002cac002cad00
2cae002caf002cb0002cb1002cb2002cb3002cb4002cb5002cb6002cb7002cb8002cb9002cba002cbb002cbc002cbd002cbe002cbf002cc0002cc1002cc2002cc3002cc4002cc5002cc6002cc7002cc8002cc9002cca002ccb002ccc002ccd002cce002ccf002cd0002cd1002cd2002cd3002cd4002cd5002cd6002cd7002cd8002cd9002cda002cdb002cdc002cdd002cde002cdf002ce0002ce1002ce2002ce300a64000a64100a64200a64300a64400a64500a64600a64700a64800a64900a64a00a64b00a64c00a64d00a64e00a64f00a65000a65100a65200a65300a65400a65500a65600a65700a65800a65900a65a00a65b00a65c00a65d00a65e00a65f00a66200a66300a66400a66500a66600a66700a66800a66900a66a00a66b00a66c00a66d00a68000a68100a68200a68300a68400a68500a68600a68700a68800a68900a68a00a68b00a68c00a68d00a68e00a68f00a69000a69100a69200a69300a69400a69500a69600a69700a72200a72300a72400a72500a72600a72700a72800a72900a72a00a72b00a72c00a72d00a72e00a72f00a73200a73300a73400a73500a73600a73700a73800a73900a73a00a73b00a73c00a73d00a73e00a73f00a74000a74100a74200a74300a74400a74500a74600a74700a74800a74900a74a00a74b00a74c00a74d00a74e00a74f00a75000a75100a75200a75300a75400a75500a75600a75700a75800a75900a75a00a75b00a75c00a75d00a75e00a75f00a76000a76100a76200a76300a76400a76500a76600a76700a76800a76900a76a00a76b00a76c00a76d00a76e00a76f00a77900a77a00a77b00a77c00a77d001d7900a77e00a77f00a78000a78100a78200a78300a78400a78500a78600a78700a78b00a78c00ff2100ff4100ff2200ff4200ff2300ff4300ff2400ff4400ff2500ff4500ff2600ff4600ff2700ff4700ff2800ff4800ff2900ff4900ff2a00ff4a00ff2b00ff4b00ff2c00ff4c00ff2d00ff4d00ff2e00ff4e00ff2f00ff4f00ff3000ff5000ff3100ff5100ff3200ff5200ff3300ff5300ff3400ff5400ff3500ff5500ff3600ff5600ff3700ff5700ff3800ff5800ff3900ff5900ff3a00ff5a01040001042801040101042901040201042a01040301042b01040401042c01040501042d01040601042e01040701042f01040801043001040901043101040a01043201040b01043301040c01043401040d01043501040e01043601040f01043701041001043801041101043901041201043a01041301043b01041401043c01041501043d01041601043e01041701043f01041801044001041901044101041a01044201041b01044301041c01044401041d01044501041e01044601041f0104470104200104
4801042101044901042201044a01042301044b01042401044c01042501044d01042601044e01042701044f
@@ -0,0 +1 @@
1
+ 0000df000073000073xxxxxx000130000069000307xxxxxx0001490002bc00006exxxxxx0001f000006a00030cxxxxxx0003900003b9000308000301xxxxxx0003b00003c5000308000301xxxxxx000587000565000582xxxxxx001e96000068000331xxxxxx001e97000074000308xxxxxx001e9800007700030axxxxxx001e9900007900030axxxxxx001e9a0000610002bexxxxxx001e9e000073000073xxxxxx001f500003c5000313xxxxxx001f520003c5000313000300xxxxxx001f540003c5000313000301xxxxxx001f560003c5000313000342xxxxxx001f80001f000003b9xxxxxx001f81001f010003b9xxxxxx001f82001f020003b9xxxxxx001f83001f030003b9xxxxxx001f84001f040003b9xxxxxx001f85001f050003b9xxxxxx001f86001f060003b9xxxxxx001f87001f070003b9xxxxxx001f88001f000003b9xxxxxx001f89001f010003b9xxxxxx001f8a001f020003b9xxxxxx001f8b001f030003b9xxxxxx001f8c001f040003b9xxxxxx001f8d001f050003b9xxxxxx001f8e001f060003b9xxxxxx001f8f001f070003b9xxxxxx001f90001f200003b9xxxxxx001f91001f210003b9xxxxxx001f92001f220003b9xxxxxx001f93001f230003b9xxxxxx001f94001f240003b9xxxxxx001f95001f250003b9xxxxxx001f96001f260003b9xxxxxx001f97001f270003b9xxxxxx001f98001f200003b9xxxxxx001f99001f210003b9xxxxxx001f9a001f220003b9xxxxxx001f9b001f230003b9xxxxxx001f9c001f240003b9xxxxxx001f9d001f250003b9xxxxxx001f9e001f260003b9xxxxxx001f9f001f270003b9xxxxxx001fa0001f600003b9xxxxxx001fa1001f610003b9xxxxxx001fa2001f620003b9xxxxxx001fa3001f630003b9xxxxxx001fa4001f640003b9xxxxxx001fa5001f650003b9xxxxxx001fa6001f660003b9xxxxxx001fa7001f670003b9xxxxxx001fa8001f600003b9xxxxxx001fa9001f610003b9xxxxxx001faa001f620003b9xxxxxx001fab001f630003b9xxxxxx001fac001f640003b9xxxxxx001fad001f650003b9xxxxxx001fae001f660003b9xxxxxx001faf001f670003b9xxxxxx001fb2001f700003b9xxxxxx001fb30003b10003b9xxxxxx001fb40003ac0003b9xxxxxx001fb60003b1000342xxxxxx001fb70003b10003420003b9xxxxxx001fbc0003b10003b9xxxxxx001fc2001f740003b9xxxxxx001fc30003b70003b9xxxxxx001fc40003ae0003b9xxxxxx001fc60003b7000342xxxxxx001fc70003b70003420003b9xxxxxx001fcc0003b70003b9xxxxxx001fd20003b9000308000300xxxxxx001fd30003b9000308000301xxxxxx001fd60003b9000342xxxxxx001fd70003b9000308000342
xxxxxx001fe20003c5000308000300xxxxxx001fe30003c5000308000301xxxxxx001fe40003c1000313xxxxxx001fe60003c5000342xxxxxx001fe70003c5000308000342xxxxxx001ff2001f7c0003b9xxxxxx001ff30003c90003b9xxxxxx001ff40003ce0003b9xxxxxx001ff60003c9000342xxxxxx001ff70003c90003420003b9xxxxxx001ffc0003c90003b9xxxxxx00fb00000066000066xxxxxx00fb01000066000069xxxxxx00fb0200006600006cxxxxxx00fb03000066000066000069xxxxxx00fb0400006600006600006cxxxxxx00fb05000073000074xxxxxx00fb06000073000074xxxxxx00fb13000574000576xxxxxx00fb14000574000565xxxxxx00fb1500057400056bxxxxxx00fb1600057e000576xxxxxx00fb1700057400056dxxxxxx
@@ -0,0 +1 @@
1
+ 001e9e0000df001f88001f80001f89001f81001f8a001f82001f8b001f83001f8c001f84001f8d001f85001f8e001f86001f8f001f87001f98001f90001f99001f91001f9a001f92001f9b001f93001f9c001f94001f9d001f95001f9e001f96001f9f001f97001fa8001fa0001fa9001fa1001faa001fa2001fab001fa3001fac001fa4001fad001fa5001fae001fa6001faf001fa7001fbc001fb3001fcc001fc3001ffc001ff3
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/version"
4
- require "unicode_utils/name"
4
+ require "unicode_utils/char_name"
5
5
  require "unicode_utils/simple_upcase"
6
6
  require "unicode_utils/simple_downcase"
7
7
  require "unicode_utils/upcase"
@@ -22,6 +22,10 @@ require "unicode_utils/nfc"
22
22
  require "unicode_utils/compatibility_decomposition"
23
23
  require "unicode_utils/nfkd"
24
24
  require "unicode_utils/nfkc"
25
+ require "unicode_utils/codepoint"
26
+ require "unicode_utils/grep"
27
+ require "unicode_utils/simple_casefold"
28
+ require "unicode_utils/casefold"
25
29
 
26
30
  # Read the README[link:files/README_txt.html] for an introduction.
27
31
  #
@@ -33,6 +37,7 @@ require "unicode_utils/nfkc"
33
37
  # UnicodeUtils.nfc:: Normalization Form C
34
38
  # UnicodeUtils.nfkd:: Normalization Form KD
35
39
  # UnicodeUtils.nfkc:: Normalization Form KC
36
- # UnicodeUtils.name:: character names
40
+ # UnicodeUtils.char_name:: character names
41
+ # UnicodeUtils.casefold:: case folding (case insensitive string comparison)
37
42
  module UnicodeUtils
38
43
  end
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_multivalued_map"
3
+ require "unicode_utils/read_cdata"
4
4
  require "unicode_utils/hangul_syllable_decomposition"
5
5
  require "unicode_utils/combining_class"
6
6
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_set"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_cdata"
4
+ require "unicode_utils/simple_casefold"
5
+
6
+ module UnicodeUtils
7
+
8
+ CASEFOLD_F_MAP = Impl.read_multivalued_map("casefold_f_map") # :nodoc:
9
+
10
+ # Perform full case folding. The returned string may be longer than
11
+ # +str+. The purpose of case folding is case insensitive string
12
+ # comparison.
13
+ #
14
+ # Examples:
15
+ #
16
+ # UnicodeUtils.casefold("Ümit") == UnicodeUtils.casefold("ümit") => true
17
+ # UnicodeUtils.casefold("WEISS") == UnicodeUtils.casefold("weiß") => true
18
+ def casefold(str)
19
+ String.new.force_encoding(str.encoding).tap do |res|
20
+ str.each_codepoint { |cp|
21
+ if mapping = CASEFOLD_C_MAP[cp]
22
+ res << mapping
23
+ elsif mapping = CASEFOLD_F_MAP[cp]
24
+ mapping.each { |m| res << m }
25
+ else
26
+ res << cp
27
+ end
28
+ }
29
+ end
30
+ end
31
+ module_function :casefold
32
+
33
+ end
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_names"
3
+ require "unicode_utils/read_cdata"
4
4
  require "unicode_utils/hangul_syllable_decomposition"
5
5
  require "unicode_utils/jamo_short_name"
6
6
 
@@ -18,22 +18,29 @@ module UnicodeUtils
18
18
  #
19
19
  # Example:
20
20
  #
21
- # UnicodeUtils.name "ᾀ" => "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI"
22
- # UnicodeUtils.name "\t" => "<control>"
23
- def name(char)
24
- cp = char.ord
21
+ # UnicodeUtils.char_name "ᾀ" => "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI"
22
+ # UnicodeUtils.char_name "\t" => "<control>"
23
+ def char_name(char)
24
+ if char.kind_of?(Integer)
25
+ cp = char
26
+ str = nil
27
+ else
28
+ cp = char.ord
29
+ str = char
30
+ end
25
31
  NAME_MAP[cp] ||
26
32
  case cp
27
33
  when 0x3400..0x4DB5, 0x4E00..0x9FC3, 0x20000..0x2A6D6
28
- "CJK UNIFIED IDEOGRAPH-#{sprintf('%04x', cp).upcase}"
34
+ "CJK UNIFIED IDEOGRAPH-#{sprintf('%04X', cp)}"
29
35
  when 0xAC00..0xD7A3
36
+ str ||= cp.chr(Encoding::UTF_8)
30
37
  "HANGUL SYLLABLE ".tap do |n|
31
- hangul_syllable_decomposition(char).each_char { |c|
38
+ hangul_syllable_decomposition(str).each_char { |c|
32
39
  n << (jamo_short_name(c) || '')
33
40
  }
34
41
  end
35
42
  end
36
43
  end
37
- module_function :name
44
+ module_function :char_name
38
45
 
39
46
  end
@@ -0,0 +1,66 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/char_name"
4
+
5
+ module UnicodeUtils
6
+
7
+ # A Codepoint instance represents a single Unicode codepoint.
8
+ #
9
+ # UnicodeUtils::Codepoint.new(0x20ac) => #<U+20AC "€" EURO SIGN utf8:e2,82,ac>
10
+ class Codepoint
11
+
12
+ # The Unicode codespace. Any integer in this range is a Unicode
13
+ # codepoint.
14
+ RANGE = 0..0x10FFFF
15
+
16
+ # Create a Codepoint instance that wraps the given Integer. +int+
17
+ # must be in Codepoint::RANGE.
18
+ def initialize(int)
19
+ unless RANGE.include?(int)
20
+ raise ArgumentError, "#{int} not in codespace"
21
+ end
22
+ @int = int
23
+ end
24
+
25
+ # Convert to Integer.
26
+ def ord
27
+ @int
28
+ end
29
+
30
+ # Format in U+ notation.
31
+ #
32
+ # Codepoint.new(0xc5).uplus => "U+00C5"
33
+ def uplus
34
+ sprintf('U+%04X', @int)
35
+ end
36
+
37
+ # Get the normative Unicode name of this codepoint.
38
+ #
39
+ # See also: UnicodeUtils.char_name
40
+ def name
41
+ UnicodeUtils.char_name(@int)
42
+ end
43
+
44
+ # Convert this codepoint to a UTF-8 encoded string. Returns a new
45
+ # string on each call and thus it is allowed to mutate the return
46
+ # value.
47
+ def to_s
48
+ @int.chr(Encoding::UTF_8)
49
+ end
50
+
51
+ # Get the bytes used to encode this codepoint in UTF-8,
52
+ # hex-formatted.
53
+ #
54
+ # Codepoint.new(0xe4).hexbytes => "c3,a4"
55
+ def hexbytes
56
+ to_s.bytes.map { |b| sprintf("%02x", b) }.join(",")
57
+ end
58
+
59
+ # #<U+... char name utf8-hexbytes>
60
+ def inspect
61
+ "#<#{uplus} #{to_s.inspect} #{name || "nil"} utf8:#{hexbytes}>"
62
+ end
63
+
64
+ end
65
+
66
+ end
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
+ require "unicode_utils/read_cdata"
3
4
  require "unicode_utils/canonical_decomposition"
4
- require "unicode_utils/read_multivalued_map"
5
5
  require "unicode_utils/hangul_syllable_decomposition"
6
6
 
7
7
  module UnicodeUtils
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
+ require "unicode_utils/read_cdata"
3
4
  require "unicode_utils/simple_downcase"
4
- require "unicode_utils/read_multivalued_map"
5
5
  require "unicode_utils/conditional_casing"
6
6
 
7
7
  module UnicodeUtils
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/codepoint"
4
+
5
+ module UnicodeUtils
6
+
7
+ # Get an array of all Codepoint instances in Codepoint::RANGE whose
8
+ # name matches regexp. Matching is case insensitive.
9
+ #
10
+ # UnicodeUtils.grep(/angstrom/) => [#<U+212B "Å" ANGSTROM SIGN utf8:e2,84,ab>]
11
+ def grep(regexp)
12
+ unless regexp.casefold?
13
+ regexp = Regexp.new(regexp.source, Regexp::IGNORECASE)
14
+ end
15
+ Codepoint::RANGE.select { |cp|
16
+ regexp =~ UnicodeUtils.char_name(cp)
17
+ }.map { |cp| Codepoint.new(cp) }
18
+ end
19
+ module_function :grep
20
+
21
+ end
@@ -8,8 +8,9 @@ module UnicodeUtils
8
8
  #
9
9
  # UnicodeUtils.hangul_syllable_decomposition("\u{d4db}") => "\u{1111}\u{1171}\u{11b6}"
10
10
  def hangul_syllable_decomposition(char)
11
- Impl.append_hangul_syllable_decomposition(
12
- String.new.force_encoding(char.encoding), char.ord)
11
+ String.new.force_encoding(char.encoding).tap do |str|
12
+ Impl.append_hangul_syllable_decomposition(str , char.ord)
13
+ end
13
14
  end
14
15
  module_function :hangul_syllable_decomposition
15
16
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_names"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_set"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -1,8 +1,8 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
+ require "unicode_utils/read_cdata"
3
4
  require "unicode_utils/canonical_decomposition"
4
5
  require "unicode_utils/combining_class"
5
- require "unicode_utils/read_codepoint_set"
6
6
 
7
7
  module UnicodeUtils
8
8
 
@@ -0,0 +1,71 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ module UnicodeUtils
4
+
5
+ # Absolute path to the directory from which UnicodeUtils loads its
6
+ # compiled Unicode data files at runtime.
7
+ CDATA_DIR =
8
+ File.absolute_path(File.join(File.dirname(__FILE__), "..", "..", "cdata"))
9
+
10
+ module Impl # :nodoc:
11
+
12
+ def self.open_cdata_file(filename, &block)
13
+ File.open(File.join(CDATA_DIR, filename), "r:US-ASCII:-", &block)
14
+ end
15
+
16
+ def self.read_codepoint_set(filename)
17
+ Hash.new.tap { |set|
18
+ open_cdata_file(filename) do |input|
19
+ buffer = "x" * 6
20
+ buffer.force_encoding(Encoding::US_ASCII)
21
+ while input.read(6, buffer)
22
+ set[buffer.to_i(16)] = true
23
+ end
24
+ end
25
+ }
26
+ end
27
+
28
+ def self.read_codepoint_map(filename)
29
+ Hash.new.tap { |map|
30
+ open_cdata_file(filename) do |input|
31
+ buffer = "x" * 6
32
+ buffer.force_encoding(Encoding::US_ASCII)
33
+ while input.read(6, buffer)
34
+ map[buffer.to_i(16)] = input.read(6, buffer).to_i(16)
35
+ end
36
+ end
37
+ }
38
+ end
39
+
40
+ def self.read_multivalued_map(filename)
41
+ Hash.new.tap { |map|
42
+ open_cdata_file(filename) do |input|
43
+ buffer = "x" * 6
44
+ buffer.force_encoding(Encoding::US_ASCII)
45
+ while input.read(6, buffer)
46
+ cp = buffer.to_i(16)
47
+ mapping = []
48
+ while input.read(6, buffer).getbyte(0) != 120
49
+ mapping << buffer.to_i(16)
50
+ end
51
+ map[cp] = mapping
52
+ end
53
+ end
54
+ }
55
+ end
56
+
57
+ def self.read_names(filename)
58
+ Hash.new.tap { |map|
59
+ open_cdata_file(filename) do |input|
60
+ buffer = "x" * 6
61
+ buffer.force_encoding(Encoding::US_ASCII)
62
+ while input.read(6, buffer)
63
+ map[buffer.to_i(16)] = input.gets.tap { |x| x.chomp! }
64
+ end
65
+ end
66
+ }
67
+ end
68
+
69
+ end
70
+
71
+ end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_cdata"
4
+
5
+ module UnicodeUtils
6
+
7
+ CASEFOLD_C_MAP = Impl.read_codepoint_map("casefold_c_map") # :nodoc:
8
+
9
+ CASEFOLD_S_MAP = Impl.read_codepoint_map("casefold_s_map") # :nodoc:
10
+
11
+ # Perform simple case folding. Contrary to full case folding, this
12
+ # uses only one-to-one mappings, so that the length of the returned
13
+ # string is equal to the length of +str+.
14
+ #
15
+ # The purpose of case folding is case insensitive string comparison.
16
+ #
17
+ # Examples:
18
+ #
19
+ # UnicodeUtils.simple_casefold("Ümit") == UnicodeUtils.simple_casefold("ümit") => true
20
+ # UnicodeUtils.simple_casefold("WEISS") == UnicodeUtils.simple_casefold("weiß") => false
21
+ #
22
+ # See also: UnicodeUtils.casefold
23
+ def simple_casefold(str)
24
+ String.new.force_encoding(str.encoding).tap do |res|
25
+ str.each_codepoint { |cp|
26
+ res << (CASEFOLD_C_MAP[cp] || CASEFOLD_S_MAP[cp] || cp)
27
+ }
28
+ end
29
+ end
30
+ module_function :simple_casefold
31
+
32
+ end
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_map"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_map"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_set"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_set"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -0,0 +1,12 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils"
4
+
5
+ # Shortcut for usage in irb. This shortcut is only defined when
6
+ # <tt>unicode_utils/u</tt> is explicitly required. It is intended for
7
+ # interactive use only!
8
+ #
9
+ # $ irb -r unicode_utils/u
10
+ # irb(main):001:0> U.grep(/angstrom/)
11
+ # => [#<U+212B "Å" ANGSTROM SIGN utf8:e2,84,ab>]
12
+ U = UnicodeUtils
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
+ require "unicode_utils/read_cdata"
3
4
  require "unicode_utils/simple_upcase"
4
- require "unicode_utils/read_multivalued_map"
5
5
  require "unicode_utils/conditional_casing"
6
6
 
7
7
  module UnicodeUtils
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require "unicode_utils/read_codepoint_set"
3
+ require "unicode_utils/read_cdata"
4
4
 
5
5
  module UnicodeUtils
6
6
 
@@ -3,6 +3,6 @@
3
3
  module UnicodeUtils
4
4
 
5
5
  # Corresponds to the unicode_utils gem version.
6
- VERSION = "0.4.0"
6
+ VERSION = "0.5.0"
7
7
 
8
8
  end
@@ -4,17 +4,18 @@ require "test/unit"
4
4
 
5
5
  require "unicode_utils"
6
6
 
7
+ # Fast tests for almost all UnicodeUtils functions.
7
8
  class TestUnicodeUtils < Test::Unit::TestCase
8
9
 
9
10
  def test_name
10
- assert_equal "LATIN SMALL LETTER F", UnicodeUtils.name("f")
11
- assert_equal Encoding::US_ASCII, UnicodeUtils.name("f").encoding
12
- assert_equal nil, UnicodeUtils.name("\u{e000}") # private use
13
- assert_equal "<control>", UnicodeUtils.name("\t")
14
- assert_equal "CJK UNIFIED IDEOGRAPH-4E00", UnicodeUtils.name("\u{4e00}")
15
- assert_equal "CJK UNIFIED IDEOGRAPH-2A6D6", UnicodeUtils.name("\u{2a6d6}")
16
- assert_equal "CJK UNIFIED IDEOGRAPH-2A3D6", UnicodeUtils.name("\u{2a3d6}")
17
- assert_equal "HANGUL SYLLABLE PWILH", UnicodeUtils.name("\u{d4db}")
11
+ assert_equal "LATIN SMALL LETTER F", UnicodeUtils.char_name("f")
12
+ assert_equal Encoding::US_ASCII, UnicodeUtils.char_name("f").encoding
13
+ assert_equal nil, UnicodeUtils.char_name("\u{e000}") # private use
14
+ assert_equal "<control>", UnicodeUtils.char_name("\t")
15
+ assert_equal "CJK UNIFIED IDEOGRAPH-4E00", UnicodeUtils.char_name("\u{4e00}")
16
+ assert_equal "CJK UNIFIED IDEOGRAPH-2A6D6", UnicodeUtils.char_name("\u{2a6d6}")
17
+ assert_equal "CJK UNIFIED IDEOGRAPH-2A3D6", UnicodeUtils.char_name("\u{2a3d6}")
18
+ assert_equal "HANGUL SYLLABLE PWILH", UnicodeUtils.char_name("\u{d4db}")
18
19
  end
19
20
 
20
21
  def test_simple_upcase
@@ -160,4 +161,20 @@ class TestUnicodeUtils < Test::Unit::TestCase
160
161
  assert_equal "\u{66}\u{69}\u{e4}", UnicodeUtils.nfkc("\u{fb01}\u{e4}")
161
162
  end
162
163
 
164
+ def test_simple_casefold
165
+ assert_equal "abc123", UnicodeUtils.simple_casefold("ABC123")
166
+ assert UnicodeUtils.simple_casefold("ÜMIT") ==
167
+ UnicodeUtils.simple_casefold("ümit")
168
+ assert UnicodeUtils.simple_casefold("WEISS") !=
169
+ UnicodeUtils.simple_casefold("weiß")
170
+ end
171
+
172
+ def test_casefold
173
+ assert_equal "abc123", UnicodeUtils.casefold("ABC123")
174
+ assert UnicodeUtils.casefold("ÜMIT") ==
175
+ UnicodeUtils.casefold("ümit")
176
+ assert UnicodeUtils.casefold("WEISS") ==
177
+ UnicodeUtils.casefold("weiß")
178
+ end
179
+
163
180
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stefan Lang
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-11-28 00:00:00 +01:00
12
+ date: 2008-12-07 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -22,31 +22,33 @@ extensions: []
22
22
  extra_rdoc_files:
23
23
  - README.txt
24
24
  files:
25
+ - lib/unicode_utils/u.rb
26
+ - lib/unicode_utils/read_cdata.rb
25
27
  - lib/unicode_utils/conditional_casing.rb
26
28
  - lib/unicode_utils/hangul_syllable_decomposition.rb
27
29
  - lib/unicode_utils/simple_downcase.rb
28
- - lib/unicode_utils/read_codepoint_map.rb
29
- - lib/unicode_utils/read_names.rb
30
- - lib/unicode_utils/read_codepoint_set.rb
30
+ - lib/unicode_utils/casefold.rb
31
31
  - lib/unicode_utils/titlecase_char_q.rb
32
32
  - lib/unicode_utils/cased_char_q.rb
33
33
  - lib/unicode_utils/downcase.rb
34
- - lib/unicode_utils/name.rb
35
34
  - lib/unicode_utils/uppercase_char_q.rb
36
- - lib/unicode_utils/read_multivalued_map.rb
37
35
  - lib/unicode_utils/canonical_equivalents_q.rb
36
+ - lib/unicode_utils/char_name.rb
38
37
  - lib/unicode_utils/nfkc.rb
39
38
  - lib/unicode_utils/nfkd.rb
39
+ - lib/unicode_utils/codepoint.rb
40
40
  - lib/unicode_utils/canonical_decomposition.rb
41
41
  - lib/unicode_utils/upcase.rb
42
42
  - lib/unicode_utils/nfc.rb
43
43
  - lib/unicode_utils/nfd.rb
44
44
  - lib/unicode_utils/case_ignorable_char_q.rb
45
45
  - lib/unicode_utils/compatibility_decomposition.rb
46
+ - lib/unicode_utils/grep.rb
46
47
  - lib/unicode_utils/simple_upcase.rb
47
48
  - lib/unicode_utils/lowercase_char_q.rb
48
49
  - lib/unicode_utils/jamo_short_name.rb
49
50
  - lib/unicode_utils/combining_class.rb
51
+ - lib/unicode_utils/simple_casefold.rb
50
52
  - lib/unicode_utils/version.rb
51
53
  - lib/unicode_utils/soft_dotted_char_q.rb
52
54
  - lib/unicode_utils.rb
@@ -54,7 +56,10 @@ files:
54
56
  - cdata/cond_lc_map
55
57
  - cdata/prop_set_lowercase
56
58
  - cdata/cat_set_titlecase
59
+ - cdata/casefold_c_map
60
+ - cdata/casefold_f_map
57
61
  - cdata/special_lc_map
62
+ - cdata/casefold_s_map
58
63
  - cdata/names
59
64
  - cdata/cond_uc_map
60
65
  - cdata/special_uc_map
@@ -67,7 +72,6 @@ files:
67
72
  - cdata/jamo_short_names
68
73
  - cdata/compatibility_decomposition_map
69
74
  - cdata/prop_set_uppercase
70
- - test/test_normalization.rb
71
75
  - test/test_unicode_utils.rb
72
76
  - README.txt
73
77
  - LICENSE.txt
@@ -99,5 +103,4 @@ signing_key:
99
103
  specification_version: 2
100
104
  summary: additional Unicode aware functions for Ruby 1.9
101
105
  test_files:
102
- - test/test_normalization.rb
103
106
  - test/test_unicode_utils.rb
@@ -1,22 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- module UnicodeUtils
4
-
5
- module Impl # :nodoc:
6
-
7
- def self.read_codepoint_map(filename)
8
- path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
- Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII:-") do |input|
11
- buffer = "x" * 6
12
- buffer.force_encoding(Encoding::US_ASCII)
13
- while input.read(6, buffer)
14
- map[buffer.to_i(16)] = input.read(6, buffer).to_i(16)
15
- end
16
- end
17
- }
18
- end
19
-
20
- end
21
-
22
- end
@@ -1,22 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- module UnicodeUtils
4
-
5
- module Impl # :nodoc:
6
-
7
- def self.read_codepoint_set(filename)
8
- path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
- Hash.new.tap { |set|
10
- File.open(path, "r:US-ASCII:-") do |input|
11
- buffer = "x" * 6
12
- buffer.force_encoding(Encoding::US_ASCII)
13
- while input.read(6, buffer)
14
- set[buffer.to_i(16)] = true
15
- end
16
- end
17
- }
18
- end
19
-
20
- end
21
-
22
- end
@@ -1,27 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- module UnicodeUtils
4
-
5
- module Impl # :nodoc:
6
-
7
- def self.read_multivalued_map(filename)
8
- path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
- Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII:-") do |input|
11
- buffer = "x" * 6
12
- buffer.force_encoding(Encoding::US_ASCII)
13
- while input.read(6, buffer)
14
- cp = buffer.to_i(16)
15
- mapping = []
16
- while input.read(6, buffer).getbyte(0) != 120
17
- mapping << buffer.to_i(16)
18
- end
19
- map[cp] = mapping
20
- end
21
- end
22
- }
23
- end
24
-
25
- end
26
-
27
- end
@@ -1,22 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- module UnicodeUtils
4
-
5
- module Impl # :nodoc:
6
-
7
- def self.read_names(filename)
8
- path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
- Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII:-") do |input|
11
- buffer = "x" * 6
12
- buffer.force_encoding(Encoding::US_ASCII)
13
- while input.read(6, buffer)
14
- map[buffer.to_i(16)] = input.gets.tap { |x| x.chomp! }
15
- end
16
- end
17
- }
18
- end
19
-
20
- end
21
-
22
- end
@@ -1,95 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require "test/unit"
4
-
5
- require "unicode_utils/nfd"
6
- require "unicode_utils/nfc"
7
-
8
- # See data/NormalizationTest.txt
9
- class TestNormalization < Test::Unit::TestCase
10
-
11
- class Record
12
- def initialize(ary)
13
- @ary = ary
14
- end
15
- def c1
16
- @ary[0]
17
- end
18
- def c2
19
- @ary[1]
20
- end
21
- def c3
22
- @ary[2]
23
- end
24
- def c4
25
- @ary[3]
26
- end
27
- def c5
28
- @ary[4]
29
- end
30
- end
31
-
32
- def each_testdata_record
33
- fn = File.join(File.dirname(__FILE__),
34
- "..", "data", "NormalizationTest.txt")
35
- File.open(fn, "r:utf-8:-") do |input|
36
- input.each_line { |line|
37
- if line =~ /^([^#]*)#/
38
- line = $1
39
- end
40
- line.strip!
41
- next if line.empty? || line =~ /^@Part/
42
- columns = line.split(";")
43
- ary = columns.map { |column|
44
- String.new.force_encoding(Encoding::UTF_8).tap do |str|
45
- column.split(" ").each { |c|
46
- str << c.strip.to_i(16)
47
- }
48
- end
49
- }
50
- yield Record.new(ary)
51
- }
52
- end
53
- end
54
-
55
- def test_nfd
56
- each_testdata_record { |r|
57
- assert_equal r.c3, UnicodeUtils.nfd(r.c1)
58
- assert_equal r.c3, UnicodeUtils.nfd(r.c2)
59
- assert_equal r.c3, UnicodeUtils.nfd(r.c3)
60
- assert_equal r.c5, UnicodeUtils.nfd(r.c4)
61
- assert_equal r.c5, UnicodeUtils.nfd(r.c5)
62
- }
63
- end
64
-
65
- def test_nfc
66
- each_testdata_record { |r|
67
- assert_equal r.c2, UnicodeUtils.nfc(r.c1)
68
- assert_equal r.c2, UnicodeUtils.nfc(r.c2)
69
- assert_equal r.c2, UnicodeUtils.nfc(r.c3)
70
- assert_equal r.c4, UnicodeUtils.nfc(r.c4)
71
- assert_equal r.c4, UnicodeUtils.nfc(r.c5)
72
- }
73
- end
74
-
75
- def test_nfkd
76
- each_testdata_record { |r|
77
- assert_equal r.c5, UnicodeUtils.nfkd(r.c1)
78
- assert_equal r.c5, UnicodeUtils.nfkd(r.c2)
79
- assert_equal r.c5, UnicodeUtils.nfkd(r.c3)
80
- assert_equal r.c5, UnicodeUtils.nfkd(r.c4)
81
- assert_equal r.c5, UnicodeUtils.nfkd(r.c5)
82
- }
83
- end
84
-
85
- def test_nfkc
86
- each_testdata_record { |r|
87
- assert_equal r.c4, UnicodeUtils.nfkc(r.c1)
88
- assert_equal r.c4, UnicodeUtils.nfkc(r.c2)
89
- assert_equal r.c4, UnicodeUtils.nfkc(r.c3)
90
- assert_equal r.c4, UnicodeUtils.nfkc(r.c4)
91
- assert_equal r.c4, UnicodeUtils.nfkc(r.c5)
92
- }
93
- end
94
-
95
- end