unicode_utils 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.txt CHANGED
@@ -10,17 +10,21 @@ Install with RubyGems:
10
10
 
11
11
  Or get the source from Github: http://github.com/lang/unicode_utils
12
12
 
13
- Currently Unicode Utils works only with recent builds of Ruby 1.9.
13
+ UnicodeUtils works with Ruby 1.9.1-preview1 or later. Though a bug
14
+ (http://redmine.ruby-lang.org/issues/show/692) in
15
+ 1.9.1-preview1 prevents UnicodeUtils from loading when
16
+ Encoding.default_internal is set (e.g. with -U or -E).
14
17
 
15
18
  == Synopsis
16
19
 
17
20
  require "unicode_utils"
18
21
 
19
- UnicodeUtils.name "æ" => "LATIN SMALL LETTER AE"
22
+ UnicodeUtils.name("æ") => "LATIN SMALL LETTER AE"
20
23
 
21
- UnicodeUtils.upcase "Straße" => "STRASSE"
24
+ UnicodeUtils.upcase("Straße") => "STRASSE"
25
+ UnicodeUtils.upcase("i", :tr) => "İ"
22
26
 
23
- UnicodeUtils.downcase "Ümit" => "ümit"
27
+ UnicodeUtils.downcase("Ümit") => "ümit"
24
28
 
25
29
  Start with the UnicodeUtils module in the API documentation for
26
30
  complete documentation.
@@ -30,7 +34,9 @@ at require time, the library is split up into separate files for
30
34
  each function. The +unicode_utils+ library loads them all. If you
31
35
  need only a specific function, e.g. +upcase+, you can require only
32
36
  the file <tt>unicode_utils/upcase</tt> to save memory and reduce
33
- startup time.
37
+ startup time. Methods that end in a ? are in a file suffixed with
38
+ +_q+, e.g. <tt>lowercase_char?</tt> can be required with
39
+ <tt>unicode_utils/lowercase_char_q</tt>.
34
40
 
35
41
  == License
36
42
 
@@ -0,0 +1 @@
1
+ 
@@ -0,0 +1 @@
1
+ 0001c50001c80001cb0001f2001f88001f89001f8a001f8b001f8c001f8d001f8e001f8f001f98001f99001f9a001f9b001f9c001f9d001f9e001f9f001fa8001fa9001faa001fab001fac001fad001fae001faf001fbc001fcc001ffc
@@ -0,0 +1 @@
1
+ 00033401000335010003360100033701000338010020d2010020d3010020d8010020d9010020da010020e5010020e6010020ea010020eb01010a390101d1670101d1680101d1690100093c070009bc07000a3c07000abc07000b3c07000cbc0700103707001b3407001c37070030990800309a0800094d090009cd09000a4d09000acd09000b4d09000bcd09000c4d09000ccd09000d4d09000dca09000e3a09000f84090010390900103a0900171409001734090017d209001b4409001baa0900a8060900a8c40900a95309010a3f090005b00a0005b10b0005b20c0005b30d0005b40e0005b50f0005b6100005b7110005b8120005c7120005b9130005ba130005bb140005bc150005bd160005bf170005c1180005c21900fb1e1a00064b1b00064c1c00064d1d0006181e00064e1e0006191f00064f1f00061a200006502000065121000652220006702300071124000c5554000c565b000e3867000e3967000e486b000e496b000e4a6b000e4b6b000eb876000eb976000ec87a000ec97a000eca7a000ecb7a000f7181000f7282000f7a82000f7b82000f7c82000f7d82000f8082000f7484000321ca000322ca000327ca000328ca001dd0ca001dced600031bd8000f39d801d165d801d166d801d16ed801d16fd801d170d801d171d801d172d800302ada000316dc000317dc000318dc000319dc00031cdc00031ddc00031edc00031fdc000320dc000323dc000324dc000325dc000326dc000329dc00032adc00032bdc00032cdc00032ddc00032edc00032fdc000330dc000331dc000332dc000333dc000339dc00033adc00033bdc00033cdc000347dc000348dc000349dc00034ddc00034edc000353dc000354dc000355dc000356dc000359dc00035adc000591dc000596dc00059bdc0005a2dc0005a3dc0005a4dc0005a5dc0005a6dc0005a7dc0005aadc0005c5dc000655dc000656dc00065cdc0006e3dc0006eadc0006eddc000731dc000734dc000737dc000738dc000739dc00073bdc00073cdc00073edc000742dc000744dc000746dc000748dc0007f2dc000952dc000f18dc000f19dc000f35dc000f37dc000fc6dc00108ddc00193bdc001a18dc001b6cdc001dc2dc001dcadc001dcfdc001dffdc0020e8dc0020ecdc0020eddc0020eedc0020efdc00a92bdc00a92cdc00a92ddc0101fddc010a0ddc010a3adc01d17bdc01d17cdc01d17ddc01d17edc01d17fdc01d180dc01d181dc01d182dc01d18adc01d18bdc00059ade0005adde001939de00302dde00302ee000302fe001d16de20005aee40018a9e400302be4000300e6000301e6000302e6000303e6000304e6000305e6000306e6000307e6000308e6000309e600030ae600030be600030ce600030d
e600030ee600030fe6000310e6000311e6000312e6000313e6000314e600033de600033ee600033fe6000340e6000341e6000342e6000343e6000344e6000346e600034ae600034be600034ce6000350e6000351e6000352e6000357e600035be6000363e6000364e6000365e6000366e6000367e6000368e6000369e600036ae600036be600036ce600036de600036ee600036fe6000483e6000484e6000485e6000486e6000487e6000592e6000593e6000594e6000595e6000597e6000598e6000599e600059ce600059de600059ee600059fe60005a0e60005a1e60005a8e60005a9e60005abe60005ace60005afe60005c4e6000610e6000611e6000612e6000613e6000614e6000615e6000616e6000617e6000653e6000654e6000657e6000658e6000659e600065ae600065be600065de600065ee60006d6e60006d7e60006d8e60006d9e60006dae60006dbe60006dce60006dfe60006e0e60006e1e60006e2e60006e4e60006e7e60006e8e60006ebe60006ece6000730e6000732e6000733e6000735e6000736e600073ae600073de600073fe6000740e6000741e6000743e6000745e6000747e6000749e600074ae60007ebe60007ece60007ede60007eee60007efe60007f0e60007f1e60007f3e6000951e6000953e6000954e6000f82e6000f83e6000f86e6000f87e600135fe60017dde600193ae6001a17e6001b6be6001b6de6001b6ee6001b6fe6001b70e6001b71e6001b72e6001b73e6001dc0e6001dc1e6001dc3e6001dc4e6001dc5e6001dc6e6001dc7e6001dc8e6001dc9e6001dcbe6001dcce6001dd1e6001dd2e6001dd3e6001dd4e6001dd5e6001dd6e6001dd7e6001dd8e6001dd9e6001ddae6001ddbe6001ddce6001ddde6001ddee6001ddfe6001de0e6001de1e6001de2e6001de3e6001de4e6001de5e6001de6e6001dfee60020d0e60020d1e60020d4e60020d5e60020d6e60020d7e60020dbe60020dce60020e1e60020e7e60020e9e60020f0e6002de0e6002de1e6002de2e6002de3e6002de4e6002de5e6002de6e6002de7e6002de8e6002de9e6002deae6002debe6002dece6002dede6002deee6002defe6002df0e6002df1e6002df2e6002df3e6002df4e6002df5e6002df6e6002df7e6002df8e6002df9e6002dfae6002dfbe6002dfce6002dfde6002dfee6002dffe600a66fe600a67ce600a67de600fe20e600fe21e600fe22e600fe23e600fe24e600fe25e600fe26e6010a0fe6010a38e601d185e601d186e601d187e601d188e601d189e601d1aae601d1abe601d1ace601d1ade601d242e601d243e601d244e6000315e800031ae8000358e800302ce800035ce900035fe9000362e900035dea00035eea000360ea000361ea001dcd
ea000345f0
@@ -0,0 +1,16 @@
1
+ 000049;000069,000307;lt;More_Above
2
+ 000049;000131;az;Not_Before_Dot
3
+ 000049;000131;tr;Not_Before_Dot
4
+ 00004a;00006a,000307;lt;More_Above
5
+ 000069;000069;az;
6
+ 000069;000069;tr;
7
+ 0000cc;000069,000307,000300;lt;
8
+ 0000cd;000069,000307,000301;lt;
9
+ 000128;000069,000307,000303;lt;
10
+ 00012e;00012f,000307;lt;More_Above
11
+ 000130;000069;az;
12
+ 000130;000069;tr;
13
+ 000307;000307;lt;After_Soft_Dotted
14
+ 000307;;az;After_I
15
+ 000307;;tr;After_I
16
+ 0003a3;0003c2;;Final_Sigma
@@ -0,0 +1,16 @@
1
+ 000049;000049;az;Not_Before_Dot
2
+ 000049;000049;lt;More_Above
3
+ 000049;000049;tr;Not_Before_Dot
4
+ 00004a;00004a;lt;More_Above
5
+ 000069;000130;az;
6
+ 000069;000130;tr;
7
+ 0000cc;0000cc;lt;
8
+ 0000cd;0000cd;lt;
9
+ 000128;000128;lt;
10
+ 00012e;00012e;lt;More_Above
11
+ 000130;000130;az;
12
+ 000130;000130;tr;
13
+ 000307;000307;az;After_I
14
+ 000307;000307;tr;After_I
15
+ 000307;;lt;After_Soft_Dotted
16
+ 0003a3;0003a3;;Final_Sigma
@@ -0,0 +1 @@
1
+ 00006100006200006300006400006500006600006700006800006900006a00006b00006c00006d00006e00006f00007000007100007200007300007400007500007600007700007800007900007a0000aa0000b50000ba0000df0000e00000e10000e20000e30000e40000e50000e60000e70000e80000e90000ea0000eb0000ec0000ed0000ee0000ef0000f00000f10000f20000f30000f40000f50000f60000f80000f90000fa0000fb0000fc0000fd0000fe0000ff00010100010300010500010700010900010b00010d00010f00011100011300011500011700011900011b00011d00011f00012100012300012500012700012900012b00012d00012f00013100013300013500013700013800013a00013c00013e00014000014200014400014600014800014900014b00014d00014f00015100015300015500015700015900015b00015d00015f00016100016300016500016700016900016b00016d00016f00017100017300017500017700017a00017c00017e00017f00018000018300018500018800018c00018d00019200019500019900019a00019b00019e0001a10001a30001a50001a80001aa0001ab0001ad0001b00001b40001b60001b90001ba0001bd0001be0001bf0001c60001c90001cc0001ce0001d00001d20001d40001d60001d80001da0001dc0001dd0001df0001e10001e30001e50001e70001e90001eb0001ed0001ef0001f00001f30001f50001f90001fb0001fd0001ff00020100020300020500020700020900020b00020d00020f00021100021300021500021700021900021b00021d00021f00022100022300022500022700022900022b00022d00022f00023100023300023400023500023600023700023800023900023c00023f00024000024200024700024900024b00024d00024f00025000025100025200025300025400025500025600025700025800025900025a00025b00025c00025d00025e00025f00026000026100026200026300026400026500026600026700026800026900026a00026b00026c00026d00026e00026f00027000027100027200027300027400027500027600027700027800027900027a00027b00027c00027d00027e00027f00028000028100028200028300028400028500028600028700028800028900028a00028b00028c00028d00028e00028f00029000029100029200029300029500029600029700029800029900029a00029b00029c00029d00029e00029f0002a00002a10002a20002a30002a40002a50002a60002a70002a80002a90002aa0002ab0002ac0002ad0002ae0002af0002b00002b10002b20002b30002b40002b50002b60002b70002b80002c00002c10002e00002e10002e20002e30002e4
00034500037100037300037700037a00037b00037c00037d0003900003ac0003ad0003ae0003af0003b00003b10003b20003b30003b40003b50003b60003b70003b80003b90003ba0003bb0003bc0003bd0003be0003bf0003c00003c10003c20003c30003c40003c50003c60003c70003c80003c90003ca0003cb0003cc0003cd0003ce0003d00003d10003d50003d60003d70003d90003db0003dd0003df0003e10003e30003e50003e70003e90003eb0003ed0003ef0003f00003f10003f20003f30003f50003f80003fb0003fc00043000043100043200043300043400043500043600043700043800043900043a00043b00043c00043d00043e00043f00044000044100044200044300044400044500044600044700044800044900044a00044b00044c00044d00044e00044f00045000045100045200045300045400045500045600045700045800045900045a00045b00045c00045d00045e00045f00046100046300046500046700046900046b00046d00046f00047100047300047500047700047900047b00047d00047f00048100048b00048d00048f00049100049300049500049700049900049b00049d00049f0004a10004a30004a50004a70004a90004ab0004ad0004af0004b10004b30004b50004b70004b90004bb0004bd0004bf0004c20004c40004c60004c80004ca0004cc0004ce0004cf0004d10004d30004d50004d70004d90004db0004dd0004df0004e10004e30004e50004e70004e90004eb0004ed0004ef0004f10004f30004f50004f70004f90004fb0004fd0004ff00050100050300050500050700050900050b00050d00050f00051100051300051500051700051900051b00051d00051f00052100052300056100056200056300056400056500056600056700056800056900056a00056b00056c00056d00056e00056f00057000057100057200057300057400057500057600057700057800057900057a00057b00057c00057d00057e00057f000580000581000582000583000584000585000586000587001d00001d01001d02001d03001d04001d05001d06001d07001d08001d09001d0a001d0b001d0c001d0d001d0e001d0f001d10001d11001d12001d13001d14001d15001d16001d17001d18001d19001d1a001d1b001d1c001d1d001d1e001d1f001d20001d21001d22001d23001d24001d25001d26001d27001d28001d29001d2a001d2b001d2c001d2d001d2e001d2f001d30001d31001d32001d33001d34001d35001d36001d37001d38001d39001d3a001d3b001d3c001d3d001d3e001d3f001d40001d41001d42001d43001d44001d45001d46001d47001d48001d49001d4a001d4b001d4c001d4d001d4e001d4f001d50001d51001d5200
1d53001d54001d55001d56001d57001d58001d59001d5a001d5b001d5c001d5d001d5e001d5f001d60001d61001d62001d63001d64001d65001d66001d67001d68001d69001d6a001d6b001d6c001d6d001d6e001d6f001d70001d71001d72001d73001d74001d75001d76001d77001d78001d79001d7a001d7b001d7c001d7d001d7e001d7f001d80001d81001d82001d83001d84001d85001d86001d87001d88001d89001d8a001d8b001d8c001d8d001d8e001d8f001d90001d91001d92001d93001d94001d95001d96001d97001d98001d99001d9a001d9b001d9c001d9d001d9e001d9f001da0001da1001da2001da3001da4001da5001da6001da7001da8001da9001daa001dab001dac001dad001dae001daf001db0001db1001db2001db3001db4001db5001db6001db7001db8001db9001dba001dbb001dbc001dbd001dbe001dbf001e01001e03001e05001e07001e09001e0b001e0d001e0f001e11001e13001e15001e17001e19001e1b001e1d001e1f001e21001e23001e25001e27001e29001e2b001e2d001e2f001e31001e33001e35001e37001e39001e3b001e3d001e3f001e41001e43001e45001e47001e49001e4b001e4d001e4f001e51001e53001e55001e57001e59001e5b001e5d001e5f001e61001e63001e65001e67001e69001e6b001e6d001e6f001e71001e73001e75001e77001e79001e7b001e7d001e7f001e81001e83001e85001e87001e89001e8b001e8d001e8f001e91001e93001e95001e96001e97001e98001e99001e9a001e9b001e9c001e9d001e9f001ea1001ea3001ea5001ea7001ea9001eab001ead001eaf001eb1001eb3001eb5001eb7001eb9001ebb001ebd001ebf001ec1001ec3001ec5001ec7001ec9001ecb001ecd001ecf001ed1001ed3001ed5001ed7001ed9001edb001edd001edf001ee1001ee3001ee5001ee7001ee9001eeb001eed001eef001ef1001ef3001ef5001ef7001ef9001efb001efd001eff001f00001f01001f02001f03001f04001f05001f06001f07001f10001f11001f12001f13001f14001f15001f20001f21001f22001f23001f24001f25001f26001f27001f30001f31001f32001f33001f34001f35001f36001f37001f40001f41001f42001f43001f44001f45001f50001f51001f52001f53001f54001f55001f56001f57001f60001f61001f62001f63001f64001f65001f66001f67001f70001f71001f72001f73001f74001f75001f76001f77001f78001f79001f7a001f7b001f7c001f7d001f80001f81001f82001f83001f84001f85001f86001f87001f90001f91001f92001f93001f94001f95001f96001f97001fa0001fa1001fa2001fa3001fa4001fa5001fa6001fa7001fb0001fb1001f
b2001fb3001fb4001fb6001fb7001fbe001fc2001fc3001fc4001fc6001fc7001fd0001fd1001fd2001fd3001fd6001fd7001fe0001fe1001fe2001fe3001fe4001fe5001fe6001fe7001ff2001ff3001ff4001ff6001ff700207100207f00209000209100209200209300209400210a00210e00210f00211300212f00213400213900213c00213d00214600214700214800214900214e00217000217100217200217300217400217500217600217700217800217900217a00217b00217c00217d00217e00217f0021840024d00024d10024d20024d30024d40024d50024d60024d70024d80024d90024da0024db0024dc0024dd0024de0024df0024e00024e10024e20024e30024e40024e50024e60024e70024e80024e9002c30002c31002c32002c33002c34002c35002c36002c37002c38002c39002c3a002c3b002c3c002c3d002c3e002c3f002c40002c41002c42002c43002c44002c45002c46002c47002c48002c49002c4a002c4b002c4c002c4d002c4e002c4f002c50002c51002c52002c53002c54002c55002c56002c57002c58002c59002c5a002c5b002c5c002c5d002c5e002c61002c65002c66002c68002c6a002c6c002c71002c73002c74002c76002c77002c78002c79002c7a002c7b002c7c002c7d002c81002c83002c85002c87002c89002c8b002c8d002c8f002c91002c93002c95002c97002c99002c9b002c9d002c9f002ca1002ca3002ca5002ca7002ca9002cab002cad002caf002cb1002cb3002cb5002cb7002cb9002cbb002cbd002cbf002cc1002cc3002cc5002cc7002cc9002ccb002ccd002ccf002cd1002cd3002cd5002cd7002cd9002cdb002cdd002cdf002ce1002ce3002ce4002d00002d01002d02002d03002d04002d05002d06002d07002d08002d09002d0a002d0b002d0c002d0d002d0e002d0f002d10002d11002d12002d13002d14002d15002d16002d17002d18002d19002d1a002d1b002d1c002d1d002d1e002d1f002d20002d21002d22002d23002d24002d2500a64100a64300a64500a64700a64900a64b00a64d00a64f00a65100a65300a65500a65700a65900a65b00a65d00a65f00a66300a66500a66700a66900a66b00a66d00a68100a68300a68500a68700a68900a68b00a68d00a68f00a69100a69300a69500a69700a72300a72500a72700a72900a72b00a72d00a72f00a73000a73100a73300a73500a73700a73900a73b00a73d00a73f00a74100a74300a74500a74700a74900a74b00a74d00a74f00a75100a75300a75500a75700a75900a75b00a75d00a75f00a76100a76300a76500a76700a76900a76b00a76d00a76f00a77000a77100a77200a77300a77400a77500a77600a77700a77800a77a00a77c00a77f00a781
00a78300a78500a78700a78c00fb0000fb0100fb0200fb0300fb0400fb0500fb0600fb1300fb1400fb1500fb1600fb1700ff4100ff4200ff4300ff4400ff4500ff4600ff4700ff4800ff4900ff4a00ff4b00ff4c00ff4d00ff4e00ff4f00ff5000ff5100ff5200ff5300ff5400ff5500ff5600ff5700ff5800ff5900ff5a01042801042901042a01042b01042c01042d01042e01042f01043001043101043201043301043401043501043601043701043801043901043a01043b01043c01043d01043e01043f01044001044101044201044301044401044501044601044701044801044901044a01044b01044c01044d01044e01044f01d41a01d41b01d41c01d41d01d41e01d41f01d42001d42101d42201d42301d42401d42501d42601d42701d42801d42901d42a01d42b01d42c01d42d01d42e01d42f01d43001d43101d43201d43301d44e01d44f01d45001d45101d45201d45301d45401d45601d45701d45801d45901d45a01d45b01d45c01d45d01d45e01d45f01d46001d46101d46201d46301d46401d46501d46601d46701d48201d48301d48401d48501d48601d48701d48801d48901d48a01d48b01d48c01d48d01d48e01d48f01d49001d49101d49201d49301d49401d49501d49601d49701d49801d49901d49a01d49b01d4b601d4b701d4b801d4b901d4bb01d4bd01d4be01d4bf01d4c001d4c101d4c201d4c301d4c501d4c601d4c701d4c801d4c901d4ca01d4cb01d4cc01d4cd01d4ce01d4cf01d4ea01d4eb01d4ec01d4ed01d4ee01d4ef01d4f001d4f101d4f201d4f301d4f401d4f501d4f601d4f701d4f801d4f901d4fa01d4fb01d4fc01d4fd01d4fe01d4ff01d50001d50101d50201d50301d51e01d51f01d52001d52101d52201d52301d52401d52501d52601d52701d52801d52901d52a01d52b01d52c01d52d01d52e01d52f01d53001d53101d53201d53301d53401d53501d53601d53701d55201d55301d55401d55501d55601d55701d55801d55901d55a01d55b01d55c01d55d01d55e01d55f01d56001d56101d56201d56301d56401d56501d56601d56701d56801d56901d56a01d56b01d58601d58701d58801d58901d58a01d58b01d58c01d58d01d58e01d58f01d59001d59101d59201d59301d59401d59501d59601d59701d59801d59901d59a01d59b01d59c01d59d01d59e01d59f01d5ba01d5bb01d5bc01d5bd01d5be01d5bf01d5c001d5c101d5c201d5c301d5c401d5c501d5c601d5c701d5c801d5c901d5ca01d5cb01d5cc01d5cd01d5ce01d5cf01d5d001d5d101d5d201d5d301d5ee01d5ef01d5f001d5f101d5f201d5f301d5f401d5f501d5f601d5f701d5f801d5f901d5fa01d5fb01d5fc01d5fd01d5fe01d5ff01d60001d60101d60201
d60301d60401d60501d60601d60701d62201d62301d62401d62501d62601d62701d62801d62901d62a01d62b01d62c01d62d01d62e01d62f01d63001d63101d63201d63301d63401d63501d63601d63701d63801d63901d63a01d63b01d65601d65701d65801d65901d65a01d65b01d65c01d65d01d65e01d65f01d66001d66101d66201d66301d66401d66501d66601d66701d66801d66901d66a01d66b01d66c01d66d01d66e01d66f01d68a01d68b01d68c01d68d01d68e01d68f01d69001d69101d69201d69301d69401d69501d69601d69701d69801d69901d69a01d69b01d69c01d69d01d69e01d69f01d6a001d6a101d6a201d6a301d6a401d6a501d6c201d6c301d6c401d6c501d6c601d6c701d6c801d6c901d6ca01d6cb01d6cc01d6cd01d6ce01d6cf01d6d001d6d101d6d201d6d301d6d401d6d501d6d601d6d701d6d801d6d901d6da01d6dc01d6dd01d6de01d6df01d6e001d6e101d6fc01d6fd01d6fe01d6ff01d70001d70101d70201d70301d70401d70501d70601d70701d70801d70901d70a01d70b01d70c01d70d01d70e01d70f01d71001d71101d71201d71301d71401d71601d71701d71801d71901d71a01d71b01d73601d73701d73801d73901d73a01d73b01d73c01d73d01d73e01d73f01d74001d74101d74201d74301d74401d74501d74601d74701d74801d74901d74a01d74b01d74c01d74d01d74e01d75001d75101d75201d75301d75401d75501d77001d77101d77201d77301d77401d77501d77601d77701d77801d77901d77a01d77b01d77c01d77d01d77e01d77f01d78001d78101d78201d78301d78401d78501d78601d78701d78801d78a01d78b01d78c01d78d01d78e01d78f01d7aa01d7ab01d7ac01d7ad01d7ae01d7af01d7b001d7b101d7b201d7b301d7b401d7b501d7b601d7b701d7b801d7b901d7ba01d7bb01d7bc01d7bd01d7be01d7bf01d7c001d7c101d7c201d7c401d7c501d7c601d7c701d7c801d7c901d7cb
@@ -0,0 +1 @@
1
+ 00004100004200004300004400004500004600004700004800004900004a00004b00004c00004d00004e00004f00005000005100005200005300005400005500005600005700005800005900005a0000c00000c10000c20000c30000c40000c50000c60000c70000c80000c90000ca0000cb0000cc0000cd0000ce0000cf0000d00000d10000d20000d30000d40000d50000d60000d80000d90000da0000db0000dc0000dd0000de00010000010200010400010600010800010a00010c00010e00011000011200011400011600011800011a00011c00011e00012000012200012400012600012800012a00012c00012e00013000013200013400013600013900013b00013d00013f00014100014300014500014700014a00014c00014e00015000015200015400015600015800015a00015c00015e00016000016200016400016600016800016a00016c00016e00017000017200017400017600017800017900017b00017d00018100018200018400018600018700018900018a00018b00018e00018f00019000019100019300019400019600019700019800019c00019d00019f0001a00001a20001a40001a60001a70001a90001ac0001ae0001af0001b10001b20001b30001b50001b70001b80001bc0001c40001c70001ca0001cd0001cf0001d10001d30001d50001d70001d90001db0001de0001e00001e20001e40001e60001e80001ea0001ec0001ee0001f10001f40001f60001f70001f80001fa0001fc0001fe00020000020200020400020600020800020a00020c00020e00021000021200021400021600021800021a00021c00021e00022000022200022400022600022800022a00022c00022e00023000023200023a00023b00023d00023e00024100024300024400024500024600024800024a00024c00024e00037000037200037600038600038800038900038a00038c00038e00038f00039100039200039300039400039500039600039700039800039900039a00039b00039c00039d00039e00039f0003a00003a10003a30003a40003a50003a60003a70003a80003a90003aa0003ab0003cf0003d20003d30003d40003d80003da0003dc0003de0003e00003e20003e40003e60003e80003ea0003ec0003ee0003f40003f70003f90003fa0003fd0003fe0003ff00040000040100040200040300040400040500040600040700040800040900040a00040b00040c00040d00040e00040f00041000041100041200041300041400041500041600041700041800041900041a00041b00041c00041d00041e00041f00042000042100042200042300042400042500042600042700042800042900042a00042b00042c00042d00042e00042f000460000462000464000466
00046800046a00046c00046e00047000047200047400047600047800047a00047c00047e00048000048a00048c00048e00049000049200049400049600049800049a00049c00049e0004a00004a20004a40004a60004a80004aa0004ac0004ae0004b00004b20004b40004b60004b80004ba0004bc0004be0004c00004c10004c30004c50004c70004c90004cb0004cd0004d00004d20004d40004d60004d80004da0004dc0004de0004e00004e20004e40004e60004e80004ea0004ec0004ee0004f00004f20004f40004f60004f80004fa0004fc0004fe00050000050200050400050600050800050a00050c00050e00051000051200051400051600051800051a00051c00051e00052000052200053100053200053300053400053500053600053700053800053900053a00053b00053c00053d00053e00053f00054000054100054200054300054400054500054600054700054800054900054a00054b00054c00054d00054e00054f0005500005510005520005530005540005550005560010a00010a10010a20010a30010a40010a50010a60010a70010a80010a90010aa0010ab0010ac0010ad0010ae0010af0010b00010b10010b20010b30010b40010b50010b60010b70010b80010b90010ba0010bb0010bc0010bd0010be0010bf0010c00010c10010c20010c30010c40010c5001e00001e02001e04001e06001e08001e0a001e0c001e0e001e10001e12001e14001e16001e18001e1a001e1c001e1e001e20001e22001e24001e26001e28001e2a001e2c001e2e001e30001e32001e34001e36001e38001e3a001e3c001e3e001e40001e42001e44001e46001e48001e4a001e4c001e4e001e50001e52001e54001e56001e58001e5a001e5c001e5e001e60001e62001e64001e66001e68001e6a001e6c001e6e001e70001e72001e74001e76001e78001e7a001e7c001e7e001e80001e82001e84001e86001e88001e8a001e8c001e8e001e90001e92001e94001e9e001ea0001ea2001ea4001ea6001ea8001eaa001eac001eae001eb0001eb2001eb4001eb6001eb8001eba001ebc001ebe001ec0001ec2001ec4001ec6001ec8001eca001ecc001ece001ed0001ed2001ed4001ed6001ed8001eda001edc001ede001ee0001ee2001ee4001ee6001ee8001eea001eec001eee001ef0001ef2001ef4001ef6001ef8001efa001efc001efe001f08001f09001f0a001f0b001f0c001f0d001f0e001f0f001f18001f19001f1a001f1b001f1c001f1d001f28001f29001f2a001f2b001f2c001f2d001f2e001f2f001f38001f39001f3a001f3b001f3c001f3d001f3e001f3f001f48001f49001f4a001f4b001f4c001f4d001f59001f5b001f5d001f5f001f68001f69001f6a00
1f6b001f6c001f6d001f6e001f6f001fb8001fb9001fba001fbb001fc8001fc9001fca001fcb001fd8001fd9001fda001fdb001fe8001fe9001fea001feb001fec001ff8001ff9001ffa001ffb00210200210700210b00210c00210d00211000211100211200211500211900211a00211b00211c00211d00212400212600212800212a00212b00212c00212d00213000213100213200213300213e00213f00214500216000216100216200216300216400216500216600216700216800216900216a00216b00216c00216d00216e00216f0021830024b60024b70024b80024b90024ba0024bb0024bc0024bd0024be0024bf0024c00024c10024c20024c30024c40024c50024c60024c70024c80024c90024ca0024cb0024cc0024cd0024ce0024cf002c00002c01002c02002c03002c04002c05002c06002c07002c08002c09002c0a002c0b002c0c002c0d002c0e002c0f002c10002c11002c12002c13002c14002c15002c16002c17002c18002c19002c1a002c1b002c1c002c1d002c1e002c1f002c20002c21002c22002c23002c24002c25002c26002c27002c28002c29002c2a002c2b002c2c002c2d002c2e002c60002c62002c63002c64002c67002c69002c6b002c6d002c6e002c6f002c72002c75002c80002c82002c84002c86002c88002c8a002c8c002c8e002c90002c92002c94002c96002c98002c9a002c9c002c9e002ca0002ca2002ca4002ca6002ca8002caa002cac002cae002cb0002cb2002cb4002cb6002cb8002cba002cbc002cbe002cc0002cc2002cc4002cc6002cc8002cca002ccc002cce002cd0002cd2002cd4002cd6002cd8002cda002cdc002cde002ce0002ce200a64000a64200a64400a64600a64800a64a00a64c00a64e00a65000a65200a65400a65600a65800a65a00a65c00a65e00a66200a66400a66600a66800a66a00a66c00a68000a68200a68400a68600a68800a68a00a68c00a68e00a69000a69200a69400a69600a72200a72400a72600a72800a72a00a72c00a72e00a73200a73400a73600a73800a73a00a73c00a73e00a74000a74200a74400a74600a74800a74a00a74c00a74e00a75000a75200a75400a75600a75800a75a00a75c00a75e00a76000a76200a76400a76600a76800a76a00a76c00a76e00a77900a77b00a77d00a77e00a78000a78200a78400a78600a78b00ff2100ff2200ff2300ff2400ff2500ff2600ff2700ff2800ff2900ff2a00ff2b00ff2c00ff2d00ff2e00ff2f00ff3000ff3100ff3200ff3300ff3400ff3500ff3600ff3700ff3800ff3900ff3a01040001040101040201040301040401040501040601040701040801040901040a01040b01040c01040d01040e01040f0104100104110104120104130104
1401041501041601041701041801041901041a01041b01041c01041d01041e01041f01042001042101042201042301042401042501042601042701d40001d40101d40201d40301d40401d40501d40601d40701d40801d40901d40a01d40b01d40c01d40d01d40e01d40f01d41001d41101d41201d41301d41401d41501d41601d41701d41801d41901d43401d43501d43601d43701d43801d43901d43a01d43b01d43c01d43d01d43e01d43f01d44001d44101d44201d44301d44401d44501d44601d44701d44801d44901d44a01d44b01d44c01d44d01d46801d46901d46a01d46b01d46c01d46d01d46e01d46f01d47001d47101d47201d47301d47401d47501d47601d47701d47801d47901d47a01d47b01d47c01d47d01d47e01d47f01d48001d48101d49c01d49e01d49f01d4a201d4a501d4a601d4a901d4aa01d4ab01d4ac01d4ae01d4af01d4b001d4b101d4b201d4b301d4b401d4b501d4d001d4d101d4d201d4d301d4d401d4d501d4d601d4d701d4d801d4d901d4da01d4db01d4dc01d4dd01d4de01d4df01d4e001d4e101d4e201d4e301d4e401d4e501d4e601d4e701d4e801d4e901d50401d50501d50701d50801d50901d50a01d50d01d50e01d50f01d51001d51101d51201d51301d51401d51601d51701d51801d51901d51a01d51b01d51c01d53801d53901d53b01d53c01d53d01d53e01d54001d54101d54201d54301d54401d54601d54a01d54b01d54c01d54d01d54e01d54f01d55001d56c01d56d01d56e01d56f01d57001d57101d57201d57301d57401d57501d57601d57701d57801d57901d57a01d57b01d57c01d57d01d57e01d57f01d58001d58101d58201d58301d58401d58501d5a001d5a101d5a201d5a301d5a401d5a501d5a601d5a701d5a801d5a901d5aa01d5ab01d5ac01d5ad01d5ae01d5af01d5b001d5b101d5b201d5b301d5b401d5b501d5b601d5b701d5b801d5b901d5d401d5d501d5d601d5d701d5d801d5d901d5da01d5db01d5dc01d5dd01d5de01d5df01d5e001d5e101d5e201d5e301d5e401d5e501d5e601d5e701d5e801d5e901d5ea01d5eb01d5ec01d5ed01d60801d60901d60a01d60b01d60c01d60d01d60e01d60f01d61001d61101d61201d61301d61401d61501d61601d61701d61801d61901d61a01d61b01d61c01d61d01d61e01d61f01d62001d62101d63c01d63d01d63e01d63f01d64001d64101d64201d64301d64401d64501d64601d64701d64801d64901d64a01d64b01d64c01d64d01d64e01d64f01d65001d65101d65201d65301d65401d65501d67001d67101d67201d67301d67401d67501d67601d67701d67801d67901d67a01d67b01d67c01d67d01d67e01d67f01d68001d68101d68201d68301d68401d685
01d68601d68701d68801d68901d6a801d6a901d6aa01d6ab01d6ac01d6ad01d6ae01d6af01d6b001d6b101d6b201d6b301d6b401d6b501d6b601d6b701d6b801d6b901d6ba01d6bb01d6bc01d6bd01d6be01d6bf01d6c001d6e201d6e301d6e401d6e501d6e601d6e701d6e801d6e901d6ea01d6eb01d6ec01d6ed01d6ee01d6ef01d6f001d6f101d6f201d6f301d6f401d6f501d6f601d6f701d6f801d6f901d6fa01d71c01d71d01d71e01d71f01d72001d72101d72201d72301d72401d72501d72601d72701d72801d72901d72a01d72b01d72c01d72d01d72e01d72f01d73001d73101d73201d73301d73401d75601d75701d75801d75901d75a01d75b01d75c01d75d01d75e01d75f01d76001d76101d76201d76301d76401d76501d76601d76701d76801d76901d76a01d76b01d76c01d76d01d76e01d79001d79101d79201d79301d79401d79501d79601d79701d79801d79901d79a01d79b01d79c01d79d01d79e01d79f01d7a001d7a101d7a201d7a301d7a401d7a501d7a601d7a701d7a801d7ca
@@ -0,0 +1 @@
1
+ 00006900006a00012f00024900026800029d0002b20003f3000456000458001d62001d96001da4001da8001e2d001ecb002071002148002149002c7c01d42201d42301d45601d45701d48a01d48b01d4be01d4bf01d4f201d4f301d52601d52701d55a01d55b01d58e01d58f01d5c201d5c301d5f601d5f701d62a01d62b01d65e01d65f01d69201d693
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/version"
4
4
  require "unicode_utils/name"
@@ -6,3 +6,10 @@ require "unicode_utils/simple_upcase"
6
6
  require "unicode_utils/simple_downcase"
7
7
  require "unicode_utils/upcase"
8
8
  require "unicode_utils/downcase"
9
+ require "unicode_utils/titlecase_char_q"
10
+ require "unicode_utils/lowercase_char_q"
11
+ require "unicode_utils/uppercase_char_q"
12
+ require "unicode_utils/cased_char_q"
13
+ require "unicode_utils/case_ignorable_char_q"
14
+ require "unicode_utils/soft_dotted_char_q"
15
+ require "unicode_utils/combining_class"
@@ -0,0 +1,16 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_codepoint_set"
4
+
5
+ module UnicodeUtils
6
+
7
+ CASE_IGNORABLE_SET = Impl.read_codepoint_set("case_ignorable_set") # :nodoc:
8
+
9
+ # Returns true if the given character is case-ignorable as defined
10
+ # by Unicode 5.0, section 3.13.
11
+ def case_ignorable_char?(char)
12
+ CASE_IGNORABLE_SET.include?(char.ord)
13
+ end
14
+ module_function :case_ignorable_char?
15
+
16
+ end
@@ -0,0 +1,18 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/lowercase_char_q"
4
+ require "unicode_utils/uppercase_char_q"
5
+ require "unicode_utils/titlecase_char_q"
6
+
7
+ module UnicodeUtils
8
+
9
+ # A cased char is a character that has the Unicode property
10
+ # Lowercase or Uppercase or the general category Titlecase_Letter.
11
+ #
12
+ # See also: lowercase_char?, uppercase_char?, titlecase_char?
13
+ def cased_char?(char)
14
+ lowercase_char?(char) || uppercase_char?(char) || titlecase_char?(char)
15
+ end
16
+ module_function :cased_char?
17
+
18
+ end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ module UnicodeUtils
4
+
5
+ module Impl # :nodoc:
6
+
7
+ def self.read_combining_class_map
8
+ path = File.join(File.dirname(__FILE__),
9
+ "..", "..", "cdata", "combining_class_map")
10
+ Hash.new.tap { |map|
11
+ File.open(path, "r:US-ASCII:-") do |input|
12
+ buffer = "x" * 6
13
+ buffer.force_encoding(Encoding::US_ASCII)
14
+ cc_buffer = "x" * 2
15
+ cc_buffer.force_encoding(Encoding::US_ASCII)
16
+ while input.read(6, buffer)
17
+ map[buffer.to_i(16)] = input.read(2, cc_buffer).to_i(16)
18
+ end
19
+ end
20
+ }
21
+ end
22
+
23
+ end
24
+
25
+ COMBINING_CLASS_MAP = Impl.read_combining_class_map # :nodoc:
26
+
27
+ # Get the combining class of the given character as an integer in
28
+ # the range 0..255.
29
+ def combining_class(char)
30
+ COMBINING_CLASS_MAP[char.ord] || 0
31
+ end
32
+ module_function :combining_class
33
+
34
+ end
@@ -0,0 +1,164 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/cased_char_q"
4
+ require "unicode_utils/case_ignorable_char_q"
5
+ require "unicode_utils/soft_dotted_char_q"
6
+ require "unicode_utils/combining_class"
7
+
8
+ module UnicodeUtils
9
+
10
+ module Impl # :nodoc:
11
+
12
+ class ConditionalCasing # :nodoc:
13
+
14
+ attr_reader :mapping
15
+
16
+ def initialize(mapping)
17
+ @mapping = mapping
18
+ end
19
+
20
+ def context_match?(str, pos)
21
+ true
22
+ end
23
+
24
+ end
25
+
26
+ class BeforeDotConditionalCasing < ConditionalCasing # :nodoc:
27
+
28
+ def context_match?(str, pos)
29
+ (pos + 1).upto(str.length - 1) { |i|
30
+ c = str[i]
31
+ return true if c.ord == 0x0307
32
+ cc = UnicodeUtils.combining_class(c)
33
+ return false if cc == 0 || cc == 230
34
+ }
35
+ false # "combining dot above" not found
36
+ end
37
+
38
+ end
39
+
40
+ class NotBeforeDotConditionalCasing < BeforeDotConditionalCasing # :nodoc:
41
+
42
+ def context_match?(str, pos)
43
+ !super
44
+ end
45
+
46
+ end
47
+
48
+ class MoreAboveConditionalCasing < ConditionalCasing # :nodoc:
49
+
50
+ def context_match?(str, pos)
51
+ (pos + 1).upto(str.length - 1) { |i|
52
+ c = str[i]
53
+ cc = UnicodeUtils.combining_class(c)
54
+ return true if cc == 230
55
+ return false if cc == 0
56
+ }
57
+ false
58
+ end
59
+
60
+ end
61
+
62
+ class AfterIConditionalCasing < ConditionalCasing # :nodoc:
63
+
64
+ def context_match?(str, pos)
65
+ (pos - 1).downto(0) { |i|
66
+ c = str[i]
67
+ return true if c.ord == 0x49 # uppercase I
68
+ cc = UnicodeUtils.combining_class(c)
69
+ return false if cc == 0 || cc == 230
70
+ }
71
+ false # uppercase I not found
72
+ end
73
+
74
+ end
75
+
76
+ class AfterSoftDottedConditionalCasing < ConditionalCasing # :nodoc:
77
+
78
+ def context_match?(str, pos)
79
+ (pos - 1).downto(0) { |i|
80
+ c = str[i]
81
+ return true if UnicodeUtils.soft_dotted_char?(c)
82
+ cc = UnicodeUtils.combining_class(c)
83
+ return false if cc == 0 || cc == 230
84
+ }
85
+ false
86
+ end
87
+
88
+ end
89
+
90
+ class FinalSigmaConditionalCasing < ConditionalCasing # :nodoc:
91
+
92
+ def context_match?(str, pos)
93
+ before_match?(str, pos) && !after_match?(str, pos)
94
+ end
95
+
96
+ private
97
+
98
+ def before_match?(str, pos)
99
+ (pos - 1).downto(0) { |i|
100
+ c = str[i]
101
+ return true if UnicodeUtils.cased_char?(c)
102
+ return false unless UnicodeUtils.case_ignorable_char?(c)
103
+ }
104
+ false # no cased char
105
+ end
106
+
107
+ def after_match?(str, pos)
108
+ (pos + 1).upto(str.length - 1) { |i|
109
+ c = str[i]
110
+ return true if UnicodeUtils.cased_char?(c)
111
+ return false unless UnicodeUtils.case_ignorable_char?(c)
112
+ }
113
+ false
114
+ end
115
+
116
+ end
117
+
118
+ def self.read_conditional_casings(filename)
119
+ path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
120
+ Hash.new.tap { |cp_map|
121
+ File.open(path, "r:US-ASCII:-") do |input|
122
+ input.each_line { |line|
123
+ line.chomp!
124
+ record = line.split(";")
125
+ cp = record[0].to_i(16)
126
+ mapping = record[1].split(",").map { |c| c.to_i(16) }
127
+ language_id = record[2].empty? ? nil : record[2].to_sym
128
+ context = record[3] && record[3].gsub('_', '')
129
+ casing = Impl.const_get("#{context}ConditionalCasing").new(mapping)
130
+ (cp_map[cp] ||= {})[language_id] = casing
131
+ }
132
+ end
133
+ }
134
+ end
135
+
136
+ CONDITIONAL_UPCASE_MAP =
137
+ read_conditional_casings("cond_uc_map")
138
+
139
+ CONDITIONAL_DOWNCASE_MAP =
140
+ read_conditional_casings("cond_lc_map")
141
+
142
+ def self.conditional_upcase_mapping(cp, str, pos, language_id)
143
+ lang_map = CONDITIONAL_UPCASE_MAP[cp]
144
+ if lang_map
145
+ casing = lang_map[language_id] || lang_map[nil]
146
+ if casing && casing.context_match?(str, pos)
147
+ casing.mapping
148
+ end
149
+ end
150
+ end
151
+
152
+ def self.conditional_downcase_mapping(cp, str, pos, language_id)
153
+ lang_map = CONDITIONAL_DOWNCASE_MAP[cp]
154
+ if lang_map
155
+ casing = lang_map[language_id] || lang_map[nil]
156
+ if casing && casing.context_match?(str, pos)
157
+ casing.mapping
158
+ end
159
+ end
160
+ end
161
+
162
+ end
163
+
164
+ end
@@ -1,34 +1,40 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/simple_downcase"
4
4
  require "unicode_utils/read_special_casing_map"
5
+ require "unicode_utils/conditional_casing"
5
6
 
6
7
  module UnicodeUtils
7
8
 
8
- SPECIAL_DOWNCASE_MAP = Impl.read_special_casing_map("special_lc_map")
9
+ SPECIAL_DOWNCASE_MAP = Impl.read_special_casing_map("special_lc_map") # :nodoc:
9
10
 
10
11
  # Perform a full case-conversion of +str+ to lowercase according to
11
12
  # the Unicode standard.
12
13
  #
13
- # Examples:
14
+ # Some conversion rules are language-dependent; these are in effect
15
+ # when a non-nil +language_id+ is given. If non-nil, the
16
+ # +language_id+ must be a two-letter language code as defined in BCP
17
+ # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a
18
+ # language doesn't have a two-letter code, the three-letter code is
19
+ # to be used.
14
20
  #
15
- # UnicodeUtils.downcase "ᾈ" => "ᾀ"
21
+ # Examples:
16
22
  #
17
- # Note: The current implementation ignores the +language_id+
18
- # argument and doesn't deal with language and context specific
19
- # cases. This affects text in the languages Lithuanian, Turkish and
20
- # Azeri and the greek letter sigma in a special position. A future
21
- # version of UnicodeUtils will fix this. All other languages are
22
- # fully supported according to the Unicode standard.
23
+ # UnicodeUtils.downcase("ᾈ") => "ᾀ"
24
+ # UnicodeUtils.downcase("aBI\u{307}", :tr) => "abi"
23
25
  def downcase(str, language_id = nil)
24
26
  String.new.force_encoding(str.encoding).tap { |res|
27
+ pos = 0
25
28
  str.each_codepoint { |cp|
26
- special_mapping = SPECIAL_DOWNCASE_MAP[cp]
29
+ special_mapping =
30
+ Impl.conditional_downcase_mapping(cp, str, pos, language_id) ||
31
+ SPECIAL_DOWNCASE_MAP[cp]
27
32
  if special_mapping
28
33
  special_mapping.each { |m| res << m }
29
34
  else
30
35
  res << (SIMPLE_DOWNCASE_MAP[cp] || cp)
31
36
  end
37
+ pos += 1
32
38
  }
33
39
  }
34
40
  end
@@ -0,0 +1,15 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_codepoint_set"
4
+
5
+ module UnicodeUtils
6
+
7
+ PROP_LOWERCASE_SET = Impl.read_codepoint_set("prop_set_lowercase") # :nodoc:
8
+
9
+ # True if the given character has the Unicode property Lowercase.
10
+ def lowercase_char?(char)
11
+ PROP_LOWERCASE_SET.include?(char.ord)
12
+ end
13
+ module_function :lowercase_char?
14
+
15
+ end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  module UnicodeUtils
4
4
 
@@ -7,8 +7,9 @@ module UnicodeUtils
7
7
  def self.read_names
8
8
  path = File.join(File.dirname(__FILE__), "..", "..", "cdata", "names")
9
9
  Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII") do |input|
10
+ File.open(path, "r:US-ASCII:-") do |input|
11
11
  buffer = "x" * 6
12
+ buffer.force_encoding(Encoding::US_ASCII)
12
13
  while input.read(6, buffer)
13
14
  map[buffer.to_i(16)] = input.gets.tap { |x| x.chomp! }
14
15
  end
@@ -18,7 +19,7 @@ module UnicodeUtils
18
19
 
19
20
  end
20
21
 
21
- NAME_MAP = Impl.read_names
22
+ NAME_MAP = Impl.read_names # :nodoc:
22
23
 
23
24
  # Get the Unicode name of the single codepoint in str.
24
25
  #
@@ -26,7 +27,7 @@ module UnicodeUtils
26
27
  #
27
28
  # UnicodeUtils.name "ᾀ" => "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI"
28
29
  def name(str)
29
- NAME_MAP[str.codepoints.first] # .encode
30
+ NAME_MAP[str.codepoints.first]
30
31
  end
31
32
  module_function :name
32
33
 
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  module UnicodeUtils
4
4
 
@@ -7,8 +7,9 @@ module UnicodeUtils
7
7
  def self.read_codepoint_map(filename)
8
8
  path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
9
  Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII") do |input|
10
+ File.open(path, "r:US-ASCII:-") do |input|
11
11
  buffer = "x" * 6
12
+ buffer.force_encoding(Encoding::US_ASCII)
12
13
  while input.read(6, buffer)
13
14
  map[buffer.to_i(16)] = input.read(6, buffer).to_i(16)
14
15
  end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ module UnicodeUtils
4
+
5
+ module Impl # :nodoc:
6
+
7
+ def self.read_codepoint_set(filename)
8
+ path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
+ Hash.new.tap { |set|
10
+ File.open(path, "r:US-ASCII:-") do |input|
11
+ buffer = "x" * 6
12
+ buffer.force_encoding(Encoding::US_ASCII)
13
+ while input.read(6, buffer)
14
+ set[buffer.to_i(16)] = true
15
+ end
16
+ end
17
+ }
18
+ end
19
+
20
+ end
21
+
22
+ end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  module UnicodeUtils
4
4
 
@@ -7,8 +7,9 @@ module UnicodeUtils
7
7
  def self.read_special_casing_map(filename)
8
8
  path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
9
  Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII") do |input|
10
+ File.open(path, "r:US-ASCII:-") do |input|
11
11
  buffer = "x" * 6
12
+ buffer.force_encoding(Encoding::US_ASCII)
12
13
  while input.read(6, buffer)
13
14
  cp = buffer.to_i(16)
14
15
  mapping = []
@@ -1,10 +1,10 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/read_codepoint_map"
4
4
 
5
5
  module UnicodeUtils
6
6
 
7
- SIMPLE_DOWNCASE_MAP = Impl.read_codepoint_map("simple_lc_map")
7
+ SIMPLE_DOWNCASE_MAP = Impl.read_codepoint_map("simple_lc_map") # :nodoc:
8
8
 
9
9
  # Map each codepoint in +str+ that has a single codepoint
10
10
  # lowercase-mapping to that lowercase mapping. +str+ is assumed to be
@@ -1,10 +1,10 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/read_codepoint_map"
4
4
 
5
5
  module UnicodeUtils
6
6
 
7
- SIMPLE_UPCASE_MAP = Impl.read_codepoint_map("simple_uc_map")
7
+ SIMPLE_UPCASE_MAP = Impl.read_codepoint_map("simple_uc_map") # :nodoc:
8
8
 
9
9
  # Map each codepoint in +str+ that has a single codepoint
10
10
  # uppercase-mapping to that uppercase mapping. +str+ is assumed to be
@@ -0,0 +1,16 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_codepoint_set"
4
+
5
+ module UnicodeUtils
6
+
7
+ SOFT_DOTTED_SET = Impl.read_codepoint_set("soft_dotted_set") # :nodoc:
8
+
9
+ # Returns true if the given character has the Unicode property
10
+ # Soft_Dotted.
11
+ def soft_dotted_char?(char)
12
+ SOFT_DOTTED_SET.include?(char.ord)
13
+ end
14
+ module_function :soft_dotted_char?
15
+
16
+ end
@@ -0,0 +1,16 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_codepoint_set"
4
+
5
+ module UnicodeUtils
6
+
7
+ TITLECASE_LETTER_SET = Impl.read_codepoint_set("cat_set_titlecase") # :nodoc:
8
+
9
+ # True if the given character has the General_Category
10
+ # Titlecase_Letter (Lt).
11
+ def titlecase_char?(char)
12
+ TITLECASE_LETTER_SET.include?(char.ord)
13
+ end
14
+ module_function :titlecase_char?
15
+
16
+ end
@@ -1,33 +1,41 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/simple_upcase"
4
4
  require "unicode_utils/read_special_casing_map"
5
+ require "unicode_utils/conditional_casing"
5
6
 
6
7
  module UnicodeUtils
7
8
 
8
- SPECIAL_UPCASE_MAP = Impl.read_special_casing_map("special_uc_map")
9
+ SPECIAL_UPCASE_MAP = Impl.read_special_casing_map("special_uc_map") # :nodoc:
9
10
 
10
11
  # Perform a full case-conversion of +str+ to uppercase according to
11
12
  # the Unicode standard.
12
13
  #
13
- # Examples:
14
+ # Some conversion rules are language-dependent; these are in effect
15
+ # when a non-nil +language_id+ is given. If non-nil, the
16
+ # +language_id+ must be a two-letter language code as defined in BCP
17
+ # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a
18
+ # language doesn't have a two-letter code, the three-letter code is
19
+ # to be used.
14
20
  #
15
- # UnicodeUtils.upcase "weiß" => "WEISS"
21
+ # Examples:
16
22
  #
17
- # Note: The current implementation ignores the +language_id+
18
- # argument and doesn't deal with language and context specific
19
- # cases. This affects text in the languages Lithuanian, Turkish and
20
- # Azeri. A future version of UnicodeUtils will fix this. All other
21
- # languages are fully supported according to the Unicode standard.
23
+ # UnicodeUtils.upcase("weiß") => "WEISS"
24
+ # UnicodeUtils.upcase("i", :en) => "I"
25
+ # UnicodeUtils.upcase("i", :tr) => "İ"
22
26
  def upcase(str, language_id = nil)
23
27
  String.new.force_encoding(str.encoding).tap { |res|
28
+ pos = 0
24
29
  str.each_codepoint { |cp|
25
- special_mapping = SPECIAL_UPCASE_MAP[cp]
30
+ special_mapping =
31
+ Impl.conditional_upcase_mapping(cp, str, pos, language_id) ||
32
+ SPECIAL_UPCASE_MAP[cp]
26
33
  if special_mapping
27
34
  special_mapping.each { |m| res << m }
28
35
  else
29
36
  res << (SIMPLE_UPCASE_MAP[cp] || cp)
30
37
  end
38
+ pos += 1
31
39
  }
32
40
  }
33
41
  end
@@ -0,0 +1,15 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_codepoint_set"
4
+
5
+ module UnicodeUtils
6
+
7
+ PROP_UPPERCASE_SET = Impl.read_codepoint_set("prop_set_uppercase") # :nodoc:
8
+
9
+ # True if the given character has the Unicode property Uppercase.
10
+ def uppercase_char?(char)
11
+ PROP_UPPERCASE_SET.include?(char.ord)
12
+ end
13
+ module_function :uppercase_char?
14
+
15
+ end
@@ -1,8 +1,8 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  module UnicodeUtils
4
4
 
5
5
  # Corresponds to the unicode_utils gem version.
6
- VERSION = "0.1.0"
6
+ VERSION = "0.2.0"
7
7
 
8
8
  end
@@ -8,6 +8,7 @@ class TestUnicodeUtils < Test::Unit::TestCase
8
8
 
9
9
  def test_name
10
10
  assert_equal "LATIN SMALL LETTER F", UnicodeUtils.name("f")
11
+ assert_equal Encoding::US_ASCII, UnicodeUtils.name("f").encoding
11
12
  end
12
13
 
13
14
  def test_simple_upcase
@@ -22,11 +23,85 @@ class TestUnicodeUtils < Test::Unit::TestCase
22
23
 
23
24
  def test_upcase
24
25
  assert_equal "WEISS 123", UnicodeUtils.upcase("Weiß 123")
26
+ assert_equal "WEISS 123", UnicodeUtils.upcase("Weiß 123", :de)
27
+ assert_equal "I", UnicodeUtils.upcase("i")
28
+ assert_equal "I", UnicodeUtils.upcase("i", :de)
29
+ assert_equal "\u{130}", UnicodeUtils.upcase("i", :tr)
30
+ assert_equal "\u{130}", UnicodeUtils.upcase("i", :az)
31
+ assert_equal "ABI\u{3a3}SS\u{3a3}/FFI\u{5ffff}\u{10405}",
32
+ UnicodeUtils.upcase("aBi\u{3c3}\u{df}\u{3c2}/\u{fb03}\u{5ffff}\u{1042d}")
33
+ assert_equal "AB\u{130}\u{3a3}SS\u{3a3}/FFI\u{5ffff}\u{10405}",
34
+ UnicodeUtils.upcase("aBi\u{3c3}\u{df}\u{3c2}/\u{fb03}\u{5ffff}\u{1042d}", :az)
35
+ assert_equal "I\u{307}", UnicodeUtils.upcase("i\u{307}")
36
+ assert_equal "I", UnicodeUtils.upcase("i\u{307}", :lt)
25
37
  end
26
38
 
27
39
  def test_downcase
28
40
  # LATIN CAPITAL LETTER I WITH DOT ABOVE
29
41
  assert_equal "\u0069\u0307", UnicodeUtils.downcase("\u0130")
42
+ assert_equal "\u0069\u0307", UnicodeUtils.downcase("\u0130", :de)
43
+ assert_equal "\u0069", UnicodeUtils.downcase("\u0130", :tr)
44
+ assert_equal "\u0069", UnicodeUtils.downcase("\u0130", :az)
45
+ assert_equal "ab\u{131}\u{3c3}\u{df}\u{3c2}/\u{5ffff}\u{1042d}",
46
+ UnicodeUtils.downcase("aBI\u{3a3}\u{df}\u{3a3}/\u{5ffff}\u{10405}", :tr)
47
+ # tests After_I and Not_Before_Dot
48
+ assert_equal "abi", UnicodeUtils.downcase("aBI\u{307}", :tr)
49
+ assert_equal "ia\u{300}", UnicodeUtils.downcase("Ia\u{300}", :lt)
50
+ # this is probably unrealistic, because I don't understand a word of Lithuanian
51
+ assert_equal "i\u{307}\u{300}", UnicodeUtils.downcase("I\u{300}", :lt)
52
+ end
53
+
54
+ def test_downcase_final_sigma
55
+ assert_equal "abi\u{3c3}\u{df}\u{3c2}/\u{5ffff}\u{1042d}",
56
+ UnicodeUtils.downcase("aBI\u{3a3}\u{df}\u{3a3}/\u{5ffff}\u{10405}")
57
+ end
58
+
59
+ def test_titlecase?
60
+ assert_equal true, UnicodeUtils.titlecase_char?("\u{01F2}")
61
+ assert_equal false, UnicodeUtils.titlecase_char?("\u{0041}")
62
+ end
63
+
64
+ def test_lowercase_char?
65
+ assert_equal true, UnicodeUtils.lowercase_char?("c")
66
+ assert_equal true, UnicodeUtils.lowercase_char?("ö")
67
+ assert_equal false, UnicodeUtils.lowercase_char?("C")
68
+ assert_equal false, UnicodeUtils.lowercase_char?("2")
69
+ end
70
+
71
+ def test_uppercase_char?
72
+ assert_equal true, UnicodeUtils.uppercase_char?("C")
73
+ assert_equal true, UnicodeUtils.uppercase_char?("Ö")
74
+ assert_equal false, UnicodeUtils.uppercase_char?("2")
75
+ assert_equal false, UnicodeUtils.uppercase_char?("c")
76
+ end
77
+
78
+ def test_cased_char?
79
+ assert_equal true, UnicodeUtils.cased_char?("a")
80
+ assert_equal true, UnicodeUtils.cased_char?("Ä")
81
+ assert_equal true, UnicodeUtils.cased_char?("ß")
82
+ assert_equal false, UnicodeUtils.cased_char?("2")
83
+ end
84
+
85
+ def test_case_ignorable_char?
86
+ assert_equal true, UnicodeUtils.case_ignorable_char?(":")
87
+ assert_equal true, UnicodeUtils.case_ignorable_char?("\u{302}")
88
+ assert_equal true, UnicodeUtils.case_ignorable_char?("\u{20dd}")
89
+ assert_equal true, UnicodeUtils.case_ignorable_char?("\u{600}")
90
+ assert_equal true, UnicodeUtils.case_ignorable_char?("\u{2b0}")
91
+ assert_equal true, UnicodeUtils.case_ignorable_char?("\u{2c2}")
92
+ assert_equal false, UnicodeUtils.case_ignorable_char?("a")
93
+ assert_equal false, UnicodeUtils.case_ignorable_char?("1")
94
+ end
95
+
96
+ def test_combining_class
97
+ assert_equal 0, UnicodeUtils.combining_class("a")
98
+ assert_equal 230, UnicodeUtils.combining_class("\u{1b6e}")
99
+ end
100
+
101
+ def test_soft_dotted_char?
102
+ assert_equal true, UnicodeUtils.soft_dotted_char?("j")
103
+ assert_equal true, UnicodeUtils.soft_dotted_char?("\u{2c7c}")
104
+ assert_equal false, UnicodeUtils.soft_dotted_char?("a")
30
105
  end
31
106
 
32
107
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stefan Lang
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-10-27 00:00:00 +01:00
12
+ date: 2008-11-02 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -23,19 +23,36 @@ extra_rdoc_files:
23
23
  - README.txt
24
24
  files:
25
25
  - lib/unicode_utils/read_special_casing_map.rb
26
+ - lib/unicode_utils/conditional_casing.rb
26
27
  - lib/unicode_utils/simple_downcase.rb
27
28
  - lib/unicode_utils/read_codepoint_map.rb
29
+ - lib/unicode_utils/read_codepoint_set.rb
30
+ - lib/unicode_utils/titlecase_char_q.rb
31
+ - lib/unicode_utils/cased_char_q.rb
28
32
  - lib/unicode_utils/downcase.rb
29
33
  - lib/unicode_utils/name.rb
34
+ - lib/unicode_utils/uppercase_char_q.rb
30
35
  - lib/unicode_utils/upcase.rb
36
+ - lib/unicode_utils/case_ignorable_char_q.rb
31
37
  - lib/unicode_utils/simple_upcase.rb
38
+ - lib/unicode_utils/lowercase_char_q.rb
39
+ - lib/unicode_utils/combining_class.rb
32
40
  - lib/unicode_utils/version.rb
41
+ - lib/unicode_utils/soft_dotted_char_q.rb
33
42
  - lib/unicode_utils.rb
43
+ - cdata/combining_class_map
44
+ - cdata/cond_lc_map
45
+ - cdata/prop_set_lowercase
46
+ - cdata/cat_set_titlecase
34
47
  - cdata/special_lc_map
35
48
  - cdata/names
49
+ - cdata/cond_uc_map
36
50
  - cdata/special_uc_map
51
+ - cdata/soft_dotted_set
37
52
  - cdata/simple_lc_map
53
+ - cdata/case_ignorable_set
38
54
  - cdata/simple_uc_map
55
+ - cdata/prop_set_uppercase
39
56
  - test/test_unicode_utils.rb
40
57
  - README.txt
41
58
  - LICENSE.txt