unicode_utils 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.txt CHANGED
@@ -10,17 +10,21 @@ Install with RubyGems:
10
10
 
11
11
  Or get the source from Github: http://github.com/lang/unicode_utils
12
12
 
13
- Currently Unicode Utils works only with recent builds of Ruby 1.9.
13
+ UnicodeUtils works with Ruby 1.9.1-preview1 or later, though a bug
14
+ (http://redmine.ruby-lang.org/issues/show/692) in
15
+ 1.9.1-preview1 prevents UnicodeUtils from loading when
16
+ Encoding.default_internal is set (e.g. with -U or -E).
14
17
 
15
18
  == Synopsis
16
19
 
17
20
  require "unicode_utils"
18
21
 
19
- UnicodeUtils.name "æ" => "LATIN SMALL LETTER AE"
22
+ UnicodeUtils.name("æ") => "LATIN SMALL LETTER AE"
20
23
 
21
- UnicodeUtils.upcase "Straße" => "STRASSE"
24
+ UnicodeUtils.upcase("Straße") => "STRASSE"
25
+ UnicodeUtils.upcase("i", :tr) => "İ"
22
26
 
23
- UnicodeUtils.downcase "Ümit" => "ümit"
27
+ UnicodeUtils.downcase("Ümit") => "ümit"
24
28
 
25
29
  Start with the UnicodeUtils module in the API documentation for
26
30
  complete documentation.
@@ -30,7 +34,9 @@ at require time, the library is split up into separate files for
30
34
  each function. The +unicode_utils+ library loads them all. If you
31
35
  need only a specific function, e.g. +upcase+, you can require only
32
36
  the file <tt>unicode_utils/upcase</tt> to save memory and reduce
33
- startup time.
37
+ startup time. Methods whose names end in ? are defined in files
38
+ suffixed with +_q+; for example, <tt>lowercase_char?</tt> can be
39
+ required with <tt>unicode_utils/lowercase_char_q</tt>.
34
40
 
35
41
  == License
36
42
 
@@ -0,0 +1 @@
1
+ 
@@ -0,0 +1 @@
1
+ 0001c50001c80001cb0001f2001f88001f89001f8a001f8b001f8c001f8d001f8e001f8f001f98001f99001f9a001f9b001f9c001f9d001f9e001f9f001fa8001fa9001faa001fab001fac001fad001fae001faf001fbc001fcc001ffc
@@ -0,0 +1 @@
1
+ 00033401000335010003360100033701000338010020d2010020d3010020d8010020d9010020da010020e5010020e6010020ea010020eb01010a390101d1670101d1680101d1690100093c070009bc07000a3c07000abc07000b3c07000cbc0700103707001b3407001c37070030990800309a0800094d090009cd09000a4d09000acd09000b4d09000bcd09000c4d09000ccd09000d4d09000dca09000e3a09000f84090010390900103a0900171409001734090017d209001b4409001baa0900a8060900a8c40900a95309010a3f090005b00a0005b10b0005b20c0005b30d0005b40e0005b50f0005b6100005b7110005b8120005c7120005b9130005ba130005bb140005bc150005bd160005bf170005c1180005c21900fb1e1a00064b1b00064c1c00064d1d0006181e00064e1e0006191f00064f1f00061a200006502000065121000652220006702300071124000c5554000c565b000e3867000e3967000e486b000e496b000e4a6b000e4b6b000eb876000eb976000ec87a000ec97a000eca7a000ecb7a000f7181000f7282000f7a82000f7b82000f7c82000f7d82000f8082000f7484000321ca000322ca000327ca000328ca001dd0ca001dced600031bd8000f39d801d165d801d166d801d16ed801d16fd801d170d801d171d801d172d800302ada000316dc000317dc000318dc000319dc00031cdc00031ddc00031edc00031fdc000320dc000323dc000324dc000325dc000326dc000329dc00032adc00032bdc00032cdc00032ddc00032edc00032fdc000330dc000331dc000332dc000333dc000339dc00033adc00033bdc00033cdc000347dc000348dc000349dc00034ddc00034edc000353dc000354dc000355dc000356dc000359dc00035adc000591dc000596dc00059bdc0005a2dc0005a3dc0005a4dc0005a5dc0005a6dc0005a7dc0005aadc0005c5dc000655dc000656dc00065cdc0006e3dc0006eadc0006eddc000731dc000734dc000737dc000738dc000739dc00073bdc00073cdc00073edc000742dc000744dc000746dc000748dc0007f2dc000952dc000f18dc000f19dc000f35dc000f37dc000fc6dc00108ddc00193bdc001a18dc001b6cdc001dc2dc001dcadc001dcfdc001dffdc0020e8dc0020ecdc0020eddc0020eedc0020efdc00a92bdc00a92cdc00a92ddc0101fddc010a0ddc010a3adc01d17bdc01d17cdc01d17ddc01d17edc01d17fdc01d180dc01d181dc01d182dc01d18adc01d18bdc00059ade0005adde001939de00302dde00302ee000302fe001d16de20005aee40018a9e400302be4000300e6000301e6000302e6000303e6000304e6000305e6000306e6000307e6000308e6000309e600030ae600030be600030ce600030d
e600030ee600030fe6000310e6000311e6000312e6000313e6000314e600033de600033ee600033fe6000340e6000341e6000342e6000343e6000344e6000346e600034ae600034be600034ce6000350e6000351e6000352e6000357e600035be6000363e6000364e6000365e6000366e6000367e6000368e6000369e600036ae600036be600036ce600036de600036ee600036fe6000483e6000484e6000485e6000486e6000487e6000592e6000593e6000594e6000595e6000597e6000598e6000599e600059ce600059de600059ee600059fe60005a0e60005a1e60005a8e60005a9e60005abe60005ace60005afe60005c4e6000610e6000611e6000612e6000613e6000614e6000615e6000616e6000617e6000653e6000654e6000657e6000658e6000659e600065ae600065be600065de600065ee60006d6e60006d7e60006d8e60006d9e60006dae60006dbe60006dce60006dfe60006e0e60006e1e60006e2e60006e4e60006e7e60006e8e60006ebe60006ece6000730e6000732e6000733e6000735e6000736e600073ae600073de600073fe6000740e6000741e6000743e6000745e6000747e6000749e600074ae60007ebe60007ece60007ede60007eee60007efe60007f0e60007f1e60007f3e6000951e6000953e6000954e6000f82e6000f83e6000f86e6000f87e600135fe60017dde600193ae6001a17e6001b6be6001b6de6001b6ee6001b6fe6001b70e6001b71e6001b72e6001b73e6001dc0e6001dc1e6001dc3e6001dc4e6001dc5e6001dc6e6001dc7e6001dc8e6001dc9e6001dcbe6001dcce6001dd1e6001dd2e6001dd3e6001dd4e6001dd5e6001dd6e6001dd7e6001dd8e6001dd9e6001ddae6001ddbe6001ddce6001ddde6001ddee6001ddfe6001de0e6001de1e6001de2e6001de3e6001de4e6001de5e6001de6e6001dfee60020d0e60020d1e60020d4e60020d5e60020d6e60020d7e60020dbe60020dce60020e1e60020e7e60020e9e60020f0e6002de0e6002de1e6002de2e6002de3e6002de4e6002de5e6002de6e6002de7e6002de8e6002de9e6002deae6002debe6002dece6002dede6002deee6002defe6002df0e6002df1e6002df2e6002df3e6002df4e6002df5e6002df6e6002df7e6002df8e6002df9e6002dfae6002dfbe6002dfce6002dfde6002dfee6002dffe600a66fe600a67ce600a67de600fe20e600fe21e600fe22e600fe23e600fe24e600fe25e600fe26e6010a0fe6010a38e601d185e601d186e601d187e601d188e601d189e601d1aae601d1abe601d1ace601d1ade601d242e601d243e601d244e6000315e800031ae8000358e800302ce800035ce900035fe9000362e900035dea00035eea000360ea000361ea001dcd
ea000345f0
@@ -0,0 +1,16 @@
1
+ 000049;000069,000307;lt;More_Above
2
+ 000049;000131;az;Not_Before_Dot
3
+ 000049;000131;tr;Not_Before_Dot
4
+ 00004a;00006a,000307;lt;More_Above
5
+ 000069;000069;az;
6
+ 000069;000069;tr;
7
+ 0000cc;000069,000307,000300;lt;
8
+ 0000cd;000069,000307,000301;lt;
9
+ 000128;000069,000307,000303;lt;
10
+ 00012e;00012f,000307;lt;More_Above
11
+ 000130;000069;az;
12
+ 000130;000069;tr;
13
+ 000307;000307;lt;After_Soft_Dotted
14
+ 000307;;az;After_I
15
+ 000307;;tr;After_I
16
+ 0003a3;0003c2;;Final_Sigma
@@ -0,0 +1,16 @@
1
+ 000049;000049;az;Not_Before_Dot
2
+ 000049;000049;lt;More_Above
3
+ 000049;000049;tr;Not_Before_Dot
4
+ 00004a;00004a;lt;More_Above
5
+ 000069;000130;az;
6
+ 000069;000130;tr;
7
+ 0000cc;0000cc;lt;
8
+ 0000cd;0000cd;lt;
9
+ 000128;000128;lt;
10
+ 00012e;00012e;lt;More_Above
11
+ 000130;000130;az;
12
+ 000130;000130;tr;
13
+ 000307;000307;az;After_I
14
+ 000307;000307;tr;After_I
15
+ 000307;;lt;After_Soft_Dotted
16
+ 0003a3;0003a3;;Final_Sigma
@@ -0,0 +1 @@
1
+ 00006100006200006300006400006500006600006700006800006900006a00006b00006c00006d00006e00006f00007000007100007200007300007400007500007600007700007800007900007a0000aa0000b50000ba0000df0000e00000e10000e20000e30000e40000e50000e60000e70000e80000e90000ea0000eb0000ec0000ed0000ee0000ef0000f00000f10000f20000f30000f40000f50000f60000f80000f90000fa0000fb0000fc0000fd0000fe0000ff00010100010300010500010700010900010b00010d00010f00011100011300011500011700011900011b00011d00011f00012100012300012500012700012900012b00012d00012f00013100013300013500013700013800013a00013c00013e00014000014200014400014600014800014900014b00014d00014f00015100015300015500015700015900015b00015d00015f00016100016300016500016700016900016b00016d00016f00017100017300017500017700017a00017c00017e00017f00018000018300018500018800018c00018d00019200019500019900019a00019b00019e0001a10001a30001a50001a80001aa0001ab0001ad0001b00001b40001b60001b90001ba0001bd0001be0001bf0001c60001c90001cc0001ce0001d00001d20001d40001d60001d80001da0001dc0001dd0001df0001e10001e30001e50001e70001e90001eb0001ed0001ef0001f00001f30001f50001f90001fb0001fd0001ff00020100020300020500020700020900020b00020d00020f00021100021300021500021700021900021b00021d00021f00022100022300022500022700022900022b00022d00022f00023100023300023400023500023600023700023800023900023c00023f00024000024200024700024900024b00024d00024f00025000025100025200025300025400025500025600025700025800025900025a00025b00025c00025d00025e00025f00026000026100026200026300026400026500026600026700026800026900026a00026b00026c00026d00026e00026f00027000027100027200027300027400027500027600027700027800027900027a00027b00027c00027d00027e00027f00028000028100028200028300028400028500028600028700028800028900028a00028b00028c00028d00028e00028f00029000029100029200029300029500029600029700029800029900029a00029b00029c00029d00029e00029f0002a00002a10002a20002a30002a40002a50002a60002a70002a80002a90002aa0002ab0002ac0002ad0002ae0002af0002b00002b10002b20002b30002b40002b50002b60002b70002b80002c00002c10002e00002e10002e20002e30002e4
00034500037100037300037700037a00037b00037c00037d0003900003ac0003ad0003ae0003af0003b00003b10003b20003b30003b40003b50003b60003b70003b80003b90003ba0003bb0003bc0003bd0003be0003bf0003c00003c10003c20003c30003c40003c50003c60003c70003c80003c90003ca0003cb0003cc0003cd0003ce0003d00003d10003d50003d60003d70003d90003db0003dd0003df0003e10003e30003e50003e70003e90003eb0003ed0003ef0003f00003f10003f20003f30003f50003f80003fb0003fc00043000043100043200043300043400043500043600043700043800043900043a00043b00043c00043d00043e00043f00044000044100044200044300044400044500044600044700044800044900044a00044b00044c00044d00044e00044f00045000045100045200045300045400045500045600045700045800045900045a00045b00045c00045d00045e00045f00046100046300046500046700046900046b00046d00046f00047100047300047500047700047900047b00047d00047f00048100048b00048d00048f00049100049300049500049700049900049b00049d00049f0004a10004a30004a50004a70004a90004ab0004ad0004af0004b10004b30004b50004b70004b90004bb0004bd0004bf0004c20004c40004c60004c80004ca0004cc0004ce0004cf0004d10004d30004d50004d70004d90004db0004dd0004df0004e10004e30004e50004e70004e90004eb0004ed0004ef0004f10004f30004f50004f70004f90004fb0004fd0004ff00050100050300050500050700050900050b00050d00050f00051100051300051500051700051900051b00051d00051f00052100052300056100056200056300056400056500056600056700056800056900056a00056b00056c00056d00056e00056f00057000057100057200057300057400057500057600057700057800057900057a00057b00057c00057d00057e00057f000580000581000582000583000584000585000586000587001d00001d01001d02001d03001d04001d05001d06001d07001d08001d09001d0a001d0b001d0c001d0d001d0e001d0f001d10001d11001d12001d13001d14001d15001d16001d17001d18001d19001d1a001d1b001d1c001d1d001d1e001d1f001d20001d21001d22001d23001d24001d25001d26001d27001d28001d29001d2a001d2b001d2c001d2d001d2e001d2f001d30001d31001d32001d33001d34001d35001d36001d37001d38001d39001d3a001d3b001d3c001d3d001d3e001d3f001d40001d41001d42001d43001d44001d45001d46001d47001d48001d49001d4a001d4b001d4c001d4d001d4e001d4f001d50001d51001d5200
1d53001d54001d55001d56001d57001d58001d59001d5a001d5b001d5c001d5d001d5e001d5f001d60001d61001d62001d63001d64001d65001d66001d67001d68001d69001d6a001d6b001d6c001d6d001d6e001d6f001d70001d71001d72001d73001d74001d75001d76001d77001d78001d79001d7a001d7b001d7c001d7d001d7e001d7f001d80001d81001d82001d83001d84001d85001d86001d87001d88001d89001d8a001d8b001d8c001d8d001d8e001d8f001d90001d91001d92001d93001d94001d95001d96001d97001d98001d99001d9a001d9b001d9c001d9d001d9e001d9f001da0001da1001da2001da3001da4001da5001da6001da7001da8001da9001daa001dab001dac001dad001dae001daf001db0001db1001db2001db3001db4001db5001db6001db7001db8001db9001dba001dbb001dbc001dbd001dbe001dbf001e01001e03001e05001e07001e09001e0b001e0d001e0f001e11001e13001e15001e17001e19001e1b001e1d001e1f001e21001e23001e25001e27001e29001e2b001e2d001e2f001e31001e33001e35001e37001e39001e3b001e3d001e3f001e41001e43001e45001e47001e49001e4b001e4d001e4f001e51001e53001e55001e57001e59001e5b001e5d001e5f001e61001e63001e65001e67001e69001e6b001e6d001e6f001e71001e73001e75001e77001e79001e7b001e7d001e7f001e81001e83001e85001e87001e89001e8b001e8d001e8f001e91001e93001e95001e96001e97001e98001e99001e9a001e9b001e9c001e9d001e9f001ea1001ea3001ea5001ea7001ea9001eab001ead001eaf001eb1001eb3001eb5001eb7001eb9001ebb001ebd001ebf001ec1001ec3001ec5001ec7001ec9001ecb001ecd001ecf001ed1001ed3001ed5001ed7001ed9001edb001edd001edf001ee1001ee3001ee5001ee7001ee9001eeb001eed001eef001ef1001ef3001ef5001ef7001ef9001efb001efd001eff001f00001f01001f02001f03001f04001f05001f06001f07001f10001f11001f12001f13001f14001f15001f20001f21001f22001f23001f24001f25001f26001f27001f30001f31001f32001f33001f34001f35001f36001f37001f40001f41001f42001f43001f44001f45001f50001f51001f52001f53001f54001f55001f56001f57001f60001f61001f62001f63001f64001f65001f66001f67001f70001f71001f72001f73001f74001f75001f76001f77001f78001f79001f7a001f7b001f7c001f7d001f80001f81001f82001f83001f84001f85001f86001f87001f90001f91001f92001f93001f94001f95001f96001f97001fa0001fa1001fa2001fa3001fa4001fa5001fa6001fa7001fb0001fb1001f
b2001fb3001fb4001fb6001fb7001fbe001fc2001fc3001fc4001fc6001fc7001fd0001fd1001fd2001fd3001fd6001fd7001fe0001fe1001fe2001fe3001fe4001fe5001fe6001fe7001ff2001ff3001ff4001ff6001ff700207100207f00209000209100209200209300209400210a00210e00210f00211300212f00213400213900213c00213d00214600214700214800214900214e00217000217100217200217300217400217500217600217700217800217900217a00217b00217c00217d00217e00217f0021840024d00024d10024d20024d30024d40024d50024d60024d70024d80024d90024da0024db0024dc0024dd0024de0024df0024e00024e10024e20024e30024e40024e50024e60024e70024e80024e9002c30002c31002c32002c33002c34002c35002c36002c37002c38002c39002c3a002c3b002c3c002c3d002c3e002c3f002c40002c41002c42002c43002c44002c45002c46002c47002c48002c49002c4a002c4b002c4c002c4d002c4e002c4f002c50002c51002c52002c53002c54002c55002c56002c57002c58002c59002c5a002c5b002c5c002c5d002c5e002c61002c65002c66002c68002c6a002c6c002c71002c73002c74002c76002c77002c78002c79002c7a002c7b002c7c002c7d002c81002c83002c85002c87002c89002c8b002c8d002c8f002c91002c93002c95002c97002c99002c9b002c9d002c9f002ca1002ca3002ca5002ca7002ca9002cab002cad002caf002cb1002cb3002cb5002cb7002cb9002cbb002cbd002cbf002cc1002cc3002cc5002cc7002cc9002ccb002ccd002ccf002cd1002cd3002cd5002cd7002cd9002cdb002cdd002cdf002ce1002ce3002ce4002d00002d01002d02002d03002d04002d05002d06002d07002d08002d09002d0a002d0b002d0c002d0d002d0e002d0f002d10002d11002d12002d13002d14002d15002d16002d17002d18002d19002d1a002d1b002d1c002d1d002d1e002d1f002d20002d21002d22002d23002d24002d2500a64100a64300a64500a64700a64900a64b00a64d00a64f00a65100a65300a65500a65700a65900a65b00a65d00a65f00a66300a66500a66700a66900a66b00a66d00a68100a68300a68500a68700a68900a68b00a68d00a68f00a69100a69300a69500a69700a72300a72500a72700a72900a72b00a72d00a72f00a73000a73100a73300a73500a73700a73900a73b00a73d00a73f00a74100a74300a74500a74700a74900a74b00a74d00a74f00a75100a75300a75500a75700a75900a75b00a75d00a75f00a76100a76300a76500a76700a76900a76b00a76d00a76f00a77000a77100a77200a77300a77400a77500a77600a77700a77800a77a00a77c00a77f00a781
00a78300a78500a78700a78c00fb0000fb0100fb0200fb0300fb0400fb0500fb0600fb1300fb1400fb1500fb1600fb1700ff4100ff4200ff4300ff4400ff4500ff4600ff4700ff4800ff4900ff4a00ff4b00ff4c00ff4d00ff4e00ff4f00ff5000ff5100ff5200ff5300ff5400ff5500ff5600ff5700ff5800ff5900ff5a01042801042901042a01042b01042c01042d01042e01042f01043001043101043201043301043401043501043601043701043801043901043a01043b01043c01043d01043e01043f01044001044101044201044301044401044501044601044701044801044901044a01044b01044c01044d01044e01044f01d41a01d41b01d41c01d41d01d41e01d41f01d42001d42101d42201d42301d42401d42501d42601d42701d42801d42901d42a01d42b01d42c01d42d01d42e01d42f01d43001d43101d43201d43301d44e01d44f01d45001d45101d45201d45301d45401d45601d45701d45801d45901d45a01d45b01d45c01d45d01d45e01d45f01d46001d46101d46201d46301d46401d46501d46601d46701d48201d48301d48401d48501d48601d48701d48801d48901d48a01d48b01d48c01d48d01d48e01d48f01d49001d49101d49201d49301d49401d49501d49601d49701d49801d49901d49a01d49b01d4b601d4b701d4b801d4b901d4bb01d4bd01d4be01d4bf01d4c001d4c101d4c201d4c301d4c501d4c601d4c701d4c801d4c901d4ca01d4cb01d4cc01d4cd01d4ce01d4cf01d4ea01d4eb01d4ec01d4ed01d4ee01d4ef01d4f001d4f101d4f201d4f301d4f401d4f501d4f601d4f701d4f801d4f901d4fa01d4fb01d4fc01d4fd01d4fe01d4ff01d50001d50101d50201d50301d51e01d51f01d52001d52101d52201d52301d52401d52501d52601d52701d52801d52901d52a01d52b01d52c01d52d01d52e01d52f01d53001d53101d53201d53301d53401d53501d53601d53701d55201d55301d55401d55501d55601d55701d55801d55901d55a01d55b01d55c01d55d01d55e01d55f01d56001d56101d56201d56301d56401d56501d56601d56701d56801d56901d56a01d56b01d58601d58701d58801d58901d58a01d58b01d58c01d58d01d58e01d58f01d59001d59101d59201d59301d59401d59501d59601d59701d59801d59901d59a01d59b01d59c01d59d01d59e01d59f01d5ba01d5bb01d5bc01d5bd01d5be01d5bf01d5c001d5c101d5c201d5c301d5c401d5c501d5c601d5c701d5c801d5c901d5ca01d5cb01d5cc01d5cd01d5ce01d5cf01d5d001d5d101d5d201d5d301d5ee01d5ef01d5f001d5f101d5f201d5f301d5f401d5f501d5f601d5f701d5f801d5f901d5fa01d5fb01d5fc01d5fd01d5fe01d5ff01d60001d60101d60201
d60301d60401d60501d60601d60701d62201d62301d62401d62501d62601d62701d62801d62901d62a01d62b01d62c01d62d01d62e01d62f01d63001d63101d63201d63301d63401d63501d63601d63701d63801d63901d63a01d63b01d65601d65701d65801d65901d65a01d65b01d65c01d65d01d65e01d65f01d66001d66101d66201d66301d66401d66501d66601d66701d66801d66901d66a01d66b01d66c01d66d01d66e01d66f01d68a01d68b01d68c01d68d01d68e01d68f01d69001d69101d69201d69301d69401d69501d69601d69701d69801d69901d69a01d69b01d69c01d69d01d69e01d69f01d6a001d6a101d6a201d6a301d6a401d6a501d6c201d6c301d6c401d6c501d6c601d6c701d6c801d6c901d6ca01d6cb01d6cc01d6cd01d6ce01d6cf01d6d001d6d101d6d201d6d301d6d401d6d501d6d601d6d701d6d801d6d901d6da01d6dc01d6dd01d6de01d6df01d6e001d6e101d6fc01d6fd01d6fe01d6ff01d70001d70101d70201d70301d70401d70501d70601d70701d70801d70901d70a01d70b01d70c01d70d01d70e01d70f01d71001d71101d71201d71301d71401d71601d71701d71801d71901d71a01d71b01d73601d73701d73801d73901d73a01d73b01d73c01d73d01d73e01d73f01d74001d74101d74201d74301d74401d74501d74601d74701d74801d74901d74a01d74b01d74c01d74d01d74e01d75001d75101d75201d75301d75401d75501d77001d77101d77201d77301d77401d77501d77601d77701d77801d77901d77a01d77b01d77c01d77d01d77e01d77f01d78001d78101d78201d78301d78401d78501d78601d78701d78801d78a01d78b01d78c01d78d01d78e01d78f01d7aa01d7ab01d7ac01d7ad01d7ae01d7af01d7b001d7b101d7b201d7b301d7b401d7b501d7b601d7b701d7b801d7b901d7ba01d7bb01d7bc01d7bd01d7be01d7bf01d7c001d7c101d7c201d7c401d7c501d7c601d7c701d7c801d7c901d7cb
@@ -0,0 +1 @@
1
+ 00004100004200004300004400004500004600004700004800004900004a00004b00004c00004d00004e00004f00005000005100005200005300005400005500005600005700005800005900005a0000c00000c10000c20000c30000c40000c50000c60000c70000c80000c90000ca0000cb0000cc0000cd0000ce0000cf0000d00000d10000d20000d30000d40000d50000d60000d80000d90000da0000db0000dc0000dd0000de00010000010200010400010600010800010a00010c00010e00011000011200011400011600011800011a00011c00011e00012000012200012400012600012800012a00012c00012e00013000013200013400013600013900013b00013d00013f00014100014300014500014700014a00014c00014e00015000015200015400015600015800015a00015c00015e00016000016200016400016600016800016a00016c00016e00017000017200017400017600017800017900017b00017d00018100018200018400018600018700018900018a00018b00018e00018f00019000019100019300019400019600019700019800019c00019d00019f0001a00001a20001a40001a60001a70001a90001ac0001ae0001af0001b10001b20001b30001b50001b70001b80001bc0001c40001c70001ca0001cd0001cf0001d10001d30001d50001d70001d90001db0001de0001e00001e20001e40001e60001e80001ea0001ec0001ee0001f10001f40001f60001f70001f80001fa0001fc0001fe00020000020200020400020600020800020a00020c00020e00021000021200021400021600021800021a00021c00021e00022000022200022400022600022800022a00022c00022e00023000023200023a00023b00023d00023e00024100024300024400024500024600024800024a00024c00024e00037000037200037600038600038800038900038a00038c00038e00038f00039100039200039300039400039500039600039700039800039900039a00039b00039c00039d00039e00039f0003a00003a10003a30003a40003a50003a60003a70003a80003a90003aa0003ab0003cf0003d20003d30003d40003d80003da0003dc0003de0003e00003e20003e40003e60003e80003ea0003ec0003ee0003f40003f70003f90003fa0003fd0003fe0003ff00040000040100040200040300040400040500040600040700040800040900040a00040b00040c00040d00040e00040f00041000041100041200041300041400041500041600041700041800041900041a00041b00041c00041d00041e00041f00042000042100042200042300042400042500042600042700042800042900042a00042b00042c00042d00042e00042f000460000462000464000466
00046800046a00046c00046e00047000047200047400047600047800047a00047c00047e00048000048a00048c00048e00049000049200049400049600049800049a00049c00049e0004a00004a20004a40004a60004a80004aa0004ac0004ae0004b00004b20004b40004b60004b80004ba0004bc0004be0004c00004c10004c30004c50004c70004c90004cb0004cd0004d00004d20004d40004d60004d80004da0004dc0004de0004e00004e20004e40004e60004e80004ea0004ec0004ee0004f00004f20004f40004f60004f80004fa0004fc0004fe00050000050200050400050600050800050a00050c00050e00051000051200051400051600051800051a00051c00051e00052000052200053100053200053300053400053500053600053700053800053900053a00053b00053c00053d00053e00053f00054000054100054200054300054400054500054600054700054800054900054a00054b00054c00054d00054e00054f0005500005510005520005530005540005550005560010a00010a10010a20010a30010a40010a50010a60010a70010a80010a90010aa0010ab0010ac0010ad0010ae0010af0010b00010b10010b20010b30010b40010b50010b60010b70010b80010b90010ba0010bb0010bc0010bd0010be0010bf0010c00010c10010c20010c30010c40010c5001e00001e02001e04001e06001e08001e0a001e0c001e0e001e10001e12001e14001e16001e18001e1a001e1c001e1e001e20001e22001e24001e26001e28001e2a001e2c001e2e001e30001e32001e34001e36001e38001e3a001e3c001e3e001e40001e42001e44001e46001e48001e4a001e4c001e4e001e50001e52001e54001e56001e58001e5a001e5c001e5e001e60001e62001e64001e66001e68001e6a001e6c001e6e001e70001e72001e74001e76001e78001e7a001e7c001e7e001e80001e82001e84001e86001e88001e8a001e8c001e8e001e90001e92001e94001e9e001ea0001ea2001ea4001ea6001ea8001eaa001eac001eae001eb0001eb2001eb4001eb6001eb8001eba001ebc001ebe001ec0001ec2001ec4001ec6001ec8001eca001ecc001ece001ed0001ed2001ed4001ed6001ed8001eda001edc001ede001ee0001ee2001ee4001ee6001ee8001eea001eec001eee001ef0001ef2001ef4001ef6001ef8001efa001efc001efe001f08001f09001f0a001f0b001f0c001f0d001f0e001f0f001f18001f19001f1a001f1b001f1c001f1d001f28001f29001f2a001f2b001f2c001f2d001f2e001f2f001f38001f39001f3a001f3b001f3c001f3d001f3e001f3f001f48001f49001f4a001f4b001f4c001f4d001f59001f5b001f5d001f5f001f68001f69001f6a00
1f6b001f6c001f6d001f6e001f6f001fb8001fb9001fba001fbb001fc8001fc9001fca001fcb001fd8001fd9001fda001fdb001fe8001fe9001fea001feb001fec001ff8001ff9001ffa001ffb00210200210700210b00210c00210d00211000211100211200211500211900211a00211b00211c00211d00212400212600212800212a00212b00212c00212d00213000213100213200213300213e00213f00214500216000216100216200216300216400216500216600216700216800216900216a00216b00216c00216d00216e00216f0021830024b60024b70024b80024b90024ba0024bb0024bc0024bd0024be0024bf0024c00024c10024c20024c30024c40024c50024c60024c70024c80024c90024ca0024cb0024cc0024cd0024ce0024cf002c00002c01002c02002c03002c04002c05002c06002c07002c08002c09002c0a002c0b002c0c002c0d002c0e002c0f002c10002c11002c12002c13002c14002c15002c16002c17002c18002c19002c1a002c1b002c1c002c1d002c1e002c1f002c20002c21002c22002c23002c24002c25002c26002c27002c28002c29002c2a002c2b002c2c002c2d002c2e002c60002c62002c63002c64002c67002c69002c6b002c6d002c6e002c6f002c72002c75002c80002c82002c84002c86002c88002c8a002c8c002c8e002c90002c92002c94002c96002c98002c9a002c9c002c9e002ca0002ca2002ca4002ca6002ca8002caa002cac002cae002cb0002cb2002cb4002cb6002cb8002cba002cbc002cbe002cc0002cc2002cc4002cc6002cc8002cca002ccc002cce002cd0002cd2002cd4002cd6002cd8002cda002cdc002cde002ce0002ce200a64000a64200a64400a64600a64800a64a00a64c00a64e00a65000a65200a65400a65600a65800a65a00a65c00a65e00a66200a66400a66600a66800a66a00a66c00a68000a68200a68400a68600a68800a68a00a68c00a68e00a69000a69200a69400a69600a72200a72400a72600a72800a72a00a72c00a72e00a73200a73400a73600a73800a73a00a73c00a73e00a74000a74200a74400a74600a74800a74a00a74c00a74e00a75000a75200a75400a75600a75800a75a00a75c00a75e00a76000a76200a76400a76600a76800a76a00a76c00a76e00a77900a77b00a77d00a77e00a78000a78200a78400a78600a78b00ff2100ff2200ff2300ff2400ff2500ff2600ff2700ff2800ff2900ff2a00ff2b00ff2c00ff2d00ff2e00ff2f00ff3000ff3100ff3200ff3300ff3400ff3500ff3600ff3700ff3800ff3900ff3a01040001040101040201040301040401040501040601040701040801040901040a01040b01040c01040d01040e01040f0104100104110104120104130104
1401041501041601041701041801041901041a01041b01041c01041d01041e01041f01042001042101042201042301042401042501042601042701d40001d40101d40201d40301d40401d40501d40601d40701d40801d40901d40a01d40b01d40c01d40d01d40e01d40f01d41001d41101d41201d41301d41401d41501d41601d41701d41801d41901d43401d43501d43601d43701d43801d43901d43a01d43b01d43c01d43d01d43e01d43f01d44001d44101d44201d44301d44401d44501d44601d44701d44801d44901d44a01d44b01d44c01d44d01d46801d46901d46a01d46b01d46c01d46d01d46e01d46f01d47001d47101d47201d47301d47401d47501d47601d47701d47801d47901d47a01d47b01d47c01d47d01d47e01d47f01d48001d48101d49c01d49e01d49f01d4a201d4a501d4a601d4a901d4aa01d4ab01d4ac01d4ae01d4af01d4b001d4b101d4b201d4b301d4b401d4b501d4d001d4d101d4d201d4d301d4d401d4d501d4d601d4d701d4d801d4d901d4da01d4db01d4dc01d4dd01d4de01d4df01d4e001d4e101d4e201d4e301d4e401d4e501d4e601d4e701d4e801d4e901d50401d50501d50701d50801d50901d50a01d50d01d50e01d50f01d51001d51101d51201d51301d51401d51601d51701d51801d51901d51a01d51b01d51c01d53801d53901d53b01d53c01d53d01d53e01d54001d54101d54201d54301d54401d54601d54a01d54b01d54c01d54d01d54e01d54f01d55001d56c01d56d01d56e01d56f01d57001d57101d57201d57301d57401d57501d57601d57701d57801d57901d57a01d57b01d57c01d57d01d57e01d57f01d58001d58101d58201d58301d58401d58501d5a001d5a101d5a201d5a301d5a401d5a501d5a601d5a701d5a801d5a901d5aa01d5ab01d5ac01d5ad01d5ae01d5af01d5b001d5b101d5b201d5b301d5b401d5b501d5b601d5b701d5b801d5b901d5d401d5d501d5d601d5d701d5d801d5d901d5da01d5db01d5dc01d5dd01d5de01d5df01d5e001d5e101d5e201d5e301d5e401d5e501d5e601d5e701d5e801d5e901d5ea01d5eb01d5ec01d5ed01d60801d60901d60a01d60b01d60c01d60d01d60e01d60f01d61001d61101d61201d61301d61401d61501d61601d61701d61801d61901d61a01d61b01d61c01d61d01d61e01d61f01d62001d62101d63c01d63d01d63e01d63f01d64001d64101d64201d64301d64401d64501d64601d64701d64801d64901d64a01d64b01d64c01d64d01d64e01d64f01d65001d65101d65201d65301d65401d65501d67001d67101d67201d67301d67401d67501d67601d67701d67801d67901d67a01d67b01d67c01d67d01d67e01d67f01d68001d68101d68201d68301d68401d685
01d68601d68701d68801d68901d6a801d6a901d6aa01d6ab01d6ac01d6ad01d6ae01d6af01d6b001d6b101d6b201d6b301d6b401d6b501d6b601d6b701d6b801d6b901d6ba01d6bb01d6bc01d6bd01d6be01d6bf01d6c001d6e201d6e301d6e401d6e501d6e601d6e701d6e801d6e901d6ea01d6eb01d6ec01d6ed01d6ee01d6ef01d6f001d6f101d6f201d6f301d6f401d6f501d6f601d6f701d6f801d6f901d6fa01d71c01d71d01d71e01d71f01d72001d72101d72201d72301d72401d72501d72601d72701d72801d72901d72a01d72b01d72c01d72d01d72e01d72f01d73001d73101d73201d73301d73401d75601d75701d75801d75901d75a01d75b01d75c01d75d01d75e01d75f01d76001d76101d76201d76301d76401d76501d76601d76701d76801d76901d76a01d76b01d76c01d76d01d76e01d79001d79101d79201d79301d79401d79501d79601d79701d79801d79901d79a01d79b01d79c01d79d01d79e01d79f01d7a001d7a101d7a201d7a301d7a401d7a501d7a601d7a701d7a801d7ca
@@ -0,0 +1 @@
1
+ 00006900006a00012f00024900026800029d0002b20003f3000456000458001d62001d96001da4001da8001e2d001ecb002071002148002149002c7c01d42201d42301d45601d45701d48a01d48b01d4be01d4bf01d4f201d4f301d52601d52701d55a01d55b01d58e01d58f01d5c201d5c301d5f601d5f701d62a01d62b01d65e01d65f01d69201d693
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/version"
4
4
  require "unicode_utils/name"
@@ -6,3 +6,10 @@ require "unicode_utils/simple_upcase"
6
6
  require "unicode_utils/simple_downcase"
7
7
  require "unicode_utils/upcase"
8
8
  require "unicode_utils/downcase"
9
+ require "unicode_utils/titlecase_char_q"
10
+ require "unicode_utils/lowercase_char_q"
11
+ require "unicode_utils/uppercase_char_q"
12
+ require "unicode_utils/cased_char_q"
13
+ require "unicode_utils/case_ignorable_char_q"
14
+ require "unicode_utils/soft_dotted_char_q"
15
+ require "unicode_utils/combining_class"
@@ -0,0 +1,16 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_codepoint_set"
4
+
5
+ module UnicodeUtils
6
+
7
+ CASE_IGNORABLE_SET = Impl.read_codepoint_set("case_ignorable_set") # :nodoc:
8
+
9
+ # Returns true if the given character is case-ignorable as defined
10
+ # by Unicode 5.0, section 3.13.
11
+ def case_ignorable_char?(char)
12
+ CASE_IGNORABLE_SET.include?(char.ord) # membership test on the character's code point
13
+ end
14
+ module_function :case_ignorable_char? # also callable as UnicodeUtils.case_ignorable_char?
15
+
16
+ end
@@ -0,0 +1,18 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/lowercase_char_q"
4
+ require "unicode_utils/uppercase_char_q"
5
+ require "unicode_utils/titlecase_char_q"
6
+
7
+ module UnicodeUtils
8
+
9
+ # Tests whether +char+ is a cased char: a character that has the Unicode property
10
+ # Lowercase or Uppercase or the general category Titlecase_Letter.
11
+ #
12
+ # See also: lowercase_char?, uppercase_char?, titlecase_char?
13
+ def cased_char?(char)
14
+ lowercase_char?(char) || uppercase_char?(char) || titlecase_char?(char) # evaluated left to right; stops at the first truthy result
15
+ end
16
+ module_function :cased_char? # also callable as UnicodeUtils.cased_char?
17
+
18
+ end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ module UnicodeUtils
4
+
5
+ module Impl # :nodoc:
6
+
7
+ def self.read_combining_class_map # builds a Hash of code point (Integer) => combining class (Integer)
8
+ path = File.join(File.dirname(__FILE__),
9
+ "..", "..", "cdata", "combining_class_map") # data file resolved relative to this source file
10
+ Hash.new.tap { |map| # tap yields the new Hash and returns it once it has been filled
11
+ File.open(path, "r:US-ASCII:-") do |input| # NOTE(review): the trailing ":-" looks intended to suppress transcoding to Encoding.default_internal - confirm
12
+ buffer = "x" * 6 # reusable buffer for the 6-character code point field
13
+ buffer.force_encoding(Encoding::US_ASCII)
14
+ cc_buffer = "x" * 2 # reusable buffer for the 2-character combining class field
15
+ cc_buffer.force_encoding(Encoding::US_ASCII)
16
+ while input.read(6, buffer) # read returns nil at end of file, which ends the loop
17
+ map[buffer.to_i(16)] = input.read(2, cc_buffer).to_i(16) # both fields are parsed as hexadecimal
18
+ end
19
+ end
20
+ }
21
+ end
22
+
23
+ end
24
+
25
+ COMBINING_CLASS_MAP = Impl.read_combining_class_map # :nodoc:
26
+
27
+ # Get the combining class of the given character as an integer in
28
+ # the range 0..255.
29
+ def combining_class(char)
30
+ COMBINING_CLASS_MAP[char.ord] || 0 # code points missing from the map yield 0
31
+ end
32
+ module_function :combining_class # also callable as UnicodeUtils.combining_class
33
+
34
+ end
@@ -0,0 +1,164 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/cased_char_q"
4
+ require "unicode_utils/case_ignorable_char_q"
5
+ require "unicode_utils/soft_dotted_char_q"
6
+ require "unicode_utils/combining_class"
7
+
8
+ module UnicodeUtils
9
+
10
+ module Impl # :nodoc:
11
+
12
+ class ConditionalCasing # :nodoc:
13
+
14
+ attr_reader :mapping # replacement codepoints; read_conditional_casings passes an Array of Integers
15
+
16
+ def initialize(mapping)
17
+ @mapping = mapping
18
+ end
19
+
20
+ def context_match?(str, pos) # base class has no context condition, so it always matches; subclasses override this
21
+ true
22
+ end
23
+
24
+ end
25
+
26
+ class BeforeDotConditionalCasing < ConditionalCasing # :nodoc:
27
+
28
+ def context_match?(str, pos) # true if U+0307 follows str[pos] with only characters of combining class other than 0 and 230 in between
29
+ (pos + 1).upto(str.length - 1) { |i| # scan forward, starting right after pos
30
+ c = str[i]
31
+ return true if c.ord == 0x0307 # tested before the class check below
32
+ cc = UnicodeUtils.combining_class(c)
33
+ return false if cc == 0 || cc == 230 # a class 0 or class 230 character ends the search
34
+ }
35
+ false # "combining dot above" not found
36
+ end
37
+
38
+ end
39
+
40
+ class NotBeforeDotConditionalCasing < BeforeDotConditionalCasing # :nodoc:
41
+
42
+ def context_match?(str, pos) # logical negation of BeforeDotConditionalCasing#context_match?
43
+ !super # bare super forwards str and pos unchanged
44
+ end
45
+
46
+ end
47
+
48
+ class MoreAboveConditionalCasing < ConditionalCasing # :nodoc:
49
+
50
+ def context_match?(str, pos) # true if a combining class 230 character follows str[pos] before any class 0 character
51
+ (pos + 1).upto(str.length - 1) { |i| # scan forward, starting right after pos
52
+ c = str[i]
53
+ cc = UnicodeUtils.combining_class(c)
54
+ return true if cc == 230
55
+ return false if cc == 0 # a class 0 character ends the search
56
+ }
57
+ false # reached the end of str without finding a class 230 character
58
+ end
59
+
60
+ end
61
+
62
+ class AfterIConditionalCasing < ConditionalCasing # :nodoc:
63
+
64
+ def context_match?(str, pos) # true if U+0049 precedes str[pos] with only characters of combining class other than 0 and 230 in between
65
+ (pos - 1).downto(0) { |i| # scan backward, starting right before pos
66
+ c = str[i]
67
+ return true if c.ord == 0x49 # uppercase I
68
+ cc = UnicodeUtils.combining_class(c)
69
+ return false if cc == 0 || cc == 230 # a class 0 or class 230 character ends the search
70
+ }
71
+ false # uppercase I not found
72
+ end
73
+
74
+ end
75
+
76
+ class AfterSoftDottedConditionalCasing < ConditionalCasing # :nodoc:
77
+
78
+ def context_match?(str, pos) # true if a soft-dotted character precedes str[pos] with only characters of combining class other than 0 and 230 in between
79
+ (pos - 1).downto(0) { |i| # scan backward, starting right before pos
80
+ c = str[i]
81
+ return true if UnicodeUtils.soft_dotted_char?(c) # tested before the class check below
82
+ cc = UnicodeUtils.combining_class(c)
83
+ return false if cc == 0 || cc == 230 # a class 0 or class 230 character ends the search
84
+ }
85
+ false # reached the start of str without finding a soft-dotted character
86
+ end
87
+
88
+ end
89
+
90
+ class FinalSigmaConditionalCasing < ConditionalCasing # :nodoc:
91
+
92
+ def context_match?(str, pos) # true if a cased character is found before str[pos] and none is found after it
93
+ before_match?(str, pos) && !after_match?(str, pos)
94
+ end
95
+
96
+ private
97
+
98
+ def before_match?(str, pos) # scans backward from pos, skipping case-ignorable characters, looking for a cased character
99
+ (pos - 1).downto(0) { |i|
100
+ c = str[i]
101
+ return true if UnicodeUtils.cased_char?(c) # checked first, so a character that is both cased and case-ignorable counts as cased
102
+ return false unless UnicodeUtils.case_ignorable_char?(c) # any other character ends the search
103
+ }
104
+ false # no cased char
105
+ end
106
+
107
+ def after_match?(str, pos) # same scan as before_match?, but forward from pos
108
+ (pos + 1).upto(str.length - 1) { |i|
109
+ c = str[i]
110
+ return true if UnicodeUtils.cased_char?(c)
111
+ return false unless UnicodeUtils.case_ignorable_char?(c) # any other character ends the search
112
+ }
113
+ false # reached the end of str without finding a cased character
114
+ end
115
+
116
+ end
117
+
118
+ def self.read_conditional_casings(filename) # returns a Hash of codepoint => { language_id (Symbol or nil) => ConditionalCasing instance } read from cdata/<filename>
119
+ path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
120
+ Hash.new.tap { |cp_map|
121
+ File.open(path, "r:US-ASCII:-") do |input|
122
+ input.each_line { |line| # one record per line, fields separated by ";"
123
+ line.chomp!
124
+ record = line.split(";")
125
+ cp = record[0].to_i(16) # field 0: codepoint in hexadecimal
126
+ mapping = record[1].split(",").map { |c| c.to_i(16) } # field 1: comma-separated hexadecimal codepoints
127
+ language_id = record[2].empty? ? nil : record[2].to_sym # NOTE(review): String#split drops trailing empty fields, so record[2] is nil for a line with fewer than three fields; confirm the data always has a third field
128
+ context = record[3] && record[3].gsub('_', '') # underscores removed so the name matches a class prefix; nil when the field is absent
129
+ casing = Impl.const_get("#{context}ConditionalCasing").new(mapping) # a nil context interpolates to "", selecting the base ConditionalCasing
130
+ (cp_map[cp] ||= {})[language_id] = casing # a later record for the same codepoint and language replaces an earlier one
131
+ }
132
+ end
133
+ }
134
+ end
135
+
136
+ CONDITIONAL_UPCASE_MAP =
137
+ read_conditional_casings("cond_uc_map") # loaded when this file is required
138
+
139
+ CONDITIONAL_DOWNCASE_MAP =
140
+ read_conditional_casings("cond_lc_map") # loaded when this file is required
141
+
142
+ def self.conditional_upcase_mapping(cp, str, pos, language_id) # returns the replacement codepoints for cp at index pos of str, or nil if no conditional rule applies
143
+ lang_map = CONDITIONAL_UPCASE_MAP[cp]
144
+ if lang_map
145
+ casing = lang_map[language_id] || lang_map[nil] # language-specific rule first, else the rule stored under nil. NOTE(review): a language-specific rule whose context fails does not fall back to the nil rule
146
+ if casing && casing.context_match?(str, pos)
147
+ casing.mapping
148
+ end
149
+ end
150
+ end
151
+
152
+ def self.conditional_downcase_mapping(cp, str, pos, language_id) # returns the replacement codepoints for cp at index pos of str, or nil if no conditional rule applies
153
+ lang_map = CONDITIONAL_DOWNCASE_MAP[cp]
154
+ if lang_map
155
+ casing = lang_map[language_id] || lang_map[nil] # language-specific rule first, else the rule stored under nil
156
+ if casing && casing.context_match?(str, pos)
157
+ casing.mapping
158
+ end
159
+ end
160
+ end
161
+
162
+ end
163
+
164
+ end
@@ -1,34 +1,40 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/simple_downcase"
4
4
  require "unicode_utils/read_special_casing_map"
5
+ require "unicode_utils/conditional_casing"
5
6
 
6
7
  module UnicodeUtils
7
8
 
8
- SPECIAL_DOWNCASE_MAP = Impl.read_special_casing_map("special_lc_map")
9
+ SPECIAL_DOWNCASE_MAP = Impl.read_special_casing_map("special_lc_map") # :nodoc:
9
10
 
10
11
  # Perform a full case-conversion of +str+ to lowercase according to
11
12
  # the Unicode standard.
12
13
  #
13
- # Examples:
14
+ # Some conversion rules are language dependent; these are in effect
15
+ # when a non-nil +language_id+ is given. If non-nil, the
16
+ # +language_id+ must be a two letter language code as defined in BCP
17
+ # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a
18
+ # language doesn't have a two letter code, the three letter code is
19
+ # to be used.
14
20
  #
15
- # UnicodeUtils.downcase "ᾈ" => "ᾀ"
21
+ # Examples:
16
22
  #
17
- # Note: The current implementation ignores the +language_id+
18
- # argument and doesn't deal with language and context specific
19
- # cases. This affects text in the languages Lithuanian, Turkish and
20
- # Azeri and the greek letter sigma in a special position. A future
21
- # version of UnicodeUtils will fix this. All other languages are
22
- # fully supported according to the Unicode standard.
23
+ # UnicodeUtils.downcase("ᾈ") => "ᾀ"
24
+ # UnicodeUtils.downcase("aBI\u{307}", :tr) => "abi"
23
25
  def downcase(str, language_id = nil)
24
26
  String.new.force_encoding(str.encoding).tap { |res|
27
+ pos = 0
25
28
  str.each_codepoint { |cp|
26
- special_mapping = SPECIAL_DOWNCASE_MAP[cp]
29
+ special_mapping =
30
+ Impl.conditional_downcase_mapping(cp, str, pos, language_id) ||
31
+ SPECIAL_DOWNCASE_MAP[cp]
27
32
  if special_mapping
28
33
  special_mapping.each { |m| res << m }
29
34
  else
30
35
  res << (SIMPLE_DOWNCASE_MAP[cp] || cp)
31
36
  end
37
+ pos += 1
32
38
  }
33
39
  }
34
40
  end
@@ -0,0 +1,15 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_codepoint_set"
4
+
5
+ module UnicodeUtils
6
+
7
+ PROP_LOWERCASE_SET = Impl.read_codepoint_set("prop_set_lowercase") # :nodoc:
8
+
9
+ # True if the given character has the Unicode property Lowercase. Only the codepoint returned by char.ord is tested.
10
+ def lowercase_char?(char)
11
+ PROP_LOWERCASE_SET.include?(char.ord) # key lookup in the codepoint => true Hash
12
+ end
13
+ module_function :lowercase_char? # also callable as UnicodeUtils.lowercase_char?
14
+
15
+ end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  module UnicodeUtils
4
4
 
@@ -7,8 +7,9 @@ module UnicodeUtils
7
7
  def self.read_names
8
8
  path = File.join(File.dirname(__FILE__), "..", "..", "cdata", "names")
9
9
  Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII") do |input|
10
+ File.open(path, "r:US-ASCII:-") do |input|
11
11
  buffer = "x" * 6
12
+ buffer.force_encoding(Encoding::US_ASCII)
12
13
  while input.read(6, buffer)
13
14
  map[buffer.to_i(16)] = input.gets.tap { |x| x.chomp! }
14
15
  end
@@ -18,7 +19,7 @@ module UnicodeUtils
18
19
 
19
20
  end
20
21
 
21
- NAME_MAP = Impl.read_names
22
+ NAME_MAP = Impl.read_names # :nodoc:
22
23
 
23
24
  # Get the Unicode name of the single codepoint in str.
24
25
  #
@@ -26,7 +27,7 @@ module UnicodeUtils
26
27
  #
27
28
  # UnicodeUtils.name "ᾀ" => "GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI"
28
29
  def name(str)
29
- NAME_MAP[str.codepoints.first] # .encode
30
+ NAME_MAP[str.codepoints.first]
30
31
  end
31
32
  module_function :name
32
33
 
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  module UnicodeUtils
4
4
 
@@ -7,8 +7,9 @@ module UnicodeUtils
7
7
  def self.read_codepoint_map(filename)
8
8
  path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
9
  Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII") do |input|
10
+ File.open(path, "r:US-ASCII:-") do |input|
11
11
  buffer = "x" * 6
12
+ buffer.force_encoding(Encoding::US_ASCII)
12
13
  while input.read(6, buffer)
13
14
  map[buffer.to_i(16)] = input.read(6, buffer).to_i(16)
14
15
  end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ module UnicodeUtils
4
+
5
+ module Impl # :nodoc:
6
+
7
+ def self.read_codepoint_set(filename) # returns a Hash of codepoint => true, used as a set, read from cdata/<filename>
8
+ path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
+ Hash.new.tap { |set|
10
+ File.open(path, "r:US-ASCII:-") do |input| # external encoding US-ASCII; the "-" presumably disables transcoding to Encoding.default_internal (confirm in the IO docs)
11
+ buffer = "x" * 6 # 6-character buffer reused for every record
12
+ buffer.force_encoding(Encoding::US_ASCII)
13
+ while input.read(6, buffer) # each record is 6 hexadecimal digits; IO#read returns nil at end of file
14
+ set[buffer.to_i(16)] = true
15
+ end
16
+ end
17
+ }
18
+ end
19
+
20
+ end
21
+
22
+ end
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  module UnicodeUtils
4
4
 
@@ -7,8 +7,9 @@ module UnicodeUtils
7
7
  def self.read_special_casing_map(filename)
8
8
  path = File.join(File.dirname(__FILE__), "..", "..", "cdata", filename)
9
9
  Hash.new.tap { |map|
10
- File.open(path, "r:US-ASCII") do |input|
10
+ File.open(path, "r:US-ASCII:-") do |input|
11
11
  buffer = "x" * 6
12
+ buffer.force_encoding(Encoding::US_ASCII)
12
13
  while input.read(6, buffer)
13
14
  cp = buffer.to_i(16)
14
15
  mapping = []
@@ -1,10 +1,10 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/read_codepoint_map"
4
4
 
5
5
  module UnicodeUtils
6
6
 
7
- SIMPLE_DOWNCASE_MAP = Impl.read_codepoint_map("simple_lc_map")
7
+ SIMPLE_DOWNCASE_MAP = Impl.read_codepoint_map("simple_lc_map") # :nodoc:
8
8
 
9
9
  # Map each codepoint in +str+ that has a single codepoint
10
10
  # lowercase-mapping to that lowercase mapping. +str+ is assumed to be
@@ -1,10 +1,10 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/read_codepoint_map"
4
4
 
5
5
  module UnicodeUtils
6
6
 
7
- SIMPLE_UPCASE_MAP = Impl.read_codepoint_map("simple_uc_map")
7
+ SIMPLE_UPCASE_MAP = Impl.read_codepoint_map("simple_uc_map") # :nodoc:
8
8
 
9
9
  # Map each codepoint in +str+ that has a single codepoint
10
10
  # uppercase-mapping to that uppercase mapping. +str+ is assumed to be
@@ -0,0 +1,16 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_codepoint_set"
4
+
5
+ module UnicodeUtils
6
+
7
+ SOFT_DOTTED_SET = Impl.read_codepoint_set("soft_dotted_set") # :nodoc:
8
+
9
+ # Returns true if the given character has the Unicode property
10
+ # Soft_Dotted. Only the codepoint returned by char.ord is tested.
11
+ def soft_dotted_char?(char)
12
+ SOFT_DOTTED_SET.include?(char.ord) # key lookup in the codepoint => true Hash
13
+ end
14
+ module_function :soft_dotted_char? # also callable as UnicodeUtils.soft_dotted_char?
15
+
16
+ end
@@ -0,0 +1,16 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_codepoint_set"
4
+
5
+ module UnicodeUtils
6
+
7
+ TITLECASE_LETTER_SET = Impl.read_codepoint_set("cat_set_titlecase") # :nodoc:
8
+
9
+ # True if the given character has the General_Category
10
+ # Titlecase_Letter (Lt). Only the codepoint returned by char.ord is tested.
11
+ def titlecase_char?(char)
12
+ TITLECASE_LETTER_SET.include?(char.ord) # key lookup in the codepoint => true Hash
13
+ end
14
+ module_function :titlecase_char? # also callable as UnicodeUtils.titlecase_char?
15
+
16
+ end
@@ -1,33 +1,41 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  require "unicode_utils/simple_upcase"
4
4
  require "unicode_utils/read_special_casing_map"
5
+ require "unicode_utils/conditional_casing"
5
6
 
6
7
  module UnicodeUtils
7
8
 
8
- SPECIAL_UPCASE_MAP = Impl.read_special_casing_map("special_uc_map")
9
+ SPECIAL_UPCASE_MAP = Impl.read_special_casing_map("special_uc_map") # :nodoc:
9
10
 
10
11
  # Perform a full case-conversion of +str+ to uppercase according to
11
12
  # the Unicode standard.
12
13
  #
13
- # Examples:
14
+ # Some conversion rules are language dependent; these are in effect
15
+ # when a non-nil +language_id+ is given. If non-nil, the
16
+ # +language_id+ must be a two letter language code as defined in BCP
17
+ # 47 (http://tools.ietf.org/rfc/bcp/bcp47.txt) as a symbol. If a
18
+ # language doesn't have a two letter code, the three letter code is
19
+ # to be used.
14
20
  #
15
- # UnicodeUtils.upcase "weiß" => "WEISS"
21
+ # Examples:
16
22
  #
17
- # Note: The current implementation ignores the +language_id+
18
- # argument and doesn't deal with language and context specific
19
- # cases. This affects text in the languages Lithuanian, Turkish and
20
- # Azeri. A future version of UnicodeUtils will fix this. All other
21
- # languages are fully supported according to the Unicode standard.
23
+ # UnicodeUtils.upcase("weiß") => "WEISS"
24
+ # UnicodeUtils.upcase("i", :en) => "I"
25
+ # UnicodeUtils.upcase("i", :tr) => "İ"
22
26
  def upcase(str, language_id = nil)
23
27
  String.new.force_encoding(str.encoding).tap { |res|
28
+ pos = 0
24
29
  str.each_codepoint { |cp|
25
- special_mapping = SPECIAL_UPCASE_MAP[cp]
30
+ special_mapping =
31
+ Impl.conditional_upcase_mapping(cp, str, pos, language_id) ||
32
+ SPECIAL_UPCASE_MAP[cp]
26
33
  if special_mapping
27
34
  special_mapping.each { |m| res << m }
28
35
  else
29
36
  res << (SIMPLE_UPCASE_MAP[cp] || cp)
30
37
  end
38
+ pos += 1
31
39
  }
32
40
  }
33
41
  end
@@ -0,0 +1,15 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require "unicode_utils/read_codepoint_set"
4
+
5
+ module UnicodeUtils
6
+
7
+ PROP_UPPERCASE_SET = Impl.read_codepoint_set("prop_set_uppercase") # :nodoc:
8
+
9
+ # True if the given character has the Unicode property Uppercase. Only the codepoint returned by char.ord is tested.
10
+ def uppercase_char?(char)
11
+ PROP_UPPERCASE_SET.include?(char.ord) # key lookup in the codepoint => true Hash
12
+ end
13
+ module_function :uppercase_char? # also callable as UnicodeUtils.uppercase_char?
14
+
15
+ end
@@ -1,8 +1,8 @@
1
- # encoding: utf-8
1
+ # -*- encoding: utf-8 -*-
2
2
 
3
3
  module UnicodeUtils
4
4
 
5
5
  # Corresponds to the unicode_utils gem version.
6
- VERSION = "0.1.0"
6
+ VERSION = "0.2.0"
7
7
 
8
8
  end
@@ -8,6 +8,7 @@ class TestUnicodeUtils < Test::Unit::TestCase
8
8
 
9
9
  def test_name
10
10
  assert_equal "LATIN SMALL LETTER F", UnicodeUtils.name("f")
11
+ assert_equal Encoding::US_ASCII, UnicodeUtils.name("f").encoding
11
12
  end
12
13
 
13
14
  def test_simple_upcase
@@ -22,11 +23,85 @@ class TestUnicodeUtils < Test::Unit::TestCase
22
23
 
23
24
  def test_upcase
24
25
  assert_equal "WEISS 123", UnicodeUtils.upcase("Weiß 123")
26
+ assert_equal "WEISS 123", UnicodeUtils.upcase("Weiß 123", :de)
27
+ assert_equal "I", UnicodeUtils.upcase("i")
28
+ assert_equal "I", UnicodeUtils.upcase("i", :de)
29
+ assert_equal "\u{130}", UnicodeUtils.upcase("i", :tr)
30
+ assert_equal "\u{130}", UnicodeUtils.upcase("i", :az)
31
+ assert_equal "ABI\u{3a3}SS\u{3a3}/FFI\u{5ffff}\u{10405}",
32
+ UnicodeUtils.upcase("aBi\u{3c3}\u{df}\u{3c2}/\u{fb03}\u{5ffff}\u{1042d}")
33
+ assert_equal "AB\u{130}\u{3a3}SS\u{3a3}/FFI\u{5ffff}\u{10405}",
34
+ UnicodeUtils.upcase("aBi\u{3c3}\u{df}\u{3c2}/\u{fb03}\u{5ffff}\u{1042d}", :az)
35
+ assert_equal "I\u{307}", UnicodeUtils.upcase("i\u{307}")
36
+ assert_equal "I", UnicodeUtils.upcase("i\u{307}", :lt)
25
37
  end
26
38
 
27
39
  def test_downcase
28
40
  # LATIN CAPITAL LETTER I WITH DOT ABOVE
29
41
  assert_equal "\u0069\u0307", UnicodeUtils.downcase("\u0130")
42
+ assert_equal "\u0069\u0307", UnicodeUtils.downcase("\u0130", :de)
43
+ assert_equal "\u0069", UnicodeUtils.downcase("\u0130", :tr)
44
+ assert_equal "\u0069", UnicodeUtils.downcase("\u0130", :az)
45
+ assert_equal "ab\u{131}\u{3c3}\u{df}\u{3c2}/\u{5ffff}\u{1042d}",
46
+ UnicodeUtils.downcase("aBI\u{3a3}\u{df}\u{3a3}/\u{5ffff}\u{10405}", :tr)
47
+ # tests After_I and Not_Before_Dot
48
+ assert_equal "abi", UnicodeUtils.downcase("aBI\u{307}", :tr)
49
+ assert_equal "ia\u{300}", UnicodeUtils.downcase("Ia\u{300}", :lt)
50
+ # this is probably unrealistic, because I don't understand a word Lithuanian
51
+ assert_equal "i\u{307}\u{300}", UnicodeUtils.downcase("I\u{300}", :lt)
52
+ end
53
+
54
+ def test_downcase_final_sigma # downcase without a language_id: the first U+03A3 is expected as U+03C3, the second (directly before "/") as U+03C2
55
+ assert_equal "abi\u{3c3}\u{df}\u{3c2}/\u{5ffff}\u{1042d}",
56
+ UnicodeUtils.downcase("aBI\u{3a3}\u{df}\u{3a3}/\u{5ffff}\u{10405}")
57
+ end
58
+
59
+ def test_titlecase? # exercises titlecase_char?; NOTE(review): sibling tests are named after the full method name (e.g. test_lowercase_char?)
60
+ assert_equal true, UnicodeUtils.titlecase_char?("\u{01F2}")
61
+ assert_equal false, UnicodeUtils.titlecase_char?("\u{0041}") # U+0041 is "A"
62
+ end
63
+
64
+ def test_lowercase_char? # lowercase letters (ASCII and non-ASCII) expect true; an uppercase letter and a digit expect false
65
+ assert_equal true, UnicodeUtils.lowercase_char?("c")
66
+ assert_equal true, UnicodeUtils.lowercase_char?("ö")
67
+ assert_equal false, UnicodeUtils.lowercase_char?("C")
68
+ assert_equal false, UnicodeUtils.lowercase_char?("2")
69
+ end
70
+
71
+ def test_uppercase_char? # uppercase letters (ASCII and non-ASCII) expect true; a digit and a lowercase letter expect false
72
+ assert_equal true, UnicodeUtils.uppercase_char?("C")
73
+ assert_equal true, UnicodeUtils.uppercase_char?("Ö")
74
+ assert_equal false, UnicodeUtils.uppercase_char?("2")
75
+ assert_equal false, UnicodeUtils.uppercase_char?("c")
76
+ end
77
+
78
+ def test_cased_char? # letters of either case expect true; a digit expects false
79
+ assert_equal true, UnicodeUtils.cased_char?("a")
80
+ assert_equal true, UnicodeUtils.cased_char?("Ä")
81
+ assert_equal true, UnicodeUtils.cased_char?("ß")
82
+ assert_equal false, UnicodeUtils.cased_char?("2")
83
+ end
84
+
85
+ def test_case_ignorable_char? # six characters expected to be case-ignorable, then a letter and a digit expected not to be
86
+ assert_equal true, UnicodeUtils.case_ignorable_char?(":")
87
+ assert_equal true, UnicodeUtils.case_ignorable_char?("\u{302}")
88
+ assert_equal true, UnicodeUtils.case_ignorable_char?("\u{20dd}")
89
+ assert_equal true, UnicodeUtils.case_ignorable_char?("\u{600}")
90
+ assert_equal true, UnicodeUtils.case_ignorable_char?("\u{2b0}")
91
+ assert_equal true, UnicodeUtils.case_ignorable_char?("\u{2c2}")
92
+ assert_equal false, UnicodeUtils.case_ignorable_char?("a")
93
+ assert_equal false, UnicodeUtils.case_ignorable_char?("1")
94
+ end
95
+
96
+ def test_combining_class # one character expected to have class 0 and one expected to have class 230
97
+ assert_equal 0, UnicodeUtils.combining_class("a")
98
+ assert_equal 230, UnicodeUtils.combining_class("\u{1b6e}")
99
+ end
100
+
101
+ def test_soft_dotted_char?
102
+ assert_equal true, UnicodeUtils.soft_dotted_char?("j")
103
+ assert_equal true, UnicodeUtils.soft_dotted_char?("\u{2c7c}")
104
+ assert_equal false, UnicodeUtils.soft_dotted_char?("a")
30
105
  end
31
106
 
32
107
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stefan Lang
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-10-27 00:00:00 +01:00
12
+ date: 2008-11-02 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -23,19 +23,36 @@ extra_rdoc_files:
23
23
  - README.txt
24
24
  files:
25
25
  - lib/unicode_utils/read_special_casing_map.rb
26
+ - lib/unicode_utils/conditional_casing.rb
26
27
  - lib/unicode_utils/simple_downcase.rb
27
28
  - lib/unicode_utils/read_codepoint_map.rb
29
+ - lib/unicode_utils/read_codepoint_set.rb
30
+ - lib/unicode_utils/titlecase_char_q.rb
31
+ - lib/unicode_utils/cased_char_q.rb
28
32
  - lib/unicode_utils/downcase.rb
29
33
  - lib/unicode_utils/name.rb
34
+ - lib/unicode_utils/uppercase_char_q.rb
30
35
  - lib/unicode_utils/upcase.rb
36
+ - lib/unicode_utils/case_ignorable_char_q.rb
31
37
  - lib/unicode_utils/simple_upcase.rb
38
+ - lib/unicode_utils/lowercase_char_q.rb
39
+ - lib/unicode_utils/combining_class.rb
32
40
  - lib/unicode_utils/version.rb
41
+ - lib/unicode_utils/soft_dotted_char_q.rb
33
42
  - lib/unicode_utils.rb
43
+ - cdata/combining_class_map
44
+ - cdata/cond_lc_map
45
+ - cdata/prop_set_lowercase
46
+ - cdata/cat_set_titlecase
34
47
  - cdata/special_lc_map
35
48
  - cdata/names
49
+ - cdata/cond_uc_map
36
50
  - cdata/special_uc_map
51
+ - cdata/soft_dotted_set
37
52
  - cdata/simple_lc_map
53
+ - cdata/case_ignorable_set
38
54
  - cdata/simple_uc_map
55
+ - cdata/prop_set_uppercase
39
56
  - test/test_unicode_utils.rb
40
57
  - README.txt
41
58
  - LICENSE.txt